Python Forum
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
dataframe(pd) learning
#1
Hi guys,

I would like to extract my code into columns.

Why do i get only last row in the excel file?

from bs4 import BeautifulSoup
import requests
import pandas as pd
import re
import urllib

headers = {
    'Sec-Fetch-Mode': 'cors',
    'Referer': 'https://www.pararius.com/apartments/amsterdam',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36',
    'Content-Type': 'text/plain',
}

data = '{"tags":[{"sizes":[{"width":728,"height":90},{"width":970,"height":250}],"primary_size":{"width":728,"height":90},"ad_types":["banner"],"uuid":"5f5a2718d3aa6d","id":11247563,"allow_smaller_sizes":false,"use_pmt_rule":false,"prebid":true,"disable_psa":true},{"sizes":[{"width":728,"height":90},{"width":970,"height":250}],"primary_size":{"width":728,"height":90},"ad_types":["banner"],"uuid":"66526a063a1a8c","id":11247564,"allow_smaller_sizes":false,"use_pmt_rule":false,"prebid":true,"disable_psa":true}],"sdk":{"source":"pbjs","version":"2.19.0-pre"},"gdpr_consent":{"consent_string":"BOmDsv2OmDsv2BQABBENCN-AAAAmd7_______9______5uz_Ov_v_f__33e8__9v_l_7_-___u_-3zd4-_1vf99yfm1-7etr3tp_87ues2_Xur__59__3z3_9phPrsk89ryw","consent_required":true},"referrer_detection":{"rd_ref":"https%3A%2F%2Fwww.pararius.com%2Fapartments%2Famsterdam","rd_top":true,"rd_ifs":1,"rd_stk":"https%3A%2F%2Fwww.pararius.com%2Fapartments%2Famsterdam,https%3A%2F%2Fwww.pararius.com%2Fapartments%2Famsterdam"}}'


#for n in range(1, num_pages):
page = 'https://www.pararius.com/apartments/amsterdam/page-1'

    
    
r = requests.get(page, headers=headers, data=data)
content = (r.text)
soup = BeautifulSoup(content, 'html.parser')


#pagination- find max pages
page1 = soup.find('ul', {'class': 'pagination'})
pages = page1.find_all('li')
last_page = pages[-3]
num_pages = last_page.find('a').text

#for n in range(1, int(num_pages)+1):
#    page = 'https://www.pararius.com/apartments/amsterdam/page-' + str(n)
#    print(page)


for section in soup.find_all(class_='property-list-item-container'):
    dlink = section.find('a').get('href')
    type = section.find('span', {'class': 'type'}).text
    neighborhood = section.find('a').text.strip().split()[1]
    size = section.find('li', {'class': 'surface'}).text.strip().split()[0]
    bedrooms = section.find('li', {'class': 'surface'}).text.strip().split()[2]
    furniture = section.find('li', {'class': 'surface'}).text.strip().split()[4]
    if furniture == 'upholstered':
        furniture = "Unfurnished"
    elif furniture == 'furnished or upholstered':
        furniture = "Furnished & Unfurnished"
    #availablefrom = size = section.find('li', {'class': 'surface'}).text.strip().split()[6]
    price = section.find('p', {'class': 'price '}).text.strip().split()[0]
    curr = "EUR" if "€" in price else "other"

    data = {
        'Direct Link':[dlink],
        'Typee':[type],
        'Neighborhood':[neighborhood],
        'Size':[size],
        'Bedrooms':[bedrooms],
        'Furniture':[furniture],
        'Price':[price],
        'Currency':[curr]
        }
    df = pd.DataFrame(data)
    print(df)
df.to_excel(r'C:\Users\xxx\Desktop\scrap_learning\tests\test.xlsx')
Reply


Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020