Hi guys,
I would like to extract my code into columns.
Why do i get only last row in the excel file?
I would like to extract my code into columns.
Why do i get only last row in the excel file?
from bs4 import BeautifulSoup import requests import pandas as pd import re import urllib headers = { 'Sec-Fetch-Mode': 'cors', 'Referer': 'https://www.pararius.com/apartments/amsterdam', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36', 'Content-Type': 'text/plain', } data = '{"tags":[{"sizes":[{"width":728,"height":90},{"width":970,"height":250}],"primary_size":{"width":728,"height":90},"ad_types":["banner"],"uuid":"5f5a2718d3aa6d","id":11247563,"allow_smaller_sizes":false,"use_pmt_rule":false,"prebid":true,"disable_psa":true},{"sizes":[{"width":728,"height":90},{"width":970,"height":250}],"primary_size":{"width":728,"height":90},"ad_types":["banner"],"uuid":"66526a063a1a8c","id":11247564,"allow_smaller_sizes":false,"use_pmt_rule":false,"prebid":true,"disable_psa":true}],"sdk":{"source":"pbjs","version":"2.19.0-pre"},"gdpr_consent":{"consent_string":"BOmDsv2OmDsv2BQABBENCN-AAAAmd7_______9______5uz_Ov_v_f__33e8__9v_l_7_-___u_-3zd4-_1vf99yfm1-7etr3tp_87ues2_Xur__59__3z3_9phPrsk89ryw","consent_required":true},"referrer_detection":{"rd_ref":"https%3A%2F%2Fwww.pararius.com%2Fapartments%2Famsterdam","rd_top":true,"rd_ifs":1,"rd_stk":"https%3A%2F%2Fwww.pararius.com%2Fapartments%2Famsterdam,https%3A%2F%2Fwww.pararius.com%2Fapartments%2Famsterdam"}}' #for n in range(1, num_pages): page = 'https://www.pararius.com/apartments/amsterdam/page-1' r = requests.get(page, headers=headers, data=data) content = (r.text) soup = BeautifulSoup(content, 'html.parser') #pagination- find max pages page1 = soup.find('ul', {'class': 'pagination'}) pages = page1.find_all('li') last_page = pages[-3] num_pages = last_page.find('a').text #for n in range(1, int(num_pages)+1): # page = 'https://www.pararius.com/apartments/amsterdam/page-' + str(n) # print(page) for section in soup.find_all(class_='property-list-item-container'): dlink = section.find('a').get('href') type = section.find('span', {'class': 'type'}).text neighborhood = section.find('a').text.strip().split()[1] size = section.find('li', {'class': 'surface'}).text.strip().split()[0] bedrooms = section.find('li', {'class': 'surface'}).text.strip().split()[2] furniture = section.find('li', {'class': 'surface'}).text.strip().split()[4] if furniture == 'upholstered': furniture = "Unfurnished" elif furniture == 'furnished or upholstered': furniture = "Furnished & Unfurnished" #availablefrom = size = section.find('li', {'class': 'surface'}).text.strip().split()[6] price = section.find('p', {'class': 'price '}).text.strip().split()[0] curr = "EUR" if "€" in price else "other" data = { 'Direct Link':[dlink], 'Typee':[type], 'Neighborhood':[neighborhood], 'Size':[size], 'Bedrooms':[bedrooms], 'Furniture':[furniture], 'Price':[price], 'Currency':[curr] } df = pd.DataFrame(data) print(df) df.to_excel(r'C:\Users\xxx\Desktop\scrap_learning\tests\test.xlsx')