Hi guys,
I would like to extract my code into columns.
Why do i get only last row in the excel file?
I would like to extract my code into columns.
Why do i get only last row in the excel file?
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
from bs4 import BeautifulSoup import requests import pandas as pd import re import urllib headers = { 'Sec-Fetch-Mode' : 'cors' , 'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36' , 'Content-Type' : 'text/plain' , } data = '{"tags":[{"sizes":[{"width":728,"height":90},{"width":970,"height":250}],"primary_size":{"width":728,"height":90},"ad_types":["banner"],"uuid":"5f5a2718d3aa6d","id":11247563,"allow_smaller_sizes":false,"use_pmt_rule":false,"prebid":true,"disable_psa":true},{"sizes":[{"width":728,"height":90},{"width":970,"height":250}],"primary_size":{"width":728,"height":90},"ad_types":["banner"],"uuid":"66526a063a1a8c","id":11247564,"allow_smaller_sizes":false,"use_pmt_rule":false,"prebid":true,"disable_psa":true}],"sdk":{"source":"pbjs","version":"2.19.0-pre"},"gdpr_consent":{"consent_string":"BOmDsv2OmDsv2BQABBENCN-AAAAmd7_______9______5uz_Ov_v_f__33e8__9v_l_7_-___u_-3zd4-_1vf99yfm1-7etr3tp_87ues2_Xur__59__3z3_9phPrsk89ryw","consent_required":true},"referrer_detection":{"rd_ref":"https%3A%2F%2Fwww.pararius.com%2Fapartments%2Famsterdam","rd_top":true,"rd_ifs":1,"rd_stk":"https%3A%2F%2Fwww.pararius.com%2Fapartments%2Famsterdam,https%3A%2F%2Fwww.pararius.com%2Fapartments%2Famsterdam"}}' #for n in range(1, num_pages): r = requests.get(page, headers = headers, data = data) content = (r.text) soup = BeautifulSoup(content, 'html.parser' ) #pagination- find max pages page1 = soup.find( 'ul' , { 'class' : 'pagination' }) pages = page1.find_all( 'li' ) last_page = pages[ - 3 ] num_pages = last_page.find( 'a' ).text #for n in range(1, int(num_pages)+1): # page = 'https://www.pararius.com/apartments/amsterdam/page-' + str(n) # print(page) for section in soup.find_all( class_ = 'property-list-item-container' ): dlink = section.find( 'a' ).get( 'href' ) type = section.find( 'span' , { 'class' : 'type' }).text neighborhood = section.find( 'a' ).text.strip().split()[ 1 ] size = section.find( 'li' , { 'class' : 'surface' }).text.strip().split()[ 0 ] bedrooms = section.find( 'li' , { 'class' : 'surface' }).text.strip().split()[ 2 ] furniture = section.find( 'li' , { 'class' : 'surface' }).text.strip().split()[ 4 ] if furniture = = 'upholstered' : furniture = "Unfurnished" elif furniture = = 'furnished or upholstered' : furniture = "Furnished & Unfurnished" #availablefrom = size = section.find('li', {'class': 'surface'}).text.strip().split()[6] price = section.find( 'p' , { 'class' : 'price ' }).text.strip().split()[ 0 ] curr = "EUR" if "€" in price else "other" data = { 'Direct Link' :[dlink], 'Typee' :[ type ], 'Neighborhood' :[neighborhood], 'Size' :[size], 'Bedrooms' :[bedrooms], 'Furniture' :[furniture], 'Price' :[price], 'Currency' :[curr] } df = pd.DataFrame(data) print (df) df.to_excel(r 'C:\Users\xxx\Desktop\scrap_learning\tests\test.xlsx' ) |