hi guys,
i am learning scraping and i currently i ve stopped in point of doing pagination in script below:
what should be my next step?
I would appreciate any kind of help/tip!
i am learning scraping and i currently i ve stopped in point of doing pagination in script below:
from bs4 import BeautifulSoup import requests import pandas as pd import re import urllib headers = { 'Sec-Fetch-Mode': 'cors', 'Referer': 'https://www.pararius.com/apartments/amsterdam', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36', 'Content-Type': 'text/plain', } data = '{"tags":[{"sizes":[{"width":728,"height":90},{"width":970,"height":250}],"primary_size":{"width":728,"height":90},"ad_types":["banner"],"uuid":"5f5a2718d3aa6d","id":11247563,"allow_smaller_sizes":false,"use_pmt_rule":false,"prebid":true,"disable_psa":true},{"sizes":[{"width":728,"height":90},{"width":970,"height":250}],"primary_size":{"width":728,"height":90},"ad_types":["banner"],"uuid":"66526a063a1a8c","id":11247564,"allow_smaller_sizes":false,"use_pmt_rule":false,"prebid":true,"disable_psa":true}],"sdk":{"source":"pbjs","version":"2.19.0-pre"},"gdpr_consent":{"consent_string":"BOmDsv2OmDsv2BQABBENCN-AAAAmd7_______9______5uz_Ov_v_f__33e8__9v_l_7_-___u_-3zd4-_1vf99yfm1-7etr3tp_87ues2_Xur__59__3z3_9phPrsk89ryw","consent_required":true},"referrer_detection":{"rd_ref":"https%3A%2F%2Fwww.pararius.com%2Fapartments%2Famsterdam","rd_top":true,"rd_ifs":1,"rd_stk":"https%3A%2F%2Fwww.pararius.com%2Fapartments%2Famsterdam,https%3A%2F%2Fwww.pararius.com%2Fapartments%2Famsterdam"}}' page_number = 2 page = 'https://www.pararius.com/apartments/amsterdam/page-' + str(page_number) r = requests.get(page, headers=headers, data=data) content = (r.text) soup = BeautifulSoup(content, 'html.parser') for section in soup.find_all(class_='property-list-item-container'): dlink = section.find('a').get('href') type = section.find('span', {'class': 'type'}).text neighborhood = section.find('a').text.strip().split()[1] size = section.find('li', {'class': 'surface'}).text.strip().split()[0] bedrooms = section.find('li', {'class': 'surface'}).text.strip().split()[2] furniture = section.find('li', {'class': 'surface'}).text.strip().split()[4] if furniture == 'upholstered': furniture = "Unfurnished" elif furniture == 'furnished or upholstered': furniture = "Furnished & Unfurnished" availablefrom = size = section.find('li', {'class': 'surface'}).text.strip().split()[6] price = section.find('p', {'class': 'price '}).text.strip().split()[0] curr = "EUR" if "€" in price else "other" print(curr) breakI have to add that it might happend that result from the site has let's say 50 pages, and it can happen that it has 30 only... how to deal with it?
what should be my next step?
I would appreciate any kind of help/tip!