May-07-2020, 04:23 PM
(This post was last modified: May-07-2020, 04:23 PM by mbadatanut.)
hello - I am having difficult scraping more than one page on a real estate website. any tips or recommendations on my code would be very much appreciated. thanks!
import requests from bs4 import BeautifulSoup from csv import writer base_url = 'https://www.rew.ca/properties/areas/kelowna-bc' for i in range(1,26): url = '/page/' + str(i) while url: response = requests.get(f"{base_url}{url}") soup = BeautifulSoup(response.text, "html.parser") listings = soup.find_all("article") with open("property4.csv", "w") as csv_file: csv_writer = writer(csv_file) csv_writer.writerow(["title", "type", "price", "location", "bedrooms", "bathrooms", "square feet", "link"]) for listing in listings: location = listing.find(class_="displaypanel-info").get_text().strip() price = listing.find(class_="displaypanel-title hidden-xs").get_text().strip() link = listing.find("a").get('href').strip() title = listing.find("a").get('title').strip() type = (listing.find(class_="clearfix hidden-xs").find(class_="displaypanel-info")).get_text() bedrooms = (listing.find_all("li")[2]).get_text() bathrooms = (listing.find_all("li")[3]).get_text() square_feet = (listing.find_all("li")[4]).get_text() csv_writer.writerow([title, type, price, location, bedrooms, bathrooms, square_feet, link]) next_btn = soup.find(class_="paginator-next_page paginator-control") url = next_btn.find("a")["href"]