Jun-16-2020, 11:15 PM
I'm trying to get this code to open a page, pull item and info data, add it to a dictionary then repeat on the next page. It works fine on the first page, but then when it continues it will go to as many pages as needed but does not run the for loops. I can see it looking at each page but the dictionary only contains info from the first page. Thanks in advance!
from lxml import html import requests import re from selenium import webdriver from selenium.webdriver.common.keys import Keys from selenium.common.exceptions import NoSuchElementException from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC import time def proxi_scrape(auction_url): PATH = "/Users/jacobdonelson/Desktop/chromedriver" driver = webdriver.Chrome(PATH) driver.get(auction_url) print('Auction URL: ' + auction_url) page = requests.get(driver.current_url) tree = html.fromstring(page.content) number_of_pages = tree.xpath('//*[@id="Viewing"]/div[1]/text()') number_of_pages = number_of_pages[-1] number_of_pages = int(number_of_pages.replace(' of ', '')) desired_page = 1 clean_prices = [] total_items = [] while desired_page <= 3: go_to_page = WebDriverWait(driver, 20).until( EC.presence_of_element_located((By.XPATH, '//*[@id="txtGoToPage"]'))) go_to_page.clear() go_to_page.send_keys(desired_page) go_to_page.send_keys(Keys.RETURN) current_page = driver.current_url new_page = requests.get(current_page) new_tree = html.fromstring(new_page.content) new_items = new_tree.xpath('//a[@class="showVisited responsive-width "]/text()') new_prices = new_tree.xpath('//div[@class="smInstaBidButton"]/text()') for new_price in new_prices: clean_price = new_price.replace('BID NOW $', '') clean_price = clean_price.replace(',', '') clean_prices.append(clean_price) for new_item in new_items: total_items.append(new_item) desired_page += 1 auction_items = dict(zip(total_items, clean_prices)) return auction_items