Python Forum

Python script missing the price
Need some help.

What I need is to pull the price from the code below. However, whenever I run the script I get the following output: "2163/110-FRYMV40(CST-NG),,,Brand,Quest,,,Unit of Measure,EA,,,Spec Sheet,Quest - Fryer-mv-40.pdf None"

The "None" is suppose to be the price but for whatever reason I can't seen to get it. When I inspect the pace I can see the div calass with the price.

Any help is appreciated.

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

import requests
from bs4 import BeautifulSoup
import time

driver = webdriver.Chrome(executable_path=r"D:\chromedriver\chromedriver.exe")  # raw string so the backslashes are not treated as escapes
driver.maximize_window()
wait = WebDriverWait(driver, 30)
driver.get("https://www.russellhendrix.com/category/185/cooking-equipment?pagesize=600")

wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.entity-product-price-wrap.grid-item-price-wrap"))).click()

wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "input#gv-postcalcode"))).send_keys("B3K 1X2")

wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "a.gv-red-btn.gv-set-postal"))).click()

wait.until(EC.invisibility_of_element_located((By.CSS_SELECTOR, "a.gv-red-btn.gv-set-postal")))

time.sleep(60)  # wait 60 seconds for the page to load

baseurl = 'https://www.russellhendrix.com'

headers = {
	'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'
}

url = 'https://www.russellhendrix.com/category/185/cooking-equipment'

r = requests.get(url, headers=headers)
soup = BeautifulSoup(r.content, 'lxml')

productlist = soup.find_all('div', class_='entity-product-image-wrap')

productlinks = []

for item in productlist:
	for link in item.find_all('a', href=True):
		productlinks.append(baseurl + link['href'])

for link in productlinks:
	r = requests.get(link, headers=headers)

	soup = BeautifulSoup(r.content, 'lxml')

	skunumber = soup.find('table', class_='product-details-table').text
	pricing = soup.find('div', class_='regPriceValue')

	print(skunumber, pricing)
You have to use the same Selenium session you started with; you can't jump to a new URL with Requests, as it has no connection to what was done in Selenium.
So pass driver.page_source to BeautifulSoup for parsing.
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import time

driver = webdriver.Chrome(executable_path=r"D:\chromedriver\chromedriver.exe")
driver.maximize_window()
wait = WebDriverWait(driver, 10)
driver.get("https://www.russellhendrix.com/category/185/cooking-equipment?pagesize=600")
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.entity-product-price-wrap.grid-item-price-wrap"))).click()
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "input#gv-postcalcode"))).send_keys("B3K 1X2")
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "a.gv-red-btn.gv-set-postal"))).click()
wait.until(EC.invisibility_of_element_located((By.CSS_SELECTOR, "a.gv-red-btn.gv-set-postal")))
time.sleep(10)  # wait 10 seconds for the prices to load after the postal code is set

driver.get("https://www.russellhendrix.com/category/185/cooking-equipment")
soup = BeautifulSoup(driver.page_source, 'lxml')
baseurl = 'https://www.russellhendrix.com'
productlist = soup.find_all('div', class_='entity-product-image-wrap')
productlinks = []
for item in productlist:
    for link in item.find_all('a', href=True):
        productlinks.append(baseurl + link['href'])

for link in productlinks:
    driver.get(link)
    soup = BeautifulSoup(driver.page_source, 'lxml')
    skunumber = soup.find('table', class_='product-details-table').text
    pricing = soup.find('div', class_='regPriceValue')
    print(skunumber, pricing)
Output:
SKU 2163/24-BMV40TC Brand Quest Unit of Measure EA <div class="regPriceValue">$84.39</div> SKU 2163/110-FRYMV40(CST-NG) Brand Quest Unit of Measure EA Spec Sheet Quest - Fryer-mv-40.pdf <div class="regPriceValue">$4,880.70</div> SKU 2163/110-FRYMV40(CST-LP) Brand Quest Unit of Measure EA Spec Sheet Quest - Fryer-mv-40.pdf <div class="regPriceValue">$4,648.29</div> .........
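
Also note that pricing is a BeautifulSoup Tag, so printing it shows the whole <div>. If you only want the dollar amount, call get_text() on it, with a None check for products that have no visible price. A minimal sketch, reusing the soup object from the loop above:

pricing = soup.find('div', class_='regPriceValue')
if pricing is not None:
    price_text = pricing.get_text(strip=True)  # e.g. "$4,880.70"
else:
    price_text = 'N/A'  # no price div, e.g. before the postal code is set
print(price_text)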
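
If you would rather keep using Requests for the per-product pages, one possible workaround (not what was done above) is to copy Selenium's cookies into a requests.Session, so both share the same server-side state. A rough sketch, assuming the driver, productlinks, and headers variables from the script above:

import requests
from bs4 import BeautifulSoup

session = requests.Session()
for cookie in driver.get_cookies():  # Selenium returns a list of cookie dicts
    session.cookies.set(cookie['name'], cookie['value'])

for link in productlinks:
    r = session.get(link, headers=headers)  # cookies carry the postal-code setting
    soup = BeautifulSoup(r.content, 'lxml')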