Python Forum

Full Version: [Solved]Help with BeautifulSoup.getText() Error
You're currently viewing a stripped down version of our content. View the full version with proper formatting.
Hello,

I'm trying to test this code that checks the price of an Amazon product, but when I try running it, I get this error:
Error:
line 31, in checking_price product_title = soup.find('span', id='productTitle').getText() AttributeError: 'NoneType' object has no attribute 'getText'
How do I fix this?

Thanks in advance.

Code:
import requests #pip install requests
from bs4 import BeautifulSoup #pip install bs4
import os
import time
import json

# #Opening The Settings.json file
# with open('settings.json','r') as file:
#     settings = json.load(file)

# Set your budget (upper price limit, in the listing's currency)
# my_price = settings['budget']
my_price = 400

# Currency symbols and separators to strip from the scraped price string.
# (Fixed: the original list had a stray tab before '£' -- so a plain '£'
# was never removed -- and listed '¥' twice.)
currency_symbols = ['€', '£', '$', '¥', 'HK$', '₹', ',']

# the URL we are going to use
# URL = settings['url']
URL = 'https://www.amazon.ca/MSI-Geforce-192-bit-Support-Graphics/dp/B07ZHDZ1K6/ref=sr_1_16?crid=1M9LHOYX99CQW&keywords=Nvidia%2BGTX%2B1060&qid=1670109381&sprefix=nvidia%2Bgtx%2B1060%2Caps%2C79&sr=8-16&th=1'

# Google "My User Agent" And Replace It
headers = {"User-Agent": 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.75 Safari/537.36'}

#Checking the price
def checking_price():
    page = requests.get(URL, headers=headers)
    soup  = BeautifulSoup(page.text, 'html.parser')

    #Finding the elements
    product_title = soup.find('span', id='productTitle').getText()
    product_price = soup.find('span', class_ = "a-offscreen").getText()

    # using replace() to remove currency symbols
    for i in currency_symbols : 
        product_price = product_price.replace(i, '')

    #Converting the string to integer
    product_price = int(float(product_price))

    ProductTitleStrip = product_title.strip()
    print(ProductTitleStrip)
    print(product_price)



    # checking the price
    if(product_price<my_price):
        print("You Can Buy This Now!")
    else:
        print("The Price Is Too High!")


checking_price()
(Dec-04-2022, 12:06 AM)Extra Wrote: [ -> ]How do I fix this?
You must look at the data you get back (e.g. print(soup)) before you try to parse it.
Here the request is blocked, so no parsing will work at all.
Output:
<p class="a-last">Sorry, we just need to make sure you're not a robot. For best results, please make sure your browser is accepting cookies.</p>
You can use Selenium to bypass this, but writing better request headers will also fix it.
import requests
from bs4 import BeautifulSoup
from time import sleep

# Product page to scrape (same listing as in the original post)
url = 'https://www.amazon.ca/MSI-Geforce-192-bit-Support-Graphics/dp/B07ZHDZ1K6/ref=sr_1_16?crid=1M9LHOYX99CQW&keywords=Nvidia%2BGTX%2B1060&qid=1670109381&sprefix=nvidia%2Bgtx%2B1060%2Caps%2C79&sr=8-16&th=1'
# Fuller, browser-like request headers. A bare requests call gets served
# Amazon's "make sure you're not a robot" page; these headers avoid that
# block. NOTE(review): Amazon changes its bot detection over time --
# confirm these still work before relying on them.
headers = {
    'authority': 'www.amazon.com',
    'pragma': 'no-cache',
    'cache-control': 'no-cache',
    'dnt': '1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (X11; CrOS x86_64 8172.45.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.64 Safari/537.36',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'sec-fetch-site': 'none',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-dest': 'document',
    'accept-language': 'en-GB,en-US;q=0.9,en;q=0.8',
}

response = requests.get(url, headers=headers)
# response.content feeds raw bytes to BeautifulSoup, letting it handle the
# document's own encoding; the 'lxml' parser requires `pip install lxml`.
soup = BeautifulSoup(response.content, 'lxml')
sleep(2)
#product_title = soup.find('span', id='productTitle')
# CSS-selector lookup by id; returns None if the element is absent
product_title = soup.select_one('#productTitle')
>>> product_title
<span class="a-size-large product-title-word-break" id="productTitle">        MSI Gaming Geforce GTX 1660 Super 192-bit HDMI/DP 6GB GDRR6 HDCP Support DirectX 12 Dual Fan VR Ready OC Graphics Card       </span>
>>> 
>>> print(product_title.text.strip())
MSI Gaming Geforce GTX 1660 Super 192-bit HDMI/DP 6GB GDRR6 HDCP Support DirectX 12 Dual Fan VR Ready OC Graphics Card
Also note that I use response.content — this passes raw bytes into BeautifulSoup, so it can deal with Unicode itself.
With response.text, requests tries to decode the content before it reaches BeautifulSoup.
Thanks for the help.

When I try to run it now, I get this error:
Error:
line 41, in checking_price soup = BeautifulSoup(response.content, 'lxml') bs4.FeatureNotFound: Couldn't find a tree builder with the features you requested: lxml. Do you need to install a parser library?
Why is this?
(Dec-04-2022, 01:07 AM)Extra Wrote: [ -> ]Why is this?
Installing a parser
You can choose which parser to use; I use lxml because it is fast.
pip install lxml
To use the parser that comes with Python, you can use:
soup = BeautifulSoup(response.content, "html.parser")
Thanks!

I got it to work.

Output:
MSI Gaming Geforce GTX 1660 Super 192-bit HDMI/DP 6GB GDRR6 HDCP Support DirectX 12 Dual Fan VR Ready OC Graphics Card 348.98 You Can Buy This Now!
Code:
import requests
from bs4 import BeautifulSoup

# Set your budget (upper price limit, in the listing's currency)
my_price = 400

# Currency symbols and separators to strip from the scraped price string.
# (Fixed: the original list had a stray tab before '£' -- so a plain '£'
# was never removed -- and listed '¥' twice.)
currency_symbols = ['€', '£', '$', '¥', 'HK$', '₹', ',']

# the URL we are going to use
URL = 'https://www.amazon.ca/MSI-Geforce-192-bit-Support-Graphics/dp/B07ZHDZ1K6/ref=sr_1_16?crid=1M9LHOYX99CQW&keywords=Nvidia%2BGTX%2B1060&qid=1670109381&sprefix=nvidia%2Bgtx%2B1060%2Caps%2C79&sr=8-16&th=1'

# Browser-like headers so Amazon serves the product page instead of its
# "not a robot" captcha page.
headers = {
    'authority': 'www.amazon.com',
    'pragma': 'no-cache',
    'cache-control': 'no-cache',
    'dnt': '1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (X11; CrOS x86_64 8172.45.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.64 Safari/537.36',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'sec-fetch-site': 'none',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-dest': 'document',
    'accept-language': 'en-GB,en-US;q=0.9,en;q=0.8',
}

#Checking the price
def checking_price():
    # page = requests.get(URL, headers=headers)
    response = requests.get(URL, headers=headers)
    soup = BeautifulSoup(response.content, "html.parser")

    #Finding the elements
    product_title = soup.find('span', class_ = "a-size-large product-title-word-break").getText()
    product_price = soup.find('span', class_ = "a-offscreen").getText()

    # using replace() to remove currency symbols
    for i in currency_symbols : 
        product_price = product_price.replace(i,'')

    ProductTitleStrip = product_title.strip()
    ProductPriceStrip = product_price.strip()
    print(ProductTitleStrip)
    print(ProductPriceStrip)

    #Converting the string to integer
    product_price = int(float(product_price))

    # checking the price
    if(product_price<my_price):
        print("You Can Buy This Now!")
    else:
        print("The Price Is Too High!")


checking_price()

# while True:
#     checking_price()
#     time.sleep(3600) #Run every hour 
I ended up using 'html.parser' for BeautifulSoup — it worked better for me than lxml since it needs no extra install.