Python Forum

Full Version: Help to web scrape from 2 diffrent sources
You're currently viewing a stripped down version of our content. View the full version with proper formatting.
Hello,

I have an Amazon Price Tracker program that takes links from Amazon and prints out the current price and a message that states whether I should buy it or not according to the budget price I set in my database (The database stores: ItemName, ItemLink, AlertPrice).

The problem is that it only recognizes Amazon links but not links from other websites like Walmart.ca.

How would I get my code to work with other sites like Walmart?

Thanks in advance.

import requests
from bs4 import BeautifulSoup
import sqlite3
from rich import print

#Initializing Currency Symbols to substract it from our string
currency_symbols = ['€', '	£', '$', "¥", "HK$", "₹", "¥", "," ] 

headers = {
'authority': 'www.amazon.com',
'pragma': 'no-cache',
'cache-control': 'no-cache',
'dnt': '1',
'upgrade-insecure-requests': '1',
'user-agent': 'Mozilla/5.0 (X11; CrOS x86_64 8172.45.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.64 Safari/537.36',
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'sec-fetch-site': 'none',
'sec-fetch-mode': 'navigate',
'sec-fetch-dest': 'document',
'accept-language': 'en-GB,en-US;q=0.9,en;q=0.8',
}

#------------------------------------------
#            Get Price of Products
#------------------------------------------
#Get the price of each product
def get_price(URL):
    response = requests.get(URL, headers=headers)
    soup = BeautifulSoup(response.content, "html.parser")

    #Finding the elements
    product_title = soup.find('span', class_ = "a-size-large product-title-word-break").getText()
    product_price = soup.find('span', class_ = "a-offscreen").getText()

    # using replace() to remove currency symbols
    for i in currency_symbols : 
        product_price = product_price.replace(i,'')

    ProductTitleStrip = product_title.strip()
    ProductPriceStrip = product_price.strip()
    print("[bright_yellow]"+ProductTitleStrip)
    print("[bright_cyan]$" + ProductPriceStrip)

    #Converting the string to integer
    product_price = int(float(product_price))
    return(product_price)
#------------------------------------------


#------------------------------------------
#            Get Products to Track
#------------------------------------------
#Connect to the database
connection = sqlite3.connect('ProductTrackerDatabase.db')
cursor = connection.cursor()

for Product_Name, URL, my_price in cursor.execute("SELECT Product, URL, Alert_Price FROM AmazonPriceTracker"):
    current_price = get_price(URL)
    if current_price < float(my_price):
        print("[green]You Can Buy This Now!\n")
    else:
        print("[red]The Price Is Too High\n")

connection.close() #Close the connection
#------------------------------------------
This outputs 5 products from Amazon and the last item (the error) is linked to Walmart (The Link)
Output:
MSI Gaming Geforce GTX 1660 Super 192-bit HDMI/DP 6GB GDRR6 HDCP Support DirectX 12 Dual Fan VR Ready OC Graphics Card $401.81 The Price Is Too High Western Digital 2TB WD Blue 3D NAND Internal PC SSD - SATA III 6 Gb/s, 2.5"/7mm, Up to 560 MB/s - WDS200T2B0A $224.99 The Price Is Too High 12V 3000mAh Monitors Large Capacity Rechargeable Li-ion Storage Battery $23.26 You Can Buy This Now! ZOTAC Gaming GeForce GTX 1660 6GB GDDR5 192-bit Gaming Graphics Card, Super Compact, ZT-T16600K-10M $355.00 The Price Is Too High Hobart 770726 Shade 5, Mirrored Lens Safety Glasses $45.51 The Price Is Too High Traceback (most recent call last): File "C:\Users\BX-PC\Downloads\Python Programs\Amazon Price Tracker\AmazonPriceTracker.py", line 58, in <module> current_price = get_price(URL) File "C:\Users\BX-PC\Downloads\Python Programs\Amazon Price Tracker\AmazonPriceTracker.py", line 32, in get_price product_title = soup.find('span', class_ = "a-size-large product-title-word-break").getText() AttributeError: 'NoneType' object has no attribute 'getText'