Mar-10-2024, 09:18 AM
Hello. I have the following Python code that checks a website for changes. The script always gives me the error "Error checking website." What am I doing wrong?
"""Poll a web page and report when its (script/meta-stripped) HTML changes."""

import logging
import os
import smtplib as smtp
import time

import requests
from bs4 import BeautifulSoup

URL_TO_MONITOR = "https://www.yahoo.com/"  # change this to the URL you want to monitor
DELAY_TIME = 15  # seconds between checks
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests.get() may block forever
PREVIOUS_CONTENT_FILE = "previous_content.txt"


def process_html(string: str) -> str:
    """Return *string* parsed as HTML with <script> and <meta> tags removed.

    Carriage returns are stripped from the result so that the text compares
    equal regardless of the line-ending style it was stored with.
    """
    soup = BeautifulSoup(string, features="lxml")

    # NOTE: the original called soup.prettify() here and discarded the result;
    # prettify() returns a new string and does not modify the soup, so the
    # call had no effect and has been dropped.
    for tag_name in ("script", "meta"):
        for tag in soup.select(tag_name):
            tag.extract()

    return str(soup).replace('\r', '')


def webpage_was_changed() -> bool:
    """Return True if the monitored page differs from the stored snapshot.

    Fetches URL_TO_MONITOR, normalises the HTML with process_html() and
    compares it with the contents of PREVIOUS_CONTENT_FILE. When they differ
    the file is overwritten with the new content.

    Raises:
        requests.RequestException: the page could not be fetched.
        OSError: the snapshot file could not be read or written.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
    }
    response = requests.get(URL_TO_MONITOR, headers=headers, timeout=REQUEST_TIMEOUT)
    processed_response_html = process_html(response.text)

    try:
        # errors="replace": a snapshot written earlier with a different
        # (locale-default) encoding must not make every later check fail; it
        # simply compares unequal once and is then rewritten as UTF-8.
        with open(PREVIOUS_CONTENT_FILE, encoding="utf-8", errors="replace") as snapshot:
            previous_response_html = snapshot.read()
    except FileNotFoundError:
        # First run: nothing stored yet.
        previous_response_html = ""

    if processed_response_html == previous_response_html:
        return False

    # Explicit UTF-8 so pages containing characters outside the locale's
    # default code page can always be written.
    with open(PREVIOUS_CONTENT_FILE, 'w', encoding="utf-8") as snapshot:
        snapshot.write(processed_response_html)
    return True


def main() -> None:
    """Check the page every DELAY_TIME seconds, logging the outcome of each check."""
    log = logging.getLogger(__name__)
    logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"), format='%(asctime)s %(message)s')
    log.info("Running Website Monitor")

    while True:
        try:
            if webpage_was_changed():
                log.info("WEBPAGE WAS CHANGED.")
                print("The website was changed")
            else:
                log.info("Webpage was not changed.")
        except Exception:
            # log.exception() appends the traceback, so the actual cause of
            # the failure is visible instead of being silently discarded.
            # Catching Exception (not a bare except) lets Ctrl+C stop the loop.
            log.exception("Error checking website.")
        time.sleep(DELAY_TIME)


if __name__ == "__main__":
    main()