Mar-10-2024, 05:53 PM
# Website monitor: polls URL_TO_MONITOR every DELAY_TIME seconds and logs
# whether its HTML (with <script> and <meta> tags stripped) differs from the
# snapshot saved on the previous poll.
import logging
import os
import smtplib as smtp  # not used by the code shown here; import kept as in the original
import time

import requests
from bs4 import BeautifulSoup

try:
    # BeautifulSoup is called with features="lxml" below, so fail early with a
    # clear message if the parser is missing.
    import lxml  # noqa: F401
except ImportError as exc:
    raise RuntimeError("Please install lxml") from exc

URL_TO_MONITOR = "https://www.yahoo.com/"  # change this to the URL you want to monitor
DELAY_TIME = 15  # seconds
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests.get() can block forever
SNAPSHOT_FILE = "previous_content.txt"  # last processed HTML, in the working directory


def process_html(string: str) -> str:
    """Return *string* parsed as HTML with script and meta tags removed.

    The document is parsed with the lxml parser, every ``<script>`` and
    ``<meta>`` element is detached from the tree, and the remaining markup is
    serialized back to a string with all ``\\r`` characters dropped.
    """
    soup = BeautifulSoup(string, features="lxml")

    # NOTE: the original called soup.prettify() here and discarded the result;
    # prettify() only returns a formatted string, so the call had no effect.
    for tag in soup.find_all(["script", "meta"]):
        tag.extract()

    return str(soup).replace("\r", "")


def _read_snapshot() -> str:
    """Return the stored snapshot, or an empty string if none exists yet."""
    try:
        # errors="replace": a snapshot written earlier with a different
        # encoding should compare as "changed", not crash every poll.
        with open(SNAPSHOT_FILE, "r", encoding="utf-8", errors="replace") as fh:
            return fh.read()
    except FileNotFoundError:
        return ""


def webpage_was_changed() -> bool:
    """Returns true if the webpage was changed, otherwise false.

    Fetches URL_TO_MONITOR, normalizes the body with process_html() and
    compares it to the contents of SNAPSHOT_FILE. When they differ, the
    snapshot is overwritten with the new content. With no snapshot on disk
    the previous content is treated as empty, so the first poll normally
    reports a change.

    Raises:
        requests.RequestException: on connection errors, timeouts, or an
            HTTP error status (4xx/5xx).
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
    }
    response = requests.get(URL_TO_MONITOR, headers=headers, timeout=REQUEST_TIMEOUT)
    # Do not let an error page overwrite the snapshot and show up as a
    # spurious "change"; main() logs the exception and retries next cycle.
    response.raise_for_status()

    processed_response_html = process_html(response.text)
    if processed_response_html == _read_snapshot():
        return False

    # Explicit UTF-8 so pages with non-ASCII text can be saved regardless of
    # the platform's default encoding.
    with open(SNAPSHOT_FILE, "w", encoding="utf-8") as fh:
        fh.write(processed_response_html)
    return True


def main() -> None:
    """Poll the monitored page forever, logging the result of each check.

    The log level is taken from the LOGLEVEL environment variable (default
    "INFO"). Any exception raised by a check is logged with its traceback
    and the loop continues after the usual delay.
    """
    log = logging.getLogger(__name__)
    logging.basicConfig(
        level=os.environ.get("LOGLEVEL", "INFO"), format="%(asctime)s %(message)s"
    )
    log.info("Running Website Monitor")
    while True:
        try:
            if webpage_was_changed():
                log.info("WEBPAGE WAS CHANGED.")
            else:
                log.info("Webpage was not changed.")
        except Exception as e:
            # Top-level boundary: keep the monitor alive, but record the error.
            log.exception(e)
        time.sleep(DELAY_TIME)


if __name__ == "__main__":
    main()
# Do not use a bare except. It suppresses programming errors. In this case
lxml
wasn't installed, which is required by bs4 (explicit features="lxml"). I didn't check the other stuff, just used a format tool (ruff format).
Almost dead, but too lazy to die: https://sourceserver.info
All humans together. We don't need politicians!
All humans together. We don't need politicians!