Oct-12-2018, 11:19 PM
I wanted to write a script to automatically archive the forum's latest activity from time to time. I don't see a method via their API, so I just wrote a quick script that uses Selenium to archive the forums, help pages, and the latest 50 threads.
It has some hard-coded values that are not professional, but I'll update it when I get the chance. If anyone is interested, they can run it on their own to help archive the forum forever. I could put it on the server to run every night, but I'm not sure whether it's worth the resources.
It has some hard-coded values that are not professional, but I'll update it when I get the chance. If anyone is interested, they can run it on their own to help archive the forum forever. I could put it on the server to run every night, but I'm not sure whether it's worth the resources.
"""Submit python-forum.io pages to the Wayback Machine through a browser.

The script drives a Selenium-controlled browser to https://web.archive.org/,
types each URL into the "Save Page Now" form and submits it.  It archives a
fixed list of forum/help pages first, then every link found on the forum's
"latest 50" page.
"""
import os
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Hard-coded driver locations; adjust for the machine running the script.
CHROMEPATH = '/home/metulburr/chromedriver'
PHANTOMPATH = '/home/metulburr/phantomjs'
# NOTE(review): not referenced anywhere in this script.
URLSAVE = 'https://python-forum.io'

# Help-document ids, in the order they are submitted for archiving.
_HELP_IDS = (
    40, 41, 25, 35, 19, 20, 21, 22, 30, 32, 42, 46, 28, 33, 10, 11, 13,
    37, 29, 31, 34, 38, 44, 45, 47, 14, 15, 16, 27, 24, 43, 48, 39,
)

# Static pages archived on every run: board index, sub-forums, "who's
# online" pages and the help documents.
FORUMS = [
    'https://python-forum.io',
    'https://python-forum.io/Forum-General-Coding-Help',
    'https://python-forum.io/Forum-Homework',
    'https://python-forum.io/Forum-GUI',
    'https://python-forum.io/Forum-Game-Development',
    'https://python-forum.io/Forum-Networking',
    'https://python-forum.io/Forum-Web-Development',
    'https://python-forum.io/Forum-General',
    'https://python-forum.io/Forum-News-and-Discussions',
    'https://python-forum.io/Forum-Tutorials',
    'https://python-forum.io/Forum-Tutorial-Requests-and-Submissions',
    'https://python-forum.io/Forum-Python-Installation-and-Execution',
    'https://python-forum.io/Forum-Fundamentals',
    'https://python-forum.io/Forum-Common-pitfalls-and-what-to-do',
    'https://python-forum.io/Forum-Web-Scraping',
    'https://python-forum.io/Forum-Web-Tutorials',
    'https://python-forum.io/Forum-GUI-tutorials',
    'https://python-forum.io/Forum-Game-Tutorials',
    'https://python-forum.io/Forum-Networking-Tutorials',
    'https://python-forum.io/Forum-Jobs',
    'https://python-forum.io/Forum-Forum-Off-Topic',
    'https://python-forum.io/Forum-Board',
    'https://python-forum.io/Forum-Bar',
    'https://python-forum.io/online.php',
    'https://python-forum.io/online.php?action=today',
    'https://python-forum.io/misc.php?action=help',
] + [
    f'https://python-forum.io/misc.php?action=help&hid={hid}'
    for hid in _HELP_IDS
]


class App:
    """Archive the forum's static pages and its latest links.

    Constructing an instance performs the whole run: it starts Chrome,
    archives every URL in ``FORUMS``, collects the links on the "latest 50"
    page into ``self.latest``, archives those too, and finally closes the
    browser.
    """

    def __init__(self):
        self.setup_chrome()
        #self.setup_headless()
        self.latest = []
        try:
            for url in FORUMS:
                self.archive_url(url)
            self.get_latest()
            for url in self.latest:
                self.archive_url(url)
        finally:
            # Always shut the browser/driver down, even when a page fails,
            # so no orphaned Chrome or chromedriver process is left behind.
            self.browser.quit()

    def archive_url(self, url):
        """Submit *url* through the "Save Page Now" form on web.archive.org.

        Raises whatever Selenium raises when a form element cannot be found
        or the confirmation element does not appear within 10 seconds.
        """
        self.browser.get('https://web.archive.org/')
        self.delay()
        # The absolute XPaths are tied to the archive.org page layout at the
        # time of writing and will break if that layout changes.
        self.browser.find_element(
            By.XPATH,
            "/html/body/div[3]/div/div[2]/div/div[3]/div[3]/div[2]/form/input[1]",
        ).click()
        self.delay()
        self.browser.find_element(
            By.CLASS_NAME, 'web-save-url-input'
        ).send_keys(url)
        self.delay()
        self.browser.find_element(
            By.XPATH,
            '/html/body/div[3]/div/div[2]/div/div[3]/div[3]/div[2]/form/input[2]',
        ).click()
        # Wait for the element with id "wmtbURL" before reporting success.
        WebDriverWait(self.browser, 10).until(
            EC.presence_of_element_located((By.ID, "wmtbURL"))
        )
        print(f'Archived: {url}')

    def delay(self):
        """Pause for 1.5 seconds between browser interactions."""
        time.sleep(1.5)

    def setup_chrome(self):
        """Start a Chrome session and store it in ``self.browser``."""
        #options = self.chrome_prep()
        os.environ["webdriver.chrome.driver"] = CHROMEPATH
        # NOTE(review): passing the driver path positionally works on
        # Selenium 3 / early 4; newer releases expect a Service object.
        self.browser = webdriver.Chrome(CHROMEPATH)
        self.browser.set_window_position(0, 0)
        self.delay()

    def setup_headless(self):
        """Start a PhantomJS session and store it in ``self.browser``."""
        # NOTE(review): PhantomJS support only exists in older Selenium
        # releases; confirm the installed version before enabling this.
        self.browser = webdriver.PhantomJS(PHANTOMPATH)
        self.delay()

    def get_latest(self):
        """Append the links on the forum's "latest 50" page to ``self.latest``.

        Each link is added once, in page order.  Links that are not
        http(s) URLs, or that are already in ``FORUMS`` or ``self.latest``,
        are skipped because archiving them again (or at all) is wasted work.
        """
        self.browser.get('https://python-forum.io/latest50.php')
        anchors = self.browser.find_elements(By.XPATH, "//a[@href]")
        hrefs = [anchor.get_attribute("href") for anchor in anchors]
        known = set(FORUMS) | set(self.latest)
        # dict.fromkeys drops duplicates while keeping first-seen order.
        for href in dict.fromkeys(hrefs):
            if not href or href in known:
                continue
            if href.startswith(('http://', 'https://')):
                self.latest.append(href)


# Kept as an unconditional call so running (or importing) the file behaves
# exactly as before: the archive run starts immediately.
App()
Recommended Tutorials: