Python Forum
python-forum.io on way back machine
Thread Rating:
  • 2 Vote(s) - 2 Average
  • 1
  • 2
  • 3
  • 4
  • 5
python-forum.io on way back machine
#1
I wanted to write a script to automatically archive the forum's latest activity from time to time. I don't see a way to do this via their API, so I just wrote a quick script that uses Selenium to archive the forums, help pages, and the latest 50 threads.

It has some things hard-coded, which is not professional, but I'll update it when I get the chance. If anyone is interested, they can run it on their own to help archive the forum forever. I could put it on the server to run every night, but I'm not sure whether it's worth the resources.

from selenium import webdriver
import time
import os
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait 
from selenium.webdriver.support import expected_conditions as EC 

 
# Filesystem locations of the browser driver binaries.  Each one can be
# overridden through an environment variable so the script also runs on
# machines other than the author's; the original paths remain the defaults.
CHROMEPATH = os.environ.get('CHROMEDRIVER_PATH', '/home/metulburr/chromedriver')
PHANTOMPATH = os.environ.get('PHANTOMJS_PATH', '/home/metulburr/phantomjs')

# Root URL of the forum (not referenced anywhere else in this script).
URLSAVE = 'https://python-forum.io'

# Fixed set of pages submitted on every run, in this order: the forum root,
# each board index, the online.php pages, the help index and the individual
# help documents.
_SITE = 'https://python-forum.io'

# Board slugs; each becomes '<site>/Forum-<slug>'.
_BOARDS = (
    'General-Coding-Help',
    'Homework',
    'GUI',
    'Game-Development',
    'Networking',
    'Web-Development',
    'General',
    'News-and-Discussions',
    'Tutorials',
    'Tutorial-Requests-and-Submissions',
    'Python-Installation-and-Execution',
    'Fundamentals',
    'Common-pitfalls-and-what-to-do',
    'Web-Scraping',
    'Web-Tutorials',
    'GUI-tutorials',
    'Game-Tutorials',
    'Networking-Tutorials',
    'Jobs',
    'Forum-Off-Topic',
    'Board',
    'Bar',
)

# Values of the 'hid' query parameter of the help documents, in submit order.
_HELP_IDS = (
    40, 41, 25, 35, 19, 20, 21, 22, 30, 32, 42,
    46, 28, 33, 10, 11, 13, 37, 29, 31, 34, 38,
    44, 45, 47, 14, 15, 16, 27, 24, 43, 48, 39,
)

FORUMS = [_SITE]
FORUMS += [f'{_SITE}/Forum-{board}' for board in _BOARDS]
FORUMS += [
    f'{_SITE}/online.php',
    f'{_SITE}/online.php?action=today',
    f'{_SITE}/misc.php?action=help',
]
FORUMS += [f'{_SITE}/misc.php?action=help&hid={hid}' for hid in _HELP_IDS]
 
class App:
    """Drive a browser to submit python-forum.io pages to web.archive.org.

    Constructing an instance performs the whole run: it starts Chrome,
    submits every URL in ``FORUMS``, collects the thread links found on the
    forum's ``latest50.php`` page, submits those as well, and finally quits
    the browser (also when a step raises).
    """

    # Links on latest50.php that start with this prefix are treated as threads.
    THREAD_PREFIX = 'https://python-forum.io/Thread-'

    # Absolute XPaths into the web.archive.org front page.  Presumably the
    # text field and the submit control of the save-page form -- they break
    # whenever that page's layout changes, so verify them against the live page.
    SAVE_INPUT_XPATH = "/html/body/div[3]/div/div[2]/div/div[3]/div[3]/div[2]/form/input[1]"
    SAVE_SUBMIT_XPATH = '/html/body/div[3]/div/div[2]/div/div[3]/div[3]/div[2]/form/input[2]'

    def __init__(self):
        """Start the browser and run the complete archive job."""
        self.latest = []
        self.setup_chrome()
        # Call self.setup_headless() instead of setup_chrome() for a PhantomJS run.
        try:
            for url in FORUMS:
                self.archive_url(url)
            self.get_latest()
            for url in self.latest:
                self.archive_url(url)
        finally:
            # Always shut the driver down so no browser process is left behind.
            self.browser.quit()

    def archive_url(self, url):
        """Submit *url* through the form on the web.archive.org front page.

        Raises whatever Selenium raises when an element is missing or when
        the element with id ``wmtbURL`` does not appear within 10 seconds.
        """
        self.browser.get('https://web.archive.org/')
        self.delay()
        self.browser.find_element(By.XPATH, self.SAVE_INPUT_XPATH).click()
        self.delay()
        self.browser.find_element(By.CLASS_NAME, 'web-save-url-input').send_keys(url)
        self.delay()
        self.browser.find_element(By.XPATH, self.SAVE_SUBMIT_XPATH).click()
        # The run only continues once this element exists on the resulting page.
        WebDriverWait(self.browser, 10).until(EC.presence_of_element_located((By.ID,"wmtbURL")))
        print(f'Archived: {url}')

    def delay(self):
        """Pause for 1.5 seconds to give the page time to settle."""
        time.sleep(1.5)

    def setup_chrome(self):
        """Create the Chrome driver from ``CHROMEPATH`` and store it on ``self.browser``."""
        #options = self.chrome_prep()
        os.environ["webdriver.chrome.driver"] = CHROMEPATH
        self.browser = webdriver.Chrome(CHROMEPATH)
        self.browser.set_window_position(0,0)
        self.delay()

    def setup_headless(self):
        """Create a PhantomJS driver from ``PHANTOMPATH`` and store it on ``self.browser``.

        NOTE(review): PhantomJS support was deprecated and later dropped by
        Selenium -- confirm the installed version still provides it.
        """
        self.browser = webdriver.PhantomJS(PHANTOMPATH)
        self.delay()

    @classmethod
    def _thread_urls(cls, hrefs):
        """Return the thread links in *hrefs*, de-duplicated, in first-seen order.

        Entries that are empty/``None`` or do not start with
        ``THREAD_PREFIX`` (navigation, profile, external links) are dropped.
        """
        threads = (href for href in hrefs if href and href.startswith(cls.THREAD_PREFIX))
        return list(dict.fromkeys(threads))

    def get_latest(self):
        """Append the thread links found on latest50.php to ``self.latest``.

        Only thread URLs are kept and each URL is stored once, so the run
        does not waste submissions on navigation links or repeats.
        """
        self.browser.get('https://python-forum.io/latest50.php')
        anchors = self.browser.find_elements(By.XPATH, "//a[@href]")
        hrefs = [anchor.get_attribute("href") for anchor in anchors]
        for url in self._thread_urls(hrefs):
            if url not in self.latest:
                self.latest.append(url)
# Run the archive job: constructing App does all the work in __init__.
App()
Recommended Tutorials:
Reply
#2
Updated because the XPath changed; also added tutorial links and removed most of the time sleeps.
from selenium import webdriver
import time
import os
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait 
from selenium.webdriver.support import expected_conditions as EC 
from selenium.webdriver.chrome.options import Options

# First page of the member list, sorted by registration date ascending, 20 per
# page (not referenced anywhere else in this script).
MEMBERS = 'https://python-forum.io/memberlist.php?sort=regdate&order=ascending&perpage=20&page=1'

# Location of the chromedriver binary.  Can be overridden through the
# CHROMEDRIVER_PATH environment variable so the script also runs on machines
# other than the author's; the original path remains the default.
CHROMEPATH = os.environ.get('CHROMEDRIVER_PATH', '/home/metulburr/chromedriver')

# Root URL of the forum (not referenced anywhere else in this script).
URLSAVE = 'https://python-forum.io'

# Fixed set of pages submitted on every run, in this order: the forum root,
# each board index, the online.php pages, the help index, the individual help
# documents, the team page and a hand-picked list of threads.
_SITE = 'https://python-forum.io'

# Board slugs; each becomes '<site>/Forum-<slug>'.
_BOARDS = (
    'General-Coding-Help',
    'Homework',
    'GUI',
    'Game-Development',
    'Networking',
    'Web-Development',
    'General',
    'News-and-Discussions',
    'Tutorials',
    'Tutorial-Requests-and-Submissions',
    'Python-Installation-and-Execution',
    'Fundamentals',
    'Common-pitfalls-and-what-to-do',
    'Web-Scraping',
    'Web-Tutorials',
    'GUI-tutorials',
    'Game-Tutorials',
    'Networking-Tutorials',
    'Jobs',
    'Forum-Off-Topic',
    'Board',
    'Bar',
)

# Values of the 'hid' query parameter of the help documents, in submit order.
_HELP_IDS = (
    40, 41, 25, 35, 19, 20, 21, 22, 30, 32, 42,
    46, 28, 33, 10, 11, 13, 37, 29, 31, 34, 38,
    44, 45, 47, 14, 15, 16, 27, 24, 43, 48, 39,
    49,
)

# Thread slugs; each becomes '<site>/Thread-<slug>'.
_THREADS = (
    'Python3-2-differences-input-vs-raw-input',
    'Basic-Part-1-Python-3-6-3-7-and-pip-installation-under-Windows',
    'Anaconda-and-other-ways-to-run-Python',
    'Packaging-Modules-Wheel-pip-setup-py-Freeze',
    'Part-1-Linux-Python-3-environment',
    'Part-2-Python-environment-Windows',
    'Install-a-library-manually',
    'Building-an-exe-with-Py2exe',
    'Creating-C-extensions',
    'windows-command-prompt-modifications-add-options-start-loc',
    'How-to-Execute-python-code',
    'Indentation',
    'Annotations',
    'Simple-debugging-and-how-to-read-tracebacks',
    'Generators-Iterators',
    'Comprehension-Expressions',
    'Classes-Classes-advanced-Dependent-attributes-and-Descriptors',
    'Classes-Classes-advanced-Descriptors-managed-attributes',
    'Basic-Ternary-Conditional-Expressions',
    'Lambda-How-Why-and-Why-not',
    'Functions',
    'Basic-Set-Windows-Python-Path-to-run-python-in-any-directory',
    'Basic-Lists',
    'Basic-Modules-part-2',
    'Basic-Strings-index-and-slicing',
    'Basic-string-format-and-string-expressions',
    'Basic-Dictionaries',
    'Files',
    'Classes-Class-Basics',
    'Classes-Class-Intermediate-Inheritance',
    'Classes-Class-Intermediate-Operator-Overloading',
    'Basic-Modules-part-3',
    'Basic-Modules-part-1',
    'Basic-Never-use-for-i-in-range-len-sequence',
    'Efficiency-Crash-Course',
    'Basic-Python-Gotchas',
    'Basic-Naming-Conventions-PEP-8',
    'Multiple-expressions-with-or-keyword',
    'Namespace-flooding-with-imports',
    'Web-scraping-part-2',
    'Web-Scraping-part-1',
    'Regular-Expression-re-module',
    'Web-scraping-with-Scrapy',
    'Flask-Weather-app-Updatet',
    'Flask-Bootstrap-node-npm-gulp-bower',
    'Flask-Trow-away-JS-function-and-use-a-Python-function',
    'Run-Python-CGI-from-Apache',
    'Flask-Starting-web-development-part-1',
    'Flask-Ajax-in-Flask',
    'wxPython-phoenix-install-step-by-step',
    'Tkinter-Getting-Tkinter-Grid-Sizing-Right-the-first-time',
    'WxPython-Very-Basic-Example-Only-Hello-World',
    'Tkinter-Very-Basic-Example-Only-Hello-World',
    'Tkinter-How-to-determine-if-tkinter-attribute-accepts-color',
    'Tkinter-Find-tkinter-widgets-for-arguments-or-arguments-for-widgets',
    'WxPython-Tutorial-Notespad-W-I-P',
    'PyGame-Creating-a-state-machine',
    'PyGame-User-Interface',
    'PyGame-Install-and-Troubleshooting',
    'PyGame-Structure-and-Organizing-part-8',
    'PyGame-Enemy-AI-and-collision-part-6',
    'PyGame-Flair-and-Organizing-part-7',
    'PyGame-Common-Tasks',
    'Intermediate-Command-Line-Interfaces',
    'PyGame-Basic-animation-part-5',
    'PyGame-Adding-player-effects-part-4',
    'PyGame-Basic-event-handling-part-3',
    'PyGame-Loading-images-transparency-handling-spritesheets-part-2',
    'PyGame-Creating-a-window-part-1',
    'PyGame-python3-x-installation',
    'Text-Adventure-Tutorial-if-structure-to-dictionary',
    'PyGame-warnings-of-sentdex-pygame-tutorials',
    'Bare-Minimum-Sockets',
    'Basic-IRC-bot-with-socket',
    'VS-Code-from-start',
    'A-Neat-way-to-use-pathlib',
    'A-look-at-dataclass',
    'Sound-player-standalone',
    'simulate-static-local-varibles-with-getattr',
    'Underscores-And-You-A-Guide-To-Turning-Magic-Into-Science',
    'Basic-DC-Electronics-Resistors',
    'Database-the-easy-way-dataset',
    'Basic-Creating-a-repo-for-your-completed-scripts',
    'A-List-of-Free-Python-Resources',
    'A-List-of-Free-Game-Resources',
    'Collection-of-programming-ideas-and-challenges',
    'New-Users-Introduce-Yourself',
)

FORUMS = [_SITE]
FORUMS += [f'{_SITE}/Forum-{board}' for board in _BOARDS]
FORUMS += [
    f'{_SITE}/online.php',
    f'{_SITE}/online.php?action=today',
    f'{_SITE}/misc.php?action=help',
]
FORUMS += [f'{_SITE}/misc.php?action=help&hid={hid}' for hid in _HELP_IDS]
FORUMS.append(f'{_SITE}/showteam.php')
FORUMS += [f'{_SITE}/Thread-{thread}' for thread in _THREADS]
 
class App:
    """Drive headless Chrome to submit python-forum.io pages to web.archive.org.

    Constructing an instance performs the whole run: it starts Chrome,
    submits every URL in ``FORUMS``, collects the thread links found on the
    forum's ``latest50.php`` page, submits those as well, and finally quits
    the browser (also when a step raises).
    """

    # Links on latest50.php that start with this prefix are treated as threads.
    THREAD_PREFIX = 'https://python-forum.io/Thread-'

    # Absolute XPaths into the web.archive.org front page.  Presumably the
    # text field and the submit button of the save-page form -- they break
    # whenever that page's layout changes, so verify them against the live page.
    SAVE_INPUT_XPATH = "/html/body/div[3]/div/div[3]/div/div[2]/div[3]/div[2]/form/input"
    SAVE_SUBMIT_XPATH = '/html/body/div[3]/div/div[3]/div/div[2]/div[3]/div[2]/form/button'

    def __init__(self):
        """Start the browser and run the complete archive job."""
        self.latest = []
        self.setup_chrome()
        try:
            for url in FORUMS:
                self.archive_url(url)
            self.get_latest()
            for url in self.latest:
                self.archive_url(url)
        finally:
            # Always shut the driver down so no browser process is left behind.
            self.browser.quit()

    def archive_url(self, url):
        """Submit *url* through the form on the web.archive.org front page.

        Raises whatever Selenium raises when an element is missing or when
        one of the two 10-second waits times out.
        """
        self.browser.get('https://web.archive.org/')
        # Do not touch the form before its container exists in the DOM.
        WebDriverWait(self.browser, 10).until(EC.presence_of_element_located((By.ID,"web_save_div")))
        self.browser.find_element(By.XPATH, self.SAVE_INPUT_XPATH).click()
        self.browser.find_element(By.CLASS_NAME, 'web-save-url-input').send_keys(url)
        self.delay()
        self.browser.find_element(By.XPATH, self.SAVE_SUBMIT_XPATH).click()
        # The run only continues once this element exists on the resulting page.
        WebDriverWait(self.browser, 10).until(EC.presence_of_element_located((By.ID,"wmtbURL")))
        print(f'Archived: {url}')

    def delay(self):
        """Pause for 1.5 seconds to give the page time to settle."""
        time.sleep(1.5)

    def setup_chrome(self):
        """Create a headless Chrome driver from ``CHROMEPATH`` and store it on ``self.browser``."""
        options = Options()
        options.add_argument("--headless")
        os.environ["webdriver.chrome.driver"] = CHROMEPATH
        # 'options' is the current keyword; 'chrome_options' is its deprecated alias.
        self.browser = webdriver.Chrome(CHROMEPATH, options=options)
        self.browser.set_window_position(0,0)
        self.delay()

    @classmethod
    def _thread_urls(cls, hrefs):
        """Return the thread links in *hrefs*, de-duplicated, in first-seen order.

        Entries that are empty/``None`` or do not start with
        ``THREAD_PREFIX`` (navigation, profile, external links) are dropped.
        """
        threads = (href for href in hrefs if href and href.startswith(cls.THREAD_PREFIX))
        return list(dict.fromkeys(threads))

    def get_latest(self):
        """Append the thread links found on latest50.php to ``self.latest``.

        Only thread URLs are kept and each URL is stored once, so the run
        does not waste submissions on navigation links or repeats.
        """
        self.browser.get('https://python-forum.io/latest50.php')
        anchors = self.browser.find_elements(By.XPATH, "//a[@href]")
        hrefs = [anchor.get_attribute("href") for anchor in anchors]
        for url in self._thread_urls(hrefs):
            if url not in self.latest:
                self.latest.append(url)
# Run the archive job: constructing App does all the work in __init__.
App()
Recommended Tutorials:
Reply


Possibly Related Threads…
Thread Author Replies Views Last Post
  ATM machine demo with Python and Django Drone4four 1 2,253 May-20-2022, 06:34 PM
Last Post: Drone4four

Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020