Hi Guys, I combined coding I found from someone, on the Internet for Web-Scraping ZIP Files.
With Your Code DeadEye, here is the Combined code :-
But I get the following Traceback Error, the coding runs ok initially, allowing me to type my Username. But I get the following Error Message after I hit enter :-
Eddie
With Your Code DeadEye, here is the Combined code :-
import sys import getpass import hashlib import requests BASE_URL = 'https://www.flightsim.com/' def do_login(credentials): session = requests.Session() session.get(BASE_URL) req = session.post(BASE_URL + LOGIN_PAGE, params={'do': 'login'}, data=credentials) if req.status_code != 200: print('Login not successful') sys.exit(1) # session is now logged in return session def get_credentials(): username = input('Username: ') password = getpass.getpass() password_md5 = hashlib.md5(password.encode()).hexdigest() return { 'cookieuser': 1, 'do': 'login', 's': '', 'securitytoken': 'guest', 'vb_login_md5_password': password_md5, 'vb_login_md5_password_utf': password_md5, 'vb_login_password': '', 'vb_login_password_hint': 'Password', 'vb_login_username': username, } credentials = get_credentials() session = do_login() import urllib2 from urllib2 import Request, urlopen, URLError #import urllib import os from bs4 import BeautifulSoup #Create a new directory to put the files into #Get the current working directory and create a new directory in it named test cwd = os.getcwd() newdir = cwd +"\\test" print "The current Working directory is " + cwd os.mkdir( newdir, 0777); print "Created new directory " + newdir newfile = open('zipfiles.txt','w') print newfile print "Running script.. " #Set variable for page to be open and url to be concatenated url = "http://www.flightsim.com" page = urllib2.urlopen('https://www.flightsim.com/vbfs/fslib.php?do=search&fsec=62').read() #File extension to be looked for. extension = ".zip" #Use BeautifulSoup to clean up the page soup = BeautifulSoup(page) soup.prettify() #Find all the links on the page that end in .zip for anchor in soup.findAll('a', href=True): links = url + anchor['href'] if links.endswith(extension): newfile.write(links + '\n') newfile.close() #Read what is saved in zipfiles.txt and output it to the user #This is done to create presistent data newfile = open('zipfiles.txt', 'r') for line in newfile: print line + '/n' newfile.close() #Read through the lines in the text file and download the zip files. #Handle exceptions and print exceptions to the console with open('zipfiles.txt', 'r') as url: for line in url: if line: try: ziplink = line #Removes the first 48 characters of the url to get the name of the file zipfile = line[48:] #Removes the last 4 characters to remove the .zip zipfile2 = zipfile[:3] print "Trying to reach " + ziplink response = urllib2.urlopen(ziplink) except URLError as e: if hasattr(e, 'reason'): print 'We failed to reach a server.' print 'Reason: ', e.reason continue elif hasattr(e, 'code'): print 'The server couldn\'t fulfill the request.' print 'Error code: ', e.code continue else: zipcontent = response.read() completeName = os.path.join(newdir, zipfile2+ ".zip") with open (completeName, 'w') as f: print "downloading.. " + zipfile f.write(zipcontent) f.close() print "Script completed"
But I get the following Traceback Error, the coding runs ok initially, allowing me to type my Username. But I get the following Error Message after I hit enter :-
Error:Traceback (most recent call last):
File "C:\Users\Edward\Desktop\Python 2.79\Web Scraping Code For .ZIP Files 3.py", line 38, in <module>
credentials = get_credentials()
File "C:\Users\Edward\Desktop\Python 2.79\Web Scraping Code For .ZIP Files 3.py", line 22, in get_credentials
username = input('Username: ')
File "<string>", line 1, in <module>
NameError: name '......' is not defined
Any ideas where I am going wrong ?Eddie