Hi Guys, I combined coding I found from someone, on the Internet for Web-Scraping ZIP Files.
With Your Code DeadEye, here is the Combined code :-
import sys
import getpass
import hashlib
import requests
BASE_URL = 'https://www.flightsim.com/'
def do_login(credentials):
session = requests.Session()
session.get(BASE_URL)
req = session.post(BASE_URL + LOGIN_PAGE, params={'do': 'login'}, data=credentials)
if req.status_code != 200:
print('Login not successful')
sys.exit(1)
# session is now logged in
return session
def get_credentials():
username = input('Username: ')
password = getpass.getpass()
password_md5 = hashlib.md5(password.encode()).hexdigest()
return {
'cookieuser': 1,
'do': 'login',
's': '',
'securitytoken': 'guest',
'vb_login_md5_password': password_md5,
'vb_login_md5_password_utf': password_md5,
'vb_login_password': '',
'vb_login_password_hint': 'Password',
'vb_login_username': username,
}
credentials = get_credentials()
session = do_login()
import urllib2
from urllib2 import Request, urlopen, URLError
#import urllib
import os
from bs4 import BeautifulSoup
#Create a new directory to put the files into
#Get the current working directory and create a new directory in it named test
cwd = os.getcwd()
newdir = cwd +"\\test"
print "The current Working directory is " + cwd
os.mkdir( newdir, 0777);
print "Created new directory " + newdir
newfile = open('zipfiles.txt','w')
print newfile
print "Running script.. "
#Set variable for page to be open and url to be concatenated
url = "http://www.flightsim.com"
page = urllib2.urlopen('https://www.flightsim.com/vbfs/fslib.php?do=search&fsec=62').read()
#File extension to be looked for.
extension = ".zip"
#Use BeautifulSoup to clean up the page
soup = BeautifulSoup(page)
soup.prettify()
#Find all the links on the page that end in .zip
for anchor in soup.findAll('a', href=True):
links = url + anchor['href']
if links.endswith(extension):
newfile.write(links + '\n')
newfile.close()
#Read what is saved in zipfiles.txt and output it to the user
#This is done to create presistent data
newfile = open('zipfiles.txt', 'r')
for line in newfile:
print line + '/n'
newfile.close()
#Read through the lines in the text file and download the zip files.
#Handle exceptions and print exceptions to the console
with open('zipfiles.txt', 'r') as url:
for line in url:
if line:
try:
ziplink = line
#Removes the first 48 characters of the url to get the name of the file
zipfile = line[48:]
#Removes the last 4 characters to remove the .zip
zipfile2 = zipfile[:3]
print "Trying to reach " + ziplink
response = urllib2.urlopen(ziplink)
except URLError as e:
if hasattr(e, 'reason'):
print 'We failed to reach a server.'
print 'Reason: ', e.reason
continue
elif hasattr(e, 'code'):
print 'The server couldn\'t fulfill the request.'
print 'Error code: ', e.code
continue
else:
zipcontent = response.read()
completeName = os.path.join(newdir, zipfile2+ ".zip")
with open (completeName, 'w') as f:
print "downloading.. " + zipfile
f.write(zipcontent)
f.close()
print "Script completed"
But I get the following Traceback Error, the coding runs ok initially, allowing me to type my Username. But I get the following Error Message after I hit enter :-
Error:
Traceback (most recent call last):
File "C:\Users\Edward\Desktop\Python 2.79\Web Scraping Code For .ZIP Files 3.py", line 38, in <module>
credentials = get_credentials()
File "C:\Users\Edward\Desktop\Python 2.79\Web Scraping Code For .ZIP Files 3.py", line 22, in get_credentials
username = input('Username: ')
File "<string>", line 1, in <module>
NameError: name '......' is not defined
Any ideas where I am going wrong ?
Eddie