May-23-2018, 02:28 PM
When I run the following
It doesn't create this text file.
I get the following error
Thanks!
Tonya
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
import requests from bs4 import BeautifulSoup from pathlib import Path class GetCompletions: def __init__( self , infile): """Above will create a folder called comppdf, and geocorepdf wherever the WOGCC File Downloads file is run from as well as a text file for my api file to reside. """ self .homepath = Path( '.' ) self .completionspath = self .homepath / 'comppdf' self .completionspath.mkdir(exist_ok = True ) self .geocorepdf = self .homepath / 'geocorepdf' self .geocorepdf.mkdir(exist_ok = True ) self .textpath = self .homepath / 'text' self .text.mkdir(exist_ok = True ) self .infile = self .textpath / infile self .api = [] self .parse_and_save(getpdfs = True ) def get_url( self ): for entry in self .apis: """Above will get the URL that matches my API numbers.""" def parse_and_save( self , getpdfs = False ): for file in filelist: with file . open ( 'r' ) as f: soup = BeautifulSoup(f.read(), 'lxml' ) if getpdfs: links = soup.find_all( 'a' ) for link in links: url in link[ 'href' ] if 'www' in url: continue print ( 'downloading pdf at: {}' . format (url)) p = url.index( '=' ) response = requests.get(url, stream = True , allow_redirects = False ) if response.status_code = = 200 : try : header_info = response.headers[ 'Content-Disposition' ] idx = header_info.index( 'filename' ) filename = self .log_pdfpath / header[idx + 9 :] except ValueError: filename = self .log_pdfpath / 'comp{}' . format (url[p + 1 :]) print ( "couldn't locate filename for {} will use: {}" . format ( file , filename)) except KeyError: filename = self .log_pdfpath / 'comp{}.pdf' . format (url[p + 1 :]) print ( 'got KeyError on {}, respnse.headers = {}' . format ( file , response.headers)) print ( 'will use name: {}' . format (filename)) print (repsonse.headers) with filename. open ( 'wb' ) as f: f.write(respnse.content) sfname = self .textpath / 'summary_{}.txt' . format (( file .name.split( '_' ))[ 1 ].split( '.' )[ 0 ][ 3 : 10 ]) tds = soup.find_all( 'td' ) with sfname. open ( 'w' ) as f: for td in tds: if td.text: if any (field in td.text for field in self .fields): f.write( '{}\n' . format (td.text)) if __name__ = = '__main__' : GetCompletions( 'api.txt' ) |
1 2 |
self .textpath = self .homepath / 'text' self .text.mkdir(exist_ok = True ) |
Error: RESTART: C:\Users\toliver\AppData\Local\Programs\Python\Python36\WOGCC\WOGCC_File_Downloads.py
Traceback (most recent call last):
File "C:\Users\toliver\AppData\Local\Programs\Python\Python36\WOGCC\WOGCC_File_Downloads.py", line 71, in <module>
GetCompletions('api.txt')
File "C:\Users\toliver\AppData\Local\Programs\Python\Python36\WOGCC\WOGCC_File_Downloads.py", line 17, in __init__
self.text.mkdir(exist_ok=True)
AttributeError: 'GetCompletions' object has no attribute 'text'
I appreciate any help I can get!Thanks!
Tonya