Python Forum
"EOL While Scanning String Literal"
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
"EOL While Scanning String Literal"
#11
The following program will get the PDF files of the completion logs, the HTML completion pages (which can be scanned to extract further information), and a summary file for each API number.

I would suggest that if you plan on using this for future work that you move the api list to a file, and
read it in at the start. This will allow you to reuse without having to change the program each time.

The program will create three directories directly below the directory it is run from (the current working directory):

xx_completions_xx - contains the completion text pages by name
logpdfs - contains completion logs in the following format -- compnnnn.pdf (example: comp18098.pdf)
text - contains summary information, as an unparsed string, for each element in self.fields; files are named:
summary_apino.txt
(The files in logpdfs are the completion logs in PDF format.)

The files in xx_completions_xx are the original HTML pages, so they can be displayed in a browser as is (just double-click one).
Enjoy!
import requests
from bs4 import BeautifulSoup
from pathlib import Path

class GetCompletions:
    """Fetch completion pages from wogcc.state.wy.us and save logs and summaries.

    Instantiating the class runs the whole job:

    1. ``get_all_pages`` downloads one HTML page per entry in ``self.apis``
       into ``xx_completions_xx/api_<api>.html``.
    2. ``parse_and_save`` reads those pages back, optionally downloads the
       linked PDFs into ``logpdfs/comp<id>.pdf``, and writes the text of every
       table cell that mentions one of ``self.fields`` to
       ``text/summary_<n>.txt``.

    All three directories are created under the current working directory.
    Network failures are reported with ``print`` and the affected item is
    skipped, so one bad entry does not abort the rest of the batch.
    """

    # Seconds to wait on any single HTTP request; without a timeout a stalled
    # server would block the run indefinitely.
    request_timeout = 30

    def __init__(self):
        """Create the output directories, then download and parse every page."""
        self.homepath = Path('.')
        self.completionspath = self.homepath / 'xx_completions_xx'
        self.completionspath.mkdir(exist_ok=True)
        self.log_pdfpath = self.homepath / 'logpdfs'
        self.log_pdfpath.mkdir(exist_ok=True)
        self.textpath = self.homepath / 'text'
        self.textpath.mkdir(exist_ok=True)

        self.apis = ['49009229900000','49009226390000','49009278600000','49009226340000','49009200210000',
                     '49009065760000','49009201380000','49009230130000','49009278800000','49009222250000',
                     '49009225900000','49009219970000','49009225890000','49009225140000','49009225760000',
                     '49009212630000','49009205440000','49009211590000','49009203660000','49009203940000',
                     '49009204340000','49009226780000','49009220310000','49009229730000','49009212240000',
                     '49009214450000','49009213790000','49009222660000','49009227960000','49009222100000',
                     '49009228020000','49009228260000','49009228290000','49009229090000','49009228250000',
                     '49009229340000','49009229360000','49009227890000','49009228010000','49009228030000',
                     '49009228450000','49009224160000','49009221890000','49009222760000','49009214980000',
                     '49009214620000','49009213800000','49009214380000','49009214730000','49009228150000',
                     '49009228190000','49009227710000','49009215280000','49009228940000','49009227920000',
                     '49009227980000','49009228170000','49009219540000','49009227870000','49009228370000',
                     '49009204330000','49009205120000','49009227860000','49009228360000','49009228160000',
                     '49009216100000','49009229000000','49009229150000','49009229490000','49009215680000',
                     '49009229350000','49009215210000','49009217070000','49009216610000','49009206800000',
                     '49009205590000','49009206310000','49009217960000','49009223190000','49009210640000',
                     '49009209260000','49009213710000','49009212360000','49009212740000','49009218680000',
                     '49009210130000','49009211420000','49009224280000','49009213750000','49009220880000',
                     '49009225300000','49009218090000','49009227720000','49009225830000','49009223170000',
                     '49009209370000','49009214990000','49009207260000','49009211540000','49009227380000']

        self.fields = ['Spud Date', 'Total Depth', 'IP Oil Bbls', 'Reservoir Class', 'Completion Date',
                       'Plug Back', 'IP Gas Mcf', 'TD Formation', 'Formation', 'IP Water Bbls']
        self.get_all_pages()
        self.parse_and_save(getpdfs=True)

    @staticmethod
    def _pdf_name(url):
        """Return ``'comp<id>.pdf'`` for a link URL, or ``None`` to skip it.

        ``<id>`` is everything after the first ``=`` in *url*. Links with no
        href, links containing ``'www'`` and links with no ``=`` are skipped.
        """
        if not url or 'www' in url or '=' not in url:
            return None
        return 'comp{}.pdf'.format(url[url.index('=') + 1:])

    @staticmethod
    def _summary_name(page_name):
        """Return the summary file name for a saved page named ``api_<api>.html``.

        Uses the same ``[3:10]`` slice of the API string that ``get_url``
        puts into the request URL.
        """
        api = page_name.split('_')[1].split('.')[0]
        return 'summary_{}.txt'.format(api[3:10])

    def get_url(self):
        """Yield ``(api, url)`` for every entry in ``self.apis``."""
        for entry in self.apis:
            yield (entry, "http://wogcc.state.wy.us/wyocomp.cfm?nAPI={}".format(entry[3:10]))

    def get_all_pages(self):
        """Download each completion page to ``xx_completions_xx/api_<api>.html``.

        Entries that fail (connection error, timeout or non-200 status) are
        reported and skipped.
        """
        for entry, url in self.get_url():
            print('Fetching main page for entry: {}'.format(entry))
            try:
                response = requests.get(url, timeout=self.request_timeout)
            except requests.RequestException as err:
                print('error downloading {}: {}'.format(entry, err))
                continue
            if response.status_code != 200:
                print('error downloading {}'.format(entry))
                continue
            filename = self.completionspath / 'api_{}.html'.format(entry)
            # Explicit UTF-8: the platform default encoding may be unable to
            # represent every character in the page.
            with filename.open('w', encoding='utf-8') as f:
                f.write(response.text)

    def _download_pdfs(self, soup):
        """Download every eligible link in *soup* into the ``logpdfs`` directory."""
        for link in soup.find_all('a'):
            # .get() instead of ['href']: anchors without an href are legal
            # HTML and must not abort the run.
            url = link.get('href')
            pdf_name = self._pdf_name(url)
            if pdf_name is None:
                continue
            print('downloading pdf at: {}'.format(url))
            try:
                # The body is read in full below, so streaming is not used.
                response = requests.get(url, allow_redirects=False,
                                        timeout=self.request_timeout)
            except requests.RequestException as err:
                print('error downloading {}: {}'.format(url, err))
                continue
            if response.status_code == 200:
                with (self.log_pdfpath / pdf_name).open('wb') as f:
                    f.write(response.content)

    def parse_and_save(self, getpdfs=False):
        """Parse every saved page and write a summary file for each.

        Args:
            getpdfs: when true, also download the PDFs linked from each page.
        """
        # Only files written by get_all_pages; any other file in the directory
        # would not match the 'api_<api>.html' naming the summary name relies on.
        pages = [page for page in self.completionspath.glob('api_*.html') if page.is_file()]
        for file in pages:
            # errors='replace' keeps pages saved by an earlier run under a
            # different default encoding readable.
            with file.open('r', encoding='utf-8', errors='replace') as f:
                soup = BeautifulSoup(f.read(), 'lxml')
            if getpdfs:
                self._download_pdfs(soup)
            sfname = self.textpath / self._summary_name(file.name)
            with sfname.open('w', encoding='utf-8') as f:
                for td in soup.find_all('td'):
                    text = td.text
                    if text and any(field in text for field in self.fields):
                        f.write('{}\n'.format(text))

if __name__ == '__main__':
    # Constructing the object is the whole program: __init__ creates the
    # output directories, then calls get_all_pages() and parse_and_save().
    GetCompletions()
Reply


Messages In This Thread
"EOL While Scanning String Literal" - by tjnichols - Apr-09-2018, 09:49 PM
RE: "EOL While Scanning String Literal" - by wavic - Apr-09-2018, 10:29 PM
So simple! - by tjnichols - Apr-09-2018, 11:14 PM
RE: "EOL While Scanning String Literal" - by Larz60+ - Apr-11-2018, 03:53 AM
RE: "EOL While Scanning String Literal" - by nilamo - Apr-12-2018, 03:57 PM

Possibly Related Threads…
Thread Author Replies Views Last Post
  Literal beginner - needs help warriordazza 2 1,875 Apr-27-2020, 11:15 AM
Last Post: warriordazza

Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020