Python Forum
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
managing new windows
#1
I need to control a website at which the links I interact with open in a new popup/tab. The href attribute of the links does me no good, because using it in a separate request directs me back to the homepage rather than to the details it references (is there a way to prevent this behaviour?). I am following code I copied from the web, using Playwright's context object to obtain a handle on the new window, but the code fails with the errors

Error:
Traceback (most recent call last): File "C:\Wayne\Python\WebScraping\SCRAPYDEMO\StateMO\ohio_miami.py", line 140, in <module> miami('09/10/2022') File "C:\Wayne\Python\WebScraping\SCRAPYDEMO\StateMO\ohio_miami.py", line 89, in miami with context.expect_page() as window: File "C:\Wayne\Python\WebScraping\SCRAPYDEMO\StateMO\venv\lib\site-packages\playwright\_impl\_sync_base.py", line 78, in __exit__ self._event.value File "C:\Wayne\Python\WebScraping\SCRAPYDEMO\StateMO\venv\lib\site-packages\playwright\_impl\_sync_base.py", line 58, in value raise exception File "C:\Wayne\Python\WebScraping\SCRAPYDEMO\StateMO\ohio_miami.py", line 92, in miami print(type(window.value)) File "C:\Wayne\Python\WebScraping\SCRAPYDEMO\StateMO\venv\lib\site-packages\playwright\_impl\_sync_base.py", line 58, in value raise exception playwright._impl._api_types.TimeoutError: Timeout 30000.0ms exceeded while waiting for event "page"
import os.path
import requests
from playwright.sync_api import sync_playwright
import datetime
from time import sleep
import csv
from bs4 import BeautifulSoup
import re


def miami(start_date):
    """Scrape open traffic cases from the Miami County (OH) eServices portal.

    Drives the public search UI with Playwright, opens each result's detail
    popup, parses it with parse_details(), and writes one CSV row per charge
    to c:\\working\\access\\oh\\ohio\\miami.csv.

    :param start_date: search window start, formatted ``MM/DD/YYYY``
    :raises ValueError: if ``start_date`` is not a valid MM/DD/YYYY date
    """
    # Validate the incoming date string up front so a malformed argument
    # fails fast instead of mid-scrape.
    sd_obj_in = datetime.date(int(start_date[6:]), int(start_date[:2]), int(start_date[3:5]))
    save_date_obj = sd_obj_in  # kept for parity with the original; unused below

    with sync_playwright() as p:
        # Open the site and drive the search form. headless=False + slow_mo
        # make the run observable; the site appears to need human-like pacing.
        browser = p.chromium.launch(headless=False, slow_mo=50)
        context = browser.new_context()
        page = context.new_page()
        page.goto('https://courts.miamicountyohio.gov/eservices/home.page.11')

        page.wait_for_selector('.anchorButton')
        page.locator('a.anchorButton').click()                              # disclaimer page click-button

        page.wait_for_selector('#searchPageTabSection')
        # search page
        page.locator("xpath=//ul/li[5]/a").click()                          # click case type tab
        sleep(3)

        page.wait_for_selector('.hasDatepicker')
        # search dates entry
        page.query_selector('input.hasDatepicker').fill(start_date)
        sleep(2)
        end_date = datetime.date.today().strftime('%m/%d/%Y')
        page.query_selector("xpath=//div[contains(@class,'endDate')]/input").fill(end_date)
        sleep(2)

        # case type select (traffic)
        case_code = page.query_selector("xpath=//select[@name='caseCd']")
        sleep(1)
        option_tr = case_code.query_selector("xpath=option[starts-with(@value,'TR')]")
        option_tr.scroll_into_view_if_needed()
        option_tr.click()
        sleep(2)

        # status select (open)
        status = page.query_selector("xpath=//select[@name='statCd']")
        sleep(1)
        option_open = status.query_selector("xpath=option[text()='Open']")
        option_open.scroll_into_view_if_needed()
        option_open.click()
        sleep(2)

        # party type select (defendant)
        pty = page.query_selector("xpath=//select[@name='ptyCd']")
        sleep(1)
        option_def = pty.query_selector("xpath=option[text()='Defendant']")
        option_def.scroll_into_view_if_needed()
        option_def.click()
        sleep(2)

        # search button click
        page.query_selector("xpath=//input[@value='Search']").click()
        sleep(4)

        data_path = 'c:\\working\\access\\oh\\ohio\\miami.csv'
        header = ['Name', 'DOB', 'Case Number', 'File Date', 'Addr1', 'Addr2', 'Addr3', 'City', 'State', 'Zip',
                  'Statute', 'Degree', 'Description']
        if os.path.isfile(data_path):
            os.remove(data_path)

        # Keep the CSV open for the whole scrape: the original closed the
        # file right after writing the header and then wrote result rows to
        # a closed writer.
        with open(data_path, 'w', newline='') as f:
            export_file = csv.writer(f)
            export_file.writerow(header)

            for row in page.query_selector_all("xpath=//table[@id='grid']/tbody/tr"):
                # './td[5]' anchors the XPath to this row element; a bare
                # '/td[5]' is evaluated from the document root and matches
                # nothing.
                link = row.query_selector("xpath=./td[5]/span/a")
                cells = row.query_selector_all("td")
                record = [
                    cells[2].inner_text(),       # name
                    cells[4].inner_text(),       # dob
                    cells[5].inner_text(),       # case number
                    cells[6].inner_text(),       # file date
                ]
                print(record)

                # expect_page() must wrap ONLY the action that opens the
                # popup; window.value resolves after the 'with' block exits.
                # Reading it inside the block is what raised the original
                # "Timeout 30000.0ms exceeded waiting for event 'page'".
                with context.expect_page() as window:
                    link.click()
                new_window = window.value
                new_window.wait_for_load_state()
                case_details = parse_details(new_window.content())
                new_window.close()  # don't leak one open popup per result row

                for detail in case_details['Charges']:
                    # copy() matters: the original aliased `rec = record`, so
                    # appended fields accumulated across charges and rows.
                    rec = record.copy()
                    rec.extend([
                        case_details['Addr1'],
                        case_details['Addr2'],
                        case_details['Addr3'],
                        case_details['City'],
                        case_details['State'],
                        case_details['Zip'],
                        detail['Statute'],
                        detail['Degree'],
                        detail['Desc'],
                    ])
                    print(rec)
                    # NOTE: the original had a stray debug exit() here that
                    # aborted the run before the first row was written.
                    export_file.writerow(rec)

        browser.close()
        print('done')


def parse_details(content):
    """Parse a case-detail popup page into address and charge data.

    :param content: raw HTML of the detail page (str)
    :return: dict with string keys ``Addr1``-``Addr3``, ``City``, ``State``,
             ``Zip`` and ``Charges`` — a list of dicts with string values for
             ``Statute``, ``Degree`` and ``Desc``.
    """

    def _text(tag):
        # find() returns None when a span is absent; emit '' instead of
        # crashing (or writing a Tag repr into the CSV).
        return tag.get_text(strip=True) if tag is not None else ''

    soup = BeautifulSoup(content, 'html.parser')
    contact_info = soup.find("li", class_="ptyContactInfo")
    # NOTE(review): positional .contents indexing assumes a fixed markup
    # layout inside the contact <li> — confirm against a live detail page.
    docket = {'Addr1': contact_info.contents[0].get_text()}
    docket['Addr2'] = contact_info.contents[1].get_text()
    docket['Addr3'] = contact_info.contents[2].get_text()
    docket['City'] = contact_info.contents[3].get_text()
    docket['State'] = contact_info.contents[5].get_text()
    docket['Zip'] = contact_info.contents[6].get_text()
    charges = []

    for case_info in soup.find_all('div', class_='chrg'):
        # The original stored a Tag ('Statute') and find_all() result lists
        # ('Degree'/'Desc'); the CSV writer needs plain text for all three.
        charges.append({
            'Statute': _text(case_info.find('span', 'chgHeadActn')),
            'Degree': _text(case_info.find('span', 'chgHeadDeg')),
            'Desc': _text(case_info.find('span', 'chgHeadDscr')),
        })

    docket['Charges'] = charges
    return docket


if __name__ == '__main__':
    # Guarded entry point so importing this module doesn't launch a browser.
    miami('09/10/2022')
Can anybody tell me why window.value raises an error?
Reply
#2
Is what you call the 'anchor button' actually the 'Click Here' button?
If so, you need to wait for css_selector: '#id3e', or as an alternative, xpath: '//*[@id="id3e"]'.
To do so, you will need a browser-automation tool (Selenium or Playwright), not BeautifulSoup alone — or at least load the page with the automation tool before parsing it with BeautifulSoup.
The link is embedded in a JavaScript script, so it is not visible in the static front-page HTML.
Reply


Possibly Related Threads…
Thread Author Replies Views Last Post
  Managing Objects JoeDainton123 1 1,692 May-15-2021, 03:18 PM
Last Post: Yoriz
  Managing dependencies with pipenv t4keheart 6 2,922 Aug-05-2020, 12:39 AM
Last Post: t4keheart
  managing command codes for external controller box Oolongtea 0 1,909 Sep-19-2019, 08:32 AM
Last Post: Oolongtea
  Python what should be name of the module for managing data of users ? harun2525 3 3,407 Dec-06-2017, 06:11 PM
Last Post: nilamo
  managing modules/scripts dynamically hbknjr 2 3,074 Oct-06-2017, 05:07 PM
Last Post: hbknjr

Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020