Sep-22-2022, 01:29 AM
I need to automate a website where the links I interact with open in a new popup/tab. The href attribute of the links does me no good, because using it in a separate request redirects me back to the homepage rather than to the details it references (is there a way to prevent this behaviour?). I am following code I copied from the web, using Playwright's context class to obtain a handle on the new window, but the code fails with the following errors:
Error:Traceback (most recent call last):
File "C:\Wayne\Python\WebScraping\SCRAPYDEMO\StateMO\ohio_miami.py", line 140, in <module>
miami('09/10/2022')
File "C:\Wayne\Python\WebScraping\SCRAPYDEMO\StateMO\ohio_miami.py", line 89, in miami
with context.expect_page() as window:
File "C:\Wayne\Python\WebScraping\SCRAPYDEMO\StateMO\venv\lib\site-packages\playwright\_impl\_sync_base.py", line 78, in __exit__
self._event.value
File "C:\Wayne\Python\WebScraping\SCRAPYDEMO\StateMO\venv\lib\site-packages\playwright\_impl\_sync_base.py", line 58, in value
raise exception
File "C:\Wayne\Python\WebScraping\SCRAPYDEMO\StateMO\ohio_miami.py", line 92, in miami
print(type(window.value))
File "C:\Wayne\Python\WebScraping\SCRAPYDEMO\StateMO\venv\lib\site-packages\playwright\_impl\_sync_base.py", line 58, in value
raise exception
playwright._impl._api_types.TimeoutError: Timeout 30000.0ms exceeded while waiting for event "page"
import os.path
import requests
from playwright.sync_api import sync_playwright
import datetime
from time import sleep
import csv
from bs4 import BeautifulSoup
import re


def miami(start_date):
    """Scrape open traffic (TR*) defendant cases filed between *start_date*
    and today from the Miami County, OH court eServices portal, and export
    one CSV row per charge to ``c:\\working\\access\\oh\\ohio\\miami.csv``.

    :param start_date: search-window start date, formatted ``MM/DD/YYYY``.
    """
    # Parse MM/DD/YYYY into a date object (kept but currently unused).
    sd_obj_in = datetime.date(int(start_date[6:]), int(start_date[:2]), int(start_date[3:5]))
    save_date_obj = sd_obj_in
    with sync_playwright() as p:
        # Headed + slow_mo so the automated search is observable while debugging.
        browser = p.chromium.launch(headless=False, slow_mo=50)
        context = browser.new_context()
        page = context.new_page()
        page.goto('https://courts.miamicountyohio.gov/eservices/home.page.11')

        # Accept the disclaimer page.
        page.wait_for_selector('.anchorButton')
        page.locator('a.anchorButton').click()

        # Switch to the "case type" search tab.
        page.wait_for_selector('#searchPageTabSection')
        page.locator("xpath=//ul/li[5]/a").click()
        sleep(3)

        # Fill the begin/end filing-date range (end = today).
        page.wait_for_selector('.hasDatepicker')
        page.query_selector('input.hasDatepicker').fill(start_date)
        sleep(2)
        end_date = datetime.date.today().strftime('%m/%d/%Y')
        page.query_selector("xpath=//div[contains(@class,'endDate')]/input").fill(end_date)
        sleep(2)

        # Case type: first option whose value starts with 'TR' (traffic).
        case_code = page.query_selector("xpath=//select[@name='caseCd']")
        sleep(1)
        option_tr = case_code.query_selector("xpath=option[starts-with(@value,'TR')]")
        option_tr.scroll_into_view_if_needed()
        option_tr.click()
        sleep(2)

        # Status: Open.
        status = page.query_selector("xpath=//select[@name='statCd']")
        sleep(1)
        option_open = status.query_selector("xpath=option[text()='Open']")
        option_open.scroll_into_view_if_needed()
        option_open.click()
        sleep(2)

        # Party type: Defendant.
        pty = page.query_selector("xpath=//select[@name='ptyCd']")
        sleep(1)
        option_def = pty.query_selector("xpath=option[text()='Defendant']")
        option_def.scroll_into_view_if_needed()
        option_def.click()
        sleep(2)

        # Run the search.
        page.query_selector("xpath=//input[@value='Search']").click()
        sleep(4)

        data_path = 'c:\\working\\access\\oh\\ohio\\miami.csv'
        header = ['Name', 'DOB', 'Case Number', 'File Date', 'Addr1', 'Addr2',
                  'Addr3', 'City', 'State', 'Zip', 'Statute', 'Degree', 'Description']
        if os.path.isfile(data_path):
            os.remove(data_path)
        with open(data_path, 'w', newline='') as f:
            export_file = csv.writer(f)
            export_file.writerow(header)
            for row in page.query_selector_all("xpath=//table[@id='grid']/tbody/tr"):
                link = row.query_selector("xpath=/td[5]/span/a")
                cells = row.query_selector_all("td")
                record = [
                    cells[2].inner_text(),  # name
                    cells[4].inner_text(),  # dob
                    cells[5].inner_text(),  # case number
                    cells[6].inner_text(),  # file date
                ]
                print(record)
                # NOTE(review): the TimeoutError ("waiting for event 'page'")
                # means no new page was created within 30s after the click.
                # If the detail view opens via window.open from this page,
                # page.expect_popup() is the more specific wait; if it
                # navigates in the SAME tab, no "page" event ever fires —
                # verify against the live site. Also: read window.value only
                # AFTER the with-block exits; the block itself is what waits
                # for the event to resolve.
                with context.expect_page() as window:
                    link.click()
                new_window = window.value
                case_details = parse_details(new_window.content())
                # Close the popup so detail tabs don't accumulate across rows.
                new_window.close()
                for detail in case_details['Charges']:
                    # Build a fresh list per charge. The original did
                    # `rec = record`, which aliases the same list, so fields
                    # accumulated across charges of the same case.
                    rec = record + [
                        case_details['Addr1'],
                        case_details['Addr2'],
                        case_details['Addr3'],
                        case_details['City'],
                        case_details['State'],
                        case_details['Zip'],
                        detail['Statute'],
                        detail['Degree'],
                        detail['Desc'],
                    ]
                    print(rec)
                    # The original called exit() here, so writerow() was
                    # unreachable and the CSV never received a data row;
                    # the debug exit() has been removed.
                    export_file.writerow(rec)
    print('done')


def parse_details(content):
    """Extract the defendant's address lines and the charge list from a
    case-detail page.

    :param content: full HTML of the detail popup (``page.content()``).
    :returns: dict with keys ``Addr1``..``Addr3``, ``City``, ``State``,
        ``Zip`` and ``Charges`` — a list of dicts with string values for
        ``Statute``, ``Degree`` and ``Desc``.
    """
    soup = BeautifulSoup(content, 'html.parser')
    contact_info = soup.find("li", class_="ptyContactInfo")
    # Positional .contents indexing mirrors the page's markup; index 4 is
    # skipped (presumably a separator node — verify against a live page).
    docket = {
        'Addr1': contact_info.contents[0].get_text(),
        'Addr2': contact_info.contents[1].get_text(),
        'Addr3': contact_info.contents[2].get_text(),
        'City': contact_info.contents[3].get_text(),
        'State': contact_info.contents[5].get_text(),
        'Zip': contact_info.contents[6].get_text(),
    }
    charges = []
    for case_info in soup.find_all('div', class_='chrg'):
        # The original stored Tag objects (find) and ResultSet lists
        # (find_all) here, which would serialize as raw HTML/repr in the
        # CSV; extract the first matching span's text instead.
        charges.append({
            'Statute': case_info.find('span', 'chgHeadActn').get_text(strip=True),
            'Degree': case_info.find('span', 'chgHeadDeg').get_text(strip=True),
            'Desc': case_info.find('span', 'chgHeadDscr').get_text(strip=True),
        })
    docket['Charges'] = charges
    return docket
miami('09/10/2022')

Can anybody tell me why `window.value` raises an error?