Dec-09-2022, 11:13 PM
I have some code that opens a website, submits search criteria, and loops through each resulting table row for the link to the details behind it. The results may be broken into pages so it keeps track of that too. It works largely fine with the exception that occasionally, and usually on the first row of a newly loaded page, it will just sit there and not follow the link. I can see the link indicate that it was hovered over, but nothing happens, until I click on it with the mouse, then the process continues. I just don't know what I am missing, as it is placed within a loop that should retry the click as long as the right page is there, which is identified by the presence of known elements. The while loop that starts at line 29 is the functional part in question, and line 31 is where the link is clicked. Code below:
def _click_details_link(driver, wait, link_xpath, attempt):
    """Scroll a row's details link into view and click it.

    The link is only clicked once Selenium reports it as clickable. The first
    attempt uses a native click; later attempts dispatch the click through
    JavaScript, because a native click that only produced a hover once tends
    to do so again when simply repeated.

    Raises:
        TimeoutException: if the link does not become clickable within the
            timeout configured on *wait*.
    """
    link = wait.until(EC.element_to_be_clickable((By.XPATH, link_xpath)))
    # Centre the row so the link is less likely to sit under other content.
    driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", link)
    if attempt == 0:
        link.click()
    else:
        driver.execute_script("arguments[0].click();", link)


def _browse_details_page(driver):
    """Page down, then up, a random number of times (0-4 each) on the page.

    NOTE(review): the original formatting was lost, so the click on
    ``/html/body`` is assumed to follow each burst of key presses - confirm.
    """
    html = driver.find_element(By.TAG_NAME, "html")
    for key in (Keys.PAGE_DOWN, Keys.PAGE_UP):
        for _ in range(randrange(5)):
            html.send_keys(key)
        driver.find_element("xpath", "/html/body").click()


def pagescrape(driver, page_no):
    """Scrape every open case listed on the current search-results page.

    For each table row whose status is 'Open' or 'ACTIVE CASE', the details
    link is followed, the record is read with ``getrecord`` and appended as a
    CSV row to ``proc_path``, and the browser navigates back to the results.

    Reads the module globals ``page_count``, ``letter_current``,
    ``profile_current`` and ``proc_path``; reads and writes
    ``resume_record_number``.

    Args:
        driver: Selenium WebDriver positioned on a search-results page.
        page_no: Number of the results page, used for progress output and
            passed through to ``getrecord``.

    Returns:
        -1 when all rows on the page were processed (``resume_record_number``
        is reset to 1); the row index ``i`` when the browser was found to be
        on neither the results, captcha nor error page
        (``resume_record_number`` is set to ``i``); -3 when the results page
        could not be found or processed.
    """
    global page_count, resume_record_number
    results_table_details_link_xpath = "//table[@class='results']/tbody/tr/td[1]/a[contains(@href,'inquiryDetail')]"
    captcha_page_element_xpath = "//*[@class='captcha__human__title' and text()='You have been blocked.']"
    try:
        wait = WebDriverWait(driver, 20)
        wait.until(EC.element_to_be_clickable((By.XPATH, results_table_details_link_xpath)))
        try:
            # Search Results page: the banner text carries the record counts.
            banner_words = driver.find_element(By.CLASS_NAME, "pagebanner").text.split()
            search_total = int(banner_words[0])
            if search_total <= 25:
                min_rec = 1
                max_rec = search_total
            else:
                min_rec = int(banner_words[4])
                max_rec = int(banner_words[6].replace('.', ''))
            record_count = max_rec - min_rec + 1
            page_header_tag = f"{page_no}/{page_count}".ljust(10, ' ')
            print(f"Page:{page_header_tag} Search Total:{str(search_total).ljust(6,' ')} Search Letter:{letter_current} Profile:{profile_current}")

            # Shorter timeout for the per-row waits.
            wait = WebDriverWait(driver, 6)
            for i in range(resume_record_number, record_count + 1):
                row_tag = f"{i}/{record_count}".ljust(10, ' ')
                case_status = driver.find_element("xpath", f"//table[@class='results']/tbody/tr[{i}]/td[7]").text
                if case_status not in ('Open', 'ACTIVE CASE'):
                    print(f"{row_tag} -- Closed Case")
                    continue

                link_xpath = f"/html/body/div/table[4]/tbody/tr[{i}]/td[1]/a"
                case_number = driver.find_element("xpath", link_xpath).text
                case_type = driver.find_element("xpath", f"/html/body/div/table[4]/tbody/tr[{i}]/td[6]").text
                time.sleep(1)

                attempt = 0  # counts clicks on this row that failed to navigate
                while True:
                    try:
                        _click_details_link(driver, wait, link_xpath, attempt)
                        wait.until(EC.any_of(
                            EC.text_to_be_present_in_element((By.CLASS_NAME, 'InfoStatement'), 'This is an electronic case record'),
                            EC.element_to_be_clickable((By.XPATH, "//div[@class='Subheader']/a[text()='Go Back']")),
                        ))
                        # Details page
                        _browse_details_page(driver)
                        time.sleep(1)
                        details = getrecord(driver, case_number, page_no, case_type)
                        values_list = list(details.values())
                        print(f"{row_tag} {values_list[8]}")
                        with open(proc_path, 'a', encoding="utf-8", newline="") as f:
                            csv.writer(f).writerow(values_list)
                        driver.back()
                        time.sleep(1)
                        break
                    except TimeoutException:
                        # Neither the link nor the details page showed up in
                        # time; work out which page the browser is on.
                        try:
                            wait.until(
                                EC.any_of(
                                    EC.element_to_be_clickable((By.XPATH, results_table_details_link_xpath)),
                                    EC.text_to_be_present_in_element((By.CLASS_NAME, 'captcha__human__title'), 'You have been blocked.'),
                                    EC.text_to_be_present_in_element_value((By.NAME, 'button'), 'Go Back'),
                                    EC.element_to_be_clickable((By.ID, "btnDisclaimerAgree")),
                                )
                            )
                            if driver.find_elements(By.XPATH, results_table_details_link_xpath):
                                # Search results page: the click did not
                                # navigate. Was time.time(1), which raised a
                                # TypeError that the handler below swallowed.
                                time.sleep(1)
                                attempt += 1
                                print(f'Retrying record: {i}')
                                continue
                            elif driver.find_elements(By.XPATH, captcha_page_element_xpath):
                                # Captcha page: keep polling until it clears.
                                continue
                            elif driver.find_elements(By.NAME, "button"):
                                # Error page. Title: An Error has occurred
                                print(f"{row_tag} -- Error page loaded")
                                driver.find_element(By.NAME, "button").click()
                                time.sleep(2)
                                break
                            else:
                                # Back at home/disclaimer page
                                resume_record_number = i
                                return i
                        except Exception as err_long_timeout:
                            if 'Search Results' not in driver.title:
                                print(f"Page Title: {driver.title} - {err_long_timeout} -- in-search page wait")
                            continue
                    except Exception as err_other:
                        print(err_other)
                        # Brief pause so a persistent error cannot spin hot.
                        time.sleep(1)
                        continue

                # Make sure the results table is back before the next row.
                while True:
                    try:
                        wait.until(EC.element_to_be_clickable((By.XPATH, results_table_details_link_xpath)))
                        break
                    except Exception as err_results:
                        print(f"Page Title: {driver.title} - {err_results} -- Results page expected")

            resume_record_number = 1
            return -1
        except Exception as err_results_page:
            # Previously returned -3 silently, hiding the cause.
            print(f"{err_results_page} -- Results page processing")
            return -3
    except Exception as e:
        print(f"{e} -- Catch-all")
        return -3
# Any insight would be greatly appreciated