Python Selenium Document Lookup and Download Automation Error - Target Machine active - Printable Version +- Python Forum (https://python-forum.io) +-- Forum: Python Coding (https://python-forum.io/forum-7.html) +--- Forum: Web Scraping & Web Development (https://python-forum.io/forum-13.html) +--- Thread: Python Selenium Document Lookup and Download Automation Error - Target Machine active (/thread-2849.html) |
Python Selenium Document Lookup and Download Automation Error - Target Machine active - Guttmann - Apr-15-2017 Hi, I am using the below code to match some data from an input csv to 2 drop downs from a website, then download both a csv and pdf... (the csv, I am extracting info from, then overwriting it with the next download since I do not need it -- the pdf, I am keeping, if the day of the month is the 1st or the 15th... but, as you can see here, for testing, I have it set to today, the 14th, instead of 15th) My problem is that it will run Ok, for the first 10 lines or so (the csv list is between 1200-2500 lines, so i'm not even close..) but then, it will throw the below error: [Errno 10061] No connection could be made because the target machine actively refused it Can anyone help me resolve this? I am new to python, scraping AND selenium so would appreciate any help -- or suggestions regarding making the code more efficient in general. Thank you! [code]# -*- coding: utf-8 -*- from selenium import webdriver from selenium.webdriver.common.keys import Keys from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.common.by import By import unittest, time, os, shutil, csv from fuzzywuzzy import process import datetime class Here(unittest.TestCase): def setUp(self): path_to_chromedriver = 'chromedriver.exe' # change path as needed self.driver = webdriver.Chrome(executable_path=path_to_chromedriver) self.base_url = "https://mybaseurl/" self.verificationErrors = [] self.accept_next_alert = True self.dlfolder = "C:/Users/bgutt/Downloads" self.assolist = [] self.reportlist = [] self.inputfile = 'C:\\Users\\bgutt\\Downloads\\Input.csv' def wait(self, by_selector, element): print "Inside wait" try: WebDriverWait(self.driver, 30).until(EC.visibility_of_element_located((by_selector, element))) except Exception as e: print e self.restart() def login(self): print "Inside login" driver = 
self.driver driver.get("https://mywebsite") # self.assertEqual("", driver.title) driver.find_element_by_id("MainContent_Login1_ErlLogin_UserName").clear() driver.find_element_by_id("MainContent_Login1_ErlLogin_UserName").send_keys("myusername") driver.find_element_by_id("MainContent_Login1_ErlLogin_Password").clear() driver.find_element_by_id("MainContent_Login1_ErlLogin_Password").send_keys("mypassword") driver.find_element_by_id("MainContent_Login1_ErlLogin_LoginButton").click() # self.driver.implicitly_wait(30) def test_here(self): driver = self.driver self.login() self.wait(By.CLASS_NAME, "k-input") print "Login Done" csvlist = [] csvlist_log = [] csvfile = "Account History Report.csv" pdffile = "Account History Report.pdf" # get dropdown lists print "Getting Dropdown lists" self.getlists(driver) rows = csv.reader(open(self.inputfile, "rb")) defaulter = {} next(rows) failedlist = [] j = 0 for c in rows: if j >= 200: break j += 1 print j, c if (c[1].strip() in defaulter) or (c[1].strip() == "MYCOMPANY.ORG, LLC") or (c[1].strip() == '') or (c[2].strip() == ''): print "taking from defaulter list" csvlist.append((c[0],)) csvlist_log.append(defaulter[c[1].strip()] if c[1].strip() in defaulter else (datetime.datetime.now(), c[0], c[1], c[2], '', '', 'Error')) continue # Pick list of available communities comm_match = self.get_community(driver, c) print " Got community ",comm_match # Choose correct item from search result add_match = self.get_address(driver, c) if add_match == '': defaulter[c[1]] = (datetime.datetime.now(), c[0], c[1], c[2], comm_match, '', 'Error') csvlist.append((c[0],)) csvlist_log.append((datetime.datetime.now(), c[0], c[1], c[2], comm_match, '', 'Error')) continue csvlist_log.append((datetime.datetime.now(), c[0], c[1], c[2], comm_match, add_match, 'Success')) print " Got address ", add_match # Pick account history report res = self.get_ahr(driver) print " Chose ahr", res if res == False: failedlist.append(c) continue # Download reports 
self.clear_downloads(csvfile) pdf = False if datetime.date.today().day in [1, 14]: self.clear_downloads(pdffile) pdf = True report = self.get_report(driver, True, pdf) if report == False: failedlist.append(c) continue print " Got AHR, Need PDF? ", pdf with open(os.path.join(self.dlfolder, csvfile), "r") as f: data = csv.reader(f) next(data) row = next(data) if pdf: filename = "Account History Report_" + c[0] + ".pdf" i = 0 while True: if os.path.isfile(os.path.join(os.getcwd(), filename)): i += 1 filename = "Account History Report_" + c[0] + "_" + str(i) + ".pdf" else: break dest = os.path.join(os.getcwd(), filename) src = os.path.join(self.dlfolder, pdffile) shutil.copy(src, dest) csvlist.append((c[0], row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7], row[8])) print len(defaulter), csvlist self.create_CSVFile(csvlist, csvlist_log) driver.quit() def getlists(self, driver): driver.find_element_by_css_selector("span.k-input").click() elements = driver.find_elements_by_xpath("//ul[@id='AssociationList_listbox']/li") self.assolist = [element.text for element in elements] driver.find_element_by_xpath( ".//*[@id='content-container']/div[4]/div/div[1]/div[2]/div[2]/span/span/span[1]").click() elements_report = driver.find_elements_by_xpath("//*[@id='TransactionDetail']/table/tbody//td[4]") self.reportlist = [element.text for element in elements_report] def get_community(self, driver, c): print "Inside get_community" try: self.wait(By.CLASS_NAME, "k-input") driver.find_element_by_css_selector("span.k-input").click() match = c[1].strip() if "," not in c[1] else c[1].strip().split(",")[0] check = process.extractOne(match, self.assolist) #print check, match element = driver.find_element_by_xpath("//ul[@id='AssociationList_listbox']/li[text()='%s']" % check[0]) element.click() return check[0] except Exception as e: print "No community ",e return '' def get_address(self, driver, c): print "Inside get_address" try: 
driver.find_element_by_css_selector("a.gridmenuitem.refresh").click() self.wait(By.ID, "TransactionDetail") text = driver.find_element_by_css_selector("input#SearchTransactions") text.send_keys(c[2].split()[0]) text.send_keys(Keys.RETURN) time.sleep(2) add = driver.find_elements_by_xpath("//*[@id='TransactionDetail']/table/tbody//td[4]") address = [ele.text for ele in add] check = process.extractOne(c[2].strip(), address) #print c[2].strip(), check, address checkbox = driver.find_element_by_xpath( "//*[@id='TransactionDetail']/table[.//td[.='%s']][1]//td[.='%s']/ancestor::tr[1]/td//input[@type='checkbox'][1]" % ( check[0], check[0])) checkbox.click() return check[0] except Exception as e: return '' def get_ahr(self, driver): print "Inside get_ahr" try: driver.find_element_by_xpath( ".//*[@id='content-container']/div[4]/div/div[1]/div[2]/div[2]/span/span/span[1]").click() self.wait(By.ID, 'ReportList_listbox') element = driver.find_element_by_xpath("//ul[@id='ReportList_listbox']/li[text()='Account History Report']") if element.text != 'Account History Report': self.get_ahr(driver) else: element.click() return True except Exception as e: print "Got error in AHR ",e self.restart() return False def restart(self): print "Inside restart" self.driver.quit() time.sleep(60) self.login() def get_report(self, driver, csv, pdf): print "Inside get_report" try: if csv == True: self.wait(By.ID,'iFrameReport') driver.switch_to.frame('iFrameReport') self.wait(By.ID, "ReportViewer_ctl05_ctl04_ctl00_Button") driver.find_element_by_css_selector("table#ReportViewer_ctl05_ctl04_ctl00_Button").click() ele = driver.find_element_by_xpath('//a[@title="CSV (comma delimited)"]') ele.click() if not os.path.join(self.dlfolder, "Account History Report.csv"): self.get_report(driver, csv, False) if pdf: # Putting sleep because script will open new tab to download CSV file # so waiting for it to come back to main reports page. 
time.sleep(3) driver.find_element_by_css_selector("table#ReportViewer_ctl05_ctl04_ctl00_Button").click() ele = driver.find_element_by_xpath('//a[@title="Acrobat (PDF) file"]') ele.click() if not os.path.join(self.dlfolder, "Account History Report.pdf"): self.get_report(driver, False, pdf) # Intentionally wait for 2 seconds so that all the file downloads are complete time.sleep(2) driver.switch_to_default_content() return True except Exception as e: print "Got error in get_report ", e self.restart() return False def clear_downloads(self, filename): print "Inside clear_downloads" for fname in os.listdir(self.dlfolder): if fname.startswith("Account History Report"): os.remove(os.path.join(self.dlfolder, fname)) def create_CSVFile(self, mylist, mylist_log): '''(list)->() List of tuples will be given, it will write to CSV file ''' print "Inside create_CSVFile" timestamp = datetime.datetime.now().strftime("%Y%m%d_%H-%M-%S") myfile = open('outputfile_' + timestamp +'.csv', 'wb') wr = csv.writer(myfile, quoting=csv.QUOTE_ALL) wr.writerow(("Ourfile", "CommunityAddress", "SettlementDate", "CommunityUnitCityStateZip", "UnitType", "MailingAddress", "LastPaymentDate", "MailingUnitCityStateZip", "LastPaymentAmount", "CurrentBalance")) wr.writerows(mylist) myfile_log = open('logfile_' + timestamp +'.csv', 'wb') wr = csv.writer(myfile_log, quoting=csv.QUOTE_ALL) wr.writerow(("Date&Time", "Ourfile", "Creditor", "propert_add", "matchedCreditor", "matched property address", "successful or Error")) wr.writerows(mylist_log) if __name__ == "__main__": unittest.main() [/code] RE: Python Selenium Document Lookup and Download Automation Error - Target Machine active - SpeedyZapGaming - Apr-15-2017 I have no Idea what the target machine is... But if you knew what it is then you could try and find out, how to make the target machine not activley refuse it. RE: Python Selenium Document Lookup and Download Automation Error - Target Machine active - Guttmann - Apr-15-2017 Okay.. 
well, it's a website.. what info do you need? I know it uses HTML and Java... RE: Python Selenium Document Lookup and Download Automation Error - Target Machine active - Guttmann - Apr-17-2017 <<bump>> RE: Python Selenium Document Lookup and Download Automation Error - Target Machine active - metulburr - Apr-18-2017 Quote:My problem is that it will run Ok, for the first 10 lines or so (the csv list is between 1200-2500 lines, so i'm not even close..)I am not 100% sure; it could be a wide range of things: their server being screwed up, the site trying to stop automated scripts, your script getting ahead of itself — who knows offhand. The only way to really find out is to run the program ourselves and pick it apart. But to do that we need the website you are scraping, so we can run your program against it. If this is a site you can create a dummy account on, create an account and hard-code that account's details in the login process of your script. |