Jun-17-2021, 10:03 AM
Hey, I have been trying to extract data from a table on this website:
https://www.macrotrends.net/stocks/chart...ial-ratios
but the problem is that every time the page refreshes, the full XPath also changes. I have also tried extracting the data with pandas and BeautifulSoup, but both of those attempts failed, as I mostly use Selenium only. Here's the code I have written using the Selenium and XlsxWriter modules. While the code works, the data is misplaced every time I run it because the website dynamically changes the XPath.
https://www.macrotrends.net/stocks/chart...ial-ratios
but the problem is that every time the page refreshes, the full XPath also changes. I have also tried extracting the data with pandas and BeautifulSoup, but both of those attempts failed, as I mostly use Selenium only. Here's the code I have written using the Selenium and XlsxWriter modules. While the code works, the data is misplaced every time I run it because the website dynamically changes the XPath.
"""Scrape the Macrotrends financial-ratios grid for BAC into kr_1_BAC.xlsx.

The page renders its table in a jqxGrid whose visible columns shift as the
grid is scrolled horizontally, so the script scrolls the grid in stages and
reads the currently visible column <div>s by their position in the DOM.

Fixes over the original script:
  * Selenium 4 `find_element(By.XPATH, ...)` replaces the removed
    `find_element_by_xpath` helper.
  * The 16 copy-pasted per-column extraction stanzas and the 17 write loops
    with `row - n*k` index arithmetic are collapsed into plain loops over
    column indices and direct ``worksheet.write(row, col, ...)`` calls.
  * ``browser.quit()`` now runs in a ``finally`` block so Chrome is closed
    even if the scrape raises.

NOTE(review): the absolute XPaths are inherently brittle — they break
whenever Macrotrends changes its layout, which is the misplacement problem
described in the post. A sturdier approach is to parse the ``originalData``
JSON embedded in the page source, or to locate the grid via a stable id
selector instead of an absolute path. TODO: confirm the grid structure
before relying on these paths.
"""

import time

import xlsxwriter
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains

OUTPUT_FILE = 'kr_1_BAC.xlsx'
URL = 'https://www.macrotrends.net/stocks/charts/BAC/bank-of-america/financial-ratios'
# Header years, newest first, matching columns B..Q of the workbook.
YEARS = [str(y) for y in range(2020, 2004, -1)]
N_ROWS = 20  # ratio rows read from the grid (original looped range(1, 21))

# {row} is the 1-based grid row, {col} the 1-based visible column position.
CELL_XPATH = ('/html/body/div[2]/div[3]/div[3]/div/div/div[4]/div[2]'
              '/div/div[{row}]/div[{col}]/div')
# The grid's right-scroll arrow button.
RIGHT_ARROW_XPATH = '/html/body/div[2]/div[3]/div[3]/div/div/div[6]/div/div/div[5]/div'


def make_browser():
    """Start Chrome with image loading disabled for faster page loads."""
    options = webdriver.ChromeOptions()
    options.add_experimental_option(
        "prefs", {"profile.managed_default_content_settings.images": 2})
    return webdriver.Chrome(options=options)


def read_column(browser, col):
    """Return the text of visible grid column *col* for rows 1..N_ROWS."""
    return [
        browser.find_element(By.XPATH, CELL_XPATH.format(row=r, col=col)).text
        for r in range(1, N_ROWS + 1)
    ]


def scroll_right(browser, clicks):
    """Click the grid's right-scroll arrow *clicks* times; return the arrow."""
    arrow = browser.find_element(By.XPATH, RIGHT_ARROW_XPATH)
    for _ in range(clicks):
        ActionChains(browser).move_to_element(arrow).click().perform()
    return arrow


def scrape(browser):
    """Scrape row labels and one data column per year.

    Returns (labels, columns) where *columns* is ordered 2020..2005 to match
    YEARS. The visible-column positions and scroll click counts reproduce
    the original script's sequence exactly.
    """
    labels = read_column(browser, 1)

    # Initially visible: 2020-2015 at grid positions 8, 7, 5, 6, 4, 3
    # (the original script's read order, preserved verbatim).
    columns = [read_column(browser, c) for c in (8, 7, 5, 6, 4, 3)]

    # Scroll right 33 clicks so 2014-2012 come into view.
    scroll_right(browser, 33)
    columns += [read_column(browser, c) for c in (9, 10, 8)]
    time.sleep(3)  # let the grid settle before the next scroll

    # Scroll another 35 clicks; 2011 is then at position 9.
    arrow = scroll_right(browser, 35)
    time.sleep(2)
    columns.append(read_column(browser, 9))
    time.sleep(3)

    # Hold the arrow down to scroll fully right for 2010-2005
    # (positions 7 through 12).
    ActionChains(browser).click_and_hold(on_element=arrow).perform()
    columns += [read_column(browser, c) for c in range(7, 13)]

    return labels, columns


def write_workbook(labels, columns):
    """Write labels to column A and one year column each to B..Q."""
    workbook = xlsxwriter.Workbook(OUTPUT_FILE)
    bold = workbook.add_format({'bold': True})
    worksheet = workbook.add_worksheet()

    worksheet.write('A1', 'Particulars', bold)
    for col, year in enumerate(YEARS, start=1):
        worksheet.write(0, col, year, bold)

    for row, label in enumerate(labels, start=1):
        worksheet.write(row, 0, label)
    for col, column in enumerate(columns, start=1):
        for row, value in enumerate(column, start=1):
            worksheet.write(row, col, value)

    workbook.close()


def main():
    browser = make_browser()
    try:
        browser.get(URL)
        labels, columns = scrape(browser)
        write_workbook(labels, columns)
    finally:
        # Always close Chrome, even if the scrape raised mid-way.
        browser.quit()


if __name__ == "__main__":
    main()
Attached Files