Python Forum
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
Dynamically changing Xpath
#1
Hey I have been trying to extract data from table from this website:
https://www.macrotrends.net/stocks/chart...ial-ratios

but the problem is that every time the page refreshes, the full Xpath also changes. Besides, I have also tried extracting with Panda and BeautifulSoup, I did fail with the last two approaches as I mostly use Selenium only. Here's the code I have written using modules: Selenium and XLS Writer. While the code works, the data is misplaced everytime I run the code as the website dynamically changes the Xpath.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
from selenium.webdriver.common.action_chains import ActionChains
import xlsxwriter

workbook = xlsxwriter.Workbook('kr_1_BAC.xlsx')
worksheet = workbook.add_worksheet()
bold = workbook.add_format({'bold': True})
money_format = workbook.add_format({'num_format': '$#,##0'})
worksheet.write('A1', 'Particulars', bold)
worksheet.write('B1', '2020', bold)
worksheet.write('C1', '2019', bold)
worksheet.write('D1', '2018', bold)
worksheet.write('E1', '2017', bold)
worksheet.write('F1', '2016', bold)
worksheet.write('G1', '2015', bold)
worksheet.write('H1', '2014', bold)
worksheet.write('I1', '2013', bold)
worksheet.write('J1', '2012', bold)
worksheet.write('K1', '2011', bold)
worksheet.write('L1', '2010', bold)
worksheet.write('M1', '2009', bold)
worksheet.write('N1', '2008', bold)
worksheet.write('O1', '2007', bold)
worksheet.write('P1', '2006', bold)
worksheet.write('Q1', '2005', bold)

options = webdriver.ChromeOptions()
prefs = {"profile.managed_default_content_settings.images": 2}
options.add_experimental_option("prefs", prefs)
#options.add_argument('-headless')
browser = webdriver.Chrome(options=options)
#browser = webdriver.Chrome()
#options.headless = True

url = 'https://www.macrotrends.net/stocks/charts/BAC/bank-of-america/financial-ratios'
browser.get(url)

#Creating Lists:
list_ind=[]
list_yr1=[]
list_yr2=[]
list_yr3=[]
list_yr4=[]
list_yr5=[]
list_yr6=[]
list_yr7=[]
list_yr8=[]
list_yr9=[]
list_yr10=[]
list_yr11=[]
list_yr12=[]
list_yr13=[]
list_yr14=[]
list_yr15=[]
list_yr16=[]

#from 2020-2015:
for n1 in range(1,21):
    ind=browser.find_element_by_xpath(f'/html/body/div[2]/div[3]/div[3]/div/div/div[4]/div[2]/div/div[{n1}]/div[1]/div')
    ind=ind.text
    y1 = browser.find_element_by_xpath(f'/html/body/div[2]/div[3]/div[3]/div/div/div[4]/div[2]/div/div[{n1}]/div[8]/div')
    y1 = y1.text
    y2 = browser.find_element_by_xpath(f'/html/body/div[2]/div[3]/div[3]/div/div/div[4]/div[2]/div/div[{n1}]/div[7]/div')
    y2 = y2.text
    y3 = browser.find_element_by_xpath(f'/html/body/div[2]/div[3]/div[3]/div/div/div[4]/div[2]/div/div[{n1}]/div[5]/div')
    y3 = y3.text
    y4 = browser.find_element_by_xpath(f'/html/body/div[2]/div[3]/div[3]/div/div/div[4]/div[2]/div/div[{n1}]/div[6]/div')
    y4 = y4.text
    y5 = browser.find_element_by_xpath(f'/html/body/div[2]/div[3]/div[3]/div/div/div[4]/div[2]/div/div[{n1}]/div[4]/div')
    y5 = y5.text
    y6 = browser.find_element_by_xpath(f'/html/body/div[2]/div[3]/div[3]/div/div/div[4]/div[2]/div/div[{n1}]/div[3]/div')
    y6 = y6.text
    indd = str(ind)
    y11 = str(y1)
    y21 = str(y2)
    y31 = str(y3)
    y41 = str(y4)
    y51 = str(y5)
    y61 = str(y6)
    list_ind.append(indd)
    list_yr1.append(y11)
    list_yr2.append(y21)
    list_yr3.append(y31)
    list_yr4.append(y41)
    list_yr5.append(y51)
    list_yr6.append(y61)
#from 2014-12:
#Clicking on the right button 33 times to scroll the table to the right as the xpath are named the same
m=33
while m>0:
    element_to_hover_over1 = browser.find_element_by_xpath('/html/body/div[2]/div[3]/div[3]/div/div/div[6]/div/div/div[5]/div')
    hover = ActionChains(browser).move_to_element(element_to_hover_over1).click().perform()
    m=m-1
    print('m',m)
    #hover1 = ActionChains(browser).click
    #time.sleep(1)
    continue
    #Back to extacting data
for n2 in range(1, 21):
    y7 = browser.find_element_by_xpath(f'/html/body/div[2]/div[3]/div[3]/div/div/div[4]/div[2]/div/div[{n2}]/div[9]/div')
    y7 = y7.text
    y8 = browser.find_element_by_xpath(f'//html/body/div[2]/div[3]/div[3]/div/div/div[4]/div[2]/div/div[{n2}]/div[10]/div')
    y8 = y8.text
    y9 = browser.find_element_by_xpath(f'/html/body/div[2]/div[3]/div[3]/div/div/div[4]/div[2]/div/div[{n2}]/div[8]/div')
    y9 = y9.text
    y71 = str(y7)
    y81 = str(y8)
    y91 = str(y9)
    list_yr7.append(y71)
    list_yr8.append(y81)
    list_yr9.append(y91)
time.sleep(3)
#Moving right by clicking on the right button for 35 times
m2=35
while m2>0:
    element_to_hover_over_r = browser.find_element_by_xpath('/html/body/div[2]/div[3]/div[3]/div/div/div[6]/div/div/div[5]/div')
    hover = ActionChains(browser).move_to_element(element_to_hover_over_r).click().perform()
    m2=m2-1
    print('m',m2)
    #hover1 = ActionChains(browser).click
    #time.sleep(1)
    continue
time.sleep(2)
#2011 only
for n3 in range(1, 21):
    y10 = browser.find_element_by_xpath(f'/html/body/div[2]/div[3]/div[3]/div/div/div[4]/div[2]/div/div[{n3}]/div[9]/div')
    y10 = y10.text
    y101 = str(y10)
    list_yr10.append(y101)
time.sleep(3)
#From 2010-2005/ Scrolling all the way to right
hover5 = ActionChains(browser).click_and_hold(on_element=element_to_hover_over_r).perform()
#time.sleep(1)
for n4 in range(1, 21):
    y11 = browser.find_element_by_xpath(f'/html/body/div[2]/div[3]/div[3]/div/div/div[4]/div[2]/div/div[{n4}]/div[7]/div')
    y11 = y11.text
    y12 = browser.find_element_by_xpath(f'/html/body/div[2]/div[3]/div[3]/div/div/div[4]/div[2]/div/div[{n4}]/div[8]/div')
    y12 = y12.text
    y13 = browser.find_element_by_xpath(f'/html/body/div[2]/div[3]/div[3]/div/div/div[4]/div[2]/div/div[{n4}]/div[9]/div')
    y13 = y13.text
    y14 = browser.find_element_by_xpath(f'/html/body/div[2]/div[3]/div[3]/div/div/div[4]/div[2]/div/div[{n4}]/div[10]/div')
    y14 = y14.text
    y15 = browser.find_element_by_xpath(f'/html/body/div[2]/div[3]/div[3]/div/div/div[4]/div[2]/div/div[{n4}]/div[11]/div')
    y15 = y15.text
    y16 = browser.find_element_by_xpath(f'/html/body/div[2]/div[3]/div[3]/div/div/div[4]/div[2]/div/div[{n4}]/div[12]/div')
    y16 = y16.text
    y111=str(y11)
    y121=str(y12)
    y131=str(y13)
    y141=str(y14)
    y151=str(y15)
    y161=str(y16)
    list_yr11.append(y111)
    list_yr12.append(y121)
    list_yr13.append(y131)
    list_yr14.append(y141)
    list_yr15.append(y151)
    list_yr16.append(y161)
#Counting for XLS writer:
n= len(list_ind)
# Concluding Excel:
row = 1
col = 0
for item1 in (list_ind):
    worksheet.write(row, col, item1)
    row += 1
for item2 in (list_yr1):
    worksheet.write(row-(n*1), col + 1, item2)
    row += 1
for item3 in (list_yr2):
    worksheet.write(row-(n*2), col + 2, item3)
    row += 1
for item4 in (list_yr3):
    worksheet.write(row -(n*3) , col + 3, item4)
    row += 1
for item5 in (list_yr4):
    worksheet.write(row - (n * 4), col + 4, item5)
    row += 1
for item6 in (list_yr5):
    worksheet.write(row - (n * 5), col + 5, item6)
    row += 1
for item7 in (list_yr6):
    worksheet.write(row - (n * 6), col + 6, item7)
    row += 1
for item8 in (list_yr7):
    worksheet.write(row - (n * 7), col + 7, item8)
    row += 1
for item9 in (list_yr8):
    worksheet.write(row - (n * 8), col + 8, item9)
    row += 1
for item10 in (list_yr9):
    worksheet.write(row - (n * 9), col + 9, item10)
    row += 1
for item11 in (list_yr10):
    worksheet.write(row - (n * 10), col + 10, item11)
    row += 1
for item12 in (list_yr11):
    worksheet.write(row - (n * 11), col + 11, item12)
    row += 1
for item13 in (list_yr12):
    worksheet.write(row - (n * 12), col + 12, item13)
    row += 1
for item14 in (list_yr13):
    worksheet.write(row - (n * 13), col + 13, item14)
    row += 1
for item15 in (list_yr14):
    worksheet.write(row - (n * 14), col + 14, item15)
    row += 1
for item16 in (list_yr15):
    worksheet.write(row - (n * 15), col + 15, item16)
    row += 1
for item17 in (list_yr16):
    worksheet.write(row - (n * 16), col + 16, item17)
    row += 1
workbook.close()
browser.quit()

Attached Files

.py   Key Ratios.py (Size: 7.62 KB / Downloads: 271)
Reply


Messages In This Thread
Dynamically changing Xpath - by AgileAVS - Jun-17-2021, 10:03 AM
RE: Dynamically changing Xpath - by snippsat - Jun-17-2021, 12:57 PM
RE: Dynamically changing Xpath - by AgileAVS - Jun-17-2021, 02:33 PM
RE: Dynamically changing Xpath - by snippsat - Jun-17-2021, 04:27 PM

Possibly Related Threads…
Thread Author Replies Views Last Post
  need help with xpath pythonprogrammer 1 2,780 Jan-18-2020, 11:28 PM
Last Post: snippsat

Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020