You can use Beautiful Soup in combination with Selenium, but you need Selenium to load the page and run its JavaScript first.
This code will get you started:
from selenium import webdriver
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import time
import os
import PrettifyPage
import sys
class MatchData:
    """Scrape the tennis match table from oddschecker.com and print its cells.

    Instantiating the class runs the whole scrape: Firefox is started through
    Selenium, the rendered page source is parsed with BeautifulSoup, and every
    ``<td>`` of every ``match-on`` row is printed via ``PrettifyPage``.
    """

    def __init__(self):
        # anchor directory same as script
        os.chdir(os.path.abspath(os.path.dirname(__file__)))
        self.pp = PrettifyPage.PrettifyPage()
        self.analyze_page()

    def start_browser(self):
        """Start a Firefox WebDriver session and store it in ``self.browser``."""
        # Work on a copy so the shared class-level capabilities dict is not
        # mutated for every other user of DesiredCapabilities.FIREFOX.
        caps = webdriver.DesiredCapabilities.FIREFOX.copy()
        caps["marionette"] = True
        self.browser = webdriver.Firefox(capabilities=caps)

    def stop_browser(self):
        """Shut down the WebDriver session started by ``start_browser``."""
        # quit() ends the whole driver session; close() would only close the
        # current window and leave the driver process running.
        self.browser.quit()

    def analyze_page(self):
        """Load the tennis page, locate the match table and print each cell.

        The browser is always shut down afterwards, even when a lookup or
        parsing step raises.
        """
        self.start_browser()
        try:
            self.browser.get('https://www.oddschecker.com/tennis')
            # Give the page's scripts time to build the table.
            time.sleep(2)
            # The result is discarded: the lookup only serves as a check that
            # the first table row exists (find_element raises if it does not).
            self.browser.find_element(By.XPATH, '/html/body/div[1]/div[2]/div/div/div/div/div/div[1]/section[2]/div/div/table/tbody/tr[1]/td[1]/a/div/p[1]')

            page = self.browser.page_source
            soup = BeautifulSoup(page, 'lxml')
            table = soup.find('table', {'class': "at-12 standard-list", 'data-ng-init': "MainController.mainNav.hideCouponRows = {};"})
            trs = table.find_all('tr', {'class': "match-on"})
            for n, tr in enumerate(trs):
                tds = tr.find_all('td')
                for n1, td in enumerate(tds):
                    # Label each cell with its row and column index; printing
                    # the whole <tr> element here would flood the header line.
                    print(f"\n================================= tr_{n}, td_{n1} ================================= ")
                    print(f"{self.pp.prettify(td, 2)}")
        finally:
            self.stop_browser()
# Script entry point: constructing MatchData performs the whole scrape,
# because its __init__ calls analyze_page().
if __name__ == "__main__":
    MatchData()
You'll also need this module in the same directory (save it as PrettifyPage.py, otherwise the `import PrettifyPage` line above will fail):
PrettifyPage.py
# PrettifyPage.py
from bs4 import BeautifulSoup
import requests
import pathlib
class PrettifyPage:
    """Re-indent the output of a soup object's ``prettify()`` to a chosen width."""

    def __init__(self):
        pass

    def prettify(self, soup, indent):
        """Return ``soup.prettify()`` with each line's indentation rescaled.

        The nesting level of a line is taken from the column of its first
        ``<``. A line without ``<``, or one whose ``<`` sits more than two
        columns deeper than the previous line's level, is treated as one
        level deeper than the previous line.

        Args:
            soup: Any object exposing a ``prettify()`` method that returns a
                string (presumably a BeautifulSoup tag or document).
            indent: Desired number of spaces per nesting level.

        Returns:
            The re-indented text; every input line is terminated with a
            newline.
        """
        pieces = []
        previous_indent = 0
        for line in soup.prettify().split("\n"):
            current_indent = str(line).find("<")
            if current_indent == -1 or current_indent > previous_indent + 2:
                current_indent = previous_indent + 1
            previous_indent = current_indent
            pieces.append(self.write_new_line(line, current_indent, indent))
        # Join once at the end rather than growing a string inside the loop.
        return "".join(pieces)

    def write_new_line(self, line, current_indent, desired_indent):
        """Return ``line`` with extra leading spaces and a trailing newline.

        Args:
            line: The text of one line, including its existing indentation.
            current_indent: Nesting level computed for the line.
            desired_indent: Desired number of spaces per nesting level.

        Returns:
            ``line`` prefixed with ``current_indent * desired_indent -
            current_indent`` spaces (nothing when that value is not positive)
            and terminated with a newline.
        """
        spaces_to_add = (current_indent * desired_indent) - current_indent
        # Clamp at zero so a non-positive difference adds no padding.
        return " " * max(spaces_to_add, 0) + str(line) + "\n"
if __name__ == '__main__':
    # Demo: re-indent a saved HTML page with a two-space indent and print it.
    pp = PrettifyPage()
    # PrettifyPage defines no ``bpath`` attribute, so the sample file is
    # looked up next to this script instead of via ``pp.bpath.htmlpath``.
    pfilename = pathlib.Path(__file__).resolve().parent / 'BusinessEntityRecordsAA.html'
    with pfilename.open('rb') as fp:
        page = fp.read()
    soup = BeautifulSoup(page, 'lxml')
    pretty = pp.prettify(soup, indent=2)
    print(pretty)
Here's a sample of the output. The script just prints the contents of the table cells it finds;
you'll have to extract what you need from that.
partial results:
Output:
================================= tr_<tr class="match-on no-top-border" data-day="Today" data-mid="3464826451" data-ng-class="{ 'hide-row' : MainController.mainNav.hideCouponRows['2019-08-0338825577'] }"><td class="time all-odds-click"><div class="time-div beta-caption2 beta-mcaption4"><span class="time-digits beta-caption1 bold betam-caption2">11:30</span></div></td><td class="all-odds-click" colspan="2"><p class="fixtures-bet-name beta-footnote">Gille/Vliegen</p><p class="fixtures-bet-name beta-footnote">Oswald/Polasek</p></td><td class="basket-add" data-best-dig="1.88" data-bid="26247959786" data-track="&lid=card&lpos=basket-add" title="Add Gille/Vliegen to betslip"><p class="participant-name"><span class="odds beta-footnote bold add-to-bet-basket">7/8</span></p></td><td class="basket-add" data-best-dig="2.08" data-bid="26247959787" data-track="&lid=card&lpos=basket-add" title="Add Oswald/Polasek to betslip"><p class="participant-name participant-name-draw"><span class="odds beta-footnote bold add-to-bet-basket">13/12</span></p></td><td class="betting link-right"><a class="beta-callout full-height-link whole-row-link" data-event-name="Gille/Vliegen v Oswald/Polasek" href="/tennis/atp-kitzbuhel/gille-vliegen-v-oswald-polasek/winner" title="View all Gille/Vliegen v Oswald/Polasek odds"><span class="beta-footnote betam-caption2 comp-odds-text">All Odds</span><span class="beta-sprite big-arr right"></span></a></td></tr>, td_0 =================================
<td class="time all-odds-click">
<div class="time-div beta-caption2 beta-mcaption4">
<span class="time-digits beta-caption1 bold betam-caption2">
11:30
</span>
</div>
</td>
================================= tr_<tr class="match-on no-top-border" data-day="Today" data-mid="3464826451" data-ng-class="{ 'hide-row' : MainController.mainNav.hideCouponRows['2019-08-0338825577'] }"><td class="time all-odds-click"><div class="time-div beta-caption2 beta-mcaption4"><span class="time-digits beta-caption1 bold betam-caption2">11:30</span></div></td><td class="all-odds-click" colspan="2"><p class="fixtures-bet-name beta-footnote">Gille/Vliegen</p><p class="fixtures-bet-name beta-footnote">Oswald/Polasek</p></td><td class="basket-add" data-best-dig="1.88" data-bid="26247959786" data-track="&lid=card&lpos=basket-add" title="Add Gille/Vliegen to betslip"><p class="participant-name"><span class="odds beta-footnote bold add-to-bet-basket">7/8</span></p></td><td class="basket-add" data-best-dig="2.08" data-bid="26247959787" data-track="&lid=card&lpos=basket-add" title="Add Oswald/Polasek to betslip"><p class="participant-name participant-name-draw"><span class="odds beta-footnote bold add-to-bet-basket">13/12</span></p></td><td class="betting link-right"><a class="beta-callout full-height-link whole-row-link" data-event-name="Gille/Vliegen v Oswald/Polasek" href="/tennis/atp-kitzbuhel/gille-vliegen-v-oswald-polasek/winner" title="View all Gille/Vliegen v Oswald/Polasek odds"><span class="beta-footnote betam-caption2 comp-odds-text">All Odds</span><span class="beta-sprite big-arr right"></span></a></td></tr>, td_1 =================================
<td class="all-odds-click" colspan="2">
<p class="fixtures-bet-name beta-footnote">
Gille/Vliegen
</p>
<p class="fixtures-bet-name beta-footnote">
Oswald/Polasek
</p>
</td>
================================= tr_<tr class="match-on no-top-border" data-day="Today" data-mid="3464826451" data-ng-class="{ 'hide-row' : MainController.mainNav.hideCouponRows['2019-08-0338825577'] }"><td class="time all-odds-click"><div class="time-div beta-caption2 beta-mcaption4"><span class="time-digits beta-caption1 bold betam-caption2">11:30</span></div></td><td class="all-odds-click" colspan="2"><p class="fixtures-bet-name beta-footnote">Gille/Vliegen</p><p class="fixtures-bet-name beta-footnote">Oswald/Polasek</p></td><td class="basket-add" data-best-dig="1.88" data-bid="26247959786" data-track="&lid=card&lpos=basket-add" title="Add Gille/Vliegen to betslip"><p class="participant-name"><span class="odds beta-footnote bold add-to-bet-basket">7/8</span></p></td><td class="basket-add" data-best-dig="2.08" data-bid="26247959787" data-track="&lid=card&lpos=basket-add" title="Add Oswald/Polasek to betslip"><p class="participant-name participant-name-draw"><span class="odds beta-footnote bold add-to-bet-basket">13/12</span></p></td><td class="betting link-right"><a class="beta-callout full-height-link whole-row-link" data-event-name="Gille/Vliegen v Oswald/Polasek" href="/tennis/atp-kitzbuhel/gille-vliegen-v-oswald-polasek/winner" title="View all Gille/Vliegen v Oswald/Polasek odds"><span class="beta-footnote betam-caption2 comp-odds-text">All Odds</span><span class="beta-sprite big-arr right"></span></a></td></tr>, td_2 =================================
<td class="basket-add" data-best-dig="1.88" data-bid="26247959786" data-track="&lid=card&lpos=basket-add" title="Add Gille/Vliegen to betslip">
<p class="participant-name">
<span class="odds beta-footnote bold add-to-bet-basket">
7/8
</span>
</p>
</td>