Selenium XPATH

jimsxxl · Apr-05-2021, 05:23 PM

Hi guys,
I'm a beginner when it comes to Python, and wanted to try coding a scraper.
I'm trying to scrape betfair.com for team names and odds.

I successfully extracted the team names, but I'm having a bit of trouble getting the odds.
Here is my code:

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time

# Scrape today's football coupon on Betfair: print every team name, then the
# text of every odds button.

# Site URL
web = 'https://www.betfair.se/exchange/plus/sv/fotboll-betting-1/today'
# chrome driver path
path = '/usr/bin/chromedriver'

# Load site with URL
driver = webdriver.Chrome(path)
try:
    driver.get(web)

    # Wait (at most 15 s) until the 'allow-cookies' button can be clicked,
    # instead of always sleeping for a fixed 15 s, then click it
    accept = WebDriverWait(driver, 15).until(
        EC.element_to_be_clickable((By.XPATH, '//*[@id="onetrust-accept-btn-handler"]'))
    )
    accept.click()

    #sport_title = driver.find_element_by_xpath('//*[@id="main-wrapper"]/div/div[2]/div/ui-view/ui-view/div/div/div/div/div[1]/div/div[1]/bf-super-coupon/main/ng-include[1]/header/h2').text

    # Get match-table
    match = WebDriverWait(driver, 600).until(
        EC.presence_of_all_elements_located(
            (By.XPATH, '//*[@id="main-wrapper"]//ul[@class="runners"]/li')
        )
    )
    # Get odds-table
    one = WebDriverWait(driver, 600).until(
        EC.presence_of_all_elements_located(
            (By.XPATH, '//*[@id="main-wrapper"]//div[@class="coupon-runner ng-scope"]/button')
        )
    )

    # Loop them and print output
    for teams in match:
        print(teams.text)

    for odds in one:
        print(odds.text)
finally:
    # Quit chromedriver even when a wait times out or a lookup fails, so no
    # browser/driver process is left running
    driver.quit()

The output i get is:

$ python /home/jims/projects/jims-betfair_scraper/jims-betfair_scraper.py
Sheffield Wednesday
Cardiff
Everton
Crystal Palace
Wolves
...
...
...
...
1.04
303606 kr
1.05
298037 kr
26
3950 kr
28
425 kr
...
...
...

As you can see, I'm also getting the "volume"/money extracted along with the odds.
How can I get only the odds in the output?

As I said in the beginning, I'm totally new to Python, so any comments on how I could improve this code are much appreciated, as I'm still learning!

Thank you in advance !

**Larz60+** · (This post was last modified: Apr-06-2021, 01:16 AM by Larz60+.)

I took a different approach, and captured the match names, but couldn't find the odds data, so didn't include that part. In addition, I don't use chrome, but firefox, so used that browser.

At any rate, I used caching so that I didn't have to keep reloading the page (this is to be removed when satisfied with the code). I also wrote a pretty-printed page to help look for data, and added a dictionary to hold all data.

I see 'Odds' on a separate page, should I look there?

Here was my 'stab' at this:

from selenium import webdriver
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import time
from PrettifyPage import PrettifyPage
from CreateDict import CreateDict
from pathlib import Path
import os


class ScrapeFootballScores:
    """Collect the runner (team) names listed on the Betfair football page.

    The page source is cached in ``football/mainpage.html`` next to this
    script, so the browser is only started when no cached copy exists.
    """

    def __init__(self):
        # Work relative to the script's own directory so the cache folder is
        # created next to it. NOTE: this changes the process working directory.
        os.chdir(os.path.abspath(os.path.dirname(__file__)))

        homepath = Path('.')
        footballpath = homepath / 'football'
        footballpath.mkdir(exist_ok=True)

        self.pp = PrettifyPage().prettify
        self.cd = CreateDict()

        self.url = 'https://www.betfair.se/exchange/plus/sv/fotboll-betting-1/today'
        self.cachefile = footballpath / 'mainpage.html'
        self.prettyfile = footballpath / 'FootballPretty.html'

    def start_browser(self):
        """Start Firefox and store the driver on ``self.browser``."""
        # Use the driver's default capabilities rather than editing the
        # shared DesiredCapabilities.FIREFOX dictionary in place.
        self.browser = webdriver.Firefox()

    def stop_browser(self):
        """Shut the browser session down."""
        # quit() ends the whole driver session; close() only closes a window.
        self.browser.quit()

    def _download_page(self):
        """Fetch ``self.url`` with the browser and return the page source."""
        self.start_browser()
        try:
            self.browser.get(self.url)
            # presumably gives the page's scripts time to render - TODO
            # confirm, or replace with an explicit wait
            time.sleep(2)
            return self.browser.page_source
        finally:
            # Always release the browser, even if loading the page failed.
            self.stop_browser()

    def get_scores(self):
        """Fill ``self.cd.Matches`` with one empty node per runner name.

        The page is read from the cache file when it exists; otherwise it is
        downloaded, cached, and a prettified copy is written as well. The
        resulting dictionary is printed.

        Returns:
            list: the runner names in page order (empty when the page holds
            no ``<ul class="runners">`` list).
        """
        cd = self.cd
        matchlist = []

        if self.cachefile.exists():
            page = self.cachefile.read_text(encoding='utf-8')
            soup = BeautifulSoup(page, "lxml")
        else:
            page = self._download_page()
            # Explicit UTF-8 so non-ASCII team names cannot break the write
            # on platforms whose default encoding is narrower.
            self.cachefile.write_text(page, encoding='utf-8')
            soup = BeautifulSoup(page, "lxml")
            self.prettyfile.write_text(self.pp(soup, 2), encoding='utf-8')

        cd.new_dict('Matches')
        # Walk every runners list, not just the first one; finding none
        # simply leaves the dictionary empty instead of raising.
        for runners in soup.find_all('ul', {'class': "runners"}):
            for li in runners.find_all('li'):
                MatchName = li.text.strip()
                cd.add_node(cd.Matches, MatchName)
                matchlist.append(MatchName)

        # Display Dictionary
        print(cd.Matches)
        return matchlist


if __name__ == '__main__':
    sfs = ScrapeFootballScores()
    sfs.get_scores()

and the started dictionary:

Output:
{'Western Sydney Wanderers': {}, 'Central Coast Mariners': {}}

I'll add odds if you can tell me where on the page (not XPath) they are located.

Additional code needed:
CreateDict.py

#  Author: Larz60+ Nov 22, 2018
import os


class CreateDict:
    """Small helper for building and displaying nested dictionaries.

    Dictionaries created with ``new_dict`` are stored as attributes of the
    instance, e.g. ``cd.new_dict('Matches')`` creates ``cd.Matches``.
    """

    def __init__(self):
        # NOTE: this changes the process working directory to the folder
        # that contains this module.
        os.chdir(os.path.abspath(os.path.dirname(__file__)))

    def new_dict(self, dictname):
        """Create an empty dictionary as attribute ``dictname`` of this object."""
        setattr(self, dictname, {})

    def add_node(self, parent, nodename):
        """Add an empty sub-dictionary ``nodename`` to ``parent`` and return it."""
        node = parent[nodename] = {}
        return node

    def add_cell(self, nodename, cellname, value):
        """Store ``value`` under key ``cellname`` in dictionary ``nodename``.

        Returns the stored value.
        """
        cell = nodename[cellname] = value
        return cell

    def display_dict(self, dictname, level=0):
        """Print ``dictname`` as an indented tree, four spaces per level.

        Nested dictionaries are printed as a heading (preceded by a blank
        line) followed by their own entries one level deeper; other values
        are printed as ``key: value``.
        """
        indent = " " * (4 * level)
        for key, value in dictname.items():
            if isinstance(value, dict):
                print(f'\n{indent}{key}')
                # Children always sit exactly one level below this one,
                # whatever kind of entries came before them.
                self.display_dict(value, level + 1)
            else:
                print(f'{indent}{key}: {value}')
def testit():
    """Build a small sample dictionary with CreateDict and print it.

    The sample is printed twice: as an indented tree and as the raw
    dictionary.
    """
    cd = CreateDict()

    cd.new_dict('CityList')

    boston = cd.add_node(cd.CityList, 'Boston')
    bos_resturants = cd.add_node(boston, 'Resturants')

    spoke = cd.add_node(bos_resturants, 'Spoke Wine Bar')
    cd.add_cell(spoke, 'Addr1', '89 Holland St')
    cd.add_cell(spoke, 'City', 'Sommerville')
    # The zip code gets its own key so it no longer overwrites 'Addr1'.
    cd.add_cell(spoke, 'ZipCode', '02144')
    cd.add_cell(spoke, 'Phone', '617-718-9463')

    highland = cd.add_node(bos_resturants, 'Highland Kitchen')
    cd.add_cell(highland, 'Addr1', '150 Highland Ave')
    cd.add_cell(highland, 'City', 'Sommerville')
    cd.add_cell(highland, 'ZipCode', '02144')
    cd.add_cell(highland, 'Phone', '617-625-1131')

    print('\nCityList Dictionary')
    cd.display_dict(cd.CityList)
    print(f'\nraw data: {cd.CityList}')


if __name__ == '__main__':
    testit()

PrettifyPage.py

from bs4 import BeautifulSoup
import requests
import pathlib


class PrettifyPage:
    """Re-indent the output of ``soup.prettify()`` with a wider indent."""

    def prettify(self, soup, indent):
        """Return the prettified markup of ``soup`` with scaled indentation.

        Args:
            soup: any object whose ``prettify()`` method returns the markup
                as a string (assumes one leading space per nesting level -
                TODO confirm for the parser in use).
            indent: factor by which each line's indentation is multiplied.

        Returns:
            The re-indented text; every line ends with a newline.
        """
        pieces = []
        previous_indent = 0
        for line in soup.prettify().split("\n"):
            # The column of the first "<" is taken as the line's depth.
            current_indent = str(line).find("<")
            # Lines without a tag, or with a "<" much further right than the
            # previous depth, are placed one level below the previous line.
            if current_indent == -1 or current_indent > previous_indent + 2:
                current_indent = previous_indent + 1
            previous_indent = current_indent
            pieces.append(self.write_new_line(line, current_indent, indent))
        return "".join(pieces)

    def write_new_line(self, line, current_indent, desired_indent):
        """Return ``line`` with extra leading spaces and a trailing newline.

        ``current_indent * (desired_indent - 1)`` spaces are added in front
        of the line; nothing is added when that number is not positive.
        """
        spaces_to_add = (current_indent * desired_indent) - current_indent
        return " " * max(spaces_to_add, 0) + str(line) + "\n"

if __name__ == '__main__':
    # Manual check: pretty-print an HTML file. The file can be given as the
    # first command-line argument; otherwise the original sample file name is
    # looked up in the current directory.
    import sys

    pp = PrettifyPage()
    # PrettifyPage defines no `bpath` attribute, so the path is built
    # directly with pathlib instead of going through `pp.bpath.htmlpath`.
    pfilename = pathlib.Path(
        sys.argv[1] if len(sys.argv) > 1 else 'BusinessEntityRecordsAA.html'
    )
    with pfilename.open('rb') as fp:
        page = fp.read()
    soup = BeautifulSoup(page, 'lxml')
    pretty = pp.prettify(soup, indent=2)
    print(pretty)

jimsxxl · Apr-06-2021, 03:19 PM

Hello Larz60 !
Thank you so much for your reply.

I re-wrote almost everything.
Its working like i want it now!
Very fun to learn new things, even though it might be an easy task for some..

Possibly Related Threads…
Thread		Author	Replies	Views	Last Post
	need help with xpath	pythonprogrammer	1	3,369	Jan-18-2020, 11:28 PM Last Post: snippsat
	Selenium xpath finding gif to click	fyec	1	4,705	Jul-27-2018, 05:20 PM Last Post: snippsat
	Error in Selenium: CRITICAL:root:Selenium module is not installed...Exiting program.	AcszE	1	4,500	Nov-03-2017, 08:41 PM Last Post: metulburr
	[Selenium] Xpath Drop Down Use Variable	digitalmatic7	3	5,289	Oct-07-2017, 01:45 PM Last Post: buran

Selenium XPATH

User Panel Messages

Announcements