Selenium XPATH - Printable Version

Selenium XPATH - Printable Version

+- Python Forum (https://python-forum.io)
+-- Forum: Python Coding (https://python-forum.io/forum-7.html)
+--- Forum: Web Scraping & Web Development (https://python-forum.io/forum-13.html)
+--- Thread: Selenium XPATH (/thread-33189.html)

Selenium XPATH - jimsxxl - Apr-05-2021

Hi guys,
Im a beginner when it comes to Python, and wanted to try code a scraper.
Im trying to scrape betfair.com for Team-names and Odds.

I successfully extracted the Team-names, but having abit of trouble getting the odds.
Here is my code:

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time

# Site URL
web = 'https://www.betfair.se/exchange/plus/sv/fotboll-betting-1/today'
# chrome driver path
path = '/usr/bin/chromedriver' 

# Load site with URL
driver = webdriver.Chrome(path)
driver.get(web)

# Sleep/wait for site to load
time.sleep(15)
# Find the 'allow-cookies'-button, click it
accept = driver.find_element_by_xpath('//*[@id="onetrust-accept-btn-handler"]')
accept.click()

#sport_title = driver.find_element_by_xpath('//*[@id="main-wrapper"]/div/div[2]/div/ui-view/ui-view/div/div/div/div/div[1]/div/div[1]/bf-super-coupon/main/ng-include[1]/header/h2').text

# Get match-table
match = WebDriverWait(driver, 600).until(EC.presence_of_all_elements_located((By.XPATH, '//*[@id="main-wrapper"]//ul[@class="runners"]/li')))
# Get odds-table
one = WebDriverWait(driver, 600).until(EC.presence_of_all_elements_located((By.XPATH, '//*[@id="main-wrapper"]//div[@class="coupon-runner ng-scope"]/button')))

# Loop them and print output
for teams in match:
    print(teams.text)

for odds in one:
    print(odds.text)

# Quit chromedriver
driver.quit()

The output i get is:

$ python /home/jims/projects/jims-betfair_scraper/jims-betfair_scraper.py
Sheffield Wednesday
Cardiff
Everton
Crystal Palace
Wolves
...
...
...
...
1.04
303606 kr
1.05
298037 kr
26
3950 kr
28
425 kr
...
...
...

As you can see i also getting the "volume"/money extracted along with the odds.
How can i only get the Odds outputted ?

As i said in the beginning, im totally new to Python, so any comments on how i could improve this code is much appreciated as im still learning !

Thank you in advance !

RE: Selenium XPATH - Larz60+ - Apr-06-2021

I took a different approach, and captured the match names, but couldn't find the odds data, so didn't include that part. In addition, I don't use chrome, but firefox, so used that browser.

At any rate, I used cacheing so that I didn't have to keep reloading the page (this to be removed when satisfied with the code. I also wrote a pretty page to help look for data, and added a dictionary to ho;d all data.

I see 'Odds' on a separate page, should I look there?

Here was my 'stab' at this:

from selenium import webdriver
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import time
from PrettifyPage import PrettifyPage
from CreateDict import CreateDict
from pathlib import Path
import os


class ScrapeFootballScores:
    def __init__(self):
        os.chdir(os.path.abspath(os.path.dirname(__file__)))
        
        homepath = Path('.')
        footballpath = homepath / 'football'
        footballpath.mkdir(exist_ok=True)

        self.pp = PrettifyPage().prettify
        self.cd = CreateDict()

        self.url = 'https://www.betfair.se/exchange/plus/sv/fotboll-betting-1/today'
        self.cachefile = footballpath / 'mainpage.html'
        self.prettyfile = footballpath / 'FootballPretty.html'

    def start_browser(self):
        caps = webdriver.DesiredCapabilities().FIREFOX
        caps["marionette"] = True
        self.browser = webdriver.Firefox(capabilities=caps)

    def stop_browser(self):
        self.browser.close()

    def get_scores(self):
        pp = self.pp
        cd = self.cd
        matchlist = []

        if self.cachefile.exists():
            with self.cachefile.open() as fp:
                page = fp.read()
            soup = BeautifulSoup(page, "lxml")
        else:
            self.start_browser()
            self.browser.get(self.url)
            time.sleep(2)
            page = self.browser.page_source
            with self.cachefile.open('w') as fp:
                fp.write(page)
            soup = BeautifulSoup(page, "lxml")
            with self.prettyfile.open('w') as fp:
                fp.write(pp(soup, 2))
            self.stop_browser()
        
        cd.new_dict('Matches')
        target = soup.find('ul', {'class': "runners"})
        for li in target.find_all('li'):
            MatchName = li.text.strip()
            # print(MatchName)
            minfo = cd.add_node(cd.Matches, MatchName)
            matchlist.append(MatchName)

        # Display Dictionary
        print(cd.Matches)



if __name__ == '__main__':
    sfs = ScrapeFootballScores()
    sfs.get_scores()

and the started dictionary:

Output:
{'Western Sydney Wanderers': {}, 'Central Coast Mariners': {}}

I'll add odds if you can tell me where on the page (not XPath) they are located.

Additional code needed:
CreateDict.py

#  Author: Larz60+ Nov 22, 2018
import os


class CreateDict:
    def __init__(self):
        os.chdir(os.path.abspath(os.path.dirname(__file__)))

    def new_dict(self, dictname):
        setattr(self, dictname, {})

    def add_node(self, parent, nodename):
        node = parent[nodename] = {}
        return node

    def add_cell(self, nodename, cellname, value):
        cell =  nodename[cellname] = value
        return cell

    def display_dict(self, dictname, level=0):
        indent = " " * (4 * level)
        for key, value in dictname.items():
            if isinstance(value, dict):
                print(f'\n{indent}{key}')
                level += 1
                self.display_dict(value, level)
            else:
                print(f'{indent}{key}: {value}')
            if level > 0:
                level -= 1{'Western Sydney Wanderers': {}, 'Central Coast Mariners': {}}
def testit():
    cd = CreateDict()

    cd.new_dict('CityList')

    boston = cd.add_node(cd.CityList, 'Boston')
    bos_resturants = cd.add_node(boston, 'Resturants')

    spoke = cd.add_node(bos_resturants, 'Spoke Wine Bar')
    cd.add_cell(spoke, 'Addr1', '89 Holland St')
    cd.add_cell(spoke, 'City', 'Sommerville')
    cd.add_cell(spoke, 'Addr1', '02144')
    cd.add_cell(spoke, 'Phone', '617-718-9463')

    highland = cd.add_node(bos_resturants, 'Highland Kitchen')
    cd.add_cell(highland, 'Addr1', '150 Highland Ave')
    cd.add_cell(highland, 'City', 'Sommerville')
    cd.add_cell(highland, 'ZipCode', '02144')
    cd.add_cell(highland, 'Phone', '617-625-1131')

    print(f'\nCityList Dictionary')
    cd.display_dict(cd.CityList)
    print(f'\nraw data: {cd.CityList}')

if __name__ == '__main__':
    testit()

PrettifyPage.py

from bs4 import BeautifulSoup
import requests
import pathlib


class PrettifyPage:
    def __init__(self):
        pass

    def prettify(self, soup, indent):
        pretty_soup = str()
        previous_indent = 0
        for line in soup.prettify().split("\n"):
            current_indent = str(line).find("<")
            if current_indent == -1 or current_indent > previous_indent + 2:
                current_indent = previous_indent + 1
            previous_indent = current_indent
            pretty_soup += self.write_new_line(line, current_indent, indent)
        return pretty_soup

    def write_new_line(self, line, current_indent, desired_indent):
        new_line = ""
        spaces_to_add = (current_indent * desired_indent) - current_indent
        if spaces_to_add > 0:
            for i in range(spaces_to_add):
                new_line += " "		
        new_line += str(line) + "\n"
        return new_line

if __name__ == '__main__':
    pp = PrettifyPage()
    pfilename = pp.bpath.htmlpath / 'BusinessEntityRecordsAA.html'
    with pfilename.open('rb') as fp:
        page = fp.read()
    soup = BeautifulSoup(page, 'lxml')
    pretty = pp.prettify(soup, indent=2)
    print(pretty)

RE: Selenium XPATH - jimsxxl - Apr-06-2021

Hello Larz60 !
Thank you so much for your reply.

I re-wrote almost everything.
Its working like i want it now!
Very fun to learn new things, even though it might be an easy task for some..