Posts: 7,317
Threads: 123
Joined: Sep 2016
Dec-08-2023, 09:25 PM
(This post was last modified: Dec-08-2023, 09:26 PM by snippsat.)
Copy the selector from the browser's Inspect tool.
img = browser.find_element(By.CSS_SELECTOR, '#__layout > div > div.wrapper__inner > div.main > div.container > div > div > main > div:nth-child(3) > div.ml__wrap > div:nth-child(6) > div > a > div.match-teams > div:nth-child(1) > img')
print(img.get_attribute('src'))
Output: https://cdn.oddspedia.com/images/teams/medium/1/3190.png
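A tip: instead of one long copied selector, a shorter find_elements call can grab all the team logos at once. A quick sketch, reusing the div.match-teams part of the selector above (verify it still matches the page):
# Find every team logo on the page in one call
logos = browser.find_elements(By.CSS_SELECTOR, 'div.match-teams img')
for logo in logos:
    print(logo.get_attribute('src'))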
Posts: 14
Threads: 1
Joined: Dec 2023
Dec-09-2023, 02:59 PM
(This post was last modified: Dec-09-2023, 02:59 PM by nicoali.)
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By # Add this import statement
import chromedriver_autoinstaller
import time
# Install ChromeDriver automatically
chromedriver_autoinstaller.install()
# Setup Chrome options
options = Options()
# Uncomment the next line if you want to run Chrome in headless mode
# options.add_argument("--headless")
# Create a Chrome WebDriver instance without specifying the driver executable path
browser = webdriver.Chrome(options=options)
# Navigate to the URL
url = 'https://oddspedia.com/football'
browser.get(url)
# Use sleep
time.sleep(3)
game = browser.find_element(By.CSS_SELECTOR, 'main > div:nth-child(3) > div.ml__wrap > div:nth-child(16) > div')
print(game.text)
img = browser.find_element(By.CSS_SELECTOR, 'main > div:nth-child(3) > div.ml__wrap > div:nth-child(16) > div > a > div.match-teams > div:nth-child(1) > img')
print(img.get_attribute('src'))
img = browser.find_element(By.CSS_SELECTOR, '#__layout > div > div.wrapper__inner > div.main > div.container > div > div > main > div:nth-child(3) > div.ml__wrap > div:nth-child(6) > div > a > div.match-teams > div:nth-child(1) > img')
print(img.get_attribute('src'))
# Close the browser
browser.quit()
I didn't understand why it starts from that match and not from the first one in the list on the website.
I tried to iterate with no success; I can see names and times, but only the first images.
indices_to_check = range(1, 51)

# Loop through each index and print information
for index in indices_to_check:
    try:
        # Construct the CSS selector for the current index
        css_selector = f'main > div:nth-child(3) > div.ml__wrap > div:nth-child({index}) > div'
        # Find the element with the current CSS selector
        game = browser.find_element(By.CSS_SELECTOR, css_selector)
        print(game.text)
        # Find the image element within the current element
        img_css_selector = f'{css_selector} > a > div.match-teams > div:nth-child(1) > img'
        img = browser.find_element(By.CSS_SELECTOR, img_css_selector)
        print(img.get_attribute('src'))
    except Exception as e:
        print(f"Error processing index {index}: {e}")

# Close the browser
browser.quit()
How can I obtain the whole list of live matches and red cards?
<div class="event-red-card-indicator"><span class="event-red-card-indicator__icon">
</span></div>
The strange thing is that when inspecting, the div seems empty, but the red card appears on the website!
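About the images: maybe the ones further down are lazy-loaded and only get a real src once they scroll into view? A rough sketch of scrolling before scraping (this is only an assumption about why just the first images load):
# Scroll down in steps so any lazy-loaded images get a chance to load
for _ in range(10):
    browser.execute_script("window.scrollBy(0, 1000);")
    time.sleep(0.5)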
Posts: 7,317
Threads: 123
Joined: Sep 2016
(Dec-09-2023, 02:59 PM)nicoali Wrote: The strange thing is that when inspecting, the div seems empty, but the red card appears on the website! It's loaded via CSS, but you only need the HTML tag status.
.event-red-card-indicator__icon {
background: url(https://cdn.oddspedia.com/images/static/icons/red-card.svg);
I would load the page source into BeautifulSoup; it can be easier to work with when e.g. finding many tags.
soup = BeautifulSoup(browser.page_source, 'lxml')
in_play = soup.find_all('div', class_="match-list-item match-list-item--inplay")
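So to find red cards you only need to check whether the indicator tag is present. A quick sketch, using the class name from your snippet:
# A red card shows up as an (empty) indicator div; the icon itself comes from CSS
for match in in_play:
    if match.find('div', class_='event-red-card-indicator'):
        print('Red card in:', match.get_text(' ', strip=True))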
Posts: 14
Threads: 1
Joined: Dec 2023
Dec-10-2023, 10:00 AM
(This post was last modified: Dec-10-2023, 10:00 AM by nicoali.)
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import chromedriver_autoinstaller
import time
import json
# Install ChromeDriver automatically
chromedriver_autoinstaller.install()
# Set up Chrome options
options = Options()
# Uncomment the next line to run Chrome in headless mode
# options.add_argument("--headless")
# Create a Chrome WebDriver instance without specifying the driver executable path
browser = webdriver.Chrome(options=options)
# Navigate to the URL
url = 'https://oddspedia.com/football'
browser.get(url)
# Use sleep
time.sleep(3)
# Use BeautifulSoup to parse the page source
soup = BeautifulSoup(browser.page_source, 'lxml')
events = soup.find_all('script', type='application/ld+json')

# Process the data as needed
for event in events:
    event_data = json.loads(event.string)
    # Extract the relevant information
    event_name = event_data.get('name', '')
    event_url = event_data.get('url', '')
    event_image = event_data.get('image', [])
    start_date = event_data.get('startDate', '')
    end_date = event_data.get('endDate', '')
    # Check whether 'hometeam' is present and not an empty list
    hometeam_list = event_data.get('hometeam', [])
    if hometeam_list:
        home_team = hometeam_list[0].get('name', '')
        home_team_image = hometeam_list[0].get('image', '')
    else:
        home_team = home_team_image = ''
    # Check whether 'awayteam' is present and not an empty list
    awayteam_list = event_data.get('awayteam', [])
    if awayteam_list:
        away_team = awayteam_list[0].get('name', '')
        away_team_image = awayteam_list[0].get('image', '')
    else:
        away_team = away_team_image = ''
    # Print or process the extracted information as needed
    print(f"Event Name: {event_name}")
    print(f"Event URL: {event_url}")
    print(f"Event Image: {event_image}")
    print(f"Start Date: {start_date}")
    print(f"End Date: {end_date}")
    print(f"Home Team: {home_team}")
    print(f"Home Team Image: {home_team_image}")
    print(f"Away Team: {away_team}")
    print(f"Away Team Image: {away_team_image}")
    print("\n")
# Close the browser
browser.quit()
I tried it like this!
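One thing I noticed: not every application/ld+json script necessarily holds match data, so I guard json.loads to avoid crashes. A small sketch (the 'hometeam' check mirrors the code above):
for event in events:
    # Skip script tags whose content is not valid JSON
    try:
        event_data = json.loads(event.string)
    except (TypeError, json.JSONDecodeError):
        continue
    # Skip JSON objects that are not match events
    if 'hometeam' not in event_data:
        continue
    # ... proceed with the extraction as above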
Posts: 7,317
Threads: 123
Joined: Sep 2016
Dec-10-2023, 11:27 AM
(This post was last modified: Dec-10-2023, 11:27 AM by snippsat.)
(Dec-10-2023, 10:00 AM)nicoali Wrote: I tried it like this! That's fine, and getting the JSON response can make things easier.
Also, if you use a sports betting API, you get only JSON back.
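Something like this, with a hypothetical endpoint just to illustrate the shape of the workflow (requests is the usual library for it):
import requests

# Hypothetical API endpoint; real betting APIs differ, but all return structured JSON
resp = requests.get('https://api.example.com/v1/matches', params={'sport': 'football'})
resp.raise_for_status()
matches = resp.json()  # already structured data, no HTML parsing needed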
Also, this will find nothing, as no games are in play right now.
soup = BeautifulSoup(browser.page_source, 'lxml')
in_play = soup.find_all('div', class_="match-list-item match-list-item--inplay")
Now all games are in no-score mode.
no_score = soup.find_all('div', class_="match-list-item match-list-item--no-score")
>>> print(no_score[0].text)
17:00
Valerenga
Kristiansund BK
(2 - 0)
>>> print(no_score[1].text)
15:00
Everton
Chelsea
Posts: 14
Threads: 1
Joined: Dec 2023
(Dec-10-2023, 11:27 AM)snippsat Wrote: That's fine, and getting the JSON response can make things easier.
Also, this will find nothing, as no games are in play right now.
soup = BeautifulSoup(browser.page_source, 'lxml')
in_play = soup.find_all('div', class_="match-list-item match-list-item--inplay")
Now all games are in no-score mode.
Can I mix the JSON data that I read and need with what you suggest, to obtain only live matches and red cards?
Posts: 7,317
Threads: 123
Joined: Sep 2016
(Dec-10-2023, 11:40 AM)nicoali Wrote: Can I mix the JSON data that I read and need with what you suggest, to obtain only live matches and red cards? Maybe, with some work; it's not like the JSON data you get back is as well organized as what you would get from an API, it's just a bunch of script tags to loop over.
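One rough way to mix them is to cross-reference by team name, assuming the names are spelled the same in the ld+json data and in the live divs. A sketch, where home_team comes from your earlier loop and in_play from the find_all above:
# Collect the text of all live items once, then test each JSON event against it
live_text = ' '.join(d.get_text(' ', strip=True) for d in in_play)
if home_team and home_team in live_text:
    print(f"Live now: {home_team}")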
Posts: 14
Threads: 1
Joined: Dec 2023
So it's not possible to select all the live matches the way the Live button at the top of the football section on the website does?
Posts: 14
Threads: 1
Joined: Dec 2023
Dec-11-2023, 04:44 PM
(This post was last modified: Dec-11-2023, 04:44 PM by nicoali.)
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import time
# Setup
options = Options()
# options.add_argument("--headless=new")
ser = Service(r"C:\Users\carto\Desktop\chromedriver-win64\chromedriver.exe")
browser = webdriver.Chrome(service=ser, options=options)
# Parse or automation
url = 'https://oddspedia.com/football'
browser.get(url)
# Click the "Live" button
live_button_xpath = '//button[contains(@class, "btn-filter") and contains(text(), "Live")]'
live_button = WebDriverWait(browser, 10).until(EC.element_to_be_clickable((By.XPATH, live_button_xpath)))
live_button.click()
# Wait until the page has fully loaded
WebDriverWait(browser, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'main > div:nth-child(3) > div.ml__wrap > div:nth-child(16) > div')))
time.sleep(6)
# Get the page source after the click on the "Live" button
page_source = browser.page_source
soup = BeautifulSoup(page_source, 'html.parser')
# Find all "a" elements with the class "match-url--flex"
in_play_divs = soup.find_all('a', class_="match-url match-url--flex")

# Iterate over each "in_play_div" element and do what is needed
for div in in_play_divs:
    # Do the specific scraping for each element,
    # e.g. print the text and the image inside each div
    print("Title:", div['title'])
    # Check that the element is present before searching further
    match_status_element = div.find('div', class_='match-status--inplay')
    if match_status_element:
        print("Status:", match_status_element.get_text(strip=True))
    else:
        print("Status not found")
    home_team_element = div.find('div', class_='match-teams').find('div', class_='match-team--home')
    if home_team_element:
        home_team_score_element = home_team_element.find('span', class_='match-score-result__score')
        if home_team_score_element:
            home_team_score = home_team_score_element.get_text(strip=True)
            print("Home Team Score:", home_team_score)
        else:
            print("Home Team Score not found")
        home_team_logo_element = home_team_element.find('img')
        if home_team_logo_element:
            home_team_logo = home_team_logo_element['src']
            print("Home Team Logo:", home_team_logo)
        else:
            print("Home Team Logo not found")
    else:
        print("Home Team not found")
    away_team_element = div.find('div', class_='match-teams').find('div', class_='match-team--away')
    if away_team_element:
        away_team_score_element = away_team_element.find('span', class_='match-score-result__score')
        if away_team_score_element:
            away_team_score = away_team_score_element.get_text(strip=True)
            print("Away Team Score:", away_team_score)
        else:
            print("Away Team Score not found")
        away_team_logo_element = away_team_element.find('img')
        if away_team_logo_element:
            away_team_logo = away_team_logo_element['src']
            print("Away Team Logo:", away_team_logo)
        else:
            print("Away Team Logo not found")
    else:
        print("Away Team not found")
# Close the browser at the end
browser.quit()
With this I can click on the Live button, but I still cannot scrape all the matches' logos, results, and links!
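For the links at least, the elements found are <a> tags, so the match URL should be right on them. A small sketch (that the hrefs are relative is an assumption):
# Each match item is an <a> tag, so its link is just the href attribute
for a in in_play_divs:
    href = a.get('href', '')
    print("Link:", 'https://oddspedia.com' + href if href.startswith('/') else href)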
Posts: 7,317
Threads: 123
Joined: Sep 2016
Dec-11-2023, 10:57 PM
(This post was last modified: Dec-11-2023, 10:57 PM by snippsat.)
(Dec-11-2023, 04:44 PM)nicoali Wrote: With this I can click on the Live button, but I still cannot scrape all the matches' logos, results, and links! There are at least two states after clicking the Live button; here I do a quick test.
live_bt = browser.find_element(By.CSS_SELECTOR, 'div.match-list-sub-nav > div.match-list-filter-buttons > button:nth-child(1)')
live_bt.click()
time.sleep(3)
soup = BeautifulSoup(browser.page_source, 'lxml')
in_play = soup.find_all('div', class_="match-list-item match-list-item--inplay")
in_play_noscore = soup.find_all('div', class_="match-list-item match-list-item--inplay match-list-item--no-score")
So this is correct; at the time I'm testing now, late, there are only 3 live matches with no score.
>>> len(in_play_noscore)
3
>>> len(in_play)
0
>>> print(in_play_noscore[1].text)
Inplay
Molynes United FC
Harbour View FC
>>> print(in_play_noscore[2].text)
Inplay
AS Douanes Nouakchott
Inter Nouakchott
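Since BeautifulSoup matches class_ against each CSS class of a tag individually, one lookup can also cover both live variants; a small sketch:
# Any div that carries the --inplay class covers both of the live states above
live_all = soup.find_all('div', class_='match-list-item--inplay')
print(len(live_all))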