I'm trying to scrape a website.
When I inspect the site in Chrome I can see these divs, etc., but from the Python script I cannot!
Are they inside iframes?
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
"""Scrape match dates, team names and 1/X/2 odds with Selenium.

The script starts Chrome with an existing user profile, overrides the
User-Agent / Accept-Language headers through the DevTools protocol, clicks
the "Tiri🎯" button, collects every event box on the page and writes the
result to ``matches_data.json``.
"""
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as EC
import time
import json

# Configure the browser options.
chrome_options = Options()
chrome_user_data_dir = "C:\\Users\\carto\\AppData\\Local\\Google\\Chrome\\User Data\\"
chrome_options.add_argument(f"user-data-dir={chrome_user_data_dir}")
chrome_options.add_argument("--remote-debugging-port=9222")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")

# Path of the chromedriver executable.
service = Service('C:\\chromedriver\\chromedriver.exe')

# Start the browser.
driver = webdriver.Chrome(service=service, options=chrome_options)

# Everything after the browser is started runs under try/finally so that
# Chrome is always shut down; a leftover process would keep the user-data-dir
# profile in use on the next run.
try:
    # Headers to send with every request.
    custom_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36",
        "Accept-Language": "it-IT,it;q=0.9,en-US;q=0.8,en;q=0.7",
    }

    # Apply the headers through the Chrome DevTools protocol.
    driver.execute_cdp_cmd(
        "Network.setUserAgentOverride",
        {"userAgent": custom_headers["User-Agent"]},
    )
    driver.execute_cdp_cmd(
        "Network.setExtraHTTPHeaders",
        {"headers": custom_headers},
    )

    # Go to the start page.
    # NOTE(review): no driver.get(...) call is present here in the source;
    # the URL appears to have been removed -- restore it before running.
    # Wait for the page to load.
    time.sleep(9)

    # Navigate to the championships page.
    # NOTE(review): the driver.get(...) call is missing here as well.
    # Wait for the page to load.
    time.sleep(9)

    # Try to click the "Tiri🎯" button.
    try:
        tiri_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable(
                (By.XPATH, "//button[contains(text(), 'Tiri🎯')]")
            )
        )
        tiri_button.click()
    except Exception as e:
        print("Errore nel cliccare il pulsante:", e)
        # Dump the page HTML for debugging.
        # NOTE(review): page_source only contains the current browsing
        # context. If the expected divs are absent they may live inside an
        # <iframe> (switch with driver.switch_to.frame(...)) or be rendered
        # later by JavaScript -- confirm in DevTools.
        print(driver.page_source)

    # Wait for the data to load.
    time.sleep(3)

    # Collect the data, one dict per event box.
    event_boxes = driver.find_elements(
        By.CLASS_NAME, "EventBoxstyled__EventBoxWrapper-sc-ksk2ut-1"
    )
    matches_data = []

    for event in event_boxes:
        # Best effort: an event box with missing pieces is reported and
        # skipped instead of aborting the whole scrape.
        try:
            match_date = event.find_element(
                By.CLASS_NAME, "EventBoxstyled__DateTime-sc-ksk2ut-8"
            ).text
            team1 = event.find_elements(
                By.CLASS_NAME,
                "EventBoxCompetitorsstyled__CompetitorNameBase-sc-wpbfyn-0",
            )[0].text
            team2 = event.find_elements(
                By.CLASS_NAME,
                "EventBoxCompetitorsstyled__CompetitorNameBase-sc-wpbfyn-0",
            )[1].text
            odds = event.find_elements(
                By.CLASS_NAME, "OddBoxVariant0styled__OddValue-sc-1ypym0p-6"
            )
            odds_data = {
                "1": odds[0].text,
                "X": odds[1].text,
                "2": odds[2].text,
            }
            matches_data.append({
                "date": match_date,
                "team1": team1,
                "team2": team2,
                "odds": odds_data,
            })
        except Exception as e:
            print(f"Errore durante l'estrazione dei dati per un evento: {e}")

    # Write the data to a JSON file; the encoding is explicit so the file
    # does not depend on the Windows locale code page.
    with open('matches_data.json', 'w', encoding='utf-8') as json_file:
        json.dump(matches_data, json_file, indent=4)
finally:
    # Close the driver, whether or not the scrape succeeded.
    driver.quit()
Are they inside iframes?