Oct-06-2023, 08:32 AM
from bs4 import BeautifulSoup from bs4.dammit import EncodingDetector import requests parser = 'html.parser' # or 'lxml' (preferred) or 'html5lib', if installed resp = requests.get("https://www.sbostats.com/soccer/league/italy/serie-a") http_encoding = resp.encoding if 'charset' in resp.headers.get('content-type', '').lower() else None html_encoding = EncodingDetector.find_declared_encoding(resp.content, is_html=True) encoding = html_encoding or http_encoding soup = BeautifulSoup(resp.content, parser, from_encoding=encoding) #print (soup) ##for link in soup.select('a[href^="/soccer/stats?"]'): ## #print ('https://www.sbostats.com/soccer/stats?country=italy&league=serie-a"e=1.44&direction=away&id=Mzk5OTk5MQ==') ## href1 = ['href'] ## # a"e ## c = ('https://www.sbostats.com'+link['href']) ## x = c.replace('"e', ""e") ## print (x) data = [] table = soup.find_all('table',attrs={'class':'updated_next_results_table'}) #, print (table) rows = soup.find_all('tr') for row in rows: cols = row.find_all('td') #, attrs={'class':'widget-results__team-name match-name'} cols = [ele.text.strip() for ele in cols] data.append([ele for ele in cols if ele]) # Get rid of empty values print (data)
i am able to take links and datas but my expected result is
from this link https://www.sbostats.com/soccer/league/italy/serie-a
for each match have values of names of teams from the table and the relative link.