"""Scrape the upcoming Serie A fixtures from sbostats.com into matches.txt.

Each table row that contains a link becomes one output line of the form
``<row text without odds> *https://www.sbostats.com<href>``.
"""
import re

PARSER = 'html.parser'  # or 'lxml' (preferred) or 'html5lib', if installed
LEAGUE_URL = "https://www.sbostats.com/soccer/league/italy/serie-a"
TABLE_CLASS = 'updated_next_results_table'
LINK_PREFIX = '*https://www.sbostats.com'
OUTPUT_PATH = "matches.txt"

# A run of digits, optionally followed by ".digits" groups (e.g. "1.85").
# Matching the whole number at once means no orphan "." is left behind, and
# dots that are not part of a number (e.g. in "A.C. Milan") are untouched.
NUMBER_RE = re.compile(r'\d+(?:\.\d+)*')


def strip_odds(text: str) -> str:
    """Return *text* with every number removed and whitespace collapsed.

    Decimal odds such as ``1.85`` are removed as a unit, including the
    decimal point. Like the digit filter it replaces, this also removes
    digits that are not odds (dates, digits inside names).
    """
    return ' '.join(NUMBER_RE.sub('', text).split())


def clean_row_text(text: str) -> str:
    """Turn the raw text of a table row into a single tidy description.

    The literal ``STATS`` is replaced by ``-``, numbers are stripped and
    all runs of whitespace (including newlines) collapse to one space.
    """
    return strip_odds(text.replace('STATS', '-'))


def build_link(href: str) -> str:
    """Return the output link for a row's relative *href*."""
    link = LINK_PREFIX + href
    # NOTE(review): the replacement literal was mangled in the original
    # source (it read `""e"`, which is not valid Python). It is
    # reconstructed as '&quote' on the assumption that the HTML parser
    # decoded a "&quot"-prefixed query parameter into '"' -- please confirm.
    return link.replace('"e', "&quote")


def format_match_line(row_text: str, href: str) -> str:
    """Return the newline-terminated line written to the output file."""
    return f"{clean_row_text(row_text)} {build_link(href)}\n"


def fetch_rows(url: str = LEAGUE_URL, parser: str = PARSER):
    """Download *url* and return the ``<tr>`` elements of the fixtures table.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        LookupError: if the page has no table with class ``TABLE_CLASS``.
    """
    # Imported here so the pure text helpers above can be imported (and
    # tested) on a machine without the scraping dependencies installed.
    import requests
    from bs4 import BeautifulSoup
    from bs4.dammit import EncodingDetector

    # Without a timeout requests can wait forever on a stalled server.
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()

    # Trust the HTTP charset only when the server actually declared one;
    # an encoding declared inside the document takes precedence.
    content_type = resp.headers.get('content-type', '').lower()
    http_encoding = resp.encoding if 'charset' in content_type else None
    html_encoding = EncodingDetector.find_declared_encoding(
        resp.content, is_html=True)
    encoding = html_encoding or http_encoding

    soup = BeautifulSoup(resp.content, parser, from_encoding=encoding)
    table = soup.find('table', attrs={'class': TABLE_CLASS})
    if table is None:
        raise LookupError(f"no table with class {TABLE_CLASS!r} at {url}")
    return table.find_all('tr')


def main() -> None:
    """Scrape the fixtures and append one line per match to OUTPUT_PATH."""
    lines = []
    for row in fetch_rows():
        anchor = row.find('a')
        # Header/spacer rows carry no link; skip them.
        if anchor is None or not anchor.get('href'):
            continue
        lines.append(format_match_line(row.text, anchor['href']))

    # Open the file once for the whole batch; `with` guarantees it is closed.
    with open(OUTPUT_PATH, "a", encoding="utf-8") as out:
        out.writelines(lines)


if __name__ == "__main__":
    main()

# Author's note: I edited the link for my needs; I only have to understand
# how to remove all the odds numbers.
I tried this: q = ''.join([i for i in x if not i.isdigit()]) but in the output I still find the "." characters that remain from the decimals,
so I added: q1 = ' '.join(q.replace('.', '').split())
It does the job, but I think it is a very dirty solution... I think it is the worst solution :)