Python Forum

Full Version: Horse racing stats output to csv
You're currently viewing a stripped down version of our content. View the full version with proper formatting.
i would like the code to loop over all profile links and show the table text but will only parse one page and not loop over all profiles, please see below code, please can you advise

import requests
from bs4 import BeautifulSoup
import csv

page = 'https://gg.co.uk/tips/today'
tree = requests.get(page, headers = headers)
soup = BeautifulSoup(tree.content, 'html.parser')

courseLinks = []
links = soup.select("a.winning-post")

for i in range(0,1):   
    courseLinks.append(links[i].get("href"))
    #time.sleep(17.0)
    #For each location that we have taken, add the website before it - this allows us to call it later
for i in range(len(courseLinks)):
    courseLinks[i] = "https://gg.co.uk"+courseLinks[i]  
#output
['https://gg.co.uk/racing/15-jul-2020/great-yarmouth-1200']
['https://gg.co.uk/racing/15-jul-2020/great-yarmouth-1240']

profileLinks = []

#Run the scraper through each of our links
for i in range(len(courseLinks)):

    page = courseLinks[i]
    tree = requests.get(page, headers = headers)
    soup = BeautifulSoup(tree.content, 'html.parser')

    #Extract all links
    links = soup.select("a.horse")
    
    #For each link, extract the location that it is pointing to
    for j in range(len(links)):
        profileLinks.append("https://gg.co.uk" + links[j].get("href"))
        #print('Downloading page %s...' % url)
        #time.sleep(25.0)
    

    #The page list the profile 
    profileLinks = list(set(profileLinks))
    #time.sleep(14.0)
#output
['https://gg.co.uk/racing/form-profile-2723245',
'https://gg.co.uk/racing/form-profile-2713135',
'https://gg.co.uk/racing/form-profile-2672365',
'https://gg.co.uk/racing/form-profile-2652145',
'https://gg.co.uk/racing/form-profile-2723235']

the code works up until here - it will not process only one of the below links and output
for i in range(len(profileLinks)):

    page = profileLinks[i]
    tree = requests.get(page, headers = headers)
    soup = BeautifulSoup(tree.content, 'html.parser') 

#find tables data
tableData = soup.find_all('table', id='results-profile' )
last_links = soup.find(class_='border-bottom alt')
last_links.decompose()

for tables in tableData:
    for cell in tables.find_all('td'):
        print (cell.text)
#code output
10th3 10
15th Jul 2020 Good to Firm 1m 2f 23y Class 5
12:00 Great Yarmouth Canberra 94 Andrea Atzeni P W Chapple-Hyam
25/1101

#the above code will only parse one of profilelinks https://gg.co.uk/racing/form-profile-2723235, i would like it to loop over all links and output like #code output, then put into a csv file column by column