Python Forum
Python html herf to json dump help - Printable Version

+- Python Forum (https://python-forum.io)
+-- Forum: Python Coding (https://python-forum.io/forum-7.html)
+--- Forum: General Coding Help (https://python-forum.io/forum-8.html)
+--- Thread: Python html herf to json dump help (/thread-24776.html)



Python html herf to json dump help - paulfearn100 - Mar-03-2020

Please can you advise or help me with my code? Printing to the screen and writing the CSV both work, but I would like a JSON output file
that lists all the links (/07-feb-2020/R1park-1400, /07-feb-2020/R2park-1530, /07-feb-2020/R3park-1600) prefixed with www.race.com/, so I can then import it into another part of my program to run the next part of the scrape.

Thank you.

import csv
import json
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup



#working links local
html_doc = open('D:\@users\python\raceing.html')

#example html doc
#
#href="/07-feb-2020/R1park-1400" class="winning">2:00</a></td><td
#
#href="/07-feb-2020/R2park-1530" class="winning">3:30</a></td><td
#
#href="/07-feb-2020/R3park-1600" class="winning">4:00</a></td><td
#
soup = BeautifulSoup(html_doc, 'html.parser')

link_set = set()
for link in soup.find_all('a',{'class' : 'winning'}):
    web_links = link.get("href")
    #links = [urljoin(start_url,['href'])  # convert relative url to absolute url
    print(web_links)
   # link_set.add(web_links)

csvfile = open('course.csv', 'w+', newline='')
writer = csv.writer(csvfile)
writer.writerow(['Links'])
for link in link_set:
    writer.writerow([link])
csvfile.close()


#working the json file where the output must be stored
html_doc = []
for url in html_doc:
    response = requests.get(url)
    url.append(res.text)
out_file = open("race.json", "w") 
  
json.dump(web_links,out_file, indent = 6)
out_file.close()