Python Forum
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
help with python
#1
Hi guys, I need help with this code, please. Right now some of my data is missing, and this is the output I am getting:
--------------------------------------
,Total,Change in last 24 hours
Number of recovered cases,1142
Number of recovered cases,1142,24
------------------------------------------
But I want my output to look like this:
--------------------------------------------
,Total,Change in last 24 hours
Number of confirmed cases in New Zealand,1121,4
Number of probable cases,349,5
Number of confirmed and probable cases,1470,9
Number of cases currently in hospital,7,0
Number of recovered cases,1142,24
Number of deaths,18,0
---------------------------------------------------
This is my python code

import requests
from bs4 import BeautifulSoup
import csv

# Download the "current cases" page and parse it.
result = requests.get("https://www.health.govt.nz/our-work/diseases-and-conditions/covid-19-novel-coronavirus/covid-19-current-situation/covid-19-current-cases")
# Fail with a clear HTTP error instead of an AttributeError further down
# when the page could not be fetched.
result.raise_for_status()
src = result.content
soup = BeautifulSoup(src, 'lxml')

# The summary table is located by its CSS class.
cov19_table = soup.find("table", attrs={"class": "table-style-two"})
cov19_table_head = cov19_table.find_all("thead")
cov19_table_data = cov19_table.tbody.find_all("tr")

# Column titles from the first <thead>; the first one is empty on the page,
# which yields the leading comma in the CSV header line.
head = [
    th.text.replace('\n', '').strip()
    for th in cov19_table_head[0].find_all("th")
]

# Open the file ONCE, before looping over the rows: opening it with mode 'w'
# inside the loop truncates it on every pass and loses the earlier rows.
with open('nt.csv', 'w', newline="") as new_file:
    csv_writer = csv.writer(new_file, delimiter=',')
    csv_writer.writerow(head)

    # Iterate over the rows themselves rather than a hand-maintained counter,
    # so each row is visited exactly once however many rows the table has.
    for tr in cov19_table_data:
        # Row label(s) come from the <th> cells.
        headings = [th.text.replace('\n', '').strip() for th in tr.find_all("th")]
        # Numbers come from the <td> cells; drop thousands separators first.
        t_data = [int(td.text.strip().replace(',', '')) for td in tr.find_all("td")]

        # Write the row only after ALL of its cells have been collected.
        data = headings + t_data
        csv_writer.writerow(data)
        print(data)
Reply
#2
This is just the front end of your code, modified to get all values.
It creates a dictionary with all values, and a separate headers list:
import requests
from bs4 import BeautifulSoup
from PrettifyPage import PrettifyPage
import zipfile
import csv


# Debugging helper only: e.g. print(pp.prettify(cov19_table, 2)) dumps the
# table markup with a readable indent.
pp = PrettifyPage()

# Fetch the page and pick out the summary table by its CSS class.
result = requests.get("https://www.health.govt.nz/our-work/diseases-and-conditions/covid-19-novel-coronavirus/covid-19-current-situation/covid-19-current-cases")
src = result.content
soup = BeautifulSoup(src,'lxml')
cov19_table = soup.find("table", attrs={"class": "table-style-two"})

headers = []   # <th> texts of the first table row (column titles)
keys = []      # <th> texts of every later row (row labels)
values = []    # per row: the stripped texts of its first two <td> cells

for row_index, row in enumerate(cov19_table.find_all('tr')):
    th_texts = [cell.text.strip() for cell in row.find_all('th')]
    if row_index == 0:
        headers.extend(th_texts)
    else:
        keys.extend(th_texts)

    # A row contributes values only when it has at least two <td> cells;
    # any cells beyond the second are ignored.
    td_cells = row.find_all('td')
    if len(td_cells) >= 2:
        values.append([cell.text.strip() for cell in td_cells[:2]])

# Pair each row label with its [total, change] strings.
stat_dict = dict(zip(keys, values))

# The first header cell is replaced so the label column gets a title.
headers[0] = 'Title'
print(f"{headers[0]:42}{headers[1]:>10}{headers[2]:>25}")
print()
for label, counts in stat_dict.items():
    # Strip thousands separators in place (the lists inside stat_dict are
    # updated), then convert both figures before printing the line.
    counts[:] = [text.replace(',','') for text in counts]
    total, change = (int(text) for text in counts)
    print(f"{label:42}{total:10}{change:25}")
output:
Output:
Title Total Change in last 24 hours Number of confirmed cases in New Zealand 1121 4 Number of probable cases 349 5 Number of confirmed and probable cases 1470 9 Number of cases currently in hospital 7 0 Number of recovered cases 1142 24 Number of deaths 18 0
Reply
#3
I tried running this but it doesn't work. This is the error message I am getting.
---------------------------------------
Traceback (most recent call last):
File "cov19.py", line 3, in <module>
from PrettifyPage import PrettifyPage
ModuleNotFoundError: No module named 'PrettifyPage'
Reply
#4
Sorry, forgot that module (save in same directory as main script):
from bs4 import BeautifulSoup
import requests
import pathlib


class PrettifyPage:
    """Re-indent the output of an object's ``prettify()`` method.

    The input object only needs a ``prettify()`` method returning text with
    one element per line (for example a BeautifulSoup object or tag).
    """

    def prettify(self, soup, indent):
        """Return ``soup.prettify()`` re-indented by a factor of *indent*.

        Args:
            soup: Object exposing ``prettify()`` that returns a string.
            indent: Multiplier applied to each line's detected indent level.

        Returns:
            The re-indented text; every line, including the last, ends
            with a newline.
        """
        pieces = []
        previous_indent = 0
        for line in soup.prettify().split("\n"):
            # The column of the first "<" is taken as the line's indent level.
            current_indent = str(line).find("<")
            # Lines without a tag, or with a "<" more than two columns deeper
            # than the previous level (presumably a "<" inside text content),
            # are placed one level below the previous line instead.
            if current_indent == -1 or current_indent > previous_indent + 2:
                current_indent = previous_indent + 1
            previous_indent = current_indent
            pieces.append(self.write_new_line(line, current_indent, indent))
        # Join once at the end instead of growing a string with "+=".
        return "".join(pieces)

    def write_new_line(self, line, current_indent, desired_indent):
        """Return *line* with extra leading spaces and a trailing newline.

        The line keeps whatever indentation it already has; enough spaces
        are prepended to bring ``current_indent`` columns up to
        ``current_indent * desired_indent``.

        Args:
            line: The text of one line, without its newline.
            current_indent: Indent level detected for the line.
            desired_indent: Multiplier for the indent level.

        Returns:
            The padded line followed by ``"\\n"``.
        """
        spaces_to_add = (current_indent * desired_indent) - current_indent
        # Multiplying a string by zero or a negative count gives "", so no
        # explicit "> 0" guard is needed.
        return " " * spaces_to_add + str(line) + "\n"

if __name__ == '__main__':
    # Demo: pretty-print a saved HTML file with a 2-space indent.
    import sys

    pp = PrettifyPage()
    # PrettifyPage has no `bpath` attribute, so the input path is built
    # directly with pathlib: the first command-line argument if one is given,
    # otherwise the sample file name in the current directory.
    pfilename = pathlib.Path(
        sys.argv[1] if len(sys.argv) > 1 else 'BusinessEntityRecordsAA.html'
    )
    with pfilename.open('rb') as fp:
        page = fp.read()
    soup = BeautifulSoup(page, 'lxml')
    pretty = pp.prettify(soup, indent=2)
    print(pretty)
Reply
#5
Too much work; here is the easy way, with the power of Pandas (wink)
>>> import pandas as pd

>>> df = pd.read_html('url')[0]
>>> df.rename(columns={'Unnamed: 0': 'Tile'}, inplace=True)
>>> df
Output:
Tile Total Change in last 24 hours 0 Number of confirmed cases in New Zealand 1122 1 1 Number of probable cases 347 -2 2 Number of confirmed and probable cases 1469 -1 3 Number of cases currently in hospital 7 0 4 Number of recovered cases 1180 38 5 Number of deaths (as at 1 pm 27 April) 19 1
Here is how it looks in a Notebook. It finds all the tables, so here I make a graph from table 6 using Altair.
Reply


Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020