Python Forum

Full Version: BeautifulSoup attribute problem
You're currently viewing a stripped down version of our content. View the full version with proper formatting.
In line 16 where it says "for tr in soup.find("tbody").children:", it keeps telling me that there is no such attribute. The code in the example video works just fine. Can someone please help?


import requests
from bs4 import BeautifulSoup
import bs4

def getHtmlText(url, timeout=10):
    """Download *url* and return the response body as decoded text.

    The body is decoded with the encoding requests detects from the content
    (``apparent_encoding``) instead of the one announced in the HTTP headers.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The page text, or ``""`` when the request fails or the server
        answers with an error status.
    """
    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
        # The attribute is spelled ``apparent_encoding``. A misspelled name
        # raises AttributeError, and a bare ``except`` would hide it and make
        # every call return "", leaving the parser without any <tbody>.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only network/HTTP failures are treated as "no page"; programming
        # errors are allowed to surface.
        return ""

def fillUniversList(ulist, html):
    """Append one entry per ranking-table row found in *html* to *ulist*.

    Each entry is a list with the stripped text of the row's second to
    fifth ``<td>`` cells.

    Args:
        ulist: List that receives the parsed rows (modified in place).
        html: HTML source of the page; may be empty.
    """
    soup = BeautifulSoup(html, "html.parser")
    tbody = soup.find("tbody")
    if tbody is None:
        # find() returns None when the document has no <tbody> (for example
        # when the download failed and html is ""); there is nothing to add.
        return
    for tr in tbody.children:
        # .children also yields the whitespace strings between rows.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr("td")
        if len(tds) < 5:
            # Not a full data row; indexing tds[4] would raise IndexError.
            continue
        # get_text() works whether a cell holds plain text or nested tags;
        # ``.children`` is an iterator and has no ``.string`` attribute.
        # NOTE(review): cells 1-4 are kept as in the original; confirm
        # whether the rank column (cell 0) was meant to be included.
        ulist.append([td.get_text(strip=True) for td in tds[1:5]])
               
def printUniversList(ulist, num):
    """Print up to *num* rows of *ulist* as a tab-separated table.

    Args:
        ulist: Sequence of rows; the first three items of each row are
            printed under the "rank", "name" and "location" headings.
        num: Maximum number of rows to print.
    """
    print("{:^10}\t{:^10}\t{:10}".format("rank", "name", "location"))
    # Slice instead of indexing with range(num): a list shorter than num
    # prints what is available rather than raising IndexError. max() keeps
    # a negative num printing no rows, as range() did.
    for u in ulist[:max(num, 0)]:
        print("{:^10}\t{:^10}\t{:^10}".format(u[0], u[1], u[2]))
    print("Suc" + str(num))

def main():
    """Download the ranking page, parse its table and print the first 20 rows."""
    url = 'https://www.shanghairanking.cn/rankings/bcur/2020'
    uinfo = []
    html = getHtmlText(url)
    fillUniversList(uinfo, html)
    printUniversList(uinfo, 20)


# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
    
I cannot read the output, but with this version there is output:

import requests
from bs4 import BeautifulSoup
import bs4
 
def getHtmlText(url, timeout=10):
    """Download *url* and return the raw response body.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as ``bytes``, or ``""`` when the request fails or
        the server answers with an error status.
    """
    try:
        r = requests.get(url, timeout=timeout)
        # Raises for 4xx/5xx answers, so a response that gets past this line
        # is always a successful one and needs no further truth test.
        r.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures mean "no page"; other errors surface.
        return ""
    return r.content
 
def fillUniversList(ulist, html):
    """Append one entry per ranking-table row found in *html* to *ulist*.

    Each entry is a list holding the row's second to fifth ``<td>`` elements
    (the tag objects themselves, not their text).

    Args:
        ulist: List that receives the parsed rows (modified in place).
        html: HTML source of the page; may be empty.
    """
    soup = BeautifulSoup(html, "html.parser")
    tbody = soup.find("tbody")
    if tbody is None:
        # find() returns None when there is no <tbody>, e.g. for an empty
        # download; iterating None.children would raise AttributeError.
        return
    for tr in tbody.children:
        # .children also yields the whitespace strings between rows.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr("td")
        if len(tds) < 5:
            # Not a full data row; indexing tds[4] would raise IndexError.
            continue
        ulist.append([tds[1], tds[2], tds[3], tds[4]])
                
def printUniversList(ulist, num):
    """Print a tab-separated table with the first *num* rows of *ulist*.

    Args:
        ulist: Sequence of rows; items 0, 1 and 2 of each row are converted
            with ``str()`` and printed under "rank", "name" and "location".
        num: Upper bound used to slice *ulist*.
    """
    separator = "\t"
    print(separator.join(("rank", "name", "location")))
    for row in ulist[:num]:
        # Index explicitly so rows with fewer than three items still fail
        # the same way as before.
        columns = (str(row[0]), str(row[1]), str(row[2]))
        print(separator.join(columns))
    print(f"\nSuc{num!s}")
 
def main():
    """Download the ranking page, parse its table and print the first 20 rows."""
    url = 'https://www.shanghairanking.cn/rankings/bcur/2020'
    uinfo = []
    html = getHtmlText(url)
    fillUniversList(uinfo, html)
    printUniversList(uinfo, 20)


# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Maybe this is better:

import requests
from bs4 import BeautifulSoup
 
def getHtmlText(url, timeout=10):
    """Download *url* and return the raw response body.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as ``bytes``.

    Raises:
        requests.RequestException: If the request fails or the server
            answers with an error status.
    """
    r = requests.get(url, timeout=timeout)
    # raise_for_status() raises for every 4xx/5xx answer, so any response
    # that gets past it is successful and no "empty" fallback is reachable.
    r.raise_for_status()
    return r.content
 
def fillUniversList(ulist, html, num=20):
    """Parse the first ``<tbody>`` in *html*, print its rows and record them.

    For each of the first *num* matched rows, the stripped text of up to
    three ``<td>`` cells is printed in 20-column-wide fields and appended
    to *ulist* as a list of strings.

    Args:
        ulist: List that receives the parsed rows (modified in place).
        html: HTML source of the page; may be empty.
        num: Maximum number of rows to process.
    """
    soup = BeautifulSoup(html, "html.parser")
    my_table = soup.find('tbody')
    if my_table is None:
        # No table in the document (e.g. empty download): nothing to print
        # or record, and indexing an empty result list would raise.
        return
    # find_all is the current name of the legacy findChildren alias.
    rows = my_table.find_all(['th', 'tr'])
    headers = ["rank", "name", "location"]
    print('\t'.join(headers).expandtabs(20))
    for row in rows[:num]:
        mlist = [cell.text.strip() for cell in row.find_all('td')[:3]]
        if mlist:
            # Record the row so the caller's list is actually filled;
            # matches without <td> cells carry no data and are not stored.
            ulist.append(mlist)
        print('\t'.join(mlist).expandtabs(20))
 
def main():
    """Download the ranking page and let fillUniversList parse and print it."""
    url = 'https://www.shanghairanking.cn/rankings/bcur/2020'
    uinfo = []
    html = getHtmlText(url)
    fillUniversList(uinfo, html)


# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Output:
rank name location
1 清华大学 北京
2 北京大学 北京
3 浙江大学 浙江
4 上海交通大学 上海
5 南京大学 江苏
6 复旦大学 上海
7 中国科学技术大学 安徽
8 华中科技大学 湖北
9 武汉大学 湖北
10 中山大学 广东
11 西安交通大学 陕西
12 哈尔滨工业大学 黑龙江
13 北京航空航天大学 北京
14 北京师范大学 北京
15 同济大学 上海
16 四川大学 四川
17 东南大学 江苏
18 中国人民大学 北京
19 南开大学 天津
20 北京理工大学 北京
Thanks a lot, I must have done something wrong somewhere.