Python Forum
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
BeautifulSoup attribute problem
#1
In line 16 where it says "for tr in soup.find("tbody").children:", it keeps telling me that there is no such attribute. The code in the example video works just fine. Can someone please help?


import requests
from bs4 import BeautifulSoup
import bs4

def getHtmlText(url, timeout=10):
    """Download *url* and return the response body as decoded text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The page text, or ``""`` when the request fails (connection
        problem, timeout, or a 4xx/5xx status reported by
        ``raise_for_status``).
    """
    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
    except requests.RequestException:
        # Best effort: callers receive an empty page instead of an exception.
        return ""
    # The attribute is spelled ``apparent_encoding``. A misspelled name raises
    # AttributeError, which a catch-all handler would silently turn into "".
    r.encoding = r.apparent_encoding
    return r.text

def fillUniversList(ulist, html):
    """Append the text of table cells 1-4 of every ``<tbody>`` row to *ulist*.

    Args:
        ulist: List that receives one ``[cell1, cell2, cell3, cell4]`` list of
            stripped strings per table row; it is modified in place.
        html: Markup of the page to parse.

    Rows with fewer than five ``<td>`` cells are skipped. When the markup has
    no ``<tbody>`` at all (for example because *html* is empty), *ulist* is
    left untouched instead of raising ``AttributeError`` on ``None``.
    """
    soup = BeautifulSoup(html, "html.parser")
    tbody = soup.find("tbody")
    if tbody is None:
        # find() returns None when nothing matches.
        return
    for tr in tbody.children:
        # Direct children also include whitespace strings between the rows.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr.find_all("td")
        if len(tds) < 5:
            continue
        # ``.children`` is an iterator and has no ``.string``; read the text
        # of each cell itself instead.
        # NOTE(review): cell 0 is skipped here although the printer labels the
        # first stored value "rank" - confirm the column offsets against the page.
        ulist.append([td.get_text(strip=True) for td in tds[1:5]])
               
def printUniversList(ulist, num):
    """Print up to *num* rows of *ulist* as a tab-separated table.

    Args:
        ulist: Sequence of rows; the first three items of each row are printed
            under the headings rank, name and location.
        num: Maximum number of rows to print. It is also echoed in the final
            ``Suc<num>`` line.

    A list shorter than *num* (including an empty one) prints the rows that
    exist instead of raising ``IndexError``.
    """
    print("{:^10}\t{:^10}\t{:10}".format("rank", "name", "location"))
    # max() keeps a negative num printing no rows, as range(num) would.
    for u in ulist[:max(num, 0)]:
        print("{:^10}\t{:^10}\t{:^10}".format(u[0], u[1], u[2]))
    print("Suc" + str(num))

def main():
    """Fetch the 2020 ranking page, collect its rows and print the first 20."""
    rankings = []
    page = getHtmlText('https://www.shanghairanking.cn/rankings/bcur/2020')
    fillUniversList(rankings, page)
    printUniversList(rankings, 20)
main()
    
Reply
#2
I cannot read the output, but the script does produce output:

import requests
from bs4 import BeautifulSoup
import bs4
 
def getHtmlText(url, timeout=10):
    """Download *url* and return the raw response body.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The undecoded body (``Response.content``) on success, or ``""`` when
        the request fails (connection problem, timeout, or a 4xx/5xx status
        reported by ``raise_for_status``).
    """
    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
    except requests.RequestException:
        # Best effort: callers receive an empty page instead of an exception.
        return ""
    # No truthiness check is needed: once raise_for_status() has passed, the
    # response is OK, and skipping the return would yield None, not "".
    return r.content
 
def fillUniversList(ulist, html):
    """Collect the text of cells 1-4 from every row of the first ``<tbody>``.

    Args:
        ulist: List extended in place with one four-item list of stripped
            cell texts per table row.
        html: Markup (text or bytes) of the page to parse.

    Cell text is stored rather than the ``<td>`` tags themselves, so printing
    a row shows the values and not HTML markup. Rows with fewer than five
    cells are skipped, and markup without a ``<tbody>`` leaves *ulist*
    unchanged instead of raising ``AttributeError`` on ``None``.
    """
    soup = BeautifulSoup(html, "html.parser")
    body = soup.find("tbody")
    if body is None:
        # Nothing to collect, e.g. when the download failed and html is empty.
        return
    for tr in body.children:
        # Skip the whitespace strings that sit between the row tags.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr.find_all("td")
        if len(tds) >= 5:
            ulist.append([td.get_text(strip=True) for td in tds[1:5]])
                
def printUniversList(ulist, num):
    """Print a tab-separated rank/name/location table for the first *num* rows.

    Each printed row shows the string form of the first three items of the
    corresponding entry in *ulist*. A blank line and ``Suc<num>`` follow.
    """
    sep = "\t"
    print(sep.join(("rank", "name", "location")))
    for row in ulist[:num]:
        rank, name, location = str(row[0]), str(row[1]), str(row[2])
        print(sep.join((rank, name, location)))
    print("\nSuc" + str(num))
 
def main():
    """Download the 2020 ranking page, gather its rows and print the top 20."""
    source = 'https://www.shanghairanking.cn/rankings/bcur/2020'
    collected = []
    fillUniversList(collected, getHtmlText(source))
    printUniversList(collected, 20)

main()
Reply
#3
maybe this is better

import requests
from bs4 import BeautifulSoup
 
def getHtmlText(url, timeout=10):
    """Download *url* and return the raw response body.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The undecoded body (``Response.content``).

    Raises:
        requests.RequestException: If the request fails or the server answers
            with a 4xx/5xx status (raised by ``raise_for_status``).
    """
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()
    # A response is truthy whenever raise_for_status() does not raise, so a
    # fallback branch for a falsy response could never run.
    return r.content
 
def fillUniversList(ulist, html, limit=20):
    """Print and collect the first three cells of the leading table rows.

    Args:
        ulist: List extended in place with one list of stripped cell texts
            per printed row.
        html: Markup (text or bytes) of the page to parse.
        limit: Maximum number of rows to process; expected to be non-negative.

    A header line followed by one line per row is written to stdout, with
    columns padded by expanding tabs to 20 characters. When the markup has no
    ``<tbody>``, nothing is printed or collected instead of raising
    ``IndexError``.
    """
    soup = BeautifulSoup(html, "html.parser")
    tbody = soup.find("tbody")
    if tbody is None:
        return
    headers = ["rank", "name", "location"]
    print('\t'.join(headers).expandtabs(20))
    # Only <tr> elements are rows; a <th> holds no <td> cells and would just
    # print an empty line while using up part of the row limit.
    for row in tbody.find_all("tr")[:limit]:
        values = [cell.text.strip() for cell in row.find_all("td")[:3]]
        ulist.append(values)
        print('\t'.join(values).expandtabs(20))
 
def main():
    """Download the 2020 ranking page and print its leading rows."""
    results = []
    page = getHtmlText('https://www.shanghairanking.cn/rankings/bcur/2020')
    fillUniversList(results, page)

main()
Output:
rank name location 1 清华大学 北京 2 北京大学 北京 3 浙江大学 浙江 4 上海交通大学 上海 5 南京大学 江苏 6 复旦大学 上海 7 中国科学技术大学 安徽 8 华中科技大学 湖北 9 武汉大学 湖北 10 中山大学 广东 11 西安交通大学 陕西 12 哈尔滨工业大学 黑龙江 13 北京航空航天大学 北京 14 北京师范大学 北京 15 同济大学 上海 16 四川大学 四川 17 东南大学 江苏 18 中国人民大学 北京 19 南开大学 天津 20 北京理工大学 北京
zzy likes this post
Reply
#4
Thanks a lot, I must have done something wrong somewhere.
Reply


Possibly Related Threads…
Thread Author Replies Views Last Post
  Trying to extract style attribute with BeautifulSoup knight2000 1 3,086 Dec-28-2022, 03:06 AM
Last Post: knight2000
  Python 3.9 : BeautifulSoup: 'NoneType' object has no attribute 'text' fudgemasterultra 1 8,910 Mar-03-2021, 09:40 AM
Last Post: Larz60+
  form.populate_obj problem "object has no attribute translate" pascale 0 3,653 Jun-12-2019, 07:30 PM
Last Post: pascale
  BeautifulSoup 'NoneType' object has no attribute 'text' bmccollum 9 14,640 Sep-14-2018, 12:56 PM
Last Post: bmccollum

Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020