hello. I test a code, and it's not working:
from bs4 import BeautifulSoup import bs4 import requests import os import re def getHTMLText(url): try: headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36'} r = requests.get(url, headers=headers) r.raise_for_status() r.encoding = r.apparent_encoding return r.text except: return "NO" def main(): for i in range(100): url = 'https://movie.douban.com/subject/30362186/comments?start' url = url +str(20*i) html = getHTMLText(url) soup = BeautifulSoup(html, 'html.parser') for new in soup.select('.comments'): for b in new.select('a'): u1=b.text for c in new.select('.short'): u2=c.text print(u1+u2) main()
Error:Traceback (most recent call last):
File "C:\Folder2\html parser.py", line 32, in <module>
print(u1+u2)
NameError: name 'u1' is not defined