Sep-24-2018, 10:21 PM
html_doc = """ <html><head><title>The Dormouse's story</title></head> <body> <p class="title"><b>The Dormouse's story</b></p> <p class="story">Once upon a time there were three little sisters; and their names were <a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>, <a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and <a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>; and they lived at the bottom of a well.</p> <p class="story">...</p> """ from bs4 import BeautifulSoup from bs4 import SoupStrainer def is_short_string(string): return len(string) < 10 only_short_strings = SoupStrainer(string=is_short_string) print(BeautifulSoup(html_doc, "html.parser", parse_only=only_short_strings).prettify())
Error:Traceback (most recent call last):
File "C:\Python36\kodovi\sstrainer.py", line 19, in <module>
print(BeautifulSoup(html_doc, "html.parser", parse_only=only_short_strings).
prettify())
File "C:\Python36\lib\site-packages\bs4\__init__.py", line 228, in __init__
self._feed()
File "C:\Python36\lib\site-packages\bs4\__init__.py", line 289, in _feed
self.builder.feed(self.markup)
File "C:\Python36\lib\site-packages\bs4\builder\_htmlparser.py", line 215, in
feed
parser.feed(markup)
File "C:\Python36\lib\html\parser.py", line 111, in feed
self.goahead(0)
File "C:\Python36\lib\html\parser.py", line 171, in goahead
k = self.parse_starttag(i)
File "C:\Python36\lib\html\parser.py", line 345, in parse_starttag
self.handle_starttag(tag, attrs)
File "C:\Python36\lib\site-packages\bs4\builder\_htmlparser.py", line 90, in h
andle_starttag
tag = self.soup.handle_starttag(name, None, None, attr_dict)
File "C:\Python36\lib\site-packages\bs4\__init__.py", line 461, in handle_star
ttag
or not self.parse_only.search_tag(name, attrs))):
File "C:\Python36\lib\site-packages\bs4\element.py", line 1676, in search_tag
if not self._matches(attr_value, match_against):
File "C:\Python36\lib\site-packages\bs4\element.py", line 1736, in _matches
return match_against(markup)
File "C:\Python36\kodovi\sstrainer.py", line 17, in is_short_string
return len(string) < 10
TypeError: object of type 'NoneType' has no len()
I assume that the problem here is that computer doesn't know what argument string
is but not sure how to solve this problem.