Jan-15-2019, 04:25 PM
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
import bs4 as bs import urllib.request import re import os from colorama import Fore, Back, Style, init init() def highlight(word): if word in keywords: return Fore.RED + str (word) + Fore.RESET else : return str (word) for newurl in newurls: url = urllib.request.urlopen(newurl) soup1 = bs.BeautifulSoup(url, 'lxml' ) paragraphs = soup1.findAll( 'p' ) print (Fore.GREEN + soup1.h2.text + Fore.RESET) print ('') for paragraph in paragraphs: if paragraph ! = None : textpara = paragraph.text.strip().split( ' ' ) colored_words = list ( map (highlight, textpara)) print ( " " .join(colored_words).encode( "utf-8" )) #encode("utf-8") else : pass |
Output:b'\x1b[31mthe desired \x1b[31mmystery corners \x1b[31mthe differential .
\x1b[31mthe back \x1b[31mpretends to be \x1b[31mthe'
I removed encode("utf-8") and I get encoding errorOutput:Traceback (most recent call last):
File "C:\Users\resea\Desktop\Python Projects\Try 3.py", line 52, in
<module>
print(" ".join(colored_words)) #encode("utf-8")
File "C:\Python34\lib\site-packages\colorama\ansitowin32.py", line 41, in
write
self.__convertor.write(text)
File "C:\Python34\lib\site-packages\colorama\ansitowin32.py", line 162,
in write
self.write_and_convert(text)
File "C:\Python34\lib\site-packages\colorama\ansitowin32.py", line 190,
in write_and_convert
self.write_plain_text(text, cursor, len(text))
File "C:\Python34\lib\site-packages\colorama\ansitowin32.py", line 195, in
write_plain_text
self.wrapped.write(text[start:end])
File "C:\Python34\lib\encodings\cp850.py", line 19, in encode
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
UnicodeEncodeError: 'charmap' codec can't encode character '\u2019' in
position 23: character maps to <undefined>
Can you help where I am going wrong please? ThanksDo i need to use different encoding?