Here is the code:
Thanks, that solved the problem; this issue wasted a few days.
# --- pythonTwitterTest.py ---
# The main client module which is using all other code.
#
# Importing process.processData pulls in geotext, which is where the
# UnicodeDecodeError in the traceback is raised (at import time, before
# processAndCleanData is ever called).
import tweepy
import json
# NOTE(review): the star imports are kept as posted; only
# processAndCleanData is used by this script.
from TwitterPoint.TwitterEngine import *
from dbLayer.dbHelper import rHelper
from process.processData import *

# Clean the rows of the 'trumpFollowers' table and dump them to a file.
processAndCleanData('trumpFollowers')
print('breakpoint here')
# --- processData.py ---
from geotext import GeoText  # for classifying and separating City, Country and States/Provinces
# import pycountry  # for en to English
import simplejson
# import babelfish
from dbLayer.dbHelper import rHelper
from langcodes import Language


def getLocations(array):
    """Placeholder: ignores ``array`` and always returns an empty list."""
    return []


def processAndCleanData(tableName):
    """Build a cleaned record for every raw row of ``tableName`` and dump them.

    Each raw row is read through ``rHelper('tw').getRawDumpCursor(tableName)``
    and turned into a dict with the keys ``id``, ``name``, ``location``,
    ``locationExtract``, ``language`` and ``LangExtract``. The resulting list
    is handed to ``dumpToFile``.

    Args:
        tableName: name of the table whose raw dump is processed.
    """
    rdb = rHelper('tw')
    # gets data from rethinkdb (in json format)
    rawCursor = rdb.getRawDumpCursor(tableName)

    cleanData = []
    for raw in rawCursor:
        # Human-readable language name for the row's language code.
        langPyCountry = Language.get(raw['lang']).language_name()
        # The posted code read ``x.location``, but ``x`` is never defined;
        # the row being processed is ``raw``.
        # NOTE(review): ``or ''`` assumes location may be None/empty in the
        # stored data - confirm against the table contents.
        places = GeoText(raw['location'] or '')
        cleanData.append({
            "id": raw['id_str'],
            "name": raw['name'],
            "location": raw['location'],
            "locationExtract": places.cities,
            "language": raw['lang'],
            "LangExtract": langPyCountry,
        })

    dumpToFile(cleanData)


def dumpToFile(array):
    """Serialize ``array`` as JSON into ``dump.txt``, overwriting the file.

    Args:
        array: JSON-serializable object to write.
    """
    # The context manager closes the file even if serialization raises.
    with open('dump.txt', 'w', encoding='utf-8') as f:
        simplejson.dump(array, f)
Quote:Traceback (most recent call last):
File "pythonTwitterTest.py", line 5, in <module>
from process.processData import *
File "C:\OwaisWorkx\Courses\5th Semester\Project\pythonTwitterTest\pythonTwitterTest\process\processData.py", line 1, in <module>
from geotext import GeoText # for classifying and seperating City , Country and States/Provinces
File "c:\Python33\lib\site-packages\geotext\__init__.py", line 7, in <module>
from .geotext import GeoText
File "c:\Python33\lib\site-packages\geotext\geotext.py", line 87, in <module>
class GeoText(object):
File "c:\Python33\lib\site-packages\geotext\geotext.py", line 103, in GeoText
index = build_index()
File "c:\Python33\lib\site-packages\geotext\geotext.py", line 77, in build_index
cities = read_table(get_data_path('cities15000.txt'), usecols=[1, 8])
File "c:\Python33\lib\site-packages\geotext\geotext.py", line 54, in read_table
for line in lines:
File "c:\Python33\lib\site-packages\geotext\geotext.py", line 51, in <genexpr>
lines = (line for line in f if not line.startswith(comment))
File "c:\Python33\lib\encodings\cp1252.py", line 23, in decode
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
UnicodeDecodeError: 'charmap' codec can't decode byte 0x81 in position 165: character maps to <undefined>
(Mar-21-2017, 10:18 PM)snippsat Wrote: Testing and fixing his code.
Fix:
In geotext.py, line 45, change:
with open(filename, 'r') as f:
To:
with open(filename, 'r', encoding='utf-8') as f:
Test.
(geo_test) C:\Python36\geo_test
λ python
Python 3.6.0 (v3.6.0:41df79263a11, Dec 23 2016, 07:18:10) [MSC v.1900 32 bit (Intel)] on win32
Type "help", "copyright", "credits" or "license" for more information.
>>> from geotext import GeoText
>>> places = GeoText("London is a great city")
>>> places.cities
['London']
>>> GeoText('New York, Texas, and also China').country_mentions
OrderedDict([('US', 2), ('CN', 1)])
>>> places = GeoText("Oslo is a great city")
>>> places.cities
['Oslo']
Edit:
There was one bug report about this.
So I have given this info to the author of geotext on GitHub.
Thanks, that solved the problem; this issue wasted a few days.