Watson Personality Insight: minimum number of words

Watson Personality Insight: minimum number of words - Printable Version

+- Python Forum (https://python-forum.io)
+-- Forum: Python Coding (https://python-forum.io/forum-7.html)
+--- Forum: General Coding Help (https://python-forum.io/forum-8.html)
+--- Thread: Watson Personality Insight: minimum number of words (/thread-9795.html)

Watson Personality Insight: minimum number of words - kiton - Apr-28-2018

Hello dear forum members,

I currently have a task of running about 9,000 files through IBM Watson Personality Insight api. To do this, together with my colleague we created the following code (see below). I realize it's not perfect in any way, but it does the job. However, with small files of <100 words we get an error from the api (also see below). Could you please help me address this issue by improving the code, as my programming skills are insufficient :(( Basically, I'd like the code to skip any file that is <100 words and move on with the rest of the batch. Thank you in advance for your help :)

import re
import json
from os.path import join, dirname
from watson_developer_cloud import PersonalityInsightsV2
import csv
import sys
import glob


digits=(glob.glob('/Users/.../Desktop/Watson Analysis/2013/*.txt'))

personality_insights = PersonalityInsightsV2(
    username='......',
    password='......')


with open('/Users/.../Desktop/Watson Analysis/2013/watson_twitter_2013.csv', 'wt') as csvfile:
    spamwriter = csv.writer(csvfile, delimiter=',',
                            quotechar='|', quoting=csv.QUOTE_MINIMAL)
    with open(digits[0]) as \
            personality_text:
                parse=json.dumps(personality_insights.profile(
                    text=personality_text.read()), indent=2)
    newp=re.split("}|{", parse)
    v=[ x for x in newp if "id" in x and "percentage" in x ]
    d=(re.findall(r'id": (.*?),', str(v)))
    w=(re.findall(r'percentage": (.*?),', str(v)))
    len(d)
    len(w)
    print(d)
    print(w)
    spamwriter.writerow(['doc']+['word_count']+d)
    for number in digits:
            with open(str(number)) as \
                personality_text:
                    parse=json.dumps(personality_insights.profile(
                        text=personality_text.read()), indent=2)
            newp=re.split("}|{", parse)
            v=[ x for x in newp if "id" in x and "percentage" in x ]
            d=(re.findall(r'id": (.*?),', str(v)))
            w=(re.findall(r'percentage": (.*?),', str(v)))
            len(d)
            len(w)
            print(d)
            print(w)
            wordcount=(re.findall(r'word_count": (.*?),', parse))
            spamwriter.writerow([number]+wordcount+w)

Error:---------------------------------------------------------------------------
WatsonApiException                        Traceback (most recent call last)
<ipython-input-3-26e015583735> in <module>()
     33             with open(str(number)) as                 personality_text:
     34                     parse=json.dumps(personality_insights.profile(
---> 35                         text=personality_text.read()), indent=2)
     36             newp=re.split("}|{", parse)
     37             v=[ x for x in newp if "id" in x and "percentage" in x ]

/anaconda3/lib/python3.6/site-packages/watson_developer_cloud/personality_insights_v2.py in profile(self, text, content_type, accept, language, csv_headers)
     59         response = self.request(
     60             method='POST', url='/v2/profile', data=text, params=params,
---> 61             headers=headers)
     62         if accept == 'application/json':
     63             return response.json()

/anaconda3/lib/python3.6/site-packages/watson_developer_cloud/watson_service.py in request(self, method, url, accept_json, headers, params, json, data, files, **kwargs)
    446             error_info = self._get_error_info(response)
    447             raise WatsonApiException(response.status_code, error_message,
--> 448                                      info=error_info, httpResponse=response)

WatsonApiException: Error: The number of words 94 is less than the minimum number of words required for analysis: 100, Code: 400 , X-dp-watson-tran-id: gateway01-2094016729 , X-global-transaction-id: 7ecac92c5ae406a47cd028d9

We made some improvements to pass on any file that is <100 words; however, it still gives the same error.

import json
from os.path import join, dirname
from watson_developer_cloud import PersonalityInsightsV2
import csv
import re
import json
from os.path import join, dirname
from watson_developer_cloud import PersonalityInsightsV2
import csv
import sys
import glob


digits=(glob.glob('/Users/.../Desktop/Test/*.txt'))

personality_insights = PersonalityInsightsV2(
    username='...',
    password='...')


with open('/Users/.../Desktop/Test/Test.csv', 'wt') as csvfile:
    spamwriter = csv.writer(csvfile, delimiter=',',
                            quotechar='|', quoting=csv.QUOTE_MINIMAL)
    with open(digits[0]) as \
            personality_text:
                parse=json.dumps(personality_insights.profile(
                    text=personality_text.read()), indent=2)
    newp=re.split("}|{", parse)
    v=[ x for x in newp if "id" in x and "percentage" in x ]
    d=(re.findall(r'id": (.*?),', str(v)))
    w=(re.findall(r'percentage": (.*?),', str(v)))
    len(d)
    len(w)
    print(d)
    print(w)
    spamwriter.writerow(['doc']+['word_count']+d)
    for number in digits:
            with open(str(number)) as \
                personality_text:
                    f = open(str(number),"r")
                    string = f.read()
                    s=string.split(" ")
                    if len(s)<100:
                            pass
                    else:
                            parse=json.dumps(personality_insights.profile(
                                text=personality_text.read()), indent=2)
            newp=re.split("}|{", parse)
            v=[ x for x in newp if "id" in x and "percentage" in x ]
            d=(re.findall(r'id": (.*?),', str(v)))
            w=(re.findall(r'percentage": (.*?),', str(v)))
            len(d)
            len(w)
            print(d)
            print(w)
            wordcount=(re.findall(r'word_count": (.*?),', parse))
            spamwriter.writerow([number]+wordcount+w)