chatbot errors - Printable Version +- Python Forum (https://python-forum.io) +-- Forum: Python Coding (https://python-forum.io/forum-7.html) +--- Forum: GUI (https://python-forum.io/forum-10.html) +--- Thread: chatbot errors (/thread-17528.html) |
chatbot errors - francisco_neves2020 - Apr-14-2019 import nltk import numpy as np import random import string # to process standard python strings f=open('chatbot.txt','r',errors = 'ignore') raw=f.read() raw=raw.lower()# converts to lowercase nltk.download('punkt') # first-time use only nltk.download('wordnet') # first-time use only sent_tokens = nltk.sent_tokenize(raw)# converts to list of sentences word_tokens = nltk.word_tokenize(raw)# converts to list of words sent_tokens[:2] ['a chatbot (also known as a talkbot, chatterbot, bot, im bot, interactive agent, or artificial conversational entity) is a computer program or an artificial intelligence which conducts a conversation via auditory or textual methods.', 'such programs are often designed to convincingly simulate how a human would behave as a conversational partner, thereby passing the turing test.'] word_tokens[:2] ['a', 'chatbot', '(', 'also', 'known'] lemmer = nltk.stem.WordNetLemmatizer() #WordNet is a semantically-oriented dictionary of English included in NLTK. def LemTokens(tokens): return [lemmer.lemmatize(token) for token in tokens] remove_punct_dict = dict((ord(punct), None) for punct in string.punctuation) def LemNormalize(text): return LemTokens(nltk.word_tokenize(text.lower().translate(remove_punct_dict))) GREETING_INPUTS = ("hello", "hi", "greetings", "sup", "what's up","hey",) GREETING_RESPONSES = ["hi", "hey", "*nods*", "hi there", "hello", "I am glad! 
You are talking to me"] def greeting(sentence): for word in sentence.split(): if word.lower() in GREETING_INPUTS: return random.choice(GREETING_RESPONSES) from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.metrics.pairwise import cosine_similarity def response(user_response): robo_response='' sent_tokens.append(user_response) TfidfVec = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english') tfidf = TfidfVec.fit_transform(sent_tokens) vals = cosine_similarity(tfidf[-1], tfidf) idx=vals.argsort()[0][-2] flat = vals.flatten() flat.sort() req_tfidf = flat[-2] if(req_tfidf==0): robo_response=robo_response+"I am sorry! I don't understand you" return robo_response else: robo_response = robo_response+sent_tokens[idx] return robo_response flag=True print("ROBO: My name is Robo. I will answer your queries about Chatbots. If you want to exit, type Bye!") while(flag==True): user_response = input() user_response=user_response.lower() if(user_response!='bye'): if(user_response=='thanks' or user_response=='thank you' ): flag=False print("ROBO: You are welcome..") else: if(greeting(user_response)!=None): print("ROBO: "+greeting(user_response)) else: print("ROBO: ",end="") print(response(user_response)) sent_tokens.remove(user_response) else: flag=False print("ROBO: Bye! take care..") I found this code online to create a bot, but I don't understand the error: it works for the first sentence and then dies. Python 3.7.3 (v3.7.3:ef4ec6ed12, Mar 25 2019, 21:26:53) [MSC v.1916 32 bit (Intel)] on win32 Type "help", "copyright", "credits" or "license()" for more information. >>> ================= RESTART: C:\Users\Nelson\Desktop\robo.pyw ================= [nltk_data] Downloading package punkt to [nltk_data] C:\Users\Nelson\AppData\Roaming\nltk_data... [nltk_data] Package punkt is already up-to-date! [nltk_data] Downloading package wordnet to [nltk_data] C:\Users\Nelson\AppData\Roaming\nltk_data... [nltk_data] Package wordnet is already up-to-date! ROBO: My name is Robo. 
I will answer your queries about Chatbots. If you want to exit, type Bye! hi ROBO: hello how are you ROBO: Warning (from warnings module): File "C:\Users\Nelson\AppData\Local\Programs\Python\Python37-32\lib\site-packages\sklearn\feature_extraction\text.py", line 301 'stop_words.' % sorted(inconsistent)) UserWarning: Your stop_words may be inconsistent with your preprocessing. Tokenizing the stop words generated tokens ['ha', 'le', 'u', 'wa'] not in stop_words. Traceback (most recent call last): File "C:\Users\Nelson\Desktop\robo.pyw", line 70, in <module> print(response(user_response)) File "C:\Users\Nelson\Desktop\robo.pyw", line 43, in response tfidf = TfidfVec.fit_transform(sent_tokens) File "C:\Users\Nelson\AppData\Local\Programs\Python\Python37-32\lib\site-packages\sklearn\feature_extraction\text.py", line 1613, in fit_transform X = super(TfidfVectorizer, self).fit_transform(raw_documents) File "C:\Users\Nelson\AppData\Local\Programs\Python\Python37-32\lib\site-packages\sklearn\feature_extraction\text.py", line 1031, in fit_transform self.fixed_vocabulary_) File "C:\Users\Nelson\AppData\Local\Programs\Python\Python37-32\lib\site-packages\sklearn\feature_extraction\text.py", line 962, in _count_vocab raise ValueError("empty vocabulary; perhaps the documents only" ValueError: empty vocabulary; perhaps the documents only contain stop words >>> |