Apr-14-2019, 11:46 PM
import nltk
import numpy as np
import random
import string  # to process standard python strings

f = open('chatbot.txt', 'r', errors='ignore')
raw = f.read()
raw = raw.lower()  # converts to lowercase
nltk.download('punkt')    # first-time use only
nltk.download('wordnet')  # first-time use only
sent_tokens = nltk.sent_tokenize(raw)  # converts to list of sentences
word_tokens = nltk.word_tokenize(raw)  # converts to list of words

sent_tokens[:2]
# ['a chatbot (also known as a talkbot, chatterbot, bot, im bot, interactive agent, or artificial conversational entity) is a computer program or an artificial intelligence which conducts a conversation via auditory or textual methods.',
#  'such programs are often designed to convincingly simulate how a human would behave as a conversational partner, thereby passing the turing test.']

word_tokens[:5]
# ['a', 'chatbot', '(', 'also', 'known']

lemmer = nltk.stem.WordNetLemmatizer()
# WordNet is a semantically-oriented dictionary of English included in NLTK.

def LemTokens(tokens):
    return [lemmer.lemmatize(token) for token in tokens]

remove_punct_dict = dict((ord(punct), None) for punct in string.punctuation)

def LemNormalize(text):
    return LemTokens(nltk.word_tokenize(text.lower().translate(remove_punct_dict)))

GREETING_INPUTS = ("hello", "hi", "greetings", "sup", "what's up", "hey")
GREETING_RESPONSES = ["hi", "hey", "*nods*", "hi there", "hello", "I am glad! You are talking to me"]

def greeting(sentence):
    for word in sentence.split():
        if word.lower() in GREETING_INPUTS:
            return random.choice(GREETING_RESPONSES)

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def response(user_response):
    robo_response = ''
    sent_tokens.append(user_response)
    TfidfVec = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english')
    tfidf = TfidfVec.fit_transform(sent_tokens)
    vals = cosine_similarity(tfidf[-1], tfidf)
    idx = vals.argsort()[0][-2]
    flat = vals.flatten()
    flat.sort()
    req_tfidf = flat[-2]
    if req_tfidf == 0:
        robo_response = robo_response + "I am sorry! I don't understand you"
        return robo_response
    else:
        robo_response = robo_response + sent_tokens[idx]
        return robo_response

flag = True
print("ROBO: My name is Robo. I will answer your queries about Chatbots. If you want to exit, type Bye!")
while flag == True:
    user_response = input()
    user_response = user_response.lower()
    if user_response != 'bye':
        if user_response == 'thanks' or user_response == 'thank you':
            flag = False
            print("ROBO: You are welcome..")
        else:
            if greeting(user_response) != None:
                print("ROBO: " + greeting(user_response))
            else:
                print("ROBO: ", end="")
                print(response(user_response))
                sent_tokens.remove(user_response)
    else:
        flag = False
        print("ROBO: Bye! take care..")
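For anyone reading along, the retrieval step in response() works like this: the user's sentence is appended as the last document, so tfidf[-1] is the query; cosine similarity of the query against every document includes the query itself (similarity 1.0), which is why the code takes the second-largest value (index [-2]) as the best match. A small self-contained sketch of that step, with a made-up two-sentence corpus (the sentences here are hypothetical, not from chatbot.txt):

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Hypothetical two-sentence corpus plus the user's query as the last document.
docs = [
    "a chatbot is a computer program that conducts a conversation.",
    "the turing test measures a machine's ability to exhibit intelligent behaviour.",
    "what is a chatbot",   # user input, appended last
]

tfidf = TfidfVectorizer(stop_words='english').fit_transform(docs)
vals = cosine_similarity(tfidf[-1], tfidf)  # shape (1, 3); last entry is 1.0 (query vs itself)
idx = vals.argsort()[0][-2]                 # best match other than the query itself
print(idx, docs[idx])                       # 0 "a chatbot is a computer program ..."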
I found this code online to create a chatbot, but I don't understand the error; it works for the first sentence and then dies.
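A note on where the crash happens: "hi" is answered by greeting() without ever touching scikit-learn, so "how are you" is the first input that actually reaches response(). There, the user's sentence is appended to sent_tokens and the whole list is vectorized. "how", "are", and "you" are all on scikit-learn's built-in English stop-word list, so if chatbot.txt yielded no sentences (an empty or unreadably encoded file would do it), the only document left after stop-word removal has no terms at all, which raises exactly this ValueError. A minimal sketch reproducing that failure mode:

from sklearn.feature_extraction.text import TfidfVectorizer

# Suppose the corpus loaded from chatbot.txt came back empty,
# so the only "document" is the user's sentence.
docs = ["how are you"]  # every word is an English stop word

vec = TfidfVectorizer(stop_words='english')
try:
    vec.fit_transform(docs)
except ValueError as e:
    print(e)  # empty vocabulary; perhaps the documents only contain stop words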

Python 3.7.3 (v3.7.3:ef4ec6ed12, Mar 25 2019, 21:26:53) [MSC v.1916 32 bit (Intel)] on win32
Type "help", "copyright", "credits" or "license()" for more information.
>>>
================= RESTART: C:\Users\Nelson\Desktop\robo.pyw =================
[nltk_data] Downloading package punkt to
[nltk_data] C:\Users\Nelson\AppData\Roaming\nltk_data...
[nltk_data] Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data] C:\Users\Nelson\AppData\Roaming\nltk_data...
[nltk_data] Package wordnet is already up-to-date!
ROBO: My name is Robo. I will answer your queries about Chatbots. If you want to exit, type Bye!
hi
ROBO: hello
how are you
ROBO:
Warning (from warnings module):
File "C:\Users\Nelson\AppData\Local\Programs\Python\Python37-32\lib\site-packages\sklearn\feature_extraction\text.py", line 301
'stop_words.' % sorted(inconsistent))
UserWarning: Your stop_words may be inconsistent with your preprocessing. Tokenizing the stop words generated tokens ['ha', 'le', 'u', 'wa'] not in stop_words.
Traceback (most recent call last):
File "C:\Users\Nelson\Desktop\robo.pyw", line 70, in <module>
print(response(user_response))
File "C:\Users\Nelson\Desktop\robo.pyw", line 43, in response
tfidf = TfidfVec.fit_transform(sent_tokens)
File "C:\Users\Nelson\AppData\Local\Programs\Python\Python37-32\lib\site-packages\sklearn\feature_extraction\text.py", line 1613, in fit_transform
X = super(TfidfVectorizer, self).fit_transform(raw_documents)
File "C:\Users\Nelson\AppData\Local\Programs\Python\Python37-32\lib\site-packages\sklearn\feature_extraction\text.py", line 1031, in fit_transform
self.fixed_vocabulary_)
File "C:\Users\Nelson\AppData\Local\Programs\Python\Python37-32\lib\site-packages\sklearn\feature_extraction\text.py", line 962, in _count_vocab
raise ValueError("empty vocabulary; perhaps the documents only"
ValueError: empty vocabulary; perhaps the documents only contain stop words
>>>
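The UserWarning above is a separate, harmless issue: the lemmatizing tokenizer rewrites some stop words (e.g. "was" -> "wa") into forms that are no longer in the stop list; it does not cause the crash. The ValueError is the real problem, and since the greeting path worked fine, it points at the corpus rather than the bot logic. A hedged guess: chatbot.txt is empty, or is being decoded to nothing under the Windows default encoding (the script opens it without an explicit encoding). A sketch of a more defensive load, assuming that diagnosis:

import nltk

# Open with an explicit encoding to avoid silently mangling text on Windows,
# where the default codec is often cp1252 rather than UTF-8.
with open('chatbot.txt', 'r', encoding='utf-8', errors='ignore') as f:
    raw = f.read().lower()

sent_tokens = nltk.sent_tokenize(raw)
print(f"Loaded {len(sent_tokens)} sentences from chatbot.txt")
if not sent_tokens:
    raise SystemExit("chatbot.txt produced no sentences - check its path, contents, and encoding.")

If the sentence count prints as 0, the file rather than the code is what needs fixing.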