# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.utils import shuffle
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
# making corpus of words from comments
import re
import pickle
from nltk.stem.porter import PorterStemmer
import nltk
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from nltk import word_tokenize

# Load the labelled comments; cp437 matches how the CSV was exported.
# Columns: raw comment text, sentiment label, plus a trailing junk column.
dataset = pd.read_csv(r'D:\thesis material\DataSet.csv', encoding='cp437',
                      names=['comment', 'sentiment', 'nan'])
print(dataset.head())

# Count examples per class to eyeball class balance.
Pos = dataset[dataset['sentiment'] == 'Positive'].shape[0]
Neg = dataset[dataset['sentiment'] == 'Negative'].shape[0]
Neu = dataset[dataset['sentiment'] == 'Neutral'].shape[0]

# bar plot of the 3 classes (fixed the "Positve" typo in the legend)
plt.bar(10, Pos, 3, label="Positive")
plt.bar(15, Neg, 3, label="Negative")
plt.bar(20, Neu, 3, label="Neutral")
plt.legend()
plt.ylabel('Number of examples')
plt.title('Proportion of examples')
plt.show()

# Step 4: y is the categorical target (Positive / Negative / Neutral).
y = dataset['sentiment']

# Roman-Urdu stopwords. A frozenset gives O(1) membership tests in clean()
# and silently drops the duplicates the original list carried
# ('lye', 'waisay', 'rha', ...).
stopwords = frozenset([
    'ai', 'ayi', 'hy', 'hai', 'main', 'ki', 'tha', 'koi', 'ko', 'sy', 'woh',
    'bhi', 'aur', 'wo', 'yeh', 'rha', 'hota', 'ho', 'ga', 'ka', 'le', 'lye',
    'kr', 'kar', 'liye', 'hotay', 'waisay', 'gya', 'gaya', 'kch', 'ab', 'thy',
    'thay', 'houn', 'hain', 'han', 'to', 'is', 'hi', 'jo', 'kya', 'thi', 'se',
    'pe', 'phr', 'wala', 'us', 'na', 'ny', 'hun', 'raha', 'ja', 'rahay',
    'abi', 'uski', 'ne', 'haan', 'acha', 'nai', 'sent', 'photo', 'you',
    'kafi', 'gai', 'rhy', 'kuch', 'jata', 'aye', 'ya', 'dono', 'hoa', 'aese',
    'de', 'wohi', 'jati', 'jb', 'krta', 'lg', 'rahi', 'hui', 'karna', 'krna',
    'gi', 'hova', 'yehi', 'jana', 'jye', 'chal', 'mil', 'tu', 'hum', 'par',
    'hay', 'kis', 'sb', 'gy', 'dain', 'krny', 'tou',
])


def clean(x):
    """Normalise one comment: keep letters only, lowercase, tokenize,
    drop Roman-Urdu stopwords, and rejoin into a single string."""
    letters_only = re.sub('[^a-zA-Z]', ' ', str(x))
    tokens = word_tokenize(letters_only.lower())
    kept = [word for word in tokens if word not in stopwords]
    return ' '.join(kept)


dataset['comment'] = dataset['comment'].apply(clean)
x = dataset['comment']

# Step 6: Split data set into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20)

# Step 6: convert the raw documents to a TF-IDF feature matrix.
# NOTE: the vectorizer is fit on the training fold only; the test fold is
# only transformed, so no information leaks from test to train.
vectorizer = TfidfVectorizer()
x_train_vector = vectorizer.fit_transform(x_train)
x_test_vector = vectorizer.transform(x_test)

# Step 8: Creating classifier and fitting data in classifier
classifier = SVC(kernel='linear', C=1.0, degree=3, random_state=0)
classifier.fit(x_train_vector, y_train)

# Step 9: Pickling the Model.
# To reuse, we can dump the model and load it wherever needed.
# The vocabulary (vectorizer) is also needed to vectorize new documents
# when predicting the label.
# Persist the fitted artifacts. Using `with` guarantees the file handles
# are closed (the original leaked them via inline open()).
# pickling the vectorizer (needed to vectorize new documents at predict time)
with open('vectorizer.sav', 'wb') as f:
    pickle.dump(vectorizer, f)
# pickling the model
with open('classifier.sav', 'wb') as f:
    pickle.dump(classifier, f)

# Step 9: Perform Prediction
y_pred = classifier.predict(x_test_vector)

# Step 10: Create Confusion Matrix
ConfusionMatrix = confusion_matrix(y_test, y_pred)

# Step 11: Evaluation — accuracy as a formatted percentage string.
Accuracy = format(classifier.score(x_test_vector, y_test) * 100, '.2f') + ' %'
with open('AccuracyPercentage', 'wb') as file:
    pickle.dump(Accuracy, file)
print('Learning end')


def _plot_confusion_matrix(cm, labels):
    """Render a labelled heat-map for a confusion matrix (shared by both
    classifiers; the original duplicated this plotting code verbatim)."""
    fig = plt.figure()
    ax = fig.add_subplot(111)
    cax = ax.matshow(cm)
    plt.title('Confusion matrix of the classifier \n')
    fig.colorbar(cax)
    ax.set_xticklabels([''] + labels)
    ax.set_yticklabels([''] + labels)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()


labels = ['Positive', 'Neutral', 'Negative']
_plot_confusion_matrix(ConfusionMatrix, labels)

# classifier1: Logistic Regression baseline.
# BUG FIX: the original called fit/predict on the raw text series
# (x_train / x_test), which raises
#     ValueError: could not convert string to float
# — the estimator needs the numeric TF-IDF vectors, not strings.
classifier = LogisticRegression(random_state=0, solver='liblinear',
                                multi_class='ovr')
classifier.fit(x_train_vector, y_train)
y_pred = classifier.predict(x_test_vector)
print(y_pred)

# confusion matrix
cm = confusion_matrix(y_test, y_pred)
print(cm)

# accuracy of LogisticRegression
print('Accuracy is {} '.format(accuracy_score(y_test, y_pred)))

labels = ['Positive', 'Neutral', 'Negative']
_plot_confusion_matrix(cm, labels)
Error:Error: return array(a, dtype, copy=False, order=order)
ValueError: could not convert string to float:
buran write Mar-01-2021, 06:58 AM:
Please use proper tags when posting code, tracebacks, output, etc. This time I have added the tags for you.
See BBcode help for more info.
also, please, post the entire traceback that you get. We need to see the whole thing. Do not just give us the last line.
Take the time to read What to include in a post.
Please use proper tags when posting code, tracebacks, output, etc. This time I have added the tags for you.
See BBcode help for more info.
Also, please post the entire traceback that you get. We need to see the whole thing — do not just give us the last line.
Take the time to read What to include in a post.