Mar-13-2018, 06:02 PM
Hi everyone, I'm currently in the process of trying to use machine learning to sort through emails but whenever I run the code, I keep getting errors, one such error is saying 'list' object has no attribute 'most_common'. I have tried to figure out why it is doing this but can not figure out why nor how to fix it. Below is my code, if anyone can help I'd be really thankful. The error appears at the dictionary = dictionary.most_common side. I dont know if there are any more errors as the program will not go past this part
import os import numpy as np from collections import Counter from sklearn.naive_bayes import MultinomialNB, GaussianNB, BernoulliNB from sklearn.svm import SVC, NuSVC, LinearSVC def make_dictionary(train_dir): emails = [os.path.join(train_dir,f) for f in os.listdir(train_dir)] all_words = [] for mail in emails: with open(mail) as m: for i,line in enumerate(m): if i == 2: words = line.split() all_words += words dictionary = Counter(all_words) return dictionary train_dir = 'train-mails' dictionary = make_dictionary(train_dir) list_to_remove = dictionary.keys() for item in list_to_remove: if item.isalpha() == False: del dictionary[item] elif len(item) == 1: del dictionary[item] dictionary = dictionary.most_common(3000) def extract_features(mail_dir): files = [os.path.join(mail_dir,fi) for fi in os.listdir(mail_dir)] features_matrix = np.zeros((len(files),3000)) docID = 0; for fil in files: with open(fil) as fi: for i,line in enumerate(fi): if i == 2: words = line.split() for word in words: wordID = 0 for i,d in enumerate(dictionary): if d[0] == word: wordID = i features_matrix[docID,wordID] = words.count(word) docID = docID + 1 return features_matrix train_labels = np.zeros(702) train_labels[351:701] = 1 train_matrix = extract_features(train_dir) model1 = MultinomialNB() model2 = LinearSVC() model1.fit(train_matrix,train_labels) model2.fit(train_matrix,train_labels) test_dir = 'test-mails' test_matrix = extract_features(test_dir) test_labels = np.zeros(260) test_labels[130:260] = 1 result1 = model1.predict(test_matrix) result2 = model2.predict(test_matrix) print confusion_matrix(test_labels,result1) print confusion_matrix(test_labels,result2)