Posts: 12
Threads: 2
Joined: Dec 2018
Hi, I am a newbie (relatively). In the following code, I have written a function called get_standardised_matrix. Before this function, I created another function, get_standard_deviation. I need to standardise a matrix by using get_standard_deviation. This should all be done without using numpy.
Now I am getting the following error. I have been stuck on this code for many hours and am quite frustrated at the moment. Therefore, any URGENT help would be highly appreciated.
get_k_nearest_labels('Data.csv','Learning_Data.csv','Learning_Data_Labels.csv',1)
[21:51, 12/24/2018] Nida: Traceback (most recent call last):
File "C:/Users/Nida/Desktop/Nida-CSEE/Python/python-assignment.py", line 87, in <module>
get_k_nearest_labels('Data.csv','Learning_Data.csv','Learning_Data_Labels.csv',1)
File "C:/Users/Nida/Desktop/Nida-CSEE/Python/python-assignment.py", line 84, in get_k_nearest_labels
distance=get_distance(file_open[0],fopen[i])
File "C:/Users/Nida/Desktop/Nida-CSEE/Python/python-assignment.py", line 23, in get_distance
euclidean += pow((test2[i][k]-test1[j][k]),2)
TypeError: unsupported operand type(s) for -: 'str' and 'str'
>>>
Following is my script.
import csv
# Read the CSV file at `filename` and return its rows as a list of lists.
# csv.reader performs no type conversion, so every field is a string.
# NOTE(review): indentation was lost in this paste; the nesting below is inferred.
def load_from_csv(filename):
with open(filename,'r') as csvfile:
readCSV=list(csv.reader(csvfile,delimiter=','))
# The loop only serves to reach the return: the full list is returned on the
# first pass. If the file has no rows the loop never runs and no explicit
# return is reached.
for row in readCSV:
return readCSV
# Demo call: prints the parsed contents of Data.csv.
print('Data from "Data.csv" file :',load_from_csv('Data.csv'))
import math
# Print the Euclidean distances between the rows of test2 and the rows of test1.
# test1, test2: sequences of rows. Elements are subtracted directly, so they
# must already be numbers.
# The result is printed; there is no return statement.
# NOTE(review): indentation was lost in this paste; the loop nesting is inferred.
def get_distance(test1,test2):
euclidean = 0
euclidean_list_complete = []
for i in range(len(test2)):
euclidean_list = []
for j in range(len(test1)):
# The column count is taken from the first row of test1.
for k in range(len(test1[0])):
# Raises TypeError when the elements are strings (e.g. raw csv.reader output).
euclidean += pow((test2[i][k]-test1[j][k]),2)
euclidean_list.append(math.sqrt(euclidean))
# Reset the accumulator before the next pair of rows.
euclidean = 0
# Largest distance first.
euclidean_list.sort(reverse=True)
euclidean_list_complete.append(euclidean_list)
del euclidean_list
print ('Euclidean Distance between two matrix :\n',euclidean_list_complete)
# Demo call with two 3x3 matrices of ints.
get_distance([[2,3,5],[12,3,5],[2,3,6]],[[1,2,3],[3,4,6],[4,5,8]])
import statistics
# Return the sample standard deviation (statistics.stdev) of one column of matrix.
# matrix: sequence of rows.
# col: 1-based column number; it is shifted down by one before indexing.
def get_standard_deviation(matrix, col):
#list1=matrix
#print(list1)
#print(matrix.columns.values)
# Convert the 1-based column number to a 0-based index.
i=col-1
new_list=[item[i] for item in matrix]
result= statistics.stdev(new_list)
return result
# Demo call: column number 1 selects the first column, i.e. the values [2, 4].
print('The Standard Deviation of the elements in the column number passed as a parameter :',get_standard_deviation([[2,5,8],[4,8,2]],1))
# Print the arithmetic mean of a flat sequence of numbers.
# NOTE(review): the mean is only printed; no return statement is visible, so
# callers that use the result receive None.
def ave(matrix):
average=sum(matrix)/len(matrix)
print('Average of a matrix :',average)
#print(ave[2,3,4])
# Attempt to standardise column 0 of matrix: (value - mean) / standard deviation.
# Marked "not working" by the author in the trailing comments.
# NOTE(review): indentation was lost in this paste; the loop nesting is inferred.
def get_standardised_matrix(matrix):
col = 0
i = 0
# NOTE(review): get_standard_deviation in this script indexes column col-1,
# so col = 0 selects index -1 (the last column). The result is not used below.
getdeviation=get_standard_deviation(matrix, col)
mycolunmlist = []
# Collect the values of column `col` from every row.
while i < len(matrix):
mycolunmlist.append(matrix[i][col])
i += 1
print('My colunm List :',mycolunmlist)
for i in range(len(mycolunmlist)):
#print(matrix[i][col])
# NOTE(review): ave() in this script prints the mean and has no return
# statement, so the subtraction receives None.
standard =(matrix[i][col]-ave(mycolunmlist))/get_standard_deviation(matrix, col)
# NOTE(review): list.insert takes (index, value); the column value is used
# as the index here, and the result is inserted into the input matrix itself.
matrix.insert(matrix[i][col],standard)
print('standardised matrix is :',matrix)
#not working
#print('The standardised matrix',get_standardised_matrix([[2,5,8],[4,8,2]]))
# Load three CSV files and call get_distance on the first row of the first
# file and each row of the second file.
# rowmatrix, matrix, new_matrix: CSV file paths.
# k: not used in this version.
def get_k_nearest_labels(rowmatrix, matrix, new_matrix, k):
file_open=load_from_csv(rowmatrix)
print(file_open[0])
fopen=load_from_csv(matrix)
# Loaded but not used below.
newfile=load_from_csv(new_matrix)
# No effect: indexes the filename string and discards the result.
rowmatrix[0]
for i in range(len(fopen)):
print(fopen[i])
# NOTE(review): get_distance in this script prints its result and has no
# return statement, so `distance` is None. The rows passed here hold strings
# from csv.reader.
distance=get_distance(file_open[0],fopen[i])
print(distance)
Posts: 444
Threads: 1
Joined: Sep 2018
On line 23, try converting the values to numbers:
euclidean += pow((int(test2[i][k]) - int(test1[j][k])),2)
Posts: 12
Threads: 2
Joined: Dec 2018
Thanks Stullis,
I already did this, but when I run the code it prints a seemingly endless stream of Euclidean distances?
Posts: 444
Threads: 1
Joined: Sep 2018
I've done some refactoring. Everything should still work; I just made it more pythonic.
You can't have an infinite loop here - a for loop over a finite range always terminates - but get_distance() does have a scaling problem: the work grows with the cube of the input size (cubic, not exponential). If each list contains three items and each item is a list of three numbers, then the innermost line runs 3 x 3 x 3 = 27 times; four items each would be 64 times; etc.
import csv
import math
import statistics
# Read the CSV file at `filename` and return its rows as a list of lists.
# csv.reader performs no type conversion, so every field is a string.
def load_from_csv(filename):
with open(filename,'r') as csvfile:
return list(csv.reader(csvfile,delimiter=','))
# Return a list of lists of Euclidean distances between rows test2[i] and
# rows test1[j]; each inner list is sorted largest first.
# Values are passed through float(), so numeric strings are accepted.
# NOTE(review): both loops run over len(test1), so test2 must have at least as
# many rows as test1, and every row at least len(test1[0]) columns.
# NOTE(review): indentation was lost in this paste; the loop nesting is inferred.
def get_distance(test1,test2):
euclidean_list_complete = []
outer_length = len(test1)
inner_length = len(test1[0])
for i in range(outer_length):
euclidean_list = []
for j in range(outer_length):
# Accumulates the squared differences for one pair of rows.
euclidean = 0
for k in range(inner_length):
euclidean += pow(float(test2[i][k]) - float(test1[j][k]), 2)
euclidean_list.append(math.sqrt(euclidean))
# Largest distance first.
euclidean_list.sort(reverse = True)
euclidean_list_complete.append(euclidean_list)
return euclidean_list_complete
# Attempt to standardise column 0 of matrix: (value - mean) / standard deviation.
# Returns the (mutated) input matrix.
def get_standardised_matrix(matrix):
col = 0
mycolunmlist = [item[col] for item in matrix]
print('My colunm List :', mycolunmlist)
for item in mycolunmlist:
# NOTE(review): `item` is already a single value from the column, so
# item[col] raises "TypeError: 'int' object is not subscriptable" for int data.
# NOTE(review): get_standard_deviation in this listing indexes col - 1,
# so col = 0 selects the last column.
standard = (item[col] - ave(mycolunmlist)) / get_standard_deviation(matrix, col)
# NOTE(review): list.insert takes (index, value), and this targets the input matrix.
matrix.insert(item[col], standard)
return matrix
# Return the arithmetic mean of a flat sequence of numbers.
# Raises ZeroDivisionError when the sequence is empty.
def ave(matrix):
return sum(matrix)/len(matrix)
# Return the sample standard deviation (statistics.stdev) of one column of matrix.
# col is 1-based here: the column read is index col - 1, so col = 0 reads the last column.
def get_standard_deviation(matrix, col):
return statistics.stdev([item[col - 1] for item in matrix])
# Load three CSV files and print get_distance for the first row of the first
# file against each row of the second file.
# rowmatrix, matrix, new_matrix: CSV file paths.
# k: not used in this version. Nothing is returned.
def get_k_nearest_labels(rowmatrix, matrix, new_matrix, k):
file_open = load_from_csv(rowmatrix)
print(file_open[0])
fopen = load_from_csv(matrix)
# Loaded but not used below.
newfile = load_from_csv(new_matrix)
for item in fopen:
print(item)
# NOTE(review): file_open[0] and item are single rows, while get_distance
# indexes its arguments as [row][column] - confirm this call shape is intended.
print(
'Euclidean Distance between two matrix :\n',
get_distance(file_open[0], item)
)
# Demo driver: runs each helper once on sample data and prints the result.
# Reads Data.csv from the current working directory.
def main():
print('Data from "Data.csv" file :', load_from_csv('Data.csv'))
print(
'Euclidean Distance between two matrix :\n',
get_distance(
[[2,3,5],[12,3,5],[2,3,6]],
[[1,2,3],[3,4,6],[4,5,8]]
)
)
# Column number 1 is the first column in this listing (1-based).
print('The Standard Deviation of the elements in the column number passed as a parameter :', get_standard_deviation([[2,5,8],[4,8,2]],1))
print('Average of a matrix :', ave([2,3,4]))
print('standardised matrix is :', get_standardised_matrix([[2,5,8],[4,8,2]]))
# Runs unconditionally when the file is executed or imported.
main()
Posts: 12
Threads: 2
Joined: Dec 2018
Dec-25-2018, 02:24 PM
(This post was last modified: Dec-25-2018, 03:41 PM by Larz60+.)
Hi, thanks for the help.
It's still not working; here is the error.
Output: Data from "Data.csv" file :
Euclidean Distance between two matrix :
[[11.224972160321824, 3.3166247903554, 2.449489742783178], [9.1104335791443, 1.7320508075688772, 1.4142135623730951], [8.774964387392123, 4.123105625617661, 3.4641016151377544]]
The Standard Deviation of the elements in the column number passed as a parameter : 1.4142135623730951
Average of a matrix : 3.0
My colunm List : [2, 4]
Error: Traceback (most recent call last):
File "C:/Users/Desktop/Python/pythonnew.py", line 74, in <module>
main()
File "C:/Users/Desktop/Python/pythonnew.py", line 72, in main
print('standardised matrix is :', get_standardised_matrix([[2,5,8],[4,8,2]]))
File "C:/Users/Desktop/Python/python-assignnew.py", line 34, in get_standardised_matrix
standard = (item[col] - ave(mycolunmlist)) / get_standard_deviation(matrix, col)
TypeError: 'int' object is not subscriptable
>>>
Posts: 444
Threads: 1
Joined: Sep 2018
Sorry, I didn't remove the indexing (item[col]) when I changed the loop to iterate over the column values. It should work now.
import csv
import math
import statistics
# Read the CSV file at `filename` and return its rows as a list of lists.
# csv.reader performs no type conversion, so every field is a string.
def load_from_csv(filename):
with open(filename,'r') as csvfile:
return list(csv.reader(csvfile,delimiter=','))
# Return a list of lists of Euclidean distances between rows test2[i] and
# rows test1[j]; each inner list is sorted largest first.
# Values are passed through float(), so numeric strings are accepted.
# NOTE(review): both loops run over len(test1), so test2 must have at least as
# many rows as test1, and every row at least len(test1[0]) columns.
# NOTE(review): indentation was lost in this paste; the loop nesting is inferred.
def get_distance(test1,test2):
euclidean_list_complete = []
outer_length = len(test1)
inner_length = len(test1[0])
for i in range(outer_length):
euclidean_list = []
for j in range(outer_length):
# Accumulates the squared differences for one pair of rows.
euclidean = 0
for k in range(inner_length):
euclidean += pow(float(test2[i][k]) - float(test1[j][k]), 2)
euclidean_list.append(math.sqrt(euclidean))
# Largest distance first.
euclidean_list.sort(reverse = True)
euclidean_list_complete.append(euclidean_list)
return euclidean_list_complete
# Attempt to standardise column 0 of matrix: (value - mean) / standard deviation.
# Returns the (mutated) input matrix.
def get_standardised_matrix(matrix):
col = 0
mycolunmlist = [item[col] for item in matrix]
print('My colunm List :', mycolunmlist)
for item in mycolunmlist:
# NOTE(review): get_standard_deviation in this listing indexes col - 1,
# so col = 0 selects the last column.
standard = (item - ave(mycolunmlist)) / get_standard_deviation(matrix, col)
# NOTE(review): this inserts a float into the input matrix, using the column
# value as the index. On the next pass get_standard_deviation iterates over
# matrix, meets that float and raises "TypeError: 'float' object is not
# subscriptable".
matrix.insert(item, standard)
return matrix
# Return the arithmetic mean of a flat sequence of numbers.
# Raises ZeroDivisionError when the sequence is empty.
def ave(matrix):
return sum(matrix)/len(matrix)
# Return the sample standard deviation (statistics.stdev) of one column of matrix.
# col is 1-based here: the column read is index col - 1, so col = 0 reads the last column.
def get_standard_deviation(matrix, col):
return statistics.stdev([item[col - 1] for item in matrix])
# Load three CSV files and print get_distance for the first row of the first
# file against each row of the second file.
# rowmatrix, matrix, new_matrix: CSV file paths.
# k: not used in this version. Nothing is returned.
def get_k_nearest_labels(rowmatrix, matrix, new_matrix, k):
file_open = load_from_csv(rowmatrix)
print(file_open[0])
fopen = load_from_csv(matrix)
# Loaded but not used below.
newfile = load_from_csv(new_matrix)
for item in fopen:
print(item)
# NOTE(review): file_open[0] and item are single rows, while get_distance
# indexes its arguments as [row][column] - confirm this call shape is intended.
print(
'Euclidean Distance between two matrix :\n',
get_distance(file_open[0], item)
)
# Demo driver: runs each helper once on sample data and prints the result.
# Reads Data.csv from the current working directory.
def main():
print('Data from "Data.csv" file :', load_from_csv('Data.csv'))
print(
'Euclidean Distance between two matrix :\n',
get_distance(
[[2,3,5],[12,3,5],[2,3,6]],
[[1,2,3],[3,4,6],[4,5,8]]
)
)
# Column number 1 is the first column in this listing (1-based).
print('The Standard Deviation of the elements in the column number passed as a parameter :', get_standard_deviation([[2,5,8],[4,8,2]],1))
print('Average of a matrix :', ave([2,3,4]))
print('standardised matrix is :', get_standardised_matrix([[2,5,8],[4,8,2]]))
# Runs unconditionally when the file is executed or imported.
main()
Posts: 12
Threads: 2
Joined: Dec 2018
Dec-25-2018, 03:42 PM
(This post was last modified: Dec-25-2018, 03:43 PM by shan1403.)
Thanks Stullis for your help but still this is the error:
Traceback (most recent call last):
File "C:\Users\Python\Tims test.py", line 74, in <module>
main()
File "C:\Users\Python\Tims test.py", line 72, in main
print('standardised matrix is :', get_standardised_matrix([[2,5,8],[4,8,2]]))
File "C:\Users\Python\Tims test.py", line 34, in get_standardised_matrix
standard = (item - ave(mycolunmlist)) / get_standard_deviation(matrix, col)
File "C:\Users\Python\Tims test.py", line 43, in get_standard_deviation
return statistics.stdev([item[col - 1] for item in matrix])
File "C:\Users\Python\Tims test.py", line 43, in <listcomp>
return statistics.stdev([item[col - 1] for item in matrix])
TypeError: 'float' object is not subscriptable
Posts: 444
Threads: 1
Joined: Sep 2018
Ah, the problem stemmed from line 37. I hadn't noticed that the script added data to matrix with each iteration. The script completed the loop on lines 34-37 once and then raised an error on the next pass, because a float had been inserted into matrix.
This now runs without any errors, but I'm not sure if it's providing what you want.
import csv
import math
import statistics
# Read the CSV file at `filename` and return its rows as a list of lists.
# csv.reader performs no type conversion, so every field is a string.
def load_from_csv(filename):
with open(filename,'r') as csvfile:
return list(csv.reader(csvfile,delimiter=','))
# Return a list of lists of Euclidean distances between rows test2[i] and
# rows test1[j]; each inner list is sorted largest first.
# Values are passed through float(), so numeric strings are accepted.
# NOTE(review): both loops run over len(test1), so test2 must have at least as
# many rows as test1, and every row at least len(test1[0]) columns.
# NOTE(review): indentation was lost in this paste; the loop nesting is inferred.
def get_distance(test1,test2):
euclidean_list_complete = []
outer_length = len(test1)
inner_length = len(test1[0])
for i in range(outer_length):
euclidean_list = []
for j in range(outer_length):
# Accumulates the squared differences for one pair of rows.
euclidean = 0
for k in range(inner_length):
euclidean += pow(float(test2[i][k]) - float(test1[j][k]), 2)
euclidean_list.append(math.sqrt(euclidean))
# Largest distance first.
euclidean_list.sort(reverse = True)
euclidean_list_complete.append(euclidean_list)
return euclidean_list_complete
# Standardise column 0 of matrix: (value - mean) / standard deviation.
# Returns a new flat list of standardised values; the input matrix is not modified.
def get_standardised_matrix(matrix):
col = 0
out_matrix = []
mycolumnlist = [item[col] for item in matrix]
print('My column List :', mycolumnlist)
for item in mycolumnlist:
# Debug output: prints the whole input matrix on every pass.
print(matrix)
standard = (item - ave(mycolumnlist)) / get_standard_deviation(matrix, col)
# NOTE(review): list.insert takes (index, value); the column value itself is
# used as the index, so the position of each result depends on the data.
out_matrix.insert(item, standard)
return out_matrix
# Return the arithmetic mean of a flat sequence of numbers.
# Raises ZeroDivisionError when the sequence is empty.
def ave(matrix):
return sum(matrix)/len(matrix)
# Return the sample standard deviation (statistics.stdev) of column `col` of matrix.
# col is a 0-based index in this listing.
def get_standard_deviation(matrix, col):
return statistics.stdev([item[col] for item in matrix])
# Load three CSV files and print get_distance for the first row of the first
# file against each row of the second file.
# rowmatrix, matrix, new_matrix: CSV file paths.
# k: not used in this version. Nothing is returned.
def get_k_nearest_labels(rowmatrix, matrix, new_matrix, k):
file_open = load_from_csv(rowmatrix)
print(file_open[0])
fopen = load_from_csv(matrix)
# Loaded but not used below.
newfile = load_from_csv(new_matrix)
for item in fopen:
print(item)
# NOTE(review): file_open[0] and item are single rows, while get_distance
# indexes its arguments as [row][column] - confirm this call shape is intended.
print(
'Euclidean Distance between two matrix :\n',
get_distance(file_open[0], item)
)
# Demo driver: runs each helper once on sample data and prints the result.
# Reads Data.csv from the current working directory.
def main():
print('Data from "Data.csv" file :', load_from_csv('Data.csv'))
print(
'Euclidean Distance between two matrix :\n',
get_distance(
[[2,3,5],[12,3,5],[2,3,6]],
[[1,2,3],[3,4,6],[4,5,8]]
)
)
# Column index 0 is the first column in this listing (0-based).
print('The Standard Deviation of the elements in the column number passed as a parameter :', get_standard_deviation([[2,5,8],[4,8,2]], 0))
print('Average of a matrix :', ave([2,3,4]))
print('standardised matrix is :', get_standardised_matrix([[2,5,8],[4,8,2]]))
# Runs unconditionally when the file is executed or imported.
main()
Posts: 12
Threads: 2
Joined: Dec 2018
Thank you very much for the help. It's working; however, there is one error when using get_distance in get_k_nearest_labels().
here is the code:
import csv
import math
import statistics
# Read the CSV file at `filename` and return its rows as a list of lists.
# csv.reader performs no type conversion, so every field is a string.
def load_from_csv(filename):
with open(filename,'r') as csvfile:
return list(csv.reader(csvfile,delimiter=','))
# Return a list of lists of Euclidean distances between rows test2[i] and
# rows test1[j]; each inner list is sorted largest first.
# Values are passed through float(), so numeric strings are accepted.
# NOTE(review): both loops run over len(test1) and the column count comes from
# test1[0]; when test2 has fewer rows or shorter rows, test2[i][k] raises
# "IndexError: list index out of range".
# NOTE(review): indentation was lost in this paste; the loop nesting is inferred.
def get_distance(test1,test2):
euclidean_list_complete = []
outer_length = len(test1)
inner_length = len(test1[0])
for i in range(outer_length):
euclidean_list = []
for j in range(outer_length):
# Accumulates the squared differences for one pair of rows.
euclidean = 0
for k in range(inner_length):
euclidean += pow(float(test2[i][k]) - float(test1[j][k]), 2)
euclidean_list.append(math.sqrt(euclidean))
# Largest distance first.
euclidean_list.sort(reverse = True)
euclidean_list_complete.append(euclidean_list)
return euclidean_list_complete
# Standardise column 0 of matrix: (value - mean) / standard deviation.
# Returns a new flat list of standardised values; the input matrix is not modified.
def get_standardised_matrix(matrix):
col = 0
out_matrix = []
mycolumnlist = [item[col] for item in matrix]
print('My column List :', mycolumnlist)
for item in mycolumnlist:
# Debug output: prints the whole input matrix on every pass.
print(matrix)
standard = (item - ave(mycolumnlist)) / get_standard_deviation(matrix, col)
# NOTE(review): list.insert takes (index, value); the column value itself is
# used as the index, so the position of each result depends on the data.
out_matrix.insert(item, standard)
return out_matrix
# Return the arithmetic mean of a flat sequence of numbers.
# Raises ZeroDivisionError when the sequence is empty.
def ave(matrix):
return sum(matrix)/len(matrix)
# Return the sample standard deviation (statistics.stdev) of column `col` of matrix.
# col is a 0-based index in this listing.
def get_standard_deviation(matrix, col):
return statistics.stdev([item[col] for item in matrix])
# Load three CSV files and print get_distance for the first row of the first
# file against each row of the second file.
# rowmatrix, matrix, new_matrix: CSV file paths.
# k: not used in this version. Nothing is returned.
def get_k_nearest_labels(rowmatrix, matrix, new_matrix, k):
file_open = load_from_csv(rowmatrix)
print(file_open[0])
fopen = load_from_csv(matrix)
# Loaded but not used below.
newfile = load_from_csv(new_matrix)
for item in fopen:
# NOTE(review): file_open[0] and item are single rows of strings, while
# get_distance indexes its arguments as [row][column] - confirm this call
# shape is intended.
print(
'Nearest Distance between two matrix :\n',
get_distance(file_open[0], item)
)
# Demo driver: runs each helper once on sample data and prints the result.
# Reads Data.csv, Data_Labels.csv and Learning_Data.csv from the current
# working directory.
def main():
print('Data from "Data.csv" file :', load_from_csv('Data.csv'))
print(
'Euclidean Distance between two matrix :\n',
get_distance(
[[2,3,5],[12,3,5],[2,3,6]],
[[1,2,3],[3,4,6],[4,5,8]]
)
)
# Column index 0 is the first column in this listing (0-based).
print('The Standard Deviation of the elements in the column number passed as a parameter :', get_standard_deviation([[2,5,8],[4,8,2]], 0))
print('Average of a matrix :', ave([2,3,4]))
print('standardised matrix is :', get_standardised_matrix([[2,5,8],[4,8,2]]))
# NOTE(review): get_k_nearest_labels in this listing has no return statement,
# so the value printed after the label is None.
print('Nearest vale :',get_k_nearest_labels('Data.csv','Data_Labels.csv','Learning_Data.csv',1))
main()
Traceback (most recent call last):
File "C:/Users/Python/assignment-forum.py", line 76, in <module>
main()
File "C:/Users/Python/assignment-forum.py", line 75, in main
print('Nearest vale :',get_k_nearest_labels('Data.csv','Data_Labels.csv','Learning_Data.csv',1))
File "C:/Users/Python/assignment-forum.py", line 56, in get_k_nearest_labels
get_distance(file_open[0], item)
File "C:/Users/Python/assignment-forum.py", line 19, in get_distance
euclidean += pow(float(test2[i][k]) - float(test1[j][k]), 2)
IndexError: list index out of range
Posts: 444
Threads: 1
Joined: Sep 2018
My understanding was that the matrices had the same dimensions. Evidently, that was incorrect. It's an easy fix.
import csv
import math
import statistics
def load_from_csv(filename):
    """Read a comma-separated file and return its rows.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list with one entry per row. Each row is a list of strings,
        because ``csv.reader`` performs no type conversion.

    Raises:
        OSError: If the file cannot be opened.
    """
    # newline='' hands line-ending handling to the csv module, so line breaks
    # inside quoted fields are preserved exactly as written.
    with open(filename, 'r', newline='') as csvfile:
        return list(csv.reader(csvfile, delimiter=','))
def get_distance(test1, test2):
    """Compute Euclidean distances from every row of test2 to every row of test1.

    The two matrices may have different numbers of rows.

    Args:
        test1: Matrix (sequence of rows) of reference points.
        test2: Matrix (sequence of rows) of query points.
            Values in both matrices may be numbers or numeric strings (as
            produced by ``csv.reader``); each is converted with ``float()``.

    Returns:
        A list with one entry per row of ``test2``. Each entry is the list of
        distances from that row to every row of ``test1``, sorted largest
        first. An empty ``test2`` gives an empty list.

    Raises:
        ValueError: If a value cannot be converted to ``float``.
    """
    euclidean_list_complete = []
    # The outer loop walks test2 and the inner loop walks test1, matching the
    # way the rows are paired below; the row counts are independent.
    for query_row in test2:
        euclidean_list = [
            math.sqrt(
                # zip pairs columns positionally and stops at the shorter row.
                sum(
                    (float(query) - float(reference)) ** 2
                    for query, reference in zip(query_row, reference_row)
                )
            )
            for reference_row in test1
        ]
        euclidean_list.sort(reverse=True)
        euclidean_list_complete.append(euclidean_list)
    return euclidean_list_complete
def get_standardised_matrix(matrix):
    """Standardise the first column of a matrix.

    Each value ``x`` of column 0 is mapped to ``(x - mean) / deviation``,
    where ``mean`` comes from ``ave`` and ``deviation`` from
    ``get_standard_deviation``.

    NOTE(review): only column 0 is standardised and a flat list is returned,
    as before; extend this if whole-matrix standardisation is required.

    Args:
        matrix: Sequence of rows holding numeric values. It is not modified.

    Returns:
        A list of the standardised column-0 values, in row order.

    Raises:
        ZeroDivisionError: If every value in the column is identical
            (standard deviation of zero).
    """
    col = 0
    mycolumnlist = [item[col] for item in matrix]
    # The mean and deviation are the same for every value, so compute them once.
    mean = ave(mycolumnlist)
    deviation = get_standard_deviation(matrix, col)
    # Build the result in row order; list.insert(item, ...) would have used the
    # data value itself as the insertion index.
    return [(item - mean) / deviation for item in mycolumnlist]
def ave(matrix):
    """Return the arithmetic mean of a flat sequence of numbers.

    Args:
        matrix: Non-empty sequence of numbers (a single list, despite the name).

    Returns:
        The sum of the values divided by their count.

    Raises:
        ZeroDivisionError: If the sequence is empty.
    """
    total = sum(matrix)
    count = len(matrix)
    return total / count
def get_standard_deviation(matrix, col):
    """Return the sample standard deviation of one column of a matrix.

    Args:
        matrix: Sequence of rows holding numeric values.
        col: Zero-based index of the column to read from every row.

    Returns:
        ``statistics.stdev`` of the selected column.

    Raises:
        statistics.StatisticsError: If the matrix has fewer than two rows.
    """
    column_values = []
    for row in matrix:
        column_values.append(row[col])
    return statistics.stdev(column_values)
def get_k_nearest_labels(rowmatrix, matrix, new_matrix, k):
    """Print the distance from one query row to every row of a second file.

    The first row of ``rowmatrix`` is the query; it is compared with each row
    of ``matrix`` using ``get_distance``.

    NOTE(review): despite the name, no labels are selected or returned yet;
    ``new_matrix`` is loaded but unused and ``k`` is ignored.

    Args:
        rowmatrix: Path of the CSV file whose first row is the query row.
        matrix: Path of the CSV file whose rows are compared with the query.
        new_matrix: Path of a third CSV file; loaded but not used further.
        k: Currently unused.

    Returns:
        None. Results are printed.
    """
    file_open = load_from_csv(rowmatrix)
    print(file_open[0])
    fopen = load_from_csv(matrix)
    # Still loaded so that a missing file is reported as it was before.
    newfile = load_from_csv(new_matrix)
    query_matrix = [file_open[0]]
    for item in fopen:
        # get_distance indexes its arguments as [row][column], so each single
        # row is wrapped in a one-row matrix; passing the bare rows made it
        # index into the characters of the CSV strings.
        print(
            'Nearest Distance between two matrix :\n',
            get_distance(query_matrix, [item])
        )
def main():
    """Run each helper once on sample data and print the results.

    Reads ``Data.csv``, ``Data_Labels.csv`` and ``Learning_Data.csv`` from the
    current working directory.
    """
    print('Data from "Data.csv" file :', load_from_csv('Data.csv'))
    print(
        'Euclidean Distance between two matrix :\n',
        get_distance(
            [[2, 3, 5], [12, 3, 5], [2, 3, 6]],
            [[1, 2, 3], [3, 4, 6], [4, 5, 8]]
        )
    )
    print('The Standard Deviation of the elements in the column number passed as a parameter :', get_standard_deviation([[2, 5, 8], [4, 8, 2]], 0))
    print('Average of a matrix :', ave([2, 3, 4]))
    print('standardised matrix is :', get_standardised_matrix([[2, 5, 8], [4, 8, 2]]))
    # get_k_nearest_labels prints its own results and returns nothing, so it is
    # called directly; wrapping it in print() only added a trailing "None".
    # NOTE(review): the first post in this thread called it with
    # ('Data.csv', 'Learning_Data.csv', 'Learning_Data_Labels.csv', 1) -
    # confirm which file order is intended.
    get_k_nearest_labels('Data.csv', 'Data_Labels.csv', 'Learning_Data.csv', 1)


# Only run the demo when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|