Posts: 12
Threads: 2
Joined: Dec 2018
Hi everyone, I need to find the distance between the two rows of the matrix in two different .CSV file. Following is the part of the code:
for item in len(fopen):
print(item)
print('Distance between two matrix :\n',
get_distance(file_open[0], item))
newfile = load_from_csv(new_matrix)
print('TEST',newfile[0])
for data in len(newfile):
print(data)
print('Distance between two matrix :\n',
get_distance(file_open[0], data))
def main():
print('Data from "Data.csv" file :', load_from_csv('Data.csv'))
print('Euclidean Distance between two matrix :\n',
get_distance( [[2,3,5],[12,3,5],[2,3,6]],
[[1,2,3],[3,4,6],[4,5,8]]))
print('The Standard Deviation of the elements in the column number passed as a parameter :',
get_standard_deviation([[2,5,8],[4,8,2]],1))
print('Average of a matrix :', ave([2,3,4]))
#print('standardised matrix is :', get_standardised_matrix([[2,5,8],[4,8,2]]))
print('Nearest labels between data.csv and learning_data',
get_k_nearest_labels('Data.csv','Learning_Data.csv','Learning_Data_Labels.csv',1))
main() this is the TRaceback error I am getting:
Data from "Data.csv" file :
Euclidean Distance between two matrix :
[[11.224972160321824, 3.3166247903554, 2.449489742783178], [9.1104335791443, 1.7320508075688772, 1.4142135623730951], [8.774964387392123, 4.123105625617661, 3.4641016151377544]]
The Standard Deviation of the elements in the column number passed as a parameter : 1.4142135623730951
Average of a matrix : 3.0
['5', '3', '2', '8', '5', '10', '8', '1', '2']
Traceback (most recent call last):
File "C:/Users/Python/python-assignment-new.py", line 86, in <module>
main()
File "C:/Users/Python/python-assignment-new.py", line 83, in main
get_k_nearest_labels('Data.csv','Learning_Data.csv','Learning_Data_Labels.csv',1))
File "C:/Users/Python/python-assignment-new.py", line 50, in get_k_nearest_labels
for item in len(fopen):
TypeError: 'int' object is not iterable
>>>
Appreciate any help please???
Posts: 4,220
Threads: 97
Joined: Sep 2016
The traceback says the error is in the function get_k_nearest_labels, which you did not provide. We will need to see that function to figure out how fopen became an int.
Posts: 12
Threads: 2
Joined: Dec 2018
So sorry, please find below the code again:
def get_k_nearest_labels(rowmatrix, matrix, new_matrix, k):
file_open = load_from_csv(rowmatrix)
print(file_open[0])
fopen = load_from_csv(matrix)
for item in len(fopen):
print(item)
print('Distance between two matrix :\n',
get_distance(file_open[0], item))
newfile = load_from_csv(new_matrix)
print('TEST',newfile[0])
for data in len(newfile):
print(data)
print('Distance between two matrix :\n',
get_distance(file_open[0], data))
Posts: 4,220
Threads: 97
Joined: Sep 2016
Well then, the problem is that load_from_csv() is returning an integer instead of a list or something else that can be iterated over. But again, not knowing how load_from_csv works, I can't really say why it does that.
Posts: 12
Threads: 2
Joined: Dec 2018
Thanks ichabod801, here is the complete code. From top to bottom. Appreciate your reply.
import csv
import math
import statistics
def load_from_csv(filename):
with open(filename,'r') as csvfile:
return list(csv.reader(csvfile,delimiter=','))
def get_distance(test1,test2):
euclidean_list_complete = []
outer_length = len(test1)
inner_length = len(test1[0])
for i in range(outer_length):
euclidean_list = []
for j in range(outer_length):
euclidean = 0
for k in range(inner_length):
euclidean += pow(float(test2[i][k]) - float(test1[j][k]), 2)
euclidean_list.append(math.sqrt(euclidean))
euclidean_list.sort(reverse = True)
euclidean_list_complete.append(euclidean_list)
return euclidean_list_complete
##def get_standardised_matrix(matrix):
## col = 0
## mycolunmlist = [item[col] for item in matrix]
## print('My colunm List :', mycolunmlist)
##
## for item in mycolunmlist:
## standard = (item[col] - ave(mycolunmlist)) /get_standard_deviation(matrix, col)
## matrix.insert(item[col], standard)
return matrix
def ave(matrix):
return sum(matrix)/len(matrix)
def get_standard_deviation(matrix, col):
return statistics.stdev([item[col - 1] for item in matrix])
def get_k_nearest_labels(rowmatrix, matrix, new_matrix, k):
file_open = load_from_csv(rowmatrix)
print(file_open[0])
fopen = load_from_csv(matrix)
for item in len(fopen):
print(item)
print('Distance between two matrix :\n',
get_distance(file_open[0], item))
newfile = load_from_csv(new_matrix)
print('TEST',newfile[0])
for data in len(newfile):
print(data)
print('Distance between two matrix :\n',
get_distance(file_open[0], data))
def main():
print('Data from "Data.csv" file :', load_from_csv('Data.csv'))
print('Euclidean Distance between two matrix :\n',
get_distance( [[2,3,5],[12,3,5],[2,3,6]],
[[1,2,3],[3,4,6],[4,5,8]]))
print('The Standard Deviation of the elements in the column number passed as a parameter :',
get_standard_deviation([[2,5,8],[4,8,2]],1))
print('Average of a matrix :', ave([2,3,4]))
#print('standardised matrix is :', get_standardised_matrix([[2,5,8],[4,8,2]]))
print('Nearest labels between data.csv and learning_data',
get_k_nearest_labels('Data.csv','Learning_Data.csv','Learning_Data_Labels.csv',1))
main()
Posts: 4,220
Threads: 97
Joined: Sep 2016
Duh, should have seen that earlier. You are looping through len(fopen). That is an integer. You probably want to be looping through fopen itself.
Posts: 12
Threads: 2
Joined: Dec 2018
thanks for your help. i have fixed that however i am using get_distance function in get_k_nearest_label(),i am getting this error
Data from "Data.csv" file :
Euclidean Distance between two matrix :
[[11.224972160321824, 3.3166247903554, 2.449489742783178], [9.1104335791443, 1.7320508075688772, 1.4142135623730951], [8.774964387392123, 4.123105625617661, 3.4641016151377544]]
The Standard Deviation of the elements in the column number passed as a parameter : 1.4142135623730951
Average of a matrix : 3.0
My column List : [2, 4]
[[2, 5, 8], [4, 8, 2]]
[[2, 5, 8], [4, 8, 2]]
standardised matrix is : [-0.7071067811865475, 0.7071067811865475]
['5', '3', '2', '8', '5', '10', '8', '1', '2']
Traceback (most recent call last):
File "C:/Users/Python/assignment-forum.py", line 76, in <module>
main()
File "C:/Users/Python/assignment-forum.py", line 75, in main
print('Nearest vale :',get_k_nearest_labels('Data.csv','Data_Labels.csv','Learning_Data.csv',1))
File "C:/Users/Python/assignment-forum.py", line 56, in get_k_nearest_labels
get_distance(file_open[0], item)
File "C:/Users/Python/assignment-forum.py", line 19, in get_distance
euclidean += pow(float(test2[i][k]) - float(test1[j][k]), 2)
IndexError: list index out of range
>>>
this is the updated code:
import csv
import math
import statistics
def load_from_csv(filename):
with open(filename,'r') as csvfile:
return list(csv.reader(csvfile,delimiter=','))
def get_distance(test1,test2):
euclidean_list_complete = []
outer_length = len(test1)
inner_length = len(test1[0])
for i in range(outer_length):
euclidean_list = []
for j in range(outer_length):
euclidean = 0
for k in range(inner_length):
euclidean += pow(float(test2[i][k]) - float(test1[j][k]), 2)
euclidean_list.append(math.sqrt(euclidean))
euclidean_list.sort(reverse = True)
euclidean_list_complete.append(euclidean_list)
return euclidean_list_complete
def get_standardised_matrix(matrix):
col = 0
out_matrix = []
mycolumnlist = [item[col] for item in matrix]
print('My column List :', mycolumnlist)
for item in mycolumnlist:
print(matrix)
standard = (item - ave(mycolumnlist)) / get_standard_deviation(matrix, col)
out_matrix.insert(item, standard)
return out_matrix
def ave(matrix):
return sum(matrix)/len(matrix)
def get_standard_deviation(matrix, col):
return statistics.stdev([item[col] for item in matrix])
def get_k_nearest_labels(rowmatrix, matrix, new_matrix, k):
file_open = load_from_csv(rowmatrix)
print(file_open[0])
fopen = load_from_csv(matrix)
newfile = load_from_csv(new_matrix)
for item in fopen:
print(
'Nearest Distance between two matrix :\n',
get_distance(file_open[0], item)
)
def main():
print('Data from "Data.csv" file :', load_from_csv('Data.csv'))
print(
'Euclidean Distance between two matrix :\n',
get_distance(
[[2,3,5],[12,3,5],[2,3,6]],
[[1,2,3],[3,4,6],[4,5,8]]
)
)
print('The Standard Deviation of the elements in the column number passed as a parameter :', get_standard_deviation([[2,5,8],[4,8,2]], 0))
print('Average of a matrix :', ave([2,3,4]))
print('standardised matrix is :', get_standardised_matrix([[2,5,8],[4,8,2]]))
print('Nearest vale :',get_k_nearest_labels('Data.csv','Data_Labels.csv','Learning_Data.csv',1))
main()
Posts: 4,220
Threads: 97
Joined: Sep 2016
It looks like your matrices are not the same size. You code will only work if they are. I would print len() of test1, test1[0], test2, and test2[0]. If those match up correctly, I would guess there is a row that is not the right size, either in test1 or test2.
|