TypeError: unsupported operand type(s) for -: 'str' and 'str'

shan1403 · Dec-24-2018, 09:58 PM

Hi, I am a (relative) newbie. In the following code, I have written a function called get_standardised_matrix. Before this function, I created another function, get_standard_deviation. I need to standardise a matrix by using get_standard_deviation. This should all be done without using numpy.

Now I am getting the following error. I have been stuck on this code for many hours and am quite frustrated at the moment (angry).

Therefore, any URGENT help will be highly appreciated, please.

get_k_nearest_labels('Data.csv','Learning_Data.csv','Learning_Data_Labels.csv',1)
[21:51, 12/24/2018] Nida: Traceback (most recent call last):
File "C:/Users/Nida/Desktop/Nida-CSEE/Python/python-assignment.py", line 87, in <module>
get_k_nearest_labels('Data.csv','Learning_Data.csv','Learning_Data_Labels.csv',1)
File "C:/Users/Nida/Desktop/Nida-CSEE/Python/python-assignment.py", line 84, in get_k_nearest_labels
distance=get_distance(file_open[0],fopen[i])
File "C:/Users/Nida/Desktop/Nida-CSEE/Python/python-assignment.py", line 23, in get_distance
euclidean += pow((test2[i][k]-test1[j][k]),2)
TypeError: unsupported operand type(s) for -: 'str' and 'str'
>>>

Following is my script.

import csv

def load_from_csv(filename):
    """Read a comma-separated file and return its rows.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list with one entry per row; each row is the list of field
        strings produced by ``csv.reader``. An empty file gives ``[]``.
    """
    # newline='' leaves line-ending handling to the csv module, which is what
    # the csv documentation asks for when a file object is given to csv.reader.
    with open(filename, 'r', newline='') as csvfile:
        return list(csv.reader(csvfile, delimiter=','))

print('Data from "Data.csv" file :',load_from_csv('Data.csv'))           


import math
def get_distance(test1,test2):
    """Print the Euclidean distances between the rows of two matrices.

    For every row of ``test2`` the distance to every row of ``test1`` is
    computed over the first ``len(test1[0])`` columns. The nested list of
    distances is printed; the function itself returns nothing.

    The cells are subtracted exactly as passed in, so they must already be
    numbers: string cells raise ``TypeError`` on the subtraction.
    """
    euclidean = 0  # running sum of squared differences for one row pair

    euclidean_list_complete = []

    for i in range(len(test2)):
        euclidean_list = []
        for j in range(len(test1)):
            for k in range(len(test1[0])):
                euclidean += pow((test2[i][k]-test1[j][k]),2)
            euclidean_list.append(math.sqrt(euclidean))
            euclidean = 0  # reset the accumulator for the next row pair
            # The list is re-sorted (largest first) after every append.
            euclidean_list.sort(reverse=True)
        euclidean_list_complete.append(euclidean_list)
        # Only unbinds the local name; the list stays alive inside
        # euclidean_list_complete.
        del euclidean_list

    print ('Euclidean Distance between two matrix :\n',euclidean_list_complete)
get_distance([[2,3,5],[12,3,5],[2,3,6]],[[1,2,3],[3,4,6],[4,5,8]])


import statistics

def get_standard_deviation(matrix, col):
    """Return the sample standard deviation of one column of ``matrix``.

    ``col`` is a 1-based column number: ``col=1`` reads index 0 of each row.
    """
    column_index = col - 1
    column_values = []
    for row in matrix:
        column_values.append(row[column_index])
    return statistics.stdev(column_values)
print('The Standard Deviation of the elements in the column number passed as a parameter :',get_standard_deviation([[2,5,8],[4,8,2]],1))



def ave(matrix):
    """Print and return the arithmetic mean of a flat list of numbers.

    Args:
        matrix: Non-empty sequence of numbers.

    Returns:
        The mean as a float. It is returned (not only printed) so that
        callers can use it in further arithmetic.

    Raises:
        ZeroDivisionError: If ``matrix`` is empty.
    """
    average = sum(matrix) / len(matrix)
    print('Average of a matrix :', average)
    return average
    


def get_standardised_matrix(matrix):
    """Print and return a column-wise standardised copy of ``matrix``.

    Each value is replaced by ``(value - column mean) / column standard
    deviation``. The deviation comes from get_standard_deviation(); the mean
    is computed here. The input matrix is not modified.

    Args:
        matrix: List of equally long rows of numbers.

    Returns:
        A new matrix of floats with the same shape; ``[]`` for an empty input.

    Raises:
        ZeroDivisionError: If every value in a column is identical, so that
            the column's deviation is zero.
    """
    if not matrix:
        return []

    standardised = [list(row) for row in matrix]
    for index in range(len(matrix[0])):
        column = [row[index] for row in matrix]
        mean = sum(column) / len(column)
        # get_standard_deviation() takes a 1-based column number.
        deviation = get_standard_deviation(matrix, index + 1)
        for row_number, value in enumerate(column):
            standardised[row_number][index] = (value - mean) / deviation

    print('standardised matrix is :', standardised)
    return standardised
#print('The standardised matrix',get_standardised_matrix([[2,5,8],[4,8,2]]))



def get_k_nearest_labels(rowmatrix, matrix, new_matrix, k):
    """Load three CSV files and print distances for the first row of the first.

    Args:
        rowmatrix: Path of the CSV file whose first row is compared.
        matrix: Path of the CSV file whose rows it is compared against.
        new_matrix: Path of a third CSV file; loaded but not used afterwards.
        k: Not used in the body.

    NOTE(review): the name suggests the labels of the k nearest rows should
    be returned; nothing is returned here - confirm the intended behaviour.
    """
    file_open=load_from_csv(rowmatrix)
    print(file_open[0])
    fopen=load_from_csv(matrix)
    newfile=load_from_csv(new_matrix)
    # Expression statement with no effect (rowmatrix is the file name string).
    rowmatrix[0]
    for i in range(len(fopen)):
        print(fopen[i])
        # Both arguments are single rows, while get_distance() indexes its
        # arguments as [row][column]; with csv.reader output the operands it
        # subtracts are therefore strings.
        distance=get_distance(file_open[0],fopen[i])
        print(distance)

stullis · Dec-24-2018, 10:32 PM

On line 23, try converting the values to numbers:

euclidean += pow((int(test2[i][k]) - int(test1[j][k])),2)

shan1403 · Dec-24-2018, 10:37 PM

Thanks Stullis,

I already did this, but when I run the code it prints a seemingly endless stream of Euclidean distances.

stullis · Dec-25-2018, 12:36 AM

I've done some refactoring. Everything should still work; I just made it more pythonic.

You can't have an infinite loop - for loops over a finite sequence are inherently finite - but get_distance() does have a growth problem: with three nested loops, the amount of work grows cubically. If each list contains three items and each item is a list of three numbers, then the code iterates through them a total of 27 times (3 x 3 x 3); four items each would be 64 times (4 x 4 x 4); etc.

import csv
import math
import statistics
 
def load_from_csv(filename):
    """Read a comma-separated file and return its rows.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list of rows, each a list of field strings from ``csv.reader``.
    """
    # newline='' leaves line-ending handling to the csv module, so line breaks
    # inside quoted fields are kept exactly as written in the file.
    with open(filename, 'r', newline='') as csvfile:
        return list(csv.reader(csvfile, delimiter=','))
 
def get_distance(test1, test2):
    """Return the Euclidean distance from each row of ``test2`` to each row of ``test1``.

    Args:
        test1: Matrix (list of rows) of numbers or numeric strings.
        test2: Matrix with the same number of columns; its number of rows
            may differ from that of ``test1``.

    Returns:
        One list per row of ``test2`` containing the distances to every row
        of ``test1``, sorted from largest to smallest. Empty input gives an
        empty result.

    Raises:
        ValueError: If a cell cannot be converted with ``float()``.
    """
    euclidean_list_complete = []

    # Iterate over each matrix directly so that the row counts of test1 and
    # test2 are independent and no index can run past either of them.
    for row2 in test2:
        euclidean_list = []
        for row1 in test1:
            # zip() pairs the columns; if the rows differ in length the
            # surplus trailing values of the longer row are ignored.
            squared = sum(pow(float(b) - float(a), 2) for b, a in zip(row2, row1))
            euclidean_list.append(math.sqrt(squared))

        euclidean_list.sort(reverse=True)
        euclidean_list_complete.append(euclidean_list)

    return euclidean_list_complete

def get_standardised_matrix(matrix):
    """Attempt to standardise column 0 of ``matrix`` in place.

    NOTE(review): as written this raises TypeError for a matrix of ints,
    see the comments on the loop body.
    """
    col = 0
    mycolunmlist = [item[col] for item in matrix]
    print('My colunm List :', mycolunmlist)

    for item in mycolunmlist:
        # mycolunmlist already holds the cell values of column 0, so ``item``
        # is a single cell; subscripting it with [col] fails for an int.
        standard = (item[col] - ave(mycolunmlist)) / get_standard_deviation(matrix, col)
        # list.insert() takes a position first; here a cell value is used as
        # that position and the input matrix itself is modified.
        matrix.insert(item[col], standard)

    return matrix

def ave(matrix):
    """Return the arithmetic mean of the numbers in ``matrix``."""
    total = sum(matrix)
    count = len(matrix)
    return total / count
    
def get_standard_deviation(matrix, col):
    """Return the sample standard deviation of one column of ``matrix``.

    ``col`` is a 1-based column number: ``col=1`` reads index 0 of each row.
    """
    column_index = col - 1
    column_values = []
    for row in matrix:
        column_values.append(row[column_index])
    return statistics.stdev(column_values)
 
def get_k_nearest_labels(rowmatrix, matrix, new_matrix, k):
    """Print the distance from the first row of one CSV file to each row of another.

    Args:
        rowmatrix: Path of the CSV file whose first row is the query row.
        matrix: Path of the CSV file whose rows are compared with the query.
        new_matrix: Path of a third CSV file; it is loaded but not used yet.
        k: Currently unused.

    Returns:
        None. NOTE(review): the name suggests the labels of the k nearest
        rows should be returned; that selection is not implemented here.
    """
    query_row = load_from_csv(rowmatrix)[0]
    print(query_row)
    candidates = load_from_csv(matrix)
    # Still loaded so that a missing or unreadable file fails as before.
    load_from_csv(new_matrix)

    for item in candidates:
        print(item)
        # get_distance() expects matrices (lists of rows). A bare row would be
        # indexed character by character, so wrap each row in a list.
        print(
            'Euclidean Distance between two matrix :\n',
            get_distance([query_row], [item])
        )

def main():
    """Run each helper once on sample data and print the results.

    Reads 'Data.csv' from the current working directory.
    """
    print('Data from "Data.csv" file :', load_from_csv('Data.csv'))

    print(
        'Euclidean Distance between two matrix :\n',
        get_distance(
            [[2,3,5],[12,3,5],[2,3,6]],
            [[1,2,3],[3,4,6],[4,5,8]]
        )
    )

    # The column argument is 1-based in this version of get_standard_deviation().
    print('The Standard Deviation of the elements in the column number passed as a parameter :', get_standard_deviation([[2,5,8],[4,8,2]],1))

    print('Average of a matrix :', ave([2,3,4]))

    print('standardised matrix is :', get_standardised_matrix([[2,5,8],[4,8,2]]))

main()

shan1403 · (This post was last modified: Dec-25-2018, 03:41 PM by Larz60+.)

Hi, thanks for the help.
It is still not working; here is the error.

Output:Data from "Data.csv" file : 
Euclidean Distance between two matrix :
 [[11.224972160321824, 3.3166247903554, 2.449489742783178], [9.1104335791443, 1.7320508075688772, 1.4142135623730951], [8.774964387392123, 4.123105625617661, 3.4641016151377544]]
The Standard Deviation of the elements in the column number passed as a parameter : 1.4142135623730951
Average of a matrix : 3.0
My colunm List : [2, 4]

Error:Traceback (most recent call last):
  File "C:/Users/Desktop/Python/pythonnew.py", line 74, in <module>
    main()
  File "C:/Users/Desktop/Python/pythonnew.py", line 72, in main
    print('standardised matrix is :', get_standardised_matrix([[2,5,8],[4,8,2]]))
  File "C:/Users/Desktop/Python/python-assignnew.py", line 34, in get_standardised_matrix
    standard = (item[col] - ave(mycolunmlist)) / get_standard_deviation(matrix, col)
TypeError: 'int' object is not subscriptable

>>>

stullis · Dec-25-2018, 03:38 PM

Sorry, I didn't remove the indexing (item[col]) when I changed the loop to iterate over the column values directly. It should work now.

import csv
import math
import statistics
  
def load_from_csv(filename):
    """Read a comma-separated file and return its rows.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list of rows, each a list of field strings from ``csv.reader``.
    """
    # newline='' leaves line-ending handling to the csv module, so line breaks
    # inside quoted fields are kept exactly as written in the file.
    with open(filename, 'r', newline='') as csvfile:
        return list(csv.reader(csvfile, delimiter=','))
  
def get_distance(test1, test2):
    """Return the Euclidean distance from each row of ``test2`` to each row of ``test1``.

    Args:
        test1: Matrix (list of rows) of numbers or numeric strings.
        test2: Matrix with the same number of columns; its number of rows
            may differ from that of ``test1``.

    Returns:
        One list per row of ``test2`` containing the distances to every row
        of ``test1``, sorted from largest to smallest. Empty input gives an
        empty result.

    Raises:
        ValueError: If a cell cannot be converted with ``float()``.
    """
    euclidean_list_complete = []

    # Iterate over each matrix directly so that the row counts of test1 and
    # test2 are independent and no index can run past either of them.
    for row2 in test2:
        euclidean_list = []
        for row1 in test1:
            # zip() pairs the columns; if the rows differ in length the
            # surplus trailing values of the longer row are ignored.
            squared = sum(pow(float(b) - float(a), 2) for b, a in zip(row2, row1))
            euclidean_list.append(math.sqrt(squared))

        euclidean_list.sort(reverse=True)
        euclidean_list_complete.append(euclidean_list)

    return euclidean_list_complete
 
def get_standardised_matrix(matrix):
    """Attempt to standardise column 0 of ``matrix`` in place.

    NOTE(review): the loop modifies ``matrix`` while also passing it to
    get_standard_deviation(), see the comments on the loop body.
    """
    col = 0
    mycolunmlist = [item[col] for item in matrix]
    print('My colunm List :', mycolunmlist)

    for item in mycolunmlist:
        standard = (item - ave(mycolunmlist)) / get_standard_deviation(matrix, col)
        # Inserts a bare float into the list of rows, using the cell value as
        # the position. On the next pass get_standard_deviation() subscripts
        # every element of matrix and fails on that float.
        matrix.insert(item, standard)

    return matrix
 
def ave(matrix):
    """Return the arithmetic mean of the numbers in ``matrix``."""
    total = sum(matrix)
    count = len(matrix)
    return total / count
     
def get_standard_deviation(matrix, col):
    """Return the sample standard deviation of one column of ``matrix``.

    ``col`` is treated as a 1-based column number (index ``col - 1`` is read
    from every row), so passing 0 reads index -1, i.e. the last column.
    Every element of ``matrix`` must be subscriptable.
    """
    return statistics.stdev([item[col - 1] for item in matrix])
  
def get_k_nearest_labels(rowmatrix, matrix, new_matrix, k):
    """Print the distance from the first row of one CSV file to each row of another.

    Args:
        rowmatrix: Path of the CSV file whose first row is the query row.
        matrix: Path of the CSV file whose rows are compared with the query.
        new_matrix: Path of a third CSV file; it is loaded but not used yet.
        k: Currently unused.

    Returns:
        None. NOTE(review): the name suggests the labels of the k nearest
        rows should be returned; that selection is not implemented here.
    """
    query_row = load_from_csv(rowmatrix)[0]
    print(query_row)
    candidates = load_from_csv(matrix)
    # Still loaded so that a missing or unreadable file fails as before.
    load_from_csv(new_matrix)

    for item in candidates:
        print(item)
        # get_distance() expects matrices (lists of rows). A bare row would be
        # indexed character by character, so wrap each row in a list.
        print(
            'Euclidean Distance between two matrix :\n',
            get_distance([query_row], [item])
        )
 
def main():
    """Run each helper once on sample data and print the results.

    Reads 'Data.csv' from the current working directory.
    """
    print('Data from "Data.csv" file :', load_from_csv('Data.csv'))

    print(
        'Euclidean Distance between two matrix :\n',
        get_distance(
            [[2,3,5],[12,3,5],[2,3,6]],
            [[1,2,3],[3,4,6],[4,5,8]]
        )
    )

    # The column argument is 1-based in this version of get_standard_deviation().
    print('The Standard Deviation of the elements in the column number passed as a parameter :', get_standard_deviation([[2,5,8],[4,8,2]],1))

    print('Average of a matrix :', ave([2,3,4]))

    print('standardised matrix is :', get_standardised_matrix([[2,5,8],[4,8,2]]))
 
main()

shan1403 · (This post was last modified: Dec-25-2018, 03:43 PM by shan1403.)

Thanks Stullis for your help but still this is the error:

Traceback (most recent call last):
File "C:\Users\Python\Tims test.py", line 74, in <module>
main()
File "C:\Users\Python\Tims test.py", line 72, in main
print('standardised matrix is :', get_standardised_matrix([[2,5,8],[4,8,2]]))
File "C:\Users\Python\Tims test.py", line 34, in get_standardised_matrix
standard = (item - ave(mycolunmlist)) / get_standard_deviation(matrix, col)
File "C:\Users\Python\Tims test.py", line 43, in get_standard_deviation
return statistics.stdev([item[col - 1] for item in matrix])
File "C:\Users\Python\Tims test.py", line 43, in <listcomp>
return statistics.stdev([item[col - 1] for item in matrix])
TypeError: 'float' object is not subscriptable

stullis · Dec-25-2018, 05:37 PM

Ah, the problem stemmed from line 37. I hadn't noticed that the script inserted data into matrix on each iteration. The loop on lines 34-37 completed once, and the second pass then raised an error because matrix had had a float added to it.

This now runs without any errors, but I'm not sure if it's providing what you want.

import csv
import math
import statistics

def load_from_csv(filename):
    """Read a comma-separated file and return its rows.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list of rows, each a list of field strings from ``csv.reader``.
    """
    # newline='' leaves line-ending handling to the csv module, so line breaks
    # inside quoted fields are kept exactly as written in the file.
    with open(filename, 'r', newline='') as csvfile:
        return list(csv.reader(csvfile, delimiter=','))

def get_distance(test1, test2):
    """Return the Euclidean distance from each row of ``test2`` to each row of ``test1``.

    Args:
        test1: Matrix (list of rows) of numbers or numeric strings.
        test2: Matrix with the same number of columns; its number of rows
            may differ from that of ``test1``.

    Returns:
        One list per row of ``test2`` containing the distances to every row
        of ``test1``, sorted from largest to smallest. Empty input gives an
        empty result.

    Raises:
        ValueError: If a cell cannot be converted with ``float()``.
    """
    euclidean_list_complete = []

    # Iterate over each matrix directly so that the row counts of test1 and
    # test2 are independent and no index can run past either of them.
    for row2 in test2:
        euclidean_list = []
        for row1 in test1:
            # zip() pairs the columns; if the rows differ in length the
            # surplus trailing values of the longer row are ignored.
            squared = sum(pow(float(b) - float(a), 2) for b, a in zip(row2, row1))
            euclidean_list.append(math.sqrt(squared))

        euclidean_list.sort(reverse=True)
        euclidean_list_complete.append(euclidean_list)

    return euclidean_list_complete

def get_standardised_matrix(matrix):
    """Return the standardised values of the first column of ``matrix``.

    Each value of column 0 becomes ``(value - mean) / standard deviation`` of
    that column. The column is also printed. ``matrix`` is not modified.

    Args:
        matrix: List of rows whose first cell is a number.

    Returns:
        A list of floats, one per row and in row order; ``[]`` for an empty
        matrix.

    Raises:
        statistics.StatisticsError: If the matrix has only one row.
        ZeroDivisionError: If all values in the column are equal.
    """
    col = 0
    mycolumnlist = [item[col] for item in matrix]
    print('My column List :', mycolumnlist)

    if not mycolumnlist:
        return []

    # Neither value changes inside the loop, so compute each one once.
    mean = ave(mycolumnlist)
    deviation = get_standard_deviation(matrix, col)

    # Results are collected in row order. Using the cell value as an insert
    # position would reorder the results and fail for non-integer cells.
    return [(item - mean) / deviation for item in mycolumnlist]

def ave(matrix):
    """Return the arithmetic mean of a non-empty list of numbers."""
    return sum(matrix)/len(matrix)

def get_standard_deviation(matrix, col):
    """Return the sample standard deviation of column ``col`` (0-based) of ``matrix``."""
    return statistics.stdev([item[col] for item in matrix])

def get_k_nearest_labels(rowmatrix, matrix, new_matrix, k):
    """Print the distance from the first row of one CSV file to each row of another.

    Args:
        rowmatrix: Path of the CSV file whose first row is the query row.
        matrix: Path of the CSV file whose rows are compared with the query.
        new_matrix: Path of a third CSV file; it is loaded but not used yet.
        k: Currently unused.

    Returns:
        None. NOTE(review): the name suggests the labels of the k nearest
        rows should be returned; that selection is not implemented here.
    """
    query_row = load_from_csv(rowmatrix)[0]
    print(query_row)
    candidates = load_from_csv(matrix)
    # Still loaded so that a missing or unreadable file fails as before.
    load_from_csv(new_matrix)

    for item in candidates:
        print(item)
        # get_distance() expects matrices (lists of rows). A bare row would be
        # indexed character by character, so wrap each row in a list.
        print(
            'Euclidean Distance between two matrix :\n',
            get_distance([query_row], [item])
        )

def main():
    """Exercise every helper once on sample data and print each result.

    Reads 'Data.csv' from the current working directory.
    """
    csv_rows = load_from_csv('Data.csv')
    print('Data from "Data.csv" file :', csv_rows)

    first_matrix = [[2,3,5],[12,3,5],[2,3,6]]
    second_matrix = [[1,2,3],[3,4,6],[4,5,8]]
    distances = get_distance(first_matrix, second_matrix)
    print('Euclidean Distance between two matrix :\n', distances)

    # Column numbers are 0-based in this version of get_standard_deviation().
    deviation = get_standard_deviation([[2,5,8],[4,8,2]], 0)
    print('The Standard Deviation of the elements in the column number passed as a parameter :', deviation)

    mean = ave([2,3,4])
    print('Average of a matrix :', mean)

    standardised = get_standardised_matrix([[2,5,8],[4,8,2]])
    print('standardised matrix is :', standardised)

main()

shan1403 · Dec-25-2018, 05:59 PM

Thank you very much for the help. It's working; however, there is one error when using get_distance() inside get_k_nearest_labels().

here is the code:

import csv
import math
import statistics
 
def load_from_csv(filename):
    """Read a comma-separated file and return its rows.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list of rows, each a list of field strings from ``csv.reader``.
    """
    # newline='' leaves line-ending handling to the csv module, so line breaks
    # inside quoted fields are kept exactly as written in the file.
    with open(filename, 'r', newline='') as csvfile:
        return list(csv.reader(csvfile, delimiter=','))
 
def get_distance(test1,test2):
    """Return Euclidean distances between the rows of two matrices.

    Cells are converted with ``float()`` before subtraction. Each inner list
    of the result is sorted from largest to smallest.

    Both loops are bounded by ``len(test1)`` and the column loop by
    ``len(test1[0])``, so ``test2`` is assumed to have at least as many rows
    and columns as ``test1``; otherwise indexing ``test2`` raises IndexError.
    """
    euclidean_list_complete = []
    outer_length = len(test1)
    inner_length = len(test1[0])

    for i in range(outer_length):
        euclidean_list = []
        for j in range(outer_length):
            euclidean = 0
            for k in range(inner_length):
                # i (bounded by len(test1)) indexes test2 here.
                euclidean += pow(float(test2[i][k]) - float(test1[j][k]), 2)

            euclidean_list.append(math.sqrt(euclidean))

        euclidean_list.sort(reverse = True)
        euclidean_list_complete.append(euclidean_list)

    return euclidean_list_complete
 
def get_standardised_matrix(matrix):
    """Return the standardised values of the first column of ``matrix``.

    Each value of column 0 becomes ``(value - mean) / standard deviation`` of
    that column. The column is also printed. ``matrix`` is not modified.

    Args:
        matrix: List of rows whose first cell is a number.

    Returns:
        A list of floats, one per row and in row order; ``[]`` for an empty
        matrix.

    Raises:
        statistics.StatisticsError: If the matrix has only one row.
        ZeroDivisionError: If all values in the column are equal.
    """
    col = 0
    mycolumnlist = [item[col] for item in matrix]
    print('My column List :', mycolumnlist)

    if not mycolumnlist:
        return []

    # Neither value changes inside the loop, so compute each one once.
    mean = ave(mycolumnlist)
    deviation = get_standard_deviation(matrix, col)

    # Results are collected in row order. Using the cell value as an insert
    # position would reorder the results and fail for non-integer cells.
    return [(item - mean) / deviation for item in mycolumnlist]

def ave(matrix):
    """Return the arithmetic mean of a non-empty list of numbers."""
    return sum(matrix)/len(matrix)

def get_standard_deviation(matrix, col):
    """Return the sample standard deviation of column ``col`` (0-based) of ``matrix``."""
    return statistics.stdev([item[col] for item in matrix])
 
def get_k_nearest_labels(rowmatrix, matrix, new_matrix, k):
    """Load three CSV files and print a distance for each row of the second.

    Args:
        rowmatrix: Path of the CSV file whose first row is compared.
        matrix: Path of the CSV file whose rows it is compared against.
        new_matrix: Path of a third CSV file; loaded but not used afterwards.
        k: Not used in the body.

    Nothing is returned, so a caller that prints the result prints ``None``.
    NOTE(review): the name suggests the labels of the k nearest rows should
    be returned - confirm the intended behaviour.
    """
    file_open = load_from_csv(rowmatrix)
    print(file_open[0])
    fopen = load_from_csv(matrix)
    newfile = load_from_csv(new_matrix)
    for item in fopen:
         # Both arguments are single rows of strings, while get_distance()
         # indexes its arguments as [row][column].
         print(
            'Nearest Distance between two matrix :\n',
            get_distance(file_open[0], item)
        )
 
def main():
    """Run each helper once on sample data and print the results.

    Reads 'Data.csv', 'Data_Labels.csv' and 'Learning_Data.csv' from the
    current working directory.
    """
    print('Data from "Data.csv" file :', load_from_csv('Data.csv'))

    print(
        'Euclidean Distance between two matrix :\n',
        get_distance(
            [[2,3,5],[12,3,5],[2,3,6]],
            [[1,2,3],[3,4,6],[4,5,8]]
        )
    )

    # Column numbers are 0-based in this version of get_standard_deviation().
    print('The Standard Deviation of the elements in the column number passed as a parameter :', get_standard_deviation([[2,5,8],[4,8,2]], 0))

    print('Average of a matrix :', ave([2,3,4]))

    print('standardised matrix is :', get_standardised_matrix([[2,5,8],[4,8,2]]))
    # get_k_nearest_labels() has no return statement, so the value printed
    # after the label is None.
    print('Nearest vale  :',get_k_nearest_labels('Data.csv','Data_Labels.csv','Learning_Data.csv',1))
main()

Traceback (most recent call last):
File "C:/Users/Python/assignment-forum.py", line 76, in <module>
main()
File "C:/Users/Python/assignment-forum.py", line 75, in main
print('Nearest vale :',get_k_nearest_labels('Data.csv','Data_Labels.csv','Learning_Data.csv',1))
File "C:/Users/Python/assignment-forum.py", line 56, in get_k_nearest_labels
get_distance(file_open[0], item)
File "C:/Users/Python/assignment-forum.py", line 19, in get_distance
euclidean += pow(float(test2[i][k]) - float(test1[j][k]), 2)
IndexError: list index out of range

stullis · Dec-25-2018, 08:40 PM

My understanding was that the matrices had the same dimensions. Evidently, that was incorrect. It's an easy fix.

import csv
import math
import statistics
  
def load_from_csv(filename):
    """Read a comma-separated file and return its rows.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list of rows, each a list of field strings from ``csv.reader``.
    """
    # newline='' leaves line-ending handling to the csv module, so line breaks
    # inside quoted fields are kept exactly as written in the file.
    with open(filename, 'r', newline='') as csvfile:
        return list(csv.reader(csvfile, delimiter=','))
  
def get_distance(test1, test2):
    """Return the Euclidean distance from each row of ``test2`` to each row of ``test1``.

    Args:
        test1: Matrix (list of rows) of numbers or numeric strings.
        test2: Matrix with the same number of columns; its number of rows
            may differ from that of ``test1``.

    Returns:
        One list per row of ``test2`` containing the distances to every row
        of ``test1``, sorted from largest to smallest. Empty input gives an
        empty result.

    Raises:
        ValueError: If a cell cannot be converted with ``float()``.
    """
    euclidean_list_complete = []

    # The outer loop must walk test2 and the inner loop test1. Bounding the
    # index used on test2 by len(test1) (and vice versa) raises IndexError as
    # soon as the two matrices have different numbers of rows.
    for row2 in test2:
        euclidean_list = []
        for row1 in test1:
            # zip() pairs the columns; if the rows differ in length the
            # surplus trailing values of the longer row are ignored.
            squared = sum(pow(float(b) - float(a), 2) for b, a in zip(row2, row1))
            euclidean_list.append(math.sqrt(squared))

        euclidean_list.sort(reverse=True)
        euclidean_list_complete.append(euclidean_list)

    return euclidean_list_complete
  
def get_standardised_matrix(matrix):
    """Return the standardised values of the first column of ``matrix``.

    Each value of column 0 becomes ``(value - mean) / standard deviation`` of
    that column. The column is also printed. ``matrix`` is not modified.

    Args:
        matrix: List of rows whose first cell is a number.

    Returns:
        A list of floats, one per row and in row order; ``[]`` for an empty
        matrix.

    Raises:
        statistics.StatisticsError: If the matrix has only one row.
        ZeroDivisionError: If all values in the column are equal.
    """
    col = 0
    mycolumnlist = [item[col] for item in matrix]
    print('My column List :', mycolumnlist)

    if not mycolumnlist:
        return []

    # Neither value changes inside the loop, so compute each one once.
    mean = ave(mycolumnlist)
    deviation = get_standard_deviation(matrix, col)

    # Results are collected in row order. Using the cell value as an insert
    # position would reorder the results and fail for non-integer cells.
    return [(item - mean) / deviation for item in mycolumnlist]

def ave(matrix):
    """Return the arithmetic mean of a non-empty list of numbers."""
    return sum(matrix)/len(matrix)

def get_standard_deviation(matrix, col):
    """Return the sample standard deviation of column ``col`` (0-based) of ``matrix``."""
    return statistics.stdev([item[col] for item in matrix])
  
def get_k_nearest_labels(rowmatrix, matrix, new_matrix, k):
    """Print the distance from the first row of one CSV file to each row of another.

    Args:
        rowmatrix: Path of the CSV file whose first row is the query row.
        matrix: Path of the CSV file whose rows are compared with the query.
        new_matrix: Path of a third CSV file; it is loaded but not used yet.
        k: Currently unused.

    Returns:
        None. NOTE(review): the name suggests the labels of the k nearest
        rows should be returned; that selection is not implemented here.
    """
    query_row = load_from_csv(rowmatrix)[0]
    print(query_row)
    candidates = load_from_csv(matrix)
    # Still loaded so that a missing or unreadable file fails as before.
    load_from_csv(new_matrix)

    for item in candidates:
        # get_distance() expects matrices (lists of rows). A bare row would be
        # indexed character by character, so wrap each row in a list.
        print(
            'Nearest Distance between two matrix :\n',
            get_distance([query_row], [item])
        )
  
def main():
    """Exercise every helper once on sample data and print each result.

    Reads 'Data.csv', 'Data_Labels.csv' and 'Learning_Data.csv' from the
    current working directory.
    """
    csv_rows = load_from_csv('Data.csv')
    print('Data from "Data.csv" file :', csv_rows)

    first_matrix = [[2,3,5],[12,3,5],[2,3,6]]
    second_matrix = [[1,2,3],[3,4,6],[4,5,8]]
    distances = get_distance(first_matrix, second_matrix)
    print('Euclidean Distance between two matrix :\n', distances)

    # Column numbers are 0-based in this version of get_standard_deviation().
    deviation = get_standard_deviation([[2,5,8],[4,8,2]], 0)
    print('The Standard Deviation of the elements in the column number passed as a parameter :', deviation)

    mean = ave([2,3,4])
    print('Average of a matrix :', mean)

    standardised = get_standardised_matrix([[2,5,8],[4,8,2]])
    print('standardised matrix is :', standardised)

    nearest = get_k_nearest_labels('Data.csv','Data_Labels.csv','Learning_Data.csv',1)
    print('Nearest vale  :', nearest)
main()

Possibly Related Threads…
Thread		Author	Replies	Views	Last Post
	Type Error: Unsupported Operand	jhancock	2	1,202	Jul-22-2023, 11:33 PM Last Post: jhancock
	TypeError: unsupported operand type(s) for +: 'dict' and 'int'	nick12341234	1	9,334	Jul-15-2022, 04:04 AM Last Post: ndc85430
	TypeError: unsupported opperand type(s) for %: 'int' and 'list'	cool_person	7	2,177	May-07-2022, 08:40 AM Last Post: ibreeden
	unsupported operand type(s) for %: 'list' and 'int'	RandomCoder	4	32,889	May-07-2022, 08:07 AM Last Post: menator01
	You have any idea, how fix TypeError: unhashable type: 'list'	lsepolis123	2	3,015	Jun-02-2021, 07:55 AM Last Post: supuflounder
	TypeError: __str__ returned non-string (type tuple)	Anldra12	1	7,416	Apr-13-2021, 07:50 AM Last Post: Anldra12
	unsupported operand type(s) for /: 'str' and 'int' Error for boxplot	soft	1	3,068	Feb-09-2021, 05:40 PM Last Post: soft
	TypeError: 'type' object is not subscriptable	Stef	1	4,538	Aug-28-2020, 03:01 PM Last Post: Gribouillis
	TypeError: unhashable type: 'set'	Stager	1	2,613	Jun-08-2020, 04:11 PM Last Post: bowlofred
	TypeError: __repr__ returned non-string (type dict)	shockwave	0	3,199	May-17-2020, 05:56 PM Last Post: shockwave

TypeError: unsupported operand type(s) for -: 'str' and 'str'

User Panel Messages

Announcements