Python Forum

Full Version: Unable to get my code to output
You're currently viewing a stripped down version of our content. View the full version with proper formatting.
Hello

I need help figuring out why the code is not outputting anything (I dont get any error) .
Any suggestions

import math
import random
#import plotly
#from plotly.graph_objs import Scatter, Scatter3d, Layout

def main():

    # The K in k-means. How many clusters do we assume exist?
    k = 3

    # When do we say the optimization has 'converged' and stop updating clusters
    cutoff = 0.2

    # Generate some points to cluster
    x=[65,73,59,61,75,67,68,70,62,66,77,75,74,70,61,58,66,59,68,61]
    y=[220,160,110,120,150,240,230,220,130,210,190,180,170,210,110,100,230,120,210,130]
    points = [[x[i],y[i]] for i in range(20)]
    
    # Cluster those data!
    #0 = euclidean distance, 1 = manhattan distance
    clusters1 = kmeans(points, k, cutoff, 0)
    clusters2 = kmeans(points, k, cutoff, 1)

    # Print our clusters
    print ("Euclidean Distance clusters: ")
    for i, c in enumerate(clusters1):
        for p in c.points:
            print (" Cluster: ", i, "\t Point :", p)

    print ("Manhattan Distance clusters: ")
    for i, c in enumerate(clusters2):
        for p in c.points:
            print (" Cluster: ", i, "\t Point :", p)
    
    # Display clusters using plotly for 2d data
    #if plotly:
    #    print ("Plotting points, launching browser ...")
    #    plotClusters(clusters)

class Point(object):
    '''
    A point in n dimensional space
    '''
    def __init__(self, coords):
        '''
        coords - A list of values, one per dimension
        '''

        self.coords = coords
        self.n = len(coords)

    def __repr__(self):
        return str(self.coords)

class Cluster(object):
    '''
    A set of points and their centroid
    '''

    def __init__(self, points):
        '''
        points - A list of point objects
        '''

        if len(points) == 0:
            raise Exception("ERROR: empty cluster")

        # The points that belong to this cluster
        self.points = points

        # The dimensionality of the points in this cluster
        self.n = points[0].n

        # Assert that all points are of the same dimensionality
        for p in points:
            if p.n != self.n:
                raise Exception("ERROR: inconsistent dimensions")

        # Set up the initial centroid (this is usually based off one point)
        self.centroid = self.calculateCentroid()

    def __repr__(self):
        '''
        String representation of this object
        '''
        return str(self.points)

    def update(self, points):
        '''
        Returns the distance between the previous centroid and the new after
        recalculating and storing the new centroid.
        Note: Initially we expect centroids to shift around a lot and then
        gradually settle down.
        '''
        old_centroid = self.centroid
        self.points = points
        self.centroid = self.calculateCentroid()
        shift = getDistance(old_centroid, self.centroid)
        return shift

    def calculateCentroid(self):
        '''
        Finds a virtual center point for a group of n-dimensional points
        '''
        numPoints = len(self.points)
        # Get a list of all coordinates in this cluster
        coords = [p.coords for p in self.points]
        # Reformat that so all x's are together, all y'z etc.
        unzipped = zip(*coords)
        # Calculate the mean for each dimension
        centroid_coords = [math.fsum(dList)/numPoints for dList in unzipped]

        return Point(centroid_coords)

def kmeans(points, k, cutoff, dist):

    # Pick out k random points to use as our initial centroids
    centroid = [points[i] for i in range(k)]

    # Create k clusters using those centroids
    # Note: Cluster takes lists, so we wrap each point in a list here.
    clusters = [Cluster([p]) for p in centroid]

    # Loop through the dataset until the clusters stabilize
    loopCounter = 0
    while True:
        # Create a list of lists to hold the points in each cluster
        lists = [[i] for i in clusters]
        clusterCount = len(clusters)

        # Start counting loops
        loopCounter += 1
        # For every point in the dataset ...
        for p in points:
            # Get the distance between that point and the centroid of the first
            # cluster.
            if dist == 0:
                smallest_distance = getEuclidDistance(p, clusters[0].centroid)
            else:
                smallest_distance = getManhattanDistance(p, clusters[0].centroid)
                
            # Set the cluster this point belongs to
            clusterIndex = 0

            # For the remainder of the clusters ...
            for i in range(clusterCount - 1):
                # calculate the distance of that point to each other cluster's
                # centroid.
                if dist == 0:
                    new_distance = getEuclidDistance(p, clusters[i+1].centroid)
                else:
                    new_distance = getManhattanDistance(p, clusters[i+1].centroid)
                # If it's closer to that cluster's centroid update what we
                # think the smallest distance is
                if new_distance < smallest_distance:
                    smallest_distance = new_distance
                    clusterIndex = i+1
            # After finding the cluster the smallest distance away
            # set the point to belong to that cluster
            lists[clusterIndex].append(p)

        # Set our biggest_shift to zero for this iteration
        biggest_shift = 0.0

        # For each cluster ...
        for i in range(clusterCount):
            # Calculate how far the centroid moved in this iteration
            shift = clusters[i].update(lists[i])
            # Keep track of the largest move from all cluster centroid updates
            biggest_shift = max(biggest_shift, shift)

        # If the centroids have stopped moving much, say we're done!
        if biggest_shift < cutoff:
            print ("Converged after %s iterations" % loopCounter)
            break
    return clusters

def getEuclidDistance(a, b):
    '''
    Euclidean distance between two n-dimensional points.
    https://en.wikipedia.org/wiki/Euclidean_distance#n_dimensions
    Note: This can be very slow and does not scale well
    '''
    if a.n != b.n:
        raise Exception("ERROR: non comparable points")

    for i in range(a.n):
        squareDifference += (a.coords[i]-b.coords[i]) ** 2
    return math.sqrt(accumulatedDifference)

def getManhattanDistance(a, b):
    '''
    Manhattan distance between 2 dimensional points
    '''
    if a.n != b.n:
        raise Exception("ERROR: non comparable points")

    for i in range(a.n):
        total += abs(a.coords[i] - a.coords[i+1]) + abs(b.coords[i] - b.coords[i+1]) 
    return total

def plotClusters(data):
    '''
    This uses the plotly offline mode to create a local HTML file.
    This should open your default web browser.
    '''
    # Convert data into plotly format.
    traceList = []
    for i, c in enumerate(data):
        # Get a list of x,y coordinates for the points in this cluster.
        cluster_data = []
        for point in c.points:
            cluster_data.append(point.coords)

        trace = {}
        centroid = {}
##        if dimensions == 2:
            # Convert our list of x,y's into an x list and a y list.
        trace['x'], trace['y'] = zip(*cluster_data)
        trace['mode'] = 'markers'
        trace['marker'] = {}
        trace['marker']['symbol'] = i
        trace['marker']['size'] = 12
        trace['name'] = "Cluster " + str(i)
        traceList.append(Scatter(**trace))
            # Centroid (A trace of length 1)
        centroid['x'] = [c.centroid.coords[0]]
        centroid['y'] = [c.centroid.coords[1]]
        centroid['mode'] = 'markers'
        centroid['marker'] = {}
        centroid['marker']['symbol'] = i
        centroid['marker']['color'] = 'rgb(200,10,10)'
        centroid['name'] = "Centroid " + str(i)
        traceList.append(Scatter(**centroid))
##        else:
##            symbols = [
##                "circle",
##                "square",
##                "diamond",
##                "circle-open",
##                "square-open",
##                "diamond-open",
##                "cross", "x"
##            ]
##            symbol_count = len(symbols)
##            if i > symbol_count:
##                print ("Warning: Not enough marker symbols to go around")
##            # Convert our list of x,y,z's separate lists.
##            trace['x'], trace['y'], trace['z'] = zip(*cluster_data)
##            trace['mode'] = 'markers'
##            trace['marker'] = {}
##            trace['marker']['symbol'] = symbols[i]
##            trace['marker']['size'] = 12
##            trace['name'] = "Cluster " + str(i)
##            traceList.append(Scatter3d(**trace))
##            # Centroid (A trace of length 1)
##            centroid['x'] = [c.centroid.coords[0]]
##            centroid['y'] = [c.centroid.coords[1]]
##            centroid['z'] = [c.centroid.coords[2]]
##            centroid['mode'] = 'markers'
##            centroid['marker'] = {}
##            centroid['marker']['symbol'] = symbols[i]
##            centroid['marker']['color'] = 'rgb(200,10,10)'
##            centroid['name'] = "Centroid " + str(i)
##            traceList.append(Scatter3d(**centroid))

    title = "K-means clustering with %s clusters" % str(len(data))
    plotly.offline.plot({
        "data": traceList,
        "layout": Layout(title=title)
    })

if __name__ == "main()":
    main()
change:
if __name__ == "main()":
to
if __name__ == "__main__":
These the error I am getting. Is the problem in the classes or because of var "n"

Error:
raceback (most recent call last): File "/Users/Fadi/Desktop/test updated.py", line 270, in <module> main() File "/Users/Fadi/Desktop/test updated.py", line 21, in main clusters1 = kmeans(points, k, cutoff, 0) File "/Users/Fadi/Desktop/test updated.py", line 122, in kmeans clusters = [Cluster([p]) for p in centroid] File "/Users/Fadi/Desktop/test updated.py", line 122, in <listcomp> clusters = [Cluster([p]) for p in centroid] File "/Users/Fadi/Desktop/test updated.py", line 72, in __init__ self.n = points[0].n AttributeError: 'list' object has no attribute 'n'
what is
points[0].n
supposed to be?

what is .n supposed to refer to?