Feb-06-2018, 07:49 PM
Hello
I need help figuring out why the code is not outputting anything (I dont get any error) .
Any suggestions
I need help figuring out why the code is not outputting anything (I dont get any error) .
Any suggestions
import math import random #import plotly #from plotly.graph_objs import Scatter, Scatter3d, Layout def main(): # The K in k-means. How many clusters do we assume exist? k = 3 # When do we say the optimization has 'converged' and stop updating clusters cutoff = 0.2 # Generate some points to cluster x=[65,73,59,61,75,67,68,70,62,66,77,75,74,70,61,58,66,59,68,61] y=[220,160,110,120,150,240,230,220,130,210,190,180,170,210,110,100,230,120,210,130] points = [[x[i],y[i]] for i in range(20)] # Cluster those data! #0 = euclidean distance, 1 = manhattan distance clusters1 = kmeans(points, k, cutoff, 0) clusters2 = kmeans(points, k, cutoff, 1) # Print our clusters print ("Euclidean Distance clusters: ") for i, c in enumerate(clusters1): for p in c.points: print (" Cluster: ", i, "\t Point :", p) print ("Manhattan Distance clusters: ") for i, c in enumerate(clusters2): for p in c.points: print (" Cluster: ", i, "\t Point :", p) # Display clusters using plotly for 2d data #if plotly: # print ("Plotting points, launching browser ...") # plotClusters(clusters) class Point(object): ''' A point in n dimensional space ''' def __init__(self, coords): ''' coords - A list of values, one per dimension ''' self.coords = coords self.n = len(coords) def __repr__(self): return str(self.coords) class Cluster(object): ''' A set of points and their centroid ''' def __init__(self, points): ''' points - A list of point objects ''' if len(points) == 0: raise Exception("ERROR: empty cluster") # The points that belong to this cluster self.points = points # The dimensionality of the points in this cluster self.n = points[0].n # Assert that all points are of the same dimensionality for p in points: if p.n != self.n: raise Exception("ERROR: inconsistent dimensions") # Set up the initial centroid (this is usually based off one point) self.centroid = self.calculateCentroid() def __repr__(self): ''' String representation of this object ''' return str(self.points) def update(self, points): ''' Returns the distance between the previous centroid and the new after recalculating and storing the new centroid. Note: Initially we expect centroids to shift around a lot and then gradually settle down. ''' old_centroid = self.centroid self.points = points self.centroid = self.calculateCentroid() shift = getDistance(old_centroid, self.centroid) return shift def calculateCentroid(self): ''' Finds a virtual center point for a group of n-dimensional points ''' numPoints = len(self.points) # Get a list of all coordinates in this cluster coords = [p.coords for p in self.points] # Reformat that so all x's are together, all y'z etc. unzipped = zip(*coords) # Calculate the mean for each dimension centroid_coords = [math.fsum(dList)/numPoints for dList in unzipped] return Point(centroid_coords) def kmeans(points, k, cutoff, dist): # Pick out k random points to use as our initial centroids centroid = [points[i] for i in range(k)] # Create k clusters using those centroids # Note: Cluster takes lists, so we wrap each point in a list here. clusters = [Cluster([p]) for p in centroid] # Loop through the dataset until the clusters stabilize loopCounter = 0 while True: # Create a list of lists to hold the points in each cluster lists = [[i] for i in clusters] clusterCount = len(clusters) # Start counting loops loopCounter += 1 # For every point in the dataset ... for p in points: # Get the distance between that point and the centroid of the first # cluster. if dist == 0: smallest_distance = getEuclidDistance(p, clusters[0].centroid) else: smallest_distance = getManhattanDistance(p, clusters[0].centroid) # Set the cluster this point belongs to clusterIndex = 0 # For the remainder of the clusters ... for i in range(clusterCount - 1): # calculate the distance of that point to each other cluster's # centroid. if dist == 0: new_distance = getEuclidDistance(p, clusters[i+1].centroid) else: new_distance = getManhattanDistance(p, clusters[i+1].centroid) # If it's closer to that cluster's centroid update what we # think the smallest distance is if new_distance < smallest_distance: smallest_distance = new_distance clusterIndex = i+1 # After finding the cluster the smallest distance away # set the point to belong to that cluster lists[clusterIndex].append(p) # Set our biggest_shift to zero for this iteration biggest_shift = 0.0 # For each cluster ... for i in range(clusterCount): # Calculate how far the centroid moved in this iteration shift = clusters[i].update(lists[i]) # Keep track of the largest move from all cluster centroid updates biggest_shift = max(biggest_shift, shift) # If the centroids have stopped moving much, say we're done! if biggest_shift < cutoff: print ("Converged after %s iterations" % loopCounter) break return clusters def getEuclidDistance(a, b): ''' Euclidean distance between two n-dimensional points. https://en.wikipedia.org/wiki/Euclidean_distance#n_dimensions Note: This can be very slow and does not scale well ''' if a.n != b.n: raise Exception("ERROR: non comparable points") for i in range(a.n): squareDifference += (a.coords[i]-b.coords[i]) ** 2 return math.sqrt(accumulatedDifference) def getManhattanDistance(a, b): ''' Manhattan distance between 2 dimensional points ''' if a.n != b.n: raise Exception("ERROR: non comparable points") for i in range(a.n): total += abs(a.coords[i] - a.coords[i+1]) + abs(b.coords[i] - b.coords[i+1]) return total def plotClusters(data): ''' This uses the plotly offline mode to create a local HTML file. This should open your default web browser. ''' # Convert data into plotly format. traceList = [] for i, c in enumerate(data): # Get a list of x,y coordinates for the points in this cluster. cluster_data = [] for point in c.points: cluster_data.append(point.coords) trace = {} centroid = {} ## if dimensions == 2: # Convert our list of x,y's into an x list and a y list. trace['x'], trace['y'] = zip(*cluster_data) trace['mode'] = 'markers' trace['marker'] = {} trace['marker']['symbol'] = i trace['marker']['size'] = 12 trace['name'] = "Cluster " + str(i) traceList.append(Scatter(**trace)) # Centroid (A trace of length 1) centroid['x'] = [c.centroid.coords[0]] centroid['y'] = [c.centroid.coords[1]] centroid['mode'] = 'markers' centroid['marker'] = {} centroid['marker']['symbol'] = i centroid['marker']['color'] = 'rgb(200,10,10)' centroid['name'] = "Centroid " + str(i) traceList.append(Scatter(**centroid)) ## else: ## symbols = [ ## "circle", ## "square", ## "diamond", ## "circle-open", ## "square-open", ## "diamond-open", ## "cross", "x" ## ] ## symbol_count = len(symbols) ## if i > symbol_count: ## print ("Warning: Not enough marker symbols to go around") ## # Convert our list of x,y,z's separate lists. ## trace['x'], trace['y'], trace['z'] = zip(*cluster_data) ## trace['mode'] = 'markers' ## trace['marker'] = {} ## trace['marker']['symbol'] = symbols[i] ## trace['marker']['size'] = 12 ## trace['name'] = "Cluster " + str(i) ## traceList.append(Scatter3d(**trace)) ## # Centroid (A trace of length 1) ## centroid['x'] = [c.centroid.coords[0]] ## centroid['y'] = [c.centroid.coords[1]] ## centroid['z'] = [c.centroid.coords[2]] ## centroid['mode'] = 'markers' ## centroid['marker'] = {} ## centroid['marker']['symbol'] = symbols[i] ## centroid['marker']['color'] = 'rgb(200,10,10)' ## centroid['name'] = "Centroid " + str(i) ## traceList.append(Scatter3d(**centroid)) title = "K-means clustering with %s clusters" % str(len(data)) plotly.offline.plot({ "data": traceList, "layout": Layout(title=title) }) if __name__ == "main()": main()