compare algorithms - Printable Version

compare algorithms - Printable Version

+- Python Forum (https://python-forum.io)
+-- Forum: Python Coding (https://python-forum.io/forum-7.html)
+--- Forum: General Coding Help (https://python-forum.io/forum-8.html)
+--- Thread: compare algorithms (/thread-8319.html)

compare algorithms - tygaf - Feb-14-2018

Hello ,
I want to write a python script that would compare the three algorithms with different parameters.
For the script I don't have to modify the existing clustering, make a separate instance call the clustering functions from there.
Any help would be appreciated.
import arcpy, os, math
import datetime, time, json
from arcpy import env

class Toolbox(object):
def __init__(self):
"""Define the toolbox (the name of the toolbox is the name of the
.pyt file)."""
self.label = "Clustering"
self.alias = "clr"
print("clr")
# List of tool classes associated with this toolbox
self.tools = [CMPM, UpdateCMPM, LocationAllocation]

########################################################################################################################
########################################################################################################################
# Clustering (Arslan) Cost Matrix Penalty Matrix Threshold (without threshold)
########################################################################################################################
########################################################################################################################

class CMPM(object):
def __init__(self):
"""Define the tool (tool name is the name of the class)."""
self.label = "CMPM"
self.description = "Cost Matrix Penalty Matrix (CMPM) Clustering"
self.canRunInBackground = False

def getParameterInfo(self):
"""Define parameter definitions"""

# Network dataset to work with
network = arcpy.Parameter(
displayName="Network dataset",
name="network",
datatype="DENetworkDataset",
parameterType="Required",
direction="Input")

# Nodes to be clustered
nodes = arcpy.Parameter(
displayName="Nodes",
name="nodes",
datatype="DEFeatureClass",
parameterType="Required",
direction="Input")

nodes.filter.list = ["Point"]

# Facilities
facilities_in = arcpy.Parameter(
displayName = "Facilities",
name="fac",
datatype="String",
parameterType="Required",
direction="Input")

facilities_in.filter.type = "ValueList"
facilities_in.filter.list = ['Nodes', 'Intersections']

# Splitting ratio
sr = arcpy.Parameter(
displayName="Splitting ratio",
name="SR",
datatype="Double",
parameterType="Required",
direction="Input")

# Geography mode
geo_mode = arcpy.Parameter(
displayName="Geography mode",
name="gm",
datatype="String",
parameterType="Required",
direction="Input")

geo_mode.filter.type = "ValueList"
geo_mode.filter.list = ['OD Cost Matrix', 'Closest Facility']

# Cost metric
cost_met = arcpy.Parameter(
displayName="Penalty metric",
name="cm",
datatype="String",
parameterType="Required",
direction="Input")

cost_met.filter.type = "ValueList"
cost_met.filter.list = ['Accumulated length', 'Maximum length']

# Cost direction
cost_dir = arcpy.Parameter(
displayName = "Penalty matrix sorting direction",
name="cd",
datatype="String",
parameterType="Required",
direction="Input")

cost_dir.filter.type = "ValueList"
cost_dir.filter.list = ['Ascending', 'Descending']

# Intersection points
int_points = arcpy.Parameter(
displayName = "Intersection points",
name="int",
datatype="DEFeatureClass",
parameterType="Required",
direction="Input")

int_points.filter.list = ["Point"]

# Run Identifier
rid = arcpy.Parameter(
displayName = "Run Identifier",
name="rid",
datatype="String",
parameterType="Optional",
direction="Input")

# Output penalty matrix, metrics depends on the input: OD - euclidian, Closest facility - network
clusters = arcpy.Parameter(
displayName = "Output Features",
name = "out_features",
datatype = "Table",
parameterType="Derived",
direction="Output")

params = [network, nodes, facilities_in, geo_mode, sr, cost_met, cost_dir, int_points, rid,
clusters]
return params

def isLicensed(self):
"""Set whether tool is licensed to execute."""
return True

def updateParameters(self, parameters):
"""Modify the values and properties of parameters before internal
validation is performed. This method is called whenever a parameter
has been changed."""
return

def updateMessages(self, parameters):
"""Modify the messages created by internal validation for each tool
parameter. This method is called after internal validation."""
return

def execute(self, parameters, messages):
# Acknowledgements
msg = 'This toolbox was developed by the Chair of Communication Networks, Technical University of Munich, Germany. ' \
'More information under https://www.lkn.ei.tum.de/en/home.html ' \
'This work has been funded by the German Research Foundation (DFG) ' \
'under grant numbers MA6529/2-1 and KE1863/4-1.'
arcpy.AddMessage(msg)

# Check out the Network Analyst extension license
arcpy.CheckOutExtension("Network")
arcpy.management.Delete('in_memory/')
# Set overwriting out the files to TRUE
arcpy.overwriteOutput = 1

# Get input network dataset
network = parameters[0].value # Network dataset

# Set environment
# Check the input database
descript = arcpy.Describe(network)
path_test = descript.catalogPath
index = path_test.find('gdb') # gives the index of the first symbol in 'gdb'
gdb_path = path_test[:(index+3)]
arcpy.env.workspace = os.path.join(gdb_path) # Setting the database, where the output table will be created
output_dir = str(gdb_path)

# Get all the input parameters
nodes_in = parameters[1].value # BS locations
nodes = arcpy.CopyFeatures_management(nodes_in, 'in_memory/nodes')
arcpy.AddXY_management(nodes) # Add coordinates to the nodes
facilities_in = parameters[2].value # Which nodes to use as prospective facility, i.e., PS, location
geo_mode = parameters[3].value # Which optimization to use
sr = parameters[4].value # Splitting ratio
# Penalty matrix sorting parameters
cost_met = parameters[5].value # max or accumulated length
cost_dir = parameters[6].value # ascending or descending
int_points_in = parameters[7].value # Intersection points
int_points = arcpy.CopyFeatures_management(int_points_in, 'in_memory/int_points')
arcpy.AddXY_management(int_points) # Add coordinates to the intersection points
# Optional
rid = parameters[8].value # Run Identifier: used only for differentiating the runs, ie´.e.,
# naming of the files

numb = int(arcpy.GetCount_management(nodes).getOutput(0)) # number of RSUs(BSs)

arcpy.AddMessage('Your output location is '+ output_dir)

arcpy.AddMessage('Calculating cost matrix.')

if geo_mode == 'OD Cost Matrix':
###########################################################################################################
# Get the cost matrix: OD
###########################################################################################################

# Set local variables
layer_name = "ODcostMatrix"
impedance = "Length"
output_layer_file = os.path.join(output_dir, layer_name + ".lyrx")

result_object = arcpy.na.MakeODCostMatrixLayer(network, layer_name, impedance)

# Get the layer object from the result object. The OD cost matrix layer can
# now be referenced using the layer object.
layer_object = result_object.getOutput(0)

# Get the names of all the sublayers within the OD cost matrix layer.
sublayer_names = arcpy.na.GetNAClassNames(layer_object)

# Stores the layer names that we will use later
origins_layer_name = sublayer_names["Origins"]
destinations_layer_name = sublayer_names["Destinations"]
lines_layer_name = sublayer_names["ODLines"]

# Load the BS locations (nodes) as both origins and destinations.
if facilities_in == 'Nodes':
arcpy.na.AddLocations(layer_object, origins_layer_name, nodes)
else:
arcpy.na.AddLocations(layer_object, origins_layer_name, int_points)

arcpy.na.AddLocations(layer_object, destinations_layer_name, nodes)

# Solve the OD cost matrix layer
arcpy.na.Solve(layer_object)

# Save the solved OD cost matrix layer as a layer file on disk
layer_object.saveACopy(output_layer_file)

# Get the Lines Sublayer (all the distances)
lines_sublayer = arcpy.mapping.ListLayers(layer_object, lines_layer_name)[0]
layer_out_path = os.path.join('in_memory', lines_layer_name)

if facilities_in != 'Nodes':
# Add xy coordinates to facilities.
facilities = arcpy.mapping.ListLayers(layer_object, origins_layer_name)[0]
facilities_out_path = os.path.join('in_memory', origins_layer_name)
# arcpy.management.CopyFeatures(facilities, facilities_out_path)
facilities_point = arcpy.FeatureToPoint_management(facilities, facilities_out_path, "CENTROID")
facilities_in = 'in_memory/' + origins_layer_name
facilities_xy = arcpy.AddXY_management(facilities_in)

arcpy.management.CopyFeatures(lines_sublayer, layer_out_path)
lines = 'in_memory/Lines'

else:
###########################################################################################################
# Get the cost matrix: Closest facility
###########################################################################################################
# Set local variables
layer_name = "ClosestFacility"
impedance = "Length"

output_layer_file = os.path.join(output_dir, layer_name + ".lyrx")

# MakeClosestFacilityLayer_na (in_network_dataset, out_network_analysis_layer, impedance_attribute,
# {travel_from_to}, {default_cutoff}, {default_number_facilities_to_find}, {accumulate_attribute_name},
# {UTurn_policy}, {restriction_attribute_name}, {hierarchy}, {hierarchy_settings}, {output_path_shape},
# {time_of_day}, {time_of_day_usage})
#
# http://desktop.arcgis.com/en/arcmap/10.3/tools/network-analyst-toolbox/make-closest-facility-layer.htm
result_object = arcpy.na.MakeClosestFacilityLayer(network, layer_name, impedance,'TRAVEL_TO', None,
numb, '', '', '', '', '', 'TRUE_LINES_WITH_MEASURES')

# Get the layer object from the result object. The Closest facility layer can
# now be referenced using the layer object.
layer_object = result_object.getOutput(0)

# Get the names of all the sublayers within the Closest facility layer.
sublayer_names = arcpy.na.GetNAClassNames(layer_object)

# Stores the layer names that we will use later
origins_layer_name = sublayer_names["Incidents"] # as origins in OD cost matrix
destinations_layer_name = sublayer_names["Facilities"] # as destinations in OD cost matrix
lines_layer_name = sublayer_names["CFRoutes"] # as lines in OD cost matrix

# Load the BS locations (nodes) as both origins and destinations.
if facilities_in == 'Nodes':
arcpy.na.AddLocations(layer_object, origins_layer_name, nodes)
else:
arcpy.na.AddLocations(layer_object, origins_layer_name, int_points)

arcpy.na.AddLocations(layer_object, destinations_layer_name, nodes)

# Solve the Closest facility layer
arcpy.na.Solve(layer_object)

# Save the solved Closest facility layer as a layer file on disk
layer_object.saveACopy(output_layer_file)

if facilities_in != 'Nodes':
# Add xy coordinates to facilities.
facilities = arcpy.mapping.ListLayers(layer_object, destinations_layer_name)[0]
facilities_out_path = os.path.join('in_memory', destinations_layer_name)
# arcpy.management.CopyFeatures(facilities, facilities_out_path)
facilities_point = arcpy.FeatureToPoint_management(facilities, facilities_out_path, "CENTROID")
facilities_in = 'in_memory/' + destinations_layer_name
facilities_xy = arcpy.AddXY_management(facilities_in)

# Get the Lines Sublayer (all the distances)
lines_sublayer = arcpy.mapping.ListLayers(layer_object, lines_layer_name)[0]
layer_out_path = os.path.join('in_memory', lines_layer_name)
lines = 'in_memory/'+ lines_layer_name

arcpy.management.CopyFeatures(lines_sublayer, layer_out_path)

################################################################################################################
# Gathering the data for the penalty matrix
################################################################################################################
leng = int(arcpy.GetCount_management(lines_sublayer).getOutput(0)) # Number of paths from every BS to every BS

if sr < numb:
thr = int(sr)
else:
thr = int(numb)

# Convert an attribute table into python dictionary
# http://gis.stackexchange.com/questions/54804/fastest-methods-for-modifying-attribute-tables-with-python
def make_attribute_dict(fc, key_field, attr_list=['*']):

attdict = {}

# define an empty dictionary
fc_field_objects = arcpy.ListFields(fc)
fc_fields = [field.name for field in fc_field_objects if field.type != 'Geometry']

# checking input parameters
if attr_list == ['*']:
valid_fields = fc_fields
else:
valid_fields = [field for field in attr_list if field in fc_fields]

# Ensure that key_field is always the first field in the field list
cursor_fields = [key_field] + list(set(valid_fields) - set([key_field]))

# step through the table
with arcpy.da.SearchCursor(fc, cursor_fields) as cursor:
for row in cursor:
attdict[row[0]] = dict(zip(cursor.fields, row))
return attdict

# Convert attribute table of lines from optimization to python nested dict
if geo_mode == 'OD Cost Matrix':
cost = make_attribute_dict(lines, "ObjectID", ["OriginID", 'DestinationID', 'DestinationRank',
'Total_Length'])
cost_keys = ["ObjectID", "OriginID", 'DestinationID', 'DestinationRank', 'Total_Length']
else:
cost = make_attribute_dict(lines, "ObjectID", ["IncidentID", 'FacilityID', 'FacilityRank',
'Total_Length'])
cost_keys = ["ObjectID", "IncidentID", 'FacilityID', 'FacilityRank', 'Total_Length']

# Convert nodes to python nested dictionary to get the coordinates for the clustering
fields = arcpy.ListFields(nodes)

for field in fields:
if field.name == "ObjectID":
nodes_d = make_attribute_dict(nodes, "ObjectID", ['POINT_X', 'POINT_Y'])
elif field.name == 'OID':
nodes_d = make_attribute_dict(nodes, 'OID', ['POINT_X', 'POINT_Y'])
elif field.name == "OBJECTID":
nodes_d = make_attribute_dict(nodes, 'OBJECTID', ['POINT_X', 'POINT_Y'])

if facilities_in != 'Nodes':
fields = arcpy.ListFields(facilities_xy)
for field in fields:
if field.name == "ObjectID":
facilities_d = make_attribute_dict(facilities_xy, "ObjectID", ['POINT_X', 'POINT_Y'])
elif field.name == 'OID':
facilities_d = make_attribute_dict(facilities_xy, "OID", ['POINT_X', 'POINT_Y'])
elif field.name == "OBJECTID":
facilities_d = make_attribute_dict(facilities_xy, "OBJECTID", ['POINT_X', 'POINT_Y'])

def penalty_update(numb_in, thr_in, cost_matrix, keys, count_in=0):
arcpy.AddMessage('Calculating Penalty Matrix.')
penalty = {} # empty penalty dictionary
length = len(cost_matrix.keys()) # Number of entries in cost matrix
if count_in != 0:
co = count_in+1
else:
co = numb_in+1

# Construct penalty dictionary
for h in range(1, co):
if count_in == 0:
# index to iterate through cost, as we need not all the entries, we make a custom iteration
g = numb_in*(h-1) + 1
if g < length:
penalty[h] = {}
penalty[h]['max_cost'] = 0
penalty[h]['accum_cost'] = 0
# Calculate accumulated cost = sum of indiv. total lengths
n = 0
while n < thr_in:
if g <= length:
penalty[h]['accum_cost'] += cost_matrix[g][keys[4]]
if penalty[h]['max_cost'] < cost_matrix[g][keys[4]]:
penalty[h]['max_cost'] = cost_matrix[g][keys[4]]
n += 1
if g < length:
g += 1
if g > numb_in*(h-1)+numb_in:
break

elif count_in != 0:
# index to iterate through cost, as we need not all the entries, we make a custom iteration
g = numb_in*(h-1) + 1
if g < length:
penalty[h] = {}
penalty[h]['max_cost'] = 0
penalty[h]['accum_cost'] = 0
# Calculate accumulated cost = sum of indiv. total lengths
n = 0
while n < thr_in:
if g <= length:
penalty[h]['accum_cost'] += cost_matrix[g][keys[4]]
if penalty[h]['max_cost'] < cost_matrix[g][keys[4]]:
penalty[h]['max_cost'] = cost_matrix[g][keys[4]]
n += 1
if g < length:
g += 1
if g > numb_in*(h-1)+numb_in:
break
else:
break

# Sort penalty matrix
acc_list = [] # List for total (accumulated cost)
max_list = [] # Maximum cost (length)

# Make a sortable structure out of a penalty dict.
for key_i in penalty.keys(): # for all the base stations
acc_list.append((key_i, penalty[key_i]['accum_cost']))
max_list.append((key_i, penalty[key_i]['max_cost']))

# Sort the values by length; sorting depends on user input
if cost_met == 'Accumulated length':
if cost_dir == 'Ascending':
sort_in = sorted(acc_list, key=lambda x: x[1])
else:
sort_in = sorted(acc_list, key=lambda x: x[1], reverse=True)
else:
if cost_dir == 'Ascending':
sort_in = sorted(max_list, key=lambda x: x[1])
else:
sort_in = sorted(max_list, key=lambda x: x[1], reverse=True)

return sort_in

################################################################################################################
# Clustering
################################################################################################################
arcpy.AddMessage('Performing clustering.')

# Define a list of non-available nodes for clustering
flags = set()
# Define clustering dictionary
clustering = {}

if facilities_in == 'Nodes':
count = numb
index = 0
else:
count = len(facilities_d)
index = len(facilities_d)

j = 1 # iterator through the cost matrix
cl = 1 # counter fr the cluster

sort = penalty_update(numb, thr, cost, cost_keys, index) # calculate penalty matrix

for i in range(0, count): # for all the nodes
k = 0 # counter for number of cluster members
if j < leng:
if len(flags) != numb:
node = sort[i][0]
#Nodes
if facilities_in == 'Nodes':
if node not in flags:
clustering[cl] = {}
clustering[cl]['tot_cost'] = 0
# We need to traverse the cost matrix. However we start not from the first element. As the matrix
# structure is regular we can calculate the starting index for our array to assign the (X,Y)
# to the seed master. E.g., we have three elements [1, 2, 3] then we have 111222333 structure, where
# 3 starts from position 7 number of nodes is 3, thus 3*(3-1)+1 = 7. for 1 it is 3*(1-1)+1 = 1 and
# for 2 is 3*(2-1)+1 = 4
j = len(nodes_d)*(node-1) + 1

while k < thr:
# now we need to populate the cluster dict with non-clustered yet members
if cost[j][cost_keys[2]] not in flags:
member_id = cost[j][cost_keys[2]]
clustering[cl][member_id] = {}
clustering[cl][member_id]['ind'] = j
clustering[cl][member_id]['XY'] = (nodes_d[member_id]['POINT_X'],
nodes_d[member_id]['POINT_Y'])
clustering[cl][member_id]['cost'] = cost[j][cost_keys[4]]
clustering[cl]['tot_cost'] += cost[j][cost_keys[4]]
flags.add(member_id)
k += 1
if j < leng:
j += 1
else:
break
else:
if j < leng:
j += 1
if len(flags) == numb:
break
cl += 1

# Intersections
else:
clustering[cl] = {}
clustering[cl]['tot_cost'] = 0
# We need to traverse the cost matrix. However we start not from the first element. As the matrix
# structure is regular we can calculate the starting index for our array to assign the (X,Y)
# to the seed master. E.g., we have three elements [1, 2, 3] then we have 111222333 structure, where
# 3 starts from position 7 number of nodes is 3, thus 3*(3-1)+1 = 7. for 1 it is 3*(1-1)+1 = 1 and
# for 2 is 3*(2-1)+1 = 4
clustering[cl]['Cluster_head'] = (facilities_d[sort[0][0]]['POINT_X'],
facilities_d[sort[0][0]]['POINT_Y'])
j = len(nodes_d)*(node-1) + 1
while k < thr:
# now we need to populate the cluster dict with non-clustered yet members
if cost[j][cost_keys[2]] not in flags:
# Only if the cost threshold was provided by the user
member_id = cost[j][cost_keys[2]]
clustering[cl][member_id] = {}
clustering[cl][member_id]['ind'] = j
clustering[cl][member_id]['XY'] = (nodes_d[member_id]['POINT_X'],
nodes_d[member_id]['POINT_Y'])
clustering[cl][member_id]['cost'] = cost[j][cost_keys[4]]
clustering[cl]['tot_cost'] += cost[j][cost_keys[4]]
flags.add(member_id)
k += 1

if j < leng:
j += 1
if k == thr:
cl+=1
else:
break
else:
if j < leng:
j += 1
if len(flags) == numb:
break
else:
break

arcpy.AddMessage('Writing results.')
if facilities_in != 'Nodes':

point = arcpy.Point()

# Get the spatial reference from the input so that the results can be mapped to the original data
desc = arcpy.Describe(nodes)
spatial_ref = desc.spatialReference

# Write clusters to a point file
for key in clustering.keys():
# vary cluster name
if len(clustering[key]) > 2:
if rid and rid != "#":
if cost_met == 'Accumulated length':
if cost_dir == 'Ascending':
cluster = 'CMPM_Cluster_' + str(key) + '_Intersections_'+'sr' + str(int(sr))\
+ '_acc_asc_' + str(rid)
cluster_head = 'CMPM_Cluster_head_' + str(key) + '_Intersections_' + 'sr' \
+ str(int(sr))+'_acc_asc_' + str(rid)
else:
cluster = 'CMPM_Cluster_' + str(key) + '_Intersections_'+'sr' + str(int(sr))\
+ '_acc_des'+str(rid)
cluster_head = 'CMPM_Cluster_head_' + str(key) + '_Intersections_'+'sr' + \
str(int(sr)) + '_acc_des_' + str(rid)
else:
if cost_dir == 'Ascending':
cluster = 'CMPM_Cluster_' + str(key) + '_Intersections_'+'sr' + \
str(int(sr)) + '_max_asc_' + str(rid)
cluster_head = 'CMPM_Cluster_head_' + str(key) + '_Intersections_'+'sr' + \
str(int(sr)) + '_max_asc_' + str(rid)
else:
cluster = 'CMPM_Cluster_' + str(key) + '_Intersections_'+'sr' + \
str(int(sr)) + '_max_des_' + str(rid)
cluster_head = 'CMPM_Cluster_head_' + str(key) + '_Intersections_'+'sr' + \
str(int(sr)) + '_max_des_' + str(rid)

else:
if cost_met == 'Accumulated length':
if cost_dir == 'Ascending':
cluster = 'CMPM_Cluster_' + str(key) + '_Intersections_'+'sr' + str(int(sr))\
+ '_acc_asc'
cluster_head = 'CMPM_Cluster_head_' + str(key) + '_Intersections_' + 'sr' \
+ str(int(sr))+'_acc_asc'
else:
cluster = 'CMPM_Cluster_' + str(key) + '_Intersections_'+'sr' + str(int(sr))\
+ '_acc_des'
cluster_head = 'CMPM_Cluster_head_' + str(key) + '_Intersections_'+'sr' + \
str(int(sr)) + '_acc_des'
else:
if cost_dir == 'Ascending':
cluster = 'CMPM_Cluster_' + str(key) + '_Intersections_'+'sr' + \
str(int(sr)) + '_max_asc'
cluster_head = 'CMPM_Cluster_head_' + str(key) + '_Intersections_'+'sr' + \
str(int(sr)) + '_max_asc'
else:
cluster = 'CMPM_Cluster_' + str(key) + '_Intersections_'+'sr' + \
str(int(sr)) + '_max_des'
cluster_head = 'CMPM_Cluster_head_' + str(key) + '_Intersections_'+'sr' + \
str(int(sr)) + '_max_des'

out_cluster = os.path.join(output_dir, cluster)
out_cluster_head = os.path.join(output_dir, cluster_head)

point.X = clustering[key]['Cluster_head'][0]
point.Y = clustering[key]['Cluster_head'][1]
point_geometry = arcpy.PointGeometry(point, spatial_ref)
arcpy.CopyFeatures_management(point_geometry, out_cluster_head)

# Do it for the cluster members
# It has to be greater than two as we always have total cost and coordinates
if len(clustering[key].keys()) > 2:
# A list to hold the PointGeometry objects
point_list = []
for k in clustering[key].keys():
if k != 'tot_cost' and k != 'Cluster_head':
point.X = clustering[key][k]['XY'][0]
point.Y = clustering[key][k]['XY'][1]
point_geometry = arcpy.PointGeometry(point, spatial_ref)
# Aggregate cluster members
point_list.append(point_geometry)
# Write cluster members to a file and define coordinate system
arcpy.CopyFeatures_management(point_list, out_cluster)
else:

# Create an empty Point object
point = arcpy.Point()

# Get the spatial reference from the input so that the results can be mapped to the original data
desc = arcpy.Describe(nodes)
spatial_ref = desc.spatialReference

# Write clusters to a point file
for key in clustering.keys():
# vary cluster name
if rid and rid != "#":
if cost_met == 'Accumulated length':
if cost_dir == 'Ascending':
cluster = 'CMPM_Cluster_' + str(key) + '_Nodes_'+'sr' + str(int(sr)) \
+ '_acc_asc_' + str(rid)
else:
cluster = 'CMPM_Cluster_' + str(key) + '_Nodes_'+'sr' + str(int(sr)) \
+ '_acc_des_'+str(rid)
else:
if cost_dir == 'Ascending':
cluster = 'CMPM_Cluster_' + str(key) + '_Nodes_'+'sr' + \
str(int(sr)) + '_max_asc_' + str(rid)
else:
cluster = 'CMPM_Cluster_' + str(key) + '_Nodes_'+'sr' + \
str(int(sr)) + '_max_des_' + str(rid)

else:
if cost_met == 'Accumulated length':
if cost_dir == 'Ascending':
cluster = 'CMPM_Cluster_' + str(key) + '_Nodes_'+'sr' + str(int(sr)) \
+ '_acc_asc'
else:
cluster = 'CMPM_Cluster_' + str(key) + '_Nodes_'+'sr' + str(int(sr)) \
+ '_acc_des'
else:
if cost_dir == 'Ascending':
cluster = 'CMPM_Cluster_' + str(key) + '_Nodes_'+'sr' + \
str(int(sr)) + '_max_asc'
else:
cluster = 'CMPM_Cluster_' + str(key) + '_Nodes_'+'sr' + \
str(int(sr)) + '_max_des'

out_cluster = os.path.join(output_dir, cluster)

# It has to be greater than one as we always have total cost
if len(clustering[key].keys()) > 1:
# A list to hold the PointGeometry objects
point_list = []
for k in clustering[key].keys():
if k != 'tot_cost':
point.X = clustering[key][k]['XY'][0]
point.Y = clustering[key][k]['XY'][1]
point_geometry = arcpy.PointGeometry(point, spatial_ref)

# Aggregate cluster members
point_list.append(point_geometry)

# Write cluster members to a file and define coordinate system
arcpy.CopyFeatures_management(point_list, out_cluster)

# Find centroid of the cluster
# First project the data into meters
name_proj = 'Proj_'+cluster
output_features_class = os.path.join(output_dir, name_proj)
#out_coordinate_system = arcpy.SpatialReference(spatial_ref)
arcpy.Project_management(out_cluster, output_features_class, spatial_ref)

# Use mean center tool to find the centroid TODO median center
cluster_head = 'Proj_Cluster_head_'+ str(key)
mean_output = os.path.join(output_dir, cluster_head)
arcpy.MeanCenter_stats(output_features_class, mean_output, "#", "#", "#")

arcpy.management.Delete(name_proj)

# Project cluster head to original projection
name_proj = 'Prel_Cluster_head_'+ str(key)
output_features_class = os.path.join(output_dir, name_proj)
arcpy.Project_management(mean_output, output_features_class, spatial_ref)

# Create near table to closest Intersection node
in_features = output_features_class
near_features = int_points
out_table = 'Cluster_head_Near_Table'+str(key)
location = 'LOCATION'
angle = 'ANGLE'
arcpy.GenerateNearTable_analysis(in_features, near_features, out_table, '', location, angle, '', '')

# Plot near_X and near_Y
out_name = 'Cluster_head'
path_output = os.path.join(output_dir, out_name)
head = arcpy.MakeXYEventLayer_management(out_table, "NEAR_X", "NEAR_Y", path_output, spatial_ref, '')
if rid and rid != "#":
if cost_met == 'Accumulated length':
if cost_dir == 'Ascending':
out_name = 'CMPM_Cluster_head_' + str(key) + '_Nodes_'+'sr' + str(int(sr)) \
+ '_acc_asc_' + str(rid)
else:
out_name = 'CMPM_Cluster_head_' + str(key) + '_Nodes_'+'sr' + str(int(sr)) \
+ '_acc_des_'+str(rid)
else:
if cost_dir == 'Ascending':
out_name = 'CMPM_Cluster_head_' + str(key) + '_Nodes_'+'sr' + \
str(int(sr)) + '_max_asc_' + str(rid)
else:
out_name = 'CMPM_Cluster_head_' + str(key) + '_Nodes_'+'sr' + \
str(int(sr)) + '_max_des_' + str(rid)

else:
if cost_met == 'Accumulated length':
if cost_dir == 'Ascending':
out_name = 'CMPM_Cluster_head_' + str(key) + '_Nodes_'+'sr' + str(int(sr)) \
+ '_acc_asc'
else:
out_name = 'CMPM_Cluster_head_' + str(key) + '_Nodes_'+'sr' + str(int(sr)) \
+ '_acc_des'
else:
if cost_dir == 'Ascending':
out_name = 'CMPM_Cluster_head_' + str(key) + '_Nodes_'+'sr' + \
str(int(sr)) + '_max_asc'
else:
out_name = 'CMPM_Cluster_head_' + str(key) + '_Nodes_'+'sr' + \
str(int(sr)) + '_max_des'

path_output = os.path.join(output_dir, out_name)
arcpy.CopyFeatures_management(head, path_output)
arcpy.management.Delete(out_table)
arcpy.management.Delete(name_proj)
arcpy.management.Delete(cluster_head)
return

########################################################################################################################
########################################################################################################################
# CMPM Clustering with penalty matrix update (Elena)
########################################################################################################################
########################################################################################################################

class UpdateCMPM(object):
def __init__(self):
"""Define the tool (tool name is the name of the class)."""
self.label = "UpdateCMPM"
self.description = "CMPM Clustering with penalty matrix update"
self.canRunInBackground = False

def getParameterInfo(self):
"""Define parameter definitions"""

# Network dataset to work with
network = arcpy.Parameter(
displayName="Network dataset",
name="network",
datatype="DENetworkDataset",
parameterType="Required",
direction="Input")

# Nodes to be clustered
nodes = arcpy.Parameter(
displayName="Nodes",
name="nodes",
datatype="DEFeatureClass",
parameterType="Required",
direction="Input")

# Facilities
facilities_in = arcpy.Parameter(
displayName = "Facilities",
name="fac",
datatype="String",
parameterType="Required",
direction="Input")

facilities_in.filter.type = "ValueList"
facilities_in.filter.list = ['Nodes', 'Intersections']

# Splitting ratio
sr = arcpy.Parameter(
displayName="Splitting ratio",
name="SR",
datatype="Double",
parameterType="Required",
direction="Input")

# Geography mode
geo_mode = arcpy.Parameter(
displayName="Geography mode",
name="gm",
datatype="String",
parameterType="Required",
direction="Input")

geo_mode.filter.type = "ValueList"
geo_mode.filter.list = ['OD Cost Matrix', 'Closest Facility']

# Cost metric
cost_met = arcpy.Parameter(
displayName="Penalty metric",
name="cm",
datatype="String",
parameterType="Required",
direction="Input")

cost_met.filter.type = "ValueList"
cost_met.filter.list = ['Accumulated length', 'Maximum length']

# Cost direction
cost_dir = arcpy.Parameter(
displayName = "Penalty matrix sorting direction",
name="cd",
datatype="String",
parameterType="Required",
direction="Input")

cost_dir.filter.type = "ValueList"
cost_dir.filter.list = ['Ascending', 'Descending']

# Intersection points
int_points = arcpy.Parameter(
displayName = "Intersection points",
name="int",
datatype="DEFeatureClass",
parameterType="Required",
direction="Input")

# Run Identifier
rid = arcpy.Parameter(
displayName = "Run Identifier",
name="rid",
datatype="String",
parameterType="Optional",
direction="Input")

# Output penalty matrix, metrics depends on the input: OD - euclidian, Closest facility - network
clusters = arcpy.Parameter(
displayName = "Output Features",
name = "out_features",
datatype = "Table",
parameterType="Derived",
direction="Output")

params = [network, nodes, facilities_in, geo_mode, sr, cost_met, cost_dir, int_points, rid, clusters]
return params

def isLicensed(self):
"""Set whether tool is licensed to execute."""
return True

def updateParameters(self, parameters):
"""Modify the values and properties of parameters before internal
validation is performed. This method is called whenever a parameter
has been changed."""
return

def updateMessages(self, parameters):
"""Modify the messages created by internal validation for each tool
parameter. This method is called after internal validation."""
return

def execute(self, parameters, messages):
# Acknowledgements
msg = 'This toolbox was developed by the Chair of Communication Networks, Technical University of Munich, Germany. ' \
'More information under https://www.lkn.ei.tum.de/en/home.html ' \
'This work has been funded by the German Research Foundation (DFG) ' \
'under grant numbers MA6529/2-1 and KE1863/4-1.'
arcpy.AddMessage(msg)

# Check out the Network Analyst extension license
arcpy.CheckOutExtension("Network")
arcpy.management.Delete('in_memory/')

# Set overwriting out the files to TRUE
arcpy.overwriteOutput = 1

# Get input network dataset
network = parameters[0].value # Network dataset

# Set environment
# Check the input database
descript = arcpy.Describe(network)
path_test = descript.catalogPath
index = path_test.find('gdb') # gives the index of the first symbol in 'gdb'
gdb_path = path_test[:(index+3)]
arcpy.env.workspace = os.path.join(gdb_path) # Setting the database, where the output table will be created
# out_path = arcpy.env.workspace

output_dir = str(gdb_path)

# Ge all the input parameters
nodes_in = parameters[1].value # BS locations
nodes = arcpy.CopyFeatures_management(nodes_in, 'in_memory/nodes')
arcpy.AddXY_management(nodes)
facilities_in = parameters[2].value # Facility locations choice: nodes or intersections
geo_mode = parameters[3].value # Which optimization to use
sr = parameters[4].value # Splitting ratio
# Penalty matrix sorting parameters
cost_met = parameters[5].value # max or accumulated length
cost_dir = parameters[6].value # ascending or descending
int_points_in = parameters[7].value # Intersection points
int_points = arcpy.CopyFeatures_management(int_points_in, 'in_memory/int_points')
arcpy.AddXY_management(int_points)
# Optional
rid = parameters[8].value # Run Identifier

arcpy.AddMessage('Your output location is '+ output_dir)

arcpy.AddMessage('Calculating cost matrix.')

numb = int(arcpy.GetCount_management(nodes).getOutput(0)) # number of RSUs(BSs)

if geo_mode == 'OD Cost Matrix':
###########################################################################################################
# Get the cost matrix: OD
###########################################################################################################

# Set local variables
layer_name = "ODcostMatrix"
impedance = "Length"
output_layer_file = os.path.join(output_dir, layer_name + ".lyrx")

result_object = arcpy.na.MakeODCostMatrixLayer(network, layer_name, impedance)

# Get the layer object from the result object. The OD cost matrix layer can
# now be referenced using the layer object.
layer_object = result_object.getOutput(0)

# Get the names of all the sublayers within the OD cost matrix layer.
sublayer_names = arcpy.na.GetNAClassNames(layer_object)

# Stores the layer names that we will use later
origins_layer_name = sublayer_names["Origins"]
destinations_layer_name = sublayer_names["Destinations"]
lines_layer_name = sublayer_names["ODLines"]

# Load the BS locations (nodes) as both origins and destinations.
if facilities_in == 'Nodes':
arcpy.na.AddLocations(layer_object, origins_layer_name, nodes)
else:
arcpy.na.AddLocations(layer_object, origins_layer_name, int_points)

arcpy.na.AddLocations(layer_object, destinations_layer_name, nodes)

# Solve the OD cost matrix layer
arcpy.na.Solve(layer_object)

# Save the solved OD cost matrix layer as a layer file on disk
layer_object.saveACopy(output_layer_file)

# Get the Lines Sublayer (all the distances)
lines_sublayer = arcpy.mapping.ListLayers(layer_object, lines_layer_name)[0]
layer_out_path = os.path.join('in_memory',lines_layer_name)

if facilities_in != 'Nodes':
# Add xy coordinates to facilities.
facilities = arcpy.mapping.ListLayers(layer_object, origins_layer_name)[0]
facilities_out_path = os.path.join('in_memory', origins_layer_name)
# arcpy.management.CopyFeatures(facilities, facilities_out_path)
facilities_point = arcpy.FeatureToPoint_management(facilities, facilities_out_path, "CENTROID")
facilities_m = 'in_memory/' + origins_layer_name
facilities_xy = arcpy.AddXY_management(facilities_m)

arcpy.management.CopyFeatures(lines_sublayer, layer_out_path)
lines = 'in_memory/' + lines_layer_name

else:
###########################################################################################################
# Get the cost matrix: Closest facility
###########################################################################################################
# Set local variables
layer_name = "ClosestFacility"
impedance = "Length"

output_layer_file = os.path.join(output_dir, layer_name + ".lyrx")

# MakeClosestFacilityLayer_na (in_network_dataset, out_network_analysis_layer, impedance_attribute,
# {travel_from_to}, {default_cutoff}, {default_number_facilities_to_find}, {accumulate_attribute_name},
# {UTurn_policy}, {restriction_attribute_name}, {hierarchy}, {hierarchy_settings}, {output_path_shape},
# {time_of_day}, {time_of_day_usage})
#
# http://desktop.arcgis.com/en/arcmap/10.3/tools/network-analyst-toolbox/make-closest-facility-layer.htm
result_object = arcpy.na.MakeClosestFacilityLayer(network, layer_name, impedance,'TRAVEL_TO', None,
numb, '', '', '', '', '', 'TRUE_LINES_WITH_MEASURES')

# Get the layer object from the result object. The Closest facility layer can
# now be referenced using the layer object.
layer_object = result_object.getOutput(0)

# Get the names of all the sublayers within the Closest facility layer.
sublayer_names = arcpy.na.GetNAClassNames(layer_object)

# Stores the layer names that we will use later
origins_layer_name = sublayer_names["Incidents"] # as origins in OD cost matrix
destinations_layer_name = sublayer_names["Facilities"] # as destinations in OD cost matrix
lines_layer_name = sublayer_names["CFRoutes"] # as lines in OD cost matrix

# Load the BS locations (nodes) as both origins and destinations.
if facilities_in == 'Nodes':
arcpy.na.AddLocations(layer_object, origins_layer_name, nodes)
else:
arcpy.na.AddLocations(layer_object, origins_layer_name, int_points)

arcpy.na.AddLocations(layer_object, destinations_layer_name, nodes)

# Solve the OD cost matrix layer
arcpy.na.Solve(layer_object)

# Save the solved OD cost matrix layer as a layer file on disk
layer_object.saveACopy(output_layer_file)

# Get the Lines Sublayer (all the distances)
lines_sublayer = arcpy.mapping.ListLayers(layer_object, lines_layer_name)[0]
layer_out_path = os.path.join('in_memory', lines_layer_name)

if facilities_in != 'Nodes':
# Add xy coordinates to facilities.
facilities = arcpy.mapping.ListLayers(layer_object, origins_layer_name)[0]
facilities_out_path = os.path.join('in_memory', origins_layer_name)
# arcpy.management.CopyFeatures(facilities, facilities_out_path)
facilities_point = arcpy.FeatureToPoint_management(facilities, facilities_out_path, "CENTROID")
facilities_m = 'in_memory/' + origins_layer_name
facilities_xy = arcpy.AddXY_management(facilities_m)

arcpy.management.CopyFeatures(lines_sublayer, layer_out_path)
lines = 'in_memory/' + lines_layer_name

################################################################################################################
# Gathering the data for the penalty matrix
################################################################################################################

leng = int(arcpy.GetCount_management(lines).getOutput(0)) # Number of paths from every BS to every BS

if sr < numb:
thr = int(sr)
else:
thr = int(numb)

# Convert an attribute table into python dictionary
# http://gis.stackexchange.com/questions/54804/fastest-methods-for-modifying-attribute-tables-with-python
def make_attribute_dict(fc, key_field, attr_list=['*']):

attdict = {}

# define an empty dictionary
fc_field_objects = arcpy.ListFields(fc)
fc_fields = [field.name for field in fc_field_objects if field.type != 'Geometry']

# checking input parameters
if attr_list == ['*']:
valid_fields = fc_fields
else:
valid_fields = [field for field in attr_list if field in fc_fields]

# Ensure that key_field is always the first field in the field list
cursor_fields = [key_field] + list(set(valid_fields) - set([key_field]))

# step through the table
with arcpy.da.SearchCursor(fc, cursor_fields) as cursor:
for row in cursor:
attdict[row[0]] = dict(zip(cursor.fields, row))
return attdict

# Convert attribute table of lines from optimization to python nested dict
if geo_mode == 'OD Cost Matrix':
cost = make_attribute_dict(lines, "ObjectID", ["OriginID", 'DestinationID', 'DestinationRank',
'Total_Length'])
cost_keys = ["ObjectID", "OriginID", 'DestinationID', 'DestinationRank', 'Total_Length']
else:
cost = make_attribute_dict(lines, "ObjectID", ["IncidentID", 'FacilityID', 'FacilityRank',
'Total_Length'])
cost_keys = ["ObjectID", "IncidentID", 'FacilityID', 'FacilityRank', 'Total_Length']

# Convert nodes to python nested dictionary to get the coordinates for the clustering
fields = arcpy.ListFields(nodes)

for field in fields:
if field.name == "ObjectID":
nodes_d = make_attribute_dict(nodes, "ObjectID", ['POINT_X', 'POINT_Y'])
elif field.name == 'OID':
nodes_d = make_attribute_dict(nodes, 'OID', ['POINT_X', 'POINT_Y'])
elif field.name == "OBJECTID":
nodes_d = make_attribute_dict(nodes, 'OBJECTID', ['POINT_X', 'POINT_Y'])

if facilities_in != 'Nodes':
fields = arcpy.ListFields(facilities_xy)
for field in fields:
if field.name == "ObjectID":
facilities_d = make_attribute_dict(facilities_xy, "ObjectID", ['POINT_X', 'POINT_Y'])
elif field.name == 'OID':
facilities_d = make_attribute_dict(facilities_xy, "OID", ['POINT_X', 'POINT_Y'])
elif field.name == "OBJECTID":
facilities_d = make_attribute_dict(facilities_xy, "OBJECTID", ['POINT_X', 'POINT_Y'])

################################################################################################################
# Function for penalty matrix calculation/recalculation
################################################################################################################

def penalty_update(numb_in, thr_in, cost_matrix, keys, count_in=0, clustered=set()):
arcpy.AddMessage('Calculating Penalty Matrix.')
penalty = {} # empty penalty dictionary
length = len(cost_matrix.keys()) # Number of entries in cost matrix
if count_in != 0:
co = count_in+1
else:
co = numb_in+1

# Construct penalty dictionary
for h in range(1, co):
if count_in == 0 and h not in clustered:
# index to iterate through cost, as we need not all the entries, we make a custom iteration
g = numb_in*(h-1) + 1
if g < length:
penalty[h] = {}
penalty[h]['max_cost'] = 0
penalty[h]['accum_cost'] = 0
# Calculate accumulated cost = sum of indiv. total lengths
n = 0
while n < thr_in:
if g <= length:
if cost_matrix[g][keys[2]] not in clustered:
penalty[h]['accum_cost'] += cost_matrix[g][keys[4]]
if penalty[h]['max_cost'] < cost_matrix[g][keys[4]]:
penalty[h]['max_cost'] = cost_matrix[g][keys[4]]
n += 1
if g < length:
g += 1
if g > numb_in*(h-1)+numb_in:
break
else:
break
elif count_in != 0:
# index to iterate through cost, as we need not all the entries, we make a custom iteration
g = numb_in*(h-1) + 1
if g < length:
if cost_matrix[g][keys[2]] not in clustered:
penalty[h] = {}
penalty[h]['max_cost'] = 0
penalty[h]['accum_cost'] = 0
# Calculate accumulated cost = sum of indiv. total lengths
n = 0
while n < thr_in:
if g <= length:
if cost_matrix[g][keys[2]] not in clustered:
penalty[h]['accum_cost'] += cost_matrix[g][keys[4]]
if penalty[h]['max_cost'] < cost_matrix[g][keys[4]]:
penalty[h]['max_cost'] = cost_matrix[g][keys[4]]
n += 1
if g < length:
g += 1
if g > numb_in*(h-1)+numb_in:
break
else:
break

# Sort penalty matrix
acc_list = [] # List for total (accumulated cost)
max_list = [] # Maximum cost (length)

# Make a sortable structure out of a penalty dict.
for key_i in penalty.keys(): # for all the base stations
acc_list.append((key_i, penalty[key_i]['accum_cost']))
max_list.append((key_i, penalty[key_i]['max_cost']))

# Sort the values by length; sorting depends on user input
if cost_met == 'Accumulated length':
if cost_dir == 'Ascending':
sort_in = sorted(acc_list, key=lambda x: x[1])
else:
sort_in = sorted(acc_list, key=lambda x: x[1], reverse=True)
else:
if cost_dir == 'Ascending':
sort_in = sorted(max_list, key=lambda x: x[1])
else:
sort_in = sorted(max_list, key=lambda x: x[1], reverse=True)

return sort_in

################################################################################################################
# Clustering
################################################################################################################

# Define a list of non-available nodes for clustering
flags = set()
# Define clustering dictionary
clustering = {}

if facilities_in == 'Nodes':
count = numb
index = 0
else:
count = len(facilities_d)
index = len(facilities_d)

j = 1 # iterator through the cost matrix
cl = 1 # counter fr the cluster
arcpy.AddMessage('Performing clustering.')
for i in range(0, count): # for all the nodes
k = 0 # counter for number of cluster members
if j < leng:
if len(flags) != numb:
sort = penalty_update(numb, thr, cost, cost_keys, index,flags) # update penalty matrix
node = sort[0][0]
#Nodes
if facilities_in == 'Nodes':
if node not in flags:
clustering[cl] = {}
clustering[cl]['tot_cost'] = 0
# We need to traverse the cost matrix. However we start not from the first element. As the matrix
# structure is regular we can calculate the starting index for our array to assign the (X,Y)
# to the seed master. E.g., we have three elements [1, 2, 3] then we have 111222333 structure, where
# 3 starts from position 7 number of nodes is 3, thus 3*(3-1)+1 = 7. for 1 it is 3*(1-1)+1 = 1 and
# for 2 is 3*(2-1)+1 = 4
j = len(nodes_d)*(node-1) + 1

while k < thr:
# now we need to populate the cluster dict with non-clustered yet members
if cost[j][cost_keys[2]] not in flags:
member_id = cost[j][cost_keys[2]]
clustering[cl][member_id] = {}
clustering[cl][member_id]['ind'] = j
clustering[cl][member_id]['XY'] = (nodes_d[member_id]['POINT_X'],
nodes_d[member_id]['POINT_Y'])
clustering[cl][member_id]['cost'] = cost[j][cost_keys[4]]
clustering[cl]['tot_cost'] += cost[j][cost_keys[4]]
flags.add(member_id)
k += 1
if j < leng:
j += 1
else:
break
else:
if j < leng:
j += 1
if len(flags) == numb:
break
cl += 1
# Intersections
else:
clustering[cl] = {}
clustering[cl]['tot_cost'] = 0
# We need to traverse the cost matrix. However we start not from the first element. As the matrix
# structure is regular we can calculate the starting index for our array to assign the (X,Y)
# to the seed master. E.g., we have three elements [1, 2, 3] then we have 111222333 structure, where
# 3 starts from position 7 number of nodes is 3, thus 3*(3-1)+1 = 7. for 1 it is 3*(1-1)+1 = 1

RE: compare algorithms - buran - Feb-14-2018

Please, use proper tags when post code, traceback, output, etc.
See BBcode help for more info.
However I doubt someone will bother to look at 1285 lines of code. You were about to exceed maximum number of chars per post.