Oct-22-2017, 07:42 PM
I have the following problem. I have written the code attached below. The code constructs a decision tree based on the information in "my_data". The main idea is to construct different trees and see how they align the data. What I need is to construct more kinds of "trees" from "my_data": for example, a multi-level node-leaf tree, a dictionary-based tree, and an indented text-based tree. Please advise me on how I can proceed with the code.
from math import log

# Sample rows; the last column of each row is the outcome that the tree
# learns to predict, the preceding columns are the attributes to split on.
my_data = [
    ['slashdot', 'USA', 'yes', 18, 'None'],
    ['google', 'France', 'yes', 23, 'Premium'],
    ['digg', 'USA', 'yes', 24, 'Basic'],
    ['kiwitobes', 'France', 'yes', 23, 'Basic'],
    ['google', 'UK', 'no', 21, 'Premium'],
    ['(direct)', 'New Zealand', 'no', 12, 'None'],
    ['(direct)', 'UK', 'no', 21, 'Basic'],
    ['google', 'USA', 'no', 24, 'Premium'],
    ['slashdot', 'France', 'yes', 19, 'None'],
    ['digg', 'USA', 'no', 18, 'None'],
    ['google', 'UK', 'no', 18, 'None'],
    ['kiwitobes', 'UK', 'no', 19, 'None'],
    ['digg', 'New Zealand', 'yes', 12, 'Basic'],
    ['slashdot', 'UK', 'no', 21, 'None'],
    ['google', 'UK', 'yes', 18, 'Basic'],
    ['kiwitobes', 'France', 'yes', 19, 'Basic'],
]


class decisionnode:
    """A single node of a binary decision tree.

    Attributes:
        col: Index of the column tested at this node (-1 by default).
        value: Value the column is compared against.
        results: Dict of outcome counts for a leaf; None for a decision node.
        tb: Subtree followed when the test is true.
        fb: Subtree followed when the test is false.
    """

    def __init__(self, col=-1, value=None, results=None, tb=None, fb=None):
        self.col = col
        self.value = value
        self.results = results
        self.tb = tb
        self.fb = fb


def divideset(rows, column, value):
    """Divide rows into two lists based on one column.

    Numeric values (int or float) split on ``row[column] >= value``; any
    other value splits on equality.

    Args:
        rows: Sequence of indexable rows.
        column: Index of the column to test.
        value: Value to compare the column against.

    Returns:
        A tuple ``(set1, set2)``: rows for which the test is true, then
        rows for which it is false, each in their original order.
    """
    if isinstance(value, (int, float)):
        def split_function(row):
            return row[column] >= value
    else:
        def split_function(row):
            return row[column] == value

    # Single pass so the test runs once per row.
    set1, set2 = [], []
    for row in rows:
        (set1 if split_function(row) else set2).append(row)
    return (set1, set2)


def uniquecounts(rows):
    """Count how often each outcome (the last column of a row) occurs.

    Returns:
        A dict mapping each distinct last-column value to its row count.
    """
    results = {}
    for row in rows:
        outcome = row[-1]
        results[outcome] = results.get(outcome, 0) + 1
    return results


def entropy(rows):
    """Return the entropy of the outcome distribution of rows.

    Entropy is the sum of -p(x)*log2(p(x)) across all distinct outcomes.
    An empty list of rows yields 0.0.
    """
    total = len(rows)
    ent = 0.0
    for count in uniquecounts(rows).values():
        p = float(count) / total
        ent = ent - p * (log(p) / log(2))
    return ent


def buildtree(rows, scoref=entropy):
    """Recursively build a decision tree from rows.

    Every distinct value of every attribute column (all columns except the
    last) is tried as a split; the split with the highest gain in
    ``scoref`` is kept and both partitions are built recursively with the
    same ``scoref``.

    Args:
        rows: List of rows whose last column is the outcome.
        scoref: Function taking a list of rows and returning a score where
            lower means more homogeneous. Defaults to ``entropy``.

    Returns:
        The root ``decisionnode``. A leaf carries ``results`` (outcome
        counts); an empty ``rows`` produces a default, empty node.
    """
    if len(rows) == 0:
        return decisionnode()
    current_score = scoref(rows)

    best_gain = 0.0
    best_criteria = None
    best_sets = None

    column_count = len(rows[0]) - 1
    for col in range(column_count):
        # Distinct values of this column, in first-seen order.
        column_values = dict.fromkeys(row[col] for row in rows)
        for value in column_values:
            set1, set2 = divideset(rows, col, value)
            # A split that leaves one side empty separates nothing; skip it
            # before scoring so scoref never sees an empty partition.
            if not set1 or not set2:
                continue
            p = float(len(set1)) / len(rows)
            gain = current_score - p * scoref(set1) - (1 - p) * scoref(set2)
            if gain > best_gain:
                best_gain = gain
                best_criteria = (col, value)
                best_sets = (set1, set2)

    if best_gain > 0:
        # Pass scoref down so the whole tree uses the caller's scoring.
        trueBranch = buildtree(best_sets[0], scoref)
        falseBranch = buildtree(best_sets[1], scoref)
        return decisionnode(col=best_criteria[0], value=best_criteria[1],
                            tb=trueBranch, fb=falseBranch)
    return decisionnode(results=uniquecounts(rows))


def printtree(tree, indent=''):
    """Print the tree as indented text, true branch before false branch.

    Args:
        tree: Root ``decisionnode`` of the (sub)tree to print.
        indent: Prefix printed before the branch markers of this level.
    """
    # Is this a leaf node?
    if tree.results is not None:
        print(str(tree.results))
    else:
        print(str(tree.col) + ':' + str(tree.value) + '? ')
        # Print the branches
        print(indent + 'T->', end=" ")
        printtree(tree.tb, indent + ' ')
        print(indent + 'F->', end=" ")
        printtree(tree.fb, indent + ' ')


def getwidth(tree):
    """Return the number of leaves under tree (1 for a leaf)."""
    if tree.tb is None and tree.fb is None:
        return 1
    return getwidth(tree.tb) + getwidth(tree.fb)


def getdepth(tree):
    """Return the length of the longest branch under tree (0 for a leaf)."""
    if tree.tb is None and tree.fb is None:
        return 0
    return max(getdepth(tree.tb), getdepth(tree.fb)) + 1


def drawtree(tree, jpeg='tree.jpg'):
    """Render the tree to a JPEG file.

    The canvas is 100 pixels wide per leaf and 100 pixels high per level
    plus a 120 pixel margin.

    Args:
        tree: Root ``decisionnode`` to draw.
        jpeg: Path of the image file to write.
    """
    # Imported here so the tree-building code works without Pillow.
    from PIL import Image, ImageDraw

    w = getwidth(tree) * 100
    h = getdepth(tree) * 100 + 120

    img = Image.new('RGB', (w, h), (255, 255, 255))
    draw = ImageDraw.Draw(img)

    drawnode(draw, tree, w / 2, 20)
    img.save(jpeg, 'JPEG')


def drawnode(draw, tree, x, y):
    """Draw tree with its root at (x, y), recursing into both branches.

    Decision nodes are drawn as ``col:value`` with red links to their
    children (false branch on the left, true branch on the right); leaves
    are drawn as their outcome counts.
    """
    if tree.results is None:
        # Get the width of each branch
        w1 = getwidth(tree.fb) * 100
        w2 = getwidth(tree.tb) * 100

        # Determine the total space required by this node
        left = x - (w1 + w2) / 2
        right = x + (w1 + w2) / 2

        # Draw the condition string
        draw.text((x - 20, y - 10), str(tree.col) + ':' + str(tree.value),
                  (0, 0, 0))

        # Draw links to the branches
        draw.line((x, y, left + w1 / 2, y + 100), fill=(255, 0, 0))
        draw.line((x, y, right - w2 / 2, y + 100), fill=(255, 0, 0))

        # Draw the branch nodes
        drawnode(draw, tree.fb, left + w1 / 2, y + 100)
        drawnode(draw, tree.tb, right - w2 / 2, y + 100)
    else:
        txt = ' \n'.join(['%s:%d' % v for v in tree.results.items()])
        draw.text((x - 20, y), txt, (0, 0, 0))


if __name__ == "__main__":
    # Split the sample into subgroups
    print("Subgroups of Yes/No")
    a = divideset(my_data, 2, 'yes')
    print(a)

    print("")
    print('The data set has the following characteristics:')
    print(uniquecounts(my_data))

    set1, set2 = divideset(my_data, 3, 20)
    print("")
    a = entropy(my_data)
    print("Total entropy is %.2f" % a)

    tree = buildtree(my_data)
    printtree(tree)
    drawtree(tree, jpeg='treeview.jpg')