#I am trying to create a dendrogram for my ionomics research, and we have thousands of data points. We have 8 different species, and I want to use the dendrogram to find out whether the data for each species cluster together or not.
![[Image: dendrograms.png]](https://kovacslab.slack.com/files/UTWLYJQEQ/FUD2M0D7C/dendrograms.png)
#This is the link to the dendrogram image shown above.
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.cluster.hierarchy as sch
from scipy.cluster import hierarchy
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Clustering workflow for the ionomics table:
#   1. load and clean the CSV,
#   2. one-hot encode categorical columns and standardize every feature,
#   3. draw a k-means elbow curve and fit a 5-cluster k-means model,
#   4. build a Ward-linkage dendrogram and save it to disk.

# Fixed seed so the k-means results are the same on every run.
RANDOM_STATE = 0

# --- Load and clean ---------------------------------------------------------
dataset = pd.read_csv("Geneva Ionomics_X_Spec_as_int.csv")
# Remove the row labelled 24 and the first column.
# NOTE(review): the reason for excluding them is not recorded here -- confirm.
dataset.drop([24], inplace=True)
dataset = dataset.iloc[:, 1:]

# Fail early with a clear message: both KMeans and scipy's linkage reject
# missing values, but their own error messages do not say where they are.
missing_count = int(dataset.isnull().sum().sum())
if missing_count:
    raise ValueError(
        f"dataset contains {missing_count} missing value(s); "
        "impute or drop them before clustering"
    )

# --- Build the feature matrix -----------------------------------------------
# get_dummies one-hot encodes only the object/string/category columns and
# passes every other column through unchanged, so `genotype` is already the
# complete feature table.  Concatenating it with `dataset` again would
# duplicate every numeric column and bring back the raw categorical columns.
genotype = pd.get_dummies(dataset, drop_first=True)
new_dataset = genotype

# Explicit float conversion so boolean dummy columns do not yield an object
# array.
X = new_dataset.to_numpy(dtype=float)

# Standardize so that no single column dominates the Euclidean distances
# purely because of its scale.
sc = StandardScaler()
X = sc.fit_transform(X)

# --- K-means elbow curve ----------------------------------------------------
# Within-cluster sum of squares (inertia) for k = 1..10.
wss = []
for i in range(1, 11):
    kmeanscluster = KMeans(
        n_clusters=i,
        init="k-means++",
        n_init=10,
        random_state=RANDOM_STATE,
    )
    kmeanscluster.fit(X)
    wss.append(kmeanscluster.inertia_)

# Separate figure: otherwise this curve ends up underneath the dendrogram in
# the saved image.
plt.figure()
plt.plot(range(1, 11), wss)
plt.xlabel("Number of clusters (k)")
plt.ylabel("Within-cluster sum of squares")

# --- Final k-means model ----------------------------------------------------
kmeanscluster = KMeans(
    n_clusters=5,
    init="k-means++",
    n_init=10,
    random_state=RANDOM_STATE,
)
Y_pred = kmeanscluster.fit_predict(X)

# --- Hierarchical clustering / dendrogram -----------------------------------
# One linkage, computed on the standardized matrix (the same data k-means
# saw), drawn once on its own figure.
Z = hierarchy.linkage(X, method="ward", metric="euclidean")

plt.figure()
# Labels are passed as a plain list; a pandas Index is not accepted by every
# scipy version.
# NOTE(review): leaves are labelled with the row index.  To see whether
# species group together, pass the species value of each row as `labels`
# instead -- the name of that column is not visible in this script.
dendogram = hierarchy.dendrogram(
    Z,
    leaf_rotation=90,
    leaf_font_size=8,
    labels=list(dataset.index),
)
h = dendogram  # kept so that code referring to `h` keeps working
plt.savefig("dendogram X")
#I have also uploaded a screenshot of the dendrogram with this. I hate to say but I am kind of a beginner and guidance would be great.
![[Image: dendrograms.png]](https://kovacslab.slack.com/files/UTWLYJQEQ/FUD2M0D7C/dendrograms.png)
#This is the link to the dendrogram image shown above.