Dear Friends,
I am trying to cluster a dataset. My code plots the centroids, but I have not found a way to plot the clusters themselves or to store the clustering results in CSV format.
Please help!
I am trying to cluster a dataset. My code plots the centroids, but I have not found a way to plot the clusters themselves or to store the clustering results in CSV format.
Please help!
# Cluster a small list of hashtags with k-means, plot the clustered points
# together with the centroids, and store the cluster assignment as a CSV file.

import csv
import json
import os
import re  # regular expression
import string

import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import preprocessor as p
import textblob
import tweepy
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from scipy import stats
# import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from textblob import TextBlob
from tweepy import OAuthHandler
from tweepy import Stream
# NOTE(review): newer tweepy releases (4.x) no longer seem to provide
# StreamListener -- confirm against the installed version. It is not used
# anywhere in this script.
from tweepy.streaming import StreamListener

# Twitter credentials for the app
consumer_key = ''
consumer_secret = ''
access_key = ''
access_secret = ''

# pass twitter credentials to tweepy
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_secret)
api = tweepy.API(auth)

# columns of the csv file
COLS = ['id', 'created_at', 'source', 'original_text', 'clean_text',
        'sentiment', 'polarity', 'subjectivity', 'lang', 'favorite_count',
        'retweet_count', 'original_author', 'possibly_sensitive', 'hashtags',
        'user_mentions', 'place', 'place_coord_boundaries']

# File the clustering result is written to (relative to the working directory).
OUTPUT_CSV = 'hashtag_clusters.csv'

# Tweets exported earlier; loaded here but not used by the clustering below.
df1 = pd.read_csv('C:/Users/Neha Garg/AppData/Local/Programs/Python/data/chandrayan1.csv', encoding='unicode_escape')

hashtag_names = ["chandrayan", "chandrayan2", "pulwama", "Indiafail",
                 "Medicine", "Health", "ISRO"]
df = pd.DataFrame(data={"hashtags": hashtag_names})
# K-means needs numeric input, so every hashtag is replaced by its integer
# category code.
df['hashtags'] = df['hashtags'].astype('category').cat.codes

# Make a real copy of DF; a plain "df_tr = df" would only alias it, and the
# columns added below would then leak into the feature frame.
df_tr = df.copy()

# n_jobs, precompute_distances and algorithm='auto' were dropped from the
# call: they were all left at their defaults and are rejected by recent
# scikit-learn releases.
kmeans = KMeans(copy_x=True, init='k-means++', max_iter=300, n_clusters=7,
                n_init=10, random_state=None, tol=0.0001, verbose=0).fit(df)
y = kmeans.predict(df)
labels = kmeans.labels_

# Glue the labels (and the readable hashtag text) back to the original data
df_tr['clusters'] = labels
df_tr.insert(0, 'hashtag_name', hashtag_names)

centroids = kmeans.cluster_centers_
print(centroids)

# Store the data of the clustering in csv format.
df_tr.to_csv(OUTPUT_CSV, index=False)

# The feature space has a single dimension (the hashtag code), so the cluster
# index is used as the second plot axis: each point is drawn at
# (code, assigned cluster) and each centroid at (centroid value, own index).
plt.scatter(df_tr['hashtags'], df_tr['clusters'], c=y, s=50, alpha=0.5,
            cmap='viridis', label='hashtags')
plt.scatter(centroids[:, 0], np.arange(len(centroids)), c='red', s=100,
            alpha=0.5, marker='x', label='centroids')
plt.xticks(df_tr['hashtags'], df_tr['hashtag_name'], rotation=45)
plt.xlabel('hashtag')
plt.ylabel('cluster')
plt.legend()
plt.tight_layout()
plt.show()