Oct-02-2021, 09:01 AM
I generate different data-sets, and I want to save them into excel file in the form of columns. But, excel file always returns the data-sets row-wise.
Here's the code;
Here's the code;
""" Data Generator for Multiple Use; """ import numpy as np import pandas as pd import random import xlsxwriter import math from numpy import linalg as LA import matplotlib.pyplot as plt import matplotlib.colors from scipy.stats import poisson from scipy.stats import bernoulli from sklearn.model_selection import train_test_split from sklearn.decomposition import PCA from sklearn.preprocessing import StandardScaler, MinMaxScaler from sklearn.cross_decomposition import PLSRegression from sklearn.metrics import accuracy_score, mean_squared_error from tqdm import tqdm_notebook from sklearn.preprocessing import OneHotEncoder from sklearn.datasets import make_blobs class DataGenerator: """ Pre-defined Features """ def __init__(self): self.LengthSample = 100 self.mu = 0.1 self.sigma = 0.5 self.data_set_no = 3 self.PoissonPar = 3 self.BenoulliPar = 0.6 self.DataIdx = ['Gaussian', 'Bernoulli', 'Poisson'] self.PercNo_Layer = np.asarray([200, 1000, 300, 400, 200]) self.Active_Layer = np.asarray([10/100*self.PercNo_Layer[0], self.PercNo_Layer[1], 3/100*self.PercNo_Layer[2], 3/100*self.PercNo_Layer[3], 10/100*self.PercNo_Layer[4]]) # ==============================================================================================================# """ Data set; Mixture of python blobs, Bernoulli, Gaussian, Poisson data distribution """ def GenerateData(self): # Usual python blobs data_blobs, _ = make_blobs(n_samples=self.LengthSample, centers=1, n_features=int(self.Active_Layer[0]), cluster_std=5.0, random_state=1000) data_blobs = data_blobs.transpose() # Some Poisson distribution data_Poisson = np.asarray( [poisson.rvs(mu=self.PoissonPar, size=self.LengthSample) for _ in range(int(self.Active_Layer[0]))]).reshape( int(self.LengthSample), int(self.Active_Layer[0])) data_Poisson = data_Poisson.transpose() # Some Gaussian distribution data_Gaussian = np.asarray([np.random.normal(self.mu, self.sigma, size=self.LengthSample) for _ in range(int(self.Active_Layer[0]))]).reshape(int(self.LengthSample), int(self.Active_Layer[0])) data_Gaussian = data_Gaussian.transpose() # Some Bernoulli data_bern = np.asarray( [bernoulli.rvs(size=self.LengthSample, p=self.BenoulliPar) for _ in range(int(self.Active_Layer[0]))]).reshape( int(self.LengthSample), int(self.Active_Layer[0])) data_bern = data_bern.transpose() return data_blobs, data_Gaussian, data_bern, data_Poisson def Save2Xls(self): # Call the data data_blobs, data_Gaussian, data_bern, data_Poisson = self.GenerateData() # Individual data frames per data-set data_Poisson_df = pd.DataFrame({'Poisson': [data_Poisson]}) data_Gaussian_df = pd.DataFrame({'Gaussian': [data_Gaussian]}) data_blobs_df = pd.DataFrame({'Blobs': [data_blobs]}) data_Bernoulli_df = pd.DataFrame({'Bernoulli': [data_bern]}) data_sheets = {'Blobs':data_blobs_df, 'Poisson':data_Poisson_df, 'Gaussian':data_Gaussian_df, 'Bernoulli':data_Bernoulli_df} data_sets_ind = pd.ExcelWriter('./DataSets.xlsx', engine='xlsxwriter') for sheet_name in data_sheets.keys(): # data_sheets[sheet_name].to_excel(data_sets_ind, sheet_name=sheet_name) data_sets_ind.save() # Dictionary of data-sets data_dictionary = pd.DataFrame({'Blobs':[data_blobs], 'Poisson':[data_Poisson], 'Gaussian':[data_Gaussian], 'Bernoulli':[data_bern]}) data_dictionary.to_excel('./data_dictionary.xlsx', sheet_name='Data dictionary', index=False) return data_dictionary, data_sets_ind if __name__ == '__main__': runner = DataGenerator() runner.Save2Xls()