Python Forum
How to use a tfrecord file for training an autoencoder
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
How to use a tfrecord file for training an autoencoder
#1
I don't know how to get the number of features from a tfrecord file to make them as input to a stacked autoencoder.

I used the follwing function for stacked autoencoder:
from __future__ import print_function
import keras
import numpy
from keras.models import Sequential
from keras.layers.core import *
from sklearn.model_selection import train_test_split
from app_flag import FLAGS

class StackedAutoencoder(object):
    """
    Implementation of stacked autoencoder multi-class classifier using the Keras Python package.
    This classifier is used to classify cells to cell cycle phases S, G1 or G2M.
    """
    def __init__(self, features, labels, num_labels):
        self.features = features
        self.labels = labels
        self.auto_encoder = None
        self.encoding_dim = num_labels
        

        # fix random seed for reproducibility
        self.seed = 7
        numpy.random.seed(7)

    def create_autoencoder(self):
        """
        Build the stacked auto-encoder using multiple hidden layers.
        The stacked auto-encoder is then trained and weights are freezed afterwards.
        A softmax classification layer is that appended to the last layer, replacing the input
        re-constructed layer of the auto-encoder.
        :return: Compiled classification neural network model.
        """
        self.auto_encoder = Sequential()
        self.auto_encoder.add(Dense(3000, activation='relu', input_dim=self.features.shape[1]))
        self.auto_encoder.add(Dense(1000, activation='relu'))
        self.auto_encoder.add(Dense(30, activation='relu'))

        self.auto_encoder.add(Dense(3000, activation='relu'))
        self.auto_encoder.add(Dense(self.features.shape[1], activation='sigmoid'))

        self.auto_encoder.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        self.auto_encoder.fit(self.features, self.features,
                              epochs=10,
                              batch_size=5,
                              shuffle=True,
                              validation_split=0.33,
                              validation_data=None)

        self.auto_encoder.layers.pop()
        self.auto_encoder.add(Dense(self.encoding_dim, activation='softmax'))
        self.auto_encoder.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
        print(self.auto_encoder.summary())

        # Freeze all weights after training the stacked auto-encoder and all the classification layer
        for i in range(0, len(self.auto_encoder.layers)-1):
            self.auto_encoder.layers[i].trainable = False

        return self.auto_encoder

    def evaluate_autoencoder(self):
        """
        Fit the trained neural network and validate it using splitting the dataset to training and testing sets.
        :return: Accuracy score of the classification.
        """
        self.auto_encoder.fit(self.features, self.labels,
                              epochs=10,
                              batch_size=5,
                              shuffle=True)

        X_train, X_test, Y_train, Y_test = train_test_split(self.features, self.labels, test_size=0.33, random_state=self.seed)
        #predictions = self.auto_encoder.predict_classes(X_test)
        #print(predictions)
        #print(self.label_encoder.inverse_transform(predictions))
        score = self.auto_encoder.evaluate(X_test, Y_test, batch_size=5, verbose=1)
        return score
When I run the code I get an error in the line :

self.auto_encoder.add(Dense(3000, activation='relu', input_dim=self.features.shape[1]))

indicating :TypeError: float() argument must be a string or a number.

So, how can I use the Tfrecord file to get the input dimentionality (the number of features)
Reply
#2
try

add(Dense(3000.0, ...

or

add(Dense('3000', ...
Reply
#3
Thanks for your response. I try the two suggestions but I get the same error, I think that the problem is from input_dim=self.features.shape[1] given that I can't get the number of features from the tfrecord file
Reply
#4
What is underlying data used to run the model? Is it a data frame or numpy array?
May be self.features includes non-numerical, e.g. np.object cells?
Reply
#5
I used a tfrecord file that containes only binary values. it is a numpy array.
Reply
#6
I ran the following code
from __future__ import print_function
import keras
import numpy
from keras.models import Sequential
from keras.layers.core import *
from sklearn.model_selection import train_test_split

 
class StackedAutoencoder(object):
    """
    Implementation of stacked autoencoder multi-class classifier using the Keras Python package.
    This classifier is used to classify cells to cell cycle phases S, G1 or G2M.
    """
    def __init__(self, features, labels, num_labels):
        self.features = features
        self.labels = labels
        self.auto_encoder = None
        self.encoding_dim = num_labels
         
 
        # fix random seed for reproducibility
        self.seed = 7
        numpy.random.seed(7)
 
    def create_autoencoder(self):
        """
        Build the stacked auto-encoder using multiple hidden layers.
        The stacked auto-encoder is then trained and weights are freezed afterwards.
        A softmax classification layer is that appended to the last layer, replacing the input
        re-constructed layer of the auto-encoder.
        :return: Compiled classification neural network model.
        """
        self.auto_encoder = Sequential()
        self.auto_encoder.add(Dense(3000, activation='relu', input_dim=self.features.shape[1]))
        self.auto_encoder.add(Dense(1000, activation='relu'))
        self.auto_encoder.add(Dense(30, activation='relu'))
 
        self.auto_encoder.add(Dense(3000, activation='relu'))
        self.auto_encoder.add(Dense(self.features.shape[1], activation='sigmoid'))
 
        self.auto_encoder.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        self.auto_encoder.fit(self.features, self.features,
                              epochs=10,
                              batch_size=5,
                              shuffle=True,
                              validation_split=0.33,
                              validation_data=None)
 
        self.auto_encoder.layers.pop()
        self.auto_encoder.add(Dense(self.encoding_dim, activation='softmax'))
        
        for i in range(0, len(self.auto_encoder.layers)-1):
            self.auto_encoder.layers[i].trainable = False
        
        self.auto_encoder.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
        print(self.auto_encoder.summary())
 
 
        return self.auto_encoder
 
    def evaluate_autoencoder(self):
        """
        Fit the trained neural network and validate it using splitting the dataset to training and testing sets.
        :return: Accuracy score of the classification.
        """
        self.auto_encoder.fit(self.features, self.labels,
                              epochs=10,
                              batch_size=5,
                              shuffle=True)
        print("Fitted")
        X_train, X_test, Y_train, Y_test = train_test_split(self.features, self.labels, test_size=0.33, random_state=self.seed)
        #predictions = self.auto_encoder.predict_classes(X_test)
        #print(predictions)
        #print(self.label_encoder.inverse_transform(predictions))
        score = self.auto_encoder.evaluate(X_test, Y_test, batch_size=5, verbose=1)
        return score
    

sa = StackedAutoencoder(np.random.rand(100,10).astype(bool), np.random.randint(2, 5, size=100), 3)
sa.create_autoencoder()
sa.evaluate_autoencoder()
and got the output:

Output:
Using TensorFlow backend. Train on 67 samples, validate on 33 samples Epoch 1/10 67/67 [==============================] - 2s 31ms/step - loss: 0.2513 - acc: 0.9254 - val_loss: 1.1921e-07 - val_acc: 1.0000 Epoch 2/10 67/67 [==============================] - 2s 24ms/step - loss: 1.1921e-07 - acc: 1.0000 - val_loss: 1.1921e-07 - val_acc: 1.0000 Epoch 3/10 67/67 [==============================] - 2s 25ms/step - loss: 1.1921e-07 - acc: 1.0000 - val_loss: 1.1921e-07 - val_acc: 1.0000 Epoch 4/10 67/67 [==============================] - 2s 24ms/step - loss: 1.1921e-07 - acc: 1.0000 - val_loss: 1.1921e-07 - val_acc: 1.0000 Epoch 5/10 67/67 [==============================] - 2s 24ms/step - loss: 1.1921e-07 - acc: 1.0000 - val_loss: 1.1921e-07 - val_acc: 1.0000 Epoch 6/10 67/67 [==============================] - 2s 23ms/step - loss: 1.1921e-07 - acc: 1.0000 - val_loss: 1.1921e-07 - val_acc: 1.0000 Epoch 7/10 67/67 [==============================] - 2s 24ms/step - loss: 1.1921e-07 - acc: 1.0000 - val_loss: 1.1921e-07 - val_acc: 1.0000 Epoch 8/10 67/67 [==============================] - 2s 25ms/step - loss: 1.1921e-07 - acc: 1.0000 - val_loss: 1.1921e-07 - val_acc: 1.0000 Epoch 9/10 67/67 [==============================] - 2s 24ms/step - loss: 1.1921e-07 - acc: 1.0000 - val_loss: 1.1921e-07 - val_acc: 1.0000 Epoch 10/10 67/67 [==============================] - 2s 23ms/step - loss: 1.1921e-07 - acc: 1.0000 - val_loss: 1.1921e-07 - val_acc: 1.0000 _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= dense_1 (Dense) (None, 3000) 33000 _________________________________________________________________ dense_2 (Dense) (None, 1000) 3001000 _________________________________________________________________ dense_3 (Dense) (None, 30) 30030 _________________________________________________________________ dense_4 (Dense) (None, 3000) 93000 _________________________________________________________________ dense_5 (Dense) (None, 10) 30010 _________________________________________________________________ dense_6 (Dense) (None, 3) 33 ================================================================= Total params: 3,187,073 Trainable params: 33 Non-trainable params: 3,187,040 _________________________________________________________________ None
and error:
Output:
ValueError: Error when checking target: expected dense_6 to have shape (3,) but got array with shape (1,)
So, it is almost working fine... No TypeError rised.
Reply
#7
I used a tfrecord file to train the stacked autoencoder. My function is:
import tensorflow as tf
import numpy as np
import readers
import pre_precessing
from app_flag import FLAGS
from StackedAutoencoder import StackedAutoencoder


def write_and_encode(data_list, tfrecord_filename):
    writer = tf.python_io.TFRecordWriter(tfrecord_filename)
    for label, data_matrix in data_list:
        example = tf.train.Example(features=tf.train.Features(
            feature={
                "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[label])),
                "data_raw": tf.train.Feature(bytes_list=tf.train.BytesList(value=[data_matrix.tostring()]))
            }
        ))
        writer.write(example.SerializeToString())

    writer.close()


def read_and_decode(tfrecord_filename):
    reader = tf.TFRecordReader()
    filename_queue = tf.train.string_input_producer([tfrecord_filename],)
    _, serialized_example = reader.read(filename_queue)
    feature = tf.parse_single_example(serialized_example,
                                      features={
                                          "label": tf.FixedLenFeature([], tf.int64),
                                          "data_raw": tf.FixedLenFeature([], tf.string)
                                      })
    data = tf.decode_raw(feature["data_raw"], tf.float64)
    data = tf.reshape(data, [FLAGS.image_rows, FLAGS.image_cols])
    return data, feature["label"]



def train_input_fn():

    tfrecord_file = "../resources/train_tfrecord"  
    dataset = tf.data.TFRecordDataset(tfrecord_file)
    dataset = dataset.map(parser)

    train_dataset = dataset.repeat(FLAGS.num_epochs).batch(FLAGS.batch_size)
    train_iterator = train_dataset.make_one_shot_iterator()

    features, labels = train_iterator.get_next()

    return features, labels


def parser(record_line):

    features = {
        "label": tf.FixedLenFeature([], tf.int64),
        "data_raw": tf.FixedLenFeature([], tf.string)
    }
    parsed = tf.parse_single_example(record_line, features=features)
    label = tf.cast(parsed["label"], tf.int32) - 1  
    data = tf.decode_raw(parsed["data_raw"], tf.float64)
    data = tf.reshape(data, [FLAGS.image_rows, FLAGS.image_cols])
    data = tf.cast(data, tf.float32)
    return data, label




def write_user_instances_to_tfrecord():
    
    users = ["0"+str(i) for i in range(1, 10)]
    users.extend([str(i) for i in range(10, 17)])
    users.extend(["32", "40", "41", "42", "43", "49", "50", "51"])

   
    instances = []
    for user in users:
        train_data = readers.read_user_files(user)
        for label, instance in train_data.items():
            instances.append((label, instance))

 
    formalized_instances = pre_precessing.extend_to_maxsize(instances)

   
    train_instances = formalized_instances[:100]
    write_and_encode(train_instances, "../resources/train_tfrecord")

 

def main():
    build_stacked_ae("../resources/train_tfrecord")

def build_stacked_ae(path):
    """
    Build the stacked auto-encoder neural network, and evaluate its performance
    """
    ############### Stacked Auto-Encoders ##############
    features,labels=train_input_fn()
    ae = StackedAutoencoder(features,labels,5)
    ae.create_autoencoder()
    result = ae.evaluate_autoencoder()
    return result[1] * 100
    print("Accuracy: %.2f%%" % (result[1] * 100))

if __name__ == "__main__":
    main() 
I can't fix the error any help please?
Reply


Possibly Related Threads…
Thread Author Replies Views Last Post
  Sample training small model AndrzejB 3 1,346 Mar-22-2023, 07:37 PM
Last Post: jefsummers
  Is it normal so much time training for Training Custom Object Detector?? hobbyist 2 2,926 May-31-2022, 08:55 AM
Last Post: aserikova
  Using Autoencoder for Data Augmentation of numerical Dataset in Python Marvin93 2 3,524 Jul-10-2020, 07:18 PM
Last Post: Marvin93
  How to save predictions made by an autoencoder Glasgow1988 0 1,631 Jul-03-2020, 12:43 PM
Last Post: Glasgow1988
  Differencing Time series and Inverse after Training donnertrud 0 4,233 May-27-2020, 06:11 AM
Last Post: donnertrud
  stacked autoencoder training JohnMarie 0 2,740 Feb-24-2019, 12:23 AM
Last Post: JohnMarie

Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020