Posts: 8
Threads: 3
Joined: May 2020
Recall and F1-Measure values are greater than 1.
Recall = 2.54
F-measure = 1.31
Anyone can help?
Posts: 12,023
Threads: 484
Joined: Sep 2016
Posts: 8
Threads: 3
Joined: May 2020
MAX_NB_WORDS = 50000
# Max number of words in each complaint.
MAX_SEQUENCE_LENGTH = 250
# This is fixed.
EMBEDDING_DIM = 100
tokenizer = Tokenizer(num_words=MAX_NB_WORDS, filters='!"#$%&()*+,-./:;<=>?@[\]^_`{|}~', lower=True)
tokenizer.fit_on_texts(data['stemming'].values)
word_index = tokenizer.word_index
print('Found %s unique tokens.' % len(word_index))
#Truncate and pad the input sequences so that they are
#all in the same length for modeling.
X = tokenizer.texts_to_sequences(data['stemming'].values)
X = pad_sequences(X, maxlen=MAX_SEQUENCE_LENGTH)
print('Shape of data tensor:', X.shape)
#Converting categorical labels to numbers.
Y = pd.get_dummies(data['Priority']).values
print('Shape of label tensor:', Y.shape)
#Train test split.
X_train, X_test, y_train, y_test = train_test_split(X, Y,
test_size=0.20,
random_state=42)
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)
model = Sequential()
model.add(Embedding(MAX_NB_WORDS, EMBEDDING_DIM, input_length=X.shape[1]))
model.add(SpatialDropout1D(0.2))
model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy',
optimizer='adam', metrics=['accuracy'])
epochs = 5
batch_size = 64
history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
validation_split=0.1, #0.001
callbacks=[EarlyStopping(monitor='val_loss',
patience=3, min_delta=0.0001)])
accr = model.evaluate(X_test, y_test)
print('Test set\n Loss: {:0.3f}\n Accuracy: {:0.3f} '.format(accr[0], accr[1]))
new_complaint = ['ui']
seq = tokenizer.texts_to_sequences(new_complaint)
padded = pad_sequences(seq, maxlen=MAX_SEQUENCE_LENGTH)
pred = model.predict(padded)
labels = ['high','low']
print(pred, labels[np.argmax(pred)])
#======================================================================================================
y_pred = model.predict(X_test) #
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, recall_score, precision_score, matthews_corrcoef,\
classification_report
def computeMetrics(true, pred): # considering sigmoid activation, threshold = 0.5
#pred = keras.backend.cast(K.greater(pred, 0.5), keras.backend.floatx())
pred = keras.backend.cast(keras.backend.greater(pred, 0.5), keras.backend.floatx())
groundPositives = keras.backend.cast(keras.backend.sum(true), keras.backend.floatx()) + keras.backend.epsilon() #100
correctPositives = keras.backend.sum(true * pred) + keras.backend.epsilon() #90
predictedPositives = keras.backend.sum(pred) + keras.backend.epsilon() #120
#print('*************** ',np.float32(groundPositives),np.float32(correctPositives), np.float32(predictedPositives))
precision = correctPositives / predictedPositives
recall = correctPositives / groundPositives
f1 = (2 * precision * recall) / (precision + recall)
return f1.numpy(), precision.numpy(), recall.numpy()
f1, precision, recall = computeMetrics(y_test, y_pred)
print('metrics: ', np.float32(f1), precision, recall)
Posts: 8
Threads: 3
Joined: May 2020
(May-11-2020, 11:32 PM)Larz60+ Wrote: can you elaborate? MAX_NB_WORDS = 50000
# Max number of words in each complaint.
MAX_SEQUENCE_LENGTH = 250
# This is fixed.
EMBEDDING_DIM = 100
tokenizer = Tokenizer(num_words=MAX_NB_WORDS, filters='!"#$%&()*+,-./:;<=>?@[\]^_`{|}~', lower=True)
tokenizer.fit_on_texts(data['stemming'].values)
word_index = tokenizer.word_index
print('Found %s unique tokens.' % len(word_index))
#Truncate and pad the input sequences so that they are
#all in the same length for modeling.
X = tokenizer.texts_to_sequences(data['stemming'].values)
X = pad_sequences(X, maxlen=MAX_SEQUENCE_LENGTH)
print('Shape of data tensor:', X.shape)
#Converting categorical labels to numbers.
Y = pd.get_dummies(data['Priority']).values
print('Shape of label tensor:', Y.shape)
#Train test split.
X_train, X_test, y_train, y_test = train_test_split(X, Y,
test_size=0.20,
random_state=42)
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)
model = Sequential()
model.add(Embedding(MAX_NB_WORDS, EMBEDDING_DIM, input_length=X.shape[1]))
model.add(SpatialDropout1D(0.2))
model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy',
optimizer='adam', metrics=['accuracy'])
epochs = 5
batch_size = 64
history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
validation_split=0.1, #0.001
callbacks=[EarlyStopping(monitor='val_loss',
patience=3, min_delta=0.0001)])
accr = model.evaluate(X_test, y_test)
print('Test set\n Loss: {:0.3f}\n Accuracy: {:0.3f} '.format(accr[0], accr[1]))
new_complaint = ['ui']
seq = tokenizer.texts_to_sequences(new_complaint)
padded = pad_sequences(seq, maxlen=MAX_SEQUENCE_LENGTH)
pred = model.predict(padded)
labels = ['high','low']
print(pred, labels[np.argmax(pred)])
#======================================================================================================
y_pred = model.predict(X_test) #
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, recall_score, precision_score, matthews_corrcoef,\
classification_report
def computeMetrics(true, pred): # considering sigmoid activation, threshold = 0.5
#pred = keras.backend.cast(K.greater(pred, 0.5), keras.backend.floatx())
pred = keras.backend.cast(keras.backend.greater(pred, 0.5), keras.backend.floatx())
groundPositives = keras.backend.cast(keras.backend.sum(true), keras.backend.floatx()) + keras.backend.epsilon() #100
correctPositives = keras.backend.sum(true * pred) + keras.backend.epsilon() #90
predictedPositives = keras.backend.sum(pred) + keras.backend.epsilon() #120
#print('*************** ',np.float32(groundPositives),np.float32(correctPositives), np.float32(predictedPositives))
precision = correctPositives / predictedPositives
recall = correctPositives / groundPositives
f1 = (2 * precision * recall) / (precision + recall)
return f1.numpy(), precision.numpy(), recall.numpy()
f1, precision, recall = computeMetrics(y_test, y_pred)
print('metrics: ', np.float32(f1), precision, recall)
Posts: 12,023
Threads: 484
Joined: Sep 2016
BY elaborate, I mean better explain the goal and the problem
Posts: 8
Threads: 3
Joined: May 2020
(May-13-2020, 01:15 AM)Larz60+ Wrote: BY elaborate, I mean better explain the goal and the problem The goal is to measure LSTM improvement compared to other ML algorithms.
The problem is having values greater than 1 for both of Recall and F-measure.
|