Python Forum
F-score and Recall values Greater Than 1
#1
Recall and F1-Measure values are greater than 1.
Recall = 2.54
F-measure = 1.31

Can anyone help?
#2
can you elaborate?
#3
import numpy as np
import pandas as pd
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SpatialDropout1D, LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split

# The maximum number of words to keep in the vocabulary (most frequent).
MAX_NB_WORDS = 50000
# Max number of words in each complaint.
MAX_SEQUENCE_LENGTH = 250
# This is fixed.
EMBEDDING_DIM = 100
tokenizer = Tokenizer(num_words=MAX_NB_WORDS, filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~', lower=True)
tokenizer.fit_on_texts(data['stemming'].values)
word_index = tokenizer.word_index
print('Found %s unique tokens.' % len(word_index))

#Truncate and pad the input sequences so that they are
#all the same length for modeling.
X = tokenizer.texts_to_sequences(data['stemming'].values)
X = pad_sequences(X, maxlen=MAX_SEQUENCE_LENGTH)
print('Shape of data tensor:', X.shape)

#Convert categorical labels to one-hot vectors.
Y = pd.get_dummies(data['Priority']).values
print('Shape of label tensor:', Y.shape)

#Train test split.
X_train, X_test, y_train, y_test = train_test_split(X, Y,
                                                    test_size=0.20,
                                                    random_state=42)
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

model = Sequential()
model.add(Embedding(MAX_NB_WORDS, EMBEDDING_DIM, input_length=X.shape[1]))
model.add(SpatialDropout1D(0.2))
model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer='adam', metrics=['accuracy'])

epochs = 5
batch_size = 64

history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
                    validation_split=0.1, #0.001
                    callbacks=[EarlyStopping(monitor='val_loss',
                                             patience=3, min_delta=0.0001)])

accr = model.evaluate(X_test, y_test)
print('Test set\n  Loss: {:0.3f}\n  Accuracy: {:0.3f} '.format(accr[0], accr[1]))

new_complaint = ['ui']
seq = tokenizer.texts_to_sequences(new_complaint)
padded = pad_sequences(seq, maxlen=MAX_SEQUENCE_LENGTH)

pred = model.predict(padded)
labels = ['high','low']
print(pred, labels[np.argmax(pred)])
#======================================================================================================
y_pred = model.predict(X_test)

from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, recall_score, precision_score, matthews_corrcoef,\
    classification_report


def computeMetrics(true, pred):  # considering sigmoid activation, threshold = 0.5
    #pred = keras.backend.cast(K.greater(pred, 0.5), keras.backend.floatx())
    pred = keras.backend.cast(keras.backend.greater(pred, 0.5), keras.backend.floatx())

    groundPositives = keras.backend.cast(keras.backend.sum(true), keras.backend.floatx()) + keras.backend.epsilon() #100
    correctPositives = keras.backend.sum(true * pred) + keras.backend.epsilon()  #90
    predictedPositives = keras.backend.sum(pred) + keras.backend.epsilon()   #120

    #print('*************** ',np.float32(groundPositives),np.float32(correctPositives), np.float32(predictedPositives))
    
    precision = correctPositives / predictedPositives
    recall = correctPositives / groundPositives
    
    f1 = (2 * precision * recall) / (precision + recall)  
    return f1.numpy(), precision.numpy(), recall.numpy()

f1, precision, recall = computeMetrics(y_test, y_pred)
print('metrics: ', np.float32(f1), precision, recall)
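
As a sanity check, the same scores can also be computed with the sklearn functions already imported above. This is only a sketch and assumes y_test is the one-hot label matrix and y_pred holds the softmax probabilities from model.predict(X_test); the sklearn implementations are bounded to [0, 1] by construction.

# Sanity check with sklearn (assumes y_test is one-hot and y_pred is the softmax output above).
y_true_cls = np.argmax(y_test, axis=1)   # collapse one-hot rows to class indices
y_pred_cls = np.argmax(y_pred, axis=1)   # take the most probable class per row

print(classification_report(y_true_cls, y_pred_cls, target_names=['high', 'low']))
print('precision:', precision_score(y_true_cls, y_pred_cls))
print('recall   :', recall_score(y_true_cls, y_pred_cls))
print('f1       :', f1_score(y_true_cls, y_pred_cls))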
#5
By elaborate, I mean: better explain the goal and the problem.
#6
(May-13-2020, 01:15 AM)Larz60+ Wrote: By elaborate, I mean: better explain the goal and the problem.
The goal is to measure the LSTM's improvement over other ML algorithms.
The problem is that both Recall and F-measure come out greater than 1.
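
For reference, with the usual definitions precision = TP / (TP + FP), recall = TP / (TP + FN), and F1 = 2 * precision * recall / (precision + recall), all three values lie between 0 and 1, so a result such as 2.54 points to the metric computation rather than the model itself. A minimal sketch of the bounded computation, using made-up class-index labels (0 = high, 1 = low are illustrative assumptions, not values taken from the data):

import numpy as np
from sklearn.metrics import confusion_matrix, recall_score, f1_score

# Toy labels only, to illustrate the bound; real values would come from the test split.
y_true = np.array([0, 0, 1, 1, 1, 0, 1, 1])
y_pred = np.array([0, 1, 1, 1, 0, 0, 1, 1])

tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
print('recall from counts:', tp / (tp + fn))        # TP / (TP + FN), never above 1
print('recall_score      :', recall_score(y_true, y_pred))
print('f1_score          :', f1_score(y_true, y_pred))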