GridSearchCV for multi-label classification

mapypy · Mar-29-2021, 01:58 AM

hi,
I am doing multi-label text classification, and I get the following error when I attempt a grid search (GridSearchCV) to find the best parameters across several different algorithms.
The error occurs when I fit Multinomial_NB to X_train and ytrain.

My code and error are as follows:

# Parameter grid for the grid search.
# GridSearchCV sets these parameters on NB_clf, which is a OneVsRestClassifier
# wrapping MultinomialNB. The wrapper itself has no `alpha`; the smoothing
# parameter lives on the wrapped model, so it must be addressed through the
# wrapper's `estimator` parameter with the `<component>__<parameter>` syntax.
# A bare 'alpha' key raises:
#   ValueError: Invalid parameter alpha for estimator OneVsRestClassifier(...)
param = [{'estimator__alpha': [1, 0.1, 0.01, 0.001, 0.0001, 0.00001]}]

# Optimal alpha (additive smoothing) by grid search, scored with micro-averaged
# F1 over 5-fold cross-validation.
NB_clf = OneVsRestClassifier(MultinomialNB(), n_jobs=-1)
Multinomial_NB = GridSearchCV(NB_clf, param, scoring='f1_micro', cv=5)

# Fit on the training data and print the best estimator and its CV score.
Multinomial_NB.fit(X_train, ytrain)
print(Multinomial_NB.best_estimator_)
print("Best Cross Validation Score: ", Multinomial_NB.best_score_)

# Predict on the test set with the tuned classifier and report accuracy and
# Hamming loss.
# NOTE(review): for multi-label targets accuracy_score is subset accuracy
# (every label of a sample must match) -- confirm that is the intended metric.
predictions = Multinomial_NB.predict(X_test)
print("\n Accuracy :", metrics.accuracy_score(ytest, predictions))
print("Hamming loss ", metrics.hamming_loss(ytest, predictions))

# Micro-averaged precision, recall and F1 on the test set.
precision = precision_score(ytest, predictions, average='micro')
recall = recall_score(ytest, predictions, average='micro')
f1 = f1_score(ytest, predictions, average='micro')

# Print precision, recall and F1 scores.
print("\n Micro-average")
print("Precision: {:.4f}, Recall: {:.4f}, F1: {:.4f}".format(precision, recall, f1))

Error:ValueError                                Traceback (most recent call last)
<ipython-input-62-e2bd7defbbd2> in <module>
      6 
      7 #fit and print best estimator  & CV Score
----> 8 Multinomial_NB.fit(X_train, ytrain)
      9 print(Multinomial_NB.best_estimator_)
     10 print("Best Cross Validation Score: ",Multinomial_NB.best_score_)

~/opt/anaconda3/lib/python3.7/site-packages/sklearn/utils/validation.py in inner_f(*args, **kwargs)
     61             extra_args = len(args) - len(all_args)
     62             if extra_args <= 0:
---> 63                 return f(*args, **kwargs)
     64 
     65             # extra_args > 0

~/opt/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_search.py in fit(self, X, y, groups, **fit_params)
    839                 return results
    840 
--> 841             self._run_search(evaluate_candidates)
    842 
    843             # multimetric is determined here because in the case of a callable

~/opt/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_search.py in _run_search(self, evaluate_candidates)
   1286     def _run_search(self, evaluate_candidates):
   1287         """Search all candidates in param_grid"""
-> 1288         evaluate_candidates(ParameterGrid(self.param_grid))
   1289 
   1290 

~/opt/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_search.py in evaluate_candidates(candidate_params, cv, more_results)
    807                                    (split_idx, (train, test)) in product(
    808                                    enumerate(candidate_params),
--> 809                                    enumerate(cv.split(X, y, groups))))
    810 
    811                 if len(out) < 1:

~/opt/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in __call__(self, iterable)
   1002             # remaining jobs.
   1003             self._iterating = False
-> 1004             if self.dispatch_one_batch(iterator):
   1005                 self._iterating = self._original_iterator is not None
   1006 

~/opt/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
    833                 return False
    834             else:
--> 835                 self._dispatch(tasks)
    836                 return True
    837 

~/opt/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in _dispatch(self, batch)
    752         with self._lock:
    753             job_idx = len(self._jobs)
--> 754             job = self._backend.apply_async(batch, callback=cb)
    755             # A job can complete so quickly than its callback is
    756             # called before we get here, causing self._jobs to

~/opt/anaconda3/lib/python3.7/site-packages/joblib/_parallel_backends.py in apply_async(self, func, callback)
    207     def apply_async(self, func, callback=None):
    208         """Schedule a func to be run"""
--> 209         result = ImmediateResult(func)
    210         if callback:
    211             callback(result)

~/opt/anaconda3/lib/python3.7/site-packages/joblib/_parallel_backends.py in __init__(self, batch)
    588         # Don't delay the application, to avoid keeping the input
    589         # arguments in memory
--> 590         self.results = batch()
    591 
    592     def get(self):

~/opt/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in __call__(self)
    254         with parallel_backend(self._backend, n_jobs=self._n_jobs):
    255             return [func(*args, **kwargs)
--> 256                     for func, args, kwargs in self.items]
    257 
    258     def __len__(self):

~/opt/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in <listcomp>(.0)
    254         with parallel_backend(self._backend, n_jobs=self._n_jobs):
    255             return [func(*args, **kwargs)
--> 256                     for func, args, kwargs in self.items]
    257 
    258     def __len__(self):

~/opt/anaconda3/lib/python3.7/site-packages/sklearn/utils/fixes.py in __call__(self, *args, **kwargs)
    220     def __call__(self, *args, **kwargs):
    221         with config_context(**self.config):
--> 222             return self.function(*args, **kwargs)

~/opt/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, split_progress, candidate_progress, error_score)
    579             cloned_parameters[k] = clone(v, safe=False)
    580 
--> 581         estimator = estimator.set_params(**cloned_parameters)
    582 
    583     start_time = time.time()

~/opt/anaconda3/lib/python3.7/site-packages/sklearn/base.py in set_params(self, **params)
    231                                  'Check the list of available parameters '
    232                                  'with `estimator.get_params().keys()`.' %
--> 233                                  (key, self))
    234 
    235             if delim:

ValueError: Invalid parameter alpha for estimator OneVsRestClassifier(estimator=MultinomialNB(), n_jobs=-1). Check the list of available parameters with `estimator.get_params().keys()`.

Possibly Related Threads…
Thread		Author	Replies	Views	Last Post
	Miss-Classification problem with shuffled batch	Faebs94	0	2,096	Sep-02-2021, 11:55 AM Last Post: Faebs94
	Probabilities of binary classification problem	Troublesome1996	0	2,854	Apr-19-2021, 06:40 PM Last Post: Troublesome1996
	GridSearchCV generates unexpected different best parameters. Please help!	yili2005	0	2,054	Feb-25-2021, 03:52 PM Last Post: yili2005
	GNN For Graph "Classification"	BennyS	1	2,385	Feb-09-2021, 12:09 PM Last Post: BennyS
	Help with multiclass classification in perceptron code	Nimo_47	0	4,495	Nov-09-2020, 10:32 PM Last Post: Nimo_47
	Classification and Regression tree (CART)	kumarants	2	3,489	May-26-2020, 11:04 AM Last Post: Larz60+
	Classification of Request	PythonLearner703	8	5,453	Dec-09-2019, 08:56 PM Last Post: micseydel
	CNN Speech Classification	Mitchie87	0	2,054	Dec-06-2019, 06:17 PM Last Post: Mitchie87
	Keras: Time series classification	midarq	0	2,558	Sep-25-2019, 09:03 AM Last Post: midarq
	Classification with shuffling	PythonNewbie	1	2,988	Nov-12-2017, 10:23 AM Last Post: PythonNewbie

GridSearchCV for multi-label classification

User Panel Messages

Announcements