لماذا cross_val_score لا تعمل مع roc_auc ويظهر لي هذا الخطأ: ValueError: Only one class present in y_true. ROC AUC score is not defined in that case.

Meezo ML · 16 يونيو 2021

لدي الكود التالي:

# Minimal reproduction: cross-validate a depth-1 decision tree on iris,
# scored with ROC AUC against a one-hot encoded target.
import pandas as pd  # needed for pd.DataFrame below; missing from the original snippet
from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_iris
from sklearn.preprocessing import OneHotEncoder
from sklearn.tree import DecisionTreeClassifier

dataset = load_iris()
# Features as a DataFrame labelled with the dataset's own feature names.
Xdata = pd.DataFrame(data=dataset.data, columns=dataset.feature_names)
# One-hot encode the integer target and convert the result to a dense array.
encoder = OneHotEncoder()
ydata = encoder.fit_transform(pd.DataFrame(dataset.target)).toarray()
model = DecisionTreeClassifier(max_depth=1)
# This is the call that raises
# "ValueError: Only one class present in y_true" (see the traceback).
cross_val_score(model, Xdata, ydata, cv=4, scoring="roc_auc")
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-20-6be5be8289ab> in <module>
     11 model = DecisionTreeClassifier(max_depth=1)
     12 
---> 13 cross_val_score(model, Xdata, ydata, cv=4, scoring="roc_auc")

~\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in cross_val_score(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, error_score)
    388                                 fit_params=fit_params,
    389                                 pre_dispatch=pre_dispatch,
--> 390                                 error_score=error_score)
    391     return cv_results['test_score']
    392 

~\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)
    234             return_times=True, return_estimator=return_estimator,
    235             error_score=error_score)
--> 236         for train, test in cv.split(X, y, groups))
    237 
    238     zipped_scores = list(zip(*scores))

~\anaconda3\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
   1002             # remaining jobs.
   1003             self._iterating = False
-> 1004             if self.dispatch_one_batch(iterator):
   1005                 self._iterating = self._original_iterator is not None
   1006 

~\anaconda3\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
    833                 return False
    834             else:
--> 835                 self._dispatch(tasks)
    836                 return True
    837 

~\anaconda3\lib\site-packages\joblib\parallel.py in _dispatch(self, batch)
    752         with self._lock:
    753             job_idx = len(self._jobs)
--> 754             job = self._backend.apply_async(batch, callback=cb)
    755             # A job can complete so quickly than its callback is
    756             # called before we get here, causing self._jobs to

~\anaconda3\lib\site-packages\joblib\_parallel_backends.py in apply_async(self, func, callback)
    207     def apply_async(self, func, callback=None):
    208         """Schedule a func to be run"""
--> 209         result = ImmediateResult(func)
    210         if callback:
    211             callback(result)

~\anaconda3\lib\site-packages\joblib\_parallel_backends.py in __init__(self, batch)
    588         # Don't delay the application, to avoid keeping the input
    589         # arguments in memory
--> 590         self.results = batch()
    591 
    592     def get(self):

~\anaconda3\lib\site-packages\joblib\parallel.py in __call__(self)
    254         with parallel_backend(self._backend, n_jobs=self._n_jobs):
    255             return [func(*args, **kwargs)
--> 256                     for func, args, kwargs in self.items]
    257 
    258     def __len__(self):

~\anaconda3\lib\site-packages\joblib\parallel.py in <listcomp>(.0)
    254         with parallel_backend(self._backend, n_jobs=self._n_jobs):
    255             return [func(*args, **kwargs)
--> 256                     for func, args, kwargs in self.items]
    257 
    258     def __len__(self):

~\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, error_score)
    542     else:
    543         fit_time = time.time() - start_time
--> 544         test_scores = _score(estimator, X_test, y_test, scorer)
    545         score_time = time.time() - start_time - fit_time
    546         if return_train_score:

~\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _score(estimator, X_test, y_test, scorer)
    589         scores = scorer(estimator, X_test)
    590     else:
--> 591         scores = scorer(estimator, X_test, y_test)
    592 
    593     error_msg = ("scoring must return a number, got %s (%s) "

~\anaconda3\lib\site-packages\sklearn\metrics\_scorer.py in __call__(self, estimator, *args, **kwargs)
     85             if isinstance(scorer, _BaseScorer):
     86                 score = scorer._score(cached_call, estimator,
---> 87                                       *args, **kwargs)
     88             else:
     89                 score = scorer(estimator, *args, **kwargs)

~\anaconda3\lib\site-packages\sklearn\metrics\_scorer.py in _score(self, method_caller, clf, X, y, sample_weight)
    330                                                  **self._kwargs)
    331         else:
--> 332             return self._sign * self._score_func(y, y_pred, **self._kwargs)
    333 
    334     def _factory_args(self):

~\anaconda3\lib\site-packages\sklearn\metrics\_ranking.py in roc_auc_score(y_true, y_score, average, sample_weight, max_fpr, multi_class, labels)
    393                                              max_fpr=max_fpr),
    394                                      y_true, y_score, average,
--> 395                                      sample_weight=sample_weight)
    396 
    397 

~\anaconda3\lib\site-packages\sklearn\metrics\_base.py in _average_binary_score(binary_metric, y_true, y_score, average, sample_weight)
    118         y_score_c = y_score.take([c], axis=not_average_axis).ravel()
    119         score[c] = binary_metric(y_true_c, y_score_c,
--> 120                                  sample_weight=score_weight)
    121 
    122     # Average the results

~\anaconda3\lib\site-packages\sklearn\metrics\_ranking.py in _binary_roc_auc_score(y_true, y_score, sample_weight, max_fpr)
    219     """Binary roc auc score"""
    220     if len(np.unique(y_true)) != 2:
--> 221         raise ValueError("Only one class present in y_true. ROC AUC score "
    222                          "is not defined in that case.")
    223 

ValueError: Only one class present in y_true. ROC AUC score is not defined in that case.

Ali Haidar Ahmad · 16 يونيو 2021

بشكل عام، يظهر الخطأ ValueError: Only one class present in y_true. ROC AUC score is not defined in that case. عندما تحتوي y_true على فئة واحدة فقط (مثلاً كلها أصفار أو كلها واحدات)، وفي هذه الحالة يكون المعيار ROC AUC غير معرّف ولا يمكن تطبيقه.
تختلف معالجة هذا الخطأ باختلاف المسألة، وتكون عادةً بالتقاط الاستثناء (عن طريق try/except).
لكن دعنا نعالج مشكلتك:
أولاً أنت تستخدم مجموعة البيانات iris dataset وهذه الداتاسيت قيم ال target لها مرتبة أي إذا جربت استعراضها ستجد التالي:

dataset.target[0:50]
# result
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0])

dataset.target[50:100]

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1])

dataset.target[100:150]
array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2])

لاحظ أن أول 50 قيمة كلها 0، وثاني 50 قيمة كلها 1، وهكذا.
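حسناً، صحيح أن لدينا هنا أكثر من فئة (0 و 1 و 2)، وقد تقول إنه لا يجب أن يظهر الخطأ، وهذا الكلام صحيح تماماً إلا في حالة استخدامك لـ cross_val_score، فهي تقوم بتقسيم البيانات إلى عدة أقسام (في حالتك قمت بتقسيمها إلى 4 أقسام)، وبالتالي من المرجح جداً ألا يحوي أحد هذه الأقسام إلا على قيمة واحدة (تذكّر أن البيانات مرتبة هنا). والسبب أن ydata مرمّزة بطريقة one-hot، وفي هذه الحالة تستخدم cross_val_score التقسيم KFold العادي بالترتيب ودون خلط؛ فمثلاً القسم الأول يتكون من أول 38 عينة تقريباً وكلها من الفئة 0، فيصبح عمودا الفئتين 1 و 2 في y_true كلهما أصفاراً.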
لذا لحل هذه المشكلة قم بخلط البيانات قبل عملية cross_val_score، وهذا سيضمن لك عملياً عدم حدوث ذلك (لأنك ستكون قد جعلت البيانات غير مرتبة).

from sklearn.utils import shuffle

# Shuffle features and targets together so rows stay aligned. The fixed seed
# makes the fold contents repeatable across runs; the exact scores depend on
# the seed, so they can differ from any sample output shown alongside.
X_new, y_new = shuffle(Xdata, ydata, random_state=42)
cross_val_score(model, X_new, y_new, cv=4, scoring="roc_auc")
#النتيجة بعد إضافة الكود
array([0.82692308, 0.85704023, 0.84974359, 0.79734848])

لماذا cross_val_score لا تعمل مع roc_auc ويظهر لي هذا الخطأ: ValueError: Only one class present in y_true. ROC AUC score is not defined in that case.

السؤال

Meezo ML

رابط هذا التعليق

شارك على الشبكات الإجتماعية

1 جواب على هذا السؤال

Recommended Posts

Ali Haidar Ahmad

رابط هذا التعليق

شارك على الشبكات الإجتماعية

انضم إلى النقاش

إعلانات

تابعنا على

الرئيسية

تابعنا

دروس ومقالات

أسئلة وأجوبة

كتب

دورات

بطاقات هدية