اذهب إلى المحتوى

Meezo ML

الأعضاء
  • المساهمات

    197
  • تاريخ الانضمام

  • تاريخ آخر زيارة

أجوبة بواسطة Meezo ML

  1. قمت ببناء نموذج  BaggingClassifier  لكن يظهر لي الخطأ التالي:

    from sklearn.ensemble import BaggingClassifier
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import confusion_matrix
    from sklearn.datasets import load_breast_cancer
    import seaborn as sns
    import matplotlib.pyplot as plt
    from sklearn.svm import SVR
    Data = load_breast_cancer()
    X = Data.data
    y = Data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=44, shuffle =True)
    clf = BaggingClassifier(base_estimator=SVR(),n_estimators=150, random_state=444)
    clf.fit(X_train, y_train)
    print('BaggingClassifier Train Score is : ' , clf.score(X_train, y_train)) 
    print('BaggingClassifier Test Score is : ' , clf.score(X_test, y_test)) 
    c = confusion_matrix(y_test, clf.predict(X_test))
    print('Confusion Matrix is : \n', c)
    sns.heatmap(c, center = True)
    plt.show()
    ---------------------------------------------------------------------------
    IndexError                                Traceback (most recent call last)
    <ipython-input-45-1d3261b5c2fc> in <module>
         12 clf = BaggingClassifier(base_estimator=SVR(),n_estimators=150, random_state=444)
         13 clf.fit(X_train, y_train)
    ---> 14 print('BaggingClassifier Train Score is : ' , clf.score(X_train, y_train))
         15 print('BaggingClassifier Test Score is : ' , clf.score(X_test, y_test))
         16 c = confusion_matrix(y_test, clf.predict(X_test))
    
    ~\anaconda3\lib\site-packages\sklearn\base.py in score(self, X, y, sample_weight)
        367         """
        368         from .metrics import accuracy_score
    --> 369         return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
        370 
        371 
    
    ~\anaconda3\lib\site-packages\sklearn\ensemble\_bagging.py in predict(self, X)
        668             The predicted classes.
        669         """
    --> 670         predicted_probabilitiy = self.predict_proba(X)
        671         return self.classes_.take((np.argmax(predicted_probabilitiy, axis=1)),
        672                                   axis=0)
    
    ~\anaconda3\lib\site-packages\sklearn\ensemble\_bagging.py in predict_proba(self, X)
        718                 X,
        719                 self.n_classes_)
    --> 720             for i in range(n_jobs))
        721 
        722         # Reduce
    
    ~\anaconda3\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
       1002             # remaining jobs.
       1003             self._iterating = False
    -> 1004             if self.dispatch_one_batch(iterator):
       1005                 self._iterating = self._original_iterator is not None
       1006 
    
    ~\anaconda3\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
        833                 return False
        834             else:
    --> 835                 self._dispatch(tasks)
        836                 return True
        837 
    
    ~\anaconda3\lib\site-packages\joblib\parallel.py in _dispatch(self, batch)
        752         with self._lock:
        753             job_idx = len(self._jobs)
    --> 754             job = self._backend.apply_async(batch, callback=cb)
        755             # A job can complete so quickly than its callback is
        756             # called before we get here, causing self._jobs to
    
    ~\anaconda3\lib\site-packages\joblib\_parallel_backends.py in apply_async(self, func, callback)
        207     def apply_async(self, func, callback=None):
        208         """Schedule a func to be run"""
    --> 209         result = ImmediateResult(func)
        210         if callback:
        211             callback(result)
    
    ~\anaconda3\lib\site-packages\joblib\_parallel_backends.py in __init__(self, batch)
        588         # Don't delay the application, to avoid keeping the input
        589         # arguments in memory
    --> 590         self.results = batch()
        591 
        592     def get(self):
    
    ~\anaconda3\lib\site-packages\joblib\parallel.py in __call__(self)
        254         with parallel_backend(self._backend, n_jobs=self._n_jobs):
        255             return [func(*args, **kwargs)
    --> 256                     for func, args, kwargs in self.items]
        257 
        258     def __len__(self):
    
    ~\anaconda3\lib\site-packages\joblib\parallel.py in <listcomp>(.0)
        254         with parallel_backend(self._backend, n_jobs=self._n_jobs):
        255             return [func(*args, **kwargs)
    --> 256                     for func, args, kwargs in self.items]
        257 
        258     def __len__(self):
    
    ~\anaconda3\lib\site-packages\sklearn\ensemble\_bagging.py in _parallel_predict_proba(estimators, estimators_features, X, n_classes)
        140 
        141             for i in range(n_samples):
    --> 142                 proba[i, predictions[i]] += 1
        143 
        144     return proba
    
    IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

    بماذا أخطأت؟

  2. عندما أحاول حساب مصفوفة ال confusion matrix أحصل على الخطأ التالي:

    ValueError: Multilabel-indicator is not supported for confusion matrix

    علماً أن البيانات التي أريد بناء المصفوفة عليها هي من الشكل (One-Hot):

    [[1, 0, 0,0,0,0],
     [0, 0, 1,0,0,0],
     [0, 0, 0,0,1,0],
     [0, 0, 0,0,1,0]
     ...		]

     

  3. يظهر لي الخطأ التالي في هذا السطر:

    X,X_val, y, y_val=train_test_split(data,label,test_size=0.2,stratify=label)

    الخطأ:

    ValueError: The least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.

    ما السبب؟

  4. لدي كود بالشكل التالي:

    data = pd.read_csv('D:\\house.csv');
    X = data['size']
    y = data['price']
    lr = LinearRegression()
    lr.fit(X,y)

    لكنه يعطيني الخطأ التالي:

    ValueError: Found input variables with inconsistent numbers of samples: [1, 1000]

    ما السبب؟ وما الحل؟
     

  5. أحاول تقسيم البيانات باستخدام StratifiedKFold لكن يظهر لي الخطأ التالي :

    import numpy as np
    from sklearn.model_selection import StratifiedKFold
    X = np.array([[1,4],[2,1],[3,4],[7,8],[2,8]])
    y = np.array([2,1,3,4,4])
    skf = StratifiedKFold(n_splits=3)
    print(skf.get_n_splits(X, y))
    
    for train_index, test_index in skf.split(X, y):
        print("TRAIN:"+str(train_index)+'\n'+"TEST:"+str(test_index),end='\n\n')
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        print('X_train:\n '+str(X_train),end='\n\n')
        print('X_test:\n '+str(X_test),end='\n\n')
        print('y_train:\n '+str(y_train),end='\n\n')
        print('y_test:\n' +str(y_test),end='\n\n')
    ---------------------------------------------------------------------------
    ValueError                                Traceback (most recent call last)
    <ipython-input-56-6c55afa3238f> in <module>
          6 print(skf.get_n_splits(X, y))
          7 
    ----> 8 for train_index, test_index in skf.split(X, y):
          9     # للتقسيمة index  عرض ال
         10     print("TRAIN:"+str(train_index)+'\n'+"TEST:"+str(test_index),end='\n\n')
    
    ~\anaconda3\lib\site-packages\sklearn\model_selection\_split.py in split(self, X, y, groups)
        333                 .format(self.n_splits, n_samples))
        334 
    --> 335         for train, test in super().split(X, y, groups):
        336             yield train, test
        337 
    
    ~\anaconda3\lib\site-packages\sklearn\model_selection\_split.py in split(self, X, y, groups)
         78         X, y, groups = indexable(X, y, groups)
         79         indices = np.arange(_num_samples(X))
    ---> 80         for test_index in self._iter_test_masks(X, y, groups):
         81             train_index = indices[np.logical_not(test_index)]
         82             test_index = indices[test_index]
    
    ~\anaconda3\lib\site-packages\sklearn\model_selection\_split.py in _iter_test_masks(self, X, y, groups)
        690 
        691     def _iter_test_masks(self, X, y=None, groups=None):
    --> 692         test_folds = self._make_test_folds(X, y)
        693         for i in range(self.n_splits):
        694             yield test_folds == i
    
    ~\anaconda3\lib\site-packages\sklearn\model_selection\_split.py in _make_test_folds(self, X, y)
        661             raise ValueError("n_splits=%d cannot be greater than the"
        662                              " number of members in each class."
    --> 663                              % (self.n_splits))
        664         if self.n_splits > min_groups:
        665             warnings.warn(("The least populated class in y has only %d"
    
    ValueError: n_splits=3 cannot be greater than the number of members in each class.

    ما هو عدد الأعضاء؟ وكيف نحل المشكلة؟

  6. لماذا يظهر لي الخطأ التالي عند محاولة استيراد الكلاس Imputer:

    from sklearn.preprocessing import Imputer
    ---------------------------------------------------------------------------
    ImportError                               Traceback (most recent call last)
    <ipython-input-15-1ff1e063db9c> in <module>
    ----> 1 from sklearn.preprocessing import Imputer
    ImportError: cannot import name 'Imputer' from 'sklearn.preprocessing' (C:\Users\Windows.10\anaconda3\lib\site-packages\sklearn\preprocessing\__init__.py)
    

     

  7. قمت ببناء نموذج تصنيف باستخدام خوارزمية ExtraTrees لكنه يعطيني الخطأ التالي عندما يحاول عمل ال fitting على البيانات:

    clf = ExtraTreesClassifier(n_estimators=150, random_state=444)
    clf.fit(X_train, y_train)
    ----------------------------------------------------------------------
    ValueError: Unknown label type: 'unknown'

    علماً أن قيم ال y طبيعية:

    y is array([1.0, 0.0, 0.0, ..., 1.0, 1.0, 0.0], dtype=object) size is (800,)

    ما هي المشكلة؟

  8. كيف أستخدم الصف VarianceThreshold من Sklearn لحذف ال Features ذات التباين القليل (أغلب قيمها مكررة)؟ لدي بيانات بوليانية (Boolean)، لكن بعض ال Features فيها لديها تباين قريب من الصفر.

     

  9. أحاول تقسيم البيانات باستخدام RepeatedKFold لكن يظهر لي الخطأ التالي:

    import numpy as np
    from sklearn.model_selection import RepeatedKFold
    X = np.array([[3, 32], [2, 9], [15, 8]])
    y = np.array([11, 22, 33])
    rkf = RepeatedKFold(n_splits=4, n_repeats=4, random_state=44)
    for train_index, test_index in rkf.split(X):
        print("TRAIN:", train_index, "TEST:", test_index)
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
    ---------------------------------------------------------------------------
    ValueError                                Traceback (most recent call last)
    <ipython-input-30-da9fc9c92435> in <module>
          4 y = np.array([11, 22, 33])
          5 rkf = RepeatedKFold(n_splits=4, n_repeats=4, random_state=44)
    ----> 6 for train_index, test_index in rkf.split(X):
          7     print("TRAIN:", train_index, "TEST:", test_index)
          8     X_train, X_test = X[train_index], X[test_index]
    
    ~\anaconda3\lib\site-packages\sklearn\model_selection\_split.py in split(self, X, y, groups)
       1145             cv = self.cv(random_state=rng, shuffle=True,
       1146                          **self.cvargs)
    -> 1147             for train_index, test_index in cv.split(X, y, groups):
       1148                 yield train_index, test_index
       1149 
    
    ~\anaconda3\lib\site-packages\sklearn\model_selection\_split.py in split(self, X, y, groups)
        331                 ("Cannot have number of splits n_splits={0} greater"
        332                  " than the number of samples: n_samples={1}.")
    --> 333                 .format(self.n_splits, n_samples))
        334 
        335         for train, test in super().split(X, y, groups):
    
    ValueError: Cannot have number of splits n_splits=4 greater than the number of samples: n_samples=3.

     

  10. عندما أحاول تنفيذ الكود التالي لخوارزمية HistGradientBoostingClassifier  في Sklearn يظهر لي هذا الخطأ:

    from sklearn.model_selection import train_test_split
    from sklearn.ensemble import HistGradientBoostingClassifier
    Data = load_breast_cancer()
    X = Data.data
    y = Data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=44, shuffle =True)
    clf = BaggingClassifier(n_estimators=150, random_state=444)
    clf.fit(X_train, y_train)
    ---------------------------------------------------------------------------
    ImportError                               Traceback (most recent call last)
    <ipython-input-1-433e975cd38a> in <module>
         22 from sklearn.model_selection import train_test_split
    	 23 from sklearn.datasets import load_breast_cancer
    ---> 24 from sklearn.ensemble import HistGradientBoostingClassifier
    
    ImportError: cannot import name 'HistGradientBoostingClassifier' from 'sklearn.ensemble' (C:\Users\Windows.10\anaconda3\lib\site-packages\sklearn\ensemble\__init__.py)

     

  11. قمت ببناء نموذج لكن عندما أحاول تقسيم البيانات يظهر لي الخطأ التالي:

    from sklearn.datasets import load_breast_cancer
    from sklearn.ensemble import BaggingClassifier
    from sklearn.model_selection import train_test_split
    Data = load_breast_cancer()
    X = Data.data
    y = Data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=44, shuffle =True)
    clf = BaggingClassifier(n_estimators=150, random_state=444)
    clf.fit(X_train, y_train)
    -------------------------------------------------------------
    ImportError: No module named model_selection

     

×
×
  • أضف...