قمت ببناء نموذج BaggingClassifier لكن يظهر لي الخطأ التالي: from sklearn.ensemble import BaggingClassifier from sklearn.model_selection import train_test_split from sklearn.metrics import confusion_matrix from sklearn.datasets import load_breast_cancer import seaborn as sns import matplotlib.pyplot as plt from sklearn.svm import SVR Data = load_breast_cancer() X = Data.data y = Data.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=44, shuffle =True) clf = BaggingClassifier(base_estimator=SVR(),n_estimators=150, random_state=444) clf.fit(X_train, y_train) print('BaggingClassifier Train Score is : ' , clf.score(X_train, y_train)) print('BaggingClassifier Test Score is : ' , clf.score(X_test, y_test)) c = confusion_matrix(y_test, clf.predict(X_test)) print('Confusion Matrix is : \n', c) sns.heatmap(c, center = True) plt.show() --------------------------------------------------------------------------- IndexError Traceback (most recent call last) <ipython-input-45-1d3261b5c2fc> in <module> 12 clf = BaggingClassifier(base_estimator=SVR(),n_estimators=150, random_state=444) 13 clf.fit(X_train, y_train) ---> 14 print('BaggingClassifier Train Score is : ' , clf.score(X_train, y_train)) 15 print('BaggingClassifier Test Score is : ' , clf.score(X_test, y_test)) 16 c = confusion_matrix(y_test, clf.predict(X_test)) ~\anaconda3\lib\site-packages\sklearn\base.py in score(self, X, y, sample_weight) 367 """ 368 from .metrics import accuracy_score --> 369 return accuracy_score(y, self.predict(X), sample_weight=sample_weight) 370 371 ~\anaconda3\lib\site-packages\sklearn\ensemble\_bagging.py in predict(self, X) 668 The predicted classes. 
669 """ --> 670 predicted_probabilitiy = self.predict_proba(X) 671 return self.classes_.take((np.argmax(predicted_probabilitiy, axis=1)), 672 axis=0) ~\anaconda3\lib\site-packages\sklearn\ensemble\_bagging.py in predict_proba(self, X) 718 X, 719 self.n_classes_) --> 720 for i in range(n_jobs)) 721 722 # Reduce ~\anaconda3\lib\site-packages\joblib\parallel.py in __call__(self, iterable) 1002 # remaining jobs. 1003 self._iterating = False -> 1004 if self.dispatch_one_batch(iterator): 1005 self._iterating = self._original_iterator is not None 1006 ~\anaconda3\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator) 833 return False 834 else: --> 835 self._dispatch(tasks) 836 return True 837 ~\anaconda3\lib\site-packages\joblib\parallel.py in _dispatch(self, batch) 752 with self._lock: 753 job_idx = len(self._jobs) --> 754 job = self._backend.apply_async(batch, callback=cb) 755 # A job can complete so quickly than its callback is 756 # called before we get here, causing self._jobs to ~\anaconda3\lib\site-packages\joblib\_parallel_backends.py in apply_async(self, func, callback) 207 def apply_async(self, func, callback=None): 208 """Schedule a func to be run""" --> 209 result = ImmediateResult(func) 210 if callback: 211 callback(result) ~\anaconda3\lib\site-packages\joblib\_parallel_backends.py in __init__(self, batch) 588 # Don't delay the application, to avoid keeping the input 589 # arguments in memory --> 590 self.results = batch() 591 592 def get(self): ~\anaconda3\lib\site-packages\joblib\parallel.py in __call__(self) 254 with parallel_backend(self._backend, n_jobs=self._n_jobs): 255 return [func(*args, **kwargs) --> 256 for func, args, kwargs in self.items] 257 258 def __len__(self): ~\anaconda3\lib\site-packages\joblib\parallel.py in <listcomp>(.0) 254 with parallel_backend(self._backend, n_jobs=self._n_jobs): 255 return [func(*args, **kwargs) --> 256 for func, args, kwargs in self.items] 257 258 def __len__(self): 
~\anaconda3\lib\site-packages\sklearn\ensemble\_bagging.py in _parallel_predict_proba(estimators, estimators_features, X, n_classes) 140 141 for i in range(n_samples): --> 142 proba[i, predictions[i]] += 1 143 144 return proba IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices بماذا أخطأت؟
أريد استخدام learning_curve و grid_search من مكتبة Sklearn ولكن يظهر لي الخطأ التالي: from sklearn.grid_search import GridSearchCV from sklearn.learning_curve import learning_curve ImportError: No module named grid_search, learning_curve ما المشكلة؟
أحاول تقسيم البيانات باستخدام StratifiedKFold لكن يظهر لي الخطأ التالي : import numpy as np from sklearn.model_selection import StratifiedKFold X = np.array([[1,4],[2,1],[3,4],[7,8],[2,8]]) y = np.array([2,1,3,4,4]) skf = StratifiedKFold(n_splits=3) print(skf.get_n_splits(X, y)) for train_index, test_index in skf.split(X, y): print("TRAIN:"+str(train_index)+'\n'+"TEST:"+str(test_index),end='\n\n') X_train, X_test = X[train_index], X[test_index] y_train, y_test = y[train_index], y[test_index] print('X_train:\n '+str(X_train),end='\n\n') print('X_test:\n '+str(X_test),end='\n\n') print('y_train:\n '+str(y_train),end='\n\n') print('y_test:\n' +str(y_test),end='\n\n') --------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-56-6c55afa3238f> in <module> 6 print(skf.get_n_splits(X, y)) 7 ----> 8 for train_index, test_index in skf.split(X, y): 9 # للتقسيمة index عرض ال 10 print("TRAIN:"+str(train_index)+'\n'+"TEST:"+str(test_index),end='\n\n') ~\anaconda3\lib\site-packages\sklearn\model_selection\_split.py in split(self, X, y, groups) 333 .format(self.n_splits, n_samples)) 334 --> 335 for train, test in super().split(X, y, groups): 336 yield train, test 337 ~\anaconda3\lib\site-packages\sklearn\model_selection\_split.py in split(self, X, y, groups) 78 X, y, groups = indexable(X, y, groups) 79 indices = np.arange(_num_samples(X)) ---> 80 for test_index in self._iter_test_masks(X, y, groups): 81 train_index = indices[np.logical_not(test_index)] 82 test_index = indices[test_index] ~\anaconda3\lib\site-packages\sklearn\model_selection\_split.py in _iter_test_masks(self, X, y, groups) 690 691 def _iter_test_masks(self, X, y=None, groups=None): --> 692 test_folds = self._make_test_folds(X, y) 693 for i in range(self.n_splits): 694 yield test_folds == i ~\anaconda3\lib\site-packages\sklearn\model_selection\_split.py in _make_test_folds(self, X, y) 661 raise ValueError("n_splits=%d cannot be greater 
than the" 662 " number of members in each class." --> 663 % (self.n_splits)) 664 if self.n_splits > min_groups: 665 warnings.warn(("The least populated class in y has only %d" ValueError: n_splits=3 cannot be greater than the number of members in each class. ما هو عدد الأعضاء؟ وكيف نحل المشكلة؟
كيف نقوم بتقسيم البيانات باستخدام ShuffleSplit في مكتبة Sklearn، وكيف يتم الأمر؟
كيف نقوم بتقسيم البيانات باستخدام LeavePOut في مكتبة Sklearn؟
لماذا يظهر لي الخطأ التالي عند محاولة استيراد الكلاس Imputer: from sklearn.preprocessing import Imputer --------------------------------------------------------------------------- ImportError Traceback (most recent call last) <ipython-input-15-1ff1e063db9c> in <module> ----> 1 from sklearn.preprocessing import Imputer ImportError: cannot import name 'Imputer' from 'sklearn.preprocessing' (C:\Users\Windows.10\anaconda3\lib\site-packages\sklearn\preprocessing\__init__.py)
قمت ببناء نموذج تصنيف باستخدام خوارزمية ExtraTrees لكنه يعطيني الخطأ التالي عندما يحاول عمل ال fitting على البيانات: clf = ExtraTreesClassifier(n_estimators=150, random_state=444) clf.fit(X_train, y_train) ---------------------------------------------------------------------- ValueError: Unknown label type: 'unknown' علماً أن قيم ال y طبيعية: y is array([1.0, 0.0, 0.0, ..., 1.0, 1.0, 0.0], dtype=object) size is (800,) ما هي المشكلة؟
كيف نقوم بتقسيم البيانات باستخدام LeaveOneOut في مكتبة Sklearn؟
كيف نقوم بتطبيق cross_val_score باستخدام مكتبة Sklearn لقياس كفاءة نماذج مختلفة عبر عدة Folds؟
كيف نقوم بتطبيق مفهوم ال Feature Selection باستخدام الصف SelectKBest في مكتبة Sklearn؟
أحاول تقسيم البيانات باستخدام RepeatedKFold لكن يظهر لي الخطأ التالي: import numpy as np from sklearn.model_selection import RepeatedKFold X = np.array([[3, 32], [2, 9], [15, 8]]) y = np.array([11, 22, 33]) rkf = RepeatedKFold(n_splits=4, n_repeats=4, random_state=44) for train_index, test_index in rkf.split(X): print("TRAIN:", train_index, "TEST:", test_index) X_train, X_test = X[train_index], X[test_index] y_train, y_test = y[train_index], y[test_index] --------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-30-da9fc9c92435> in <module> 4 y = np.array([11, 22, 33]) 5 rkf = RepeatedKFold(n_splits=4, n_repeats=4, random_state=44) ----> 6 for train_index, test_index in rkf.split(X): 7 print("TRAIN:", train_index, "TEST:", test_index) 8 X_train, X_test = X[train_index], X[test_index] ~\anaconda3\lib\site-packages\sklearn\model_selection\_split.py in split(self, X, y, groups) 1145 cv = self.cv(random_state=rng, shuffle=True, 1146 **self.cvargs) -> 1147 for train_index, test_index in cv.split(X, y, groups): 1148 yield train_index, test_index 1149 ~\anaconda3\lib\site-packages\sklearn\model_selection\_split.py in split(self, X, y, groups) 331 ("Cannot have number of splits n_splits={0} greater" 332 " than the number of samples: n_samples={1}.") --> 333 .format(self.n_splits, n_samples)) 334 335 for train, test in super().split(X, y, groups): ValueError: Cannot have number of splits n_splits=4 greater than the number of samples: n_samples=3.
كيف نقوم بتطبيق مفهوم ال Feature Selection باستخدام الصف SelectPercentile في مكتبة Sklearn؟
عندما أحاول تنفيذ الكود التالي لخوارزمية HistGradientBoostingClassifier في Sklearn يظهر لي هذا الخطأ: from sklearn.model_selection import train_test_split from sklearn.ensemble import HistGradientBoostingClassifier Data = load_breast_cancer() X = Data.data y = Data.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=44, shuffle =True) clf = BaggingClassifier(n_estimators=150, random_state=444) clf.fit(X_train, y_train) --------------------------------------------------------------------------- ImportError Traceback (most recent call last) <ipython-input-1-433e975cd38a> in <module> 22 from sklearn.model_selection import train_test_split 23 from sklearn.datasets import load_breast_cancer ---> 24 from sklearn.ensemble import HistGradientBoostingClassifier ImportError: cannot import name 'HistGradientBoostingClassifier' from 'sklearn.ensemble' (C:\Users\Windows.10\anaconda3\lib\site-packages\sklearn\ensemble\__init__.py)
كيف نقوم بتطبيق خوارزمية Hierarchical clustering باستخدام مكتبة Sklearn؟
قمت ببناء نموذج لكن عندما أحاول تقسيم البيانات يظهر لي الخطأ التالي: from sklearn.datasets import load_breast_cancer from sklearn.ensemble import BaggingClassifier from sklearn.model_selection import train_test_split Data = load_breast_cancer() X = Data.data y = Data.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=44, shuffle =True) clf = BaggingClassifier(n_estimators=150, random_state=444) clf.fit(X_train, y_train) ------------------------------------------------------------- ImportError: No module named model_selection