اذهب إلى المحتوى

Meezo ML

الأعضاء
  • المساهمات

    197
  • تاريخ الانضمام

  • تاريخ آخر زيارة

أجوبة بواسطة Meezo ML

  1. قمت ببناء مودل LogisticRegression لكن عند محاولة قياس الكفاءة يظهر لي الخطأ التالي:

    import numpy as np
    from tensorflow.keras.datasets import mnist
    from sklearn.linear_model import LogisticRegression
    from sklearn.pipeline import Pipeline
    from tensorflow.keras.utils import to_categorical
    from sklearn.metrics import f1_score,precision_score,recall_score,accuracy_score,log_loss
    (x_train, y_train),(x_test, y_test) = mnist.load_data()
    x_train.shape
    image_size = x_train.shape[1]
    input_size = image_size * image_size
    x_train = np.reshape(x_train, [-1, input_size])/ 255
    x_test = np.reshape(x_test, [-1, input_size]) / 255
    #y_train = to_categorical(y_train)
    #y_test = to_categorical(y_test)
    t =LogisticRegression()
    t.fit(x_train, y_train)
    f1_score(y_test,t.predict(x_test))
    ---------------------------------------------------------------------------
    ValueError                                Traceback (most recent call last)
    <ipython-input-1-bbb2bd2287ff> in <module>
         15 t =LogisticRegression()
         16 t.fit(x_train, y_train)
    ---> 17 f1_score(y_test,t.predict(x_test))
         18 
         19 #ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].
    
    ~\anaconda3\lib\site-packages\sklearn\metrics\_classification.py in f1_score(y_true, y_pred, labels, pos_label, average, sample_weight, zero_division)
       1097                        pos_label=pos_label, average=average,
       1098                        sample_weight=sample_weight,
    -> 1099                        zero_division=zero_division)
       1100 
       1101 
    
    ~\anaconda3\lib\site-packages\sklearn\metrics\_classification.py in fbeta_score(y_true, y_pred, beta, labels, pos_label, average, sample_weight, zero_division)
       1224                                                  warn_for=('f-score',),
       1225                                                  sample_weight=sample_weight,
    -> 1226                                                  zero_division=zero_division)
       1227     return f
       1228 
    
    ~\anaconda3\lib\site-packages\sklearn\metrics\_classification.py in precision_recall_fscore_support(y_true, y_pred, beta, labels, pos_label, average, warn_for, sample_weight, zero_division)
       1482         raise ValueError("beta should be >=0 in the F-beta score")
       1483     labels = _check_set_wise_labels(y_true, y_pred, average, labels,
    -> 1484                                     pos_label)
       1485 
       1486     # Calculate tp_sum, pred_sum, true_sum ###
    
    ~\anaconda3\lib\site-packages\sklearn\metrics\_classification.py in _check_set_wise_labels(y_true, y_pred, average, labels, pos_label)
       1314             raise ValueError("Target is %s but average='binary'. Please "
       1315                              "choose another average setting, one of %r."
    -> 1316                              % (y_type, average_options))
       1317     elif pos_label not in (None, 1):
       1318         warnings.warn("Note that pos_label (set to %r) is ignored when "
    
    ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].

    ما الحل؟

  2. ظهور الخطأ التالي  ValueError: bad input shape  عند محاولة تدريب نموذج باستخدام خوارزمية MultinomialNB.
    الكود مع الخطأ:

    import numpy as np
    from sklearn.naive_bayes import MultinomialNB
    from tensorflow.keras.utils import to_categorical
    from tensorflow.keras.datasets import mnist
    from sklearn.metrics import accuracy_score
    (X, Y),(Xtest, Ytest) = mnist.load_data()
    Y = to_categorical(Y)
    Ytest = to_categorical(Ytest)
    X = np.reshape(X, [-1, X.shape[1]*X.shape[1]])
    Xtest = np.reshape(Xtest, [-1, input_size])
    M =MultinomialNB()
    M.fit(X, Y)
    ---------------------------------------------------------------------------
    ValueError                                Traceback (most recent call last)
    <ipython-input-19-161d6ac448e8> in <module>
         10 Xtest = np.reshape(Xtest, [-1, input_size])
         11 t =MultinomialNB()
    ---> 12 t.fit(X, Y)
         13 accuracy_score(Ytest,t.predict(Xtest)) # 0.8357
    
    ~\anaconda3\lib\site-packages\sklearn\naive_bayes.py in fit(self, X, y, sample_weight)
        607         self : object
        608         """
    --> 609         X, y = self._check_X_y(X, y)
        610         _, n_features = X.shape
        611         self.n_features_ = n_features
    
    ~\anaconda3\lib\site-packages\sklearn\naive_bayes.py in _check_X_y(self, X, y)
        473 
        474     def _check_X_y(self, X, y):
    --> 475         return check_X_y(X, y, accept_sparse='csr')
        476 
        477     def _update_class_log_prior(self, class_prior=None):
    
    ~\anaconda3\lib\site-packages\sklearn\utils\validation.py in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)
        758                         dtype=None)
        759     else:
    --> 760         y = column_or_1d(y, warn=True)
        761         _assert_all_finite(y)
        762     if y_numeric and y.dtype.kind == 'O':
    
    ~\anaconda3\lib\site-packages\sklearn\utils\validation.py in column_or_1d(y, warn)
        795         return np.ravel(y)
        796 
    --> 797     raise ValueError("bad input shape {0}".format(shape))
        798 
        799 
    
    ValueError: bad input shape (60000, 10)

     

  3. أحاول أن أقوم بترميز النص باستخدام الـ TF-IDF لكن يظهر لي الخطأ ValueError: np.nan is an invalid document, expected byte or unicode string. في المقطع البرمجي التالي:

    from sklearn.feature_extraction.text import TfidfVectorizer
    import pandas as pd
    data = pd.read_csv("train.csv")
    tfidf = TfidfVectorizer(encoding='utf-8',decode_error='replace')
    enc = tfidf.fit_transform(data['tweets'])

    ما السبب؟
    ظهور الخطأ ValueError: np.nan is an invalid document, expected byte or unicode string عند محاولة ترميز النص باستخدام TfidfVectorizer

  4. ظهور الخطأ التالي ValueError: Found array with dim 3. Estimator expected <= 2 عند محاولة تدريب نموذج باستخدام خوارزمية  MultinomialNB؟

    import numpy as np
    from tensorflow.keras.datasets import mnist
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.pipeline import Pipeline
    from tensorflow.keras.utils import to_categorical
    from sklearn.metrics import f1_score,precision_score,recall_score,accuracy_score,log_loss
    (x_train, y_train),(x_test, y_test) = mnist.load_data()
    x_train.shape
    t = Pipeline([('clf',MultinomialNB())])
    t = text_clf.fit(x_train, y_train)
    accuracy_score(y_test,t.predict(x_test))
    ---------------------------------------------------------------------------
    ValueError                                Traceback (most recent call last)
    <ipython-input-25-434bf8f46433> in <module>
         10 # تعريف pipline
         11 t = Pipeline([('clf',MultinomialNB())])
    ---> 12 t = text_clf.fit(x_train, y_train)
         13 accuracy_score(y_test,t.predict(x_test))
    
    ~\anaconda3\lib\site-packages\sklearn\pipeline.py in fit(self, X, y, **fit_params)
        352                                  self._log_message(len(self.steps) - 1)):
        353             if self._final_estimator != 'passthrough':
    --> 354                 self._final_estimator.fit(Xt, y, **fit_params)
        355         return self
        356 
    
    ~\anaconda3\lib\site-packages\sklearn\naive_bayes.py in fit(self, X, y, sample_weight)
        607         self : object
        608         """
    --> 609         X, y = self._check_X_y(X, y)
        610         _, n_features = X.shape
        611         self.n_features_ = n_features
    
    ~\anaconda3\lib\site-packages\sklearn\naive_bayes.py in _check_X_y(self, X, y)
        473 
        474     def _check_X_y(self, X, y):
    --> 475         return check_X_y(X, y, accept_sparse='csr')
        476 
        477     def _update_class_log_prior(self, class_prior=None):
    
    ~\anaconda3\lib\site-packages\sklearn\utils\validation.py in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)
        753                     ensure_min_features=ensure_min_features,
        754                     warn_on_dtype=warn_on_dtype,
    --> 755                     estimator=estimator)
        756     if multi_output:
        757         y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False,
    
    ~\anaconda3\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
        572         if not allow_nd and array.ndim >= 3:
        573             raise ValueError("Found array with dim %d. %s expected <= 2."
    --> 574                              % (array.ndim, estimator_name))
        575 
        576         if force_all_finite:
    
    ValueError: Found array with dim 3. Estimator expected <= 2.

     

  5. عند محاولة تدريب model ( نموذج ) يظهر لي الخطأ التالي: Expected 2D array, got 1D array instead. الكود:

    from sklearn.preprocessing  import LabelEncoder
    import pandas as pd
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LinearRegression
    # إنشاء داتا
    data = {'size': [100, 30, 50, 200, 2],
            'class': ['big', 'small', 'medium', 'verybig', 'verysmall']}
    df = pd.DataFrame(data)
    le = LabelEncoder()
    le.fit(df['class'])
    df['class'] = le.transform(df['class'])
    X_train, X_test, y_train, y_test = train_test_split(df['size'], df['class'], random_state = 42, test_size = 0.33)
    linreg = LinearRegression()
    linreg.fit(X_train, y_train)
    linreg.score(x_test,y_test)
    ---------------------------------------------------------------------------
    ValueError                                Traceback (most recent call last)
    <ipython-input-9-f740a0daa981> in <module>
         12 X_train, X_test, y_train, y_test = train_test_split(df['size'], df['class'], random_state = 42, test_size = 0.33)
         13 linreg = LinearRegression()
    ---> 14 linreg.fit(X_train, y_train)
         15 linreg.score(x_test,y_test)
    
    ~\anaconda3\lib\site-packages\sklearn\linear_model\_base.py in fit(self, X, y, sample_weight)
        490         n_jobs_ = self.n_jobs
        491         X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],
    --> 492                          y_numeric=True, multi_output=True)
        493 
        494         if sample_weight is not None:
    
    ~\anaconda3\lib\site-packages\sklearn\utils\validation.py in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)
        753                     ensure_min_features=ensure_min_features,
        754                     warn_on_dtype=warn_on_dtype,
    --> 755                     estimator=estimator)
        756     if multi_output:
        757         y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False,
    
    ~\anaconda3\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
        554                     "Reshape your data either using array.reshape(-1, 1) if "
        555                     "your data has a single feature or array.reshape(1, -1) "
    --> 556                     "if it contains a single sample.".format(array))
        557 
        558         # in the future np.flexible dtypes will be handled like object dtypes
    
    ValueError: Expected 2D array, got 1D array instead:
    array=[ 50 100 200].

     

  6. لدي الكود التالي:

    from sklearn.model_selection import cross_val_score
    from sklearn.datasets import load_iris
    from sklearn.preprocessing import OneHotEncoder
    from sklearn.tree import DecisionTreeClassifier
    dataset = load_iris()
    Xdata = pd.DataFrame(data=dataset.data, columns=dataset.feature_names)
    encoder = OneHotEncoder()
    ydata = encoder.fit_transform(pd.DataFrame(dataset.target)).toarray()
    model = DecisionTreeClassifier(max_depth=1)
    cross_val_score(model, Xdata, ydata, cv=4, scoring="roc_auc")
    ---------------------------------------------------------------------------
    ValueError                                Traceback (most recent call last)
    <ipython-input-20-6be5be8289ab> in <module>
         11 model = DecisionTreeClassifier(max_depth=1)
         12 
    ---> 13 cross_val_score(model, Xdata, ydata, cv=4, scoring="roc_auc")
    
    ~\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in cross_val_score(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, error_score)
        388                                 fit_params=fit_params,
        389                                 pre_dispatch=pre_dispatch,
    --> 390                                 error_score=error_score)
        391     return cv_results['test_score']
        392 
    
    ~\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)
        234             return_times=True, return_estimator=return_estimator,
        235             error_score=error_score)
    --> 236         for train, test in cv.split(X, y, groups))
        237 
        238     zipped_scores = list(zip(*scores))
    
    ~\anaconda3\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
       1002             # remaining jobs.
       1003             self._iterating = False
    -> 1004             if self.dispatch_one_batch(iterator):
       1005                 self._iterating = self._original_iterator is not None
       1006 
    
    ~\anaconda3\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
        833                 return False
        834             else:
    --> 835                 self._dispatch(tasks)
        836                 return True
        837 
    
    ~\anaconda3\lib\site-packages\joblib\parallel.py in _dispatch(self, batch)
        752         with self._lock:
        753             job_idx = len(self._jobs)
    --> 754             job = self._backend.apply_async(batch, callback=cb)
        755             # A job can complete so quickly than its callback is
        756             # called before we get here, causing self._jobs to
    
    ~\anaconda3\lib\site-packages\joblib\_parallel_backends.py in apply_async(self, func, callback)
        207     def apply_async(self, func, callback=None):
        208         """Schedule a func to be run"""
    --> 209         result = ImmediateResult(func)
        210         if callback:
        211             callback(result)
    
    ~\anaconda3\lib\site-packages\joblib\_parallel_backends.py in __init__(self, batch)
        588         # Don't delay the application, to avoid keeping the input
        589         # arguments in memory
    --> 590         self.results = batch()
        591 
        592     def get(self):
    
    ~\anaconda3\lib\site-packages\joblib\parallel.py in __call__(self)
        254         with parallel_backend(self._backend, n_jobs=self._n_jobs):
        255             return [func(*args, **kwargs)
    --> 256                     for func, args, kwargs in self.items]
        257 
        258     def __len__(self):
    
    ~\anaconda3\lib\site-packages\joblib\parallel.py in <listcomp>(.0)
        254         with parallel_backend(self._backend, n_jobs=self._n_jobs):
        255             return [func(*args, **kwargs)
    --> 256                     for func, args, kwargs in self.items]
        257 
        258     def __len__(self):
    
    ~\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, error_score)
        542     else:
        543         fit_time = time.time() - start_time
    --> 544         test_scores = _score(estimator, X_test, y_test, scorer)
        545         score_time = time.time() - start_time - fit_time
        546         if return_train_score:
    
    ~\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _score(estimator, X_test, y_test, scorer)
        589         scores = scorer(estimator, X_test)
        590     else:
    --> 591         scores = scorer(estimator, X_test, y_test)
        592 
        593     error_msg = ("scoring must return a number, got %s (%s) "
    
    ~\anaconda3\lib\site-packages\sklearn\metrics\_scorer.py in __call__(self, estimator, *args, **kwargs)
         85             if isinstance(scorer, _BaseScorer):
         86                 score = scorer._score(cached_call, estimator,
    ---> 87                                       *args, **kwargs)
         88             else:
         89                 score = scorer(estimator, *args, **kwargs)
    
    ~\anaconda3\lib\site-packages\sklearn\metrics\_scorer.py in _score(self, method_caller, clf, X, y, sample_weight)
        330                                                  **self._kwargs)
        331         else:
    --> 332             return self._sign * self._score_func(y, y_pred, **self._kwargs)
        333 
        334     def _factory_args(self):
    
    ~\anaconda3\lib\site-packages\sklearn\metrics\_ranking.py in roc_auc_score(y_true, y_score, average, sample_weight, max_fpr, multi_class, labels)
        393                                              max_fpr=max_fpr),
        394                                      y_true, y_score, average,
    --> 395                                      sample_weight=sample_weight)
        396 
        397 
    
    ~\anaconda3\lib\site-packages\sklearn\metrics\_base.py in _average_binary_score(binary_metric, y_true, y_score, average, sample_weight)
        118         y_score_c = y_score.take([c], axis=not_average_axis).ravel()
        119         score[c] = binary_metric(y_true_c, y_score_c,
    --> 120                                  sample_weight=score_weight)
        121 
        122     # Average the results
    
    ~\anaconda3\lib\site-packages\sklearn\metrics\_ranking.py in _binary_roc_auc_score(y_true, y_score, sample_weight, max_fpr)
        219     """Binary roc auc score"""
        220     if len(np.unique(y_true)) != 2:
    --> 221         raise ValueError("Only one class present in y_true. ROC AUC score "
        222                          "is not defined in that case.")
        223 
    
    ValueError: Only one class present in y_true. ROC AUC score is not defined in that case.

     

  7. قمت ببناء نموذج، لكن يظهر لي دوماً الخطأ التالي:

    #استيراد المكتبات
    import numpy as np
    from sklearn.linear_model import LogisticRegression
    from sklearn.naive_bayes import GaussianNB
    from sklearn.ensemble import RandomForestClassifier, VotingClassifier
    from sklearn.datasets import load_breast_cancer
    from sklearn.model_selection import train_test_split
    #تحميل البيانات
    data = load_breast_cancer().data
    labels = load_breast_cancer().target
    # تقسيم البيانات
    X_train, y_train,X_test, y_test = train_test_split(data, labels, test_size=0.2, random_state=2021, shuffle =True)
    c1 = LogisticRegression(multi_class='multinomial', random_state=1)
    c2 = RandomForestClassifier(n_estimators=50, random_state=1)
    c3 = GaussianNB()
    ec1 = VotingClassifier(estimators=[ ('lr', c1), ('rf', c2), ('gnb', c3)], voting='hard')
    ec1 = ec1.fit(X_train, y_train)
    print(ec1.score(X_test,y_test))
    
    ---------------------------------------------------------------------------
    ValueError                                Traceback (most recent call last)
    <ipython-input-16-46caf1469fab> in <module>
         17 
         18 ec1 = VotingClassifier(estimators=[ ('lr', c1), ('rf', c2), ('gnb', c3)], voting='hard')
    ---> 19 ec1 = ec1.fit(X_train, y_train)
         20 print(ec1.score(X_test,y_test))#0.9385964912280702
    
    ~\anaconda3\lib\site-packages\sklearn\ensemble\_voting.py in fit(self, X, y, sample_weight)
        207 
        208         """
    --> 209         check_classification_targets(y)
        210         if isinstance(y, np.ndarray) and len(y.shape) > 1 and y.shape[1] > 1:
        211             raise NotImplementedError('Multilabel and multi-output'
    
    ~\anaconda3\lib\site-packages\sklearn\utils\multiclass.py in check_classification_targets(y)
        167     if y_type not in ['binary', 'multiclass', 'multiclass-multioutput',
        168                       'multilabel-indicator', 'multilabel-sequences']:
    --> 169         raise ValueError("Unknown label type: %r" % y_type)
        170 
        171 
    
    ValueError: Unknown label type: 'continuous-multioutput'
    
    

     

×
×
  • أضف...