طيب ده الكود بتاعي
from sklearn.compose import ColumnTransformer
from sklearn.impute import IterativeImputer # Import IterativeImputer for handling missing values
import pandas as pd
# Identify numeric and text (object) columns in the DataFrame
numeric_features = df.select_dtypes(include=['number']).columns.tolist()
text_features = df.select_dtypes(include=['object']).columns.tolist()
# Create a ColumnTransformer to apply IterativeImputer to numeric columns
# and pass through text columns without any transformation
preprocessor = ColumnTransformer(
transformers=[
('num', IterativeImputer(missing_values=-1.0), numeric_features), # Apply IterativeImputer to numeric features
('text', 'passthrough', text_features) # Leave text features unchanged
]
)
# Fit the transformer to the data and transform it
df_transformed = preprocessor.fit_transform(df)
# Reconstruct the transformed data into a DataFrame with original column names
feature_names = numeric_features + text_features
df_final = pd.DataFrame(df_transformed, columns=feature_names)
بس بيطهر الخطاء ده
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/tmp/ipykernel_34/678675408.py in <cell line: 0>()
15
16 # Fit the transformer to the data and transform it
---> 17 df_transformed = preprocessor.fit_transform(data_train)
18
19 # Reconstruct the transformed data into a DataFrame with original column names
/usr/local/lib/python3.11/dist-packages/sklearn/utils/_set_output.py in wrapped(self, X, *args, **kwargs)
138 @wraps(f)
139 def wrapped(self, X, *args, **kwargs):
--> 140 data_to_wrap = f(self, X, *args, **kwargs)
141 if isinstance(data_to_wrap, tuple):
142 # only wrap the first output for cross decomposition
/usr/local/lib/python3.11/dist-packages/sklearn/compose/_column_transformer.py in fit_transform(self, X, y)
725 self._validate_remainder(X)
726
--> 727 result = self._fit_transform(X, y, _fit_transform_one)
728
729 if not result:
/usr/local/lib/python3.11/dist-packages/sklearn/compose/_column_transformer.py in _fit_transform(self, X, y, func, fitted, column_as_strings)
656 )
657 try:
--> 658 return Parallel(n_jobs=self.n_jobs)(
659 delayed(func)(
660 transformer=clone(trans) if not fitted else trans,
/usr/local/lib/python3.11/dist-packages/sklearn/utils/parallel.py in __call__(self, iterable)
61 for delayed_func, args, kwargs in iterable
62 )
---> 63 return super().__call__(iterable_with_config)
64
65
/usr/local/lib/python3.11/dist-packages/joblib/parallel.py in __call__(self, iterable)
1983 output = self._get_sequential_output(iterable)
1984 next(output)
-> 1985 return output if self.return_generator else list(output)
1986
1987 # Let's create an ID that uniquely identifies the current call. If the
/usr/local/lib/python3.11/dist-packages/joblib/parallel.py in _get_sequential_output(self, iterable)
1911 self.n_dispatched_batches += 1
1912 self.n_dispatched_tasks += 1
-> 1913 res = func(*args, **kwargs)
1914 self.n_completed_tasks += 1
1915 self.print_progress()
/usr/local/lib/python3.11/dist-packages/sklearn/utils/parallel.py in __call__(self, *args, **kwargs)
121 config = {}
122 with config_context(**config):
--> 123 return self.function(*args, **kwargs)
/usr/local/lib/python3.11/dist-packages/sklearn/pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params)
891 with _print_elapsed_time(message_clsname, message):
892 if hasattr(transformer, "fit_transform"):
--> 893 res = transformer.fit_transform(X, y, **fit_params)
894 else:
895 res = transformer.fit(X, y, **fit_params).transform(X)
/usr/local/lib/python3.11/dist-packages/sklearn/utils/_set_output.py in wrapped(self, X, *args, **kwargs)
138 @wraps(f)
139 def wrapped(self, X, *args, **kwargs):
--> 140 data_to_wrap = f(self, X, *args, **kwargs)
141 if isinstance(data_to_wrap, tuple):
142 # only wrap the first output for cross decomposition
/usr/local/lib/python3.11/dist-packages/sklearn/impute/_iterative.py in fit_transform(self, X, y)
702 self.initial_imputer_ = None
703
--> 704 X, Xt, mask_missing_values, complete_mask = self._initial_imputation(
705 X, in_fit=True
706 )
/usr/local/lib/python3.11/dist-packages/sklearn/impute/_iterative.py in _initial_imputation(self, X, in_fit)
599 force_all_finite = True
600
--> 601 X = self._validate_data(
602 X,
603 dtype=FLOAT_DTYPES,
/usr/local/lib/python3.11/dist-packages/sklearn/base.py in _validate_data(self, X, y, reset, validate_separately, **check_params)
563 raise ValueError("Validation should be done on X, y or both.")
564 elif not no_val_X and no_val_y:
--> 565 X = check_array(X, input_name="X", **check_params)
566 out = X
567 elif no_val_X and not no_val_y:
/usr/local/lib/python3.11/dist-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)
919
920 if force_all_finite:
--> 921 _assert_all_finite(
922 array,
923 input_name=input_name,
/usr/local/lib/python3.11/dist-packages/sklearn/utils/validation.py in _assert_all_finite(X, allow_nan, msg_dtype, estimator_name, input_name)
159 "#estimators-that-handle-nan-values"
160 )
--> 161 raise ValueError(msg_err)
162
163
ValueError: Input X contains NaN.
IterativeImputer does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values
ليه بيظهر الخطاء ده ؟