هذا لكود تحليلي للبيانات:
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set_context('notebook')
sns.set_style('white')
import dtreeviz
# Data Preparation
# 1.Download Data and some preprocessing
training = pd.read_csv(r'C:\Users\Dell\Downloads\dataa\UNSW_NB15_training-set.csv')
testing = pd.read_csv(r'C:\Users\Dell\Downloads\dataa\UNSW_NB15_testing-set.csv')
print("training ",training.shape)
print("testing ",testing.shape)
df = pd.concat([training,testing]).drop('id',axis=1)
df = df.reset_index(drop=True)
df.head()
df.attack_cat.unique()
# Categorical data
for col in ['proto', 'service', 'state']:
df[col] = df[col].astype('category').cat.codes
df['attack_cat'] = df['attack_cat'].astype('category')
# Visualizing attacks categories
validAttacks = df[df['label']==1]['attack_cat'].value_counts()
print(validAttacks)
plt.figure(figsize = (15,8))
plt.pie(validAttacks,labels = validAttacks.index, autopct = '%1.1f%%',explode = [0,0,0,0,0,0.2,0.2,0.2,0.2,1.2])
plt.show()
from sklearn.model_selection import train_test_split
X = df.drop(columns = ['attack_cat', 'label'])
y = df['label'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=11)
feature_names = list(X.columns)
print("X_train shape: ", X_train.shape)
print("y_train shape: ", y_train.shape)
print("X_test shape: ", X_test.shape)
print("y_test shape: ", y_test.shape)
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
def evaluate_anomaly_detection_models(X_train, X_test, y_test):
# Initialize models
isolation_forest = IsolationForest(contamination=0.1) # Adjust contamination based on your dataset
lof = LocalOutlierFactor(contamination=0.1) # Adjust contamination based on your dataset
# Fit models on training data
isolation_forest.fit(X_train)
lof.fit(X_train)
# Predict anomalies
if_preds = isolation_forest.predict(X_test)
lof_preds = lof.fit_predict(X_test)
المشكل هنا رغم اني استعملت افرايج f1_score,recall_score...
# Evaluate Isolation Forest
if_accuracy = accuracy_score(y_test, if_preds)
if_precision = precision_score(y_test, if_preds)
if_recall = recall_score(y_test, if_preds)
if_f1 = f1_score(y_test, if_preds,average='micro')
# Evaluate LOF
lof_accuracy = accuracy_score(y_test, lof_preds)
lof_precision = precision_score(y_test, lof_preds)
lof_recall = recall_score(y_test, lof_preds)
lof_f1 = f1_score(y_test, lof_preds,average='micro')
# Create a dictionary to store the results
results = {
'Isolation Forest': {
'Accuracy': if_accuracy,
'Precision': if_precision,
'Recall': if_recall,
'F1 Score': if_f1
},
'Local Outlier Factor': {
'Accuracy': lof_accuracy,
'Precision': lof_precision,
'Recall': lof_recall,
'F1 Score': lof_f1
}
}
return results
# Example usage:
# Assuming you have X_train, X_test, and y_test as your training and test data
results = evaluate_anomaly_detection_models(X_train, X_test, y_test)
print(results)
الجواب:
ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].