This is a very beginner-level version of the code.
# Construct and train a simple two-head survival model
# Imports required by this script
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Separate features (X) and target variables (y)
x = data_train.drop(columns=['efs', 'efs_time'])  # Features (all columns except 'efs' and 'efs_time')
y_event = data_train['efs'] # First target variable (event outcome)
y_time = data_train['efs_time'] # Second target variable (event time)
# Step 1: Split data into 70% training and 30% temporary set (which will be further split)
x_train, x_temp, y_event_train, y_event_temp, y_time_train, y_time_temp = train_test_split(x, y_event, y_time, test_size=0.3, random_state=42)
# Step 2: Split the temporary set into 15% validation (dev) and 15% test
x_dev, x_test, y_event_dev, y_event_test, y_time_dev, y_time_test = train_test_split(x_temp, y_event_temp, y_time_temp, test_size=0.5, random_state=42)
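# A hedged aside (not part of the original pipeline): because 'efs' is a
# binary event indicator, stratifying the first split on it keeps the event
# rate comparable across the train/dev/test splits, e.g.:
#   x_train, x_temp, y_event_train, y_event_temp, y_time_train, y_time_temp = train_test_split(
#       x, y_event, y_time, test_size=0.3, random_state=42, stratify=y_event)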
# Print dataset sizes for verification
#print(f"X_train: {x_train.shape}, x_dev: {x_dev.shape}, X_test: {x_test.shape}")
#print(f"y_event_train: {y_event_train.shape}, y_event_dev: {y_event_dev.shape}, y_event_test: {y_event_test.shape}")
#print(f"y_time_train: {y_time_train.shape}, y_time_dev: {y_time_dev.shape}, y_time_test: {y_time_test.shape}")
NUM_DURATIONS = 10  # Number of discrete time bins; unused by the simplified model below (see the sketch that follows)
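# NUM_DURATIONS sketch (an assumption, not part of the original pipeline):
# a true DeepHit-style model discretizes the continuous time target into
# NUM_DURATIONS bins and predicts a softmax distribution over them. A minimal
# version using quantile bin edges computed from the training set:
import numpy as np  # assumed available alongside pandas/sklearn
time_bin_edges = np.quantile(y_time_train, np.linspace(0.0, 1.0, NUM_DURATIONS + 1))
# Map each training-set time to a bin index in [0, NUM_DURATIONS - 1]
y_time_train_binned = np.clip(
    np.digitize(y_time_train, time_bin_edges[1:-1]), 0, NUM_DURATIONS - 1
)
# The matching output head would be Dense(NUM_DURATIONS, activation='softmax');
# the simplified model below keeps a single-unit regression head instead.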
# Step 3: Apply standard scaling to the features to standardize the data
scaler = StandardScaler()  # Initialize the scaler
x_train_scaled = scaler.fit_transform(x_train)  # Fit the scaler on the training data only, then transform it
x_dev_scaled = scaler.transform(x_dev)  # Transform the dev set with the training-set statistics
x_test_scaled = scaler.transform(x_test)  # Transform the test set the same way (no refitting, to avoid leakage)
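# Optional (an assumption, not in the original script): persist the fitted
# scaler so the exact same transformation can be applied at inference time.
# joblib ships as a scikit-learn dependency; the path below is hypothetical.
#   import joblib
#   joblib.dump(scaler, "scaler.joblib")   # save after fitting
#   scaler = joblib.load("scaler.joblib")  # reload later for inference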
# Define the input dimension based on the number of features in the training data
input_dim = x_train_scaled.shape[1]
# Define the input layer with the shape matching the feature dimension
inputs = keras.layers.Input(shape=(input_dim,))
# Add the first dense layer with 128 neurons and ReLU activation
# This layer processes the input data to extract complex features
x = keras.layers.Dense(128, activation='relu')(inputs)
x = keras.layers.BatchNormalization()(x)
# Add the second dense layer with 64 neurons and ReLU activation
# This further processes the output from the previous layer to capture more intricate patterns
x = keras.layers.Dense(64, activation='relu')(x)
x = keras.layers.BatchNormalization()(x)
# Add the third dense layer with 32 neurons and ReLU activation
# This layer continues refining the learned features from the previous layers
x = keras.layers.Dense(32, activation='relu')(x)
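# A hedged alternative (not in the original architecture): Dropout layers
# between the dense blocks can reduce overfitting on small tabular datasets:
#   x = keras.layers.Dropout(0.2)(x)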
# Output head for the time-to-event prediction: a single unit with ReLU activation,
# so the model regresses a non-negative continuous survival time.
# (A full DeepHit head would instead use Dense(NUM_DURATIONS, activation='softmax')
# to predict a probability distribution over discrete time bins; see the sketch above.)
output_time = keras.layers.Dense(1, activation='relu', name='time-output')(x)
# The output layer for predicting the event outcome (e.g., whether the event occurred or was censored),
# using the 'sigmoid' activation function. This gives a probability value between 0 and 1.
output_event = keras.layers.Dense(1, activation='sigmoid', name='event-output')(x)
# Constructing the final model, which takes the 'inputs' and outputs both the time-to-event
# and the event predictions. Despite its name, this is a simplified multi-output network
# for survival analysis, not the full DeepHit architecture (no discretized time head or ranking loss).
deep_hit_model = keras.models.Model(inputs=inputs, outputs=[output_time, output_event])
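# Verify the layer stack, the two output heads, and parameter counts before compiling
deep_hit_model.summary()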
# Compiling the Keras model with the specified optimizer, loss functions, and metrics
deep_hit_model.compile(
    # AdamW with the only non-default setting used here: weight decay.
    # All other AdamW arguments are left at their Keras defaults.
    optimizer=tf.keras.optimizers.AdamW(learning_rate=0.001, weight_decay=0.004),
    # MSE suits the continuous time head; binary cross-entropy suits the
    # sigmoid event head better than MSE for a 0/1 outcome.
    loss={"time-output": "mean_squared_error", "event-output": "binary_crossentropy"},
    metrics={"time-output": "mean_absolute_error", "event-output": "accuracy"},
)
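# A hedged note (an assumption, not in the original call): Keras compile()
# also accepts a loss_weights argument to balance the two heads when one loss
# dominates the combined objective, e.g.:
#   deep_hit_model.compile(..., loss_weights={"time-output": 1.0, "event-output": 1.0})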
# Training the Keras model; the dev split (not the test split) is used for
# validation so the test set stays held out for final evaluation.
deep_hit_model.fit(
    x_train_scaled,
    {"time-output": y_time_train, "event-output": y_event_train},
    validation_data=(x_dev_scaled, {"time-output": y_time_dev, "event-output": y_event_dev}),
    epochs=50,
    batch_size=128,
    callbacks=[keras.callbacks.EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)],
)
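# Held-out evaluation sketch (an assumption beyond the original script):
# score the untouched test split, then compute a concordance index, the
# standard survival-analysis metric, assuming the 'lifelines' package is
# installed. Higher predicted time should correspond to longer observed survival.
time_pred, event_pred = deep_hit_model.predict(x_test_scaled)
from lifelines.utils import concordance_index
c_index = concordance_index(y_time_test, time_pred.ravel(), event_observed=y_event_test)
print(f"Test concordance index: {c_index:.4f}")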