65
65
#
66
66
# Tensors for training data and labels
67
67
#
68
- training_x = [tf .constant (X_train )]
69
- train_labels = [tf .constant (y_train )]
68
+
69
+ # Training data for the GPT baseline model, converted to tensors
70
+ baseline_train_x = tf .constant (X_train )
71
+ baseline_train_y = tf .constant (y_train ) # NOTE(review): the baseline fit() further down passes X_train / y_train directly, not these tensors - confirm which is intended
72
+
73
+ # The same tensors wrapped in single-element lists for Cerebros (multimodal, takes inputs as a list)
74
+ training_x = [baseline_train_x ]
75
+ train_labels = [baseline_train_y ]
76
+
70
77
#
71
78
# Input and output shapes
72
79
#
@@ -90,9 +97,9 @@ def __init__(self, max_seq_length, **kwargs):
90
97
# Set whether the GPT2 model's layers are trainable
91
98
#self.encoder.trainable = False
92
99
for layer in self .encoder .layers :
93
- layer .trainable = False
100
+ layer .trainable = True
94
101
#
95
- self .encoder .layers [- 2 ].trainable = True
102
+ # self.encoder.layers[-2].trainable = True
96
103
#
97
104
# Set the maximum sequence length for tokenization
98
105
self .max_seq_length = max_seq_length
@@ -121,101 +128,156 @@ def from_config(cls, config):
121
128
# GPT2 configurables
122
129
max_seq_length = 96
123
130
124
- # Base model
131
+ # GPT Baseline Model
125
132
input_layer = Input (shape = (), dtype = tf .string )
126
133
gpt2_layer = GPT2Layer (max_seq_length )(input_layer )
127
134
#output = Flatten()(gpt2_layer)
128
- base_model = Model (inputs = input_layer , outputs = gpt2_layer )
129
- base_model .summary ()
130
-
131
- """### Cerebros search for the best model"""
132
-
133
- #
134
- # Cerebros configurables
135
- #
136
- activation = 'gelu'
137
- predecessor_level_connection_affinity_factor_first = 49.9999
138
- predecessor_level_connection_affinity_factor_main = 0.31456
139
- max_consecutive_lateral_connections = 22
140
- p_lateral_connection = 0.39256
141
- num_lateral_connection_tries_per_unit = 10
142
- learning_rate = 0.0000511065
143
- epochs = 6 # [1, 100]
144
- batch_size = 13
145
- maximum_levels = 4 # [3,7]
146
- maximum_units_per_level = 8 # [2,10]
147
- maximum_neurons_per_unit = 5 # [2,20]
148
-
149
- #
150
- # Logging
151
- #
152
- TIME = pendulum .now (tz = 'America/New_York' ).__str__ ()[:16 ]\
153
- .replace ('T' , '_' )\
154
- .replace (':' , '_' )\
155
- .replace ('-' , '_' )
156
- PROJECT_NAME = f'{ TIME } _cerebros_auto_ml_phishing_email_test'
157
-
158
- meta_trial_number = 42 # irrelevant unless in distributed training
159
-
160
- cerebros_automl = SimpleCerebrosRandomSearch (
161
- unit_type = DenseUnit ,
162
- input_shapes = INPUT_SHAPES ,
163
- output_shapes = OUTPUT_SHAPES ,
164
- training_data = training_x ,
165
- labels = train_labels ,
166
- validation_split = 0.35 ,
167
- direction = 'maximize' ,
168
- metric_to_rank_by = "val_binary_accuracy" ,
169
- minimum_levels = 2 ,
170
- maximum_levels = maximum_levels ,
171
- minimum_units_per_level = 1 ,
172
- maximum_units_per_level = maximum_units_per_level ,
173
- minimum_neurons_per_unit = 1 ,
174
- maximum_neurons_per_unit = maximum_neurons_per_unit ,
175
- activation = activation ,
176
- final_activation = 'sigmoid' ,
177
- number_of_architecture_moities_to_try = 2 ,
178
- number_of_tries_per_architecture_moity = 1 ,
179
- minimum_skip_connection_depth = 1 ,
180
- maximum_skip_connection_depth = 7 ,
181
- predecessor_level_connection_affinity_factor_first = predecessor_level_connection_affinity_factor_first ,
182
- predecessor_level_connection_affinity_factor_first_rounding_rule = 'ceil' ,
183
- predecessor_level_connection_affinity_factor_main = predecessor_level_connection_affinity_factor_main ,
184
- predecessor_level_connection_affinity_factor_main_rounding_rule = 'ceil' ,
185
- predecessor_level_connection_affinity_factor_decay_main = zero_7_exp_decay ,
186
- seed = 8675309 ,
187
- max_consecutive_lateral_connections = max_consecutive_lateral_connections ,
188
- gate_after_n_lateral_connections = 3 ,
189
- gate_activation_function = simple_sigmoid ,
190
- p_lateral_connection = p_lateral_connection ,
191
- p_lateral_connection_decay = zero_95_exp_decay ,
192
- num_lateral_connection_tries_per_unit = num_lateral_connection_tries_per_unit ,
193
- learning_rate = learning_rate ,
194
- loss = tf .keras .losses .CategoricalHinge (),
195
- metrics = [tf .keras .metrics .BinaryAccuracy (),
196
- tf .keras .metrics .Precision (),
197
- tf .keras .metrics .Recall ()],
198
- epochs = epochs ,
199
- project_name = f"{ PROJECT_NAME } _meta_{ meta_trial_number } " ,
200
- model_graphs = 'model_graphs' ,
201
- batch_size = batch_size ,
202
- meta_trial_number = meta_trial_number ,
203
- base_models = [base_model ],
204
- train_data_dtype = tf .string )
205
-
206
- result = cerebros_automl .run_random_search ()
207
-
208
- print (f'Best accuracy achieved is { result } ' )
209
- print (f'binary accuracy' )
210
-
211
- """### Testing the best model found"""
212
-
213
- #
214
- # Load the best model (taking into account that it has a custom layer)
215
- #
216
- best_model_found = \
217
- tf .keras .models .load_model (cerebros_automl .best_model_path ,\
218
- custom_objects = {'GPT2Layer' : GPT2Layer (max_seq_length )})
219
-
220
- print ('Evaluating on the test dataset' )
221
- best_model_found .evaluate (X_test , y_test )
135
+ binary_output = tf .keras .layers .Dense (1 , activation = 'sigmoid' )(gpt2_layer ) # Single sigmoid unit as the binary head. NOTE(review): Dense acts on the last axis only - if GPT2Layer emits a rank-3 tensor this gives one output per token; confirm its output shape (see the commented-out Flatten above)
136
+ gpt_baseline_model = Model (inputs = input_layer , outputs = binary_output ) # Maps the string input layer straight to the sigmoid output
137
+
138
+ gpt_baseline_model .compile (
139
+ optimizer = Adam (learning_rate = 1e-4 ), # Small LR since we're fine-tuning GPT
140
+ loss = 'binary_crossentropy' , # Matches the single sigmoid output above
141
+ metrics = ['accuracy' , tf .keras .metrics .AUC (name = 'auc' )] # Logged as 'accuracy' and 'auc' (plus val_ variants during fit)
142
+ )
143
+
144
+ history = gpt_baseline_model .fit (
145
+ x = X_train , # Input data
146
+ y = y_train , # Labels
147
+ epochs = 20 , # Upper bound on epochs; EarlyStopping below may end training sooner
148
+ batch_size = 16 , # Batch size small due to GPU memory constraints
149
+ validation_split = 0.2 , # Hold out 20% of training data for validation (Keras takes the last samples, before shuffling)
150
+ shuffle = True , # Shuffle the training data before each epoch
151
+ callbacks = [
152
+ tf .keras .callbacks .EarlyStopping ( # Stop when val_loss stops improving and roll back to the best weights
153
+ monitor = 'val_loss' ,
154
+ patience = 3 , # Epochs without improvement tolerated before stopping
155
+ restore_best_weights = True ,
156
+ min_delta = 0.001 # Smaller changes in val_loss do not count as improvement
157
+ ),
158
+ tf .keras .callbacks .ReduceLROnPlateau ( # Shrink the learning rate when val_loss plateaus
159
+ monitor = 'val_loss' ,
160
+ factor = 0.2 , # New LR = old LR * 0.2
161
+ patience = 2 , # Shorter than the EarlyStopping patience, so the LR drops before training is abandoned
162
+ min_lr = 1e-6 # Floor for the reduced learning rate
163
+ )
164
+ ]
165
+ )
166
+
167
+ hy = history ["history" ]
168
+ hy_df = pd .DataFrame (hy )
169
+ print (hy_df )
170
+
171
+
172
+
173
+
174
+
175
+
176
+
177
+
178
+
179
+
180
+
181
+
182
+
183
+ # base_model = Model(inputs=input_layer, outputs=gpt2_layer)
184
+ # base_model.summary()
185
+
186
+
187
+
188
+
189
+
190
+
191
+
192
+
193
+ # """### Cerebros search for the best model"""
194
+
195
+ # #
196
+ # # Cerebros configurables
197
+ # #
198
+ # activation = 'gelu'
199
+ # predecessor_level_connection_affinity_factor_first = 49.9999
200
+ # predecessor_level_connection_affinity_factor_main = 0.31456
201
+ # max_consecutive_lateral_connections = 22
202
+ # p_lateral_connection = 0.39256
203
+ # num_lateral_connection_tries_per_unit = 10
204
+ # learning_rate = 0.0000511065
205
+ # epochs = 6 # [1, 100]
206
+ # batch_size = 13
207
+ # maximum_levels = 4 # [3,7]
208
+ # maximum_units_per_level = 8 # [2,10]
209
+ # maximum_neurons_per_unit = 5 # [2,20]
210
+
211
+ # #
212
+ # # Logging
213
+ # #
214
+ # TIME = pendulum.now(tz='America/New_York').__str__()[:16]\
215
+ # .replace('T', '_')\
216
+ # .replace(':', '_')\
217
+ # .replace('-', '_')
218
+ # PROJECT_NAME = f'{TIME}_cerebros_auto_ml_phishing_email_test'
219
+
220
+ # meta_trial_number = 42 # irrelevant unless in distributed training
221
+
222
+ # cerebros_automl = SimpleCerebrosRandomSearch(
223
+ # unit_type=DenseUnit,
224
+ # input_shapes=INPUT_SHAPES,
225
+ # output_shapes=OUTPUT_SHAPES,
226
+ # training_data=training_x,
227
+ # labels=train_labels,
228
+ # validation_split=0.35,
229
+ # direction='maximize',
230
+ # metric_to_rank_by="val_binary_accuracy",
231
+ # minimum_levels=2,
232
+ # maximum_levels=maximum_levels,
233
+ # minimum_units_per_level=1,
234
+ # maximum_units_per_level=maximum_units_per_level,
235
+ # minimum_neurons_per_unit=1,
236
+ # maximum_neurons_per_unit=maximum_neurons_per_unit,
237
+ # activation=activation,
238
+ # final_activation='sigmoid',
239
+ # number_of_architecture_moities_to_try=2,
240
+ # number_of_tries_per_architecture_moity=1,
241
+ # minimum_skip_connection_depth=1,
242
+ # maximum_skip_connection_depth=7,
243
+ # predecessor_level_connection_affinity_factor_first=predecessor_level_connection_affinity_factor_first,
244
+ # predecessor_level_connection_affinity_factor_first_rounding_rule='ceil',
245
+ # predecessor_level_connection_affinity_factor_main=predecessor_level_connection_affinity_factor_main,
246
+ # predecessor_level_connection_affinity_factor_main_rounding_rule='ceil',
247
+ # predecessor_level_connection_affinity_factor_decay_main=zero_7_exp_decay,
248
+ # seed=8675309,
249
+ # max_consecutive_lateral_connections=max_consecutive_lateral_connections,
250
+ # gate_after_n_lateral_connections=3,
251
+ # gate_activation_function=simple_sigmoid,
252
+ # p_lateral_connection=p_lateral_connection,
253
+ # p_lateral_connection_decay=zero_95_exp_decay,
254
+ # num_lateral_connection_tries_per_unit=num_lateral_connection_tries_per_unit,
255
+ # learning_rate=learning_rate,
256
+ # loss=tf.keras.losses.CategoricalHinge(),
257
+ # metrics=[tf.keras.metrics.BinaryAccuracy(),
258
+ # tf.keras.metrics.Precision(),
259
+ # tf.keras.metrics.Recall()],
260
+ # epochs=epochs,
261
+ # project_name=f"{PROJECT_NAME}_meta_{meta_trial_number}",
262
+ # model_graphs='model_graphs',
263
+ # batch_size=batch_size,
264
+ # meta_trial_number=meta_trial_number,
265
+ # base_models=[base_model],
266
+ # train_data_dtype=tf.string)
267
+
268
+ # result = cerebros_automl.run_random_search()
269
+
270
+ # print(f'Best accuracy achieved is {result}')
271
+ # print(f'binary accuracy')
272
+
273
+ # """### Testing the best model found"""
274
+
275
+ # #
276
+ # # Load the best model (taking into account that it has a custom layer)
277
+ # #
278
+ # best_model_found =\
279
+ # tf.keras.models.load_model(cerebros_automl.best_model_path,\
280
+ # custom_objects={'GPT2Layer': GPT2Layer(max_seq_length)})
281
+
282
+ # print('Evaluating on the test dataset')
283
+ # best_model_found.evaluate(X_test, y_test)
0 commit comments