29
29
from cerebros .denseautomlstructuralcomponent .dense_automl_structural_component \
30
30
import zero_7_exp_decay , zero_95_exp_decay , simple_sigmoid
31
31
from ast import literal_eval
32
+ import time
33
+
32
34
33
35
#
34
36
# Load the email data
@@ -141,10 +143,14 @@ def from_config(cls, config):
141
143
metrics = ['accuracy' , tf .keras .metrics .AUC (name = 'auc' )]
142
144
)
143
145
146
# Show the baseline architecture before the clock starts, so rendering the
# summary is not counted as training time. `summary()` prints the table
# itself and returns None, so wrapping it in print() would emit a stray
# "None" line.
gpt_baseline_model.summary()

# Wall-clock start of the GPT-2 baseline fine-tuning run.
gpt_t0 = time.time()
149
+
144
150
history = gpt_baseline_model .fit (
145
151
x = X_train , # Input data
146
152
y = y_train , # Labels
147
- epochs = 20 , # Number of training iterations
153
+ epochs = 4 , # Number of training iterations
148
154
batch_size = 16 , # Batch size small due to GPU memory constraints
149
155
validation_split = 0.2 , # Hold out 20% of training data for validation
150
156
shuffle = True , # Shuffle data at each epoch
@@ -164,111 +170,146 @@ def from_config(cls, config):
164
170
]
165
171
)
166
172
173
# Wall-clock end of the GPT-2 baseline fine-tuning run.
gpt_t1 = time.time()
# Elapsed minutes = (end - start) / 60. The start stamp is gpt_t0;
# subtracting gpt_t1 from itself would always report 0 minutes.
gpt_time_on_one_model_min = (gpt_t1 - gpt_t0) / 60

# Tabulate the recorded training history and print it.
# NOTE(review): this indexes `history` with the key "history". A stock Keras
# History object exposes the per-epoch dict as the `.history` attribute
# instead -- confirm which type `fit` returns here.
hy = history["history"]
hy_df = pd.DataFrame(hy)
print(hy_df)
170
179
180
+ ### Cerebros model:
171
181
182
class TokenizerLayer(tf.keras.layers.Layer):
    """Keras layer that tokenizes text and returns only the token ids.

    Builds a ``GPT2Tokenizer`` from the ``gpt2_base_en`` preset and wraps it
    in a ``GPT2Preprocessor``; ``call`` keeps just the ``"token_ids"`` entry
    of the preprocessor output.

    Args:
        max_seq_length: Passed to the preprocessor as ``sequence_length`` and
            stored so the layer can be re-created from its config.
        **kwargs: Forwarded unchanged to ``tf.keras.layers.Layer``.
    """

    def __init__(self, max_seq_length, **kwargs):
        super().__init__(**kwargs)
        self.max_seq_length = max_seq_length
        self.tokenizer = GPT2Tokenizer.from_preset("gpt2_base_en")
        self.preprocessor = GPT2Preprocessor(
            self.tokenizer,
            sequence_length=max_seq_length,
        )

    def call(self, inputs):
        """Run the preprocessor on ``inputs`` and return its ``"token_ids"``."""
        # Author's note: input is a tensor of strings and the result has
        # shape (batch_size, max_seq_length).
        return self.preprocessor(inputs)["token_ids"]

    def get_config(self):
        """Return the base layer config extended with ``max_seq_length``."""
        return {**super().get_config(), "max_seq_length": self.max_seq_length}
174
198
175
199
200
# Vocabulary size of the GPT-2 tokenizer, used to size the embedding table.
# `vocabulary_size()` is an instance method, so it must be called on a loaded
# tokenizer rather than on the bare GPT2Tokenizer class.
VOCAB_SIZE = GPT2Tokenizer.from_preset("gpt2_base_en").vocabulary_size()


# Create cerebros_base_model
def build_cerebros_base_model(max_seq_length=96, embedding_dim=256, output_dim=VOCAB_SIZE):
    """Build the text-to-features base model handed to the Cerebros search.

    Pipeline: scalar string input -> ``TokenizerLayer`` -> ``Embedding`` ->
    ``Flatten``.

    Args:
        max_seq_length: Sequence length given to ``TokenizerLayer``.
        embedding_dim: Size of each embedding vector (``Embedding.output_dim``).
        output_dim: Not read by the body; kept so existing callers that pass
            it keep working.

    Returns:
        A ``Model`` mapping the string input to the flattened embeddings.
    """
    input_layer = Input(shape=(), dtype=tf.string)  # Text input
    token_ids = TokenizerLayer(max_seq_length)(input_layer)

    # Embedding table sized to the tokenizer vocabulary (the original author
    # notes 50257 for GPT2Base).
    # NOTE(review): mask_zero=True treats id 0 as padding -- confirm that the
    # preprocessor actually pads with 0.
    embedded = tf.keras.layers.Embedding(
        input_dim=VOCAB_SIZE,
        output_dim=embedding_dim,
        mask_zero=True,
        name="custom_embedding",
    )(token_ids)

    # Flatten for downstream models
    flattened = Flatten()(embedded)
    return Model(inputs=input_layer, outputs=flattened)


# Example usage (outputs depend on parameters, set embedding_dim as desired)
cerebros_base_model = build_cerebros_base_model(max_seq_length=96)
178
221
179
222
223
+ """### Cerebros search for the best model"""
180
224
225
+ #
226
+ # Cerebros configurables
227
+ #
228
# --- Training hyperparameters ---
activation = 'gelu'
learning_rate = 0.0000511065
epochs = 6  # [1, 100]
batch_size = 13

# --- Connectivity between levels and lateral connections ---
predecessor_level_connection_affinity_factor_first = 49.9999
predecessor_level_connection_affinity_factor_main = 0.31456
p_lateral_connection = 0.39256
max_consecutive_lateral_connections = 22
num_lateral_connection_tries_per_unit = 10

# --- Upper bounds on the size of each candidate architecture ---
maximum_levels = 4  # [3,7]
maximum_units_per_level = 8  # [2,10]
maximum_neurons_per_unit = 5  # [2,20]

# --- Search budget: moieties to try and tries per moiety ---
moities_to_try = 2
tries_per_moity = 1
181
242
182
-
183
- # base_model = Model(inputs=input_layer, outputs=gpt2_layer)
184
- # base_model.summary()
185
-
186
-
187
-
188
-
189
-
190
-
191
-
192
-
193
- # """### Cerebros search for the best model"""
194
-
195
- # #
196
- # # Cerebros configurables
197
- # #
198
- # activation = 'gelu'
199
- # predecessor_level_connection_affinity_factor_first = 49.9999
200
- # predecessor_level_connection_affinity_factor_main = 0.31456
201
- # max_consecutive_lateral_connections = 22
202
- # p_lateral_connection = 0.39256
203
- # num_lateral_connection_tries_per_unit = 10
204
- # learning_rate = 0.0000511065
205
- # epochs = 6 # [1, 100]
206
- # batch_size = 13
207
- # maximum_levels = 4 # [3,7]
208
- # maximum_units_per_level = 8 # [2,10]
209
- # maximum_neurons_per_unit = 5 # [2,20]
210
-
211
- # #
212
- # # Logging
213
- # #
214
- # TIME = pendulum.now(tz='America/New_York').__str__()[:16]\
215
- # .replace('T', '_')\
216
- # .replace(':', '_')\
217
- # .replace('-', '_')
218
- # PROJECT_NAME = f'{TIME}_cerebros_auto_ml_phishing_email_test'
219
-
220
- # meta_trial_number = 42 # irrelevant unless in distributed training
221
-
222
- # cerebros_automl = SimpleCerebrosRandomSearch(
223
- # unit_type=DenseUnit,
224
- # input_shapes=INPUT_SHAPES,
225
- # output_shapes=OUTPUT_SHAPES,
226
- # training_data=training_x,
227
- # labels=train_labels,
228
- # validation_split=0.35,
229
- # direction='maximize',
230
- # metric_to_rank_by="val_binary_accuracy",
231
- # minimum_levels=2,
232
- # maximum_levels=maximum_levels,
233
- # minimum_units_per_level=1,
234
- # maximum_units_per_level=maximum_units_per_level,
235
- # minimum_neurons_per_unit=1,
236
- # maximum_neurons_per_unit=maximum_neurons_per_unit,
237
- # activation=activation,
238
- # final_activation='sigmoid',
239
- # number_of_architecture_moities_to_try=2,
240
- # number_of_tries_per_architecture_moity=1,
241
- # minimum_skip_connection_depth=1,
242
- # maximum_skip_connection_depth=7,
243
- # predecessor_level_connection_affinity_factor_first=predecessor_level_connection_affinity_factor_first,
244
- # predecessor_level_connection_affinity_factor_first_rounding_rule='ceil',
245
- # predecessor_level_connection_affinity_factor_main=predecessor_level_connection_affinity_factor_main,
246
- # predecessor_level_connection_affinity_factor_main_rounding_rule='ceil',
247
- # predecessor_level_connection_affinity_factor_decay_main=zero_7_exp_decay,
248
- # seed=8675309,
249
- # max_consecutive_lateral_connections=max_consecutive_lateral_connections,
250
- # gate_after_n_lateral_connections=3,
251
- # gate_activation_function=simple_sigmoid,
252
- # p_lateral_connection=p_lateral_connection,
253
- # p_lateral_connection_decay=zero_95_exp_decay,
254
- # num_lateral_connection_tries_per_unit=num_lateral_connection_tries_per_unit,
255
- # learning_rate=learning_rate,
256
- # loss=tf.keras.losses.CategoricalHinge(),
257
- # metrics=[tf.keras.metrics.BinaryAccuracy(),
258
- # tf.keras.metrics.Precision(),
259
- # tf.keras.metrics.Recall()],
260
- # epochs=epochs,
261
- # project_name=f"{PROJECT_NAME}_meta_{meta_trial_number}",
262
- # model_graphs='model_graphs',
263
- # batch_size=batch_size,
264
- # meta_trial_number=meta_trial_number,
265
- # base_models=[base_model],
266
- # train_data_dtype=tf.string)
267
-
268
- # result = cerebros_automl.run_random_search()
269
-
270
- # print(f'Best accuracy achieved is {result}')
271
- # print(f'binary accuracy')
243
+ #
244
+ # Logging
245
+ #
246
# Timestamp used in the project name: the first 16 characters of the current
# America/New_York time as text, with each 'T', ':' and '-' turned into '_'.
TIME = str(pendulum.now(tz='America/New_York'))[:16].translate(
    str.maketrans('T:-', '___')
)
PROJECT_NAME = f'{TIME}_cerebros_auto_ml_phishing_email_test'

meta_trial_number = 42  # irrelevant unless in distributed training
253
+
254
+
255
# Configure the Cerebros random search on top of the text base model.
# Candidates are ranked by validation binary accuracy.
# `tf.keras.metrics.Accuracy` counts exact equality between predictions and
# labels, which sigmoid outputs essentially never satisfy, so the thresholded
# `BinaryAccuracy` metric (and its "val_binary_accuracy" history key) is used.
cerebros_automl = SimpleCerebrosRandomSearch(
    unit_type=DenseUnit,
    input_shapes=INPUT_SHAPES,
    output_shapes=OUTPUT_SHAPES,
    training_data=training_x,
    labels=train_labels,
    validation_split=0.35,
    direction='maximize',
    metric_to_rank_by="val_binary_accuracy",
    minimum_levels=2,
    maximum_levels=maximum_levels,
    minimum_units_per_level=1,
    maximum_units_per_level=maximum_units_per_level,
    minimum_neurons_per_unit=1,
    maximum_neurons_per_unit=maximum_neurons_per_unit,
    activation=activation,
    final_activation='sigmoid',
    number_of_architecture_moities_to_try=moities_to_try,
    number_of_tries_per_architecture_moity=tries_per_moity,
    minimum_skip_connection_depth=1,
    maximum_skip_connection_depth=7,
    predecessor_level_connection_affinity_factor_first=predecessor_level_connection_affinity_factor_first,
    predecessor_level_connection_affinity_factor_first_rounding_rule='ceil',
    predecessor_level_connection_affinity_factor_main=predecessor_level_connection_affinity_factor_main,
    predecessor_level_connection_affinity_factor_main_rounding_rule='ceil',
    predecessor_level_connection_affinity_factor_decay_main=zero_7_exp_decay,
    seed=8675309,
    max_consecutive_lateral_connections=max_consecutive_lateral_connections,
    gate_after_n_lateral_connections=3,
    gate_activation_function=simple_sigmoid,
    p_lateral_connection=p_lateral_connection,
    p_lateral_connection_decay=zero_95_exp_decay,
    num_lateral_connection_tries_per_unit=num_lateral_connection_tries_per_unit,
    learning_rate=learning_rate,
    loss=tf.keras.losses.CategoricalHinge(),
    metrics=[
        tf.keras.metrics.BinaryAccuracy(),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
    ],
    epochs=epochs,
    project_name=f"{PROJECT_NAME}_meta_{meta_trial_number}",
    model_graphs='model_graphs',
    batch_size=batch_size,
    meta_trial_number=meta_trial_number,
    base_models=[cerebros_base_model],
    train_data_dtype=tf.string,
)
300
+
301
# Time the complete Cerebros random search (every candidate model).
cerebros_t0 = time.time()
result = cerebros_automl.run_random_search()
cerebros_t1 = time.time()
cerebros_time_all_models_min = (cerebros_t1 - cerebros_t0) / 60

# Models trained = moieties tried x tries per moiety. The report below uses
# this count rather than a hard-coded number so it stays correct when the
# search budget changes.
cerebros_models_trained = moities_to_try * tries_per_moity
cerebros_time_per_model = cerebros_time_all_models_min / cerebros_models_trained

print(f"Cerebros trained {cerebros_models_trained} models FROM A COLD START in ONLY {cerebros_time_all_models_min} min. Cerebros took only {cerebros_time_per_model} minutes on average per model.")
# The GPT-2 duration is measured in minutes, so the unit is stated explicitly.
print(f"GPT2 took {gpt_time_on_one_model_min} min just to FINE TUNE one PRE - TRAINED model. Although this is a small scale test, this shows the advantage of scaling in ON timing VS ON**2 timing.")


print(f'Cerebros best accuracy achieved is {result}')
print('val set accuracy')
272
313
273
314
# """### Testing the best model found"""
274
315
0 commit comments