Skip to content
8 changes: 8 additions & 0 deletions configs/7B_isp_sft.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,10 +151,18 @@
cur_iter=-1,
)

# Optional settings, currently disabled (left commented out).
# NOTE(review): the meaning of these keys is not visible in this file —
# confirm against the code that reads this config before enabling them.
# cpu_offloading = dict(
# enable=True,
# num_layers=3,
# )
# selective_checkpoint = True
# selective_checkpoint_offload = False

# NOTE(review): presumably selects whether normalization layers compute in
# fp32 — confirm against the consumer of this config.
use_fp32_norm = False
model = dict(
checkpoint=False, # The proportion of layers for activation checkpointing; the optional values are True/False/[0-1]
num_attention_heads=NUM_ATTENTION_HEAD,
num_kv_attention_heads=NUM_KV_ATTENTION_HEAD,
embed_split_hidden=True,
vocab_size=VOCAB_SIZE,
embed_grad_scale=1,
Expand Down
Loading
Loading