torchprime/torch_xla_models/configs/model — 1 file changed, +27 −0 lines. Original file line content:
# Model definition for Llama 3.1 8B.
# Hydra-style config: the `defaults` list composes this file with a scaling preset.
defaults:
  - _self_  # refers to this config file
  - scaling: llama-fsdp  # refers to scaling/llama-fsdp.yaml

model_class: llama.LlamaForCausalLM  # Used to import the model from this class

# Tokenizer / embedding sizes.
vocab_size: 128256
tokenizer_name: meta-llama/Meta-Llama-3.1-8B
bos_token_id: 128000
eos_token_id: 128001

# Transformer dimensions.
hidden_size: 4096
intermediate_size: 14336
num_hidden_layers: 32
num_attention_heads: 32
num_key_value_heads: 8  # fewer KV heads than attention heads — presumably grouped-query attention; verify against model code
hidden_act: silu
max_position_embeddings: 131072

# Initialization / normalization.
initializer_range: 0.02
rms_norm_eps: 1.0e-05

# Attention options.
attention_dropout: false
attention_bias: false
flash_attention: true

# Rotary position embedding (RoPE) settings.
rope_theta: 500000.0
rope_scaling:
  factor: 8.0
  low_freq_factor: 1.0
  high_freq_factor: 4.0
  original_context_len: 8192
You can’t perform that action at this time.
0 commit comments