diff --git a/mergekit/_data/architectures/cohere2.json b/mergekit/_data/architectures/cohere2.json new file mode 100644 index 00000000..755e7db2 --- /dev/null +++ b/mergekit/_data/architectures/cohere2.json @@ -0,0 +1,51 @@ +{ + "model_type": "cohere2", + "architectures": [ + "Cohere2ForCausalLM" + ], + "pre_weights": [ + { + "name": "model.embed_tokens.weight", + "is_embed": true + } + ], + "post_weights": [ + { + "name": "model.norm.weight" + }, + { + "name": "lm_head.weight", + "is_embed": true, + "optional": true + } + ], + "num_layers_config_key": "num_hidden_layers", + "layer_templates": { + "weights": [ + { + "name": "model.layers.${layer_index}.self_attn.q_proj.weight" + }, + { + "name": "model.layers.${layer_index}.self_attn.k_proj.weight" + }, + { + "name": "model.layers.${layer_index}.self_attn.v_proj.weight" + }, + { + "name": "model.layers.${layer_index}.self_attn.o_proj.weight" + }, + { + "name": "model.layers.${layer_index}.mlp.gate_proj.weight" + }, + { + "name": "model.layers.${layer_index}.mlp.up_proj.weight" + }, + { + "name": "model.layers.${layer_index}.mlp.down_proj.weight" + }, + { + "name": "model.layers.${layer_index}.input_layernorm.weight" + } + ] + } +}