@@ -469,7 +469,7 @@ def to_ggml(self) -> 'UnquantizedTensor':
469
469
470
470
def permute_part (self , n_part : int , n_head : int ) -> 'UnquantizedTensor' :
471
471
r = self .ndarray .shape [0 ] // 3
472
- return UnquantizedTensor (permute (self .ndarray [r * n_part : r * n_part + r , ...], n_head ))
472
+ return UnquantizedTensor (permute (self .ndarray [r * n_part : r * n_part + r , ...], n_head , n_head ))
473
473
474
474
def part (self , n_part : int ) -> 'UnquantizedTensor' :
475
475
r = self .ndarray .shape [0 ] // 3
@@ -952,9 +952,10 @@ def convert_model_names(model: LazyModel, params: Params) -> LazyModel:
952
952
#tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"]
953
953
elif f"model.layers.{ i } .self_attn.W_pack.weight" in model :
954
954
print (f"Unpacking and permuting layer { i } " )
955
- tmp [f"model.layers.{ i } .self_attn.q_proj.weight" ] = permute_part_lazy (model [f"model.layers.{ i } .self_attn.W_pack.weight" ], 0 , params .n_head , params . n_head )
956
- tmp [f"model.layers.{ i } .self_attn.k_proj.weight" ] = permute_part_lazy (model [f"model.layers.{ i } .self_attn.W_pack.weight" ], 1 , params .n_head , params . n_head_kv )
955
+ tmp [f"model.layers.{ i } .self_attn.q_proj.weight" ] = permute_part_lazy (model [f"model.layers.{ i } .self_attn.W_pack.weight" ], 0 , params .n_head )
956
+ tmp [f"model.layers.{ i } .self_attn.k_proj.weight" ] = permute_part_lazy (model [f"model.layers.{ i } .self_attn.W_pack.weight" ], 1 , params .n_head )
957
957
tmp [f"model.layers.{ i } .self_attn.v_proj.weight" ] = part_lazy (model [f"model.layers.{ i } .self_attn.W_pack.weight" ], 2 )
958
+ del tmp [f"model.layers.{ i } .self_attn.W_pack.weight" ]
958
959
else :
959
960
break
960
961
0 commit comments