diff --git a/src/llama-model.cpp b/src/llama-model.cpp index e424350bdd783..4468c837f1c28 100644 --- a/src/llama-model.cpp +++ b/src/llama-model.cpp @@ -4555,7 +4555,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) { // output output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {hidden_size, n_vocab}, TENSOR_NOT_REQUIRED); output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {hidden_size}, 0); - + // if output is NULL, init from the input tok embed if (output == NULL) { output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {hidden_size, n_vocab}, TENSOR_DUPLICATED);