@@ -4688,9 +4688,6 @@ static bool llm_load_tensors(
 
         auto & layer = model.layers[i];
 
-        // TODO: what's the difference between ctx_layer and ctx_split?
-        // A: It seems that ctx_split is for matrices (2d???) while ctx_layer is for other things (like 1D bias and norms, probably.)
-
         // norm
         layer.attn_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd});
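The deleted TODO had already guessed the convention correctly: in the model loader, 2D weight matrices are created in ctx_split so their rows can be split across devices (row split mode), while 1D tensors such as biases and norm weights stay in ctx_layer. A minimal sketch of that routing rule; pick_ctx, Ctx, and the example shapes are illustrative names, not llama.cpp's real API:

#include <cstdint>
#include <cstdio>
#include <initializer_list>

// Toy model of the routing rule: matrices (2D) go to the "split" context so
// their rows can be distributed across devices, while 1D tensors (biases,
// norm weights) stay in the per-layer context.
enum class Ctx { Layer, Split };

static Ctx pick_ctx(std::initializer_list<int64_t> ne) {
    int n_dims = 0;
    for (int64_t d : ne) {
        if (d > 1) n_dims++; // count non-trivial dimensions
    }
    return n_dims >= 2 ? Ctx::Split : Ctx::Layer;
}

int main() {
    // attn_norm has shape {n_embd} -> ctx_layer; a matrix like wq -> ctx_split
    std::printf("attn_norm -> %s\n", pick_ctx({4096})       == Ctx::Split ? "ctx_split" : "ctx_layer");
    std::printf("wq        -> %s\n", pick_ctx({4096, 4096}) == Ctx::Split ? "ctx_split" : "ctx_layer");
}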
@@ -7901,7 +7898,6 @@ struct llm_build_context {
 
         const int32_t n_tok = batch.n_tokens;
 
-        // hopefully the compiler does constant folding
        const int64_t d_model = n_embd;
        const int64_t d_inner = n_head;
        GGML_ASSERT(2 * d_model == d_inner);
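For context, these constants alias hparams that the Mamba conversion repurposes: n_embd carries d_model and n_head carries d_inner, and the assert enforces Mamba's expand factor of 2. A small sketch of the relationship, using hypothetical Mamba-2.8B-class numbers (d_model = 2560 is an assumed example value):

#include <cstdint>
#include <cstdio>

int main() {
    // Assumed example values; the GGUF metadata reuses n_embd for d_model
    // and n_head for d_inner, which is why the assert below holds.
    const int64_t n_embd  = 2560;        // d_model
    const int64_t n_head  = 2 * n_embd;  // d_inner, expand factor E = 2
    const int64_t d_model = n_embd;
    const int64_t d_inner = n_head;
    if (2 * d_model != d_inner) return 1; // mirrors GGML_ASSERT(2 * d_model == d_inner)
    std::printf("d_model = %lld, d_inner = %lld\n",
                (long long) d_model, (long long) d_inner);
}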
@@ -7948,8 +7944,8 @@ struct llm_build_context {
 
        // The following tensor is bigger than strictly needed, to avoid an assertion error when making an overlapping view.
        // TODO: in ggml_new_tensor_impl, handle overlapping data ranges in the data size calculation
-        // This could then be a tensor with ne[] = {(d_conv-1)+n_tok, d_inner}
-        // which is around (d_conv-1) times as small as its current size .
+        // This could then be a tensor with ne[] = {(d_conv-1)+n_tok, d_inner},
+        // but the size difference is not that big (d_conv is usually 4).
        struct ggml_tensor * conv_x = ggml_new_tensor_1d(ctx0, conv_state->type, d_conv*d_inner*n_tok);
        const size_t conv_x_nb1 = (d_conv - 1 + n_tok) * ggml_element_size(conv_x);
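To see what the rewritten comment is weighing, compare the 1D allocation of d_conv*d_inner*n_tok elements against the hypothetical compact layout {(d_conv-1)+n_tok, d_inner}; note that the row stride conv_x_nb1 above is exactly the compact row length, which is what makes the per-row views overlap inside the oversized buffer. A quick sketch of the arithmetic (d_conv = 4 matches the usual Mamba setting; d_inner = 5120 is an assumed example value):

#include <cstdint>
#include <cstdio>

int main() {
    const int64_t d_conv  = 4;    // conv kernel width in released Mamba checkpoints
    const int64_t d_inner = 5120; // assumed example value (2 * d_model for a 2560-dim model)
    for (int64_t n_tok : {1, 32, 512}) {
        const int64_t full    = d_conv * d_inner * n_tok;       // what conv_x allocates
        const int64_t compact = (d_conv - 1 + n_tok) * d_inner; // the hypothetical 2D layout
        std::printf("n_tok=%4lld  full=%9lld  compact=%9lld  ratio=%.2f\n",
                    (long long) n_tok, (long long) full, (long long) compact,
                    (double) full / (double) compact);
    }
}

The ratio starts at 1.00 for n_tok = 1 and approaches d_conv for large batches, so the oversized buffer costs at most about 4x, which is the trade-off the updated comment accepts.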