@@ -4441,9 +4441,6 @@ static bool llm_load_tensors(
4441
4441
4442
4442
auto & layer = model.layers [i];
4443
4443
4444
- // TODO: what's the difference between ctx_layer and ctx_split?
4445
- // A: It seems that ctx_split is for matrices (2d???) while ctx_layer is for other things (like 1D bias and norms, probably.)
4446
-
4447
4444
// norm
4448
4445
layer.attn_norm = ml.create_tensor (ctx_layer, tn (LLM_TENSOR_ATTN_NORM, " weight" , i), {n_embd});
4449
4446
@@ -7460,7 +7457,6 @@ struct llm_build_context {
7460
7457
7461
7458
const int32_t n_tok = batch.n_tokens ;
7462
7459
7463
- // hopefully the compiler does constant folding
7464
7460
const int64_t d_model = n_embd;
7465
7461
const int64_t d_inner = n_head;
7466
7462
GGML_ASSERT (2 * d_model == d_inner);
@@ -7507,8 +7503,8 @@ struct llm_build_context {
7507
7503
7508
7504
// The following tensor is larger than it needs to be; this avoids an assertion error when making an overlapping view.
7509
7505
// TODO: in ggml_new_tensor_impl, handle overlapping data range in data size calculation
7510
- // This could then be a tensor with ne[] = {(d_conv-1)+n_tok, d_inner}
7511
- // which is around (d_conv-1) times as small as its current size .
7506
+ // This could then be a tensor with ne[] = {(d_conv-1)+n_tok, d_inner},
7507
+ // but the size difference is not that big (d_conv is usually 4).
7512
7508
struct ggml_tensor * conv_x = ggml_new_tensor_1d (ctx0, conv_state->type , d_conv*d_inner*n_tok);
7513
7509
const size_t conv_x_nb1 = (d_conv - 1 + n_tok) * ggml_element_size (conv_x);
7514
7510
0 commit comments