
Commit 3753b30

context : fix n_outputs init
ggml-ci
1 parent f588a70 commit 3753b30

2 files changed: 5 additions & 7 deletions


src/llama-context.cpp

Lines changed: 3 additions & 5 deletions
@@ -1274,14 +1274,13 @@ int32_t llama_context::output_reserve(int32_t n_outputs) {
     logits = has_logits ? output_base               : nullptr;
     embd   = has_embd   ? output_base + logits_size : nullptr;
 
-    output_size = n_outputs_max;
-
     // set all ids as invalid (negative)
     std::fill(output_ids.begin(), output_ids.end(), -1);
 
     ggml_backend_buffer_clear(buf_output.get(), 0);
 
-    n_outputs = 0;
+    this->n_outputs     = 0;
+    this->n_outputs_max = n_outputs_max;
 
     return n_outputs_max;
 }
@@ -2131,7 +2130,7 @@ size_t llama_context::state_get_data(llama_io_write_i & io) {
 
     std::vector<int32_t> w_output_pos;
 
-    GGML_ASSERT(n_outputs <= output_size);
+    GGML_ASSERT(n_outputs <= n_outputs_max);
 
     w_output_pos.resize(n_outputs);
 
@@ -2682,7 +2681,6 @@ int llama_context_kv_self::decode(llama_batch & inp_batch) {
             /* logits_all */ logits_all);
 
     // reserve output buffer
-    // TODO: move to batch manager?
    if (output_reserve(n_outputs_all) < n_outputs_all) {
        LLAMA_LOG_ERROR("%s: could not reserve space for batch with %" PRId64 " outputs\n", __func__, n_outputs_all);
        return -2;

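The substantive change in output_reserve() is the this-> qualification: the function's parameter is also named n_outputs, so the previous unqualified n_outputs = 0; assigned to the parameter and left the member untouched, which is the stale initialization the commit title refers to. Renaming the capacity field to n_outputs_max (and updating the GGML_ASSERT accordingly) keeps the member it pairs with unambiguous. A minimal standalone sketch of the shadowing pitfall, using hypothetical names rather than the actual llama.cpp types:

// minimal sketch of the parameter-shadows-member pitfall (hypothetical names, not llama.cpp code)
#include <cstdio>

struct ctx {
    int n_outputs = -1;

    // the parameter deliberately shadows the member, mirroring output_reserve(int32_t n_outputs)
    void reserve(int n_outputs) {
        n_outputs       = 0; // assigns to the parameter; the member keeps its old value
        this->n_outputs = 0; // the qualified form is what actually resets the member
    }
};

int main() {
    ctx c;
    c.reserve(8);
    std::printf("member n_outputs = %d\n", c.n_outputs); // prints 0 only because of the this-> line
    return 0;
}

Compilers can flag this kind of parameter/member collision with shadowing warnings (e.g. GCC's -Wshadow).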
src/llama-context.h

Lines changed: 2 additions & 2 deletions
@@ -375,8 +375,8 @@ struct llama_context : public llama_graph_i {
     // populated only when pooling_type != LLAMA_POOLING_TYPE_NONE
     std::map<llama_seq_id, std::vector<float>> embd_seq;
 
-    int32_t output_size = 0; // capacity (of tokens positions) for the output buffers
-    int32_t n_outputs   = 0; // number of actually-used outputs in the current ubatch or last logical batch
+    int32_t n_outputs     = 0; // number of actually-used outputs in the current ubatch or last logical batch
+    int32_t n_outputs_max = 0; // capacity (of tokens positions) for the output buffers
 
     std::vector<int32_t> output_ids; // map batch token positions to ids of the logits and embd buffers
 
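After the rename, the two counters read as usage versus capacity. A hedged sketch of how they and output_ids relate, with semantics taken only from the comments in this diff (the demo_* names are illustrative, not llama.cpp API):

// hedged sketch of the assumed relationship between the renamed fields
#include <cassert>
#include <cstdint>
#include <vector>

struct demo_output_state {
    int32_t n_outputs     = 0;       // outputs actually used in the current ubatch
    int32_t n_outputs_max = 0;       // capacity reserved for the output buffers
    std::vector<int32_t> output_ids; // batch position -> row in logits/embd, -1 = no output
};

// mirrors the shape of output_reserve(): invalidate ids, reset usage, record capacity
int32_t demo_reserve(demo_output_state & s, int32_t n_batch_tokens, int32_t n_wanted) {
    s.output_ids.assign(n_batch_tokens, -1);
    s.n_outputs     = 0;
    s.n_outputs_max = n_wanted;
    return s.n_outputs_max;
}

// the invariant asserted in state_get_data(): usage never exceeds capacity
void demo_check(const demo_output_state & s) {
    assert(s.n_outputs <= s.n_outputs_max);
}

int main() {
    demo_output_state s;
    demo_reserve(s, /*n_batch_tokens=*/32, /*n_wanted=*/8);
    s.n_outputs = 8;  // pretend 8 outputs were produced
    demo_check(s);    // holds: 8 <= 8
    return 0;
}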