File tree Expand file tree Collapse file tree 2 files changed +5
-7
lines changed Expand file tree Collapse file tree 2 files changed +5
-7
lines changed Original file line number Diff line number Diff line change @@ -1274,14 +1274,13 @@ int32_t llama_context::output_reserve(int32_t n_outputs) {
1274
1274
logits = has_logits ? output_base : nullptr ;
1275
1275
embd = has_embd ? output_base + logits_size : nullptr ;
1276
1276
1277
- output_size = n_outputs_max;
1278
-
1279
1277
// set all ids as invalid (negative)
1280
1278
std::fill (output_ids.begin (), output_ids.end (), -1 );
1281
1279
1282
1280
ggml_backend_buffer_clear (buf_output.get (), 0 );
1283
1281
1284
- n_outputs = 0 ;
1282
+ this ->n_outputs = 0 ;
1283
+ this ->n_outputs_max = n_outputs_max;
1285
1284
1286
1285
return n_outputs_max;
1287
1286
}
@@ -2131,7 +2130,7 @@ size_t llama_context::state_get_data(llama_io_write_i & io) {
2131
2130
2132
2131
std::vector<int32_t > w_output_pos;
2133
2132
2134
- GGML_ASSERT (n_outputs <= output_size );
2133
+ GGML_ASSERT (n_outputs <= n_outputs_max );
2135
2134
2136
2135
w_output_pos.resize (n_outputs);
2137
2136
@@ -2682,7 +2681,6 @@ int llama_context_kv_self::decode(llama_batch & inp_batch) {
2682
2681
/* logits_all */ logits_all);
2683
2682
2684
2683
// reserve output buffer
2685
- // TODO: move to batch manager?
2686
2684
if (output_reserve (n_outputs_all) < n_outputs_all) {
2687
2685
LLAMA_LOG_ERROR (" %s: could not reserve space for batch with %" PRId64 " outputs\n " , __func__, n_outputs_all);
2688
2686
return -2 ;
Original file line number Diff line number Diff line change @@ -375,8 +375,8 @@ struct llama_context : public llama_graph_i {
375
375
// populated only when pooling_type != LLAMA_POOLING_TYPE_NONE
376
376
std::map<llama_seq_id, std::vector<float >> embd_seq;
377
377
378
- int32_t output_size = 0 ; // capacity ( of tokens positions) for the output buffers
379
- int32_t n_outputs = 0 ; // number of actually-used outputs in the current ubatch or last logical batch
378
+ int32_t n_outputs = 0 ; // number of actually-used outputs in the current ubatch or last logical batch
379
+ int32_t n_outputs_max = 0 ; // capacity (in token positions) of the output buffers
380
380
381
381
std::vector<int32_t > output_ids; // map batch token positions to ids of the logits and embd buffers
382
382
You can’t perform that action at this time.
0 commit comments