File tree Expand file tree Collapse file tree 3 files changed +6
-0
lines changed Expand file tree Collapse file tree 3 files changed +6
-0
lines changed Original file line number Diff line number Diff line change @@ -224,6 +224,7 @@ extern "C" {
224
224
// = ggml_backend_dev_init(ggml_backend_dev_by_type(GPU) OR ggml_backend_dev_by_type(CPU), NULL)
225
225
GGML_API ggml_backend_t ggml_backend_init_best (void );
226
226
227
// Overwrite the size recorded in `buffer` with `cur_size` (assigns buffer->size only)
+ GGML_API void ggml_backend_set_size (ggml_backend_buffer_t buffer , size_t cur_size );
227
228
// Load a backend from a dynamic library and register it
228
229
GGML_API ggml_backend_reg_t ggml_backend_load (const char * path );
229
230
// Unload a backend if loaded dynamically and unregister it
Original file line number Diff line number Diff line change @@ -536,6 +536,10 @@ struct ggml_backend_multi_buffer_context {
536
536
size_t n_buffers;
537
537
};
538
538
539
+ void ggml_backend_set_size (ggml_backend_buffer_t buffer, size_t cur_size){
540
+ buffer->size = cur_size;
541
+ }
542
+
539
543
static void ggml_backend_multi_buffer_free_buffer (ggml_backend_buffer_t buffer) {
540
544
ggml_backend_multi_buffer_context * ctx = (ggml_backend_multi_buffer_context *) buffer->context ;
541
545
for (size_t i = 0 ; i < ctx->n_buffers ; i++) {
Original file line number Diff line number Diff line change @@ -1548,6 +1548,7 @@ int32_t llama_context::output_reserve(int32_t n_outputs) {
1548
1548
std::fill (output_ids.begin (), output_ids.end (), -1 );
1549
1549
1550
1550
ggml_backend_buffer_clear (buf_output.get (), 0 );
1551
+ ggml_backend_set_size (buf_output.get (), new_size);
1551
1552
1552
1553
this ->n_outputs = 0 ;
1553
1554
this ->n_outputs_max = n_outputs_max;
You can’t perform that action at this time.
0 commit comments