@@ -261,7 +261,6 @@ struct cmd_params {
261
261
std::vector<bool > use_mmap;
262
262
std::vector<bool > embeddings;
263
263
std::vector<bool > no_op_offload;
264
- std::vector<bool > graph_reuse;
265
264
ggml_numa_strategy numa;
266
265
int reps;
267
266
ggml_sched_priority prio;
@@ -299,7 +298,6 @@ static const cmd_params cmd_params_defaults = {
299
298
/* use_mmap */ { true },
300
299
/* embeddings */ { false },
301
300
/* no_op_offload */ { false },
302
- /* graph_reuse */ { false },
303
301
/* numa */ GGML_NUMA_STRATEGY_DISABLED,
304
302
/* reps */ 5 ,
305
303
/* prio */ GGML_SCHED_PRIO_NORMAL,
@@ -379,7 +377,6 @@ static void print_usage(int /* argc */, char ** argv) {
379
377
printf (" -ot --override-tensors <tensor name pattern>=<buffer type>;...\n " );
380
378
printf (" (default: disabled)\n " );
381
379
printf (" -nopo, --no-op-offload <0|1> (default: 0)\n " );
382
- printf (" -gr, --graph-reuse <0|1> (default: 0)\n " );
383
380
printf (" \n " );
384
381
printf (
385
382
" Multiple values can be given for each parameter by separating them with ','\n "
@@ -623,13 +620,6 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
623
620
}
624
621
auto p = string_split<bool >(argv[i], split_delim);
625
622
params.no_kv_offload .insert (params.no_kv_offload .end (), p.begin (), p.end ());
626
- } else if (arg == " -gr" || arg == " --graph-reuse" ) {
627
- if (++i >= argc) {
628
- invalid_param = true ;
629
- break ;
630
- }
631
- auto p = string_split<bool >(argv[i], split_delim);
632
- params.graph_reuse .insert (params.graph_reuse .end (), p.begin (), p.end ());
633
623
} else if (arg == " --numa" ) {
634
624
if (++i >= argc) {
635
625
invalid_param = true ;
@@ -895,9 +885,6 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
895
885
if (params.no_op_offload .empty ()) {
896
886
params.no_op_offload = cmd_params_defaults.no_op_offload ;
897
887
}
898
- if (params.graph_reuse .empty ()) {
899
- params.graph_reuse = cmd_params_defaults.graph_reuse ;
900
- }
901
888
if (params.n_threads .empty ()) {
902
889
params.n_threads = cmd_params_defaults.n_threads ;
903
890
}
@@ -939,7 +926,6 @@ struct cmd_params_instance {
939
926
bool use_mmap;
940
927
bool embeddings;
941
928
bool no_op_offload;
942
- bool graph_reuse;
943
929
944
930
llama_model_params to_llama_mparams () const {
945
931
llama_model_params mparams = llama_model_default_params ();
@@ -1012,7 +998,6 @@ struct cmd_params_instance {
1012
998
cparams.embeddings = embeddings;
1013
999
cparams.op_offload = !no_op_offload;
1014
1000
cparams.swa_full = false ;
1015
- cparams.graph_reuse = graph_reuse;
1016
1001
1017
1002
return cparams;
1018
1003
}
@@ -1033,7 +1018,6 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
1033
1018
for (const auto & mmp : params.use_mmap )
1034
1019
for (const auto & embd : params.embeddings )
1035
1020
for (const auto & nopo : params.no_op_offload )
1036
- for (const auto & gr : params.graph_reuse )
1037
1021
for (const auto & nb : params.n_batch )
1038
1022
for (const auto & nub : params.n_ubatch )
1039
1023
for (const auto & tk : params.type_k )
@@ -1075,7 +1059,6 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
1075
1059
/* .use_mmap = */ mmp,
1076
1060
/* .embeddings = */ embd,
1077
1061
/* .no_op_offload= */ nopo,
1078
- /* .graph_reuse = */ gr,
1079
1062
};
1080
1063
instances.push_back (instance);
1081
1064
}
@@ -1109,7 +1092,6 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
1109
1092
/* .use_mmap = */ mmp,
1110
1093
/* .embeddings = */ embd,
1111
1094
/* .no_op_offload= */ nopo,
1112
- /* .graph_reuse = */ gr,
1113
1095
};
1114
1096
instances.push_back (instance);
1115
1097
}
@@ -1143,7 +1125,6 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
1143
1125
/* .use_mmap = */ mmp,
1144
1126
/* .embeddings = */ embd,
1145
1127
/* .no_op_offload= */ nopo,
1146
- /* .graph_reuse = */ gr,
1147
1128
};
1148
1129
instances.push_back (instance);
1149
1130
}
@@ -1181,7 +1162,6 @@ struct test {
1181
1162
bool use_mmap;
1182
1163
bool embeddings;
1183
1164
bool no_op_offload;
1184
- bool graph_reuse;
1185
1165
int n_prompt;
1186
1166
int n_gen;
1187
1167
int n_depth;
@@ -1217,7 +1197,6 @@ struct test {
1217
1197
use_mmap = inst.use_mmap ;
1218
1198
embeddings = inst.embeddings ;
1219
1199
no_op_offload = inst.no_op_offload ;
1220
- graph_reuse = inst.graph_reuse ;
1221
1200
n_prompt = inst.n_prompt ;
1222
1201
n_gen = inst.n_gen ;
1223
1202
n_depth = inst.n_depth ;
@@ -1264,8 +1243,8 @@ struct test {
1264
1243
" cpu_mask" , " cpu_strict" , " poll" , " type_k" , " type_v" , " n_gpu_layers" ,
1265
1244
" split_mode" , " main_gpu" , " no_kv_offload" , " flash_attn" , " tensor_split" , " tensor_buft_overrides" ,
1266
1245
" defrag_thold" ,
1267
- " use_mmap" , " embeddings" , " no_op_offload" , " graph_reuse " , " n_prompt" , " n_gen" , " n_depth" ,
1268
- " test_time " , " avg_ns" , " stddev_ns" , " avg_ts" , " stddev_ts" ,
1246
+ " use_mmap" , " embeddings" , " no_op_offload" , " n_prompt" , " n_gen" , " n_depth" , " test_time " ,
1247
+ " avg_ns" , " stddev_ns" , " avg_ts" , " stddev_ts" ,
1269
1248
};
1270
1249
return fields;
1271
1250
}
@@ -1280,7 +1259,7 @@ struct test {
1280
1259
return INT;
1281
1260
}
1282
1261
if (field == " f16_kv" || field == " no_kv_offload" || field == " cpu_strict" || field == " flash_attn" ||
1283
- field == " use_mmap" || field == " embeddings" || field == " graph_reuse " ) {
1262
+ field == " use_mmap" || field == " embeddings" ) {
1284
1263
return BOOL;
1285
1264
}
1286
1265
if (field == " avg_ts" || field == " stddev_ts" || field == " defrag_thold" ) {
@@ -1354,7 +1333,6 @@ struct test {
1354
1333
std::to_string (use_mmap),
1355
1334
std::to_string (embeddings),
1356
1335
std::to_string (no_op_offload),
1357
- std::to_string (graph_reuse),
1358
1336
std::to_string (n_prompt),
1359
1337
std::to_string (n_gen),
1360
1338
std::to_string (n_depth),
@@ -1540,9 +1518,6 @@ struct markdown_printer : public printer {
1540
1518
if (field == " no_op_offload" ) {
1541
1519
return 4 ;
1542
1520
}
1543
- if (field == " graph_reuse" ) {
1544
- return 4 ;
1545
- }
1546
1521
1547
1522
int width = std::max ((int ) field.length (), 10 );
1548
1523
@@ -1577,9 +1552,6 @@ struct markdown_printer : public printer {
1577
1552
if (field == " no_op_offload" ) {
1578
1553
return " nopo" ;
1579
1554
}
1580
- if (field == " graph_reuse" ) {
1581
- return " gr" ;
1582
- }
1583
1555
if (field == " tensor_split" ) {
1584
1556
return " ts" ;
1585
1557
}
@@ -1654,9 +1626,6 @@ struct markdown_printer : public printer {
1654
1626
if (params.no_op_offload .size () > 1 || params.no_op_offload != cmd_params_defaults.no_op_offload ) {
1655
1627
fields.emplace_back (" no_op_offload" );
1656
1628
}
1657
- if (params.graph_reuse .size () > 1 || params.graph_reuse != cmd_params_defaults.graph_reuse ) {
1658
- fields.emplace_back (" graph_reuse" );
1659
- }
1660
1629
fields.emplace_back (" test" );
1661
1630
fields.emplace_back (" t/s" );
1662
1631
0 commit comments