We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent e65bbf6 commit 66ef1ce · Copy full SHA for 66ef1ce
ggml-metal.m
@@ -1862,9 +1862,10 @@ static enum ggml_status ggml_metal_graph_compute(
1862
// ne21 = n_rows
1863
const int dst_rows = ne20*ne21;
1864
const int dst_rows_min = n_as;
1865
+ const int dst_rows_max = (ctx->device.maxThreadgroupMemoryLength - 32 - 8192)/4;
1866
1867
// max size of the rowids array in the kernel shared buffer
- GGML_ASSERT(dst_rows <= 2048);
1868
+ GGML_ASSERT(dst_rows <= dst_rows_max);
1869
1870
// for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
1871
// AMD GPU and older A-chips will reuse matrix-vector multiplication kernel
0 commit comments