|
18 | 18 |
|
19 | 19 | /* Constants */
|
20 | 20 |
|
21 |
| -// TODO: find a better way to get the memory available |
22 |
| -#define WEBGPU_MAX_BUFFERS 32 |
23 |
| - |
24 | 21 | #define WEBGPU_MUL_MAT_WG_SIZE 64
|
25 | 22 | #define WEBGPU_MUL_MAT_PARAMS_SIZE (13 * sizeof(uint32_t)) // M, N, K, batch sizes, broadcasts
|
26 | 23 | #define WEBGPU_CPY_PARAMS_SIZE (15 * sizeof(uint32_t)) // strides and offsets
|
@@ -119,7 +116,7 @@ static void ggml_webgpu_create_pipeline(wgpu::Device &device, wgpu::ComputePipel
|
119 | 116 | pipeline_desc.label = label;
|
120 | 117 | pipeline_desc.compute.module = shader_module;
|
121 | 118 | pipeline_desc.compute.entryPoint = "main"; // Entry point in the WGSL code
|
122 |
| - pipeline_desc.layout = nullptr; // Guessing that nullptr means auto layout |
| 119 | + pipeline_desc.layout = nullptr; // nullptr means auto layout |
123 | 120 | if (constants.size() > 0) {
|
124 | 121 | pipeline_desc.compute.constants = constants.data();
|
125 | 122 | pipeline_desc.compute.constantCount = constants.size();
|
@@ -199,7 +196,6 @@ static void ggml_backend_webgpu_buffer_memset(webgpu_context ctx, wgpu::Buffer b
|
199 | 196 | pass.End();
|
200 | 197 | wgpu::CommandBuffer commands = encoder.Finish();
|
201 | 198 |
|
202 |
| - // TODO, async, do we need to wait on this? |
203 | 199 | ctx->queue.Submit(1, &commands);
|
204 | 200 | }
|
205 | 201 |
|
@@ -489,7 +485,6 @@ static void ggml_backend_webgpu_buffer_set_tensor(ggml_backend_buffer_t buffer,
|
489 | 485 |
|
490 | 486 | size_t total_offset = webgpu_tensor_offset(tensor) + tensor->view_offs + offset;
|
491 | 487 |
|
492 |
| - // TODO: wait on this? |
493 | 488 | webgpu_ctx->queue.WriteBuffer(buf_ctx->buffer, total_offset, data, (size/4)*4);
|
494 | 489 |
|
495 | 490 | if (size % 4 != 0) {
|
@@ -617,9 +612,9 @@ static const char * ggml_backend_webgpu_device_get_description(ggml_backend_dev_
|
617 | 612 |
|
618 | 613 | static void ggml_backend_webgpu_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
|
619 | 614 | ggml_backend_webgpu_device_context * ctx = static_cast<ggml_backend_webgpu_device_context *>(dev->context);
|
620 |
| - // TODO: what do we actually want to return here? |
621 |
| - *free = ctx->webgpu_ctx->limits.maxBufferSize * WEBGPU_MAX_BUFFERS; |
622 |
| - *total = ctx->webgpu_ctx->limits.maxBufferSize * WEBGPU_MAX_BUFFERS; |
| 615 | + // TODO: what do we actually want to return here? maxBufferSize might not be the full available memory. |
| 616 | + *free = ctx->webgpu_ctx->limits.maxBufferSize; |
| 617 | + *total = ctx->webgpu_ctx->limits.maxBufferSize; |
623 | 618 | }
|
624 | 619 |
|
625 | 620 | static enum ggml_backend_dev_type ggml_backend_webgpu_device_get_type(ggml_backend_dev_t dev) {
|
|
0 commit comments