@@ -358,7 +358,7 @@ static __global__ void cpy_q_f32(const char * cx, char * cdst_direct, const int
 // Copy destination pointers to GPU to be available when pointer indirection is in use
 
 void ggml_cuda_cpy_dest_ptrs_copy(ggml_cuda_graph * cuda_graph, char ** host_dest_ptrs, const int host_dest_ptrs_size, cudaStream_t stream) {
-#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS)
+#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS) || defined(GGML_MUSA_GRAPHS)
     if (cuda_graph->dest_ptrs_size < host_dest_ptrs_size) { // (re-)allocate GPU memory for destination pointers
         CUDA_CHECK(cudaStreamSynchronize(stream));
         if (cuda_graph->dest_ptrs_d != nullptr) {
@@ -590,7 +590,7 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg
 
     char ** dest_ptrs_d = nullptr;
     int graph_cpynode_index = -1;
-#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS)
+#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS) || defined(GGML_MUSA_GRAPHS)
     if (ctx.cuda_graph->use_cpy_indirection && !disable_indirection_for_this_node) {
         dest_ptrs_d = ctx.cuda_graph->dest_ptrs_d;
         graph_cpynode_index = ctx.cuda_graph->graph_cpynode_index;
@@ -647,7 +647,7 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg
         GGML_ABORT("%s: unsupported type combination (%s to %s)\n", __func__,
                 ggml_type_name(src0->type), ggml_type_name(src1->type));
     }
-#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS)
+#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS) || defined(GGML_MUSA_GRAPHS)
     if (ctx.cuda_graph->use_cpy_indirection && !disable_indirection_for_this_node) {
         ctx.cuda_graph->graph_cpynode_index = graph_cpynode_index;
     }
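Note on the guarded code path: when device graphs are in use (CUDA, HIP, and now MUSA builds), the copy kernel's launch arguments are frozen inside the captured graph, so the destination address cannot simply be baked into the launch. Instead, ggml_cuda_cpy_dest_ptrs_copy stages an array of destination pointers on the device and the kernel resolves its target through that table at run time; the hunks above only extend the same preprocessor guard to GGML_MUSA_GRAPHS. A minimal, self-contained CUDA sketch of this pointer-indirection idea follows; it is not the ggml implementation, and the kernel and variable names (copy_indirect, dest_table, slot) are hypothetical.

    // Sketch: a kernel that reads its destination from a device-side pointer
    // table, so the host can retarget it between graph replays without
    // re-capturing the graph.
    #include <cuda_runtime.h>

    __global__ void copy_indirect(const float * src, float ** dest_table, int slot, int n) {
        float * dst = dest_table[slot];                // pointer indirection, resolved at run time
        const int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n) {
            dst[i] = src[i];
        }
    }

    int main() {
        const int n = 256;
        float *  src_d   = nullptr;
        float *  dst_d   = nullptr;
        float ** table_d = nullptr;
        cudaMalloc((void **) &src_d,   n * sizeof(float));
        cudaMalloc((void **) &dst_d,   n * sizeof(float));
        cudaMalloc((void **) &table_d, sizeof(float *));
        cudaMemset(src_d, 0, n * sizeof(float));

        // The host refreshes the device-side table; the kernel launch itself never changes.
        float * host_table[1] = { dst_d };
        cudaMemcpy(table_d, host_table, sizeof(float *), cudaMemcpyHostToDevice);

        copy_indirect<<<(n + 255) / 256, 256>>>(src_d, table_d, 0, n);
        cudaDeviceSynchronize();

        cudaFree(src_d);
        cudaFree(dst_d);
        cudaFree(table_d);
        return 0;
    }

In the real code the table refresh is what ggml_cuda_cpy_dest_ptrs_copy performs on the given stream, and graph_cpynode_index tracks which slot each captured copy node should read.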