Skip to content

Commit c768e54

Browse files
authored
musa: upgrade musa sdk to rc4.2.0 (#13)
* musa: upgrade musa sdk to 4.2.0
  Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
* musa: remove mudnn and apply wa for libmusa.so
  Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
* musa: restore rc in docker image tag
  Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
---------
Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
1 parent 994a860 commit c768e54

File tree

5 files changed

+33
-14
lines changed

5 files changed

+33
-14
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -159,7 +159,7 @@ if(MUSAToolkit_FOUND)
159159
install(TARGETS ggml-musa
160160
RUNTIME_DEPENDENCIES
161161
DIRECTORIES ${MUSAToolkit_BIN_DIR} ${MUSAToolkit_LIBRARY_DIR}
162-
PRE_INCLUDE_REGEXES mudnn mublas musart musa
162+
PRE_INCLUDE_REGEXES mublas musart musa
163163
PRE_EXCLUDE_REGEXES ".*"
164164
RUNTIME DESTINATION ${OLLAMA_MUSA_INSTALL_DIR} COMPONENT MUSA
165165
LIBRARY DESTINATION ${OLLAMA_MUSA_INSTALL_DIR} COMPONENT MUSA

Dockerfile

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,8 @@ ARG ROCMVERSION=6.3.3
66
ARG JETPACK5VERSION=r35.4.1
77
ARG JETPACK6VERSION=r36.4.0
88
ARG CMAKEVERSION=3.31.2
9-
ARG MUSAVERSION=rc4.0.1
9+
ARG MUSAVERSION=rc4.2.0
10+
ARG UBUNTUVERSION=22.04
1011

1112
# We require gcc v10 minimum. v10.3 has regressions, so the rockylinux 8.5 AppStream has the latest compatible version
1213
FROM --platform=linux/amd64 rocm/dev-almalinux-8:${ROCMVERSION}-complete AS base-amd64
@@ -103,7 +104,7 @@ FROM scratch AS rocm
103104
COPY --from=rocm-6 dist/lib/ollama /lib/ollama
104105

105106
# Moore Threads (MUSA) build stages
106-
FROM mthreads/musa:${MUSAVERSION}-mudnn-devel-ubuntu22.04 AS musa-4
107+
FROM mthreads/musa:${MUSAVERSION}-devel-ubuntu${UBUNTUVERSION}-amd64 AS musa-4
107108
RUN apt-get update \
108109
&& apt-get install -y curl \
109110
&& apt-get clean \
@@ -117,6 +118,14 @@ RUN --mount=type=cache,target=/root/.ccache \
117118
cmake --preset 'MUSA 4' \
118119
&& cmake --build --parallel --preset 'MUSA 4' \
119120
&& cmake --install build --component MUSA --strip --parallel 8
121+
# TODO: Remove the following lines in next release. Workaround: point an unversioned libmusa.so symlink at the first libmusa.so.<N> file found; the name check uses grep -E instead of bash's [[ =~ ]] so it also works when RUN executes under a POSIX /bin/sh.
122+
RUN cd dist/lib/ollama/musa_v4 && \
123+
for f in libmusa.so.*; do \
124+
if [ -f "$f" ] && printf '%s\n' "$f" | grep -Eq '^libmusa\.so\.[0-9]+$'; then \
125+
ln -sf "$f" libmusa.so; \
126+
break; \
127+
fi; \
128+
done
120129

121130
FROM scratch AS musa
122131
COPY --from=musa-4 dist/lib/ollama/musa_v4 /lib/ollama/musa_v4
@@ -125,7 +134,7 @@ FROM ${FLAVOR} AS archive
125134
COPY --from=cpu dist/lib/ollama /lib/ollama
126135
COPY --from=build /bin/ollama /bin/ollama
127136

128-
FROM ubuntu:22.04
137+
FROM ubuntu:${UBUNTUVERSION}
129138
RUN apt-get update \
130139
&& apt-get install -y ca-certificates \
131140
&& apt-get clean \

ml/backend/ggml/ggml/src/ggml-cuda/cpy.cu

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
#include "cpy.cuh"
22
#include "dequantize.cuh"
3-
#ifdef GGML_USE_MUSA
3+
#if defined(GGML_USE_MUSA) && defined(GGML_MUSA_MUDNN_COPY)
44
#include "ggml-musa/mudnn.cuh"
5-
#endif // GGML_USE_MUSA
5+
#endif // GGML_USE_MUSA && GGML_MUSA_MUDNN_COPY
66

77
typedef void (*cpy_kernel_t)(const char * cx, char * cdst);
88

@@ -645,11 +645,11 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg
645645
#endif
646646
if (src0->type == src1->type && ggml_is_contiguous(src0) && ggml_is_contiguous(src1)) {
647647
GGML_ASSERT(ggml_nbytes(src0) == ggml_nbytes(src1));
648-
#ifdef GGML_USE_MUSA
648+
#if defined(GGML_USE_MUSA) && defined(GGML_MUSA_MUDNN_COPY)
649649
if (src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16) {
650650
CUDA_CHECK(mudnnMemcpyAsync(ctx, src1, src0));
651651
} else
652-
#endif // GGML_USE_MUSA
652+
#endif // GGML_USE_MUSA && GGML_MUSA_MUDNN_COPY
653653
{
654654
CUDA_CHECK(cudaMemcpyAsync(src1_ddc, src0_ddc, ggml_nbytes(src0), cudaMemcpyDeviceToDevice, main_stream));
655655
}

ml/backend/ggml/ggml/src/ggml-cuda/vendors/musa.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
#define CUBLAS_OP_N MUBLAS_OP_N
1414
#define CUBLAS_OP_T MUBLAS_OP_T
1515
#define CUBLAS_STATUS_SUCCESS MUBLAS_STATUS_SUCCESS
16-
#define CUBLAS_TF32_TENSOR_OP_MATH MUBLAS_MATH_MODE_DEFAULT
16+
#define CUBLAS_TF32_TENSOR_OP_MATH MUBLAS_DEFAULT_MATH
1717
#define CUDA_R_16F MUSA_R_16F
1818
#define CUDA_R_16BF MUSA_R_16BF
1919
#define CUDA_R_32F MUSA_R_32F
@@ -29,7 +29,7 @@
2929
#define cublasSgemm mublasSgemm
3030
#define cublasStatus_t mublasStatus_t
3131
#define cublasOperation_t mublasOperation_t
32-
#define cublasGetStatusString mublasStatus_to_string
32+
#define cublasGetStatusString mublasGetStatusString
3333
#define cudaDataType_t musaDataType_t
3434
#define cudaDeviceCanAccessPeer musaDeviceCanAccessPeer
3535
#define cudaDeviceDisablePeerAccess musaDeviceDisablePeerAccess

ml/backend/ggml/ggml/src/ggml-musa/CMakeLists.txt

Lines changed: 14 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -34,8 +34,12 @@ if (MUSAToolkit_FOUND)
3434
list(APPEND GGML_SOURCES_MUSA ${SRCS})
3535
file(GLOB SRCS "../ggml-cuda/template-instances/mmq*.cu")
3636
list(APPEND GGML_SOURCES_MUSA ${SRCS})
37-
file(GLOB SRCS "../ggml-musa/*.cu")
38-
list(APPEND GGML_SOURCES_MUSA ${SRCS})
37+
38+
if (GGML_MUSA_MUDNN_COPY)
39+
file(GLOB SRCS "../ggml-musa/*.cu")
40+
list(APPEND GGML_SOURCES_MUSA ${SRCS})
41+
add_compile_definitions(GGML_MUSA_MUDNN_COPY)
42+
endif()
3943

4044
if (GGML_CUDA_FA_ALL_QUANTS)
4145
file(GLOB SRCS "../ggml-cuda/template-instances/fattn-vec*.cu")
@@ -97,10 +101,16 @@ if (MUSAToolkit_FOUND)
97101
endif()
98102

99103
if (GGML_STATIC)
100-
# TODO: mudnn has not provided static libraries yet
101104
target_link_libraries(ggml-musa PRIVATE MUSA::musart_static MUSA::mublas_static)
105+
# TODO: mudnn has not provided static libraries yet
106+
# if (GGML_MUSA_MUDNN_COPY)
107+
# target_link_libraries(ggml-musa PRIVATE mudnn_static)
108+
# endif()
102109
else()
103-
target_link_libraries(ggml-musa PRIVATE MUSA::musart MUSA::mublas mudnn)
110+
target_link_libraries(ggml-musa PRIVATE MUSA::musart MUSA::mublas)
111+
if (GGML_MUSA_MUDNN_COPY)
112+
target_link_libraries(ggml-musa PRIVATE mudnn)
113+
endif()
104114
endif()
105115

106116
if (GGML_CUDA_NO_VMM)

0 commit comments

Comments
 (0)