
Commit cf8e0a3

musa: add docker image support (#9685)
* mtgpu: add docker image support

  Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>

* mtgpu: enable docker workflow

  Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>

---------

Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
1 parent c7499c5 commit cf8e0a3

6 files changed: +134 −3 lines

.devops/full-musa.Dockerfile

Lines changed: 26 additions & 0 deletions
```diff
@@ -0,0 +1,26 @@
+ARG UBUNTU_VERSION=22.04
+# This needs to generally match the container host's environment.
+ARG MUSA_VERSION=rc3.1.0
+# Target the MUSA build image
+ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+
+FROM ${BASE_MUSA_DEV_CONTAINER} AS build
+
+RUN apt-get update && \
+    apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1
+
+COPY requirements.txt requirements.txt
+COPY requirements requirements
+
+RUN pip install --upgrade pip setuptools wheel \
+    && pip install -r requirements.txt
+
+WORKDIR /app
+
+COPY . .
+
+RUN cmake -B build -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+    cmake --build build --config Release -j$(nproc) && \
+    cp build/bin/* .
+
+ENTRYPOINT ["/app/.devops/tools.sh"]
```

.devops/llama-cli-musa.Dockerfile

Lines changed: 30 additions & 0 deletions
```diff
@@ -0,0 +1,30 @@
+ARG UBUNTU_VERSION=22.04
+# This needs to generally match the container host's environment.
+ARG MUSA_VERSION=rc3.1.0
+# Target the MUSA build image
+ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+# Target the MUSA runtime image
+ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
+
+FROM ${BASE_MUSA_DEV_CONTAINER} AS build
+
+RUN apt-get update && \
+    apt-get install -y build-essential git cmake
+
+WORKDIR /app
+
+COPY . .
+
+RUN cmake -B build -DGGML_MUSA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+    cmake --build build --config Release --target llama-cli -j$(nproc)
+
+FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime
+
+RUN apt-get update && \
+    apt-get install -y libgomp1
+
+COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
+COPY --from=build /app/build/src/libllama.so /libllama.so
+COPY --from=build /app/build/bin/llama-cli /llama-cli
+
+ENTRYPOINT [ "/llama-cli" ]
```

.devops/llama-server-musa.Dockerfile

Lines changed: 35 additions & 0 deletions
```diff
@@ -0,0 +1,35 @@
+ARG UBUNTU_VERSION=22.04
+# This needs to generally match the container host's environment.
+ARG MUSA_VERSION=rc3.1.0
+# Target the MUSA build image
+ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+# Target the MUSA runtime image
+ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
+
+FROM ${BASE_MUSA_DEV_CONTAINER} AS build
+
+RUN apt-get update && \
+    apt-get install -y build-essential git cmake libcurl4-openssl-dev
+
+WORKDIR /app
+
+COPY . .
+
+RUN cmake -B build -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+    cmake --build build --config Release --target llama-server -j$(nproc)
+
+FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime
+
+RUN apt-get update && \
+    apt-get install -y libcurl4-openssl-dev libgomp1 curl
+
+COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
+COPY --from=build /app/build/src/libllama.so /libllama.so
+COPY --from=build /app/build/bin/llama-server /llama-server
+
+# Must be set to 0.0.0.0 so it can listen to requests from host machine
+ENV LLAMA_ARG_HOST=0.0.0.0
+
+HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
+
+ENTRYPOINT [ "/llama-server" ]
```

.github/workflows/docker.yml

Lines changed: 3 additions & 0 deletions
```diff
@@ -43,6 +43,9 @@ jobs:
           - { tag: "light-cuda", dockerfile: ".devops/llama-cli-cuda.Dockerfile", platforms: "linux/amd64" }
           - { tag: "server-cuda", dockerfile: ".devops/llama-server-cuda.Dockerfile", platforms: "linux/amd64" }
           - { tag: "full-cuda", dockerfile: ".devops/full-cuda.Dockerfile", platforms: "linux/amd64" }
+          - { tag: "light-musa", dockerfile: ".devops/llama-cli-musa.Dockerfile", platforms: "linux/amd64" }
+          - { tag: "server-musa", dockerfile: ".devops/llama-server-musa.Dockerfile", platforms: "linux/amd64" }
+          - { tag: "full-musa", dockerfile: ".devops/full-musa.Dockerfile", platforms: "linux/amd64" }
           # Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
           #- { tag: "light-rocm", dockerfile: ".devops/llama-cli-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
           #- { tag: "server-rocm", dockerfile: ".devops/llama-server-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
```

docs/docker.md

Lines changed: 38 additions & 1 deletion
````diff
@@ -19,8 +19,11 @@ Additionally, there the following images, similar to the above:
 - `ghcr.io/ggerganov/llama.cpp:full-rocm`: Same as `full` but compiled with ROCm support. (platforms: `linux/amd64`, `linux/arm64`)
 - `ghcr.io/ggerganov/llama.cpp:light-rocm`: Same as `light` but compiled with ROCm support. (platforms: `linux/amd64`, `linux/arm64`)
 - `ghcr.io/ggerganov/llama.cpp:server-rocm`: Same as `server` but compiled with ROCm support. (platforms: `linux/amd64`, `linux/arm64`)
+- `ghcr.io/ggerganov/llama.cpp:full-musa`: Same as `full` but compiled with MUSA support. (platforms: `linux/amd64`)
+- `ghcr.io/ggerganov/llama.cpp:light-musa`: Same as `light` but compiled with MUSA support. (platforms: `linux/amd64`)
+- `ghcr.io/ggerganov/llama.cpp:server-musa`: Same as `server` but compiled with MUSA support. (platforms: `linux/amd64`)
 
-The GPU enabled images are not currently tested by CI beyond being built. They are not built with any variation from the ones in the Dockerfiles defined in [.devops/](../.devops/) and the GitHub Action defined in [.github/workflows/docker.yml](../.github/workflows/docker.yml). If you need different settings (for example, a different CUDA or ROCm library, you'll need to build the images locally for now).
+The GPU enabled images are not currently tested by CI beyond being built. They are not built with any variation from the ones in the Dockerfiles defined in [.devops/](../.devops/) and the GitHub Action defined in [.github/workflows/docker.yml](../.github/workflows/docker.yml). If you need different settings (for example, a different CUDA, ROCm or MUSA library), you'll need to build the images locally for now.
 
 ## Usage
 
@@ -84,3 +87,37 @@ docker run --gpus all -v /path/to/models:/models local/llama.cpp:full-cuda --run
 docker run --gpus all -v /path/to/models:/models local/llama.cpp:light-cuda -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
 docker run --gpus all -v /path/to/models:/models local/llama.cpp:server-cuda -m /models/7B/ggml-model-q4_0.gguf --port 8000 --host 0.0.0.0 -n 512 --n-gpu-layers 1
 ```
+
+## Docker With MUSA
+
+Assuming one has the [mt-container-toolkit](https://developer.mthreads.com/musa/native) properly installed on Linux, `muBLAS` should be accessible inside the container.
+
+## Building Docker locally
+
+```bash
+docker build -t local/llama.cpp:full-musa -f .devops/full-musa.Dockerfile .
+docker build -t local/llama.cpp:light-musa -f .devops/llama-cli-musa.Dockerfile .
+docker build -t local/llama.cpp:server-musa -f .devops/llama-server-musa.Dockerfile .
+```
+
+You may want to pass in some different `ARG` values, depending on the MUSA environment supported by your container host, as well as the GPU architecture.
+
+The defaults are:
+
+- `MUSA_VERSION` set to `rc3.1.0`
+
+The resulting images are essentially the same as the non-MUSA images:
+
+1. `local/llama.cpp:full-musa`: This image includes both the main executable file and the tools to convert LLaMA models into ggml and convert into 4-bit quantization.
+2. `local/llama.cpp:light-musa`: This image only includes the main executable file.
+3. `local/llama.cpp:server-musa`: This image only includes the server executable file.
+
+## Usage
+
+After building locally, usage is similar to the non-MUSA examples, but you'll need to set `mthreads` as the default Docker runtime. This can be done by executing `(cd /usr/bin/musa && sudo ./docker setup $PWD)` and verifying the changes by executing `docker info | grep mthreads` on the host machine. You will also want to use the `--n-gpu-layers` flag.
+
+```bash
+docker run -v /path/to/models:/models local/llama.cpp:full-musa --run -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
+docker run -v /path/to/models:/models local/llama.cpp:light-musa -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
+docker run -v /path/to/models:/models local/llama.cpp:server-musa -m /models/7B/ggml-model-q4_0.gguf --port 8000 --host 0.0.0.0 -n 512 --n-gpu-layers 1
+```
````
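Condensed from the Usage section above, the one-time host setup is two commands; a sketch assuming the mt-container-toolkit placed its `docker setup` script under `/usr/bin/musa`, as the docs describe:

```bash
# Register mthreads as the default Docker runtime (one-time, on the host)
(cd /usr/bin/musa && sudo ./docker setup $PWD)
# Verify the runtime change took effect
docker info | grep mthreads
```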

ggml/src/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
```diff
@@ -163,8 +163,8 @@ if (GGML_OPENMP)
         list(APPEND GGML_EXTRA_LIBS_PRIVATE OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
 
         if (GGML_MUSA)
-            list(APPEND GGML_EXTRA_INCLUDES "/usr/lib/llvm-10/include/openmp")
-            list(APPEND GGML_EXTRA_LIBS_PRIVATE "/usr/lib/llvm-10/lib/libomp.so")
+            list(APPEND GGML_EXTRA_INCLUDES "/usr/lib/llvm-14/lib/clang/14.0.0/include")
+            list(APPEND GGML_EXTRA_LIBS_PRIVATE "/usr/lib/llvm-14/lib/libomp.so")
         endif()
     else()
         message(WARNING "OpenMP not found")
```
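The hard-coded OpenMP include and library paths move from LLVM 10 to LLVM 14, presumably matching the toolchain shipped in the `rc3.1.0` MUSA images. A quick way to confirm the new paths exist (the image tag follows the `ARG` defaults in the Dockerfiles above; the exact file layout inside the image is an assumption):

```bash
# Verify the LLVM 14 OpenMP paths that CMake now hard-codes are present in the dev image
docker run --rm mthreads/musa:rc3.1.0-devel-ubuntu22.04 \
    ls /usr/lib/llvm-14/lib/clang/14.0.0/include /usr/lib/llvm-14/lib/libomp.so
```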
