ggml-org
diff --git a/‎.devops/full-cuda.Dockerfile
Lines changed: 1 addition & 1 deletion b/‎.devops/full-cuda.Dockerfile
Lines changed: 1 addition & 1 deletion
diff --git a/‎.devops/full.Dockerfile
Lines changed: 1 addition & 1 deletion b/‎.devops/full.Dockerfile
Lines changed: 1 addition & 1 deletion
diff --git a/‎.devops/main-cuda.Dockerfile
Lines changed: 4 additions & 1 deletion b/‎.devops/main-cuda.Dockerfile
Lines changed: 4 additions & 1 deletion
diff --git a/‎.devops/main-rocm.Dockerfile
Lines changed: 1 addition & 1 deletion b/‎.devops/main-rocm.Dockerfile
Lines changed: 1 addition & 1 deletion
diff --git a/‎.devops/main-vulkan.Dockerfile
Lines changed: 1 addition & 1 deletion b/‎.devops/main-vulkan.Dockerfile
Lines changed: 1 addition & 1 deletion
diff --git a/‎.devops/main.Dockerfile
Lines changed: 4 additions & 1 deletion b/‎.devops/main.Dockerfile
Lines changed: 4 additions & 1 deletion
diff --git a/‎.devops/server-cuda.Dockerfile
Lines changed: 2 additions & 2 deletions b/‎.devops/server-cuda.Dockerfile
Lines changed: 2 additions & 2 deletions
diff --git a/‎.devops/server.Dockerfile
Lines changed: 2 additions & 2 deletions b/‎.devops/server.Dockerfile
Lines changed: 2 additions & 2 deletions
diff --git a/‎CMakeLists.txt
Lines changed: 4 additions & 0 deletions b/‎CMakeLists.txt
Lines changed: 4 additions & 0 deletions
diff --git a/‎Makefile
Lines changed: 1 addition & 0 deletions b/‎Makefile
Lines changed: 1 addition & 0 deletions
@@ -12,7 +12,7 @@ FROM ${BASE_CUDA_DEV_CONTAINER} as build
 ARG CUDA_DOCKER_ARCH=all
 
 RUN apt-get update && \
-    apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev
+    apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1
 
 COPY requirements.txt   requirements.txt
 COPY requirements       requirements
 
@@ -3,7 +3,7 @@ ARG UBUNTU_VERSION=22.04
 FROM ubuntu:$UBUNTU_VERSION as build
 
 RUN apt-get update && \
-    apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev
+    apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1
 
 COPY requirements.txt   requirements.txt
 COPY requirements       requirements
 
@@ -23,10 +23,13 @@ ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
 # Enable CUDA
 ENV LLAMA_CUDA=1
 
-RUN make -j$(nproc)
+RUN make -j$(nproc) main
 
 FROM ${BASE_CUDA_RUN_CONTAINER} as runtime
 
+RUN apt-get update && \
+    apt-get install -y libgomp1
+
 COPY --from=build /app/main /main
 
 ENTRYPOINT [ "/main" ]
@@ -40,6 +40,6 @@ ENV LLAMA_HIPBLAS=1
 ENV CC=/opt/rocm/llvm/bin/clang
 ENV CXX=/opt/rocm/llvm/bin/clang++
 
-RUN make -j$(nproc)
+RUN make -j$(nproc) main
 
 ENTRYPOINT [ "/app/main" ]
@@ -3,7 +3,7 @@ ARG UBUNTU_VERSION=jammy
 FROM ubuntu:$UBUNTU_VERSION as build
 
 # Install build tools
-RUN apt update && apt install -y git build-essential cmake wget
+RUN apt update && apt install -y git build-essential cmake wget libgomp1
 
 # Install Vulkan SDK
 RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
 
@@ -9,10 +9,13 @@ WORKDIR /app
 
 COPY . .
 
-RUN make -j$(nproc)
+RUN make -j$(nproc) main
 
 FROM ubuntu:$UBUNTU_VERSION as runtime
 
+RUN apt-get update && \
+    apt-get install -y libgomp1
+
 COPY --from=build /app/main /main
 
 ENV LC_ALL=C.utf8
 
@@ -25,12 +25,12 @@ ENV LLAMA_CUDA=1
 # Enable cURL
 ENV LLAMA_CURL=1
 
-RUN make -j$(nproc)
+RUN make -j$(nproc) server
 
 FROM ${BASE_CUDA_RUN_CONTAINER} as runtime
 
 RUN apt-get update && \
-    apt-get install -y libcurl4-openssl-dev
+    apt-get install -y libcurl4-openssl-dev libgomp1
 
 COPY --from=build /app/server /server
 
 
@@ -11,12 +11,12 @@ COPY . .
 
 ENV LLAMA_CURL=1
 
-RUN make -j$(nproc)
+RUN make -j$(nproc) server
 
 FROM ubuntu:$UBUNTU_VERSION as runtime
 
 RUN apt-get update && \
-    apt-get install -y libcurl4-openssl-dev
+    apt-get install -y libcurl4-openssl-dev libgomp1
 
 COPY --from=build /app/server /server
 
 
@@ -416,6 +416,8 @@ if (LLAMA_CUDA)
         list(APPEND GGML_SOURCES_CUDA "ggml-cuda.cu")
         file(GLOB SRCS "ggml-cuda/template-instances/fattn-wmma*.cu")
         list(APPEND GGML_SOURCES_CUDA ${SRCS})
+        file(GLOB SRCS "ggml-cuda/template-instances/mmq*.cu")
+        list(APPEND GGML_SOURCES_CUDA ${SRCS})
 
         add_compile_definitions(GGML_USE_CUDA)
         add_compile_definitions(GGML_CUDA_USE_GRAPHS)
@@ -588,6 +590,8 @@ if (LLAMA_HIPBLAS)
     list(APPEND GGML_SOURCES_ROCM "ggml-cuda.cu")
     file(GLOB SRCS "ggml-cuda/template-instances/fattn-wmma*.cu")
     list(APPEND GGML_SOURCES_ROCM ${SRCS})
+    file(GLOB SRCS "ggml-cuda/template-instances/mmq*.cu")
+    list(APPEND GGML_SOURCES_ROCM ${SRCS})
 
     add_compile_definitions(GGML_USE_HIPBLAS GGML_USE_CUDA)
 
 
@@ -444,6 +444,7 @@ ifdef LLAMA_CUBLAS
 endif
 
 OBJS_CUDA_TEMP_INST      = $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-wmma*.cu))
+OBJS_CUDA_TEMP_INST     += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/mmq*.cu))
 ifdef LLAMA_CUDA_FA_ALL_QUANTS
 	OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*.cu))
 else