Updated

djthorpe · djthorpe · commit 1864caa26660 · 2024-08-09T12:09:46.000+02:00
diff --git a/Makefile b/Makefile
@@ -17,6 +17,15 @@ DOCKER_TAG_BASE_BUILD="${DOCKER_REGISTRY}/cuda-dev-${OS}-${ARCH}:${VERSION}"
 DOCKER_TAG_BASE_RUNTIME="${DOCKER_REGISTRY}/cuda-rt-${OS}-${ARCH}:${VERSION}"
 DOCKER_TAG_LLAMACPP="${DOCKER_REGISTRY}/llamacpp-${OS}-${ARCH}:${VERSION}"
 
+# Flags passed to onnxruntime's build.sh by the onnxruntime target
+ONNXRUNTIME_FLAGS := --config Release --build_shared_lib
+
+# CUDA: when CUDA_HOME is set, set GGML_CUDA to 1 and add the CUDA flags (cuDNN home is CUDA_HOME too)
+ifdef CUDA_HOME
+  GGML_CUDA := 1
+  ONNXRUNTIME_FLAGS += --use_cuda --cuda_home=${CUDA_HOME} --cudnn_home=${CUDA_HOME}
+endif
+
 # Base images for building and running CUDA containers
 docker-base: docker-dep
 	@echo "Building ${DOCKER_TAG_BASE_BUILD}"
@@ -49,7 +58,11 @@ llamacpp: submodule-checkout
 
 onnxruntime: submodule-checkout
 	@echo "Building onnxruntime"
-	@cd onnxruntime && ./build.sh --config Release --build_shared_lib --parallel --compile_no_warning_as_error --skip_submodule_sync
+	@cd onnxruntime && ./build.sh \
+	  --parallel \
+	  --compile_no_warning_as_error \
+	  --skip_submodule_sync \
+	  ${ONNXRUNTIME_FLAGS}
 
 # Push docker container
 docker-push: docker-dep 
diff --git a/README.md b/README.md
@@ -24,7 +24,14 @@ You can then access the Llama server on port 8080.
 
 ## Building
 
-The following will build the docker image and push to the repository:
+To build the llama.cpp and onnxruntime libraries (name only one target to build just that library):
+
+```bash
+CUDA_HOME=/usr/local/cuda make llamacpp onnxruntime
+```
+
+You can omit the `CUDA_HOME` environment variable if you don't want to build with CUDA support.
+The following will build a docker image and push to the repository:
 
 ```bash
 git checkout git@github.com:mutablelogic/docker-llamacpp.git