From 1e82ac9d6f5913d7c77fee134df9b076b221136f Mon Sep 17 00:00:00 2001
From: Javier Martinez
Date: Mon, 5 Aug 2024 17:47:53 +0200
Subject: [PATCH 1/2] feat: add cuda dockerfile

---
 Dockerfile.llamacpp-cuda | 84 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 84 insertions(+)
 create mode 100644 Dockerfile.llamacpp-cuda

diff --git a/Dockerfile.llamacpp-cuda b/Dockerfile.llamacpp-cuda
new file mode 100644
index 000000000..33590e03c
--- /dev/null
+++ b/Dockerfile.llamacpp-cuda
@@ -0,0 +1,84 @@
+FROM nvidia/cuda:12.5.1-cudnn-devel-ubuntu22.04 as base
+
+# For tzdata
+ENV DEBIAN_FRONTEND="noninteractive" TZ="Etc/UTC"
+
+RUN apt-get update && apt-get upgrade -y \
+    && apt-get install -y git build-essential \
+    python3 python3-pip python3.11-venv gcc wget \
+    ocl-icd-opencl-dev opencl-headers clinfo \
+    libclblast-dev libopenblas-dev \
+    && mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd \
+    && ln -sf /usr/bin/python3.11 /usr/bin/python3 \
+    && python3 --version
+
+# Install poetry
+RUN pip install pipx
+RUN python3 -m pipx ensurepath
+RUN pipx install poetry==1.8.3
+ENV PATH="/root/.local/bin:$PATH"
+ENV PATH=".venv/bin/:$PATH"
+
+# Dependencies to build llama-cpp
+RUN apt update && apt install -y \
+    libopenblas-dev\
+    ninja-build\
+    build-essential\
+    pkg-config\
+    wget
+
+# https://python-poetry.org/docs/configuration/#virtualenvsin-project
+ENV POETRY_VIRTUALENVS_IN_PROJECT=true
+
+FROM base as dependencies
+WORKDIR /home/worker/app
+COPY pyproject.toml poetry.lock ./
+
+ARG POETRY_EXTRAS="ui embeddings-huggingface llms-llama-cpp vector-stores-qdrant"
+RUN poetry install --no-root --extras "${POETRY_EXTRAS}"
+
+# Enable GPU support
+ENV CUDA_DOCKER_ARCH=all
+ENV GGML_CUDA=1
+ENV TOKENIZERS_PARALLELISM=true
+RUN CMAKE_ARGS="-DGGML_CUDA=on" \
+    poetry run pip install \
+    --force-reinstall \
+    --no-cache-dir \
+    --verbose \
+    llama-cpp-python==0.2.84 \
+    numpy==1.26.0
+
+FROM base as app
+
+ENV PYTHONUNBUFFERED=1
+ENV PORT=8080
+ENV APP_ENV=prod
+ENV PYTHONPATH="$PYTHONPATH:/home/worker/app/private_gpt/"
+EXPOSE 8080
+
+# Prepare a non-root user
+# More info about how to configure UIDs and GIDs in Docker:
+# https://github.com/systemd/systemd/blob/main/docs/UIDS-GIDS.md
+
+# Define the User ID (UID) for the non-root user
+# UID 1000 is chosen to avoid conflicts with existing system users
+ARG UID=1000
+
+# Define the Group ID (GID) for the non-root user
+# GID 65534 is often used for the 'nogroup' or 'nobody' group
+ARG GID=65534
+
+RUN adduser --system --gid ${GID} --uid ${UID} --home /home/worker worker
+WORKDIR /home/worker/app
+
+RUN chown worker /home/worker/app
+RUN mkdir local_data && chown worker local_data
+RUN mkdir models && chown worker models
+COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
+COPY --chown=worker private_gpt/ private_gpt
+COPY --chown=worker *.yaml ./
+COPY --chown=worker scripts/ scripts
+
+USER worker
+ENTRYPOINT python -m private_gpt
\ No newline at end of file
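A quick smoke test for the image this patch adds (a sketch, assuming the host has an NVIDIA driver and the NVIDIA Container Toolkit installed; the `private-gpt:llamacpp-cuda` tag is illustrative, not part of the patch):

    # Build the CUDA image from the Dockerfile added above
    docker build -f Dockerfile.llamacpp-cuda -t private-gpt:llamacpp-cuda .

    # Verify the container can see the GPU; --entrypoint bypasses the image's
    # default "python -m private_gpt" entrypoint so nvidia-smi runs instead
    docker run --rm --gpus all --entrypoint nvidia-smi private-gpt:llamacpp-cuda
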
From 23704d23ad867a182c368b52f9b30af3ab1e0196 Mon Sep 17 00:00:00 2001
From: Javier Martinez
Date: Mon, 5 Aug 2024 17:48:14 +0200
Subject: [PATCH 2/2] feat: add new cuda profile

---
 docker-compose.yaml                       | 20 ++++++++++++++++++++
 fern/docs/pages/quickstart/quickstart.mdx | 15 +++++++++++++++
 2 files changed, 35 insertions(+)

diff --git a/docker-compose.yaml b/docker-compose.yaml
index a5df4647e..3a021f8f5 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -48,6 +48,26 @@ services:
     profiles:
       - llamacpp-cpu
 
+  # Private-GPT service for the local mode (with CUDA support)
+  # This service builds from a local Dockerfile and runs the application in local mode.
+  private-gpt-llamacpp-cuda:
+    image: ${PGPT_IMAGE:-zylonai/private-gpt}:${PGPT_TAG:-0.6.1}-llamacpp-cuda
+    build:
+      context: .
+      dockerfile: Dockerfile.llamacpp-cuda
+    volumes:
+      - ./local_data/:/home/worker/app/local_data
+      - ./models/:/home/worker/app/models
+    entrypoint: sh -c ".venv/bin/python scripts/setup && .venv/bin/python -m private_gpt"
+    ports:
+      - "8001:8001"
+    environment:
+      PORT: 8001
+      PGPT_PROFILES: local
+      HF_TOKEN: ${HF_TOKEN}
+    profiles:
+      - llamacpp-cuda
+
 #-----------------------------------
 #---- Ollama services --------------
 #-----------------------------------
diff --git a/fern/docs/pages/quickstart/quickstart.mdx b/fern/docs/pages/quickstart/quickstart.mdx
index 09877ce25..f0c05684d 100644
--- a/fern/docs/pages/quickstart/quickstart.mdx
+++ b/fern/docs/pages/quickstart/quickstart.mdx
@@ -82,6 +82,21 @@ HF_TOKEN=<your_hf_token> docker-compose --profile llamacpp-cpu up
 ```
 Replace `<your_hf_token>` with your actual Hugging Face token.
 
+#### 2. LlamaCPP CUDA
+
+**Description:**
+This profile runs the Private-GPT services locally using `llama-cpp` and Hugging Face models, with CUDA acceleration for NVIDIA GPUs.
+
+**Requirements:**
+A **Hugging Face Token (HF_TOKEN)** is required for accessing Hugging Face models. Obtain your token following [this guide](/installation/getting-started/troubleshooting#downloading-gated-and-private-models).
+
+**Run:**
+Start the services with your Hugging Face token using pre-built images:
+```sh
+HF_TOKEN=<your_hf_token> docker-compose --profile llamacpp-cuda up
+```
+Replace `<your_hf_token>` with your actual Hugging Face token.
+
 ## Building Locally
 
 If you prefer to build Docker images locally, which is useful when making changes to the codebase or the Dockerfiles, follow these steps:
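Note that the `private-gpt-llamacpp-cuda` service above does not itself request a GPU. Under the Compose spec, a device reservation along the following lines is typically needed for the container to see the GPU (a sketch, not part of this patch; requires the NVIDIA Container Toolkit and Compose v2 or docker-compose >= 1.28):

    services:
      private-gpt-llamacpp-cuda:
        # Reserve one NVIDIA GPU for this container
        deploy:
          resources:
            reservations:
              devices:
                - driver: nvidia
                  count: 1
                  capabilities: [gpu]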