diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 00000000..4488695a
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,15 @@
+# General Ignore
+.git
+.github
+.vscode
+.dockerignore
+.gitignore
+README.md
+Dockerfile
+docker-compose.yaml
+
+# Unique to distributed-llama
+report
+docs
+examples
+models
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 00000000..1074daec
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,21 @@
+# syntax=docker/dockerfile:1
+FROM debian:bookworm-slim
+
+# Build toolchain only. --no-install-recommends (hadolint DL3015) keeps the
+# layer minimal, and the apt list cleanup happens in the SAME layer so the
+# cache never reaches the final image.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    git build-essential \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+COPY . /app
+RUN make dllama && make dllama-api
+
+# Default ports for root node + worker node.
+# NOTE: EXPOSE is documentation only; compose publishes what it needs.
+EXPOSE 5000
+EXPOSE 9999
+
+# TODO: Consider putting the binary on a smaller image layer
+CMD ["./dllama"]
diff --git a/docker-compose.yaml b/docker-compose.yaml
new file mode 100644
index 00000000..637a7be8
--- /dev/null
+++ b/docker-compose.yaml
@@ -0,0 +1,47 @@
+services:
+  root:
+    build: .
+    networks:
+      - llama-net
+    volumes:
+      - ./models:/models
+    depends_on:
+      - worker1
+      - worker2
+      - worker3
+    ports:
+      - "5000:5000"
+    restart: on-failure
+    command: >-
+      ./dllama-api
+      --model /models/dllama_model_llama3.2-1b-instruct_q40.m
+      --tokenizer /models/dllama_tokenizer_llama3_2.t
+      --buffer-float-type q80 --nthreads 2 --port 5000
+      --workers worker1:9999 worker2:9999 worker3:9999
+
+  worker1:
+    build: .
+    ports: # NOTE(review): only worker1 publishes to the host; worker2/3 stay internal — confirm this is intended
+      - "9999:9999"
+    networks:
+      - llama-net
+    command: >
+      ./dllama worker --port 9999 --nthreads 2
+
+  worker2:
+    build: .
+    networks:
+      - llama-net
+    command: >
+      ./dllama worker --port 9999 --nthreads 2
+
+  worker3:
+    build: .
+    networks:
+      - llama-net
+    command: >
+      ./dllama worker --port 9999 --nthreads 2
+
+networks:
+  llama-net:
+    driver: bridge