
Commit 7411a01

Local LLM multi-GPU support (#1391)
1 parent 60d91ff commit 7411a01

File tree

8 files changed: +165 additions, −8 deletions

Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -36,4 +36,4 @@ COPY --from=compile-image /root/nltk_data /root/nltk_data
 
 ENV PATH="/opt/venv/bin:$PATH"
 
-EXPOSE 8001
+EXPOSE 8001

(The removed and added lines are textually identical, so this is likely a whitespace-only change, such as adding a trailing newline.)

Dockerfile-gpu

Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@
# Define the CUDA SDK version you need
ARG CUDA_IMAGE="12.1.1-devel-ubuntu22.04"
FROM nvidia/cuda:${CUDA_IMAGE}

ENV DEBIAN_FRONTEND=noninteractive

WORKDIR /app

RUN apt-get update && apt-get upgrade -y \
    && apt-get install -y git build-essential \
    python3 python3-pip python3.10-venv libpq-dev gcc wget \
    ocl-icd-opencl-dev opencl-headers clinfo \
    libclblast-dev libopenblas-dev \
    && mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd

# Create a virtual environment and activate it
RUN python3 -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# Install Python dependencies from requirements.txt
COPY requirements.txt .
RUN pip install --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Download the NLTK data needed at runtime
RUN python3.10 -c "import nltk; nltk.download('punkt')" && \
    python3.10 -c "import nltk; nltk.download('averaged_perceptron_tagger')"

# Copy the application code
COPY . .

ENV CUDA_DOCKER_ARCH=all
ENV LLAMA_CUBLAS=1

# Rebuild llama-cpp-python against cuBLAS so inference can run on the GPU
RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python==0.2.7 --force-reinstall --upgrade --no-cache-dir

# Make necessary scripts executable
RUN chmod +x ./entrypoint.sh ./wait-for-it.sh ./install_tool_dependencies.sh ./entrypoint_celery.sh

# Set environment variable to point to the custom libllama.so
# ENV LLAMA_CPP_LIB=/app/llama.cpp/libllama.so

EXPOSE 8001

CMD ["./entrypoint.sh"]

README.MD

Lines changed: 12 additions & 4 deletions
@@ -126,10 +126,18 @@ cd SuperAGI
 
 4. Ensure that Docker is installed on your system. You can download and install it from [here](https://docs.docker.com/get-docker/).
 
-5. Once you have Docker Desktop running, run the following command in the in the SuperAGI directory :
-```
-docker-compose up --build
-```
+5. Once you have Docker Desktop running, run the following command in the SuperAGI directory:
+
+   a. For regular usage:
+   ```
+   docker compose -f docker-compose.yaml up --build
+   ```
+
+   b. If you want to use SuperAGI with Local LLMs and have a GPU, run the following command:
+   ```
+   docker compose -f docker-compose-gpu.yml up --build
+   ```
+
 6. Open your web browser and navigate to http://localhost:3000 to access SuperAGI.
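
Note that the rewritten step 5 uses the Docker Compose v2 plugin syntax (`docker compose`) rather than the legacy `docker-compose` binary the old instructions invoked, so it assumes a reasonably recent Docker installation.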

config_template.yaml

Lines changed: 1 addition & 0 deletions
@@ -122,3 +122,4 @@ ENGINE_ID: "stable-diffusion-xl-beta-v2-2-2"
 ## To use Qdrant for vector store
 #QDRANT_HOST_NAME: YOUR_QDRANT_HOST_NAME
 #QDRANT_PORT: YOUR_QDRANT_PORT
+#GPU_LAYERS: GPU LAYERS THAT YOU WANT TO OFFLOAD TO THE GPU WHILE USING LOCAL LLMS
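
The loader introduced in this commit reads this key with `get_config('GPU_LAYERS', '-1')`, and in llama.cpp a value of `-1` means "offload all layers", so leaving the key commented out attempts full offload by default. A setting such as `GPU_LAYERS: 32` (an illustrative number, not from the commit) would offload roughly 32 transformer layers and keep the rest on the CPU, the usual compromise when a model does not quite fit in VRAM.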

docker-compose-gpu.yml

Lines changed: 97 additions & 0 deletions
@@ -0,0 +1,97 @@
version: '3.8'
services:
  backend:
    volumes:
      - "./:/app"
      - "/home/ubuntu/models/vicuna-7B-v1.5-GGUF/vicuna-7b-v1.5.Q5_K_M.gguf:/app/local_model_path"
    build:
      context: .
      dockerfile: Dockerfile-gpu
    depends_on:
      - super__redis
      - super__postgres
    networks:
      - super_network
    command: ["/app/wait-for-it.sh", "super__postgres:5432","-t","60","--","/app/entrypoint.sh"]
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]

  celery:
    volumes:
      - "./:/app"
      - "${EXTERNAL_RESOURCE_DIR:-./workspace}:/app/ext"
      - "/home/ubuntu/models/vicuna-7B-v1.5-GGUF/vicuna-7b-v1.5.Q5_K_M.gguf:/app/local_model_path"
    build:
      context: .
      dockerfile: Dockerfile-gpu
    depends_on:
      - super__redis
      - super__postgres
    networks:
      - super_network
    command: ["/app/entrypoint_celery.sh"]
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
  gui:
    build:
      context: ./gui
      args:
        NEXT_PUBLIC_API_BASE_URL: "/api"
    networks:
      - super_network
    # volumes:
    #   - ./gui:/app
    #   - /app/node_modules/
    #   - /app/.next/
  super__redis:
    image: "redis/redis-stack-server:latest"
    networks:
      - super_network
    # uncomment to expose redis port to host
    # ports:
    #   - "6379:6379"
    volumes:
      - redis_data:/data

  super__postgres:
    image: "docker.io/library/postgres:15"
    environment:
      - POSTGRES_USER=superagi
      - POSTGRES_PASSWORD=password
      - POSTGRES_DB=super_agi_main
    volumes:
      - superagi_postgres_data:/var/lib/postgresql/data/
    networks:
      - super_network
    # uncomment to expose postgres port to host
    # ports:
    #   - "5432:5432"

  proxy:
    image: nginx:stable-alpine
    ports:
      - "3000:80"
    networks:
      - super_network
    depends_on:
      - backend
      - gui
    volumes:
      - ./nginx/default.conf:/etc/nginx/conf.d/default.conf

networks:
  super_network:
    driver: bridge
volumes:
  superagi_postgres_data:
  redis_data:
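
Two practical notes on this file. The GGUF model path is hardcoded to the author's host (`/home/ubuntu/models/vicuna-7B-v1.5-GGUF/...`), so other users must edit the bind mount in both the `backend` and `celery` services to point at their own model file. Also, the `deploy.resources.reservations.devices` stanza only takes effect when the host has NVIDIA drivers and the NVIDIA Container Toolkit installed; a quick way to confirm the containers can actually see the GPU (a hedged one-liner, not from the commit; `nvidia-smi` ships in the CUDA base image used by Dockerfile-gpu):

```
docker compose -f docker-compose-gpu.yml run --rm backend nvidia-smi
```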

requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -158,4 +158,4 @@ google-generativeai==0.1.0
 unstructured==0.8.1
 ai21==1.2.6
 typing-extensions==4.5.0
-llama_cpp_python==0.2.7
+llama_cpp_python==0.2.7

(Here too the removed and added lines are textually identical, so this is likely a whitespace-only fix such as restoring the trailing newline.)

superagi/helper/llm_loader.py

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@ def model(self):
         if self._model is None:
             try:
                 self._model = Llama(
-                    model_path="/app/local_model_path", n_ctx=self.context_length)
+                    model_path="/app/local_model_path", n_ctx=self.context_length, n_gpu_layers=get_config('GPU_LAYERS', '-1'))
             except Exception as e:
                 logger.error(e)
         return self._model
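
One detail worth flagging: `get_config('GPU_LAYERS', '-1')` passes the raw config value through, so the default here is the string `'-1'`, while llama-cpp-python declares `n_gpu_layers` as an integer (with `-1` meaning offload all layers). A defensive sketch of the loader with an explicit cast (a hypothetical variant, not what the commit ships; the class scaffolding and import paths are assumptions inferred from the surrounding diff):

```
# Hypothetical variant of superagi/helper/llm_loader.py with an explicit
# int cast; the scaffolding below is assumed, not taken from the commit.
from llama_cpp import Llama

from superagi.config.config import get_config
from superagi.lib.logger import logger


class LLMLoader:
    """Lazily loads a local GGUF model and offloads layers to the GPU."""

    def __init__(self, context_length):
        self.context_length = context_length
        self._model = None

    @property
    def model(self):
        if self._model is None:
            try:
                self._model = Llama(
                    model_path="/app/local_model_path",
                    n_ctx=self.context_length,
                    # get_config may return a string; Llama expects an int,
                    # where -1 means "offload every layer".
                    n_gpu_layers=int(get_config('GPU_LAYERS', '-1')))
            except Exception as e:
                logger.error(e)
        return self._model
```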

tests/unit_tests/tools/duck_duck_go/test_duckduckgo_results.py

Lines changed: 7 additions & 1 deletion
@@ -1,3 +1,5 @@
+import unittest
+from unittest.mock import patch
 import pytest
 from superagi.tools.duck_duck_go.duck_duck_go_search import DuckDuckGoSearchTool
 

@@ -11,9 +13,13 @@ def test_get_raw_duckduckgo_results_empty_query(self):
         result = self.your_obj.get_raw_duckduckgo_results(query)
         assert result == expected_result
 
-    def test_get_raw_duckduckgo_results_valid_query(self):
+    @patch('superagi.tools.duck_duck_go.duck_duck_go_search.DuckDuckGoSearchTool.get_raw_duckduckgo_results')
+    def test_get_raw_duckduckgo_results_valid_query(self, mock_get_raw_duckduckgo_results):
         query = "python"
         expected_result_length = 10
+        mock_results = ['result1', 'result2', 'result3', 'result4', 'result5',
+                        'result6', 'result7', 'result8', 'result9', 'result10']
+        mock_get_raw_duckduckgo_results.return_value = mock_results
         result = self.your_obj.get_raw_duckduckgo_results(query)
         assert len(result) == expected_result_length
 
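
Patching `get_raw_duckduckgo_results` on the tool class replaces the live DuckDuckGo call with the canned ten-item list, so the test now runs offline and deterministically; the trade-off is that the assertion exercises the stubbed return value rather than the real search path, which is the usual compromise when removing network dependencies from a unit test.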
