Skip to content

Commit bc49f2f

Browse files
authored
Infinity improvements (#26)
- Uses v2 command to launch Infinity
- Added embedding model "BAAI/bge-small-en-v1.5"
- Added reranking model "mixedbread-ai/mxbai-rerank-xsmall-v1"
1 parent f9a5c0a commit bc49f2f

File tree

5 files changed

+44
-17
lines changed

5 files changed

+44
-17
lines changed

.env.example

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,4 +27,4 @@ EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
2727
RERANKER_MODEL=cross-encoder/ms-marco-TinyBERT-L-2
2828
OWL_CONCURRENT_ROWS_BATCH_SIZE=5
2929
OWL_CONCURRENT_COLS_BATCH_SIZE=5
30-
OWL_MAX_WRITE_BATCH_SIZE=1000
30+
OWL_MAX_WRITE_BATCH_SIZE=1000

docker/compose.cpu.yml

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,17 @@
11
services:
22
infinity:
3-
image: michaelf34/infinity:0.0.55
4-
entrypoint:
5-
[
6-
"/bin/sh",
7-
"-c",
8-
"(. /app/.venv/bin/activate && infinity_emb v2 --port 6909 --model-id $${EMBEDDING_MODEL} --model-warmup --device cpu &);(. /app/.venv/bin/activate && infinity_emb v2 --port 6919 --model-id $${RERANKER_MODEL} --model-warmup --device cpu )",
9-
]
3+
image: michaelf34/infinity:0.0.70-cpu
4+
container_name: jamai_infinity
5+
command: [
6+
"v2",
7+
"--engine", "torch",
8+
"--port", "6909",
9+
"--model-warmup",
10+
"--model-id", "${EMBEDDING_MODEL}",
11+
"--model-id", "${RERANKER_MODEL}",
12+
]
1013
healthcheck:
11-
test: ["CMD-SHELL", "curl --fail http://localhost:6909/health && curl --fail http://localhost:6919/health || exit 1"]
14+
test: ["CMD-SHELL", "curl --fail http://localhost:6909/health"]
1215
interval: 10s
1316
timeout: 2s
1417
retries: 20

docker/compose.nvidia.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
include:
22
- path:
33
- compose.cpu.yml
4-
- nvidia.yml
4+
- nvidia.yml

docker/nvidia.yml

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,6 @@
11
services:
22
infinity:
3-
entrypoint:
4-
[
5-
"/bin/sh",
6-
"-c",
7-
"(. /app/.venv/bin/activate && infinity_emb v2 --port 6909 --model-id $${EMBEDDING_MODEL} --model-warmup --device cuda &);(. /app/.venv/bin/activate && infinity_emb v2 --port 6919 --model-id $${RERANKER_MODEL} --model-warmup --device cuda )",
8-
]
3+
image: michaelf34/infinity:0.0.70 # Use GPU-compatible image, instead of torch cpu-only.
94
deploy:
105
resources:
116
reservations:

services/api/src/owl/configs/models.json

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,21 @@
5959
}
6060
]
6161
},
62+
{
63+
"id": "ellm/BAAI/bge-small-en-v1.5",
64+
"litellm_id": "openai/BAAI/bge-small-en-v1.5",
65+
"context_length": 512,
66+
"embedding_size": 1024,
67+
"languages": ["mul"],
68+
"capabilities": ["embed"],
69+
"deployments": [
70+
{
71+
"litellm_id": "openai/BAAI/bge-small-en-v1.5",
72+
"api_base": "http://infinity:6909",
73+
"provider": "ellm"
74+
}
75+
]
76+
},
6277
{
6378
"id": "openai/text-embedding-3-large-3072",
6479
"name": "OpenAI Text Embedding 3 Large (3072-dim)",
@@ -123,6 +138,20 @@
123138
}
124139
],
125140
"rerank_models": [
141+
{
142+
"id": "ellm/mixedbread-ai/mxbai-rerank-xsmall-v1",
143+
"name": "ELLM mxbai-rerank-xsmall v1",
144+
"context_length": 512,
145+
"languages": ["en"],
146+
"capabilities": ["rerank"],
147+
"deployments": [
148+
{
149+
"litellm_id": "",
150+
"api_base": "http://infinity:6909",
151+
"provider": "ellm"
152+
}
153+
]
154+
},
126155
{
127156
"id": "ellm/cross-encoder/ms-marco-TinyBERT-L-2",
128157
"name": "ELLM TinyBERT L2",
@@ -152,4 +181,4 @@
152181
]
153182
}
154183
]
155-
}
184+
}

0 commit comments

Comments
 (0)