Skip to content

Commit e4940bc

Browse files
authored
Fix model JSON; Prettier format (#27)
1 parent bc49f2f commit e4940bc

File tree

11 files changed

+155
-66
lines changed

11 files changed

+155
-66
lines changed

.env.example

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ OWL_PORT=6969
2323
OWL_WORKERS=3
2424
DOCIO_WORKERS=1
2525
DOCIO_DEVICE=cpu
26-
EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
27-
RERANKER_MODEL=cross-encoder/ms-marco-TinyBERT-L-2
26+
EMBEDDING_MODEL=BAAI/bge-small-en-v1.5
27+
RERANKER_MODEL=mixedbread-ai/mxbai-rerank-xsmall-v1
2828
OWL_CONCURRENT_ROWS_BATCH_SIZE=5
2929
OWL_CONCURRENT_COLS_BATCH_SIZE=5
30-
OWL_MAX_WRITE_BATCH_SIZE=1000
30+
OWL_MAX_WRITE_BATCH_SIZE=1000

.github/workflows/ci.yml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,9 @@ jobs:
7878
# Replace the org with the key in the .env file
7979
sed -i "s/$org=.*/$org=$key/g" .env
8080
done
81+
sed -i "s:EMBEDDING_MODEL=.*:EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2:g" .env
82+
sed -i "s:RERANKER_MODEL=.*:RERANKER_MODEL=cross-encoder/ms-marco-TinyBERT-L-2:g" .env
83+
echo 'OWL_MODELS_CONFIG=models_ci.json' >> .env
8184
env:
8285
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
8386
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
@@ -91,7 +94,6 @@ jobs:
9194
run: |
9295
set -e
9396
docker compose -p jamai -f docker/compose.cpu.yml --profile minio up --quiet-pull -d --wait
94-
9597
env:
9698
COMPOSE_DOCKER_CLI_BUILD: 1
9799
DOCKER_BUILDKIT: 1
@@ -118,6 +120,7 @@ jobs:
118120
119121
- name: Inspect owl logs if Python SDK tests failed
120122
if: failure() && steps.python_sdk_test_oss.outcome == 'failure'
123+
timeout-minutes: 1
121124
run: docker exec jamai-owl-1 cat /app/api/logs/owl.log
122125

123126
- name: Upload Pytest Test Results
@@ -139,6 +142,7 @@ jobs:
139142
140143
- name: Inspect owl logs if TS/JS SDK tests failed
141144
if: failure() && steps.ts_sdk_test_oss.outcome == 'failure'
145+
timeout-minutes: 1
142146
run: docker exec jamai-owl-1 cat /app/api/logs/owl.log
143147

144148
- name: Update owl service for S3 test
@@ -168,6 +172,7 @@ jobs:
168172
169173
- name: Inspect owl logs if Python SDK tests failed
170174
if: failure() && steps.python_sdk_test_oss_file.outcome == 'failure'
175+
timeout-minutes: 1
171176
run: docker exec jamai-owl-1 cat /app/api/logs/owl.log
172177

173178
lance_tests:

docker/amd.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ services:
55
[
66
"/bin/sh",
77
"-c",
8-
"(. /app/.venv/bin/activate && HIP_VISIBLE_DEVICES=0 infinity_emb v2 --port 6909 --model-id $${EMBEDDING_MODEL} --model-warmup --device cuda --engine torch --no-bettertransformer --no-compile &);(. /app/.venv/bin/activate && HIP_VISIBLE_DEVICES=1 infinity_emb v2 --port 6919 --model-id $${RERANKER_MODEL} --model-warmup --device cuda --engine torch --no-bettertransformer --no-compile)",
8+
". /app/.venv/bin/activate && HIP_VISIBLE_DEVICES=0 infinity_emb v2 --port 6909 --model-id $${EMBEDDING_MODEL} --model-id $${RERANKER_MODEL} --model-warmup --device cuda --engine torch --no-bettertransformer --no-compile",
99
]
1010
# # https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html
1111
# # instruction to specify the AMD GPU resources

docker/compose.cpu.yml

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,7 @@ services:
22
infinity:
33
image: michaelf34/infinity:0.0.70-cpu
44
container_name: jamai_infinity
5-
command: [
6-
"v2",
7-
"--engine", "torch",
8-
"--port", "6909",
9-
"--model-warmup",
10-
"--model-id", "${EMBEDDING_MODEL}",
11-
"--model-id", "${RERANKER_MODEL}",
12-
]
5+
command: ["v2", "--engine", "torch", "--port", "6909", "--model-warmup", "--model-id", "${EMBEDDING_MODEL}", "--model-id", "${RERANKER_MODEL}"]
136
healthcheck:
147
test: ["CMD-SHELL", "curl --fail http://localhost:6909/health"]
158
interval: 10s

docker/compose.nvidia.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
include:
22
- path:
33
- compose.cpu.yml
4-
- nvidia.yml
4+
- nvidia.yml

docker/nvidia.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
services:
22
infinity:
3-
image: michaelf34/infinity:0.0.70 # Use GPU-compatible image, instead of torch cpu-only.
3+
image: michaelf34/infinity:0.0.70 # Use GPU-compatible image, instead of torch cpu-only.
44
deploy:
55
resources:
66
reservations:

services/api/src/owl/configs/models.json

Lines changed: 4 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -44,24 +44,9 @@
4444
}
4545
],
4646
"embed_models": [
47-
{
48-
"id": "ellm/sentence-transformers/all-MiniLM-L6-v2",
49-
"name": "ELLM MiniLM L6 v2",
50-
"context_length": 512,
51-
"embedding_size": 384,
52-
"languages": ["mul"],
53-
"capabilities": ["embed"],
54-
"deployments": [
55-
{
56-
"litellm_id": "openai/sentence-transformers/all-MiniLM-L6-v2",
57-
"api_base": "http://infinity:6909",
58-
"provider": "ellm"
59-
}
60-
]
61-
},
6247
{
6348
"id": "ellm/BAAI/bge-small-en-v1.5",
64-
"litellm_id": "openai/BAAI/bge-small-en-v1.5",
49+
"name": "ELLM BAAI BGE Small EN v1.5",
6550
"context_length": 512,
6651
"embedding_size": 1024,
6752
"languages": ["mul"],
@@ -140,28 +125,14 @@
140125
"rerank_models": [
141126
{
142127
"id": "ellm/mixedbread-ai/mxbai-rerank-xsmall-v1",
143-
"name": "ELLM mxbai-rerank-xsmall v1",
128+
"name": "ELLM MxBAI Rerank XSmall v1",
144129
"context_length": 512,
145130
"languages": ["en"],
146131
"capabilities": ["rerank"],
147132
"deployments": [
148133
{
149134
"litellm_id": "",
150-
"api_base": "http://infinity:6919",
151-
"provider": "ellm"
152-
}
153-
]
154-
},
155-
{
156-
"id": "ellm/cross-encoder/ms-marco-TinyBERT-L-2",
157-
"name": "ELLM TinyBERT L2",
158-
"context_length": 512,
159-
"languages": ["en"],
160-
"capabilities": ["rerank"],
161-
"deployments": [
162-
{
163-
"litellm_id": "",
164-
"api_base": "http://infinity:6919",
135+
"api_base": "http://infinity:6909",
165136
"provider": "ellm"
166137
}
167138
]
@@ -181,4 +152,4 @@
181152
]
182153
}
183154
]
184-
}
155+
}

services/api/src/owl/configs/models_aipc.json

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -129,15 +129,15 @@
129129
],
130130
"embed_models": [
131131
{
132-
"id": "ellm/sentence-transformers/all-MiniLM-L6-v2",
133-
"name": "ELLM MiniLM L6 v2",
132+
"id": "ellm/BAAI/bge-small-en-v1.5",
133+
"name": "ELLM BAAI BGE Small EN v1.5",
134134
"context_length": 512,
135-
"embedding_size": 384,
135+
"embedding_size": 1024,
136136
"languages": ["mul"],
137137
"capabilities": ["embed"],
138138
"deployments": [
139139
{
140-
"litellm_id": "openai/sentence-transformers/all-MiniLM-L6-v2",
140+
"litellm_id": "openai/BAAI/bge-small-en-v1.5",
141141
"api_base": "http://infinity:6909",
142142
"provider": "ellm"
143143
}
@@ -209,15 +209,15 @@
209209
],
210210
"rerank_models": [
211211
{
212-
"id": "ellm/cross-encoder/ms-marco-TinyBERT-L-2",
213-
"name": "ELLM TinyBERT L2",
212+
"id": "ellm/mixedbread-ai/mxbai-rerank-xsmall-v1",
213+
"name": "ELLM MxBAI Rerank XSmall v1",
214214
"context_length": 512,
215215
"languages": ["en"],
216216
"capabilities": ["rerank"],
217217
"deployments": [
218218
{
219219
"litellm_id": "",
220-
"api_base": "http://infinity:6919",
220+
"api_base": "http://infinity:6909",
221221
"provider": "ellm"
222222
}
223223
]
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
{
2+
"llm_models": [
3+
{
4+
"id": "openai/gpt-4o-mini",
5+
"name": "OpenAI GPT-4o Mini",
6+
"context_length": 128000,
7+
"languages": ["mul"],
8+
"capabilities": ["chat", "image"],
9+
"deployments": [
10+
{
11+
"litellm_id": "",
12+
"api_base": "",
13+
"provider": "openai"
14+
}
15+
]
16+
},
17+
{
18+
"id": "anthropic/claude-3-haiku-20240307",
19+
"name": "Anthropic Claude 3 Haiku",
20+
"context_length": 200000,
21+
"languages": ["mul"],
22+
"capabilities": ["chat"],
23+
"deployments": [
24+
{
25+
"litellm_id": "",
26+
"api_base": "",
27+
"provider": "anthropic"
28+
}
29+
]
30+
},
31+
{
32+
"id": "together_ai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
33+
"name": "Together AI Meta Llama 3.1 (8B)",
34+
"context_length": 130000,
35+
"languages": ["mul"],
36+
"capabilities": ["chat"],
37+
"deployments": [
38+
{
39+
"litellm_id": "",
40+
"api_base": "",
41+
"provider": "together_ai"
42+
}
43+
]
44+
}
45+
],
46+
"embed_models": [
47+
{
48+
"id": "ellm/sentence-transformers/all-MiniLM-L6-v2",
49+
"name": "ELLM MiniLM L6 v2",
50+
"context_length": 512,
51+
"embedding_size": 384,
52+
"languages": ["mul"],
53+
"capabilities": ["embed"],
54+
"deployments": [
55+
{
56+
"litellm_id": "openai/sentence-transformers/all-MiniLM-L6-v2",
57+
"api_base": "http://infinity:6909",
58+
"provider": "ellm"
59+
}
60+
]
61+
},
62+
{
63+
"id": "openai/text-embedding-3-small-512",
64+
"name": "OpenAI Text Embedding 3 Small (512-dim)",
65+
"context_length": 8192,
66+
"embedding_size": 512,
67+
"dimensions": 512,
68+
"languages": ["mul"],
69+
"capabilities": ["embed"],
70+
"deployments": [
71+
{
72+
"litellm_id": "text-embedding-3-small",
73+
"api_base": "",
74+
"provider": "openai"
75+
}
76+
]
77+
},
78+
{
79+
"id": "cohere/embed-multilingual-v3.0",
80+
"name": "Cohere Embed Multilingual v3.0",
81+
"context_length": 512,
82+
"embedding_size": 1024,
83+
"languages": ["mul"],
84+
"capabilities": ["embed"],
85+
"deployments": [
86+
{
87+
"litellm_id": "embed-multilingual-v3.0",
88+
"api_base": "",
89+
"provider": "cohere"
90+
}
91+
]
92+
}
93+
],
94+
"rerank_models": [
95+
{
96+
"id": "ellm/cross-encoder/ms-marco-TinyBERT-L-2",
97+
"name": "ELLM TinyBERT L2",
98+
"context_length": 512,
99+
"languages": ["en"],
100+
"capabilities": ["rerank"],
101+
"deployments": [
102+
{
103+
"litellm_id": "",
104+
"api_base": "http://infinity:6909",
105+
"provider": "ellm"
106+
}
107+
]
108+
},
109+
{
110+
"id": "cohere/rerank-multilingual-v3.0",
111+
"name": "Cohere Rerank Multilingual v3.0",
112+
"context_length": 512,
113+
"languages": ["mul"],
114+
"capabilities": ["rerank"],
115+
"deployments": [
116+
{
117+
"litellm_id": "",
118+
"api_base": "",
119+
"provider": "cohere"
120+
}
121+
]
122+
}
123+
]
124+
}

services/api/src/owl/configs/models_ollama.json

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -59,15 +59,15 @@
5959
],
6060
"embed_models": [
6161
{
62-
"id": "ellm/sentence-transformers/all-MiniLM-L6-v2",
63-
"name": "ELLM MiniLM L6 v2",
62+
"id": "ellm/BAAI/bge-small-en-v1.5",
63+
"name": "ELLM BAAI BGE Small EN v1.5",
6464
"context_length": 512,
65-
"embedding_size": 384,
65+
"embedding_size": 1024,
6666
"languages": ["mul"],
6767
"capabilities": ["embed"],
6868
"deployments": [
6969
{
70-
"litellm_id": "openai/sentence-transformers/all-MiniLM-L6-v2",
70+
"litellm_id": "openai/BAAI/bge-small-en-v1.5",
7171
"api_base": "http://infinity:6909",
7272
"provider": "ellm"
7373
}
@@ -139,15 +139,15 @@
139139
],
140140
"rerank_models": [
141141
{
142-
"id": "ellm/cross-encoder/ms-marco-TinyBERT-L-2",
143-
"name": "ELLM TinyBERT L2",
142+
"id": "ellm/mixedbread-ai/mxbai-rerank-xsmall-v1",
143+
"name": "ELLM MxBAI Rerank XSmall v1",
144144
"context_length": 512,
145145
"languages": ["en"],
146146
"capabilities": ["rerank"],
147147
"deployments": [
148148
{
149149
"litellm_id": "",
150-
"api_base": "http://infinity:6919",
150+
"api_base": "http://infinity:6909",
151151
"provider": "ellm"
152152
}
153153
]

services/app/electron/main.js

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,10 +68,6 @@ app.whenReady().then(() => {
6868
// cmd: [path.resolve('resources/infinity_server/infinity_server.exe'), 'v1', '--host', '127.0.0.1', '--port', '6909', '--model-warmup', '--device', 'cpu', '--model-name-or-path', 'sentence-transformers/all-MiniLM-L6-v2'],
6969
// cwd: path.resolve('resources/infinity_server'),
7070
// },
71-
// reranker: {
72-
// cmd: [path.resolve('resources/infinity_server/infinity_server.exe'), 'v1', '--host', '127.0.0.1', '--port', '6919', '--model-warmup', '--device', 'cpu', '--model-name-or-path', 'cross-encoder/ms-marco-TinyBERT-L-2'],
73-
// cwd: path.resolve('resources/infinity_server'),
74-
// },
7571
// ellm_api_server: {
7672
// cmd: [path.resolve('resources/ellm_api_server/ellm_api_server.exe'), '--model_path', path.resolve('resources/phi3-mini-directml-int4-awq-block-128'), '--port', '5555'],
7773
// cwd: path.resolve('resources'),

0 commit comments

Comments (0)