
Commit 104e4cb

[Feat] Add infinity embedding support (contributor pr) (#10196)
* Feature - infinity support for #8764 (#10009)
  * Added support for infinity embeddings
  * Added test cases
  * Fixed tests and api base
  * Updated docs and tests
  * Removed unused import
  * Updated signature
  * Updated validate params

  Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>

* fix InfinityEmbeddingConfig

Co-authored-by: Prathamesh Saraf <pratamesh1867@gmail.com>
1 parent 0c2f705 commit 104e4cb

File tree

12 files changed (+529 −22 lines)


.env.example

Lines changed: 2 additions & 0 deletions
```diff
@@ -20,6 +20,8 @@ REPLICATE_API_TOKEN = ""
 ANTHROPIC_API_KEY = ""
 # Infisical
 INFISICAL_TOKEN = ""
+# INFINITY
+INFINITY_API_KEY = ""
 
 # Development Configs
 LITELLM_MASTER_KEY = "sk-1234"
```
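For local testing, a minimal sketch of wiring these variables up from Python before calling the SDK; values are placeholders, and the embedding docs below set `INFINITY_API_BASE` the same way:

```python
import os

# Placeholder values; point INFINITY_API_BASE at your own Infinity server.
os.environ["INFINITY_API_KEY"] = "sk-placeholder"
os.environ["INFINITY_API_BASE"] = "http://localhost:8080"
```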

.gitignore

Lines changed: 1 addition & 0 deletions
```diff
@@ -86,4 +86,5 @@ litellm/proxy/db/migrations/0_init/migration.sql
 litellm/proxy/db/migrations/*
 litellm/proxy/migrations/*config.yaml
 litellm/proxy/migrations/*
+config.yaml
 tests/litellm/litellm_core_utils/llm_cost_calc/log.txt
```

docs/my-website/docs/providers/infinity.md

Lines changed: 135 additions & 19 deletions
````diff
@@ -3,18 +3,17 @@ import TabItem from '@theme/TabItem';
 
 # Infinity
 
-| Property | Details |
-|-------|-------|
-| Description | Infinity is a high-throughput, low-latency REST API for serving text-embeddings, reranking models and clip|
-| Provider Route on LiteLLM | `infinity/` |
-| Supported Operations | `/rerank` |
-| Link to Provider Doc | [Infinity ↗](https://github.com/michaelfeil/infinity) |
-
+| Property                  | Details                                                                                                     |
+| ------------------------- | ----------------------------------------------------------------------------------------------------------- |
+| Description               | Infinity is a high-throughput, low-latency REST API for serving text-embeddings, reranking models and clip |
+| Provider Route on LiteLLM | `infinity/`                                                                                                 |
+| Supported Operations      | `/rerank`, `/embeddings`                                                                                    |
+| Link to Provider Doc      | [Infinity ↗](https://github.com/michaelfeil/infinity)                                                       |
 
 ## **Usage - LiteLLM Python SDK**
 
 ```python
-from litellm import rerank
+from litellm import rerank, embedding
 import os
 
 os.environ["INFINITY_API_BASE"] = "http://localhost:8080"
@@ -39,8 +38,8 @@ model_list:
   - model_name: custom-infinity-rerank
     litellm_params:
       model: infinity/rerank
-      api_key: os.environ/INFINITY_API_KEY
       api_base: https://localhost:8080
+      api_key: os.environ/INFINITY_API_KEY
 ```
 
 Start litellm
@@ -51,7 +50,9 @@ litellm --config /path/to/config.yaml
 # RUNNING on http://0.0.0.0:4000
 ```
 
-Test request
+## Test request:
+
+### Rerank
 
 ```bash
 curl http://0.0.0.0:4000/rerank \
@@ -70,15 +71,14 @@ curl http://0.0.0.0:4000/rerank \
   }'
 ```
 
+#### Supported Cohere Rerank API Params
 
-## Supported Cohere Rerank API Params
-
-| Param | Type | Description |
-|-------|-------|-------|
-| `query` | `str` | The query to rerank the documents against |
-| `documents` | `list[str]` | The documents to rerank |
-| `top_n` | `int` | The number of documents to return |
-| `return_documents` | `bool` | Whether to return the documents in the response |
+| Param              | Type        | Description                                     |
+| ------------------ | ----------- | ----------------------------------------------- |
+| `query`            | `str`       | The query to rerank the documents against       |
+| `documents`        | `list[str]` | The documents to rerank                         |
+| `top_n`            | `int`       | The number of documents to return               |
+| `return_documents` | `bool`      | Whether to return the documents in the response |
 
 ### Usage - Return Documents
 
@@ -138,6 +138,7 @@ response = rerank(
     raw_scores=True, # 👈 PROVIDER-SPECIFIC PARAM
 )
 ```
+
 </TabItem>
 
 <TabItem value="proxy" label="PROXY">
@@ -161,7 +162,7 @@ litellm --config /path/to/config.yaml
 # RUNNING on http://0.0.0.0:4000
 ```
 
-3. Test it! 
+3. Test it!
 
 ```bash
 curl http://0.0.0.0:4000/rerank \
@@ -179,6 +180,121 @@ curl http://0.0.0.0:4000/rerank \
     "raw_scores": True # 👈 PROVIDER-SPECIFIC PARAM
   }'
 ```
+
 </TabItem>
 
 </Tabs>
+
+## Embeddings
+
+LiteLLM provides an OpenAI api compatible `/embeddings` endpoint for embedding calls.
+
+**Setup**
+
+Add this to your litellm proxy config.yaml
+
+```yaml
+model_list:
+  - model_name: custom-infinity-embedding
+    litellm_params:
+      model: infinity/provider/custom-embedding-v1
+      api_base: http://localhost:8080
+      api_key: os.environ/INFINITY_API_KEY
+```
+
+### Test request:
+
+```bash
+curl http://0.0.0.0:4000/embeddings \
+  -H "Authorization: Bearer sk-1234" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "custom-infinity-embedding",
+    "input": ["hello"]
+  }'
+```
+
+#### Supported Embedding API Params
+
+| Param             | Type        | Description                                                  |
+| ----------------- | ----------- | ------------------------------------------------------------ |
+| `model`           | `str`       | The embedding model to use                                   |
+| `input`           | `list[str]` | The text inputs to generate embeddings for                   |
+| `encoding_format` | `str`       | The format to return embeddings in (e.g. "float", "base64")  |
+| `modality`        | `str`       | The type of input (e.g. "text", "image", "audio")            |
+
+### Usage - Basic Examples
+
+<Tabs>
+<TabItem value="sdk" label="SDK">
+
+```python
+from litellm import embedding
+import os
+
+os.environ["INFINITY_API_BASE"] = "http://localhost:8080"
+
+response = embedding(
+    model="infinity/bge-small",
+    input=["good morning from litellm"]
+)
+
+print(response.data[0]['embedding'])
+```
+
+</TabItem>
+
+<TabItem value="proxy" label="PROXY">
+
+```bash
+curl http://0.0.0.0:4000/embeddings \
+  -H "Authorization: Bearer sk-1234" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "custom-infinity-embedding",
+    "input": ["hello"]
+  }'
+```
+
+</TabItem>
+</Tabs>
+
+### Usage - OpenAI Client
+
+<Tabs>
+<TabItem value="sdk" label="SDK">
+
+```python
+from openai import OpenAI
+
+client = OpenAI(
+    api_key="<LITELLM_MASTER_KEY>",
+    base_url="<LITELLM_URL>"
+)
+
+response = client.embeddings.create(
+    model="bge-small",
+    input=["The food was delicious and the waiter..."],
+    encoding_format="float"
+)
+
+print(response.data[0].embedding)
+```
+
+</TabItem>
+
+<TabItem value="proxy" label="PROXY">
+
+```bash
+curl http://0.0.0.0:4000/embeddings \
+  -H "Authorization: Bearer sk-1234" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "bge-small",
+    "input": ["The food was delicious and the waiter..."],
+    "encoding_format": "float"
+  }'
+```
+
+</TabItem>
+</Tabs>
````

litellm/__init__.py

Lines changed: 6 additions & 0 deletions
```diff
@@ -415,6 +415,7 @@ def identify(event_details):
 azure_ai_models: List = []
 jina_ai_models: List = []
 voyage_models: List = []
+infinity_models: List = []
 databricks_models: List = []
 cloudflare_models: List = []
 codestral_models: List = []
@@ -556,6 +557,8 @@ def add_known_models():
             azure_ai_models.append(key)
         elif value.get("litellm_provider") == "voyage":
             voyage_models.append(key)
+        elif value.get("litellm_provider") == "infinity":
+            infinity_models.append(key)
         elif value.get("litellm_provider") == "databricks":
             databricks_models.append(key)
         elif value.get("litellm_provider") == "cloudflare":
@@ -644,6 +647,7 @@ def add_known_models():
     + deepseek_models
     + azure_ai_models
     + voyage_models
+    + infinity_models
     + databricks_models
     + cloudflare_models
     + codestral_models
@@ -699,6 +703,7 @@ def add_known_models():
     "mistral": mistral_chat_models,
     "azure_ai": azure_ai_models,
     "voyage": voyage_models,
+    "infinity": infinity_models,
     "databricks": databricks_models,
     "cloudflare": cloudflare_models,
     "codestral": codestral_models,
@@ -946,6 +951,7 @@ def add_known_models():
 from litellm.llms.openai.completion.transformation import OpenAITextCompletionConfig
 from .llms.groq.chat.transformation import GroqChatConfig
 from .llms.voyage.embedding.transformation import VoyageEmbeddingConfig
+from .llms.infinity.embedding.transformation import InfinityEmbeddingConfig
 from .llms.azure_ai.chat.transformation import AzureAIStudioConfig
 from .llms.mistral.mistral_chat_transformation import MistralConfig
 from .llms.openai.responses.transformation import OpenAIResponsesAPIConfig
```
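A quick sketch of what this registration exposes, assuming the provider-to-models dict in the fourth hunk is the `models_by_provider` mapping litellm exports (which models appear depends on your `model_cost` entries):

```python
import litellm

# Models whose model_cost entry has litellm_provider == "infinity"
# are collected into the new bucket by add_known_models().
print(litellm.models_by_provider.get("infinity", []))

# The embedding config re-exported at the package root.
print(litellm.InfinityEmbeddingConfig)
```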

litellm/litellm_core_utils/get_supported_openai_params.py

Lines changed: 2 additions & 0 deletions
```diff
@@ -221,6 +221,8 @@ def get_supported_openai_params(  # noqa: PLR0915
         return litellm.PredibaseConfig().get_supported_openai_params(model=model)
     elif custom_llm_provider == "voyage":
         return litellm.VoyageEmbeddingConfig().get_supported_openai_params(model=model)
+    elif custom_llm_provider == "infinity":
+        return litellm.InfinityEmbeddingConfig().get_supported_openai_params(model=model)
     elif custom_llm_provider == "triton":
         if request_type == "embeddings":
             return litellm.TritonEmbeddingConfig().get_supported_openai_params(
```
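To see the new branch in action, a minimal sketch; the model name is hypothetical and the returned list is whatever `InfinityEmbeddingConfig` reports:

```python
import litellm

# Routes through the new "infinity" branch added above.
params = litellm.get_supported_openai_params(
    model="bge-small",
    custom_llm_provider="infinity",
)
print(params)
```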

litellm/llms/infinity/rerank/common_utils.py renamed to litellm/llms/infinity/common_utils.py

Lines changed: 8 additions & 1 deletion
```diff
@@ -1,10 +1,16 @@
+from typing import Union
 import httpx
 
 from litellm.llms.base_llm.chat.transformation import BaseLLMException
 
 
 class InfinityError(BaseLLMException):
-    def __init__(self, status_code, message):
+    def __init__(
+        self,
+        status_code: int,
+        message: str,
+        headers: Union[dict, httpx.Headers] = {}
+    ):
         self.status_code = status_code
         self.message = message
         self.request = httpx.Request(
@@ -16,4 +22,5 @@ def __init__(self, status_code, message):
             message=message,
             request=self.request,
             response=self.response,
+            headers=headers,
         )  # Call the base class constructor with the parameters it needs
```
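A minimal sketch of the widened constructor, using the renamed import path; the header values are illustrative:

```python
import httpx

from litellm.llms.infinity.common_utils import InfinityError

try:
    # The new optional `headers` argument is forwarded to BaseLLMException.
    raise InfinityError(
        status_code=429,
        message="Infinity server rejected the request",
        headers=httpx.Headers({"retry-after": "2"}),
    )
except InfinityError as e:
    print(e.status_code, e.message)
```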
Lines changed: 5 additions & 0 deletions
```diff
@@ -0,0 +1,5 @@
+"""
+Infinity Embedding - uses `llm_http_handler.py` to make httpx requests
+
+Request/Response transformation is handled in `transformation.py`
+"""
```
