Add `headers` parameter to the OCI Model Deployment LLM and chat model clients

lu-ohai · lu-ohai · commit 238b6d90d2f8 · 2024-12-06T10:40:31.000-05:00
diff --git a/ads/llm/langchain/plugins/chat_models/oci_data_science.py b/ads/llm/langchain/plugins/chat_models/oci_data_science.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*--
 
-# Copyright (c) 2023 Oracle and/or its affiliates.
+# Copyright (c) 2024 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 """Chat model for OCI data science model deployment endpoint."""
 
@@ -50,6 +49,7 @@
 )
 
 logger = logging.getLogger(__name__)
+DEFAULT_INFERENCE_ENDPOINT_CHAT = "/v1/chat/completions"
 
 
 def _is_pydantic_class(obj: Any) -> bool:
@@ -93,6 +93,8 @@ class ChatOCIModelDeployment(BaseChatModel, BaseOCIModelDeployment):
     Key init args — client params:
         auth: dict
             ADS auth dictionary for OCI authentication.
+        headers: Optional[Dict]
+            The headers to be added to the Model Deployment request.
 
     Instantiate:
         .. code-block:: python
@@ -109,6 +111,10 @@ class ChatOCIModelDeployment(BaseChatModel, BaseOCIModelDeployment):
                     "temperature": 0.2,
                     # other model parameters ...
                 },
+                headers={
+                    "route": "/v1/chat/completions",
+                    # other request headers ...
+                },
             )
 
     Invocation:
@@ -257,6 +263,9 @@ def _construct_json_body(self, messages: list, params: dict) -> dict:
     """Stop words to use when generating. Model output is cut off
     at the first occurrence of any of these substrings."""
 
+    headers: Optional[Dict[str, Any]] = {"route": DEFAULT_INFERENCE_ENDPOINT_CHAT}
+    """The headers to be added to the Model Deployment request."""
+
     @model_validator(mode="before")
     @classmethod
     def validate_openai(cls, values: Any) -> Any:
@@ -704,7 +713,7 @@ def _process_response(self, response_json: dict) -> ChatResult:
 
         for choice in choices:
             message = _convert_dict_to_message(choice["message"])
-            generation_info = dict(finish_reason=choice.get("finish_reason"))
+            generation_info = {"finish_reason": choice.get("finish_reason")}
             if "logprobs" in choice:
                 generation_info["logprobs"] = choice["logprobs"]
 
@@ -794,7 +803,7 @@ class ChatOCIModelDeploymentVLLM(ChatOCIModelDeployment):
     """Number of most likely tokens to consider at each step."""
 
     min_p: Optional[float] = 0.0
-    """Float that represents the minimum probability for a token to be considered. 
+    """Float that represents the minimum probability for a token to be considered.
     Must be in [0,1]. 0 to disable this."""
 
     repetition_penalty: Optional[float] = 1.0
@@ -818,7 +827,7 @@ class ChatOCIModelDeploymentVLLM(ChatOCIModelDeployment):
     the EOS token is generated."""
 
     min_tokens: Optional[int] = 0
-    """Minimum number of tokens to generate per output sequence before 
+    """Minimum number of tokens to generate per output sequence before
     EOS or stop_token_ids can be generated"""
 
     stop_token_ids: Optional[List[int]] = None
@@ -836,7 +845,7 @@ class ChatOCIModelDeploymentVLLM(ChatOCIModelDeployment):
     tool_choice: Optional[str] = None
     """Whether to use tool calling.
     Defaults to None, tool calling is disabled.
-    Tool calling requires model support and the vLLM to be configured 
+    Tool calling requires model support and the vLLM to be configured
     with `--tool-call-parser`.
     Set this to `auto` for the model to make tool calls automatically.
     Set this to `required` to force the model to always call one or more tools.
@@ -956,9 +965,9 @@ class ChatOCIModelDeploymentTGI(ChatOCIModelDeployment):
     """Total probability mass of tokens to consider at each step."""
 
     top_logprobs: Optional[int] = None
-    """An integer between 0 and 5 specifying the number of most 
-    likely tokens to return at each token position, each with an 
-    associated log probability. logprobs must be set to true if 
+    """An integer between 0 and 5 specifying the number of most
+    likely tokens to return at each token position, each with an
+    associated log probability. logprobs must be set to true if
     this parameter is used."""
 
     @property
diff --git a/ads/llm/langchain/plugins/llms/oci_data_science_model_deployment_endpoint.py b/ads/llm/langchain/plugins/llms/oci_data_science_model_deployment_endpoint.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*--
 
-# Copyright (c) 2023 Oracle and/or its affiliates.
+# Copyright (c) 2024 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
 
@@ -24,6 +23,7 @@
 
 import aiohttp
 import requests
+from langchain_community.utilities.requests import Requests
 from langchain_core.callbacks import (
     AsyncCallbackManagerForLLMRun,
     CallbackManagerForLLMRun,
@@ -34,14 +34,13 @@
 from langchain_core.utils import get_from_dict_or_env
 from pydantic import Field, model_validator
 
-from langchain_community.utilities.requests import Requests
-
 logger = logging.getLogger(__name__)
 
 
 DEFAULT_TIME_OUT = 300
 DEFAULT_CONTENT_TYPE_JSON = "application/json"
 DEFAULT_MODEL_NAME = "odsc-llm"
+DEFAULT_INFERENCE_ENDPOINT = "/v1/completions"
 
 
 class TokenExpiredError(Exception):
@@ -86,6 +85,9 @@ class BaseOCIModelDeployment(Serializable):
     max_retries: int = 3
     """Maximum number of retries to make when generating."""
 
+    headers: Optional[Dict[str, Any]] = {"route": DEFAULT_INFERENCE_ENDPOINT}
+    """The headers to be added to the Model Deployment request."""
+
     @model_validator(mode="before")
     @classmethod
     def validate_environment(cls, values: Dict) -> Dict:
@@ -101,7 +103,7 @@ def validate_environment(cls, values: Dict) -> Dict:
                 "Please install it with `pip install oracle_ads`."
             ) from ex
 
-        if not values.get("auth", None):
+        if not values.get("auth"):
             values["auth"] = ads.common.auth.default_signer()
 
         values["endpoint"] = get_from_dict_or_env(
@@ -125,12 +127,12 @@ def _headers(
         Returns:
             Dict: A dictionary containing the appropriate headers for the request.
         """
+        headers = self.headers
         if is_async:
             signer = self.auth["signer"]
             _req = requests.Request("POST", self.endpoint, json=body)
             req = _req.prepare()
             req = signer(req)
-            headers = {}
             for key, value in req.headers.items():
                 headers[key] = value
 
@@ -140,7 +142,7 @@ def _headers(
                 )
             return headers
 
-        return (
+        headers.update(
             {
                 "Content-Type": DEFAULT_CONTENT_TYPE_JSON,
                 "enable-streaming": "true",
@@ -152,6 +154,8 @@ def _headers(
             }
         )
 
+        return headers
+
     def completion_with_retry(
         self, run_manager: Optional[CallbackManagerForLLMRun] = None, **kwargs: Any
     ) -> Any:
@@ -357,7 +361,7 @@ def _refresh_signer(self) -> bool:
             self.auth["signer"].refresh_security_token()
             return True
         return False
-    
+
     @classmethod
     def is_lc_serializable(cls) -> bool:
         """Return whether this model can be serialized by LangChain."""
@@ -388,6 +392,10 @@ class OCIModelDeploymentLLM(BaseLLM, BaseOCIModelDeployment):
                 model="odsc-llm",
                 streaming=True,
                 model_kwargs={"frequency_penalty": 1.0},
+                headers={
+                    "route": "/v1/completions",
+                    # other request headers ...
+                }
             )
             llm.invoke("tell me a joke.")
 
@@ -712,9 +720,9 @@ def _process_response(self, response_json: dict) -> List[Generation]:
     def _generate_info(self, choice: dict) -> Any:
         """Extracts generation info from the response."""
         gen_info = {}
-        finish_reason = choice.get("finish_reason", None)
-        logprobs = choice.get("logprobs", None)
-        index = choice.get("index", None)
+        finish_reason = choice.get("finish_reason")
+        logprobs = choice.get("logprobs")
+        index = choice.get("index")
         if finish_reason:
             gen_info.update({"finish_reason": finish_reason})
         if logprobs is not None: