
Commit d1b0ede

Update Generative AI LLM model to use new APIs. (#523)

2 parents 9968a51 + b1f1f42

7 files changed: +135 −72 lines changed

ads/llm/langchain/plugins/base.py
Lines changed: 16 additions & 11 deletions

@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*--
 
-# Copyright (c) 2023 Oracle and/or its affiliates.
+# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 from typing import Any, Dict, List, Optional
 
@@ -77,27 +77,32 @@ class GenerativeAiClientModel(BaseModel):
     client_kwargs: Dict[str, Any] = {}
     """Holds any client parameters for creating GenerativeAiClient"""
 
-    @root_validator()
-    def validate_environment(  # pylint: disable=no-self-argument
-        cls, values: Dict
-    ) -> Dict:
-        """Validate that python package exists in environment."""
+    @staticmethod
+    def _import_client():
         try:
-            # Import the GenerativeAIClient here so that there will be no error when user import ads.llm
-            # and the install OCI SDK does not support generative AI service yet.
-            from oci.generative_ai import GenerativeAiClient
+            from oci.generative_ai_inference import GenerativeAiInferenceClient
         except ImportError as ex:
             raise ImportError(
-                "Could not import GenerativeAIClient from oci. "
+                "Could not import GenerativeAiInferenceClient from oci. "
                 "The OCI SDK installed does not support generative AI service."
             ) from ex
+        return GenerativeAiInferenceClient
+
+    @root_validator()
+    def validate_environment(  # pylint: disable=no-self-argument
+        cls, values: Dict
+    ) -> Dict:
+        """Validate that python package exists in environment."""
         # Initialize client only if user does not pass in client.
         # Users may choose to initialize the OCI client by themselves and pass it into this model.
         if not values.get("client"):
             auth = values.get("auth", {})
             client_kwargs = auth.get("client_kwargs") or {}
             client_kwargs.update(values["client_kwargs"])
-            values["client"] = GenerativeAiClient(**auth, **client_kwargs)
+            # Import the GenerativeAIClient here so that there will be no error when user import ads.llm
+            # and the install OCI SDK does not support generative AI service yet.
+            client_class = cls._import_client()
+            values["client"] = client_class(**auth, **client_kwargs)
         # Set default compartment ID
         if not values.get("compartment_id"):
             if COMPARTMENT_OCID:
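
Note: moving the import into the _import_client() staticmethod keeps "import ads.llm" working even when the installed OCI SDK predates the Generative AI inference service, because the client class is only resolved when a client is actually constructed. A minimal standalone sketch of the same pattern, assuming only the oci SDK; the make_client helper and its arguments are hypothetical, for illustration:

from typing import Any, Dict


def _import_client():
    try:
        # Imported lazily so that importing this module never fails,
        # even if the installed OCI SDK lacks the inference service.
        from oci.generative_ai_inference import GenerativeAiInferenceClient
    except ImportError as ex:
        raise ImportError(
            "Could not import GenerativeAiInferenceClient from oci. "
            "The OCI SDK installed does not support generative AI service."
        ) from ex
    return GenerativeAiInferenceClient


def make_client(auth: Dict[str, Any], client_kwargs: Dict[str, Any]):
    # Resolve the class only at construction time, never at module import.
    client_class = _import_client()
    return client_class(**auth, **client_kwargs)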

ads/llm/langchain/plugins/contant.py
Lines changed: 2 additions & 7 deletions

@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*--
 
-# Copyright (c) 2023 Oracle and/or its affiliates.
+# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 from enum import Enum
 
@@ -21,7 +21,7 @@ class StrEnum(str, Enum):
 
 class Task(StrEnum):
     TEXT_GENERATION = "text_generation"
-    SUMMARY_TEXT = "summary_text"
+    TEXT_SUMMARIZATION = "text_summarization"
 
 
 class LengthParam(StrEnum):
@@ -42,8 +42,3 @@ class ExtractivenessParam(StrEnum):
     MEDIUM = "MEDIUM"
     HIGH = "HIGH"
     AUTO = "AUTO"
-
-
-class OCIGenerativeAIModel(StrEnum):
-    COHERE_COMMAND = "cohere.command"
-    COHERE_COMMAND_LIGHT = "cohere.command-light"
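Note: because StrEnum subclasses str, enum members compare equal to their plain-string values, which is why the str-typed task field on the model can be checked against Task.TEXT_SUMMARIZATION directly. A self-contained sketch of the mechanism:

from enum import Enum


class StrEnum(str, Enum):
    """Enum whose members are also strings."""


class Task(StrEnum):
    TEXT_GENERATION = "text_generation"
    TEXT_SUMMARIZATION = "text_summarization"


# Members compare equal to plain strings, so a field declared as
# task: str = "text_generation" can be tested against the enum.
assert Task.TEXT_SUMMARIZATION == "text_summarization"
assert "text_generation" == Task.TEXT_GENERATION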

ads/llm/langchain/plugins/embeddings.py
Lines changed: 5 additions & 2 deletions

@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*--
 
-# Copyright (c) 2023 Oracle and/or its affiliates.
+# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
 from typing import List, Optional
@@ -38,7 +38,10 @@ def embed_documents(self, texts: List[str]) -> List[List[float]]:
         Returns:
             List of embeddings, one for each text.
         """
-        from oci.generative_ai.models import EmbedTextDetails, OnDemandServingMode
+        from oci.generative_ai_inference.models import (
+            EmbedTextDetails,
+            OnDemandServingMode,
+        )
 
         details = EmbedTextDetails(
             compartment_id=self.compartment_id,
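
Note: for reference, a sketch of exercising the renamed imports directly against the inference client, outside the LangChain wrapper. The compartment OCID and model ID are placeholders, and the EmbedTextDetails field names follow current oci SDK releases; treat the details as assumptions:

# Sketch only: calls the inference API directly with default ~/.oci/config auth.
import oci
from oci.generative_ai_inference import GenerativeAiInferenceClient
from oci.generative_ai_inference.models import EmbedTextDetails, OnDemandServingMode

config = oci.config.from_file()
client = GenerativeAiInferenceClient(
    config,
    service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
)
details = EmbedTextDetails(
    compartment_id="<compartment_ocid>",  # placeholder
    serving_mode=OnDemandServingMode(model_id="cohere.embed-english-v3.0"),  # example ID
    inputs=["Hello world!"],
)
response = client.embed_text(details)
print(response.data.embeddings[0][:5])  # first few floats of the first embedding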

ads/llm/langchain/plugins/llm_gen_ai.py
Lines changed: 88 additions & 36 deletions

@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*--
 
-# Copyright (c) 2023 Oracle and/or its affiliates.
+# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
 import logging
@@ -10,7 +10,7 @@
 from langchain.callbacks.manager import CallbackManagerForLLMRun
 
 from ads.llm.langchain.plugins.base import BaseLLM, GenerativeAiClientModel
-from ads.llm.langchain.plugins.contant import *
+from ads.llm.langchain.plugins.contant import Task
 
 logger = logging.getLogger(__name__)
 
@@ -32,7 +32,7 @@ class GenerativeAI(GenerativeAiClientModel, BaseLLM):
     """
 
     task: str = "text_generation"
-    """Indicates the task."""
+    """Task can be either text_generation or text_summarization."""
 
     model: Optional[str] = "cohere.command"
     """Model name to use."""
@@ -106,7 +106,7 @@ def _default_params(self) -> Dict[str, Any]:
 
     def _invocation_params(self, stop: Optional[List[str]], **kwargs: Any) -> dict:
         params = self._default_params
-        if self.task == Task.SUMMARY_TEXT:
+        if self.task == Task.TEXT_SUMMARIZATION:
             return {**params}
 
         if self.stop is not None and stop is not None:
@@ -149,11 +149,7 @@ def _call(
         self._print_request(prompt, params)
 
         try:
-            response = (
-                self.completion_with_retry(prompts=[prompt], **params)
-                if self.task == Task.TEXT_GENERATION
-                else self.completion_with_retry(input=prompt, **params)
-            )
+            completion = self.completion_with_retry(prompt=prompt, **params)
         except Exception:
             logger.error(
                 "Error occur when invoking oci service api."
@@ -164,39 +160,95 @@ def _call(
             )
             raise
 
-        completion = self._process_response(response, params.get("num_generations", 1))
-        self._print_response(completion, response)
         return completion
 
-    def _process_response(self, response: Any, num_generations: int = 1) -> str:
-        if self.task == Task.SUMMARY_TEXT:
-            return response.data.summary
+    def _text_generation(self, request_class, serving_mode, **kwargs):
+        from oci.generative_ai_inference.models import (
+            GenerateTextDetails,
+            GenerateTextResult,
+        )
 
-        return (
-            response.data.generated_texts[0][0].text
-            if num_generations == 1
-            else [gen.text for gen in response.data.generated_texts[0]]
+        compartment_id = kwargs.pop("compartment_id")
+        inference_request = request_class(**kwargs)
+        response = self.client.generate_text(
+            GenerateTextDetails(
+                compartment_id=compartment_id,
+                serving_mode=serving_mode,
+                inference_request=inference_request,
+            ),
+            **self.endpoint_kwargs,
+        ).data
+        response: GenerateTextResult
+        return response.inference_response
+
+    def _cohere_completion(self, serving_mode, **kwargs) -> str:
+        from oci.generative_ai_inference.models import (
+            CohereLlmInferenceRequest,
+            CohereLlmInferenceResponse,
         )
 
-    def completion_with_retry(self, **kwargs: Any) -> Any:
-        from oci.generative_ai.models import (
-            GenerateTextDetails,
-            OnDemandServingMode,
-            SummarizeTextDetails,
+        response = self._text_generation(
+            CohereLlmInferenceRequest, serving_mode, **kwargs
         )
+        response: CohereLlmInferenceResponse
+        if kwargs.get("num_generations", 1) == 1:
+            completion = response.generated_texts[0].text
+        else:
+            completion = [result.text for result in response.generated_texts]
+        self._print_response(completion, response)
+        return completion
 
-        # TODO: Add retry logic for OCI
-        # Convert the ``model`` parameter to OCI ``ServingMode``
-        # Note that "ServingMode` is not JSON serializable.
-        kwargs["serving_mode"] = OnDemandServingMode(model_id=self.model)
-        if self.task == Task.TEXT_GENERATION:
-            return self.client.generate_text(
-                GenerateTextDetails(**kwargs), **self.endpoint_kwargs
-            )
+    def _llama_completion(self, serving_mode, **kwargs) -> str:
+        from oci.generative_ai_inference.models import (
+            LlamaLlmInferenceRequest,
+            LlamaLlmInferenceResponse,
+        )
+
+        # truncate and stop_sequence are not supported.
+        kwargs.pop("truncate", None)
+        kwargs.pop("stop_sequences", None)
+        # top_k must be >1 or -1
+        if "top_k" in kwargs and kwargs["top_k"] == 0:
+            kwargs.pop("top_k")
+
+        # top_p must be 1 when temperature is 0
+        if kwargs.get("temperature") == 0:
+            kwargs["top_p"] = 1
+
+        response = self._text_generation(
+            LlamaLlmInferenceRequest, serving_mode, **kwargs
+        )
+        response: LlamaLlmInferenceResponse
+        if kwargs.get("num_generations", 1) == 1:
+            completion = response.choices[0].text
         else:
-            return self.client.summarize_text(
-                SummarizeTextDetails(**kwargs), **self.endpoint_kwargs
-            )
+            completion = [result.text for result in response.choices]
+        self._print_response(completion, response)
+        return completion
+
+    def _cohere_summarize(self, serving_mode, **kwargs) -> str:
+        from oci.generative_ai_inference.models import SummarizeTextDetails
+
+        kwargs["input"] = kwargs.pop("prompt")
+
+        response = self.client.summarize_text(
+            SummarizeTextDetails(serving_mode=serving_mode, **kwargs),
+            **self.endpoint_kwargs,
+        )
+        return response.data.summary
+
+    def completion_with_retry(self, **kwargs: Any) -> Any:
+        from oci.generative_ai_inference.models import OnDemandServingMode
+
+        serving_mode = OnDemandServingMode(model_id=self.model)
+
+        if self.task == Task.TEXT_SUMMARIZATION:
+            return self._cohere_summarize(serving_mode, **kwargs)
+        elif self.model.startswith("cohere"):
+            return self._cohere_completion(serving_mode, **kwargs)
+        elif self.model.startswith("meta.llama"):
+            return self._llama_completion(serving_mode, **kwargs)
+        raise ValueError(f"Model {self.model} is not supported.")
 
     def batch_completion(
         self,
@@ -235,9 +287,9 @@ def batch_completion(
         responses = gen_ai.batch_completion("Tell me a joke.", num_generations=5)
 
         """
-        if self.task == Task.SUMMARY_TEXT:
+        if self.task == Task.TEXT_SUMMARIZATION:
             raise NotImplementedError(
-                f"task={Task.SUMMARY_TEXT} does not support batch_completion. "
+                f"task={Task.TEXT_SUMMARIZATION} does not support batch_completion. "
             )
 
         return self._call(
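
Note: completion_with_retry now dispatches on task and model-name prefix rather than task alone: text_summarization goes to _cohere_summarize, "cohere*" models to _cohere_completion, "meta.llama*" models to _llama_completion, and anything else raises ValueError. A usage sketch of the resulting behavior; the model ID and prompt are examples, and .invoke assumes a LangChain version exposing the runnable interface:

# Usage sketch: the same GenerativeAI class serves Cohere and Llama models.
from ads.llm import GenerativeAI

llm = GenerativeAI(
    compartment_id="<compartment_ocid>",  # placeholder
    model="meta.llama-2-70b-chat",        # example ID; routed to _llama_completion
    client_kwargs={
        "service_endpoint": "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com"
    },
)
# For Llama models, truncate/stop_sequences are dropped and top_k=0 is
# removed before the LlamaLlmInferenceRequest is built, per _llama_completion.
print(llm.invoke("Tell me a joke."))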

docs/source/user_guide/large_language_model/index.rst
Lines changed: 3 additions & 0 deletions

@@ -19,6 +19,9 @@ Integration with LangChain
 **************************
 ADS is designed to work with LangChain, enabling developers to incorporate various LangChain components and models deployed on OCI seamlessly into their applications. Additionally, ADS can package LangChain applications and deploy it as a REST API endpoint using OCI Data Science Model Deployment.
 
+* `Bridging cloud and conversational AI: LangChain and OCI Data Science platform <https://blogs.oracle.com/ai-and-datascience/post/cloud-conversational-ai-langchain-oci-data-science>`_
+* `Deploy LangChain applications as OCI model deployments <https://blogs.oracle.com/ai-and-datascience/post/deploy-langchain-application-as-model-deployment>`_
+
 
 .. admonition:: Installation
    :class: note

docs/source/user_guide/large_language_model/langchain_models.rst
Lines changed: 14 additions & 9 deletions

@@ -26,7 +26,7 @@ To use the text generation model as LLM in LangChain:
         compartment_id="<compartment_ocid>",
         # Optionally you can specify keyword arguments for the OCI client, e.g. service_endpoint.
         client_kwargs={
-            "service_endpoint": "https://generativeai.aiservice.us-chicago-1.oci.oraclecloud.com"
+            "service_endpoint": "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com"
         },
     )
 
@@ -44,22 +44,22 @@ Here is an example of using prompt template and OCI generative AI LLM to build a
     map_input = RunnableParallel(text=RunnablePassthrough())
     # Template for the input text.
     template = PromptTemplate.from_template(
-        "Translate the text into French.\nText:{text}\nFrench translation: "
+        "Translate English into French. Do not ask any questions.\nEnglish: Hello!\nFrench: "
     )
     llm = GenerativeAI(
         compartment_id="<compartment_ocid>",
         # Optionally you can specify keyword arguments for the OCI client, e.g. service_endpoint.
         client_kwargs={
-            "service_endpoint": "https://generativeai.aiservice.us-chicago-1.oci.oraclecloud.com"
+            "service_endpoint": "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com"
         },
     )
 
     # Build the app as a chain
     translation_app = map_input | template | llm
 
     # Now you have a translation app.
-    translation_app.invoke("How are you?")
-    # "Comment ça va?"
+    translation_app.invoke("Hello!")
+    # "Bonjour!"
 
 Similarly, you can use the embedding model:
 
@@ -71,7 +71,7 @@ Similarly, you can use the embedding model:
         compartment_id="<compartment_ocid>",
         # Optionally you can specify keyword arguments for the OCI client, e.g. service_endpoint.
         client_kwargs={
-            "service_endpoint": "https://generativeai.aiservice.us-chicago-1.oci.oraclecloud.com"
+            "service_endpoint": "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com"
         },
     )
 
@@ -80,6 +80,11 @@ Similarly, you can use the embedding model:
 Integration with Model Deployment
 =================================
 
+.. admonition:: Available in LangChain
+   :class: note
+
+   The same ``OCIModelDeploymentVLLM`` and ``ModelDeploymentTGI`` classes are also `available from LangChain <https://python.langchain.com/docs/integrations/llms/oci_model_deployment_endpoint>`_.
+
 If you deploy open-source or your own LLM on OCI model deployment service using `vLLM <https://docs.vllm.ai/en/latest/>`_ or `HuggingFace TGI <https://huggingface.co/docs/text-generation-inference/index>`_ , you can use the ``ModelDeploymentVLLM`` or ``ModelDeploymentTGI`` to integrate your model with LangChain.
 
 .. code-block:: python3
@@ -115,7 +120,7 @@ By default, the integration uses the same authentication method configured with
         compartment_id="<compartment_ocid>",
         # Optionally you can specify keyword arguments for the OCI client, e.g. service_endpoint.
         client_kwargs={
-            "service_endpoint": "https://generativeai.aiservice.us-chicago-1.oci.oraclecloud.com"
+            "service_endpoint": "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com"
         },
     )
 
@@ -132,6 +137,6 @@ Alternatively, you may use specific authentication for the model:
         compartment_id="<compartment_ocid>",
         # Optionally you can specify keyword arguments for the OCI client, e.g. service_endpoint.
         client_kwargs={
-            "service_endpoint": "https://generativeai.aiservice.us-chicago-1.oci.oraclecloud.com"
+            "service_endpoint": "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com"
         },
-    )
+    )
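
Note: the documentation above only covers text generation and embeddings. With the task renamed in contant.py, a summarization call through the same class would look roughly like this; a sketch, assuming the default cohere.command model supports the summarize endpoint and that .invoke is available:

from ads.llm import GenerativeAI

summarizer = GenerativeAI(
    compartment_id="<compartment_ocid>",  # placeholder
    task="text_summarization",            # renamed from summary_text in this commit
    client_kwargs={
        "service_endpoint": "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com"
    },
)
# Routed to _cohere_summarize; the prompt becomes the "input" of
# SummarizeTextDetails and the summary string is returned.
summary = summarizer.invoke("<a long passage of text to condense>")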
