@@ -149,8 +149,8 @@ def _call(
         self._print_request(prompt, params)

         try:
-            response = (
-                self.completion_with_retry(prompts=[prompt], **params)
+            completion = (
+                self.completion_with_retry(prompt=prompt, **params)
                 if self.task == Task.TEXT_GENERATION
                 else self.completion_with_retry(input=prompt, **params)
             )
@@ -164,8 +164,8 @@ def _call(
             )
             raise

-        completion = self._process_response(response, params.get("num_generations", 1))
-        self._print_response(completion, response)
+        # completion = self._process_response(response, params.get("num_generations", 1))
+        # self._print_response(completion, response)
         return completion

     def _process_response(self, response: Any, num_generations: int = 1) -> str:
@@ -178,7 +178,7 @@ def _process_response(self, response: Any, num_generations: int = 1) -> str:
             else [gen.text for gen in response.data.generated_texts[0]]
         )

-    def completion_with_retry(self, **kwargs: Any) -> Any:
+    def _completion_with_retry_v1(self, **kwargs: Any):
         from oci.generative_ai.models import (
             GenerateTextDetails,
             OnDemandServingMode,
@@ -188,15 +188,79 @@ def completion_with_retry(self, **kwargs: Any) -> Any:
         # TODO: Add retry logic for OCI
         # Convert the ``model`` parameter to OCI ``ServingMode``
         # Note that ``ServingMode`` is not JSON serializable.
+        kwargs["prompts"] = [kwargs.pop("prompt")]
         kwargs["serving_mode"] = OnDemandServingMode(model_id=self.model)
         if self.task == Task.TEXT_GENERATION:
-            return self.client.generate_text(
+            response = self.client.generate_text(
                 GenerateTextDetails(**kwargs), **self.endpoint_kwargs
             )
+            if kwargs.get("num_generations", 1) == 1:
+                completion = response.data.generated_texts[0][0].text
+            else:
+                completion = [gen.text for gen in response.data.generated_texts[0]]
         else:
-            return self.client.summarize_text(
+            response = self.client.summarize_text(
                 SummarizeTextDetails(**kwargs), **self.endpoint_kwargs
             )
+            completion = response.data.summary
+        self._print_response(completion, response)
+        return completion
+
+    def _completion_with_retry_v2(self, **kwargs: Any):
+        from oci.generative_ai_inference.models import (
+            GenerateTextDetails,
+            OnDemandServingMode,
+            SummarizeTextDetails,
+            CohereLlmInferenceRequest,
+            LlamaLlmInferenceRequest,
+        )
+
+        request_class_mapping = {
+            "cohere": CohereLlmInferenceRequest,
+            "llama": LlamaLlmInferenceRequest,
+        }
+
+        request_class = None
+        for prefix, oci_request_class in request_class_mapping.items():
+            if self.model.startswith(prefix):
+                request_class = oci_request_class
+        if not request_class:
+            raise ValueError(f"Model {self.model} is not supported.")
+
+        if self.model.startswith("llama"):
+            kwargs.pop("truncate", None)
+            kwargs.pop("stop_sequences", None)
+
+        serving_mode = OnDemandServingMode(model_id=self.model)
+        if self.task == Task.TEXT_GENERATION:
+            compartment_id = kwargs.pop("compartment_id")
+            inference_request = request_class(**kwargs)
+            response = self.client.generate_text(
+                GenerateTextDetails(
+                    compartment_id=compartment_id,
+                    serving_mode=serving_mode,
+                    inference_request=inference_request,
+                ),
+                **self.endpoint_kwargs,
+            )
+            if kwargs.get("num_generations", 1) == 1:
+                completion = response.data.inference_response.generated_texts[0].text
+            else:
+                completion = [gen.text for gen in response.data.inference_response.generated_texts]
+        else:
+            response = self.client.summarize_text(
+                SummarizeTextDetails(serving_mode=serving_mode, **kwargs),
+                **self.endpoint_kwargs,
+            )
+            completion = response.data.summary
+        self._print_response(completion, response)
+        return completion
+
+    def completion_with_retry(self, **kwargs: Any) -> Any:
+        if self.client.__class__.__name__ == "GenerativeAiClient":
+            return self._completion_with_retry_v1(**kwargs)
+        return self._completion_with_retry_v2(**kwargs)

     def batch_completion(
         self,
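
Note on the new dispatch: `completion_with_retry` now routes on the client's class name rather than on the task alone. Below is a minimal, self-contained sketch of that routing; the stub classes are hypothetical stand-ins for `oci.generative_ai.GenerativeAiClient` and `oci.generative_ai_inference.GenerativeAiInferenceClient`, and only the class-name check mirrors the diff.

# Hypothetical stand-ins for the two OCI SDK clients; real code would
# import them from oci.generative_ai and oci.generative_ai_inference.
class GenerativeAiClient:
    pass

class GenerativeAiInferenceClient:
    pass

def pick_backend(client) -> str:
    # Same check as completion_with_retry above: the legacy client name
    # selects the v1 path, anything else falls through to v2.
    if client.__class__.__name__ == "GenerativeAiClient":
        return "v1"
    return "v2"

assert pick_backend(GenerativeAiClient()) == "v1"
assert pick_backend(GenerativeAiInferenceClient()) == "v2"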
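
The two code paths also parse different response shapes, which is why the parsing moved out of `_process_response` and into each helper. A small illustration of the attribute paths handled above, using `types.SimpleNamespace` stubs in place of real SDK responses (the stub data is fabricated; the field paths are taken from the diff):

from types import SimpleNamespace as NS

# v1 (oci.generative_ai): one inner list of generations per prompt.
v1_response = NS(data=NS(generated_texts=[[NS(text="hello")]]))
assert v1_response.data.generated_texts[0][0].text == "hello"

# v2 (oci.generative_ai_inference): a flat list nested under
# data.inference_response.
v2_response = NS(data=NS(inference_response=NS(generated_texts=[NS(text="hello")])))
assert v2_response.data.inference_response.generated_texts[0].text == "hello"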