Update LangChain Tests (#498)

qiuosier · web-flow · commit 68c9ec4ecea0 · 2023-12-13T14:56:55.000-05:00
diff --git a/ads/jobs/templates/driver_utils.py b/ads/jobs/templates/driver_utils.py
@@ -413,7 +413,7 @@ def run_command(
                     logger.log(level=level, msg=msg)
             # Add a small delay so that
             # outputs from the subsequent code will have different timestamp for oci logging
-            time.sleep(0.05)
+            time.sleep(0.02)
         if check and process.returncode != 0:
             # If there is an error, exit the main process with the same return code.
             sys.exit(process.returncode)
diff --git a/tests/unitary/with_extras/langchain/test_serialization.py b/tests/unitary/with_extras/langchain/test_serialization.py
@@ -6,6 +6,7 @@
 
 
 import os
+from copy import deepcopy
 from unittest import TestCase, mock, SkipTest
 
 from langchain.llms import Cohere
@@ -25,6 +26,11 @@
 class ChainSerializationTest(TestCase):
     """Contains tests for chain serialization."""
 
+    # LangChain frequently reorganizes its modules,
+    # which mainly affects the "id" field of the serialized output.
+    # These tests therefore do not check the "id" field of some components.
+    # We expect users to serialize and deserialize with the same LangChain version.
+
     def setUp(self) -> None:
         self.maxDiff = None
         return super().setUp()
@@ -75,7 +81,6 @@ def setUp(self) -> None:
             "prompt": {
                 "lc": 1,
                 "type": "constructor",
-                "id": ["langchain_core", "prompts", "prompt", "PromptTemplate"],
                 "kwargs": {
                     "input_variables": ["subject"],
                     "template": "Tell me a joke about {subject}",
@@ -118,12 +123,10 @@ def setUp(self) -> None:
     EXPECTED_RUNNABLE_SEQUENCE = {
         "lc": 1,
         "type": "constructor",
-        "id": ["langchain_core", "runnables", "RunnableSequence"],
         "kwargs": {
             "first": {
                 "lc": 1,
                 "type": "constructor",
-                "id": ["langchain_core", "runnables", "RunnableParallel"],
                 "kwargs": {
                     "steps": {
                         "text": {
@@ -144,7 +147,6 @@ def setUp(self) -> None:
                 {
                     "lc": 1,
                     "type": "constructor",
-                    "id": ["langchain_core", "prompts", "prompt", "PromptTemplate"],
                     "kwargs": {
                         "input_variables": ["subject"],
                         "template": "Tell me a joke about {subject}",
@@ -185,7 +187,10 @@ def test_llm_chain_serialization_with_oci(self):
         template = PromptTemplate.from_template(self.PROMPT_TEMPLATE)
         llm_chain = LLMChain(prompt=template, llm=llm)
         serialized = dump(llm_chain)
-        self.assertEqual(serialized, self.EXPECTED_LLM_CHAIN_WITH_OCI_MD)
+        # Copy the actual ID into `expected` so the ID field is not compared.
+        expected = deepcopy(self.EXPECTED_LLM_CHAIN_WITH_OCI_MD)
+        expected["kwargs"]["prompt"]["id"] = serialized["kwargs"]["prompt"]["id"]
+        self.assertEqual(serialized, expected)
         llm_chain = load(serialized)
         self.assertIsInstance(llm_chain, LLMChain)
         self.assertIsInstance(llm_chain.prompt, PromptTemplate)
@@ -202,8 +207,8 @@ def test_oci_gen_ai_serialization(self):
                 compartment_id=self.COMPARTMENT_ID,
                 client_kwargs=self.GEN_AI_KWARGS,
             )
-        except ImportError:
-            raise SkipTest("OCI SDK does not support Generative AI.")
+        except ImportError as ex:
+            raise SkipTest("OCI SDK does not support Generative AI.") from ex
         serialized = dump(llm)
         self.assertEqual(serialized, self.EXPECTED_GEN_AI_LLM)
         llm = load(serialized)
@@ -216,8 +221,8 @@ def test_gen_ai_embeddings_serialization(self):
             embeddings = GenerativeAIEmbeddings(
                 compartment_id=self.COMPARTMENT_ID, client_kwargs=self.GEN_AI_KWARGS
             )
-        except ImportError:
-            raise SkipTest("OCI SDK does not support Generative AI.")
+        except ImportError as ex:
+            raise SkipTest("OCI SDK does not support Generative AI.") from ex
         serialized = dump(embeddings)
         self.assertEqual(serialized, self.EXPECTED_GEN_AI_EMBEDDINGS)
         embeddings = load(serialized)
@@ -232,7 +237,15 @@ def test_runnable_sequence_serialization(self):
 
         chain = map_input | template | llm
         serialized = dump(chain)
-        self.assertEqual(serialized, self.EXPECTED_RUNNABLE_SEQUENCE)
+        # Copy the actual IDs into `expected` so the ID fields are not compared.
+        expected = deepcopy(self.EXPECTED_RUNNABLE_SEQUENCE)
+        expected["id"] = serialized["id"]
+        expected["kwargs"]["first"]["id"] = serialized["kwargs"]["first"]["id"]
+        expected["kwargs"]["first"]["kwargs"]["steps"]["text"]["id"] = serialized[
+            "kwargs"
+        ]["first"]["kwargs"]["steps"]["text"]["id"]
+        expected["kwargs"]["middle"][0]["id"] = serialized["kwargs"]["middle"][0]["id"]
+        self.assertEqual(serialized, expected)
         chain = load(serialized)
         self.assertEqual(len(chain.steps), 3)
         self.assertIsInstance(chain.steps[0], RunnableParallel)
diff --git a/tests/unitary/with_extras/langchain/test_serializers.py b/tests/unitary/with_extras/langchain/test_serializers.py
@@ -13,15 +13,13 @@
 from langchain.schema.embeddings import Embeddings
 from langchain.vectorstores import OpenSearchVectorSearch, FAISS
 from langchain.chains import RetrievalQA
-from langchain import llms
-from langchain.llms import loading
+from langchain.llms import Cohere
 
 from ads.llm.serializers.retrieval_qa import (
     OpenSearchVectorDBSerializer,
     FaissSerializer,
     RetrievalQASerializer,
 )
-from tests.unitary.with_extras.langchain.test_guardrails import FakeLLM
 
 
 class FakeEmbeddings(Serializable, Embeddings):
@@ -82,8 +80,7 @@ def test_type(self):
 
     def test_save(self):
         serialized = self.serializer.save(self.opensearch)
-        assert serialized["id"] == [
-            "langchain",
+        assert serialized["id"][-3:] == [
             "vectorstores",
             "opensearch_vector_search",
             "OpenSearchVectorSearch",
@@ -135,7 +132,7 @@ class TestRetrievalQASerializer(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         # Create a sample RetrieverQA object for testing
-        cls.llm = FakeLLM()
+        cls.llm = Cohere(cohere_api_key="api_key")
         cls.embeddings = FakeEmbeddings()
         text_embedding_pair = [("test", [1] * 1024)]
         try:
@@ -148,18 +145,6 @@ def setUpClass(cls):
             llm=cls.llm, chain_type="stuff", retriever=cls.retriever
         )
         cls.serializer = RetrievalQASerializer()
-        from copy import deepcopy
-
-        cls.original_type_to_cls_dict = deepcopy(llms.get_type_to_cls_dict())
-        __lc_llm_dict = llms.get_type_to_cls_dict()
-        __lc_llm_dict["custom_embedding"] = lambda: FakeEmbeddings
-        __lc_llm_dict["custom"] = lambda: FakeLLM
-
-        def __new_type_to_cls_dict():
-            return __lc_llm_dict
-
-        llms.get_type_to_cls_dict = __new_type_to_cls_dict
-        loading.get_type_to_cls_dict = __new_type_to_cls_dict
 
     def test_type(self):
         self.assertEqual(self.serializer.type(), "retrieval_qa")
@@ -176,6 +161,7 @@ def test_save(self):
         self.assertIn("retriever_kwargs", serialized)
         serialized["vectordb"]["class"] == "FAISS"
 
+    @mock.patch.dict(os.environ, {"COHERE_API_KEY": "api_key"})
     def test_load(self):
         # Create a sample config dictionary
         serialized = self.serializer.save(self.qa)
@@ -186,12 +172,6 @@ def test_load(self):
         # Ensure that the deserialized object is an instance of RetrieverQA
         self.assertIsInstance(deserialized, RetrievalQA)
 
-    @classmethod
-    def tearDownClass(cls) -> None:
-        llms.get_type_to_cls_dict = cls.original_type_to_cls_dict
-        loading.get_type_to_cls_dict = cls.original_type_to_cls_dict
-        return super().tearDownClass()
-
 
 if __name__ == "__main__":
     unittest.main()