
Commit 18d43ba

Merge commit 'ff08a04a7d7c2183cd72c66bf063be594b5b68c1'

Conflicts:
    docs/source/user_guide/large_language_model/deploy_langchain_application.rst

2 parents: ef64304 + ff08a04

File tree: 5 files changed, +197 -34 lines


THIRD_PARTY_LICENSES.txt

Lines changed: 12 additions & 0 deletions
@@ -54,6 +54,12 @@ docker
 * Source code: https://github.com/docker
 * Project home: https://www.docker.com/

+evaluate
+* Copyright 2023 HuggingFace Inc.
+* License: Apache-2.0 license
+* Source code: https://github.com/huggingface/evaluate
+* Project home: https://huggingface.co/docs/evaluate/index
+
 fastavro
 * Copyright (c) 2011 Miki Tebeka
 * License: MIT License
@@ -133,6 +139,12 @@ jinja2
 * Source code: https://github.com/pallets/jinja/
 * Project home: https://palletsprojects.com/p/jinja/

+langchain
+* Copyright (c) 2023 LangChain, Inc.
+* License: MIT license
+* Source code: https://github.com/langchain-ai/langchain
+* Project home: https://www.langchain.com/
+
 lightgbm
 * Copyright (c) 2023 Microsoft Corporation
 * License: MIT license

ads/llm/serialize.py

Lines changed: 0 additions & 1 deletion
@@ -15,7 +15,6 @@
 from langchain.chains import RetrievalQA
 from langchain.chains.loading import load_chain_from_config
 from langchain.llms import loading
-from langchain.load import dumpd
 from langchain.load.load import Reviver
 from langchain.load.serializable import Serializable
 from langchain.schema.runnable import RunnableParallel

ads/llm/serializers/retrieval_qa.py

Lines changed: 11 additions & 5 deletions
@@ -1,3 +1,9 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*--
+
+# Copyright (c) 2023 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
 import base64
 import json
 import os
@@ -44,18 +50,18 @@ def save(obj):
         serialized["type"] = "constructor"
         serialized["_type"] = OpenSearchVectorDBSerializer.type()
         kwargs = {}
-        for key, val in obj.__dict__.items():
-            if key == "client":
-                if isinstance(val, OpenSearch):
-                    client_info = val.transport.hosts[0]
+        for component_name, component in obj.__dict__.items():
+            if component_name == "client":
+                if isinstance(component, OpenSearch):
+                    client_info = component.transport.hosts[0]
                     opensearch_url = (
                         f"https://{client_info['host']}:{client_info['port']}"
                     )
                     kwargs.update({"opensearch_url": opensearch_url})
                 else:
                     raise NotImplementedError("Only support OpenSearch client.")
                 continue
-            kwargs[key] = dump(val)
+            kwargs[component_name] = dump(component)
         serialized["kwargs"] = kwargs
         return serialized
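
A minimal sketch of how this serializer is exercised (an illustration, not part of the commit; it assumes the enclosing OpenSearchVectorDBSerializer class named in the hunk header and a configured OpenSearchVectorSearch instance called vectorstore):

    # Sketch: the OpenSearch client is reduced to its URL; every other
    # attribute of the vector store is serialized with langchain's dump().
    serialized = OpenSearchVectorDBSerializer.save(vectorstore)
    assert serialized["type"] == "constructor"
    assert "opensearch_url" in serialized["kwargs"]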

docs/source/user_guide/large_language_model/deploy_langchain_application.rst

Lines changed: 8 additions & 5 deletions
@@ -68,15 +68,18 @@ In this example, we're using a temporary folder generated by ``tempfile.mkdtemp(
 Prepare the Model Artifacts
 ***************************

-Call ``prepare()`` from ``ChainDeployment`` to generate the ``score.py`` and serialize the LangChain application to ``chain.yaml`` file under ``artifact_dir`` folder.
-Parameters ``inference_conda_env`` and ``inference_python_version`` are passed to define the conda environment where your LangChain application will be running on OCI.
-Here we're using ``pytorch21_p39_gpu_v1`` with python 3.9.
+Call ``prepare`` from ``ChainDeployment`` to generate ``score.py`` and serialize the LangChain application to a ``chain.yaml`` file under the ``artifact_dir`` folder.
+The ``inference_conda_env`` and ``inference_python_version`` parameters define the conda environment where your LangChain application will run on OCI.
+Here, replace ``custom_conda_environment_uri`` with the URI of a conda environment that has the latest ADS 2.9.1, and replace ``python_version`` with that conda environment's Python version.
+
+.. note::
+    For how to customize and publish a conda environment, refer to `Publishing a Conda Environment to an Object Storage Bucket <https://docs.oracle.com/en-us/iaas/data-science/using/conda_publishs_object.htm>`_.

 .. code-block:: python3

     chain_deployment.prepare(
-        inference_conda_env="pytorch21_p39_gpu_v1",
-        inference_python_version="3.9",
+        inference_conda_env="<custom_conda_environment_uri>",
+        inference_python_version="<python_version>",
     )

 Below is the ``chain.yaml`` file that was saved from ``llm_chain`` object.
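
As a quick sanity check after ``prepare`` (a sketch, not part of the commit; ``artifact_dir`` is the temporary folder mentioned in the hunk header above):

    import os

    # prepare() generates score.py and serializes the chain to chain.yaml,
    # so both files should now exist in the artifact folder.
    print(os.listdir(artifact_dir))  # expect score.py and chain.yaml among the entries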

docs/source/user_guide/large_language_model/retrieval.rst

Lines changed: 166 additions & 23 deletions
@@ -1,20 +1,167 @@
 .. _vector_store:

-########################
-Vector Store integration
-########################
+#################################################
+Integration with OCI Generative AI and OpenSearch
+#################################################

 .. versionadded:: 2.9.1

-Current version of Langchain does not support serialization of any vector stores. This will be a problem when you want to deploy a langchain application with the vector store being one of the components using data science model deployment service. To solve this problem, we extended our support of vector stores serialization:
+OCI Generative Embedding
+========================
+
+The Generative AI Embedding Models convert textual input - ranging from phrases and sentences to entire paragraphs - into a structured format known as embeddings. Each piece of text input is transformed into a numerical array consisting of 1024 distinct numbers. The following pretrained model is available for creating text embeddings:
+
+- embed-english-light-v2.0
+
+To find the latest supported embedding models, check the `documentation <https://docs.oracle.com/en-us/iaas/Content/generative-ai/embed-models.htm>`_.
+
+The following code snippet shows how to use the Generative AI Embedding Models:
+
+.. code-block:: python3
+
+    import ads
+    from ads.llm import GenerativeAIEmbeddings
+
+    ads.set_auth("resource_principal")
+
+    oci_embedings = GenerativeAIEmbeddings(
+        compartment_id="ocid1.compartment.####",
+        client_kwargs=dict(service_endpoint="https://generativeai.aiservice.us-chicago-1.oci.oraclecloud.com")  # this can be omitted after the Generative AI service is GA.
+    )
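
A quick usage sketch for the embeddings object above (an illustration, not part of the commit; it relies on the standard langchain embeddings interface that ``GenerativeAIEmbeddings`` implements):

    # Embed a single query: the result is one vector of 1024 numbers,
    # per the embedding model description above.
    vector = oci_embedings.embed_query("What is OCI OpenSearch?")
    assert len(vector) == 1024

    # Embed several documents in one call.
    vectors = oci_embedings.embed_documents(["first document", "second document"])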
+
+Retrieval QA with OpenSearch
+============================
+
+OCI OpenSearch
+--------------
+
+OCI Search with OpenSearch is a fully managed service that makes it fast and easy to search vast datasets and get quick results. In the world of large language models, you can use it as a vector store: store your documents in it and run keyword or semantic search with the help of a text embedding model. For a complete walkthrough of spinning up an OCI OpenSearch cluster, see `Search and visualize data using OCI Search Service with OpenSearch <https://docs.oracle.com/en/learn/oci-opensearch/index.html#introduction>`_.
+
+Semantic Search with OCI OpenSearch
+-----------------------------------
+
+With OCI OpenSearch and the OCI Generative AI embeddings, you can run semantic search using langchain. The following code snippet shows how:
+
+.. code-block:: python3
+
+    import os
+
+    from langchain.vectorstores import OpenSearchVectorSearch
+
+    os.environ['OCI_OPENSEARCH_USERNAME'] = "username"
+    os.environ['OCI_OPENSEARCH_PASSWORD'] = "password"
+    os.environ['OCI_OPENSEARCH_VERIFY_CERTS'] = "False"
+
+    # specify the index name that you would like to conduct semantic search on.
+    INDEX_NAME = "your_index_name"
+
+    opensearch_vector_search = OpenSearchVectorSearch(
+        "https://localhost:9200",  # your OCI OpenSearch private endpoint
+        embedding_function=oci_embedings,
+        index_name=INDEX_NAME,
+        engine="lucene",
+        http_auth=(os.environ["OCI_OPENSEARCH_USERNAME"], os.environ["OCI_OPENSEARCH_PASSWORD"]),
+        verify_certs=os.environ["OCI_OPENSEARCH_VERIFY_CERTS"],
+    )
+    opensearch_vector_search.similarity_search("your query", k=2, size=2)
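
The snippet above assumes the index already contains embedded documents. To index documents in the first place, the generic langchain vector store interface applies (a sketch, not part of the commit):

    # add_texts() embeds each text with the configured embedding function
    # and writes the resulting vectors into the OpenSearch index.
    opensearch_vector_search.add_texts(["first document", "second document"])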
+
+Retrieval QA Using OCI OpenSearch as a Retriever
+------------------------------------------------
+
+Since the search results usually cannot be used directly to answer a specific question, a more practical solution is to send the original query along with the retrieved results to a large language model and let it compose a more coherent answer. To do this, you can use OCI OpenSearch as the retriever of a retrieval QA chain. The following code snippet shows how:
+
+.. code-block:: python3
+
+    import ads
+    from langchain.chains import RetrievalQA
+    from ads.llm import GenerativeAI
+
+    ads.set_auth("resource_principal")
+
+    oci_llm = GenerativeAI(
+        compartment_id="ocid1.compartment.####",
+        client_kwargs=dict(service_endpoint="https://generativeai.aiservice.us-chicago-1.oci.oraclecloud.com")  # this can be omitted after the Generative AI service is GA.
+    )
+
+    retriever = opensearch_vector_search.as_retriever(search_kwargs={"vector_field": "embeds",
+                                                                     "text_field": "text",
+                                                                     "k": 3,
+                                                                     "size": 3})
+    qa = RetrievalQA.from_chain_type(
+        llm=oci_llm,
+        chain_type="stuff",
+        retriever=retriever,
+        chain_type_kwargs={
+            "verbose": True
+        }
+    )
+    qa.run("your question")
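
If you also want to inspect which documents the retriever fed to the LLM, langchain's ``RetrievalQA`` can return them alongside the answer (a sketch, not part of the commit; ``return_source_documents`` is standard langchain behavior):

    qa = RetrievalQA.from_chain_type(
        llm=oci_llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
    )
    # Calling the chain with a dict returns the answer and the source documents.
    result = qa({"query": "your question"})
    print(result["result"], result["source_documents"])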
+
+Retrieval QA with FAISS
+=======================
+
+FAISS as Vector DB
+------------------
+
+A lot of the time your documents are not that large, and you may not have an OCI OpenSearch cluster set up. In that case, you can use ``FAISS`` as your in-memory vector store, which can also do similarity search very efficiently.
+
+The following code snippet shows how to use ``FAISS`` along with the OCI embedding model to do semantic search:
+
+.. code-block:: python3
+
+    from langchain.document_loaders import TextLoader
+    from langchain.text_splitter import CharacterTextSplitter
+    from langchain.vectorstores import FAISS
+
+    loader = TextLoader("your.txt")
+    documents = loader.load()
+    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
+    docs = text_splitter.split_documents(documents)
+
+    # embed the chunks in batches of 16 documents per request
+    l = len(docs)
+    embeddings = []
+    for i in range(l // 16 + 1):
+        subdocs = [item.page_content for item in docs[i * 16: (i + 1) * 16]]
+        embeddings.extend(oci_embedings.embed_documents(subdocs))
+
+    texts = [item.page_content for item in docs]
+    text_embedding_pairs = [(text, embed) for text, embed in zip(texts, embeddings)]
+    db = FAISS.from_embeddings(text_embedding_pairs, oci_embedings)
+    db.similarity_search("your query", k=2, size=2)
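
Since this ``FAISS`` store lives purely in memory, you may want to persist it between runs; langchain's FAISS wrapper supports this directly (a sketch, not part of the commit):

    # Write the index to a local folder, then load it back with the same
    # embeddings object so new queries are embedded consistently.
    db.save_local("faiss_index")
    db = FAISS.load_local("faiss_index", oci_embedings)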
+
+Retrieval QA Using FAISS Vector Store as a Retriever
+----------------------------------------------------
+
+Similarly, you can use the FAISS vector store as a retriever to build a retrieval QA engine using langchain. The following code snippet shows how to use FAISS as a retriever:
+
+.. code-block:: python3
+
+    import ads
+    from langchain.chains import RetrievalQA
+    from ads.llm import GenerativeAI
+
+    ads.set_auth("resource_principal")
+
+    oci_llm = GenerativeAI(
+        compartment_id="ocid1.compartment.####",
+        client_kwargs=dict(service_endpoint="https://generativeai.aiservice.us-chicago-1.oci.oraclecloud.com")  # this can be omitted after the Generative AI service is GA.
+    )
+    retriever = db.as_retriever()
+    qa = RetrievalQA.from_chain_type(
+        llm=oci_llm,
+        chain_type="stuff",
+        retriever=retriever,
+        chain_type_kwargs={
+            "verbose": True
+        }
+    )
+    qa.run("your question")
+
+Deployment of Retrieval QA
+==========================
+
+As of version 0.0.346, Langchain does not support serialization of any vector stores. This is a problem when you want to deploy a retrieval QA langchain application. To solve this problem, we extended our support of vector store serialization:

 - ``OpenSearchVectorSearch``
 - ``FAISS``

 OpenSearchVectorSearch Serialization
 ------------------------------------

-langchain does not automatically support serialization of ``OpenSearchVectorSearch``. However, ADS provides a way to serialize ``OpenSearchVectorSearch``. To serialize ``OpenSearchVectorSearch``, you need to use environment variables to pass in the credentials. The following variables can be passed in through the corresponding environment variables:
+langchain does not automatically support serialization of ``OpenSearchVectorSearch``, but ADS provides a way to serialize it. To serialize ``OpenSearchVectorSearch``, you need to use environment variables to store the credentials. The following settings can be passed in through the corresponding environment variables (see the sketch after this list):

 - http_auth: (``OCI_OPENSEARCH_USERNAME``, ``OCI_OPENSEARCH_PASSWORD``)
 - verify_certs: ``OCI_OPENSEARCH_VERIFY_CERTS``
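
Concretely (a sketch, not part of the commit): set the variables before serializing, and the serialized artifact stores only the OpenSearch URL while the credentials are re-read from the environment at load time.

    import os

    # Credentials are kept out of the serialized artifact; only the
    # opensearch_url is captured by the serializer shown earlier.
    os.environ["OCI_OPENSEARCH_USERNAME"] = "username"
    os.environ["OCI_OPENSEARCH_PASSWORD"] = "password"
    os.environ["OCI_OPENSEARCH_VERIFY_CERTS"] = "False"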
@@ -52,10 +199,10 @@ During deployment, it is very important that you remember to pass in those envir
         "OCI_OPENSEARCH_PASSWORD": "<oci_opensearch_password>",
         "OCI_OPENSEARCH_VERIFY_CERTS": "<oci_opensearch_verify_certs>",)

-OpenSearchVectorSearch Deployment
----------------------------------
+Deployment of Retrieval QA with OpenSearch
+------------------------------------------

-Here is an example code snippet for OpenSearchVectorSearch deployment:
+Here is an example code snippet for the deployment of Retrieval QA using OpenSearch as a retriever:

 .. code-block:: python3

@@ -66,12 +213,12 @@ Here is an example code snippet for OpenSearchVectorSearch deployment:
     ads.set_auth("resource_principal")

     oci_embedings = GenerativeAIEmbeddings(
-        compartment_id="ocid1.compartment.oc1..aaaaaaaapvb3hearqum6wjvlcpzm5ptfxqa7xfftpth4h72xx46ygavkqteq",
+        compartment_id="ocid1.compartment.####",
         client_kwargs=dict(service_endpoint="https://generativeai.aiservice.us-chicago-1.oci.oraclecloud.com") # this can be omitted after Generative AI service is GA.
     )

     oci_llm = GenerativeAI(
-        compartment_id="ocid1.compartment.oc1..aaaaaaaapvb3hearqum6wjvlcpzm5ptfxqa7xfftpth4h72xx46ygavkqteq",
+        compartment_id="ocid1.compartment.####",
         client_kwargs=dict(service_endpoint="https://generativeai.aiservice.us-chicago-1.oci.oraclecloud.com") # this can be omitted after Generative AI service is GA.
     )

@@ -95,8 +242,7 @@ Here is an example code snippet for OpenSearchVectorSearch deployment:
     retriever = opensearch_vector_search.as_retriever(search_kwargs={"vector_field": "embeds",
                                                                      "text_field": "text",
                                                                      "k": 3,
-                                                                     "size": 3},
-                                                      max_tokens_limit=1000)
+                                                                     "size": 3})
     qa = RetrievalQA.from_chain_type(
         llm=oci_llm,
         chain_type="stuff",
@@ -108,7 +254,8 @@ Here is an example code snippet for OpenSearchVectorSearch deployment:
     from ads.llm.deploy import ChainDeployment
     model = ChainDeployment(qa)
     model.prepare(force_overwrite=True,
-                  inference_conda_env="your_conda_pack",
+                  inference_conda_env="<custom_conda_environment_uri>",
+                  inference_python_version="<python_version>",
     )

     model.save()
@@ -124,16 +271,10 @@ Here is an example code snippet for OpenSearchVectorSearch deployment:
     model.predict("your prompt")


-FAISS Serialization
--------------------
-
-If your documents are not too large and you dont have a OCI OpenSearch cluster, you can use ``FAISS`` as your in-memory vector store, which can also do similarty search very efficiently. For ``FAISS``, you can just use it and deploy it as it is.
+Deployment of Retrieval QA with FAISS
+-------------------------------------

-
-FAISS Deployment
-----------------
-
-Here is an example code snippet for FAISS deployment:
+Here is an example code snippet for the deployment of Retrieval QA using FAISS as a retriever:

 .. code-block:: python3

@@ -181,8 +322,10 @@ Here is an example code snippet for FAISS deployment:
     )

     from ads.llm.deploy import ChainDeployment
+    model = ChainDeployment(qa)
     model.prepare(force_overwrite=True,
-                  inference_conda_env="your_conda_pack",
+                  inference_conda_env="<custom_conda_environment_uri>",
+                  inference_python_version="<python_version>",
     )

     model.save()
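
To complete the FAISS flow after ``model.save()`` (a sketch, not part of the commit; the deploy and predict steps are elided by this hunk but mirror the OpenSearch example above, which ends with ``model.predict``):

    # Assumption: ChainDeployment follows the usual ADS flow of
    # prepare() -> save() -> deploy() -> predict().
    model.deploy()
    model.predict("your prompt")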
