Skip to content

Commit 7e86d16

Browse files
authored
Add Copyright and Documentation for Retrieval QA and Vector DB (#478)
2 parents 9963f11 + 8f9c7f5 commit 7e86d16

File tree

5 files changed

+246
-7
lines changed

5 files changed

+246
-7
lines changed

THIRD_PARTY_LICENSES.txt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,12 @@ docker
5454
* Source code: https://github.com/docker
5555
* Project home: https://www.docker.com/
5656

57+
evaluate
58+
* Copyright 2023 HuggingFace Inc.
59+
* License: Apache-2.0 license
60+
* Source code: https://github.com/huggingface/evaluate
61+
* Project home: https://huggingface.co/docs/evaluate/index
62+
5763
fastavro
5864
* Copyright (c) 2011 Miki Tebeka
5965
* License: MIT License
@@ -133,6 +139,12 @@ jinja2
133139
* Source code: https://github.com/pallets/jinja/
134140
* Project home: https://palletsprojects.com/p/jinja/
135141

142+
langchain
143+
* Copyright (c) 2023 LangChain, Inc.
144+
* License: MIT license
145+
* Source code: https://github.com/langchain-ai/langchain
146+
* Project home: https://www.langchain.com/
147+
136148
lightgbm
137149
* Copyright (c) 2023 Microsoft Corporation
138150
* License: MIT license

ads/llm/serialize.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
from langchain.chains import RetrievalQA
1616
from langchain.chains.loading import load_chain_from_config
1717
from langchain.llms import loading
18-
from langchain.load import dumpd
1918
from langchain.load.load import Reviver
2019
from langchain.load.serializable import Serializable
2120
from langchain.schema.runnable import RunnableParallel

ads/llm/serializers/retrieval_qa.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*--
3+
4+
# Copyright (c) 2023 Oracle and/or its affiliates.
5+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6+
17
import base64
28
import json
39
import os
@@ -30,7 +36,7 @@ def load(config: dict, **kwargs):
3036
os.environ.get("OCI_OPENSEARCH_PASSWORD", None),
3137
),
3238
verify_certs=True
33-
if os.environ.get("OCI_OPENSEARCH_VERIFY_CERTS", None).lower() == "true"
39+
if os.environ.get("OCI_OPENSEARCH_VERIFY_CERTS", None) == "True"
3440
else False,
3541
ca_certs=os.environ.get("OCI_OPENSEARCH_CA_CERTS", None),
3642
)
@@ -44,18 +50,18 @@ def save(obj):
4450
serialized["type"] = "constructor"
4551
serialized["_type"] = OpenSearchVectorDBSerializer.type()
4652
kwargs = {}
47-
for key, val in obj.__dict__.items():
48-
if key == "client":
49-
if isinstance(val, OpenSearch):
50-
client_info = val.transport.hosts[0]
53+
for component_name, component in obj.__dict__.items():
54+
if component_name == "client":
55+
if isinstance(component, OpenSearch):
56+
client_info = component.transport.hosts[0]
5157
opensearch_url = (
5258
f"https://{client_info['host']}:{client_info['port']}"
5359
)
5460
kwargs.update({"opensearch_url": opensearch_url})
5561
else:
5662
raise NotImplementedError("Only support OpenSearch client.")
5763
continue
58-
kwargs[key] = dump(val)
64+
kwargs[component_name] = dump(component)
5965
serialized["kwargs"] = kwargs
6066
return serialized
6167

docs/source/user_guide/large_language_model/index.rst

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,37 @@
44
Large Language Model
55
####################
66

7+
Oracle Cloud Infrastructure (OCI) provides fully managed infrastructure to work with Large Language Models (LLMs).
8+
9+
Train and Deploy LLM
10+
********************
11+
You can train LLMs at scale on multiple nodes and multiple GPUs using `Data Science Jobs (Jobs) <https://docs.oracle.com/en-us/iaas/data-science/using/jobs-about.htm>`_, and deploy them with `Data Science Model Deployment (Model Deployments) <https://docs.oracle.com/en-us/iaas/data-science/using/model-dep-about.htm>`_. The following blog posts show examples of training and deploying Llama 2 models:
12+
13+
* `Multi-GPU multinode fine-tuning Llama2 on OCI Data Science <https://blogs.oracle.com/ai-and-datascience/post/multi-gpu-multi-node-finetuning-llama2-oci>`_
14+
* `Deploy Llama 2 in OCI Data Science <https://blogs.oracle.com/ai-and-datascience/post/llama2-oci-data-science-cloud-platform>`_
15+
* `Quantize and deploy Llama 2 70B on cost-effective NVIDIA A10 Tensor Core GPUs in OCI Data Science <https://blogs.oracle.com/ai-and-datascience/post/quantize-deploy-llama2-70b-costeffective-a10s-oci>`_
16+
17+
18+
Integration with LangChain
19+
**************************
20+
ADS is designed to work with LangChain, enabling developers to incorporate various LangChain components and models deployed on OCI seamlessly into their applications. Additionally, ADS can package LangChain applications and deploy them as REST API endpoints using OCI Data Science Model Deployment.
21+
22+
23+
.. admonition:: Installation
24+
:class: note
25+
26+
Install ADS and other dependencies for LLM integrations.
27+
28+
.. code-block:: bash
29+
30+
$ python3 -m pip install "oracle-ads[llm]"
31+
32+
733
834
.. toctree::
935
:hidden:
1036
:maxdepth: 2
1137

38+
training_llm
1239
deploy_langchain_application
40+
retrieval
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
.. _vector_store:
2+
3+
########################
4+
Vector Store integration
5+
########################
6+
7+
.. versionadded:: 2.9.1
8+
9+
The current version of LangChain does not support serialization of any vector stores. This is a problem when you want to use the Data Science Model Deployment service to deploy a LangChain application that has a vector store as one of its components. To solve this problem, ADS adds serialization support for the following vector stores:
10+
11+
- ``OpenSearchVectorSearch``
12+
- ``FAISS``
13+
14+
OpenSearchVectorSearch Serialization
15+
------------------------------------
16+
17+
LangChain does not automatically support serialization of ``OpenSearchVectorSearch``. However, ADS provides a way to serialize ``OpenSearchVectorSearch``. To serialize ``OpenSearchVectorSearch``, you need to use environment variables to pass in the credentials. The following arguments can be passed in through the corresponding environment variables:
18+
19+
- http_auth: (``OCI_OPENSEARCH_USERNAME``, ``OCI_OPENSEARCH_PASSWORD``)
20+
- verify_certs: ``OCI_OPENSEARCH_VERIFY_CERTS``
21+
- ca_certs: ``OCI_OPENSEARCH_CA_CERTS``
22+
23+
The following code snippet shows how to use ``OpenSearchVectorSearch`` with environment variables:
24+
25+
.. code-block:: python3
26+
27+
os.environ['OCI_OPENSEARCH_USERNAME'] = "username"
28+
os.environ['OCI_OPENSEARCH_PASSWORD'] = "password"
29+
os.environ['OCI_OPENSEARCH_VERIFY_CERTS'] = "False"
30+
31+
INDEX_NAME = "your_index_name"
32+
opensearch_vector_search = OpenSearchVectorSearch(
33+
"https://localhost:9200",
34+
embedding_function=oci_embedings,
35+
index_name=INDEX_NAME,
36+
engine="lucene",
37+
http_auth=(os.environ["OCI_OPENSEARCH_USERNAME"], os.environ["OCI_OPENSEARCH_PASSWORD"]),
38+
verify_certs=os.environ["OCI_OPENSEARCH_VERIFY_CERTS"],
39+
)
40+
41+
.. admonition:: Deployment
42+
:class: note
43+
44+
During deployment, it is very important that you remember to pass in those environment variables as well:
45+
46+
.. code-block:: python3
47+
48+
.deploy(deployment_log_group_id="ocid1.loggroup.####",
49+
deployment_access_log_id="ocid1.log.####",
50+
deployment_predict_log_id="ocid1.log.####",
51+
environment_variables={"OCI_OPENSEARCH_USERNAME":"<oci_opensearch_username>",
52+
"OCI_OPENSEARCH_PASSWORD": "<oci_opensearch_password>",
53+
"OCI_OPENSEARCH_VERIFY_CERTS": "<oci_opensearch_verify_certs>"},)
54+
55+
OpenSearchVectorSearch Deployment
56+
---------------------------------
57+
58+
Here is an example code snippet for OpenSearchVectorSearch deployment:
59+
60+
.. code-block:: python3
61+
62+
from langchain.vectorstores import OpenSearchVectorSearch
63+
from ads.llm import GenerativeAIEmbeddings, GenerativeAI
64+
import ads
65+
66+
ads.set_auth("resource_principal")
67+
68+
oci_embedings = GenerativeAIEmbeddings(
69+
compartment_id="ocid1.compartment.oc1..aaaaaaaapvb3hearqum6wjvlcpzm5ptfxqa7xfftpth4h72xx46ygavkqteq",
70+
client_kwargs=dict(service_endpoint="https://generativeai.aiservice.us-chicago-1.oci.oraclecloud.com") # this can be omitted after Generative AI service is GA.
71+
)
72+
73+
oci_llm = GenerativeAI(
74+
compartment_id="ocid1.compartment.oc1..aaaaaaaapvb3hearqum6wjvlcpzm5ptfxqa7xfftpth4h72xx46ygavkqteq",
75+
client_kwargs=dict(service_endpoint="https://generativeai.aiservice.us-chicago-1.oci.oraclecloud.com") # this can be omitted after Generative AI service is GA.
76+
)
77+
78+
import os
79+
os.environ['OCI_OPENSEARCH_USERNAME'] = "username"
80+
os.environ['OCI_OPENSEARCH_PASSWORD'] = "password"
81+
os.environ['OCI_OPENSEARCH_VERIFY_CERTS'] = "True" # make sure this is capitalized.
82+
os.environ['OCI_OPENSEARCH_CA_CERTS'] = "path/to/oci_opensearch_ca.pem"
83+
84+
INDEX_NAME = "your_index_name"
85+
opensearch_vector_search = OpenSearchVectorSearch(
86+
"https://localhost:9200", # your endpoint
87+
embedding_function=oci_embedings,
88+
index_name=INDEX_NAME,
89+
engine="lucene",
90+
http_auth=(os.environ["OCI_OPENSEARCH_USERNAME"], os.environ["OCI_OPENSEARCH_PASSWORD"]),
91+
verify_certs=os.environ["OCI_OPENSEARCH_VERIFY_CERTS"],
92+
ca_certs=os.environ["OCI_OPENSEARCH_CA_CERTS"],
93+
)
94+
from langchain.chains import RetrievalQA
95+
retriever = opensearch_vector_search.as_retriever(search_kwargs={"vector_field": "embeds",
96+
"text_field": "text",
97+
"k": 3,
98+
"size": 3})
99+
qa = RetrievalQA.from_chain_type(
100+
llm=oci_llm,
101+
chain_type="stuff",
102+
retriever=retriever,
103+
chain_type_kwargs={
104+
"verbose": True
105+
}
106+
)
107+
from ads.llm.deploy import ChainDeployment
108+
model = ChainDeployment(qa)
109+
model.prepare(force_overwrite=True,
110+
inference_conda_env="your_conda_pack",
111+
)
112+
113+
model.save()
114+
res = model.verify("your prompt")
115+
model.deploy(deployment_log_group_id="ocid1.loggroup.####",
116+
deployment_access_log_id="ocid1.log.####",
117+
deployment_predict_log_id="ocid1.log.####",
118+
environment_variables={"OCI_OPENSEARCH_USERNAME":"<oci_opensearch_username>",
119+
"OCI_OPENSEARCH_PASSWORD": "<oci_opensearch_password>",
120+
"OCI_OPENSEARCH_VERIFY_CERTS": "<oci_opensearch_verify_certs>",
121+
"OCI_OPENSEARCH_CA_CERTS": "<oci_opensearch_ca_certs>"},)
122+
123+
model.predict("your prompt")
124+
125+
126+
FAISS Serialization
127+
-------------------
128+
129+
If your documents are not too large and you don't have an OCI OpenSearch cluster, you can use ``FAISS`` as your in-memory vector store, which can also do similarity search very efficiently. With ``FAISS``, you can just use it and deploy it as is.
130+
131+
132+
FAISS Deployment
133+
----------------
134+
135+
Here is an example code snippet for FAISS deployment:
136+
137+
.. code-block:: python3
138+
139+
import ads
140+
from ads.llm import GenerativeAIEmbeddings, GenerativeAI
141+
from langchain.document_loaders import TextLoader
142+
from langchain.text_splitter import CharacterTextSplitter
143+
from langchain.vectorstores import FAISS
144+
from langchain.chains import RetrievalQA
145+
146+
ads.set_auth("resource_principal")
147+
oci_embedings = GenerativeAIEmbeddings(
148+
compartment_id="ocid1.compartment.####",
149+
client_kwargs=dict(service_endpoint="https://generativeai.aiservice.us-chicago-1.oci.oraclecloud.com") # this can be omitted after Generative AI service is GA.
150+
)
151+
152+
oci_llm = GenerativeAI(
153+
compartment_id="ocid1.compartment.####",
154+
client_kwargs=dict(service_endpoint="https://generativeai.aiservice.us-chicago-1.oci.oraclecloud.com") # this can be omitted after Generative AI service is GA.
155+
)
156+
157+
loader = TextLoader("your.txt")
158+
documents = loader.load()
159+
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
160+
docs = text_splitter.split_documents(documents)
161+
162+
l = len(docs)
163+
embeddings = []
164+
for i in range(l // 16 + 1):
165+
subdocs = [item.page_content for item in docs[i * 16: (i + 1) * 16]]
166+
embeddings.extend(oci_embedings.embed_documents(subdocs))
167+
168+
texts = [item.page_content for item in docs]
169+
text_embedding_pairs = [(text, embed) for text, embed in zip(texts, embeddings)]
170+
db = FAISS.from_embeddings(text_embedding_pairs, oci_embedings)
171+
172+
retriever = db.as_retriever()
173+
qa = RetrievalQA.from_chain_type(
174+
llm=oci_llm,
175+
chain_type="stuff",
176+
retriever=retriever,
177+
chain_type_kwargs={
178+
"verbose": True
179+
}
180+
)
181+
182+
from ads.llm.deploy import ChainDeployment
183+
model = ChainDeployment(qa)
184+
model.prepare(force_overwrite=True,
185+
inference_conda_env="your_conda_pack",
186+
)
187+
188+
model.save()
189+
res = model.verify("your prompt")
190+
model.deploy(deployment_log_group_id="ocid1.loggroup.####",
191+
deployment_access_log_id="ocid1.log.####",
192+
deployment_predict_log_id="ocid1.log.####")
193+
194+
model.predict("your prompt")

0 commit comments

Comments
 (0)