Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions configs/dbgpt-bm25-rag.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
[system]
# Load language from the environment variable (it is set by the hook)
language = "${env:DBGPT_LANG:-zh}"
log_level = "INFO"
api_keys = []
encrypt_key = "your_secret_key"

# Server Configurations
[service.web]
host = "0.0.0.0"
port = 5670

[service.web.database]
type = "sqlite"
path = "pilot/meta_data/dbgpt.db"


[rag]
chunk_size=1000
chunk_overlap=100
similarity_top_k=5
similarity_score_threshold=0.0
max_chunks_once_load=10
max_threads=1
rerank_top_k=3

[rag.storage]
[rag.storage.vector]
type = "Chroma"
persist_path = "pilot/data"

[rag.storage.full_text]
type = "ElasticSearch"
host="127.0.0.1"
port=9200


# Model Configurations
[models]
[[models.llms]]
name = "${env:LLM_MODEL_NAME:-gpt-4o}"
provider = "${env:LLM_MODEL_PROVIDER:-proxy/openai}"
api_base = "${env:OPENAI_API_BASE:-https://api.openai.com/v1}"
api_key = "${env:OPENAI_API_KEY}"

[[models.embeddings]]
name = "${env:EMBEDDING_MODEL_NAME:-text-embedding-3-small}"
provider = "${env:EMBEDDING_MODEL_PROVIDER:-proxy/openai}"
api_url = "${env:EMBEDDING_MODEL_API_URL:-https://api.openai.com/v1/embeddings}"
api_key = "${env:OPENAI_API_KEY}"
45 changes: 45 additions & 0 deletions docs/docs/installation/integrations/bm25_rag_install.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# BM25 RAG

In this example, we will show how to use Elasticsearch as the DB-GPT RAG storage. Using an Elasticsearch database to implement BM25 (full-text) RAG can, to some extent, alleviate the uncertainty and interpretability issues brought about by vector database retrieval.

### Install Dependencies

First, you need to install DB-GPT with the `storage_elasticsearch` extra, which provides the Elasticsearch storage integration.

```bash
uv sync --all-packages --frozen \
--extra "base" \
--extra "proxy_openai" \
--extra "rag" \
--extra "storage_elasticsearch" \
--extra "dbgpts"
```

### Prepare Elasticsearch

Prepare an Elasticsearch service. For reference, see [Elasticsearch Installation](https://www.elastic.co/guide/en/elasticsearch/reference/current/install-elasticsearch.html).

### Elasticsearch Configuration


Set the RAG storage variables below in the `configs/dbgpt-bm25-rag.toml` file to let DB-GPT know how to connect to Elasticsearch.

```toml

[rag.storage]
[rag.storage.full_text]
type = "ElasticSearch"
uri = "127.0.0.1"
port = "9200"
```

Then run the following command to start the webserver:
```bash
uv run dbgpt start webserver --config configs/dbgpt-bm25-rag.toml
```

Optionally, you can also use the following command to start the webserver:
```bash
uv run python packages/dbgpt-app/src/dbgpt_app/dbgpt_server.py --config configs/dbgpt-bm25-rag.toml
```

4 changes: 4 additions & 0 deletions docs/sidebars.js
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,10 @@ const sidebars = {
type: "doc",
id: "installation/integrations/oceanbase_rag_install"
},
{
type: "doc",
id: "installation/integrations/bm25_rag_install"
},
{
type: "doc",
id: "installation/integrations/milvus_rag_install"
Expand Down
5 changes: 2 additions & 3 deletions examples/rag/bm25_retriever_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from dbgpt_ext.rag import ChunkParameters
from dbgpt_ext.rag.assembler.bm25 import BM25Assembler
from dbgpt_ext.rag.knowledge import KnowledgeFactory
from dbgpt_ext.storage.vector_store.elastic_store import ElasticsearchVectorConfig
from dbgpt_ext.storage.vector_store.elastic_store import ElasticsearchStoreConfig

"""Embedding rag example.
pre-requirements:
Expand All @@ -19,8 +19,7 @@

def _create_es_config():
"""Create vector connector."""
return ElasticsearchVectorConfig(
name="bm25_es_dbgpt",
return ElasticsearchStoreConfig(
uri="localhost",
port="9200",
user="elastic",
Expand Down
6 changes: 4 additions & 2 deletions examples/rag/cross_encoder_rerank_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,16 @@ def _create_vector_connector():
"""Create vector connector."""
config = ChromaVectorConfig(
persist_path=PILOT_PATH,
)

return ChromaStore(
config,
name="embedding_rag_test",
embedding_fn=DefaultEmbeddingFactory(
default_model_name=os.path.join(MODEL_PATH, "text2vec-large-chinese"),
).create(),
)

return ChromaStore(config)


async def main():
file_path = os.path.join(ROOT_PATH, "docs/docs/awel/awel.md")
Expand Down
15 changes: 7 additions & 8 deletions examples/rag/db_schema_rag_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
from dbgpt.rag.embedding import DefaultEmbeddingFactory
from dbgpt_ext.datasource.rdbms.conn_sqlite import SQLiteTempConnector
from dbgpt_ext.rag.assembler import DBSchemaAssembler
from dbgpt_ext.storage.vector_store.chroma_store import ChromaVectorConfig
from dbgpt_serve.rag.connector import VectorStoreConnector
from dbgpt_ext.storage.vector_store.chroma_store import ChromaStore, ChromaVectorConfig

"""DB struct rag example.
pre-requirements:
Expand Down Expand Up @@ -46,12 +45,12 @@ def _create_temporary_connection():

def _create_vector_connector():
"""Create vector connector."""
return VectorStoreConnector.from_default(
"Chroma",
vector_store_config=ChromaVectorConfig(
name="db_schema_vector_store_name",
persist_path=os.path.join(PILOT_PATH, "data"),
),
config = ChromaVectorConfig(
persist_path=PILOT_PATH,
)
return ChromaStore(
config,
name="embedding_rag_test",
embedding_fn=DefaultEmbeddingFactory(
default_model_name=os.path.join(MODEL_PATH, "text2vec-large-chinese"),
).create(),
Expand Down
6 changes: 4 additions & 2 deletions examples/rag/embedding_rag_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,16 @@ def _create_vector_connector():
"""Create vector connector."""
config = ChromaVectorConfig(
persist_path=PILOT_PATH,
)

return ChromaStore(
config,
name="embedding_rag_test",
embedding_fn=DefaultEmbeddingFactory(
default_model_name=os.path.join(MODEL_PATH, "text2vec-large-chinese"),
).create(),
)

return ChromaStore(config)


async def main():
file_path = os.path.join(ROOT_PATH, "docs/docs/awel/awel.md")
Expand Down
27 changes: 11 additions & 16 deletions examples/rag/graph_rag_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,12 @@
from dbgpt_ext.rag import ChunkParameters
from dbgpt_ext.rag.assembler import EmbeddingAssembler
from dbgpt_ext.rag.knowledge import KnowledgeFactory
from dbgpt_ext.storage.graph_store.tugraph_store import TuGraphStoreConfig
from dbgpt_ext.storage.knowledge_graph.community_summary import (
CommunitySummaryKnowledgeGraph,
CommunitySummaryKnowledgeGraphConfig,
)
from dbgpt_ext.storage.knowledge_graph.knowledge_graph import (
BuiltinKnowledgeGraph,
BuiltinKnowledgeGraphConfig,
)

"""GraphRAG example.
Expand Down Expand Up @@ -61,26 +60,22 @@ async def test_community_graph_rag():
def __create_naive_kg_connector():
"""Create knowledge graph connector."""
return BuiltinKnowledgeGraph(
config=BuiltinKnowledgeGraphConfig(
name="naive_graph_rag_test",
embedding_fn=None,
llm_client=llm_client,
model_name=model_name,
graph_store_type="MemoryGraph",
),
config=TuGraphStoreConfig(),
name="naive_graph_rag_test",
embedding_fn=None,
llm_client=llm_client,
llm_model=model_name,
)


def __create_community_kg_connector():
"""Create community knowledge graph connector."""
return CommunitySummaryKnowledgeGraph(
config=CommunitySummaryKnowledgeGraphConfig(
name="community_graph_rag_test",
embedding_fn=DefaultEmbeddingFactory.openai(),
llm_client=llm_client,
model_name=model_name,
graph_store_type="TuGraphGraph",
),
config=TuGraphStoreConfig(),
name="community_graph_rag_test",
embedding_fn=DefaultEmbeddingFactory.openai(),
llm_client=llm_client,
llm_model=model_name,
)


Expand Down
7 changes: 3 additions & 4 deletions examples/rag/keyword_rag_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
from dbgpt_ext.rag.assembler import EmbeddingAssembler
from dbgpt_ext.rag.knowledge import KnowledgeFactory
from dbgpt_ext.storage.full_text.elasticsearch import (
ElasticDocumentConfig,
ElasticDocumentStore,
ElasticsearchStoreConfig,
)

"""Keyword rag example.
Expand All @@ -22,15 +22,14 @@

def _create_es_connector():
"""Create es connector."""
config = ElasticDocumentConfig(
name="keyword_rag_test",
config = ElasticsearchStoreConfig(
uri="localhost",
port="9200",
user="elastic",
password="dbgpt",
)

return ElasticDocumentStore(config)
return ElasticDocumentStore(config, name="keyword_rag_test")


async def main():
Expand Down
8 changes: 5 additions & 3 deletions examples/rag/metadata_filter_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,16 @@ def _create_vector_connector():
"""Create vector connector."""
config = ChromaVectorConfig(
persist_path=PILOT_PATH,
name="metadata_rag_test",
)

return ChromaStore(
config,
name="embedding_rag_test",
embedding_fn=DefaultEmbeddingFactory(
default_model_name=os.path.join(MODEL_PATH, "text2vec-large-chinese"),
).create(),
)

return ChromaStore(config)


async def main():
file_path = os.path.join(ROOT_PATH, "docs/docs/awel/awel.md")
Expand Down
8 changes: 5 additions & 3 deletions examples/rag/rag_embedding_api_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,13 @@ def _create_vector_connector():
"""Create vector connector."""
config = ChromaVectorConfig(
persist_path=PILOT_PATH,
name="embedding_api_rag_test",
embedding_fn=_create_embeddings(),
)

return ChromaStore(config)
return ChromaStore(
config,
name="embedding_rag_test",
embedding_fn=_create_embeddings(),
)


async def main():
Expand Down
10 changes: 6 additions & 4 deletions examples/rag/retriever_evaluation_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,17 @@ def _create_embeddings(
).create()


def _create_vector_connector(embeddings: Embeddings):
def _create_vector_connector():
"""Create vector connector."""
config = ChromaVectorConfig(
persist_path=PILOT_PATH,
name="embedding_rag_test",
embedding_fn=embeddings,
)

return ChromaStore(config)
return ChromaStore(
config,
name="embedding_rag_test",
embedding_fn=_create_embeddings(),
)


async def main():
Expand Down
10 changes: 6 additions & 4 deletions examples/rag/simple_dbschema_retriever_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,17 @@
def _create_vector_connector():
"""Create vector connector."""
config = ChromaVectorConfig(
persist_path=os.path.join(PILOT_PATH, "data"),
name="vector_name",
persist_path=PILOT_PATH,
)

return ChromaStore(
config,
name="embedding_rag_test",
embedding_fn=DefaultEmbeddingFactory(
default_model_name=os.path.join(MODEL_PATH, "text2vec-large-chinese"),
).create(),
)

return ChromaStore(config)


def _create_temporary_connection():
"""Create a temporary database connection for testing."""
Expand Down
6 changes: 4 additions & 2 deletions examples/rag/simple_rag_embedding_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,16 @@ def _create_vector_connector():
"""Create vector connector."""
config = ChromaVectorConfig(
persist_path=PILOT_PATH,
)

return ChromaStore(
config,
name="embedding_rag_test",
embedding_fn=DefaultEmbeddingFactory(
default_model_name=os.path.join(MODEL_PATH, "text2vec-large-chinese"),
).create(),
)

return ChromaStore(config)


class TriggerReqBody(BaseModel):
url: str = Field(..., description="url")
Expand Down
6 changes: 4 additions & 2 deletions examples/rag/simple_rag_retriever_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,14 +76,16 @@ def _create_vector_connector():
"""Create vector connector."""
config = ChromaVectorConfig(
persist_path=PILOT_PATH,
)

return ChromaStore(
config,
name="embedding_rag_test",
embedding_fn=DefaultEmbeddingFactory(
default_model_name=os.path.join(MODEL_PATH, "text2vec-large-chinese"),
).create(),
)

return ChromaStore(config)


with DAG("simple_sdk_rag_retriever_example") as dag:
vector_store = _create_vector_connector()
Expand Down
Loading