diff --git a/mindsdb_sql/__about__.py b/mindsdb_sql/__about__.py
index a960e8d4..8969960a 100644
--- a/mindsdb_sql/__about__.py
+++ b/mindsdb_sql/__about__.py
@@ -1,6 +1,6 @@
 __title__ = 'mindsdb_sql'
 __package_name__ = 'mindsdb_sql'
-__version__ = '0.7.4'
+__version__ = '0.7.5'
 __description__ = "Pure python SQL parser"
 __email__ = "jorge@mindsdb.com"
 __author__ = 'MindsDB Inc'
diff --git a/mindsdb_sql/parser/dialects/mindsdb/__init__.py b/mindsdb_sql/parser/dialects/mindsdb/__init__.py
index aa26ef4b..4c80dbb8 100644
--- a/mindsdb_sql/parser/dialects/mindsdb/__init__.py
+++ b/mindsdb_sql/parser/dialects/mindsdb/__init__.py
@@ -17,6 +17,7 @@
 from .chatbot import CreateChatBot, UpdateChatBot, DropChatBot
 from .trigger import CreateTrigger, DropTrigger
 from .knowledge_base import CreateKnowledgeBase, DropKnowledgeBase
+from .rag import CreateRAG, DropRAG, UpdateRAG
 from .skills import CreateSkill, DropSkill, UpdateSkill
 
 # remove it in next release
diff --git a/mindsdb_sql/parser/dialects/mindsdb/knowledge_base.py b/mindsdb_sql/parser/dialects/mindsdb/knowledge_base.py
index 0ea4b4d2..a9303bce 100644
--- a/mindsdb_sql/parser/dialects/mindsdb/knowledge_base.py
+++ b/mindsdb_sql/parser/dialects/mindsdb/knowledge_base.py
@@ -9,7 +9,7 @@ class CreateKnowledgeBase(ASTNode):
     def __init__(
         self,
         name,
-        model,
+        model=None,
         storage=None,
         from_select=None,
         params=None,
@@ -37,13 +37,13 @@ def __init__(
     def to_tree(self, *args, level=0, **kwargs):
         ind = indent(level)
         storage_str = f"{ind}    storage={self.storage.to_string()},\n" if self.storage else ""
+        model_str = f"{ind}    model={self.model.to_string()},\n" if self.model else ""
         out_str = f"""
         {ind}CreateKnowledgeBase(
         {ind}    if_not_exists={self.if_not_exists},
         {ind}    name={self.name.to_string()},
         {ind}    from_query={self.from_query.to_tree(level=level + 1) if self.from_query else None},
-        {ind}    model={self.model.to_string()},
-        {storage_str}{ind}    params={self.params}
+        {model_str}{storage_str}{ind}    params={self.params}
         {ind})
         """
         return out_str
@@ -56,13 +56,13 @@ def get_string(self, *args, **kwargs):
             f"FROM ({self.from_query.get_string()})" if self.from_query else ""
         )
         storage_str = f" STORAGE = {self.storage.to_string()}" if self.storage else ""
+        model_str = f" MODEL = {self.model.to_string()}, " if self.model else ""
 
         out_str = (
             f"CREATE KNOWLEDGE_BASE {'IF NOT EXISTS' if self.if_not_exists else ''}{self.name.to_string()} "
             f"{from_query_str} "
             f"USING {using_str},"
-            f" MODEL = {self.model.to_string()}, "
-            f"{storage_str}"
+            f"{model_str}{storage_str}"
         )
 
         return out_str
diff --git a/mindsdb_sql/parser/dialects/mindsdb/lexer.py b/mindsdb_sql/parser/dialects/mindsdb/lexer.py
index 752dddaf..a4ce05d1 100644
--- a/mindsdb_sql/parser/dialects/mindsdb/lexer.py
+++ b/mindsdb_sql/parser/dialects/mindsdb/lexer.py
@@ -31,6 +31,7 @@ class MindsDBLexer(Lexer):
         ENGINE, TRAIN, PREDICT, PARAMETERS, JOB, CHATBOT, EVERY,PROJECT,
         ANOMALY, DETECTION,
         KNOWLEDGE_BASE, KNOWLEDGE_BASES,
+        RAG, RAGS,
         SKILL,
         AGENT,
 
@@ -121,6 +122,8 @@ class MindsDBLexer(Lexer):
 
     KNOWLEDGE_BASE = r'\bKNOWLEDGE[_|\s]BASE\b'
     KNOWLEDGE_BASES = r'\bKNOWLEDGE[_|\s]BASES\b'
+    RAG = r'\bRAG\b'
+    RAGS = r'\bRAGS\b'
     SKILL = r'\bSKILL\b'
     AGENT = r'\bAGENT\b'
 
diff --git a/mindsdb_sql/parser/dialects/mindsdb/parser.py b/mindsdb_sql/parser/dialects/mindsdb/parser.py
index a2bfefe1..63b6b20e 100644
--- a/mindsdb_sql/parser/dialects/mindsdb/parser.py
+++ b/mindsdb_sql/parser/dialects/mindsdb/parser.py
@@ -1,3 +1,4 @@
+from mindsdb_sql.parser.dialects.mindsdb.rag import CreateRAG, DropRAG, UpdateRAG
 from sly import Parser
 from mindsdb_sql.parser.ast import *
 from mindsdb_sql.parser.ast.drop import DropDatabase, DropView
@@ -87,6 +88,9 @@ class MindsDBParser(Parser):
        'drop_trigger',
        'create_kb',
        'drop_kb',
+       'create_rag',
+       'drop_rag',
+       'update_rag',
        'create_skill',
        'drop_skill',
        'update_skill',
@@ -138,6 +142,41 @@ def create_kb(self, p):
     def drop_kb(self, p):
         return DropKnowledgeBase(name=p.identifier, if_exists=p.if_exists_or_empty)
 
+    # -- RAG --
+    @_('CREATE RAG if_not_exists_or_empty identifier USING kw_parameter_list')
+    def create_rag(self, p):
+        params = p.kw_parameter_list
+
+        llm = params.pop('llm', None)
+        knowledge_base_store = params.pop('knowledge_base_store', None)
+
+        if not llm:
+            raise ParsingException('Missing llm parameter')
+
+        if isinstance(llm, str):
+            # convert to identifier
+            llm = Identifier(llm)
+
+        if isinstance(knowledge_base_store, str):
+            # convert to identifier
+            knowledge_base_store = Identifier(knowledge_base_store)
+
+        return CreateRAG(
+            name=p.identifier,
+            llm=llm,
+            knowledge_base_store=knowledge_base_store,
+            params=params,
+            if_not_exists=p.if_not_exists_or_empty
+        )
+
+    @_('DROP RAG if_exists_or_empty identifier')
+    def drop_rag(self, p):
+        return DropRAG(name=p.identifier, if_exists=p.if_exists_or_empty)
+
+    @_('UPDATE RAG identifier SET kw_parameter_list')
+    def update_rag(self, p):
+        return UpdateRAG(name=p.identifier, updated_params=p.kw_parameter_list)
+
     # -- Skills --
     @_('CREATE SKILL if_not_exists_or_empty identifier USING kw_parameter_list')
     def create_skill(self, p):
diff --git a/mindsdb_sql/parser/dialects/mindsdb/rag.py b/mindsdb_sql/parser/dialects/mindsdb/rag.py
new file mode 100644
index 00000000..1c38646e
--- /dev/null
+++ b/mindsdb_sql/parser/dialects/mindsdb/rag.py
@@ -0,0 +1,136 @@
+from mindsdb_sql.parser.ast.base import ASTNode
+from mindsdb_sql.parser.utils import indent
+
+
+class CreateRAG(ASTNode):
+    """
+    Create a RAG
+    """
+    def __init__(
+        self,
+        name,
+        llm,
+        knowledge_base_store=None,
+        from_select=None,
+        params=None,
+        if_not_exists=False,
+        *args,
+        **kwargs,
+    ):
+        """
+        Args:
+            name: Identifier -- name of the RAG
+            llm: Identifier -- name of the LLM to use
+            knowledge_base_store: Identifier -- name of the knowledge_base_store to use
+            from_select: SelectStatement -- select statement to use as the source of the RAG
+            params: dict -- additional parameters to pass to the RAG.
+            if_not_exists: bool -- if True, do not raise an error if the RAG already exists
+        """
+        super().__init__(*args, **kwargs)
+        self.name = name
+        self.llm = llm
+        self.knowledge_base_store = knowledge_base_store
+        self.params = params
+        self.if_not_exists = if_not_exists
+        self.from_query = from_select
+
+    def to_tree(self, *args, level=0, **kwargs):
+        ind = indent(level)
+        kb_str = f"{ind}    knowledge_base_store={self.knowledge_base_store.to_string()},\n" if self.knowledge_base_store else ""
+        out_str = f"""
+        {ind}CreateRAG(
+        {ind}    if_not_exists={self.if_not_exists},
+        {ind}    name={self.name.to_string()},
+        {ind}    from_query={self.from_query.to_tree(level=level + 1) if self.from_query else None},
+        {ind}    llm={self.llm.to_string()},
+        {kb_str}{ind}    params={self.params}
+        {ind})
+        """
+        return out_str
+
+    def get_string(self, *args, **kwargs):
+        """Render the node as a CREATE RAG statement."""
+        # llm and knowledge_base_store are written as ordinary lower-case USING
+        # options, the same keys the create_rag grammar rule pops from the list
+        using_ar = [f"llm = {self.llm.to_string()}"]
+        # only add knowledge base if it is provided, else we will use the default
+        if self.knowledge_base_store:
+            using_ar.append(f"knowledge_base_store = {self.knowledge_base_store.to_string()}")
+        # params defaults to None in __init__, so guard before iterating
+        using_ar += [f"{k}={repr(v)}" for k, v in (self.params or {}).items()]
+        using_str = ", ".join(using_ar)
+
+        # optional clauses carry their own trailing space so nothing gets glued together
+        if_not_exists_str = "IF NOT EXISTS " if self.if_not_exists else ""
+        from_query_str = f"FROM ({self.from_query.get_string()}) " if self.from_query else ""
+
+        out_str = (
+            f"CREATE RAG {if_not_exists_str}{self.name.to_string()} "
+            f"{from_query_str}"
+            f"USING {using_str}"
+        )
+
+        return out_str
+
+    def __repr__(self) -> str:
+        return self.to_tree()
+
+
+class DropRAG(ASTNode):
+    """
+    Delete a RAG
+    """
+    def __init__(self, name, if_exists=False, *args, **kwargs):
+        """
+        Args:
+            name: Identifier -- name of the RAG
+            if_exists: bool -- if True, do not raise an error if the RAG does not exist
+        """
+        super().__init__(*args, **kwargs)
+        self.name = name
+        self.if_exists = if_exists
+
+    def to_tree(self, *args, level=0, **kwargs):
+        ind = indent(level)
+        out_str = (
+            f"{ind}DropRAG("
+            f"if_exists={self.if_exists}, "
+            f"name={self.name.to_string()})"
+        )
+        return out_str
+
+    def get_string(self, *args, **kwargs):
+        # the trailing space keeps the RAG name from being glued to the IF EXISTS clause
+        if_exists_str = "IF EXISTS " if self.if_exists else ""
+        out_str = f"DROP RAG {if_exists_str}{self.name.to_string()}"
+        return out_str
+
+
+class UpdateRAG(ASTNode):
+    """
+    Node for updating a RAG
+    """
+
+    def __init__(self, name, updated_params, *args, **kwargs):
+        """
+        Parameters:
+            name (Identifier): name of the RAG to update
+            updated_params (dict): new SET parameters of the RAG to update
+        """
+        super().__init__(*args, **kwargs)
+        self.name = name
+        self.params = updated_params
+
+    def to_tree(self, level=0, *args, **kwargs):
+        ind = indent(level)
+        out_str = f'{ind}UpdateRAG(' \
+                  f'name={self.name.to_string()}, ' \
+                  f'updated_params={self.params})'
+        return out_str
+
+    def get_string(self, *args, **kwargs):
+        set_ar = [f'{k}={repr(v)}' for k, v in self.params.items()]
+        set_str = ', '.join(set_ar)
+
+        out_str = f'UPDATE RAG {self.name.to_string()} SET {set_str}'
+        return out_str
diff --git a/tests/test_parser/test_mindsdb/test_knowledgebase.py b/tests/test_parser/test_mindsdb/test_knowledgebase.py
index 944a7e33..b04c37fc 100644
--- a/tests/test_parser/test_mindsdb/test_knowledgebase.py
+++ b/tests/test_parser/test_mindsdb/test_knowledgebase.py
@@ -18,7 +18,7 @@
 )
 
 
-def test_create_knowledeg_base():
+def test_create_knowledge_base():
     # create without select
     sql = """
         CREATE KNOWLEDGE_BASE my_knowledge_base
diff --git a/tests/test_parser/test_mindsdb/test_rag.py b/tests/test_parser/test_mindsdb/test_rag.py
new file mode 100644
index 00000000..6a3fa5b9
--- /dev/null
+++ b/tests/test_parser/test_mindsdb/test_rag.py
@@ -0,0 +1,184 @@
+import pytest
+from mindsdb_sql import parse_sql
+from mindsdb_sql.parser.dialects.mindsdb.rag import (
+    CreateRAG,
+    DropRAG
+)
+from mindsdb_sql.parser.ast import (
+    Select,
+    Identifier,
+    Join,
+    Show,
+    BinaryOperation,
+    Constant,
+    Star,
+    Delete,
+    Insert,
+    OrderBy,
+)
+
+
+def test_create_rag():
+    # create without select
+    sql = """
+        CREATE RAG my_rag
+        USING
+            llm=mindsdb.my_llm,
+            knowledge_base_store = mindsdb.my_kb
+    """
+    ast = parse_sql(sql, dialect="mindsdb")
+    expected_ast = CreateRAG(
+        name=Identifier("my_rag"),
+        if_not_exists=False,
+        llm=Identifier(parts=["mindsdb", "my_llm"]),
+        knowledge_base_store=Identifier(parts=["mindsdb", "my_kb"]),
+        from_select=None,
+        params={},
+    )
+    assert ast == expected_ast
+
+    # the order of llm and knowledge_base_store should not matter
+    sql = """
+        CREATE RAG my_rag
+        USING
+            knowledge_base_store = mindsdb.my_kb,
+            llm = mindsdb.my_llm
+    """
+    ast = parse_sql(sql, dialect="mindsdb")
+    assert ast == expected_ast
+
+    # create without llm
+    # we may allow this in the future when we have a default llm
+    sql = """
+        CREATE RAG my_rag
+        USING
+            knowledge_base_store = mindsdb.my_kb
+    """
+
+    with pytest.raises(Exception):
+        _ = parse_sql(sql, dialect="mindsdb")
+
+    # create without knowledge_base_store
+    sql = """
+        CREATE RAG my_rag
+        USING
+            llm = mindsdb.my_llm
+    """
+
+    expected_ast = CreateRAG(
+        name=Identifier("my_rag"),
+        if_not_exists=False,
+        llm=Identifier(parts=["mindsdb", "my_llm"]),
+        from_select=None,
+        params={},
+    )
+
+    ast = parse_sql(sql, dialect="mindsdb")
+
+    assert ast == expected_ast
+
+    # create if not exists
+    sql = """
+        CREATE RAG IF NOT EXISTS my_rag
+        USING
+            llm = mindsdb.my_llm,
+            knowledge_base_store = mindsdb.my_kb
+    """
+    ast = parse_sql(sql, dialect="mindsdb")
+    expected_ast = CreateRAG(
+        name=Identifier("my_rag"),
+        if_not_exists=True,
+        llm=Identifier(parts=["mindsdb", "my_llm"]),
+        knowledge_base_store=Identifier(parts=["mindsdb", "my_kb"]),
+        from_select=None,
+        params={},
+    )
+    assert ast == expected_ast
+
+    # create with params
+    sql = """
+        CREATE RAG my_rag
+        USING
+            llm = mindsdb.my_llm,
+            knowledge_base_store = mindsdb.my_kb,
+            some_param = 'some value',
+            other_param = 'other value'
+    """
+    ast = parse_sql(sql, dialect="mindsdb")
+    expected_ast = CreateRAG(
+        name=Identifier("my_rag"),
+        if_not_exists=False,
+        llm=Identifier(parts=["mindsdb", "my_llm"]),
+        knowledge_base_store=Identifier(parts=["mindsdb", "my_kb"]),
+        from_select=None,
+        params={"some_param": "some value", "other_param": "other value"},
+    )
+    assert ast == expected_ast
+
+
+def test_drop_rag():
+    # drop if exists
+    sql = """
+        DROP RAG IF EXISTS my_rag
+    """
+    ast = parse_sql(sql, dialect="mindsdb")
+    expected_ast = DropRAG(
+        name=Identifier("my_rag"), if_exists=True
+    )
+    assert ast == expected_ast
+
+    # drop without if exists
+    sql = """
+        DROP RAG my_rag
+    """
+    ast = parse_sql(sql, dialect="mindsdb")
+
+    expected_ast = DropRAG(
+        name=Identifier("my_rag"), if_exists=False
+    )
+    assert ast == expected_ast
+
+
+@pytest.mark.skip(reason="not implemented")
+def test_alter_rag():
+    ...
+
+@pytest.mark.skip(reason="not working")
+def test_show_rag():
+    sql = """
+        SHOW RAGS
+    """
+    ast = parse_sql(sql, dialect="mindsdb")
+    expected_ast = Show(
+        category="RAGS",
+    )
+    assert ast == expected_ast
+
+
+def test_select_from_rag():
+
+    sql = """
+        SELECT *
+        FROM my_rag
+        WHERE question = 'what is the answer?'
+    """
+    ast = parse_sql(sql, dialect="mindsdb")
+
+    expected_ast = Select(
+        targets=[Star()],
+        from_table=Identifier("my_rag"),
+        where=BinaryOperation(
+            op="=",
+            args=[Identifier("question"), Constant('what is the answer?')],
+        )
+    )
+    assert ast == expected_ast
+
+@pytest.mark.skip(reason="not implemented")
+def test_delete_from_rag():
+    ...
+
+
+@pytest.mark.skip(reason="not implemented")
+def test_insert_into_rag():
+    ...