From 3da7982400624ba5336fd3cf8a61ec25f5d46a92 Mon Sep 17 00:00:00 2001 From: Patricio Cerda Mardini Date: Fri, 1 Dec 2023 12:29:45 +0900 Subject: [PATCH 1/3] add basic support for typed forecasting models --- .../parser/dialects/mindsdb/__init__.py | 2 +- .../dialects/mindsdb/create_predictor.py | 7 ++++ mindsdb_sql/parser/dialects/mindsdb/lexer.py | 2 ++ mindsdb_sql/parser/dialects/mindsdb/parser.py | 36 ++++++++++++++++++- .../test_mindsdb/test_create_predictor.py | 25 +++++++++++++ 5 files changed, 70 insertions(+), 2 deletions(-) diff --git a/mindsdb_sql/parser/dialects/mindsdb/__init__.py b/mindsdb_sql/parser/dialects/mindsdb/__init__.py index aa26ef4b..f0c945bd 100644 --- a/mindsdb_sql/parser/dialects/mindsdb/__init__.py +++ b/mindsdb_sql/parser/dialects/mindsdb/__init__.py @@ -1,7 +1,7 @@ from .agents import CreateAgent, DropAgent, UpdateAgent from .create_view import CreateView from .create_database import CreateDatabase -from .create_predictor import CreatePredictor, CreateAnomalyDetectionModel +from .create_predictor import CreatePredictor, CreateAnomalyDetectionModel, CreateForecastingModel from .drop_predictor import DropPredictor from .retrain_predictor import RetrainPredictor from .finetune_predictor import FinetunePredictor diff --git a/mindsdb_sql/parser/dialects/mindsdb/create_predictor.py b/mindsdb_sql/parser/dialects/mindsdb/create_predictor.py index f98d48f1..9c103888 100644 --- a/mindsdb_sql/parser/dialects/mindsdb/create_predictor.py +++ b/mindsdb_sql/parser/dialects/mindsdb/create_predictor.py @@ -152,3 +152,10 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self._command = 'CREATE ANOMALY DETECTION MODEL' self.task = Identifier('AnomalyDetection') + + +class CreateForecastingModel(CreatePredictorBase): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._command = 'CREATE FORECASTING MODEL' + self.task = Identifier('Forecasting') diff --git a/mindsdb_sql/parser/dialects/mindsdb/lexer.py b/mindsdb_sql/parser/dialects/mindsdb/lexer.py index 752dddaf..10bd205c 100644 --- a/mindsdb_sql/parser/dialects/mindsdb/lexer.py +++ b/mindsdb_sql/parser/dialects/mindsdb/lexer.py @@ -30,6 +30,7 @@ class MindsDBLexer(Lexer): LATEST, LAST, HORIZON, USING, ENGINE, TRAIN, PREDICT, PARAMETERS, JOB, CHATBOT, EVERY,PROJECT, ANOMALY, DETECTION, + FORECASTING, KNOWLEDGE_BASE, KNOWLEDGE_BASES, SKILL, AGENT, @@ -118,6 +119,7 @@ class MindsDBLexer(Lexer): # Typed models ANOMALY = r'\bANOMALY\b' DETECTION = r'\bDETECTION\b' + FORECASTING = r'\bFORECASTING\b' KNOWLEDGE_BASE = r'\bKNOWLEDGE[_|\s]BASE\b' KNOWLEDGE_BASES = r'\bKNOWLEDGE[_|\s]BASES\b' diff --git a/mindsdb_sql/parser/dialects/mindsdb/parser.py b/mindsdb_sql/parser/dialects/mindsdb/parser.py index a2bfefe1..1d8233d6 100644 --- a/mindsdb_sql/parser/dialects/mindsdb/parser.py +++ b/mindsdb_sql/parser/dialects/mindsdb/parser.py @@ -6,7 +6,8 @@ from mindsdb_sql.parser.dialects.mindsdb.drop_predictor import DropPredictor from mindsdb_sql.parser.dialects.mindsdb.drop_dataset import DropDataset from mindsdb_sql.parser.dialects.mindsdb.drop_ml_engine import DropMLEngine -from mindsdb_sql.parser.dialects.mindsdb.create_predictor import CreatePredictor, CreateAnomalyDetectionModel +from mindsdb_sql.parser.dialects.mindsdb.create_predictor import CreatePredictor +from mindsdb_sql.parser.dialects.mindsdb.create_predictor import CreateAnomalyDetectionModel, CreateForecastingModel from mindsdb_sql.parser.dialects.mindsdb.create_database import CreateDatabase from mindsdb_sql.parser.dialects.mindsdb.create_ml_engine import CreateMLEngine from mindsdb_sql.parser.dialects.mindsdb.create_view import CreateView @@ -64,6 +65,7 @@ class MindsDBParser(Parser): 'create_integration', 'create_view', 'create_anomaly_detection_model', + 'create_forecasting_model', 'drop_predictor', 'drop_datasource', 'drop_dataset', @@ -819,6 +821,38 @@ def create_anomaly_detection_model(self, p): p.create_anomaly_detection_model.using = p.kw_parameter_list return p.create_anomaly_detection_model + ## Forecasting + @_( + 'CREATE FORECASTING MODEL identifier', # for methods that do not require training (e.g. TimeGPT) + 'CREATE FORECASTING MODEL identifier FROM identifier LPAREN raw_query RPAREN', + 'CREATE FORECASTING MODEL identifier PREDICT result_columns', + 'CREATE FORECASTING MODEL identifier PREDICT result_columns FROM identifier LPAREN raw_query RPAREN', + # TODO add IF_NOT_EXISTS elegantly (should be low level BNF expansion) + ) + def create_forecasting_model(self, p): + query_str = None + if hasattr(p, 'raw_query'): + query_str = tokens_to_string(p.raw_query) + + if hasattr(p, 'identifier'): + # single identifier field + name = p.identifier + else: + name = p.identifier0 + + return CreateForecastingModel( + name=name, + targets=getattr(p, 'result_columns', None), + integration_name=getattr(p, 'identifier1', None), + query_str=query_str, + if_not_exists=hasattr(p, 'IF_NOT_EXISTS') + ) + + @_('create_forecasting_model USING kw_parameter_list') + def create_forecasting_model(self, p): + p.create_forecasting_model.using = p.kw_parameter_list + return p.create_forecasting_model + # RETRAIN PREDICTOR @_('RETRAIN identifier', diff --git a/tests/test_parser/test_mindsdb/test_create_predictor.py b/tests/test_parser/test_mindsdb/test_create_predictor.py index b035ab7a..5d5d7cad 100644 --- a/tests/test_parser/test_mindsdb/test_create_predictor.py +++ b/tests/test_parser/test_mindsdb/test_create_predictor.py @@ -185,3 +185,28 @@ def test_create_anomaly_detection_model(self): assert to_single_line(str(ast)) == to_single_line(str(expected_ast)) assert ast.to_tree() == expected_ast.to_tree() + + def test_create_forecasting_model(self): + for predict_clause in ["", " PREDICT y "]: + create_clause = """CREATE FORECASTING MODEL forecasting_model """ + rest_clause = """ + FROM integration_name (select * FROM table) + USING + window2=10 + """ + sql = create_clause + predict_clause + rest_clause + ast = parse_sql(sql, dialect='mindsdb') + + expected_ast = CreateForecastingModel( + name=Identifier('forecasting_model'), + task=Identifier('Forecasting'), + integration_name=Identifier('integration_name'), + query_str='select * FROM table', + targets=[Identifier('y')] if predict_clause else None, + using={ + 'window2': 10 # TODO: make it work with `window` + } + ) + + assert to_single_line(str(ast)) == to_single_line(str(expected_ast)) + assert ast.to_tree() == expected_ast.to_tree() From 229d869114e96b32e6e0a0b547ee1ff34de8f9cf Mon Sep 17 00:00:00 2001 From: Patricio Cerda Mardini Date: Fri, 1 Dec 2023 20:44:03 +0900 Subject: [PATCH 2/3] fix: revert to using window and other TS tokens within expression, not inside using --- mindsdb_sql/parser/dialects/mindsdb/parser.py | 24 +++++++++++++++++++ .../test_mindsdb/test_create_predictor.py | 12 ++++++++-- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/mindsdb_sql/parser/dialects/mindsdb/parser.py b/mindsdb_sql/parser/dialects/mindsdb/parser.py index 1d8233d6..3e051db0 100644 --- a/mindsdb_sql/parser/dialects/mindsdb/parser.py +++ b/mindsdb_sql/parser/dialects/mindsdb/parser.py @@ -848,6 +848,30 @@ def create_forecasting_model(self, p): if_not_exists=hasattr(p, 'IF_NOT_EXISTS') ) + @_('create_forecasting_model WINDOW integer') + def create_forecasting_model(self, p): + p.create_forecasting_model.window = p.integer + return p.create_forecasting_model + + @_('create_forecasting_model HORIZON integer') + def create_forecasting_model(self, p): + p.create_forecasting_model.horizon = p.integer + return p.create_forecasting_model + + @_('create_forecasting_model GROUP_BY expr_list') + def create_forecasting_model(self, p): + group_by = p.expr_list + if not isinstance(group_by, list): + group_by = [group_by] + + p.create_forecasting_model.group_by = group_by + return p.create_forecasting_model + + @_('create_forecasting_model ORDER_BY ordering_terms') + def create_forecasting_model(self, p): + p.create_forecasting_model.order_by = p.ordering_terms + return p.create_forecasting_model + @_('create_forecasting_model USING kw_parameter_list') def create_forecasting_model(self, p): p.create_forecasting_model.using = p.kw_parameter_list diff --git a/tests/test_parser/test_mindsdb/test_create_predictor.py b/tests/test_parser/test_mindsdb/test_create_predictor.py index 5d5d7cad..e8c53bce 100644 --- a/tests/test_parser/test_mindsdb/test_create_predictor.py +++ b/tests/test_parser/test_mindsdb/test_create_predictor.py @@ -191,8 +191,12 @@ def test_create_forecasting_model(self): create_clause = """CREATE FORECASTING MODEL forecasting_model """ rest_clause = """ FROM integration_name (select * FROM table) + WINDOW 10 + HORIZON 5 + ORDER BY time + GROUP BY group USING - window2=10 + param='a' """ sql = create_clause + predict_clause + rest_clause ast = parse_sql(sql, dialect='mindsdb') @@ -203,8 +207,12 @@ def test_create_forecasting_model(self): integration_name=Identifier('integration_name'), query_str='select * FROM table', targets=[Identifier('y')] if predict_clause else None, + window=10, + horizon=5, + order_by=[OrderBy(Identifier('time'), direction='default')], + group_by=[Identifier('group')], using={ - 'window2': 10 # TODO: make it work with `window` + 'param': 'a' } ) From 2ec4c4d4ffbbf1a161a5ddc573f7c29fcdc23227 Mon Sep 17 00:00:00 2001 From: Patricio Cerda Mardini Date: Fri, 1 Dec 2023 20:56:38 +0900 Subject: [PATCH 3/3] fix: predict clause order --- mindsdb_sql/parser/dialects/mindsdb/parser.py | 6 +- .../test_mindsdb/test_create_predictor.py | 58 +++++++++---------- 2 files changed, 31 insertions(+), 33 deletions(-) diff --git a/mindsdb_sql/parser/dialects/mindsdb/parser.py b/mindsdb_sql/parser/dialects/mindsdb/parser.py index 3e051db0..05da89f9 100644 --- a/mindsdb_sql/parser/dialects/mindsdb/parser.py +++ b/mindsdb_sql/parser/dialects/mindsdb/parser.py @@ -823,10 +823,8 @@ def create_anomaly_detection_model(self, p): ## Forecasting @_( - 'CREATE FORECASTING MODEL identifier', # for methods that do not require training (e.g. TimeGPT) - 'CREATE FORECASTING MODEL identifier FROM identifier LPAREN raw_query RPAREN', - 'CREATE FORECASTING MODEL identifier PREDICT result_columns', - 'CREATE FORECASTING MODEL identifier PREDICT result_columns FROM identifier LPAREN raw_query RPAREN', + 'CREATE FORECASTING MODEL identifier PREDICT result_columns', # for pre-trained models (e.g. TimeGPT) + 'CREATE FORECASTING MODEL identifier FROM identifier LPAREN raw_query RPAREN PREDICT result_columns', # TODO add IF_NOT_EXISTS elegantly (should be low level BNF expansion) ) def create_forecasting_model(self, p): diff --git a/tests/test_parser/test_mindsdb/test_create_predictor.py b/tests/test_parser/test_mindsdb/test_create_predictor.py index e8c53bce..cbb42da4 100644 --- a/tests/test_parser/test_mindsdb/test_create_predictor.py +++ b/tests/test_parser/test_mindsdb/test_create_predictor.py @@ -187,34 +187,34 @@ def test_create_anomaly_detection_model(self): assert ast.to_tree() == expected_ast.to_tree() def test_create_forecasting_model(self): - for predict_clause in ["", " PREDICT y "]: - create_clause = """CREATE FORECASTING MODEL forecasting_model """ - rest_clause = """ - FROM integration_name (select * FROM table) - WINDOW 10 - HORIZON 5 - ORDER BY time - GROUP BY group - USING - param='a' - """ - sql = create_clause + predict_clause + rest_clause - ast = parse_sql(sql, dialect='mindsdb') + create_clause = "CREATE FORECASTING MODEL forecasting_model" + rest_clause = """ + FROM integration_name (select * FROM table) + PREDICT y + WINDOW 10 + HORIZON 5 + ORDER BY time + GROUP BY group + USING + param='a' + """ + sql = create_clause + rest_clause + ast = parse_sql(sql, dialect='mindsdb') - expected_ast = CreateForecastingModel( - name=Identifier('forecasting_model'), - task=Identifier('Forecasting'), - integration_name=Identifier('integration_name'), - query_str='select * FROM table', - targets=[Identifier('y')] if predict_clause else None, - window=10, - horizon=5, - order_by=[OrderBy(Identifier('time'), direction='default')], - group_by=[Identifier('group')], - using={ - 'param': 'a' - } - ) + expected_ast = CreateForecastingModel( + name=Identifier('forecasting_model'), + task=Identifier('Forecasting'), + integration_name=Identifier('integration_name'), + query_str='select * FROM table', + targets=[Identifier('y')], + window=10, + horizon=5, + order_by=[OrderBy(Identifier('time'), direction='default')], + group_by=[Identifier('group')], + using={ + 'param': 'a' + } + ) - assert to_single_line(str(ast)) == to_single_line(str(expected_ast)) - assert ast.to_tree() == expected_ast.to_tree() + assert to_single_line(str(ast)) == to_single_line(str(expected_ast)) + assert ast.to_tree() == expected_ast.to_tree()