From 1924f4f25325b752c87264e02138866f0067a9ee Mon Sep 17 00:00:00 2001 From: Martin Naude Date: Thu, 13 Feb 2025 10:39:05 -0600 Subject: [PATCH] added recursive optimization for lookups and move to top --- docs/docs/sql-syntax/arrays.md | 14 + lib/optimizer.js | 108 ++- package.json | 2 +- test/optimizer.js | 1192 +++++++++++++++++++++++++++----- 4 files changed, 1159 insertions(+), 157 deletions(-) diff --git a/docs/docs/sql-syntax/arrays.md b/docs/docs/sql-syntax/arrays.md index 09390954..d5369782 100644 --- a/docs/docs/sql-syntax/arrays.md +++ b/docs/docs/sql-syntax/arrays.md @@ -107,6 +107,20 @@ Returns true when all elements in the array are true. FROM `customers`; ``` +### JOIN + +`JOIN(array expr, delimiter)` + +Joins all values into a string. + +???+ example "Example `JOIN` usage" + + ```sql + SELECT id, + JOIN((SELECT Name FROM Rentals),',') AS RentalNames + FROM customers; + ``` + ### ANY_ELEMENT_TRUE `ANY_ELEMENT_TRUE(array expr)` diff --git a/lib/optimizer.js b/lib/optimizer.js index 2e3ddf08..920a825c 100644 --- a/lib/optimizer.js +++ b/lib/optimizer.js @@ -5,7 +5,7 @@ const projectIsRoot = require('./projectIsRoot'); const projectIsSimple = require('./projectIsSimple'); const lodash = require('lodash'); const arraySequenceIndexOf = require('./arraySequenceIndexOf'); - +const $check = require('check-types'); /** * Extracts and returns the names of the stages in a MongoDB aggregation pipeline. * @param {Array} mongoAggregate - An array representing the MongoDB aggregation pipeline, @@ -375,6 +375,98 @@ const _patternsToFix = [ }, ]; +/** + * Determines whether the provided object value satisfies a "simple match" condition based on the given prefix. + * @param {any} objVal - The object or value to evaluate. It can be an object, array, string, or other types. + * @param {string} prefix - The prefix string used for validating keys or string values in the object. + * @returns {boolean} Returns true if the object or value matches the "simple match" criteria with the prefix, otherwise false. + */ +function _matchPieceIsSimple(objVal, prefix) { + if ($check.object(objVal)) { + let isSimple = false; + for (const objKey of Object.keys(objVal)) { + if (!objKey.startsWith('$')) { + if (!objKey.startsWith(prefix)) { + return false; + } else { + isSimple = + isSimple || _matchPieceIsSimple(objVal[objKey], prefix); + } + } else { + isSimple = + isSimple || _matchPieceIsSimple(objVal[objKey], prefix); + } + } + return isSimple; + } else if ($check.array(objVal)) { + for (const obj of objVal) { + if (!_matchPieceIsSimple(obj, prefix)) { + return false; + } + } + return true; + } else if ($check.string(objVal)) { + return objVal.startsWith('$') ? objVal.startsWith('$' + prefix) : true; + } else { + return true; + } +} + +/** + * Determines if a given match stage in a pipeline is simple. + * @param {object} stage - The pipeline stage to evaluate. + * @param {string} prefix - The prefix to use when processing the match object. + * @returns {boolean} Returns true if the match stage is simple, false otherwise. + */ +function _matchIsSimple(stage, prefix) { + if (!stage) { + return false; + } + if (!stage['$match']) { + return false; + } + const match = stage['$match']; + return _matchPieceIsSimple(match, prefix + '.'); +} + +/** + * Adjusts the order and reference of the `$match` and `$project` stages in a MongoDB aggregation pipeline. + * Used when the where is further down the pipeline stack. + * Ensures that the pipeline maintains proper structure when specific `$match` and `$project` stages are present. + * @param {Array} mongoAggregate - The MongoDB aggregation pipeline to be modified. + * @returns {Array} The modified MongoDB aggregation pipeline with corrected stage order and references. + */ +function _fixEndWhere(mongoAggregate) { + const stages = _getStageNames(mongoAggregate); + + const firstStage = mongoAggregate[0]; + if (projectIsRoot(firstStage) && stages.includes('$match')) { + const projectRootField = Object.keys(firstStage['$project'])[0]; + let lastMatch = null; + let lastMatchIndex = -1; + for (let i = 1; i < mongoAggregate.length; i++) { + const stage = mongoAggregate[i]; + if (stage['$project']) { + break; + } + if (stage['$match'] && _matchIsSimple(stage, projectRootField)) { + lastMatch = stage; + lastMatchIndex = i; + break; + } + } + + if (lastMatch) { + mongoAggregate.splice(lastMatchIndex, 1); + mongoAggregate.unshift( + _changeReference(lastMatch, projectRootField) + ); + } + } + + return mongoAggregate; +} + /** * Optimizes a given MongoDB aggregation pipeline by repeatedly applying transformation rules * to remove redundant or unneeded operations for improved performance. @@ -391,6 +483,19 @@ function optimizeMongoAggregate(mongoAggregate, options = {}) { while (iteration > 0 && lastHash !== $hash(newAggregate)) { lastHash = $hash(newAggregate); + for (const pipelineStage of newAggregate) { + if ( + pipelineStage.$lookup && + pipelineStage.$lookup.pipeline && + pipelineStage.$lookup.pipeline.length > 0 + ) { + pipelineStage.$lookup.pipeline = optimizeMongoAggregate( + pipelineStage.$lookup.pipeline, + options + ); + } + } + for (const pattern of _patternsToFix) { newAggregate = _patternFixer( newAggregate, @@ -400,6 +505,7 @@ function optimizeMongoAggregate(mongoAggregate, options = {}) { ); } + newAggregate = _fixEndWhere(newAggregate); iteration--; } diff --git a/package.json b/package.json index 78846fbf..aee364c6 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@synatic/noql", - "version": "4.2.7", + "version": "4.2.8", "description": "Convert SQL statements to mongo queries or aggregates", "main": "index.js", "files": [ diff --git a/test/optimizer.js b/test/optimizer.js index 16aabc79..86760580 100644 --- a/test/optimizer.js +++ b/test/optimizer.js @@ -1499,197 +1499,1079 @@ limit 501`; optimized, [ { - "$match": { - "$and": [ + $match: { + $and: [ { - "$and": [ + $and: [ { - "$and": [ + $and: [ { - "$and": [ + $and: [ { - "$and": [ + $and: [ { - "AccountType": { - "$eq": "EXPENSE" - } + AccountType: + { + $eq: 'EXPENSE', + }, }, { - "$expr": { - "$eq": [ + $expr: { + $eq: [ { - "$convert": { - "input": "$Year", - "to": "decimal" - } + $convert: + { + input: '$Year', + to: 'decimal', + }, }, { - "$convert": { - "input": 2024, - "to": "decimal" - } - } - ] - } - } - ] + $convert: + { + input: 2024, + to: 'decimal', + }, + }, + ], + }, + }, + ], }, { - "$nor": [ + $nor: [ { - "$expr": { - "$eq": [ + $expr: { + $eq: [ { - "$add": [ + $add: [ { - "$indexOfCP": [ - { - "$switch": { - "branches": [ + $indexOfCP: + [ + { + $switch: { - "case": { - "$ne": [ - "$AccountName", - null - ] - }, - "then": "$AccountName" - } - ], - "default": { - "$literal": "" - } - } - }, - { - "$literal": "Unrealized Currency" - } - ] + branches: + [ + { + case: { + $ne: [ + '$AccountName', + null, + ], + }, + then: '$AccountName', + }, + ], + default: + { + $literal: + '', + }, + }, + }, + { + $literal: + 'Unrealized Currency', + }, + ], }, - 1 - ] + 1, + ], }, - 1 - ] - } - } - ] - } - ] + 1, + ], + }, + }, + ], + }, + ], }, { - "$nor": [ + $nor: [ { - "$expr": { - "$eq": [ + $expr: { + $eq: [ { - "$add": [ + $add: [ { - "$indexOfCP": [ - { - "$switch": { - "branches": [ + $indexOfCP: + [ + { + $switch: { - "case": { - "$ne": [ - "$AccountName", - null - ] - }, - "then": "$AccountName" - } - ], - "default": { - "$literal": "" - } - } - }, - { - "$literal": "EE: Leave Pay" - } - ] + branches: + [ + { + case: { + $ne: [ + '$AccountName', + null, + ], + }, + then: '$AccountName', + }, + ], + default: + { + $literal: + '', + }, + }, + }, + { + $literal: + 'EE: Leave Pay', + }, + ], }, - 1 - ] + 1, + ], }, - 1 - ] - } - } - ] - } - ] + 1, + ], + }, + }, + ], + }, + ], }, { - "$nor": [ + $nor: [ { - "$expr": { - "$eq": [ + $expr: { + $eq: [ { - "$add": [ + $add: [ { - "$indexOfCP": [ - { - "$switch": { - "branches": [ + $indexOfCP: + [ + { + $switch: { - "case": { - "$ne": [ - "$AccountName", - null - ] - }, - "then": "$AccountName" - } - ], - "default": { - "$literal": "" - } - } - }, - { - "$literal": "Inter-company" - } - ] + branches: + [ + { + case: { + $ne: [ + '$AccountName', + null, + ], + }, + then: '$AccountName', + }, + ], + default: + { + $literal: + '', + }, + }, + }, + { + $literal: + 'Inter-company', + }, + ], }, - 1 - ] + 1, + ], }, - 1 - ] - } - } - ] - } - ] + 1, + ], + }, + }, + ], + }, + ], + }, + { + Year: { + $gte: 2022, + }, + }, + ], + }, + }, + { + $group: { + _id: {}, + a0: { + $sum: { + $convert: { + input: '$ValueUSD', + to: 'decimal', + }, + }, + }, + }, + }, + { + $project: { + _id: 0, + a0: '$a0', + }, + }, + ], + 'did not optimize' + ); + }); + + it('should optimize aggregates recursively', function () { + const sql = ` + select t2.name from table1 t1 inner join (select _table2.id2 from ( + select "id2","val" + from "Table2" "_Table" + ) _table2 where _table2.val>3) t2 on t1.id1=t2.id2`; + const aggr = SQLParser.parseSQL(sql); + // console.log(JSON.stringify(aggr.pipeline, null, 4)); + const optimized = optimizer.optimizeMongoAggregate( + aggr.pipeline, + {} + ); + assert.deepStrictEqual( + optimized, + [ + { + $project: { + t1: '$$ROOT', + }, + }, + { + $lookup: { + from: 'Table2', + as: 't2', + let: { + t1_id1: '$t1.id1', + }, + pipeline: [ + { + $project: { + id2: '$id2', + val: '$val', + }, + }, + { + $match: { + val: { + $gt: 3, + }, + }, + }, + { + $project: { + id2: '$id2', + }, + }, + { + $match: { + $expr: { + $eq: ['$$t1_id1', '$id2'], + }, + }, + }, + ], + }, + }, + { + $match: { + $expr: { + $gt: [ + { + $size: '$t2', + }, + 0, + ], + }, + }, + }, + { + $project: { + name: '$t2.name', + }, + }, + ], + 'did not optimize' + ); + }); + + it('should optimize with simple join1', function () { + const sql = ` + select t2.name from (select * from table1 where x=1) t1 inner join (select _table2.id2 from ( + select "id2","val" + from "Table2" "_Table" + ) _table2 where _table2.val>3) t2 on t1.id1=t2.id2`; + const aggr = SQLParser.parseSQL(sql); + // console.log(JSON.stringify(aggr.pipeline, null, 4)); + const optimized = optimizer.optimizeMongoAggregate( + aggr.pipeline, + {} + ); + assert.deepStrictEqual( + optimized, + [ + { + $match: { + x: { + $eq: 1, + }, + }, + }, + { + $project: { + t1: '$$ROOT', + }, + }, + { + $lookup: { + from: 'Table2', + as: 't2', + let: { + t1_id1: '$t1.id1', + }, + pipeline: [ + { + $project: { + id2: '$id2', + val: '$val', + }, + }, + { + $match: { + val: { + $gt: 3, + }, + }, + }, + { + $project: { + id2: '$id2', + }, + }, + { + $match: { + $expr: { + $eq: ['$$t1_id1', '$id2'], + }, + }, + }, + ], + }, + }, + { + $match: { + $expr: { + $gt: [ + { + $size: '$t2', + }, + 0, + ], + }, + }, + }, + { + $project: { + name: '$t2.name', + }, + }, + ], + 'did not optimize' + ); + }); + + it('should optimize where at end', function () { + const sql = ` + select t2.name from table1 t1 inner join (select _table2.id2 from ( + select "id2","val" + from "Table2" "_Table" +) _table2 where _table2.val>3) t2 on t1.id1=t2.id2 where t1.val2>100 and to_int(t1.val2)<200`; + const aggr = SQLParser.parseSQL(sql); + // console.log(JSON.stringify(aggr.pipeline, null, 4)); + const optimized = optimizer.optimizeMongoAggregate( + aggr.pipeline, + {} + ); + assert.deepStrictEqual( + optimized, + [ + { + $match: { + $and: [ + { + val2: { + $gt: 100, + }, + }, + { + $expr: { + $lt: [ + { + $toInt: '$val2', + }, + 200, + ], + }, + }, + ], + }, + }, + { + $project: { + t1: '$$ROOT', + }, + }, + { + $lookup: { + from: 'Table2', + as: 't2', + let: { + t1_id1: '$t1.id1', + }, + pipeline: [ + { + $project: { + id2: '$id2', + val: '$val', + }, + }, + { + $match: { + val: { + $gt: 3, + }, + }, + }, + { + $project: { + id2: '$id2', + }, }, { - "Year": { - "$gte": 2022 - } - } - ] - } - }, - { - "$group": { - "_id": {}, - "a0": { - "$sum": { - "$convert": { - "input": "$ValueUSD", - "to": "decimal" - } - } - } - } - }, - { - "$project": { - "_id": 0, - "a0": "$a0" - } - } + $match: { + $expr: { + $eq: ['$$t1_id1', '$id2'], + }, + }, + }, + ], + }, + }, + { + $match: { + $expr: { + $gt: [ + { + $size: '$t2', + }, + 0, + ], + }, + }, + }, + { + $project: { + name: '$t2.name', + }, + }, + ], + 'did not optimize' + ); + }); + + it('should optimize where at end 2', function () { + const sql = ` + select t2.name from table1 t1 inner join "Table2" t2 on t1.id1=t2.id2 where t1.val2>100 and to_int(t1.val2)<200`; + const aggr = SQLParser.parseSQL(sql); + // console.log(JSON.stringify(aggr.pipeline, null, 4)); + const optimized = optimizer.optimizeMongoAggregate( + aggr.pipeline, + {} + ); + assert.deepStrictEqual( + optimized, + [ + { + $match: { + $and: [ + { + val2: { + $gt: 100, + }, + }, + { + $expr: { + $lt: [ + { + $toInt: '$val2', + }, + 200, + ], + }, + }, + ], + }, + }, + { + $project: { + t1: '$$ROOT', + }, + }, + { + $lookup: { + from: 'Table2', + as: 't2', + localField: 't1.id1', + foreignField: 'id2', + }, + }, + { + $match: { + $expr: { + $gt: [ + { + $size: '$t2', + }, + 0, + ], + }, + }, + }, + { + $project: { + name: '$t2.name', + }, + }, + ], + 'did not optimize' + ); + }); + + it('should not optimize when cross table', function () { + const sql = ` + select t2.name from table1 t1 inner join "Table2" t2 on t1.id1=t2.id2 where t1.val2>100 and to_int(t2.val2)<200`; + const aggr = SQLParser.parseSQL(sql); + // console.log(JSON.stringify(aggr.pipeline, null, 4)); + const optimized = optimizer.optimizeMongoAggregate( + aggr.pipeline, + {} + ); + assert.deepStrictEqual( + optimized, + [ + { + $project: { + t1: '$$ROOT', + }, + }, + { + $lookup: { + from: 'Table2', + as: 't2', + localField: 't1.id1', + foreignField: 'id2', + }, + }, + { + $match: { + $expr: { + $gt: [ + { + $size: '$t2', + }, + 0, + ], + }, + }, + }, + { + $match: { + $and: [ + { + 't1.val2': { + $gt: 100, + }, + }, + { + $expr: { + $lt: [ + { + $toInt: '$t2.val2', + }, + 200, + ], + }, + }, + ], + }, + }, + { + $project: { + name: '$t2.name', + }, + }, + ], + 'did not optimize' + ); + }); + + it('should optimize when match isnt at end in lookup', function () { + const pipeline = [ + { + $project: { + cust: '$$ROOT', + }, + }, + { + $lookup: { + from: 'buffer664390eeefc55bc633d3189b', + as: 'empsCsr', + let: {}, + pipeline: [ + { + $project: { + customer: '$$ROOT', + }, + }, + { + $lookup: { + from: 'buffer664b503510da713033caa13e', + as: 'empCsr', + let: {}, + pipeline: [ + { + $match: { + $expr: { + $eq: [ + '$Id', + '0056g000003H71UAAS', + ], + }, + }, + }, + ], + }, + }, + { + $set: { + empCsr: { + $first: '$empCsr', + }, + }, + }, + { + $match: { + empCsr: { + $ne: null, + }, + }, + }, + { + $lookup: { + from: 'buffer66420ac72c5748d51acd2808', + as: 'certiAgents', + localField: 'empCsr.Id', + foreignField: 'Id', + }, + }, + { + $set: { + certiAgents: { + $first: '$certiAgents', + }, + }, + }, + { + $match: { + certiAgents: { + $ne: null, + }, + }, + }, + { + $match: { + 'customer.Id': { + $eq: '0016g00000RwgtpAAB', + }, + }, + }, + { + $group: { + _id: { + Id: '$empCsr.Id', + CUST: '$cust.Id', + userId: '$certiAgents.userId', + }, + }, + }, + { + $project: { + Id: '$_id.Id', + CUST: '$_id.CUST', + userId: '$_id.userId', + _id: 0, + }, + }, + { + $match: { + $expr: { + $eq: ['$Id', '0056g000003H71UAAS'], + }, + }, + }, + ], + }, + }, + { + $lookup: { + from: 'buffer664390eeefc55bc633d3189b', + as: 'empsProd', + let: {}, + pipeline: [ + { + $project: { + customer: '$$ROOT', + }, + }, + { + $lookup: { + from: 'buffer664b503510da713033caa13e', + as: 'empProd', + let: {}, + pipeline: [ + { + $match: { + $expr: { + $eq: [ + '$Id', + '0056g000005UjGgAAK', + ], + }, + }, + }, + ], + }, + }, + { + $set: { + empProd: { + $first: '$empProd', + }, + }, + }, + { + $match: { + empProd: { + $ne: null, + }, + }, + }, + { + $lookup: { + from: 'buffer66420ac72c5748d51acd2808', + as: 'certiAgents', + localField: 'empProd.Id', + foreignField: 'Id', + }, + }, + { + $set: { + certiAgents: { + $first: '$certiAgents', + }, + }, + }, + { + $match: { + certiAgents: { + $ne: null, + }, + }, + }, + { + $match: { + 'customer.Id': { + $eq: '0016g00000RwgtpAAB', + }, + }, + }, + { + $group: { + _id: { + Id: '$empProd.Id', + CUST: '$cust.Id', + userId: '$certiAgents.userId', + }, + }, + }, + { + $project: { + Id: '$_id.Id', + CUST: '$_id.CUST', + userId: '$_id.userId', + _id: 0, + }, + }, + { + $match: { + $expr: { + $eq: ['$Id', '0056g000005UjGgAAK'], + }, + }, + }, + ], + }, + }, + { + $match: { + 'cust.Id': { + $eq: '0016g00000RwgtpAAB', + }, + }, + }, + { + $project: { + agentIds: { + $concatArrays: [ + '$empsCsr.userId', + '$empsProd.userId', + ], + }, + }, + }, + { + $limit: 1, + }, + ]; + // console.log(JSON.stringify(aggr.pipeline, null, 4)); + const optimized = optimizer.optimizeMongoAggregate(pipeline, {}); + assert.deepStrictEqual( + optimized, + [ + { + $match: { + Id: { + $eq: '0016g00000RwgtpAAB', + }, + }, + }, + { + $project: { + cust: '$$ROOT', + }, + }, + { + $lookup: { + from: 'buffer664390eeefc55bc633d3189b', + as: 'empsCsr', + let: {}, + pipeline: [ + { + $match: { + Id: { + $eq: '0016g00000RwgtpAAB', + }, + }, + }, + { + $project: { + customer: '$$ROOT', + }, + }, + { + $lookup: { + from: 'buffer664b503510da713033caa13e', + as: 'empCsr', + let: {}, + pipeline: [ + { + $match: { + $expr: { + $eq: [ + '$Id', + '0056g000003H71UAAS', + ], + }, + }, + }, + ], + }, + }, + { + $set: { + empCsr: { + $first: '$empCsr', + }, + }, + }, + { + $match: { + empCsr: { + $ne: null, + }, + }, + }, + { + $lookup: { + from: 'buffer66420ac72c5748d51acd2808', + as: 'certiAgents', + localField: 'empCsr.Id', + foreignField: 'Id', + }, + }, + { + $set: { + certiAgents: { + $first: '$certiAgents', + }, + }, + }, + { + $match: { + certiAgents: { + $ne: null, + }, + }, + }, + { + $group: { + _id: { + Id: '$empCsr.Id', + CUST: '$cust.Id', + userId: '$certiAgents.userId', + }, + }, + }, + { + $project: { + Id: '$_id.Id', + CUST: '$_id.CUST', + userId: '$_id.userId', + _id: 0, + }, + }, + { + $match: { + $expr: { + $eq: ['$Id', '0056g000003H71UAAS'], + }, + }, + }, + ], + }, + }, + { + $lookup: { + from: 'buffer664390eeefc55bc633d3189b', + as: 'empsProd', + let: {}, + pipeline: [ + { + $match: { + Id: { + $eq: '0016g00000RwgtpAAB', + }, + }, + }, + { + $project: { + customer: '$$ROOT', + }, + }, + { + $lookup: { + from: 'buffer664b503510da713033caa13e', + as: 'empProd', + let: {}, + pipeline: [ + { + $match: { + $expr: { + $eq: [ + '$Id', + '0056g000005UjGgAAK', + ], + }, + }, + }, + ], + }, + }, + { + $set: { + empProd: { + $first: '$empProd', + }, + }, + }, + { + $match: { + empProd: { + $ne: null, + }, + }, + }, + { + $lookup: { + from: 'buffer66420ac72c5748d51acd2808', + as: 'certiAgents', + localField: 'empProd.Id', + foreignField: 'Id', + }, + }, + { + $set: { + certiAgents: { + $first: '$certiAgents', + }, + }, + }, + { + $match: { + certiAgents: { + $ne: null, + }, + }, + }, + { + $group: { + _id: { + Id: '$empProd.Id', + CUST: '$cust.Id', + userId: '$certiAgents.userId', + }, + }, + }, + { + $project: { + Id: '$_id.Id', + CUST: '$_id.CUST', + userId: '$_id.userId', + _id: 0, + }, + }, + { + $match: { + $expr: { + $eq: ['$Id', '0056g000005UjGgAAK'], + }, + }, + }, + ], + }, + }, + { + $project: { + agentIds: { + $concatArrays: [ + '$empsCsr.userId', + '$empsProd.userId', + ], + }, + }, + }, + { + $limit: 1, + }, ], 'did not optimize' );