From 0646139a5b8afbc0df7cb98fa90847189a599b44 Mon Sep 17 00:00:00 2001 From: Drew Davis Date: Wed, 15 Oct 2025 10:35:42 -0400 Subject: [PATCH] feat: Optimize and fix filtering on toStartOfX primary key expressions --- .changeset/fluffy-mails-sparkle.md | 6 + .../src/__tests__/renderChartConfig.test.ts | 223 +++++++++++++++++- .../common-utils/src/__tests__/utils.test.ts | 200 ++++++++++++++++ .../common-utils/src/renderChartConfig.ts | 59 ++++- packages/common-utils/src/utils.ts | 79 +++++++ 5 files changed, 550 insertions(+), 17 deletions(-) create mode 100644 .changeset/fluffy-mails-sparkle.md diff --git a/.changeset/fluffy-mails-sparkle.md b/.changeset/fluffy-mails-sparkle.md new file mode 100644 index 000000000..8a01b8087 --- /dev/null +++ b/.changeset/fluffy-mails-sparkle.md @@ -0,0 +1,6 @@ +--- +"@hyperdx/common-utils": patch +"@hyperdx/app": patch +--- + +feat: Optimize and fix filtering on toStartOfX primary key expressions diff --git a/packages/common-utils/src/__tests__/renderChartConfig.test.ts b/packages/common-utils/src/__tests__/renderChartConfig.test.ts index da3593ff1..8f7e7e8d4 100644 --- a/packages/common-utils/src/__tests__/renderChartConfig.test.ts +++ b/packages/common-utils/src/__tests__/renderChartConfig.test.ts @@ -1,4 +1,4 @@ -import { chSql, parameterizedQueryToSql } from '@/clickhouse'; +import { chSql, ColumnMeta, parameterizedQueryToSql } from '@/clickhouse'; import { Metadata } from '@/metadata'; import { ChartConfigWithOptDateRange, @@ -6,10 +6,10 @@ import { MetricsDataType, } from '@/types'; -import { renderChartConfig } from '../renderChartConfig'; +import { renderChartConfig, timeFilterExpr } from '../renderChartConfig'; describe('renderChartConfig', () => { - let mockMetadata: Metadata; + let mockMetadata: jest.Mocked; beforeEach(() => { mockMetadata = { @@ -19,7 +19,10 @@ describe('renderChartConfig', () => { ]), getMaterializedColumnsLookupTable: jest.fn().mockResolvedValue(null), getColumn: jest.fn().mockResolvedValue({ type: 'DateTime' }), - } as unknown as Metadata; + getTableMetadata: jest + .fn() + .mockResolvedValue({ primary_key: 'timestamp' }), + } as unknown as jest.Mocked; }); const gaugeConfiguration: ChartConfigWithOptDateRange = { @@ -630,4 +633,216 @@ describe('renderChartConfig', () => { expect(actual).toMatchSnapshot(); }); }); + + describe('timeFilterExpr', () => { + type TimeFilterExprTestCase = { + timestampValueExpression: string; + dateRangeStartInclusive?: boolean; + dateRangeEndInclusive?: boolean; + dateRange: [Date, Date]; + includedDataInterval?: string; + expected: string; + description: string; + tableName?: string; + databaseName?: string; + primaryKey?: string; + }; + + const testCases: TimeFilterExprTestCase[] = [ + { + description: 'with basic timestampValueExpression', + timestampValueExpression: 'timestamp', + dateRange: [ + new Date('2025-02-12 00:12:34Z'), + new Date('2025-02-14 00:12:34Z'), + ], + expected: `(timestamp >= fromUnixTimestamp64Milli(${new Date('2025-02-12 00:12:34Z').getTime()}) AND timestamp <= fromUnixTimestamp64Milli(${new Date('2025-02-14 00:12:34Z').getTime()}))`, + }, + { + description: 'with dateRangeEndInclusive=false', + timestampValueExpression: 'timestamp', + dateRange: [ + new Date('2025-02-12 00:12:34Z'), + new Date('2025-02-14 00:12:34Z'), + ], + dateRangeEndInclusive: false, + expected: `(timestamp >= fromUnixTimestamp64Milli(${new Date('2025-02-12 00:12:34Z').getTime()}) AND timestamp < fromUnixTimestamp64Milli(${new Date('2025-02-14 00:12:34Z').getTime()}))`, + }, + { + description: 'with dateRangeStartInclusive=false', + timestampValueExpression: 'timestamp', + dateRange: [ + new Date('2025-02-12 00:12:34Z'), + new Date('2025-02-14 00:12:34Z'), + ], + dateRangeStartInclusive: false, + expected: `(timestamp > fromUnixTimestamp64Milli(${new Date('2025-02-12 00:12:34Z').getTime()}) AND timestamp <= fromUnixTimestamp64Milli(${new Date('2025-02-14 00:12:34Z').getTime()}))`, + }, + { + description: 'with includedDataInterval', + timestampValueExpression: 'timestamp', + dateRange: [ + new Date('2025-02-12 00:12:34Z'), + new Date('2025-02-14 00:12:34Z'), + ], + includedDataInterval: '1 WEEK', + expected: `(timestamp >= toStartOfInterval(fromUnixTimestamp64Milli(${new Date('2025-02-12 00:12:34Z').getTime()}), INTERVAL 1 WEEK) - INTERVAL 1 WEEK AND timestamp <= toStartOfInterval(fromUnixTimestamp64Milli(${new Date('2025-02-14 00:12:34Z').getTime()}), INTERVAL 1 WEEK) + INTERVAL 1 WEEK)`, + }, + { + description: 'with date type timestampValueExpression', + timestampValueExpression: 'date', + dateRange: [ + new Date('2025-02-12 00:12:34Z'), + new Date('2025-02-14 00:12:34Z'), + ], + expected: `(date >= toDate(fromUnixTimestamp64Milli(${new Date('2025-02-12 00:12:34Z').getTime()})) AND date <= toDate(fromUnixTimestamp64Milli(${new Date('2025-02-14 00:12:34Z').getTime()})))`, + }, + { + description: 'with multiple timestampValueExpression parts', + timestampValueExpression: 'timestamp, date', + dateRange: [ + new Date('2025-02-12 00:12:34Z'), + new Date('2025-02-14 00:12:34Z'), + ], + expected: `(timestamp >= fromUnixTimestamp64Milli(${new Date('2025-02-12 00:12:34Z').getTime()}) AND timestamp <= fromUnixTimestamp64Milli(${new Date('2025-02-14 00:12:34Z').getTime()}))AND(date >= toDate(fromUnixTimestamp64Milli(${new Date('2025-02-12 00:12:34Z').getTime()})) AND date <= toDate(fromUnixTimestamp64Milli(${new Date('2025-02-14 00:12:34Z').getTime()})))`, + }, + { + description: 'with toStartOfDay() in timestampExpr', + timestampValueExpression: 'toStartOfDay(timestamp)', + dateRange: [ + new Date('2025-02-12 00:12:34Z'), + new Date('2025-02-14 00:12:34Z'), + ], + expected: `(toStartOfDay(timestamp) >= toStartOfDay(fromUnixTimestamp64Milli(${new Date('2025-02-12 00:12:34Z').getTime()})) AND toStartOfDay(timestamp) <= toStartOfDay(fromUnixTimestamp64Milli(${new Date('2025-02-14 00:12:34Z').getTime()})))`, + }, + { + description: 'with toStartOfDay () in timestampExpr', + timestampValueExpression: 'toStartOfDay (timestamp)', + dateRange: [ + new Date('2025-02-12 00:12:34Z'), + new Date('2025-02-14 00:12:34Z'), + ], + expected: `(toStartOfDay (timestamp) >= toStartOfDay(fromUnixTimestamp64Milli(${new Date('2025-02-12 00:12:34Z').getTime()})) AND toStartOfDay (timestamp) <= toStartOfDay(fromUnixTimestamp64Milli(${new Date('2025-02-14 00:12:34Z').getTime()})))`, + }, + { + description: 'with toStartOfInterval() in timestampExpr', + timestampValueExpression: + 'toStartOfInterval(timestamp, INTERVAL 12 MINUTE)', + dateRange: [ + new Date('2025-02-12 00:12:34Z'), + new Date('2025-02-14 00:12:34Z'), + ], + expected: `(toStartOfInterval(timestamp, INTERVAL 12 MINUTE) >= toStartOfInterval(fromUnixTimestamp64Milli(${new Date('2025-02-12 00:12:34Z').getTime()}), INTERVAL 12 MINUTE) AND toStartOfInterval(timestamp, INTERVAL 12 MINUTE) <= toStartOfInterval(fromUnixTimestamp64Milli(${new Date('2025-02-14 00:12:34Z').getTime()}), INTERVAL 12 MINUTE))`, + }, + { + description: + 'with toStartOfInterval() with lowercase interval in timestampExpr', + timestampValueExpression: + 'toStartOfInterval(timestamp, interval 1 minute)', + dateRange: [ + new Date('2025-02-12 00:12:34Z'), + new Date('2025-02-14 00:12:34Z'), + ], + expected: `(toStartOfInterval(timestamp, interval 1 minute) >= toStartOfInterval(fromUnixTimestamp64Milli(${new Date('2025-02-12 00:12:34Z').getTime()}), interval 1 minute) AND toStartOfInterval(timestamp, interval 1 minute) <= toStartOfInterval(fromUnixTimestamp64Milli(${new Date('2025-02-14 00:12:34Z').getTime()}), interval 1 minute))`, + }, + { + description: 'with toStartOfInterval() with timezone and offset', + timestampValueExpression: `toStartOfInterval(timestamp, INTERVAL 1 MINUTE, toDateTime('2023-01-01 14:35:30'), 'America/New_York')`, + dateRange: [ + new Date('2025-02-12 00:12:34Z'), + new Date('2025-02-14 00:12:34Z'), + ], + expected: `(toStartOfInterval(timestamp, INTERVAL 1 MINUTE, toDateTime('2023-01-01 14:35:30'), 'America/New_York') >= toStartOfInterval(fromUnixTimestamp64Milli(${new Date('2025-02-12 00:12:34Z').getTime()}), INTERVAL 1 MINUTE, toDateTime('2023-01-01 14:35:30'), 'America/New_York') AND toStartOfInterval(timestamp, INTERVAL 1 MINUTE, toDateTime('2023-01-01 14:35:30'), 'America/New_York') <= toStartOfInterval(fromUnixTimestamp64Milli(${new Date('2025-02-14 00:12:34Z').getTime()}), INTERVAL 1 MINUTE, toDateTime('2023-01-01 14:35:30'), 'America/New_York'))`, + }, + { + description: 'with nonstandard spacing', + timestampValueExpression: ` toStartOfInterval ( timestamp , INTERVAL 1 MINUTE , toDateTime ( '2023-01-01 14:35:30' ), 'America/New_York' ) `, + dateRange: [ + new Date('2025-02-12 00:12:34Z'), + new Date('2025-02-14 00:12:34Z'), + ], + expected: `(toStartOfInterval ( timestamp , INTERVAL 1 MINUTE , toDateTime ( '2023-01-01 14:35:30' ), 'America/New_York' ) >= toStartOfInterval(fromUnixTimestamp64Milli(${new Date('2025-02-12 00:12:34Z').getTime()}), INTERVAL 1 MINUTE, toDateTime ( '2023-01-01 14:35:30' ), 'America/New_York') AND toStartOfInterval ( timestamp , INTERVAL 1 MINUTE , toDateTime ( '2023-01-01 14:35:30' ), 'America/New_York' ) <= toStartOfInterval(fromUnixTimestamp64Milli(${new Date('2025-02-14 00:12:34Z').getTime()}), INTERVAL 1 MINUTE, toDateTime ( '2023-01-01 14:35:30' ), 'America/New_York'))`, + }, + { + description: 'with optimizable timestampValueExpression', + timestampValueExpression: `timestamp`, + primaryKey: + "toStartOfMinute(timestamp), ServiceName, ResourceAttributes['timestamp'], timestamp", + dateRange: [ + new Date('2025-02-12 00:12:34Z'), + new Date('2025-02-14 00:12:34Z'), + ], + expected: `(timestamp >= fromUnixTimestamp64Milli(1739319154000) AND timestamp <= fromUnixTimestamp64Milli(1739491954000))AND(toStartOfMinute(timestamp) >= toStartOfMinute(fromUnixTimestamp64Milli(1739319154000)) AND toStartOfMinute(timestamp) <= toStartOfMinute(fromUnixTimestamp64Milli(1739491954000)))`, + }, + { + description: 'with synthetic timestamp value expression for CTE', + timestampValueExpression: `__hdx_time_bucket`, + dateRange: [ + new Date('2025-02-12 00:12:34Z'), + new Date('2025-02-14 00:12:34Z'), + ], + databaseName: '', + tableName: 'Bucketed', + primaryKey: + "toStartOfMinute(timestamp), ServiceName, ResourceAttributes['timestamp'], timestamp", + expected: `(__hdx_time_bucket >= fromUnixTimestamp64Milli(1739319154000) AND __hdx_time_bucket <= fromUnixTimestamp64Milli(1739491954000))`, + }, + + { + description: 'with toStartOfMinute in timestampValueExpression', + timestampValueExpression: `toStartOfMinute(timestamp)`, + dateRange: [ + new Date('2025-02-12 00:12:34Z'), + new Date('2025-02-14 00:12:34Z'), + ], + primaryKey: + "toStartOfMinute(timestamp), ServiceName, ResourceAttributes['timestamp'], timestamp", + expected: `(toStartOfMinute(timestamp) >= toStartOfMinute(fromUnixTimestamp64Milli(1739319154000)) AND toStartOfMinute(timestamp) <= toStartOfMinute(fromUnixTimestamp64Milli(1739491954000)))`, + }, + ]; + + beforeEach(() => { + mockMetadata.getColumn.mockImplementation(async ({ column }) => + column === 'date' + ? ({ type: 'Date' } as ColumnMeta) + : ({ type: 'DateTime' } as ColumnMeta), + ); + }); + + it.each(testCases)( + 'should generate a time filter expression $description', + async ({ + timestampValueExpression, + dateRangeEndInclusive = true, + dateRangeStartInclusive = true, + dateRange, + expected, + includedDataInterval, + tableName = 'target_table', + databaseName = 'default', + primaryKey, + }) => { + if (primaryKey) { + mockMetadata.getTableMetadata.mockResolvedValue({ + primary_key: primaryKey, + } as any); + } + + const actual = await timeFilterExpr({ + timestampValueExpression, + dateRangeEndInclusive, + dateRangeStartInclusive, + dateRange, + connectionId: 'test-connection', + databaseName, + tableName, + metadata: mockMetadata, + includedDataInterval, + }); + + const actualSql = parameterizedQueryToSql(actual); + expect(actualSql).toBe(expected); + }, + ); + }); }); diff --git a/packages/common-utils/src/__tests__/utils.test.ts b/packages/common-utils/src/__tests__/utils.test.ts index 90482d9b7..bc0b1119a 100644 --- a/packages/common-utils/src/__tests__/utils.test.ts +++ b/packages/common-utils/src/__tests__/utils.test.ts @@ -16,6 +16,8 @@ import { isFirstOrderByAscending, isJsonExpression, isTimestampExpressionInFirstOrderBy, + optimizeTimestampValueExpression, + parseToStartOfFunction, replaceJsonExpressions, splitAndTrimCSV, splitAndTrimWithBracket, @@ -1115,4 +1117,202 @@ describe('utils', () => { expect(actual).toEqual(expected); }); }); + + describe('parseToStartOfFunction', () => { + it.each([ + { + expr: 'toStartOfDay(a.date)', + expected: { + function: 'toStartOfDay', + columnArgument: 'a.date', + formattedRemainingArgs: '', + }, + }, + { + expr: "toStartOfMinute(toDate(ResourceAttributes['timestamp']))", + expected: { + function: 'toStartOfMinute', + columnArgument: "toDate(ResourceAttributes['timestamp'])", + formattedRemainingArgs: '', + }, + }, + { + expr: "toStartOfMonth(timestamp, 'America/Los_Angeles')", + expected: { + function: 'toStartOfMonth', + columnArgument: 'timestamp', + formattedRemainingArgs: ", 'America/Los_Angeles'", + }, + }, + { + expr: 'toStartOfMonth(`time stamp`)', + expected: { + function: 'toStartOfMonth', + columnArgument: '`time stamp`', + formattedRemainingArgs: '', + }, + }, + { + expr: 'toStartOfInterval(timestamp, INTERVAL 1 DAY)', + expected: { + function: 'toStartOfInterval', + columnArgument: 'timestamp', + formattedRemainingArgs: ', INTERVAL 1 DAY', + }, + }, + { + expr: "toStartOfInterval(timestamp, INTERVAL 1 DAY, toDateTime('2025-01-01'), 'America/Los_Angeles')", + expected: { + function: 'toStartOfInterval', + columnArgument: 'timestamp', + formattedRemainingArgs: + ", INTERVAL 1 DAY, toDateTime('2025-01-01'), 'America/Los_Angeles'", + }, + }, + { + expr: " toStartOfInterval ( timestamp, INTERVAL 10 DAY, toDateTime('2025-01-01' ), 'America/Los_Angeles' ) ", + expected: { + function: 'toStartOfInterval', + columnArgument: 'timestamp', + formattedRemainingArgs: + ", INTERVAL 10 DAY, toDateTime('2025-01-01' ), 'America/Los_Angeles'", + }, + }, + { + expr: 'timestamp', + expected: undefined, + }, + { + expr: 'toDate(timestamp)', + expected: undefined, + }, + { + expr: 'toDate(toStartOfDay(timestamp))', + expected: undefined, + }, + { + expr: 'toStartOfDay(timestamp), toDate(timestamp)', + expected: undefined, + }, + { + expr: 'toDate(timestamp), toStartOfDay(timestamp)', + expected: undefined, + }, + { + expr: '', + expected: undefined, + }, + { + expr: '(toStartOfDay(timestamp))', + expected: undefined, + }, + { + expr: 'toStartOfDay(', + expected: undefined, + }, + ])('Should parse $expr', ({ expr, expected }) => { + expect(parseToStartOfFunction(expr)).toEqual(expected); + }); + }); + + describe('optimizeTimestampValueExpression', () => { + const testCases = [ + { + timestampValueExpression: 'Timestamp', + primaryKey: 'Timestamp', + expected: 'Timestamp', + }, + { + timestampValueExpression: 'Timestamp', + primaryKey: undefined, + expected: 'Timestamp', + }, + { + timestampValueExpression: 'Timestamp', + primaryKey: '', + expected: 'Timestamp', + }, + { + // Traces Table + timestampValueExpression: 'Timestamp', + primaryKey: 'ServiceName, SpanName, toDateTime(Timestamp)', + expected: 'Timestamp', + }, + { + // Optimized Traces Table + timestampValueExpression: 'Timestamp', + primaryKey: + 'toStartOfHour(Timestamp), ServiceName, SpanName, toDateTime(Timestamp)', + expected: 'Timestamp, toStartOfHour(Timestamp)', + }, + { + // Unsupported for now as it's not a great primary key, want to just + // use default behavior for this + timestampValueExpression: 'Timestamp', + primaryKey: 'toDateTime(Timestamp), ServiceName, SpanName, Timestamp', + expected: 'Timestamp', + }, + { + // Inverted primary key order, we should not try to optimize this + timestampValueExpression: 'Timestamp', + primaryKey: + 'ServiceName, toDateTime(Timestamp), SeverityText, toStartOfHour(Timestamp)', + expected: 'Timestamp', + }, + { + timestampValueExpression: 'Timestamp', + primaryKey: 'toStartOfHour(Timestamp), other_column, Timestamp', + expected: 'Timestamp, toStartOfHour(Timestamp)', + }, + { + // When the user has already manually configured an optimized timestamp value expression + timestampValueExpression: ' toStartOfHour(Timestamp), Timestamp', + primaryKey: 'toStartOfHour(Timestamp), other_column, Timestamp', + expected: ' toStartOfHour(Timestamp), Timestamp', + }, + { + timestampValueExpression: 'Timestamp', + primaryKey: + 'toStartOfInterval(Timestamp, INTERVAL 1 HOUR), other_column, Timestamp', + expected: 'Timestamp, toStartOfInterval(Timestamp, INTERVAL 1 HOUR)', + }, + { + // test variation of toUnixTimestamp + timestampValueExpression: 'Timestamp', + primaryKey: + 'toStartOfMinute(Timestamp), user_id, status, toUnixTimestamp64Nano(Timestamp)', + expected: 'Timestamp, toStartOfMinute(Timestamp)', + }, + { + // TimestampTime is not matched since it is not in the timestampValueExpression + timestampValueExpression: 'Timestamp', + primaryKey: + 'toStartOfMinute(TimestampTime), user_id, status, Timestamp', + expected: 'Timestamp', + }, + { + timestampValueExpression: 'Timestamp', + primaryKey: + '909]`23`9082eh[928e1p92e81hp92, d81p92d817h1p-93287dh129d7812hgpd91832h, toStartOfMinute(Timestamp), other_column, Timestamp', + expected: 'Timestamp, toStartOfMinute(Timestamp)', + }, + { + timestampValueExpression: '`Time stamp`', + primaryKey: 'toStartOfMinute(`Time stamp`), other_column, `Time stamp`', + expected: '`Time stamp`, toStartOfMinute(`Time stamp`)', + }, + ] as const; + + it.each(testCases)( + 'should return optimized expression $expected for original expression $timestampValueExpression and primary key $primaryKey', + ({ timestampValueExpression, primaryKey, expected }) => { + const actual = optimizeTimestampValueExpression( + timestampValueExpression, + primaryKey, + ); + + expect(actual).toBe(expected); + }, + ); + }); }); diff --git a/packages/common-utils/src/renderChartConfig.ts b/packages/common-utils/src/renderChartConfig.ts index 0c89096d8..5b19574d8 100644 --- a/packages/common-utils/src/renderChartConfig.ts +++ b/packages/common-utils/src/renderChartConfig.ts @@ -42,6 +42,8 @@ import { convertDateRangeToGranularityString, convertGranularityToSeconds, getFirstTimestampValueExpression, + optimizeTimestampValueExpression, + parseToStartOfFunction, splitAndTrimWithBracket, } from '@/utils'; @@ -476,7 +478,7 @@ function timeBucketExpr({ }}\``; } -async function timeFilterExpr({ +export async function timeFilterExpr({ connectionId, databaseName, dateRange, @@ -499,27 +501,54 @@ async function timeFilterExpr({ timestampValueExpression: string; with?: ChartConfigWithDateRange['with']; }) { - const valueExpressions = splitAndTrimWithBracket(timestampValueExpression); const startTime = dateRange[0].getTime(); const endTime = dateRange[1].getTime(); + let optimizedTimestampValueExpression = timestampValueExpression; + try { + // Not all of these will be available when selecting from a CTE + if (databaseName && tableName && connectionId) { + const { primary_key } = await metadata.getTableMetadata({ + databaseName, + tableName, + connectionId, + }); + optimizedTimestampValueExpression = optimizeTimestampValueExpression( + timestampValueExpression, + primary_key, + ); + } + } catch (e) { + console.log('Failed to optimize timestampValueExpression', e); + } + + const valueExpressions = splitAndTrimWithBracket( + optimizedTimestampValueExpression, + ); + const whereExprs = await Promise.all( valueExpressions.map(async expr => { const col = expr.trim(); - const columnMeta = withClauses?.length - ? null - : await metadata.getColumn({ - databaseName, - tableName, - column: col, - connectionId, - }); + + // If the expression includes a toStartOf...(...) function, the RHS of the + // timestamp comparison must also have the same function + const toStartOf = parseToStartOfFunction(col); + + const columnMeta = + withClauses?.length || toStartOf + ? null + : await metadata.getColumn({ + databaseName, + tableName, + column: col, + connectionId, + }); const unsafeTimestampValueExpression = { UNSAFE_RAW_SQL: col, }; - if (columnMeta == null && !withClauses?.length) { + if (columnMeta == null && !withClauses?.length && !toStartOf) { console.warn( `Column ${col} not found in ${databaseName}.${tableName} while inferring type for time filter`, ); @@ -527,11 +556,15 @@ async function timeFilterExpr({ const startTimeCond = includedDataInterval ? chSql`toStartOfInterval(fromUnixTimestamp64Milli(${{ Int64: startTime }}), INTERVAL ${includedDataInterval}) - INTERVAL ${includedDataInterval}` - : chSql`fromUnixTimestamp64Milli(${{ Int64: startTime }})`; + : toStartOf + ? chSql`${toStartOf.function}(fromUnixTimestamp64Milli(${{ Int64: startTime }})${toStartOf.formattedRemainingArgs})` + : chSql`fromUnixTimestamp64Milli(${{ Int64: startTime }})`; const endTimeCond = includedDataInterval ? chSql`toStartOfInterval(fromUnixTimestamp64Milli(${{ Int64: endTime }}), INTERVAL ${includedDataInterval}) + INTERVAL ${includedDataInterval}` - : chSql`fromUnixTimestamp64Milli(${{ Int64: endTime }})`; + : toStartOf + ? chSql`${toStartOf.function}(fromUnixTimestamp64Milli(${{ Int64: endTime }})${toStartOf.formattedRemainingArgs})` + : chSql`fromUnixTimestamp64Milli(${{ Int64: endTime }})`; // If it's a date type if (columnMeta?.type === 'Date') { diff --git a/packages/common-utils/src/utils.ts b/packages/common-utils/src/utils.ts index 29cc532d2..3ebccb31b 100644 --- a/packages/common-utils/src/utils.ts +++ b/packages/common-utils/src/utils.ts @@ -581,3 +581,82 @@ export const isFirstOrderByAscending = ( return !isDescending; }; + +/** + * Parses a single expression of the form + * `toStartOf(column[, timezone])` or `toStartOfInterval(column[, interval[, origin[, timezone]]])`. + * Returns undefined if the expression is not of this form. + */ +export function parseToStartOfFunction( + expr: string, +): + | { function: string; columnArgument: string; formattedRemainingArgs: string } + | undefined { + const parts = splitAndTrimWithBracket(expr); + if (parts.length !== 1) return undefined; + + const toStartOfMatches = expr.match(/(toStartOf\w+)\s*\(/); + + if (toStartOfMatches) { + const [toStartOfSubstring, toStartOfFunction] = toStartOfMatches; + + const argsStartIndex = + expr.indexOf(toStartOfSubstring) + toStartOfSubstring.length; + const argsEndIndex = expr.lastIndexOf(')'); + const args = splitAndTrimWithBracket( + expr.substring(argsStartIndex, argsEndIndex), + ); + + const columnArgument = args[0]; + if (columnArgument == null) { + console.error(`Failed to parse column argument from ${expr}`); + return undefined; + } + + const formattedRemainingArgs = + args.length > 1 ? `, ${args.slice(1).join(', ')}` : ''; + + return { + function: toStartOfFunction.trim(), + columnArgument, + formattedRemainingArgs, + }; + } +} + +/** + * Returns an optimized timestamp value expression for a table based on its timestampValueExpression and primary key. + * + * When a table has a sort key like `toStartOfMinute(timestamp), ..., timestamp`, it is more performant + * to filter by toStartOfMinute(timestamp) and timestamp, instead of just timestamp. + */ +export function optimizeTimestampValueExpression( + timestampValueExpression: string, + primaryKey: string | undefined, +) { + if (!primaryKey || !timestampValueExpression) return timestampValueExpression; + + const timestampValueExprs = [timestampValueExpression]; + const primaryKeyExprs = splitAndTrimWithBracket(primaryKey); + for (const primaryKeyExpr of primaryKeyExprs) { + const toStartOf = parseToStartOfFunction(primaryKeyExpr); + + if ( + primaryKeyExpr === timestampValueExpression.trim() || + (primaryKeyExpr.startsWith('toUnixTimestamp') && + primaryKeyExpr.includes(timestampValueExpression)) || + (primaryKeyExpr.startsWith('toDateTime') && + primaryKeyExpr.includes(timestampValueExpression)) + ) { + // We only want to add expressions that come before the timestampExpr in the primary key + break; + } else if ( + toStartOf && + toStartOf.columnArgument === timestampValueExpression.trim() + ) { + timestampValueExprs.push(primaryKeyExpr); + } + } + + return timestampValueExprs.join(', '); +}