Skip to content

Commit a35a477

Browse files
authored
feat(tesseract): Athena support (#9707)
1 parent a30dfea commit a35a477

File tree

12 files changed

+380
-34
lines changed

12 files changed

+380
-34
lines changed

.github/workflows/drivers-tests.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,8 @@ jobs:
271271
use_tesseract_sql_planner: true
272272
- database: bigquery-export-bucket-gcs
273273
use_tesseract_sql_planner: true
274+
- database: athena-export-bucket-s3
275+
use_tesseract_sql_planner: true
274276
fail-fast: false
275277

276278
steps:

.github/workflows/rust-cubesql.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -325,7 +325,7 @@ jobs:
325325
matrix:
326326
# We do not need to test under all versions here; that is already done under Linux
327327
node-version: [22.x]
328-
os-version: [windows-2019]
328+
os-version: [windows-2022]
329329
python-version: ["fallback"]
330330
fail-fast: false
331331

packages/cubejs-schema-compiler/src/adapter/BaseQuery.js

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1078,7 +1078,8 @@ export class BaseQuery {
10781078
* @returns {string}
10791079
*/
10801080
subtractInterval(date, interval) {
1081-
return `${date} - interval '${interval}'`;
1081+
const intervalStr = this.intervalString(interval);
1082+
return `${date} - interval ${intervalStr}`;
10821083
}
10831084

10841085
/**
@@ -1087,7 +1088,16 @@ export class BaseQuery {
10871088
* @returns {string}
10881089
*/
10891090
addInterval(date, interval) {
1090-
return `${date} + interval '${interval}'`;
1091+
const intervalStr = this.intervalString(interval);
1092+
return `${date} + interval ${intervalStr}`;
1093+
}
1094+
1095+
/**
1096+
* @param {string} interval
1097+
* @returns {string}
1098+
*/
1099+
intervalString(interval) {
1100+
return `'${interval}'`;
10911101
}
10921102

10931103
/**
@@ -4111,6 +4121,7 @@ export class BaseQuery {
41114121
},
41124122
tesseract: {
41134123
ilike: '{{ expr }} {% if negated %}NOT {% endif %}ILIKE {{ pattern }}', // May require different overloads in Tesseract than the ilike from expressions used in SQLAPI.
4124+
series_bounds_cast: '{{ expr }}'
41144125
},
41154126
filters: {
41164127
equals: '{{ column }} = {{ value }}{{ is_null_check }}',

packages/cubejs-schema-compiler/src/adapter/BigqueryQuery.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,10 @@ export class BigqueryQuery extends BaseQuery {
203203
return this.subtractInterval(timestamp, interval);
204204
}
205205

206+
public intervalString(interval: string): string {
207+
return `${interval}`;
208+
}
209+
206210
public addTimestampInterval(timestamp, interval) {
207211
return this.addInterval(timestamp, interval);
208212
}
@@ -353,6 +357,7 @@ export class BigqueryQuery extends BaseQuery {
353357
delete templates.expressions.like_escape;
354358
templates.filters.like_pattern = 'CONCAT({% if start_wild %}\'%\'{% else %}\'\'{% endif %}, LOWER({{ value }}), {% if end_wild %}\'%\'{% else %}\'\'{% endif %})';
355359
templates.tesseract.ilike = 'LOWER({{ expr }}) {% if negated %}NOT {% endif %} LIKE {{ pattern }}';
360+
templates.tesseract.series_bounds_cast = 'TIMESTAMP({{ expr }})';
356361
templates.types.boolean = 'BOOL';
357362
templates.types.float = 'FLOAT64';
358363
templates.types.double = 'FLOAT64';

packages/cubejs-schema-compiler/src/adapter/PostgresQuery.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,11 +83,11 @@ export class PostgresQuery extends BaseQuery {
8383
templates.types.binary = 'BYTEA';
8484
templates.operators.is_not_distinct_from = 'IS NOT DISTINCT FROM';
8585
templates.statements.generated_time_series_select = 'SELECT d AS "date_from",\n' +
86-
'd + interval \'{{ granularity }}\' - interval \'1 millisecond\' AS "date_to" \n' +
87-
'FROM generate_series({{ start }}::timestamp, {{ end }}:: timestamp, \'{{ granularity }}\'::interval) d ';
86+
'd + interval {{ granularity }} - interval \'1 millisecond\' AS "date_to" \n' +
87+
'FROM generate_series({{ start }}::timestamp, {{ end }}:: timestamp, {{ granularity }}::interval) d ';
8888
templates.statements.generated_time_series_with_cte_range_source = 'SELECT d AS "date_from",\n' +
89-
'd + interval \'{{ granularity }}\' - interval \'1 millisecond\' AS "date_to" \n' +
90-
'FROM {{ range_source }}, LATERAL generate_series({{ range_source }}.{{ min_name }}, {{ range_source }}.{{ max_name }}, \'{{ granularity }}\'::interval) d ';
89+
'd + interval {{ granularity }} - interval \'1 millisecond\' AS "date_to" \n' +
90+
'FROM {{ range_source }}, LATERAL generate_series({{ range_source }}.{{ min_name }}, {{ range_source }}.{{ max_name }}, {{ granularity }}::interval) d ';
9191
return templates;
9292
}
9393

packages/cubejs-schema-compiler/src/adapter/PrestodbQuery.ts

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -84,14 +84,9 @@ export class PrestodbQuery extends BaseQuery {
8484
return `date_trunc('${GRANULARITY_TO_INTERVAL[granularity]}', ${dimension})`;
8585
}
8686

87-
public subtractInterval(date, interval) {
87+
public intervalString(interval: string): string {
8888
const [intervalValue, intervalUnit] = interval.split(' ');
89-
return `${date} - interval '${intervalValue}' ${intervalUnit}`;
90-
}
91-
92-
public addInterval(date, interval) {
93-
const [intervalValue, intervalUnit] = interval.split(' ');
94-
return `${date} + interval '${intervalValue}' ${intervalUnit}`;
89+
return `'${intervalValue}' ${intervalUnit}`;
9590
}
9691

9792
public seriesSql(timeDimension) {
@@ -138,9 +133,16 @@ export class PrestodbQuery extends BaseQuery {
138133
templates.functions.DATETRUNC = 'DATE_TRUNC({{ args_concat }})';
139134
templates.functions.DATEPART = 'DATE_PART({{ args_concat }})';
140135
delete templates.functions.PERCENTILECONT;
141-
templates.statements.select = 'SELECT {{ select_concat | map(attribute=\'aliased\') | join(\', \') }} \n' +
142-
'FROM (\n {{ from }}\n) AS {{ from_alias }} \n' +
136+
templates.statements.select = '{% if ctes %} WITH \n' +
137+
'{{ ctes | join(\',\n\') }}\n' +
138+
'{% endif %}' +
139+
'SELECT {{ select_concat | map(attribute=\'aliased\') | join(\', \') }} {% if from %}\n' +
140+
'FROM (\n {{ from }}\n) AS {{ from_alias }} {% elif from_prepared %}\n' +
141+
'FROM {{ from_prepared }}' +
142+
'{% endif %}' +
143+
'{% if filter %}\nWHERE {{ filter }}{% endif %}' +
143144
'{% if group_by %} GROUP BY {{ group_by }}{% endif %}' +
145+
'{% if having %}\nHAVING {{ having }}{% endif %}' +
144146
'{% if order_by %} ORDER BY {{ order_by | map(attribute=\'expr\') | join(\', \') }}{% endif %}' +
145147
'{% if offset is not none %}\nOFFSET {{ offset }}{% endif %}' +
146148
'{% if limit is not none %}\nLIMIT {{ limit }}{% endif %}';
@@ -153,6 +155,25 @@ export class PrestodbQuery extends BaseQuery {
153155
// Presto intervals have a YearMonth or DayTime type variants, but no universal type
154156
delete templates.types.interval;
155157
templates.types.binary = 'VARBINARY';
158+
templates.tesseract.ilike = 'LOWER({{ expr }}) {% if negated %}NOT {% endif %} LIKE {{ pattern }}';
159+
templates.filters.like_pattern = 'CONCAT({% if start_wild %}\'%\'{% else %}\'\'{% endif %}, LOWER({{ value }}), {% if end_wild %}\'%\'{% else %}\'\'{% endif %}) ESCAPE \'\\\'';
160+
templates.statements.time_series_select = 'SELECT from_iso8601_timestamp(dates.f) date_from, from_iso8601_timestamp(dates.t) date_to \n' +
161+
'FROM (\n' +
162+
'{% for time_item in seria %}' +
163+
' select \'{{ time_item[0] }}\' f, \'{{ time_item[1] }}\' t \n' +
164+
'{% if not loop.last %} UNION ALL\n{% endif %}' +
165+
'{% endfor %}' +
166+
') AS dates';
167+
templates.statements.generated_time_series_select = 'SELECT d AS date_from,\n' +
168+
'date_add(\'MILLISECOND\', -1, d + interval {{ granularity }}) AS date_to\n' +
169+
'FROM UNNEST(\n' +
170+
'SEQUENCE(CAST(from_iso8601_timestamp({{ start }}) AS TIMESTAMP), CAST(from_iso8601_timestamp({{ end }}) AS TIMESTAMP), INTERVAL {{ granularity }})\n' +
171+
') AS dates(d)';
172+
templates.statements.generated_time_series_with_cte_range_source = 'SELECT d AS date_from,\n' +
173+
'date_add(\'MILLISECOND\', -1, d + interval {{ granularity }}) AS date_to\n' +
174+
'FROM {{ range_source }} CROSS JOIN UNNEST(\n' +
175+
'SEQUENCE(CAST({{ range_source }}.{{ min_name }} AS TIMESTAMP), CAST({{ range_source }}.{{ max_name }} AS TIMESTAMP), INTERVAL {{ granularity }})\n' +
176+
') AS dates(d)';
156177
return templates;
157178
}
158179

packages/cubejs-testing-drivers/fixtures/athena.json

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,5 +174,41 @@
174174
"SQL API: Nested Rollup with aliases",
175175
"SQL API: Nested Rollup over asterisk",
176176
"SQL API: Extended nested Rollup over asterisk"
177+
],
178+
"tesseractSkip": [
179+
"for the ECommerce.TimeAnalysisExternal",
180+
"for the ECommerce.TimeAnalysisInternal",
181+
182+
"querying Products: dimensions -- doesn't work wo ordering",
183+
"querying ECommerce: total quantity, avg discount, total sales, total profit by product + order + total -- rounding in athena",
184+
"querying ECommerce: total sales, total profit by month + order (date) + total -- doesn't work with the BigQuery",
185+
"querying ECommerce: total quantity, avg discount, total sales, total profit by product + order + total -- noisy test",
186+
"querying BigECommerce: partitioned pre-agg",
187+
"querying BigECommerce: null sum",
188+
"querying BigECommerce: null boolean",
189+
"--------------------",
190+
191+
192+
"querying BigECommerce: rolling window by 2 week",
193+
"querying custom granularities ECommerce: count by three_months_by_march + no dimension",
194+
"querying custom granularities ECommerce: count by three_months_by_march + dimension",
195+
"querying custom granularities (with preaggregation) ECommerce: totalQuantity by half_year + no dimension",
196+
"querying custom granularities (with preaggregation) ECommerce: totalQuantity by half_year + dimension",
197+
"querying custom granularities ECommerce: count by two_mo_by_feb + no dimension + rollingCountByUnbounded",
198+
"querying custom granularities ECommerce: count by two_mo_by_feb + no dimension + rollingCountByTrailing",
199+
"querying custom granularities ECommerce: count by two_mo_by_feb + no dimension + rollingCountByLeading",
200+
"pre-aggregations Customers: running total without time dimension",
201+
"querying BigECommerce: totalProfitYearAgo",
202+
"SQL API: post-aggregate percentage of total",
203+
"SQL API: Simple Rollup",
204+
"SQL API: Complex Rollup",
205+
"SQL API: Nested Rollup",
206+
"SQL API: Rollup with aliases",
207+
"SQL API: Rollup over exprs",
208+
"SQL API: Nested Rollup with aliases",
209+
"SQL API: Nested Rollup over asterisk",
210+
"SQL API: Extended nested Rollup over asterisk",
211+
"SQL API: Timeshift measure from cube",
212+
"SQL API: SQL push down push to cube quoted alias"
177213
]
178214
}

0 commit comments

Comments
 (0)