Velox trace tool - Aggregation #12815
-
Hi Team, I am using the Velox trace tool while running Presto’s HiveExternalWorkerQueryRunner with native workers. My distributed query plan is as follows:
I am specifically tracing the Aggregate operation in Fragment 1, which was successfully traced and stored in the specified folder. Below is the trace summary:
However, when I attempt to replay the query, I encounter an error:
The task_trace_meta.json file is attached below: {
"planNode": {
"partitionFunctionSpec": {
"name": "GatherPartitionFunctionSpec"
},
"outputType": {
"names": [
"date",
"count",
"sum"
],
"cTypes": [
{
"name": "DateType",
"type": "DATE"
},
{
"type": "BIGINT",
"name": "Type"
},
{
"name": "Type",
"type": "DOUBLE"
}
],
"name": "Type",
"type": "ROW"
},
"replicateNullsAndAny": false,
"serdeKind": "Presto",
"keys": [],
"numPartitions": 1,
"kind": "PARTITIONED",
"sources": [
{
"aggregates": [
{
"sortingKeys": [],
"distinct": false,
"sortingOrders": [],
"call": {
"functionName": "presto.default.count",
"inputs": [
{
"fieldName": "count_12",
"type": {
"type": "BIGINT",
"name": "Type"
},
"name": "FieldAccessTypedExpr"
}
],
"type": {
"name": "Type",
"type": "BIGINT"
},
"name": "CallTypedExpr"
},
"rawInputTypes": []
},
{
"distinct": false,
"sortingOrders": [],
"sortingKeys": [],
"rawInputTypes": [
{
"name": "Type",
"type": "DOUBLE"
}
],
"call": {
"inputs": [
{
"fieldName": "sum_13",
"name": "FieldAccessTypedExpr",
"type": {
"name": "Type",
"type": "DOUBLE"
}
}
],
"functionName": "presto.default.sum",
"name": "CallTypedExpr",
"type": {
"type": "DOUBLE",
"name": "Type"
}
}
}
],
"aggregateNames": [
"count",
"sum"
],
"globalGroupingSets": [],
"ignoreNullKeys": false,
"sources": [
{
"type": "REPARTITION",
"partitionFunctionSpec": {
"constants": [],
"keyChannels": [
0
],
"name": "HashPartitionFunctionSpec",
"inputType": {
"cTypes": [
{
"type": "DATE",
"name": "DateType"
},
{
"type": "DOUBLE",
"name": "Type"
},
{
"name": "Type",
"type": "BIGINT"
}
],
"names": [
"date",
"sum_13",
"count_12"
],
"name": "Type",
"type": "ROW"
}
},
"scaleWriter": false,
"sources": [
{
"sources": [
{
"outputType": {
"names": [
"date",
"sum_13",
"count_12"
],
"cTypes": [
{
"name": "DateType",
"type": "DATE"
},
{
"type": "DOUBLE",
"name": "Type"
},
{
"type": "BIGINT",
"name": "Type"
}
],
"type": "ROW",
"name": "Type"
},
"serdeKind": "Presto",
"id": "288",
"name": "ExchangeNode"
}
],
"names": [
"date",
"sum_13",
"count_12"
],
"projections": [
{
"fieldName": "date",
"type": {
"name": "DateType",
"type": "DATE"
},
"name": "FieldAccessTypedExpr"
},
{
"fieldName": "sum_13",
"type": {
"name": "Type",
"type": "DOUBLE"
},
"name": "FieldAccessTypedExpr"
},
{
"fieldName": "count_12",
"name": "FieldAccessTypedExpr",
"type": {
"type": "BIGINT",
"name": "Type"
}
}
],
"name": "ProjectNode",
"id": "282.0"
}
],
"id": "282",
"name": "LocalPartitionNode"
}
],
"step": "FINAL",
"preGroupedKeys": [],
"groupingKeys": [
{
"fieldName": "date",
"name": "FieldAccessTypedExpr",
"type": {
"name": "DateType",
"type": "DATE"
}
}
],
"id": "3",
"name": "AggregationNode"
}
],
"name": "PartitionedOutputNode",
"id": "root.3"
},
"queryConfig": {
"query_max_memory_per_node": "4GB",
"max_output_buffer_size": "33554432",
"aggregation_spill_enabled": "true",
"adjust_timestamp_to_session_timezone": "true",
"spill_file_create_config": "",
"query_trace_enabled": "true",
"query_trace_node_ids": "3",
"max_page_partitioning_buffer_size": "33554432",
"mutable-config": "true",
"query_trace_max_bytes": "1000000",
"session_timezone": "Asia/Kolkata",
"order_by_spill_enabled": "true",
"join_spill_enabled": "true",
"query_trace_dir": "/Users/joe/Developer/queryTrace",
"query_trace_task_reg_exp": ".*\\.1\\..*\\..*\\..*",
"presto.array_agg.ignore_nulls": "false",
"max_partial_aggregation_memory": "16777216",
"driver_cpu_time_slice_limit_ms": "1000",
"spill_enabled": "false"
},
"connectorProperties": {}
} Could anyone help me figure out what I am missing? Any insights would be greatly appreciated. cc: @duanmeng, @xiaoxmeng, @tanjialiang |
Beta Was this translation helpful? Give feedback.
Replies: 2 comments 1 reply
-
@Joe-Abraham Could you try adding a prefix as follows: aggregate::prestosql::registerAllAggregateFunctions("presto.default."); If it works, I will file a PR to support adding a prefix via gflags. cc @xiaoxmeng |
Beta Was this translation helpful? Give feedback.
-
Thank you @duanmeng for the support |
Beta Was this translation helpful? Give feedback.
@Joe-Abraham Could you try adding the prefix
"presto.default."
in https://github.com/facebookincubator/velox/blob/main/velox/tool/trace/TraceReplayRunner.cpp#L291 as follows: aggregate::prestosql::registerAllAggregateFunctions("presto.default."); If it works, I will file a PR to support adding a prefix via gflags. cc @xiaoxmeng