Skip to content

Commit e0dcc33

Browse files
authored
Support DISTINCT for aggregation functions over window (#9090)
1 parent 67cacf0 commit e0dcc33

File tree

31 files changed

+789
-71
lines changed

31 files changed

+789
-71
lines changed

ydb/library/yql/core/common_opt/yql_co_extr_members.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -601,7 +601,15 @@ TExprNode::TPtr ApplyExtractMembersToCalcOverWindow(const TExprNode::TPtr& node,
601601
auto payload = winOnRows->Child(i)->Child(1);
602602
const TStructExprType* structType;
603603
if (payload->IsCallable("WindowTraits")) {
604-
structType = payload->Child(0)->GetTypeAnn()->Cast<TTypeExprType>()->GetType()->Cast<TStructExprType>();
604+
bool isDistinct = winOnRows->Child(i)->ChildrenSize() == 3;
605+
if (isDistinct) {
606+
auto distinctColumn = winOnRows->Child(i)->Child(2);
607+
YQL_ENSURE(distinctColumn->IsAtom());
608+
usedFields.insert(distinctColumn->Content());
609+
continue;
610+
} else {
611+
structType = payload->Child(0)->GetTypeAnn()->Cast<TTypeExprType>()->GetType()->Cast<TStructExprType>();
612+
}
605613
}
606614
else if (payload->IsCallable({"Lead", "Lag", "Rank", "DenseRank", "PercentRank"})) {
607615
structType = payload->Child(0)->GetTypeAnn()->Cast<TTypeExprType>()->GetType()->Cast<TListExprType>()

ydb/library/yql/core/common_opt/yql_co_flow2.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1928,7 +1928,13 @@ void RegisterCoFlowCallables2(TCallableOptimizerMap& map) {
19281928
};
19291929

19301930
map["WindowTraits"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) {
1931-
auto structType = node->Child(0)->GetTypeAnn()->Cast<TTypeExprType>()->GetType()->Cast<TStructExprType>();
1931+
auto type = node->Child(0)->GetTypeAnn()->Cast<TTypeExprType>()->GetType();
1932+
if (type->GetKind() != ETypeAnnotationKind::Struct) {
1933+
// usually distinct, type of column is used instead
1934+
return node;
1935+
}
1936+
1937+
auto structType = type->Cast<TStructExprType>();
19321938
TSet<TStringBuf> usedFields;
19331939
auto initLambda = node->Child(1);
19341940
auto updateLambda = node->Child(2);

ydb/library/yql/core/type_ann/type_ann_list.cpp

Lines changed: 67 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,22 @@ namespace {
2626
return x->GetTypeAnn() && x->GetTypeAnn()->GetKind() == ETypeAnnotationKind::EmptyList;
2727
};
2828

29+
bool IsFieldSubset(const TStructExprType& structType, const TStructExprType& sourceStructType) {
30+
for (auto& item : structType.GetItems()) {
31+
auto name = item->GetName();
32+
auto type = item->GetItemType();
33+
if (auto idx = sourceStructType.FindItem(name)) {
34+
if (sourceStructType.GetItems()[*idx]->GetItemType() == type) {
35+
continue;
36+
}
37+
}
38+
39+
return false;
40+
}
41+
42+
return true;
43+
}
44+
2945
TExprNode::TPtr RewriteMultiAggregate(const TExprNode& node, TExprContext& ctx) {
3046
auto exprLambda = node.Child(1);
3147
const TStructExprType* structType = nullptr;
@@ -551,20 +567,48 @@ namespace {
551567
traitsInputItemType = traitsInputItemType->Cast<TListExprType>()->GetItemType();
552568
}
553569

554-
const TStructExprType* traitsInputStruct = traitsInputItemType->Cast<TStructExprType>();
555-
// traitsInputStruct should be subset of inputStruct
556-
for (auto& item : traitsInputStruct->GetItems()) {
557-
auto name = item->GetName();
558-
auto type = item->GetItemType();
559-
if (auto idx = inputStructType.FindItem(name)) {
560-
if (inputStructType.GetItems()[*idx]->GetItemType() == type) {
561-
continue;
570+
bool isDistinct = calcSpec->IsCallable("WindowTraits") && func->ChildrenSize() == 3;
571+
if (isDistinct) {
572+
auto distinctColumn = func->Child(2);
573+
if (!EnsureAtom(*distinctColumn, ctx)) {
574+
return IGraphTransformer::TStatus::Error;
575+
}
576+
577+
auto distinctColumnType = inputStructType.FindItemType(distinctColumn->Content());
578+
if (!distinctColumnType) {
579+
ctx.AddError(TIssue(ctx.GetPosition(distinctColumn->Pos()),
580+
TStringBuilder() << "Unknown key column " << distinctColumn->Content() << " for distinct, input type is " <<
581+
static_cast<const TTypeAnnotationNode&>(inputStructType)));
582+
return IGraphTransformer::TStatus::Error;
583+
}
584+
585+
if (distinctColumnType->GetKind() == ETypeAnnotationKind::Struct) {
586+
const TStructExprType* traitsInputStruct = traitsInputItemType->Cast<TStructExprType>();
587+
const TStructExprType* distinctColumnStructType = distinctColumnType->Cast<TStructExprType>();
588+
if (!IsFieldSubset(*traitsInputStruct, *distinctColumnStructType)) {
589+
ctx.AddError(TIssue(ctx.GetPosition(traitsInputTypeNode->Pos()),
590+
TStringBuilder() << "Expected window traits input type " << *traitsInputItemType << " to be subset of distinct key struct type" <<
591+
static_cast<const TTypeAnnotationNode&>(*distinctColumnStructType)));
592+
return IGraphTransformer::TStatus::Error;
562593
}
594+
} else if (distinctColumnType != traitsInputItemType) {
595+
ctx.AddError(TIssue(ctx.GetPosition(distinctColumn->Pos()),
596+
TStringBuilder() << "Expected window traits input type " << *traitsInputItemType << " to be same as distinct key type " << *distinctColumnType));
597+
return IGraphTransformer::TStatus::Error;
598+
}
599+
600+
if (!EnsureHashableKey(distinctColumn->Pos(), distinctColumnType, ctx) ||
601+
!EnsureEquatableKey(distinctColumn->Pos(), distinctColumnType, ctx)) {
602+
return IGraphTransformer::TStatus::Error;
603+
}
604+
} else {
605+
const TStructExprType* traitsInputStruct = traitsInputItemType->Cast<TStructExprType>();
606+
if (!IsFieldSubset(*traitsInputStruct, inputStructType)) {
607+
ctx.AddError(TIssue(ctx.GetPosition(traitsInputTypeNode->Pos()), TStringBuilder() << "Invalid " <<
608+
calcSpec->Content() << " traits input type: " << *traitsInputItemType << ", expecting subset of " <<
609+
static_cast<const TTypeAnnotationNode&>(inputStructType)));
610+
return IGraphTransformer::TStatus::Error;
563611
}
564-
ctx.AddError(TIssue(ctx.GetPosition(traitsInputTypeNode->Pos()), TStringBuilder() << "Invalid " <<
565-
calcSpec->Content() << " traits input type: " << *traitsInputItemType << ", expecting subset of " <<
566-
static_cast<const TTypeAnnotationNode&>(inputStructType)));
567-
return IGraphTransformer::TStatus::Error;
568612
}
569613

570614
if (calcSpec->IsCallable("WindowTraits")) {
@@ -6018,9 +6062,14 @@ namespace {
60186062
}
60196063

60206064
for (ui32 i = 1; i < input->ChildrenSize(); ++i) {
6021-
if (!EnsureTupleSize(*input->Child(i), 2, ctx.Expr)) {
6065+
if (!EnsureTupleMinSize(*input->Child(i), 2, ctx.Expr)) {
60226066
return IGraphTransformer::TStatus::Error;
60236067
}
6068+
6069+
if (!EnsureTupleMaxSize(*input->Child(i), 3, ctx.Expr)) {
6070+
return IGraphTransformer::TStatus::Error;
6071+
}
6072+
60246073
auto currColumn = input->Child(i)->Child(0)->Content();
60256074
auto calcSpec = input->Child(i)->Child(1);
60266075
if (!calcSpec->IsCallable({"WindowTraits", "Lag", "Lead", "RowNumber", "Rank", "DenseRank", "PercentRank", "CumeDist", "NTile", "Void"})) {
@@ -6029,6 +6078,11 @@ namespace {
60296078
return IGraphTransformer::TStatus::Error;
60306079
}
60316080

6081+
if (input->Child(i)->ChildrenSize() == 3 && !calcSpec->IsCallable("WindowTraits")) {
6082+
ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(calcSpec->Pos()), "DISTINCT is allowed only for aggregate functions"));
6083+
return IGraphTransformer::TStatus::Error;
6084+
}
6085+
60326086
if (i + 1 < input->ChildrenSize()) {
60336087
auto nextColumn = input->Child(i + 1)->Child(0)->Content();
60346088
if (currColumn == nextColumn) {

0 commit comments

Comments
 (0)