Skip to content

Commit 0555fc0

Browse files
authored
Introduce Intersect operation to Roaring UDF (#17611)
1 parent 5b14371 commit 0555fc0

File tree

3 files changed

+265
-1
lines changed

3 files changed

+265
-1
lines changed

ydb/library/yql/udfs/common/roaring/roaring.cpp

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,65 @@ namespace {
428428
}
429429
};
430430

431+
class TRoaringIntersect: public TBoxedValue {
432+
public:
433+
TRoaringIntersect(TSourcePosition pos)
434+
: Pos_(pos)
435+
{
436+
}
437+
438+
static TStringRef Name() {
439+
return TStringRef::Of("Intersect");
440+
}
441+
442+
private:
443+
TUnboxedValue Run(const IValueBuilder* valueBuilder,
444+
const TUnboxedValuePod* args) const override {
445+
Y_UNUSED(valueBuilder);
446+
try {
447+
auto* left = GetBitmapFromArg(args[0]);
448+
auto* right = GetBitmapFromArg(args[1]);
449+
450+
return TUnboxedValuePod(roaring_bitmap_intersect(left, right));
451+
} catch (const std::exception& e) {
452+
UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
453+
}
454+
}
455+
456+
TSourcePosition Pos_;
457+
};
458+
459+
class TRoaringIntersectWithBinary: public TBoxedValue {
460+
public:
461+
TRoaringIntersectWithBinary(TSourcePosition pos)
462+
: Pos_(pos)
463+
{
464+
}
465+
466+
static TStringRef Name() {
467+
return TStringRef::Of("IntersectWithBinary");
468+
}
469+
470+
private:
471+
TUnboxedValue Run(const IValueBuilder* valueBuilder,
472+
const TUnboxedValuePod* args) const override {
473+
Y_UNUSED(valueBuilder);
474+
try {
475+
auto* left = GetBitmapFromArg(args[0]);
476+
auto* right = DeserializePortable(args[1].AsStringRef());
477+
478+
auto intersect = roaring_bitmap_intersect(left, right);
479+
roaring_bitmap_free(right);
480+
return TUnboxedValuePod(intersect);
481+
482+
} catch (const std::exception& e) {
483+
UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
484+
}
485+
}
486+
487+
TSourcePosition Pos_;
488+
};
489+
431490
class TRoaringModule: public IUdfModule {
432491
public:
433492
class TMemoryHookInitializer {
@@ -611,6 +670,24 @@ namespace {
611670
if (!typesOnly) {
612671
builder.Implementation(new TRoaringNaiveBulkAndWithBinary(builder.GetSourcePosition()));
613672
}
673+
} else if (TRoaringIntersect::Name() == name) {
674+
builder.Returns<bool>()
675+
.Args()
676+
->Add<TAutoMap<TResource<RoaringResourceName>>>()
677+
.Add<TAutoMap<TResource<RoaringResourceName>>>();
678+
679+
if (!typesOnly) {
680+
builder.Implementation(new TRoaringIntersect(builder.GetSourcePosition()));
681+
}
682+
} else if (TRoaringIntersectWithBinary::Name() == name) {
683+
builder.Returns<bool>()
684+
.Args()
685+
->Add<TAutoMap<TResource<RoaringResourceName>>>()
686+
.Add<TAutoMap<char*>>();
687+
688+
if (!typesOnly) {
689+
builder.Implementation(new TRoaringIntersectWithBinary(builder.GetSourcePosition()));
690+
}
614691
} else {
615692
TStringBuilder sb;
616693
sb << "Unknown function: " << name.Data();

ydb/library/yql/udfs/common/roaring/test/canondata/test.test_intersect_/results.txt

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -476,5 +476,185 @@
476476
]
477477
}
478478
]
479+
};
480+
{
481+
"Write" = [
482+
{
483+
"Type" = [
484+
"ListType";
485+
[
486+
"StructType";
487+
[
488+
[
489+
"Intersect";
490+
[
491+
"OptionalType";
492+
[
493+
"DataType";
494+
"Bool"
495+
]
496+
]
497+
]
498+
]
499+
]
500+
];
501+
"Data" = [
502+
[
503+
[
504+
%true
505+
]
506+
]
507+
]
508+
}
509+
]
510+
};
511+
{
512+
"Write" = [
513+
{
514+
"Type" = [
515+
"ListType";
516+
[
517+
"StructType";
518+
[
519+
[
520+
"IntersectNull0";
521+
[
522+
"OptionalType";
523+
[
524+
"DataType";
525+
"Bool"
526+
]
527+
]
528+
]
529+
]
530+
]
531+
];
532+
"Data" = [
533+
[
534+
#
535+
]
536+
]
537+
}
538+
]
539+
};
540+
{
541+
"Write" = [
542+
{
543+
"Type" = [
544+
"ListType";
545+
[
546+
"StructType";
547+
[
548+
[
549+
"IntersectNull1";
550+
[
551+
"OptionalType";
552+
[
553+
"DataType";
554+
"Bool"
555+
]
556+
]
557+
]
558+
]
559+
]
560+
];
561+
"Data" = [
562+
[
563+
#
564+
]
565+
]
566+
}
567+
]
568+
};
569+
{
570+
"Write" = [
571+
{
572+
"Type" = [
573+
"ListType";
574+
[
575+
"StructType";
576+
[
577+
[
578+
"IntersectWithBinary";
579+
[
580+
"OptionalType";
581+
[
582+
"DataType";
583+
"Bool"
584+
]
585+
]
586+
]
587+
]
588+
]
589+
];
590+
"Data" = [
591+
[
592+
[
593+
%true
594+
]
595+
]
596+
]
597+
}
598+
]
599+
};
600+
{
601+
"Write" = [
602+
{
603+
"Type" = [
604+
"ListType";
605+
[
606+
"StructType";
607+
[
608+
[
609+
"IntersectWithBinaryNull";
610+
[
611+
"OptionalType";
612+
[
613+
"DataType";
614+
"Bool"
615+
]
616+
]
617+
]
618+
]
619+
]
620+
];
621+
"Data" = [
622+
[
623+
#
624+
]
625+
]
626+
}
627+
]
628+
};
629+
{
630+
"Write" = [
631+
{
632+
"Type" = [
633+
"ListType";
634+
[
635+
"StructType";
636+
[
637+
[
638+
"IntersectFalse";
639+
[
640+
"OptionalType";
641+
[
642+
"DataType";
643+
"Bool"
644+
]
645+
]
646+
]
647+
]
648+
]
649+
];
650+
"Data" = [
651+
[
652+
[
653+
%false
654+
]
655+
]
656+
]
657+
}
658+
]
479659
}
480660
]

ydb/library/yql/udfs/common/roaring/test/cases/intersect.sql

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,11 @@ SELECT Roaring::Uint32List(Roaring::AndNotWithBinary(Roaring::Deserialize(right)
1515
SELECT Roaring::Uint32List(Roaring::AndNotWithBinary(Roaring::Deserialize(right), NULL, true)) AS AndNotWithBinaryListEmptyInplace FROM Input;
1616

1717
SELECT Roaring::Uint32List(Roaring::NaiveBulkAnd(AsList(Roaring::Deserialize(right), Roaring::Deserialize(left)))) AS NaiveBulkAnd FROM Input;
18-
SELECT Roaring::Uint32List(Roaring::NaiveBulkAndWithBinary(AsList(right, left))) AS NaiveBulkAndWithBinary FROM Input;
18+
SELECT Roaring::Uint32List(Roaring::NaiveBulkAndWithBinary(AsList(right, left))) AS NaiveBulkAndWithBinary FROM Input;
19+
20+
SELECT Roaring::Intersect(Roaring::Deserialize(right), Roaring::Deserialize(left)) AS Intersect FROM Input;
21+
SELECT Roaring::Intersect(NULL, Roaring::Deserialize(left)) AS IntersectNull0 FROM Input;
22+
SELECT Roaring::Intersect(Roaring::Deserialize(right), NULL) AS IntersectNull1 FROM Input;
23+
SELECT Roaring::IntersectWithBinary(Roaring::Deserialize(right), left) AS IntersectWithBinary FROM Input;
24+
SELECT Roaring::IntersectWithBinary(Roaring::Deserialize(right), NULL) AS IntersectWithBinaryNull FROM Input;
25+
SELECT Roaring::Intersect(Roaring::Deserialize(right), Roaring::FromUint32List(AsList(100500))) AS IntersectFalse FROM Input;

0 commit comments

Comments
 (0)