Skip to content

Commit a0be68f

Browse files
authored
Merge 54df49e into 2876ba4
2 parents 2876ba4 + 54df49e commit a0be68f

File tree

8 files changed

+256
-0
lines changed

8 files changed

+256
-0
lines changed

ydb/library/yql/udfs/common/roaring/roaring.cpp

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include <contrib/libs/croaring/include/roaring/memory.h>
88
#include <contrib/libs/croaring/include/roaring/roaring.h>
99

10+
#include <util/generic/array_ref.h>
1011
#include <util/generic/vector.h>
1112
#include <util/string/builder.h>
1213
#include <util/system/yassert.h>
@@ -30,6 +31,11 @@ namespace {
3031
{
3132
}
3233

34+
TRoaringWrapper(roaring_bitmap_t* bitmap)
35+
: Roaring(bitmap)
36+
{
37+
}
38+
3339
// Releases the bitmap held in `Roaring` when the wrapper is destroyed.
~TRoaringWrapper()
{
    roaring_bitmap_free(Roaring);
}
@@ -105,6 +111,47 @@ namespace {
105111
}
106112
};
107113

114+
class TRoaringAndNotWithBinary: public TBoxedValue {
115+
public:
116+
TRoaringAndNotWithBinary() {
117+
}
118+
119+
static TStringRef Name() {
120+
return TStringRef::Of("AndNotWithBinary");
121+
}
122+
123+
private:
124+
TUnboxedValue Run(const IValueBuilder* valueBuilder,
125+
const TUnboxedValuePod* args) const override {
126+
Y_UNUSED(valueBuilder);
127+
auto binaryString = args[1].AsStringRef();
128+
auto bitmap = DeserializePortable(binaryString);
129+
130+
roaring_bitmap_andnot_inplace(GetBitmapFromArg(args[0]), bitmap);
131+
roaring_bitmap_free(bitmap);
132+
133+
return args[0];
134+
}
135+
};
136+
137+
class TRoaringAndNot: public TBoxedValue {
138+
public:
139+
TRoaringAndNot() {
140+
}
141+
142+
static TStringRef Name() {
143+
return TStringRef::Of("AndNot");
144+
}
145+
146+
private:
147+
TUnboxedValue Run(const IValueBuilder* valueBuilder,
148+
const TUnboxedValuePod* args) const override {
149+
Y_UNUSED(valueBuilder);
150+
roaring_bitmap_andnot_inplace(GetBitmapFromArg(args[0]), GetBitmapFromArg(args[1]));
151+
return args[0];
152+
}
153+
};
154+
108155
class TRoaringOr: public TBoxedValue {
109156
public:
110157
TRoaringOr() {
@@ -223,6 +270,46 @@ namespace {
223270
TSourcePosition Pos_;
224271
};
225272

273+
class TRoaringFromUint32List: public TBoxedValue {
274+
public:
275+
TRoaringFromUint32List(TSourcePosition pos)
276+
: Pos_(pos)
277+
{
278+
}
279+
280+
static TStringRef Name() {
281+
return TStringRef::Of("FromUint32List");
282+
}
283+
284+
private:
285+
TUnboxedValue Run(const IValueBuilder* valueBuilder,
286+
const TUnboxedValuePod* args) const override {
287+
Y_UNUSED(valueBuilder);
288+
try {
289+
auto* bitmap = roaring_bitmap_create();
290+
291+
const auto vector = args[0];
292+
const auto* elements = vector.GetElements();
293+
if (elements) {
294+
for (auto& value : TArrayRef{elements, vector.GetListLength()}) {
295+
roaring_bitmap_add(bitmap, value.Get<ui32>());
296+
}
297+
} else {
298+
TUnboxedValue value;
299+
const auto it = vector.GetListIterator();
300+
while (it.Next(value)) {
301+
roaring_bitmap_add(bitmap, value.Get<ui32>());
302+
}
303+
}
304+
305+
return TUnboxedValuePod(new TRoaringWrapper(bitmap));
306+
} catch (const std::exception& e) {
307+
UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
308+
}
309+
}
310+
TSourcePosition Pos_;
311+
};
312+
226313
class TRoaringSerialize: public TBoxedValue {
227314
public:
228315
TRoaringSerialize() {
@@ -266,6 +353,25 @@ namespace {
266353
}
267354
};
268355

356+
class TRoaringRunOptimize: public TBoxedValue {
357+
public:
358+
TRoaringRunOptimize() {
359+
}
360+
361+
static TStringRef Name() {
362+
return TStringRef::Of("RunOptimize");
363+
}
364+
365+
private:
366+
TUnboxedValue Run(const IValueBuilder* valueBuilder,
367+
const TUnboxedValuePod* args) const override {
368+
Y_UNUSED(valueBuilder);
369+
auto bitmap = GetBitmapFromArg(args[0]);
370+
roaring_bitmap_run_optimize(bitmap);
371+
return args[0];
372+
}
373+
};
374+
269375
class TRoaringModule: public IUdfModule {
270376
public:
271377
TRoaringModule() {
@@ -282,6 +388,7 @@ namespace {
282388
void GetAllFunctions(IFunctionsSink& sink) const final {
283389
sink.Add(TRoaringSerialize::Name());
284390
sink.Add(TRoaringDeserialize::Name());
391+
sink.Add(TRoaringFromUint32List::Name());
285392

286393
sink.Add(TRoaringCardinality::Name());
287394

@@ -292,6 +399,11 @@ namespace {
292399

293400
sink.Add(TRoaringAndWithBinary::Name());
294401
sink.Add(TRoaringAnd::Name());
402+
403+
sink.Add(TRoaringAndNotWithBinary::Name());
404+
sink.Add(TRoaringAndNot::Name());
405+
406+
sink.Add(TRoaringRunOptimize::Name());
295407
}
296408

297409
void CleanupOnTerminate() const final {
@@ -312,6 +424,12 @@ namespace {
312424
if (!typesOnly) {
313425
builder.Implementation(new TRoaringDeserialize(builder.GetSourcePosition()));
314426
}
427+
} else if (TRoaringFromUint32List::Name() == name) {
428+
builder.Returns<TResource<RoaringResourceName>>().Args()->Add<TListType<ui32>>();
429+
430+
if (!typesOnly) {
431+
builder.Implementation(new TRoaringFromUint32List(builder.GetSourcePosition()));
432+
}
315433
} else if (TRoaringSerialize::Name() == name) {
316434
builder.Returns(builder.SimpleType<char*>())
317435
.Args()
@@ -372,6 +490,32 @@ namespace {
372490
if (!typesOnly) {
373491
builder.Implementation(new TRoaringAnd());
374492
}
493+
} else if (TRoaringAndNotWithBinary::Name() == name) {
494+
builder.Returns<TResource<RoaringResourceName>>()
495+
.Args()
496+
->Add<TAutoMap<TResource<RoaringResourceName>>>()
497+
.Add<TAutoMap<char*>>();
498+
499+
if (!typesOnly) {
500+
builder.Implementation(new TRoaringAndNotWithBinary());
501+
}
502+
} else if (TRoaringAndNot::Name() == name) {
503+
builder.Returns<TResource<RoaringResourceName>>()
504+
.Args()
505+
->Add<TAutoMap<TResource<RoaringResourceName>>>()
506+
.Add<TAutoMap<TResource<RoaringResourceName>>>();
507+
508+
if (!typesOnly) {
509+
builder.Implementation(new TRoaringAndNot());
510+
}
511+
} else if (TRoaringRunOptimize::Name() == name) {
512+
builder.Returns<TResource<RoaringResourceName>>()
513+
.Args()
514+
->Add<TAutoMap<TResource<RoaringResourceName>>>();
515+
516+
if (!typesOnly) {
517+
builder.Implementation(new TRoaringRunOptimize());
518+
}
375519
} else {
376520
TStringBuilder sb;
377521
sb << "Unknown function: " << name.Data();

ydb/library/yql/udfs/common/roaring/test/canondata/result.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,5 +18,10 @@
1818
{
1919
"uri": "file://test.test_union_/results.txt"
2020
}
21+
],
22+
"test.test[run_optimize]": [
23+
{
24+
"uri": "file://test.test_run_optimize_/results.txt"
25+
}
2126
]
2227
}

ydb/library/yql/udfs/common/roaring/test/canondata/test.test_intersect_/results.txt

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,5 +102,36 @@
102102
]
103103
}
104104
]
105+
};
106+
{
107+
"Write" = [
108+
{
109+
"Type" = [
110+
"ListType";
111+
[
112+
"StructType";
113+
[
114+
[
115+
"AndNotList";
116+
[
117+
"ListType";
118+
[
119+
"DataType";
120+
"Uint32"
121+
]
122+
]
123+
]
124+
]
125+
]
126+
];
127+
"Data" = [
128+
[
129+
[
130+
"42"
131+
]
132+
]
133+
]
134+
}
135+
]
105136
}
106137
]
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
[
2+
{
3+
"Write" = [
4+
{
5+
"Type" = [
6+
"ListType";
7+
[
8+
"StructType";
9+
[
10+
[
11+
"RunOptimizeList";
12+
[
13+
"ListType";
14+
[
15+
"DataType";
16+
"Uint32"
17+
]
18+
]
19+
]
20+
]
21+
]
22+
];
23+
"Data" = [
24+
[
25+
[
26+
"10";
27+
"42";
28+
"567"
29+
]
30+
]
31+
]
32+
}
33+
]
34+
}
35+
]

ydb/library/yql/udfs/common/roaring/test/canondata/test.test_serialize_deserialize_/results.txt

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,5 +172,38 @@
172172
]
173173
}
174174
]
175+
};
176+
{
177+
"Write" = [
178+
{
179+
"Type" = [
180+
"ListType";
181+
[
182+
"StructType";
183+
[
184+
[
185+
"DeserializedList";
186+
[
187+
"ListType";
188+
[
189+
"DataType";
190+
"Uint32"
191+
]
192+
]
193+
]
194+
]
195+
]
196+
];
197+
"Data" = [
198+
[
199+
[
200+
"10";
201+
"42";
202+
"567"
203+
]
204+
]
205+
]
206+
}
207+
]
175208
}
176209
]
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
11
SELECT Roaring::Uint32List(Roaring::And(Roaring::Deserialize(left), Roaring::Deserialize(right))) AS AndList FROM Input;
22
SELECT Roaring::Uint32List(Roaring::AndWithBinary(Roaring::Deserialize(right), left)) AS AndWithBinaryList FROM Input;
33
SELECT Roaring::Uint32List(Roaring::AndWithBinary(Roaring::Deserialize(right), NULL)) AS AndWithBinaryListEmpty FROM Input;
4+
5+
SELECT Roaring::Uint32List(Roaring::AndNot(Roaring::Deserialize(left), Roaring::Deserialize(right))) AS AndNotList FROM Input;
6+
SELECT Roaring::Uint32List(Roaring::AndNotWithBinary(Roaring::Deserialize(right), left)) AS AndNotWithBinaryList FROM Input;
7+
SELECT Roaring::Uint32List(Roaring::AndNotWithBinary(Roaring::Deserialize(right), NULL)) AS AndNotWithBinaryListEmpty FROM Input;
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
SELECT Roaring::Uint32List(Roaring::RunOptimize(Roaring::FromUint32List(AsList(10, 567, 42)))) AS RunOptimizeList;

ydb/library/yql/udfs/common/roaring/test/cases/serialize_deserialize.sql

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,6 @@ FROM Input;
1313

1414
SELECT ListTake(ListSkip(Roaring::Uint32List(Roaring::Deserialize(binaryString)), 10), 1) AS EmptyList
1515
FROM Input;
16+
17+
SELECT Roaring::Uint32List(Roaring::FromUint32List(AsList(10, 567, 42))) AS DeserializedList
18+
FROM Input;

0 commit comments

Comments
 (0)