Skip to content

Commit 9150df9

Browse files
authored
Implement double/int -> pg numeric conversion YQL-16767 (#727)
1 parent 3422227 commit 9150df9

File tree

5 files changed

+215
-0
lines changed

5 files changed

+215
-0
lines changed

ydb/library/yql/parser/pg_wrapper/arrow.cpp

+53
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include "arrow.h"
2+
#include "arrow_impl.h"
23
#include <ydb/library/yql/parser/pg_wrapper/interface/arrow.h>
34
#include <ydb/library/yql/parser/pg_wrapper/interface/utils.h>
45
#include <ydb/library/yql/minikql/mkql_node_cast.h>
@@ -151,6 +152,55 @@ std::shared_ptr<arrow::Array> PgConvertString(const std::shared_ptr<arrow::Array
151152
return ret;
152153
}
153154

155+
// Converts a floating-point value into a PostgreSQL Numeric.
//
// item   - value to convert.
// scale  - power of ten the fractional part is multiplied by before it is
//          rounded to an integer (callers are expected to pass 10^digits).
// digits - number of fractional decimal digits that `scale` corresponds to.
//
// Trailing zero fractional digits are dropped, so 711.56000 becomes 711.56.
//
// NOTE(review): NaN/Inf and magnitudes outside the i64 range are not handled;
// the double -> i64 conversion below is undefined for them. Confirm that
// callers never pass such values.
Numeric PgFloatToNumeric(double item, ui64 scale, int digits) {
    double intPart;
    const double fracPart = modf(item, &intPart);

    i64 whole = static_cast<i64>(intPart);
    i64 fracInt = round(fracPart * scale);

    // Rounding can push the fraction up to a whole unit (for example
    // 1.9999999999999 with 12 digits yields fracInt == scale). Carry that unit
    // into the integer part; otherwise the compaction loop below would reduce
    // it to "1 with zero digits" and the carry would be silently lost.
    const i64 unit = static_cast<i64>(scale);
    if (unit > 0) {
        if (fracInt >= unit) {
            whole += 1;
            fracInt -= unit;
        } else if (fracInt <= -unit) {
            whole -= 1;
            fracInt += unit;
        }
    }

    // scale compaction: represent 711.56000 as 711.56
    while (digits > 0 && fracInt % 10 == 0) {
        fracInt /= 10;
        digits -= 1;
    }

    if (digits == 0) {
        // No fractional digits left: the value is an integer.
        return int64_to_numeric(whole);
    }

    // The flag is initialised so that it never holds an indeterminate value.
    // NOTE(review): the flag is not inspected afterwards, same as before —
    // confirm the addition cannot fail for the magnitudes produced here.
    bool error = false;
    return numeric_add_opt_error(
        int64_to_numeric(whole),
        int64_div_fast_to_numeric(fracInt, digits),
        &error);
}
176+
177+
// Picks the Arrow -> pg numeric column converter that matches the element type
// of the source column. Signed 16/32/64-bit integers, float and double are
// supported; for any other Arrow type an empty TColumnConverter is returned.
TColumnConverter BuildPgNumericColumnConverter(const std::shared_ptr<arrow::DataType>& originalType) {
    const auto typeId = originalType->id();

    if (typeId == arrow::Type::INT16) {
        return [](const std::shared_ptr<arrow::Array>& column) {
            return PgConvertNumeric<i16>(column);
        };
    }
    if (typeId == arrow::Type::INT32) {
        return [](const std::shared_ptr<arrow::Array>& column) {
            return PgConvertNumeric<i32>(column);
        };
    }
    if (typeId == arrow::Type::INT64) {
        return [](const std::shared_ptr<arrow::Array>& column) {
            return PgConvertNumeric<i64>(column);
        };
    }
    if (typeId == arrow::Type::FLOAT) {
        return [](const std::shared_ptr<arrow::Array>& column) {
            return PgConvertNumeric<float>(column);
        };
    }
    if (typeId == arrow::Type::DOUBLE) {
        return [](const std::shared_ptr<arrow::Array>& column) {
            return PgConvertNumeric<double>(column);
        };
    }

    // Unsupported source type: hand back an empty converter.
    return {};
}
203+
154204
template <typename T, typename F>
155205
TColumnConverter BuildPgFixedColumnConverter(const std::shared_ptr<arrow::DataType>& originalType, const F& f) {
156206
auto primaryType = NKikimr::NMiniKQL::GetPrimitiveDataType<T>();
@@ -200,6 +250,9 @@ TColumnConverter BuildPgColumnConverter(const std::shared_ptr<arrow::DataType>&
200250
case FLOAT8OID: {
201251
return BuildPgFixedColumnConverter<double>(originalType, [](auto value){ return Float8GetDatum(value); });
202252
}
253+
case NUMERICOID: {
254+
return BuildPgNumericColumnConverter(originalType);
255+
}
203256
case BYTEAOID:
204257
case VARCHAROID:
205258
case TEXTOID:

ydb/library/yql/parser/pg_wrapper/arrow.h

+1
Original file line numberDiff line numberDiff line change
@@ -1299,3 +1299,4 @@ TExecFunc FindExec(Oid oid);
12991299
const NPg::TAggregateDesc& ResolveAggregation(const TString& name, NKikimr::NMiniKQL::TTupleType* tupleType, const std::vector<ui32>& argsColumns, NKikimr::NMiniKQL::TType* returnType);
13001300

13011301
}
1302+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
#pragma once
2+
3+
#include <arrow/array.h>
4+
#include <arrow/array/builder_binary.h>
5+
6+
extern "C" {
7+
#include "utils/numeric.h"
8+
}
9+
10+
namespace NYql {
11+
12+
// Converts a floating-point value to a PostgreSQL Numeric.
// `scale` is the multiplier applied to the fractional part before it is rounded
// to an integer, and `digits` is the matching number of fractional decimal
// digits (PgConvertNumeric below passes 10^12 / 12 for double and 10^6 / 6 for
// float). Trailing zero fractional digits are dropped from the result.
Numeric PgFloatToNumeric(double item, ui64 scale, int digits);
13+
14+
template<typename T>
15+
std::shared_ptr<arrow::Array> PgConvertNumeric(const std::shared_ptr<arrow::Array>& value) {
16+
TArenaMemoryContext arena;
17+
const auto& data = value->data();
18+
size_t length = data->length;
19+
arrow::BinaryBuilder builder;
20+
auto input = data->GetValues<T>(1);
21+
for (size_t i = 0; i < length; ++i) {
22+
if (value->IsNull(i)) {
23+
builder.AppendNull();
24+
continue;
25+
}
26+
T item = input[i];
27+
Numeric v;
28+
if constexpr(std::is_same_v<T,double>) {
29+
v = PgFloatToNumeric(item, 1000000000000LL, 12);
30+
} else if constexpr(std::is_same_v<T,float>) {
31+
v = PgFloatToNumeric(item, 1000000LL, 6);
32+
} else {
33+
v = int64_to_numeric(item);
34+
}
35+
auto datum = NumericGetDatum(v);
36+
auto ptr = (char*)datum;
37+
auto len = GetFullVarSize((const text*)datum);
38+
NUdf::ZeroMemoryContext(ptr);
39+
ARROW_OK(builder.Append(ptr - sizeof(void*), len + sizeof(void*)));
40+
}
41+
42+
std::shared_ptr<arrow::BinaryArray> ret;
43+
ARROW_OK(builder.Finish(&ret));
44+
return ret;
45+
}
46+
47+
}
48+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
#include <arrow/api.h>
2+
#include <arrow/array.h>
3+
4+
#include <library/cpp/testing/unittest/registar.h>
5+
6+
#include "arrow.h"
7+
#include "arrow_impl.h"
8+
9+
extern "C" {
10+
#include "utils/fmgrprotos.h"
11+
}
12+
13+
namespace NYql {
14+
15+
Y_UNIT_TEST_SUITE(TArrowUtilsTests) {
16+
17+
// Checks that PgFloatToNumeric yields the shortest decimal text for positive
// and negative inputs, for both double (12 digits) and float (5 digits).
// NOTE(review): the float cases use scale 100000 / 5 digits, whereas
// PgConvertNumeric<float> passes 1000000 / 6 — confirm which is intended.
Y_UNIT_TEST(TestPgFloatToNumeric) {
    TArenaMemoryContext arena;

    // Renders a Numeric as text through PostgreSQL's numeric_out.
    const auto render = [](Numeric numeric) {
        return TString(DatumGetCString(DirectFunctionCall1(numeric_out, NumericGetDatum(numeric))));
    };

    UNIT_ASSERT_VALUES_EQUAL(render(PgFloatToNumeric(711.56, 1000000000000LL, 12)), "711.56");
    UNIT_ASSERT_VALUES_EQUAL(render(PgFloatToNumeric(-711.56, 1000000000000LL, 12)), "-711.56");
    UNIT_ASSERT_VALUES_EQUAL(render(PgFloatToNumeric(711.56f, 100000LL, 5)), "711.56");
    UNIT_ASSERT_VALUES_EQUAL(render(PgFloatToNumeric(-711.56f, 100000LL, 5)), "-711.56");
}
35+
36+
37+
// Converts a small double column (including a null and a zero) with
// PgConvertNumeric<double> and compares the numeric_out text of every item.
Y_UNIT_TEST(PgConvertNumericDouble) {
    TArenaMemoryContext arena;

    // Builder statuses are checked so a failed append cannot go unnoticed.
    arrow::DoubleBuilder builder;
    ARROW_OK(builder.Append(1.1));
    ARROW_OK(builder.Append(31.37));
    ARROW_OK(builder.AppendNull());
    ARROW_OK(builder.Append(-1.337));
    ARROW_OK(builder.Append(0.0));

    std::shared_ptr<arrow::Array> array;
    ARROW_OK(builder.Finish(&array));

    auto result = PgConvertNumeric<double>(array);
    const auto& data = result->data();

    const char* expected[] = {
        "1.1", "31.37", nullptr, "-1.337", "0"
    };
    const size_t expectedCount = sizeof(expected) / sizeof(expected[0]);

    // The converter must emit exactly one item per input row.
    UNIT_ASSERT_VALUES_EQUAL(static_cast<size_t>(result->length()), expectedCount);

    NUdf::TStringBlockReader<arrow::BinaryType, true> reader;
    for (size_t i = 0; i < expectedCount; ++i) {
        auto item = reader.GetItem(*data, i);
        if (!item) {
            UNIT_ASSERT(expected[i] == nullptr);
        } else {
            // The numeric payload follows a pointer-sized header in each item.
            const char* addr = item.AsStringRef().Data() + sizeof(void*);
            UNIT_ASSERT(expected[i] != nullptr);
            UNIT_ASSERT_VALUES_EQUAL(
                TString(DatumGetCString(DirectFunctionCall1(numeric_out, (Datum)addr))),
                expected[i]
            );
        }
    }
}
72+
73+
// Converts a small int64 column (including a null and a zero) with
// PgConvertNumeric<i64> and compares the numeric_out text of every item.
Y_UNIT_TEST(PgConvertNumericInt) {
    TArenaMemoryContext arena;

    // Builder statuses are checked so a failed append cannot go unnoticed.
    arrow::Int64Builder builder;
    ARROW_OK(builder.Append(11));
    ARROW_OK(builder.Append(3137));
    ARROW_OK(builder.AppendNull());
    ARROW_OK(builder.Append(-1337));
    ARROW_OK(builder.Append(0));

    std::shared_ptr<arrow::Array> array;
    ARROW_OK(builder.Finish(&array));

    auto result = PgConvertNumeric<i64>(array);
    const auto& data = result->data();

    const char* expected[] = {
        "11", "3137", nullptr, "-1337", "0"
    };
    const size_t expectedCount = sizeof(expected) / sizeof(expected[0]);

    // The converter must emit exactly one item per input row.
    UNIT_ASSERT_VALUES_EQUAL(static_cast<size_t>(result->length()), expectedCount);

    NUdf::TStringBlockReader<arrow::BinaryType, true> reader;
    for (size_t i = 0; i < expectedCount; ++i) {
        auto item = reader.GetItem(*data, i);
        if (!item) {
            UNIT_ASSERT(expected[i] == nullptr);
        } else {
            // The numeric payload follows a pointer-sized header in each item.
            const char* addr = item.AsStringRef().Data() + sizeof(void*);
            UNIT_ASSERT(expected[i] != nullptr);
            UNIT_ASSERT_VALUES_EQUAL(
                TString(DatumGetCString(DirectFunctionCall1(numeric_out, (Datum)addr))),
                expected[i]
            );
        }
    }
}
108+
109+
} // Y_UNIT_TEST_SUITE(TArrowUtilsTests)
110+
111+
} // namespace NYql
112+

ydb/library/yql/parser/pg_wrapper/ut/ya.make

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ NO_COMPILER_WARNINGS()
88
INCLUDE(../cflags.inc)
99

1010
SRCS(
11+
arrow_ut.cpp
1112
codegen_ut.cpp
1213
error_ut.cpp
1314
parser_ut.cpp

0 commit comments

Comments
 (0)