Skip to content

feat(query): Support array functions prepend and append #9844

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions src/query/functions/src/scalars/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ use common_expression::vectorize_with_builder_2_arg;
use common_expression::vectorize_with_builder_3_arg;
use common_expression::with_number_mapped_type;
use common_expression::Column;
use common_expression::ColumnBuilder;
use common_expression::Domain;
use common_expression::Function;
use common_expression::FunctionDomain;
Expand Down Expand Up @@ -303,6 +304,32 @@ pub fn register(registry: &mut FunctionRegistry) {
),
);

// `prepend(item, array)`: builds a new array whose first element is `item`,
// followed by every element of `array`. The domain calculator always reports
// `FunctionDomain::Full`.
registry.register_2_arg_core::<GenericType<0>, ArrayType<GenericType<0>>, ArrayType<GenericType<0>>, _, _>(
    "prepend",
    FunctionProperty::default(),
    |_, _| FunctionDomain::Full,
    vectorize_2_arg::<GenericType<0>, ArrayType<GenericType<0>>, ArrayType<GenericType<0>>>(|item, array, _| {
        // Reserve one extra slot for the prepended element.
        let capacity = array.len() + 1;
        let mut result = ColumnBuilder::with_capacity(&array.data_type(), capacity);
        result.push(item);
        result.append_column(&array);
        result.build()
    }),
);

// `append(array, item)`: builds a new array holding every element of `array`
// followed by `item` as the last element. The domain calculator always reports
// `FunctionDomain::Full`.
registry.register_2_arg_core::<ArrayType<GenericType<0>>, GenericType<0>, ArrayType<GenericType<0>>, _, _>(
    "append",
    FunctionProperty::default(),
    |_, _| FunctionDomain::Full,
    vectorize_2_arg::<ArrayType<GenericType<0>>, GenericType<0>, ArrayType<GenericType<0>>>(|array, item, _| {
        // Reserve one extra slot for the appended element.
        let capacity = array.len() + 1;
        let mut result = ColumnBuilder::with_capacity(&array.data_type(), capacity);
        result.append_column(&array);
        result.push(item);
        result.build()
    }),
);

fn eval_contains<T: ArgType>(
lhs: ValueRef<ArrayType<T>>,
rhs: ValueRef<T>,
Expand Down
37 changes: 37 additions & 0 deletions src/query/functions/tests/it/scalars/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ fn test_array() {
test_remove_last(file);
test_contains(file);
test_concat(file);
test_prepend(file);
test_append(file);
}

fn test_create(file: &mut impl Write) {
Expand Down Expand Up @@ -81,6 +83,11 @@ fn test_slice(file: &mut impl Write) {
run_ast(file, "slice(['a', 'b', 'c', 'd'], 0, 2)", &[]);
run_ast(file, "slice(['a', 'b', 'c', 'd'], 1, 4)", &[]);
run_ast(file, "slice(['a', 'b', 'c', 'd'], 2, 6)", &[]);
run_ast(file, "slice([a, b, c], 1, 2)", &[
("a", Int16Type::from_data(vec![0i16, 1, 2])),
("b", Int16Type::from_data(vec![3i16, 4, 5])),
("c", Int16Type::from_data(vec![7i16, 8, 9])),
]);
}

fn test_remove_first(file: &mut impl Write) {
Expand All @@ -89,6 +96,10 @@ fn test_remove_first(file: &mut impl Write) {
run_ast(file, "remove_first([0, 1, 2, NULL])", &[]);
run_ast(file, "remove_first([0, 1, 2, 3])", &[]);
run_ast(file, "remove_first(['a', 'b', 'c', 'd'])", &[]);
run_ast(file, "remove_first([a, b])", &[
("a", Int16Type::from_data(vec![0i16, 1, 2])),
("b", Int16Type::from_data(vec![3i16, 4, 5])),
]);
}

fn test_remove_last(file: &mut impl Write) {
Expand All @@ -97,6 +108,10 @@ fn test_remove_last(file: &mut impl Write) {
run_ast(file, "remove_last([0, 1, 2, NULL])", &[]);
run_ast(file, "remove_last([0, 1, 2, 3])", &[]);
run_ast(file, "remove_last(['a', 'b', 'c', 'd'])", &[]);
run_ast(file, "remove_last([a, b])", &[
("a", Int16Type::from_data(vec![0i16, 1, 2])),
("b", Int16Type::from_data(vec![3i16, 4, 5])),
]);
}

fn test_contains(file: &mut impl Write) {
Expand Down Expand Up @@ -150,3 +165,25 @@ fn test_concat(file: &mut impl Write) {
);
run_ast(file, "concat([1,2,null], [int8_col])", &columns);
}

/// Feeds the `prepend` test expressions to `run_ast`, forwarding `file` to each call.
///
/// Three constant-only expressions are run without input columns, followed by
/// one expression that reads the `Int16` columns `a`, `b` and `c`.
fn test_prepend(file: &mut impl Write) {
    // Expressions built purely from literals; no columns are supplied.
    let literal_cases = [
        "prepend(1, [])",
        "prepend(1, [2, 3, NULL, 4])",
        "prepend('a', ['b', NULL, NULL, 'c', 'd'])",
    ];
    for expr in literal_cases {
        run_ast(file, expr, &[]);
    }

    // Expression evaluated against named input columns.
    let columns = [
        ("a", Int16Type::from_data(vec![0i16, 1, 2])),
        ("b", Int16Type::from_data(vec![3i16, 4, 5])),
        ("c", Int16Type::from_data(vec![6i16, 7, 8])),
    ];
    run_ast(file, "prepend(a, [b, c])", &columns);
}

/// Feeds the `append` test expressions to `run_ast`, forwarding `file` to each call.
///
/// Three constant-only expressions are run without input columns, followed by
/// one expression that reads the `Int16` columns `a`, `b` and `c`.
fn test_append(file: &mut impl Write) {
    // Expressions built purely from literals; no columns are supplied.
    let literal_cases = [
        "append([], 1)",
        "append([2, 3, NULL, 4], 5)",
        "append(['b', NULL, NULL, 'c', 'd'], 'e')",
    ];
    for expr in literal_cases {
        run_ast(file, expr, &[]);
    }

    // Expression evaluated against named input columns.
    let columns = [
        ("a", Int16Type::from_data(vec![0i16, 1, 2])),
        ("b", Int16Type::from_data(vec![3i16, 4, 5])),
        ("c", Int16Type::from_data(vec![6i16, 7, 8])),
    ];
    run_ast(file, "append([b, c], a)", &columns);
}
173 changes: 173 additions & 0 deletions src/query/functions/tests/it/scalars/testdata/array.txt
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,31 @@ output domain : [{"b"..="d"}]
output : ["b", "c", "d"]


ast : slice([a, b, c], 1, 2)
raw expr : slice(array(a::Int16, b::Int16, c::Int16), 1_u8, 2_u8)
checked expr : slice<T0=Int16><Array(T0), UInt64, UInt64>(array<T0=Int16><T0, T0, T0>(a, b, c), to_uint64<UInt8>(1_u8), to_uint64<UInt8>(2_u8))
optimized expr : slice<T0=Int16><Array(T0), UInt64, UInt64>(array<T0=Int16><T0, T0, T0>(a, b, c), 1_u64, 2_u64)
evaluation:
+--------+---------+---------+---------+----------------+
| | a | b | c | Output |
+--------+---------+---------+---------+----------------+
| Type | Int16 | Int16 | Int16 | Array(Int16) |
| Domain | {0..=2} | {3..=5} | {7..=9} | [{0..=9}] |
| Row 0 | 0_i16 | 3_i16 | 7_i16 | [0_i16, 3_i16] |
| Row 1 | 1_i16 | 4_i16 | 8_i16 | [1_i16, 4_i16] |
| Row 2 | 2_i16 | 5_i16 | 9_i16 | [2_i16, 5_i16] |
+--------+---------+---------+---------+----------------+
evaluation (internal):
+--------+--------------------------------------------------------------------------+
| Column | Data |
+--------+--------------------------------------------------------------------------+
| a | Int16([0, 1, 2]) |
| b | Int16([3, 4, 5]) |
| c | Int16([7, 8, 9]) |
| Output | ArrayColumn { values: Int16([0, 3, 1, 4, 2, 5]), offsets: [0, 2, 4, 6] } |
+--------+--------------------------------------------------------------------------+


ast : remove_first([])
raw expr : remove_first(array())
checked expr : remove_first<Array(Nothing)>(array<>())
Expand Down Expand Up @@ -318,6 +343,29 @@ output domain : [{"b"..="d"}]
output : ["b", "c", "d"]


ast : remove_first([a, b])
raw expr : remove_first(array(a::Int16, b::Int16))
checked expr : remove_first<T0=Int16><Array(T0)>(array<T0=Int16><T0, T0>(a, b))
evaluation:
+--------+---------+---------+--------------+
| | a | b | Output |
+--------+---------+---------+--------------+
| Type | Int16 | Int16 | Array(Int16) |
| Domain | {0..=2} | {3..=5} | [{0..=5}] |
| Row 0 | 0_i16 | 3_i16 | [3_i16] |
| Row 1 | 1_i16 | 4_i16 | [4_i16] |
| Row 2 | 2_i16 | 5_i16 | [5_i16] |
+--------+---------+---------+--------------+
evaluation (internal):
+--------+-----------------------------------------------------------------+
| Column | Data |
+--------+-----------------------------------------------------------------+
| a | Int16([0, 1, 2]) |
| b | Int16([3, 4, 5]) |
| Output | ArrayColumn { values: Int16([3, 4, 5]), offsets: [0, 1, 2, 3] } |
+--------+-----------------------------------------------------------------+


ast : remove_last([])
raw expr : remove_last(array())
checked expr : remove_last<Array(Nothing)>(array<>())
Expand Down Expand Up @@ -363,6 +411,29 @@ output domain : [{"a"..="c"}]
output : ["a", "b", "c"]


ast : remove_last([a, b])
raw expr : remove_last(array(a::Int16, b::Int16))
checked expr : remove_last<T0=Int16><Array(T0)>(array<T0=Int16><T0, T0>(a, b))
evaluation:
+--------+---------+---------+--------------+
| | a | b | Output |
+--------+---------+---------+--------------+
| Type | Int16 | Int16 | Array(Int16) |
| Domain | {0..=2} | {3..=5} | [{0..=5}] |
| Row 0 | 0_i16 | 3_i16 | [0_i16] |
| Row 1 | 1_i16 | 4_i16 | [1_i16] |
| Row 2 | 2_i16 | 5_i16 | [2_i16] |
+--------+---------+---------+--------------+
evaluation (internal):
+--------+-----------------------------------------------------------------+
| Column | Data |
+--------+-----------------------------------------------------------------+
| a | Int16([0, 1, 2]) |
| b | Int16([3, 4, 5]) |
| Output | ArrayColumn { values: Int16([0, 1, 2]), offsets: [0, 1, 2, 3] } |
+--------+-----------------------------------------------------------------+


ast : false in (false, true)
raw expr : or(eq(false, false), eq(false, true))
checked expr : or<Boolean, Boolean>(eq<Boolean, Boolean>(false, false), eq<Boolean, Boolean>(false, true))
Expand Down Expand Up @@ -561,3 +632,105 @@ evaluation (internal):
+----------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+


ast : prepend(1, [])
raw expr : prepend(1_u8, array())
checked expr : prepend<T0=UInt8><T0, Array(T0)>(1_u8, CAST(array<>() AS Array(UInt8)))
optimized expr : [1_u8]
output type : Array(UInt8)
output domain : [{1..=1}]
output : [1_u8]


ast : prepend(1, [2, 3, NULL, 4])
raw expr : prepend(1_u8, array(2_u8, 3_u8, NULL, 4_u8))
checked expr : prepend<T0=UInt8 NULL><T0, Array(T0)>(CAST(1_u8 AS UInt8 NULL), array<T0=UInt8 NULL><T0, T0, T0, T0>(CAST(2_u8 AS UInt8 NULL), CAST(3_u8 AS UInt8 NULL), CAST(NULL AS UInt8 NULL), CAST(4_u8 AS UInt8 NULL)))
optimized expr : [1_u8, 2_u8, 3_u8, NULL, 4_u8]
output type : Array(UInt8 NULL)
output domain : [{0..=4} ∪ {NULL}]
output : [1_u8, 2_u8, 3_u8, NULL, 4_u8]


ast : prepend('a', ['b', NULL, NULL, 'c', 'd'])
raw expr : prepend("a", array("b", NULL, NULL, "c", "d"))
checked expr : prepend<T0=String NULL><T0, Array(T0)>(CAST("a" AS String NULL), array<T0=String NULL><T0, T0, T0, T0, T0>(CAST("b" AS String NULL), CAST(NULL AS String NULL), CAST(NULL AS String NULL), CAST("c" AS String NULL), CAST("d" AS String NULL)))
optimized expr : ["a", "b", NULL, NULL, "c", "d"]
output type : Array(String NULL)
output domain : [{""..="d"} ∪ {NULL}]
output : ["a", "b", NULL, NULL, "c", "d"]


ast : prepend(a, [b, c])
raw expr : prepend(a::Int16, array(b::Int16, c::Int16))
checked expr : prepend<T0=Int16><T0, Array(T0)>(a, array<T0=Int16><T0, T0>(b, c))
evaluation:
+--------+---------+---------+---------+-----------------------+
| | a | b | c | Output |
+--------+---------+---------+---------+-----------------------+
| Type | Int16 | Int16 | Int16 | Array(Int16) |
| Domain | {0..=2} | {3..=5} | {6..=8} | [{-32768..=32767}] |
| Row 0 | 0_i16 | 3_i16 | 6_i16 | [0_i16, 3_i16, 6_i16] |
| Row 1 | 1_i16 | 4_i16 | 7_i16 | [1_i16, 4_i16, 7_i16] |
| Row 2 | 2_i16 | 5_i16 | 8_i16 | [2_i16, 5_i16, 8_i16] |
+--------+---------+---------+---------+-----------------------+
evaluation (internal):
+--------+-----------------------------------------------------------------------------------+
| Column | Data |
+--------+-----------------------------------------------------------------------------------+
| a | Int16([0, 1, 2]) |
| b | Int16([3, 4, 5]) |
| c | Int16([6, 7, 8]) |
| Output | ArrayColumn { values: Int16([0, 3, 6, 1, 4, 7, 2, 5, 8]), offsets: [0, 3, 6, 9] } |
+--------+-----------------------------------------------------------------------------------+


ast : append([], 1)
raw expr : append(array(), 1_u8)
checked expr : append<T0=UInt8><Array(T0), T0>(CAST(array<>() AS Array(UInt8)), 1_u8)
optimized expr : [1_u8]
output type : Array(UInt8)
output domain : [{1..=1}]
output : [1_u8]


ast : append([2, 3, NULL, 4], 5)
raw expr : append(array(2_u8, 3_u8, NULL, 4_u8), 5_u8)
checked expr : append<T0=UInt8 NULL><Array(T0), T0>(array<T0=UInt8 NULL><T0, T0, T0, T0>(CAST(2_u8 AS UInt8 NULL), CAST(3_u8 AS UInt8 NULL), CAST(NULL AS UInt8 NULL), CAST(4_u8 AS UInt8 NULL)), CAST(5_u8 AS UInt8 NULL))
optimized expr : [2_u8, 3_u8, NULL, 4_u8, 5_u8]
output type : Array(UInt8 NULL)
output domain : [{0..=5} ∪ {NULL}]
output : [2_u8, 3_u8, NULL, 4_u8, 5_u8]


ast : append(['b', NULL, NULL, 'c', 'd'], 'e')
raw expr : append(array("b", NULL, NULL, "c", "d"), "e")
checked expr : append<T0=String NULL><Array(T0), T0>(array<T0=String NULL><T0, T0, T0, T0, T0>(CAST("b" AS String NULL), CAST(NULL AS String NULL), CAST(NULL AS String NULL), CAST("c" AS String NULL), CAST("d" AS String NULL)), CAST("e" AS String NULL))
optimized expr : ["b", NULL, NULL, "c", "d", "e"]
output type : Array(String NULL)
output domain : [{""..="e"} ∪ {NULL}]
output : ["b", NULL, NULL, "c", "d", "e"]


ast : append([b, c], a)
raw expr : append(array(b::Int16, c::Int16), a::Int16)
checked expr : append<T0=Int16><Array(T0), T0>(array<T0=Int16><T0, T0>(b, c), a)
evaluation:
+--------+---------+---------+---------+-----------------------+
| | a | b | c | Output |
+--------+---------+---------+---------+-----------------------+
| Type | Int16 | Int16 | Int16 | Array(Int16) |
| Domain | {0..=2} | {3..=5} | {6..=8} | [{-32768..=32767}] |
| Row 0 | 0_i16 | 3_i16 | 6_i16 | [3_i16, 6_i16, 0_i16] |
| Row 1 | 1_i16 | 4_i16 | 7_i16 | [4_i16, 7_i16, 1_i16] |
| Row 2 | 2_i16 | 5_i16 | 8_i16 | [5_i16, 8_i16, 2_i16] |
+--------+---------+---------+---------+-----------------------+
evaluation (internal):
+--------+-----------------------------------------------------------------------------------+
| Column | Data |
+--------+-----------------------------------------------------------------------------------+
| a | Int16([0, 1, 2]) |
| b | Int16([3, 4, 5]) |
| c | Int16([6, 7, 8]) |
| Output | ArrayColumn { values: Int16([3, 6, 0, 4, 7, 1, 5, 8, 2]), offsets: [0, 3, 6, 9] } |
+--------+-----------------------------------------------------------------------------------+


Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ and(Boolean, Boolean) :: Boolean
and(Boolean NULL, Boolean NULL) :: Boolean NULL
and_filters(Boolean, Boolean) :: Boolean
and_filters(Boolean NULL, Boolean NULL) :: Boolean NULL
append(Array(T0), T0) :: Array(T0)
array() :: Array(Nothing)
as_array(Variant) :: Variant NULL
as_array(Variant NULL) :: Variant NULL
Expand Down Expand Up @@ -2127,6 +2128,7 @@ position(String, String) :: UInt64
position(String NULL, String NULL) :: UInt64 NULL
pow(Float64, Float64) :: Float64
pow(Float64 NULL, Float64 NULL) :: Float64 NULL
prepend(T0, Array(T0)) :: Array(T0)
quote(String) :: String
quote(String NULL) :: String NULL
radians(Float64) :: Float64
Expand Down