Skip to content

Commit 73ab921

Browse files
authored
Cast-extend aggregation functions (#381)
### What This PR makes the introspection of aggregation functions take implicit casts into account. This enables aggregation functions on domain types, which would typically only be defined on the base type. It also enables certain other aggregations for scalar types that weren't defined before, most notably aggregation functions for `float4` on CockroachDB, which becomes accessible through implicit casts to `float8`. ### How The cast-extension for aggregation functions introspection code is based on the same for comparison operators. We also change our notion of "occurring scalar types" to include the return types aggregation functions. Previously, aggregation functions could be filtered out if their return type didn't feature in the set of occurring scalar types, which was unnecessarily strict.
1 parent 6b60788 commit 73ab921

File tree

13 files changed

+2284
-114
lines changed

13 files changed

+2284
-114
lines changed

changelog.md

+2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
### Added
66

7+
- Make aggregation functions available through implicit casts.
8+
([#381](https://github.com/hasura/ndc-postgres/pull/380))
79
- Support for introspecting domain types.
810
([#380](https://github.com/hasura/ndc-postgres/pull/380))
911

crates/configuration/src/version3/mod.rs

+11-9
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,11 @@ pub async fn introspect(
151151
.instrument(info_span!("Decode introspection result"))
152152
.await?;
153153

154-
let scalar_types = occurring_scalar_types(&tables, &args.metadata.native_queries);
154+
let scalar_types = occurring_scalar_types(
155+
&tables,
156+
&args.metadata.native_queries,
157+
&args.metadata.aggregate_functions,
158+
);
155159

156160
let relevant_comparison_operators =
157161
filter_comparison_operators(&scalar_types, comparison_operators);
@@ -178,6 +182,7 @@ pub async fn introspect(
178182
pub fn occurring_scalar_types(
179183
tables: &metadata::TablesInfo,
180184
native_queries: &metadata::NativeQueries,
185+
aggregate_functions: &metadata::AggregateFunctions,
181186
) -> BTreeSet<metadata::ScalarType> {
182187
let tables_column_types = tables
183188
.0
@@ -191,6 +196,10 @@ pub fn occurring_scalar_types(
191196
.0
192197
.values()
193198
.flat_map(|v| v.arguments.values().map(|c| &c.r#type));
199+
let aggregate_functions_result_types = aggregate_functions
200+
.0
201+
.values()
202+
.flat_map(|x| x.values().map(|agg_fn| agg_fn.return_type.clone()));
194203

195204
tables_column_types
196205
.chain(native_queries_column_types)
@@ -199,6 +208,7 @@ pub fn occurring_scalar_types(
199208
metadata::Type::ScalarType(ref t) => Some(t.clone()), // only keep scalar types
200209
metadata::Type::ArrayType(_) | metadata::Type::CompositeType(_) => None,
201210
})
211+
.chain(aggregate_functions_result_types)
202212
.collect::<BTreeSet<metadata::ScalarType>>()
203213
}
204214

@@ -240,14 +250,6 @@ fn filter_aggregate_functions(
240250
.0
241251
.into_iter()
242252
.filter(|(typ, _)| scalar_types.contains(typ))
243-
.map(|(typ, ops)| {
244-
(
245-
typ,
246-
ops.into_iter()
247-
.filter(|(_, op)| scalar_types.contains(&op.return_type))
248-
.collect(),
249-
)
250-
})
251253
.collect(),
252254
)
253255
}

crates/configuration/src/version3/version3.sql

+107-41
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,46 @@ WITH
354354
-- the purpose of selecting preferred implicit casts.
355355
),
356356

357+
implicit_casts AS
358+
(
359+
SELECT
360+
t_from.type_name as from_type,
361+
t_to.type_name as to_type
362+
FROM
363+
pg_cast
364+
INNER JOIN
365+
scalar_types
366+
AS t_from
367+
ON (t_from.type_id = pg_cast.castsource)
368+
INNER JOIN
369+
scalar_types
370+
AS t_to
371+
ON (t_to.type_id = pg_cast.casttarget)
372+
WHERE
373+
pg_cast.castcontext = 'i'
374+
AND t_from.type_name != t_to.type_name
375+
376+
-- This is a good candidate for a configurable option.
377+
AND (t_from.type_name, t_to.type_name) NOT IN
378+
(
379+
-- Ignore other casts that are unlikely to ever be relevant
380+
('bytea', 'geography'),
381+
('bytea', 'geometry'),
382+
('geography', 'bytea'),
383+
('geometry', 'bytea'),
384+
('geometry', 'text'),
385+
('text', 'geometry')
386+
)
387+
UNION
388+
-- Any domain type may be implicitly cast to its base type, even though these casts
389+
-- are not declared in `pg_cast`.
390+
SELECT
391+
domain_types.type_name as from_type,
392+
domain_types.base_type as to_type
393+
FROM
394+
domain_types
395+
),
396+
357397
-- Aggregate functions are recorded across 'pg_proc' and 'pg_aggregate', see
358398
-- https://www.postgresql.org/docs/current/catalog-pg-proc.html and
359399
-- https://www.postgresql.org/docs/current/catalog-pg-aggregate.html for
@@ -372,6 +412,13 @@ WITH
372412
FROM
373413
pg_catalog.pg_proc AS proc
374414

415+
INNER JOIN
416+
-- Until the schema is made part of our model of types we only consider
417+
-- types defined in the public schema.
418+
unqualified_schemas_for_types_and_procedures
419+
AS q
420+
ON (q.schema_id = proc.pronamespace)
421+
375422
-- fetch the argument type name, discarding any unsupported types
376423
INNER JOIN scalar_types AS arg_type
377424
ON (arg_type.type_id = proc.proargtypes[0])
@@ -393,6 +440,65 @@ WITH
393440
AND aggregate.aggnumdirectargs = 0
394441

395442
),
443+
aggregates_cast_extended AS
444+
(
445+
WITH
446+
type_combinations AS
447+
(
448+
SELECT
449+
agg.proc_name AS proc_name,
450+
agg.return_type AS return_type,
451+
cast1.from_type AS argument_type,
452+
true AS argument_casted
453+
FROM
454+
aggregates
455+
AS agg
456+
INNER JOIN
457+
implicit_casts
458+
AS cast1
459+
ON (cast1.to_type = agg.argument_type)
460+
UNION
461+
SELECT
462+
agg.proc_name AS proc_name,
463+
agg.return_type AS return_type,
464+
agg.argument_type AS argument_type,
465+
false AS argument_casted
466+
FROM
467+
aggregates
468+
AS agg
469+
),
470+
471+
preferred_combinations AS
472+
(
473+
SELECT
474+
*,
475+
-- CockroachDB does not observe ORDER BY of nested expressions,
476+
-- So we cannot use the DISTINCT ON idiom to remove duplicates.
477+
-- Therefore we resort to filtering by ordered ROW_NUMBER().
478+
ROW_NUMBER()
479+
OVER
480+
(
481+
PARTITION BY
482+
proc_name, argument_type
483+
ORDER BY
484+
-- Prefer uncast argument.
485+
NOT argument_casted DESC,
486+
-- Arbitrary desperation: Lexical ordering
487+
return_type ASC
488+
)
489+
AS row_number
490+
FROM
491+
type_combinations
492+
)
493+
SELECT
494+
proc_name,
495+
argument_type,
496+
return_type
497+
FROM
498+
preferred_combinations
499+
WHERE
500+
row_number = 1
501+
),
396502

397503
-- Comparison procedures are any entries in 'pg_proc' that happen to be
398504
-- binary functions that return booleans. We also require, for the sake of
@@ -508,46 +614,6 @@ WITH
508614
SELECT * FROM comparison_procedures
509615
),
510616

511-
implicit_casts AS
512-
(
513-
SELECT
514-
t_from.type_name as from_type,
515-
t_to.type_name as to_type
516-
FROM
517-
pg_cast
518-
INNER JOIN
519-
scalar_types
520-
AS t_from
521-
ON (t_from.type_id = pg_cast.castsource)
522-
INNER JOIN
523-
scalar_types
524-
AS t_to
525-
ON (t_to.type_id = pg_cast.casttarget)
526-
WHERE
527-
pg_cast.castcontext = 'i'
528-
AND t_from.type_name != t_to.type_name
529-
530-
-- This is a good candidate for a configurable option.
531-
AND (t_from.type_name, t_to.type_name) NOT IN
532-
(
533-
-- Ignore other casts that are unlikely to ever be relevant
534-
('bytea', 'geography'),
535-
('bytea', 'geometry'),
536-
('geography', 'bytea'),
537-
('geometry', 'bytea'),
538-
('geometry', 'text'),
539-
('text', 'geometry')
540-
)
541-
UNION
542-
-- Any domain type may be implicitly cast to its base type, even though these casts
543-
-- are not declared in `pg_cast`.
544-
SELECT
545-
domain_types.type_name as from_type,
546-
domain_types.base_type as to_type
547-
FROM
548-
domain_types
549-
),
550-
551617
-- Some comparison operators are not defined explicitly for every type they would be
552618
-- valid for, relying instead on implicit casts to extend the types they can apply to.
553619
--
@@ -1103,7 +1169,7 @@ FROM
11031169
agg.argument_type,
11041170
agg.return_type
11051171
FROM
1106-
aggregates AS agg
1172+
aggregates_cast_extended AS agg
11071173
ORDER BY argument_type, proc_name, return_type
11081174
) AS agg
11091175
GROUP BY agg.argument_type

crates/connectors/ndc-postgres/src/schema.rs

+56-52
Original file line numberDiff line numberDiff line change
@@ -21,60 +21,64 @@ pub async fn get_schema(
2121
) -> Result<models::SchemaResponse, connector::SchemaError> {
2222
let metadata = &config.metadata;
2323
let mut scalar_types: BTreeMap<String, models::ScalarType> =
24-
configuration::occurring_scalar_types(&metadata.tables, &metadata.native_queries)
25-
.iter()
26-
.map(|scalar_type| {
27-
(
28-
scalar_type.0.clone(),
29-
models::ScalarType {
30-
aggregate_functions: metadata
31-
.aggregate_functions
32-
.0
33-
.get(scalar_type)
34-
.unwrap_or(&BTreeMap::new())
35-
.iter()
36-
.map(|(function_name, function_definition)| {
37-
(
38-
function_name.clone(),
39-
models::AggregateFunctionDefinition {
40-
result_type: models::Type::Named {
41-
name: function_definition.return_type.0.clone(),
42-
},
24+
configuration::occurring_scalar_types(
25+
&metadata.tables,
26+
&metadata.native_queries,
27+
&metadata.aggregate_functions,
28+
)
29+
.iter()
30+
.map(|scalar_type| {
31+
(
32+
scalar_type.0.clone(),
33+
models::ScalarType {
34+
aggregate_functions: metadata
35+
.aggregate_functions
36+
.0
37+
.get(scalar_type)
38+
.unwrap_or(&BTreeMap::new())
39+
.iter()
40+
.map(|(function_name, function_definition)| {
41+
(
42+
function_name.clone(),
43+
models::AggregateFunctionDefinition {
44+
result_type: models::Type::Named {
45+
name: function_definition.return_type.0.clone(),
4346
},
44-
)
45-
})
46-
.collect(),
47-
comparison_operators: metadata
48-
.comparison_operators
49-
.0
50-
.get(scalar_type)
51-
.unwrap_or(&BTreeMap::new())
52-
.iter()
53-
.map(|(op_name, op_def)| {
54-
(
55-
op_name.clone(),
56-
match op_def.operator_kind {
57-
metadata::OperatorKind::Equal => {
58-
models::ComparisonOperatorDefinition::Equal
47+
},
48+
)
49+
})
50+
.collect(),
51+
comparison_operators: metadata
52+
.comparison_operators
53+
.0
54+
.get(scalar_type)
55+
.unwrap_or(&BTreeMap::new())
56+
.iter()
57+
.map(|(op_name, op_def)| {
58+
(
59+
op_name.clone(),
60+
match op_def.operator_kind {
61+
metadata::OperatorKind::Equal => {
62+
models::ComparisonOperatorDefinition::Equal
63+
}
64+
metadata::OperatorKind::In => {
65+
models::ComparisonOperatorDefinition::In
66+
}
67+
metadata::OperatorKind::Custom => {
68+
models::ComparisonOperatorDefinition::Custom {
69+
argument_type: models::Type::Named {
70+
name: op_def.argument_type.0.clone(),
71+
},
5972
}
60-
metadata::OperatorKind::In => {
61-
models::ComparisonOperatorDefinition::In
62-
}
63-
metadata::OperatorKind::Custom => {
64-
models::ComparisonOperatorDefinition::Custom {
65-
argument_type: models::Type::Named {
66-
name: op_def.argument_type.0.clone(),
67-
},
68-
}
69-
}
70-
},
71-
)
72-
})
73-
.collect(),
74-
},
75-
)
76-
})
77-
.collect();
73+
}
74+
},
75+
)
76+
})
77+
.collect(),
78+
},
79+
)
80+
})
81+
.collect();
7882

7983
let collections_by_identifier: BTreeMap<(&str, &str), &str> = metadata
8084
.tables

0 commit comments

Comments
 (0)