Skip to content

Commit c702546

Browse files
SamirTalwar and plcplc authored
Remove unnecessary DISTINCT ON expressions from introspection SQL. (#260)
### What

There are two `DISTINCT ON` expressions in the introspection SQL:

1. selecting only one relation per name
2. selecting only one aggregate function per input type

These are both unnecessary.

This work was inspired by CockroachDB failing to respect ordering within CTEs.

### How

The first, selecting only one relation per name, was there to avoid having to deal with multiple relations of the same name in distinct schemas, but we now support schemas (and prefix all relations with their schema name), so this is unnecessary and possibly dangerous.

The second, selecting only one aggregate function per input type, is not necessary because we only support aggregate functions which take a single argument, and therefore there can only be one per input type _anyway_, i.e. you cannot have two aggregate functions of the same name with the same input type(s) and different return types (return type polymorphism) in PostgreSQL.

We therefore simplify these queries to avoid having to use `DISTINCT ON` at all.

Co-authored-by: Philip Lykke Carlsen <[email protected]>
1 parent 9e0a2b4 commit c702546

File tree

1 file changed

+19
-71
lines changed
  • crates/connectors/ndc-postgres/src/configuration

1 file changed

+19
-71
lines changed

crates/connectors/ndc-postgres/src/configuration/version2.sql

+19-71
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,7 @@ WITH
6262
cl.relkind relation_kind
6363
FROM
6464
pg_class cl
65-
),
66-
queryable_relations AS
67-
(
68-
SELECT DISTINCT ON (relation_name) relations.*
69-
FROM relations
70-
WHERE relation_kind IN
65+
WHERE relkind IN
7166
-- Lots of different types of relations exist, but we're only interested in
7267
-- the ones that can be queried.
7368
(
@@ -82,11 +77,6 @@ WITH
8277
-- c = composite type,
8378
-- I = partitioned index
8479
)
85-
86-
-- Since we will _not_ be grouping by a key we need this to be ordered
87-
-- to get deterministic results.
88-
-- (Specifically, we do not yet take schemas into account)
89-
ORDER BY relation_name, schema_id, relation_kind
9080
),
9181

9282
-- Columns are recorded in `pg_attribute`. An 'attribute' is the generic term
@@ -301,78 +291,37 @@ WITH
301291
-- their schema.
302292
aggregates AS
303293
(
304-
WITH
305-
-- The arguments to an aggregate function are an array of type oids, which
306-
-- we want to resolve to an array of type names instead.
307-
-- Somewhat awkwardly, this means we have to unnest, join on types, and
308-
-- array_agg and group by.
309-
aggregate_argument_types AS
310-
(
311-
SELECT
312-
arg.proc_id,
313-
array_agg(arg.type_name) AS argument_types
314-
FROM
315-
(
316-
SELECT
317-
proc.proc_id,
318-
t.type_name
319-
FROM
320-
(
321-
SELECT
322-
proc.oid AS proc_id,
323-
unnest(proc.proargtypes) AS type_id
324-
FROM
325-
pg_catalog.pg_proc AS proc
326-
WHERE
327-
-- We only support single-argument aggregates currently.
328-
-- This assertion is important to make here since joining with
329-
-- 'types' filter arguments of polymorphic type, and we might
330-
-- risk ending up with one argument later.
331-
cardinality(proc.proargtypes) = 1
332-
)
333-
AS proc
334-
INNER JOIN
335-
scalar_types AS t
336-
USING (type_id)
337-
)
338-
AS arg
339-
GROUP BY arg.proc_id
340-
HAVING
341-
-- We need to check that we still have an argument, since we're
342-
-- filtering by our restricted notion of scalar types, which may
343-
-- exclude some types (e.g. pseudo-types and array types).
344-
cardinality(array_agg(arg.type_name)) = 1
345-
)
346294
SELECT
347295
proc.oid AS proc_id,
348296
proc.proname AS proc_name,
349297
proc.pronamespace AS schema_id,
350-
args.argument_types,
298+
arg_type.type_name as argument_type,
351299
ret_type.type_name as return_type
352-
353300
-- Columns that will likely be of interest soon:
354301
-- proc.proargnames AS argument_names,
355302

356303
FROM
357304
pg_catalog.pg_proc AS proc
358305

359-
INNER JOIN aggregate_argument_types
360-
AS args
361-
ON (proc.oid = args.proc_id)
306+
-- fetch the argument type name, discarding any unsupported types
307+
INNER JOIN scalar_types AS arg_type
308+
ON (arg_type.type_id = proc.proargtypes[0])
362309

363-
INNER JOIN scalar_types
364-
AS ret_type
310+
-- fetch the return type name, discarding any unsupported types
311+
INNER JOIN scalar_types AS ret_type
365312
ON (ret_type.type_id = proc.prorettype)
366313

367-
-- Restrict our scope to only aggregation functions
368-
INNER JOIN pg_aggregate
369-
ON (pg_aggregate.aggfnoid = proc.oid)
314+
-- restrict our scope to only aggregation functions
315+
INNER JOIN pg_aggregate AS aggregate
316+
ON (aggregate.aggfnoid = proc.oid)
370317

371318
WHERE
372-
-- We are only interested in functions:
373-
-- * Which are aggregation functions.
374-
-- * Which don't take any 'direct' (i.e., non-aggregation) arguments
375-
pg_aggregate.aggnumdirectargs = 0
319+
-- We are only interested in functions:
320+
-- * which take a single input argument
321+
-- * which are aggregation functions
322+
-- * which don't take any 'direct' (i.e., non-aggregation) arguments
323+
proc.pronargs = 1
324+
AND aggregate.aggnumdirectargs = 0
376325

377326
),
378327

@@ -849,7 +798,7 @@ FROM
849798
)
850799
AS result
851800
FROM
852-
queryable_relations
801+
relations
853802
AS rel
854803

855804
LEFT OUTER JOIN
@@ -1055,10 +1004,9 @@ FROM
10551004
) AS routines
10561005
FROM
10571006
(
1058-
-- We only support aggregation functions that take a single argument.
1059-
SELECT DISTINCT ON (argument_type, proc_name)
1007+
SELECT
10601008
agg.proc_name,
1061-
agg.argument_types[1] as argument_type,
1009+
agg.argument_type,
10621010
agg.return_type
10631011
FROM
10641012
aggregates AS agg

0 commit comments

Comments
 (0)