Skip to content

Commit a2b2ed6

Browse files
author
Artem Fadeev
committed
Fix build_knn_matrix (now called update_knn_matrix)
Previous version of build_knn_matrix had an unreachable branch (`if (features!=NULL)`), which lead to use_wide_search having no effect. There was also a memory bug of copying a memory area into itself. predict_for_relation was fixed with interoperation of use_wide_search and predict_with_few_neighbors features in mind. Additions to the look_a_like regression test reflect those changes. This commit also removes unused arguments from several functions and fixes a couple of typos.
1 parent 631b341 commit a2b2ed6

11 files changed

+248
-77
lines changed

Diff for: cardinality_estimation.c

+11-12
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,17 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns,
8181
&ncols, &features);
8282
data = OkNNr_allocate(ncols);
8383

84-
if (load_fss_ext(query_context.fspace_hash, *fss, data, NULL))
84+
if (load_aqo_data(query_context.fspace_hash, *fss, data, false) &&
85+
data->rows >= (aqo_predict_with_few_neighbors ? 1 : aqo_k))
8586
result = OkNNr_predict(data, features);
87+
/* Try to search in surrounding feature spaces for the same node */
88+
else if (use_wide_search && load_aqo_data(query_context.fspace_hash, *fss, data, true))
89+
{
90+
elog(DEBUG5, "[AQO] Make prediction for fss "INT64_FORMAT" by a neighbour "
91+
"includes %d feature(s) and %d fact(s).",
92+
(int64) *fss, data->cols, data->rows);
93+
result = OkNNr_predict(data, features);
94+
}
8695
else
8796
{
8897
/*
@@ -91,17 +100,7 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns,
91100
* small part of paths was used for AQO learning and stored into
92101
* the AQO knowledge base.
93102
*/
94-
95-
/* Try to search in surrounding feature spaces for the same node */
96-
if (!load_aqo_data(query_context.fspace_hash, *fss, data, NULL, use_wide_search, features))
97-
result = -1;
98-
else
99-
{
100-
elog(DEBUG5, "[AQO] Make prediction for fss %d by a neighbour "
101-
"includes %d feature(s) and %d fact(s).",
102-
*fss, data->cols, data->rows);
103-
result = OkNNr_predict(data, features);
104-
}
103+
result = -1;
105104
}
106105

107106
#ifdef AQO_DEBUG_PRINT

Diff for: cardinality_hooks.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,7 @@ predict_num_groups(PlannerInfo *root, Path *subpath, List *group_exprs,
413413
*fss = get_grouped_exprs_hash(child_fss, group_exprs);
414414
memset(&data, 0, sizeof(OkNNrdata));
415415

416-
if (!load_fss_ext(query_context.fspace_hash, *fss, &data, NULL))
416+
if (!load_aqo_data(query_context.fspace_hash, *fss, &data, false))
417417
return -1;
418418

419419
Assert(data.rows == 1);

Diff for: expected/gucs.out

+1
Original file line numberDiff line numberDiff line change
@@ -142,4 +142,5 @@ SELECT count(*) FROM aqo_query_stat;
142142
0
143143
(1 row)
144144

145+
DROP TABLE t;
145146
DROP EXTENSION aqo;

Diff for: expected/look_a_like.out

+124-1
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@ SELECT true AS success FROM aqo_reset();
99
SET aqo.wide_search = 'on';
1010
SET aqo.mode = 'learn';
1111
SET aqo.show_details = 'on';
12-
set aqo.show_hash = 'off';
12+
SET aqo.show_hash = 'off';
1313
SET aqo.min_neighbors_for_predicting = 1;
14+
SET aqo.predict_with_few_neighbors = 'off';
1415
SET enable_nestloop = 'off';
1516
SET enable_mergejoin = 'off';
1617
SET enable_material = 'off';
@@ -553,9 +554,131 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L
553554
JOINS: 2
554555
(24 rows)
555556

557+
-- Next few test cases focus on fss corresponding to (x1 > ? AND x2 < ? AND x3 < ?). We will denote
558+
-- it by fss0. At this moment there is exactly one fs with (fs, fss0, dbid) record in aqo_data. We'll
559+
-- refer to it as fs0.
560+
-- Let's create another fs for fss0. We'll call this fs fs1. Since aqo.wide_search='on',
561+
-- aqo.min_neighbors_for_predicting=1, and there is (fs0, fss0, dbid) data record, AQO must be used here.
562+
SELECT str AS result
563+
FROM expln('
564+
SELECT * FROM A WHERE x1 > -100 AND x2 < 10 AND x3 < 10;') AS str
565+
WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%';
566+
result
567+
----------------------------------------------------------------------
568+
Seq Scan on public.a (actual rows=100 loops=1)
569+
AQO: rows=20, error=-400%
570+
Output: x1, x2, x3
571+
Filter: ((a.x1 > '-100'::integer) AND (a.x2 < 10) AND (a.x3 < 10))
572+
Using aqo: true
573+
AQO mode: LEARN
574+
JOINS: 0
575+
(7 rows)
576+
577+
-- Now there are 2 data records for fss0: one for (fs0, fss0, dbid) and one for (fs1, fss0, dbid)
578+
-- We repeat previous query, but set aqo.min_neighbors_for_predicting to 2. Since aqo.predict_with_few_neighbors
579+
-- is 'off', AQO is obliged to use both data records for fss0.
580+
SET aqo.min_neighbors_for_predicting = 2;
581+
SELECT str AS result
582+
FROM expln('
583+
SELECT * FROM A WHERE x1 > 1 AND x2 < 10 AND x3 < 10;') AS str
584+
WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%';
585+
result
586+
--------------------------------------------------------
587+
Seq Scan on public.a (actual rows=80 loops=1)
588+
AQO: rows=77, error=-4%
589+
Output: x1, x2, x3
590+
Filter: ((a.x1 > 1) AND (a.x2 < 10) AND (a.x3 < 10))
591+
Rows Removed by Filter: 20
592+
Using aqo: true
593+
AQO mode: LEARN
594+
JOINS: 0
595+
(8 rows)
596+
597+
-- Now there are 3 data records for fss0: 1 for (fs0, fss0, dbid) and 2 for (fs1, fss0, dbid)
598+
-- Lastly, we run invoke query with previously unseen fs with fss0 feature subspace. AQO must use
599+
-- three data records from two neighbors for this one.
600+
SET aqo.min_neighbors_for_predicting = 3;
601+
SELECT str AS result
602+
FROM expln('
603+
SELECT x2 FROM A WHERE x1 > 3 AND x2 < 10 AND x3 < 10 GROUP BY(x2);') AS str
604+
WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%';
605+
result
606+
--------------------------------------------------------------
607+
HashAggregate (actual rows=6 loops=1)
608+
AQO not used
609+
Output: x2
610+
Group Key: a.x2
611+
-> Seq Scan on public.a (actual rows=60 loops=1)
612+
AQO: rows=71, error=15%
613+
Output: x1, x2, x3
614+
Filter: ((a.x1 > 3) AND (a.x2 < 10) AND (a.x3 < 10))
615+
Rows Removed by Filter: 40
616+
Using aqo: true
617+
AQO mode: LEARN
618+
JOINS: 0
619+
(12 rows)
620+
621+
-----
622+
DROP TABLE IF EXISTS t;
623+
NOTICE: table "t" does not exist, skipping
624+
CREATE TABLE t AS SELECT x, x AS y, x AS z FROM generate_series(1, 10000) x;
625+
ANALYZE t;
626+
SELECT true AS success FROM aqo_reset();
627+
success
628+
---------
629+
t
630+
(1 row)
631+
632+
-- Test that when there are less records than aqo.min_neighbors_for_predicting for given (fs, fss, dbid)
633+
-- and aqo.predict_with_few_neighbors is off, those records have higher precedence for cardinality estimation
634+
-- than neighbors' records.
635+
SELECT str AS result
636+
FROM expln('
637+
select * from t where x <= 10000 and y <= 10000 and z <= 10000;') AS str
638+
WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%';
639+
result
640+
------------------------------------------------------------------
641+
Seq Scan on public.t (actual rows=10000 loops=1)
642+
AQO not used
643+
Output: x, y, z
644+
Filter: ((t.x <= 10000) AND (t.y <= 10000) AND (t.z <= 10000))
645+
Using aqo: true
646+
AQO mode: LEARN
647+
JOINS: 0
648+
(7 rows)
649+
650+
DO
651+
$$
652+
BEGIN
653+
for counter in 1..20 loop
654+
EXECUTE format('explain analyze select *, 1 from t where x <= 1 and y <= 1 and z <= %L;', 10 * counter);
655+
EXECUTE format('explain analyze select *, 1 from t where x <= 1 and y <= %L and z <= 1;', 10 * counter);
656+
EXECUTE format('explain analyze select *, 1 from t where x <= %L and y <= 1 and z <= 1;', 10 * counter);
657+
end loop;
658+
END;
659+
$$ LANGUAGE PLPGSQL;
660+
-- AQO should predict ~1000 rows to indicate that the record from previous invocation was used.
661+
SELECT str AS result
662+
FROM expln('
663+
select * from t where x <= 10000 and y <= 10000 and z <= 10000;') AS str
664+
WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%';
665+
result
666+
------------------------------------------------------------------
667+
Seq Scan on public.t (actual rows=10000 loops=1)
668+
AQO: rows=9987, error=-0%
669+
Output: x, y, z
670+
Filter: ((t.x <= 10000) AND (t.y <= 10000) AND (t.z <= 10000))
671+
Using aqo: true
672+
AQO mode: LEARN
673+
JOINS: 0
674+
(7 rows)
675+
556676
RESET aqo.wide_search;
677+
RESET aqo.predict_with_few_neighbors;
678+
RESET aqo.min_neighbors_for_predicting;
557679
DROP EXTENSION aqo CASCADE;
558680
DROP TABLE a;
559681
DROP TABLE b;
560682
DROP TABLE c;
683+
DROP TABLE t;
561684
DROP FUNCTION expln;

Diff for: expected/unsupported.out

+1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ $$ LANGUAGE PLPGSQL;
1616
SET aqo.mode = 'learn';
1717
SET aqo.show_details = 'on';
1818
DROP TABLE IF EXISTS t;
19+
NOTICE: table "t" does not exist, skipping
1920
CREATE TABLE t AS SELECT (gs.* / 50) AS x FROM generate_series(1,1000) AS gs;
2021
ANALYZE t;
2122
CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y

Diff for: machine_learning.c

+1
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ OkNNr_predict(OkNNrdata *data, double *features)
150150

151151
if (!aqo_predict_with_few_neighbors && data->rows < aqo_k)
152152
return -1.;
153+
Assert(data->rows > 0);
153154

154155
for (i = 0; i < data->rows; ++i)
155156
distances[i] = fs_distance(data->matrix[i], features, data->cols);

Diff for: postprocessing.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ atomic_fss_learn_step(uint64 fs, int fss, OkNNrdata *data,
9595
double *features, double target, double rfactor,
9696
List *reloids)
9797
{
98-
if (!load_fss_ext(fs, fss, data, NULL))
98+
if (!load_aqo_data(fs, fss, data, false))
9999
data->rows = 0;
100100

101101
data->rows = OkNNr_learn(data, features, target, rfactor);

Diff for: sql/gucs.sql

+1
Original file line numberDiff line numberDiff line change
@@ -50,4 +50,5 @@ SELECT count(*) FROM aqo_query_stat;
5050
SELECT true AS success FROM aqo_reset();
5151
SELECT count(*) FROM aqo_query_stat;
5252

53+
DROP TABLE t;
5354
DROP EXTENSION aqo;

Diff for: sql/look_a_like.sql

+65-1
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,9 @@ SET aqo.wide_search = 'on';
66

77
SET aqo.mode = 'learn';
88
SET aqo.show_details = 'on';
9-
set aqo.show_hash = 'off';
9+
SET aqo.show_hash = 'off';
1010
SET aqo.min_neighbors_for_predicting = 1;
11+
SET aqo.predict_with_few_neighbors = 'off';
1112
SET enable_nestloop = 'off';
1213
SET enable_mergejoin = 'off';
1314
SET enable_material = 'off';
@@ -142,10 +143,73 @@ FROM expln('
142143
SELECT * FROM (A LEFT JOIN B ON A.x1 = B.y1) sc left join C on sc.x1=C.z1;') AS str
143144
WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%';
144145

146+
147+
-- Next few test cases focus on fss corresponding to (x1 > ? AND x2 < ? AND x3 < ?). We will denote
148+
-- it by fss0. At this moment there is exactly one fs with (fs, fss0, dbid) record in aqo_data. We'll
149+
-- refer to it as fs0.
150+
151+
-- Let's create another fs for fss0. We'll call this fs fs1. Since aqo.wide_search='on',
152+
-- aqo.min_neighbors_for_predicting=1, and there is (fs0, fss0, dbid) data record, AQO must be used here.
153+
SELECT str AS result
154+
FROM expln('
155+
SELECT * FROM A WHERE x1 > -100 AND x2 < 10 AND x3 < 10;') AS str
156+
WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%';
157+
-- Now there are 2 data records for fss0: one for (fs0, fss0, dbid) and one for (fs1, fss0, dbid)
158+
159+
-- We repeat previous query, but set aqo.min_neighbors_for_predicting to 2. Since aqo.predict_with_few_neighbors
160+
-- is 'off', AQO is obliged to use both data records for fss0.
161+
SET aqo.min_neighbors_for_predicting = 2;
162+
SELECT str AS result
163+
FROM expln('
164+
SELECT * FROM A WHERE x1 > 1 AND x2 < 10 AND x3 < 10;') AS str
165+
WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%';
166+
-- Now there are 3 data records for fss0: 1 for (fs0, fss0, dbid) and 2 for (fs1, fss0, dbid)
167+
168+
-- Lastly, we run invoke query with previously unseen fs with fss0 feature subspace. AQO must use
169+
-- three data records from two neighbors for this one.
170+
SET aqo.min_neighbors_for_predicting = 3;
171+
SELECT str AS result
172+
FROM expln('
173+
SELECT x2 FROM A WHERE x1 > 3 AND x2 < 10 AND x3 < 10 GROUP BY(x2);') AS str
174+
WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%';
175+
176+
-----
177+
DROP TABLE IF EXISTS t;
178+
CREATE TABLE t AS SELECT x, x AS y, x AS z FROM generate_series(1, 10000) x;
179+
ANALYZE t;
180+
SELECT true AS success FROM aqo_reset();
181+
182+
-- Test that when there are less records than aqo.min_neighbors_for_predicting for given (fs, fss, dbid)
183+
-- and aqo.predict_with_few_neighbors is off, those records have higher precedence for cardinality estimation
184+
-- than neighbors' records.
185+
SELECT str AS result
186+
FROM expln('
187+
select * from t where x <= 10000 and y <= 10000 and z <= 10000;') AS str
188+
WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%';
189+
DO
190+
$$
191+
BEGIN
192+
for counter in 1..20 loop
193+
EXECUTE format('explain analyze select *, 1 from t where x <= 1 and y <= 1 and z <= %L;', 10 * counter);
194+
EXECUTE format('explain analyze select *, 1 from t where x <= 1 and y <= %L and z <= 1;', 10 * counter);
195+
EXECUTE format('explain analyze select *, 1 from t where x <= %L and y <= 1 and z <= 1;', 10 * counter);
196+
end loop;
197+
END;
198+
$$ LANGUAGE PLPGSQL;
199+
-- AQO should predict ~1000 rows to indicate that the record from previous invocation was used.
200+
SELECT str AS result
201+
FROM expln('
202+
select * from t where x <= 10000 and y <= 10000 and z <= 10000;') AS str
203+
WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%';
204+
205+
145206
RESET aqo.wide_search;
207+
RESET aqo.predict_with_few_neighbors;
208+
RESET aqo.min_neighbors_for_predicting;
146209
DROP EXTENSION aqo CASCADE;
147210

148211
DROP TABLE a;
149212
DROP TABLE b;
150213
DROP TABLE c;
214+
DROP TABLE t;
151215
DROP FUNCTION expln;

0 commit comments

Comments
 (0)