From 5b3e9b8b582d5c01b1ec5a30befe3db3ba9a2ff8 Mon Sep 17 00:00:00 2001 From: Aleksandr Khoroshilov Date: Sat, 20 Apr 2024 23:26:33 +0000 Subject: [PATCH] Optimized and MapJoin friendly Q16 --- .../benchmarks/queries/tpch/ydb/q16.sql | 59 ++++++++++++------- .../benchmarks/queries/tpch/yql/q16.sql | 59 ++++++++++++------- 2 files changed, 78 insertions(+), 40 deletions(-) diff --git a/ydb/library/benchmarks/queries/tpch/ydb/q16.sql b/ydb/library/benchmarks/queries/tpch/ydb/q16.sql index 167da30cedf7..4f56c51ab276 100644 --- a/ydb/library/benchmarks/queries/tpch/ydb/q16.sql +++ b/ydb/library/benchmarks/queries/tpch/ydb/q16.sql @@ -2,42 +2,61 @@ -- TPC TPC-H Parameter Substitution (Version 2.17.2 build 0) -- using 1680793381 as a seed to the RNG -$join = ( +$p = ( select - ps.ps_suppkey as ps_suppkey, - ps.ps_partkey as ps_partkey + p.p_brand as p_brand, + p.p_type as p_type, + p.p_size as p_size, + ps.ps_suppkey as ps_suppkey from + `{path}part` as p +join `{path}partsupp` as ps -left join - `{path}supplier` as w on - w.s_suppkey = ps.ps_suppkey -where not (s_comment like "%Customer%Complaints%") + p.p_partkey = ps.ps_partkey +where + p.p_brand <> 'Brand#33' + and (not StartsWith(p.p_type, 'PROMO POLISHED')) + and (p.p_size = 20 or p.p_size = 27 or p.p_size = 11 or p.p_size = 45 or p.p_size = 40 or p.p_size = 41 or p.p_size = 34 or p.p_size = 36) +); + +$s = ( +select + s_suppkey +from + `{path}supplier` +where + s_comment like "%Customer%Complaints%" ); +$j = ( select p.p_brand as p_brand, p.p_type as p_type, p.p_size as p_size, - count(distinct j.ps_suppkey) as supplier_cnt + p.ps_suppkey as ps_suppkey from - $join as j -join - `{path}part` as p + $p as p +left only join + $s as s on - p.p_partkey = j.ps_partkey -where - p.p_brand <> 'Brand#33' - and (not StartsWith(p.p_type, 'PROMO POLISHED')) - and (p.p_size = 20 or p.p_size = 27 or p.p_size = 11 or p.p_size = 45 or p.p_size = 40 or p.p_size = 41 or p.p_size = 34 or p.p_size = 36) + p.ps_suppkey = s.s_suppkey +); + +select + j.p_brand as p_brand, + j.p_type as p_type, + j.p_size as p_size, + count(distinct j.ps_suppkey) as supplier_cnt +from + $j as j group by - p.p_brand, - p.p_type, - p.p_size + j.p_brand, + j.p_type, + j.p_size order by supplier_cnt desc, p_brand, p_type, p_size ; - diff --git a/ydb/library/benchmarks/queries/tpch/yql/q16.sql b/ydb/library/benchmarks/queries/tpch/yql/q16.sql index 087a89bee2ba..1480546b0150 100644 --- a/ydb/library/benchmarks/queries/tpch/yql/q16.sql +++ b/ydb/library/benchmarks/queries/tpch/yql/q16.sql @@ -4,42 +4,61 @@ -- TPC TPC-H Parameter Substitution (Version 2.17.2 build 0) -- using 1680793381 as a seed to the RNG -$join = ( +$p = ( select - ps.ps_suppkey as ps_suppkey, - ps.ps_partkey as ps_partkey + p.p_brand as p_brand, + p.p_type as p_type, + p.p_size as p_size, + ps.ps_suppkey as ps_suppkey from + {{part}} as p +join {{partsupp}} as ps -left join - {{supplier}} as w on - w.s_suppkey = ps.ps_suppkey -where not (s_comment like "%Customer%Complaints%") + p.p_partkey = ps.ps_partkey +where + p.p_brand <> 'Brand#33' + and (not StartsWith(p.p_type, 'PROMO POLISHED')) + and (p.p_size = 20 or p.p_size = 27 or p.p_size = 11 or p.p_size = 45 or p.p_size = 40 or p.p_size = 41 or p.p_size = 34 or p.p_size = 36) +); + +$s = ( +select + s_suppkey +from + {{supplier}} +where + s_comment like "%Customer%Complaints%" ); +$j = ( select p.p_brand as p_brand, p.p_type as p_type, p.p_size as p_size, - count(distinct j.ps_suppkey) as supplier_cnt + p.ps_suppkey as ps_suppkey from - $join as j -join - {{part}} as p + $p as p +left only join + $s as s on - p.p_partkey = j.ps_partkey -where - p.p_brand <> 'Brand#33' - and (not StartsWith(p.p_type, 'PROMO POLISHED')) - and (p.p_size = 20 or p.p_size = 27 or p.p_size = 11 or p.p_size = 45 or p.p_size = 40 or p.p_size = 41 or p.p_size = 34 or p.p_size = 36) + p.ps_suppkey = s.s_suppkey +); + +select + j.p_brand as p_brand, + j.p_type as p_type, + j.p_size as p_size, + count(distinct j.ps_suppkey) as supplier_cnt +from + $j as j group by - p.p_brand, - p.p_type, - p.p_size + j.p_brand, + j.p_type, + j.p_size order by supplier_cnt desc, p_brand, p_type, p_size ; -