From fd5136c6822d9b8d6053c25eb4360d886630f43d Mon Sep 17 00:00:00 2001 From: Andrei Martsinchyk Date: Wed, 29 Nov 2023 09:29:43 -0800 Subject: [PATCH] [#19839] YSQL: Pushdown partial aggregate Summary: Allow pushing down aggregates whose split is AGGSPLIT_INITIAL_SERIAL. Background: there are 3 types of aggregate split: - AGGSPLIT_SIMPLE - no split, aggregation evaluates both phase 1 and phase 2; - AGGSPLIT_INITIAL_SERIAL - inner part of the split, aggregation evaluates phase 1 only; - AGGSPLIT_FINAL_DESERIAL - outer part of the split, aggregation evaluates phase 2 only. We previously only allowed pushdown of aggregate functions with no split; however, we can also safely push down AGGSPLIT_INITIAL_SERIAL functions, since phase 1 is the part of the aggregate function that is actually pushed down. If a Partial Aggregate is pushed down, the plan node does nothing but forward the phase 1 results from the YB scan to the upper level, so indicate that fact in the explain output by labeling the pushed-down aggregate node as "Noop Aggregate" instead of "Partial Aggregate". 
Jira: DB-8771 Test Plan: ybd --java-test 'org.yb.pgsql.TestPgRegressParallel#testPgRegressParallel' Reviewers: jason, tnayak Reviewed By: tnayak Subscribers: yql Tags: #jenkins-ready Differential Revision: https://phorge.dev.yugabyte.com/D30579 --- src/postgres/src/backend/commands/explain.c | 22 +++++++--- src/postgres/src/backend/executor/nodeAgg.c | 2 +- .../expected/yb_parallel_colocated.out | 25 ++++++----- .../regress/expected/yb_select_parallel.out | 43 +++++++++++-------- 4 files changed, 57 insertions(+), 35 deletions(-) diff --git a/src/postgres/src/backend/commands/explain.c b/src/postgres/src/backend/commands/explain.c index 9fac11c63164..5ff0c05df065 100644 --- a/src/postgres/src/backend/commands/explain.c +++ b/src/postgres/src/backend/commands/explain.c @@ -2003,7 +2003,15 @@ ExplainNode(PlanState *planstate, List *ancestors, if (DO_AGGSPLIT_SKIPFINAL(agg->aggsplit)) { - partialmode = "Partial"; + if (((AggState*) planstate)->yb_pushdown_supported) + /* + * If partial aggregate is pushed down, it does not + * really do anything, since entire operation is + * delegated to DocDB. 
+ */ + partialmode = "Noop"; + else + partialmode = "Partial"; pname = psprintf("%s %s", partialmode, pname); } else if (DO_AGGSPLIT_COMBINE(agg->aggsplit) || @@ -2384,10 +2392,10 @@ ExplainNode(PlanState *planstate, List *ancestors, if (es->debug && yb_enable_base_scans_cost_model) { ExplainPropertyFloat( - "Estimated Seeks", NULL, + "Estimated Seeks", NULL, ((IndexScan *) plan)->yb_estimated_num_seeks, 0, es); ExplainPropertyFloat( - "Estimated Nexts", NULL, + "Estimated Nexts", NULL, ((IndexScan *) plan)->yb_estimated_num_nexts, 0, es); } break; @@ -2423,10 +2431,10 @@ ExplainNode(PlanState *planstate, List *ancestors, if (es->debug && yb_enable_base_scans_cost_model) { ExplainPropertyFloat( - "Estimated Seeks", NULL, + "Estimated Seeks", NULL, ((IndexOnlyScan *) plan)->yb_estimated_num_seeks, 0, es); ExplainPropertyFloat( - "Estimated Nexts", NULL, + "Estimated Nexts", NULL, ((IndexOnlyScan *) plan)->yb_estimated_num_nexts, 0, es); } break; @@ -2480,10 +2488,10 @@ ExplainNode(PlanState *planstate, List *ancestors, if (es->debug && yb_enable_base_scans_cost_model) { ExplainPropertyFloat( - "Estimated Seeks", NULL, + "Estimated Seeks", NULL, ((YbSeqScan *) plan)->yb_estimated_num_seeks, 0, es); ExplainPropertyFloat( - "Estimated Nexts", NULL, + "Estimated Nexts", NULL, ((YbSeqScan *) plan)->yb_estimated_num_nexts, 0, es); } break; diff --git a/src/postgres/src/backend/executor/nodeAgg.c b/src/postgres/src/backend/executor/nodeAgg.c index 231dc87d0622..892a590bb42d 100644 --- a/src/postgres/src/backend/executor/nodeAgg.c +++ b/src/postgres/src/backend/executor/nodeAgg.c @@ -1627,7 +1627,7 @@ yb_agg_pushdown_supported(AggState *aggstate) return; /* Simple split. 
*/ - if (aggref->aggsplit != AGGSPLIT_SIMPLE) + if (aggref->aggsplit == AGGSPLIT_FINAL_DESERIAL) return; diff --git a/src/postgres/src/test/regress/expected/yb_parallel_colocated.out b/src/postgres/src/test/regress/expected/yb_parallel_colocated.out index ecb4e4f9c762..3e5dd6a5d7bf 100644 --- a/src/postgres/src/test/regress/expected/yb_parallel_colocated.out +++ b/src/postgres/src/test/regress/expected/yb_parallel_colocated.out @@ -54,10 +54,11 @@ SELECT count(*) FROM pctest1 WHERE d LIKE 'Value_9'; Finalize Aggregate -> Gather Workers Planned: 2 - -> Partial Aggregate + -> Noop Aggregate -> Parallel Seq Scan on pctest1 Remote Filter: (d ~~ 'Value_9'::text) -(6 rows) + Partial Aggregate: true +(7 rows) SELECT count(*) FROM pctest1 WHERE d LIKE 'Value_9'; count @@ -272,10 +273,11 @@ SELECT count(*) FROM pctest1 WHERE k > 123; Finalize Aggregate -> Gather Workers Planned: 2 - -> Partial Aggregate + -> Noop Aggregate -> Parallel Index Scan using pctest1_pkey on pctest1 Index Cond: (k > 123) -(6 rows) + Partial Aggregate: true +(7 rows) SELECT count(*) FROM pctest1 WHERE k > 123; count @@ -616,9 +618,10 @@ SELECT * from pctest2 -> Finalize Aggregate -> Gather Workers Planned: 2 - -> Partial Aggregate + -> Noop Aggregate -> Parallel Seq Scan on pctest1 pctest1_1 Remote Filter: (d ~~ 'Value_9'::text) + Partial Aggregate: true -> Parallel Hash Semi Join Hash Cond: (pctest2.c = pctest1.b) -> Parallel Seq Scan on pctest2 @@ -626,7 +629,7 @@ SELECT * from pctest2 -> Parallel Hash -> Parallel Seq Scan on pctest1 Remote Filter: (d ~~ 'Value_9'::text) -(17 rows) +(18 rows) SELECT * from pctest2 WHERE c IN (SELECT b FROM pctest1 WHERE d LIKE 'Value_9') @@ -668,10 +671,11 @@ select * from -> Finalize Aggregate -> Gather Workers Planned: 2 - -> Partial Aggregate + -> Noop Aggregate -> Parallel Seq Scan on pctest1 Remote Filter: (b > 10) -(8 rows) + Partial Aggregate: true +(9 rows) select * from (SELECT count(*) FROM pctest1 WHERE b > 10) ss @@ -694,10 +698,11 @@ select * from -> 
Finalize Aggregate -> Gather Workers Planned: 2 - -> Partial Aggregate + -> Noop Aggregate -> Parallel Index Only Scan using pctest1_c_idx on pctest1 Index Cond: (c > 10) -(8 rows) + Partial Aggregate: true +(9 rows) select * from (SELECT count(*) FROM pctest1 WHERE c > 10) ss diff --git a/src/postgres/src/test/regress/expected/yb_select_parallel.out b/src/postgres/src/test/regress/expected/yb_select_parallel.out index 079d5489914e..d99b0470c1da 100644 --- a/src/postgres/src/test/regress/expected/yb_select_parallel.out +++ b/src/postgres/src/test/regress/expected/yb_select_parallel.out @@ -46,10 +46,11 @@ explain (costs off) Finalize Aggregate -> Gather Workers Planned: 4 - -> Partial Aggregate + -> Noop Aggregate -> Parallel Seq Scan on tenk1 Remote Filter: (stringu1 = 'GRAAAA'::name) -(6 rows) + Partial Aggregate: true +(7 rows) select count(*) from tenk1 where stringu1 = 'GRAAAA'; count @@ -67,10 +68,11 @@ explain (costs off) Finalize Aggregate -> Gather Workers Planned: 4 - -> Partial Aggregate + -> Noop Aggregate -> Parallel Seq Scan on tenk1 Remote Filter: (stringu1 = 'GRAAAA'::name) -(6 rows) + Partial Aggregate: true +(7 rows) select count(*) from tenk1 where stringu1 = 'GRAAAA'; count @@ -154,10 +156,11 @@ explain (costs off) execute tenk1_count(1); Finalize Aggregate -> Gather Workers Planned: 4 - -> Partial Aggregate + -> Noop Aggregate -> Parallel Seq Scan on tenk1 Remote Filter: (hundred > 1) -(6 rows) + Partial Aggregate: true +(7 rows) execute tenk1_count(1); count @@ -217,15 +220,17 @@ explain (costs off) -> Finalize Aggregate -> Gather Workers Planned: 4 - -> Partial Aggregate + -> Noop Aggregate -> Parallel Seq Scan on tenk2 + Partial Aggregate: true -> Gather Workers Planned: 4 Params Evaluated: $2 - -> Partial Aggregate + -> Noop Aggregate -> Parallel Seq Scan on tenk1 Remote Filter: (unique1 = $2) -(13 rows) + Partial Aggregate: true +(15 rows) select count(*) from tenk1 where tenk1.unique1 = (Select max(tenk2.unique1) from tenk2); @@ -247,10 
+252,11 @@ explain (costs off) Finalize Aggregate -> Gather Workers Planned: 4 - -> Partial Aggregate + -> Noop Aggregate -> Parallel Index Scan using tenk1_hundred on tenk1 Index Cond: (hundred > 1) -(6 rows) + Partial Aggregate: true +(7 rows) select count((unique1)) from tenk1 where hundred > 1; count @@ -266,10 +272,11 @@ explain (costs off) Finalize Aggregate -> Gather Workers Planned: 4 - -> Partial Aggregate + -> Noop Aggregate -> Parallel Index Only Scan using tenk1_thous_tenthous on tenk1 Index Cond: (thousand > 95) -(6 rows) + Partial Aggregate: true +(7 rows) select count(*) from tenk1 where thousand > 95; count @@ -290,10 +297,11 @@ select * from -> Finalize Aggregate -> Gather Workers Planned: 4 - -> Partial Aggregate + -> Noop Aggregate -> Parallel Index Scan using tenk1_hundred on tenk1 Index Cond: (hundred > 10) -(8 rows) + Partial Aggregate: true +(9 rows) select * from (select count(unique1) from tenk1 where hundred > 10) ss @@ -316,10 +324,11 @@ select * from -> Finalize Aggregate -> Gather Workers Planned: 4 - -> Partial Aggregate + -> Noop Aggregate -> Parallel Index Only Scan using tenk1_thous_tenthous on tenk1 Index Cond: (thousand > 99) -(8 rows) + Partial Aggregate: true +(9 rows) select * from (select count(*) from tenk1 where thousand > 99) ss