Skip to content

Commit

Permalink
norm: push CASE WHEN condition into subquery so ApplyJoin is possible
Browse files Browse the repository at this point in the history
Fixes cockroachdb#80169
Fixes cockroachdb#71908

Previously, some queries that follow the pattern
`SELECT CASE WHEN <cond> THEN <subquery> END FROM <table>` would fail
with a `could not decorrelate subquery` error. The reason is that
the subquery may only be run for rows where `<cond>` is true, unless the
subquery is leak-proof (that is, running it when `<cond>` is false cannot
have side effects, e.g. erroring out due to overflow during a CAST).
When the subquery is not leak-proof, it cannot be pulled above the
CASE expression into an apply join (which is a necessary first step
in executing a subquery expression).

To address this, this patch introduces a new normalization rule which
attempts to push the WHEN clause condition into the THEN clause subquery
and remove the CASE expression entirely (replace the CASE with the
subquery). The preconditions for this normalization are:
  1. There is a single WHEN clause.
  2. There is an ELSE NULL clause (either explicitly specified or
     implicit).
  3. The WHEN clause condition is not volatile (that is, the result
     is the same no matter how many times it is evaluated).
  4. The WHEN clause condition does not cause any side-effects, like
     writing rows to a table.
  5. The relational expressions in the THEN clause are only of the
     following types: (Select, Project, Limit, Offset, RecursiveCTE,
     InnerJoin, Scan, WithScan, ScalarGroupBy, Window).
  6. There are no aggregate functions which produce a non-null value
     when the input is empty, such as COUNT.
  7. There are no projected expressions above an aggregation in the
     subquery operation tree.

If these conditions are met, a new Select is placed above each Scan or
WithScan operation using the WHEN condition as a filter and the CASE
expression is replaced by the subquery.

Release note (sql change): This patch adds support for some queries
that asyncpg generates internally and that previously errored out
with the message "could not decorrelate subquery".
  • Loading branch information
Mark Sirek committed May 24, 2022
1 parent 12e772d commit 388e4fa
Show file tree
Hide file tree
Showing 4 changed files with 1,264 additions and 0 deletions.
255 changes: 255 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/subquery
Original file line number Diff line number Diff line change
Expand Up @@ -665,3 +665,258 @@ SELECT * FROM abc WHERE NOT EXISTS (SELECT * FROM xyz WHERE (abc.a = xyz.x OR ab
12 13 14

### End Split Disjunctions Tests
### PushCaseWhenConditionIntoSubquery Tests
# Single WHEN with an explicit ELSE NULL and a scalar subquery in the THEN
# branch. The unqualified `x` inside the subquery resolves to the subquery's
# own scan of xyz, so the subquery always returns the smallest x (1).
# Only the outer rows with x < 5 (x = 1 and x = 4) take the THEN branch; the
# remaining four rows yield NULL.
# NOTE(review): the CAST to INT2 is presumably what makes the subquery
# non-leak-proof (a CAST can error on overflow) -- confirm against the rule.
query I
SELECT (CASE
WHEN x < 5
THEN (SELECT CAST(x AS INT2) FROM xyz ORDER BY 1 LIMIT 1)
ELSE NULL end)
FROM xyz AS t ORDER BY 1
----
NULL
NULL
NULL
NULL
1
1

# Correlated variant: the subquery references the outer row (t.x) in both its
# projection and its filter. With x in {1, 4, 5, 7, 10, 13}, the outer rows
# 1, 4 and 7 find a match at x+3 and return their own x. Row 5 has no match
# at 8, and rows 10 and 13 fail the WHEN condition, so those three yield NULL.
query I
SELECT (CASE
WHEN t.x < 10
THEN (SELECT CAST(t.x AS INT2) FROM xyz WHERE xyz.x = t.x+3 LIMIT 1)
ELSE NULL end)
FROM xyz AS t ORDER BY 1
----
NULL
NULL
NULL
1
4
7

# Use of both LIMIT and OFFSET in recursive query.
# The x values in table xyz are the following:
# 1
# 4
# 5
# 7
# 10
# 13
# This CASE has no ELSE clause, so ELSE NULL is implicit.
# For each outer row with x > 1, the recursive CTE starts at that row's x
# (depth 0) and repeatedly follows x+3 through xyz. ORDER BY depth with
# LIMIT 1 OFFSET 1 then picks the depth-1 element of the chain:
#   x = 4 -> 7, x = 7 -> 10, x = 10 -> 13.
# x = 5 and x = 13 have no x+3 row, and x = 1 fails the WHEN condition, so
# those three rows yield NULL.
query I
SELECT (CASE
WHEN xyz.x > 1
THEN (WITH RECURSIVE rec_with (x, depth)
AS (SELECT xyz.x, 0 AS depth
FROM xyz t2
WHERE t2.x = xyz.x
UNION ALL
SELECT t2.x, rec_with.depth + 1 AS depth
FROM xyz t2, rec_with
WHERE t2.x = rec_with.x+3)
SELECT x FROM rec_with
ORDER BY depth
LIMIT 1 OFFSET 1
)
END
) AS rw_field
FROM xyz ORDER BY 1
----
NULL
NULL
NULL
7
10
13

# Test an asyncpg-like query.
# The recursive typeinfo_tree CTE selects from a pg_catalog.pg_type
# introspection subquery (ti) in both its seed and its recursive branch. In
# that subquery the basetype, attrtypoids and attrnames columns each have the
# form `CASE WHEN t.typtype = ... THEN (<correlated subquery>) ELSE NULL END`:
#   - basetype: a nested recursive CTE (typebases) with ORDER BY ... LIMIT 1;
#   - attrtypoids / attrnames: array_agg over an inner join of pg_attribute
#     and pg_class.
# The seed branch is restricted to oids 16 and 21, which the expected output
# shows as bool and int2. The test asserts that the query runs and returns
# those two rows at depth 0.
query ITTTII
WITH RECURSIVE typeinfo_tree(
oid, ns, name, kind, basetype, elemtype, elemdelim,
range_subtype, attrtypoids, attrnames, depth)
AS (
SELECT
ti.oid, ti.ns, ti.name, ti.kind, ti.basetype,
ti.elemtype, ti.elemdelim, ti.range_subtype,
ti.attrtypoids, ti.attrnames, 0
FROM
(
SELECT
t.oid AS oid,
ns.nspname AS ns,
t.typname AS name,
t.typtype AS kind,
(CASE WHEN t.typtype = 'd' THEN
(WITH RECURSIVE typebases(oid, depth) AS (
SELECT
t2.typbasetype AS oid,
0 AS depth
FROM
pg_catalog.pg_type t2
WHERE
t2.oid = t.oid
UNION ALL
SELECT
t2.typbasetype AS oid,
tb.depth + 1 AS depth
FROM
pg_catalog.pg_type t2,
typebases tb
WHERE
tb.oid = t2.oid
AND t2.typbasetype != 0
) SELECT oid FROM typebases ORDER BY depth DESC LIMIT 1)
ELSE NULL
END) AS basetype,
t.typelem AS elemtype,
elem_t.typdelim AS elemdelim,
range_t.rngsubtype AS range_subtype,
(CASE WHEN t.typtype = 'c' THEN
(SELECT
array_agg(ia.atttypid ORDER BY ia.attnum)
FROM
pg_catalog.pg_attribute ia
INNER JOIN pg_catalog.pg_class c
ON (ia.attrelid = c.oid)
WHERE
ia.attnum > 0 AND NOT ia.attisdropped
AND c.reltype = t.oid)
ELSE NULL
END) AS attrtypoids,
(CASE WHEN t.typtype = 'c' THEN
(SELECT
array_agg(ia.attname::text ORDER BY ia.attnum)
FROM
pg_catalog.pg_attribute ia
INNER JOIN pg_catalog.pg_class c
ON (ia.attrelid = c.oid)
WHERE
ia.attnum > 0 AND NOT ia.attisdropped
AND c.reltype = t.oid)
ELSE NULL
END) AS attrnames
FROM
pg_catalog.pg_type AS t
INNER JOIN pg_catalog.pg_namespace ns ON (
ns.oid = t.typnamespace)
LEFT JOIN pg_catalog.pg_type elem_t ON (
t.typlen = -1 AND
t.typelem != 0 AND
t.typelem = elem_t.oid
)
LEFT JOIN pg_catalog.pg_range range_t ON (
t.oid = range_t.rngtypid
)
)
AS ti
WHERE
ti.oid = any((16::oid,21::oid))
UNION ALL
SELECT
ti.oid, ti.ns, ti.name, ti.kind, ti.basetype,
ti.elemtype, ti.elemdelim, ti.range_subtype,
ti.attrtypoids, ti.attrnames, tt.depth + 1
FROM
(
SELECT
t.oid AS oid,
ns.nspname AS ns,
t.typname AS name,
t.typtype AS kind,
(CASE WHEN t.typtype = 'd' THEN
(WITH RECURSIVE typebases(oid, depth) AS (
SELECT
t2.typbasetype AS oid,
0 AS depth
FROM
pg_catalog.pg_type t2
WHERE
t2.oid = t.oid
UNION ALL
SELECT
t2.typbasetype AS oid,
tb.depth + 1 AS depth
FROM
pg_catalog.pg_type t2,
typebases tb
WHERE
tb.oid = t2.oid
AND t2.typbasetype != 0
) SELECT oid FROM typebases ORDER BY depth DESC LIMIT 1)
ELSE NULL
END) AS basetype,
t.typelem AS elemtype,
elem_t.typdelim AS elemdelim,
range_t.rngsubtype AS range_subtype,
(CASE WHEN t.typtype = 'c' THEN
(SELECT
array_agg(ia.atttypid ORDER BY ia.attnum)
FROM
pg_catalog.pg_attribute ia
INNER JOIN pg_catalog.pg_class c
ON (ia.attrelid = c.oid)
WHERE
ia.attnum > 0 AND NOT ia.attisdropped
AND c.reltype = t.oid)
ELSE NULL
END) AS attrtypoids,
(CASE WHEN t.typtype = 'c' THEN
(SELECT
array_agg(ia.attname::text ORDER BY ia.attnum)
FROM
pg_catalog.pg_attribute ia
INNER JOIN pg_catalog.pg_class c
ON (ia.attrelid = c.oid)
WHERE
ia.attnum > 0 AND NOT ia.attisdropped
AND c.reltype = t.oid)
ELSE NULL
END) AS attrnames
FROM
pg_catalog.pg_type AS t
INNER JOIN pg_catalog.pg_namespace ns ON (
ns.oid = t.typnamespace)
LEFT JOIN pg_catalog.pg_type elem_t ON (
t.typlen = -1 AND
t.typelem != 0 AND
t.typelem = elem_t.oid
)
LEFT JOIN pg_catalog.pg_range range_t ON (
t.oid = range_t.rngtypid
)
)
ti, typeinfo_tree tt
WHERE
(tt.elemtype IS NOT NULL AND ti.oid = tt.elemtype)
OR (tt.attrtypoids IS NOT NULL AND ti.oid = any(tt.attrtypoids))
OR (tt.range_subtype IS NOT NULL AND ti.oid = tt.range_subtype)
)
SELECT DISTINCT
oid::int, ns, name, kind, elemtype::int, depth
FROM
typeinfo_tree
ORDER BY 1
----
16 pg_catalog bool b 0 0
21 pg_catalog int2 b 0 0

# Cannot push WHEN condition with side effects
# The WHEN condition embeds an UPDATE ... RETURNING statement, which writes
# rows to xyz. The statement is expected to fail with the decorrelation error.
# NOTE(review): presumably the rule declines to fire here because copying the
# condition into the THEN subquery would repeat the write -- confirm against
# the rule's preconditions.
statement error pq: could not decorrelate subquery
SELECT (CASE
WHEN (select * from [update xyz set x=x+1 returning x]) > 10
THEN (WITH RECURSIVE rec_with (x, depth)
AS (SELECT xyz.x, 0 AS depth
FROM xyz t2
WHERE t2.x = xyz.x
UNION ALL
SELECT t2.x, rec_with.depth + 1 AS depth
FROM xyz t2, rec_with
WHERE t2.x = rec_with.x+1)
SELECT x FROM rec_with
ORDER BY depth
LIMIT 1
)
END
) AS rw_field
FROM xyz;

### End PushCaseWhenConditionIntoSubquery Tests
Loading

0 comments on commit 388e4fa

Please sign in to comment.