From 50a962ea8ab22a187632f7ab2d0b75143a7597af Mon Sep 17 00:00:00 2001 From: neil Date: Thu, 20 Jul 2023 10:24:57 -0700 Subject: [PATCH] Rebase with master upto commit 1c781b7aaae5a25cb6cfdfa1f6305b11dd087c3c --- src/postgres/.clang-format | 4 +- src/postgres/contrib/file_fdw/file_fdw.c | 3 +- src/postgres/contrib/oid2name/oid2name.c | 1 + .../contrib/pg_stat_monitor/pg_stat_monitor.c | 4 +- .../contrib/postgres_fdw/postgres_fdw.c | 3 +- src/postgres/contrib/vacuumlo/vacuumlo.c | 1 + .../contrib/yb_pg_metrics/yb_pg_metrics.c | 2 +- src/postgres/src/backend/access/brin/brin.c | 2 +- .../src/backend/access/common/attmap.c | 11 +- .../src/backend/access/common/printtup.c | 7 +- src/postgres/src/backend/access/index/genam.c | 2 + .../src/backend/access/index/indexam.c | 6 +- .../src/backend/access/yb_access/yb_lsm.c | 52 +- .../src/backend/access/yb_access/yb_scan.c | 422 +++-- .../src/backend/access/ybgin/ybginget.c | 42 +- .../src/backend/bootstrap/bootstrap.c | 3 +- src/postgres/src/backend/catalog/index.c | 3 +- src/postgres/src/backend/catalog/partition.c | 3 +- .../src/backend/catalog/yb_catalog/yb_type.c | 39 + .../src/backend/catalog/yb_system_views.sql | 35 + src/postgres/src/backend/commands/copy.c | 52 +- src/postgres/src/backend/commands/copyfrom.c | 223 ++- .../src/backend/commands/copyfromparse.c | 13 +- src/postgres/src/backend/commands/copyto.c | 29 +- src/postgres/src/backend/commands/createas.c | 8 +- .../src/backend/commands/dbcommands.c | 31 +- src/postgres/src/backend/commands/explain.c | 57 + src/postgres/src/backend/commands/extension.c | 2 +- .../src/backend/commands/functioncmds.c | 30 +- src/postgres/src/backend/commands/indexcmds.c | 7 +- src/postgres/src/backend/commands/matview.c | 8 +- src/postgres/src/backend/commands/policy.c | 2 +- src/postgres/src/backend/commands/tablecmds.c | 153 +- src/postgres/src/backend/commands/trigger.c | 8 +- src/postgres/src/backend/commands/typecmds.c | 3 - src/postgres/src/backend/commands/ybccmds.c | 36 +- 
src/postgres/src/backend/executor/Makefile | 3 +- src/postgres/src/backend/executor/execMain.c | 2 +- .../src/backend/executor/execPartition.c | 12 +- src/postgres/src/backend/executor/nodeAgg.c | 59 +- .../src/backend/executor/nodeIndexonlyscan.c | 65 +- .../src/backend/executor/nodeIndexscan.c | 17 +- .../src/backend/executor/nodeLockRows.c | 6 +- .../src/backend/executor/nodeUnique.c | 6 +- .../src/backend/executor/nodeYbSeqscan.c | 19 +- .../src/backend/executor/ybcFunction.c | 94 + src/postgres/src/backend/executor/ybc_fdw.c | 170 +- src/postgres/src/backend/lib/Makefile | 1 + src/postgres/src/backend/lib/yb_percentile.c | 187 ++ src/postgres/src/backend/libpq/auth.c | 47 +- src/postgres/src/backend/nodes/copyfuncs.c | 1 + src/postgres/src/backend/nodes/nodeFuncs.c | 20 + src/postgres/src/backend/nodes/outfuncs.c | 2 + src/postgres/src/backend/nodes/readfuncs.c | 1 + .../src/backend/optimizer/path/indxpath.c | 119 +- .../src/backend/optimizer/plan/createplan.c | 35 +- .../src/backend/optimizer/plan/planner.c | 5 +- .../src/backend/optimizer/util/pathnode.c | 273 +++ .../src/backend/parser/parse_utilcmd.c | 3 +- .../src/backend/partitioning/partprune.c | 11 +- .../src/backend/postmaster/postmaster.c | 14 + src/postgres/src/backend/tcop/postgres.c | 343 ++-- src/postgres/src/backend/utils/Gen_fmgrtab.pl | 2 +- src/postgres/src/backend/utils/adt/Makefile | 3 +- .../src/backend/utils/adt/arrayfuncs.c | 8 +- src/postgres/src/backend/utils/adt/int.c | 68 +- src/postgres/src/backend/utils/adt/int8.c | 11 +- .../src/backend/utils/adt/rangetypes.c | 16 +- src/postgres/src/backend/utils/adt/rowtypes.c | 8 +- .../src/backend/utils/adt/yb_lockfuncs.c | 162 ++ .../src/backend/utils/cache/relcache.c | 482 +++-- src/postgres/src/backend/utils/fmgr/fmgr.c | 58 + src/postgres/src/backend/utils/misc/guc.c | 65 +- .../src/backend/utils/misc/pg_yb_utils.c | 137 +- .../backend/utils/misc/postgresql.conf.sample | 1 + src/postgres/src/bin/psql/common.c | 8 + 
src/postgres/src/include/access/attmap.h | 3 +- src/postgres/src/include/access/relscan.h | 2 + src/postgres/src/include/access/yb_scan.h | 10 +- src/postgres/src/include/catalog/catalog.h | 2 +- src/postgres/src/include/catalog/pg_proc.dat | 21 + .../src/include/catalog/pg_yb_migration.dat | 4 +- src/postgres/src/include/catalog/yb_type.h | 7 + src/postgres/src/include/commands/ybccmds.h | 3 +- .../src/include/executor/ybcFunction.h | 41 + src/postgres/src/include/fmgr.h | 4 + src/postgres/src/include/libpq/libpq-be.h | 7 + src/postgres/src/include/nodes/execnodes.h | 9 +- src/postgres/src/include/nodes/nodeFuncs.h | 3 + src/postgres/src/include/nodes/pathnodes.h | 27 + src/postgres/src/include/nodes/plannodes.h | 7 +- src/postgres/src/include/optimizer/planmain.h | 2 +- src/postgres/src/include/pg_yb_utils.h | 47 +- src/postgres/src/include/utils/fmgrtab.h | 4 +- src/postgres/src/include/utils/guc.h | 1 + .../expected/yb-lock-status-waiters.out | 23 + .../expected/yb-lock-status-waiters_1.out | 25 + .../isolation/expected/yb-lock-status.out | 83 + .../specs/yb-lock-status-waiters.spec | 41 + .../test/isolation/specs/yb-lock-status.spec | 61 + .../test/isolation/yb_pg_isolation_schedule | 3 + .../test/isolation/yb_wait_queues_schedule | 1 + .../test/regress/expected/yb_aggregates.out | 1601 +++++++++++++++-- .../regress/expected/yb_catalog_version.out | 166 +- .../regress/expected/yb_distinct_pushdown.out | 25 + .../yb_explicit_row_lock_planning.out | 328 ++++ .../expected/yb_get_range_split_clause.out | 60 + .../regress/expected/yb_hdr_percentile.out | 94 + .../test/regress/expected/yb_index_scan.out | 137 +- .../expected/yb_index_scan_null_asc.out | 145 +- .../expected/yb_index_scan_null_create.out | 8 +- .../expected/yb_index_scan_null_hash.out | 119 +- .../regress/expected/yb_index_selectivity.out | 5 +- .../test/regress/expected/yb_lock_status.out | 539 ++++++ .../regress/expected/yb_lock_status_1.out | 560 ++++++ .../expected/yb_pg_hint_plan_test1.out | 5 +- 
.../expected/yb_pg_partition_prune.out | 20 +- .../src/test/regress/expected/yb_pg_rules.out | 25 +- .../src/test/regress/expected/yb_reindex.out | 6 +- .../src/test/regress/expected/yb_ybgin.out | 5 +- .../src/test/regress/sql/yb_aggregates.sql | 189 +- .../test/regress/sql/yb_catalog_version.sql | 18 +- .../test/regress/sql/yb_distinct_pushdown.sql | 16 + .../sql/yb_explicit_row_lock_planning.sql | 125 ++ .../regress/sql/yb_get_range_split_clause.sql | 48 + .../test/regress/sql/yb_hdr_percentile.sql | 18 + .../src/test/regress/sql/yb_index_scan.sql | 17 + .../regress/sql/yb_index_scan_null_create.sql | 8 +- .../sql/yb_index_scan_null_queries.sql | 48 +- .../src/test/regress/sql/yb_lock_status.sql | 263 +++ .../regress/sql/yb_pg_partition_prune.sql | 1 + .../src/test/regress/sql/yb_reindex.sql | 6 +- .../regress/yb_distinct_pushdown_schedule | 6 + .../src/test/regress/yb_misc_serial_schedule | 1 + .../src/test/regress/yb_percentile_schedule | 6 + .../src/test/regress/yb_proc_schedule | 1 + 136 files changed, 7647 insertions(+), 1264 deletions(-) create mode 100644 src/postgres/src/backend/executor/ybcFunction.c create mode 100644 src/postgres/src/backend/lib/yb_percentile.c create mode 100644 src/postgres/src/backend/utils/adt/yb_lockfuncs.c create mode 100644 src/postgres/src/include/executor/ybcFunction.h create mode 100644 src/postgres/src/test/isolation/expected/yb-lock-status-waiters.out create mode 100644 src/postgres/src/test/isolation/expected/yb-lock-status-waiters_1.out create mode 100644 src/postgres/src/test/isolation/expected/yb-lock-status.out create mode 100644 src/postgres/src/test/isolation/specs/yb-lock-status-waiters.spec create mode 100644 src/postgres/src/test/isolation/specs/yb-lock-status.spec create mode 100644 src/postgres/src/test/regress/expected/yb_distinct_pushdown.out create mode 100644 src/postgres/src/test/regress/expected/yb_explicit_row_lock_planning.out create mode 100644 
src/postgres/src/test/regress/expected/yb_hdr_percentile.out create mode 100644 src/postgres/src/test/regress/expected/yb_lock_status.out create mode 100644 src/postgres/src/test/regress/expected/yb_lock_status_1.out create mode 100644 src/postgres/src/test/regress/sql/yb_distinct_pushdown.sql create mode 100644 src/postgres/src/test/regress/sql/yb_explicit_row_lock_planning.sql create mode 100644 src/postgres/src/test/regress/sql/yb_hdr_percentile.sql create mode 100644 src/postgres/src/test/regress/sql/yb_lock_status.sql create mode 100644 src/postgres/src/test/regress/yb_distinct_pushdown_schedule create mode 100644 src/postgres/src/test/regress/yb_percentile_schedule diff --git a/src/postgres/.clang-format b/src/postgres/.clang-format index 6c3d3b126de5..84189bfa8c2a 100644 --- a/src/postgres/.clang-format +++ b/src/postgres/.clang-format @@ -12,7 +12,9 @@ AlignAfterOpenBracket: Align AlignConsecutiveMacros: true AlignConsecutiveAssignments: false AlignConsecutiveBitFields: false -AlignConsecutiveDeclarations: true +# Leave AlignConsecutiveDeclarations to default as YB team can use either of the +# styles. Not aligned is preferred in most cases but this is not enforced. 
+# AlignConsecutiveDeclarations: false AlignEscapedNewlines: DontAlign AlignOperands: Align AlignTrailingComments: true diff --git a/src/postgres/contrib/file_fdw/file_fdw.c b/src/postgres/contrib/file_fdw/file_fdw.c index 03c533b540a6..58edc886024c 100644 --- a/src/postgres/contrib/file_fdw/file_fdw.c +++ b/src/postgres/contrib/file_fdw/file_fdw.c @@ -615,7 +615,8 @@ fileGetForeignPlan(PlannerInfo *root, best_path->fdw_private, NIL, /* no custom tlist */ NIL, /* no remote quals */ - outer_plan); + outer_plan, + best_path->path.yb_path_info); } /* diff --git a/src/postgres/contrib/oid2name/oid2name.c b/src/postgres/contrib/oid2name/oid2name.c index a62a5eedb122..a0af5f7e24fd 100644 --- a/src/postgres/contrib/oid2name/oid2name.c +++ b/src/postgres/contrib/oid2name/oid2name.c @@ -10,6 +10,7 @@ #include "postgres_fe.h" #include "catalog/pg_class_d.h" + #include "common/connect.h" #include "common/logging.h" #include "common/string.h" diff --git a/src/postgres/contrib/pg_stat_monitor/pg_stat_monitor.c b/src/postgres/contrib/pg_stat_monitor/pg_stat_monitor.c index c186fa427c44..887245d64b58 100644 --- a/src/postgres/contrib/pg_stat_monitor/pg_stat_monitor.c +++ b/src/postgres/contrib/pg_stat_monitor/pg_stat_monitor.c @@ -402,7 +402,7 @@ pgss_ExecutorStart(QueryDesc *queryDesc, int eflags) MemoryContext oldcxt; oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt); - queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL); + queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL, false); MemoryContextSwitchTo(oldcxt); } pgss_store(queryId, /* query id */ @@ -3076,4 +3076,4 @@ get_histogram_timings(PG_FUNCTION_ARGS) } pfree(tmp_str); return CStringGetTextDatum(text_str); -} \ No newline at end of file +} diff --git a/src/postgres/contrib/postgres_fdw/postgres_fdw.c b/src/postgres/contrib/postgres_fdw/postgres_fdw.c index 6dadbb26f6d6..5f01dc51c2ef 100644 --- a/src/postgres/contrib/postgres_fdw/postgres_fdw.c +++ b/src/postgres/contrib/postgres_fdw/postgres_fdw.c @@ 
-1428,7 +1428,8 @@ postgresGetForeignPlan(PlannerInfo *root, fdw_private, fdw_scan_tlist, fdw_recheck_quals, - outer_plan); + outer_plan, + best_path->path.yb_path_info); } /* diff --git a/src/postgres/contrib/vacuumlo/vacuumlo.c b/src/postgres/contrib/vacuumlo/vacuumlo.c index b7c8f2c805e4..1cf44221262e 100644 --- a/src/postgres/contrib/vacuumlo/vacuumlo.c +++ b/src/postgres/contrib/vacuumlo/vacuumlo.c @@ -22,6 +22,7 @@ #endif #include "catalog/pg_class_d.h" + #include "common/connect.h" #include "common/logging.h" #include "common/string.h" diff --git a/src/postgres/contrib/yb_pg_metrics/yb_pg_metrics.c b/src/postgres/contrib/yb_pg_metrics/yb_pg_metrics.c index 6c81c05f6e30..a5bf31e38bbf 100644 --- a/src/postgres/contrib/yb_pg_metrics/yb_pg_metrics.c +++ b/src/postgres/contrib/yb_pg_metrics/yb_pg_metrics.c @@ -492,7 +492,7 @@ ybpgm_ExecutorStart(QueryDesc *queryDesc, int eflags) { MemoryContext oldcxt; oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt); - queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_TIMER); + queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_TIMER, false); MemoryContextSwitchTo(oldcxt); } } diff --git a/src/postgres/src/backend/access/brin/brin.c b/src/postgres/src/backend/access/brin/brin.c index 7709808b63ff..fecca612e0f5 100644 --- a/src/postgres/src/backend/access/brin/brin.c +++ b/src/postgres/src/backend/access/brin/brin.c @@ -491,7 +491,7 @@ bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm) tmp = index_getprocinfo(idxRel, keyattno, BRIN_PROCNUM_CONSISTENT); fmgr_info_copy(&consistentFn[keyattno - 1], tmp, - CurrentMemoryContext); + GetCurrentMemoryContext()); } /* Add key to the proper per-attribute array. 
*/ diff --git a/src/postgres/src/backend/access/common/attmap.c b/src/postgres/src/backend/access/common/attmap.c index 896f82a22b48..89d17864666e 100644 --- a/src/postgres/src/backend/access/common/attmap.c +++ b/src/postgres/src/backend/access/common/attmap.c @@ -26,6 +26,8 @@ #include "access/htup_details.h" #include "utils/builtins.h" +/* Yugabyte includes */ +#include "pg_yb_utils.h" static bool check_attrmap_match(TupleDesc indesc, TupleDesc outdesc, @@ -172,7 +174,8 @@ build_attrmap_by_position(TupleDesc indesc, */ AttrMap * build_attrmap_by_name(TupleDesc indesc, - TupleDesc outdesc) + TupleDesc outdesc, + bool yb_ignore_type_mismatch) { AttrMap *attrMap; int outnatts; @@ -223,7 +226,8 @@ build_attrmap_by_name(TupleDesc indesc, if (strcmp(attname, NameStr(inatt->attname)) == 0) { /* Found it, check type */ - if (atttypid != inatt->atttypid || atttypmod != inatt->atttypmod) + if ((atttypid != inatt->atttypid || atttypmod != inatt->atttypmod) && + !(IsYugaByteEnabled() && yb_ignore_type_mismatch)) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("could not convert row type"), @@ -262,7 +266,8 @@ build_attrmap_by_name_if_req(TupleDesc indesc, AttrMap *attrMap; /* Verify compatibility and prepare attribute-number map */ - attrMap = build_attrmap_by_name(indesc, outdesc); + attrMap = build_attrmap_by_name(indesc, outdesc, + false /* yb_ignore_type_mismatch */); /* Check if the map has a one-to-one match */ if (check_attrmap_match(indesc, outdesc, attrMap)) diff --git a/src/postgres/src/backend/access/common/printtup.c b/src/postgres/src/backend/access/common/printtup.c index d74271c4be01..f296f960a924 100644 --- a/src/postgres/src/backend/access/common/printtup.c +++ b/src/postgres/src/backend/access/common/printtup.c @@ -377,12 +377,7 @@ printtup(TupleTableSlot *slot, DestReceiver *self) else { /* Binary output */ - bytea *outputbytes; - - outputbytes = SendFunctionCall(&thisState->finfo, attr); - pq_sendint32(buf, VARSIZE(outputbytes) - VARHDRSZ); - 
pq_sendbytes(buf, VARDATA(outputbytes), - VARSIZE(outputbytes) - VARHDRSZ); + StringInfoSendFunctionCall(buf, &thisState->finfo, attr); } } diff --git a/src/postgres/src/backend/access/index/genam.c b/src/postgres/src/backend/access/index/genam.c index c40cac28e3d2..fbba537cd2d5 100644 --- a/src/postgres/src/backend/access/index/genam.c +++ b/src/postgres/src/backend/access/index/genam.c @@ -133,6 +133,8 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys) scan->yb_scan_plan = NULL; scan->yb_rel_pushdown = NULL; scan->yb_idx_pushdown = NULL; + scan->yb_aggrefs = NIL; + scan->yb_agg_slot = NULL; return scan; } diff --git a/src/postgres/src/backend/access/index/indexam.c b/src/postgres/src/backend/access/index/indexam.c index 2944ce54f1fd..6355d5f544f7 100644 --- a/src/postgres/src/backend/access/index/indexam.c +++ b/src/postgres/src/backend/access/index/indexam.c @@ -639,11 +639,7 @@ index_fetch_heap(IndexScanDesc scan, TupleTableSlot *slot) * - If YugaByte returns a heap_tuple, all requested data was already selected in the tuple. 
*/ if (IsYBRelation(scan->heapRelation)) - { - if (scan->xs_hitup != 0) - return scan->xs_hitup; - return YbFetchTableSlot(scan->heapRelation, &scan->xs_heaptid, slot); - } + return scan->xs_hitup; found = table_index_fetch_tuple(scan->xs_heapfetch, &scan->xs_heaptid, scan->xs_snapshot, slot, diff --git a/src/postgres/src/backend/access/yb_access/yb_lsm.c b/src/postgres/src/backend/access/yb_access/yb_lsm.c index 65fa66121357..8c951ec9a306 100644 --- a/src/postgres/src/backend/access/yb_access/yb_lsm.c +++ b/src/postgres/src/backend/access/yb_access/yb_lsm.c @@ -426,7 +426,8 @@ ybcinrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys YbScanDesc ybScan = ybcBeginScan(scan->heapRelation, scan->indexRelation, scan->xs_want_itup, nscankeys, scankey, scan->yb_scan_plan, scan->yb_rel_pushdown, - scan->yb_idx_pushdown); + scan->yb_idx_pushdown, scan->yb_aggrefs, + scan->yb_exec_params); scan->opaque = ybScan; } @@ -447,13 +448,56 @@ ybcingettuple(IndexScanDesc scan, ScanDirection dir) YbScanDesc ybscan = (YbScanDesc) scan->opaque; ybscan->exec_params = scan->yb_exec_params; + /* exec_params can be NULL in case of systable_getnext, for example. */ if (ybscan->exec_params) ybscan->exec_params->work_mem = work_mem; - if (!ybscan->exec_params) { - ereport(DEBUG1, (errmsg("null exec_params"))); + /* Special case: aggregate pushdown. */ + if (scan->yb_aggrefs) + { + /* + * TODO(#18018): deduplicate with ybc_getnext_heaptuple, + * ybc_getnext_indextuple. + */ + if (ybscan->quit_scan) + return NULL; + + /* + * As of 2023-06-28, aggregate pushdown is only implemented for + * IndexOnlyScan, not IndexScan. + */ + Assert(ybscan->prepare_params.index_only_scan); + + /* + * TODO(#18018): deduplicate with ybc_getnext_heaptuple, + * ybc_getnext_indextuple. 
+ */ + scan->xs_recheck = (ybscan->nhash_keys > 0 || + !ybscan->is_full_cond_bound); + if (!ybscan->is_exec_done) + { + HandleYBStatus(YBCPgSetForwardScan(ybscan->handle, + is_forward_scan)); + HandleYBStatus(YBCPgExecSelect(ybscan->handle, + ybscan->exec_params)); + ybscan->is_exec_done = true; + } + + /* + * Aggregate pushdown directly modifies the scan slot rather than + * passing it through xs_hitup or xs_itup. + * + * The index id passed into ybFetchNext is likely not going to be used + * as it is only used for system table scans, which have oid, and there + * shouldn't exist any system table secondary indexes that index the + * oid column. + * TODO(jason): deduplicate with ybcingettuple. + */ + scan->yb_agg_slot = + ybFetchNext(ybscan->handle, scan->yb_agg_slot, + RelationGetRelid(scan->indexRelation)); + return !TTS_EMPTY(scan->yb_agg_slot); } - Assert(PointerIsValid(ybscan)); /* * IndexScan(SysTable, Index) --> HeapTuple. diff --git a/src/postgres/src/backend/access/yb_access/yb_scan.c b/src/postgres/src/backend/access/yb_access/yb_scan.c index cb3280c25820..e27da587c73f 100644 --- a/src/postgres/src/backend/access/yb_access/yb_scan.c +++ b/src/postgres/src/backend/access/yb_access/yb_scan.c @@ -30,6 +30,7 @@ #include "access/htup_details.h" #include "access/relscan.h" #include "access/sysattr.h" +#include "access/xact.h" #include "commands/dbcommands.h" #include "commands/tablegroup.h" #include "catalog/index.h" @@ -52,10 +53,12 @@ #include "utils/snapmgr.h" #include "utils/spccache.h" +/* Yugabyte includes */ #include "yb/yql/pggate/ybc_pggate.h" #include "pg_yb_utils.h" #include "access/nbtree.h" #include "access/yb_scan.h" +#include "catalog/yb_type.h" #include "utils/elog.h" #include "utils/typcache.h" @@ -393,6 +396,7 @@ static HeapTuple ybcFetchNextHeapTuple(YbScanDesc ybScan, bool is_forward_scan) HeapTuple tuple = NULL; bool has_data = false; TupleDesc tupdesc = ybScan->target_desc; + TableScanDesc tsdesc = (TableScanDesc)ybScan; Datum *values = 
(Datum *) palloc0(tupdesc->natts * sizeof(Datum)); bool *nulls = (bool *) palloc(tupdesc->natts * sizeof(bool)); @@ -407,12 +411,41 @@ static HeapTuple ybcFetchNextHeapTuple(YbScanDesc ybScan, bool is_forward_scan) } /* Fetch one row. */ - HandleYBStatus(YBCPgDmlFetch(ybScan->handle, - tupdesc->natts, - (uint64_t *) values, - nulls, - &syscols, - &has_data)); + YBCStatus status = YBCPgDmlFetch(ybScan->handle, + tupdesc->natts, + (uint64_t *) values, + nulls, + &syscols, + &has_data); + + if (IsolationIsSerializable()) + HandleYBStatus(status); + else if (status) + { + if (ybScan->exec_params != NULL && YBCIsTxnConflictError(YBCStatusTransactionError(status))) + { + elog(DEBUG2, "Error when trying to lock row. " + "pg_wait_policy=%d docdb_wait_policy=%d txn_errcode=%d message=%s", + ybScan->exec_params->pg_wait_policy, + ybScan->exec_params->docdb_wait_policy, + YBCStatusTransactionError(status), + YBCStatusMessageBegin(status)); + if (ybScan->exec_params->pg_wait_policy == LockWaitError) + ereport(ERROR, (errcode(ERRCODE_LOCK_NOT_AVAILABLE), + errmsg("could not obtain lock on row in relation \"%s\"", + RelationGetRelationName(tsdesc->rs_rd)))); + else + ereport(ERROR, + (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + errmsg("could not serialize access due to concurrent update"), + yb_txn_errcode(YBCGetTxnConflictErrorCode()))); + } + else if (YBCIsTxnSkipLockingError(YBCStatusTransactionError(status))) + /* For skip locking, it's correct to simply return no results. 
*/ + has_data = false; + else + HandleYBStatus(status); + } if (has_data) { @@ -430,7 +463,7 @@ static HeapTuple ybcFetchNextHeapTuple(YbScanDesc ybScan, bool is_forward_scan) HEAPTUPLE_YBCTID(tuple) = PointerGetDatum(syscols.ybctid); ybcUpdateFKCache(ybScan, HEAPTUPLE_YBCTID(tuple)); } - tuple->t_tableOid = RelationGetRelid(ybScan->rs_base.rs_rd); + tuple->t_tableOid = RelationGetRelid(tsdesc->rs_rd); } pfree(values); pfree(nulls); @@ -511,7 +544,7 @@ static IndexTuple ybcFetchNextIndexTuple(YbScanDesc ybScan, Relation index, bool /* * Set up scan plan. * This function sets up target and bind columns for each type of scans. - * SELECT FROM WHERE op + * SELECT FROM
WHERE * * 1. SequentialScan(Table) and PrimaryIndexScan(Table): index = 0 * - Table can be systable or usertable. @@ -621,7 +654,7 @@ ybcSetupScanPlan(bool xs_want_itup, YbScanDesc ybScan, YbScanPlan scan_plan) * - The bind-attnum comes from the table that is being scan by the scan. * * Examples: - * - For IndexScan(SysTable, Index), SysTable is used for targets, but Index is for binds. + * - For IndexScan(Table, Index), Table is used for targets, but Index is for binds. * - For IndexOnlyScan(Table, Index), only Index is used to setup both target and bind. */ for (i = 0; i < ybScan->nkeys; i++) @@ -654,8 +687,8 @@ ybcSetupScanPlan(bool xs_want_itup, YbScanDesc ybScan, YbScanPlan scan_plan) else { /* - * IndexScan(SysTable or UserTable, Index) returns HeapTuple. - * Use SysTable attnum for targets. Use its index attnum for binds. + * IndexScan(Table, Index) returns HeapTuple. + * Use Table attnum for targets. Use its Index attnum for binds. */ scan_plan->bind_key_attnums[i] = key->sk_attno; ybScan->target_key_attnums[i] = @@ -773,30 +806,51 @@ YbGetLengthOfKey(ScanKey *key_ptr) } /* - * Check whether the conditions lead to empty result regardless of the values - * in the index because of always FALSE or UNKNOWN conditions. - * Return true if the combined key conditions are unsatisfiable. + * Given a table attribute number, get a corresponding index attribute number. + * Throw an error if it is not found. + */ +static AttrNumber +YbGetIndexAttnum(AttrNumber table_attno, Relation index) +{ + for (int i = 0; i < IndexRelationGetNumberOfAttributes(index); ++i) + { + if (table_attno == index->rd_index->indkey.values[i]) + return i + 1; + } + elog(ERROR, "column is not in index"); +} + +/* + * Return whether the given conditions are unsatisfiable regardless of the + * values in the index because of always FALSE or UNKNOWN conditions. 
*/ static bool -YbIsEmptyResultCondition(int nkeys, ScanKey keys[]) +YbIsUnsatisfiableCondition(int nkeys, ScanKey keys[]) { - for (int i = 0; i < nkeys; i++) + for (int i = 0; i < nkeys; ++i) { ScanKey key = keys[i]; - if (!((key->sk_flags & SK_ROW_MEMBER) && YbIsRowHeader(keys[i - 1])) || - key->sk_strategy == BTEqualStrategyNumber) + /* + * Look for two cases: + * - = null + * - row(a, b, c) op row(null, e, f) + */ + if ((key->sk_strategy == BTEqualStrategyNumber || + (i > 0 && YbIsRowHeader(keys[i - 1]) && + key->sk_flags & SK_ROW_MEMBER)) && + YbIsNeverTrueNullCond(key)) { - if (YbIsNeverTrueNullCond(key)) - return true; + elog(DEBUG1, "skipping a scan due to unsatisfiable condition"); + return true; } } return false; } static bool -YbShouldPushdownScanPrimaryKey(Relation relation, YbScanPlan scan_plan, - AttrNumber attnum, ScanKey key) +YbShouldPushdownScanPrimaryKey(YbScanPlan scan_plan, AttrNumber attnum, + ScanKey key) { if (YbIsHashCodeSearch(key)) { @@ -855,8 +909,6 @@ static int int_compar_cb(const void *v1, const void *v2) static void ybcSetupScanKeys(YbScanDesc ybScan, YbScanPlan scan_plan) { - TableScanDesc tsdesc = (TableScanDesc) ybScan; - /* * Find the scan keys that are the primary key. 
*/ @@ -874,8 +926,7 @@ ybcSetupScanKeys(YbScanDesc ybScan, YbScanPlan scan_plan) bool is_primary_key = bms_is_member(idx, scan_plan->primary_key); if (is_primary_key && - YbShouldPushdownScanPrimaryKey(tsdesc->rs_rd, scan_plan, attnum, - ybScan->keys[i])) + YbShouldPushdownScanPrimaryKey(scan_plan, attnum, ybScan->keys[i])) { scan_plan->sk_cols = bms_add_member(scan_plan->sk_cols, idx); } @@ -1381,12 +1432,6 @@ YbBindScanKeys(YbScanDesc ybScan, YbScanPlan scan_plan) { Relation relation = ((TableScanDesc)ybScan)->rs_rd; - HandleYBStatus(YBCPgNewSelect(YBCGetDatabaseOid(relation), - YbGetStorageRelid(relation), - &ybScan->prepare_params, - YBCIsRegionLocal(relation), - &ybScan->handle)); - ybScan->is_full_cond_bound = yb_bypass_cond_recheck && yb_pushdown_strict_inequality && yb_pushdown_is_not_null; @@ -2191,6 +2236,106 @@ ybcSetupTargets(YbScanDesc ybScan, YbScanPlan scan_plan, Scan *pg_scan_plan) } } +/* + * Set aggregate targets into handle. If index is not null, convert column + * attribute numbers from table-based numbers to index-based ones. + */ +void +YbDmlAppendTargetsAggregate(List *aggrefs, TupleDesc tupdesc, + Relation index, YBCPgStatement handle) +{ + ListCell *lc; + + /* Set aggregate scan targets. */ + foreach(lc, aggrefs) + { + Aggref *aggref = lfirst_node(Aggref, lc); + char *func_name = get_func_name(aggref->aggfnoid); + ListCell *lc_arg; + YBCPgExpr op_handle; + const YBCPgTypeEntity *type_entity; + + /* Get type entity for the operator from the aggref. */ + type_entity = YbDataTypeFromOidMod(InvalidAttrNumber, aggref->aggtranstype); + + /* Create operator. */ + HandleYBStatus(YBCPgNewOperator(handle, func_name, type_entity, aggref->aggcollid, &op_handle)); + + /* Handle arguments. */ + if (aggref->aggstar) { + /* + * Add dummy argument for COUNT(*) case, turning it into COUNT(0). + * We don't use a column reference as we want to count rows + * even if all column values are NULL. 
+ */ + YBCPgExpr const_handle; + HandleYBStatus(YBCPgNewConstant(handle, + type_entity, + false /* collate_is_valid_non_c */, + NULL /* collation_sortkey */, + 0 /* datum */, + false /* is_null */, + &const_handle)); + HandleYBStatus(YBCPgOperatorAppendArg(op_handle, const_handle)); + } else { + /* Add aggregate arguments to operator. */ + foreach(lc_arg, aggref->args) + { + TargetEntry *tle = lfirst_node(TargetEntry, lc_arg); + if (IsA(tle->expr, Const)) + { + Const* const_node = castNode(Const, tle->expr); + /* Already checked by yb_agg_pushdown_supported */ + Assert(const_node->constisnull || const_node->constbyval); + + YBCPgExpr const_handle; + HandleYBStatus(YBCPgNewConstant(handle, + type_entity, + false /* collate_is_valid_non_c */, + NULL /* collation_sortkey */, + const_node->constvalue, + const_node->constisnull, + &const_handle)); + HandleYBStatus(YBCPgOperatorAppendArg(op_handle, const_handle)); + } + else if (IsA(tle->expr, Var)) + { + /* + * Use original attribute number (varoattno) instead of projected one (varattno) + * as projection is disabled for tuples produced by pushed down operators. + */ + int attno = castNode(Var, tle->expr)->varattnosyn; + /* + * For index (only) scans, translate the table-based + * attribute number to an index-based one. + */ + if (index) + attno = YbGetIndexAttnum(attno, index); + Form_pg_attribute attr = TupleDescAttr(tupdesc, attno - 1); + YBCPgTypeAttrs type_attrs = {attr->atttypmod}; + + YBCPgExpr arg = YBCNewColumnRef(handle, + attno, + attr->atttypid, + attr->attcollation, + &type_attrs); + HandleYBStatus(YBCPgOperatorAppendArg(op_handle, arg)); + } + else + { + /* Should never happen. */ + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("unsupported aggregate function argument type"))); + } + } + } + + /* Add aggregate operator as scan target. 
*/ + HandleYBStatus(YBCPgDmlAppendTarget(handle, op_handle)); + } +} + /* * YbDmlAppendTargets * @@ -2278,20 +2423,26 @@ YbDmlAppendColumnRefs(List *colrefs, bool is_primary, YBCPgStatement handle) /* * Begin a scan for - * SELECT FROM USING + * SELECT FROM USING WHERE * NOTES: - * - "relation" is the table being SELECTed. - * - "index" identify the INDEX that will be used for scaning. - * - "nkeys" and "key" identify which key columns are provided in the SELECT WHERE clause. - * nkeys = Number of key. - * keys[].sk_attno = the columns' attnum in the IndexTable or "index" - * (This is not the attnum in UserTable or "relation") - * - * - If "xs_want_itup" is true, Postgres layer is expecting an IndexTuple that has ybctid to - * identify the desired row. - * - "rel_pushdown" defines expressions to pushdown to remote relation scan - * - "idx_pushdown" defines expressions to pushdown to remote secondary index - * scan. If the scan is not over a secondary index. + * - "relation" is the non-index table. + * - "index" is the index table, if applicable. + * - "nkeys" and "key" identify which key columns are provided in the SELECT + * WHERE clause. + * - nkeys = Number of keys. + * - keys[].sk_attno = the column's attribute number with respect to + * - "relation" if sequential scan + * - "index" if index (only) scan + * Easy way to tell between the two cases is whether index is NULL. + * Note: ybc_systable_beginscan can call for either case. + * - If "xs_want_itup" is true, Postgres layer is expecting an IndexTuple that + * has ybctid to identify the desired row. + * - "rel_pushdown" defines expressions to push down to the targeted relation. + * - sequential scan: non-index table. + * - index scan: non-index table. + * - index only scan: index table. + * - "idx_pushdown" defines expressions to push down to the index in case of an + * index scan. 
*/ YbScanDesc ybcBeginScan(Relation relation, @@ -2301,7 +2452,9 @@ ybcBeginScan(Relation relation, ScanKey keys, Scan *pg_scan_plan, PushdownExprs *rel_pushdown, - PushdownExprs *idx_pushdown) + PushdownExprs *idx_pushdown, + List *aggrefs, + YBCPgExecParameters *exec_params) { if (nkeys > YB_MAX_SCAN_KEYS) ereport(ERROR, @@ -2309,7 +2462,7 @@ ybcBeginScan(Relation relation, errmsg("cannot use more than %d predicates in a table or index scan", YB_MAX_SCAN_KEYS))); - /* Set up YugaByte scan description */ + /* Set up Yugabyte scan description */ YbScanDesc ybScan = (YbScanDesc) palloc0(sizeof(YbScanDescData)); TableScanDesc tsdesc = (TableScanDesc)ybScan; tsdesc->rs_rd = relation; @@ -2343,69 +2496,82 @@ ybcBeginScan(Relation relation, while (((current++)->sk_flags & SK_ROW_END) == 0); } } - ybScan->exec_params = NULL; + if (YbIsUnsatisfiableCondition(ybScan->nkeys, ybScan->keys)) + { + ybScan->quit_scan = true; + return ybScan; + } + ybScan->exec_params = exec_params; ybScan->index = index; ybScan->quit_scan = false; - /* Setup the scan plan */ + /* Set up the scan plan */ YbScanPlanData scan_plan; ybcSetupScanPlan(xs_want_itup, ybScan, &scan_plan); ybcSetupScanKeys(ybScan, &scan_plan); - if (!YbIsEmptyResultCondition(ybScan->nkeys, ybScan->keys) && - YbBindScanKeys(ybScan, &scan_plan) && - YbBindHashKeys(ybScan)) - { - /* - * Setup the scan targets with respect to postgres scan plan - * (i.e. set only required targets) - */ - ybcSetupTargets(ybScan, &scan_plan, pg_scan_plan); + /* Create handle */ + HandleYBStatus(YBCPgNewSelect(YBCGetDatabaseOid(relation), + YbGetStorageRelid(relation), + &ybScan->prepare_params, + YBCIsRegionLocal(relation), + &ybScan->handle)); - /* - * Set up pushdown expressions. - * Sequential, IndexOnly and primary key scans are refer only one - * relation, and all expression they push down are in the rel_pushdown. 
- * Secondary index scan may have pushable expressions that refer columns - * not included in the index, those go to the rel_pushdown as well. - * Secondary index scan's expressions that refer only columns available - * from the index are go to the idx_pushdown and pushed down when the - * index is scanned. - */ - if (rel_pushdown != NULL) - { - YbDmlAppendQuals(rel_pushdown->quals, true /* is_primary */, - ybScan->handle); - YbDmlAppendColumnRefs(rel_pushdown->colrefs, true /* is_primary */, - ybScan->handle); - } + /* Set up binds */ + if (!YbBindScanKeys(ybScan, &scan_plan) || !YbBindHashKeys(ybScan)) + { + ybScan->quit_scan = true; + bms_free(scan_plan.hash_key); + bms_free(scan_plan.primary_key); + bms_free(scan_plan.sk_cols); + return ybScan; + } - if (idx_pushdown != NULL) - { - YbDmlAppendQuals(idx_pushdown->quals, false /* is_primary */, - ybScan->handle); - YbDmlAppendColumnRefs(idx_pushdown->colrefs, false /* is_primary */, - ybScan->handle); - } + /* + * Set up targets. There are two separate cases: + * - aggregate pushdown + * - not aggregate pushdown + * This ought to be reworked once aggregate pushdown supports a mix of + * non-aggregate and aggregate targets. + */ + if (aggrefs != NIL) + YbDmlAppendTargetsAggregate(aggrefs, ybScan->target_desc, index, + ybScan->handle); + else + ybcSetupTargets(ybScan, &scan_plan, pg_scan_plan); - /* - * Set the current syscatalog version (will check that we are up to date). - * Avoid it for syscatalog tables so that we can still use this for - * refreshing the caches when we are behind. - * Note: This works because we do not allow modifying schemas (alter/drop) - * for system catalog tables. - */ - if (!IsSystemRelation(relation)) - YbSetCatalogCacheVersion( - ybScan->handle, YbGetCatalogCacheVersion()); - } else - ybScan->quit_scan = true; + /* + * Set up pushdown expressions. 
+ */ + if (rel_pushdown != NULL) + { + YbDmlAppendQuals(rel_pushdown->quals, true /* is_primary */, + ybScan->handle); + YbDmlAppendColumnRefs(rel_pushdown->colrefs, true /* is_primary */, + ybScan->handle); + } + if (idx_pushdown != NULL) + { + YbDmlAppendQuals(idx_pushdown->quals, false /* is_primary */, + ybScan->handle); + YbDmlAppendColumnRefs(idx_pushdown->colrefs, false /* is_primary */, + ybScan->handle); + } + /* + * Set the current syscatalog version (will check that we are up to + * date). Avoid it for syscatalog tables so that we can still use this + * for refreshing the caches when we are behind. + * Note: This works because we do not allow modifying schemas + * (alter/drop) for system catalog tables. + */ + if (!IsSystemRelation(relation)) + YbSetCatalogCacheVersion(ybScan->handle, + YbGetCatalogCacheVersion()); bms_free(scan_plan.hash_key); bms_free(scan_plan.primary_key); bms_free(scan_plan.sk_cols); - return ybScan; } @@ -2576,20 +2742,8 @@ SysScanDesc ybc_systable_beginscan(Relation relation, * - When selecting using INDEX, the key values are bound to the IndexTable, so index attnum * must be used for bindings. 
*/ - int i, j; - for (i = 0; i < nkeys; i++) - { - for (j = 0; j < IndexRelationGetNumberOfAttributes(index); j++) - { - if (key[i].sk_attno == index->rd_index->indkey.values[j]) - { - key[i].sk_attno = j + 1; - break; - } - } - if (j == IndexRelationGetNumberOfAttributes(index)) - elog(ERROR, "column is not in index"); - } + for (int i = 0; i < nkeys; ++i) + key[i].sk_attno = YbGetIndexAttnum(key[i].sk_attno, index); } } @@ -2601,7 +2755,9 @@ SysScanDesc ybc_systable_beginscan(Relation relation, key, pg_scan_plan, NULL /* rel_pushdown */, - NULL /* idx_pushdown */); + NULL /* idx_pushdown */, + NULL /* aggrefs */, + NULL /* exec_params */); /* Set up Postgres sys table scan description */ SysScanDesc scan_desc = (SysScanDesc) palloc0(sizeof(SysScanDescData)); @@ -2652,7 +2808,9 @@ TableScanDesc ybc_heap_beginscan(Relation relation, key, pg_scan_plan, NULL /* rel_pushdown */, - NULL /* idx_pushdown */); + NULL /* idx_pushdown */, + NULL /* aggrefs */, + NULL /* exec_params */); /* Set up Postgres sys table scan description */ TableScanDesc tsdesc = (TableScanDesc)ybScan; @@ -2699,7 +2857,9 @@ TableScanDesc ybc_remote_beginscan(Relation relation, Snapshot snapshot, Scan *pg_scan_plan, - PushdownExprs *pushdown) + PushdownExprs *pushdown, + List *aggrefs, + YBCPgExecParameters *exec_params) { YbScanDesc ybScan = ybcBeginScan(relation, NULL /* index */, @@ -2708,7 +2868,9 @@ ybc_remote_beginscan(Relation relation, NULL /* key */, pg_scan_plan, pushdown /* rel_pushdown */, - NULL /* idx_pushdown */); + NULL /* idx_pushdown */, + aggrefs, + exec_params); /* Set up Postgres sys table scan description */ TableScanDesc tsdesc = (TableScanDesc)ybScan; @@ -3191,25 +3353,19 @@ YbFetchHeapTuple(Relation relation, ItemPointer tid, HeapTuple* tuple) } TM_Result -YBCLockTuple(Relation relation, Datum ybctid, RowMarkType mode, LockWaitPolicy wait_policy, - EState* estate) +YBCLockTuple(Relation relation, Datum ybctid, RowMarkType mode, LockWaitPolicy pg_wait_policy, + EState* 
estate) { - if (wait_policy == LockWaitBlock && !YBIsWaitQueueEnabled()) { - /* - * If wait-queues are not enabled, we default to the "Fail-on-Conflict" policy which is mapped - * to LockWaitError right now (see WaitPolicy proto for meaning of "Fail-on-Conflict" and the - * reason why LockWaitError is not mapped to no-wait semantics but to Fail-on-Conflict - * semantics). - */ - wait_policy = LockWaitError; - } + int docdb_wait_policy; + + YBSetRowLockPolicy(&docdb_wait_policy, pg_wait_policy); YBCPgStatement ybc_stmt; HandleYBStatus(YBCPgNewSelect(YBCGetDatabaseOid(relation), - RelationGetRelid(relation), - NULL /* prepare_params */, - YBCIsRegionLocal(relation), - &ybc_stmt)); + RelationGetRelid(relation), + NULL /* prepare_params */, + YBCIsRegionLocal(relation), + &ybc_stmt)); /* Bind ybctid to identify the current row. */ YBCPgExpr ybctid_expr = YBCNewConstant(ybc_stmt, BYTEAOID, InvalidOid, ybctid, false); @@ -3218,8 +3374,9 @@ YBCLockTuple(Relation relation, Datum ybctid, RowMarkType mode, LockWaitPolicy w YBCPgExecParameters exec_params = {0}; exec_params.limit_count = 1; exec_params.rowmark = mode; - exec_params.wait_policy = wait_policy; - exec_params.stmt_in_txn_limit_ht_for_reads = + exec_params.pg_wait_policy = pg_wait_policy; + exec_params.docdb_wait_policy = docdb_wait_policy; + exec_params.stmt_in_txn_limit_ht_for_reads = estate->yb_exec_params.stmt_in_txn_limit_ht_for_reads; TM_Result res = TM_Ok; @@ -3255,8 +3412,9 @@ YBCLockTuple(Relation relation, Datum ybctid, RowMarkType mode, LockWaitPolicy w MemoryContext error_context = MemoryContextSwitchTo(exec_context); ErrorData* edata = CopyErrorData(); - elog(DEBUG2, "Error when trying to lock row. wait_policy=%d txn_errcode=%d message=%s", - wait_policy, edata->yb_txn_errcode, edata->message); + elog(DEBUG2, "Error when trying to lock row. 
" + "pg_wait_policy=%d docdb_wait_policy=%d txn_errcode=%d message=%s", + pg_wait_policy, docdb_wait_policy, edata->yb_txn_errcode, edata->message); if (YBCIsTxnConflictError(edata->yb_txn_errcode)) res = TM_Updated; diff --git a/src/postgres/src/backend/access/ybgin/ybginget.c b/src/postgres/src/backend/access/ybgin/ybginget.c index bc76dfa4a6dc..f7969dc3ed98 100644 --- a/src/postgres/src/backend/access/ybgin/ybginget.c +++ b/src/postgres/src/backend/access/ybgin/ybginget.c @@ -518,7 +518,17 @@ ybginDoFirstExec(IndexScanDesc scan, ScanDirection dir) ybginSetupBinds(scan); /* targets */ - ybginSetupTargets(scan); + if (scan->yb_aggrefs != NIL) + /* + * As of 2023-06-28, aggregate pushdown is only implemented for + * IndexOnlyScan, not IndexScan. + */ + YbDmlAppendTargetsAggregate(scan->yb_aggrefs, + RelationGetDescr(scan->indexRelation), + scan->indexRelation, + ybso->handle); + else + ybginSetupTargets(scan); YbSetCatalogCacheVersion(ybso->handle, YbGetCatalogCacheVersion()); @@ -593,6 +603,36 @@ ybgingettuple(IndexScanDesc scan, ScanDirection dir) /* fetch */ YbItemPointerSetInvalid(&scan->xs_heaptid); + if (scan->yb_aggrefs) + { + /* + * As of 2023-06-28, aggregate pushdown is only implemented for + * IndexOnlyScan, not IndexScan. Also, this codepath is not exercised + * because such queries hit error "non-default search mode" when + * setting up binds. + */ + Assert(scan->xs_want_itup); + + /* + * TODO(jason): don't assume that recheck is needed. + */ + scan->xs_recheck = true; + + /* + * Aggregate pushdown directly modifies the scan slot rather than + * passing it through xs_hitup or xs_itup. + * + * The index id passed into ybFetchNext is likely not going to be used + * as it is only used for system table scans, which have oid, and there + * shouldn't exist any system table secondary indexes that index the + * oid column. + * TODO(jason): deduplicate with ybcingettuple. 
+ */ + scan->yb_agg_slot = + ybFetchNext(ybso->handle, scan->yb_agg_slot, + RelationGetRelid(scan->indexRelation)); + return !TTS_EMPTY(scan->yb_agg_slot); + } while (HeapTupleIsValid(tup = ybginFetchNextHeapTuple(scan))) { if (true) /* TODO(jason): don't assume a match. */ diff --git a/src/postgres/src/backend/bootstrap/bootstrap.c b/src/postgres/src/backend/bootstrap/bootstrap.c index fff4a8d44402..5754d51bdef1 100644 --- a/src/postgres/src/backend/bootstrap/bootstrap.c +++ b/src/postgres/src/backend/bootstrap/bootstrap.c @@ -381,7 +381,8 @@ BootstrapModeMain(int argc, char *argv[], bool check_only) "template1", InvalidOid, YbFirstBootstrapObjectId, - false /* colocated */); + false /* colocated */, + NULL /* retry_on_oid_collision */); } /* diff --git a/src/postgres/src/backend/catalog/index.c b/src/postgres/src/backend/catalog/index.c index 27bceea85e39..e2cf65908de9 100644 --- a/src/postgres/src/backend/catalog/index.c +++ b/src/postgres/src/backend/catalog/index.c @@ -4239,7 +4239,8 @@ reindex_relation(Oid relid, int flags, ReindexParams *params) Relation new_rel = table_open(YbGetStorageRelid(rel), AccessExclusiveLock); AttrMap *new_to_old_attmap = build_attrmap_by_name(RelationGetDescr(new_rel), - RelationGetDescr(rel)); + RelationGetDescr(rel), + false /* yb_ignore_type_mismatch */); table_close(new_rel, AccessExclusiveLock); YbDropAndRecreateIndex(indexOid, relid, rel, new_to_old_attmap); RemoveReindexPending(indexOid); diff --git a/src/postgres/src/backend/catalog/partition.c b/src/postgres/src/backend/catalog/partition.c index 0d988f21c18c..8f63cc6ae896 100644 --- a/src/postgres/src/backend/catalog/partition.c +++ b/src/postgres/src/backend/catalog/partition.c @@ -232,7 +232,8 @@ map_partition_varattnos(List *expr, int fromrel_varno, * Need to pass "yb_ignore_type_mismatch" else where for this process. 
*/ part_attmap = build_attrmap_by_name(RelationGetDescr(to_rel), - RelationGetDescr(from_rel)); + RelationGetDescr(from_rel), + false /* yb_ignore_type_mismatch */); expr = (List *) map_variable_attnos((Node *) expr, fromrel_varno, 0, diff --git a/src/postgres/src/backend/catalog/yb_catalog/yb_type.c b/src/postgres/src/backend/catalog/yb_catalog/yb_type.c index 3b36f5383bdf..0c45585e95ee 100644 --- a/src/postgres/src/backend/catalog/yb_catalog/yb_type.c +++ b/src/postgres/src/backend/catalog/yb_catalog/yb_type.c @@ -1505,3 +1505,42 @@ void YbGetTypeTable(const YBCPgTypeEntity **type_table, int *count) { *type_table = YbTypeEntityTable; *count = sizeof(YbTypeEntityTable)/sizeof(YBCPgTypeEntity); } + +int64_t +YbPostgresEpochToUnixEpoch(int64_t postgres_t) +{ + return postgres_t + ((POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * USECS_PER_DAY); + +} + +int64_t +YbUnixEpochToPostgresEpoch(int64_t unix_t) +{ + return unix_t - ((POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * USECS_PER_DAY); +} + +/* + * This function creates a TEXTARRAY datum from the given array of C strings. + * It takes an array of pointers to C strings, the number of elements in the + * array, and pointers to store the resulting datum and its length. The + * resulting TEXTARRAY datum is palloc'd, and it contains a copy of the input + * strings. Therefore, the input strings themselves do not need to be palloc'd. 
+ */ +void +YbConstructTextArrayDatum(const char **strings, const int nelems, + char **datum, size_t *len) +{ + ArrayType *array; + Datum *elems = NULL; + + if (nelems > 0) { + elems = (Datum *) palloc(nelems * sizeof(Datum)); + for (int i = 0; i < nelems; i++) + elems[i] = CStringGetTextDatum(strings[i]); + } + + array = construct_array(elems, nelems, TEXTOID, -1, false, 'i'); + + *datum = VARDATA_ANY(array); + *len = VARSIZE_ANY_EXHDR(array); +} diff --git a/src/postgres/src/backend/catalog/yb_system_views.sql b/src/postgres/src/backend/catalog/yb_system_views.sql index dcb2e8b28a35..810f44d87c40 100644 --- a/src/postgres/src/backend/catalog/yb_system_views.sql +++ b/src/postgres/src/backend/catalog/yb_system_views.sql @@ -28,6 +28,41 @@ SELECT FROM yb_pg_stat_get_queries(NULL) AS S LEFT JOIN pg_database AS D ON (S.db_oid = D.oid); +-- YB_TODO(Arpan) Need to use this new definition for pg_locks without breaking initdb +-- +-- CREATE VIEW pg_locks AS +-- SELECT l.locktype, +-- l.database, +-- l.relation, +-- null::int AS page, +-- null::smallint AS tuple, +-- null::text AS virtualxid, +-- null::xid AS transactionid, +-- null::oid AS classid, +-- null::oid AS objid, +-- null::smallint AS objsubid, +-- null::text AS virtualtransaction, +-- l.pid, +-- array_to_string(mode, ',') AS mode, +-- l.granted, +-- l.fastpath, +-- l.waitstart, +-- l.waitend, +-- jsonb_build_object('node', l.node, +-- 'transactionid', l.transaction_id, +-- 'subtransaction_id', l.subtransaction_id, +-- 'is_explicit', l.is_explicit, +-- 'tablet_id', l.tablet_id, +-- 'blocked_by', l.blocked_by, +-- 'keyrangedetails', jsonb_build_object( +-- 'cols', to_jsonb(l.hash_cols || l.range_cols), +-- 'attnum', l.attnum, +-- 'column_id', l.column_id, +-- 'multiple_rows_locked', l.multiple_rows_locked +-- ) +-- ) AS ybdetails +-- FROM yb_lock_status(null, null) AS l; + CREATE OR REPLACE FUNCTION yb_is_database_colocated(check_legacy boolean DEFAULT false) RETURNS boolean diff --git 
a/src/postgres/src/backend/commands/copy.c b/src/postgres/src/backend/commands/copy.c index 9e53a18d724c..4fedafc9e179 100644 --- a/src/postgres/src/backend/commands/copy.c +++ b/src/postgres/src/backend/commands/copy.c @@ -473,32 +473,6 @@ ProcessCopyOptions(ParseState *pstate, errorConflictingDefElem(defel, pstate); opts_out->delim = defGetString(defel); } - else if (strcmp(defel->defname, "rows_per_transaction") == 0) - { - int rows = defGetInt32(defel); - if (rows >= 0) - opts_out->batch_size = rows; - else - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("argument to option \"%s\" must be a positive integer", defel->defname), - parser_errposition(pstate, defel->location))); - } - else if (strcmp(defel->defname, "skip") == 0) - { - int64_t num_initial_skipped_rows = defGetInt64(defel); - if (num_initial_skipped_rows >= 0) - opts_out->num_initial_skipped_rows = num_initial_skipped_rows; - else - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("argument to option \"%s\" must be a nonnegative integer", defel->defname), - parser_errposition(pstate, defel->location))); - } - else if (strcmp(defel->defname, "disable_fk_check") == 0) - opts_out->disable_fk_check = true; - else if (strcmp(defel->defname, "replace") == 0) - opts_out->on_conflict_action = ONCONFLICT_YB_REPLACE; else if (strcmp(defel->defname, "null") == 0) { if (opts_out->null_print) @@ -596,6 +570,32 @@ ProcessCopyOptions(ParseState *pstate, defel->defname), parser_errposition(pstate, defel->location))); } + else if (strcmp(defel->defname, "rows_per_transaction") == 0) + { + int rows = defGetInt32(defel); + if (rows >= 0) + opts_out->batch_size = rows; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("argument to option \"%s\" must be a positive integer", defel->defname), + parser_errposition(pstate, defel->location))); + } + else if (strcmp(defel->defname, "skip") == 0) + { + int64_t num_initial_skipped_rows = defGetInt64(defel); + 
if (num_initial_skipped_rows >= 0) + opts_out->num_initial_skipped_rows = num_initial_skipped_rows; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("argument to option \"%s\" must be a nonnegative integer", defel->defname), + parser_errposition(pstate, defel->location))); + } + else if (strcmp(defel->defname, "disable_fk_check") == 0) + opts_out->disable_fk_check = true; + else if (strcmp(defel->defname, "replace") == 0) + opts_out->on_conflict_action = ONCONFLICT_YB_REPLACE; else ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), diff --git a/src/postgres/src/backend/commands/copyfrom.c b/src/postgres/src/backend/commands/copyfrom.c index 0fa6004da06c..8ee391d2abde 100644 --- a/src/postgres/src/backend/commands/copyfrom.c +++ b/src/postgres/src/backend/commands/copyfrom.c @@ -56,8 +56,6 @@ /* Yugabyte includes */ #include "executor/ybcModifyTable.h" -/* YB_TODO(ena & mihnea@yugabyte) Rework COPY command */ - /* * No more than this many tuples per CopyMultiInsertBuffer * @@ -121,6 +119,8 @@ CopyFromErrorCallback(void *arg) { CopyFromState cstate = (CopyFromState) arg; + pgstat_progress_update_param(PROGRESS_COPY_STATUS, CP_ERROR); + if (cstate->opts.binary) { /* can't usefully display the data */ @@ -543,7 +543,7 @@ CopyFrom(CopyFromState cstate) ModifyTableState *mtstate; ExprContext *econtext; TupleTableSlot *singleslot = NULL; - MemoryContext oldcontext = CurrentMemoryContext; + MemoryContext oldcontext = GetCurrentMemoryContext(); PartitionTupleRouting *proute = NULL; ErrorContextCallback errcallback; @@ -558,9 +558,23 @@ CopyFrom(CopyFromState cstate) bool has_instead_insert_row_trig; bool leafpart_use_multi_insert = false; + /* Yb variables */ + bool useYBMultiInsert; + bool useNonTxnInsert; + bool has_more_tuples; + Assert(cstate->rel); Assert(list_length(cstate->range_table) == 1); + /* + * If the batch size is not explicitly set in the query by the user, + * use the session variable value. 
+ */ + if (cstate->opts.batch_size < 0) + { + cstate->opts.batch_size = yb_default_copy_from_rows_per_transaction; + } + /* * The target must be a plain, foreign, or partitioned relation, or have * an INSTEAD OF INSERT row trigger. (Currently, such triggers are only @@ -665,6 +679,7 @@ CopyFrom(CopyFromState cstate) ExecInitRangeTable(estate, cstate->range_table); resultRelInfo = target_resultRelInfo = makeNode(ResultRelInfo); ExecInitResultRelation(estate, resultRelInfo, 1); + estate->yb_es_is_fk_check_disabled = cstate->opts.disable_fk_check; /* Verify the named relation is a valid target for INSERT */ CheckValidResultRel(resultRelInfo, CMD_INSERT); @@ -715,6 +730,48 @@ CopyFrom(CopyFromState cstate) cstate->qualexpr = ExecInitQual(castNode(List, cstate->whereClause), &mtstate->ps); + if (cstate->opts.batch_size > 0) + { + /* + * Batched copy is not supported + * under the following use cases in which case + * all rows will be copied over in a single transaction. + */ + int batch_size = 0; + + if (!IsYBRelation(resultRelInfo->ri_RelationDesc)) + { + Assert(resultRelInfo->ri_RelationDesc->rd_rel->relpersistence == RELPERSISTENCE_TEMP || + resultRelInfo->ri_RelationDesc->rd_rel->relkind == RELKIND_FOREIGN_TABLE); + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Batched COPY is not supported on %s tables. " + "Defaulting to using one transaction for the entire copy.", + YbIsTempRelation(resultRelInfo->ri_RelationDesc) ? "temporary" : "foreign"), + errhint("Either copy onto non-temporary table or set rows_per_transaction " + "option to `0` to disable batching and remove this warning."))); + } + else if (YBIsDataSent()) + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Batched COPY is not supported in transaction blocks. 
" + "Defaulting to using one transaction for the entire copy."), + errhint("Either run this COPY outside of a transaction block or set " + "rows_per_transaction option to `0` to disable batching and " + "remove this warning."))); + else if (HasNonRITrigger(cstate->rel->trigdesc)) + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Batched COPY is not supported on table with non RI trigger. " + "Defaulting to using one transaction for the entire copy."), + errhint("Set rows_per_transaction option to `0` to disable batching " + "and remove this warning."))); + else + batch_size = cstate->opts.batch_size; + + cstate->opts.batch_size = batch_size; + } + /* * It's generally more efficient to prepare a bunch of tuples for * insertion, and insert them in one table_multi_insert() call, than call @@ -784,6 +841,7 @@ CopyFrom(CopyFromState cstate) * flag that we must later determine if we can use bulk-inserts for * the partition being inserted into. */ + useYBMultiInsert = IsYBRelation(resultRelInfo->ri_RelationDesc); if (proute) insertMethod = CIM_MULTI_CONDITIONAL; else @@ -793,6 +851,21 @@ CopyFrom(CopyFromState cstate) estate, mycid, ti_options); } + /* + * Only use non-txn insert if it's explicitly enabled, the relation meets criteria for + * multi insert (e.g. no triggers), and the relation does not have secondary indices. + */ + if (YBIsNonTxnCopyEnabled() && + useYBMultiInsert && + !YBCRelInfoHasSecondaryIndices(resultRelInfo)) + { + useNonTxnInsert = true; + } + else + { + useNonTxnInsert = false; + } + /* * If not using batch mode (which allocates slots as needed) set up a * tuple slot too. When inserting into a partitioned table, we also need @@ -828,18 +901,51 @@ CopyFrom(CopyFromState cstate) errcallback.previous = error_context_stack; error_context_stack = &errcallback; - for (;;) + /* Warn if non-txn COPY enabled and relation does not meet non-txn criteria. 
*/ + if (YBIsNonTxnCopyEnabled() && !useNonTxnInsert) + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("non-transactional COPY is not supported on this relation; " + "using transactional COPY instead"), + errhint("Non-transactional COPY is not supported on relations with " + "secondary indices or triggers."))); + + has_more_tuples = true; + + /* Skip num_initial_skipped_rows. */ + for (uint64 i = 0; i < cstate->opts.num_initial_skipped_rows; i++) + { + has_more_tuples = NextCopyFrom(cstate, econtext, NULL, NULL, true /* skip_row */); + if (!has_more_tuples) + break; + } + + /* + * When batch size is not provided from the query option, + * default behavior is to read each line from the file + * until no more lines are left. If batch size is provided, + * lines will be read in batch sizes at a time. + */ +yb_process_more_batches: + for (int i = 0; cstate->opts.batch_size == 0 || i < cstate->opts.batch_size; i++) { TupleTableSlot *myslot; bool skip_tuple; + if (IsYBRelation(resultRelInfo->ri_RelationDesc)) + MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); + CHECK_FOR_INTERRUPTS(); - /* - * Reset the per-tuple exprcontext. We do this after every tuple, to - * clean-up after expression evaluations etc. - */ - ResetPerTupleExprContext(estate); + /* YB_REVIEW(neil) Find equivalent code for (nBufferedTuples == 0) */ + if (!IsYBRelation(resultRelInfo->ri_RelationDesc)) + { + /* + * Reset the per-tuple exprcontext. We do this after every tuple, to + * clean-up after expression evaluations etc. + */ + ResetPerTupleExprContext(estate); + } /* select slot to (initially) load row into */ if (insertMethod == CIM_SINGLE || proute) @@ -860,13 +966,15 @@ CopyFrom(CopyFromState cstate) * Switch to per-tuple context before calling NextCopyFrom, which does * evaluate default expressions etc. and requires per-tuple context. 
*/ - MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); + if (!IsYBRelation(resultRelInfo->ri_RelationDesc)) + MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); ExecClearTuple(myslot); /* Directly store the values/nulls array in the slot */ - if (!NextCopyFrom(cstate, econtext, myslot->tts_values, myslot->tts_isnull, - true /* skip_row */)) + has_more_tuples = NextCopyFrom(cstate, econtext, myslot->tts_values, myslot->tts_isnull, + true /* skip_row */); + if (!has_more_tuples) break; ExecStoreVirtualTuple(myslot); @@ -878,7 +986,8 @@ CopyFrom(CopyFromState cstate) myslot->tts_tableOid = RelationGetRelid(target_resultRelInfo->ri_RelationDesc); /* Triggers and stuff need to be invoked in query context. */ - MemoryContextSwitchTo(oldcontext); + if (!IsYBRelation(resultRelInfo->ri_RelationDesc)) + MemoryContextSwitchTo(oldcontext); if (cstate->whereClause) { @@ -1007,6 +1116,14 @@ CopyFrom(CopyFromState cstate) } } + /* + * Tuple memory will be allocated to per row memory context + * which will be cleaned up after every row gets processed. + * Thus there is no need to clean the tuple memory. 
+ */ + if (IsYBRelation(resultRelInfo->ri_RelationDesc)) + myslot->tts_flags &= ~TTS_FLAG_SHOULDFREE; + /* ensure that triggers etc see the right relation */ myslot->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc); } @@ -1084,13 +1201,36 @@ CopyFrom(CopyFromState cstate) List *recheckIndexes = NIL; /* OK, store the tuple */ - if (resultRelInfo->ri_FdwRoutine != NULL) + if (IsYBRelation(resultRelInfo->ri_RelationDesc)) + { + /* YB_REVIEW(neil) Change executor to work with slot */ + TupleDesc tupDesc = RelationGetDescr(cstate->rel); + HeapTuple tuple = ExecCopySlotHeapTuple(myslot); + if (useNonTxnInsert) + { + YBCExecuteNonTxnInsert(resultRelInfo->ri_RelationDesc, + tupDesc, + tuple, + cstate->opts.on_conflict_action); + } + else + { + YBCExecuteInsert(resultRelInfo->ri_RelationDesc, + tupDesc, + tuple, + cstate->opts.on_conflict_action); + } + } + else if (resultRelInfo->ri_FdwRoutine != NULL) { + MemoryContext saved_context; + saved_context = MemoryContextSwitchTo(estate->es_query_cxt); myslot = resultRelInfo->ri_FdwRoutine->ExecForeignInsert(estate, resultRelInfo, myslot, NULL); + MemoryContextSwitchTo(saved_context); if (myslot == NULL) /* "do nothing" */ continue; /* next tuple please */ @@ -1126,6 +1266,12 @@ CopyFrom(CopyFromState cstate) } } + /* + * Free context per row. + */ + if (IsYBRelation(cstate->rel)) + ResetPerTupleExprContext(estate); + /* * We count only tuples not suppressed by a BEFORE INSERT trigger * or FDW; this is the same definition used by nodeModifyTable.c @@ -1134,9 +1280,41 @@ CopyFrom(CopyFromState cstate) */ pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED, ++processed); + pgstat_progress_update_param(PROGRESS_COPY_BYTES_PROCESSED, cstate->bytes_processed); } } + if (cstate->opts.batch_size > 0) + { + /* + * Handle queued AFTER triggers before committing. If there are errors, + * do not commit the current batch. + */ + AfterTriggerEndQuery(estate); + + /* + * Commit transaction per batch. 
+ * When CopyFrom method is called, we are already inside a transaction block + * and relevant transaction state properties have been previously set. + */ + YBCCommitTransaction(); + + /* Update progress of the COPY command as well. + */ + YBInitializeTransaction(); + + /* Start a new AFTER trigger */ + AfterTriggerBeginQuery(); + } + else + { + /* We need to flush buffered operations so that error callback is executed */ + YBFlushBufferedOperations(); + } + + if (has_more_tuples) + goto yb_process_more_batches; + /* Flush any remaining buffered tuples */ if (insertMethod != CIM_SINGLE) { @@ -1237,7 +1415,7 @@ BeginCopyFrom(ParseState *pstate, * We allocate everything used by a cstate in a new memory context. This * avoids memory leaks during repeated use of COPY in a query. */ - cstate->copycontext = AllocSetContextCreate(CurrentMemoryContext, + cstate->copycontext = AllocSetContextCreate(GetCurrentMemoryContext(), "COPY", ALLOCSET_DEFAULT_SIZES); @@ -1351,6 +1529,7 @@ BeginCopyFrom(ParseState *pstate, } cstate->copy_src = COPY_FILE; /* default */ + pgstat_progress_update_param(PROGRESS_COPY_STATUS, CP_IN_PROG); cstate->whereClause = whereClause; @@ -1492,7 +1671,20 @@ BeginCopyFrom(ParseState *pstate, progress_vals[1] = PROGRESS_COPY_TYPE_PIPE; Assert(!is_program); /* the grammar does not allow this */ if (whereToSendOutput == DestRemote) + { + bool isDataSent = YBIsDataSent(); + bool isDataSentForCurrQuery = YBIsDataSentForCurrQuery(); ReceiveCopyBegin(cstate); + /* + * ReceiveCopyBegin sends a message back to the client + * with the expected format of the copy data. + * This implicitly causes YB data to be marked as sent + * although the message does not contain any data from YB. + * So we can safely roll back YBIsDataSent to its previous value. 
+ */ + if (!isDataSent) YBMarkDataNotSent(); + if (!isDataSentForCurrQuery) YBMarkDataNotSentForCurrQuery(); + } else cstate->copy_file = stdin; } @@ -1588,6 +1780,7 @@ EndCopyFrom(CopyFromState cstate) } pgstat_progress_end_command(); + pgstat_progress_update_param(PROGRESS_COPY_STATUS, CP_SUCCESS); MemoryContextDelete(cstate->copycontext); pfree(cstate); diff --git a/src/postgres/src/backend/commands/copyfromparse.c b/src/postgres/src/backend/commands/copyfromparse.c index 5bb6cd37e389..3f3c3a9a2b41 100644 --- a/src/postgres/src/backend/commands/copyfromparse.c +++ b/src/postgres/src/backend/commands/copyfromparse.c @@ -848,6 +848,10 @@ NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields) * * 'values' and 'nulls' arrays must be the same length as columns of the * relation passed to BeginCopyFrom. This function fills the arrays. + * + * 'skip_row' is used to specify whether we should skip format checking for + * this row. In particular, if 'skip_row' is true, we will not raise error + * upon reading an invalid row. */ bool NextCopyFrom(CopyFromState cstate, ExprContext *econtext, @@ -868,8 +872,11 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext, attr_count = list_length(cstate->attnumlist); /* Initialize all values for row to NULL */ - MemSet(values, 0, num_phys_attrs * sizeof(Datum)); - MemSet(nulls, true, num_phys_attrs * sizeof(bool)); + if (!skip_row) + { + MemSet(values, 0, num_phys_attrs * sizeof(Datum)); + MemSet(nulls, true, num_phys_attrs * sizeof(bool)); + } if (!cstate->opts.binary) { @@ -1021,7 +1028,7 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext, * per-tuple memory context in it. 
*/ Assert(econtext != NULL); - Assert(CurrentMemoryContext == econtext->ecxt_per_tuple_memory); + Assert(GetCurrentMemoryContext() == econtext->ecxt_per_tuple_memory); values[defmap[i]] = ExecEvalExpr(defexprs[i], econtext, &nulls[defmap[i]]); diff --git a/src/postgres/src/backend/commands/copyto.c b/src/postgres/src/backend/commands/copyto.c index fca29a9a1050..4a7e33bbe5a0 100644 --- a/src/postgres/src/backend/commands/copyto.c +++ b/src/postgres/src/backend/commands/copyto.c @@ -329,6 +329,7 @@ EndCopy(CopyToState cstate) } pgstat_progress_end_command(); + pgstat_progress_update_param(PROGRESS_COPY_STATUS, CP_SUCCESS); MemoryContextDelete(cstate->copycontext); pfree(cstate); @@ -407,7 +408,7 @@ BeginCopyTo(ParseState *pstate, * We allocate everything used by a cstate in a new memory context. This * avoids memory leaks during repeated use of COPY in a query. */ - cstate->copycontext = AllocSetContextCreate(CurrentMemoryContext, + cstate->copycontext = AllocSetContextCreate(GetCurrentMemoryContext(), "COPY", ALLOCSET_DEFAULT_SIZES); @@ -656,6 +657,11 @@ BeginCopyTo(ParseState *pstate, cstate->copy_dest = COPY_FILE; /* default */ + /* YB_REVIEW() Stat for PROGRESS_COPY_STATUS comes from + * https://phabricator.dev.yugabyte.com/D17504 + */ + pgstat_progress_update_param(PROGRESS_COPY_STATUS, CP_IN_PROG); + if (pipe) { progress_vals[1] = PROGRESS_COPY_TYPE_PIPE; @@ -815,7 +821,7 @@ DoCopyTo(CopyToState cstate) * datatype output routines, and should be faster than retail pfree's * anyway. (We don't need a whole econtext as CopyFrom does.) 
*/ - cstate->rowcontext = AllocSetContextCreate(CurrentMemoryContext, + cstate->rowcontext = AllocSetContextCreate(GetCurrentMemoryContext(), "COPY TO", ALLOCSET_DEFAULT_SIZES); @@ -875,10 +881,26 @@ DoCopyTo(CopyToState cstate) { TupleTableSlot *slot; TableScanDesc scandesc; + bool is_yb_relation; + MemoryContext oldcontext; + MemoryContext yb_context; scandesc = table_beginscan(cstate->rel, GetActiveSnapshot(), 0, NULL); slot = table_slot_create(cstate->rel, NULL); + /* + * Create and switch to a temporary memory context that we can reset + * once per row to recover Yugabyte palloc'd memory. + */ + is_yb_relation = IsYBRelation(cstate->rel); + if (is_yb_relation) + { + yb_context = AllocSetContextCreate(GetCurrentMemoryContext(), + "COPY TO (YB)", + ALLOCSET_DEFAULT_SIZES); + oldcontext = MemoryContextSwitchTo(yb_context); + } + processed = 0; while (table_scan_getnextslot(scandesc, ForwardScanDirection, slot)) { @@ -896,6 +918,9 @@ DoCopyTo(CopyToState cstate) */ pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED, ++processed); + /* Free Yugabyte memory for this row */ + if (is_yb_relation) + MemoryContextReset(yb_context); } ExecDropSingleTupleTableSlot(slot); diff --git a/src/postgres/src/backend/commands/createas.c b/src/postgres/src/backend/commands/createas.c index ca42fbc3b81a..8853ca81e86d 100644 --- a/src/postgres/src/backend/commands/createas.c +++ b/src/postgres/src/backend/commands/createas.c @@ -605,13 +605,7 @@ intorel_receive(TupleTableSlot *slot, DestReceiver *self) if (IsYBRelation(myState->rel)) { - /* - * YB_TODO(neil@yugabyte) Write Yugabyte API to work with slot. - * - * Current Yugabyte API works with HeapTuple instead of slot. - * - Create tuple as a workaround to compile. - * - Pass slot to Yugabyte call once the API is fixed. 
- */ + /* YB_TODO(API for Slot) Wait for slot API */ bool shouldFree = true; HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); diff --git a/src/postgres/src/backend/commands/dbcommands.c b/src/postgres/src/backend/commands/dbcommands.c index 6ede3b0e3312..fbd2ac9b4c21 100644 --- a/src/postgres/src/backend/commands/dbcommands.c +++ b/src/postgres/src/backend/commands/dbcommands.c @@ -1393,12 +1393,30 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) } else { - /* Select an OID for the new database if is not explicitly configured. */ - do + /* + * In vanilla PG, OIDs are assigned by a cluster-wide counter. + * For YSQL, we allocate OIDs on a per-database level and share the + * per-database OID range on tserver for all databases. OID collision + * happens due to the same range of OIDs allocated to different tservers. + * OID collision can happen for CREATE DATABASE. If it happens, we want to + * keep retrying CREATE DATABASE using the next available OID. + * This is needed for xcluster. + */ + bool retry_on_oid_collision = false; + do { - dboid = GetNewOidWithIndex(pg_database_rel, DatabaseOidIndexId, - Anum_pg_database_oid); - } while (check_db_file_conflict(dboid)); + /* Select an OID for the new database if is not explicitly configured. 
*/ + do + { + dboid = GetNewOidWithIndex(pg_database_rel, DatabaseOidIndexId, + Anum_pg_database_oid); + } while (check_db_file_conflict(dboid)); + + retry_on_oid_collision = false; + if (IsYugaByteEnabled()) + YBCCreateDatabase(dboid, dbname, src_dboid, InvalidOid, dbcolocated, + &retry_on_oid_collision); + } while (retry_on_oid_collision); } /* @@ -1427,9 +1445,6 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) new_record[Anum_pg_database_datminmxid - 1] = TransactionIdGetDatum(src_minmxid); new_record[Anum_pg_database_dattablespace - 1] = ObjectIdGetDatum(dst_deftablespace); - if (IsYugaByteEnabled()) - YBCCreateDatabase(dboid, dbname, src_dboid, InvalidOid, dbcolocated); - new_record[Anum_pg_database_datcollate - 1] = CStringGetTextDatum(dbcollate); new_record[Anum_pg_database_datctype - 1] = CStringGetTextDatum(dbctype); if (dbiculocale) diff --git a/src/postgres/src/backend/commands/explain.c b/src/postgres/src/backend/commands/explain.c index d2c76e897224..ce40ccb2108e 100644 --- a/src/postgres/src/backend/commands/explain.c +++ b/src/postgres/src/backend/commands/explain.c @@ -256,6 +256,52 @@ YbExplainRpcRequestStat(YbExplainState *yb_es, YbStatLabel label, double count, timing / 1000000.0, 3, yb_es->es); } +/* Maps a row mark type to a string. */ +static const char * +YbRowMarkTypeToPgsqlString(RowMarkType row_mark_type) +{ + switch (row_mark_type) + { + case ROW_MARK_EXCLUSIVE: + return "FOR UPDATE"; + case ROW_MARK_NOKEYEXCLUSIVE: + return "FOR NO KEY UPDATE"; + case ROW_MARK_SHARE: + return "FOR SHARE"; + case ROW_MARK_KEYSHARE: + return "FOR KEY SHARE"; + default: + return ""; + } +} + +/* Explains a scan lock using row marks. 
*/ +static void +YbExplainScanLocks(YbLockMechanism yb_lock_mechanism, ExplainState *es) +{ + ListCell *l; + const char *lock_mode; + + if (yb_lock_mechanism == YB_NO_SCAN_LOCK) + return; + + foreach(l, es->pstmt->rowMarks) + { + PlanRowMark *erm = (PlanRowMark *) lfirst(l); + if (erm->markType != ROW_MARK_REFERENCE && + erm->markType != ROW_MARK_COPY) + { + lock_mode = YbRowMarkTypeToPgsqlString(erm->markType); + break; + } + } + + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfo(es->str, " (Locked %s)", lock_mode); + else + ExplainPropertyText("Lock Type", lock_mode, es); +} + /* * ExplainQuery - * execute an EXPLAIN command @@ -1653,10 +1699,12 @@ ExplainNode(PlanState *planstate, List *ancestors, case T_ValuesScan: case T_CteScan: case T_WorkTableScan: + YbExplainScanLocks(((Scan *) plan)->yb_lock_mechanism, es); ExplainScanTarget((Scan *) plan, es); break; case T_ForeignScan: case T_CustomScan: + YbExplainScanLocks(((Scan *) plan)->yb_lock_mechanism, es); if (((Scan *) plan)->scanrelid > 0) ExplainScanTarget((Scan *) plan, es); break; @@ -1664,6 +1712,7 @@ ExplainNode(PlanState *planstate, List *ancestors, { IndexScan *indexscan = (IndexScan *) plan; + YbExplainScanLocks(((Scan *) plan)->yb_lock_mechanism, es); ExplainIndexScanDetails(indexscan->indexid, indexscan->indexorderdir, es); @@ -2280,6 +2329,14 @@ ExplainNode(PlanState *planstate, List *ancestors, break; } + /* YB aggregate pushdown */ + if (IsYugaByteEnabled()) + { + List **aggrefs = YbPlanStateTryGetAggrefs(planstate); + if (aggrefs && *aggrefs != NIL) + ExplainPropertyBool("Partial Aggregate", true, es); + } + /* * Prepare per-worker JIT instrumentation. As with the overall JIT * summary, this is printed only if printing costs is enabled. 
diff --git a/src/postgres/src/backend/commands/extension.c b/src/postgres/src/backend/commands/extension.c index da1235490403..afe23a59b32f 100644 --- a/src/postgres/src/backend/commands/extension.c +++ b/src/postgres/src/backend/commands/extension.c @@ -751,7 +751,7 @@ execute_sql_string(const char *sql) * limit the memory used when there are many commands in the string. */ per_parsetree_context = - AllocSetContextCreate(CurrentMemoryContext, + AllocSetContextCreate(GetCurrentMemoryContext(), "execute_sql_string per-statement context", ALLOCSET_DEFAULT_SIZES); oldcontext = MemoryContextSwitchTo(per_parsetree_context); diff --git a/src/postgres/src/backend/commands/functioncmds.c b/src/postgres/src/backend/commands/functioncmds.c index 94a8c4145bff..18c8642c5cbf 100644 --- a/src/postgres/src/backend/commands/functioncmds.c +++ b/src/postgres/src/backend/commands/functioncmds.c @@ -2400,7 +2400,6 @@ CallStmtResultDesc(CallStmt *stmt) ObjectAddress AlterFunctionOwner(AlterOwnerStmt *stmt, Oid newOwnerId) { - Oid procId; Relation relation; ObjectAddress address; HeapTuple tup; @@ -2420,18 +2419,11 @@ AlterFunctionOwner(AlterOwnerStmt *stmt, Oid newOwnerId) (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("function with OID %u does not exist", address.objectId))); - /* YB_TODO(paullee@yugabyte) - * - OID is now a regular column. - * - Change code to select its value accordingly. - */ -#ifdef YB_TODO - procId = HeapTupleGetOid(tup); -#endif - procId = YB_HACK_INVALID_OID; - AlterFunctionOwner_internal(relation, tup, newOwnerId); - ObjectAddressSet(address, ProcedureRelationId, procId); + /* YB_TEST(neil) address should already have procid (address.objectid == tup->oid?) 
+ * ObjectAddressSet(address, ProcedureRelationId, procId); + */ heap_freetuple(tup); @@ -2446,10 +2438,12 @@ AlterFunctionOwner(AlterOwnerStmt *stmt, Oid newOwnerId) void AlterFunctionOwner_internal(Relation rel, HeapTuple tup, Oid newOwnerId) { + Oid procId; Form_pg_proc proc; Oid namespaceId; proc = (Form_pg_proc) GETSTRUCT(tup); + procId = proc->oid; /* Assigning a function to the same owner is a no-op */ if (proc->proowner == newOwnerId) @@ -2484,17 +2478,13 @@ AlterFunctionOwner_internal(Relation rel, HeapTuple tup, Oid newOwnerId) proc->proowner = newOwnerId; CatalogTupleUpdate(rel, &tup->t_self, tup); - /* YB_TODO(paullee@yugabyte) - * - OID is now a regular column. - * - Change code to select its value accordingly. - */ /* Update owner dependency reference */ changeDependencyOnOwner(ProcedureRelationId, - YB_HACK_INVALID_OID /* HeapTupleGetOid(tup) */, + procId, newOwnerId); InvokeObjectPostAlterHook(ProcedureRelationId, - YB_HACK_INVALID_OID /* HeapTupleGetOid(tup) */, + procId, 0); } @@ -2543,12 +2533,8 @@ RenameFunction(RenameStmt *stmt, const char *newname) namestrcpy(&(((Form_pg_proc) GETSTRUCT(tup))->proname), newname); CatalogTupleUpdate(relation, &tup->t_self, tup); - /* YB_TODO(paullee@yugabyte) - * - OID is now a regular column. - * - Change code to select its value accordingly. - */ InvokeObjectPostAlterHook(ProcedureRelationId, - YB_HACK_INVALID_OID /* HeapTupleGetOid(tup) */, + proc->oid, 0); heap_freetuple(tup); diff --git a/src/postgres/src/backend/commands/indexcmds.c b/src/postgres/src/backend/commands/indexcmds.c index 63c3293906a5..f24066f57e7f 100644 --- a/src/postgres/src/backend/commands/indexcmds.c +++ b/src/postgres/src/backend/commands/indexcmds.c @@ -1577,13 +1577,10 @@ DefineIndex(Oid relationId, } childidxs = RelationGetIndexList(childrel); - - /* YB_TODO "convert_tuples_by_name_map" is no longer called here. - * Need to pass `false yb_ignore_type_mismatch` differently. 
- */ attmap = build_attrmap_by_name(RelationGetDescr(childrel), - parentDesc); + parentDesc, + false /* yb_ignore_type_mismatch */); foreach(cell, childidxs) { diff --git a/src/postgres/src/backend/commands/matview.c b/src/postgres/src/backend/commands/matview.c index db456180ec4c..5be40733a955 100644 --- a/src/postgres/src/backend/commands/matview.c +++ b/src/postgres/src/backend/commands/matview.c @@ -515,13 +515,7 @@ transientrel_receive(TupleTableSlot *slot, DestReceiver *self) */ if (IsYBRelation(myState->transientrel)) { - /* - * YB_TODO(neil@yugabyte) Write Yugabyte API to work with slot. - * - * Current Yugabyte API works with HeapTuple instead of slot. - * - Create tuple as a workaround to compile. - * - Pass slot to Yugabyte call once the API is fixed. - */ + /* YB_TODO(API for Slot) Wait for slot API */ bool shouldFree = true; HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); diff --git a/src/postgres/src/backend/commands/policy.c b/src/postgres/src/backend/commands/policy.c index 9403b40f3b52..b798b5c33894 100644 --- a/src/postgres/src/backend/commands/policy.c +++ b/src/postgres/src/backend/commands/policy.c @@ -209,7 +209,7 @@ RelationBuildRowSecurity(Relation relation) * a relcache flush. However, to cover the possibility of an error * partway through, we don't make the context long-lived till we're done. */ - rscxt = AllocSetContextCreate(CurrentMemoryContext, + rscxt = AllocSetContextCreate(GetCurrentMemoryContext(), "row security descriptor", ALLOCSET_SMALL_SIZES); MemoryContextCopyAndSetIdentifier(rscxt, diff --git a/src/postgres/src/backend/commands/tablecmds.c b/src/postgres/src/backend/commands/tablecmds.c index 3278f00bee5b..814d655add7b 100644 --- a/src/postgres/src/backend/commands/tablecmds.c +++ b/src/postgres/src/backend/commands/tablecmds.c @@ -1383,11 +1383,9 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, } } - /* YB_TODO "convert_tuples_by_name_map" is no longer called here. 
- * Need to pass `false yb_ignore_type_mismatch` differently. - */ attmap = build_attrmap_by_name(RelationGetDescr(rel), - RelationGetDescr(parent)); + RelationGetDescr(parent), + false /* yb_ignore_type_mismatch */); idxstmt = generateClonedIndexStmt(NULL, idxRel, attmap, &constraintOid); @@ -2147,7 +2145,7 @@ ExecuteTruncateGuts(List *explicit_rels, memset(&hctl, 0, sizeof(HASHCTL)); hctl.keysize = sizeof(Oid); hctl.entrysize = sizeof(ForeignTruncateInfo); - hctl.hcxt = CurrentMemoryContext; + hctl.hcxt = GetCurrentMemoryContext(); ft_htab = hash_create("TRUNCATE for Foreign Tables", 32, /* start small and extend */ @@ -10331,7 +10329,8 @@ addFkRecurseReferencing(List **wqueue, Constraint *fkconstraint, Relation rel, CheckTableNotInUse(partition, "ALTER TABLE"); attmap = build_attrmap_by_name(RelationGetDescr(partition), - RelationGetDescr(rel)); + RelationGetDescr(rel), + false /* yb_ignore_type_mismatch */); for (int j = 0; j < numfks; j++) mapped_fkattnum[j] = attmap->attnums[fkattnum[j] - 1]; @@ -10539,7 +10538,8 @@ CloneFkReferenced(Relation parentRel, Relation partitionRel) trigrel = table_open(TriggerRelationId, RowExclusiveLock); attmap = build_attrmap_by_name(RelationGetDescr(partitionRel), - RelationGetDescr(parentRel)); + RelationGetDescr(parentRel), + false /* yb_ignore_type_mismatch */); foreach(cell, clone) { Oid constrOid = lfirst_oid(cell); @@ -10731,15 +10731,13 @@ CloneFkReferencing(List **wqueue, Relation parentRel, Relation partRel) */ trigrel = table_open(TriggerRelationId, RowExclusiveLock); - /* YB_TODO "convert_tuples_by_name_map" is no longer called here. - * Need to pass `false yb_ignore_type_mismatch` differently. - */ /* * The constraint key may differ, if the columns in the partition are * different. This map is used to convert them. 
*/ attmap = build_attrmap_by_name(RelationGetDescr(partRel), - RelationGetDescr(parentRel)); + RelationGetDescr(parentRel), + false /* yb_ignore_type_mismatch */); partFKs = copyObject(RelationGetFKeyList(partRel)); @@ -12159,13 +12157,13 @@ YbFKTriggerScanBegin(TableScanDesc scan, { descr->vptr = &YbFKTriggerScanVTableIsYugaByteEnabled; descr->cxt = AllocSetContextCreate( - CurrentMemoryContext, "validateForeignKeyConstraint", ALLOCSET_DEFAULT_SIZES); + GetCurrentMemoryContext(), "validateForeignKeyConstraint", ALLOCSET_DEFAULT_SIZES); } else { descr->vptr = &YbFKTriggerScanVTableNotYugaByteEnabled; descr->cxt = AllocSetContextCreate( - CurrentMemoryContext, "validateForeignKeyConstraint", ALLOCSET_SMALL_SIZES); + GetCurrentMemoryContext(), "validateForeignKeyConstraint", ALLOCSET_SMALL_SIZES); } descr->old_cxt = MemoryContextSwitchTo(descr->cxt); return descr; @@ -13038,11 +13036,9 @@ ATPrepAlterColumnType(List **wqueue, /* create a copy to scribble on */ cmd = copyObject(cmd); - /* YB_TODO "convert_tuples_by_name_map" is no longer called here. - * Need to pass `false yb_ignore_type_mismatch` differently. - */ attmap = build_attrmap_by_name(RelationGetDescr(childrel), - RelationGetDescr(rel)); + RelationGetDescr(rel), + false /* yb_ignore_type_mismatch */); ((ColumnDef *) cmd->def)->cooked_default = map_variable_attnos(def->cooked_default, 1, 0, @@ -13512,11 +13508,6 @@ ATExecAlterColumnType(AlteredTableInfo *tab, Relation *yb_mutable_rel, */ if (yb_clone_table) { - /* - * TODO(mislam): check for CDC and xCluster on the table and error out - * here. See https://github.com/yugabyte/yugabyte-db/issues/16625. - */ - *yb_mutable_rel = YbATCloneRelationSetColumnType( rel, colName, targetcollid, typeName, tab->newvals); @@ -13525,6 +13516,13 @@ ATExecAlterColumnType(AlteredTableInfo *tab, Relation *yb_mutable_rel, * table. */ tab->relid = (*yb_mutable_rel)->rd_id; + + /* Update the altered column's attribute number. 
*/ + HeapTuple attTup = + SearchSysCacheAttName(RelationGetRelid(*yb_mutable_rel), colName); + Assert(HeapTupleIsValid(attTup)); + attnum = ((Form_pg_attribute) GETSTRUCT(attTup))->attnum; + ReleaseSysCache(attTup); } /* @@ -13610,23 +13608,28 @@ ATExecAlterColumnType(AlteredTableInfo *tab, Relation *yb_mutable_rel, ReleaseSysCache(typeTuple); + /* + * YB Note: Skip the steps below because datatype and collation + * dependencies for the table have already been created as part of the + * rewrite flow, so we do not need to install them here. + * Also, the pg_statistic entries were not cloned for the altered column, + * so we don't have any statistics to remove. + */ if (!yb_clone_table) { CatalogTupleUpdate(attrelation, &heapTup->t_self, heapTup); table_close(attrelation, RowExclusiveLock); - } - /* Install dependencies on new datatype and collation */ - add_column_datatype_dependency(RelationGetRelid(*yb_mutable_rel), attnum, - targettype); - add_column_collation_dependency(RelationGetRelid(*yb_mutable_rel), attnum, - targetcollid); + /* Install dependencies on new datatype and collation */ + add_column_datatype_dependency(RelationGetRelid(rel), attnum, targettype); + add_column_collation_dependency(RelationGetRelid(rel), attnum, targetcollid); - /* - * Drop any pg_statistic entry for the column, since it's now wrong type - */ - RemoveStatistics(RelationGetRelid(*yb_mutable_rel), attnum); + /* + * Drop any pg_statistic entry for the column, since it's now wrong type + */ + RemoveStatistics(RelationGetRelid(rel), attnum); + } InvokeObjectPostAlterHook(RelationRelationId, RelationGetRelid(*yb_mutable_rel), attnum); @@ -18833,11 +18836,9 @@ AttachPartitionEnsureIndexes(Relation rel, Relation attachrel) /* construct an indexinfo to compare existing indexes against */ info = BuildIndexInfo(idxRel); - /* YB_TODO "convert_tuples_by_name_map" is no longer called here. - * Need to pass `false yb_ignore_type_mismatch` differently. 
- */ attmap = build_attrmap_by_name(RelationGetDescr(attachrel), - RelationGetDescr(rel)); + RelationGetDescr(rel), + false /* yb_ignore_type_mismatch */); constraintOid = get_relation_idx_constraint_oid(RelationGetRelid(rel), idx); /* @@ -19777,11 +19778,9 @@ ATExecAttachPartitionIdx(List **wqueue, Relation parentIdx, RangeVar *name) /* Ensure the indexes are compatible */ childInfo = BuildIndexInfo(partIdx); parentInfo = BuildIndexInfo(parentIdx); - /* YB_TODO "convert_tuples_by_name_map" is no longer called here. - * Need to pass `false yb_ignore_type_mismatch` differently. - */ attmap = build_attrmap_by_name(RelationGetDescr(partTbl), - RelationGetDescr(parentTbl)); + RelationGetDescr(parentTbl), + false /* yb_ignore_type_mismatch */); if (!CompareIndexInfo(childInfo, parentInfo, partIdx->rd_indcollation, parentIdx->rd_indcollation, @@ -20102,7 +20101,7 @@ GetAttributeCompression(Oid atttypid, char *compression) */ static void YbATCopyStats(Oid old_relid, RangeVar *new_rel, Oid new_relid, - AttrNumber *attmap, bool ext_only) + AttrNumber *attmap, int altered_old_attnum) { Relation pg_statistic, pg_statistic_ext; HeapTuple tuple; @@ -20156,9 +20155,6 @@ YbATCopyStats(Oid old_relid, RangeVar *new_rel, Oid new_relid, systable_endscan(scan); table_close(pg_statistic_ext, RowExclusiveLock); - if (ext_only) - return; - /* Copy pg_statistic entries with updated starelid and staattnum values. */ pg_statistic = table_open(StatisticRelationId, RowExclusiveLock); ScanKeyInit(&key, Anum_pg_statistic_starelid, BTEqualStrategyNumber, @@ -20174,6 +20170,13 @@ YbATCopyStats(Oid old_relid, RangeVar *new_rel, Oid new_relid, bool replaces[Natts_pg_statistic]; HeapTuple newtuple; + /* + * If this attribute's type was changed, don't copy the pg_statistic + * entry because it is invalid. 
+ */ + if (stat_form->staattnum == altered_old_attnum) + continue; + memset(values, 0, sizeof(values)); memset(nulls, false, sizeof(nulls)); memset(replaces, false, sizeof(replaces)); @@ -20522,6 +20525,17 @@ YbATValidateChangePrimaryKey(Relation rel, IndexStmt *stmt) { Assert(IsYBRelation(rel)); + bool is_object_part_of_xrepl; + HandleYBStatus(YBCIsObjectPartOfXRepl(MyDatabaseId, + RelationGetRelid(rel), + &is_object_part_of_xrepl)); + if (is_object_part_of_xrepl) + ereport(ERROR, + (errmsg("cannot change the primary key of a table that is a " + "part of CDC or XCluster replication."), + errhint("See https://github.com/yugabyte/yugabyte-db/issues/" + "16625."))); + /* * Recreating a table will change its OID, which is not tolerable * for system tables. @@ -21226,7 +21240,10 @@ YbATCopyFkAndCheckConstraints(const Relation old_rel, Relation new_rel, * * old_rel is the relation to copy table rows from. * new_rel is the relation to copy rows to. - * attmap is the mapping of indexes of attributes from old_rel to new_rel. + * old2new_attmap is a mapping such that old2new_attmap[i] = j implies + * attnum i in the new relation maps to attnum j in the old relation. + * new2old_attmap is a mapping such that new2old_attmap[i] = j implies + * attnum i in the old relation maps to attnum j in the new relation. * * has_altered_column_type represents whether the new relation has a different * type for a column. 
@@ -21246,7 +21263,9 @@ YbATCopyFkAndCheckConstraints(const Relation old_rel, Relation new_rel, */ static void YbATCopyTableRowsUnchecked(Relation old_rel, Relation new_rel, - AttrNumber *attmap, bool has_altered_column_type, + AttrNumber *old2new_attmap, + AttrNumber *new2old_attmap, + bool has_altered_column_type, const List *altered_column_new_column_values, const char *altered_column_name, List *new_check_constraints, @@ -21291,7 +21310,14 @@ YbATCopyTableRowsUnchecked(Relation old_rel, Relation new_rel, foreach(cell, altered_column_new_column_values) { NewColumnValue *new_column_value = lfirst(cell); - + /* + * At the time the new column values expressions were created, the + * original attnum was used. We need to update the attnum after YB + * table rewrite (because attnum can change if there are dropped + * columns in the original relation). + */ + new_column_value->attnum = + new2old_attmap[new_column_value->attnum - 1]; /* expr already planned */ new_column_value->exprstate = ExecInitExpr((Expr *) new_column_value->expr, NULL); @@ -21347,8 +21373,8 @@ YbATCopyTableRowsUnchecked(Relation old_rel, Relation new_rel, } else { - new_values[i] = old_values[attmap[i] - 1]; - new_isnull[i] = old_isnull[attmap[i] - 1]; + new_values[i] = old_values[old2new_attmap[i] - 1]; + new_isnull[i] = old_isnull[old2new_attmap[i] - 1]; } } @@ -21795,6 +21821,7 @@ YbATCopyMetadataAndData(Relation old_rel, Relation new_rel, Relation pg_trigger, pg_depend; List *new_check_constraints = NIL; List *new_fk_constraint_oids = NIL; + int altered_old_attnum = 0; YbATCopyFkAndCheckConstraints(old_rel, new_rel, pg_constraint, &new_check_constraints, @@ -21809,7 +21836,7 @@ YbATCopyMetadataAndData(Relation old_rel, Relation new_rel, * Copy table content. 
*/ YbATCopyTableRowsUnchecked(old_rel, new_rel, old2new_attmap, - has_altered_column_type, + new2old_attmap, has_altered_column_type, altered_column_new_column_values, altered_column_name, new_check_constraints, new_fk_constraint_oids); @@ -21854,13 +21881,18 @@ YbATCopyMetadataAndData(Relation old_rel, Relation new_rel, RangeVar *new_rel_rangevar = makeRangeVar( pstrdup(namespace_name), pstrdup(old_table_name), -1 /* location */); - /* - * If a column type was changed, only copy pg_statistic_ext because - * pg_statistic would be invalid. - */ + if (has_altered_column_type) + { + HeapTuple attTup = SearchSysCacheAttName(RelationGetRelid(old_rel), + altered_column_name); + Assert(HeapTupleIsValid(attTup)); + altered_old_attnum = ((Form_pg_attribute) GETSTRUCT(attTup))->attnum; + ReleaseSysCache(attTup); + } + YbATCopyStats(RelationGetRelid(old_rel), new_rel_rangevar, RelationGetRelid(new_rel), new2old_attmap, - has_altered_column_type /* ext_only */); + altered_old_attnum); /* * Copy policy objects. @@ -22158,6 +22190,17 @@ YbATValidateAlterColumnType(Relation rel) { Assert(IsYBRelation(rel)); + bool is_object_part_of_xrepl; + HandleYBStatus(YBCIsObjectPartOfXRepl(MyDatabaseId, + RelationGetRelid(rel), + &is_object_part_of_xrepl)); + if (is_object_part_of_xrepl) + ereport(ERROR, + (errmsg("cannot change a column type of a table that is a " + "part of CDC or XCluster replication."), + errhint("See https://github.com/yugabyte/yugabyte-db/issues/" + "16625."))); + /* * Recreating a table will change its OID, which is not tolerable * for system tables. 
diff --git a/src/postgres/src/backend/commands/trigger.c b/src/postgres/src/backend/commands/trigger.c index 9ac6b1aa8e50..c0bd33a21411 100644 --- a/src/postgres/src/backend/commands/trigger.c +++ b/src/postgres/src/backend/commands/trigger.c @@ -6546,13 +6546,7 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, if (IsYBBackedRelation(rel) && RI_FKey_trigger_type(trigger->tgfoid) == RI_TRIGGER_FK) { - /* - * YB_TODO(neil@yugabyte) Write Yugabyte API to work with slot. - * - * Current Yugabyte API works with HeapTuple instead of slot. - * - Create tuple as a workaround to compile. - * - Pass slot to Yugabyte call once the API is fixed. - */ + /* YB_TODO(API for Slot) Wait for slot API */ bool shouldFree = true; HeapTuple newtup = ExecFetchSlotHeapTuple(newslot, true, &shouldFree); YbAddTriggerFKReferenceIntent(trigger, rel, newtup); diff --git a/src/postgres/src/backend/commands/typecmds.c b/src/postgres/src/backend/commands/typecmds.c index 657184ef36e1..72ef7b1d8117 100644 --- a/src/postgres/src/backend/commands/typecmds.c +++ b/src/postgres/src/backend/commands/typecmds.c @@ -1973,9 +1973,6 @@ findTypeInputFunction(List *procname, Oid typeOid) argList[1] = OIDOID; argList[2] = INT4OID; - /* YB_TODO(neil) Yugabyte has some changes here. - * Check if it's still valid in Pg15. - */ procOid = LookupFuncName(procname, 1, argList, true); procOid2 = LookupFuncName(procname, 3, argList, true); if (OidIsValid(procOid)) diff --git a/src/postgres/src/backend/commands/ybccmds.c b/src/postgres/src/backend/commands/ybccmds.c index 25c0dd4f1835..04128b4994e7 100644 --- a/src/postgres/src/backend/commands/ybccmds.c +++ b/src/postgres/src/backend/commands/ybccmds.c @@ -98,7 +98,8 @@ ColumnSortingOptions(SortByDir dir, SortByNulls nulls, bool* is_desc, bool* is_n /* Database Functions. 
*/ void -YBCCreateDatabase(Oid dboid, const char *dbname, Oid src_dboid, Oid next_oid, bool colocated) +YBCCreateDatabase(Oid dboid, const char *dbname, Oid src_dboid, Oid next_oid, bool colocated, + bool *retry_on_oid_collision) { if (YBIsDBCatalogVersionMode()) { @@ -123,7 +124,24 @@ void YBCCreateDatabase(Oid dboid, const char *dbname, Oid src_dboid, Oid next_oid, bo next_oid, colocated, &handle)); - HandleYBStatus(YBCPgExecCreateDatabase(handle)); + + YBCStatus createdb_status = YBCPgExecCreateDatabase(handle); + /* If OID collision happens for CREATE DATABASE, then we need to retry CREATE DATABASE. */ + if (retry_on_oid_collision) + { + *retry_on_oid_collision = createdb_status && + YBCStatusPgsqlError(createdb_status) == ERRCODE_DUPLICATE_DATABASE && + *YBCGetGFlags()->ysql_enable_create_database_oid_collision_retry; + + if (*retry_on_oid_collision) + { + YBCFreeStatus(createdb_status); + return; + } + } + + HandleYBStatus(createdb_status); + if (YBIsDBCatalogVersionMode()) YbCreateMasterDBCatalogVersionTableEntry(dboid); } @@ -564,10 +582,8 @@ YBCCreateTable(CreateStmt *stmt, char relkind, TupleDesc desc, IndexStmt *idxstmt; Oid constraintOid; - /* YB_TODO "convert_tuples_by_name_map" is no longer called here. * Need to pass `false yb_ignore_type_mismatch` differently.
- */ - attmap = build_attrmap_by_name(RelationGetDescr(rel), RelationGetDescr(parentRel)); + attmap = build_attrmap_by_name(RelationGetDescr(rel), RelationGetDescr(parentRel), + false /* yb_ignore_type_mismatch */); idxstmt = generateClonedIndexStmt(NULL, idxRel, attmap, &constraintOid); primary_key = makeNode(Constraint); @@ -894,7 +910,8 @@ static void CreateIndexHandleSplitOptions(YBCPgStatement handle, TupleDesc desc, OptSplit *split_options, - int16 * coloptions) + int16 * coloptions, + int numIndexKeyAttrs) { /* Address both types of split options */ switch (split_options->split_type) @@ -914,7 +931,7 @@ CreateIndexHandleSplitOptions(YBCPgStatement handle, /* Construct array to SPLIT column datatypes */ Form_pg_attribute attrs[INDEX_MAX_KEYS]; int attr_count; - for (attr_count = 0; attr_count < desc->natts; ++attr_count) + for (attr_count = 0; attr_count < numIndexKeyAttrs; ++attr_count) { attrs[attr_count] = TupleDescAttr(desc, attr_count); } @@ -1007,7 +1024,8 @@ YBCCreateIndex(const char *indexName, /* Handle SPLIT statement, if present */ if (split_options) - CreateIndexHandleSplitOptions(handle, indexTupleDesc, split_options, coloptions); + CreateIndexHandleSplitOptions(handle, indexTupleDesc, split_options, coloptions, + indexInfo->ii_NumIndexKeyAttrs); /* Create the index. 
*/ HandleYBStatus(YBCPgExecCreateIndex(handle)); diff --git a/src/postgres/src/backend/executor/Makefile b/src/postgres/src/backend/executor/Makefile index 0c862b02fcf9..f4480df168b4 100644 --- a/src/postgres/src/backend/executor/Makefile +++ b/src/postgres/src/backend/executor/Makefile @@ -78,9 +78,10 @@ OBJS = \ spi.o \ tqueue.o \ tstoreReceiver.o \ + ybcExpr.o \ + ybcFunction.o \ ybc_fdw.o \ ybcModifyTable.o \ - ybcExpr.o \ nodeYbBatchedNestloop.o \ nodeYbSeqscan.o diff --git a/src/postgres/src/backend/executor/execMain.c b/src/postgres/src/backend/executor/execMain.c index 00648c7283f8..c7b49308a3df 100644 --- a/src/postgres/src/backend/executor/execMain.c +++ b/src/postgres/src/backend/executor/execMain.c @@ -942,7 +942,7 @@ InitPlan(QueryDesc *queryDesc, int eflags) i++; } - queryDesc->yb_query_stats = InstrAlloc(1, queryDesc->instrument_options); + queryDesc->yb_query_stats = InstrAlloc(1, queryDesc->instrument_options, false); /* * Initialize the private state information for all the nodes in the query diff --git a/src/postgres/src/backend/executor/execPartition.c b/src/postgres/src/backend/executor/execPartition.c index 8bd08d5b00a6..8abfe42a0e0a 100644 --- a/src/postgres/src/backend/executor/execPartition.c +++ b/src/postgres/src/backend/executor/execPartition.c @@ -588,7 +588,8 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate, */ part_attmap = build_attrmap_by_name(RelationGetDescr(partrel), - RelationGetDescr(firstResultRel)); + RelationGetDescr(firstResultRel), + false /* yb_ignore_type_mismatch */); wcoList = (List *) map_variable_attnos((Node *) wcoList, firstVarno, 0, @@ -648,7 +649,8 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate, if (part_attmap == NULL) part_attmap = build_attrmap_by_name(RelationGetDescr(partrel), - RelationGetDescr(firstResultRel)); + RelationGetDescr(firstResultRel), + false /* yb_ignore_type_mismatch */); returningList = (List *) map_variable_attnos((Node *) returningList, firstVarno, 0, 
@@ -794,7 +796,8 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate, if (part_attmap == NULL) part_attmap = build_attrmap_by_name(RelationGetDescr(partrel), - RelationGetDescr(firstResultRel)); + RelationGetDescr(firstResultRel), + false /* yb_ignore_type_mismatch */); onconflset = (List *) map_variable_attnos((Node *) onconflset, INNER_VAR, 0, @@ -892,7 +895,8 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate, if (part_attmap == NULL) part_attmap = build_attrmap_by_name(RelationGetDescr(partrel), - RelationGetDescr(firstResultRel)); + RelationGetDescr(firstResultRel), + false /* yb_ignore_type_mismatch */); if (unlikely(!leaf_part_rri->ri_projectNewInfoValid)) ExecInitMergeTupleSlots(mtstate, leaf_part_rri); diff --git a/src/postgres/src/backend/executor/nodeAgg.c b/src/postgres/src/backend/executor/nodeAgg.c index 60d80521aa6b..7380823bae60 100644 --- a/src/postgres/src/backend/executor/nodeAgg.c +++ b/src/postgres/src/backend/executor/nodeAgg.c @@ -2143,7 +2143,7 @@ lookup_hash_entries(AggState *aggstate) static void yb_agg_pushdown_supported(AggState *aggstate) { - ForeignScanState *scan_state; + ScanState *ss; ListCell *lc_agg; ListCell *lc_arg; bool check_outer_plan; @@ -2163,18 +2163,19 @@ yb_agg_pushdown_supported(AggState *aggstate) if (aggstate->phase->numsets != 0) return; - /* Foreign scan outer plan. */ - if (!IsA(outerPlanState(aggstate), ForeignScanState)) + /* Supported outer plan. */ + if (!(IsA(outerPlanState(aggstate), ForeignScanState) || + IsA(outerPlanState(aggstate), IndexOnlyScanState) || + IsA(outerPlanState(aggstate), YbSeqScanState))) return; + ss = (ScanState *) outerPlanState(aggstate); - scan_state = castNode(ForeignScanState, outerPlanState(aggstate)); - - /* Foreign relation we are scanning is a YB table. */ - if (!IsYBRelation(scan_state->ss.ss_currentRelation)) + /* Relation we are scanning is a YB table. */ + if (!IsYBRelation(ss->ss_currentRelation)) return; /* No WHERE quals. 
*/ - if (scan_state->ss.ps.qual) + if (ss->ps.qual) return; check_outer_plan = false; @@ -2298,16 +2299,17 @@ yb_agg_pushdown_supported(AggState *aggstate) * select sum(1) from (select random() as r from foo) as res; * select sum(1) from (select (null=random())::int as r from foo) as res; * and pushdown will still be supported. - * For simplicity, we do not try to match Var between aggref->args and outplan - * targetlist and simply reject once we see any item that is not a simple column - * reference. + * TODO(#18122): For simplicity, we do not try to match Var between + * aggref->args and outplan targetlist and simply reject once we see + * any item that is not a simple column reference. This should be + * improved. */ ListCell *t; foreach(t, outerPlanState(aggstate)->plan->targetlist) { TargetEntry *tle = lfirst_node(TargetEntry, t); - if (!IsA(tle->expr, Var) || IS_SPECIAL_VARNO(castNode(Var, tle->expr)->varno)) + if (!IsA(tle->expr, Var)) return; } } @@ -2317,16 +2319,18 @@ yb_agg_pushdown_supported(AggState *aggstate) } /* - * Populates aggregate pushdown information in the YB foreign scan state. + * Populates aggregate pushdown information in the scan state. */ static void yb_agg_pushdown(AggState *aggstate) { - ForeignScanState *scan_state = castNode(ForeignScanState, outerPlanState(aggstate)); - List *pushdown_aggs = NIL; - int aggno; + PlanState *ps = outerPlanState(aggstate); + List **aggrefs = YbPlanStateTryGetAggrefs(ps); - for (aggno = 0; aggno < aggstate->numaggs; aggno++) + /* List of aggrefs should exist uninitialized. 
*/ + Assert(aggrefs && *aggrefs == NIL); + + for (int aggno = 0; aggno < aggstate->numaggs; ++aggno) { Aggref *aggref = aggstate->peragg[aggno].aggref; const char *func_name = get_func_name(aggref->aggfnoid); @@ -2348,17 +2352,16 @@ yb_agg_pushdown(AggState *aggstate) sum_aggref->aggstar = aggref->aggstar; sum_aggref->args = aggref->args; - pushdown_aggs = lappend(pushdown_aggs, sum_aggref); - pushdown_aggs = lappend(pushdown_aggs, count_aggref); + *aggrefs = lappend(*aggrefs, sum_aggref); + *aggrefs = lappend(*aggrefs, count_aggref); } else { - pushdown_aggs = lappend(pushdown_aggs, aggref); + *aggrefs = lappend(*aggrefs, aggref); } } - scan_state->yb_fdw_aggs = pushdown_aggs; /* Disable projection for tuples produced by pushed down aggregate operators. */ - scan_state->ss.ps.ps_ProjInfo = NULL; + ps->ps_ProjInfo = NULL; } /* @@ -2394,6 +2397,18 @@ ExecAgg(PlanState *pstate) if (IsYugaByteEnabled()) { pstate->state->yb_exec_params.limit_use_default = true; + + // Currently, postgres employs an "optimization" where it requests the + // complete heap tuple from the executor whenever possible so as to + // avoid unnecessary copies + // See the comment in create_scan_plan (create_plan.c) for more info + // + // However, this "optimization" is not always in effect and here we guard + // against any undesirable prefix based filtering in the presence of + // aggregate targets. 
More importantly, the current behavior to + // retrieve the complete tuple is not necessarily optimal for + // remote storage such as DocDB and this may change in the future + pstate->state->yb_exec_params.yb_can_pushdown_distinct = false; } /* Dispatch based on strategy */ diff --git a/src/postgres/src/backend/executor/nodeIndexonlyscan.c b/src/postgres/src/backend/executor/nodeIndexonlyscan.c index 82d149263d34..1b2ee28cc644 100644 --- a/src/postgres/src/backend/executor/nodeIndexonlyscan.c +++ b/src/postgres/src/backend/executor/nodeIndexonlyscan.c @@ -88,6 +88,21 @@ IndexOnlyNext(IndexOnlyScanState *node) if (scandesc == NULL) { + if (IsYugaByteEnabled() && node->yb_ioss_aggrefs) + { + /* + * For aggregate pushdown, we only read aggregate results from + * DocDB and pass that up to the aggregate node (agg pushdown + * wouldn't be enabled if we needed to read other expressions). Set + * up a dummy scan slot to hold as many attributes as there are + * pushed aggregates. + */ + TupleDesc tupdesc = CreateTemplateTupleDesc(list_length(node->yb_ioss_aggrefs)); + ExecInitScanTupleSlot(estate, &node->ss, tupdesc, &TTSOpsVirtual); + /* Refresh the local pointer. */ + slot = node->ss.ss_ScanTupleSlot; + } + IndexOnlyScan *plan = castNode(IndexOnlyScan, node->ss.ps.plan); /* @@ -102,14 +117,20 @@ IndexOnlyNext(IndexOnlyScanState *node) node->ioss_NumOrderByKeys); node->ioss_ScanDesc = scandesc; - scandesc->yb_scan_plan = (Scan *) plan; - scandesc->yb_rel_pushdown = - YbInstantiatePushdownParams(&plan->yb_pushdown, estate); + /* Set it up for index-only scan */ node->ioss_ScanDesc->xs_want_itup = true; node->ioss_VMBuffer = InvalidBuffer; + if (IsYugaByteEnabled()) + { + scandesc->yb_scan_plan = (Scan *) plan; + scandesc->yb_rel_pushdown = + YbInstantiatePushdownParams(&plan->yb_pushdown, estate); + scandesc->yb_aggrefs = node->yb_ioss_aggrefs; + } + /* * If no run-time keys to calculate or they are ready, go ahead and * pass the scankeys to the index AM. 
@@ -122,14 +143,21 @@ IndexOnlyNext(IndexOnlyScanState *node) node->ioss_NumOrderByKeys); } - /* - * Setup LIMIT and future execution parameter before calling YugaByte scanning rountines. - */ - if (IsYugaByteEnabled()) { + if (IsYugaByteEnabled()) + { + /* + * Set up LIMIT and future execution parameter before calling Yugabyte + * scanning routines. + */ scandesc->yb_exec_params = &estate->yb_exec_params; - - // TODO(hector) Add row marks for INDEX_ONLY_SCAN + /* TODO(hector) Add row marks for INDEX_ONLY_SCAN. */ scandesc->yb_exec_params->rowmark = -1; + + /* + * Set reference to slot in scan desc so that YB amgettuple can use it + * during aggregate pushdown. + */ + scandesc->yb_agg_slot = slot; } /* @@ -236,6 +264,10 @@ IndexOnlyNext(IndexOnlyScanState *node) } else if (scandesc->xs_itup) StoreIndexTuple(slot, scandesc->xs_itup, scandesc->xs_itupdesc); + else if (IsYugaByteEnabled() && scandesc->yb_aggrefs) + { + /* Slot should have already been updated by YB amgettuple. */ + } else elog(ERROR, "no data returned for index-only scan"); @@ -387,11 +419,16 @@ ExecReScanIndexOnlyScan(IndexOnlyScanState *node) /* reset index scan */ if (node->ioss_ScanDesc) { - IndexScanDesc scandesc = node->ioss_ScanDesc; - IndexOnlyScan *plan = (IndexOnlyScan *) scandesc->yb_scan_plan; - EState *estate = node->ss.ps.state; - scandesc->yb_rel_pushdown = - YbInstantiatePushdownParams(&plan->yb_pushdown, estate); + if (IsYugaByteEnabled()) + { + IndexScanDesc scandesc = node->ioss_ScanDesc; + IndexOnlyScan *plan = (IndexOnlyScan *) scandesc->yb_scan_plan; + EState *estate = node->ss.ps.state; + scandesc->yb_rel_pushdown = + YbInstantiatePushdownParams(&plan->yb_pushdown, estate); + scandesc->yb_aggrefs = node->yb_ioss_aggrefs; + } + index_rescan(node->ioss_ScanDesc, node->ioss_ScanKeys, node->ioss_NumScanKeys, node->ioss_OrderByKeys, node->ioss_NumOrderByKeys); diff --git a/src/postgres/src/backend/executor/nodeIndexscan.c b/src/postgres/src/backend/executor/nodeIndexscan.c index
7c5201e426a3..70a5a9412f51 100644 --- a/src/postgres/src/backend/executor/nodeIndexscan.c +++ b/src/postgres/src/backend/executor/nodeIndexscan.c @@ -144,27 +144,34 @@ IndexNext(IndexScanState *node) } /* - * Setup LIMIT and future execution parameter before calling YugaByte scanning rountines. + * Set up any locking that happens at the time of the scan. */ if (IsYugaByteEnabled()) { + IndexScan *plan; scandesc->yb_exec_params = &estate->yb_exec_params; scandesc->yb_exec_params->rowmark = -1; // Add row marks. - if (XactIsoLevel == XACT_SERIALIZABLE) + plan = castNode(IndexScan, node->ss.ps.plan); + if (plan->scan.yb_lock_mechanism == YB_RANGE_LOCK_ON_SCAN || + plan->scan.yb_lock_mechanism == YB_LOCK_CLAUSE_ON_PK) { /* - * In case of SERIALIZABLE isolation level we have to take predicate locks to disallow + * In case of SERIALIZABLE isolation level we have to take prefix range locks to disallow * INSERTion of new rows that satisfy the query predicate. So, we set the rowmark on all * read requests sent to tserver instead of locking each tuple one by one in LockRows node. + * + * For other isolation levels it's sometimes possible to take locks during the index scan + * as well. */ if (estate->es_rowmarks && estate->es_range_table_size > 0) { ExecRowMark *erm = estate->es_rowmarks[0]; // Do not propogate non-row-locking row marks. 
if (erm->markType != ROW_MARK_REFERENCE && erm->markType != ROW_MARK_COPY) { scandesc->yb_exec_params->rowmark = erm->markType; - YBUpdateRowLockPolicyForSerializable( - &scandesc->yb_exec_params->wait_policy, erm->waitPolicy); + scandesc->yb_exec_params->pg_wait_policy = erm->waitPolicy; + YBSetRowLockPolicy(&scandesc->yb_exec_params->docdb_wait_policy, + erm->waitPolicy); } } } diff --git a/src/postgres/src/backend/executor/nodeLockRows.c b/src/postgres/src/backend/executor/nodeLockRows.c index 71887210186a..287ffe3d3260 100644 --- a/src/postgres/src/backend/executor/nodeLockRows.c +++ b/src/postgres/src/backend/executor/nodeLockRows.c @@ -79,11 +79,7 @@ ExecLockRows(PlanState *pstate) "row locks is not supported"))); if (n_yb_relations > 0 && XactIsoLevel == XACT_SERIALIZABLE) { - /* - * For YB relations, we don't lock tuples using this node in SERIALIZABLE level. Instead we take - * predicate locks by setting the row mark in read requests sent to txn participants. - */ - return slot; + elog(ERROR, "Serializable locking should have been done in the scans."); } if (TupIsNull(slot)) diff --git a/src/postgres/src/backend/executor/nodeUnique.c b/src/postgres/src/backend/executor/nodeUnique.c index 7a617e639c99..7cb661c567e7 100644 --- a/src/postgres/src/backend/executor/nodeUnique.c +++ b/src/postgres/src/backend/executor/nodeUnique.c @@ -56,13 +56,15 @@ ExecUnique(PlanState *pstate) PlanState *outerPlan; CHECK_FOR_INTERRUPTS(); - + /* * SELECT DISTINCT is only enabled for an index scan. Specifically, for a scan on hash columns, * the index scan will not be used. 
+ * + * `yb_can_pushdown_distinct` controls whether or not the DISTINCT operation is pushed down */ if (IsYugaByteEnabled()) - pstate->state->yb_exec_params.is_select_distinct = true; + pstate->state->yb_exec_params.yb_can_pushdown_distinct = yb_enable_distinct_pushdown; /* * get information from the node diff --git a/src/postgres/src/backend/executor/nodeYbSeqscan.c b/src/postgres/src/backend/executor/nodeYbSeqscan.c index d1dadea268de..9b34ee842728 100644 --- a/src/postgres/src/backend/executor/nodeYbSeqscan.c +++ b/src/postgres/src/backend/executor/nodeYbSeqscan.c @@ -78,13 +78,30 @@ YbSeqNext(YbSeqScanState *node) */ if (tsdesc == NULL) { + if (node->aggrefs) + { + /* + * For aggregate pushdown, we read just the aggregates from DocDB + * and pass that up to the aggregate node (agg pushdown wouldn't be + * enabled if we needed to read more than that). Set up a dummy + * scan slot to hold that as many attributes as there are pushed + * aggregates. + */ + TupleDesc tupdesc = CreateTemplateTupleDesc(list_length(node->aggrefs)); + ExecInitScanTupleSlot(estate, &node->ss, tupdesc, &TTSOpsVirtual); + /* Refresh the local pointer. */ + slot = node->ss.ss_ScanTupleSlot; + } + YbSeqScan *plan = (YbSeqScan *) node->ss.ps.plan; PushdownExprs *yb_pushdown = YbInstantiatePushdownParams(&plan->yb_pushdown, estate); tsdesc = ybc_remote_beginscan(node->ss.ss_currentRelation, estate->es_snapshot, (Scan *) plan, - yb_pushdown); + yb_pushdown, + node->aggrefs, + &estate->yb_exec_params); node->ss.ss_currentScanDesc = tsdesc; } diff --git a/src/postgres/src/backend/executor/ybcFunction.c b/src/postgres/src/backend/executor/ybcFunction.c new file mode 100644 index 000000000000..5249e1c0bd40 --- /dev/null +++ b/src/postgres/src/backend/executor/ybcFunction.c @@ -0,0 +1,94 @@ +/*-------------------------------------------------------------------------------------------------- + * ybcFunction.c + * Routines to construct a Postgres functions + * + * Copyright (c) YugaByte, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations + * under the License. + * + * IDENTIFICATION + * src/backend/executor/ybcFunction.c + *-------------------------------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/htup_details.h" +#include "access/tupdesc.h" +#include "utils/memutils.h" + +#include "catalog/yb_type.h" +#include "executor/ybcFunction.h" +#include "pg_yb_utils.h" + +YbFuncCallContext +YbNewFuncCallContext(FuncCallContext *funcctx) +{ + MemoryContext per_call_ctx; + + per_call_ctx = AllocSetContextCreate(funcctx->multi_call_memory_ctx, + "YB SRF per-call context", + ALLOCSET_SMALL_SIZES); + + YbFuncCallContext yb_funcctx = (YbFuncCallContext) MemoryContextAllocZero( + funcctx->multi_call_memory_ctx, sizeof(FuncCallContext)); + + yb_funcctx->per_call_ctx = per_call_ctx; + + return yb_funcctx; +} + +void +YbSetFunctionParam(YBCPgFunction handle, const char *name, int attr_typid, + uint64_t datum, bool is_null) +{ + const YBCPgTypeEntity *type_entity = + YbDataTypeFromOidMod(InvalidAttrNumber, attr_typid); + HandleYBStatus( + YBCAddFunctionParam(handle, name, type_entity, datum, is_null)); +} + +void +YbSetSRFTargets(YbFuncCallContext context, TupleDesc desc) +{ + for (int attr_num = 0; attr_num < desc->natts; ++attr_num) + { + FormData_pg_attribute *attr = TupleDescAttr(desc, attr_num); + + if (attr->attisdropped) + continue; + + const char *attr_name = NameStr(attr->attname); + const YBCPgTypeEntity 
*type_entity = + YbDataTypeFromOidMod(attr->attnum, attr->atttypid); + const YBCPgTypeAttrs type_attrs = {.typmod = attr->atttypmod}; + + HandleYBStatus(YBCAddFunctionTarget(context->handle, attr_name, + type_entity, type_attrs)); + } + HandleYBStatus(YBCFinalizeFunctionTargets(context->handle)); +} + +bool +YbSRFGetNext(YbFuncCallContext context, uint64_t *values, bool *is_nulls) +{ + MemoryContext oldcontext; + oldcontext = MemoryContextSwitchTo(context->per_call_ctx); + + MemoryContextReset(context->per_call_ctx); + + bool has_data = false; + HandleYBStatus(YBCSRFGetNext(context->handle, values, is_nulls, &has_data)); + + MemoryContextSwitchTo(oldcontext); + + return has_data; +} diff --git a/src/postgres/src/backend/executor/ybc_fdw.c b/src/postgres/src/backend/executor/ybc_fdw.c index d1cf5ecd8dc9..22756b834365 100644 --- a/src/postgres/src/backend/executor/ybc_fdw.c +++ b/src/postgres/src/backend/executor/ybc_fdw.c @@ -279,7 +279,8 @@ ybcGetForeignPlan(PlannerInfo *root, remote_colrefs, /* fdw_private data (attribute types) */ NIL, /* remote target list (none for now) */ remote_quals, - outer_plan); + outer_plan, + best_path->path.yb_path_info); } /* ------------------------------------------------------------------------- */ @@ -308,6 +309,7 @@ ybcBeginForeignScan(ForeignScanState *node, int eflags) Relation relation = node->ss.ss_currentRelation; YbFdwExecState *ybc_state = NULL; + ForeignScan *foreignScan = castNode(ForeignScan, node->ss.ps.plan); /* Do nothing in EXPLAIN (no ANALYZE) case. node->fdw_state stays NULL. 
*/ if (eflags & EXEC_FLAG_EXPLAIN_ONLY) @@ -328,8 +330,7 @@ ybcBeginForeignScan(ForeignScanState *node, int eflags) if (YBReadFromFollowersEnabled()) { ereport(DEBUG2, (errmsg("Doing read from followers"))); } - - if (XactIsoLevel == XACT_SERIALIZABLE) + if (foreignScan->scan.yb_lock_mechanism == YB_RANGE_LOCK_ON_SCAN) { /* * In case of SERIALIZABLE isolation level we have to take predicate locks to disallow @@ -342,7 +343,9 @@ ybcBeginForeignScan(ForeignScanState *node, int eflags) if (erm->markType != ROW_MARK_REFERENCE && erm->markType != ROW_MARK_COPY) { ybc_state->exec_params->rowmark = erm->markType; - YBUpdateRowLockPolicyForSerializable(&ybc_state->exec_params->wait_policy, erm->waitPolicy); + ybc_state->exec_params->pg_wait_policy = erm->waitPolicy; + YBSetRowLockPolicy(&ybc_state->exec_params->docdb_wait_policy, + erm->waitPolicy); } } } @@ -361,10 +364,11 @@ ybcBeginForeignScan(ForeignScanState *node, int eflags) static void ybcSetupScanTargets(ForeignScanState *node) { - EState *estate = node->ss.ps.state; - ForeignScan *foreignScan = (ForeignScan *) node->ss.ps.plan; - Relation relation = node->ss.ss_currentRelation; - YbFdwExecState *ybc_state = (YbFdwExecState *) node->fdw_state; + ScanState *ss = &node->ss; + EState *estate = ss->ps.state; + ForeignScan *foreignScan = (ForeignScan *) ss->ps.plan; + Relation relation = ss->ss_currentRelation; + YBCPgStatement handle = ((YbFdwExecState *) node->fdw_state)->handle; TupleDesc tupdesc = RelationGetDescr(relation); ListCell *lc; @@ -372,10 +376,35 @@ ybcSetupScanTargets(ForeignScanState *node) List *target_attrs = foreignScan->fdw_exprs; MemoryContext oldcontext = - MemoryContextSwitchTo(node->ss.ps.ps_ExprContext->ecxt_per_query_memory); + MemoryContextSwitchTo(ss->ps.ps_ExprContext->ecxt_per_query_memory); /* Set scan targets. 
*/ - if (node->yb_fdw_aggs == NIL) + if (node->yb_fdw_aggrefs != NIL) + { + YbDmlAppendTargetsAggregate(node->yb_fdw_aggrefs, + RelationGetDescr(ss->ss_currentRelation), + NULL /* index */, + handle); + + /* + * For aggregate pushdown, we read just the aggregates from DocDB + * and pass that up to the aggregate node (agg pushdown wouldn't be + * enabled if we needed to read more than that). Set up a dummy + * scan slot to hold that as many attributes as there are pushed + * aggregates. + */ + TupleDesc tupdesc = CreateTemplateTupleDesc(list_length(node->yb_fdw_aggrefs)); + ExecInitScanTupleSlot(estate, ss, tupdesc, &TTSOpsVirtual); + + /* + * Consider the example "SELECT COUNT(oid) FROM pg_type", Postgres would have to do a + * sequential scan to fetch the system column oid. Here YSQL does pushdown so what's + * fetched from a tablet is the result of count(oid), which is not even a column, let + * alone a system column. Clear fsSystemCol because no system column is needed. + */ + foreignScan->fsSystemCol = false; + } + else { /* Set non-aggregate column targets. */ bool target_added = false; @@ -385,13 +414,12 @@ ybcSetupScanTargets(ForeignScanState *node) AttrNumber attnum = target->resno; if (attnum < 0) - YbDmlAppendTargetSystem(attnum, ybc_state->handle); + YbDmlAppendTargetSystem(attnum, handle); else { Assert(attnum > 0); if (!TupleDescAttr(tupdesc, attnum - 1)->attisdropped) - YbDmlAppendTargetRegular(tupdesc, attnum, - ybc_state->handle); + YbDmlAppendTargetRegular(tupdesc, attnum, handle); else continue; } @@ -410,110 +438,9 @@ ybcSetupScanTargets(ForeignScanState *node) * targets. */ if (!target_added) - YbDmlAppendTargetSystem(YBTupleIdAttributeNumber, - ybc_state->handle); + YbDmlAppendTargetSystem(YBTupleIdAttributeNumber, handle); } - else - { - /* Set aggregate scan targets. 
*/ - foreach(lc, node->yb_fdw_aggs) - { - Aggref *aggref = lfirst_node(Aggref, lc); - char *func_name = get_func_name(aggref->aggfnoid); - ListCell *lc_arg; - YBCPgExpr op_handle; - const YBCPgTypeEntity *type_entity; - - /* Get type entity for the operator from the aggref. */ - type_entity = YbDataTypeFromOidMod(InvalidAttrNumber, aggref->aggtranstype); - - /* Create operator. */ - HandleYBStatus(YBCPgNewOperator(ybc_state->handle, func_name, type_entity, aggref->aggcollid, &op_handle)); - - /* Handle arguments. */ - if (aggref->aggstar) { - /* - * Add dummy argument for COUNT(*) case, turning it into COUNT(0). - * We don't use a column reference as we want to count rows - * even if all column values are NULL. - */ - YBCPgExpr const_handle; - HandleYBStatus(YBCPgNewConstant(ybc_state->handle, - type_entity, - false /* collate_is_valid_non_c */, - NULL /* collation_sortkey */, - 0 /* datum */, - false /* is_null */, - &const_handle)); - HandleYBStatus(YBCPgOperatorAppendArg(op_handle, const_handle)); - } else { - /* Add aggregate arguments to operator. */ - foreach(lc_arg, aggref->args) - { - TargetEntry *tle = lfirst_node(TargetEntry, lc_arg); - if (IsA(tle->expr, Const)) - { - Const* const_node = castNode(Const, tle->expr); - /* Already checked by yb_agg_pushdown_supported */ - Assert(const_node->constisnull || const_node->constbyval); - - YBCPgExpr const_handle; - HandleYBStatus(YBCPgNewConstant(ybc_state->handle, - type_entity, - false /* collate_is_valid_non_c */, - NULL /* collation_sortkey */, - const_node->constvalue, - const_node->constisnull, - &const_handle)); - HandleYBStatus(YBCPgOperatorAppendArg(op_handle, const_handle)); - } - else if (IsA(tle->expr, Var)) - { - /* - * Use original attribute number (varoattno) instead of projected one (varattno) - * as projection is disabled for tuples produced by pushed down operators. 
- */ - int attno = castNode(Var, tle->expr)->varattnosyn; - Form_pg_attribute attr = TupleDescAttr(tupdesc, attno - 1); - YBCPgTypeAttrs type_attrs = {attr->atttypmod}; - - YBCPgExpr arg = YBCNewColumnRef(ybc_state->handle, - attno, - attr->atttypid, - attr->attcollation, - &type_attrs); - HandleYBStatus(YBCPgOperatorAppendArg(op_handle, arg)); - } - else - { - /* Should never happen. */ - ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("unsupported aggregate function argument type"))); - } - } - } - - /* Add aggregate operator as scan target. */ - HandleYBStatus(YBCPgDmlAppendTarget(ybc_state->handle, op_handle)); - } - /* - * Setup the scan slot based on new tuple descriptor for the given targets. This is a dummy - * tupledesc that only includes the number of attributes. - */ - TupleDesc target_tupdesc = CreateTemplateTupleDesc(list_length(node->yb_fdw_aggs)); - ExecInitScanTupleSlot(estate, &node->ss, target_tupdesc, - table_slot_callbacks(relation)); - - /* - * Consider the example "SELECT COUNT(oid) FROM pg_type", Postgres would have to do a - * sequential scan to fetch the system column oid. Here YSQL does pushdown so what's - * fetched from a tablet is the result of count(oid), which is not even a column, let - * alone a system column. Clear fsSystemCol because no system column is needed. 
- */ - foreignScan->fsSystemCol = false; - } MemoryContextSwitchTo(oldcontext); } @@ -716,17 +643,6 @@ ybcEndForeignScan(ForeignScanState *node) ybcFreeStatementObject(ybc_state); } -/* - * ybcExplainForeignScan - * Produce extra output for EXPLAIN of a ForeignScan on a foreign table - */ -static void -ybcExplainForeignScan(ForeignScanState *node, ExplainState *es) -{ - if (node->yb_fdw_aggs != NIL) - ExplainPropertyBool("Partial Aggregate", true, es); -} - /* ------------------------------------------------------------------------- */ /* FDW declaration */ @@ -746,9 +662,9 @@ ybc_fdw_handler() fdwroutine->IterateForeignScan = ybcIterateForeignScan; fdwroutine->ReScanForeignScan = ybcReScanForeignScan; fdwroutine->EndForeignScan = ybcEndForeignScan; - fdwroutine->ExplainForeignScan = ybcExplainForeignScan; /* TODO: These are optional but we should support them eventually. */ + /* fdwroutine->ExplainForeignScan = ybcExplainForeignScan; */ /* fdwroutine->AnalyzeForeignTable = ybcAnalyzeForeignTable; */ /* fdwroutine->IsForeignScanParallelSafe = ybcIsForeignScanParallelSafe; */ diff --git a/src/postgres/src/backend/lib/Makefile b/src/postgres/src/backend/lib/Makefile index 9dad31398aed..57511f7c301f 100644 --- a/src/postgres/src/backend/lib/Makefile +++ b/src/postgres/src/backend/lib/Makefile @@ -23,5 +23,6 @@ OBJS = \ knapsack.o \ pairingheap.o \ rbtree.o \ + yb_percentile.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/postgres/src/backend/lib/yb_percentile.c b/src/postgres/src/backend/lib/yb_percentile.c new file mode 100644 index 000000000000..1a55d50ebedd --- /dev/null +++ b/src/postgres/src/backend/lib/yb_percentile.c @@ -0,0 +1,187 @@ +#include "postgres.h" + +#include +#include + +#include "utils/jsonb.h" +#include "utils/builtins.h" +#include "utils/memutils.h" + +#define MAX_INTERVAL_LEN 100 + +typedef enum HistParseState +{ + NONE, + ARRAY_BEGIN, + OBJECT_BEGIN, + KEY, + VALUE, + OBJECT_END, + ARRAY_END +} HistParseState; + +typedef struct 
HistPair +{ + double end; + int64_t total; +} HistPair; + +typedef struct HistInterval +{ + double start; + double end; +} HistInterval; + +static double extract_from_match(const char *str, regmatch_t *match, int match_num) +{ + int start = match[match_num].rm_so; + int end = match[match_num].rm_eo; + if (start == -1 || end == -1 ) + return INFINITY; + char num_str[MAX_INTERVAL_LEN]; + strncpy(num_str, str + start, end - start); + num_str[end - start] = '\0'; + return atof(num_str); +} + +static HistInterval yb_extract_interval(regex_t *regex, const char *str, int len) +{ + regmatch_t match[7]; + double start; + double end; + + char str_cpy[MAX_INTERVAL_LEN]; + strncpy(str_cpy, str, len); + str_cpy[len] = '\0'; + if (0 == regexec(regex, str_cpy, 7, match, 0)) + { + start = extract_from_match(str_cpy, match, 1); + end = extract_from_match(str_cpy, match, 4); + } + else + elog(ERROR, "Input %s did not match either [num1,num2) or [num1,) regex", str); + + if (start == INFINITY) + elog(ERROR, "Interval start is mismatched or missing"); + if (end <= start) + elog(ERROR, "Unexpected histogram interval where start >= end"); + + return (struct HistInterval){start, end}; +} + +Datum +yb_get_percentile(PG_FUNCTION_ARGS) +{ + Jsonb *jsonb = PG_GETARG_JSONB_P(0); + double percentile = PG_GETARG_FLOAT8(1); + JsonbIterator *it = JsonbIteratorInit(&jsonb->root); + JsonbValue val; + JsonbIteratorToken token; + + HistParseState h_state = NONE; + int64_t total_count = 0; + int total_entries = 0; + int allocated_entries = 100; + HistPair *entries = palloc(allocated_entries * sizeof(HistPair)); + HistInterval interval; + double last_interval_end = -INFINITY; + double ret = 0; + + regex_t regex; + const char *pattern = "^\\[([-+]?[0-9]+(\\.[0-9]+)?(e[-+]?[0-9]+)?),([-+]?[0-9]+(\\.[0-9]+)?(e[-+]?[0-9]+)?)?\\)$"; + if (regcomp(&regex, pattern, REG_EXTENDED)) + Assert(false); + + percentile = percentile > 0.0 ? percentile : 0.0; + percentile = percentile < 100.0 ?
percentile : 100.0; + + MemoryContext tmpContext = AllocSetContextCreate(GetCurrentMemoryContext(), + "JSONB processing temporary context", ALLOCSET_DEFAULT_SIZES); + MemoryContext oldContext = MemoryContextSwitchTo(tmpContext); + + while ((token = JsonbIteratorNext(&it, &val, false)) != WJB_DONE) + { + switch (token) + { + case WJB_BEGIN_ARRAY: + if (h_state != NONE) + elog(ERROR, "Invalid histogram: Unexpected array beginning, should only be the first json element"); + h_state = ARRAY_BEGIN; + break; + case WJB_BEGIN_OBJECT: + if (h_state != OBJECT_END && h_state != ARRAY_BEGIN) + elog(ERROR, "Invalid histogram: Unexpected object beginning, should follow prior object or array beginning"); + h_state = OBJECT_BEGIN; + break; + case WJB_KEY: + if (h_state != OBJECT_BEGIN) + elog(ERROR, "Invalid histogram: Unexpected key, should follow object beginning"); + if (val.type != jbvString) + elog(ERROR, "Invalid histogram: Unexpected key that is not of string type"); + h_state = KEY; + interval = yb_extract_interval(&regex, val.val.string.val, + val.val.string.len); + if (interval.start < last_interval_end) + elog(ERROR, "Invalid histogram: Unexpected interval intersection between keys"); + last_interval_end = interval.end; + break; + case WJB_VALUE: + if (h_state != KEY) + elog(ERROR, "Invalid histogram: Unexpected value, should follow key within object"); + if (val.type != jbvNumeric) + elog(ERROR, "Invalid histogram: Unexpected value that is not of numeric type"); + int64_t count = DatumGetInt64(DirectFunctionCall1(numeric_int8, + NumericGetDatum(val.val.numeric))); + if (count < 0) + elog(ERROR, "Invalid histogram: Unexpected negative count value"); + if (count > 0) + { + total_count += count; + h_state = VALUE; + if (total_entries >= allocated_entries) + { + allocated_entries *= 2; + entries = repalloc(entries, + allocated_entries * sizeof(HistPair)); + } + entries[total_entries] = + (struct HistPair){last_interval_end, total_count}; + total_entries++; + } + break; + 
case WJB_END_OBJECT: + if (h_state != VALUE) + elog(ERROR, "Invalid histogram: Unexpected object end, should follow k/v pair within object"); + h_state = OBJECT_END; + break; + case WJB_END_ARRAY: + if (h_state != OBJECT_END && h_state != ARRAY_BEGIN) + elog(ERROR, "Invalid histogram: Unexpected array end, should follow valid objects or array beginning"); + h_state = ARRAY_END; + break; + default: + elog(ERROR, "Invalid histogram: Unexpected node found that is not an array, object, or k/v pair"); + break; + } + } + + /* Covers all 0 bucket counts case, as well as empty array */ + if (total_count == 0) + { + MemoryContextSwitchTo(oldContext); + MemoryContextDelete(tmpContext); + PG_RETURN_FLOAT8(-INFINITY); + } + + int64_t expected_min_count = ((percentile / 100) * total_count) + 0.5; + /* Always want a minimum count of at least 1 */ + expected_min_count = 0 < expected_min_count ? expected_min_count : 1; + + for (int i = total_entries - 1; + i >= 0 && entries[i].total >= expected_min_count; i--) + ret = entries[i].end; + + MemoryContextSwitchTo(oldContext); + MemoryContextDelete(tmpContext); + PG_RETURN_FLOAT8(ret); +} diff --git a/src/postgres/src/backend/libpq/auth.c b/src/postgres/src/backend/libpq/auth.c index bf97b6758ce9..e0a5fd95652c 100644 --- a/src/postgres/src/backend/libpq/auth.c +++ b/src/postgres/src/backend/libpq/auth.c @@ -59,6 +59,9 @@ static void auth_failed(Port *port, int status, const char *logdetail, bool lock static char *recv_password_packet(Port *port); static void set_authn_id(Port *port, const char *id); +static int YbAuthFailedErrorLevel(const bool auth_passthrough) { + return (YbIsClientYsqlConnMgr() && auth_passthrough == true) ? 
ERROR : FATAL; +} /*---------------------------------------------------------------- * Password-based authentication methods (password, md5, and scram-sha-256) @@ -343,7 +346,7 @@ auth_failed(Port *port, int status, const char *logdetail, bool yb_role_is_locke else logdetail = cdetail; - ereport(FATAL, + ereport(YbAuthFailedErrorLevel(port->yb_is_auth_passthrough_req), (errcode(errcode_return), (yb_role_is_locked_out ? errmsg("role \"%s\" is locked. Contact" " your database administrator.", @@ -410,6 +413,8 @@ ClientAuthentication(Port *port) int status = STATUS_ERROR; const char *logdetail = NULL; + bool auth_passthrough = port->yb_is_auth_passthrough_req; + /* * Get the authentication method to use for this frontend/database * combination. Note: we do not parse the file at this point; this has @@ -427,11 +432,24 @@ ClientAuthentication(Port *port) */ if (port->hba->clientcert != clientCertOff) { + if (YbIsClientYsqlConnMgr() && auth_passthrough == true) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION), + errmsg("Cert authentication is not supported"))); + return; + } + + /* If we haven't loaded a root certificate store, fail */ if (!secure_loaded_verify_locations()) - ereport(FATAL, + { + ereport(YbAuthFailedErrorLevel(auth_passthrough), (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("client certificates can only be checked if a root certificate store is available"))); + return; + } + /* * If we loaded a root certificate store, and if a certificate is @@ -440,9 +458,12 @@ ClientAuthentication(Port *port) * already if it didn't verify ok. 
*/ if (!port->peer_cert_valid) - ereport(FATAL, + { + ereport(YbAuthFailedErrorLevel(auth_passthrough), (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION), errmsg("connection requires a valid client certificate"))); + return; + } } /* @@ -481,20 +502,26 @@ ClientAuthentication(Port *port) _("no encryption"); if (am_walsender && !am_db_walsender) - ereport(FATAL, + { + ereport(YbAuthFailedErrorLevel(auth_passthrough), (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION), /* translator: last %s describes encryption state */ errmsg("pg_hba.conf rejects replication connection for host \"%s\", user \"%s\", %s", hostinfo, port->user_name, encryption_state))); + return; + } else - ereport(FATAL, + { + ereport(YbAuthFailedErrorLevel(auth_passthrough), (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION), /* translator: last %s describes encryption state */ errmsg("pg_hba.conf rejects connection for host \"%s\", user \"%s\", database \"%s\", %s", hostinfo, port->user_name, port->database_name, encryption_state))); + return; + } break; } @@ -548,15 +575,19 @@ ClientAuthentication(Port *port) 0)) if (am_walsender && !am_db_walsender) - ereport(FATAL, + { + ereport(YbAuthFailedErrorLevel(auth_passthrough), (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION), /* translator: last %s describes encryption state */ errmsg("no pg_hba.conf entry for replication connection from host \"%s\", user \"%s\", %s", hostinfo, port->user_name, encryption_state), HOSTNAME_LOOKUP_DETAIL(port))); + return; + } else - ereport(FATAL, + { + ereport(YbAuthFailedErrorLevel(auth_passthrough), (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION), /* translator: last %s describes encryption state */ errmsg("no pg_hba.conf entry for host \"%s\", user \"%s\", database \"%s\", %s", @@ -564,6 +595,8 @@ ClientAuthentication(Port *port) port->database_name, encryption_state), HOSTNAME_LOOKUP_DETAIL(port))); + return; + } break; } diff --git a/src/postgres/src/backend/nodes/copyfuncs.c 
b/src/postgres/src/backend/nodes/copyfuncs.c index 850a97592fc8..db202e6e50d1 100644 --- a/src/postgres/src/backend/nodes/copyfuncs.c +++ b/src/postgres/src/backend/nodes/copyfuncs.c @@ -428,6 +428,7 @@ CopyScanFields(const Scan *from, Scan *newnode) CopyPlanFields((const Plan *) from, (Plan *) newnode); COPY_SCALAR_FIELD(scanrelid); + COPY_SCALAR_FIELD(yb_lock_mechanism); } /* diff --git a/src/postgres/src/backend/nodes/nodeFuncs.c b/src/postgres/src/backend/nodes/nodeFuncs.c index 63daf94cd8b1..08afe51f13d3 100644 --- a/src/postgres/src/backend/nodes/nodeFuncs.c +++ b/src/postgres/src/backend/nodes/nodeFuncs.c @@ -4235,3 +4235,23 @@ planstate_walk_members(PlanState **planstates, int nplans, return false; } + +/* + * Given PlanState, return pointer to aggrefs field if it exists, NULL + * otherwise. + */ +List ** +YbPlanStateTryGetAggrefs(PlanState *ps) +{ + switch (nodeTag(ps)) + { + case T_ForeignScanState: + return &castNode(ForeignScanState, ps)->yb_fdw_aggrefs; + case T_IndexOnlyScanState: + return &castNode(IndexOnlyScanState, ps)->yb_ioss_aggrefs; + case T_YbSeqScanState: + return &castNode(YbSeqScanState, ps)->aggrefs; + default: + return NULL; + } +} diff --git a/src/postgres/src/backend/nodes/outfuncs.c b/src/postgres/src/backend/nodes/outfuncs.c index 126ef11e4ae7..e129479e61db 100644 --- a/src/postgres/src/backend/nodes/outfuncs.c +++ b/src/postgres/src/backend/nodes/outfuncs.c @@ -368,6 +368,7 @@ _outScanInfo(StringInfo str, const Scan *node) _outPlanInfo(str, (const Plan *) node); WRITE_UINT_FIELD(scanrelid); + WRITE_ENUM_FIELD(yb_lock_mechanism, YbLockMechanism); } /* @@ -1860,6 +1861,7 @@ _outPathInfo(StringInfo str, const Path *node) WRITE_FLOAT_FIELD(startup_cost, "%.2f"); WRITE_FLOAT_FIELD(total_cost, "%.2f"); WRITE_NODE_FIELD(pathkeys); + WRITE_ENUM_FIELD(yb_path_info.yb_lock_mechanism, YbLockMechanism); } /* diff --git a/src/postgres/src/backend/nodes/readfuncs.c b/src/postgres/src/backend/nodes/readfuncs.c index cad30e08556e..7baaa3068158 
100644 --- a/src/postgres/src/backend/nodes/readfuncs.c +++ b/src/postgres/src/backend/nodes/readfuncs.c @@ -1855,6 +1855,7 @@ ReadCommonScan(Scan *local_node) ReadCommonPlan(&local_node->plan); READ_UINT_FIELD(scanrelid); + READ_ENUM_FIELD(yb_lock_mechanism, YbLockMechanism); } /* diff --git a/src/postgres/src/backend/optimizer/path/indxpath.c b/src/postgres/src/backend/optimizer/path/indxpath.c index 588015d23fd6..8ce18fc19f6d 100644 --- a/src/postgres/src/backend/optimizer/path/indxpath.c +++ b/src/postgres/src/backend/optimizer/path/indxpath.c @@ -19,6 +19,7 @@ #include "access/stratnum.h" #include "access/sysattr.h" +#include "access/xact.h" #include "catalog/pg_am.h" #include "catalog/pg_operator.h" #include "catalog/pg_opfamily.h" @@ -42,6 +43,9 @@ #include "utils/guc.h" #include "utils/rel.h" +/* GUC flag, whether to attempt single RPC lock+select in RR and RC levels. */ +bool yb_lock_pk_single_rpc = true; + /* XXX see PartCollMatchesExprColl */ #define IndexCollMatchesExprColl(idxcollation, exprcollation) \ ((idxcollation) == InvalidOid || (idxcollation) == (exprcollation)) @@ -1019,16 +1023,107 @@ get_index_paths(PlannerInfo *root, RelOptInfo *rel, } } +/* + * Returns whether the given IndexOptInfo represents the primary index in + * YugabyteDB (i.e., contains the primary source of truth for the data). + */ +static bool +yb_is_main_table(IndexOptInfo *indexinfo) +{ + Relation indrel; + bool is_main_table = false; + + if (!IsYugaByteEnabled()) + return false; + + if (indexinfo->rel->reloptkind != RELOPT_BASEREL) + return false; + + indrel = RelationIdGetRelation(indexinfo->indexoid); + is_main_table = indrel->rd_index->indisprimary; + RelationClose(indrel); + return is_main_table; +} + +/* Returns whether the given index_path matches the primary key exactly. 
*/ +static bool +yb_ipath_matches_pk(IndexPath *index_path) { + return false; +#ifdef YB_TODO + /* YB_TODO(Trevor Foucher) This function needs changes to work with Pg15 */ + ListCell *values; + Bitmapset *primary_key_attrs = NULL; + List *qinfos = NIL; + ListCell *lc = NULL; + + /* + * Verify no non-primary-key filters are specified. There is one + * indrestrictinfo per query term. + */ + foreach(values, index_path->indexinfo->indrestrictinfo) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, values); + + /* + * There is one indexquals per key that has a query term. Note this + * means we can't simply compare indrestrictinfo count to indexquals, + * because if there is only one query term, both structures will contain + * one item, even if there are more columns in the primary key. + */ + if (!list_member_ptr(index_path->indexquals, rinfo)) + return false; + } + + /* + * Check that all WHERE clause conditions in the query use the equality + * operator, and count the number of primary keys used. + */ + qinfos = deconstruct_indexquals(index_path); + foreach(lc, qinfos) + { + IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(lc); + RestrictInfo *rinfo = qinfo->rinfo; + Expr *clause = rinfo->clause; + Oid clause_op; + int op_strategy; + + if (!IsA(clause, OpExpr)) + return false; + + clause_op = qinfo->clause_op; + if (!OidIsValid(clause_op)) + return false; + + op_strategy = get_op_opfamily_strategy( + clause_op, index_path->indexinfo->opfamily[qinfo->indexcol]); + Assert(op_strategy != 0); /* not a member of opfamily?? */ + if (op_strategy != BTEqualStrategyNumber) + return false; + /* Just used for counting, not matching. */ + primary_key_attrs = bms_add_member(primary_key_attrs, qinfo->indexcol); + } + + /* + * After checking all queries are for equality on primary keys, now we just + * have to ensure we've covered all the primary keys. 
+ */ + return bms_num_members(primary_key_attrs) == + index_path->indexinfo->nkeycolumns; +#endif +} + /* * build_index_paths * Given an index and a set of index clauses for it, construct zero * or more IndexPaths. It also constructs zero or more partial IndexPaths. * * We return a list of paths because (1) this routine checks some cases - * that should cause us to not generate any IndexPath, and (2) in some - * cases we want to consider both a forward and a backward scan, so as - * to obtain both sort orders. Note that the paths are just returned - * to the caller and not immediately fed to add_path(). + * that should cause us to not generate any IndexPath, (2) in some cases + * we want to consider both a forward and a backward scan, so as to obtain + * both sort orders, and (3) in Yugabyte, we might want to do locks as part + * of the read, but sometimes must maintain a non-locking version for + * correctness. Note that the paths are just returned to the caller and not + * immediately fed to add_path(). * * At top level, useful_predicate should be exactly the index's predOK flag * (ie, true if it has a predicate that was proven from the restriction @@ -1251,6 +1346,22 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel, outer_relids, loop_count, false); + if (IsYugaByteEnabled() && !IsolationIsSerializable()) + { + /* + * If there are rowMarks, then a LockRows node could later take + * the locks, but we check to see if we can avoid the second + * locking RPC by checking if we're trying to lock just one row, + * and set up to do the lock inline in that case. The propagation + * of yb_lock_mechanism will ensure there is no conflicting logic + * that means we shouldn't even lock this one row. 
+ */ + if (index_clauses != NIL && root->parse->rowMarks && + yb_lock_pk_single_rpc && yb_is_main_table(ipath->indexinfo) && + yb_ipath_matches_pk(ipath)) + ipath->path.yb_path_info.yb_lock_mechanism = YB_LOCK_CLAUSE_ON_PK; + } + result = lappend(result, ipath); /* diff --git a/src/postgres/src/backend/optimizer/plan/createplan.c b/src/postgres/src/backend/optimizer/plan/createplan.c index 01684dcb33b6..8d78bcd5b16a 100644 --- a/src/postgres/src/backend/optimizer/plan/createplan.c +++ b/src/postgres/src/backend/optimizer/plan/createplan.c @@ -202,12 +202,14 @@ static void copy_generic_path_info(Plan *dest, Path *src); static void copy_plan_costsize(Plan *dest, Plan *src); static void label_sort_with_costsize(PlannerInfo *root, Sort *plan, double limit_tuples); -static SeqScan *make_seqscan(List *qptlist, List *qpqual, Index scanrelid); +static SeqScan *make_seqscan(List *qptlist, List *qpqual, Index scanrelid, + YbPathInfo yb_path_info); static YbSeqScan *make_yb_seqscan(List *qptlist, List *local_quals, List *yb_pushdown_quals, List *yb_pushdown_colrefs, - Index scanrelid); + Index scanrelid, + YbPathInfo yb_path_info); static SampleScan *make_samplescan(List *qptlist, List *qpqual, Index scanrelid, TableSampleClause *tsc); static IndexScan *make_indexscan(List *qptlist, List *qpqual, @@ -217,7 +219,7 @@ static IndexScan *make_indexscan(List *qptlist, List *qpqual, List *indexqual, List *indexqualorig, List *indexorderby, List *indexorderbyorig, List *indexorderbyops, List *indextlist, - ScanDirection indexscandir); + ScanDirection indexscandir, YbPathInfo yb_path_info); static IndexOnlyScan *make_indexonlyscan(List *qptlist, List *qpqual, List *yb_pushdown_colrefs, List *yb_pushdown_quals, Index scanrelid, Oid indexid, @@ -3913,9 +3915,11 @@ create_seqscan_plan(PlannerInfo *root, Path *best_path, if (best_path->parent->is_yb_relation) scan_plan = (SeqScan *) make_yb_seqscan(tlist, local_quals, remote_quals, colrefs, - scan_relid); + scan_relid, + 
best_path->yb_path_info); else - scan_plan = make_seqscan(tlist, local_quals, scan_relid); + scan_plan = make_seqscan(tlist, local_quals, scan_relid, + best_path->yb_path_info); copy_generic_path_info(&scan_plan->scan.plan, best_path); @@ -4311,7 +4315,8 @@ create_indexscan_plan(PlannerInfo *root, indexorderbys, indexorderbyops, best_path->indexinfo->indextlist, - best_path->indexscandir); + best_path->indexscandir, + best_path->path.yb_path_info); copy_generic_path_info(&scan_plan->plan, &best_path->path); @@ -5370,6 +5375,8 @@ create_foreignscan_plan(PlannerInfo *root, ForeignPath *best_path, bms_free(attrs_used); } + scan_plan->scan.yb_lock_mechanism = best_path->path.yb_path_info.yb_lock_mechanism; + return scan_plan; } @@ -7063,7 +7070,8 @@ extract_pushdown_clauses(List *restrictinfo_list, static SeqScan * make_seqscan(List *qptlist, List *qpqual, - Index scanrelid) + Index scanrelid, + YbPathInfo yb_path_info) { SeqScan *node = makeNode(SeqScan); Plan *plan = &node->scan.plan; @@ -7072,6 +7080,7 @@ make_seqscan(List *qptlist, plan->qual = qpqual; plan->lefttree = NULL; plan->righttree = NULL; + node->scan.yb_lock_mechanism = yb_path_info.yb_lock_mechanism; node->scan.scanrelid = scanrelid; return node; @@ -7082,7 +7091,8 @@ make_yb_seqscan(List *qptlist, List *local_quals, List *yb_pushdown_quals, List *yb_pushdown_colrefs, - Index scanrelid) + Index scanrelid, + YbPathInfo yb_path_info) { YbSeqScan *node = makeNode(YbSeqScan); Plan *plan = &node->scan.plan; @@ -7094,6 +7104,7 @@ make_yb_seqscan(List *qptlist, node->scan.scanrelid = scanrelid; node->yb_pushdown.quals = yb_pushdown_quals; node->yb_pushdown.colrefs = yb_pushdown_colrefs; + node->scan.yb_lock_mechanism = yb_path_info.yb_lock_mechanism; return node; } @@ -7132,7 +7143,8 @@ make_indexscan(List *qptlist, List *indexorderbyorig, List *indexorderbyops, List *indextlist, - ScanDirection indexscandir) + ScanDirection indexscandir, + YbPathInfo yb_path_info) { IndexScan *node = makeNode(IndexScan); Plan 
*plan = &node->scan.plan; @@ -7154,6 +7166,7 @@ make_indexscan(List *qptlist, node->yb_rel_pushdown.quals = yb_rel_pushdown_quals; node->yb_idx_pushdown.colrefs = yb_idx_pushdown_colrefs; node->yb_idx_pushdown.quals = yb_idx_pushdown_quals; + node->scan.yb_lock_mechanism = yb_path_info.yb_lock_mechanism; return node; } @@ -7417,7 +7430,8 @@ make_foreignscan(List *qptlist, List *fdw_private, List *fdw_scan_tlist, List *fdw_recheck_quals, - Plan *outer_plan) + Plan *outer_plan, + YbPathInfo yb_path_info) { ForeignScan *node = makeNode(ForeignScan); Plan *plan = &node->scan.plan; @@ -7443,6 +7457,7 @@ make_foreignscan(List *qptlist, node->fs_relids = NULL; /* fsSystemCol will be filled in by create_foreignscan_plan */ node->fsSystemCol = false; + node->scan.yb_lock_mechanism = yb_path_info.yb_lock_mechanism; return node; } diff --git a/src/postgres/src/backend/optimizer/plan/planner.c b/src/postgres/src/backend/optimizer/plan/planner.c index 95db698053d7..52b7a5ed4513 100644 --- a/src/postgres/src/backend/optimizer/plan/planner.c +++ b/src/postgres/src/backend/optimizer/plan/planner.c @@ -1738,8 +1738,11 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) * here. If there are only non-locking rowmarks, they should be * handled by the ModifyTable node instead. However, root->rowMarks * is what goes into the LockRows node.) + * + * For Yugabyte, some locks can be pushed down into an underlying scan. + * We avoid adding a LockRows node in this case. 
*/ - if (parse->rowMarks) + if (parse->rowMarks && path->yb_path_info.yb_lock_mechanism == YB_NO_SCAN_LOCK) { path = (Path *) create_lockrows_path(root, final_rel, path, root->rowMarks, diff --git a/src/postgres/src/backend/optimizer/util/pathnode.c b/src/postgres/src/backend/optimizer/util/pathnode.c index 866a43666830..06a6d1772021 100644 --- a/src/postgres/src/backend/optimizer/util/pathnode.c +++ b/src/postgres/src/backend/optimizer/util/pathnode.c @@ -36,6 +36,7 @@ #include "utils/selfuncs.h" #include "pg_yb_utils.h" +#include "access/xact.h" #include "access/yb_scan.h" typedef enum @@ -918,6 +919,153 @@ add_partial_path_precheck(RelOptInfo *parent_rel, Cost total_cost, return true; } +/* + * If there are rowMarks and we're in a serializable transaction, we lock + * whole prefix ranges during scans instead of locking only the rows + * returned to the user. This is required to lock rows that might not yet + * have been inserted in the table, which is necessary to block concurrent + * transactions from inserting new rows that match the locked predicate. + */ +static void +yb_maybe_set_range_lock_mechanism(List *rowMarks, + YbLockMechanism *yb_lock_mechanism) +{ + if (!IsYugaByteEnabled()) + return; + + if (IsolationIsSerializable() && rowMarks) + *yb_lock_mechanism = YB_RANGE_LOCK_ON_SCAN; +} + +/* + * Propagate YugabyteDB fields between a parent and a single child. + * + * Currently there is only one field to propagate, yb_lock_mechanism. + * yb_lock_mechanism indicates whether locks can be taken in the same RPC + * as a SELECT. This field is unconventional because we let the parent + * affect the child, in the case of YB_LOCK_CLAUSE_ON_PK. This is because + * we have only one actual scan node that has a flag indicating whether it + * can lock, and in some cases, a node above needs to disable such locking + * because it would lead to over-locking. (In that case a LockRows node + * will be inserted to do the correct locking on exactly the right rows.)
+ * It would be nice if there could be a locked scan node and an unlocked + * scan node, with a pruning step eliminating the locked node. However + * in practice it's difficult to prune during path creation, and seems error- + * prone to traverse the paths to prune them later. Because it's a matter of + * correctness in locking, and a single RPC is strictly better than two RPCs + * otherwise, this is safe to do without complicating costs. + * + * Other than this case, Path data generally flows upward, from children to + * parents, as the serializable lock information does currently. Therefore this + * function is expected to simply copy information from children to parents for + * future fields. + */ +static void +yb_propagate_fields(YbPathInfo *parent_fields, YbPathInfo *child_fields) +{ + if (!IsYugaByteEnabled()) + return; + + if (child_fields->yb_lock_mechanism == YB_RANGE_LOCK_ON_SCAN) + parent_fields->yb_lock_mechanism = YB_RANGE_LOCK_ON_SCAN; + else + child_fields->yb_lock_mechanism = YB_NO_SCAN_LOCK; +} + +/* + * Propagate YugabyteDB fields between a parent and two children. + * See comment for yb_propagate_fields. + */ +static void +yb_propagate_fields2(YbPathInfo *parent_fields, YbPathInfo *child1_fields, + YbPathInfo *child2_fields) +{ + if (!IsYugaByteEnabled()) + return; + + if (child1_fields->yb_lock_mechanism == YB_RANGE_LOCK_ON_SCAN || + child2_fields->yb_lock_mechanism == YB_RANGE_LOCK_ON_SCAN) + { + Assert(child1_fields->yb_lock_mechanism != YB_LOCK_CLAUSE_ON_PK); + Assert(child2_fields->yb_lock_mechanism != YB_LOCK_CLAUSE_ON_PK); + parent_fields->yb_lock_mechanism = YB_RANGE_LOCK_ON_SCAN; + } + else + child1_fields->yb_lock_mechanism = child2_fields->yb_lock_mechanism = + YB_NO_SCAN_LOCK; +} + +/* + * Propagate YugabyteDB fields between a parent and a list of children. + * See comment for yb_propagate_fields. 
+ */ +static void +yb_propagate_fields_list(YbPathInfo *parent_fields, List *child_paths) +{ + ListCell *lc; + bool found_serializable_lock = false; + + if (!IsYugaByteEnabled()) + return; + + foreach(lc, child_paths) + { + Path *subpath = (Path *) lfirst(lc); + + if (subpath->yb_path_info.yb_lock_mechanism == YB_RANGE_LOCK_ON_SCAN) + { + parent_fields->yb_lock_mechanism = YB_RANGE_LOCK_ON_SCAN; + found_serializable_lock = true; + break; + } + } + foreach(lc, child_paths) + { + Path *subpath = (Path *) lfirst(lc); + if (!found_serializable_lock) + subpath->yb_path_info.yb_lock_mechanism = YB_NO_SCAN_LOCK; + else + Assert(subpath->yb_path_info.yb_lock_mechanism != + YB_LOCK_CLAUSE_ON_PK); + } +} + +/* + * Propagate YugabyteDB fields between a parent and a list of MinMaxAggregate + * children. + * See comment for yb_propagate_fields. + */ +static void +yb_propagate_mmagg_fields(YbPathInfo *parent_fields, List *mmaggregates) +{ + ListCell *lc; + bool found_serializable_lock = false; + + if (!IsYugaByteEnabled()) + return; + + foreach(lc, mmaggregates) + { + MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc); + if (mminfo->path->yb_path_info.yb_lock_mechanism == + YB_RANGE_LOCK_ON_SCAN) + { + parent_fields->yb_lock_mechanism = YB_RANGE_LOCK_ON_SCAN; + found_serializable_lock = true; + break; + } + } + foreach(lc, mmaggregates) + { + MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc); + if (!found_serializable_lock) + mminfo->path->yb_path_info.yb_lock_mechanism = YB_NO_SCAN_LOCK; + else + Assert(mminfo->path->yb_path_info.yb_lock_mechanism != + YB_LOCK_CLAUSE_ON_PK); + } +} + /***************************************************************************** * PATH NODE CREATION ROUTINES @@ -944,6 +1092,10 @@ create_seqscan_path(PlannerInfo *root, RelOptInfo *rel, pathnode->parallel_workers = parallel_workers; pathnode->pathkeys = NIL; /* seqscan has unordered result */ + yb_maybe_set_range_lock_mechanism( + root->parse->rowMarks, + 
&pathnode->yb_path_info.yb_lock_mechanism); + /* * The ybcCostEstimate is used to cost a ForeignScan node on YB table, * so use it here too, to get consistent results. @@ -984,6 +1136,10 @@ create_samplescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer pathnode->parallel_workers = 0; pathnode->pathkeys = NIL; /* samplescan has unordered result */ + yb_maybe_set_range_lock_mechanism( + root->parse->rowMarks, + &pathnode->yb_path_info.yb_lock_mechanism); + cost_samplescan(pathnode, root, rel, pathnode->param_info); return pathnode; @@ -1037,6 +1193,17 @@ create_index_path(PlannerInfo *root, pathnode->path.parallel_safe = rel->consider_parallel; pathnode->path.parallel_workers = 0; pathnode->path.pathkeys = pathkeys; + /* + * In a serializable transaction, the presence of rowMarks tells us that + * this is a locked operation. Therefore we will take locks as part of + * this index scan. + */ + if (indexclauses != NIL) + { + yb_maybe_set_range_lock_mechanism( + root->parse->rowMarks, + &pathnode->path.yb_path_info.yb_lock_mechanism); + } pathnode->indexinfo = index; pathnode->indexclauses = indexclauses; @@ -1081,6 +1248,9 @@ create_bitmap_heap_path(PlannerInfo *root, pathnode->path.parallel_workers = parallel_degree; pathnode->path.pathkeys = NIL; /* always unordered */ + yb_propagate_fields(&pathnode->path.yb_path_info, + &bitmapqual->yb_path_info); + pathnode->bitmapqual = bitmapqual; cost_bitmap_heap_scan(&pathnode->path, root, rel, @@ -1134,6 +1304,8 @@ create_bitmap_and_path(PlannerInfo *root, pathnode->path.pathkeys = NIL; /* always unordered */ + yb_propagate_fields_list(&pathnode->path.yb_path_info, bitmapquals); + pathnode->bitmapquals = bitmapquals; /* this sets bitmapselectivity as well as the regular cost fields: */ @@ -1186,6 +1358,8 @@ create_bitmap_or_path(PlannerInfo *root, pathnode->path.pathkeys = NIL; /* always unordered */ + yb_propagate_fields_list(&pathnode->path.yb_path_info, bitmapquals); + pathnode->bitmapquals = bitmapquals; 
/* this sets bitmapselectivity as well as the regular cost fields: */ @@ -1214,6 +1388,10 @@ create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals, pathnode->path.parallel_workers = 0; pathnode->path.pathkeys = NIL; /* always unordered */ + yb_maybe_set_range_lock_mechanism( + root->parse->rowMarks, + &pathnode->path.yb_path_info.yb_lock_mechanism); + pathnode->tidquals = tidquals; cost_tidscan(&pathnode->path, root, rel, tidquals, @@ -1351,6 +1529,9 @@ create_append_path(PlannerInfo *root, Assert(bms_equal(PATH_REQ_OUTER(subpath), required_outer)); } + yb_propagate_fields_list(&pathnode->path.yb_path_info, + pathnode->subpaths); + Assert(!parallel_aware || pathnode->path.parallel_safe); /* @@ -1449,6 +1630,7 @@ create_merge_append_path(PlannerInfo *root, pathnode->path.parallel_safe = rel->consider_parallel; pathnode->path.parallel_workers = 0; pathnode->path.pathkeys = pathkeys; + yb_propagate_fields_list(&pathnode->path.yb_path_info, subpaths); pathnode->subpaths = subpaths; /* @@ -1593,6 +1775,9 @@ create_material_path(RelOptInfo *rel, Path *subpath) pathnode->path.parallel_workers = subpath->parallel_workers; pathnode->path.pathkeys = subpath->pathkeys; + yb_propagate_fields(&pathnode->path.yb_path_info, + &subpath->yb_path_info); + pathnode->subpath = subpath; cost_material(&pathnode->path, @@ -1718,6 +1903,9 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, */ pathnode->path.pathkeys = NIL; + yb_propagate_fields(&pathnode->path.yb_path_info, + &subpath->yb_path_info); + pathnode->subpath = subpath; pathnode->in_operators = sjinfo->semi_operators; pathnode->uniq_exprs = sjinfo->semi_rhs_exprs; @@ -1906,6 +2094,8 @@ create_gather_merge_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, pathnode->subpath = subpath; pathnode->num_workers = subpath->parallel_workers; pathnode->path.pathkeys = pathkeys; + yb_propagate_fields(&pathnode->path.yb_path_info, + &subpath->yb_path_info); pathnode->path.pathtarget = target ? 
target : rel->reltarget; pathnode->path.rows += subpath->rows; @@ -1994,6 +2184,9 @@ create_gather_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, pathnode->path.parallel_workers = 0; pathnode->path.pathkeys = NIL; /* Gather has unordered result */ + yb_propagate_fields(&pathnode->path.yb_path_info, + &subpath->yb_path_info); + pathnode->subpath = subpath; pathnode->num_workers = subpath->parallel_workers; pathnode->single_copy = false; @@ -2031,6 +2224,8 @@ create_subqueryscan_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, subpath->parallel_safe; pathnode->path.parallel_workers = subpath->parallel_workers; pathnode->path.pathkeys = pathkeys; + yb_propagate_fields(&pathnode->path.yb_path_info, + &subpath->yb_path_info); pathnode->subpath = subpath; cost_subqueryscan(pathnode, root, rel, pathnode->path.param_info); @@ -2059,6 +2254,10 @@ create_functionscan_path(PlannerInfo *root, RelOptInfo *rel, pathnode->parallel_workers = 0; pathnode->pathkeys = pathkeys; + yb_maybe_set_range_lock_mechanism( + root->parse->rowMarks, + &pathnode->yb_path_info.yb_lock_mechanism); + cost_functionscan(pathnode, root, rel, pathnode->param_info); return pathnode; @@ -2085,6 +2284,10 @@ create_tablefuncscan_path(PlannerInfo *root, RelOptInfo *rel, pathnode->parallel_workers = 0; pathnode->pathkeys = NIL; /* result is always unordered */ + yb_maybe_set_range_lock_mechanism( + root->parse->rowMarks, + &pathnode->yb_path_info.yb_lock_mechanism); + cost_tablefuncscan(pathnode, root, rel, pathnode->param_info); return pathnode; @@ -2111,6 +2314,10 @@ create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel, pathnode->parallel_workers = 0; pathnode->pathkeys = NIL; /* result is always unordered */ + yb_maybe_set_range_lock_mechanism( + root->parse->rowMarks, + &pathnode->yb_path_info.yb_lock_mechanism); + cost_valuesscan(pathnode, root, rel, pathnode->param_info); return pathnode; @@ -2136,6 +2343,10 @@ create_ctescan_path(PlannerInfo *root, RelOptInfo *rel, Relids 
required_outer) pathnode->parallel_workers = 0; pathnode->pathkeys = NIL; /* XXX for now, result is always unordered */ + yb_maybe_set_range_lock_mechanism( + root->parse->rowMarks, + &pathnode->yb_path_info.yb_lock_mechanism); + cost_ctescan(pathnode, root, rel, pathnode->param_info); return pathnode; @@ -2162,6 +2373,10 @@ create_namedtuplestorescan_path(PlannerInfo *root, RelOptInfo *rel, pathnode->parallel_workers = 0; pathnode->pathkeys = NIL; /* result is always unordered */ + yb_maybe_set_range_lock_mechanism( + root->parse->rowMarks, + &pathnode->yb_path_info.yb_lock_mechanism); + cost_namedtuplestorescan(pathnode, root, rel, pathnode->param_info); return pathnode; @@ -2214,6 +2429,10 @@ create_worktablescan_path(PlannerInfo *root, RelOptInfo *rel, pathnode->parallel_workers = 0; pathnode->pathkeys = NIL; /* result is always unordered */ + yb_maybe_set_range_lock_mechanism( + root->parse->rowMarks, + &pathnode->yb_path_info.yb_lock_mechanism); + /* Cost is the same as for a regular CTE scan */ cost_ctescan(pathnode, root, rel, pathnode->param_info); @@ -2353,6 +2572,10 @@ create_foreign_upper_path(PlannerInfo *root, RelOptInfo *rel, pathnode->path.total_cost = total_cost; pathnode->path.pathkeys = pathkeys; + yb_maybe_set_range_lock_mechanism( + root->parse->rowMarks, + &pathnode->path.yb_path_info.yb_lock_mechanism); + pathnode->fdw_outerpath = fdw_outerpath; pathnode->fdw_private = fdw_private; @@ -2508,6 +2731,9 @@ create_nestloop_path(PlannerInfo *root, /* This is a foolish way to estimate parallel_workers, but for now... 
*/ pathnode->jpath.path.parallel_workers = outer_path->parallel_workers; pathnode->jpath.path.pathkeys = pathkeys; + yb_propagate_fields2(&pathnode->jpath.path.yb_path_info, + &inner_path->yb_path_info, + &outer_path->yb_path_info); pathnode->jpath.jointype = jointype; pathnode->jpath.inner_unique = extra->inner_unique; pathnode->jpath.outerjoinpath = outer_path; @@ -2572,6 +2798,9 @@ create_mergejoin_path(PlannerInfo *root, /* This is a foolish way to estimate parallel_workers, but for now... */ pathnode->jpath.path.parallel_workers = outer_path->parallel_workers; pathnode->jpath.path.pathkeys = pathkeys; + yb_propagate_fields2(&pathnode->jpath.path.yb_path_info, + &outer_path->yb_path_info, + &inner_path->yb_path_info); pathnode->jpath.jointype = jointype; pathnode->jpath.inner_unique = extra->inner_unique; pathnode->jpath.outerjoinpath = outer_path; @@ -2649,6 +2878,9 @@ create_hashjoin_path(PlannerInfo *root, * outer rel than it does now.) */ pathnode->jpath.path.pathkeys = NIL; + yb_propagate_fields2(&pathnode->jpath.path.yb_path_info, + &outer_path->yb_path_info, + &inner_path->yb_path_info); pathnode->jpath.jointype = jointype; pathnode->jpath.inner_unique = extra->inner_unique; pathnode->jpath.outerjoinpath = outer_path; @@ -2708,6 +2940,9 @@ create_projection_path(PlannerInfo *root, /* Projection does not change the sort order */ pathnode->path.pathkeys = subpath->pathkeys; + yb_propagate_fields(&pathnode->path.yb_path_info, + &subpath->yb_path_info); + pathnode->subpath = subpath; /* @@ -2890,6 +3125,9 @@ create_set_projection_path(PlannerInfo *root, /* Projection does not change the sort order XXX? 
*/ pathnode->path.pathkeys = subpath->pathkeys; + yb_propagate_fields(&pathnode->path.yb_path_info, + &subpath->yb_path_info); + pathnode->subpath = subpath; /* @@ -3006,6 +3244,9 @@ create_sort_path(PlannerInfo *root, pathnode->path.parallel_workers = subpath->parallel_workers; pathnode->path.pathkeys = pathkeys; + yb_propagate_fields(&pathnode->path.yb_path_info, + &subpath->yb_path_info); + pathnode->subpath = subpath; cost_sort(&pathnode->path, root, pathkeys, @@ -3052,6 +3293,8 @@ create_group_path(PlannerInfo *root, /* Group doesn't change sort ordering */ pathnode->path.pathkeys = subpath->pathkeys; + yb_propagate_fields(&pathnode->path.yb_path_info, &subpath->yb_path_info); + pathnode->subpath = subpath; pathnode->groupClause = groupClause; @@ -3110,6 +3353,9 @@ create_upper_unique_path(PlannerInfo *root, /* Unique doesn't change the input ordering */ pathnode->path.pathkeys = subpath->pathkeys; + yb_propagate_fields(&pathnode->path.yb_path_info, + &subpath->yb_path_info); + pathnode->subpath = subpath; pathnode->numkeys = numCols; @@ -3167,6 +3413,8 @@ create_agg_path(PlannerInfo *root, pathnode->path.pathkeys = subpath->pathkeys; /* preserves order */ else pathnode->path.pathkeys = NIL; /* output is unordered */ + yb_propagate_fields(&pathnode->path.yb_path_info, + &subpath->yb_path_info); pathnode->subpath = subpath; pathnode->aggstrategy = aggstrategy; @@ -3257,6 +3505,9 @@ create_groupingsets_path(PlannerInfo *root, else pathnode->path.pathkeys = NIL; + yb_propagate_fields(&pathnode->path.yb_path_info, + &subpath->yb_path_info); + pathnode->aggstrategy = aggstrategy; pathnode->rollups = rollups; pathnode->qual = having_qual; @@ -3393,6 +3644,8 @@ create_minmaxagg_path(PlannerInfo *root, pathnode->path.rows = 1; pathnode->path.pathkeys = NIL; + yb_propagate_mmagg_fields(&pathnode->path.yb_path_info, mmaggregates); + pathnode->mmaggregates = mmaggregates; pathnode->quals = quals; @@ -3470,6 +3723,9 @@ create_windowagg_path(PlannerInfo *root, /* WindowAgg 
preserves the input sort order */ pathnode->path.pathkeys = subpath->pathkeys; + yb_propagate_fields(&pathnode->path.yb_path_info, + &subpath->yb_path_info); + pathnode->subpath = subpath; pathnode->winclause = winclause; pathnode->qual = qual; @@ -3540,6 +3796,9 @@ create_setop_path(PlannerInfo *root, pathnode->path.pathkeys = (strategy == SETOP_SORTED) ? subpath->pathkeys : NIL; + yb_propagate_fields(&pathnode->path.yb_path_info, + &subpath->yb_path_info); + pathnode->subpath = subpath; pathnode->cmd = cmd; pathnode->strategy = strategy; @@ -3599,6 +3858,10 @@ create_recursiveunion_path(PlannerInfo *root, /* RecursiveUnion result is always unsorted */ pathnode->path.pathkeys = NIL; + yb_propagate_fields2(&pathnode->path.yb_path_info, + &leftpath->yb_path_info, + &rightpath->yb_path_info); + pathnode->leftpath = leftpath; pathnode->rightpath = rightpath; pathnode->distinctList = distinctList; @@ -3642,6 +3905,9 @@ create_lockrows_path(PlannerInfo *root, RelOptInfo *rel, */ pathnode->path.pathkeys = NIL; + yb_propagate_fields(&pathnode->path.yb_path_info, + &subpath->yb_path_info); + pathnode->subpath = subpath; pathnode->rowMarks = rowMarks; pathnode->epqParam = epqParam; @@ -3715,6 +3981,11 @@ create_modifytable_path(PlannerInfo *root, RelOptInfo *rel, pathnode->path.parallel_workers = 0; pathnode->path.pathkeys = NIL; +#ifdef YB_TODO + /* YB_TODO(jasonk) subpaths is changed in Pg15 */ + yb_propagate_fields_list(&pathnode->path.yb_path_info, subpaths); +#endif + /* * Compute cost & rowcount as subpath cost & rowcount (if RETURNING) * @@ -3803,6 +4074,8 @@ create_limit_path(PlannerInfo *root, RelOptInfo *rel, pathnode->path.startup_cost = subpath->startup_cost; pathnode->path.total_cost = subpath->total_cost; pathnode->path.pathkeys = subpath->pathkeys; + yb_propagate_fields(&pathnode->path.yb_path_info, + &subpath->yb_path_info); pathnode->subpath = subpath; pathnode->limitOffset = limitOffset; pathnode->limitCount = limitCount; diff --git 
a/src/postgres/src/backend/parser/parse_utilcmd.c b/src/postgres/src/backend/parser/parse_utilcmd.c index d2c46ed6eb0e..be66c5e1a75b 100644 --- a/src/postgres/src/backend/parser/parse_utilcmd.c +++ b/src/postgres/src/backend/parser/parse_utilcmd.c @@ -1472,7 +1472,8 @@ expandTableLikeClause(RangeVar *heapRel, TableLikeClause *table_like_clause, Lis * have a failure since both tables are locked. */ attmap = build_attrmap_by_name(RelationGetDescr(childrel), - tupleDesc); + tupleDesc, + false /* yb_ignore_type_mismatch */); /* * Process defaults, if required. diff --git a/src/postgres/src/backend/partitioning/partprune.c b/src/postgres/src/backend/partitioning/partprune.c index f7a041b07110..172242f25563 100644 --- a/src/postgres/src/backend/partitioning/partprune.c +++ b/src/postgres/src/backend/partitioning/partprune.c @@ -3678,9 +3678,18 @@ perform_pruning_combine_step(PartitionPruneContext *context, if (cstep->source_stepids == NIL) { PartitionBoundInfo boundinfo = context->boundinfo; + int rangemax; + + /* + * Add all valid offsets into the boundinfo->indexes array. For range + * partitioning, boundinfo->indexes contains (boundinfo->ndatums + 1) + * valid entries; otherwise there are boundinfo->ndatums. + */ + rangemax = context->strategy == PARTITION_STRATEGY_RANGE ? 
+ boundinfo->nindexes : boundinfo->nindexes - 1; result->bound_offsets = - bms_add_range(NULL, 0, boundinfo->nindexes - 1); + bms_add_range(result->bound_offsets, 0, rangemax); result->scan_default = partition_bound_has_default(boundinfo); result->scan_null = partition_bound_accepts_nulls(boundinfo); return result; diff --git a/src/postgres/src/backend/postmaster/postmaster.c b/src/postgres/src/backend/postmaster/postmaster.c index bc6e9793ea83..115da9b39ff8 100644 --- a/src/postgres/src/backend/postmaster/postmaster.c +++ b/src/postgres/src/backend/postmaster/postmaster.c @@ -77,6 +77,10 @@ #include #include +#ifdef __linux__ +#include +#endif + #ifdef HAVE_SYS_SELECT_H #include #endif @@ -4481,6 +4485,16 @@ BackendStartup(Port *port) pid = fork_process(); if (pid == 0) /* child */ { +#ifdef __linux__ + /* + * In YB, all backends are stateless and upon PG master termination, all + * backend processes should also terminate regardless what state they are + * in. No clean-up procedure is needed in the backends. 
+ */ + if (IsYugaByteEnabled()) + prctl(PR_SET_PDEATHSIG, SIGKILL); +#endif + free(bn); /* Detangle from postmaster */ diff --git a/src/postgres/src/backend/tcop/postgres.c b/src/postgres/src/backend/tcop/postgres.c index f5c4102b3882..7eb3bb9f0440 100644 --- a/src/postgres/src/backend/tcop/postgres.c +++ b/src/postgres/src/backend/tcop/postgres.c @@ -19,6 +19,7 @@ #include "postgres.h" +#include #include #include #include @@ -44,6 +45,8 @@ #include "commands/prepare.h" #include "common/pg_prng.h" #include "jit/jit.h" + +#include "libpq/auth.h" #include "libpq/libpq.h" #include "libpq/pqformat.h" #include "libpq/pqsignal.h" @@ -86,6 +89,7 @@ #include "commands/portalcmds.h" #include "libpq/yb_pqcomm_extensions.h" #include "pg_yb_utils.h" +#include "utils/builtins.h" #include "utils/catcache.h" #include "utils/inval.h" #include "utils/rel.h" @@ -452,6 +456,13 @@ SocketBackend(StringInfo inBuf) doing_extended_query_message = false; break; + case 'A': /* Auth Passthrough Request */ + maxmsglen = PQ_LARGE_MESSAGE_LIMIT; + if (!YbIsClientYsqlConnMgr()) + ereport(FATAL, (errcode(ERRCODE_PROTOCOL_VIOLATION), + errmsg("invalid frontend message type %d", qtype))); + break; + default: /* @@ -4124,7 +4135,10 @@ static bool YBTableSchemaVersionMismatchError(ErrorData *edata, char **table_id) return false; } -static void YBPrepareCacheRefreshIfNeeded(ErrorData *edata, bool consider_retry, bool *need_retry) +static void YBPrepareCacheRefreshIfNeeded(ErrorData *edata, + bool consider_retry, + bool is_dml, + bool *need_retry) { *need_retry = false; @@ -4134,6 +4148,23 @@ static void YBPrepareCacheRefreshIfNeeded(ErrorData *edata, bool consider_retry, if (!IsYugaByteEnabled()) return; + /* + * A non-DDL statement that failed due to transaction conflict does not + * require cache refresh. 
+ */ + bool is_read_restart_error = YBCIsRestartReadError(edata->yb_txn_errcode); + bool is_conflict_error = YBCIsTxnConflictError(edata->yb_txn_errcode); + + /* + * Note that 'is_dml' could be set for a Select operation on a pg_catalog + * table. Even if it fails due to conflict, a retry is expected to succeed + * without refreshing the cache (as the schema of a PG catalog table cannot + * change). + */ + if (is_dml && (is_read_restart_error || is_conflict_error)) + { + return; + } char *table_to_refresh = NULL; const bool need_table_cache_refresh = YBTableSchemaVersionMismatchError(edata, &table_to_refresh); @@ -4142,19 +4173,27 @@ static void YBPrepareCacheRefreshIfNeeded(ErrorData *edata, bool consider_retry, * Get the latest syscatalog version from the master to check if we need * to refresh the cache. */ - YBCPgResetCatalogReadTime(); - const uint64_t catalog_master_version = YbGetMasterCatalogVersion(); bool need_global_cache_refresh = false; - if (YbGetCatalogCacheVersion() != catalog_master_version) { - need_global_cache_refresh = true; - YbUpdateLastKnownCatalogCacheVersion(catalog_master_version); - } - if (*YBCGetGFlags()->log_ysql_catalog_versions) + /* + * If an operation on the PG catalog has failed at this point, the + * below YbGetMasterCatalogVersion() is not expected to succeed either as it + * would be using the same transaction as the failed operation. + */ + if (!yb_non_ddl_txn_for_sys_tables_allowed) { - int elevel = need_global_cache_refresh ? LOG : DEBUG1; - ereport(elevel, - (errmsg("%s: got master catalog version: %" PRIu64, - __func__, catalog_master_version))); + YBCPgResetCatalogReadTime(); + const uint64_t catalog_master_version = YbGetMasterCatalogVersion(); + if (YbGetCatalogCacheVersion() != catalog_master_version) { + need_global_cache_refresh = true; + YbUpdateLastKnownCatalogCacheVersion(catalog_master_version); + } + if (*YBCGetGFlags()->log_ysql_catalog_versions) + { + int elevel = need_global_cache_refresh ? 
LOG : DEBUG1; + ereport(elevel, + (errmsg("%s: got master catalog version: %" PRIu64, + __func__, catalog_master_version))); + } } if (!(need_global_cache_refresh || need_table_cache_refresh)) return; @@ -4174,121 +4213,110 @@ static void YBPrepareCacheRefreshIfNeeded(ErrorData *edata, bool consider_retry, } /* - * Prepare to retry the query if possible. + * For single-query transactions we abort the current + * transaction to undo any already-applied operations + * and retry the query. + * + * For transaction blocks we would have to re-apply + * all previous queries and also continue the + * transaction for future queries (before commit). + * So we just re-throw the error in that case. + * */ - if (YBNeedRetryAfterCacheRefresh(edata)) + if (consider_retry && + !IsTransactionBlock() && + !YBCGetDisableTransparentCacheRefreshRetry()) { + /* Clear error state */ + FlushErrorState(); + /* - * For single-query transactions we abort the current - * transaction to undo any already-applied operations - * and retry the query. - * - * For transaction blocks we would have to re-apply - * all previous queries and also continue the - * transaction for future queries (before commit). - * So we just re-throw the error in that case. - * + * Make sure debug_query_string gets reset before we possibly clobber + * the storage it points at. */ - if (consider_retry && - !IsTransactionBlock() && - !YBCGetDisableTransparentCacheRefreshRetry()) - { - /* Clear error state */ - FlushErrorState(); - - /* - * Make sure debug_query_string gets reset before we possibly clobber - * the storage it points at. - */ - debug_query_string = NULL; + debug_query_string = NULL; - /* Abort the transaction and clean up. */ - AbortCurrentTransaction(); - if (am_walsender) - WalSndErrorCleanup(); + /* Abort the transaction and clean up. 
*/ + AbortCurrentTransaction(); + if (am_walsender) + WalSndErrorCleanup(); - if (MyReplicationSlot != NULL) - ReplicationSlotRelease(); + if (MyReplicationSlot != NULL) + ReplicationSlotRelease(); - ReplicationSlotCleanup(); + ReplicationSlotCleanup(); - if (doing_extended_query_message) - ignore_till_sync = true; + if (doing_extended_query_message) + ignore_till_sync = true; - xact_started = false; + xact_started = false; - /* Refresh cache now so that the retry uses latest version. */ - if (need_global_cache_refresh) - YBRefreshCache(); + /* Refresh cache now so that the retry uses latest version. */ + if (need_global_cache_refresh) + YBRefreshCache(); - *need_retry = true; - } - else + *need_retry = true; + } + else + { + if (need_global_cache_refresh) { - if (need_global_cache_refresh) - { - int error_code = edata->sqlerrcode; + int error_code = edata->sqlerrcode; - /* - * TODO: This error occurs in tablet service when snapshot is outdated. - * We should eventually translate this type of error as a retryable error - * in the upper layer such as in YBCStatusPgsqlError(). - */ - bool isInvalidCatalogSnapshotError = strstr(edata->message, - "catalog snapshot used for this transaction has been invalidated") != NULL; + /* + * TODO: This error occurs in tablet service when snapshot is outdated. + * We should eventually translate this type of error as a retryable error + * in the upper layer such as in YBCStatusPgsqlError(). + */ + bool isInvalidCatalogSnapshotError = strstr(edata->message, + "catalog snapshot used for this transaction has been invalidated") != NULL; - /* - * If we got a schema-version-mismatch error while a DDL happened, - * this is likely caused by a conflict between the current - * transaction and the DDL transaction. - * So we map it to the retryable serialization failure error code. - * TODO: consider if we should - * 1. map this case to a different (retryable) error code - * 2. always map schema-version-mismatch to a retryable error. 
- */ - if (need_table_cache_refresh || isInvalidCatalogSnapshotError) - { - error_code = ERRCODE_T_R_SERIALIZATION_FAILURE; - } + /* + * If we got a schema-version-mismatch error while a DDL happened, + * this is likely caused by a conflict between the current + * transaction and the DDL transaction. + * So we map it to the retryable serialization failure error code. + * TODO: consider if we should + * 1. map this case to a different (retryable) error code + * 2. always map schema-version-mismatch to a retryable error. + */ + if (need_table_cache_refresh || isInvalidCatalogSnapshotError) + { + error_code = ERRCODE_T_R_SERIALIZATION_FAILURE; + } - /* - * Report the original error, but add a context mentioning that a - * possibly-conflicting, concurrent DDL transaction happened. - */ - if (edata->detail == NULL && edata->hint == NULL) - { - ereport(edata->elevel, - (yb_txn_errcode(edata->yb_txn_errcode), - errcode(error_code), - errmsg("%s", edata->message), - errcontext("Catalog Version Mismatch: A DDL occurred " - "while processing this query. Try again."))); - } - else - { - ereport(edata->elevel, - (yb_txn_errcode(edata->yb_txn_errcode), - errcode(error_code), - errmsg("%s", edata->message), - errdetail("%s", edata->detail), - errhint("%s", edata->hint), - errcontext("Catalog Version Mismatch: A DDL occurred " - "while processing this query. Try again."))); - } + /* + * Report the original error, but add a context mentioning that a + * possibly-conflicting, concurrent DDL transaction happened. + */ + if (edata->detail == NULL && edata->hint == NULL) + { + ereport(edata->elevel, + (yb_txn_errcode(edata->yb_txn_errcode), + errcode(error_code), + errmsg("%s", edata->message), + errcontext("Catalog Version Mismatch: A DDL occurred " + "while processing this query. 
Try again."))); } else { - Assert(need_table_cache_refresh); - ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("%s", edata->message))); + ereport(edata->elevel, + (yb_txn_errcode(edata->yb_txn_errcode), + errcode(error_code), + errmsg("%s", edata->message), + errdetail("%s", edata->detail), + errhint("%s", edata->hint), + errcontext("Catalog Version Mismatch: A DDL occurred " + "while processing this query. Try again."))); } } - } - else - { - /* Clear error state */ - FlushErrorState(); + else + { + Assert(need_table_cache_refresh); + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("%s", edata->message))); + } } } @@ -4340,11 +4368,22 @@ static bool yb_is_begin_transaction(CommandTag command_tag) } /* - * Only retry SELECT, INSERT, UPDATE and DELETE commands. - * Do the minimum parsing to find out what the command is + * Find whether the statement is a SELECT/UPDATE/INSERT/DELETE + * with minimum parsing. + * Note: This function will always return false if + * yb_non_ddl_txn_for_sys_tables_allowed is set to true. */ -static bool yb_check_retry_allowed(const char *query_string) +static bool yb_is_dml_command(const char *query_string) { + if (yb_non_ddl_txn_for_sys_tables_allowed) + { + /* + * This guc variable is typically used to update the system catalog + * directly. Therefore we can assume that the user is running a non- + * DML statement. + */ + return false; + } if (!query_string) return false; @@ -4355,6 +4394,14 @@ static bool yb_check_retry_allowed(const char *query_string) command_tag == CMDTAG_UPDATE); } +/* + * Only retry supported commands. 
+ */ +static bool yb_check_retry_allowed(const char *query_string) +{ + return yb_is_dml_command(query_string); +} + static void YBCheckSharedCatalogCacheVersion() { /* * We cannot refresh the cache if we are already inside a transaction, so don't @@ -5271,8 +5318,12 @@ PostgresMain(const char *dbname, const char *username) * *MyProcPort, because ConnCreate() allocated that space with malloc() * ... else we'd need to copy the Port data first. Also, subsidiary data * such as the username isn't lost either; see ProcessStartupPacket(). + * PostmasterContext is required in case of connections created by + * Ysql Connection Manager for `Authentication Passthrough`, so it shouldn't + * be deleted in this case. */ - if (PostmasterContext) + if(!(YbIsClientYsqlConnMgr()) + && PostmasterContext) { MemoryContextDelete(PostmasterContext); PostmasterContext = NULL; @@ -5716,9 +5767,11 @@ PostgresMain(const char *dbname, const char *username) edata = CopyErrorData(); bool need_retry = false; - YBPrepareCacheRefreshIfNeeded(edata, - yb_check_retry_allowed(query_string), - &need_retry); + YBPrepareCacheRefreshIfNeeded( + edata, + yb_check_retry_allowed(query_string), + yb_is_dml_command(query_string), + &need_retry); if (need_retry) { @@ -5793,9 +5846,11 @@ PostgresMain(const char *dbname, const char *username) * aborting the followup bind/execute. */ bool need_retry = false; - YBPrepareCacheRefreshIfNeeded(edata, - false /* consider_retry */, - &need_retry); + YBPrepareCacheRefreshIfNeeded( + edata, + false /* consider_retry */, + yb_is_dml_command(query_string), + &need_retry); MemoryContextSwitchTo(errorcontext); PG_RE_THROW(); @@ -5901,7 +5956,11 @@ PostgresMain(const char *dbname, const char *username) * Execute may have been partially applied so need to * cleanup (and restart) the transaction. 
*/ - YBPrepareCacheRefreshIfNeeded(edata, can_retry, &need_retry); + YBPrepareCacheRefreshIfNeeded( + edata, + can_retry, + yb_is_dml_command(query_string), + &need_retry); if (need_retry && can_retry) { @@ -6144,6 +6203,54 @@ PostgresMain(const char *dbname, const char *username) */ break; + case 'A': /* Auth Passthrough Request */ + if (YbIsClientYsqlConnMgr()) + { + start_xact_command(); + + /* Store a copy of the old context */ + char *db_name = MyProcPort->database_name; + char *user_name = MyProcPort->user_name; + char *host = MyProcPort->remote_host; + + /* Update the Port details with the new context. */ + MyProcPort->user_name = + (char *) pq_getmsgstring(&input_message); + MyProcPort->database_name = + (char *) pq_getmsgstring(&input_message); + MyProcPort->remote_host = + (char *) pq_getmsgstring(&input_message); + + /* Update the `remote_host` */ + struct sockaddr_in *ip_address_1; + ip_address_1 = + (struct sockaddr_in *) (&MyProcPort->raddr.addr); + inet_pton(AF_INET, MyProcPort->remote_host, + &(ip_address_1->sin_addr)); + MyProcPort->yb_is_auth_passthrough_req = true; + + /* Start authentication */ + ClientAuthentication(MyProcPort); + + /* Place back the old context */ + MyProcPort->yb_is_auth_passthrough_req = false; + MyProcPort->user_name = user_name; + MyProcPort->database_name = db_name; + MyProcPort->remote_host = host; + inet_pton(AF_INET, MyProcPort->remote_host, + &(ip_address_1->sin_addr)); + + /* Send the Ready for Query */ + ReadyForQuery(DestRemote); + } + else + { + ereport(FATAL, (errcode(ERRCODE_PROTOCOL_VIOLATION), + errmsg("invalid frontend message type %d", + firstchar))); + } + break; + default: ereport(FATAL, (errcode(ERRCODE_PROTOCOL_VIOLATION), diff --git a/src/postgres/src/backend/utils/Gen_fmgrtab.pl b/src/postgres/src/backend/utils/Gen_fmgrtab.pl index b330c2151b46..da8515e61942 100644 --- a/src/postgres/src/backend/utils/Gen_fmgrtab.pl +++ b/src/postgres/src/backend/utils/Gen_fmgrtab.pl @@ -207,7 +207,7 @@ } # Create the 
fmgr_builtins table, collect data for fmgr_builtin_oid_index -print $tfh "\nconst FmgrBuiltin fmgr_builtins[] = {\n"; +print $tfh "\nFmgrBuiltin fmgr_builtins[] = {\n"; my %bmap; $bmap{'t'} = 'true'; $bmap{'f'} = 'false'; diff --git a/src/postgres/src/backend/utils/adt/Makefile b/src/postgres/src/backend/utils/adt/Makefile index 4885983e43e5..d2e580637c35 100644 --- a/src/postgres/src/backend/utils/adt/Makefile +++ b/src/postgres/src/backend/utils/adt/Makefile @@ -118,7 +118,8 @@ OBJS = \ windowfuncs.o \ xid.o \ xid8funcs.o \ - xml.o + xml.o \ + yb_lockfuncs.o jsonpath_scan.c: FLEXFLAGS = -CF -p -p jsonpath_scan.c: FLEX_NO_BACKUP=yes diff --git a/src/postgres/src/backend/utils/adt/arrayfuncs.c b/src/postgres/src/backend/utils/adt/arrayfuncs.c index eb999b103c38..0776aff7cb1b 100644 --- a/src/postgres/src/backend/utils/adt/arrayfuncs.c +++ b/src/postgres/src/backend/utils/adt/arrayfuncs.c @@ -1676,13 +1676,7 @@ array_send(PG_FUNCTION_ARGS) } else { - bytea *outputbytes; - - outputbytes = SendFunctionCall(&my_extra->proc, itemvalue); - pq_sendint32(&buf, VARSIZE(outputbytes) - VARHDRSZ); - pq_sendbytes(&buf, VARDATA(outputbytes), - VARSIZE(outputbytes) - VARHDRSZ); - pfree(outputbytes); + StringInfoSendFunctionCall(&buf, &my_extra->proc, itemvalue); } } diff --git a/src/postgres/src/backend/utils/adt/int.c b/src/postgres/src/backend/utils/adt/int.c index ff1f46e2b42d..78c9ec056f55 100644 --- a/src/postgres/src/backend/utils/adt/int.c +++ b/src/postgres/src/backend/utils/adt/int.c @@ -97,12 +97,13 @@ int2recv(PG_FUNCTION_ARGS) Datum int2send(PG_FUNCTION_ARGS) { - int16 arg1 = PG_GETARG_INT16(0); - StringInfoData buf; + uint16 arg1 = pg_hton16(PG_GETARG_INT16(0)); + + bytea* data = (bytea *) palloc(VARHDRSZ + sizeof(arg1)); + memcpy(data->vl_dat, &arg1, sizeof(arg1)); + SET_VARSIZE(data, VARHDRSZ + sizeof(arg1)); - pq_begintypsend(&buf); - pq_sendint16(&buf, arg1); - PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); + PG_RETURN_BYTEA_P(data); } /* @@ -320,14 +321,61 @@ 
int4recv(PG_FUNCTION_ARGS) Datum int4send(PG_FUNCTION_ARGS) { - int32 arg1 = PG_GETARG_INT32(0); - StringInfoData buf; + uint32 arg1 = pg_hton32(PG_GETARG_INT32(0)); + + bytea* data = (bytea *) palloc(VARHDRSZ + sizeof(arg1)); + memcpy(data->vl_dat, &arg1, sizeof(arg1)); + SET_VARSIZE(data, VARHDRSZ + sizeof(arg1)); - pq_begintypsend(&buf); - pq_sendint32(&buf, arg1); - PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); + PG_RETURN_BYTEA_P(data); } +#ifdef WORDS_BIGENDIAN + +#error Not implemented!!! + +#else + +/* + * int2send_direct - sends int2 directly to buf in binary format + */ +void +int2send_direct(StringInfo buf, Datum value) +{ + uint64 encoded = ((uint64)pg_hton16(value) << 32) | (2ULL << 24); + pq_sendbytes(buf, (const char*) &encoded, 6); +} + +/* + * int4send_direct - sends int4 directly to buf in binary format + */ +void +int4send_direct(StringInfo buf, Datum value) +{ + uint64 encoded = ((uint64)pg_hton32(value) << 32) | (4ULL << 24); + pq_sendbytes(buf, (const char*) &encoded, sizeof(encoded)); +} + +#endif + +/* + * int8send_direct - sends int8 directly to buf in binary format + */ +void +int8send_direct(StringInfo buf, Datum value) +{ + enlargeStringInfo(buf, 12); + + char* out = buf->data + buf->len; + buf->len += 12; + + uint32 size = pg_hton32(8); + memcpy(out, &size, sizeof(size)); + out += sizeof(size); + + uint64 be_value = pg_hton64(value); + memcpy(out, &be_value, sizeof(be_value)); +} /* * =================== diff --git a/src/postgres/src/backend/utils/adt/int8.c b/src/postgres/src/backend/utils/adt/int8.c index a80ed52dc3c7..72ffd67fee68 100644 --- a/src/postgres/src/backend/utils/adt/int8.c +++ b/src/postgres/src/backend/utils/adt/int8.c @@ -95,12 +95,13 @@ int8recv(PG_FUNCTION_ARGS) Datum int8send(PG_FUNCTION_ARGS) { - int64 arg1 = PG_GETARG_INT64(0); - StringInfoData buf; + uint64 arg1 = pg_hton64(PG_GETARG_INT64(0)); + + bytea* data = (bytea *) palloc(VARHDRSZ + sizeof(arg1)); + memcpy(data->vl_dat, &arg1, sizeof(arg1)); + SET_VARSIZE(data, 
VARHDRSZ + sizeof(arg1)); - pq_begintypsend(&buf); - pq_sendint64(&buf, arg1); - PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); + PG_RETURN_BYTEA_P(data); } diff --git a/src/postgres/src/backend/utils/adt/rangetypes.c b/src/postgres/src/backend/utils/adt/rangetypes.c index baeef7bfb47d..29ff68997c4c 100644 --- a/src/postgres/src/backend/utils/adt/rangetypes.c +++ b/src/postgres/src/backend/utils/adt/rangetypes.c @@ -321,24 +321,12 @@ range_send(PG_FUNCTION_ARGS) if (RANGE_HAS_LBOUND(flags)) { - Datum bound = PointerGetDatum(SendFunctionCall(&cache->typioproc, - lower.val)); - uint32 bound_len = VARSIZE(bound) - VARHDRSZ; - char *bound_data = VARDATA(bound); - - pq_sendint32(buf, bound_len); - pq_sendbytes(buf, bound_data, bound_len); + StringInfoSendFunctionCall(buf, &cache->typioproc, lower.val); } if (RANGE_HAS_UBOUND(flags)) { - Datum bound = PointerGetDatum(SendFunctionCall(&cache->typioproc, - upper.val)); - uint32 bound_len = VARSIZE(bound) - VARHDRSZ; - char *bound_data = VARDATA(bound); - - pq_sendint32(buf, bound_len); - pq_sendbytes(buf, bound_data, bound_len); + StringInfoSendFunctionCall(buf, &cache->typioproc, upper.val); } PG_RETURN_BYTEA_P(pq_endtypsend(buf)); diff --git a/src/postgres/src/backend/utils/adt/rowtypes.c b/src/postgres/src/backend/utils/adt/rowtypes.c index 0d92814cf4b4..69a9c4167a15 100644 --- a/src/postgres/src/backend/utils/adt/rowtypes.c +++ b/src/postgres/src/backend/utils/adt/rowtypes.c @@ -754,8 +754,6 @@ record_send(PG_FUNCTION_ARGS) Form_pg_attribute att = TupleDescAttr(tupdesc, i); ColumnIOData *column_info = &my_extra->columns[i]; Oid column_type = att->atttypid; - Datum attr; - bytea *outputbytes; /* Ignore dropped columns in datatype */ if (att->attisdropped) @@ -783,11 +781,7 @@ record_send(PG_FUNCTION_ARGS) column_info->column_type = column_type; } - attr = values[i]; - outputbytes = SendFunctionCall(&column_info->proc, attr); - pq_sendint32(&buf, VARSIZE(outputbytes) - VARHDRSZ); - pq_sendbytes(&buf, VARDATA(outputbytes), - 
VARSIZE(outputbytes) - VARHDRSZ); + StringInfoSendFunctionCall(&buf, &column_info->proc, values[i]); } pfree(values); diff --git a/src/postgres/src/backend/utils/adt/yb_lockfuncs.c b/src/postgres/src/backend/utils/adt/yb_lockfuncs.c new file mode 100644 index 000000000000..c624ba41cea9 --- /dev/null +++ b/src/postgres/src/backend/utils/adt/yb_lockfuncs.c @@ -0,0 +1,162 @@ +/*-------------------------------------------------------------------------------------------------- +* +* yb_lockfuncs.c +* Functions for SQL access to YugabyteDB locking primitives +* +* Copyright (c) YugaByte, Inc. +* +* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +* in compliance with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software distributed under the License +* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +* or implied. See the License for the specific language governing permissions and limitations +* under the License. 
+* +* IDENTIFICATION +* src/backend/utils/adt/yb_lockfuncs.c +* +*-------------------------------------------------------------------------------------------------- +*/ + +#include "postgres.h" +#include "pg_yb_utils.h" + +#include "access/htup_details.h" +#include "catalog/pg_type.h" +#include "executor/ybcFunction.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "utils/array.h" +#include "utils/builtins.h" + +/* Number of columns in yb_lock_status output */ +#define YB_NUM_LOCK_STATUS_COLUMNS 21 + +/* + * yb_lock_status - produce a view with one row per held or awaited lock + */ +Datum +yb_lock_status(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + YbFuncCallContext yb_funcctx; + + if (!yb_enable_pg_locks) + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("lock status is unavailable"), + errdetail("yb_enable_pg_locks is false or a system " + "upgrade is in progress"))); + } + + /* + * If this is not a superuser, do not return actual user data. + * TODO: Remove this as soon as we mask out user data. 
+ */ + if (!superuser_arg(GetUserId()) && !IsYbDbAdminUser(GetUserId())) + { + ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied: user must be a " + "superuser or a member of the yb_db_admin role " + "to view lock status"))); + } + + if (SRF_IS_FIRSTCALL()) + { + TupleDesc tupdesc; + MemoryContext oldcontext; + + /* create a function context for cross-call persistence */ + funcctx = SRF_FIRSTCALL_INIT(); + + /* + * switch to memory context appropriate for multiple function calls + */ + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + /* build tupdesc for result tuples */ + /* this had better match function's declaration in pg_proc.h */ + tupdesc = CreateTemplateTupleDesc(YB_NUM_LOCK_STATUS_COLUMNS); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "locktype", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "database", + OIDOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 3, "relation", + OIDOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 4, "pid", + INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 5, "mode", + TEXTARRAYOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 6, "granted", + BOOLOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 7, "fastpath", + BOOLOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 8, "waitstart", + TIMESTAMPTZOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 9, "waitend", + TIMESTAMPTZOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 10, "node", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 11, "tablet_id", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 12, "transaction_id", + UUIDOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 13, "subtransaction_id", + INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 14, "status_tablet_id", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 15, "is_explicit", + BOOLOID, -1, 0); + TupleDescInitEntry(tupdesc,
(AttrNumber) 16, "hash_cols", + TEXTARRAYOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 17, "range_cols", + TEXTARRAYOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 18, "attnum", + INT2OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 19, "column_id", + INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 20, "multiple_rows_locked", + BOOLOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 21, "blocked_by", + UUIDARRAYOID, -1, 0); + + funcctx->tuple_desc = BlessTupleDesc(tupdesc); + + yb_funcctx = YbNewFuncCallContext(funcctx); + + HandleYBStatus(YBCNewGetLockStatusDataSRF(&yb_funcctx->handle)); + YbSetFunctionParam(yb_funcctx->handle, "relation", OIDOID, + (uint64_t) PG_GETARG_DATUM(0), PG_ARGISNULL(0)); + YbSetFunctionParam(yb_funcctx->handle, "database", OIDOID, + (uint64_t) ObjectIdGetDatum(MyDatabaseId), false); + YbSetFunctionParam(yb_funcctx->handle, "transaction_id", UUIDOID, + (uint64_t) PG_GETARG_DATUM(1), PG_ARGISNULL(1)); + YbSetSRFTargets(yb_funcctx, tupdesc); + + funcctx->user_fctx = (void *) yb_funcctx; + + MemoryContextSwitchTo(oldcontext); + } + + funcctx = SRF_PERCALL_SETUP(); + yb_funcctx = funcctx->user_fctx; + + Datum values[YB_NUM_LOCK_STATUS_COLUMNS]; + bool nulls[YB_NUM_LOCK_STATUS_COLUMNS]; + + while (YbSRFGetNext(yb_funcctx, (uint64_t *) values, nulls)) + { + HeapTuple tuple; + Datum result; + + tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); + result = HeapTupleGetDatum(tuple); + + SRF_RETURN_NEXT(funcctx, result); + } + + SRF_RETURN_DONE(funcctx); +} diff --git a/src/postgres/src/backend/utils/cache/relcache.c b/src/postgres/src/backend/utils/cache/relcache.c index 458e36c4d3a4..9b771ceb3324 100644 --- a/src/postgres/src/backend/utils/cache/relcache.c +++ b/src/postgres/src/backend/utils/cache/relcache.c @@ -1328,18 +1328,87 @@ equalRSDesc(RowSecurityDesc *rsdesc1, RowSecurityDesc *rsdesc2) return true; } -typedef struct YBLoadRelationsResult { +static bool +YbIsNonAlterableRelation(Relation 
rel) +{ + /* Non-view system relations cannot currently be altered. */ + return IsSystemRelation(rel) && rel->rd_rel->relkind != RELKIND_VIEW; +} + +/* + * Group all tuples under the same relation into a list for partial key + * searches. + */ +typedef struct YbTupleCacheEntry +{ + /* Key must be the first */ + Oid key; + List *tuples; +} YbTupleCacheEntry; + +typedef struct YbTupleCache +{ + Relation rel; + HTAB *data; +} YbTupleCache; + +typedef Oid (*YbTupleCacheKeyExtractor)(HeapTuple); + +static void +YbLoadTupleCache(YbTupleCache *cache, Oid relid, + YbTupleCacheKeyExtractor key_extractor, const char *cache_name) +{ + Assert(!(cache->rel || cache->data)); + cache->rel = table_open(relid, AccessShareLock); + HASHCTL ctl = {0}; + ctl.keysize = sizeof(Oid); + ctl.entrysize = sizeof(YbTupleCacheEntry); + cache->data = hash_create(cache_name, 32, &ctl, HASH_ELEM | HASH_BLOBS); + + SysScanDesc scandesc = systable_beginscan( + cache->rel, InvalidOid, false /* indexOk */, NULL, 0, NULL); + + YbTupleCacheEntry *entry = NULL; + HeapTuple htup; + while (HeapTupleIsValid(htup = systable_getnext(scandesc))) + { + Oid key = key_extractor(htup); + if (!entry || entry->key != key) + { + bool found = false; + entry = hash_search(cache->data, &key, HASH_ENTER, &found); + + if (!found) + entry->tuples = NULL; + } + entry->tuples = lappend(entry->tuples, htup); + } + systable_endscan(scandesc); +} + +static void +YbCleanupTupleCache(YbTupleCache *cache) +{ + if (!cache->rel) + return; + + table_close(cache->rel, AccessShareLock); +} + +typedef struct YbUpdateRelationCacheState { bool sys_relations_update_required; bool has_partitioned_tables; bool has_relations_with_trigger; bool has_relations_with_row_security; -} YBLoadRelationsResult; + YbTupleCache pg_attrdef_cache; + YbTupleCache pg_constraint_cache; +} YbUpdateRelationCacheState; -static bool -YbIsNonAlterableRelation(Relation rel) +static void +YbCleanupUpdateRelationCacheState(YbUpdateRelationCacheState *state) { - /* 
Non-view system relations cannot currently be altered. */ - return IsSystemRelation(rel) && rel->rd_rel->relkind != RELKIND_VIEW; + YbCleanupTupleCache(&state->pg_attrdef_cache); + YbCleanupTupleCache(&state->pg_constraint_cache); } /* @@ -1361,10 +1430,9 @@ YbIsNonAlterableRelation(Relation rel) * Note: We assume that any error happening here will fatal so as to not end * up with partial information in the cache. */ -static YBLoadRelationsResult -YBLoadRelations() +static void +YBLoadRelations(YbUpdateRelationCacheState *state) { - YBLoadRelationsResult result = {0}; Relation pg_class_desc = table_open(RelationRelationId, AccessShareLock); SysScanDesc scandesc = systable_beginscan( pg_class_desc, RelationRelationId, false /* indexOk */, NULL, 0, NULL); @@ -1478,27 +1546,144 @@ YBLoadRelations() /* It's fully valid */ relation->rd_isvalid = true; /* Sys relation update is required in case at least one new sys relation has been loaded. */ - result.sys_relations_update_required = result.sys_relations_update_required || - IsSystemRelation(relation); + state->sys_relations_update_required |= IsSystemRelation(relation); - result.has_relations_with_trigger = result.has_relations_with_trigger || - relation->rd_rel->relhastriggers; + state->has_relations_with_trigger |= relation->rd_rel->relhastriggers; - result.has_relations_with_row_security = result.has_relations_with_row_security || - relation->rd_rel->relrowsecurity; + state->has_relations_with_row_security |= + relation->rd_rel->relrowsecurity; - result.has_partitioned_tables = result.has_partitioned_tables || - relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE; + state->has_partitioned_tables |= relation->rd_rel->relkind == + RELKIND_PARTITIONED_TABLE; } systable_endscan(scandesc); table_close(pg_class_desc, AccessShareLock); /* Check relation cache doesn't contain old entries */ Assert(hash_get_num_entries(RelationIdCache) == num_tuples); - return result; } -typedef struct YbAttrProcessorState { +/* + * 
YbAttrDefaultFetch performs the same action as PG's AttrDefaultFetch + * but uses an in-memory tuple cache instead of a relation scan. + * Most code is borrowed from PG's AttrDefaultFetch. + */ +static void +YbAttrDefaultFetch(Relation relation, const YbTupleCache *pg_attrdef_cache) +{ + AttrDefault *attrdef = relation->rd_att->constr->defval; + uint16 ndef = relation->rd_att->constr->num_defval; + Relation adrel = pg_attrdef_cache->rel; + + Oid relid = RelationGetRelid(relation); + const YbTupleCacheEntry *entry = + hash_search(pg_attrdef_cache->data, &relid, HASH_FIND, NULL); + + ListCell *cell; + foreach (cell, entry ? entry->tuples : NULL) + { + HeapTuple htup = lfirst(cell); + + Form_pg_attrdef adform = (Form_pg_attrdef) GETSTRUCT(htup); + Form_pg_attribute attr = + TupleDescAttr(relation->rd_att, adform->adnum - 1); + + uint16 i = 0; + for (; i < ndef; ++i) + { + if (adform->adnum != attrdef[i].adnum) + continue; + if (attrdef[i].adbin != NULL) + elog(WARNING, + "multiple attrdef records found for attr %s of rel %s", + NameStr(attr->attname), RelationGetRelationName(relation)); + + bool isnull = false; + Datum val = fastgetattr(htup, Anum_pg_attrdef_adbin, adrel->rd_att, + &isnull); + if (isnull) + elog(WARNING, "null adbin for attr %s of rel %s", + NameStr(attr->attname), RelationGetRelationName(relation)); + else + { + /* detoast and convert to cstring in caller's context */ + char *s = TextDatumGetCString(val); + + attrdef[i].adbin = MemoryContextStrdup(CacheMemoryContext, s); + pfree(s); + } + break; + } + + if (i >= ndef) + elog(WARNING, + "unexpected attrdef record found for attr %d of rel %s", + adform->adnum, RelationGetRelationName(relation)); + } +} + +/* + * YbCheckConstraintFetch performs the same actions as PG's CheckConstraintFetch + * but uses an in-memory tuple cache instead of a relation scan. + * Most code is borrowed from PG's CheckConstraintFetch.
+ */ +static void +YbCheckConstraintFetch(Relation relation, const YbTupleCache *pg_constraint_cache) +{ + ConstrCheck *check = relation->rd_att->constr->check; + uint16 ncheck = relation->rd_att->constr->num_check; + Relation conrel = pg_constraint_cache->rel; + uint16 found = 0; + + Oid relid = RelationGetRelid(relation); + const YbTupleCacheEntry *entry = + hash_search(pg_constraint_cache->data, &relid, HASH_FIND, NULL); + + ListCell *cell; + foreach (cell, entry ? entry->tuples : NULL) + { + HeapTuple htup = (HeapTuple) lfirst(cell); + Form_pg_constraint conform = (Form_pg_constraint) GETSTRUCT(htup); + + /* We want check constraints only */ + if (conform->contype != CONSTRAINT_CHECK) + continue; + + if (found >= ncheck) + elog(ERROR, "unexpected constraint record found for rel %s", + RelationGetRelationName(relation)); + + check[found].ccvalid = conform->convalidated; + check[found].ccnoinherit = conform->connoinherit; + check[found].ccname = + MemoryContextStrdup(CacheMemoryContext, NameStr(conform->conname)); + + bool isnull = false; + /* Grab and test conbin is actually set */ + Datum val = fastgetattr(htup, Anum_pg_constraint_conbin, + RelationGetDescr(conrel), &isnull); + if (isnull) + elog(ERROR, "null conbin for rel %s", + RelationGetRelationName(relation)); + + /* detoast and convert to cstring in caller's context */ + char *s = TextDatumGetCString(val); + check[found].ccbin = MemoryContextStrdup(CacheMemoryContext, s); + pfree(s); + + ++found; + } + + if (found != ncheck) + elog(ERROR, "%d constraint record(s) missing for rel %s", + ncheck - found, RelationGetRelationName(relation)); + + /* Sort the records so that CHECKs are applied in a deterministic order */ + if (ncheck > 1) + qsort(check, ncheck, sizeof(ConstrCheck), CheckConstraintCmp); +} + +typedef struct YbRelationAttrsProcessingState { Oid relid; Relation relation; int need; @@ -1506,27 +1691,33 @@ typedef struct YbAttrProcessorState { TupleConstr *constr; AttrDefault *attrdef; AttrMissing 
*attrmiss; +} YbRelationAttrsProcessingState; + +typedef struct YbAttrProcessorState +{ + YbRelationAttrsProcessingState processing; + const YbTupleCache* pg_attrdef_cache; + const YbTupleCache* pg_constraint_cache; } YbAttrProcessorState; static inline bool -YbIsAttrProcessingStarted(const YbAttrProcessorState* state) +YbIsAttrProcessingStarted(const YbAttrProcessorState *state) { - return OidIsValid(state->relid); + return OidIsValid(state->processing.relid); } static inline bool -YbIsAttrProcessingRequired(const YbAttrProcessorState* state) +YbIsAttrProcessingRequired(const YbAttrProcessorState *state) { - return state->relation; + return state->processing.relation; } static bool -YbApplyAttr(YbAttrProcessorState* state, - Relation attrel, - HeapTuple htup) +YbApplyAttr(YbAttrProcessorState *state, Relation attrel, HeapTuple htup) { + YbRelationAttrsProcessingState *processing = &state->processing; Form_pg_attribute attp = (Form_pg_attribute) GETSTRUCT(htup); - if (!YbIsAttrProcessingStarted(state) || state->relid != attp->attrelid) + if (!YbIsAttrProcessingStarted(state) || processing->relid != attp->attrelid) return false; if (!YbIsAttrProcessingRequired(state)) return true; @@ -1534,7 +1725,7 @@ YbApplyAttr(YbAttrProcessorState* state, if (attp->attnum <= 0) return true; - Relation relation = state->relation; + Relation relation = processing->relation; if (attp->attnum > relation->rd_rel->relnatts) elog(ERROR, "invalid attribute number %d for %s", @@ -1545,18 +1736,18 @@ YbApplyAttr(YbAttrProcessorState* state, /* Update constraint/default info */ if (attp->attnotnull) - state->constr->has_not_null = true; + processing->constr->has_not_null = true; if (attp->atthasdef) { - if (state->attrdef == NULL) - state->attrdef = (AttrDefault*) MemoryContextAllocZero( + if (processing->attrdef == NULL) + processing->attrdef = (AttrDefault*) MemoryContextAllocZero( CacheMemoryContext, relation->rd_rel->relnatts * sizeof(AttrDefault)); - AttrDefault *attrdef = 
state->attrdef; - attrdef[state->ndef].adnum = attp->attnum; - attrdef[state->ndef].adbin = NULL; - ++state->ndef; + AttrDefault *attrdef = processing->attrdef; + attrdef[processing->ndef].adnum = attp->attnum; + attrdef[processing->ndef].adbin = NULL; + ++processing->ndef; } /* Likewise for a missing value */ @@ -1572,8 +1763,8 @@ YbApplyAttr(YbAttrProcessorState* state, if (!missingNull) { /* Yes, fetch from the array */ - if (state->attrmiss == NULL) - state->attrmiss = (AttrMissing *)MemoryContextAllocZero( + if (processing->attrmiss == NULL) + processing->attrmiss = (AttrMissing *)MemoryContextAllocZero( CacheMemoryContext, relation->rd_rel->relnatts * sizeof(AttrMissing)); bool is_null; int one = 1; @@ -1581,7 +1772,7 @@ YbApplyAttr(YbAttrProcessorState* state, missingval, 1, &one, -1, attp->attlen, attp->attbyval, attp->attalign, &is_null); Assert(!is_null); - AttrMissing *attrmiss = state->attrmiss; + AttrMissing *attrmiss = processing->attrmiss; if (attp->attbyval) { /* for copy by val just copy the datum direct */ @@ -1598,7 +1789,7 @@ YbApplyAttr(YbAttrProcessorState* state, attrmiss[attp->attnum - 1].am_present = true; } } - --state->need; + --processing->need; return true; } @@ -1609,17 +1800,18 @@ YbStartNewAttrProcessing(YbAttrProcessorState* state, HeapTuple htup) { Assert(!YbIsAttrProcessingStarted(state)); + YbRelationAttrsProcessingState *processing = &state->processing; Form_pg_attribute attp = (Form_pg_attribute) GETSTRUCT(htup); Assert(OidIsValid(attp->attrelid)); - state->relid = attp->attrelid; + processing->relid = attp->attrelid; Relation relation; - RelationIdCacheLookup(state->relid, relation); + RelationIdCacheLookup(processing->relid, relation); if (!relation || (!sys_rel_update_required && IsSystemRelation(relation))) return; - state->relation = relation; - state->need = state->relation->rd_rel->relnatts; - state->constr = (TupleConstr*) MemoryContextAllocZero( + processing->relation = relation; + processing->need = 
processing->relation->rd_rel->relnatts; + processing->constr = (TupleConstr*) MemoryContextAllocZero( CacheMemoryContext, sizeof(TupleConstr)); bool applied = YbApplyAttr(state, attrel, htup); Assert(applied); @@ -1627,16 +1819,18 @@ YbStartNewAttrProcessing(YbAttrProcessorState* state, } static void -YbCompleteAttrProcessingImpl(const YbAttrProcessorState* state) +YbCompleteAttrProcessingImpl(const YbAttrProcessorState *state) { - if (state->need != 0) - elog(ERROR, "catalog is missing %d attribute(s) for relid %u", state->need, state->relid); + const YbRelationAttrsProcessingState *processing = &state->processing; + if (processing->need != 0) + elog(ERROR, "catalog is missing %d attribute(s) for relid %u", + processing->need, processing->relid); - Relation relation = state->relation; - TupleConstr *constr = state->constr; - AttrDefault *attrdef = state->attrdef; - AttrMissing *attrmiss = state->attrmiss; - int ndef = state->ndef; + Relation relation = processing->relation; + TupleConstr *constr = processing->constr; + AttrDefault *attrdef = processing->attrdef; + AttrMissing *attrmiss = processing->attrmiss; + int ndef = processing->ndef; /* copy some fields from pg_class row to rd_att */ relation->rd_att->tdtypeid = relation->rd_rel->reltype; @@ -1668,7 +1862,7 @@ YbCompleteAttrProcessingImpl(const YbAttrProcessorState* state) else constr->defval = attrdef; constr->num_defval = ndef; - AttrDefaultFetch(relation, ndef); + YbAttrDefaultFetch(relation, state->pg_attrdef_cache); } else constr->num_defval = 0; @@ -1680,7 +1874,7 @@ YbCompleteAttrProcessingImpl(const YbAttrProcessorState* state) constr->num_check = relation->rd_rel->relchecks; constr->check = (ConstrCheck *) MemoryContextAllocZero( CacheMemoryContext, constr->num_check * sizeof(ConstrCheck)); - CheckConstraintFetch(relation); + YbCheckConstraintFetch(relation, state->pg_constraint_cache); } else constr->num_check = 0; @@ -1712,18 +1906,18 @@ YbCompleteAttrProcessingImpl(const YbAttrProcessorState* 
state) } static void -YbCompleteAttrProcessing(YbAttrProcessorState* state) +YbCompleteAttrProcessing(YbAttrProcessorState *state) { if (!YbIsAttrProcessingStarted(state)) return; if (YbIsAttrProcessingRequired(state)) YbCompleteAttrProcessingImpl(state); - *state = (struct YbAttrProcessorState){0}; + state->processing = (struct YbRelationAttrsProcessingState){0}; } static void -YBUpdateRelationsAttributes(bool sys_relations_update_required) +YBUpdateRelationsAttributes(const YbUpdateRelationCacheState *cache_update_state) { /* * Open pg_attribute and begin a scan. Force heap scan if we haven't yet @@ -1739,13 +1933,14 @@ YBUpdateRelationsAttributes(bool sys_relations_update_required) Relation attrel = table_open(AttributeRelationId, AccessShareLock); SysScanDesc scandesc = systable_beginscan( attrel, InvalidOid, false /* indexOk */, NULL, 0, NULL); + YbAttrProcessorState state = {0}; - MemoryContext per_tuple_memory_context = - (*YBCGetGFlags()->ysql_disable_per_tuple_memory_context_in_update_relattrs) ? 
- NULL : AllocSetContextCreate(GetCurrentMemoryContext(), - "PerTupleContext", ALLOCSET_DEFAULT_SIZES); - if (per_tuple_memory_context) - MemoryContextSwitchTo(per_tuple_memory_context); + state.pg_attrdef_cache = &cache_update_state->pg_attrdef_cache; + state.pg_constraint_cache = &cache_update_state->pg_constraint_cache; + + const bool sys_rel_update_required = + cache_update_state->sys_relations_update_required; + HeapTuple htup; while (HeapTupleIsValid(htup = systable_getnext(scandesc))) { @@ -1753,23 +1948,16 @@ YBUpdateRelationsAttributes(bool sys_relations_update_required) { YbCompleteAttrProcessing(&state); YbStartNewAttrProcessing( - &state, sys_relations_update_required, attrel, htup); + &state, sys_rel_update_required, attrel, htup); } - if (per_tuple_memory_context) - MemoryContextReset(per_tuple_memory_context); } YbCompleteAttrProcessing(&state); - if (per_tuple_memory_context) - { - MemoryContextSwitchTo(per_tuple_memory_context->parent); - MemoryContextDelete(per_tuple_memory_context); - } systable_endscan(scandesc); table_close(attrel, AccessShareLock); } static void -YBUpdateRelationsPartitioning(bool sys_relations_update_required) +YBUpdateRelationsPartitioning(const YbUpdateRelationCacheState *state) { Relation partrel = table_open(PartitionedRelationId, AccessShareLock); SysScanDesc scandesc = systable_beginscan( @@ -1784,8 +1972,9 @@ YBUpdateRelationsPartitioning(bool sys_relations_update_required) RelationIdCacheLookup(part_table_form->partrelid, relation); if (relation && - relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE && - (sys_relations_update_required || !IsSystemRelation(relation))) + relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE && + (state->sys_relations_update_required || + !IsSystemRelation(relation))) { /* Initialize key and partition descriptor info */ RelationBuildPartitionKey(relation); @@ -1797,7 +1986,8 @@ YBUpdateRelationsPartitioning(bool sys_relations_update_required) table_close(partrel, AccessShareLock); 
} -typedef struct YbIndexProcessorState { +typedef struct YbIndexProcessorState +{ Oid relid; Relation relation; List *result; @@ -1965,7 +2155,7 @@ YbStartNewIndexProcessing, * As a result the complexity is O(N) instead of O(N * M). */ static void -YBUpdateRelationsIndicies(bool sys_relations_update_required) +YBUpdateRelationsIndicies(const YbUpdateRelationCacheState *cache_update_state) { Relation indrel = table_open(IndexRelationId, AccessShareLock); SysScanDesc indscan = systable_beginscan( @@ -1992,7 +2182,7 @@ YBUpdateRelationsIndicies(bool sys_relations_update_required) { YbCompleteIndexProcessing(&state); YbStartNewIndexProcessing( - &state, sys_relations_update_required, htup); + &state, cache_update_state->sys_relations_update_required, htup); } } YbCompleteIndexProcessing(&state); @@ -2333,8 +2523,96 @@ YbRunWithPrefetcher( } } +static Oid +YbExtractAttrDefTupleCacheKey(HeapTuple htup) +{ + return ((Form_pg_attrdef) GETSTRUCT(htup))->adrelid; +} + +static Oid +YbExtractConstraintTupleCacheKey(HeapTuple htup) +{ + return ((Form_pg_constraint) GETSTRUCT(htup))->conrelid; +} + +static void +YbInitUpdateRelationCacheState(YbUpdateRelationCacheState *state) +{ + YbLoadTupleCache(&state->pg_attrdef_cache, AttrDefaultRelationId, + &YbExtractAttrDefTupleCacheKey, "pg_attrdef local cache"); + YbLoadTupleCache(&state->pg_constraint_cache, ConstraintRelationId, + &YbExtractConstraintTupleCacheKey, + "pg_constraint local cache"); +} + static YBCStatus -YbPreloadRelCacheImpl(YbRunWithPrefetcherContext* ctx) +YbUpdateRelationCacheImpl(YbUpdateRelationCacheState *state, + YbRunWithPrefetcherContext *ctx) +{ + YBLoadRelations(state); + + YbTablePrefetcherState *prefetcher = &ctx->prefetcher; + + if (!ctx->is_using_response_cache) + { + /* + * In case of disabled response cache, preload other tables on demand.
+ * This is an optimization to prevent the master node from being overloaded + * with lots of fat read requests (requests which read too many tables) + * in case there are lots of opened connections. + * Some of our tests have such a setup. Reading all the tables in one + * request on a debug build under heavy load may spend up to 5-6 secs. + */ + if (state->has_relations_with_trigger) + YbRegisterTable(prefetcher, YB_PFETCH_TABLE_PG_TRIGGER); + + if (state->has_relations_with_row_security) + YbRegisterTable(prefetcher, YB_PFETCH_TABLE_PG_POLICY); + + if (state->has_partitioned_tables) + { + static const YbPFetchTable tables[] = { + YB_PFETCH_TABLE_PG_CAST, + YB_PFETCH_TABLE_PG_INHERITS, + YB_PFETCH_TABLE_PG_PROC}; + YbRegisterTables(prefetcher, tables, lengthof(tables)); + } + + YBCStatus status = YbPrefetch(prefetcher); + if (status) + return status; + } + + YBUpdateRelationsAttributes(state); + + YBUpdateRelationsPartitioning(state); + + YbFillCatCaches(prefetcher); + + YBUpdateRelationsIndicies(state); + return NULL; +} + +static YBCStatus +YbUpdateRelationCache(YbRunWithPrefetcherContext *ctx) +{ + MemoryContext own_mem_ctx = AllocSetContextCreate(GetCurrentMemoryContext(), + "UpdateRelationCacheContext", + ALLOCSET_DEFAULT_SIZES); + MemoryContext old_mem_ctx = MemoryContextSwitchTo(own_mem_ctx); + + YbUpdateRelationCacheState state = {0}; + YbInitUpdateRelationCacheState(&state); + YBCStatus status = YbUpdateRelationCacheImpl(&state, ctx); + YbCleanupUpdateRelationCacheState(&state); + + MemoryContextSwitchTo(old_mem_ctx); + MemoryContextDelete(own_mem_ctx); + return status; +} + +static YBCStatus +YbPreloadRelCacheImpl(YbRunWithPrefetcherContext *ctx) { /* * During relcache loading postgres reads the data from multiple sys tables.
@@ -2356,8 +2634,7 @@ YbPreloadRelCacheImpl(YbRunWithPrefetcherContext* ctx) YB_PFETCH_TABLE_PG_REWRITE, YB_PFETCH_TABLE_PG_TYPE }; - YbTablePrefetcherState* prefetcher = &ctx->prefetcher; - bool is_using_response_cache = ctx->is_using_response_cache; + YbTablePrefetcherState *prefetcher = &ctx->prefetcher; YbTryRegisterCatalogVersionTableForPrefetching(); YbRegisterTables(prefetcher, core_tables, lengthof(core_tables)); @@ -2383,7 +2660,7 @@ YbPreloadRelCacheImpl(YbRunWithPrefetcherContext* ctx) YbRegisterTables(prefetcher, tables, lengthof(tables)); } - if (is_using_response_cache) + if (ctx->is_using_response_cache) { static const YbPFetchTable tables[] = { YB_PFETCH_TABLE_PG_CAST, @@ -2423,46 +2700,9 @@ YbPreloadRelCacheImpl(YbRunWithPrefetcherContext* ctx) YbFillCatCache(prefetcher, YB_PFETCH_TABLE_PG_ATTRIBUTE); YbFillCatCaches(prefetcher); - YBLoadRelationsResult relations_result = YBLoadRelations(); - - if (!is_using_response_cache) - { - /* - * In case of disabled respose cache preload other tables on demand. - * This is the optimization to prevent master node from being overloaded - * with lots of fat read requests (request which reads too much tables) - * in case there are lots of opened connections. - * Some of our tests has such setup. Reading all the tables in one - * request on a debug build under heavy load may spend up to 5-6 secs. 
- */ - if (relations_result.has_relations_with_trigger) - YbRegisterTable(prefetcher, YB_PFETCH_TABLE_PG_TRIGGER); - - if (relations_result.has_relations_with_row_security) - YbRegisterTable(prefetcher, YB_PFETCH_TABLE_PG_POLICY); - - if (relations_result.has_partitioned_tables) - { - static const YbPFetchTable tables[] = { - YB_PFETCH_TABLE_PG_CAST, - YB_PFETCH_TABLE_PG_INHERITS, - YB_PFETCH_TABLE_PG_PROC - }; - YbRegisterTables(prefetcher, tables, lengthof(tables)); - } - - status = YbPrefetch(prefetcher); - if (status) - return status; - } - - YBUpdateRelationsAttributes(relations_result.sys_relations_update_required); - YBUpdateRelationsPartitioning( - relations_result.sys_relations_update_required); - - YbFillCatCaches(prefetcher); - - YBUpdateRelationsIndicies(relations_result.sys_relations_update_required); + status = YbUpdateRelationCache(ctx); + if (status) + return status; /* * DB connection is not valid anymore in case: diff --git a/src/postgres/src/backend/utils/fmgr/fmgr.c b/src/postgres/src/backend/utils/fmgr/fmgr.c index 062943bbcd1b..2bc0802f31f6 100644 --- a/src/postgres/src/backend/utils/fmgr/fmgr.c +++ b/src/postgres/src/backend/utils/fmgr/fmgr.c @@ -21,6 +21,7 @@ #include "catalog/pg_type.h" #include "executor/functions.h" #include "lib/stringinfo.h" +#include "libpq/pqformat.h" #include "miscadmin.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" @@ -33,6 +34,7 @@ #include "utils/syscache.h" #include "pg_yb_utils.h" +#include /* * Hooks for function calls @@ -68,6 +70,34 @@ static void record_C_func(HeapTuple procedureTuple, /* extern so it's callable via JIT */ extern Datum fmgr_security_definer(PG_FUNCTION_ARGS); +extern void int2send_direct(StringInfo buf, Datum value); +extern void int4send_direct(StringInfo buf, Datum value); +extern void int8send_direct(StringInfo buf, Datum value); + +typedef void (*SendDirectFn)(StringInfo, Datum); + +/* + * Initialize direct send function with specified oid with specified func. 
+ */ +static void +fmgr_init_direct_send_func(Oid oid, SendDirectFn func) +{ + fmgr_builtins[fmgr_builtin_oid_index[oid]].alt_func = func; +} + +/* + * Initialize all direct send functions. + */ +#define PG_PROC_INT2SEND_OID 2405 +#define PG_PROC_INT4SEND_OID 2407 +#define PG_PROC_INT8SEND_OID 2409 +static void +fmgr_init_direct_send() +{ + fmgr_init_direct_send_func(PG_PROC_INT2SEND_OID, int2send_direct); + fmgr_init_direct_send_func(PG_PROC_INT4SEND_OID, int4send_direct); + fmgr_init_direct_send_func(PG_PROC_INT8SEND_OID, int8send_direct); +} /* * Lookup routines for builtin-function table. We can search by either Oid @@ -83,6 +113,9 @@ fmgr_isbuiltin(Oid id) if (id > fmgr_last_builtin_oid) return NULL; + static pthread_once_t initialized = PTHREAD_ONCE_INIT; + pthread_once(&initialized, &fmgr_init_direct_send); + /* * Lookup function data. If there's a miss in that range it's likely a * nonexistent function, returning NULL here will trigger an ERROR later. @@ -171,6 +204,7 @@ fmgr_info_cxt_security(Oid functionId, FmgrInfo *finfo, MemoryContext mcxt, finfo->fn_extra = NULL; finfo->fn_mcxt = mcxt; finfo->fn_expr = NULL; /* caller may set this later */ + finfo->fn_alt = NULL; if ((fbp = fmgr_isbuiltin(functionId)) != NULL) { @@ -183,6 +217,7 @@ fmgr_info_cxt_security(Oid functionId, FmgrInfo *finfo, MemoryContext mcxt, finfo->fn_stats = TRACK_FUNC_ALL; /* ie, never track */ finfo->fn_addr = fbp->func; finfo->fn_oid = functionId; + finfo->fn_alt = fbp->alt_func; return; } @@ -1634,6 +1669,29 @@ SendFunctionCall(FmgrInfo *flinfo, Datum val) return DatumGetByteaP(FunctionCall1(flinfo, val)); } +/* + * Call a previously-looked-up datatype binary-output function. + * + * Putting output to specified StringInfo buffer. + */ +void +StringInfoSendFunctionCall(StringInfo buf, FmgrInfo *flinfo, Datum val) +{ + void (*alt)(StringInfo, Datum) = flinfo->fn_alt; + if (alt) { + // There is function to send value directly to buf, w/o intermediate + // conversion to bytea. 
+ alt(buf, val); + return; + } + + bytea *outputbytes = SendFunctionCall(flinfo, val); + uint32 size = VARSIZE(outputbytes) - VARHDRSZ; + pq_sendint32(buf, size); + pq_sendbytes(buf, VARDATA(outputbytes), size); + pfree(outputbytes); +} + /* * As above, for I/O functions identified by OID. These are only to be used * in seldom-executed code paths. They are not only slow but leak memory. diff --git a/src/postgres/src/backend/utils/misc/guc.c b/src/postgres/src/backend/utils/misc/guc.c index 1eb51e3d4f6d..05ddfdea086a 100644 --- a/src/postgres/src/backend/utils/misc/guc.c +++ b/src/postgres/src/backend/utils/misc/guc.c @@ -1240,6 +1240,17 @@ static struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { + {"yb_lock_pk_single_rpc", PGC_USERSET, QUERY_TUNING_OTHER, + gettext_noop("Use single RPC to select and lock when PK is specified."), + gettext_noop("If possible (no conflicting filters in the plan), use a single RPC to " + "select and lock, when a locking clause is provided, in isolation levels " + "REPEATABLE READ and READ COMMITTED.") + }, + &yb_lock_pk_single_rpc, + true, + NULL, NULL, NULL + }, { {"enable_partition_pruning", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables plan-time and execution-time partition pruning."), @@ -2350,6 +2361,17 @@ static struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, + { + {"yb_enable_pg_locks", PGC_SUSET, LOCK_MANAGEMENT, + gettext_noop("Enable the pg_locks view. This view provides information about the locks held by active postgres sessions."), + NULL, + GUC_NOT_IN_SAMPLE + }, + &yb_enable_pg_locks, + true, + NULL, NULL, NULL + }, + { {"ysql_upgrade_mode", PGC_SUSET, DEVELOPER_OPTIONS, gettext_noop("Enter a special mode designed specifically for YSQL cluster upgrades. 
" @@ -2446,6 +2468,15 @@ static struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { + {"yb_enable_distinct_pushdown", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("Push supported DISTINCT operations to DocDB."), + NULL + }, + &yb_enable_distinct_pushdown, + true, + NULL, NULL, NULL + }, { {"yb_enable_hash_batch_in", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("GUC variable that enables batching RPCs of generated for IN queries on hash " @@ -2551,6 +2582,17 @@ static struct config_bool ConfigureNamesBool[] = }, #endif + { + {"yb_is_client_ysqlconnmgr", PGC_SU_BACKEND, CUSTOM_OPTIONS, + gettext_noop("Identifies that connection is created by " + "Ysql Connection Manager."), + NULL + }, + &yb_is_client_ysqlconnmgr, + false, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL @@ -2682,6 +2724,27 @@ static struct config_int ConfigureNamesInt[] = NULL, NULL, NULL }, + { + {"yb_locks_min_txn_age", PGC_USERSET, LOCK_MANAGEMENT, + gettext_noop("Sets the minimum transaction age for results from pg_locks."), + NULL, + GUC_UNIT_MS + }, + &yb_locks_min_txn_age, + 1000, 0, INT_MAX, + NULL, NULL, NULL + }, + + { + {"yb_locks_max_transactions", PGC_USERSET, LOCK_MANAGEMENT, + gettext_noop("Sets the maximum number of transactions for which to return rows in pg_locks."), + NULL + }, + &yb_locks_max_transactions, + 16, 1, INT_MAX, + NULL, NULL, NULL + }, + { {"max_standby_archive_delay", PGC_SIGHUP, REPLICATION_STANDBY, gettext_noop("Sets the maximum delay before canceling queries when a hot standby server is processing archived WAL data."), @@ -5752,7 +5815,7 @@ static struct config_enum ConfigureNamesEnum[] = {"yb_pg_batch_detection_mechanism", PGC_SIGHUP, COMPAT_OPTIONS_CLIENT, gettext_noop("The drivers use message protocol to communicate " "with PG. The driver does not inform PG in advance " - "about a Batch execution. We need to identify a batch " + "about a Batch execution. 
We need to identify a batch " "because in that case the single-shard optimization " "should be disabled. Postgres drivers pipeline " "messages and we exploit this to peek the message " diff --git a/src/postgres/src/backend/utils/misc/pg_yb_utils.c b/src/postgres/src/backend/utils/misc/pg_yb_utils.c index 97d43d2e06f5..da3463d73fa8 100644 --- a/src/postgres/src/backend/utils/misc/pg_yb_utils.c +++ b/src/postgres/src/backend/utils/misc/pg_yb_utils.c @@ -84,6 +84,7 @@ #include "utils/rel.h" #include "utils/spccache.h" #include "utils/syscache.h" +#include "utils/uuid.h" #include "fmgr.h" #include "funcapi.h" #include "mb/pg_wchar.h" @@ -190,6 +191,12 @@ IsYugaByteEnabled() return YBCPgIsYugaByteEnabled(); } +bool +YbIsClientYsqlConnMgr() +{ + return IsYugaByteEnabled() && yb_is_client_ysqlconnmgr; +} + void CheckIsYBSupportedRelation(Relation relation) { @@ -272,13 +279,6 @@ bool IsYBSystemColumn(int attrNum) attrNum == YBUniqueIdxKeySuffixAttributeNumber); } -bool -YBNeedRetryAfterCacheRefresh(ErrorData *edata) -{ - // TODO Inspect error code to distinguish retryable errors. - return true; -} - AttrNumber YBGetFirstLowInvalidAttrNumber(bool is_yb_relation) { return is_yb_relation ? 
YBFirstLowInvalidAttributeNumber : FirstLowInvalidHeapAttributeNumber; @@ -451,10 +451,15 @@ IsYBReadCommitted() bool YBIsWaitQueueEnabled() { +#ifdef NDEBUG + static bool kEnableWaitQueues = false; +#else + static bool kEnableWaitQueues = true; +#endif static int cached_value = -1; if (cached_value == -1) { - cached_value = YBCIsEnvVarTrueWithDefault("FLAGS_enable_wait_queues", false); + cached_value = YBCIsEnvVarTrueWithDefault("FLAGS_enable_wait_queues", kEnableWaitQueues); } return IsYugaByteEnabled() && cached_value; } @@ -657,6 +662,9 @@ YBInitPostgresBackend( callbacks.GetCurrentYbMemctx = &GetCurrentYbMemctx; callbacks.GetDebugQueryString = &GetDebugQueryString; callbacks.WriteExecOutParam = &YbWriteExecOutParam; + callbacks.UnixEpochToPostgresEpoch = &YbUnixEpochToPostgresEpoch; + callbacks.PostgresEpochToUnixEpoch= &YbPostgresEpochToUnixEpoch; + callbacks.ConstructTextArrayDatum = &YbConstructTextArrayDatum; YBCInitPgGate(type_table, count, callbacks); YBCInstallTxnDdlHook(); @@ -1179,6 +1187,7 @@ PowerWithUpperLimit(double base, int exp, double upper_limit) bool yb_enable_create_with_table_oid = false; int yb_index_state_flags_update_delay = 1000; bool yb_enable_expression_pushdown = true; +bool yb_enable_distinct_pushdown = true; bool yb_enable_optimizer_statistics = false; bool yb_bypass_cond_recheck = true; bool yb_make_next_ddl_statement_nonbreaking = false; @@ -1579,6 +1588,34 @@ bool IsTransactionalDdlStatement(PlannedStmt *pstmt, * order to correctly invalidate negative cache entries */ *is_breaking_catalog_change = false; + if (node_tag == T_CreateRoleStmt) { + /* + * If a create role statement does not reference another existing + * role there is no need to increment catalog version. 
+ */ + CreateRoleStmt *stmt = castNode(CreateRoleStmt, parsetree); + int nopts = list_length(stmt->options); + if (nopts == 0) + *is_catalog_version_increment = false; + else + { + bool reference_other_role = false; + ListCell *lc; + foreach(lc, stmt->options) + { + DefElem *def = (DefElem *) lfirst(lc); + if (strcmp(def->defname, "rolemembers") == 0 || + strcmp(def->defname, "adminmembers") == 0 || + strcmp(def->defname, "addroleto") == 0) + { + reference_other_role = true; + break; + } + } + if (!reference_other_role) + *is_catalog_version_increment = false; + } + } break; } case T_CreateStmt: @@ -1691,7 +1728,6 @@ bool IsTransactionalDdlStatement(PlannedStmt *pstmt, case T_AlterPolicyStmt: case T_AlterPublicationStmt: case T_AlterRoleSetStmt: - case T_AlterRoleStmt: case T_AlterSeqStmt: case T_AlterSubscriptionStmt: case T_AlterSystemStmt: @@ -1707,6 +1743,28 @@ bool IsTransactionalDdlStatement(PlannedStmt *pstmt, case T_RenameStmt: break; + case T_AlterRoleStmt: + { + /* + * If this is a simple alter role change password statement, + * there is no need to increment catalog version. Password + * is only used for authentication at connection setup time. + * A new password does not affect existing connections that + * were authenticated using the old password. 
+ */ + AlterRoleStmt *stmt = castNode(AlterRoleStmt, parsetree); + if (list_length(stmt->options) == 1) + { + DefElem *def = (DefElem *) linitial(stmt->options); + if (strcmp(def->defname, "password") == 0) + { + *is_breaking_catalog_change = false; + *is_catalog_version_increment = false; + } + } + break; + } + case T_AlterTableStmt: { *is_breaking_catalog_change = false; @@ -2656,6 +2714,26 @@ yb_get_effective_transaction_isolation_level(PG_FUNCTION_ARGS) PG_RETURN_CSTRING(yb_fetch_effective_transaction_isolation_level()); } +Datum +yb_cancel_transaction(PG_FUNCTION_ARGS) +{ + if (!IsYbDbAdminUser(GetUserId())) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to cancel transaction"))); + + pg_uuid_t *id = PG_GETARG_UUID_P(0); + YBCStatus status = YBCPgCancelTransaction(id->data); + if (status) + { + ereport(NOTICE, + (errmsg("failed to cancel transaction"), + errdetail("%s", YBCMessageAsCString(status)))); + PG_RETURN_BOOL(false); + } + PG_RETURN_BOOL(true); +} + /* * This PG function takes one optional bool input argument (legacy). * If the input argument is not specified or its value is false, this function @@ -3418,19 +3496,34 @@ uint64_t YbGetSharedCatalogVersion() return version; } -void YBUpdateRowLockPolicyForSerializable( - int *effectiveWaitPolicy, LockWaitPolicy userLockWaitPolicy) +void YBSetRowLockPolicy(int *docdb_wait_policy, LockWaitPolicy pg_wait_policy) { - /* - * TODO(concurrency-control): We don't honour SKIP LOCKED/ NO WAIT yet in serializable isolation - * level. - */ - if (userLockWaitPolicy == LockWaitSkip || userLockWaitPolicy == LockWaitError) - elog(WARNING, "%s clause is not supported yet for SERIALIZABLE isolation (GH issue #11761)", - userLockWaitPolicy == LockWaitSkip ? "SKIP LOCKED" : "NO WAIT"); + if (XactIsoLevel == XACT_REPEATABLE_READ && pg_wait_policy == LockWaitError) + { + /* The user requested NOWAIT, which isn't allowed in RR. 
*/ + elog(WARNING, "Setting wait policy to NOWAIT which is not allowed in " + "REPEATABLE READ isolation (GH issue #12166)"); + } - *effectiveWaitPolicy = LockWaitBlock; - if (!YBIsWaitQueueEnabled()) + if (IsolationIsSerializable()) + { + /* + * TODO(concurrency-control): We don't honour SKIP LOCKED/ NO WAIT yet in serializable + * isolation level. + */ + if (pg_wait_policy == LockWaitSkip || pg_wait_policy == LockWaitError) + elog(WARNING, "%s clause is not supported yet for SERIALIZABLE isolation " + "(GH issue #11761)", + pg_wait_policy == LockWaitSkip ? "SKIP LOCKED" : "NO WAIT"); + + *docdb_wait_policy = LockWaitBlock; + } + else + { + *docdb_wait_policy = pg_wait_policy; + } + + if (*docdb_wait_policy == LockWaitBlock && !YBIsWaitQueueEnabled()) { /* * If wait-queues are not enabled, we default to the "Fail-on-Conflict" policy which is @@ -3438,7 +3531,7 @@ void YBUpdateRowLockPolicyForSerializable( * "Fail-on-Conflict" and the reason why LockWaitError is not mapped to no-wait * semantics but to Fail-on-Conflict semantics). 
*/ - *effectiveWaitPolicy = LockWaitError; + *docdb_wait_policy = LockWaitError; } } @@ -3457,6 +3550,8 @@ bool YbIsBatchedExecution() { return yb_is_batched_execution; } +bool yb_is_client_ysqlconnmgr = false; + void YbSetIsBatchedExecution(bool value) { yb_is_batched_execution = value; } diff --git a/src/postgres/src/backend/utils/misc/postgresql.conf.sample b/src/postgres/src/backend/utils/misc/postgresql.conf.sample index d3a27ef288dd..7f611a18621f 100644 --- a/src/postgres/src/backend/utils/misc/postgresql.conf.sample +++ b/src/postgres/src/backend/utils/misc/postgresql.conf.sample @@ -391,6 +391,7 @@ #yb_enable_geolocation_costing = on #yb_enable_expression_pushdown = off #yb_enable_sequence_pushdown = on +#yb_enable_distinct_pushdown = on # - Planner Cost Constants - diff --git a/src/postgres/src/bin/psql/common.c b/src/postgres/src/bin/psql/common.c index 6b4841fe34d0..12e38977bbd2 100644 --- a/src/postgres/src/bin/psql/common.c +++ b/src/postgres/src/bin/psql/common.c @@ -220,6 +220,14 @@ psql_get_variable(const char *varname, PsqlScanQuoteType quote, } +/* + * YB_TODO(jasonk) + * - Postgres no longer uses psql_error. + * - Need to reintro "YB_DISABLE_ERROR_PREFIX" to Pg15 code. + * + * psql_error(const char *fmt,...) is gone. 
+ */ + /* * for backend Notice messages (INFO, WARNING, etc) */ diff --git a/src/postgres/src/include/access/attmap.h b/src/postgres/src/include/access/attmap.h index 3ae40cade757..6d842a90d64e 100644 --- a/src/postgres/src/include/access/attmap.h +++ b/src/postgres/src/include/access/attmap.h @@ -42,7 +42,8 @@ extern void free_attrmap(AttrMap *map); /* Conversion routines to build mappings */ extern AttrMap *build_attrmap_by_name(TupleDesc indesc, - TupleDesc outdesc); + TupleDesc outdesc, + bool yb_ignore_type_mismatch); extern AttrMap *build_attrmap_by_name_if_req(TupleDesc indesc, TupleDesc outdesc); extern AttrMap *build_attrmap_by_position(TupleDesc indesc, diff --git a/src/postgres/src/include/access/relscan.h b/src/postgres/src/include/access/relscan.h index a3bdcb3902ab..bb63a856b62a 100644 --- a/src/postgres/src/include/access/relscan.h +++ b/src/postgres/src/include/access/relscan.h @@ -207,6 +207,8 @@ typedef struct IndexScanDescData Scan *yb_scan_plan; PushdownExprs *yb_rel_pushdown; PushdownExprs *yb_idx_pushdown; + List *yb_aggrefs; /* aggregate information for aggregate pushdown */ + TupleTableSlot *yb_agg_slot; /* scan slot used by aggregate pushdown */ /* * Result from Yugabyte. diff --git a/src/postgres/src/include/access/yb_scan.h b/src/postgres/src/include/access/yb_scan.h index 75c41c261fe4..fd3cae83f8a4 100644 --- a/src/postgres/src/include/access/yb_scan.h +++ b/src/postgres/src/include/access/yb_scan.h @@ -174,12 +174,16 @@ extern void ybc_heap_endscan(TableScanDesc scanDesc); extern TableScanDesc ybc_remote_beginscan(Relation relation, Snapshot snapshot, Scan *pg_scan_plan, - PushdownExprs *pushdown); + PushdownExprs *pushdown, + List *aggrefs, + YBCPgExecParameters *exec_params); /* Add targets to the given statement. 
*/ extern void YbDmlAppendTargetSystem(AttrNumber attnum, YBCPgStatement handle); extern void YbDmlAppendTargetRegular(TupleDesc tupdesc, AttrNumber attnum, YBCPgStatement handle); +extern void YbDmlAppendTargetsAggregate(List *aggrefs, TupleDesc tupdesc, + Relation index, YBCPgStatement handle); extern void YbDmlAppendTargets(List *colrefs, YBCPgStatement handle); /* Add quals to the given statement. */ extern void YbDmlAppendQuals(List *quals, bool is_primary, @@ -200,7 +204,9 @@ extern YbScanDesc ybcBeginScan(Relation relation, ScanKey key, Scan *pg_scan_plan, PushdownExprs *rel_pushdown, - PushdownExprs *idx_pushdown); + PushdownExprs *idx_pushdown, + List *aggrefs, + YBCPgExecParameters *exec_params); HeapTuple ybc_getnext_heaptuple(YbScanDesc ybScan, bool is_forward_scan, bool *recheck); IndexTuple ybc_getnext_indextuple(YbScanDesc ybScan, bool is_forward_scan, bool *recheck); diff --git a/src/postgres/src/include/catalog/catalog.h b/src/postgres/src/include/catalog/catalog.h index 2b001f7791df..3516502a73bc 100644 --- a/src/postgres/src/include/catalog/catalog.h +++ b/src/postgres/src/include/catalog/catalog.h @@ -29,7 +29,7 @@ * If you increment it, make sure you didn't forget to add a new SQL migration * (see pg_yb_migration.dat and src/yb/yql/pgwrapper/ysql_migrations/README.md) */ -#define YB_LAST_USED_OID 8060 +#define YB_LAST_USED_OID 8063 extern bool IsSystemRelation(Relation relation); extern bool IsToastRelation(Relation relation); diff --git a/src/postgres/src/include/catalog/pg_proc.dat b/src/postgres/src/include/catalog/pg_proc.dat index 94994264ff9b..ea0eef469688 100644 --- a/src/postgres/src/include/catalog/pg_proc.dat +++ b/src/postgres/src/include/catalog/pg_proc.dat @@ -6173,6 +6173,13 @@ proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}', proargnames => '{locktype,database,relation,page,tuple,virtualxid,transactionid,classid,objid,objsubid,virtualtransaction,pid,mode,granted,fastpath,waitstart}', prosrc => 'pg_lock_status' }, +{ oid => 
'8062', descr => 'view yugabyte lock information', + proname => 'yb_lock_status', prorows => '1000', proretset => 't', proisstrict => 'f', provolatile => 'v', + prorettype => 'record', proargtypes => 'oid uuid', + proallargtypes => '{oid,uuid,text,oid,oid,int4,_text,bool,bool,timestamptz,timestamptz,text,text,uuid,int4,text,bool,_text,_text,int2,int4,bool,_uuid}', + proargmodes => '{i,i,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}', + proargnames => '{relation,transaction_id,locktype,database,relation,pid,mode,granted,fastpath,waitstart,waitend,node,tablet_id,transaction_id,subtransaction_id,status_tablet_id,is_explicit,hash_cols,range_cols,attnum,column_id,multiple_rows_locked,blocked_by}', + prosrc => 'yb_lock_status'}, { oid => '2561', descr => 'get array of PIDs of sessions blocking specified backend PID from acquiring a heavyweight lock', proname => 'pg_blocking_pids', provolatile => 'v', prorettype => '_int4', @@ -7708,6 +7715,9 @@ { oid => '2404', descr => 'I/O', proname => 'int2recv', prorettype => 'int2', proargtypes => 'internal', prosrc => 'int2recv' }, +# YB_TODO(sergei@yugabyte) Review changes for "PG_PROC_INT2SEND_OID" +# - PG13 doesn't allow having "oid_symbol" for proc. +# - Please define your symbol elsewhere. 
{ oid => '2405', descr => 'I/O', proname => 'int2send', prorettype => 'bytea', proargtypes => 'int2', prosrc => 'int2send' }, @@ -8135,6 +8145,11 @@ prorettype => 'void', proargtypes => 'bool', proargnames => '{per_database_mode}', prosrc => 'insert into pg_catalog.pg_yb_catalog_version select oid, 1, 1 from pg_catalog.pg_database where per_database_mode and (oid not in (select db_oid from pg_catalog.pg_yb_catalog_version)); delete from pg_catalog.pg_yb_catalog_version where (not per_database_mode and db_oid != 1) or (per_database_mode and db_oid not in (select oid from pg_catalog.pg_database))' }, +{ oid => '8061', descr => 'returns percentile given jsonb histogram and percentile integer', + proname => 'yb_get_percentile', prokind => 'f', + prorettype => 'float8', proargtypes => 'jsonb float8', proargnames => '{hist,percentile}', + prosrc => 'yb_get_percentile' }, + # non-persistent series generator { oid => '1066', descr => 'non-persistent series generator', proname => 'generate_series', prorows => '1000', @@ -11990,4 +12005,10 @@ prorettype => 'cstring', proargtypes => '', prosrc => 'yb_get_effective_transaction_isolation_level' }, +{ oid => '8063', + descr => 'Cancel/abort the transaction with specified transaction id', + proname => 'yb_cancel_transaction', provolatile => 'v', prorettype => 'bool', + proargtypes => 'uuid', proargnames => '{txid}', + prosrc => 'yb_cancel_transaction' }, + ] diff --git a/src/postgres/src/include/catalog/pg_yb_migration.dat b/src/postgres/src/include/catalog/pg_yb_migration.dat index 3def647f7587..ba157fb61efc 100644 --- a/src/postgres/src/include/catalog/pg_yb_migration.dat +++ b/src/postgres/src/include/catalog/pg_yb_migration.dat @@ -12,7 +12,7 @@ [ # For better version control conflict detection, list latest migration filename -# here: V38__14445__alter_pg_stat_statements.sql -{ major => '38', minor => '0', name => '', time_applied => '_null_' } +# here: V42__16918__pg_locks_view.sql +{ major => '42', minor => '0', name => '', 
time_applied => '_null_' } ] diff --git a/src/postgres/src/include/catalog/yb_type.h b/src/postgres/src/include/catalog/yb_type.h index b6080e00d95d..9132875f3df1 100644 --- a/src/postgres/src/include/catalog/yb_type.h +++ b/src/postgres/src/include/catalog/yb_type.h @@ -62,4 +62,11 @@ bool YbDataTypeIsValidForKey(Oid type_id); */ void YbGetTypeTable(const YBCPgTypeEntity **type_table, int *count); +/* + * Callback functions + */ +int64_t YbPostgresEpochToUnixEpoch(int64_t postgres_t); +int64_t YbUnixEpochToPostgresEpoch(int64_t unix_t); +void YbConstructTextArrayDatum(const char **strings, const int nelems, + char **datum, size_t *len); #endif diff --git a/src/postgres/src/include/commands/ybccmds.h b/src/postgres/src/include/commands/ybccmds.h index 182e76f33549..edadcc12cf17 100644 --- a/src/postgres/src/include/commands/ybccmds.h +++ b/src/postgres/src/include/commands/ybccmds.h @@ -36,7 +36,8 @@ /* Database Functions -------------------------------------------------------------------------- */ extern void YBCCreateDatabase( - Oid dboid, const char *dbname, Oid src_dboid, Oid next_oid, bool colocated); + Oid dboid, const char *dbname, Oid src_dboid, Oid next_oid, bool colocated, + bool *retry_on_oid_collision); extern void YBCDropDatabase(Oid dboid, const char *dbname); diff --git a/src/postgres/src/include/executor/ybcFunction.h b/src/postgres/src/include/executor/ybcFunction.h new file mode 100644 index 000000000000..340a3f65f15f --- /dev/null +++ b/src/postgres/src/include/executor/ybcFunction.h @@ -0,0 +1,41 @@ +/*-------------------------------------------------------------------------------------------------- + * ybcFunction.h + * prototypes for ybcFunction.c + * + * Copyright (c) YugaByte, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations + * under the License. + * + * src/include/executor/ybcFunction.h + * + */ + +#pragma once + +#include "postgres.h" +#include "funcapi.h" + +#include "yb/yql/pggate/ybc_pg_typedefs.h" +#include "yb/yql/pggate/ybc_pggate.h" + +typedef struct YbPgFuncCallContextData +{ + YBCPgFunction handle; + MemoryContext per_call_ctx; +} YbPgFuncCallContextData; + +typedef struct YbPgFuncCallContextData *YbFuncCallContext; +extern YbFuncCallContext YbNewFuncCallContext(FuncCallContext *funcCallContext); +extern void YbSetFunctionParam(YBCPgFunction handle, const char *name, + int attr_typid, uint64_t datum, bool is_null); +extern void YbSetSRFTargets(YbFuncCallContext context, TupleDesc desc); +extern bool YbSRFGetNext(YbFuncCallContext context, uint64_t *values, + bool *is_nulls); diff --git a/src/postgres/src/include/fmgr.h b/src/postgres/src/include/fmgr.h index b75821fe8d5e..ccbb538dd48e 100644 --- a/src/postgres/src/include/fmgr.h +++ b/src/postgres/src/include/fmgr.h @@ -64,6 +64,8 @@ typedef struct FmgrInfo void *fn_extra; /* extra space for use by handler */ MemoryContext fn_mcxt; /* memory context to store fn_extra in */ fmNodePtr fn_expr; /* expression parse tree for call, or NULL */ + void *fn_alt; /* alternative function implementation for + * special cases */ } FmgrInfo; /* @@ -703,6 +705,8 @@ extern Datum ReceiveFunctionCall(FmgrInfo *flinfo, fmStringInfo buf, extern Datum OidReceiveFunctionCall(Oid functionId, fmStringInfo buf, Oid typioparam, int32 typmod); extern bytea *SendFunctionCall(FmgrInfo *flinfo, Datum val); +extern void StringInfoSendFunctionCall(fmStringInfo buf, 
FmgrInfo *flinfo, + Datum val); extern bytea *OidSendFunctionCall(Oid functionId, Datum val); diff --git a/src/postgres/src/include/libpq/libpq-be.h b/src/postgres/src/include/libpq/libpq-be.h index 1994bdb8997c..84e01dd15cca 100644 --- a/src/postgres/src/include/libpq/libpq-be.h +++ b/src/postgres/src/include/libpq/libpq-be.h @@ -157,6 +157,7 @@ typedef struct Port * Information that needs to be held during the authentication cycle. */ HbaLine *hba; + /* * Whether yb-tserver to postgres authentication is used. This information * needs to be saved separately from hba because hba gets deallocated after @@ -164,6 +165,12 @@ typedef struct Port */ bool yb_is_tserver_auth_method; + /* + * To be used during Authentication, identifies whether authentication is + * invoked due to Auth Passthrough Request packet. + */ + bool yb_is_auth_passthrough_req; + /* * Authenticated identity. The meaning of this identifier is dependent on * hba->auth_method; it is the identity (if any) that the user presented diff --git a/src/postgres/src/include/nodes/execnodes.h b/src/postgres/src/include/nodes/execnodes.h index 2f24474bdc0c..364ee49cbc46 100644 --- a/src/postgres/src/include/nodes/execnodes.h +++ b/src/postgres/src/include/nodes/execnodes.h @@ -1543,6 +1543,7 @@ typedef struct YbSeqScanState { ScanState ss; /* its first field is NodeTag */ // TODO handle; /* size of parallel heap scan descriptor */ + List *aggrefs; /* aggregate pushdown information */ } YbSeqScanState; /* ---------------- @@ -1658,6 +1659,9 @@ typedef struct IndexScanState * TableSlot slot for holding tuples fetched from the table * VMBuffer buffer in use for visibility map testing, if any * PscanLen size of parallel index-only scan descriptor + * + * YB specific attributes + * aggrefs aggregate pushdown information * ---------------- */ typedef struct IndexOnlyScanState @@ -1677,6 +1681,9 @@ typedef struct IndexOnlyScanState TupleTableSlot *ioss_TableSlot; Buffer ioss_VMBuffer; Size ioss_PscanLen; + + /* YB 
specific attributes. */ + List *yb_ioss_aggrefs; /* * yb_indexqual_for_recheck is the modified version of indexqual. * It is used in tuple recheck step only. @@ -2024,7 +2031,7 @@ typedef struct ForeignScanState void *fdw_state; /* foreign-data wrapper can keep state here */ /* YB specific attributes. */ - List *yb_fdw_aggs; /* aggregate pushdown information */ + List *yb_fdw_aggrefs; /* aggregate pushdown information */ } ForeignScanState; /* ---------------- diff --git a/src/postgres/src/include/nodes/nodeFuncs.h b/src/postgres/src/include/nodes/nodeFuncs.h index 32a7d838f194..614cdca338ad 100644 --- a/src/postgres/src/include/nodes/nodeFuncs.h +++ b/src/postgres/src/include/nodes/nodeFuncs.h @@ -160,4 +160,7 @@ struct PlanState; extern bool planstate_tree_walker(struct PlanState *planstate, bool (*walker) (), void *context); +/* YB additions. */ +extern List **YbPlanStateTryGetAggrefs(struct PlanState *planstate); + #endif /* NODEFUNCS_H */ diff --git a/src/postgres/src/include/nodes/pathnodes.h b/src/postgres/src/include/nodes/pathnodes.h index c6895992e75c..e5b6592ccefb 100644 --- a/src/postgres/src/include/nodes/pathnodes.h +++ b/src/postgres/src/include/nodes/pathnodes.h @@ -1179,6 +1179,29 @@ typedef struct ParamPathInfo } ParamPathInfo; +/* + * Indicates what kind of locking happens during execution. For locking in + * SERIALIZABLE isolation level, the mode is propagated throughout relevant + * paths. YB_LOCK_CLAUSE_ON_PK is to lock during SELECT in some locking clause + * cases, avoiding a second RPC. + */ +typedef enum YbLockMechanism { + YB_NO_SCAN_LOCK, /* no locks taken in this scan */ + YB_RANGE_LOCK_ON_SCAN, /* range locks will be taken for SERIALIZABLE */ + YB_LOCK_CLAUSE_ON_PK, /* may take locks on PK for locking clause */ +} YbLockMechanism; + +/* + * Info propagated for YugabyteDB. + * + * 'yb_lock_mechanism' indicates what kind of lock can or must be taken as part + * of a scan. 
+ */ +typedef struct YbPathInfo { + YbLockMechanism yb_lock_mechanism; /* what lock as part of a scan */ +} YbPathInfo; + + /* * Type "Path" is used as-is for sequential-scan paths, as well as some other * simple plan types that we don't need any extra information in the path for. @@ -1207,6 +1230,8 @@ typedef struct ParamPathInfo * * "pathkeys" is a List of PathKey nodes (see above), describing the sort * ordering of the path's output rows. + * + * 'yb_path_info' contains info propagated for YugabyteDB. */ typedef struct Path { @@ -1230,6 +1255,8 @@ typedef struct Path List *pathkeys; /* sort ordering of path's output */ /* pathkeys is a List of PathKey nodes; see above */ + + YbPathInfo yb_path_info; /* fields used for YugabyteDB */ } Path; /* Macro for extracting a path's parameterization relids; beware double eval */ diff --git a/src/postgres/src/include/nodes/plannodes.h b/src/postgres/src/include/nodes/plannodes.h index 5c3ac4e26b66..b5de2512268f 100644 --- a/src/postgres/src/include/nodes/plannodes.h +++ b/src/postgres/src/include/nodes/plannodes.h @@ -14,12 +14,14 @@ #ifndef PLANNODES_H #define PLANNODES_H +#include "access/relation.h" #include "access/sdir.h" #include "access/stratnum.h" #include "lib/stringinfo.h" #include "nodes/bitmapset.h" #include "nodes/lockoptions.h" #include "nodes/parsenodes.h" +#include "nodes/pathnodes.h" #include "nodes/primnodes.h" @@ -357,8 +359,9 @@ typedef struct BitmapOr */ typedef struct Scan { - Plan plan; - Index scanrelid; /* relid is index into the range table */ + Plan plan; + Index scanrelid; /* relid is index into the range table */ + YbLockMechanism yb_lock_mechanism; /* locks taken as part of the scan */ } Scan; /* ---------------- diff --git a/src/postgres/src/include/optimizer/planmain.h b/src/postgres/src/include/optimizer/planmain.h index c4f61c1a09c0..c0a1e589095f 100644 --- a/src/postgres/src/include/optimizer/planmain.h +++ b/src/postgres/src/include/optimizer/planmain.h @@ -42,7 +42,7 @@ extern Plan 
*create_plan(PlannerInfo *root, Path *best_path); extern ForeignScan *make_foreignscan(List *qptlist, List *qpqual, Index scanrelid, List *fdw_exprs, List *fdw_private, List *fdw_scan_tlist, List *fdw_recheck_quals, - Plan *outer_plan); + Plan *outer_plan, YbPathInfo yb_path_info); extern Plan *change_plan_targetlist(Plan *subplan, List *tlist, bool tlist_parallel_safe); extern Plan *materialize_finished_plan(Plan *subplan); diff --git a/src/postgres/src/include/pg_yb_utils.h b/src/postgres/src/include/pg_yb_utils.h index a36e30a5e04c..8191857ab06a 100644 --- a/src/postgres/src/include/pg_yb_utils.h +++ b/src/postgres/src/include/pg_yb_utils.h @@ -104,6 +104,11 @@ extern GeolocationDistance get_tablespace_distance (Oid tablespaceoid); */ extern bool IsYugaByteEnabled(); +/* + * Check whether the connection is made from Ysql Connection Manager. + */ +extern bool YbIsClientYsqlConnMgr(); + extern bool yb_enable_docdb_tracing; extern bool yb_read_from_followers; extern int32_t yb_follower_read_staleness_ms; @@ -178,8 +183,6 @@ extern bool IsRealYBColumn(Relation rel, int attrNum); */ extern bool IsYBSystemColumn(int attrNum); -extern bool YBNeedRetryAfterCacheRefresh(ErrorData *edata); - extern void YBReportFeatureUnsupported(const char *err_msg); extern AttrNumber YBGetFirstLowInvalidAttrNumber(bool is_yb_relation); @@ -438,6 +441,12 @@ extern int yb_index_state_flags_update_delay; */ extern bool yb_enable_expression_pushdown; +/* + * Enables distinct pushdown. + * If true, send supported DISTINCT operations to DocDB + */ +extern bool yb_enable_distinct_pushdown; + /* * YSQL guc variable that is used to enable the use of Postgres's selectivity * functions and YSQL table statistics. 
@@ -762,24 +771,32 @@ uint64_t YbGetSharedCatalogVersion(); uint32_t YbGetNumberOfDatabases(); /* - * This function helps map the user intended row-level lock policy i.e., "userLockWaitPolicy" of - * type enum LockWaitPolicy to the "effectiveWaitPolicy" of type enum WaitPolicy as defined in + * This function maps the user intended row-level lock policy i.e., "pg_wait_policy" of + * type enum LockWaitPolicy to the "docdb_wait_policy" of type enum WaitPolicy as defined in * common.proto. * - * The semantics of the WaitPolicy enum differs slightly from the traditional LockWaitPolicy in - * Postgres as explained in common.proto. This is due to historical reasons. WaitPolicy in + * The semantics of the WaitPolicy enum differ slightly from those of the traditional LockWaitPolicy + * in Postgres, as explained in common.proto. This is for historical reasons. WaitPolicy in * common.proto was created as a copy of LockWaitPolicy to be passed to the Tserver to help in * appropriate conflict-resolution steps for the different row-level lock policies. * - * This function does the following: - * 1. Log a warning for a userLockWaitPolicy of LockWaitSkip and LockWaitError because SKIP LOCKED - * and NO WAIT are not supported yet. - * 2. Set effectiveWaitPolicy to either WAIT_BLOCK if wait queues are enabled. Else, set it to - * WAIT_ERROR (which actually uses the "Fail on Conflict" conflict management policy instead - * of "no wait" semantics as explained in "enum WaitPolicy" in common.proto). + * In isolation level SERIALIZABLE, this function sets docdb_wait_policy to WAIT_BLOCK as + * this is the only policy currently supported for SERIALIZABLE. 
+ * + * However, if wait queues aren't enabled in the following cases: + * * Isolation level SERIALIZABLE + * * The user requested LockWaitBlock in another isolation level + * this function sets docdb_wait_policy to WAIT_ERROR (which actually uses the "Fail on Conflict" + * conflict management policy instead of "no wait" semantics, as explained in "enum WaitPolicy" in + * common.proto). + * + * Logs a warning: + * 1. In isolation level SERIALIZABLE for a pg_wait_policy of LockWaitSkip and LockWaitError + * because SKIP LOCKED and NOWAIT are not supported yet. + * 2. In isolation level REPEATABLE READ for a pg_wait_policy of LockWaitError because NOWAIT + * is not supported. */ -void YBUpdateRowLockPolicyForSerializable( - int *effectiveWaitPolicy, LockWaitPolicy userLockWaitPolicy); +void YBSetRowLockPolicy(int *docdb_wait_policy, LockWaitPolicy pg_wait_policy); const char* yb_fetch_current_transaction_priority(void); @@ -887,4 +904,6 @@ void YbSetIsBatchedExecution(bool value); } while (0) #endif +extern bool yb_is_client_ysqlconnmgr; + #endif /* PG_YB_UTILS_H */ diff --git a/src/postgres/src/include/utils/fmgrtab.h b/src/postgres/src/include/utils/fmgrtab.h index 0a59937656fd..cc3376f95594 100644 --- a/src/postgres/src/include/utils/fmgrtab.h +++ b/src/postgres/src/include/utils/fmgrtab.h @@ -30,9 +30,11 @@ typedef struct bool retset; /* T if function returns a set */ const char *funcName; /* C name of the function */ PGFunction func; /* pointer to compiled function */ + void* alt_func; /* alternative function implementation for + * special cases */ } FmgrBuiltin; -extern PGDLLIMPORT const FmgrBuiltin fmgr_builtins[]; +extern PGDLLIMPORT FmgrBuiltin fmgr_builtins[]; extern PGDLLIMPORT const int fmgr_nbuiltins; /* number of entries in table */ diff --git a/src/postgres/src/include/utils/guc.h b/src/postgres/src/include/utils/guc.h index c4c9d3c80c2c..a213fa855b7a 100644 --- a/src/postgres/src/include/utils/guc.h +++ b/src/postgres/src/include/utils/guc.h @@ -299,6 
+299,7 @@ extern PGDLLIMPORT bool trace_sort; extern PGDLLIMPORT bool yb_enable_memory_tracking; extern PGDLLIMPORT int yb_bnl_batch_size; extern PGDLLIMPORT bool yb_bnl_enable_hashing; +extern PGDLLIMPORT bool yb_lock_pk_single_rpc; /* * Functions exported by guc.c diff --git a/src/postgres/src/test/isolation/expected/yb-lock-status-waiters.out b/src/postgres/src/test/isolation/expected/yb-lock-status-waiters.out new file mode 100644 index 000000000000..99ba0cd16961 --- /dev/null +++ b/src/postgres/src/test/isolation/expected/yb-lock-status-waiters.out @@ -0,0 +1,23 @@ +Parsed test spec with 2 sessions + +starting permutation: s1_insert s2_begin s2_insert s1_lock_status s1_commit +step s1_insert: INSERT INTO foo VALUES (2,2); +step s2_begin: BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ; +step s2_insert: INSERT INTO foo VALUES (2,2); +step s1_lock_status: + SELECT + locktype, relation::regclass, mode, granted, fastpath, is_explicit, + hash_cols, range_cols, attnum, column_id, multiple_rows_locked + FROM yb_lock_status(null,null) + ORDER BY + relation::regclass::text, granted, hash_cols NULLS FIRST, range_cols NULLS FIRST; + +locktype relation mode granted fastpath is_explicit hash_cols range_cols attnum column_id multiple_rows_locked + +relation foo {WEAK_READ,WEAK_WRITE}f f f t +row foo {STRONG_READ,STRONG_WRITE}f f f {2} f +relation foo {WEAK_READ,WEAK_WRITE}t f f t +row foo {STRONG_READ,STRONG_WRITE}t f f {2} f +step s1_commit: COMMIT; +step s2_insert: <... 
completed> +error in steps s1_commit s2_insert: ERROR: duplicate key value violates unique constraint "foo_pkey" diff --git a/src/postgres/src/test/isolation/expected/yb-lock-status-waiters_1.out b/src/postgres/src/test/isolation/expected/yb-lock-status-waiters_1.out new file mode 100644 index 000000000000..ab3543f3bf54 --- /dev/null +++ b/src/postgres/src/test/isolation/expected/yb-lock-status-waiters_1.out @@ -0,0 +1,25 @@ +Parsed test spec with 2 sessions + +starting permutation: s1_insert s2_begin s2_insert s1_lock_status s1_commit +step s1_insert: INSERT INTO foo VALUES (2,2); +step s2_begin: BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ; +step s2_insert: INSERT INTO foo VALUES (2,2); +step s1_lock_status: + SELECT + locktype, relation::regclass, mode, granted, fastpath, is_explicit, + hash_cols, range_cols, attnum, column_id, multiple_rows_locked + FROM yb_lock_status(null,null) + ORDER BY + relation::regclass::text, granted, hash_cols NULLS FIRST, range_cols NULLS FIRST; + +locktype relation mode granted fastpath is_explicit hash_cols range_cols attnum column_id multiple_rows_locked + +relation foo {WEAK_READ,WEAK_WRITE}f f f t +row foo {STRONG_READ,STRONG_WRITE}f f f {2} f +relation foo {WEAK_READ,WEAK_WRITE}t f f t +row foo {WEAK_READ,WEAK_WRITE}t f f {2} f +column foo {STRONG_READ,STRONG_WRITE}t f f {2} 0 f +column foo {STRONG_READ,STRONG_WRITE}t f f {2} 2 11 f +step s1_commit: COMMIT; +step s2_insert: <... 
completed> +error in steps s1_commit s2_insert: ERROR: duplicate key value violates unique constraint "foo_pkey" diff --git a/src/postgres/src/test/isolation/expected/yb-lock-status.out b/src/postgres/src/test/isolation/expected/yb-lock-status.out new file mode 100644 index 000000000000..87bd6bf55584 --- /dev/null +++ b/src/postgres/src/test/isolation/expected/yb-lock-status.out @@ -0,0 +1,83 @@ +Parsed test spec with 3 sessions + +starting permutation: totalrows +step totalrows: SELECT COUNT(*) FROM yb_lock_status(null, null); +count + +0 + +starting permutation: s1lock wait2s totalrows totaltxns +step s1lock: SELECT * FROM lock_status_test WHERE k = 1 FOR UPDATE; +k + +1 +step wait2s: SELECT pg_sleep(2); +pg_sleep + + +step totalrows: SELECT COUNT(*) FROM yb_lock_status(null, null); +count + +2 +step totaltxns: SELECT COUNT(DISTINCT transaction_id) from yb_lock_status(null, null); +count + +1 + +starting permutation: s1lock s2lock wait2s totalrows totaltxns +step s1lock: SELECT * FROM lock_status_test WHERE k = 1 FOR UPDATE; +k + +1 +step s2lock: SELECT * FROM lock_status_test WHERE k = 2 FOR UPDATE; +k + +2 +step wait2s: SELECT pg_sleep(2); +pg_sleep + + +step totalrows: SELECT COUNT(*) FROM yb_lock_status(null, null); +count + +4 +step totaltxns: SELECT COUNT(DISTINCT transaction_id) from yb_lock_status(null, null); +count + +2 + +starting permutation: 6secold s1lock s2lock wait2s totalrows totaltxns wait5s totalrows totaltxns +step 6secold: SET yb_locks_min_txn_age TO 6000; +step s1lock: SELECT * FROM lock_status_test WHERE k = 1 FOR UPDATE; +k + +1 +step s2lock: SELECT * FROM lock_status_test WHERE k = 2 FOR UPDATE; +k + +2 +step wait2s: SELECT pg_sleep(2); +pg_sleep + + +step totalrows: SELECT COUNT(*) FROM yb_lock_status(null, null); +count + +0 +step totaltxns: SELECT COUNT(DISTINCT transaction_id) from yb_lock_status(null, null); +count + +0 +step wait5s: SELECT pg_sleep(5); +step wait5s: <... 
completed> +pg_sleep + + +step totalrows: SELECT COUNT(*) FROM yb_lock_status(null, null); +count + +4 +step totaltxns: SELECT COUNT(DISTINCT transaction_id) from yb_lock_status(null, null); +count + +2 diff --git a/src/postgres/src/test/isolation/specs/yb-lock-status-waiters.spec b/src/postgres/src/test/isolation/specs/yb-lock-status-waiters.spec new file mode 100644 index 000000000000..bd0d7eda821d --- /dev/null +++ b/src/postgres/src/test/isolation/specs/yb-lock-status-waiters.spec @@ -0,0 +1,41 @@ +setup +{ + DROP TABLE IF EXISTS foo; + CREATE TABLE foo ( + k int PRIMARY KEY, + v int NOT NULL + ); + + INSERT INTO foo VALUES (1,1); +} + +teardown +{ + DROP TABLE foo; +} + +session "s1" +setup { BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ; } +step "s1_insert" { INSERT INTO foo VALUES (2,2); } +step "s1_select_for_update" { SELECT * FROM foo FOR UPDATE; } +step "s1_lock_status" { + SELECT + locktype, relation::regclass, mode, granted, fastpath, is_explicit, + hash_cols, range_cols, attnum, column_id, multiple_rows_locked + FROM yb_lock_status(null,null) + ORDER BY + relation::regclass::text, granted, hash_cols NULLS FIRST, range_cols NULLS FIRST; +} +step "s1_commit" { COMMIT; } + +session "s2" +step "s2_begin" { BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ; } +step "s2_insert" { INSERT INTO foo VALUES (2,2); } +step "s2_update" { UPDATE foo SET v=10 WHERE k=1; } + +permutation "s1_insert" "s2_begin" "s2_insert" "s1_lock_status" "s1_commit" +# TODO: uncomment the below permutation once #18195 is resolved +# permutation "s1_insert" "s2_insert" "s1_lock_status" "s1_commit" +# +# TODO: uncomment this as soon as issue #18149 is resolved +# permutation "s1_select_for_update" "s2_update" "s1_lock_status" "s1_commit" diff --git a/src/postgres/src/test/isolation/specs/yb-lock-status.spec b/src/postgres/src/test/isolation/specs/yb-lock-status.spec new file mode 100644 index 000000000000..ffcc6c7d0f12 --- /dev/null +++ 
b/src/postgres/src/test/isolation/specs/yb-lock-status.spec @@ -0,0 +1,61 @@ +setup +{ + CREATE TABLE lock_status_test ( + k int PRIMARY KEY + ); + INSERT INTO lock_status_test VALUES (1), (2); +} + +teardown +{ + DROP TABLE lock_status_test; +} + + +session "s1" +setup { BEGIN; } +step "s1lock" { SELECT * FROM lock_status_test WHERE k = 1 FOR UPDATE;} +teardown { COMMIT; } + + +session "s2" +setup { BEGIN; } +step "s2lock" { SELECT * FROM lock_status_test WHERE k = 2 FOR UPDATE;} +teardown { COMMIT; } + + +session "status" +step "totalrows" { SELECT COUNT(*) FROM yb_lock_status(null, null); } +step "totaltxns" { SELECT COUNT(DISTINCT transaction_id) from yb_lock_status(null, null); } +step "0secold" { SET yb_locks_min_txn_age TO 0; } +step "6secold" { SET yb_locks_min_txn_age TO 6000; } +step "max1" { SET yb_locks_max_transactions TO 1; } +step "wait2s" { SELECT pg_sleep(2); } +step "wait5s" { SELECT pg_sleep(5); } +teardown +{ + SET yb_locks_max_transactions TO 16; + SET yb_locks_min_txn_age TO 1000; +} + + +# (1) There should be no rows at all at the baseline. +permutation "totalrows" + +# (2) Taking 1 lock, and waiting 2s >> default min age of 1s: Should see +# 2 total rows and 1 transaction. +permutation "s1lock" "wait2s" "totalrows" "totaltxns" + +# (3) Now taking 2 locks and waiting 2s: Should see 4 total rows and 2 +# transactions. +permutation "s1lock" "s2lock" "wait2s" "totalrows" "totaltxns" + +# (4) Take 2 locks. Set the limit to 1 transaction, and min txn age to 0. +# Wait 5s and should get back 2 total rows and 1 transaction. +# TODO: Uncomment when this test is reliable. +# permutation "max1" "0secold" "s1lock" "s2lock" "wait5s" "totalrows" "totaltxns" + +# (5) Take 2 locks. Here, set the min_age to 6s. Make sure nothing is +# returned after 2s. But then after 5s more, we should see 4 total and 2 +# transactions. 
+permutation "6secold" "s1lock" "s2lock" "wait2s" "totalrows" "totaltxns" "wait5s" "totalrows" "totaltxns" diff --git a/src/postgres/src/test/isolation/yb_pg_isolation_schedule b/src/postgres/src/test/isolation/yb_pg_isolation_schedule index 68ade6cbc37a..c95bc72720cc 100644 --- a/src/postgres/src/test/isolation/yb_pg_isolation_schedule +++ b/src/postgres/src/test/isolation/yb_pg_isolation_schedule @@ -14,6 +14,9 @@ test: yb-modification-followed-by-lock test: yb-skip-locked-after-update test: yb-skip-locked-single-shard-transaction +# TODO: Re-enable after memory leak fixed +#test: yb-lock-status + # TODO: # 1. Test to ensure that new sub txns inherit ybDataSentForCurrQuery and parent txns # inherit it back from nested sub txns once they are removed. diff --git a/src/postgres/src/test/isolation/yb_wait_queues_schedule b/src/postgres/src/test/isolation/yb_wait_queues_schedule index cacd0db468ad..de331874245d 100644 --- a/src/postgres/src/test/isolation/yb_wait_queues_schedule +++ b/src/postgres/src/test/isolation/yb_wait_queues_schedule @@ -3,3 +3,4 @@ test: yb-wait-queues-single-shard-waiters-same-tablet test: lock-update-traversal test: yb-wait-queues-weak-read-unlocks test: yb-wait-queues-serializable-reads +test: yb-lock-status-waiters diff --git a/src/postgres/src/test/regress/expected/yb_aggregates.out b/src/postgres/src/test/regress/expected/yb_aggregates.out index eb6e12af104a..dc3b3bfc765c 100644 --- a/src/postgres/src/test/regress/expected/yb_aggregates.out +++ b/src/postgres/src/test/regress/expected/yb_aggregates.out @@ -10,209 +10,1213 @@ CREATE TABLE ybaggtest ( float_4 float4, float_8 float8 ); +CREATE INDEX NONCONCURRENTLY ybaggtestindex ON ybaggtest ( + (int_8, int_2) HASH, + float_4 DESC, + int_4 ASC +) INCLUDE (float_8); -- Insert maximum integer values multiple times to force overflow on SUM (both in DocDB and PG). 
INSERT INTO ybaggtest VALUES (1, 32767, 2147483647, 9223372036854775807, 1.1, 2.2); INSERT INTO ybaggtest SELECT series, t.int_2, t.int_4, t.int_8, t.float_4, t.float_8 FROM ybaggtest as t CROSS JOIN generate_series(2, 100) as series; -- Verify COUNT(...) returns proper value. +\set explain 'EXPLAIN (COSTS OFF)' +\set ss '/*+SeqScan(ybaggtest)*/' +\set ios '/*+IndexOnlyScan(ybaggtest ybaggtestindex)*/' +\set query 'SELECT COUNT(*) FROM ybaggtest' +\set run ':explain :query; :explain :ss :query; :explain :ios :query; :query; :ss :query; :ios :query' +:run; + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> Seq Scan on ybaggtest + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> YB Seq Scan on ybaggtest + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------------------------------- + Finalize Aggregate + -> Index Only Scan using ybaggtestindex on ybaggtest + Partial Aggregate: true +(3 rows) + + count +------- + 100 +(1 row) + + count +------- + 100 +(1 row) + + count +------- + 100 +(1 row) + +\set query 'SELECT COUNT(0) FROM ybaggtest' +:run; + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> Seq Scan on ybaggtest + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> YB Seq Scan on ybaggtest + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------------------------------- + Finalize Aggregate + -> Index Only Scan using ybaggtestindex on ybaggtest + Partial Aggregate: true +(3 rows) + + count +------- + 100 +(1 row) + + count +------- + 100 +(1 row) + + count +------- + 100 +(1 row) + +\set query 'SELECT COUNT(NULL) FROM ybaggtest' +:run; + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> Seq Scan on ybaggtest + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> YB 
Seq Scan on ybaggtest + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------------------------------- + Finalize Aggregate + -> Index Only Scan using ybaggtestindex on ybaggtest + Partial Aggregate: true +(3 rows) + + count +------- + 0 +(1 row) + + count +------- + 0 +(1 row) + + count +------- + 0 +(1 row) + +-- Delete row, verify COUNT(...) returns proper value. +DELETE FROM ybaggtest WHERE id = 100; +SELECT COUNT(*) FROM ybaggtest; + count +------- + 99 +(1 row) + +/*+IndexOnlyScan(ybaggtest ybaggtestindex)*/ SELECT COUNT(*) FROM ybaggtest; count ------- - 100 + 99 +(1 row) + +SELECT COUNT(0) FROM ybaggtest; + count +------- + 99 +(1 row) + +/*+IndexOnlyScan(ybaggtest ybaggtestindex)*/ +SELECT COUNT(0) FROM ybaggtest; + count +------- + 99 +(1 row) + +-- Verify selecting different aggs for same column works. +\set query 'SELECT SUM(int_4), MAX(int_4), MIN(int_4), SUM(int_2), MAX(int_2), MIN(int_2) FROM ybaggtest' +:run; + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> Seq Scan on ybaggtest + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> YB Seq Scan on ybaggtest + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------------------------------- + Finalize Aggregate + -> Index Only Scan using ybaggtestindex on ybaggtest + Partial Aggregate: true +(3 rows) + + sum | max | min | sum | max | min +--------------+------------+------------+---------+-------+------- + 212600881053 | 2147483647 | 2147483647 | 3243933 | 32767 | 32767 +(1 row) + + sum | max | min | sum | max | min +--------------+------------+------------+---------+-------+------- + 212600881053 | 2147483647 | 2147483647 | 3243933 | 32767 | 32767 +(1 row) + + sum | max | min | sum | max | min +--------------+------------+------------+---------+-------+------- + 212600881053 | 2147483647 | 2147483647 | 3243933 | 32767 | 32767 +(1 row) + +-- Verify SUMs are correct 
for all fields and do not overflow. +\set query 'SELECT SUM(int_2), SUM(int_4), SUM(int_8), SUM(float_4), SUM(float_8) FROM ybaggtest' +:run; + QUERY PLAN +----------------------------- + Aggregate + -> Seq Scan on ybaggtest +(2 rows) + + QUERY PLAN +-------------------------------- + Aggregate + -> YB Seq Scan on ybaggtest +(2 rows) + + QUERY PLAN +--------------------------------------------------------- + Aggregate + -> Index Only Scan using ybaggtestindex on ybaggtest +(2 rows) + + sum | sum | sum | sum | sum +---------+--------------+-----------------------+-------+------- + 3243933 | 212600881053 | 913113831648622804893 | 108.9 | 217.8 +(1 row) + + sum | sum | sum | sum | sum +---------+--------------+-----------------------+-------+------- + 3243933 | 212600881053 | 913113831648622804893 | 108.9 | 217.8 +(1 row) + + sum | sum | sum | sum | sum +---------+--------------+-----------------------+-------+------- + 3243933 | 212600881053 | 913113831648622804893 | 108.9 | 217.8 +(1 row) + +-- ...and do the same query excluding the int_8 column to test agg pushdown. +-- TODO(#16289): remove this. 
+\set query 'SELECT SUM(int_2), SUM(int_4), SUM(float_4), SUM(float_8) FROM ybaggtest' +:run; + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> Seq Scan on ybaggtest + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> YB Seq Scan on ybaggtest + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------------------------------- + Finalize Aggregate + -> Index Only Scan using ybaggtestindex on ybaggtest + Partial Aggregate: true +(3 rows) + + sum | sum | sum | sum +---------+--------------+-------+------- + 3243933 | 212600881053 | 108.9 | 217.8 +(1 row) + + sum | sum | sum | sum +---------+--------------+-------+------- + 3243933 | 212600881053 | 108.9 | 217.8 +(1 row) + + sum | sum | sum | sum +---------+--------------+-------+------- + 3243933 | 212600881053 | 108.9 | 217.8 +(1 row) + +-- Verify shared aggregates work as expected. +\set query 'SELECT SUM(int_4), SUM(int_4) + 1 FROM ybaggtest' +:run; + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> Seq Scan on ybaggtest + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> YB Seq Scan on ybaggtest + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------------------------------- + Finalize Aggregate + -> Index Only Scan using ybaggtestindex on ybaggtest + Partial Aggregate: true +(3 rows) + + sum | ?column? +--------------+-------------- + 212600881053 | 212600881054 +(1 row) + + sum | ?column? +--------------+-------------- + 212600881053 | 212600881054 +(1 row) + + sum | ?column? +--------------+-------------- + 212600881053 | 212600881054 +(1 row) + +-- Verify NaN float values are respected by aggregates. 
+INSERT INTO ybaggtest (id, float_4, float_8) VALUES (101, 'NaN', 'NaN'); +\set query 'SELECT COUNT(float_4), SUM(float_4), MAX(float_4), MIN(float_4) FROM ybaggtest' +:run; + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> Seq Scan on ybaggtest + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> YB Seq Scan on ybaggtest + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------------------------------- + Finalize Aggregate + -> Index Only Scan using ybaggtestindex on ybaggtest + Partial Aggregate: true +(3 rows) + + count | sum | max | min +-------+-----+-----+----- + 100 | NaN | NaN | 1.1 +(1 row) + + count | sum | max | min +-------+-----+-----+----- + 100 | NaN | NaN | 1.1 +(1 row) + + count | sum | max | min +-------+-----+-----+----- + 100 | NaN | NaN | 1.1 +(1 row) + +\set query 'SELECT COUNT(float_8), SUM(float_8), MAX(float_8), MIN(float_8) FROM ybaggtest' +:run; + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> Seq Scan on ybaggtest + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> YB Seq Scan on ybaggtest + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------------------------------- + Finalize Aggregate + -> Index Only Scan using ybaggtestindex on ybaggtest + Partial Aggregate: true +(3 rows) + + count | sum | max | min +-------+-----+-----+----- + 100 | NaN | NaN | 2.2 +(1 row) + + count | sum | max | min +-------+-----+-----+----- + 100 | NaN | NaN | 2.2 +(1 row) + + count | sum | max | min +-------+-----+-----+----- + 100 | NaN | NaN | 2.2 +(1 row) + +-- Negative tests - pushdown not supported +EXPLAIN (COSTS OFF) SELECT int_2, COUNT(*), SUM(int_4) FROM ybaggtest GROUP BY int_2; + QUERY PLAN +----------------------------- + HashAggregate + Group Key: int_2 + -> Seq Scan on ybaggtest +(3 rows) + +EXPLAIN (COSTS OFF) SELECT 
DISTINCT int_4 FROM ybaggtest; + QUERY PLAN +----------------------------- + HashAggregate + Group Key: int_4 + -> Seq Scan on ybaggtest +(3 rows) + +EXPLAIN (COSTS OFF) SELECT COUNT(distinct int_4), SUM(int_4) FROM ybaggtest; + QUERY PLAN +----------------------------- + Aggregate + -> Seq Scan on ybaggtest +(2 rows) + +-- +-- Test NULL rows are handled properly by COUNT. +-- +-- Create table without primary key. +CREATE TABLE ybaggtest2 ( + a int +); +-- Create index where column a is not part of the key. +CREATE INDEX NONCONCURRENTLY ybaggtest2index ON ybaggtest2 ((1)) INCLUDE (a); +-- Insert NULL rows. +INSERT INTO ybaggtest2 VALUES (NULL), (NULL), (NULL); +-- Insert regular rows. +INSERT INTO ybaggtest2 VALUES (1), (2), (3); +-- Verify NULL rows are included in COUNT(*) but not in COUNT(row). +\set ss '/*+SeqScan(ybaggtest2)*/' +\set ios '/*+IndexOnlyScan(ybaggtest2 ybaggtest2index)*/' +\set query 'SELECT COUNT(*) FROM ybaggtest2' +:run; + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> Seq Scan on ybaggtest2 + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> YB Seq Scan on ybaggtest2 + Partial Aggregate: true +(3 rows) + + QUERY PLAN +----------------------------------------------------------- + Finalize Aggregate + -> Index Only Scan using ybaggtest2index on ybaggtest2 + Partial Aggregate: true +(3 rows) + + count +------- + 6 +(1 row) + + count +------- + 6 +(1 row) + + count +------- + 6 +(1 row) + +-- TODO(#16417): update the following three index only scan explains to have +-- "Partial Aggregate: true" because pushdown will be allowed once the index's +-- constant 1 column is not requested by the aggregate node to the index only +-- scan node when using CP_SMALL_TLIST. 
+\set query 'SELECT COUNT(a) FROM ybaggtest2' +:run; + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> Seq Scan on ybaggtest2 + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> YB Seq Scan on ybaggtest2 + Partial Aggregate: true +(3 rows) + + QUERY PLAN +----------------------------------------------------------- + Aggregate + -> Index Only Scan using ybaggtest2index on ybaggtest2 +(2 rows) + + count +------- + 3 +(1 row) + + count +------- + 3 +(1 row) + + count +------- + 3 +(1 row) + +\set query 'SELECT COUNT(*), COUNT(a) FROM ybaggtest2' +:run; + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> Seq Scan on ybaggtest2 + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> YB Seq Scan on ybaggtest2 + Partial Aggregate: true +(3 rows) + + QUERY PLAN +----------------------------------------------------------- + Aggregate + -> Index Only Scan using ybaggtest2index on ybaggtest2 +(2 rows) + + count | count +-------+------- + 6 | 3 +(1 row) + + count | count +-------+------- + 6 | 3 +(1 row) + + count | count +-------+------- + 6 | 3 +(1 row) + +-- Verify MAX/MIN respect NULL values. +\set query 'SELECT MAX(a), MIN(a) FROM ybaggtest2' +:run; + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> Seq Scan on ybaggtest2 + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> YB Seq Scan on ybaggtest2 + Partial Aggregate: true +(3 rows) + + QUERY PLAN +----------------------------------------------------------- + Aggregate + -> Index Only Scan using ybaggtest2index on ybaggtest2 +(2 rows) + + max | min +-----+----- + 3 | 1 +(1 row) + + max | min +-----+----- + 3 | 1 +(1 row) + + max | min +-----+----- + 3 | 1 +(1 row) + +-- Verify SUM/MAX/MIN work as expected with constant arguments. 
+\set query 'SELECT SUM(2), MAX(2), MIN(2) FROM ybaggtest2' +:run; + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> Seq Scan on ybaggtest2 + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> YB Seq Scan on ybaggtest2 + Partial Aggregate: true +(3 rows) + + QUERY PLAN +----------------------------------------------------------- + Finalize Aggregate + -> Index Only Scan using ybaggtest2index on ybaggtest2 + Partial Aggregate: true +(3 rows) + + sum | max | min +-----+-----+----- + 12 | 2 | 2 +(1 row) + + sum | max | min +-----+-----+----- + 12 | 2 | 2 +(1 row) + + sum | max | min +-----+-----+----- + 12 | 2 | 2 +(1 row) + +\set query 'SELECT SUM(NULL::int), MAX(NULL), MIN(NULL) FROM ybaggtest2' +:run; + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> Seq Scan on ybaggtest2 + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> YB Seq Scan on ybaggtest2 + Partial Aggregate: true +(3 rows) + + QUERY PLAN +----------------------------------------------------------- + Finalize Aggregate + -> Index Only Scan using ybaggtest2index on ybaggtest2 + Partial Aggregate: true +(3 rows) + + sum | max | min +-----+-----+----- + | | +(1 row) + + sum | max | min +-----+-----+----- + | | +(1 row) + + sum | max | min +-----+-----+----- + | | +(1 row) + +-- +-- Test column created with default value. 
+-- +CREATE TABLE digit(k INT PRIMARY KEY, v TEXT NOT NULL); +INSERT INTO digit VALUES(1, 'one'), (2, 'two'), (3, 'three'), (4, 'four'), (5, 'five'), (6, 'six'); +CREATE TABLE test(k INT PRIMARY KEY); +ALTER TABLE test ADD v1 int DEFAULT 5; +ALTER TABLE test ADD v2 int DEFAULT 10; +CREATE INDEX NONCONCURRENTLY testindex ON test (k) INCLUDE (v1, v2); +INSERT INTO test VALUES(1), (2), (3); +\set ss '/*+SeqScan(test)*/' +\set ios '/*+IndexOnlyScan(test testindex)*/' +\set query 'SELECT COUNT(*) FROM test' +:run; + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> Seq Scan on test + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> YB Seq Scan on test + Partial Aggregate: true +(3 rows) + + QUERY PLAN +----------------------------------------------- + Finalize Aggregate + -> Index Only Scan using testindex on test + Partial Aggregate: true +(3 rows) + + count +------- + 3 +(1 row) + + count +------- + 3 +(1 row) + + count +------- + 3 +(1 row) + +\set query 'SELECT COUNT(k) FROM test' +:run; + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> Seq Scan on test + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> YB Seq Scan on test + Partial Aggregate: true +(3 rows) + + QUERY PLAN +----------------------------------------------- + Finalize Aggregate + -> Index Only Scan using testindex on test + Partial Aggregate: true +(3 rows) + + count +------- + 3 +(1 row) + + count +------- + 3 +(1 row) + + count +------- + 3 +(1 row) + +\set query 'SELECT COUNT(v1) FROM test' +:run; + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> Seq Scan on test + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> YB Seq Scan on test + Partial Aggregate: true +(3 rows) + + QUERY PLAN +----------------------------------------------- + Finalize Aggregate 
+ -> Index Only Scan using testindex on test + Partial Aggregate: true +(3 rows) + + count +------- + 3 (1 row) -SELECT COUNT(0) FROM ybaggtest; count ------- - 100 + 3 (1 row) -SELECT COUNT(NULL) FROM ybaggtest; count ------- - 0 + 3 (1 row) --- Delete row, verify COUNT(...) returns proper value. -DELETE FROM ybaggtest WHERE id = 100; -SELECT COUNT(*) FROM ybaggtest; +\set query 'SELECT COUNT(v2) FROM test' +:run; + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> Seq Scan on test + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> YB Seq Scan on test + Partial Aggregate: true +(3 rows) + + QUERY PLAN +----------------------------------------------- + Finalize Aggregate + -> Index Only Scan using testindex on test + Partial Aggregate: true +(3 rows) + count ------- - 99 + 3 (1 row) -SELECT COUNT(0) FROM ybaggtest; count ------- - 99 + 3 (1 row) --- Verify selecting different aggs for same column works. -SELECT SUM(int_4), MAX(int_4), MIN(int_4), SUM(int_2), MAX(int_2), MIN(int_2) FROM ybaggtest; - sum | max | min | sum | max | min ---------------+------------+------------+---------+-------+------- - 212600881053 | 2147483647 | 2147483647 | 3243933 | 32767 | 32767 + count +------- + 3 (1 row) --- Verify SUMs are correct for all fields and do not overflow. 
-SELECT SUM(int_2), SUM(int_4), SUM(int_8), SUM(float_4), SUM(float_8) FROM ybaggtest; - sum | sum | sum | sum | sum ----------+--------------+-----------------------+-------+------- - 3243933 | 212600881053 | 913113831648622804893 | 108.9 | 217.8 -(1 row) +\set query 'SELECT * FROM digit AS d INNER JOIN (SELECT COUNT(v2) AS count FROM test) AS c ON (d.k = c.count)' +:run; + QUERY PLAN +---------------------------------------------- + Nested Loop + -> Finalize Aggregate + -> Seq Scan on test + Partial Aggregate: true + -> Index Scan using digit_pkey on digit d + Index Cond: (k = (count(test.v2))) +(6 rows) --- Verify shared aggregates work as expected. -SELECT SUM(int_4), SUM(int_4) + 1 FROM ybaggtest; - sum | ?column? ---------------+-------------- - 212600881053 | 212600881054 + QUERY PLAN +---------------------------------------------- + Nested Loop + -> Finalize Aggregate + -> YB Seq Scan on test + Partial Aggregate: true + -> Index Scan using digit_pkey on digit d + Index Cond: (k = (count(test.v2))) +(6 rows) + + QUERY PLAN +----------------------------------------------------- + Nested Loop + -> Finalize Aggregate + -> Index Only Scan using testindex on test + Partial Aggregate: true + -> Index Scan using digit_pkey on digit d + Index Cond: (k = (count(test.v2))) +(6 rows) + + k | v | count +---+-------+------- + 3 | three | 3 (1 row) --- Verify NaN float values are respected by aggregates. -INSERT INTO ybaggtest (id, float_4, float_8) VALUES (101, 'NaN', 'NaN'); -SELECT COUNT(float_4), SUM(float_4), MAX(float_4), MIN(float_4) FROM ybaggtest; - count | sum | max | min --------+-----+-----+----- - 100 | NaN | NaN | 1.1 + k | v | count +---+-------+------- + 3 | three | 3 (1 row) -SELECT COUNT(float_8), SUM(float_8), MAX(float_8), MIN(float_8) FROM ybaggtest; - count | sum | max | min --------+-----+-----+----- - 100 | NaN | NaN | 2.2 + k | v | count +---+-------+------- + 3 | three | 3 (1 row) --- --- Test NULL rows are handled properly by COUNT. 
--- --- Create table without primary key. -CREATE TABLE ybaggtest2 ( - a int -); --- Insert NULL rows. -INSERT INTO ybaggtest2 VALUES (NULL), (NULL), (NULL); --- Insert regular rows. -INSERT INTO ybaggtest2 VALUES (1), (2), (3); --- Verify NULL rows are included in COUNT(*) but not in COUNT(row). -SELECT COUNT(*) FROM ybaggtest2; +INSERT INTO test VALUES(4, NULL, 10), (5, 5, NULL), (6, 5, NULL); +\set query 'SELECT COUNT(*) FROM test' +:run; + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> Seq Scan on test + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> YB Seq Scan on test + Partial Aggregate: true +(3 rows) + + QUERY PLAN +----------------------------------------------- + Finalize Aggregate + -> Index Only Scan using testindex on test + Partial Aggregate: true +(3 rows) + count ------- 6 (1 row) -SELECT COUNT(a) FROM ybaggtest2; count ------- - 3 + 6 (1 row) -SELECT COUNT(*), COUNT(a) FROM ybaggtest2; - count | count --------+------- - 6 | 3 + count +------- + 6 (1 row) --- Verify MAX/MIN respect NULL values. -SELECT MAX(a), MIN(a) FROM ybaggtest2; - max | min ------+----- - 3 | 1 -(1 row) +\set query 'SELECT COUNT(k) FROM test' +:run; + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> Seq Scan on test + Partial Aggregate: true +(3 rows) --- Verify SUM/MAX/MIN work as expected with constant arguments. 
-SELECT SUM(2), MAX(2), MIN(2) FROM ybaggtest2; - sum | max | min ------+-----+----- - 12 | 2 | 2 -(1 row) + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> YB Seq Scan on test + Partial Aggregate: true +(3 rows) -SELECT SUM(NULL::int), MAX(NULL), MIN(NULL) FROM ybaggtest2; - sum | max | min ------+-----+----- - | | -(1 row) + QUERY PLAN +----------------------------------------------- + Finalize Aggregate + -> Index Only Scan using testindex on test + Partial Aggregate: true +(3 rows) -CREATE TABLE digit(k INT PRIMARY KEY, v TEXT NOT NULL); -INSERT INTO digit VALUES(1, 'one'), (2, 'two'), (3, 'three'), (4, 'four'), (5, 'five'), (6, 'six'); -CREATE TABLE test(k INT PRIMARY KEY); -ALTER TABLE test ADD v1 int DEFAULT 5; -ALTER TABLE test ADD v2 int DEFAULT 10; -INSERT INTO test VALUES(1), (2), (3); -SELECT COUNT(*) FROM test; count ------- - 3 + 6 (1 row) -SELECT COUNT(k) FROM test; count ------- - 3 + 6 (1 row) -SELECT COUNT(v1) FROM test; count ------- - 3 + 6 (1 row) -SELECT COUNT(v2) FROM test; +\set query 'SELECT COUNT(v1) FROM test' +:run; + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> Seq Scan on test + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> YB Seq Scan on test + Partial Aggregate: true +(3 rows) + + QUERY PLAN +----------------------------------------------- + Finalize Aggregate + -> Index Only Scan using testindex on test + Partial Aggregate: true +(3 rows) + count ------- - 3 + 5 (1 row) -SELECT * FROM digit AS d INNER JOIN (SELECT COUNT(v2) AS count FROM test) AS c ON (d.k = c.count); - k | v | count ----+-------+------- - 3 | three | 3 + count +------- + 5 (1 row) -INSERT INTO test VALUES(4, NULL, 10), (5, 5, NULL), (6, 5, NULL); -SELECT COUNT(*) FROM test; count ------- - 6 + 5 (1 row) -SELECT COUNT(k) FROM test; +\set query 'SELECT COUNT(v2) FROM test' +:run; + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> 
Seq Scan on test + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> YB Seq Scan on test + Partial Aggregate: true +(3 rows) + + QUERY PLAN +----------------------------------------------- + Finalize Aggregate + -> Index Only Scan using testindex on test + Partial Aggregate: true +(3 rows) + count ------- - 6 + 4 (1 row) -SELECT COUNT(v1) FROM test; count ------- - 5 + 4 (1 row) -SELECT COUNT(v2) FROM test; count ------- 4 (1 row) -SELECT * FROM digit AS d INNER JOIN (SELECT COUNT(*) AS count FROM test) AS c ON (d.k = c.count); +\set query 'SELECT * FROM digit AS d INNER JOIN (SELECT COUNT(*) AS count FROM test) AS c ON (d.k = c.count)' +:run; + QUERY PLAN +---------------------------------------------- + Nested Loop + -> Finalize Aggregate + -> Seq Scan on test + Partial Aggregate: true + -> Index Scan using digit_pkey on digit d + Index Cond: (k = (count(*))) +(6 rows) + + QUERY PLAN +---------------------------------------------- + Nested Loop + -> Finalize Aggregate + -> YB Seq Scan on test + Partial Aggregate: true + -> Index Scan using digit_pkey on digit d + Index Cond: (k = (count(*))) +(6 rows) + + QUERY PLAN +----------------------------------------------------- + Nested Loop + -> Finalize Aggregate + -> Index Only Scan using testindex on test + Partial Aggregate: true + -> Index Scan using digit_pkey on digit d + Index Cond: (k = (count(*))) +(6 rows) + + k | v | count +---+-----+------- + 6 | six | 6 +(1 row) + + k | v | count +---+-----+------- + 6 | six | 6 +(1 row) + + k | v | count +---+-----+------- + 6 | six | 6 +(1 row) + +\set query 'SELECT * FROM digit AS d INNER JOIN (SELECT COUNT(k) AS count FROM test) AS c ON (d.k = c.count)' +:run; + QUERY PLAN +---------------------------------------------- + Nested Loop + -> Finalize Aggregate + -> Seq Scan on test + Partial Aggregate: true + -> Index Scan using digit_pkey on digit d + Index Cond: (k = (count(test.k))) +(6 rows) + + QUERY PLAN 
+---------------------------------------------- + Nested Loop + -> Finalize Aggregate + -> YB Seq Scan on test + Partial Aggregate: true + -> Index Scan using digit_pkey on digit d + Index Cond: (k = (count(test.k))) +(6 rows) + + QUERY PLAN +----------------------------------------------------- + Nested Loop + -> Finalize Aggregate + -> Index Only Scan using testindex on test + Partial Aggregate: true + -> Index Scan using digit_pkey on digit d + Index Cond: (k = (count(test.k))) +(6 rows) + + k | v | count +---+-----+------- + 6 | six | 6 +(1 row) + + k | v | count +---+-----+------- + 6 | six | 6 +(1 row) + k | v | count ---+-----+------- 6 | six | 6 (1 row) -SELECT * FROM digit AS d INNER JOIN (SELECT COUNT(k) AS count FROM test) AS c ON (d.k = c.count); - k | v | count ----+-----+------- - 6 | six | 6 +\set query 'SELECT * FROM digit AS d INNER JOIN (SELECT COUNT(v1) AS count FROM test) AS c ON (d.k = c.count)' +:run; + QUERY PLAN +---------------------------------------------- + Nested Loop + -> Finalize Aggregate + -> Seq Scan on test + Partial Aggregate: true + -> Index Scan using digit_pkey on digit d + Index Cond: (k = (count(test.v1))) +(6 rows) + + QUERY PLAN +---------------------------------------------- + Nested Loop + -> Finalize Aggregate + -> YB Seq Scan on test + Partial Aggregate: true + -> Index Scan using digit_pkey on digit d + Index Cond: (k = (count(test.v1))) +(6 rows) + + QUERY PLAN +----------------------------------------------------- + Nested Loop + -> Finalize Aggregate + -> Index Only Scan using testindex on test + Partial Aggregate: true + -> Index Scan using digit_pkey on digit d + Index Cond: (k = (count(test.v1))) +(6 rows) + + k | v | count +---+------+------- + 5 | five | 5 +(1 row) + + k | v | count +---+------+------- + 5 | five | 5 +(1 row) + + k | v | count +---+------+------- + 5 | five | 5 +(1 row) + +\set query 'SELECT * FROM digit AS d INNER JOIN (SELECT COUNT(v2) AS count FROM test) AS c ON (d.k = c.count)' +:run; + 
QUERY PLAN +---------------------------------------------- + Nested Loop + -> Finalize Aggregate + -> Seq Scan on test + Partial Aggregate: true + -> Index Scan using digit_pkey on digit d + Index Cond: (k = (count(test.v2))) +(6 rows) + + QUERY PLAN +---------------------------------------------- + Nested Loop + -> Finalize Aggregate + -> YB Seq Scan on test + Partial Aggregate: true + -> Index Scan using digit_pkey on digit d + Index Cond: (k = (count(test.v2))) +(6 rows) + + QUERY PLAN +----------------------------------------------------- + Nested Loop + -> Finalize Aggregate + -> Index Only Scan using testindex on test + Partial Aggregate: true + -> Index Scan using digit_pkey on digit d + Index Cond: (k = (count(test.v2))) +(6 rows) + + k | v | count +---+------+------- + 4 | four | 4 (1 row) -SELECT * FROM digit AS d INNER JOIN (SELECT COUNT(v1) AS count FROM test) AS c ON (d.k = c.count); k | v | count ---+------+------- - 5 | five | 5 + 4 | four | 4 (1 row) -SELECT * FROM digit AS d INNER JOIN (SELECT COUNT(v2) AS count FROM test) AS c ON (d.k = c.count); k | v | count ---+------+------- 4 | four | 4 @@ -220,40 +1224,207 @@ SELECT * FROM digit AS d INNER JOIN (SELECT COUNT(v2) AS count FROM test) AS c O DROP TABLE test; DROP TABLE digit; +-- +-- Test dropped column. 
+-- CREATE TABLE test(K INT PRIMARY KEY, v1 INT NOT NULL, v2 INT NOT NULL); +CREATE INDEX NONCONCURRENTLY testindex ON test (K) INCLUDE (v2); INSERT INTO test VALUES(1, 1, 1), (2, 2, 2), (3, 3, 3); AlTER TABLE test DROP v1; -SELECT MIN(v2) FROM test; +\set query 'SELECT MIN(v2) FROM test' +:run; + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> Seq Scan on test + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> YB Seq Scan on test + Partial Aggregate: true +(3 rows) + + QUERY PLAN +----------------------------------------------- + Finalize Aggregate + -> Index Only Scan using testindex on test + Partial Aggregate: true +(3 rows) + + min +----- + 1 +(1 row) + min ----- 1 (1 row) -SELECT MAX(v2) FROM test; + min +----- + 1 +(1 row) + +\set query 'SELECT MAX(v2) FROM test' +:run; + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> Seq Scan on test + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> YB Seq Scan on test + Partial Aggregate: true +(3 rows) + + QUERY PLAN +----------------------------------------------- + Finalize Aggregate + -> Index Only Scan using testindex on test + Partial Aggregate: true +(3 rows) + + max +----- + 3 +(1 row) + + max +----- + 3 +(1 row) + max ----- 3 (1 row) -SELECT SUM(v2) FROM test; +\set query 'SELECT SUM(v2) FROM test' +:run; + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> Seq Scan on test + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> YB Seq Scan on test + Partial Aggregate: true +(3 rows) + + QUERY PLAN +----------------------------------------------- + Finalize Aggregate + -> Index Only Scan using testindex on test + Partial Aggregate: true +(3 rows) + + sum +----- + 6 +(1 row) + + sum +----- + 6 +(1 row) + sum ----- 6 (1 row) -SELECT COUNT(v2) FROM test; +\set query 
'SELECT COUNT(v2) FROM test' +:run; + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> Seq Scan on test + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> YB Seq Scan on test + Partial Aggregate: true +(3 rows) + + QUERY PLAN +----------------------------------------------- + Finalize Aggregate + -> Index Only Scan using testindex on test + Partial Aggregate: true +(3 rows) + + count +------- + 3 +(1 row) + + count +------- + 3 +(1 row) + count ------- 3 (1 row) --- For https://github.com/YugaByte/yugabyte-db/issues/10085 +-- +-- Test https://github.com/yugabyte/yugabyte-db/issues/10085: avoid pushdown +-- for certain cases. +-- -- Original test case that had postgres FATAL: CREATE TABLE t1(c0 DECIMAL ); +CREATE INDEX NONCONCURRENTLY t1index ON t1 (c0); INSERT INTO t1(c0) VALUES(0.4632167437031089463062016875483095645904541015625), (0.82173140818865475498711248292238451540470123291015625), (0.69990454445895500246166420765803195536136627197265625), (0.7554730989898816861938257716246880590915679931640625); ALTER TABLE ONLY t1 FORCE ROW LEVEL SECURITY, DISABLE ROW LEVEL SECURITY, NO FORCE ROW LEVEL SECURITY; INSERT INTO t1(c0) VALUES(0.9946693818538820952568357824929989874362945556640625), (0.13653666831997435249235195442452095448970794677734375), (0.3359001510719556993223022800520993769168853759765625), (0.312027233370160583802999099134467542171478271484375); -SELECT SUM(count) FROM (SELECT (CAST((((('[-1962327130,2000870418)'::int4range)*('(-1293215916,183586536]'::int4range)))-((('[-545024026,526859443]'::int4range)*(NULL)))) AS VARCHAR)~current_query())::INT as count FROM ONLY t1) as res; +\set ss '/*+SeqScan(t1)*/' +\set ios '/*+IndexOnlyScan(t1 t1index)*/' +\set query 'SELECT SUM(count) FROM (SELECT (CAST(((((''[-1962327130,2000870418)''::int4range)*(''(-1293215916,183586536]''::int4range)))-(((''[-545024026,526859443]''::int4range)*(NULL)))) AS 
VARCHAR)~current_query())::INT as count FROM ONLY t1) as res' +:run; + QUERY PLAN +---------------------- + Aggregate + -> Seq Scan on t1 +(2 rows) + + QUERY PLAN +------------------------- + Aggregate + -> YB Seq Scan on t1 +(2 rows) + + QUERY PLAN +------------------------------------------- + Aggregate + -> Index Only Scan using t1index on t1 +(2 rows) + + sum +----- + +(1 row) + + sum +----- + +(1 row) + sum ----- @@ -261,8 +1432,40 @@ SELECT SUM(count) FROM (SELECT (CAST((((('[-1962327130,2000870418)'::int4range)* -- Simplified test case that had postgres FATAL: CREATE TABLE t2(c0 DECIMAL ); +CREATE INDEX NONCONCURRENTLY t2index ON t2 (c0); INSERT INTO t2 VALUES(1), (2), (3); -SELECT SUM(r) < 6 from (SELECT random() as r from t2) as res; +\set ss '/*+SeqScan(t2)*/' +\set ios '/*+IndexOnlyScan(t2 t2index)*/' +\set query 'SELECT SUM(r) < 6 from (SELECT random() as r from t2) as res' +:run; + QUERY PLAN +---------------------- + Aggregate + -> Seq Scan on t2 +(2 rows) + + QUERY PLAN +------------------------- + Aggregate + -> YB Seq Scan on t2 +(2 rows) + + QUERY PLAN +------------------------------------------- + Aggregate + -> Index Only Scan using t2index on t2 +(2 rows) + + ?column? +---------- + t +(1 row) + + ?column? +---------- + t +(1 row) + ?column? 
---------- t @@ -270,8 +1473,40 @@ SELECT SUM(r) < 6 from (SELECT random() as r from t2) as res; -- Simplified test case that had postgres FATAL: CREATE TABLE t3(c0 DECIMAL ); +CREATE INDEX NONCONCURRENTLY t3index ON t3 (c0); INSERT INTO t3 VALUES(1), (2), (3); -SELECT SUM(r) from (SELECT (NULL=random())::int as r from t3) as res; +\set ss '/*+SeqScan(t3)*/' +\set ios '/*+IndexOnlyScan(t3 t3index)*/' +\set query 'SELECT SUM(r) from (SELECT (NULL=random())::int as r from t3) as res' +:run; + QUERY PLAN +---------------------- + Aggregate + -> Seq Scan on t3 +(2 rows) + + QUERY PLAN +------------------------- + Aggregate + -> YB Seq Scan on t3 +(2 rows) + + QUERY PLAN +------------------------------------------- + Aggregate + -> Index Only Scan using t3index on t3 +(2 rows) + + sum +----- + +(1 row) + + sum +----- + +(1 row) + sum ----- @@ -279,42 +1514,132 @@ SELECT SUM(r) from (SELECT (NULL=random())::int as r from t3) as res; -- Test case that did not have postgres FATAL but showed wrong result 't': CREATE TABLE t4(c0 FLOAT8); +CREATE INDEX NONCONCURRENTLY t4index ON t4 (c0); INSERT INTO t4 VALUES(1), (2), (3); -SELECT SUM(r) = 6 from (SELECT random() as r from t4) as res; +\set ss '/*+SeqScan(t4)*/' +\set ios '/*+IndexOnlyScan(t4 t4index)*/' +\set query 'SELECT SUM(r) = 6 from (SELECT random() as r from t4) as res' +:run; + QUERY PLAN +---------------------- + Aggregate + -> Seq Scan on t4 +(2 rows) + + QUERY PLAN +------------------------- + Aggregate + -> YB Seq Scan on t4 +(2 rows) + + QUERY PLAN +------------------------------------------- + Aggregate + -> Index Only Scan using t4index on t4 +(2 rows) + + ?column? +---------- + f +(1 row) + + ?column? +---------- + f +(1 row) + ?column? ---------- f (1 row) --- Test EXPLAIN with aggregate pushdown -EXPLAIN (COSTS OFF) SELECT COUNT(*), SUM(int_4) FROM ybaggtest; +-- +-- System tables. 
+-- +\set ss '/*+SeqScan(pg_type)*/' +\set ios '/*+IndexOnlyScan(pg_type pg_type_typname_nsp_index)*/' +\set query 'SELECT MIN(typnamespace) FROM pg_type' +:run; QUERY PLAN --------------------------------- Finalize Aggregate - -> Seq Scan on ybaggtest + -> Seq Scan on pg_type Partial Aggregate: true (3 rows) --- Negative tests - pushdown not supported -EXPLAIN (COSTS OFF) SELECT int_2, COUNT(*), SUM(int_4) FROM ybaggtest GROUP BY int_2; - QUERY PLAN ------------------------------ - HashAggregate - Group Key: int_2 - -> Seq Scan on ybaggtest + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> YB Seq Scan on pg_type + Partial Aggregate: true (3 rows) -EXPLAIN (COSTS OFF) SELECT DISTINCT int_4 FROM ybaggtest; - QUERY PLAN ------------------------------ - HashAggregate - Group Key: int_4 - -> Seq Scan on ybaggtest + QUERY PLAN +------------------------------------------------------------------ + Finalize Aggregate + -> Index Only Scan using pg_type_typname_nsp_index on pg_type + Partial Aggregate: true (3 rows) -EXPLAIN (COSTS OFF) SELECT COUNT(distinct int_4), SUM(int_4) FROM ybaggtest; - QUERY PLAN ------------------------------ - Aggregate - -> Seq Scan on ybaggtest -(2 rows) + min +----- + 11 +(1 row) + + min +----- + 11 +(1 row) + + min +----- + 11 +(1 row) + +-- +-- Colocation. 
+-- +CREATE DATABASE co COLOCATION TRUE; +\c co +CREATE TABLE t (i int, j int, k int); +CREATE INDEX NONCONCURRENTLY i ON t (j, k DESC, i); +INSERT INTO t VALUES (1, 2, 3), (4, 5, 6); +\set ss '/*+SeqScan(t)*/' +\set ios '/*+IndexOnlyScan(t i)*/' +\set query 'SELECT SUM(k), AVG(i), COUNT(*), MAX(j) FROM t' +:run; + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> Seq Scan on t + Partial Aggregate: true +(3 rows) + + QUERY PLAN +--------------------------------- + Finalize Aggregate + -> YB Seq Scan on t + Partial Aggregate: true +(3 rows) + + QUERY PLAN +------------------------------------ + Finalize Aggregate + -> Index Only Scan using i on t + Partial Aggregate: true +(3 rows) + + sum | avg | count | max +-----+--------------------+-------+----- + 9 | 2.5000000000000000 | 2 | 5 +(1 row) + + sum | avg | count | max +-----+--------------------+-------+----- + 9 | 2.5000000000000000 | 2 | 5 +(1 row) + + sum | avg | count | max +-----+--------------------+-------+----- + 9 | 2.5000000000000000 | 2 | 5 +(1 row) diff --git a/src/postgres/src/test/regress/expected/yb_catalog_version.out b/src/postgres/src/test/regress/expected/yb_catalog_version.out index bbd081364720..344dc8ad801a 100644 --- a/src/postgres/src/test/regress/expected/yb_catalog_version.out +++ b/src/postgres/src/test/regress/expected/yb_catalog_version.out @@ -16,32 +16,56 @@ :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 2 | 1 + 1 | 1 (1 row) --- The next CREATE ROLE will increment current_version. +-- The next CREATE ROLE will not increment current_version. CREATE ROLE cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 3 | 1 + 1 | 1 (1 row) -- The next CREATE ROLE fails and should not cause any catalog version change. 
CREATE ROLE cv_test_role; ERROR: role "cv_test_role" already exists +:display_catalog_version; + current_version | last_breaking_version +-----------------+----------------------- + 1 | 1 +(1 row) + +-- The next CREATE ROLE will increment current_version. +CREATE ROLE cv_test_role2 IN ROLE cv_test_role; +:display_catalog_version; + current_version | last_breaking_version +-----------------+----------------------- + 2 | 1 +(1 row) + +-- The next CREATE ROLE will increment current_version. +CREATE ROLE cv_test_role3 ADMIN cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- 3 | 1 (1 row) --- The next CREATE DATABASE will increment current_version. +-- The next CREATE ROLE will increment current_version. +CREATE ROLE cv_test_role4 ROLE cv_test_role2, cv_test_role3; +:display_catalog_version; + current_version | last_breaking_version +-----------------+----------------------- + 4 | 1 +(1 row) + +-- The next CREATE DATABASE will not increment current_version. CREATE DATABASE cv_test_database; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 3 | 1 + 4 | 1 (1 row) -- The next GRANT CONNECT will increment current_version. @@ -49,7 +73,7 @@ GRANT CONNECT ON DATABASE cv_test_database TO cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 4 | 1 + 5 | 1 (1 row) -- The next GRANT CONNECT should not cause any catalog version change. @@ -57,7 +81,7 @@ GRANT CONNECT ON DATABASE cv_test_database TO cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 4 | 1 + 5 | 1 (1 row) -- The next REVOKE CONNECT is a "breaking" catalog change. 
It will increment @@ -66,7 +90,7 @@ REVOKE CONNECT ON DATABASE cv_test_database from cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 5 | 5 + 6 | 6 (1 row) -- The next REVOKE CONNECT should not cause any catalog version change. @@ -74,7 +98,7 @@ REVOKE CONNECT ON DATABASE cv_test_database from cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 5 | 5 + 6 | 6 (1 row) -- The next CREATE TABLE should not cause any catalog version change. @@ -82,7 +106,7 @@ CREATE TABLE cv_test_table(id int); :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 5 | 5 + 6 | 6 (1 row) -- The next GRANT SELECT will increment current_version. @@ -90,7 +114,7 @@ GRANT SELECT ON cv_test_table TO cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 6 | 5 + 7 | 6 (1 row) -- The next GRANT SELECT should not cause any catalog version change. @@ -98,7 +122,7 @@ GRANT SELECT ON cv_test_table TO cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 6 | 5 + 7 | 6 (1 row) -- The next REVOKE SELECT is a "breaking" catalog change. It will increment @@ -107,7 +131,7 @@ REVOKE SELECT ON cv_test_table FROM cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 7 | 7 + 8 | 8 (1 row) -- The next REVOKE SELECT should not cause any catalog version change. @@ -115,7 +139,7 @@ REVOKE SELECT ON cv_test_table FROM cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 7 | 7 + 8 | 8 (1 row) -- The next CREATE FOREIGN DATA WRAPPER will increment current_version. 
@@ -125,7 +149,7 @@ GRANT USAGE ON FOREIGN DATA WRAPPER cv_test_fdw_wrapper TO cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 9 | 7 + 10 | 8 (1 row) -- The next GRANT USAGE should not cause any catalog version change. @@ -133,7 +157,7 @@ GRANT USAGE ON FOREIGN DATA WRAPPER cv_test_fdw_wrapper TO cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 9 | 7 + 10 | 8 (1 row) -- The next REVOKE USAGE is a "breaking" catalog change. It will increment @@ -142,7 +166,7 @@ REVOKE USAGE ON FOREIGN DATA WRAPPER cv_test_fdw_wrapper FROM cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 10 | 10 + 11 | 11 (1 row) -- The next REVOKE USAGE should not cause any catalog version change. @@ -150,7 +174,7 @@ REVOKE USAGE ON FOREIGN DATA WRAPPER cv_test_fdw_wrapper FROM cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 10 | 10 + 11 | 11 (1 row) -- The next CREATE SERVER will increment current_version. @@ -160,7 +184,7 @@ GRANT USAGE ON FOREIGN SERVER cv_test_fdw_server TO cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 12 | 10 + 13 | 11 (1 row) -- The next GRANT USAGE should not cause any catalog version change. @@ -168,7 +192,7 @@ GRANT USAGE ON FOREIGN SERVER cv_test_fdw_server TO cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 12 | 10 + 13 | 11 (1 row) -- The next REVOKE USAGE is a "breaking" catalog change. 
It will increment @@ -177,7 +201,7 @@ REVOKE USAGE ON FOREIGN SERVER cv_test_fdw_server FROM cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 13 | 13 + 14 | 14 (1 row) -- The next REVOKE USAGE should not cause any catalog version change. @@ -185,7 +209,7 @@ REVOKE USAGE ON FOREIGN SERVER cv_test_fdw_server FROM cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 13 | 13 + 14 | 14 (1 row) -- The next CREATE FUNCTION will increment current_version. @@ -195,7 +219,7 @@ GRANT EXECUTE ON FUNCTION cv_test_function TO cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 14 | 13 + 15 | 14 (1 row) -- The next GRANT EXECUTE should not cause any catalog version change. @@ -203,7 +227,7 @@ GRANT EXECUTE ON FUNCTION cv_test_function TO cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 14 | 13 + 15 | 14 (1 row) -- The next REVOKE EXECUTE is a "breaking" catalog change. It will increment @@ -212,7 +236,7 @@ REVOKE EXECUTE ON FUNCTION cv_test_function from cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 15 | 15 + 16 | 16 (1 row) -- The next REVOKE EXECUTE should not cause any catalog version change. @@ -220,7 +244,7 @@ REVOKE EXECUTE ON FUNCTION cv_test_function from cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 15 | 15 + 16 | 16 (1 row) -- The next GRANT USAGE will increment current_version. 
@@ -228,7 +252,7 @@ GRANT USAGE ON LANGUAGE sql TO cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 16 | 15 + 17 | 16 (1 row) -- The next GRANT USAGE should not cause any catalog version change. @@ -236,7 +260,7 @@ GRANT USAGE ON LANGUAGE sql TO cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 16 | 15 + 17 | 16 (1 row) -- The next REVOKE USAGE is a "breaking" catalog change. It will increment @@ -245,7 +269,7 @@ REVOKE USAGE ON LANGUAGE sql from cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 17 | 17 + 18 | 18 (1 row) -- The next REVOKE USAGE should not cause any catalog version change. @@ -253,7 +277,7 @@ REVOKE USAGE ON LANGUAGE sql from cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 17 | 17 + 18 | 18 (1 row) SET yb_non_ddl_txn_for_sys_tables_allowed=1; @@ -269,7 +293,7 @@ GRANT SELECT ON LARGE OBJECT 1001 TO cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 18 | 17 + 19 | 18 (1 row) -- The next GRANT SELECT should not cause any catalog version change. @@ -277,7 +301,7 @@ GRANT SELECT ON LARGE OBJECT 1001 TO cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 18 | 17 + 19 | 18 (1 row) -- The next REVOKE SELECT is a "breaking" catalog change. It will increment @@ -286,7 +310,7 @@ REVOKE SELECT ON LARGE OBJECT 1001 from cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 19 | 19 + 20 | 20 (1 row) -- The next REVOKE SELECT should not cause any catalog version change. 
@@ -294,7 +318,7 @@ REVOKE SELECT ON LARGE OBJECT 1001 from cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 19 | 19 + 20 | 20 (1 row) -- The next GRANT USAGE will increment current_version. @@ -302,7 +326,7 @@ GRANT USAGE ON SCHEMA public TO cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 20 | 19 + 21 | 20 (1 row) -- The next GRANT USAGE should not cause any catalog version change. @@ -310,7 +334,7 @@ GRANT USAGE ON SCHEMA public TO cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 20 | 19 + 21 | 20 (1 row) -- The next REVOKE USAGE is a "breaking" catalog change. It will increment @@ -319,7 +343,7 @@ REVOKE USAGE ON SCHEMA public from cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 21 | 21 + 22 | 22 (1 row) -- The next REVOKE USAGE should not cause any catalog version change. @@ -327,7 +351,7 @@ REVOKE USAGE ON SCHEMA public from cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 21 | 21 + 22 | 22 (1 row) -- The next CREATE TABLEGROUP will increment current_version. @@ -337,7 +361,7 @@ GRANT CREATE ON TABLEGROUP cv_test_tablegroup TO cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 22 | 21 + 23 | 22 (1 row) -- The next GRANT CREATE should not cause any catalog version change. @@ -345,7 +369,7 @@ GRANT CREATE ON TABLEGROUP cv_test_tablegroup TO cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 22 | 21 + 23 | 22 (1 row) -- The next REVOKE CREATE is a "breaking" catalog change. 
It will increment @@ -354,7 +378,7 @@ REVOKE CREATE ON TABLEGROUP cv_test_tablegroup from cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 23 | 23 + 24 | 24 (1 row) -- The next REVOKE CREATE should not cause any catalog version change. @@ -362,7 +386,7 @@ REVOKE CREATE ON TABLEGROUP cv_test_tablegroup from cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 23 | 23 + 24 | 24 (1 row) -- The next CREATE TABLESPACE will increment current_version. @@ -373,7 +397,7 @@ GRANT CREATE ON TABLESPACE cv_test_tablespace TO cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 24 | 23 + 25 | 24 (1 row) -- The next GRANT CREATE should not cause any catalog version change. @@ -381,7 +405,7 @@ GRANT CREATE ON TABLESPACE cv_test_tablespace TO cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 24 | 23 + 25 | 24 (1 row) -- The next REVOKE CREATE is a "breaking" catalog change. It will increment @@ -390,7 +414,7 @@ REVOKE CREATE ON TABLESPACE cv_test_tablespace from cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 25 | 25 + 26 | 26 (1 row) -- The next REVOKE CREATE should not cause any catalog version change. @@ -398,7 +422,7 @@ REVOKE CREATE ON TABLESPACE cv_test_tablespace from cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 25 | 25 + 26 | 26 (1 row) -- The next CREATE TYPE will increment current_version. 
@@ -408,7 +432,7 @@ GRANT USAGE ON TYPE cv_test_type TO cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 27 | 25 + 28 | 26 (1 row) -- The next GRANT USAGE should not cause any catalog version change. @@ -416,7 +440,7 @@ GRANT USAGE ON TYPE cv_test_type TO cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 27 | 25 + 28 | 26 (1 row) -- The next REVOKE USAGE is a "breaking" catalog change. It will increment @@ -425,7 +449,7 @@ REVOKE USAGE ON TYPE cv_test_type from cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 28 | 28 + 29 | 29 (1 row) -- The next REVOKE USAGE should not cause any catalog version change. @@ -433,7 +457,7 @@ REVOKE USAGE ON TYPE cv_test_type from cv_test_role; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 28 | 28 + 29 | 29 (1 row) -- Tables with various constraint types should not increment catalog version. 
@@ -441,49 +465,49 @@ CREATE TABLE t_check (col INT CHECK (col > 0)); :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 28 | 28 + 29 | 29 (1 row) CREATE TABLE t_not_null (col INT NOT NULL); :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 28 | 28 + 29 | 29 (1 row) CREATE TABLE t_primary_key (col INT PRIMARY KEY); :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 28 | 28 + 29 | 29 (1 row) CREATE TABLE t_sequence (col SERIAL, value TEXT); :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 28 | 28 + 29 | 29 (1 row) CREATE TABLE t_unique (col INT UNIQUE); :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 28 | 28 + 29 | 29 (1 row) -CREATE TABLE t_identity (col INT GENERATED ALWAYS AS IDENTITY); +CREATE TABLE t_identity (col INT GENERATED ALWAYS AS IDENTITY); :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 28 | 28 + 29 | 29 (1 row) -CREATE TABLE t_primary_key_sequence_identity (c1 INT PRIMARY KEY, c2 SERIAL, c3 INT GENERATED ALWAYS AS IDENTITY); +CREATE TABLE t_primary_key_sequence_identity (c1 INT PRIMARY KEY, c2 SERIAL, c3 INT GENERATED ALWAYS AS IDENTITY); :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 28 | 28 + 29 | 29 (1 row) -- The CREATE TABLE with FOREIGN KEY will increment current_version. @@ -492,7 +516,7 @@ CREATE TABLE t2 (col INT REFERENCES t1(col)); :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 29 | 28 + 30 | 29 (1 row) -- The ALTER TABLE will increment current_version. 
@@ -500,7 +524,7 @@ ALTER TABLE t1 ADD COLUMN val INT; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 30 | 28 + 31 | 29 (1 row) -- The CREATE PROCEDURE will not increment current_version. @@ -508,7 +532,7 @@ CREATE PROCEDURE test() AS $$ BEGIN INSERT INTO t1 VALUES(1); END; $$ LANGUAGE ' :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 30 | 28 + 31 | 29 (1 row) -- The CALL to PROCEDURE will not increment current_version. @@ -516,7 +540,7 @@ CALL test(); :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 30 | 28 + 31 | 29 (1 row) -- The CREATE FUNCTION will increment current_version. @@ -525,7 +549,7 @@ CREATE OR REPLACE FUNCTION evt_trig_fn() RETURNS event_trigger AS $$ BEGIN INSER :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 31 | 28 + 32 | 29 (1 row) -- The CREATE EVENT TRIGGER will increment current_version. @@ -533,7 +557,7 @@ CREATE EVENT TRIGGER evt_ddl_start ON ddl_command_start EXECUTE PROCEDURE evt_tr :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 32 | 28 + 33 | 29 (1 row) -- The DDLs proceeding the trigger will increment current_version based on the command's individual behaviour. @@ -542,7 +566,7 @@ ALTER TABLE evt_trig_table ADD COLUMN val INT; :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 33 | 28 + 34 | 29 (1 row) -- The CREATE TABLE will not increment current_version. @@ -550,7 +574,7 @@ CREATE TABLE post_trigger_table (id INT); :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 33 | 28 + 34 | 29 (1 row) -- The execution on atomic SPI context function will increment current_version. 
@@ -574,7 +598,7 @@ SELECT atomic_spi(1); :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 34 | 28 + 35 | 29 (1 row) SELECT atomic_spi(2); @@ -586,7 +610,7 @@ SELECT atomic_spi(2); :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 35 | 28 + 36 | 29 (1 row) -- The execution on non-atomic SPI context will not increment current_version. @@ -604,5 +628,5 @@ CALL p1(); :display_catalog_version; current_version | last_breaking_version -----------------+----------------------- - 35 | 28 -(1 row) \ No newline at end of file + 36 | 29 +(1 row) diff --git a/src/postgres/src/test/regress/expected/yb_distinct_pushdown.out b/src/postgres/src/test/regress/expected/yb_distinct_pushdown.out new file mode 100644 index 000000000000..17c45dfa4ca8 --- /dev/null +++ b/src/postgres/src/test/regress/expected/yb_distinct_pushdown.out @@ -0,0 +1,25 @@ +CREATE TABLE distinct_pushdown_table(r1 INT, r2 INT, PRIMARY KEY(r1 ASC, r2 ASC)); +INSERT INTO distinct_pushdown_table (SELECT 1, i FROM GENERATE_SERIES(1, 1000) AS i); +-- Disable DISTINCT pushdown +SET yb_enable_distinct_pushdown TO off; +-- Must pull even duplicate rows without pushdown. Verify that using EXPLAIN ANALYZE +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT DISTINCT r1 FROM distinct_pushdown_table WHERE r1 <= 10; + QUERY PLAN +----------------------------------------------------------------------------------------------------------- + Unique (actual rows=1 loops=1) + -> Index Scan using distinct_pushdown_table_pkey on distinct_pushdown_table (actual rows=1000 loops=1) + Index Cond: (r1 <= 10) +(3 rows) + +-- Enable DISTINCT pushdown +SET yb_enable_distinct_pushdown TO on; +-- Must pull fewer rows with pushdown. 
Verify that using EXPLAIN ANALYZE +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT DISTINCT r1 FROM distinct_pushdown_table WHERE r1 <= 10; + QUERY PLAN +-------------------------------------------------------------------------------------------------------- + Unique (actual rows=1 loops=1) + -> Index Scan using distinct_pushdown_table_pkey on distinct_pushdown_table (actual rows=1 loops=1) + Index Cond: (r1 <= 10) +(3 rows) + +DROP TABLE distinct_pushdown_table; diff --git a/src/postgres/src/test/regress/expected/yb_explicit_row_lock_planning.out b/src/postgres/src/test/regress/expected/yb_explicit_row_lock_planning.out new file mode 100644 index 000000000000..19d3e59c1531 --- /dev/null +++ b/src/postgres/src/test/regress/expected/yb_explicit_row_lock_planning.out @@ -0,0 +1,328 @@ +-- +-- YB tests for locking +-- +CREATE TABLE yb_locks_t (k int PRIMARY KEY); +INSERT INTO yb_locks_t VALUES (1),(2),(3),(4),(5); +CREATE TABLE yb_locks_t2 (k1 int, k2 int, k3 int, v int, PRIMARY KEY(k1, k2, k3)); +INSERT INTO yb_locks_t2 VALUES (1,2,3,4),(5,6,7,8); +SET yb_lock_pk_single_rpc TO ON; +-- Test plain (unlocked case). +EXPLAIN (COSTS OFF) +SELECT * FROM yb_locks_t WHERE k=5; + QUERY PLAN +------------------------------------------------ + Index Scan using yb_locks_t_pkey on yb_locks_t + Index Cond: (k = 5) +(2 rows) + +SELECT * FROM yb_locks_t WHERE k=5; + k +--- + 5 +(1 row) + +-- Test single-RPC select+lock (no LockRows node). +EXPLAIN (COSTS OFF) +SELECT * FROM yb_locks_t WHERE k=5 FOR UPDATE; + QUERY PLAN +-------------------------------------------------------------------- + Index Scan (Locked FOR UPDATE) using yb_locks_t_pkey on yb_locks_t + Index Cond: (k = 5) +(2 rows) + +SELECT * FROM yb_locks_t WHERE k=5 FOR UPDATE; + k +--- + 5 +(1 row) + +-- Test other types of locking. 
+EXPLAIN (COSTS OFF) +SELECT * FROM yb_locks_t WHERE k=5 FOR SHARE; + QUERY PLAN +------------------------------------------------------------------- + Index Scan (Locked FOR SHARE) using yb_locks_t_pkey on yb_locks_t + Index Cond: (k = 5) +(2 rows) + +SELECT * FROM yb_locks_t WHERE k=5 FOR SHARE; + k +--- + 5 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT * FROM yb_locks_t WHERE k=5 FOR NO KEY UPDATE; + QUERY PLAN +--------------------------------------------------------------------------- + Index Scan (Locked FOR NO KEY UPDATE) using yb_locks_t_pkey on yb_locks_t + Index Cond: (k = 5) +(2 rows) + +SELECT * FROM yb_locks_t WHERE k=5 FOR NO KEY UPDATE; + k +--- + 5 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT * FROM yb_locks_t WHERE k=5 FOR KEY SHARE; + QUERY PLAN +----------------------------------------------------------------------- + Index Scan (Locked FOR KEY SHARE) using yb_locks_t_pkey on yb_locks_t + Index Cond: (k = 5) +(2 rows) + +SELECT * FROM yb_locks_t WHERE k=5 FOR KEY SHARE; + k +--- + 5 +(1 row) + +-- Test LockRows node (more RPCs), and scan is unlocked. +EXPLAIN (COSTS OFF) +SELECT * FROM yb_locks_t FOR UPDATE; + QUERY PLAN +------------------------------ + LockRows + -> Seq Scan on yb_locks_t +(2 rows) + +SELECT * FROM yb_locks_t FOR UPDATE; + k +--- + 5 + 1 + 4 + 2 + 3 +(5 rows) + +-- Test with multi-column primary key. +EXPLAIN (COSTS OFF) +SELECT * FROM yb_locks_t2 WHERE k1=1 AND k2=2 AND k3=3 FOR UPDATE; + QUERY PLAN +---------------------------------------------------------------------- + Index Scan (Locked FOR UPDATE) using yb_locks_t2_pkey on yb_locks_t2 + Index Cond: ((k1 = 1) AND (k2 = 2) AND (k3 = 3)) +(2 rows) + +SELECT * FROM yb_locks_t2 WHERE k1=1 AND k2=2 AND k3=3 FOR UPDATE; + k1 | k2 | k3 | v +----+----+----+--- + 1 | 2 | 3 | 4 +(1 row) + +-- Test with partial column set for primary key (should use LockRows). 
+EXPLAIN (COSTS OFF) +SELECT * FROM yb_locks_t2 WHERE k1=1 AND k2=2 FOR UPDATE; + QUERY PLAN +-------------------------------------------------------- + LockRows + -> Index Scan using yb_locks_t2_pkey on yb_locks_t2 + Index Cond: ((k1 = 1) AND (k2 = 2)) +(3 rows) + +SELECT * FROM yb_locks_t2 WHERE k1=1 AND k2=2 FOR UPDATE; + k1 | k2 | k3 | v +----+----+----+--- + 1 | 2 | 3 | 4 +(1 row) + +-- Test LockRows node is used for join. +EXPLAIN (COSTS OFF) +SELECT * FROM yb_locks_t2, yb_locks_t WHERE yb_locks_t2.k1 = yb_locks_t.k FOR UPDATE; + QUERY PLAN +------------------------------------------------------------ + LockRows + -> Nested Loop + -> Seq Scan on yb_locks_t2 + -> Index Scan using yb_locks_t_pkey on yb_locks_t + Index Cond: (k = yb_locks_t2.k1) +(5 rows) + +SELECT * FROM yb_locks_t2, yb_locks_t WHERE yb_locks_t2.k1 = yb_locks_t.k FOR UPDATE; + k1 | k2 | k3 | v | k +----+----+----+---+--- + 5 | 6 | 7 | 8 | 5 + 1 | 2 | 3 | 4 | 1 +(2 rows) + +-- In isolation level SERIALIZABLE, all locks are done during scans. +BEGIN ISOLATION LEVEL SERIALIZABLE; +-- Test same locking as for REPEATABLE READ (default isolation). +EXPLAIN (COSTS OFF) +SELECT * FROM yb_locks_t WHERE k=5 FOR UPDATE; + QUERY PLAN +-------------------------------------------------------------------- + Index Scan (Locked FOR UPDATE) using yb_locks_t_pkey on yb_locks_t + Index Cond: (k = 5) +(2 rows) + +SELECT * FROM yb_locks_t WHERE k=5 FOR UPDATE; + k +--- + 5 +(1 row) + +-- Test no LockRows node for sequential scan. +EXPLAIN (COSTS OFF) +SELECT * FROM yb_locks_t FOR UPDATE; + QUERY PLAN +-------------------------------------------- + Seq Scan (Locked FOR UPDATE) on yb_locks_t +(1 row) + +SELECT * FROM yb_locks_t FOR UPDATE; + k +--- + 5 + 1 + 4 + 2 + 3 +(5 rows) + +-- Test no LockRows node for join. 
+EXPLAIN (COSTS OFF) +SELECT * FROM yb_locks_t2, yb_locks_t WHERE yb_locks_t2.k1 = yb_locks_t.k FOR UPDATE; + QUERY PLAN +-------------------------------------------------------------------------- + Nested Loop + -> Seq Scan (Locked FOR UPDATE) on yb_locks_t2 + -> Index Scan (Locked FOR UPDATE) using yb_locks_t_pkey on yb_locks_t + Index Cond: (k = yb_locks_t2.k1) +(4 rows) + +SELECT * FROM yb_locks_t2, yb_locks_t WHERE yb_locks_t2.k1 = yb_locks_t.k FOR UPDATE; + k1 | k2 | k3 | v | k +----+----+----+---+--- + 5 | 6 | 7 | 8 | 5 + 1 | 2 | 3 | 4 | 1 +(2 rows) + +COMMIT; +-- Test with single-RPC select+lock turned off. +SET yb_lock_pk_single_rpc TO OFF; +EXPLAIN (COSTS OFF) +SELECT * FROM yb_locks_t WHERE k=5 FOR UPDATE; + QUERY PLAN +------------------------------------------------------ + LockRows + -> Index Scan using yb_locks_t_pkey on yb_locks_t + Index Cond: (k = 5) +(3 rows) + +SELECT * FROM yb_locks_t WHERE k=5 FOR UPDATE; + k +--- + 5 +(1 row) + +-- Test that with the yb_lock_pk_single_rpc off, SERIALIZABLE still locks during the scan +-- (no LockRows). +BEGIN ISOLATION LEVEL SERIALIZABLE; +EXPLAIN (COSTS OFF) +SELECT * FROM yb_locks_t WHERE k=5 FOR UPDATE; + QUERY PLAN +-------------------------------------------------------------------- + Index Scan (Locked FOR UPDATE) using yb_locks_t_pkey on yb_locks_t + Index Cond: (k = 5) +(2 rows) + +SELECT * FROM yb_locks_t WHERE k=5 FOR UPDATE; + k +--- + 5 +(1 row) + +COMMIT; +SET yb_lock_pk_single_rpc TO ON; +CREATE INDEX ON yb_locks_t2 (v); +-- Test with an index. We use a LockRows node for an index. 
+EXPLAIN (COSTS OFF) +SELECT * FROM yb_locks_t2 WHERE v=4 FOR UPDATE; + QUERY PLAN +--------------------------------------------------------- + LockRows + -> Index Scan using yb_locks_t2_v_idx on yb_locks_t2 + Index Cond: (v = 4) +(3 rows) + +SELECT * FROM yb_locks_t2 WHERE v=4 FOR UPDATE; + k1 | k2 | k3 | v +----+----+----+--- + 1 | 2 | 3 | 4 +(1 row) + +-- Isolation level SERIALIZABLE still locks with the scan though (no LockRows). +BEGIN ISOLATION LEVEL SERIALIZABLE; +EXPLAIN (COSTS OFF) +SELECT * FROM yb_locks_t2 WHERE v=4 FOR UPDATE; + QUERY PLAN +----------------------------------------------------------------------- + Index Scan (Locked FOR UPDATE) using yb_locks_t2_v_idx on yb_locks_t2 + Index Cond: (v = 4) +(2 rows) + +SELECT * FROM yb_locks_t2 WHERE v=4 FOR UPDATE; + k1 | k2 | k3 | v +----+----+----+--- + 1 | 2 | 3 | 4 +(1 row) + +COMMIT; +-- Test partitions. +CREATE TABLE yb_locks_partition (a char PRIMARY KEY) PARTITION BY LIST (a); +CREATE TABLE yb_locks_partition_default PARTITION OF yb_locks_partition DEFAULT; +CREATE TABLE yb_locks_partition_a PARTITION OF yb_locks_partition FOR VALUES IN ('a'); +EXPLAIN (COSTS OFF) SELECT * FROM yb_locks_partition WHERE a = 'a' FOR UPDATE; + QUERY PLAN +-------------------------------------------------------------------------------- + LockRows + -> Append + -> Index Scan using yb_locks_partition_a_pkey on yb_locks_partition_a + Index Cond: (a = 'a'::bpchar) +(4 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM yb_locks_partition WHERE a = 'b' FOR UPDATE; + QUERY PLAN +-------------------------------------------------------------------------------------------- + LockRows + -> Append + -> Index Scan using yb_locks_partition_default_pkey on yb_locks_partition_default + Index Cond: (a = 'b'::bpchar) +(4 rows) + +BEGIN ISOLATION LEVEL SERIALIZABLE; +EXPLAIN (COSTS OFF) SELECT * FROM yb_locks_partition WHERE a = 'a' FOR UPDATE; + QUERY PLAN 
+---------------------------------------------------------------------------------------------- + Append + -> Index Scan (Locked FOR UPDATE) using yb_locks_partition_a_pkey on yb_locks_partition_a + Index Cond: (a = 'a'::bpchar) +(3 rows) + +COMMIT; +-- Test JSON. +EXPLAIN (COSTS OFF, FORMAT JSON) +SELECT * FROM yb_locks_t WHERE k=5 FOR UPDATE; + QUERY PLAN +---------------------------------------- + [ + + { + + "Plan": { + + "Node Type": "Index Scan", + + "Parallel Aware": false, + + "Lock Type": "FOR UPDATE", + + "Scan Direction": "Forward", + + "Index Name": "yb_locks_t_pkey",+ + "Relation Name": "yb_locks_t", + + "Alias": "yb_locks_t", + + "Index Cond": "(k = 5)" + + } + + } + + ] +(1 row) + +DROP TABLE yb_locks_t, yb_locks_t2, yb_locks_partition; diff --git a/src/postgres/src/test/regress/expected/yb_get_range_split_clause.out b/src/postgres/src/test/regress/expected/yb_get_range_split_clause.out index 24eb87648f70..9d7f1c6ebd40 100644 --- a/src/postgres/src/test/regress/expected/yb_get_range_split_clause.out +++ b/src/postgres/src/test/regress/expected/yb_get_range_split_clause.out @@ -664,6 +664,66 @@ SELECT yb_get_range_split_clause('tbl_with_include_clause'::regclass); (1 row) DROP TABLE tbl_with_include_clause; +-- Test index SPLIT AT with INCLUDE clause +CREATE TABLE test_tbl ( + a TEXT, + b DOUBLE PRECISION, + PRIMARY KEY (a ASC) +) SPLIT AT VALUES(('11')); +CREATE INDEX test_idx on test_tbl( + b ASC +) INCLUDE (a) SPLIT AT VALUES ((1.1)); +SELECT yb_get_range_split_clause('test_idx'::regclass); + yb_get_range_split_clause +--------------------------- + SPLIT AT VALUES ((1.1)) +(1 row) + +DROP INDEX test_idx; +DROP TABLE test_tbl; +-- Test index SPLIT AT with INCLUDE clause +CREATE TABLE test_tbl ( + a INT, + b TEXT, + c CHAR, + d BOOLEAN, + e REAL, + PRIMARY KEY (a ASC, b ASC) +) SPLIT AT VALUES((1, '111')); +CREATE INDEX test_idx on test_tbl( + a ASC, + b ASC, + c ASC +) INCLUDE (d, e) SPLIT AT VALUES ((1, '11', '1')); +SELECT 
yb_get_range_split_clause('test_idx'::regclass); + yb_get_range_split_clause +---------------------------------- + SPLIT AT VALUES ((1, '11', '1')) +(1 row) + +DROP INDEX test_idx; +DROP TABLE test_tbl; +-- Test index SPLIT AT with INCLUDE clause +CREATE TABLE test_tbl ( + a INT, + b INT, + c INT, + d INT, + e INT, + PRIMARY KEY (a DESC, b ASC) +) SPLIT AT VALUES((1, 1)); +CREATE INDEX test_idx on test_tbl( + a ASC, + b DESC +) INCLUDE (c, d, e) SPLIT AT VALUES ((1, 1)); +SELECT yb_get_range_split_clause('test_idx'::regclass); + yb_get_range_split_clause +--------------------------- + SPLIT AT VALUES ((1, 1)) +(1 row) + +DROP INDEX test_idx; +DROP TABLE test_tbl; -- Test secondary index with duplicate columns and backwards order columns CREATE TABLE test_tbl ( k1 INT, diff --git a/src/postgres/src/test/regress/expected/yb_hdr_percentile.out b/src/postgres/src/test/regress/expected/yb_hdr_percentile.out new file mode 100644 index 000000000000..812662553ff0 --- /dev/null +++ b/src/postgres/src/test/regress/expected/yb_hdr_percentile.out @@ -0,0 +1,94 @@ +-- Testing hdr percentile function +SELECT yb_get_percentile('[{"[384.0,409.6)": 5}, {"[768.0,819.2)": 4}, {"[1126.4,1228.8)": 1}]', 50); + yb_get_percentile +------------------- + 409.6 +(1 row) + +SELECT yb_get_percentile('[{"[384.0,409.6)": 5}, {"[768.0,819.2)": 4}, {"[1126.4,1228.8)": 1}]', 90); + yb_get_percentile +------------------- + 819.2 +(1 row) + +SELECT yb_get_percentile('[{"[384.0,409.6)": 5}, {"[768.0,819.2)": 4}, {"[1126.4,1228.8)": 1}]', 99); + yb_get_percentile +------------------- + 1228.8 +(1 row) + +SELECT yb_get_percentile('[{"[384.0,409.6)": 5}, {"[768.0,819.2)": 4}, {"[1126.4,1228.8)": 1}]', 0); + yb_get_percentile +------------------- + 409.6 +(1 row) + +SELECT yb_get_percentile('[{"[384.0,409.6)": 5}, {"[768.0,819.2)": 4}, {"[1126.4,1228.8)": 1}]', -0.1); + yb_get_percentile +------------------- + 409.6 +(1 row) + +SELECT yb_get_percentile('[{"[384.0,409.6)": 5}, {"[768.0,819.2)": 4}, 
{"[1126.4,1228.8)": 1}]', 8892.3); + yb_get_percentile +------------------- + 1228.8 +(1 row) + +SELECT yb_get_percentile('[]', 90); + yb_get_percentile +------------------- + -Infinity +(1 row) + +SELECT yb_get_percentile('[{"[-2.8,2e4)": 8}]', -10); + yb_get_percentile +------------------- + 20000 +(1 row) + +SELECT yb_get_percentile('[{"[-2.8,2e4)": 8}]', 90); + yb_get_percentile +------------------- + 20000 +(1 row) + +SELECT yb_get_percentile('[{"[-1.1e-3,5000)": 5}, {}]', -10); +ERROR: Invalid histogram: Unexpected object end, should follow k/v pair within object +SELECT yb_get_percentile('[{"[-1.1e-3,5000)": 5}, {}]', 100); +ERROR: Invalid histogram: Unexpected object end, should follow k/v pair within object +SELECT yb_get_percentile('[{"[12,)": 8}]', 0); + yb_get_percentile +------------------- + Infinity +(1 row) + +SELECT yb_get_percentile('[{"[12,)": 8}]', 50); + yb_get_percentile +------------------- + Infinity +(1 row) + +SELECT yb_get_percentile('[{"[12,)": 8}]', 100); + yb_get_percentile +------------------- + Infinity +(1 row) + +SELECT yb_get_percentile('[{"[1,2)": 5}, {"[3,4)": 4}, {"[5,)": 1}]', 50); + yb_get_percentile +------------------- + 2 +(1 row) + +SELECT yb_get_percentile('[{"[1,2)": 5}, {"[3,4)": 4}, {"[5,)": 1}]', 90); + yb_get_percentile +------------------- + 4 +(1 row) + +SELECT yb_get_percentile('[{"[1,2)": 5}, {"[3,4)": 4}, {"[5,)": 1}]', 99); + yb_get_percentile +------------------- + Infinity +(1 row) diff --git a/src/postgres/src/test/regress/expected/yb_index_scan.out b/src/postgres/src/test/regress/expected/yb_index_scan.out index fd24a367b53a..222af7ded91e 100644 --- a/src/postgres/src/test/regress/expected/yb_index_scan.out +++ b/src/postgres/src/test/regress/expected/yb_index_scan.out @@ -3663,20 +3663,22 @@ EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) /*+ IndexOnlyScan(t_kv) */ (2 rows) EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) /*+ IndexOnlyScan(t_kv) */ SELECT count(1) FROM t_kv; - QUERY PLAN 
------------------------------------------------------------------------- - Aggregate (actual rows=1 loops=1) - -> Index Only Scan using t_kv_pkey on t_kv (actual rows=10 loops=1) + QUERY PLAN +----------------------------------------------------------------------- + Finalize Aggregate (actual rows=1 loops=1) + -> Index Only Scan using t_kv_pkey on t_kv (actual rows=1 loops=1) Heap Fetches: 0 -(3 rows) + Partial Aggregate: true +(4 rows) EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) /*+ IndexOnlyScan(t_kv) */ SELECT count(*) FROM t_kv; - QUERY PLAN ------------------------------------------------------------------------- - Aggregate (actual rows=1 loops=1) - -> Index Only Scan using t_kv_pkey on t_kv (actual rows=10 loops=1) + QUERY PLAN +----------------------------------------------------------------------- + Finalize Aggregate (actual rows=1 loops=1) + -> Index Only Scan using t_kv_pkey on t_kv (actual rows=1 loops=1) Heap Fetches: 0 -(3 rows) + Partial Aggregate: true +(4 rows) /*+ IndexOnlyScan(t_kv) */ SELECT count(*) FROM t_kv; count @@ -3702,19 +3704,21 @@ EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) /*+ IndexOnlyScan(t_kv t_v EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) /*+ IndexOnlyScan(t_kv t_vi) */ SELECT count(1) FROM t_kv; QUERY PLAN -------------------------------------------------------------------- - Aggregate (actual rows=1 loops=1) - -> Index Only Scan using t_vi on t_kv (actual rows=10 loops=1) +------------------------------------------------------------------ + Finalize Aggregate (actual rows=1 loops=1) + -> Index Only Scan using t_vi on t_kv (actual rows=1 loops=1) Heap Fetches: 0 -(3 rows) + Partial Aggregate: true +(4 rows) EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) /*+ IndexOnlyScan(t_kv t_vi) */ SELECT count(*) FROM t_kv; QUERY PLAN -------------------------------------------------------------------- - Aggregate (actual rows=1 loops=1) - -> Index Only Scan using t_vi on t_kv (actual rows=10 loops=1) 
+------------------------------------------------------------------ + Finalize Aggregate (actual rows=1 loops=1) + -> Index Only Scan using t_vi on t_kv (actual rows=1 loops=1) Heap Fetches: 0 -(3 rows) + Partial Aggregate: true +(4 rows) /*+ IndexOnlyScan(t_kv t_vi) */ SELECT count(*) FROM t_kv; count @@ -3740,19 +3744,21 @@ EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) /*+ IndexOnlyScan(t_kv t_v EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) /*+ IndexOnlyScan(t_kv t_vi) */ SELECT count(1) FROM t_kv; QUERY PLAN -------------------------------------------------------------------- - Aggregate (actual rows=1 loops=1) - -> Index Only Scan using t_vi on t_kv (actual rows=11 loops=1) +------------------------------------------------------------------ + Finalize Aggregate (actual rows=1 loops=1) + -> Index Only Scan using t_vi on t_kv (actual rows=1 loops=1) Heap Fetches: 0 -(3 rows) + Partial Aggregate: true +(4 rows) EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) /*+ IndexOnlyScan(t_kv t_vi) */ SELECT count(*) FROM t_kv; QUERY PLAN -------------------------------------------------------------------- - Aggregate (actual rows=1 loops=1) - -> Index Only Scan using t_vi on t_kv (actual rows=11 loops=1) +------------------------------------------------------------------ + Finalize Aggregate (actual rows=1 loops=1) + -> Index Only Scan using t_vi on t_kv (actual rows=1 loops=1) Heap Fetches: 0 -(3 rows) + Partial Aggregate: true +(4 rows) /*+ IndexOnlyScan(t_kv t_vi) */ SELECT count(*) FROM t_kv; count @@ -3778,19 +3784,21 @@ EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) /*+ IndexOnlyScan(t_kv t_v EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) /*+ IndexOnlyScan(t_kv t_vi) */ SELECT count(1) FROM t_kv; QUERY PLAN -------------------------------------------------------------------- - Aggregate (actual rows=1 loops=1) - -> Index Only Scan using t_vi on t_kv (actual rows=12 loops=1) 
+------------------------------------------------------------------ + Finalize Aggregate (actual rows=1 loops=1) + -> Index Only Scan using t_vi on t_kv (actual rows=1 loops=1) Heap Fetches: 0 -(3 rows) + Partial Aggregate: true +(4 rows) EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) /*+ IndexOnlyScan(t_kv t_vi) */ SELECT count(*) FROM t_kv; QUERY PLAN -------------------------------------------------------------------- - Aggregate (actual rows=1 loops=1) - -> Index Only Scan using t_vi on t_kv (actual rows=12 loops=1) +------------------------------------------------------------------ + Finalize Aggregate (actual rows=1 loops=1) + -> Index Only Scan using t_vi on t_kv (actual rows=1 loops=1) Heap Fetches: 0 -(3 rows) + Partial Aggregate: true +(4 rows) /*+ IndexOnlyScan(t_kv t_vi) */ SELECT count(*) FROM t_kv; count @@ -3814,20 +3822,22 @@ EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) /*+ IndexOnlyScan(t_kv t_k (2 rows) EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) /*+ IndexOnlyScan(t_kv t_kv_pkey) */ SELECT count(1) FROM t_kv; - QUERY PLAN ------------------------------------------------------------------------- - Aggregate (actual rows=1 loops=1) - -> Index Only Scan using t_kv_pkey on t_kv (actual rows=12 loops=1) + QUERY PLAN +----------------------------------------------------------------------- + Finalize Aggregate (actual rows=1 loops=1) + -> Index Only Scan using t_kv_pkey on t_kv (actual rows=1 loops=1) Heap Fetches: 0 -(3 rows) + Partial Aggregate: true +(4 rows) EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) /*+ IndexOnlyScan(t_kv t_kv_pkey) */ SELECT count(*) FROM t_kv; - QUERY PLAN ------------------------------------------------------------------------- - Aggregate (actual rows=1 loops=1) - -> Index Only Scan using t_kv_pkey on t_kv (actual rows=12 loops=1) + QUERY PLAN +----------------------------------------------------------------------- + Finalize Aggregate (actual rows=1 loops=1) + -> Index Only Scan using 
t_kv_pkey on t_kv (actual rows=1 loops=1) Heap Fetches: 0 -(3 rows) + Partial Aggregate: true +(4 rows) /*+ IndexOnlyScan(t_kv t_kv_pkey) */ SELECT count(*) FROM t_kv; count @@ -3843,3 +3853,42 @@ EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) /*+ IndexOnlyScan(t_kv t_k (2 rows) DROP TABLE t_kv; +-- Test index SPLIT AT with INCLUDE clause +CREATE TABLE test_tbl ( + a INT, + b INT, + PRIMARY KEY (a ASC) +) SPLIT AT VALUES((1)); +CREATE INDEX test_idx on test_tbl( + b ASC +) INCLUDE (a) SPLIT AT VALUES ((1)); +INSERT INTO test_tbl VALUES (1, 2),(2, 1),(4, 3),(5, 4); +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT a, b FROM test_tbl WHERE a = 4; + QUERY PLAN +-------------------------------------------------------------------- + Index Scan using test_tbl_pkey on test_tbl (actual rows=1 loops=1) + Index Cond: (a = 4) +(2 rows) + +SELECT a, b FROM test_tbl WHERE a = 4; + a | b +---+--- + 4 | 3 +(1 row) + +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT a, b FROM test_tbl WHERE b = 4; + QUERY PLAN +-------------------------------------------------------------------- + Index Only Scan using test_idx on test_tbl (actual rows=1 loops=1) + Index Cond: (b = 4) + Heap Fetches: 0 +(3 rows) + +SELECT a, b FROM test_tbl WHERE b = 4; + a | b +---+--- + 5 | 4 +(1 row) + +DROP INDEX test_idx; +DROP TABLE test_tbl; diff --git a/src/postgres/src/test/regress/expected/yb_index_scan_null_asc.out b/src/postgres/src/test/regress/expected/yb_index_scan_null_asc.out index 57e00160a50d..f9a1d22f4529 100644 --- a/src/postgres/src/test/regress/expected/yb_index_scan_null_asc.out +++ b/src/postgres/src/test/regress/expected/yb_index_scan_null_asc.out @@ -8,134 +8,213 @@ DROP INDEX IF EXISTS i_nulltest_ba; CREATE INDEX i_nulltest_ba ON nulltest (b ASC, a ASC); \i sql/yb_index_scan_null_queries.sql -- Queries for the null scan key tests +SET client_min_messages = DEBUG1; +\set YB_DISABLE_ERROR_PREFIX on -- Should return empty results (actual rows=0) -- The plans 
should not show any "Recheck" -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) /*+ IndexScan(t1) NestLoop(t2 t1) Leading((t2 t1)) */ SELECT * FROM nulltest t1 JOIN nulltest2 t2 ON t1.a = t2.x; - QUERY PLAN +DEBUG: skipping a scan due to unsatisfiable condition + QUERY PLAN ---------------------------------------------------------------------------- Nested Loop (actual rows=0 loops=1) -> Seq Scan on nulltest2 t2 (actual rows=1 loops=1) + Storage Table Read Requests: 3 -> Index Scan using i_nulltest_a on nulltest t1 (actual rows=0 loops=1) Index Cond: (a = t2.x) -(4 rows) +(5 rows) -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) /*+ IndexScan(t1) NestLoop(t2 t1) Leading((t2 t1)) */ SELECT * FROM nulltest t1 JOIN nulltest2 t2 ON t1.a <= t2.x; - QUERY PLAN + QUERY PLAN ---------------------------------------------------------------------------- Nested Loop (actual rows=0 loops=1) -> Seq Scan on nulltest2 t2 (actual rows=1 loops=1) + Storage Table Read Requests: 3 -> Index Scan using i_nulltest_a on nulltest t1 (actual rows=0 loops=1) Index Cond: (a <= t2.x) -(4 rows) + Storage Index Read Requests: 1 +(6 rows) -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) /*+ IndexScan(t1) NestLoop(t2 t1) Leading((t2 t1)) */ SELECT * FROM nulltest t1 JOIN nulltest2 t2 ON t1.a BETWEEN t2.x AND t2.x + 2; - QUERY PLAN + QUERY PLAN ---------------------------------------------------------------------------- Nested Loop (actual rows=0 loops=1) -> Seq Scan on nulltest2 t2 (actual rows=1 loops=1) + Storage Table Read Requests: 3 -> Index Scan using i_nulltest_a on nulltest t1 (actual rows=0 loops=1) Index Cond: ((a >= t2.x) AND (a <= (t2.x + 2))) -(4 rows) + Storage Index Read Requests: 1 +(6 rows) -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (ANALYZE, COSTS OFF, 
DIST ON, TIMING OFF, SUMMARY OFF) /*+ IndexScan(t1) NestLoop(t2 t1) Leading((t2 t1)) */ SELECT * FROM nulltest t1 JOIN nulltest2 t2 ON (t1.a, t1.b) = (t2.x, t2.y); - QUERY PLAN +DEBUG: skipping a scan due to unsatisfiable condition + QUERY PLAN ----------------------------------------------------------------------------- Nested Loop (actual rows=0 loops=1) -> Seq Scan on nulltest2 t2 (actual rows=1 loops=1) + Storage Table Read Requests: 3 -> Index Scan using i_nulltest_ba on nulltest t1 (actual rows=0 loops=1) Index Cond: ((b = t2.y) AND (a = t2.x)) -(4 rows) +(5 rows) -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) /*+ IndexScan(t1) NestLoop(t2 t1) Leading((t2 t1)) */ SELECT * FROM nulltest t1 JOIN nulltest2 t2 ON (t1.a, t1.b) <= (t2.x, t2.y); - QUERY PLAN + QUERY PLAN ---------------------------------------------------------------------------- Nested Loop (actual rows=0 loops=1) -> Seq Scan on nulltest2 t2 (actual rows=1 loops=1) + Storage Table Read Requests: 3 -> Index Scan using i_nulltest_a on nulltest t1 (actual rows=0 loops=1) Index Cond: (a <= t2.x) Filter: (ROW(a, b) <= ROW(t2.x, t2.y)) -(5 rows) + Storage Index Read Requests: 1 +(7 rows) -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) /*+ IndexScan(t1) */ SELECT * FROM nulltest t1 WHERE (a, b) <= (null, 1); - QUERY PLAN + QUERY PLAN ---------------------------------------------------------------------- Index Scan using i_nulltest_a on nulltest t1 (actual rows=0 loops=1) Index Cond: (a <= NULL::integer) Filter: (ROW(a, b) <= ROW(NULL::integer, 1)) -(3 rows) + Storage Index Read Requests: 1 +(4 rows) + +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) +/*+ IndexScan(t1 i_nulltest_ba) */ +SELECT * FROM nulltest t1 WHERE (a, b) <= (null, 1); +DEBUG: skipping a scan due to unsatisfiable condition + QUERY PLAN 
+----------------------------------------------------------------------- + Index Scan using i_nulltest_ba on nulltest t1 (actual rows=0 loops=1) + Index Cond: (ROW(a, b) <= ROW(NULL::integer, 1)) +(2 rows) -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) /*+ IndexScan(t1) */ SELECT a FROM nulltest t1 WHERE a IN (null, null); - QUERY PLAN + QUERY PLAN ---------------------------------------------------------------------- Index Scan using i_nulltest_a on nulltest t1 (actual rows=0 loops=1) Index Cond: (a = ANY ('{NULL,NULL}'::integer[])) (2 rows) +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) +/*+ IndexScan(t1 i_nulltest_ba) */ +SELECT a FROM nulltest t1 WHERE a IN (null, null); + QUERY PLAN +----------------------------------------------------------------------- + Index Scan using i_nulltest_ba on nulltest t1 (actual rows=0 loops=1) + Index Cond: (a = ANY ('{NULL,NULL}'::integer[])) +(2 rows) + -- Should return 1s /*+ IndexScan(t1) */ SELECT a FROM nulltest t1 WHERE a IN (null, 1); - a + a --- 1 1 (2 rows) -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) /*+ IndexScan(t1) */ SELECT a FROM nulltest t1 WHERE a IN (null, 1); - QUERY PLAN + QUERY PLAN ---------------------------------------------------------------------- Index Scan using i_nulltest_a on nulltest t1 (actual rows=2 loops=1) Index Cond: (a = ANY ('{NULL,1}'::integer[])) + Storage Table Read Requests: 1 + Storage Index Read Requests: 1 +(4 rows) + +/*+ IndexScan(t1 i_nulltest_ba) */ +SELECT a FROM nulltest t1 WHERE a IN (null, 1); + a +--- + 1 + 1 (2 rows) +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) +/*+ IndexScan(t1 i_nulltest_ba) */ +SELECT a FROM nulltest t1 WHERE a IN (null, 1); + QUERY PLAN +----------------------------------------------------------------------- + Index Scan using i_nulltest_ba on nulltest t1 (actual rows=2 
loops=1) + Index Cond: (a = ANY ('{NULL,1}'::integer[])) + Storage Table Read Requests: 1 + Storage Index Read Requests: 1 +(4 rows) + /*+ IndexScan(t1) */ SELECT a FROM nulltest t1 WHERE (a, b) <= (2, null); - a + a --- 1 1 (2 rows) -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) /*+ IndexScan(t1) */ SELECT a FROM nulltest t1 WHERE (a, b) <= (2, null); - QUERY PLAN + QUERY PLAN ---------------------------------------------------------------------- Index Scan using i_nulltest_a on nulltest t1 (actual rows=2 loops=1) Index Cond: (a <= 2) Filter: (ROW(a, b) <= ROW(2, NULL::integer)) -(3 rows) + Storage Table Read Requests: 1 + Storage Index Read Requests: 1 +(5 rows) + +/*+ IndexScan(t1 i_nulltest_ba) */ +SELECT a FROM nulltest t1 WHERE (a, b) <= (2, null); + a +--- + 1 + 1 +(2 rows) + +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) +/*+ IndexScan(t1 i_nulltest_ba) */ +SELECT a FROM nulltest t1 WHERE (a, b) <= (2, null); + QUERY PLAN +----------------------------------------------------------------------- + Index Scan using i_nulltest_ba on nulltest t1 (actual rows=2 loops=1) + Index Cond: (ROW(a, b) <= ROW(2, NULL::integer)) + Rows Removed by Index Recheck: 2 + Storage Table Read Requests: 1 + Storage Index Read Requests: 1 +(5 rows) -- Should return nulls /*+ IndexScan(t1) */ SELECT a FROM nulltest t1 WHERE a IS NULL; - a + a --- - - + + (2 rows) -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) /*+ IndexScan(t1) */ SELECT a FROM nulltest t1 WHERE a IS NULL; - QUERY PLAN + QUERY PLAN ---------------------------------------------------------------------- Index Scan using i_nulltest_a on nulltest t1 (actual rows=2 loops=1) Index Cond: (a IS NULL) -(2 rows) + Storage Table Read Requests: 1 + Storage Index Read Requests: 1 +(4 rows) +RESET client_min_messages; +\unset YB_DISABLE_ERROR_PREFIX diff --git 
a/src/postgres/src/test/regress/expected/yb_index_scan_null_create.out b/src/postgres/src/test/regress/expected/yb_index_scan_null_create.out index bbb1ba9159fe..c54bd983b6d6 100644 --- a/src/postgres/src/test/regress/expected/yb_index_scan_null_create.out +++ b/src/postgres/src/test/regress/expected/yb_index_scan_null_create.out @@ -1,5 +1,9 @@ -- Create tables for the null scan key tests -CREATE TABLE nulltest (a int, b int); +-- +-- As of 2023-06-21, the tables will default to 3 tablets, but in case those +-- defaults change, explicitly set the numbers here. The number of tablets +-- affects the number of requests shown in EXPLAIN DIST. +CREATE TABLE nulltest (a int, b int) SPLIT INTO 3 TABLETS; INSERT INTO nulltest VALUES (null, null), (null, 1), (1, null), (1, 1); -CREATE TABLE nulltest2 (x int, y int); +CREATE TABLE nulltest2 (x int, y int) SPLIT INTO 3 TABLETS; INSERT INTO nulltest2 VALUES (null, null); diff --git a/src/postgres/src/test/regress/expected/yb_index_scan_null_hash.out b/src/postgres/src/test/regress/expected/yb_index_scan_null_hash.out index c28074e2b992..3e20718fcdba 100644 --- a/src/postgres/src/test/regress/expected/yb_index_scan_null_hash.out +++ b/src/postgres/src/test/regress/expected/yb_index_scan_null_hash.out @@ -8,20 +8,24 @@ DROP INDEX IF EXISTS i_nulltest_ba; CREATE INDEX i_nulltest_ba ON nulltest ((b, a) HASH); \i sql/yb_index_scan_null_queries.sql -- Queries for the null scan key tests +SET client_min_messages = DEBUG1; +\set YB_DISABLE_ERROR_PREFIX on -- Should return empty results (actual rows=0) -- The plans should not show any "Recheck" -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) /*+ IndexScan(t1) NestLoop(t2 t1) Leading((t2 t1)) */ SELECT * FROM nulltest t1 JOIN nulltest2 t2 ON t1.a = t2.x; +DEBUG: skipping a scan due to unsatisfiable condition QUERY PLAN ---------------------------------------------------------------------------- Nested Loop (actual 
rows=0 loops=1) -> Seq Scan on nulltest2 t2 (actual rows=1 loops=1) + Storage Table Read Requests: 3 -> Index Scan using i_nulltest_a on nulltest t1 (actual rows=0 loops=1) Index Cond: (a = t2.x) -(4 rows) +(5 rows) -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) /*+ IndexScan(t1) NestLoop(t2 t1) Leading((t2 t1)) */ SELECT * FROM nulltest t1 JOIN nulltest2 t2 ON t1.a <= t2.x; QUERY PLAN @@ -30,11 +34,13 @@ SELECT * FROM nulltest t1 JOIN nulltest2 t2 ON t1.a <= t2.x; Join Filter: (t1.a <= t2.x) Rows Removed by Join Filter: 4 -> Seq Scan on nulltest2 t2 (actual rows=1 loops=1) + Storage Table Read Requests: 3 -> Materialize (actual rows=4 loops=1) -> YB Seq Scan on nulltest t1 (actual rows=4 loops=1) -(6 rows) + Storage Table Read Requests: 3 +(8 rows) -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) /*+ IndexScan(t1) NestLoop(t2 t1) Leading((t2 t1)) */ SELECT * FROM nulltest t1 JOIN nulltest2 t2 ON t1.a BETWEEN t2.x AND t2.x + 2; QUERY PLAN @@ -43,22 +49,26 @@ SELECT * FROM nulltest t1 JOIN nulltest2 t2 ON t1.a BETWEEN t2.x AND t2.x + 2; Join Filter: ((t1.a >= t2.x) AND (t1.a <= (t2.x + 2))) Rows Removed by Join Filter: 4 -> Seq Scan on nulltest2 t2 (actual rows=1 loops=1) + Storage Table Read Requests: 3 -> Materialize (actual rows=4 loops=1) -> YB Seq Scan on nulltest t1 (actual rows=4 loops=1) -(6 rows) + Storage Table Read Requests: 3 +(8 rows) -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) /*+ IndexScan(t1) NestLoop(t2 t1) Leading((t2 t1)) */ SELECT * FROM nulltest t1 JOIN nulltest2 t2 ON (t1.a, t1.b) = (t2.x, t2.y); +DEBUG: skipping a scan due to unsatisfiable condition QUERY PLAN ----------------------------------------------------------------------------- Nested Loop (actual rows=0 loops=1) -> Seq Scan on nulltest2 t2 (actual rows=1 loops=1) + 
Storage Table Read Requests: 3 -> Index Scan using i_nulltest_ba on nulltest t1 (actual rows=0 loops=1) Index Cond: ((b = t2.y) AND (a = t2.x)) -(4 rows) +(5 rows) -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) /*+ IndexScan(t1) NestLoop(t2 t1) Leading((t2 t1)) */ SELECT * FROM nulltest t1 JOIN nulltest2 t2 ON (t1.a, t1.b) <= (t2.x, t2.y); QUERY PLAN @@ -67,11 +77,13 @@ SELECT * FROM nulltest t1 JOIN nulltest2 t2 ON (t1.a, t1.b) <= (t2.x, t2.y); Join Filter: (ROW(t1.a, t1.b) <= ROW(t2.x, t2.y)) Rows Removed by Join Filter: 4 -> Seq Scan on nulltest2 t2 (actual rows=1 loops=1) + Storage Table Read Requests: 3 -> Materialize (actual rows=4 loops=1) -> YB Seq Scan on nulltest t1 (actual rows=4 loops=1) -(6 rows) + Storage Table Read Requests: 3 +(8 rows) -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) /*+ IndexScan(t1) */ SELECT * FROM nulltest t1 WHERE (a, b) <= (null, 1); QUERY PLAN @@ -79,9 +91,21 @@ SELECT * FROM nulltest t1 WHERE (a, b) <= (null, 1); YB Seq Scan on nulltest t1 (actual rows=0 loops=1) Filter: (ROW(a, b) <= ROW(NULL::integer, 1)) Rows Removed by Filter: 4 -(3 rows) + Storage Table Read Requests: 3 +(4 rows) + +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) +/*+ IndexScan(t1 i_nulltest_ba) */ +SELECT * FROM nulltest t1 WHERE (a, b) <= (null, 1); + QUERY PLAN +---------------------------------------------------- + YB Seq Scan on nulltest t1 (actual rows=0 loops=1) + Filter: (ROW(a, b) <= ROW(NULL::integer, 1)) + Rows Removed by Filter: 4 + Storage Table Read Requests: 3 +(4 rows) -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) /*+ IndexScan(t1) */ SELECT a FROM nulltest t1 WHERE a IN (null, null); QUERY PLAN @@ -90,6 +114,18 @@ SELECT a FROM nulltest t1 WHERE a IN (null, null); Index Cond: (a = ANY 
('{NULL,NULL}'::integer[])) (2 rows) +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) +/*+ IndexScan(t1 i_nulltest_ba) */ +SELECT a FROM nulltest t1 WHERE a IN (null, null); + QUERY PLAN +----------------------------------------------------------------------- + Index Scan using i_nulltest_ba on nulltest t1 (actual rows=0 loops=1) + Index Cond: (a = ANY ('{NULL,NULL}'::integer[])) + Rows Removed by Index Recheck: 4 + Storage Table Read Requests: 2 + Storage Index Read Requests: 3 +(5 rows) + -- Should return 1s /*+ IndexScan(t1) */ SELECT a FROM nulltest t1 WHERE a IN (null, 1); @@ -99,15 +135,37 @@ SELECT a FROM nulltest t1 WHERE a IN (null, 1); 1 (2 rows) -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) /*+ IndexScan(t1) */ SELECT a FROM nulltest t1 WHERE a IN (null, 1); QUERY PLAN ---------------------------------------------------------------------- Index Scan using i_nulltest_a on nulltest t1 (actual rows=2 loops=1) Index Cond: (a = ANY ('{NULL,1}'::integer[])) + Storage Table Read Requests: 1 + Storage Index Read Requests: 1 +(4 rows) + +/*+ IndexScan(t1 i_nulltest_ba) */ +SELECT a FROM nulltest t1 WHERE a IN (null, 1); + a +--- + 1 + 1 (2 rows) +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) +/*+ IndexScan(t1 i_nulltest_ba) */ +SELECT a FROM nulltest t1 WHERE a IN (null, 1); + QUERY PLAN +----------------------------------------------------------------------- + Index Scan using i_nulltest_ba on nulltest t1 (actual rows=2 loops=1) + Index Cond: (a = ANY ('{NULL,1}'::integer[])) + Rows Removed by Index Recheck: 2 + Storage Table Read Requests: 2 + Storage Index Read Requests: 3 +(5 rows) + /*+ IndexScan(t1) */ SELECT a FROM nulltest t1 WHERE (a, b) <= (2, null); a @@ -116,7 +174,7 @@ SELECT a FROM nulltest t1 WHERE (a, b) <= (2, null); 1 (2 rows) -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY 
OFF) /*+ IndexScan(t1) */ SELECT a FROM nulltest t1 WHERE (a, b) <= (2, null); QUERY PLAN @@ -124,7 +182,27 @@ SELECT a FROM nulltest t1 WHERE (a, b) <= (2, null); YB Seq Scan on nulltest t1 (actual rows=2 loops=1) Filter: (ROW(a, b) <= ROW(2, NULL::integer)) Rows Removed by Filter: 2 -(3 rows) + Storage Table Read Requests: 3 +(4 rows) + +/*+ IndexScan(t1 i_nulltest_ba) */ +SELECT a FROM nulltest t1 WHERE (a, b) <= (2, null); + a +--- + 1 + 1 +(2 rows) + +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) +/*+ IndexScan(t1 i_nulltest_ba) */ +SELECT a FROM nulltest t1 WHERE (a, b) <= (2, null); + QUERY PLAN +---------------------------------------------------- + YB Seq Scan on nulltest t1 (actual rows=2 loops=1) + Filter: (ROW(a, b) <= ROW(2, NULL::integer)) + Rows Removed by Filter: 2 + Storage Table Read Requests: 3 +(4 rows) -- Should return nulls /*+ IndexScan(t1) */ @@ -135,11 +213,16 @@ SELECT a FROM nulltest t1 WHERE a IS NULL; (2 rows) -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) /*+ IndexScan(t1) */ SELECT a FROM nulltest t1 WHERE a IS NULL; QUERY PLAN ---------------------------------------------------------------------- Index Scan using i_nulltest_a on nulltest t1 (actual rows=2 loops=1) Index Cond: (a IS NULL) -(2 rows) + Storage Table Read Requests: 1 + Storage Index Read Requests: 1 +(4 rows) + +RESET client_min_messages; +\unset YB_DISABLE_ERROR_PREFIX diff --git a/src/postgres/src/test/regress/expected/yb_index_selectivity.out b/src/postgres/src/test/regress/expected/yb_index_selectivity.out index 8ad45c655346..f1721cc7f6a5 100644 --- a/src/postgres/src/test/regress/expected/yb_index_selectivity.out +++ b/src/postgres/src/test/regress/expected/yb_index_selectivity.out @@ -7,10 +7,11 @@ EXPLAIN (COSTS OFF) SELECT count(*) FROM airports WHERE iso_region = 'US-CA'; QUERY PLAN ------------------------------------------------------- - Aggregate + Finalize Aggregate -> 
Index Only Scan using airports_idx2 on airports Index Cond: (iso_region = 'US-CA'::text) -(3 rows) + Partial Aggregate: true +(4 rows) EXPLAIN (COSTS OFF) SELECT gps_code FROM airports WHERE iso_region = 'US-CA' AND type = 'small_airport' ORDER BY ident; diff --git a/src/postgres/src/test/regress/expected/yb_lock_status.out b/src/postgres/src/test/regress/expected/yb_lock_status.out new file mode 100644 index 000000000000..943b7cd27a1b --- /dev/null +++ b/src/postgres/src/test/regress/expected/yb_lock_status.out @@ -0,0 +1,539 @@ +CREATE TABLE yb_lock_tests +( + k1 int, + k2 int, + r1 int, + r2 text, + v1 text, + v2 text, + PRIMARY KEY((k1, k2) HASH, r1,r2) +) SPLIT INTO 2 TABLETS; +CREATE UNIQUE INDEX yb_lock_tests_k1_k2 ON yb_lock_tests (k1,k2) SPLIT INTO 2 TABLETS; +CREATE FUNCTION is_between_now_and_clock_timestamp(input_time timestamptz) +RETURNS boolean +AS $$ +BEGIN + RETURN input_time >= now() AND input_time <= clock_timestamp(); +END; +$$ LANGUAGE plpgsql; +CREATE +OR REPLACE FUNCTION validate_and_return_lock_status(input_relation oid, input_transaction_id uuid, + OUT locktype text, + OUT relation text, OUT mode text[], OUT granted boolean, + OUT fastpath boolean, OUT valid_waitstart boolean, + OUT valid_waitend boolean, OUT has_node boolean, + OUT has_tablet_id boolean, + OUT has_transaction_id boolean, + OUT valid_subtransaction_id boolean, + OUT has_status_tablet_id boolean, + OUT is_explicit boolean, + OUT hash_cols text[], + OUT range_cols text[], OUT attnum smallint, OUT column_id integer, + OUT multiple_rows_locked boolean, OUT num_blocking int4) + RETURNS SETOF record +AS +$$ +DECLARE + difference record; +BEGIN + FOR difference IN + SELECT + l.locktype, + l.database, + l.relation, + l.pid, + array_to_string(l.mode, ','), + l.granted, + l.fastpath, + l.waitstart, + l.waitend, + CASE WHEN l.node IS NOT NULL THEN to_jsonb(l.node) ELSE 'null'::jsonb END AS node, + CASE WHEN l.tablet_id IS NOT NULL THEN to_jsonb(l.tablet_id) ELSE 'null'::jsonb END AS 
tablet_id, + CASE WHEN l.transaction_id IS NOT NULL THEN to_jsonb(l.transaction_id) ELSE 'null'::jsonb END AS transaction_id, + CASE WHEN l.subtransaction_id IS NOT NULL THEN to_jsonb(l.subtransaction_id) ELSE 'null'::jsonb END AS subtransaction_id, + CASE WHEN l.is_explicit IS NOT NULL THEN to_jsonb(l.is_explicit) ELSE 'null'::jsonb END AS is_explicit, + CASE WHEN l.hash_cols IS NOT NULL OR l.range_cols IS NOT NULL THEN to_jsonb(l.hash_cols || l.range_cols) ELSE 'null'::jsonb END AS cols, + CASE WHEN l.attnum IS NOT NULL THEN to_jsonb(l.attnum) ELSE 'null'::jsonb END AS attnum, + CASE WHEN l.column_id IS NOT NULL THEN to_jsonb(l.column_id) ELSE 'null'::jsonb END AS column_id, + CASE WHEN l.multiple_rows_locked IS NOT NULL THEN to_jsonb(l.multiple_rows_locked) ELSE 'null'::jsonb END AS multiple_rows_locked, + CASE WHEN l.blocked_by IS NOT NULL THEN to_jsonb(l.blocked_by) ELSE 'null'::jsonb END AS blocked_by + FROM + yb_lock_status(null, null) l + EXCEPT + SELECT + p.locktype, + p.database, + p.relation, + p.pid, + p.mode, + p.granted, + p.fastpath, + p.waitstart, + p.waitend, + p.ybdetails->'node', + p.ybdetails->'tablet_id', + p.ybdetails->'transactionid', + p.ybdetails->'subtransaction_id', + p.ybdetails->'is_explicit', + p.ybdetails->'keyrangedetails'->'cols', + p.ybdetails->'keyrangedetails'->'attnum', + p.ybdetails->'keyrangedetails'->'column_id', + p.ybdetails->'keyrangedetails'->'multiple_rows_locked', + p.ybdetails->'blocked_by' + FROM pg_locks p + LOOP + RAISE EXCEPTION 'There is a difference in the output of pg_locks and yb_lock_status. 
The difference is: %', difference; + END LOOP; + + RETURN QUERY SELECT l.locktype, + l.relation::regclass::text, + l.mode, + l.granted, + l.fastpath, + is_between_now_and_clock_timestamp(l.waitstart) as valid_waitstart, + is_between_now_and_clock_timestamp(l.waitend) as valid_waitend, + CASE WHEN l.node IS NOT NULL THEN true ELSE false END as has_node, + CASE WHEN l.tablet_id IS NOT NULL THEN true ELSE FALSE END as has_tablet_id, + CASE WHEN l.transaction_id IS NOT NULL THEN true ELSE FALSE END as has_transaction_id, + (l.subtransaction_id > 0) as valid_subtransaction_id, + CASE WHEN l.status_tablet_id IS NOT NULL THEN true ELSE FALSE END as has_status_tablet_id, + l.is_explicit, + l.hash_cols, + l.range_cols, + l.attnum, + l.column_id, + l.multiple_rows_locked, + array_length(l.blocked_by, 1) + -- TODO: Add the relation arg when we support querying by relation + FROM yb_lock_status(null, input_transaction_id) l + WHERE l.relation = input_relation + ORDER BY l.relation::regclass::text, l.transaction_id, l.hash_cols NULLS FIRST, + l.range_cols NULLS FIRST, l.column_id NULLS FIRST; +END ; +$$ LANGUAGE plpgsql; +-- Basic queries +SELECT true FROM yb_lock_status(null, null); + bool +------ +(0 rows) + +SELECT true FROM yb_lock_status('yb_lock_tests'::regclass, null); + bool +------ +(0 rows) + +SELECT true FROM yb_lock_status('yb_lock_tests'::regclass::int4, null); + bool +------ +(0 rows) + +SELECT true FROM yb_lock_status(null, 'bogus'); +ERROR: invalid input syntax for type uuid: "bogus" +LINE 1: SELECT true FROM yb_lock_status(null, 'bogus'); + ^ +SELECT true FROM yb_lock_status(null, '10000000-2000-3000-1000-400000000000'); + bool +------ +(0 rows) + +SELECT true FROM yb_lock_status('yb_lock_tests'::regclass, '10000000-2000-3000-1000-400000000000'); + bool +------ +(0 rows) + +-- READ COMMITTED +-- Basic insert +BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; +INSERT INTO yb_lock_tests VALUES (1, 1, 1, 'one', 1, 1); +INSERT INTO yb_lock_tests VALUES (2, 2, 2, 
'two', 2, 2); +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+---------------+----------------------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+---------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | | | | | t | + relation | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | | | | t | + keyrange | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | {1} | | | t | + row | yb_lock_tests | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | {1,"\"one\""} | | | f | + keyrange | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {2,2} | | | | t | + keyrange | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {2,2} | {2} | | | t | + row | yb_lock_tests | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {2,2} | {2,"\"two\""} | | | f | +(8 rows) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking 
+----------+---------------------+----------------------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | | | | | t | + relation | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | | | | t | + keyrange | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | {1} | | | t | + row | yb_lock_tests_k1_k2 | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | {1,"null"} | | | f | + keyrange | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {2} | | | | t | + keyrange | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {2} | {2} | | | t | + row | yb_lock_tests_k1_k2 | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {2} | {2,"null"} | | | f | +(8 rows) + +COMMIT; +-- Basic Column Update +BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; +UPDATE yb_lock_tests SET v1 = 2 WHERE k1 = 1 AND k2 = 1; +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking 
+----------+---------------+----------------------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+---------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | | | | t | + keyrange | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | {1} | | | t | + row | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | {1,"\"one\""} | | | f | + column | yb_lock_tests | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | {1,"\"one\""} | 5 | 4 | f | +(5 rows) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+----------+------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+------------+--------+-----------+----------------------+-------------- +(0 rows) + +ABORT; +-- Basic primary key update +BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; +UPDATE yb_lock_tests SET r1 = 2 WHERE k1 = 1 AND k2 = 1; +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | 
range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+---------------+----------------------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+---------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | | | | t | + keyrange | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | {1} | | | t | + row | yb_lock_tests | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | {1,"\"one\""} | | | f | + keyrange | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | {2} | | | t | + row | yb_lock_tests | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | {2,"\"one\""} | | | f | +(6 rows) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+---------------------+----------------------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | | | | | t | + relation | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | 
yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | | | | t | + keyrange | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | | | | t | + keyrange | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | {1} | | | t | + keyrange | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | {1} | | | t | + row | yb_lock_tests_k1_k2 | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | {1,"null"} | | | f | + row | yb_lock_tests_k1_k2 | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | {1,"null"} | | | f | +(8 rows) + +ABORT; +BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; +UPDATE yb_lock_tests SET k2 = 2 WHERE k1 = 1 AND k2 = 1; +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+---------------+----------------------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+---------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | | | | t | + keyrange | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | {1} | | | t | + row | yb_lock_tests | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | {1,"\"one\""} | | | f | + keyrange | yb_lock_tests | 
{WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1,2} | | | | t | + keyrange | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1,2} | {1} | | | t | + row | yb_lock_tests | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {1,2} | {1,"\"one\""} | | | f | +(7 rows) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+---------------------+----------------------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | | | | t | + keyrange | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | {1} | | | t | + row | yb_lock_tests_k1_k2 | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | {1,"null"} | | | f | + keyrange | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | {2} | | | t | + row | yb_lock_tests_k1_k2 | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | {2,"null"} | | | f | +(6 rows) + +ABORT; +-- SELECT FOR SHARE +BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; +SELECT * FROM yb_lock_tests FOR SHARE; + k1 | k2 | r1 | r2 | v1 | v2 +----+----+----+-----+----+---- + 2 | 2 | 2 | two | 2 | 2 + 1 | 1 | 1 | one | 1 | 1 +(2 rows) + +SELECT * FROM 
validate_and_return_lock_status('yb_lock_tests'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+---------------+---------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+---------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | | | | | t | + relation | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | {1,1} | | | | t | + keyrange | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | {1,1} | {1} | | | t | + row | yb_lock_tests | {STRONG_READ} | t | f | | t | t | t | t | t | f | t | {1,1} | {1,"\"one\""} | | | f | + keyrange | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | {2,2} | | | | t | + keyrange | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | {2,2} | {2} | | | t | + row | yb_lock_tests | {STRONG_READ} | t | f | | t | t | t | t | t | f | t | {2,2} | {2,"\"two\""} | | | f | +(8 rows) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking 
+----------+----------+------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+------------+--------+-----------+----------------------+-------------- +(0 rows) + +COMMIT; +-- SELECT FOR KEY SHARE +BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; +SELECT * FROM yb_lock_tests FOR KEY SHARE; + k1 | k2 | r1 | r2 | v1 | v2 +----+----+----+-----+----+---- + 2 | 2 | 2 | two | 2 | 2 + 1 | 1 | 1 | one | 1 | 1 +(2 rows) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+---------------+-------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+---------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | | | | | t | + relation | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | {1,1} | | | | t | + keyrange | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | {1,1} | {1} | | | t | + row | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | t | {1,1} | {1,"\"one\""} | | | f | + keyrange | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | {2,2} | | | | t | + keyrange | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | {2,2} | {2} | | | t | + row | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | t | {2,2} | {2,"\"two\""} | 
| | f | +(8 rows) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+----------+------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+------------+--------+-----------+----------------------+-------------- +(0 rows) + +COMMIT; +-- SELECT FOR UPDATE +BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; +SELECT * FROM yb_lock_tests FOR UPDATE; + k1 | k2 | r1 | r2 | v1 | v2 +----+----+----+-----+----+---- + 2 | 2 | 2 | two | 2 | 2 + 1 | 1 | 1 | one | 1 | 1 +(2 rows) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+---------------+----------------------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+---------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | | | | | t | + relation | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | | | | t | + keyrange | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | 
f | f | {1,1} | {1} | | | t | + row | yb_lock_tests | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | t | {1,1} | {1,"\"one\""} | | | f | + keyrange | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {2,2} | | | | t | + keyrange | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {2,2} | {2} | | | t | + row | yb_lock_tests | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | t | {2,2} | {2,"\"two\""} | | | f | +(8 rows) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+----------+------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+------------+--------+-----------+----------------------+-------------- +(0 rows) + +ABORT; +-- SERIALIZABLE tests +BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE; +SELECT * from yb_lock_tests; + k1 | k2 | r1 | r2 | v1 | v2 +----+----+----+-----+----+---- + 2 | 2 | 2 | two | 2 | 2 + 1 | 1 | 1 | one | 1 | 1 +(2 rows) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking 
+----------+---------------+---------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests | {STRONG_READ} | t | f | | t | t | t | t | t | f | f | | | | | t | + relation | yb_lock_tests | {STRONG_READ} | t | f | | t | t | t | t | t | f | f | | | | | t | +(2 rows) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+----------+------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+------------+--------+-----------+----------------------+-------------- +(0 rows) + +COMMIT; +BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE; +SELECT * from yb_lock_tests where k1 = 1; + k1 | k2 | r1 | r2 | v1 | v2 +----+----+----+-----+----+---- + 1 | 1 | 1 | one | 1 | 1 +(1 row) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking 
+----------+---------------+---------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+---------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | {1,1} | | | | t | + keyrange | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | {1,1} | {1} | | | t | + row | yb_lock_tests | {STRONG_READ} | t | f | | t | t | t | t | t | f | f | {1,1} | {1,"\"one\""} | | | f | +(4 rows) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+---------------------+---------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests_k1_k2 | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | yb_lock_tests_k1_k2 | {STRONG_READ} | t | f | | t | t | t | t | t | f | f | {1} | | | | t | +(2 rows) + +COMMIT; +BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE; +SELECT * from yb_lock_tests where k1 = 1 and k2 = 1; + k1 | k2 | r1 | r2 | v1 | v2 +----+----+----+-----+----+---- + 1 | 1 | 1 | one | 1 | 1 +(1 row) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id 
| has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+---------------+---------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+---------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | {1,1} | | | | t | + keyrange | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | {1,1} | {1} | | | t | + row | yb_lock_tests | {STRONG_READ} | t | f | | t | t | t | t | t | f | f | {1,1} | {1,"\"one\""} | | | f | +(4 rows) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+---------------------+---------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests_k1_k2 | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | yb_lock_tests_k1_k2 | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | {1} | | | | t | + keyrange | yb_lock_tests_k1_k2 | {STRONG_READ} | t | f | | t | t | t | t | t | f | f | {1} | {1} | | | t | +(3 rows) + +COMMIT; +BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE; +SELECT * from yb_lock_tests where k1 = 1 and k2 = 1 and r1 = 1 and r2 
= 'one'; + k1 | k2 | r1 | r2 | v1 | v2 +----+----+----+-----+----+---- + 1 | 1 | 1 | one | 1 | 1 +(1 row) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+---------------+---------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+---------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | {1,1} | | | | t | + keyrange | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | {1,1} | {1} | | | t | + row | yb_lock_tests | {STRONG_READ} | t | f | | t | t | t | t | t | f | f | {1,1} | {1,"\"one\""} | | | f | +(4 rows) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+----------+------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+------------+--------+-----------+----------------------+-------------- +(0 rows) + +COMMIT; +-- Foreign key reference +CREATE TABLE fk_reference +( + k1 int, + k2 int, + r1 int, + r2 text, + FOREIGN KEY (k1, k2, r1, r2) REFERENCES 
yb_lock_tests (k1, k2, r1, r2), + PRIMARY KEY (k1, k2, r1, r2) +); +BEGIN; +INSERT INTO fk_reference VALUES(1,1,1,'one'); +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+---------------+-------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+---------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | {1,1} | | | | t | + keyrange | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | {1,1} | {1} | | | t | + row | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | t | {1,1} | {1,"\"one\""} | | | f | +(4 rows) + +SELECT * FROM validate_and_return_lock_status('fk_reference'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+--------------+----------------------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+-----------------+--------+-----------+----------------------+-------------- + relation | fk_reference | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | fk_reference | 
{WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | | | | t | + keyrange | fk_reference | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | {1} | | | t | + keyrange | fk_reference | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | {1,1} | | | t | + row | fk_reference | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | {1,1,"\"one\""} | | | f | +(5 rows) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+----------+------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+------------+--------+-----------+----------------------+-------------- +(0 rows) + +COMMIT; +-- When a number of rows are inserted +BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ; +INSERT INTO yb_lock_tests SELECT i, i, i, 'value', i, i from generate_series(10, 20) i; +-- yb_lock_status returns entries from all tablets in the table +-- TODO: Remove WHERE when we support the relation argument +SELECT COUNT(DISTINCT(tablet_id)) FROM yb_lock_status('yb_lock_tests'::regclass, null) + WHERE relation = 'yb_lock_tests'::regclass; + count +------- + 2 +(1 row) + +ABORT; +-- Validate attnum +CREATE TABLE attno_test_table(a int, b int, c int, d int, e int, f int, PRIMARY KEY((e, f) hash, b)); +INSERT INTO attno_test_table VALUES (1,1,1,1,1,1); +-- Because the primary key is on (e,f,b), the first attnum 'a' should be the after the primary key in docdb +BEGIN; UPDATE attno_test_table SET a = 2 WHERE e = 1 AND f = 1 AND b = 1; +SELECT l.relation::regclass, a.attname, l.locktype, 
l.mode, l.hash_cols, l.range_cols, l.attnum, l.column_id +FROM yb_lock_status(null,null) l LEFT JOIN pg_attribute a ON a.attrelid = l.relation AND a.attnum = l.attnum; + relation | attname | locktype | mode | hash_cols | range_cols | attnum | column_id +------------------+---------+----------+----------------------------+-----------+------------+--------+----------- + attno_test_table | | relation | {WEAK_READ,WEAK_WRITE} | | | | + attno_test_table | | keyrange | {WEAK_READ,WEAK_WRITE} | {1,1} | | | + attno_test_table | | row | {WEAK_READ,WEAK_WRITE} | {1,1} | {1} | | + attno_test_table | a | column | {STRONG_READ,STRONG_WRITE} | {1,1} | {1} | 1 | 3 +(4 rows) + +ABORT; +-- 'c' should be the second non-key column in docdb, and the second attnum +BEGIN; UPDATE attno_test_table SET c = 2 WHERE e = 1 AND f = 1 AND b = 1; +SELECT l.relation::regclass, a.attname, l.locktype, l.mode, l.hash_cols, l.range_cols, l.attnum, l.column_id +FROM yb_lock_status(null,null) l LEFT JOIN pg_attribute a ON a.attrelid = l.relation AND a.attnum = l.attnum; + relation | attname | locktype | mode | hash_cols | range_cols | attnum | column_id +------------------+---------+----------+----------------------------+-----------+------------+--------+----------- + attno_test_table | | relation | {WEAK_READ,WEAK_WRITE} | | | | + attno_test_table | | keyrange | {WEAK_READ,WEAK_WRITE} | {1,1} | | | + attno_test_table | | row | {WEAK_READ,WEAK_WRITE} | {1,1} | {1} | | + attno_test_table | c | column | {STRONG_READ,STRONG_WRITE} | {1,1} | {1} | 3 | 4 +(4 rows) + +ABORT; +ALTER TABLE attno_test_table DROP COLUMN c; +-- 'd' is the fourth attnum, which should not have changed when we dropped column 'c' +BEGIN; UPDATE attno_test_table SET d = 2 WHERE e = 1 AND f = 1 AND b = 1; +SELECT l.relation::regclass, a.attname, l.locktype, l.mode, l.hash_cols, l.range_cols, l.attnum, l.column_id +FROM yb_lock_status(null,null) l LEFT JOIN pg_attribute a ON a.attrelid = l.relation AND a.attnum = l.attnum; + relation 
| attname | locktype | mode | hash_cols | range_cols | attnum | column_id +------------------+---------+----------+----------------------------+-----------+------------+--------+----------- + attno_test_table | | relation | {WEAK_READ,WEAK_WRITE} | | | | + attno_test_table | | keyrange | {WEAK_READ,WEAK_WRITE} | {1,1} | | | + attno_test_table | | row | {WEAK_READ,WEAK_WRITE} | {1,1} | {1} | | + attno_test_table | d | column | {STRONG_READ,STRONG_WRITE} | {1,1} | {1} | 4 | 5 +(4 rows) + +ABORT; +ALTER TABLE attno_test_table ADD COLUMN c text; +-- After re-adding 'c', it should be the last column both in docdb and in pg_attribute +BEGIN; UPDATE attno_test_table SET c = 'test' WHERE e = 1 AND f = 1 AND b = 1; +SELECT l.relation::regclass, a.attname, l.locktype, l.mode, l.hash_cols, l.range_cols, l.attnum, l.column_id +FROM yb_lock_status(null,null) l LEFT JOIN pg_attribute a ON a.attrelid = l.relation AND a.attnum = l.attnum; + relation | attname | locktype | mode | hash_cols | range_cols | attnum | column_id +------------------+---------+----------+----------------------------+-----------+------------+--------+----------- + attno_test_table | | relation | {WEAK_READ,WEAK_WRITE} | | | | + attno_test_table | | keyrange | {WEAK_READ,WEAK_WRITE} | {1,1} | | | + attno_test_table | | row | {WEAK_READ,WEAK_WRITE} | {1,1} | {1} | | + attno_test_table | c | column | {STRONG_READ,STRONG_WRITE} | {1,1} | {1} | 7 | 6 +(4 rows) + +ABORT; +-- Should not see any values +SELECT * FROM validate_and_return_lock_status(null, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking 
+----------+----------+------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+------------+--------+-----------+----------------------+-------------- +(0 rows) + +-- TODO: Add support for colocated tables diff --git a/src/postgres/src/test/regress/expected/yb_lock_status_1.out b/src/postgres/src/test/regress/expected/yb_lock_status_1.out new file mode 100644 index 000000000000..bc3c93d50c8d --- /dev/null +++ b/src/postgres/src/test/regress/expected/yb_lock_status_1.out @@ -0,0 +1,560 @@ +CREATE TABLE yb_lock_tests +( + k1 int, + k2 int, + r1 int, + r2 text, + v1 text, + v2 text, + PRIMARY KEY((k1, k2) HASH, r1,r2) +) SPLIT INTO 2 TABLETS; +CREATE UNIQUE INDEX yb_lock_tests_k1_k2 ON yb_lock_tests (k1,k2) SPLIT INTO 2 TABLETS; +CREATE FUNCTION is_between_now_and_clock_timestamp(input_time timestamptz) +RETURNS boolean +AS $$ +BEGIN + RETURN input_time >= now() AND input_time <= clock_timestamp(); +END; +$$ LANGUAGE plpgsql; +CREATE +OR REPLACE FUNCTION validate_and_return_lock_status(input_relation oid, input_transaction_id uuid, + OUT locktype text, + OUT relation text, OUT mode text[], OUT granted boolean, + OUT fastpath boolean, OUT valid_waitstart boolean, + OUT valid_waitend boolean, OUT has_node boolean, + OUT has_tablet_id boolean, + OUT has_transaction_id boolean, + OUT valid_subtransaction_id boolean, + OUT has_status_tablet_id boolean, + OUT is_explicit boolean, + OUT hash_cols text[], + OUT range_cols text[], OUT attnum smallint, OUT column_id integer, + OUT multiple_rows_locked boolean, OUT num_blocking int4) + RETURNS SETOF record +AS +$$ +DECLARE + difference record; +BEGIN + FOR difference IN + SELECT + l.locktype, + l.database, + l.relation, + l.pid, + array_to_string(l.mode, ','), + l.granted, + l.fastpath, + l.waitstart, + l.waitend, + CASE WHEN l.node IS NOT NULL THEN to_jsonb(l.node) ELSE 'null'::jsonb END AS node, + 
CASE WHEN l.tablet_id IS NOT NULL THEN to_jsonb(l.tablet_id) ELSE 'null'::jsonb END AS tablet_id, + CASE WHEN l.transaction_id IS NOT NULL THEN to_jsonb(l.transaction_id) ELSE 'null'::jsonb END AS transaction_id, + CASE WHEN l.subtransaction_id IS NOT NULL THEN to_jsonb(l.subtransaction_id) ELSE 'null'::jsonb END AS subtransaction_id, + CASE WHEN l.is_explicit IS NOT NULL THEN to_jsonb(l.is_explicit) ELSE 'null'::jsonb END AS is_explicit, + CASE WHEN l.hash_cols IS NOT NULL OR l.range_cols IS NOT NULL THEN to_jsonb(l.hash_cols || l.range_cols) ELSE 'null'::jsonb END AS cols, + CASE WHEN l.attnum IS NOT NULL THEN to_jsonb(l.attnum) ELSE 'null'::jsonb END AS attnum, + CASE WHEN l.column_id IS NOT NULL THEN to_jsonb(l.column_id) ELSE 'null'::jsonb END AS column_id, + CASE WHEN l.multiple_rows_locked IS NOT NULL THEN to_jsonb(l.multiple_rows_locked) ELSE 'null'::jsonb END AS multiple_rows_locked, + CASE WHEN l.blocked_by IS NOT NULL THEN to_jsonb(l.blocked_by) ELSE 'null'::jsonb END AS blocked_by + FROM + yb_lock_status(null, null) l + EXCEPT + SELECT + p.locktype, + p.database, + p.relation, + p.pid, + p.mode, + p.granted, + p.fastpath, + p.waitstart, + p.waitend, + p.ybdetails->'node', + p.ybdetails->'tablet_id', + p.ybdetails->'transactionid', + p.ybdetails->'subtransaction_id', + p.ybdetails->'is_explicit', + p.ybdetails->'keyrangedetails'->'cols', + p.ybdetails->'keyrangedetails'->'attnum', + p.ybdetails->'keyrangedetails'->'column_id', + p.ybdetails->'keyrangedetails'->'multiple_rows_locked', + p.ybdetails->'blocked_by' + FROM pg_locks p + LOOP + RAISE EXCEPTION 'There is a difference in the output of pg_locks and yb_lock_status. 
The difference is: %', difference; + END LOOP; + + RETURN QUERY SELECT l.locktype, + l.relation::regclass::text, + l.mode, + l.granted, + l.fastpath, + is_between_now_and_clock_timestamp(l.waitstart) as valid_waitstart, + is_between_now_and_clock_timestamp(l.waitend) as valid_waitend, + CASE WHEN l.node IS NOT NULL THEN true ELSE false END as has_node, + CASE WHEN l.tablet_id IS NOT NULL THEN true ELSE FALSE END as has_tablet_id, + CASE WHEN l.transaction_id IS NOT NULL THEN true ELSE FALSE END as has_transaction_id, + (l.subtransaction_id > 0) as valid_subtransaction_id, + CASE WHEN l.status_tablet_id IS NOT NULL THEN true ELSE FALSE END as has_status_tablet_id, + l.is_explicit, + l.hash_cols, + l.range_cols, + l.attnum, + l.column_id, + l.multiple_rows_locked, + array_length(l.blocked_by, 1) + -- TODO: Add the relation arg when we support querying by relation + FROM yb_lock_status(null, input_transaction_id) l + WHERE l.relation = input_relation + ORDER BY l.relation::regclass::text, l.transaction_id, l.hash_cols NULLS FIRST, + l.range_cols NULLS FIRST, l.column_id NULLS FIRST; +END ; +$$ LANGUAGE plpgsql; +-- Basic queries +SELECT true FROM yb_lock_status(null, null); + bool +------ +(0 rows) + +SELECT true FROM yb_lock_status('yb_lock_tests'::regclass, null); + bool +------ +(0 rows) + +SELECT true FROM yb_lock_status('yb_lock_tests'::regclass::int4, null); + bool +------ +(0 rows) + +SELECT true FROM yb_lock_status(null, 'bogus'); +ERROR: invalid input syntax for type uuid: "bogus" +LINE 1: SELECT true FROM yb_lock_status(null, 'bogus'); + ^ +SELECT true FROM yb_lock_status(null, '10000000-2000-3000-1000-400000000000'); + bool +------ +(0 rows) + +SELECT true FROM yb_lock_status('yb_lock_tests'::regclass, '10000000-2000-3000-1000-400000000000'); + bool +------ +(0 rows) + +-- READ COMMITTED +-- Basic insert +BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; +INSERT INTO yb_lock_tests VALUES (1, 1, 1, 'one', 1, 1); +INSERT INTO yb_lock_tests VALUES (2, 2, 2, 
'two', 2, 2); +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+---------------+----------------------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+---------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | | | | | t | + relation | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | | | | t | + keyrange | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | {1} | | | t | + row | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | {1,"\"one\""} | | | f | + column | yb_lock_tests | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | {1,"\"one\""} | | 0 | f | + column | yb_lock_tests | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | {1,"\"one\""} | 5 | 14 | f | + column | yb_lock_tests | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | {1,"\"one\""} | 6 | 15 | f | + keyrange | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {2,2} | | | | t | + keyrange | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {2,2} | {2} | | | t | + row | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {2,2} | {2,"\"two\""} | | | f | + column | yb_lock_tests | 
{STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {2,2} | {2,"\"two\""} | | 0 | f | + column | yb_lock_tests | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {2,2} | {2,"\"two\""} | 5 | 14 | f | + column | yb_lock_tests | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {2,2} | {2,"\"two\""} | 6 | 15 | f | +(14 rows) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+---------------------+----------------------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | | | | | t | + relation | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | | | | t | + keyrange | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | {1} | | | t | + row | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | {1,"null"} | | | f | + column | yb_lock_tests_k1_k2 | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | {1,"null"} | | 0 | f | + column | yb_lock_tests_k1_k2 | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | {1,"null"} | | 13 | f | + keyrange | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {2} | | | | t | 
+ keyrange | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {2} | {2} | | | t | + row | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {2} | {2,"null"} | | | f | + column | yb_lock_tests_k1_k2 | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {2} | {2,"null"} | | 0 | f | + column | yb_lock_tests_k1_k2 | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {2} | {2,"null"} | | 13 | f | +(12 rows) + +COMMIT; +-- Basic Column Update +BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; +UPDATE yb_lock_tests SET v1 = 2 WHERE k1 = 1 AND k2 = 1; +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+---------------+----------------------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+---------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | | | | t | + keyrange | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | {1} | | | t | + row | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | {1,"\"one\""} | | | f | + column | yb_lock_tests | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | {1,"\"one\""} | 5 | 14 | f | +(5 rows) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); 
+ locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+----------+------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+------------+--------+-----------+----------------------+-------------- +(0 rows) + +ABORT; +-- Basic primary key update +BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; +UPDATE yb_lock_tests SET r1 = 2 WHERE k1 = 1 AND k2 = 1; +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+---------------+----------------------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+---------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | | | | t | + keyrange | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | {1} | | | t | + row | yb_lock_tests | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | {1,"\"one\""} | | | f | + keyrange | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | {2} | | | t | + row | yb_lock_tests | {WEAK_READ,WEAK_WRITE} 
| t | f | | t | t | t | t | t | f | f | {1,1} | {2,"\"one\""} | | | f | + column | yb_lock_tests | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | {2,"\"one\""} | | 0 | f | + column | yb_lock_tests | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | {2,"\"one\""} | 5 | 14 | f | + column | yb_lock_tests | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | {2,"\"one\""} | 6 | 15 | f | +(9 rows) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+---------------------+----------------------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | | | | | t | + relation | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | | | | t | + keyrange | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | | | | t | + keyrange | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | {1} | | | t | + keyrange | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | {1} | | | t | + row | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | {1,"null"} | | | f | + row | yb_lock_tests_k1_k2 | 
{STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | {1,"null"} | | | f | + column | yb_lock_tests_k1_k2 | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | {1,"null"} | | 0 | f | + column | yb_lock_tests_k1_k2 | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | {1,"null"} | | 13 | f | +(10 rows) + +ABORT; +BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; +UPDATE yb_lock_tests SET k2 = 2 WHERE k1 = 1 AND k2 = 1; +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+---------------+----------------------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+---------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | | | | t | + keyrange | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | {1} | | | t | + row | yb_lock_tests | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | {1,"\"one\""} | | | f | + keyrange | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1,2} | | | | t | + keyrange | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1,2} | {1} | | | t | + row | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1,2} | {1,"\"one\""} | | | f | + column | yb_lock_tests | 
{STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {1,2} | {1,"\"one\""} | | 0 | f | + column | yb_lock_tests | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {1,2} | {1,"\"one\""} | 5 | 14 | f | + column | yb_lock_tests | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {1,2} | {1,"\"one\""} | 6 | 15 | f | +(10 rows) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+---------------------+----------------------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | | | | t | + keyrange | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | {1} | | | t | + row | yb_lock_tests_k1_k2 | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | {1,"null"} | | | f | + keyrange | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | {2} | | | t | + row | yb_lock_tests_k1_k2 | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | {2,"null"} | | | f | + column | yb_lock_tests_k1_k2 | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | {2,"null"} | | 0 | f | + column | yb_lock_tests_k1_k2 | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | 
{2,"null"} | | 13 | f | +(8 rows) + +ABORT; +-- SELECT FOR SHARE +BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; +SELECT * FROM yb_lock_tests FOR SHARE; + k1 | k2 | r1 | r2 | v1 | v2 +----+----+----+-----+----+---- + 2 | 2 | 2 | two | 2 | 2 + 1 | 1 | 1 | one | 1 | 1 +(2 rows) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+---------------+---------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+---------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | | | | | t | + relation | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | {1,1} | | | | t | + keyrange | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | {1,1} | {1} | | | t | + row | yb_lock_tests | {STRONG_READ} | t | f | | t | t | t | t | t | f | t | {1,1} | {1,"\"one\""} | | | f | + keyrange | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | {2,2} | | | | t | + keyrange | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | {2,2} | {2} | | | t | + row | yb_lock_tests | {STRONG_READ} | t | f | | t | t | t | t | t | f | t | {2,2} | {2,"\"two\""} | | | f | +(8 rows) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | 
valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+----------+------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+------------+--------+-----------+----------------------+-------------- +(0 rows) + +COMMIT; +-- SELECT FOR KEY SHARE +BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; +SELECT * FROM yb_lock_tests FOR KEY SHARE; + k1 | k2 | r1 | r2 | v1 | v2 +----+----+----+-----+----+---- + 2 | 2 | 2 | two | 2 | 2 + 1 | 1 | 1 | one | 1 | 1 +(2 rows) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+---------------+-------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+---------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | | | | | t | + relation | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | {1,1} | | | | t | + keyrange | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | {1,1} | {1} | | | t | + row | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | t | {1,1} | {1,"\"one\""} | | | f | + keyrange | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | {2,2} | | | | t | + keyrange | yb_lock_tests | {WEAK_READ} | t | f | | t | 
t | t | t | t | f | f | {2,2} | {2} | | | t | + row | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | t | {2,2} | {2,"\"two\""} | | | f | +(8 rows) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+----------+------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+------------+--------+-----------+----------------------+-------------- +(0 rows) + +COMMIT; +-- SELECT FOR UPDATE +BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; +SELECT * FROM yb_lock_tests FOR UPDATE; + k1 | k2 | r1 | r2 | v1 | v2 +----+----+----+-----+----+---- + 2 | 2 | 2 | two | 2 | 2 + 1 | 1 | 1 | one | 1 | 1 +(2 rows) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+---------------+----------------------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+---------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | | | | | t | + relation | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | yb_lock_tests | 
{WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | | | | t | + keyrange | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1,1} | {1} | | | t | + row | yb_lock_tests | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | t | {1,1} | {1,"\"one\""} | | | f | + keyrange | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {2,2} | | | | t | + keyrange | yb_lock_tests | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {2,2} | {2} | | | t | + row | yb_lock_tests | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | t | {2,2} | {2,"\"two\""} | | | f | +(8 rows) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+----------+------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+------------+--------+-----------+----------------------+-------------- +(0 rows) + +ABORT; +-- SERIALIZABLE tests +BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE; +SELECT * from yb_lock_tests; + k1 | k2 | r1 | r2 | v1 | v2 +----+----+----+-----+----+---- + 2 | 2 | 2 | two | 2 | 2 + 1 | 1 | 1 | one | 1 | 1 +(2 rows) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking 
+----------+---------------+---------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests | {STRONG_READ} | t | f | | t | t | t | t | t | f | f | | | | | t | + relation | yb_lock_tests | {STRONG_READ} | t | f | | t | t | t | t | t | f | f | | | | | t | +(2 rows) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+----------+------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+------------+--------+-----------+----------------------+-------------- +(0 rows) + +COMMIT; +BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE; +SELECT * from yb_lock_tests where k1 = 1; + k1 | k2 | r1 | r2 | v1 | v2 +----+----+----+-----+----+---- + 1 | 1 | 1 | one | 1 | 1 +(1 row) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking 
+----------+---------------+---------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+---------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | {1,1} | | | | t | + keyrange | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | {1,1} | {1} | | | t | + row | yb_lock_tests | {STRONG_READ} | t | f | | t | t | t | t | t | f | f | {1,1} | {1,"\"one\""} | | | f | +(4 rows) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+---------------------+---------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests_k1_k2 | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | yb_lock_tests_k1_k2 | {STRONG_READ} | t | f | | t | t | t | t | t | f | f | {1} | | | | t | +(2 rows) + +COMMIT; +BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE; +SELECT * from yb_lock_tests where k1 = 1 and k2 = 1; + k1 | k2 | r1 | r2 | v1 | v2 +----+----+----+-----+----+---- + 1 | 1 | 1 | one | 1 | 1 +(1 row) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id 
| has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+---------------+---------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+---------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | {1,1} | | | | t | + keyrange | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | {1,1} | {1} | | | t | + row | yb_lock_tests | {STRONG_READ} | t | f | | t | t | t | t | t | f | f | {1,1} | {1,"\"one\""} | | | f | +(4 rows) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+---------------------+---------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests_k1_k2 | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | yb_lock_tests_k1_k2 | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | {1} | | | | t | + keyrange | yb_lock_tests_k1_k2 | {STRONG_READ} | t | f | | t | t | t | t | t | f | f | {1} | {1} | | | t | +(3 rows) + +COMMIT; +BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE; +SELECT * from yb_lock_tests where k1 = 1 and k2 = 1 and r1 = 1 and r2 
= 'one'; + k1 | k2 | r1 | r2 | v1 | v2 +----+----+----+-----+----+---- + 1 | 1 | 1 | one | 1 | 1 +(1 row) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+---------------+---------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+---------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | {1,1} | | | | t | + keyrange | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | {1,1} | {1} | | | t | + row | yb_lock_tests | {STRONG_READ} | t | f | | t | t | t | t | t | f | f | {1,1} | {1,"\"one\""} | | | f | +(4 rows) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+----------+------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+------------+--------+-----------+----------------------+-------------- +(0 rows) + +COMMIT; +-- Foreign key reference +CREATE TABLE fk_reference +( + k1 int, + k2 int, + r1 int, + r2 text, + FOREIGN KEY (k1, k2, r1, r2) REFERENCES 
yb_lock_tests (k1, k2, r1, r2), + PRIMARY KEY (k1, k2, r1, r2) +); +BEGIN; +INSERT INTO fk_reference VALUES(1,1,1,'one'); +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+---------------+-------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+---------------+--------+-----------+----------------------+-------------- + relation | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | {1,1} | | | | t | + keyrange | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | f | {1,1} | {1} | | | t | + row | yb_lock_tests | {WEAK_READ} | t | f | | t | t | t | t | t | f | t | {1,1} | {1,"\"one\""} | | | f | +(4 rows) + +SELECT * FROM validate_and_return_lock_status('fk_reference'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+--------------+----------------------------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+-----------------+--------+-----------+----------------------+-------------- + relation | fk_reference | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | | | | | t | + keyrange | fk_reference | 
{WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | | | | t | + keyrange | fk_reference | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | {1} | | | t | + keyrange | fk_reference | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | {1,1} | | | t | + row | fk_reference | {WEAK_READ,WEAK_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | {1,1,"\"one\""} | | | f | + column | fk_reference | {STRONG_READ,STRONG_WRITE} | t | f | | t | t | t | t | t | f | f | {1} | {1,1,"\"one\""} | | 0 | f | +(6 rows) + +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking +----------+----------+------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+------------+--------+-----------+----------------------+-------------- +(0 rows) + +COMMIT; +-- When a number of rows are inserted +BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ; +INSERT INTO yb_lock_tests SELECT i, i, i, 'value', i, i from generate_series(10, 20) i; +-- yb_lock_status returns entries from all tablets in the table +-- TODO: Remove WHERE when we support the relation argument +SELECT COUNT(DISTINCT(tablet_id)) FROM yb_lock_status('yb_lock_tests'::regclass, null) + WHERE relation = 'yb_lock_tests'::regclass; + count +------- + 2 +(1 row) + +ABORT; +-- Validate attnum +CREATE TABLE attno_test_table(a int, b int, c int, d int, e int, f int, PRIMARY KEY((e, f) hash, b)); +INSERT INTO attno_test_table VALUES (1,1,1,1,1,1); +-- Because the primary key is on (e,f,b), the first attnum 'a' should be the after the primary key in docdb +BEGIN; 
UPDATE attno_test_table SET a = 2 WHERE e = 1 AND f = 1 AND b = 1; +SELECT l.relation::regclass, a.attname, l.locktype, l.mode, l.hash_cols, l.range_cols, l.attnum, l.column_id +FROM yb_lock_status(null,null) l LEFT JOIN pg_attribute a ON a.attrelid = l.relation AND a.attnum = l.attnum; + relation | attname | locktype | mode | hash_cols | range_cols | attnum | column_id +------------------+---------+----------+----------------------------+-----------+------------+--------+----------- + attno_test_table | | relation | {WEAK_READ,WEAK_WRITE} | | | | + attno_test_table | | keyrange | {WEAK_READ,WEAK_WRITE} | {1,1} | | | + attno_test_table | | row | {WEAK_READ,WEAK_WRITE} | {1,1} | {1} | | + attno_test_table | a | column | {STRONG_READ,STRONG_WRITE} | {1,1} | {1} | 1 | 13 +(4 rows) + +ABORT; +-- 'c' should be the second non-key column in docdb, and the second attnum +BEGIN; UPDATE attno_test_table SET c = 2 WHERE e = 1 AND f = 1 AND b = 1; +SELECT l.relation::regclass, a.attname, l.locktype, l.mode, l.hash_cols, l.range_cols, l.attnum, l.column_id +FROM yb_lock_status(null,null) l LEFT JOIN pg_attribute a ON a.attrelid = l.relation AND a.attnum = l.attnum; + relation | attname | locktype | mode | hash_cols | range_cols | attnum | column_id +------------------+---------+----------+----------------------------+-----------+------------+--------+----------- + attno_test_table | | relation | {WEAK_READ,WEAK_WRITE} | | | | + attno_test_table | | keyrange | {WEAK_READ,WEAK_WRITE} | {1,1} | | | + attno_test_table | | row | {WEAK_READ,WEAK_WRITE} | {1,1} | {1} | | + attno_test_table | c | column | {STRONG_READ,STRONG_WRITE} | {1,1} | {1} | 3 | 14 +(4 rows) + +ABORT; +ALTER TABLE attno_test_table DROP COLUMN c; +-- 'd' is the fourth attnum, which should not have changed when we dropped column 'c' +BEGIN; UPDATE attno_test_table SET d = 2 WHERE e = 1 AND f = 1 AND b = 1; +SELECT l.relation::regclass, a.attname, l.locktype, l.mode, l.hash_cols, l.range_cols, l.attnum, l.column_id 
+FROM yb_lock_status(null,null) l LEFT JOIN pg_attribute a ON a.attrelid = l.relation AND a.attnum = l.attnum; + relation | attname | locktype | mode | hash_cols | range_cols | attnum | column_id +------------------+---------+----------+----------------------------+-----------+------------+--------+----------- + attno_test_table | | relation | {WEAK_READ,WEAK_WRITE} | | | | + attno_test_table | | keyrange | {WEAK_READ,WEAK_WRITE} | {1,1} | | | + attno_test_table | | row | {WEAK_READ,WEAK_WRITE} | {1,1} | {1} | | + attno_test_table | d | column | {STRONG_READ,STRONG_WRITE} | {1,1} | {1} | 4 | 15 +(4 rows) + +ABORT; +ALTER TABLE attno_test_table ADD COLUMN c text; +-- After re-adding 'c', it should be the last column both in docdb and in pg_attribute +BEGIN; UPDATE attno_test_table SET c = 'test' WHERE e = 1 AND f = 1 AND b = 1; +SELECT l.relation::regclass, a.attname, l.locktype, l.mode, l.hash_cols, l.range_cols, l.attnum, l.column_id +FROM yb_lock_status(null,null) l LEFT JOIN pg_attribute a ON a.attrelid = l.relation AND a.attnum = l.attnum; + relation | attname | locktype | mode | hash_cols | range_cols | attnum | column_id +------------------+---------+----------+----------------------------+-----------+------------+--------+----------- + attno_test_table | | relation | {WEAK_READ,WEAK_WRITE} | | | | + attno_test_table | | keyrange | {WEAK_READ,WEAK_WRITE} | {1,1} | | | + attno_test_table | | row | {WEAK_READ,WEAK_WRITE} | {1,1} | {1} | | + attno_test_table | c | column | {STRONG_READ,STRONG_WRITE} | {1,1} | {1} | 7 | 16 +(4 rows) + +ABORT; +-- Should not see any values +SELECT * FROM validate_and_return_lock_status(null, null); + locktype | relation | mode | granted | fastpath | valid_waitstart | valid_waitend | has_node | has_tablet_id | has_transaction_id | valid_subtransaction_id | has_status_tablet_id | is_explicit | hash_cols | range_cols | attnum | column_id | multiple_rows_locked | num_blocking 
+----------+----------+------+---------+----------+-----------------+---------------+----------+---------------+--------------------+-------------------------+----------------------+-------------+-----------+------------+--------+-----------+----------------------+-------------- +(0 rows) + +-- TODO: Add support for colocated tables diff --git a/src/postgres/src/test/regress/expected/yb_pg_hint_plan_test1.out b/src/postgres/src/test/regress/expected/yb_pg_hint_plan_test1.out index 939cfea138d5..f4e90f5bc66a 100644 --- a/src/postgres/src/test/regress/expected/yb_pg_hint_plan_test1.out +++ b/src/postgres/src/test/regress/expected/yb_pg_hint_plan_test1.out @@ -995,15 +995,16 @@ error hint: Index Scan using t1_pkey on t1 Index Cond: (id = $1) InitPlan 1 (returns $0) - -> Aggregate + -> Finalize Aggregate -> YB Seq Scan on t1 v_1 Remote Filter: (id < 10) + Partial Aggregate: true InitPlan 2 (returns $1) -> Finalize Aggregate -> Seq Scan on t1 v_2 Remote Filter: (id < 10) Partial Aggregate: true -(11 rows) +(12 rows) -- /*+BitmapScan(v_2)*/ -- EXPLAIN (COSTS false) SELECT (SELECT max(id) FROM t1 v_1 WHERE id < 10), id FROM v1 WHERE v1.id = (SELECT max(id) FROM t1 v_2 WHERE id < 10); diff --git a/src/postgres/src/test/regress/expected/yb_pg_partition_prune.out b/src/postgres/src/test/regress/expected/yb_pg_partition_prune.out index 9ae684de4ed8..443fbfe7b3da 100644 --- a/src/postgres/src/test/regress/expected/yb_pg_partition_prune.out +++ b/src/postgres/src/test/regress/expected/yb_pg_partition_prune.out @@ -519,15 +519,13 @@ explain (costs off) select * from rlp where a <= 31; Remote Filter: (a <= 31) -> Seq Scan on rlp5_1 Remote Filter: (a <= 31) - -> Seq Scan on rlp5_default - Remote Filter: (a <= 31) -> Seq Scan on rlp_default_10 Remote Filter: (a <= 31) -> Seq Scan on rlp_default_30 Remote Filter: (a <= 31) -> Seq Scan on rlp_default_default Remote Filter: (a <= 31) -(29 rows) +(27 rows) explain (costs off) select * from rlp where a = 1 or a = 7; QUERY PLAN @@ 
-575,11 +573,7 @@ explain (costs off) select * from rlp where a > 20 and a < 27; Remote Filter: ((a > 20) AND (a < 27)) -> Seq Scan on rlp4_2 Remote Filter: ((a > 20) AND (a < 27)) - -> Seq Scan on rlp4_default - Remote Filter: ((a > 20) AND (a < 27)) - -> Seq Scan on rlp_default_default - Remote Filter: ((a > 20) AND (a < 27)) -(9 rows) +(5 rows) explain (costs off) select * from rlp where a = 29; QUERY PLAN @@ -605,6 +599,16 @@ explain (costs off) select * from rlp where a >= 29; Remote Filter: (a >= 29) (11 rows) +explain (costs off) select * from rlp where a < 1 or (a > 20 and a < 25); + QUERY PLAN +------------------------------------------------------------- + Append + -> Seq Scan on rlp1 + Remote Filter: ((a < 1) OR ((a > 20) AND (a < 25))) + -> Seq Scan on rlp4_1 + Remote Filter: ((a < 1) OR ((a > 20) AND (a < 25))) +(5 rows) + -- redundant clauses are eliminated explain (costs off) select * from rlp where a > 1 and a = 10; /* only default */ QUERY PLAN diff --git a/src/postgres/src/test/regress/expected/yb_pg_rules.out b/src/postgres/src/test/regress/expected/yb_pg_rules.out index 018a17969a41..9f92ba558a90 100644 --- a/src/postgres/src/test/regress/expected/yb_pg_rules.out +++ b/src/postgres/src/test/regress/expected/yb_pg_rules.out @@ -1360,19 +1360,22 @@ pg_indexes| SELECT n.nspname AS schemaname, pg_locks| SELECT l.locktype, l.database, l.relation, - l.page, - l.tuple, - l.virtualxid, - l.transactionid, - l.classid, - l.objid, - l.objsubid, - l.virtualtransaction, + NULL::integer AS page, + NULL::smallint AS tuple, + NULL::text AS virtualxid, + NULL::xid AS transactionid, + NULL::oid AS classid, + NULL::oid AS objid, + NULL::smallint AS objsubid, + NULL::text AS virtualtransaction, l.pid, - l.mode, + array_to_string(l.mode, ','::text) AS mode, l.granted, - l.fastpath - FROM pg_lock_status() l(locktype, database, relation, page, tuple, virtualxid, transactionid, classid, objid, objsubid, virtualtransaction, pid, mode, granted, fastpath); + l.fastpath, + 
l.waitstart, + l.waitend, + jsonb_build_object('node', l.node, 'transactionid', l.transaction_id, 'subtransaction_id', l.subtransaction_id, 'is_explicit', l.is_explicit, 'tablet_id', l.tablet_id, 'blocked_by', l.blocked_by, 'keyrangedetails', jsonb_build_object('cols', to_jsonb((l.hash_cols || l.range_cols)), 'attnum', l.attnum, 'column_id', l.column_id, 'multiple_rows_locked', l.multiple_rows_locked)) AS ybdetails + FROM yb_lock_status(NULL::oid, NULL::uuid) l(locktype, database, relation, pid, mode, granted, fastpath, waitstart, waitend, node, tablet_id, transaction_id, subtransaction_id, status_tablet_id, is_explicit, hash_cols, range_cols, attnum, column_id, multiple_rows_locked, blocked_by); pg_matviews| SELECT n.nspname AS schemaname, c.relname AS matviewname, pg_get_userbyid(c.relowner) AS matviewowner, diff --git a/src/postgres/src/test/regress/expected/yb_reindex.out b/src/postgres/src/test/regress/expected/yb_reindex.out index b9b1e6256fb4..d32980cd93ca 100644 --- a/src/postgres/src/test/regress/expected/yb_reindex.out +++ b/src/postgres/src/test/regress/expected/yb_reindex.out @@ -233,7 +233,7 @@ SET yb_non_ddl_txn_for_sys_tables_allowed TO on; UPDATE pg_yb_catalog_version SET current_version = current_version + 1; UPDATE pg_yb_catalog_version SET last_breaking_version = current_version; RESET yb_non_ddl_txn_for_sys_tables_allowed; -SELECT current_version = last_breaking_version from pg_yb_catalog_version; +SELECT distinct(current_version = last_breaking_version) from pg_yb_catalog_version; ?column? ---------- t @@ -249,7 +249,7 @@ SET yb_non_ddl_txn_for_sys_tables_allowed TO on; UPDATE pg_yb_catalog_version SET current_version = current_version + 1; UPDATE pg_yb_catalog_version SET last_breaking_version = current_version; RESET yb_non_ddl_txn_for_sys_tables_allowed; -SELECT current_version = last_breaking_version from pg_yb_catalog_version; +SELECT distinct(current_version = last_breaking_version) from pg_yb_catalog_version; ?column? 
---------- t @@ -277,7 +277,7 @@ SET yb_non_ddl_txn_for_sys_tables_allowed TO on; UPDATE pg_yb_catalog_version SET current_version = current_version + 1; UPDATE pg_yb_catalog_version SET last_breaking_version = current_version; RESET yb_non_ddl_txn_for_sys_tables_allowed; -SELECT current_version = last_breaking_version from pg_yb_catalog_version; +SELECT distinct(current_version = last_breaking_version) from pg_yb_catalog_version; ?column? ---------- t diff --git a/src/postgres/src/test/regress/expected/yb_ybgin.out b/src/postgres/src/test/regress/expected/yb_ybgin.out index 648b64be2725..ef2693dd35aa 100644 --- a/src/postgres/src/test/regress/expected/yb_ybgin.out +++ b/src/postgres/src/test/regress/expected/yb_ybgin.out @@ -487,9 +487,10 @@ EXPLAIN (costs off) SELECT count(*) FROM partial WHERE v @@ 'b'; EXPLAIN (costs off) SELECT count(*) FROM partial WHERE v @@ 'c'; QUERY PLAN ------------------------------------------------------ - Aggregate + Finalize Aggregate -> Index Only Scan using partial_v_idx on partial -(2 rows) + Partial Aggregate: true +(3 rows) SELECT count(*) FROM partial WHERE v @@ 'c'; ERROR: unsupported ybgin index scan diff --git a/src/postgres/src/test/regress/sql/yb_aggregates.sql b/src/postgres/src/test/regress/sql/yb_aggregates.sql index db3eaaa6f1db..19143a6b7804 100644 --- a/src/postgres/src/test/regress/sql/yb_aggregates.sql +++ b/src/postgres/src/test/regress/sql/yb_aggregates.sql @@ -11,6 +11,11 @@ CREATE TABLE ybaggtest ( float_4 float4, float_8 float8 ); +CREATE INDEX NONCONCURRENTLY ybaggtestindex ON ybaggtest ( + (int_8, int_2) HASH, + float_4 DESC, + int_4 ASC +) INCLUDE (float_8); -- Insert maximum integer values multiple times to force overflow on SUM (both in DocDB and PG). INSERT INTO ybaggtest VALUES (1, 32767, 2147483647, 9223372036854775807, 1.1, 2.2); @@ -19,28 +24,53 @@ INSERT INTO ybaggtest FROM ybaggtest as t CROSS JOIN generate_series(2, 100) as series; -- Verify COUNT(...) returns proper value. 
-SELECT COUNT(*) FROM ybaggtest; -SELECT COUNT(0) FROM ybaggtest; -SELECT COUNT(NULL) FROM ybaggtest; +\set explain 'EXPLAIN (COSTS OFF)' +\set ss '/*+SeqScan(ybaggtest)*/' +\set ios '/*+IndexOnlyScan(ybaggtest ybaggtestindex)*/' +\set query 'SELECT COUNT(*) FROM ybaggtest' +\set run ':explain :query; :explain :ss :query; :explain :ios :query; :query; :ss :query; :ios :query' +:run; +\set query 'SELECT COUNT(0) FROM ybaggtest' +:run; +\set query 'SELECT COUNT(NULL) FROM ybaggtest' +:run; -- Delete row, verify COUNT(...) returns proper value. DELETE FROM ybaggtest WHERE id = 100; SELECT COUNT(*) FROM ybaggtest; +/*+IndexOnlyScan(ybaggtest ybaggtestindex)*/ +SELECT COUNT(*) FROM ybaggtest; +SELECT COUNT(0) FROM ybaggtest; +/*+IndexOnlyScan(ybaggtest ybaggtestindex)*/ SELECT COUNT(0) FROM ybaggtest; -- Verify selecting different aggs for same column works. -SELECT SUM(int_4), MAX(int_4), MIN(int_4), SUM(int_2), MAX(int_2), MIN(int_2) FROM ybaggtest; +\set query 'SELECT SUM(int_4), MAX(int_4), MIN(int_4), SUM(int_2), MAX(int_2), MIN(int_2) FROM ybaggtest' +:run; -- Verify SUMs are correct for all fields and do not overflow. -SELECT SUM(int_2), SUM(int_4), SUM(int_8), SUM(float_4), SUM(float_8) FROM ybaggtest; +\set query 'SELECT SUM(int_2), SUM(int_4), SUM(int_8), SUM(float_4), SUM(float_8) FROM ybaggtest' +:run; +-- ...and do the same query excluding the int_8 column to test agg pushdown. +-- TODO(#16289): remove this. +\set query 'SELECT SUM(int_2), SUM(int_4), SUM(float_4), SUM(float_8) FROM ybaggtest' +:run; -- Verify shared aggregates work as expected. -SELECT SUM(int_4), SUM(int_4) + 1 FROM ybaggtest; +\set query 'SELECT SUM(int_4), SUM(int_4) + 1 FROM ybaggtest' +:run; -- Verify NaN float values are respected by aggregates. 
INSERT INTO ybaggtest (id, float_4, float_8) VALUES (101, 'NaN', 'NaN'); -SELECT COUNT(float_4), SUM(float_4), MAX(float_4), MIN(float_4) FROM ybaggtest; -SELECT COUNT(float_8), SUM(float_8), MAX(float_8), MIN(float_8) FROM ybaggtest; +\set query 'SELECT COUNT(float_4), SUM(float_4), MAX(float_4), MIN(float_4) FROM ybaggtest' +:run; +\set query 'SELECT COUNT(float_8), SUM(float_8), MAX(float_8), MIN(float_8) FROM ybaggtest' +:run; + +-- Negative tests - pushdown not supported +EXPLAIN (COSTS OFF) SELECT int_2, COUNT(*), SUM(int_4) FROM ybaggtest GROUP BY int_2; +EXPLAIN (COSTS OFF) SELECT DISTINCT int_4 FROM ybaggtest; +EXPLAIN (COSTS OFF) SELECT COUNT(distinct int_4), SUM(int_4) FROM ybaggtest; -- -- Test NULL rows are handled properly by COUNT. @@ -50,6 +80,9 @@ CREATE TABLE ybaggtest2 ( a int ); +-- Create index where column a is not part of the key. +CREATE INDEX NONCONCURRENTLY ybaggtest2index ON ybaggtest2 ((1)) INCLUDE (a); + -- Insert NULL rows. INSERT INTO ybaggtest2 VALUES (NULL), (NULL), (NULL); @@ -57,75 +90,147 @@ INSERT INTO ybaggtest2 VALUES (NULL), (NULL), (NULL); INSERT INTO ybaggtest2 VALUES (1), (2), (3); -- Verify NULL rows are included in COUNT(*) but not in COUNT(row). -SELECT COUNT(*) FROM ybaggtest2; -SELECT COUNT(a) FROM ybaggtest2; -SELECT COUNT(*), COUNT(a) FROM ybaggtest2; +\set ss '/*+SeqScan(ybaggtest2)*/' +\set ios '/*+IndexOnlyScan(ybaggtest2 ybaggtest2index)*/' +\set query 'SELECT COUNT(*) FROM ybaggtest2' +:run; +-- TODO(#16417): update the following three index only scan explains to have +-- "Partial Aggregate: true" because pushdown will be allowed once the index's +-- constant 1 column is not requested by the aggregate node to the index only +-- scan node when using CP_SMALL_TLIST. +\set query 'SELECT COUNT(a) FROM ybaggtest2' +:run; +\set query 'SELECT COUNT(*), COUNT(a) FROM ybaggtest2' +:run; -- Verify MAX/MIN respect NULL values. 
-SELECT MAX(a), MIN(a) FROM ybaggtest2; +\set query 'SELECT MAX(a), MIN(a) FROM ybaggtest2' +:run; -- Verify SUM/MAX/MIN work as expected with constant arguments. -SELECT SUM(2), MAX(2), MIN(2) FROM ybaggtest2; -SELECT SUM(NULL::int), MAX(NULL), MIN(NULL) FROM ybaggtest2; +\set query 'SELECT SUM(2), MAX(2), MIN(2) FROM ybaggtest2' +:run; +\set query 'SELECT SUM(NULL::int), MAX(NULL), MIN(NULL) FROM ybaggtest2' +:run; +-- +-- Test column created with default value. +-- CREATE TABLE digit(k INT PRIMARY KEY, v TEXT NOT NULL); INSERT INTO digit VALUES(1, 'one'), (2, 'two'), (3, 'three'), (4, 'four'), (5, 'five'), (6, 'six'); CREATE TABLE test(k INT PRIMARY KEY); ALTER TABLE test ADD v1 int DEFAULT 5; ALTER TABLE test ADD v2 int DEFAULT 10; +CREATE INDEX NONCONCURRENTLY testindex ON test (k) INCLUDE (v1, v2); INSERT INTO test VALUES(1), (2), (3); -SELECT COUNT(*) FROM test; -SELECT COUNT(k) FROM test; -SELECT COUNT(v1) FROM test; -SELECT COUNT(v2) FROM test; -SELECT * FROM digit AS d INNER JOIN (SELECT COUNT(v2) AS count FROM test) AS c ON (d.k = c.count); +\set ss '/*+SeqScan(test)*/' +\set ios '/*+IndexOnlyScan(test testindex)*/' +\set query 'SELECT COUNT(*) FROM test' +:run; +\set query 'SELECT COUNT(k) FROM test' +:run; +\set query 'SELECT COUNT(v1) FROM test' +:run; +\set query 'SELECT COUNT(v2) FROM test' +:run; +\set query 'SELECT * FROM digit AS d INNER JOIN (SELECT COUNT(v2) AS count FROM test) AS c ON (d.k = c.count)' +:run; INSERT INTO test VALUES(4, NULL, 10), (5, 5, NULL), (6, 5, NULL); -SELECT COUNT(*) FROM test; -SELECT COUNT(k) FROM test; -SELECT COUNT(v1) FROM test; -SELECT COUNT(v2) FROM test; -SELECT * FROM digit AS d INNER JOIN (SELECT COUNT(*) AS count FROM test) AS c ON (d.k = c.count); -SELECT * FROM digit AS d INNER JOIN (SELECT COUNT(k) AS count FROM test) AS c ON (d.k = c.count); -SELECT * FROM digit AS d INNER JOIN (SELECT COUNT(v1) AS count FROM test) AS c ON (d.k = c.count); -SELECT * FROM digit AS d INNER JOIN (SELECT COUNT(v2) AS count 
FROM test) AS c ON (d.k = c.count); +\set query 'SELECT COUNT(*) FROM test' +:run; +\set query 'SELECT COUNT(k) FROM test' +:run; +\set query 'SELECT COUNT(v1) FROM test' +:run; +\set query 'SELECT COUNT(v2) FROM test' +:run; +\set query 'SELECT * FROM digit AS d INNER JOIN (SELECT COUNT(*) AS count FROM test) AS c ON (d.k = c.count)' +:run; +\set query 'SELECT * FROM digit AS d INNER JOIN (SELECT COUNT(k) AS count FROM test) AS c ON (d.k = c.count)' +:run; +\set query 'SELECT * FROM digit AS d INNER JOIN (SELECT COUNT(v1) AS count FROM test) AS c ON (d.k = c.count)' +:run; +\set query 'SELECT * FROM digit AS d INNER JOIN (SELECT COUNT(v2) AS count FROM test) AS c ON (d.k = c.count)' +:run; DROP TABLE test; DROP TABLE digit; +-- +-- Test dropped column. +-- CREATE TABLE test(K INT PRIMARY KEY, v1 INT NOT NULL, v2 INT NOT NULL); +CREATE INDEX NONCONCURRENTLY testindex ON test (K) INCLUDE (v2); INSERT INTO test VALUES(1, 1, 1), (2, 2, 2), (3, 3, 3); AlTER TABLE test DROP v1; -SELECT MIN(v2) FROM test; -SELECT MAX(v2) FROM test; -SELECT SUM(v2) FROM test; -SELECT COUNT(v2) FROM test; +\set query 'SELECT MIN(v2) FROM test' +:run; +\set query 'SELECT MAX(v2) FROM test' +:run; +\set query 'SELECT SUM(v2) FROM test' +:run; +\set query 'SELECT COUNT(v2) FROM test' +:run; --- For https://github.com/YugaByte/yugabyte-db/issues/10085 +-- +-- Test https://github.com/yugabyte/yugabyte-db/issues/10085: avoid pushdown +-- for certain cases. 
+-- -- Original test case that had postgres FATAL: CREATE TABLE t1(c0 DECIMAL ); +CREATE INDEX NONCONCURRENTLY t1index ON t1 (c0); INSERT INTO t1(c0) VALUES(0.4632167437031089463062016875483095645904541015625), (0.82173140818865475498711248292238451540470123291015625), (0.69990454445895500246166420765803195536136627197265625), (0.7554730989898816861938257716246880590915679931640625); ALTER TABLE ONLY t1 FORCE ROW LEVEL SECURITY, DISABLE ROW LEVEL SECURITY, NO FORCE ROW LEVEL SECURITY; INSERT INTO t1(c0) VALUES(0.9946693818538820952568357824929989874362945556640625), (0.13653666831997435249235195442452095448970794677734375), (0.3359001510719556993223022800520993769168853759765625), (0.312027233370160583802999099134467542171478271484375); -SELECT SUM(count) FROM (SELECT (CAST((((('[-1962327130,2000870418)'::int4range)*('(-1293215916,183586536]'::int4range)))-((('[-545024026,526859443]'::int4range)*(NULL)))) AS VARCHAR)~current_query())::INT as count FROM ONLY t1) as res; +\set ss '/*+SeqScan(t1)*/' +\set ios '/*+IndexOnlyScan(t1 t1index)*/' +\set query 'SELECT SUM(count) FROM (SELECT (CAST(((((''[-1962327130,2000870418)''::int4range)*(''(-1293215916,183586536]''::int4range)))-(((''[-545024026,526859443]''::int4range)*(NULL)))) AS VARCHAR)~current_query())::INT as count FROM ONLY t1) as res' +:run; -- Simplified test case that had postgres FATAL: CREATE TABLE t2(c0 DECIMAL ); +CREATE INDEX NONCONCURRENTLY t2index ON t2 (c0); INSERT INTO t2 VALUES(1), (2), (3); -SELECT SUM(r) < 6 from (SELECT random() as r from t2) as res; +\set ss '/*+SeqScan(t2)*/' +\set ios '/*+IndexOnlyScan(t2 t2index)*/' +\set query 'SELECT SUM(r) < 6 from (SELECT random() as r from t2) as res' +:run; -- Simplified test case that had postgres FATAL: CREATE TABLE t3(c0 DECIMAL ); +CREATE INDEX NONCONCURRENTLY t3index ON t3 (c0); INSERT INTO t3 VALUES(1), (2), (3); -SELECT SUM(r) from (SELECT (NULL=random())::int as r from t3) as res; +\set ss '/*+SeqScan(t3)*/' +\set ios '/*+IndexOnlyScan(t3 
t3index)*/' +\set query 'SELECT SUM(r) from (SELECT (NULL=random())::int as r from t3) as res' +:run; -- Test case that did not have postgres FATAL but showed wrong result 't': CREATE TABLE t4(c0 FLOAT8); +CREATE INDEX NONCONCURRENTLY t4index ON t4 (c0); INSERT INTO t4 VALUES(1), (2), (3); -SELECT SUM(r) = 6 from (SELECT random() as r from t4) as res; +\set ss '/*+SeqScan(t4)*/' +\set ios '/*+IndexOnlyScan(t4 t4index)*/' +\set query 'SELECT SUM(r) = 6 from (SELECT random() as r from t4) as res' +:run; --- Test EXPLAIN with aggregate pushdown -EXPLAIN (COSTS OFF) SELECT COUNT(*), SUM(int_4) FROM ybaggtest; --- Negative tests - pushdown not supported -EXPLAIN (COSTS OFF) SELECT int_2, COUNT(*), SUM(int_4) FROM ybaggtest GROUP BY int_2; -EXPLAIN (COSTS OFF) SELECT DISTINCT int_4 FROM ybaggtest; -EXPLAIN (COSTS OFF) SELECT COUNT(distinct int_4), SUM(int_4) FROM ybaggtest; +-- +-- System tables. +-- +\set ss '/*+SeqScan(pg_type)*/' +\set ios '/*+IndexOnlyScan(pg_type pg_type_typname_nsp_index)*/' +\set query 'SELECT MIN(typnamespace) FROM pg_type' +:run; + +-- +-- Colocation. +-- +CREATE DATABASE co COLOCATION TRUE; +\c co +CREATE TABLE t (i int, j int, k int); +CREATE INDEX NONCONCURRENTLY i ON t (j, k DESC, i); +INSERT INTO t VALUES (1, 2, 3), (4, 5, 6); +\set ss '/*+SeqScan(t)*/' +\set ios '/*+IndexOnlyScan(t i)*/' +\set query 'SELECT SUM(k), AVG(i), COUNT(*), MAX(j) FROM t' +:run; diff --git a/src/postgres/src/test/regress/sql/yb_catalog_version.sql b/src/postgres/src/test/regress/sql/yb_catalog_version.sql index 59a3fcc12750..586c2a1f9ae1 100644 --- a/src/postgres/src/test/regress/sql/yb_catalog_version.sql +++ b/src/postgres/src/test/regress/sql/yb_catalog_version.sql @@ -16,7 +16,7 @@ -- Display the initial catalog version. :display_catalog_version; --- The next CREATE ROLE will increment current_version. +-- The next CREATE ROLE will not increment current_version. 
CREATE ROLE cv_test_role; :display_catalog_version; @@ -24,7 +24,19 @@ CREATE ROLE cv_test_role; CREATE ROLE cv_test_role; :display_catalog_version; --- The next CREATE DATABASE will increment current_version. +-- The next CREATE ROLE will increment current_version. +CREATE ROLE cv_test_role2 IN ROLE cv_test_role; +:display_catalog_version; + +-- The next CREATE ROLE will increment current_version. +CREATE ROLE cv_test_role3 ADMIN cv_test_role; +:display_catalog_version; + +-- The next CREATE ROLE will increment current_version. +CREATE ROLE cv_test_role4 ROLE cv_test_role2, cv_test_role3; +:display_catalog_version; + +-- The next CREATE DATABASE will not increment current_version. CREATE DATABASE cv_test_database; :display_catalog_version; @@ -318,4 +330,4 @@ CREATE PROCEDURE p1() LANGUAGE PLPGSQL AS $$ BEGIN CALL p2(); END $$; CREATE PROCEDURE p2() LANGUAGE PLPGSQL AS $$ BEGIN CALL proc(2, 2); END $$; CALL p1(); -:display_catalog_version; \ No newline at end of file +:display_catalog_version; diff --git a/src/postgres/src/test/regress/sql/yb_distinct_pushdown.sql b/src/postgres/src/test/regress/sql/yb_distinct_pushdown.sql new file mode 100644 index 000000000000..7867bd82e015 --- /dev/null +++ b/src/postgres/src/test/regress/sql/yb_distinct_pushdown.sql @@ -0,0 +1,16 @@ +CREATE TABLE distinct_pushdown_table(r1 INT, r2 INT, PRIMARY KEY(r1 ASC, r2 ASC)); +INSERT INTO distinct_pushdown_table (SELECT 1, i FROM GENERATE_SERIES(1, 1000) AS i); + +-- Disable DISTINCT pushdown +SET yb_enable_distinct_pushdown TO off; + +-- Must pull even duplicate rows without pushdown. Verify that using EXPLAIN ANALYZE +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT DISTINCT r1 FROM distinct_pushdown_table WHERE r1 <= 10; + +-- Enable DISTINCT pushdown +SET yb_enable_distinct_pushdown TO on; + +-- Must pull fewer rows with pushdown. 
Verify that using EXPLAIN ANALYZE +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT DISTINCT r1 FROM distinct_pushdown_table WHERE r1 <= 10; + +DROP TABLE distinct_pushdown_table; diff --git a/src/postgres/src/test/regress/sql/yb_explicit_row_lock_planning.sql b/src/postgres/src/test/regress/sql/yb_explicit_row_lock_planning.sql new file mode 100644 index 000000000000..ac74725b6466 --- /dev/null +++ b/src/postgres/src/test/regress/sql/yb_explicit_row_lock_planning.sql @@ -0,0 +1,125 @@ +-- +-- YB tests for locking +-- + +CREATE TABLE yb_locks_t (k int PRIMARY KEY); +INSERT INTO yb_locks_t VALUES (1),(2),(3),(4),(5); + +CREATE TABLE yb_locks_t2 (k1 int, k2 int, k3 int, v int, PRIMARY KEY(k1, k2, k3)); +INSERT INTO yb_locks_t2 VALUES (1,2,3,4),(5,6,7,8); + +SET yb_lock_pk_single_rpc TO ON; + +-- Test plain (unlocked case). +EXPLAIN (COSTS OFF) +SELECT * FROM yb_locks_t WHERE k=5; +SELECT * FROM yb_locks_t WHERE k=5; + +-- Test single-RPC select+lock (no LockRows node). +EXPLAIN (COSTS OFF) +SELECT * FROM yb_locks_t WHERE k=5 FOR UPDATE; +SELECT * FROM yb_locks_t WHERE k=5 FOR UPDATE; + +-- Test other types of locking. +EXPLAIN (COSTS OFF) +SELECT * FROM yb_locks_t WHERE k=5 FOR SHARE; +SELECT * FROM yb_locks_t WHERE k=5 FOR SHARE; + +EXPLAIN (COSTS OFF) +SELECT * FROM yb_locks_t WHERE k=5 FOR NO KEY UPDATE; +SELECT * FROM yb_locks_t WHERE k=5 FOR NO KEY UPDATE; + +EXPLAIN (COSTS OFF) +SELECT * FROM yb_locks_t WHERE k=5 FOR KEY SHARE; +SELECT * FROM yb_locks_t WHERE k=5 FOR KEY SHARE; + +-- Test LockRows node (more RPCs), and scan is unlocked. +EXPLAIN (COSTS OFF) +SELECT * FROM yb_locks_t FOR UPDATE; +SELECT * FROM yb_locks_t FOR UPDATE; + +-- Test with multi-column primary key. +EXPLAIN (COSTS OFF) +SELECT * FROM yb_locks_t2 WHERE k1=1 AND k2=2 AND k3=3 FOR UPDATE; +SELECT * FROM yb_locks_t2 WHERE k1=1 AND k2=2 AND k3=3 FOR UPDATE; + +-- Test with partial column set for primary key (should use LockRows). 
+EXPLAIN (COSTS OFF) +SELECT * FROM yb_locks_t2 WHERE k1=1 AND k2=2 FOR UPDATE; +SELECT * FROM yb_locks_t2 WHERE k1=1 AND k2=2 FOR UPDATE; + +-- Test LockRows node is used for join. +EXPLAIN (COSTS OFF) +SELECT * FROM yb_locks_t2, yb_locks_t WHERE yb_locks_t2.k1 = yb_locks_t.k FOR UPDATE; +SELECT * FROM yb_locks_t2, yb_locks_t WHERE yb_locks_t2.k1 = yb_locks_t.k FOR UPDATE; + +-- In isolation level SERIALIZABLE, all locks are done during scans. +BEGIN ISOLATION LEVEL SERIALIZABLE; + +-- Test same locking as for REPEATABLE READ (default isolation). +EXPLAIN (COSTS OFF) +SELECT * FROM yb_locks_t WHERE k=5 FOR UPDATE; +SELECT * FROM yb_locks_t WHERE k=5 FOR UPDATE; + +-- Test no LockRows node for sequential scan. +EXPLAIN (COSTS OFF) +SELECT * FROM yb_locks_t FOR UPDATE; +SELECT * FROM yb_locks_t FOR UPDATE; + +-- Test no LockRows node for join. +EXPLAIN (COSTS OFF) +SELECT * FROM yb_locks_t2, yb_locks_t WHERE yb_locks_t2.k1 = yb_locks_t.k FOR UPDATE; +SELECT * FROM yb_locks_t2, yb_locks_t WHERE yb_locks_t2.k1 = yb_locks_t.k FOR UPDATE; + +COMMIT; + +-- Test with single-RPC select+lock turned off. +SET yb_lock_pk_single_rpc TO OFF; + +EXPLAIN (COSTS OFF) +SELECT * FROM yb_locks_t WHERE k=5 FOR UPDATE; +SELECT * FROM yb_locks_t WHERE k=5 FOR UPDATE; + +-- Test that with the yb_lock_pk_single_rpc off, SERIALIZABLE still locks during the scan +-- (no LockRows). +BEGIN ISOLATION LEVEL SERIALIZABLE; +EXPLAIN (COSTS OFF) +SELECT * FROM yb_locks_t WHERE k=5 FOR UPDATE; +SELECT * FROM yb_locks_t WHERE k=5 FOR UPDATE; +COMMIT; + +SET yb_lock_pk_single_rpc TO ON; + +CREATE INDEX ON yb_locks_t2 (v); + +-- Test with an index. We use a LockRows node for an index. +EXPLAIN (COSTS OFF) +SELECT * FROM yb_locks_t2 WHERE v=4 FOR UPDATE; +SELECT * FROM yb_locks_t2 WHERE v=4 FOR UPDATE; + +-- Isolation level SERIALIZABLE still locks with the scan though (no LockRows). 
+BEGIN ISOLATION LEVEL SERIALIZABLE; + +EXPLAIN (COSTS OFF) +SELECT * FROM yb_locks_t2 WHERE v=4 FOR UPDATE; +SELECT * FROM yb_locks_t2 WHERE v=4 FOR UPDATE; + +COMMIT; + +-- Test partitions. +CREATE TABLE yb_locks_partition (a char PRIMARY KEY) PARTITION BY LIST (a); +CREATE TABLE yb_locks_partition_default PARTITION OF yb_locks_partition DEFAULT; +CREATE TABLE yb_locks_partition_a PARTITION OF yb_locks_partition FOR VALUES IN ('a'); + +EXPLAIN (COSTS OFF) SELECT * FROM yb_locks_partition WHERE a = 'a' FOR UPDATE; +EXPLAIN (COSTS OFF) SELECT * FROM yb_locks_partition WHERE a = 'b' FOR UPDATE; + +BEGIN ISOLATION LEVEL SERIALIZABLE; +EXPLAIN (COSTS OFF) SELECT * FROM yb_locks_partition WHERE a = 'a' FOR UPDATE; +COMMIT; + +-- Test JSON. +EXPLAIN (COSTS OFF, FORMAT JSON) +SELECT * FROM yb_locks_t WHERE k=5 FOR UPDATE; + +DROP TABLE yb_locks_t, yb_locks_t2, yb_locks_partition; diff --git a/src/postgres/src/test/regress/sql/yb_get_range_split_clause.sql b/src/postgres/src/test/regress/sql/yb_get_range_split_clause.sql index bf2e96b78f8b..f40f64361276 100644 --- a/src/postgres/src/test/regress/sql/yb_get_range_split_clause.sql +++ b/src/postgres/src/test/regress/sql/yb_get_range_split_clause.sql @@ -439,6 +439,54 @@ CREATE TABLE tbl_with_include_clause ( SELECT yb_get_range_split_clause('tbl_with_include_clause'::regclass); DROP TABLE tbl_with_include_clause; +-- Test index SPLIT AT with INCLUDE clause +CREATE TABLE test_tbl ( + a TEXT, + b DOUBLE PRECISION, + PRIMARY KEY (a ASC) +) SPLIT AT VALUES(('11')); +CREATE INDEX test_idx on test_tbl( + b ASC +) INCLUDE (a) SPLIT AT VALUES ((1.1)); +SELECT yb_get_range_split_clause('test_idx'::regclass); +DROP INDEX test_idx; +DROP TABLE test_tbl; + +-- Test index SPLIT AT with INCLUDE clause +CREATE TABLE test_tbl ( + a INT, + b TEXT, + c CHAR, + d BOOLEAN, + e REAL, + PRIMARY KEY (a ASC, b ASC) +) SPLIT AT VALUES((1, '111')); +CREATE INDEX test_idx on test_tbl( + a ASC, + b ASC, + c ASC +) INCLUDE (d, e) SPLIT AT VALUES ((1, 
'11', '1')); +SELECT yb_get_range_split_clause('test_idx'::regclass); +DROP INDEX test_idx; +DROP TABLE test_tbl; + +-- Test index SPLIT AT with INCLUDE clause +CREATE TABLE test_tbl ( + a INT, + b INT, + c INT, + d INT, + e INT, + PRIMARY KEY (a DESC, b ASC) +) SPLIT AT VALUES((1, 1)); +CREATE INDEX test_idx on test_tbl( + a ASC, + b DESC +) INCLUDE (c, d, e) SPLIT AT VALUES ((1, 1)); +SELECT yb_get_range_split_clause('test_idx'::regclass); +DROP INDEX test_idx; +DROP TABLE test_tbl; + -- Test secondary index with duplicate columns and backwards order columns CREATE TABLE test_tbl ( k1 INT, diff --git a/src/postgres/src/test/regress/sql/yb_hdr_percentile.sql b/src/postgres/src/test/regress/sql/yb_hdr_percentile.sql new file mode 100644 index 000000000000..606a7722f9aa --- /dev/null +++ b/src/postgres/src/test/regress/sql/yb_hdr_percentile.sql @@ -0,0 +1,18 @@ +-- Testing hdr percentile function +SELECT yb_get_percentile('[{"[384.0,409.6)": 5}, {"[768.0,819.2)": 4}, {"[1126.4,1228.8)": 1}]', 50); +SELECT yb_get_percentile('[{"[384.0,409.6)": 5}, {"[768.0,819.2)": 4}, {"[1126.4,1228.8)": 1}]', 90); +SELECT yb_get_percentile('[{"[384.0,409.6)": 5}, {"[768.0,819.2)": 4}, {"[1126.4,1228.8)": 1}]', 99); +SELECT yb_get_percentile('[{"[384.0,409.6)": 5}, {"[768.0,819.2)": 4}, {"[1126.4,1228.8)": 1}]', 0); +SELECT yb_get_percentile('[{"[384.0,409.6)": 5}, {"[768.0,819.2)": 4}, {"[1126.4,1228.8)": 1}]', -0.1); +SELECT yb_get_percentile('[{"[384.0,409.6)": 5}, {"[768.0,819.2)": 4}, {"[1126.4,1228.8)": 1}]', 8892.3); +SELECT yb_get_percentile('[]', 90); +SELECT yb_get_percentile('[{"[-2.8,2e4)": 8}]', -10); +SELECT yb_get_percentile('[{"[-2.8,2e4)": 8}]', 90); +SELECT yb_get_percentile('[{"[-1.1e-3,5000)": 5}, {}]', -10); +SELECT yb_get_percentile('[{"[-1.1e-3,5000)": 5}, {}]', 100); +SELECT yb_get_percentile('[{"[12,)": 8}]', 0); +SELECT yb_get_percentile('[{"[12,)": 8}]', 50); +SELECT yb_get_percentile('[{"[12,)": 8}]', 100); +SELECT yb_get_percentile('[{"[1,2)": 5}, 
{"[3,4)": 4}, {"[5,)": 1}]', 50); +SELECT yb_get_percentile('[{"[1,2)": 5}, {"[3,4)": 4}, {"[5,)": 1}]', 90); +SELECT yb_get_percentile('[{"[1,2)": 5}, {"[3,4)": 4}, {"[5,)": 1}]', 99); diff --git a/src/postgres/src/test/regress/sql/yb_index_scan.sql b/src/postgres/src/test/regress/sql/yb_index_scan.sql index d5e3c86f901d..f3917f790989 100644 --- a/src/postgres/src/test/regress/sql/yb_index_scan.sql +++ b/src/postgres/src/test/regress/sql/yb_index_scan.sql @@ -432,3 +432,20 @@ EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) /*+ IndexOnlyScan(t_kv t_k EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) /*+ IndexOnlyScan(t_kv t_kv_pkey) */ SELECT 1 FROM t_kv; DROP TABLE t_kv; + +-- Test index SPLIT AT with INCLUDE clause +CREATE TABLE test_tbl ( + a INT, + b INT, + PRIMARY KEY (a ASC) +) SPLIT AT VALUES((1)); +CREATE INDEX test_idx on test_tbl( + b ASC +) INCLUDE (a) SPLIT AT VALUES ((1)); +INSERT INTO test_tbl VALUES (1, 2),(2, 1),(4, 3),(5, 4); +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT a, b FROM test_tbl WHERE a = 4; +SELECT a, b FROM test_tbl WHERE a = 4; +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT a, b FROM test_tbl WHERE b = 4; +SELECT a, b FROM test_tbl WHERE b = 4; +DROP INDEX test_idx; +DROP TABLE test_tbl; diff --git a/src/postgres/src/test/regress/sql/yb_index_scan_null_create.sql b/src/postgres/src/test/regress/sql/yb_index_scan_null_create.sql index d6f88c798425..29ffe7329276 100644 --- a/src/postgres/src/test/regress/sql/yb_index_scan_null_create.sql +++ b/src/postgres/src/test/regress/sql/yb_index_scan_null_create.sql @@ -1,7 +1,11 @@ -- Create tables for the null scan key tests +-- +-- As of 2023-06-21, the tables will default to 3 tablets, but in case those +-- defaults change, explicitly set the numbers here. The number of tablets +-- affects the number of requests shown in EXPLAIN DIST. 
-CREATE TABLE nulltest (a int, b int); +CREATE TABLE nulltest (a int, b int) SPLIT INTO 3 TABLETS; INSERT INTO nulltest VALUES (null, null), (null, 1), (1, null), (1, 1); -CREATE TABLE nulltest2 (x int, y int); +CREATE TABLE nulltest2 (x int, y int) SPLIT INTO 3 TABLETS; INSERT INTO nulltest2 VALUES (null, null); diff --git a/src/postgres/src/test/regress/sql/yb_index_scan_null_queries.sql b/src/postgres/src/test/regress/sql/yb_index_scan_null_queries.sql index cf0e4ae19494..0b4fce5879cc 100644 --- a/src/postgres/src/test/regress/sql/yb_index_scan_null_queries.sql +++ b/src/postgres/src/test/regress/sql/yb_index_scan_null_queries.sql @@ -1,56 +1,84 @@ -- Queries for the null scan key tests +SET client_min_messages = DEBUG1; +\set YB_DISABLE_ERROR_PREFIX on + -- Should return empty results (actual rows=0) -- The plans should not show any "Recheck" -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) /*+ IndexScan(t1) NestLoop(t2 t1) Leading((t2 t1)) */ SELECT * FROM nulltest t1 JOIN nulltest2 t2 ON t1.a = t2.x; -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) /*+ IndexScan(t1) NestLoop(t2 t1) Leading((t2 t1)) */ SELECT * FROM nulltest t1 JOIN nulltest2 t2 ON t1.a <= t2.x; -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) /*+ IndexScan(t1) NestLoop(t2 t1) Leading((t2 t1)) */ SELECT * FROM nulltest t1 JOIN nulltest2 t2 ON t1.a BETWEEN t2.x AND t2.x + 2; -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) /*+ IndexScan(t1) NestLoop(t2 t1) Leading((t2 t1)) */ SELECT * FROM nulltest t1 JOIN nulltest2 t2 ON (t1.a, t1.b) = (t2.x, t2.y); -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) /*+ IndexScan(t1) NestLoop(t2 t1) Leading((t2 t1)) */ 
SELECT * FROM nulltest t1 JOIN nulltest2 t2 ON (t1.a, t1.b) <= (t2.x, t2.y); -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) /*+ IndexScan(t1) */ SELECT * FROM nulltest t1 WHERE (a, b) <= (null, 1); -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) +/*+ IndexScan(t1 i_nulltest_ba) */ +SELECT * FROM nulltest t1 WHERE (a, b) <= (null, 1); + +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) /*+ IndexScan(t1) */ SELECT a FROM nulltest t1 WHERE a IN (null, null); +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) +/*+ IndexScan(t1 i_nulltest_ba) */ +SELECT a FROM nulltest t1 WHERE a IN (null, null); + -- Should return 1s /*+ IndexScan(t1) */ SELECT a FROM nulltest t1 WHERE a IN (null, 1); -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) /*+ IndexScan(t1) */ SELECT a FROM nulltest t1 WHERE a IN (null, 1); +/*+ IndexScan(t1 i_nulltest_ba) */ +SELECT a FROM nulltest t1 WHERE a IN (null, 1); + +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) +/*+ IndexScan(t1 i_nulltest_ba) */ +SELECT a FROM nulltest t1 WHERE a IN (null, 1); + /*+ IndexScan(t1) */ SELECT a FROM nulltest t1 WHERE (a, b) <= (2, null); -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) /*+ IndexScan(t1) */ SELECT a FROM nulltest t1 WHERE (a, b) <= (2, null); +/*+ IndexScan(t1 i_nulltest_ba) */ +SELECT a FROM nulltest t1 WHERE (a, b) <= (2, null); + +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, SUMMARY OFF) +/*+ IndexScan(t1 i_nulltest_ba) */ +SELECT a FROM nulltest t1 WHERE (a, b) <= (2, null); + -- Should return nulls /*+ IndexScan(t1) */ SELECT a FROM nulltest t1 WHERE a IS NULL; -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (ANALYZE, COSTS OFF, DIST ON, TIMING OFF, 
SUMMARY OFF) /*+ IndexScan(t1) */ SELECT a FROM nulltest t1 WHERE a IS NULL; + +RESET client_min_messages; +\unset YB_DISABLE_ERROR_PREFIX diff --git a/src/postgres/src/test/regress/sql/yb_lock_status.sql b/src/postgres/src/test/regress/sql/yb_lock_status.sql new file mode 100644 index 000000000000..40bd6ebb7925 --- /dev/null +++ b/src/postgres/src/test/regress/sql/yb_lock_status.sql @@ -0,0 +1,263 @@ +CREATE TABLE yb_lock_tests +( + k1 int, + k2 int, + r1 int, + r2 text, + v1 text, + v2 text, + PRIMARY KEY((k1, k2) HASH, r1,r2) +) SPLIT INTO 2 TABLETS; + +CREATE UNIQUE INDEX yb_lock_tests_k1_k2 ON yb_lock_tests (k1,k2) SPLIT INTO 2 TABLETS; + +CREATE FUNCTION is_between_now_and_clock_timestamp(input_time timestamptz) +RETURNS boolean +AS $$ +BEGIN + RETURN input_time >= now() AND input_time <= clock_timestamp(); +END; +$$ LANGUAGE plpgsql; + + +CREATE +OR REPLACE FUNCTION validate_and_return_lock_status(input_relation oid, input_transaction_id uuid, + OUT locktype text, + OUT relation text, OUT mode text[], OUT granted boolean, + OUT fastpath boolean, OUT valid_waitstart boolean, + OUT valid_waitend boolean, OUT has_node boolean, + OUT has_tablet_id boolean, + OUT has_transaction_id boolean, + OUT valid_subtransaction_id boolean, + OUT has_status_tablet_id boolean, + OUT is_explicit boolean, + OUT hash_cols text[], + OUT range_cols text[], OUT attnum smallint, OUT column_id integer, + OUT multiple_rows_locked boolean, OUT num_blocking int4) + RETURNS SETOF record +AS +$$ +DECLARE + difference record; +BEGIN + FOR difference IN + SELECT + l.locktype, + l.database, + l.relation, + l.pid, + array_to_string(l.mode, ','), + l.granted, + l.fastpath, + l.waitstart, + l.waitend, + CASE WHEN l.node IS NOT NULL THEN to_jsonb(l.node) ELSE 'null'::jsonb END AS node, + CASE WHEN l.tablet_id IS NOT NULL THEN to_jsonb(l.tablet_id) ELSE 'null'::jsonb END AS tablet_id, + CASE WHEN l.transaction_id IS NOT NULL THEN to_jsonb(l.transaction_id) ELSE 'null'::jsonb END AS 
transaction_id, + CASE WHEN l.subtransaction_id IS NOT NULL THEN to_jsonb(l.subtransaction_id) ELSE 'null'::jsonb END AS subtransaction_id, + CASE WHEN l.is_explicit IS NOT NULL THEN to_jsonb(l.is_explicit) ELSE 'null'::jsonb END AS is_explicit, + CASE WHEN l.hash_cols IS NOT NULL OR l.range_cols IS NOT NULL THEN to_jsonb(l.hash_cols || l.range_cols) ELSE 'null'::jsonb END AS cols, + CASE WHEN l.attnum IS NOT NULL THEN to_jsonb(l.attnum) ELSE 'null'::jsonb END AS attnum, + CASE WHEN l.column_id IS NOT NULL THEN to_jsonb(l.column_id) ELSE 'null'::jsonb END AS column_id, + CASE WHEN l.multiple_rows_locked IS NOT NULL THEN to_jsonb(l.multiple_rows_locked) ELSE 'null'::jsonb END AS multiple_rows_locked, + CASE WHEN l.blocked_by IS NOT NULL THEN to_jsonb(l.blocked_by) ELSE 'null'::jsonb END AS blocked_by + FROM + yb_lock_status(null, null) l + EXCEPT + SELECT + p.locktype, + p.database, + p.relation, + p.pid, + p.mode, + p.granted, + p.fastpath, + p.waitstart, + p.waitend, + p.ybdetails->'node', + p.ybdetails->'tablet_id', + p.ybdetails->'transactionid', + p.ybdetails->'subtransaction_id', + p.ybdetails->'is_explicit', + p.ybdetails->'keyrangedetails'->'cols', + p.ybdetails->'keyrangedetails'->'attnum', + p.ybdetails->'keyrangedetails'->'column_id', + p.ybdetails->'keyrangedetails'->'multiple_rows_locked', + p.ybdetails->'blocked_by' + FROM pg_locks p + LOOP + RAISE EXCEPTION 'There is a difference in the output of pg_locks and yb_lock_status. 
The difference is: %', difference; + END LOOP; + + RETURN QUERY SELECT l.locktype, + l.relation::regclass::text, + l.mode, + l.granted, + l.fastpath, + is_between_now_and_clock_timestamp(l.waitstart) as valid_waitstart, + is_between_now_and_clock_timestamp(l.waitend) as valid_waitend, + CASE WHEN l.node IS NOT NULL THEN true ELSE false END as has_node, + CASE WHEN l.tablet_id IS NOT NULL THEN true ELSE FALSE END as has_tablet_id, + CASE WHEN l.transaction_id IS NOT NULL THEN true ELSE FALSE END as has_transaction_id, + (l.subtransaction_id > 0) as valid_subtransaction_id, + CASE WHEN l.status_tablet_id IS NOT NULL THEN true ELSE FALSE END as has_status_tablet_id, + l.is_explicit, + l.hash_cols, + l.range_cols, + l.attnum, + l.column_id, + l.multiple_rows_locked, + array_length(l.blocked_by, 1) + -- TODO: Add the relation arg when we support querying by relation + FROM yb_lock_status(null, input_transaction_id) l + WHERE l.relation = input_relation + ORDER BY l.relation::regclass::text, l.transaction_id, l.hash_cols NULLS FIRST, + l.range_cols NULLS FIRST, l.column_id NULLS FIRST; +END ; +$$ LANGUAGE plpgsql; + +-- Basic queries +SELECT true FROM yb_lock_status(null, null); +SELECT true FROM yb_lock_status('yb_lock_tests'::regclass, null); +SELECT true FROM yb_lock_status('yb_lock_tests'::regclass::int4, null); +SELECT true FROM yb_lock_status(null, 'bogus'); +SELECT true FROM yb_lock_status(null, '10000000-2000-3000-1000-400000000000'); +SELECT true FROM yb_lock_status('yb_lock_tests'::regclass, '10000000-2000-3000-1000-400000000000'); + +-- READ COMMITTED +-- Basic insert +BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; +INSERT INTO yb_lock_tests VALUES (1, 1, 1, 'one', 1, 1); +INSERT INTO yb_lock_tests VALUES (2, 2, 2, 'two', 2, 2); +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); +COMMIT; + +-- Basic Column Update +BEGIN TRANSACTION ISOLATION 
LEVEL READ COMMITTED; +UPDATE yb_lock_tests SET v1 = 2 WHERE k1 = 1 AND k2 = 1; +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); +ABORT; + +-- Basic primary key update +BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; +UPDATE yb_lock_tests SET r1 = 2 WHERE k1 = 1 AND k2 = 1; +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); +ABORT; + +BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; +UPDATE yb_lock_tests SET k2 = 2 WHERE k1 = 1 AND k2 = 1; +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); +ABORT; + +-- SELECT FOR SHARE +BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; +SELECT * FROM yb_lock_tests FOR SHARE; +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); +COMMIT; + +-- SELECT FOR KEY SHARE +BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; +SELECT * FROM yb_lock_tests FOR KEY SHARE; +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); +COMMIT; + +-- SELECT FOR UPDATE +BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED; +SELECT * FROM yb_lock_tests FOR UPDATE; +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); +ABORT; + +-- SERIALIZABLE tests +BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE; +SELECT * from yb_lock_tests; +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); +SELECT * FROM 
validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); +COMMIT; + +BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE; +SELECT * from yb_lock_tests where k1 = 1; +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); +COMMIT; + +BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE; +SELECT * from yb_lock_tests where k1 = 1 and k2 = 1; +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); +COMMIT; + +BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE; +SELECT * from yb_lock_tests where k1 = 1 and k2 = 1 and r1 = 1 and r2 = 'one'; +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); +COMMIT; + +-- Foreign key reference +CREATE TABLE fk_reference +( + k1 int, + k2 int, + r1 int, + r2 text, + FOREIGN KEY (k1, k2, r1, r2) REFERENCES yb_lock_tests (k1, k2, r1, r2), + PRIMARY KEY (k1, k2, r1, r2) +); + +BEGIN; +INSERT INTO fk_reference VALUES(1,1,1,'one'); +SELECT * FROM validate_and_return_lock_status('yb_lock_tests'::regclass, null); +SELECT * FROM validate_and_return_lock_status('fk_reference'::regclass, null); +SELECT * FROM validate_and_return_lock_status('yb_lock_tests_k1_k2'::regclass, null); +COMMIT; + +-- When a number of rows are inserted +BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ; +INSERT INTO yb_lock_tests SELECT i, i, i, 'value', i, i from generate_series(10, 20) i; +-- yb_lock_status returns entries from all tablets in the table +-- TODO: Remove WHERE when we support the relation argument +SELECT COUNT(DISTINCT(tablet_id)) FROM yb_lock_status('yb_lock_tests'::regclass, null) + WHERE relation = 'yb_lock_tests'::regclass; +ABORT; + +-- Validate attnum +CREATE TABLE attno_test_table(a int, b int, c int, d int, e 
int, f int, PRIMARY KEY((e, f) hash, b)); +INSERT INTO attno_test_table VALUES (1,1,1,1,1,1); + +-- Because the primary key is on (e,f,b), the first attnum 'a' should be after the primary key in docdb +BEGIN; UPDATE attno_test_table SET a = 2 WHERE e = 1 AND f = 1 AND b = 1; +SELECT l.relation::regclass, a.attname, l.locktype, l.mode, l.hash_cols, l.range_cols, l.attnum, l.column_id +FROM yb_lock_status(null,null) l LEFT JOIN pg_attribute a ON a.attrelid = l.relation AND a.attnum = l.attnum; +ABORT; + +-- 'c' should be the second non-key column in docdb, and the second attnum +BEGIN; UPDATE attno_test_table SET c = 2 WHERE e = 1 AND f = 1 AND b = 1; +SELECT l.relation::regclass, a.attname, l.locktype, l.mode, l.hash_cols, l.range_cols, l.attnum, l.column_id +FROM yb_lock_status(null,null) l LEFT JOIN pg_attribute a ON a.attrelid = l.relation AND a.attnum = l.attnum; +ABORT; + +ALTER TABLE attno_test_table DROP COLUMN c; +-- 'd' is the fourth attnum, which should not have changed when we dropped column 'c' +BEGIN; UPDATE attno_test_table SET d = 2 WHERE e = 1 AND f = 1 AND b = 1; +SELECT l.relation::regclass, a.attname, l.locktype, l.mode, l.hash_cols, l.range_cols, l.attnum, l.column_id +FROM yb_lock_status(null,null) l LEFT JOIN pg_attribute a ON a.attrelid = l.relation AND a.attnum = l.attnum; +ABORT; + +ALTER TABLE attno_test_table ADD COLUMN c text; +-- After re-adding 'c', it should be the last column both in docdb and in pg_attribute +BEGIN; UPDATE attno_test_table SET c = 'test' WHERE e = 1 AND f = 1 AND b = 1; +SELECT l.relation::regclass, a.attname, l.locktype, l.mode, l.hash_cols, l.range_cols, l.attnum, l.column_id +FROM yb_lock_status(null,null) l LEFT JOIN pg_attribute a ON a.attrelid = l.relation AND a.attnum = l.attnum; +ABORT; + +-- Should not see any values +SELECT * FROM validate_and_return_lock_status(null, null); + +-- TODO: Add support for colocated tables + diff --git a/src/postgres/src/test/regress/sql/yb_pg_partition_prune.sql
b/src/postgres/src/test/regress/sql/yb_pg_partition_prune.sql index 424213ba5830..d45018efd8f7 100644 --- a/src/postgres/src/test/regress/sql/yb_pg_partition_prune.sql +++ b/src/postgres/src/test/regress/sql/yb_pg_partition_prune.sql @@ -84,6 +84,7 @@ explain (costs off) select * from rlp where a = 1 or b = 'ab'; explain (costs off) select * from rlp where a > 20 and a < 27; explain (costs off) select * from rlp where a = 29; explain (costs off) select * from rlp where a >= 29; +explain (costs off) select * from rlp where a < 1 or (a > 20 and a < 25); -- redundant clauses are eliminated explain (costs off) select * from rlp where a > 1 and a = 10; /* only default */ diff --git a/src/postgres/src/test/regress/sql/yb_reindex.sql b/src/postgres/src/test/regress/sql/yb_reindex.sql index b6fb3f0c8608..8d0a74d68e60 100644 --- a/src/postgres/src/test/regress/sql/yb_reindex.sql +++ b/src/postgres/src/test/regress/sql/yb_reindex.sql @@ -112,7 +112,7 @@ SET yb_non_ddl_txn_for_sys_tables_allowed TO on; UPDATE pg_yb_catalog_version SET current_version = current_version + 1; UPDATE pg_yb_catalog_version SET last_breaking_version = current_version; RESET yb_non_ddl_txn_for_sys_tables_allowed; -SELECT current_version = last_breaking_version from pg_yb_catalog_version; +SELECT distinct(current_version = last_breaking_version) from pg_yb_catalog_version; -- Do update that goes to table but doesn't go to index. UPDATE tmp SET i = 11 WHERE j = -5; -- Enable reads/writes to the index. @@ -123,7 +123,7 @@ SET yb_non_ddl_txn_for_sys_tables_allowed TO on; UPDATE pg_yb_catalog_version SET current_version = current_version + 1; UPDATE pg_yb_catalog_version SET last_breaking_version = current_version; RESET yb_non_ddl_txn_for_sys_tables_allowed; -SELECT current_version = last_breaking_version from pg_yb_catalog_version; +SELECT distinct(current_version = last_breaking_version) from pg_yb_catalog_version; -- Show the corruption. 
/*+SeqScan(tmp) */ SELECT i FROM tmp WHERE j = -5; @@ -137,7 +137,7 @@ SET yb_non_ddl_txn_for_sys_tables_allowed TO on; UPDATE pg_yb_catalog_version SET current_version = current_version + 1; UPDATE pg_yb_catalog_version SET last_breaking_version = current_version; RESET yb_non_ddl_txn_for_sys_tables_allowed; -SELECT current_version = last_breaking_version from pg_yb_catalog_version; +SELECT distinct(current_version = last_breaking_version) from pg_yb_catalog_version; -- 3. reindex (for temp index) REINDEX INDEX tmp_j_idx; diff --git a/src/postgres/src/test/regress/yb_distinct_pushdown_schedule b/src/postgres/src/test/regress/yb_distinct_pushdown_schedule new file mode 100644 index 000000000000..08faa78d0c2f --- /dev/null +++ b/src/postgres/src/test/regress/yb_distinct_pushdown_schedule @@ -0,0 +1,6 @@ +# src/test/regress/yb_distinct_pushdown_schedule +# +#################################################################################################### +# Postgres Testsuites: This suite includes tests on distinct pushdown. 
+#################################################################################################### +test: yb_distinct_pushdown diff --git a/src/postgres/src/test/regress/yb_misc_serial_schedule b/src/postgres/src/test/regress/yb_misc_serial_schedule index a6cac427d981..3ad00c6546b2 100644 --- a/src/postgres/src/test/regress/yb_misc_serial_schedule +++ b/src/postgres/src/test/regress/yb_misc_serial_schedule @@ -16,3 +16,4 @@ test: yb_dependency test: yb_create_language test: yb_get_range_split_clause test: yb_create_table_like +test: yb_explicit_row_lock_planning diff --git a/src/postgres/src/test/regress/yb_percentile_schedule b/src/postgres/src/test/regress/yb_percentile_schedule new file mode 100644 index 000000000000..2ce688cd6966 --- /dev/null +++ b/src/postgres/src/test/regress/yb_percentile_schedule @@ -0,0 +1,6 @@ +# src/test/regress/yb_percentile_schedule +# +#################################################################################################### +# Tests for yb_get_percentile jsonb function +#################################################################################################### +test: yb_hdr_percentile diff --git a/src/postgres/src/test/regress/yb_proc_schedule b/src/postgres/src/test/regress/yb_proc_schedule index 0e55aa72f25b..bb9973435be4 100644 --- a/src/postgres/src/test/regress/yb_proc_schedule +++ b/src/postgres/src/test/regress/yb_proc_schedule @@ -8,3 +8,4 @@ test: yb_function test: yb_functional_indexes test: yb_functional_index_multiple_column test: yb_get_current_transaction_priority +test: yb_lock_status