diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 00000000000..c6f1bef64aa
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,31 @@
+name: build
+
+on:
+  push:
+  pull_request:
+
+jobs:
+  test:
+    runs-on:
+      - ubuntu-20.04
+    strategy:
+      fail-fast: false
+      matrix:
+        compiler: [clang, gcc]
+        check_type: [normal, debug]
+    env:
+      LLVM_VER: 10
+      COMPILER: ${{ matrix.compiler }}
+      CHECK_TYPE: ${{ matrix.check_type }}
+    steps:
+      - name: Checkout code into workspace directory
+        uses: actions/checkout@v4
+      - name: Setup prerequisites
+        run: bash ./ci/prerequisites.sh
+      - name: Build
+        run: bash ./ci/build.sh
+      - name: Check
+        run: bash ./ci/check.sh
+      - name: Check output
+        run: bash ./ci/check_output.sh
+        if: ${{ success() || failure() }}
diff --git a/ci/build.sh b/ci/build.sh
new file mode 100644
index 00000000000..f541929e69c
--- /dev/null
+++ b/ci/build.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+set -eu
+
+# Select the compiler requested by the CI matrix ($COMPILER and
+# $LLVM_VER are exported by the workflow).
+if [ "$COMPILER" = "clang" ]; then
+  export CC=clang-$LLVM_VER
+else
+  export CC=gcc
+fi
+
+# configure & build; debug builds disable optimization and enable assertions
+if [ "$CHECK_TYPE" = "debug" ]; then
+  CFLAGS="-O0" ./configure --enable-debug --enable-cassert --enable-tap-tests --with-icu
+else
+  ./configure --disable-debug --disable-cassert --enable-tap-tests --with-icu
+fi
+
+# build the server, then the contrib modules
+make -sj4
+make -sj4 -C contrib
diff --git a/ci/check.sh b/ci/check.sh
new file mode 100644
index 00000000000..faa8c25e84a
--- /dev/null
+++ b/ci/check.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+set -eu
+
+# remove the soft limit on core dump size so crashes produce core files
+ulimit -S -c unlimited
+# redirect core dumps to a per-run directory so check_output.sh can find them
+mkdir -p /tmp/cores-$GITHUB_SHA-$TIMESTAMP
+sudo sh -c "echo \"/tmp/cores-$GITHUB_SHA-$TIMESTAMP/%t_%p_%s.core\" > /proc/sys/kernel/core_pattern"
+
+make check-world -j4
diff --git a/ci/check_output.sh b/ci/check_output.sh
new file mode 100644
index 00000000000..ae26cf63d68
--- /dev/null
+++ b/ci/check_output.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+set -eu
+
+status=0
+
+# show regression diffs, if any test left one behind
+for f in $(find . -name regression.diffs); do
+  echo "========= Contents of $f"
+  cat "$f"
+  status=1
+done
+
+# Collect core dumps, if any.  The cores directory may not exist when an
+# earlier step failed before creating it; don't let find abort us (set -e).
+cores=$(find /tmp/cores-$GITHUB_SHA-$TIMESTAMP/ -name '*.core' 2>/dev/null || true)
+
+if [ -n "$cores" ]; then
+  for corefile in $cores; do
+    # cores with the "_3" suffix (signal 3 per core_pattern's %s) are
+    # deliberately skipped
+    if [[ $corefile != *_3.core ]]; then
+      binary=$(gdb -quiet -core $corefile -batch -ex 'info auxv' | grep AT_EXECFN | perl -pe "s/^.*\"(.*)\"\$/\$1/g")
+      echo dumping $corefile for $binary
+      gdb --batch --quiet -ex "thread apply all bt full" -ex "quit" $binary $corefile
+      status=1
+    fi
+  done
+fi
+
+rm -rf /tmp/cores-$GITHUB_SHA-$TIMESTAMP
+
+exit $status
diff --git a/ci/prerequisites.sh b/ci/prerequisites.sh
new file mode 100644
index 00000000000..b26251b711c
--- /dev/null
+++ b/ci/prerequisites.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+set -eu
+
+# print the hostname to be able to identify runner by logs
+echo "HOSTNAME=$(hostname)"
+TIMESTAMP=$(date +%s)
+# export the timestamp to later workflow steps (used for the core dump dir)
+echo "TIMESTAMP=$TIMESTAMP" >> $GITHUB_ENV
+echo "TIMESTAMP=$TIMESTAMP"
+
+# refresh package indexes before the first install; stale indexes on a
+# fresh runner can otherwise make apt-get install fail
+sudo apt-get update -qq
+
+sudo apt-get -y install -qq wget ca-certificates
+
+apt_packages="build-essential flex bison pkg-config libreadline-dev make gdb libipc-run-perl libicu-dev python3 python3-dev python3-pip python3-setuptools python3-testresources"
+
+if [ "$COMPILER" = "clang" ]; then
+  apt_packages="$apt_packages llvm-$LLVM_VER clang-$LLVM_VER clang-tools-$LLVM_VER"
+fi
+
+# install required packages
+sudo apt-get -o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold" -y install -qq $apt_packages
diff --git a/configure b/configure
index 6db03e4a228..9da3f6a9af1 100755
--- a/configure
+++ b/configure
@@ -628,6 +628,7 @@ ac_includes_default="\
ac_subst_vars='LTLIBOBJS
vpath_build
PG_SYSROOT
+ORIOLEDB_PATCHSET_VERSION
PG_VERSION_NUM
LDFLAGS_EX_BE
PROVE
@@ -6663,6 +6664,99 @@ fi
if test -n "$NOT_THE_CFLAGS"; then
CFLAGS="$CFLAGS -Wno-cast-function-type-strict"
fi
+ if test x"$host_cpu" == x"aarch64"; then
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${CC} supports -moutline-atomics, for CFLAGS" >&5
+$as_echo_n "checking whether ${CC} supports -moutline-atomics, for CFLAGS... " >&6; }
+if ${pgac_cv_prog_CC_cflags__moutline_atomics+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ pgac_save_CFLAGS=$CFLAGS
+pgac_save_CC=$CC
+CC=${CC}
+CFLAGS="${CFLAGS} -moutline-atomics"
+ac_save_c_werror_flag=$ac_c_werror_flag
+ac_c_werror_flag=yes
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ pgac_cv_prog_CC_cflags__moutline_atomics=yes
+else
+ pgac_cv_prog_CC_cflags__moutline_atomics=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_c_werror_flag=$ac_save_c_werror_flag
+CFLAGS="$pgac_save_CFLAGS"
+CC="$pgac_save_CC"
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_prog_CC_cflags__moutline_atomics" >&5
+$as_echo "$pgac_cv_prog_CC_cflags__moutline_atomics" >&6; }
+if test x"$pgac_cv_prog_CC_cflags__moutline_atomics" = x"yes"; then
+ CFLAGS="${CFLAGS} -moutline-atomics"
+fi
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${CXX} supports -moutline-atomics, for CXXFLAGS" >&5
+$as_echo_n "checking whether ${CXX} supports -moutline-atomics, for CXXFLAGS... " >&6; }
+if ${pgac_cv_prog_CXX_cxxflags__moutline_atomics+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ pgac_save_CXXFLAGS=$CXXFLAGS
+pgac_save_CXX=$CXX
+CXX=${CXX}
+CXXFLAGS="${CXXFLAGS} -moutline-atomics"
+ac_save_cxx_werror_flag=$ac_cxx_werror_flag
+ac_cxx_werror_flag=yes
+ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+ pgac_cv_prog_CXX_cxxflags__moutline_atomics=yes
+else
+ pgac_cv_prog_CXX_cxxflags__moutline_atomics=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+ac_cxx_werror_flag=$ac_save_cxx_werror_flag
+CXXFLAGS="$pgac_save_CXXFLAGS"
+CXX="$pgac_save_CXX"
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_prog_CXX_cxxflags__moutline_atomics" >&5
+$as_echo "$pgac_cv_prog_CXX_cxxflags__moutline_atomics" >&6; }
+if test x"$pgac_cv_prog_CXX_cxxflags__moutline_atomics" = x"yes"; then
+ CXXFLAGS="${CXXFLAGS} -moutline-atomics"
+fi
+
+
+ fi
elif test "$ICC" = yes; then
# Intel's compiler has a bug/misoptimization in checking for
# division by NAN (NaN == 0), -mp1 fixes it, so add it to the CFLAGS.
@@ -15263,7 +15357,7 @@ fi
LIBS_including_readline="$LIBS"
LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
-for ac_func in backtrace_symbols copyfile copy_file_range getifaddrs getpeerucred inet_pton kqueue mbstowcs_l memset_s posix_fallocate ppoll pthread_is_threaded_np setproctitle setproctitle_fast strchrnul strsignal syncfs sync_file_range uselocale wcstombs_l
+for ac_func in backtrace_symbols copyfile copy_file_range getifaddrs getpeerucred inet_pton kqueue mbstowcs_l memset_s posix_fallocate ppoll setproctitle setproctitle_fast strchrnul strsignal syncfs sync_file_range uselocale wcstombs_l
do :
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
@@ -19203,6 +19297,10 @@ _ACEOF
+# Needed to check postgresql patches git tag during orioledb extension build
+ORIOLEDB_PATCHSET_VERSION=`git describe --tags | cut -d'_' -f2`
+
+
# If we are inserting PG_SYSROOT into CPPFLAGS, do so symbolically not
# literally, so that it's possible to override it at build time using
# a command like "make ... PG_SYSROOT=path". This has to be done after
diff --git a/configure.ac b/configure.ac
index 7531366b758..c1531abdd38 100644
--- a/configure.ac
+++ b/configure.ac
@@ -580,6 +580,11 @@ if test "$GCC" = yes -a "$ICC" = no; then
   if test -n "$NOT_THE_CFLAGS"; then
     CFLAGS="$CFLAGS -Wno-cast-function-type-strict"
   fi
+  # Use outline atomics on aarch64 when the compiler supports them.
+  # Note "=" not "==": configure runs under a POSIX shell, and e.g.
+  # dash's test(1) does not accept "==".
+  if test x"$host_cpu" = x"aarch64"; then
+    PGAC_PROG_CC_CFLAGS_OPT([-moutline-atomics])
+    PGAC_PROG_CXX_CFLAGS_OPT([-moutline-atomics])
+  fi
 elif test "$ICC" = yes; then
   # Intel's compiler has a bug/misoptimization in checking for
   # division by NAN (NaN == 0), -mp1 fixes it, so add it to the CFLAGS.
@@ -1758,7 +1762,6 @@ AC_CHECK_FUNCS(m4_normalize([
memset_s
posix_fallocate
ppoll
- pthread_is_threaded_np
setproctitle
setproctitle_fast
strchrnul
@@ -2447,6 +2450,10 @@ $AWK '{printf "%d%04d", $1, $2}'`"]
AC_DEFINE_UNQUOTED(PG_VERSION_NUM, $PG_VERSION_NUM, [PostgreSQL version as a number])
AC_SUBST(PG_VERSION_NUM)
+# Needed to check postgresql patches git tag during orioledb extension build
+[ORIOLEDB_PATCHSET_VERSION=`git describe --tags | cut -d'_' -f2`]
+AC_SUBST(ORIOLEDB_PATCHSET_VERSION)
+
# If we are inserting PG_SYSROOT into CPPFLAGS, do so symbolically not
# literally, so that it's possible to override it at build time using
# a command like "make ... PG_SYSROOT=path". This has to be done after
diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c
index 34990c5cea3..ed4497f9620 100644
--- a/contrib/amcheck/verify_nbtree.c
+++ b/contrib/amcheck/verify_nbtree.c
@@ -994,7 +994,7 @@ heap_entry_is_visible(BtreeCheckState *state, ItemPointer tid)
TupleTableSlot *slot = table_slot_create(state->heaprel, NULL);
tid_visible = table_tuple_fetch_row_version(state->heaprel,
- tid, state->snapshot, slot);
+ PointerGetDatum(tid), state->snapshot, slot);
if (slot != NULL)
ExecDropSingleTupleTableSlot(slot);
diff --git a/contrib/bloom/blinsert.c b/contrib/bloom/blinsert.c
index f8a1061abb9..7873118d112 100644
--- a/contrib/bloom/blinsert.c
+++ b/contrib/bloom/blinsert.c
@@ -172,7 +172,7 @@ blbuildempty(Relation index)
*/
bool
blinsert(Relation index, Datum *values, bool *isnull,
- ItemPointer ht_ctid, Relation heapRel,
+ Datum tupleid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
IndexInfo *indexInfo)
@@ -189,6 +189,7 @@ blinsert(Relation index, Datum *values, bool *isnull,
BlockNumber blkno = InvalidBlockNumber;
OffsetNumber nStart;
GenericXLogState *state;
+ ItemPointer ht_ctid = DatumGetItemPointer(tupleid);
insertCtx = AllocSetContextCreate(CurrentMemoryContext,
"Bloom insert temporary context",
diff --git a/contrib/bloom/bloom.h b/contrib/bloom/bloom.h
index fba3ba77711..b9aaca16fa2 100644
--- a/contrib/bloom/bloom.h
+++ b/contrib/bloom/bloom.h
@@ -189,7 +189,7 @@ extern bool blvalidate(Oid opclassoid);
/* index access method interface functions */
extern bool blinsert(Relation index, Datum *values, bool *isnull,
- ItemPointer ht_ctid, Relation heapRel,
+ Datum tupleid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
struct IndexInfo *indexInfo);
diff --git a/contrib/bloom/blutils.c b/contrib/bloom/blutils.c
index 6836129c90d..9b72303c895 100644
--- a/contrib/bloom/blutils.c
+++ b/contrib/bloom/blutils.c
@@ -131,7 +131,8 @@ blhandler(PG_FUNCTION_ARGS)
amroutine->ambuild = blbuild;
amroutine->ambuildempty = blbuildempty;
- amroutine->aminsert = blinsert;
+ amroutine->aminsert = NULL;
+ amroutine->aminsertextended = blinsert;
amroutine->aminsertcleanup = NULL;
amroutine->ambulkdelete = blbulkdelete;
amroutine->amvacuumcleanup = blvacuumcleanup;
diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c
index 38a539dad1b..cff8b945297 100644
--- a/contrib/pageinspect/heapfuncs.c
+++ b/contrib/pageinspect/heapfuncs.c
@@ -368,6 +368,7 @@ tuple_data_split_internal(Oid relid, char *tupdata,
*/
if (VARATT_IS_EXTERNAL(tupdata + off) &&
!VARATT_IS_EXTERNAL_ONDISK(tupdata + off) &&
+ !VARATT_IS_EXTERNAL_ORIOLEDB(tupdata + off) &&
!VARATT_IS_EXTERNAL_INDIRECT(tupdata + off))
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
diff --git a/contrib/test_decoding/test_decoding.c b/contrib/test_decoding/test_decoding.c
index 7c50d139698..02d5c2e07da 100644
--- a/contrib/test_decoding/test_decoding.c
+++ b/contrib/test_decoding/test_decoding.c
@@ -578,7 +578,7 @@ tuple_to_stringinfo(StringInfo s, TupleDesc tupdesc, HeapTuple tuple, bool skip_
/* print data */
if (isnull)
appendStringInfoString(s, "null");
- else if (typisvarlena && VARATT_IS_EXTERNAL_ONDISK(origval))
+ else if (typisvarlena && (VARATT_IS_EXTERNAL_ONDISK(origval) || VARATT_IS_EXTERNAL_ORIOLEDB(origval)))
appendStringInfoString(s, "unchanged-toast-datum");
else if (!typisvarlena)
print_literal(s, typid,
diff --git a/doc/src/sgml/indexam.sgml b/doc/src/sgml/indexam.sgml
index e3c1539a1e3..a33faf4f004 100644
--- a/doc/src/sgml/indexam.sgml
+++ b/doc/src/sgml/indexam.sgml
@@ -141,6 +141,7 @@ typedef struct IndexAmRoutine
ambuild_function ambuild;
ambuildempty_function ambuildempty;
aminsert_function aminsert;
+ aminsert_extended_function aminsertextended;
aminsertcleanup_function aminsertcleanup;
ambulkdelete_function ambulkdelete;
amvacuumcleanup_function amvacuumcleanup;
diff --git a/doc/src/sgml/ref/pg_rewind.sgml b/doc/src/sgml/ref/pg_rewind.sgml
index dc039d87566..063364f9702 100644
--- a/doc/src/sgml/ref/pg_rewind.sgml
+++ b/doc/src/sgml/ref/pg_rewind.sgml
@@ -306,6 +306,19 @@ PostgreSQL documentation
+
+
+
+
+
+ Load shared library that performs custom rewind for postgres extension.
+ The path may be full or
+ relative to PKGLIBDIR. File extension is optional. Multiple extensions
+ can be selected by multiple switches.
+
+
+
+
diff --git a/meson.build b/meson.build
index 4c2769dee0a..8ded179b23e 100644
--- a/meson.build
+++ b/meson.build
@@ -153,6 +153,7 @@ cdata.set('PG_VERSION_NUM', pg_version_num)
# PG_VERSION_STR is built later, it depends on compiler test results
cdata.set_quoted('CONFIGURE_ARGS', '')
+orioledb_patchset_version = '22'
###############################################################
@@ -2689,7 +2690,6 @@ func_checks = [
['posix_fallocate'],
['ppoll'],
['pthread_barrier_wait', {'dependencies': [thread_dep]}],
- ['pthread_is_threaded_np', {'dependencies': [thread_dep]}],
['sem_init', {'dependencies': [rt_dep, thread_dep], 'skip': sema_kind != 'unnamed_posix', 'define': false}],
['setproctitle', {'dependencies': [util_dep]}],
['setproctitle_fast'],
diff --git a/src/Makefile.global.in b/src/Makefile.global.in
index a00c909681e..8c7ee1c7217 100644
--- a/src/Makefile.global.in
+++ b/src/Makefile.global.in
@@ -44,6 +44,9 @@ VERSION_NUM = @PG_VERSION_NUM@
PACKAGE_URL = @PACKAGE_URL@
+# OrioleDB patchset git tag number
+ORIOLEDB_PATCHSET_VERSION = @ORIOLEDB_PATCHSET_VERSION@
+
# Set top_srcdir, srcdir, and VPATH.
ifdef PGXS
top_srcdir = $(top_builddir)
diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index 6467bed604a..c1ccef71937 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -273,7 +273,8 @@ brinhandler(PG_FUNCTION_ARGS)
amroutine->ambuild = brinbuild;
amroutine->ambuildempty = brinbuildempty;
- amroutine->aminsert = brininsert;
+ amroutine->aminsert = NULL;
+ amroutine->aminsertextended = brininsert;
amroutine->aminsertcleanup = brininsertcleanup;
amroutine->ambulkdelete = brinbulkdelete;
amroutine->amvacuumcleanup = brinvacuumcleanup;
@@ -333,7 +334,7 @@ initialize_brin_insertstate(Relation idxRel, IndexInfo *indexInfo)
*/
bool
brininsert(Relation idxRel, Datum *values, bool *nulls,
- ItemPointer heaptid, Relation heapRel,
+ Datum tupleid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
IndexInfo *indexInfo)
@@ -348,6 +349,7 @@ brininsert(Relation idxRel, Datum *values, bool *nulls,
MemoryContext tupcxt = NULL;
MemoryContext oldcxt = CurrentMemoryContext;
bool autosummarize = BrinGetAutoSummarize(idxRel);
+ ItemPointer heaptid = DatumGetItemPointer(tupleid);
/*
* If first time through in this statement, initialize the insert state
diff --git a/src/backend/access/common/detoast.c b/src/backend/access/common/detoast.c
index 3547cdba56e..27d0e37607a 100644
--- a/src/backend/access/common/detoast.c
+++ b/src/backend/access/common/detoast.c
@@ -26,9 +26,10 @@ static struct varlena *toast_fetch_datum(struct varlena *attr);
static struct varlena *toast_fetch_datum_slice(struct varlena *attr,
int32 sliceoffset,
int32 slicelength);
-static struct varlena *toast_decompress_datum(struct varlena *attr);
static struct varlena *toast_decompress_datum_slice(struct varlena *attr, int32 slicelength);
+static ToastFunc o_detoast_func = NULL;
+
/* ----------
* detoast_external_attr -
*
@@ -46,7 +47,7 @@ detoast_external_attr(struct varlena *attr)
{
struct varlena *result;
- if (VARATT_IS_EXTERNAL_ONDISK(attr))
+ if (VARATT_IS_EXTERNAL_ONDISK(attr) || VARATT_IS_EXTERNAL_ORIOLEDB(attr))
{
/*
* This is an external stored plain value
@@ -115,7 +116,7 @@ detoast_external_attr(struct varlena *attr)
struct varlena *
detoast_attr(struct varlena *attr)
{
- if (VARATT_IS_EXTERNAL_ONDISK(attr))
+ if (VARATT_IS_EXTERNAL_ONDISK(attr) || VARATT_IS_EXTERNAL_ORIOLEDB(attr))
{
/*
* This is an externally stored datum --- fetch it back from there
@@ -223,7 +224,14 @@ detoast_attr_slice(struct varlena *attr,
else if (pg_add_s32_overflow(sliceoffset, slicelength, &slicelimit))
slicelength = slicelimit = -1;
- if (VARATT_IS_EXTERNAL_ONDISK(attr))
+ if (VARATT_IS_EXTERNAL_ORIOLEDB(attr))
+ {
+ Assert(o_detoast_func != NULL);
+ preslice = o_detoast_func(attr);
+ if (preslice == NULL)
+ elog(ERROR, "unexpected NULL detoast result");
+ }
+ else if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
struct varatt_external toast_pointer;
@@ -332,6 +340,18 @@ detoast_attr_slice(struct varlena *attr,
return result;
}
+void
+register_o_detoast_func(ToastFunc func)
+{
+ o_detoast_func = func;
+}
+
+void
+deregister_o_detoast_func()
+{
+ o_detoast_func = NULL;
+}
+
/* ----------
* toast_fetch_datum -
*
@@ -347,6 +367,17 @@ toast_fetch_datum(struct varlena *attr)
struct varatt_external toast_pointer;
int32 attrsize;
+ if (VARATT_IS_EXTERNAL_ORIOLEDB(attr))
+ {
+ if (o_detoast_func != NULL)
+ {
+ result = o_detoast_func(attr);
+ if (result == NULL)
+ elog(ERROR, "unexpected NULL detoast result");
+ return result;
+ }
+ }
+
if (!VARATT_IS_EXTERNAL_ONDISK(attr))
elog(ERROR, "toast_fetch_datum shouldn't be called for non-ondisk datums");
@@ -467,7 +498,7 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset,
*
* Decompress a compressed version of a varlena datum
*/
-static struct varlena *
+struct varlena *
toast_decompress_datum(struct varlena *attr)
{
ToastCompressionId cmid;
@@ -547,11 +578,17 @@ toast_raw_datum_size(Datum value)
struct varlena *attr = (struct varlena *) DatumGetPointer(value);
Size result;
- if (VARATT_IS_EXTERNAL_ONDISK(attr))
+ if (VARATT_IS_EXTERNAL_ORIOLEDB(attr))
+ {
+ OToastExternal *toasted = (OToastExternal*) VARDATA_EXTERNAL(attr);
+ result = toasted->raw_size + VARHDRSZ;
+ }
+ else if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
- /* va_rawsize is the size of the original datum -- including header */
struct varatt_external toast_pointer;
+ /* va_rawsize is the size of the original datum -- including header */
+
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
result = toast_pointer.va_rawsize;
}
@@ -603,7 +640,12 @@ toast_datum_size(Datum value)
struct varlena *attr = (struct varlena *) DatumGetPointer(value);
Size result;
- if (VARATT_IS_EXTERNAL_ONDISK(attr))
+ if (VARATT_IS_EXTERNAL_ORIOLEDB(attr))
+ {
+ OToastExternal *toasted = (OToastExternal*) VARDATA_EXTERNAL(attr);
+ result = toasted->toasted_size - VARHDRSZ;
+ }
+ else if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
/*
* Attribute is stored externally - return the extsize whether
diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c
index 9e3407bf987..a1b8a99b739 100644
--- a/src/backend/access/common/heaptuple.c
+++ b/src/backend/access/common/heaptuple.c
@@ -755,6 +755,10 @@ heap_getsysattr(HeapTuple tup, int attnum, TupleDesc tupleDesc, bool *isnull)
case TableOidAttributeNumber:
result = ObjectIdGetDatum(tup->t_tableOid);
break;
+ case RowIdAttributeNumber:
+ *isnull = true;
+ result = 0;
+ break;
default:
elog(ERROR, "invalid attnum: %d", attnum);
result = 0; /* keep compiler quiet */
diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c
index d6eb5d85599..963995388bb 100644
--- a/src/backend/access/common/reloptions.c
+++ b/src/backend/access/common/reloptions.c
@@ -24,6 +24,7 @@
#include "access/nbtree.h"
#include "access/reloptions.h"
#include "access/spgist_private.h"
+#include "access/tableam.h"
#include "catalog/pg_type.h"
#include "commands/defrem.h"
#include "commands/tablespace.h"
@@ -1377,7 +1378,7 @@ untransformRelOptions(Datum options)
*/
bytea *
extractRelOptions(HeapTuple tuple, TupleDesc tupdesc,
- amoptions_function amoptions)
+ const TableAmRoutine *tableam, amoptions_function amoptions)
{
bytea *options;
bool isnull;
@@ -1399,7 +1400,8 @@ extractRelOptions(HeapTuple tuple, TupleDesc tupdesc,
case RELKIND_RELATION:
case RELKIND_TOASTVALUE:
case RELKIND_MATVIEW:
- options = heap_reloptions(classForm->relkind, datum, false);
+ options = tableam_reloptions(tableam, classForm->relkind,
+ datum, false);
break;
case RELKIND_PARTITIONED_TABLE:
options = partitioned_table_reloptions(datum, false);
diff --git a/src/backend/access/common/toast_compression.c b/src/backend/access/common/toast_compression.c
index 52230f31c68..0717947d689 100644
--- a/src/backend/access/common/toast_compression.c
+++ b/src/backend/access/common/toast_compression.c
@@ -260,7 +260,12 @@ toast_get_compression_id(struct varlena *attr)
* the external toast pointer. If compressed inline, fetch it from the
* toast compression header.
*/
- if (VARATT_IS_EXTERNAL_ONDISK(attr))
+ if (VARATT_IS_EXTERNAL_ORIOLEDB(attr))
+ {
+ OToastExternal *toasted = (OToastExternal*) VARDATA_EXTERNAL(attr);
+ cmid = toasted->formatFlags >> ORIOLEDB_EXT_FORMAT_FLAGS_BITS;
+ }
+ else if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
struct varatt_external toast_pointer;
diff --git a/src/backend/access/common/toast_internals.c b/src/backend/access/common/toast_internals.c
index 90d0654e629..538a554c917 100644
--- a/src/backend/access/common/toast_internals.c
+++ b/src/backend/access/common/toast_internals.c
@@ -239,7 +239,7 @@ toast_save_datum(Relation rel, Datum value,
{
struct varatt_external old_toast_pointer;
- Assert(VARATT_IS_EXTERNAL_ONDISK(oldexternal));
+ Assert(VARATT_IS_EXTERNAL_ONDISK(oldexternal) || VARATT_IS_EXTERNAL_ORIOLEDB(oldexternal));
/* Must copy to access aligned fields */
VARATT_EXTERNAL_GET_POINTER(old_toast_pointer, oldexternal);
if (old_toast_pointer.va_toastrelid == rel->rd_toastoid)
@@ -395,7 +395,7 @@ toast_delete_datum(Relation rel, Datum value, bool is_speculative)
int validIndex;
SnapshotData SnapshotToast;
- if (!VARATT_IS_EXTERNAL_ONDISK(attr))
+ if (!VARATT_IS_EXTERNAL_ONDISK(attr) && !VARATT_IS_EXTERNAL_ORIOLEDB(attr))
return;
/* Must copy to access aligned fields */
diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c
index 71f38be90c3..690c744d9a9 100644
--- a/src/backend/access/gin/gininsert.c
+++ b/src/backend/access/gin/gininsert.c
@@ -481,7 +481,7 @@ ginHeapTupleInsert(GinState *ginstate, OffsetNumber attnum,
bool
gininsert(Relation index, Datum *values, bool *isnull,
- ItemPointer ht_ctid, Relation heapRel,
+ Datum tupleid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
IndexInfo *indexInfo)
@@ -490,6 +490,7 @@ gininsert(Relation index, Datum *values, bool *isnull,
MemoryContext oldCtx;
MemoryContext insertCtx;
int i;
+ ItemPointer ht_ctid = DatumGetItemPointer(tupleid);
/* Initialize GinState cache if first call in this statement */
if (ginstate == NULL)
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index 5747ae6a4ca..68ce032f150 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -63,7 +63,8 @@ ginhandler(PG_FUNCTION_ARGS)
amroutine->ambuild = ginbuild;
amroutine->ambuildempty = ginbuildempty;
- amroutine->aminsert = gininsert;
+ amroutine->aminsert = NULL;
+ amroutine->aminsertextended = gininsert;
amroutine->aminsertcleanup = NULL;
amroutine->ambulkdelete = ginbulkdelete;
amroutine->amvacuumcleanup = ginvacuumcleanup;
diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index ed4ffa63a77..66b086ee4c7 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -85,7 +85,8 @@ gisthandler(PG_FUNCTION_ARGS)
amroutine->ambuild = gistbuild;
amroutine->ambuildempty = gistbuildempty;
- amroutine->aminsert = gistinsert;
+ amroutine->aminsert = NULL;
+ amroutine->aminsertextended = gistinsert;
amroutine->aminsertcleanup = NULL;
amroutine->ambulkdelete = gistbulkdelete;
amroutine->amvacuumcleanup = gistvacuumcleanup;
@@ -157,7 +158,7 @@ gistbuildempty(Relation index)
*/
bool
gistinsert(Relation r, Datum *values, bool *isnull,
- ItemPointer ht_ctid, Relation heapRel,
+ Datum tupleid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
IndexInfo *indexInfo)
@@ -165,6 +166,7 @@ gistinsert(Relation r, Datum *values, bool *isnull,
GISTSTATE *giststate = (GISTSTATE *) indexInfo->ii_AmCache;
IndexTuple itup;
MemoryContext oldCxt;
+ ItemPointer ht_ctid = DatumGetItemPointer(tupleid);
/* Initialize GISTSTATE cache if first call in this statement */
if (giststate == NULL)
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index 01d06b7c328..557c7a3f316 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -83,7 +83,8 @@ hashhandler(PG_FUNCTION_ARGS)
amroutine->ambuild = hashbuild;
amroutine->ambuildempty = hashbuildempty;
- amroutine->aminsert = hashinsert;
+ amroutine->aminsert = NULL;
+ amroutine->aminsertextended = hashinsert;
amroutine->aminsertcleanup = NULL;
amroutine->ambulkdelete = hashbulkdelete;
amroutine->amvacuumcleanup = hashvacuumcleanup;
@@ -249,7 +250,7 @@ hashbuildCallback(Relation index,
*/
bool
hashinsert(Relation rel, Datum *values, bool *isnull,
- ItemPointer ht_ctid, Relation heapRel,
+ Datum tupleid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
IndexInfo *indexInfo)
@@ -257,6 +258,7 @@ hashinsert(Relation rel, Datum *values, bool *isnull,
Datum index_values[1];
bool index_isnull[1];
IndexTuple itup;
+ ItemPointer ht_ctid = DatumGetItemPointer(tupleid);
/* convert data to a hash key; on failure, do not insert anything */
if (!_hash_convert_tuple(rel,
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 91b20147a00..9d6b0ad10ae 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -2669,10 +2669,11 @@ xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
}
/*
- * heap_delete - delete a tuple
+ * heap_delete - delete a tuple, optionally fetching it into a slot
*
* See table_tuple_delete() for an explanation of the parameters, except that
- * this routine directly takes a tuple rather than a slot.
+ * this routine directly takes a tuple rather than a slot. Also, we don't
+ * place a lock on the tuple in this function, just fetch the existing version.
*
* In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
* t_xmax (resolving a possible MultiXact, if necessary), and t_cmax (the last
@@ -2681,8 +2682,9 @@ xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
*/
TM_Result
heap_delete(Relation relation, ItemPointer tid,
- CommandId cid, Snapshot crosscheck, bool wait,
- TM_FailureData *tmfd, bool changingPart)
+ CommandId cid, Snapshot crosscheck, int options,
+ TM_FailureData *tmfd, bool changingPart,
+ TupleTableSlot *oldSlot)
{
TM_Result result;
TransactionId xid = GetCurrentTransactionId();
@@ -2760,7 +2762,7 @@ heap_delete(Relation relation, ItemPointer tid,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("attempted to delete invisible tuple")));
}
- else if (result == TM_BeingModified && wait)
+ else if (result == TM_BeingModified && (options & TABLE_MODIFY_WAIT))
{
TransactionId xwait;
uint16 infomask;
@@ -2901,7 +2903,30 @@ heap_delete(Relation relation, ItemPointer tid,
tmfd->cmax = HeapTupleHeaderGetCmax(tp.t_data);
else
tmfd->cmax = InvalidCommandId;
- UnlockReleaseBuffer(buffer);
+
+ /*
+ * If we're asked to lock the updated tuple, we just fetch the
+ * existing tuple. That let's the caller save some resources on
+ * placing the lock.
+ */
+ if (result == TM_Updated &&
+ (options & TABLE_MODIFY_LOCK_UPDATED))
+ {
+ BufferHeapTupleTableSlot *bslot;
+
+ Assert(TTS_IS_BUFFERTUPLE(oldSlot));
+ bslot = (BufferHeapTupleTableSlot *) oldSlot;
+
+ LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+ bslot->base.tupdata = tp;
+ ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata,
+ oldSlot,
+ buffer);
+ }
+ else
+ {
+ UnlockReleaseBuffer(buffer);
+ }
if (have_tuple_lock)
UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
if (vmbuffer != InvalidBuffer)
@@ -3075,8 +3100,24 @@ heap_delete(Relation relation, ItemPointer tid,
*/
CacheInvalidateHeapTuple(relation, &tp, NULL);
- /* Now we can release the buffer */
- ReleaseBuffer(buffer);
+ /* Fetch the old tuple version if we're asked for that. */
+ if (options & TABLE_MODIFY_FETCH_OLD_TUPLE)
+ {
+ BufferHeapTupleTableSlot *bslot;
+
+ Assert(TTS_IS_BUFFERTUPLE(oldSlot));
+ bslot = (BufferHeapTupleTableSlot *) oldSlot;
+
+ bslot->base.tupdata = tp;
+ ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata,
+ oldSlot,
+ buffer);
+ }
+ else
+ {
+ /* Now we can release the buffer */
+ ReleaseBuffer(buffer);
+ }
/*
* Release the lmgr tuple lock, if we had it.
@@ -3108,8 +3149,8 @@ simple_heap_delete(Relation relation, ItemPointer tid)
result = heap_delete(relation, tid,
GetCurrentCommandId(true), InvalidSnapshot,
- true /* wait for commit */ ,
- &tmfd, false /* changingPart */ );
+ TABLE_MODIFY_WAIT /* wait for commit */ ,
+ &tmfd, false /* changingPart */ , NULL);
switch (result)
{
case TM_SelfModified:
@@ -3136,10 +3177,11 @@ simple_heap_delete(Relation relation, ItemPointer tid)
}
/*
- * heap_update - replace a tuple
+ * heap_update - replace a tuple, optionally fetching it into a slot
*
* See table_tuple_update() for an explanation of the parameters, except that
- * this routine directly takes a tuple rather than a slot.
+ * this routine directly takes a tuple rather than a slot. Also, we don't
+ * place a lock on the tuple in this function, just fetch the existing version.
*
* In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
* t_xmax (resolving a possible MultiXact, if necessary), and t_cmax (the last
@@ -3148,9 +3190,9 @@ simple_heap_delete(Relation relation, ItemPointer tid)
*/
TM_Result
heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
- CommandId cid, Snapshot crosscheck, bool wait,
+ CommandId cid, Snapshot crosscheck, int options,
TM_FailureData *tmfd, LockTupleMode *lockmode,
- TU_UpdateIndexes *update_indexes)
+ TU_UpdateIndexes *update_indexes, TupleTableSlot *oldSlot)
{
TM_Result result;
TransactionId xid = GetCurrentTransactionId();
@@ -3327,7 +3369,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
result = HeapTupleSatisfiesUpdate(&oldtup, cid, buffer);
/* see below about the "no wait" case */
- Assert(result != TM_BeingModified || wait);
+ Assert(result != TM_BeingModified || (options & TABLE_MODIFY_WAIT));
if (result == TM_Invisible)
{
@@ -3336,7 +3378,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("attempted to update invisible tuple")));
}
- else if (result == TM_BeingModified && wait)
+ else if (result == TM_BeingModified && (options & TABLE_MODIFY_WAIT))
{
TransactionId xwait;
uint16 infomask;
@@ -3540,7 +3582,30 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
tmfd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data);
else
tmfd->cmax = InvalidCommandId;
- UnlockReleaseBuffer(buffer);
+
+ /*
+ * If we're asked to lock the updated tuple, we just fetch the
+ * existing tuple. That lets the caller save some resources on
+ * placing the lock.
+ */
+ if (result == TM_Updated &&
+ (options & TABLE_MODIFY_LOCK_UPDATED))
+ {
+ BufferHeapTupleTableSlot *bslot;
+
+ Assert(TTS_IS_BUFFERTUPLE(oldSlot));
+ bslot = (BufferHeapTupleTableSlot *) oldSlot;
+
+ LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+ bslot->base.tupdata = oldtup;
+ ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata,
+ oldSlot,
+ buffer);
+ }
+ else
+ {
+ UnlockReleaseBuffer(buffer);
+ }
if (have_tuple_lock)
UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
if (vmbuffer != InvalidBuffer)
@@ -4019,7 +4084,26 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
/* Now we can release the buffer(s) */
if (newbuf != buffer)
ReleaseBuffer(newbuf);
- ReleaseBuffer(buffer);
+
+ /* Fetch the old tuple version if we're asked for that. */
+ if (options & TABLE_MODIFY_FETCH_OLD_TUPLE)
+ {
+ BufferHeapTupleTableSlot *bslot;
+
+ Assert(TTS_IS_BUFFERTUPLE(oldSlot));
+ bslot = (BufferHeapTupleTableSlot *) oldSlot;
+
+ bslot->base.tupdata = oldtup;
+ ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata,
+ oldSlot,
+ buffer);
+ }
+ else
+ {
+ /* Now we can release the buffer */
+ ReleaseBuffer(buffer);
+ }
+
if (BufferIsValid(vmbuffer_new))
ReleaseBuffer(vmbuffer_new);
if (BufferIsValid(vmbuffer))
@@ -4227,8 +4311,8 @@ simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup,
result = heap_update(relation, otid, tup,
GetCurrentCommandId(true), InvalidSnapshot,
- true /* wait for commit */ ,
- &tmfd, &lockmode, update_indexes);
+ TABLE_MODIFY_WAIT /* wait for commit */ ,
+ &tmfd, &lockmode, update_indexes, NULL);
switch (result)
{
case TM_SelfModified:
@@ -4291,12 +4375,14 @@ get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
* tuples.
*
* Output parameters:
- * *tuple: all fields filled in
- * *buffer: set to buffer holding tuple (pinned but not locked at exit)
+ * *slot: BufferHeapTupleTableSlot filled with tuple
* *tmfd: filled in failure cases (see below)
*
* Function results are the same as the ones for table_tuple_lock().
*
+ * If *slot already contains the target tuple, it takes advantage of that by
+ * skipping the ReadBuffer() call.
+ *
* In the failure cases other than TM_Invisible, the routine fills
* *tmfd with the tuple's t_ctid, t_xmax (resolving a possible MultiXact,
* if necessary), and t_cmax (the last only for TM_SelfModified,
@@ -4307,15 +4393,14 @@ get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
* See README.tuplock for a thorough explanation of this mechanism.
*/
TM_Result
-heap_lock_tuple(Relation relation, HeapTuple tuple,
+heap_lock_tuple(Relation relation, ItemPointer tid, TupleTableSlot *slot,
CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy,
- bool follow_updates,
- Buffer *buffer, TM_FailureData *tmfd)
+ bool follow_updates, TM_FailureData *tmfd)
{
TM_Result result;
- ItemPointer tid = &(tuple->t_self);
ItemId lp;
Page page;
+ Buffer buffer;
Buffer vmbuffer = InvalidBuffer;
BlockNumber block;
TransactionId xid,
@@ -4327,8 +4412,24 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
bool skip_tuple_lock = false;
bool have_tuple_lock = false;
bool cleared_all_frozen = false;
+ BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
+ HeapTuple tuple = &bslot->base.tupdata;
+
+ Assert(TTS_IS_BUFFERTUPLE(slot));
- *buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
+ /* Take advantage if slot already contains the relevant tuple */
+ if (!TTS_EMPTY(slot) &&
+ slot->tts_tableOid == relation->rd_id &&
+ ItemPointerCompare(&slot->tts_tid, tid) == 0 &&
+ BufferIsValid(bslot->buffer))
+ {
+ buffer = bslot->buffer;
+ IncrBufferRefCount(buffer);
+ }
+ else
+ {
+ buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
+ }
block = ItemPointerGetBlockNumber(tid);
/*
@@ -4337,21 +4438,22 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
* in the middle of changing this, so we'll need to recheck after we have
* the lock.
*/
- if (PageIsAllVisible(BufferGetPage(*buffer)))
+ if (PageIsAllVisible(BufferGetPage(buffer)))
visibilitymap_pin(relation, block, &vmbuffer);
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
- page = BufferGetPage(*buffer);
+ page = BufferGetPage(buffer);
lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
Assert(ItemIdIsNormal(lp));
+ tuple->t_self = *tid;
tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
tuple->t_len = ItemIdGetLength(lp);
tuple->t_tableOid = RelationGetRelid(relation);
l3:
- result = HeapTupleSatisfiesUpdate(tuple, cid, *buffer);
+ result = HeapTupleSatisfiesUpdate(tuple, cid, buffer);
if (result == TM_Invisible)
{
@@ -4380,7 +4482,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
infomask2 = tuple->t_data->t_infomask2;
ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid);
- LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
+ LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
/*
* If any subtransaction of the current top transaction already holds
@@ -4532,12 +4634,12 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
{
result = res;
/* recovery code expects to have buffer lock held */
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
goto failed;
}
}
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
/*
* Make sure it's still an appropriate lock, else start over.
@@ -4572,7 +4674,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
if (HEAP_XMAX_IS_LOCKED_ONLY(infomask) &&
!HEAP_XMAX_IS_EXCL_LOCKED(infomask))
{
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
/*
* Make sure it's still an appropriate lock, else start over.
@@ -4600,7 +4702,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
* No conflict, but if the xmax changed under us in the
* meantime, start over.
*/
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
!TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
xwait))
@@ -4612,7 +4714,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
}
else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
{
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
/* if the xmax changed in the meantime, start over */
if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
@@ -4640,7 +4742,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
TransactionIdIsCurrentTransactionId(xwait))
{
/* ... but if the xmax changed in the meantime, start over */
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
!TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
xwait))
@@ -4662,7 +4764,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
*/
if (require_sleep && (result == TM_Updated || result == TM_Deleted))
{
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
goto failed;
}
else if (require_sleep)
@@ -4687,7 +4789,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
*/
result = TM_WouldBlock;
/* recovery code expects to have buffer lock held */
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
goto failed;
}
@@ -4713,7 +4815,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
{
result = TM_WouldBlock;
/* recovery code expects to have buffer lock held */
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
goto failed;
}
break;
@@ -4753,7 +4855,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
{
result = TM_WouldBlock;
/* recovery code expects to have buffer lock held */
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
goto failed;
}
break;
@@ -4779,12 +4881,12 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
{
result = res;
/* recovery code expects to have buffer lock held */
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
goto failed;
}
}
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
/*
* xwait is done, but if xwait had just locked the tuple then some
@@ -4806,7 +4908,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
* don't check for this in the multixact case, because some
* locker transactions might still be running.
*/
- UpdateXmaxHintBits(tuple->t_data, *buffer, xwait);
+ UpdateXmaxHintBits(tuple->t_data, buffer, xwait);
}
}
@@ -4865,9 +4967,9 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
*/
if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
{
- LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
+ LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
visibilitymap_pin(relation, block, &vmbuffer);
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
goto l3;
}
@@ -4930,7 +5032,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
cleared_all_frozen = true;
- MarkBufferDirty(*buffer);
+ MarkBufferDirty(buffer);
/*
* XLOG stuff. You might think that we don't need an XLOG record because
@@ -4950,7 +5052,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
XLogRecPtr recptr;
XLogBeginInsert();
- XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD);
+ XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
xlrec.xmax = xid;
@@ -4971,7 +5073,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
result = TM_Ok;
out_locked:
- LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
+ LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
out_unlocked:
if (BufferIsValid(vmbuffer))
@@ -4989,6 +5091,9 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
if (have_tuple_lock)
UnlockTupleTuplock(relation, tid, mode);
+ /* Put the target tuple into the slot */
+ ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
+
return result;
}
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 6f8b1b79298..7d6828db403 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -23,6 +23,7 @@
#include "access/heapam.h"
#include "access/heaptoast.h"
#include "access/multixact.h"
+#include "access/reloptions.h"
#include "access/rewriteheap.h"
#include "access/syncscan.h"
#include "access/tableam.h"
@@ -46,6 +47,12 @@
#include "utils/builtins.h"
#include "utils/rel.h"
+static TM_Result heapam_tuple_lock(Relation relation, Datum tid,
+ Snapshot snapshot, TupleTableSlot *slot,
+ CommandId cid, LockTupleMode mode,
+ LockWaitPolicy wait_policy, uint8 flags,
+ TM_FailureData *tmfd);
+
static void reform_and_rewrite_tuple(HeapTuple tuple,
Relation OldHeap, Relation NewHeap,
Datum *values, bool *isnull, RewriteState rwstate);
@@ -70,6 +77,20 @@ heapam_slot_callbacks(Relation relation)
return &TTSOpsBufferHeapTuple;
}
+static RowRefType
+heapam_get_row_ref_type(Relation rel)
+{
+ return ROW_REF_TID;
+}
+
+static void
+heapam_free_rd_amcache(Relation rel)
+{
+ if (rel->rd_amcache)
+ pfree(rel->rd_amcache);
+ rel->rd_amcache = NULL;
+}
+
/* ------------------------------------------------------------------------
* Index Scan Callbacks for heap AM
@@ -111,7 +132,7 @@ heapam_index_fetch_end(IndexFetchTableData *scan)
static bool
heapam_index_fetch_tuple(struct IndexFetchTableData *scan,
- ItemPointer tid,
+ Datum tupleid,
Snapshot snapshot,
TupleTableSlot *slot,
bool *call_again, bool *all_dead)
@@ -119,6 +140,7 @@ heapam_index_fetch_tuple(struct IndexFetchTableData *scan,
IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
bool got_heap_tuple;
+ ItemPointer tid = DatumGetItemPointer(tupleid);
Assert(TTS_IS_BUFFERTUPLE(slot));
@@ -179,7 +201,7 @@ heapam_index_fetch_tuple(struct IndexFetchTableData *scan,
static bool
heapam_fetch_row_version(Relation relation,
- ItemPointer tid,
+ Datum tupleid,
Snapshot snapshot,
TupleTableSlot *slot)
{
@@ -188,7 +210,7 @@ heapam_fetch_row_version(Relation relation,
Assert(TTS_IS_BUFFERTUPLE(slot));
- bslot->base.tupdata.t_self = *tid;
+ bslot->base.tupdata.t_self = *DatumGetItemPointer(tupleid);
if (heap_fetch(relation, snapshot, &bslot->base.tupdata, &buffer, false))
{
/* store in slot, transferring existing pin */
@@ -238,7 +260,7 @@ heapam_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
* ----------------------------------------------------------------------------
*/
-static void
+static TupleTableSlot *
heapam_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid,
int options, BulkInsertState bistate)
{
@@ -255,6 +277,8 @@ heapam_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid,
if (shouldFree)
pfree(tuple);
+
+ return slot;
}
static void
@@ -297,36 +321,341 @@ heapam_tuple_complete_speculative(Relation relation, TupleTableSlot *slot,
pfree(tuple);
}
+/*
+ * ExecCheckTupleVisible -- verify tuple is visible
+ *
+ * It would not be consistent with guarantees of the higher isolation levels to
+ * proceed with avoiding insertion (taking speculative insertion's alternative
+ * path) on the basis of another tuple that is not visible to MVCC snapshot.
+ * Check for the need to raise a serialization failure, and do so as necessary.
+ */
+static void
+ExecCheckTupleVisible(EState *estate,
+ Relation rel,
+ TupleTableSlot *slot)
+{
+ if (!IsolationUsesXactSnapshot())
+ return;
+
+ if (!table_tuple_satisfies_snapshot(rel, slot, estate->es_snapshot))
+ {
+ Datum xminDatum;
+ TransactionId xmin;
+ bool isnull;
+
+ xminDatum = slot_getsysattr(slot, MinTransactionIdAttributeNumber, &isnull);
+ Assert(!isnull);
+ xmin = DatumGetTransactionId(xminDatum);
+
+ /*
+ * We should not raise a serialization failure if the conflict is
+ * against a tuple inserted by our own transaction, even if it's not
+ * visible to our snapshot. (This would happen, for example, if
+ * conflicting keys are proposed for insertion in a single command.)
+ */
+ if (!TransactionIdIsCurrentTransactionId(xmin))
+ ereport(ERROR,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("could not serialize access due to concurrent update")));
+ }
+}
+
+/*
+ * ExecCheckTIDVisible -- convenience variant of ExecCheckTupleVisible()
+ */
+static void
+ExecCheckTIDVisible(EState *estate,
+ Relation rel,
+ ItemPointer tid,
+ TupleTableSlot *tempSlot)
+{
+ /* Redundantly check isolation level */
+ if (!IsolationUsesXactSnapshot())
+ return;
+
+ if (!table_tuple_fetch_row_version(rel, PointerGetDatum(tid),
+ SnapshotAny, tempSlot))
+ elog(ERROR, "failed to fetch conflicting tuple for ON CONFLICT");
+ ExecCheckTupleVisible(estate, rel, tempSlot);
+ ExecClearTuple(tempSlot);
+}
+
+static inline TupleTableSlot *
+heapam_tuple_insert_with_arbiter(ResultRelInfo *resultRelInfo,
+ TupleTableSlot *slot,
+ CommandId cid, int options,
+ struct BulkInsertStateData *bistate,
+ List *arbiterIndexes,
+ EState *estate,
+ LockTupleMode lockmode,
+ TupleTableSlot *lockedSlot,
+ TupleTableSlot *tempSlot)
+{
+ Relation rel = resultRelInfo->ri_RelationDesc;
+ uint32 specToken;
+ ItemPointerData conflictTid;
+ bool specConflict;
+ List *recheckIndexes = NIL;
+
+ while (true)
+ {
+ specConflict = false;
+ if (!ExecCheckIndexConstraints(resultRelInfo, slot, estate, &conflictTid,
+ arbiterIndexes))
+ {
+ if (lockedSlot)
+ {
+ TM_Result test;
+ TM_FailureData tmfd;
+ Datum xminDatum;
+ TransactionId xmin;
+ bool isnull;
+
+ /* Determine lock mode to use */
+ lockmode = ExecUpdateLockMode(estate, resultRelInfo);
+
+ /*
+ * Lock tuple for update. Don't follow updates when tuple cannot be
+ * locked without doing so. A row locking conflict here means our
+ * previous conclusion that the tuple is conclusively committed is not
+ * true anymore.
+ */
+ test = table_tuple_lock(rel, PointerGetDatum(&conflictTid),
+ estate->es_snapshot,
+ lockedSlot, estate->es_output_cid,
+ lockmode, LockWaitBlock, 0,
+ &tmfd);
+ switch (test)
+ {
+ case TM_Ok:
+ /* success! */
+ break;
+
+ case TM_Invisible:
+
+ /*
+ * This can occur when a just inserted tuple is updated again in
+ * the same command. E.g. because multiple rows with the same
+ * conflicting key values are inserted.
+ *
+ * This is somewhat similar to the ExecUpdate() TM_SelfModified
+ * case. We do not want to proceed because it would lead to the
+ * same row being updated a second time in some unspecified order,
+ * and in contrast to plain UPDATEs there's no historical behavior
+ * to break.
+ *
+ * It is the user's responsibility to prevent this situation from
+ * occurring. These problems are why the SQL standard similarly
+ * specifies that for SQL MERGE, an exception must be raised in
+ * the event of an attempt to update the same row twice.
+ */
+ xminDatum = slot_getsysattr(lockedSlot,
+ MinTransactionIdAttributeNumber,
+ &isnull);
+ Assert(!isnull);
+ xmin = DatumGetTransactionId(xminDatum);
+
+ if (TransactionIdIsCurrentTransactionId(xmin))
+ ereport(ERROR,
+ (errcode(ERRCODE_CARDINALITY_VIOLATION),
+ /* translator: %s is a SQL command name */
+ errmsg("%s command cannot affect row a second time",
+ "ON CONFLICT DO UPDATE"),
+ errhint("Ensure that no rows proposed for insertion within the same command have duplicate constrained values.")));
+
+ /* This shouldn't happen */
+ elog(ERROR, "attempted to lock invisible tuple");
+ break;
+
+ case TM_SelfModified:
+
+ /*
+ * This state should never be reached. As a dirty snapshot is used
+ * to find conflicting tuples, speculative insertion wouldn't have
+ * seen this row to conflict with.
+ */
+ elog(ERROR, "unexpected self-updated tuple");
+ break;
+
+ case TM_Updated:
+ if (IsolationUsesXactSnapshot())
+ ereport(ERROR,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("could not serialize access due to concurrent update")));
+
+ /*
+ * As long as we don't support an UPDATE of INSERT ON CONFLICT for
+ * a partitioned table, we shouldn't reach a case where the tuple to
+ * be locked is moved to another partition due to a concurrent update
+ * of the partition key.
+ */
+ Assert(!ItemPointerIndicatesMovedPartitions(&tmfd.ctid));
+
+ /*
+ * Tell caller to try again from the very start.
+ *
+ * It does not make sense to use the usual EvalPlanQual() style
+ * loop here, as the new version of the row might not conflict
+ * anymore, or the conflicting tuple has actually been deleted.
+ */
+ ExecClearTuple(lockedSlot);
+ return false;
+
+ case TM_Deleted:
+ if (IsolationUsesXactSnapshot())
+ ereport(ERROR,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("could not serialize access due to concurrent delete")));
+
+ /* see TM_Updated case */
+ Assert(!ItemPointerIndicatesMovedPartitions(&tmfd.ctid));
+ ExecClearTuple(lockedSlot);
+ return false;
+
+ default:
+ elog(ERROR, "unrecognized table_tuple_lock status: %u", test);
+ }
+
+ /* Success, the tuple is locked. */
+
+ /*
+ * Verify that the tuple is visible to our MVCC snapshot if the current
+ * isolation level mandates that.
+ *
+ * It's not sufficient to rely on the check within ExecUpdate() as e.g.
+ * CONFLICT ... WHERE clause may prevent us from reaching that.
+ *
+ * This means we only ever continue when a new command in the current
+ * transaction could see the row, even though in READ COMMITTED mode the
+ * tuple will not be visible according to the current statement's
+ * snapshot. This is in line with the way UPDATE deals with newer tuple
+ * versions.
+ */
+ ExecCheckTupleVisible(estate, rel, lockedSlot);
+ return NULL;
+ }
+ else
+ {
+ ExecCheckTIDVisible(estate, rel, &conflictTid, tempSlot);
+ return NULL;
+ }
+ }
+
+ /*
+ * Before we start insertion proper, acquire our "speculative
+ * insertion lock". Others can use that to wait for us to decide
+ * if we're going to go ahead with the insertion, instead of
+ * waiting for the whole transaction to complete.
+ */
+ specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());
+
+ /* insert the tuple, with the speculative token */
+ heapam_tuple_insert_speculative(rel, slot,
+ estate->es_output_cid,
+ 0,
+ NULL,
+ specToken);
+
+ /* insert index entries for tuple */
+ recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
+ slot, estate, false, true,
+ &specConflict,
+ arbiterIndexes,
+ false);
+
+ /* adjust the tuple's state accordingly */
+ heapam_tuple_complete_speculative(rel, slot,
+ specToken, !specConflict);
+
+ /*
+ * Wake up anyone waiting for our decision. They will re-check
+ * the tuple, see that it's no longer speculative, and wait on our
+ * XID as if this was a regularly inserted tuple all along. Or if
+ * we killed the tuple, they will see it's dead, and proceed as if
+ * the tuple never existed.
+ */
+ SpeculativeInsertionLockRelease(GetCurrentTransactionId());
+
+ /*
+ * If there was a conflict, start from the beginning. We'll do
+ * the pre-check again, which will now find the conflicting tuple
+ * (unless it aborts before we get there).
+ */
+ if (specConflict)
+ {
+ list_free(recheckIndexes);
+ CHECK_FOR_INTERRUPTS();
+ continue;
+ }
+
+ return slot;
+ }
+}
+
static TM_Result
-heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid,
- Snapshot snapshot, Snapshot crosscheck, bool wait,
- TM_FailureData *tmfd, bool changingPart)
+heapam_tuple_delete(Relation relation, Datum tupleid, CommandId cid,
+ Snapshot snapshot, Snapshot crosscheck, int options,
+ TM_FailureData *tmfd, bool changingPart,
+ TupleTableSlot *oldSlot)
{
+ TM_Result result;
+ ItemPointer tid = DatumGetItemPointer(tupleid);
+
/*
* Currently Deleting of index tuples are handled at vacuum, in case if
* the storage itself is cleaning the dead tuples by itself, it is the
* time to call the index tuple deletion also.
*/
- return heap_delete(relation, tid, cid, crosscheck, wait, tmfd, changingPart);
+ result = heap_delete(relation, tid, cid, crosscheck, options,
+ tmfd, changingPart, oldSlot);
+
+ /*
+ * If the tuple has been concurrently updated, then get the lock on it.
+ * (Do only if caller asked for this by setting the
+ * TABLE_MODIFY_LOCK_UPDATED option.) With the lock held, retry of the
+ * delete should succeed even if there are more concurrent update
+ * attempts.
+ */
+ if (result == TM_Updated && (options & TABLE_MODIFY_LOCK_UPDATED))
+ {
+ /*
+ * heapam_tuple_lock() will take advantage of tuple loaded into
+ * oldSlot by heap_delete().
+ */
+ result = heapam_tuple_lock(relation, tupleid, snapshot,
+ oldSlot, cid, LockTupleExclusive,
+ (options & TABLE_MODIFY_WAIT) ?
+ LockWaitBlock :
+ LockWaitSkip,
+ TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
+ tmfd);
+
+ if (result == TM_Ok)
+ return TM_Updated;
+ }
+
+ return result;
}
static TM_Result
-heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
+heapam_tuple_update(Relation relation, Datum tupleid, TupleTableSlot *slot,
CommandId cid, Snapshot snapshot, Snapshot crosscheck,
- bool wait, TM_FailureData *tmfd,
- LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
+ int options, TM_FailureData *tmfd,
+ LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes,
+ TupleTableSlot *oldSlot)
{
bool shouldFree = true;
HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
TM_Result result;
+ ItemPointer otid = DatumGetItemPointer(tupleid);
/* Update the tuple with table oid */
slot->tts_tableOid = RelationGetRelid(relation);
tuple->t_tableOid = slot->tts_tableOid;
- result = heap_update(relation, otid, tuple, cid, crosscheck, wait,
- tmfd, lockmode, update_indexes);
+ result = heap_update(relation, otid, tuple, cid, crosscheck, options,
+ tmfd, lockmode, update_indexes, oldSlot);
ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
/*
@@ -353,19 +682,44 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
if (shouldFree)
pfree(tuple);
+ /*
+ * If the tuple has been concurrently updated, then get the lock on it.
+ * (Do only if caller asked for this by setting the
+ * TABLE_MODIFY_LOCK_UPDATED option.) With the lock held, retry of the
+ * update should succeed even if there are more concurrent update
+ * attempts.
+ */
+ if (result == TM_Updated && (options & TABLE_MODIFY_LOCK_UPDATED))
+ {
+ /*
+ * heapam_tuple_lock() will take advantage of tuple loaded into
+ * oldSlot by heap_update().
+ */
+ result = heapam_tuple_lock(relation, tupleid, snapshot,
+ oldSlot, cid, *lockmode,
+ (options & TABLE_MODIFY_WAIT) ?
+ LockWaitBlock :
+ LockWaitSkip,
+ TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
+ tmfd);
+
+ if (result == TM_Ok)
+ return TM_Updated;
+ }
+
return result;
}
static TM_Result
-heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
+heapam_tuple_lock(Relation relation, Datum tupleid, Snapshot snapshot,
TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
LockWaitPolicy wait_policy, uint8 flags,
TM_FailureData *tmfd)
{
BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
TM_Result result;
- Buffer buffer;
HeapTuple tuple = &bslot->base.tupdata;
+ ItemPointer tid = DatumGetItemPointer(tupleid);
bool follow_updates;
follow_updates = (flags & TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS) != 0;
@@ -374,9 +728,8 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
Assert(TTS_IS_BUFFERTUPLE(slot));
tuple_lock_retry:
- tuple->t_self = *tid;
- result = heap_lock_tuple(relation, tuple, cid, mode, wait_policy,
- follow_updates, &buffer, tmfd);
+ result = heap_lock_tuple(relation, tid, slot, cid, mode, wait_policy,
+ follow_updates, tmfd);
if (result == TM_Updated &&
(flags & TUPLE_LOCK_FLAG_FIND_LAST_VERSION))
@@ -384,8 +737,6 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
/* Should not encounter speculative tuple on recheck */
Assert(!HeapTupleHeaderIsSpeculative(tuple->t_data));
- ReleaseBuffer(buffer);
-
if (!ItemPointerEquals(&tmfd->ctid, &tuple->t_self))
{
SnapshotData SnapshotDirty;
@@ -407,6 +758,8 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
InitDirtySnapshot(SnapshotDirty);
for (;;)
{
+ Buffer buffer = InvalidBuffer;
+
if (ItemPointerIndicatesMovedPartitions(tid))
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
@@ -501,7 +854,7 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
/*
* This is a live tuple, so try to lock it again.
*/
- ReleaseBuffer(buffer);
+ ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
goto tuple_lock_retry;
}
@@ -512,7 +865,7 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
*/
if (tuple->t_data == NULL)
{
- Assert(!BufferIsValid(buffer));
+ ReleaseBuffer(buffer);
return TM_Deleted;
}
@@ -565,9 +918,6 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
slot->tts_tableOid = RelationGetRelid(relation);
tuple->t_tableOid = slot->tts_tableOid;
- /* store in slot, transferring existing pin */
- ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
-
return result;
}
@@ -2583,6 +2933,29 @@ SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
}
}
+static bool
+heapam_tuple_is_current(Relation rel, TupleTableSlot *slot)
+{
+ Datum xminDatum;
+ TransactionId xmin;
+ bool isnull;
+
+ xminDatum = slot_getsysattr(slot, MinTransactionIdAttributeNumber, &isnull);
+ Assert(!isnull);
+ xmin = DatumGetTransactionId(xminDatum);
+ return TransactionIdIsCurrentTransactionId(xmin);
+}
+
+static bytea *
+heapam_reloptions(char relkind, Datum reloptions, bool validate)
+{
+ if (relkind == RELKIND_RELATION ||
+ relkind == RELKIND_TOASTVALUE ||
+ relkind == RELKIND_MATVIEW)
+ return heap_reloptions(relkind, reloptions, validate);
+
+ return NULL;
+}
/* ------------------------------------------------------------------------
* Definition of the heap table access method.
@@ -2593,6 +2966,8 @@ static const TableAmRoutine heapam_methods = {
.type = T_TableAmRoutine,
.slot_callbacks = heapam_slot_callbacks,
+ .get_row_ref_type = heapam_get_row_ref_type,
+ .free_rd_amcache = heapam_free_rd_amcache,
.scan_begin = heap_beginscan,
.scan_end = heap_endscan,
@@ -2612,8 +2987,7 @@ static const TableAmRoutine heapam_methods = {
.index_fetch_tuple = heapam_index_fetch_tuple,
.tuple_insert = heapam_tuple_insert,
- .tuple_insert_speculative = heapam_tuple_insert_speculative,
- .tuple_complete_speculative = heapam_tuple_complete_speculative,
+ .tuple_insert_with_arbiter = heapam_tuple_insert_with_arbiter,
.multi_insert = heap_multi_insert,
.tuple_delete = heapam_tuple_delete,
.tuple_update = heapam_tuple_update,
@@ -2645,7 +3019,11 @@ static const TableAmRoutine heapam_methods = {
.scan_bitmap_next_block = heapam_scan_bitmap_next_block,
.scan_bitmap_next_tuple = heapam_scan_bitmap_next_tuple,
.scan_sample_next_block = heapam_scan_sample_next_block,
- .scan_sample_next_tuple = heapam_scan_sample_next_tuple
+ .scan_sample_next_tuple = heapam_scan_sample_next_tuple,
+
+ .tuple_is_current = heapam_tuple_is_current,
+
+ .reloptions = heapam_reloptions
};
diff --git a/src/backend/access/index/amapi.c b/src/backend/access/index/amapi.c
index 079fb7cba65..a8f1c580acd 100644
--- a/src/backend/access/index/amapi.c
+++ b/src/backend/access/index/amapi.c
@@ -16,25 +16,27 @@
#include "access/amapi.h"
#include "access/htup_details.h"
#include "catalog/pg_am.h"
+#include "catalog/pg_class.h"
+#include "catalog/pg_index.h"
#include "catalog/pg_opclass.h"
#include "utils/fmgrprotos.h"
#include "utils/syscache.h"
+IndexAMRoutineHookType IndexAMRoutineHook = NULL;
-/*
- * GetIndexAmRoutine - call the specified access method handler routine to get
- * its IndexAmRoutine struct, which will be palloc'd in the caller's context.
- *
- * Note that if the amhandler function is built-in, this will not involve
- * any catalog access. It's therefore safe to use this while bootstrapping
- * indexes for the system catalogs. relcache.c relies on that.
- */
IndexAmRoutine *
-GetIndexAmRoutine(Oid amhandler)
+GetIndexAmRoutineWithTableAM(Oid tamoid, Oid amhandler)
{
Datum datum;
IndexAmRoutine *routine;
+ if (IndexAMRoutineHook != NULL)
+ {
+ routine = IndexAMRoutineHook(tamoid, amhandler);
+ if (routine)
+ return routine;
+ }
+
datum = OidFunctionCall0(amhandler);
routine = (IndexAmRoutine *) DatumGetPointer(datum);
@@ -45,6 +47,52 @@ GetIndexAmRoutine(Oid amhandler)
return routine;
}
+/*
+ * GetIndexAmRoutine - call the specified access method handler routine to get
+ * its IndexAmRoutine struct, which will be palloc'd in the caller's context.
+ *
+ * Note that if the amhandler function is built-in, this will not involve
+ * any catalog access. It's therefore safe to use this while bootstrapping
+ * indexes for the system catalogs. relcache.c relies on that.
+ */
+IndexAmRoutine *
+GetIndexAmRoutine(Oid amhandler)
+{
+ return GetIndexAmRoutineExtended(InvalidOid, amhandler);
+}
+
+IndexAmRoutine *
+GetIndexAmRoutineExtended(Oid indoid, Oid amhandler)
+{
+ HeapTuple ht_idx;
+ HeapTuple ht_tblrel;
+ Form_pg_index idxrec;
+ Form_pg_class tblrelrec;
+ Oid indrelid;
+ Oid tamoid;
+
+ if (!OidIsValid((indoid)) || indoid < FirstNormalObjectId)
+ return GetIndexAmRoutineWithTableAM(HEAP_TABLE_AM_OID, amhandler);
+
+ ht_idx = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indoid));
+ if (!HeapTupleIsValid(ht_idx))
+ elog(ERROR, "cache lookup failed for index %u", indoid);
+ idxrec = (Form_pg_index) GETSTRUCT(ht_idx);
+ Assert(indoid == idxrec->indexrelid);
+ indrelid = idxrec->indrelid;
+
+ ht_tblrel = SearchSysCache1(RELOID, ObjectIdGetDatum(indrelid));
+ if (!HeapTupleIsValid(ht_tblrel))
+ elog(ERROR, "cache lookup failed for relation %u", indrelid);
+ tblrelrec = (Form_pg_class) GETSTRUCT(ht_tblrel);
+ tamoid = tblrelrec->relam;
+
+ ReleaseSysCache(ht_tblrel);
+ ReleaseSysCache(ht_idx);
+
+ return GetIndexAmRoutineWithTableAM(tamoid, amhandler);
+}
+
/*
* GetIndexAmRoutineByAmId - look up the handler of the index access method
* with the given OID, and get its IndexAmRoutine struct.
@@ -53,7 +101,7 @@ GetIndexAmRoutine(Oid amhandler)
* noerror is true, else throws error.
*/
IndexAmRoutine *
-GetIndexAmRoutineByAmId(Oid amoid, bool noerror)
+GetIndexAmRoutineByAmId(Oid indoid, Oid amoid, bool noerror)
{
HeapTuple tuple;
Form_pg_am amform;
@@ -103,7 +151,7 @@ GetIndexAmRoutineByAmId(Oid amoid, bool noerror)
ReleaseSysCache(tuple);
/* And finally, call the handler function to get the API struct. */
- return GetIndexAmRoutine(amhandler);
+ return GetIndexAmRoutineExtended(indoid, amhandler);
}
@@ -129,7 +177,7 @@ amvalidate(PG_FUNCTION_ARGS)
ReleaseSysCache(classtup);
- amroutine = GetIndexAmRoutineByAmId(amoid, false);
+ amroutine = GetIndexAmRoutineByAmId(InvalidOid, amoid, false);
if (amroutine->amvalidate == NULL)
elog(ERROR, "function amvalidate is not defined for index access method %u",
diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c
index de751e8e4a3..e162df6dfd1 100644
--- a/src/backend/access/index/genam.c
+++ b/src/backend/access/index/genam.c
@@ -101,6 +101,7 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
scan->orderByData = NULL;
scan->xs_want_itup = false; /* may be set later */
+ scan->xs_want_rowid = false; /* may be set later */
/*
* During recovery we ignore killed tuples and don't bother to kill them
@@ -122,6 +123,7 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
scan->xs_itupdesc = NULL;
scan->xs_hitup = NULL;
scan->xs_hitupdesc = NULL;
+ scan->xs_rowid.isnull = true;
return scan;
}
diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c
index dcd04b813d8..4668d7159ae 100644
--- a/src/backend/access/index/indexam.c
+++ b/src/backend/access/index/indexam.c
@@ -213,24 +213,39 @@ bool
index_insert(Relation indexRelation,
Datum *values,
bool *isnull,
- ItemPointer heap_t_ctid,
+ ItemPointer tupleid,
Relation heapRelation,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
IndexInfo *indexInfo)
{
RELATION_CHECKS;
- CHECK_REL_PROCEDURE(aminsert);
+
+ if (indexRelation->rd_indam->aminsertextended == NULL && indexRelation->rd_indam->aminsert == NULL )
+ elog(ERROR, "at least one function aminsert or aminsertextended should be defined for index \"%s\"", \
+ RelationGetRelationName(indexRelation));
if (!(indexRelation->rd_indam->ampredlocks))
CheckForSerializableConflictIn(indexRelation,
(ItemPointer) NULL,
InvalidBlockNumber);
- return indexRelation->rd_indam->aminsert(indexRelation, values, isnull,
- heap_t_ctid, heapRelation,
+ if (indexRelation->rd_indam->aminsert)
+ {
+ /* compatibility method for extension AM's not aware of aminsertextended */
+ return indexRelation->rd_indam->aminsert(indexRelation, values, isnull,
+ tupleid, heapRelation,
+ checkUnique, indexUnchanged,
+ indexInfo);
+ }
+ else
+ {
+ /* index insert method for internal AM's and Orioledb that are aware of aminsertextended */
+ return indexRelation->rd_indam->aminsertextended(indexRelation, values, isnull,
+ ItemPointerGetDatum(tupleid), heapRelation,
checkUnique, indexUnchanged,
indexInfo);
+ }
}
/* -------------------------
@@ -247,6 +262,66 @@ index_insert_cleanup(Relation indexRelation,
indexRelation->rd_indam->aminsertcleanup(indexRelation, indexInfo);
}
+/* ----------------
+ * index_update - update an index tuple in a relation
+ * ----------------
+ */
+bool
+index_update(Relation indexRelation,
+ bool new_valid,
+ bool old_valid,
+ Datum *values,
+ bool *isnull,
+ Datum tupleid,
+ Datum *valuesOld,
+ bool *isnullOld,
+ Datum oldTupleid,
+ Relation heapRelation,
+ IndexUniqueCheck checkUnique,
+ IndexInfo *indexInfo)
+{
+ RELATION_CHECKS;
+ CHECK_REL_PROCEDURE(amupdate);
+
+ if (!(indexRelation->rd_indam->ampredlocks))
+ CheckForSerializableConflictIn(indexRelation,
+ (ItemPointer) NULL,
+ InvalidBlockNumber);
+
+ return indexRelation->rd_indam->amupdate(indexRelation,
+ new_valid, old_valid,
+ values, isnull, tupleid,
+ valuesOld, isnullOld, oldTupleid,
+ heapRelation,
+ checkUnique,
+ indexInfo);
+}
+
+
+/* ----------------
+ * index_delete - delete an index tuple from a relation
+ * ----------------
+ */
+bool
+index_delete(Relation indexRelation,
+ Datum *values, bool *isnull, Datum tupleid,
+ Relation heapRelation,
+ IndexInfo *indexInfo)
+{
+ RELATION_CHECKS;
+ CHECK_REL_PROCEDURE(amdelete);
+
+ if (!(indexRelation->rd_indam->ampredlocks))
+ CheckForSerializableConflictIn(indexRelation,
+ (ItemPointer) NULL,
+ InvalidBlockNumber);
+
+ return indexRelation->rd_indam->amdelete(indexRelation,
+ values, isnull, tupleid,
+ heapRelation,
+ indexInfo);
+}
+
/*
* index_beginscan - start a scan of an index with amgettuple
*
@@ -610,6 +685,55 @@ index_getnext_tid(IndexScanDesc scan, ScanDirection direction)
return &scan->xs_heaptid;
}
+/* ----------------
+ * index_getnext_rowid - get the next ROWID from a scan
+ *
+ * The result is the next ROWID satisfying the scan keys,
+ * or isnull if no more matching tuples exist.
+ * ----------------
+ */
+NullableDatum
+index_getnext_rowid(IndexScanDesc scan, ScanDirection direction)
+{
+ NullableDatum result;
+ bool found;
+
+ SCAN_CHECKS;
+ CHECK_SCAN_PROCEDURE(amgettuple);
+
+ /* XXX: we should assert that a snapshot is pushed or registered */
+ Assert(TransactionIdIsValid(RecentXmin));
+
+ /*
+ * The AM's amgettuple proc finds the next index entry matching the scan
+ * keys, and puts the TID into scan->xs_heaptid. It should also set
+ * scan->xs_recheck and possibly scan->xs_itup/scan->xs_hitup, though we
+ * pay no attention to those fields here.
+ */
+ found = scan->indexRelation->rd_indam->amgettuple(scan, direction);
+
+ /* Reset kill flag immediately for safety */
+ scan->kill_prior_tuple = false;
+ scan->xs_heap_continue = false;
+
+ /* If we're out of index entries, we're done */
+ if (!found)
+ {
+ /* release resources (like buffer pins) from table accesses */
+ if (scan->xs_heapfetch)
+ table_index_fetch_reset(scan->xs_heapfetch);
+
+ result.isnull = true;
+ return result;
+ }
+ /* Assert(RowidIsValid(&scan->xs_rowid)); */
+
+ pgstat_count_index_tuples(scan->indexRelation, 1);
+
+ /* Return the ROWID of the tuple we found. */
+ return scan->xs_rowid;
+}
+
/* ----------------
* index_fetch_heap - get the scan's next heap tuple
*
@@ -633,8 +757,17 @@ index_fetch_heap(IndexScanDesc scan, TupleTableSlot *slot)
{
bool all_dead = false;
bool found;
+ Datum tupleid;
+
+ if (scan->xs_want_rowid)
+ {
+ Assert(!scan->xs_rowid.isnull);
+ tupleid = scan->xs_rowid.value;
+ }
+ else
+ tupleid = PointerGetDatum(&scan->xs_heaptid);
- found = table_index_fetch_tuple(scan->xs_heapfetch, &scan->xs_heaptid,
+ found = table_index_fetch_tuple(scan->xs_heapfetch, tupleid,
scan->xs_snapshot, slot,
&scan->xs_heap_continue, &all_dead);
@@ -676,16 +809,30 @@ index_getnext_slot(IndexScanDesc scan, ScanDirection direction, TupleTableSlot *
{
if (!scan->xs_heap_continue)
{
- ItemPointer tid;
+ if (scan->xs_want_rowid)
+ {
+ NullableDatum rowid;
+ /* Time to fetch the next TID from the index */
+ rowid = index_getnext_rowid(scan, direction);
- /* Time to fetch the next TID from the index */
- tid = index_getnext_tid(scan, direction);
+ /* If we're out of index entries, we're done */
+ if (rowid.isnull)
+ break;
- /* If we're out of index entries, we're done */
- if (tid == NULL)
- break;
+ /* Assert(RowidEquals(rowid, &scan->xs_rowid)); */
+ }
+ else
+ {
+ ItemPointer tid;
+ /* Time to fetch the next TID from the index */
+ tid = index_getnext_tid(scan, direction);
- Assert(ItemPointerEquals(tid, &scan->xs_heaptid));
+ /* If we're out of index entries, we're done */
+ if (tid == NULL)
+ break;
+
+ Assert(ItemPointerEquals(tid, &scan->xs_heaptid));
+ }
}
/*
@@ -693,7 +840,8 @@ index_getnext_slot(IndexScanDesc scan, ScanDirection direction, TupleTableSlot *
* If we don't find anything, loop around and grab the next TID from
* the index.
*/
- Assert(ItemPointerIsValid(&scan->xs_heaptid));
+ if (!scan->xs_want_rowid)
+ Assert(ItemPointerIsValid(&scan->xs_heaptid));
if (index_fetch_heap(scan, slot))
return true;
}
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 59155a7bea6..b661adb689e 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -127,7 +127,8 @@ bthandler(PG_FUNCTION_ARGS)
amroutine->ambuild = btbuild;
amroutine->ambuildempty = btbuildempty;
- amroutine->aminsert = btinsert;
+ amroutine->aminsert = NULL;
+ amroutine->aminsertextended = btinsert;
amroutine->aminsertcleanup = NULL;
amroutine->ambulkdelete = btbulkdelete;
amroutine->amvacuumcleanup = btvacuumcleanup;
@@ -180,13 +181,14 @@ btbuildempty(Relation index)
*/
bool
btinsert(Relation rel, Datum *values, bool *isnull,
- ItemPointer ht_ctid, Relation heapRel,
+ Datum tupleid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
IndexInfo *indexInfo)
{
bool result;
IndexTuple itup;
+ ItemPointer ht_ctid = DatumGetItemPointer(tupleid);
/* generate an index tuple */
itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c
index c22ccec789d..55ff1fdbfaf 100644
--- a/src/backend/access/nbtree/nbtutils.c
+++ b/src/backend/access/nbtree/nbtutils.c
@@ -73,7 +73,6 @@ static int _bt_binsrch_array_skey(FmgrInfo *orderproc,
Datum tupdatum, bool tupnull,
BTArrayKeyInfo *array, ScanKey cur,
int32 *set_elem_result);
-static bool _bt_advance_array_keys_increment(IndexScanDesc scan, ScanDirection dir);
static void _bt_rewind_nonrequired_arrays(IndexScanDesc scan, ScanDirection dir);
static bool _bt_tuple_before_array_skeys(IndexScanDesc scan, ScanDirection dir,
IndexTuple tuple, TupleDesc tupdesc, int tupnatts,
@@ -1377,7 +1376,7 @@ _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir)
* On false result, the scankeys stay the same, and the array keys are not
* advanced (every array remains at its final element for scan direction).
*/
-static bool
+bool
_bt_advance_array_keys_increment(IndexScanDesc scan, ScanDirection dir)
{
BTScanOpaque so = (BTScanOpaque) scan->opaque;
diff --git a/src/backend/access/spgist/spginsert.c b/src/backend/access/spgist/spginsert.c
index 1bec19c2b88..57004e79f54 100644
--- a/src/backend/access/spgist/spginsert.c
+++ b/src/backend/access/spgist/spginsert.c
@@ -181,7 +181,7 @@ spgbuildempty(Relation index)
*/
bool
spginsert(Relation index, Datum *values, bool *isnull,
- ItemPointer ht_ctid, Relation heapRel,
+ Datum tupleid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
IndexInfo *indexInfo)
@@ -189,6 +189,7 @@ spginsert(Relation index, Datum *values, bool *isnull,
SpGistState spgstate;
MemoryContext oldCtx;
MemoryContext insertCtx;
+ ItemPointer ht_ctid = DatumGetItemPointer(tupleid);
insertCtx = AllocSetContextCreate(CurrentMemoryContext,
"SP-GiST insert temporary context",
diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c
index 76b80146ff0..c1228ed2c01 100644
--- a/src/backend/access/spgist/spgutils.c
+++ b/src/backend/access/spgist/spgutils.c
@@ -70,7 +70,8 @@ spghandler(PG_FUNCTION_ARGS)
amroutine->ambuild = spgbuild;
amroutine->ambuildempty = spgbuildempty;
- amroutine->aminsert = spginsert;
+ amroutine->aminsert = NULL;
+ amroutine->aminsertextended = spginsert;
amroutine->aminsertcleanup = NULL;
amroutine->ambulkdelete = spgbulkdelete;
amroutine->amvacuumcleanup = spgvacuumcleanup;
diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c
index e57a0b7ea31..8168bb78021 100644
--- a/src/backend/access/table/tableam.c
+++ b/src/backend/access/table/tableam.c
@@ -217,7 +217,7 @@ table_index_fetch_tuple_check(Relation rel,
slot = table_slot_create(rel, NULL);
scan = table_index_fetch_begin(rel);
- found = table_index_fetch_tuple(scan, tid, snapshot, slot, &call_again,
+ found = table_index_fetch_tuple(scan, PointerGetDatum(tid), snapshot, slot, &call_again,
all_dead);
table_index_fetch_end(scan);
ExecDropSingleTupleTableSlot(slot);
@@ -287,16 +287,23 @@ simple_table_tuple_insert(Relation rel, TupleTableSlot *slot)
* via ereport().
*/
void
-simple_table_tuple_delete(Relation rel, ItemPointer tid, Snapshot snapshot)
+simple_table_tuple_delete(Relation rel, Datum tupleid, Snapshot snapshot,
+ TupleTableSlot *oldSlot)
{
TM_Result result;
TM_FailureData tmfd;
+ int options = TABLE_MODIFY_WAIT; /* wait for commit */
- result = table_tuple_delete(rel, tid,
+ /* Fetch old tuple if the relevant slot is provided */
+ if (oldSlot)
+ options |= TABLE_MODIFY_FETCH_OLD_TUPLE;
+
+ result = table_tuple_delete(rel, tupleid,
GetCurrentCommandId(true),
snapshot, InvalidSnapshot,
- true /* wait for commit */ ,
- &tmfd, false /* changingPart */ );
+ options,
+ &tmfd, false /* changingPart */ ,
+ oldSlot);
switch (result)
{
@@ -332,20 +339,27 @@ simple_table_tuple_delete(Relation rel, ItemPointer tid, Snapshot snapshot)
* via ereport().
*/
void
-simple_table_tuple_update(Relation rel, ItemPointer otid,
+simple_table_tuple_update(Relation rel, Datum tupleid,
TupleTableSlot *slot,
Snapshot snapshot,
- TU_UpdateIndexes *update_indexes)
+ TU_UpdateIndexes *update_indexes,
+ TupleTableSlot *oldSlot)
{
TM_Result result;
TM_FailureData tmfd;
LockTupleMode lockmode;
+ int options = TABLE_MODIFY_WAIT; /* wait for commit */
+
+ /* Fetch old tuple if the relevant slot is provided */
+ if (oldSlot)
+ options |= TABLE_MODIFY_FETCH_OLD_TUPLE;
- result = table_tuple_update(rel, otid, slot,
+ result = table_tuple_update(rel, tupleid, slot,
GetCurrentCommandId(true),
snapshot, InvalidSnapshot,
- true /* wait for commit */ ,
- &tmfd, &lockmode, update_indexes);
+ options,
+ &tmfd, &lockmode, update_indexes,
+ oldSlot);
switch (result)
{
diff --git a/src/backend/access/table/tableamapi.c b/src/backend/access/table/tableamapi.c
index e9b598256fb..cd01bd9934f 100644
--- a/src/backend/access/table/tableamapi.c
+++ b/src/backend/access/table/tableamapi.c
@@ -13,10 +13,11 @@
#include "access/tableam.h"
#include "access/xact.h"
+#include "catalog/pg_am.h"
#include "commands/defrem.h"
#include "miscadmin.h"
#include "utils/guc_hooks.h"
-
+#include "utils/syscache.h"
/*
* GetTableAmRoutine
@@ -68,8 +69,7 @@ GetTableAmRoutine(Oid amhandler)
* Could be made optional, but would require throwing error during
* parse-analysis.
*/
- Assert(routine->tuple_insert_speculative != NULL);
- Assert(routine->tuple_complete_speculative != NULL);
+ Assert(routine->tuple_insert_with_arbiter != NULL);
Assert(routine->multi_insert != NULL);
Assert(routine->tuple_delete != NULL);
@@ -97,9 +97,29 @@ GetTableAmRoutine(Oid amhandler)
Assert(routine->scan_sample_next_block != NULL);
Assert(routine->scan_sample_next_tuple != NULL);
+ Assert(routine->tuple_is_current != NULL);
+
return routine;
}
+const TableAmRoutine *
+GetTableAmRoutineByAmOid(Oid amoid)
+{
+ HeapTuple ht_am;
+ Form_pg_am amrec;
+ const TableAmRoutine *tableam = NULL;
+
+ ht_am = SearchSysCache1(AMOID, ObjectIdGetDatum(amoid));
+ if (!HeapTupleIsValid(ht_am))
+ elog(ERROR, "cache lookup failed for access method %u",
+ amoid);
+ amrec = (Form_pg_am)GETSTRUCT(ht_am);
+
+ tableam = GetTableAmRoutine(amrec->amhandler);
+ ReleaseSysCache(ht_am);
+ return tableam;
+}
+
/* check_hook: validate new default_table_access_method */
bool
check_default_table_access_method(char **newval, void **extra, GucSource source)
diff --git a/src/backend/access/table/toast_helper.c b/src/backend/access/table/toast_helper.c
index 53224932f0d..a0738622657 100644
--- a/src/backend/access/table/toast_helper.c
+++ b/src/backend/access/table/toast_helper.c
@@ -71,10 +71,10 @@ toast_tuple_init(ToastTupleContext *ttc)
* we have to delete it later.
*/
if (att->attlen == -1 && !ttc->ttc_oldisnull[i] &&
- VARATT_IS_EXTERNAL_ONDISK(old_value))
+ (VARATT_IS_EXTERNAL_ONDISK(old_value) || VARATT_IS_EXTERNAL_ORIOLEDB(old_value)))
{
if (ttc->ttc_isnull[i] ||
- !VARATT_IS_EXTERNAL_ONDISK(new_value) ||
+ !(VARATT_IS_EXTERNAL_ONDISK(new_value) || VARATT_IS_EXTERNAL_ORIOLEDB(new_value)) ||
memcmp((char *) old_value, (char *) new_value,
VARSIZE_EXTERNAL(old_value)) != 0)
{
@@ -330,7 +330,7 @@ toast_delete_external(Relation rel, const Datum *values, const bool *isnull,
if (isnull[i])
continue;
- else if (VARATT_IS_EXTERNAL_ONDISK(value))
+ else if (VARATT_IS_EXTERNAL_ONDISK(value) || VARATT_IS_EXTERNAL_ORIOLEDB(value))
toast_delete_datum(rel, value, is_speculative);
}
}
diff --git a/src/backend/access/transam/transam.c b/src/backend/access/transam/transam.c
index 75b5325df8b..95647a357ea 100644
--- a/src/backend/access/transam/transam.c
+++ b/src/backend/access/transam/transam.c
@@ -22,6 +22,7 @@
#include "access/clog.h"
#include "access/subtrans.h"
#include "access/transam.h"
+#include "storage/proc.h"
#include "utils/snapmgr.h"
/*
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index 4cecf630060..198f1b403c5 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -212,6 +212,7 @@ typedef struct TransactionStateData
bool parallelChildXact; /* is any parent transaction parallel? */
bool chain; /* start a new block after this one */
bool topXidLogged; /* for a subxact: is top-level XID logged? */
+ CommitSeqNo csn;
struct TransactionStateData *parent; /* back link to parent */
} TransactionStateData;
@@ -245,6 +246,7 @@ static TransactionStateData TopTransactionStateData = {
.state = TRANS_DEFAULT,
.blockState = TBLOCK_DEFAULT,
.topXidLogged = false,
+ .csn = COMMITSEQNO_INPROGRESS
};
/*
@@ -323,6 +325,7 @@ typedef struct SubXactCallbackItem
static SubXactCallbackItem *SubXact_callbacks = NULL;
+xact_redo_hook_type xact_redo_hook = NULL;
/* local function prototypes */
static void AssignTransactionId(TransactionState s);
@@ -2035,6 +2038,7 @@ StartTransaction(void)
*/
s->state = TRANS_START;
s->fullTransactionId = InvalidFullTransactionId; /* until assigned */
+ s->csn = COMMITSEQNO_INPROGRESS;
/* Determine if statements are logged in this transaction */
xact_is_sampled = log_xact_sample_rate != 0 &&
@@ -2336,7 +2340,9 @@ CommitTransaction(void)
* must be done _before_ releasing locks we hold and _after_
* RecordTransactionCommit.
*/
+ MyProc->lastCommittedCSN = s->csn;
ProcArrayEndTransaction(MyProc, latestXid);
+ s->csn = MyProc->lastCommittedCSN;
/*
* This is all post-commit cleanup. Note that if an error is raised here,
@@ -2770,6 +2776,7 @@ AbortTransaction(void)
* while cleaning up!
*/
LWLockReleaseAll();
+ CustomErrorCleanup();
/* Clear wait information and command progress indicator */
pgstat_report_wait_end();
@@ -5180,6 +5187,7 @@ AbortSubTransaction(void)
* Buffer locks, for example? I don't think so but I'm not sure.
*/
LWLockReleaseAll();
+ CustomErrorCleanup();
pgstat_report_wait_end();
pgstat_progress_end_command();
@@ -6073,6 +6081,9 @@ xact_redo_commit(xl_xact_parsed_commit *parsed,
TransactionId max_xid;
TimestampTz commit_time;
+ if (xact_redo_hook)
+ xact_redo_hook(xid, lsn);
+
Assert(TransactionIdIsValid(xid));
max_xid = TransactionIdLatest(xid, parsed->nsubxacts, parsed->subxacts);
@@ -6382,3 +6393,9 @@ xact_redo(XLogReaderState *record)
else
elog(PANIC, "xact_redo: unknown op code %u", info);
}
+
+CommitSeqNo
+GetCurrentCSN(void)
+{
+ return TopTransactionStateData.csn;
+}
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 7f136026277..6e12db59c9c 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -135,6 +135,7 @@ int wal_retrieve_retry_interval = 5000;
int max_slot_wal_keep_size_mb = -1;
int wal_decode_buffer_size = 512 * 1024;
bool track_wal_io_timing = false;
+CommitSeqNo startupCommitSeqNo = COMMITSEQNO_FIRST_NORMAL + 1;
#ifdef WAL_DEBUG
bool XLOG_DEBUG = false;
@@ -142,6 +143,11 @@ bool XLOG_DEBUG = false;
int wal_segment_size = DEFAULT_XLOG_SEG_SIZE;
+/* Hook for plugins to get control in CheckPointGuts() */
+CheckPoint_hook_type CheckPoint_hook = NULL;
+double CheckPointProgress;
+after_checkpoint_cleanup_hook_type after_checkpoint_cleanup_hook = NULL;
+
/*
* Number of WAL insertion locks to use. A higher value allows more insertions
* to happen concurrently, but adds some CPU overhead to flushing the WAL,
@@ -5068,6 +5074,7 @@ BootStrapXLOG(void)
TransamVariables->nextXid = checkPoint.nextXid;
TransamVariables->nextOid = checkPoint.nextOid;
TransamVariables->oidCount = 0;
+ pg_atomic_write_u64(&TransamVariables->nextCommitSeqNo, COMMITSEQNO_FIRST_NORMAL + 1);
MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
AdvanceOldestClogXid(checkPoint.oldestXid);
SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
@@ -5415,6 +5422,7 @@ StartupXLOG(void)
XLogRecPtr missingContrecPtr;
TransactionId oldestActiveXID;
bool promoted = false;
+ bool wasInRecovery;
/*
* We should have an aux process resource owner to use, and we should not
@@ -5544,6 +5552,7 @@ StartupXLOG(void)
TransamVariables->nextXid = checkPoint.nextXid;
TransamVariables->nextOid = checkPoint.nextOid;
TransamVariables->oidCount = 0;
+ pg_atomic_write_u64(&TransamVariables->nextCommitSeqNo, startupCommitSeqNo);
MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
AdvanceOldestClogXid(checkPoint.oldestXid);
SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
@@ -6042,6 +6051,8 @@ StartupXLOG(void)
*/
PreallocXlogFiles(EndOfLog, newTLI);
+ wasInRecovery = InRecovery;
+
/*
* Okay, we're officially UP.
*/
@@ -6120,6 +6131,9 @@ StartupXLOG(void)
*/
CompleteCommitTsInitialization();
+ if (wasInRecovery && after_checkpoint_cleanup_hook)
+ after_checkpoint_cleanup_hook(EndOfLog, 0);
+
/*
* All done with end-of-recovery actions.
*
@@ -7315,6 +7329,9 @@ CreateCheckPoint(int flags)
if (!RecoveryInProgress())
TruncateSUBTRANS(GetOldestTransactionIdConsideredRunning());
+ if (after_checkpoint_cleanup_hook)
+ after_checkpoint_cleanup_hook(ProcLastRecPtr, flags);
+
/* Real work is done; log and update stats. */
LogCheckpointEnd(false);
@@ -7490,6 +7507,9 @@ CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
CheckPointPredicate();
CheckPointBuffers(flags);
+ if (CheckPoint_hook)
+ CheckPoint_hook(checkPointRedo, flags);
+
/* Perform all queued up fsyncs */
TRACE_POSTGRESQL_BUFFER_CHECKPOINT_SYNC_START();
CheckpointStats.ckpt_sync_t = GetCurrentTimestamp();
@@ -9068,6 +9088,19 @@ get_backup_status(void)
return sessionBackupState;
}
+/*
+ * Check if there is a backup in progress.
+ *
+ * We do this check without lock assuming 32-bit reads are atomic. In fact,
+ * the false result means that there was at least a moment of time when there
+ * were no backups.
+ */
+bool
+have_backup_in_progress(void)
+{
+ return (XLogCtl->Insert.runningBackups > 0);
+}
+
/*
* do_pg_backup_stop
*
@@ -9475,3 +9508,5 @@ SetWalWriterSleeping(bool sleeping)
XLogCtl->WalWriterSleeping = sleeping;
SpinLockRelease(&XLogCtl->info_lck);
}
+
+void (*RedoShutdownHook) (void) = NULL;
diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c
index b45b8331720..a3e7fa810f8 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -1856,6 +1856,8 @@ PerformWalRecovery(void)
* exit with special return code to request shutdown of
* postmaster. Log messages issued from postmaster.
*/
+ if (RedoShutdownHook != NULL)
+ RedoShutdownHook();
proc_exit(3);
case RECOVERY_TARGET_ACTION_PAUSE:
diff --git a/src/backend/catalog/aclchk.c b/src/backend/catalog/aclchk.c
index a44ccee3b68..043303bc2e3 100644
--- a/src/backend/catalog/aclchk.c
+++ b/src/backend/catalog/aclchk.c
@@ -1638,7 +1638,7 @@ expand_all_col_privileges(Oid table_oid, Form_pg_class classForm,
AttrNumber curr_att;
Assert(classForm->relnatts - FirstLowInvalidHeapAttributeNumber < num_col_privileges);
- for (curr_att = FirstLowInvalidHeapAttributeNumber + 1;
+ for (curr_att = FirstLowInvalidHeapAttributeNumber + 2;
curr_att <= classForm->relnatts;
curr_att++)
{
diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c
index 0489cbabcb8..b3873fbd2ac 100644
--- a/src/backend/catalog/dependency.c
+++ b/src/backend/catalog/dependency.c
@@ -186,6 +186,7 @@ deleteObjectsInList(ObjectAddresses *targetObjects, Relation *depRel,
int flags)
{
int i;
+ bool *depends_on_relation;
/*
* Keep track of objects for event triggers, if necessary.
@@ -213,6 +214,33 @@ deleteObjectsInList(ObjectAddresses *targetObjects, Relation *depRel,
}
}
+ depends_on_relation = palloc0(sizeof(bool) * targetObjects->numrefs);
+
+ for (i = targetObjects->numrefs - 1; i >= 0; i--)
+ {
+ ObjectAddressExtra *thisextra = targetObjects->extras + i;
+ int j;
+
+ if (thisextra->dependee.classId == RelationRelationId &&
+ thisextra->dependee.objectSubId == 0)
+ {
+ depends_on_relation[i] = true;
+ continue;
+ }
+
+ for (j = i + 1; j < targetObjects->numrefs; j++)
+ {
+ ObjectAddress *depobj = targetObjects->refs + j;
+ if (depobj->classId == thisextra->dependee.classId &&
+ depobj->objectId == thisextra->dependee.objectId &&
+ depobj->objectSubId == thisextra->dependee.objectSubId)
+ {
+ depends_on_relation[i] = depends_on_relation[j];
+ break;
+ }
+ }
+ }
+
/*
* Delete all the objects in the proper order, except that if told to, we
* should skip the original object(s).
@@ -221,13 +249,19 @@ deleteObjectsInList(ObjectAddresses *targetObjects, Relation *depRel,
{
ObjectAddress *thisobj = targetObjects->refs + i;
ObjectAddressExtra *thisextra = targetObjects->extras + i;
+ int temp_flags = flags;
if ((flags & PERFORM_DELETION_SKIP_ORIGINAL) &&
(thisextra->flags & DEPFLAG_ORIGINAL))
continue;
- deleteOneObject(thisobj, depRel, flags);
+ if (depends_on_relation[i])
+ temp_flags |= PERFORM_DELETION_OF_RELATION;
+
+ deleteOneObject(thisobj, depRel, temp_flags);
}
+
+ pfree(depends_on_relation);
}
/*
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index a819b4197ce..92211c04d57 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -119,9 +119,6 @@ static void UpdateIndexRelation(Oid indexoid, Oid heapoid,
bool immediate,
bool isvalid,
bool isready);
-static void index_update_stats(Relation rel,
- bool hasindex,
- double reltuples);
static void IndexCheckExclusion(Relation heapRelation,
Relation indexRelation,
IndexInfo *indexInfo);
@@ -295,7 +292,7 @@ ConstructTupleDescriptor(Relation heapRelation,
int i;
/* We need access to the index AM's API struct */
- amroutine = GetIndexAmRoutineByAmId(accessMethodId, false);
+ amroutine = GetIndexAmRoutineByAmId(InvalidOid, accessMethodId, false);
/* ... and to the table's tuple descriptor */
heapTupDesc = RelationGetDescr(heapRelation);
@@ -2651,9 +2648,6 @@ BuildSpeculativeIndexInfo(Relation index, IndexInfo *ii)
*/
Assert(ii->ii_Unique);
- if (index->rd_rel->relam != BTREE_AM_OID)
- elog(ERROR, "unexpected non-btree speculative unique index");
-
ii->ii_UniqueOps = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
ii->ii_UniqueProcs = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
ii->ii_UniqueStrats = (uint16 *) palloc(sizeof(uint16) * indnkeyatts);
@@ -2777,7 +2771,7 @@ FormIndexDatum(IndexInfo *indexInfo,
* index. When updating an index, it's important because some index AMs
* expect a relcache flush to occur after REINDEX.
*/
-static void
+void
index_update_stats(Relation rel,
bool hasindex,
double reltuples)
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index c590a2adc35..f63faedfcfb 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -87,9 +87,6 @@ static void compute_index_stats(Relation onerel, double totalrows,
MemoryContext col_context);
static VacAttrStats *examine_attribute(Relation onerel, int attnum,
Node *index_expr);
-static int acquire_sample_rows(Relation onerel, int elevel,
- HeapTuple *rows, int targrows,
- double *totalrows, double *totaldeadrows);
static int compare_rows(const void *a, const void *b, void *arg);
static int acquire_inherited_sample_rows(Relation onerel, int elevel,
HeapTuple *rows, int targrows,
@@ -190,10 +187,7 @@ analyze_rel(Oid relid, RangeVar *relation,
if (onerel->rd_rel->relkind == RELKIND_RELATION ||
onerel->rd_rel->relkind == RELKIND_MATVIEW)
{
- /* Regular table, so we'll use the regular row acquisition function */
- acquirefunc = acquire_sample_rows;
- /* Also get regular table's size */
- relpages = RelationGetNumberOfBlocks(onerel);
+ table_analyze(onerel, &acquirefunc, &relpages);
}
else if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
{
@@ -1154,7 +1148,7 @@ block_sampling_read_stream_next(ReadStream *stream,
* block. The previous sampling method put too much credence in the row
* density near the start of the table.
*/
-static int
+int
acquire_sample_rows(Relation onerel, int elevel,
HeapTuple *rows, int targrows,
double *totalrows, double *totaldeadrows)
@@ -1421,9 +1415,7 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
if (childrel->rd_rel->relkind == RELKIND_RELATION ||
childrel->rd_rel->relkind == RELKIND_MATVIEW)
{
- /* Regular table, so use the regular row acquisition function */
- acquirefunc = acquire_sample_rows;
- relpages = RelationGetNumberOfBlocks(childrel);
+ table_analyze(childrel, &acquirefunc, &relpages);
}
else if (childrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
{
diff --git a/src/backend/commands/constraint.c b/src/backend/commands/constraint.c
index f7dc42f7452..ea5a1f365b1 100644
--- a/src/backend/commands/constraint.c
+++ b/src/backend/commands/constraint.c
@@ -109,7 +109,7 @@ unique_key_recheck(PG_FUNCTION_ARGS)
IndexFetchTableData *scan = table_index_fetch_begin(trigdata->tg_relation);
bool call_again = false;
- if (!table_index_fetch_tuple(scan, &tmptid, SnapshotSelf, slot,
+ if (!table_index_fetch_tuple(scan, PointerGetDatum(&tmptid), SnapshotSelf, slot,
&call_again, NULL))
{
/*
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 8086607710e..e6c989aea19 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -81,9 +81,6 @@ static void report_triggers(ResultRelInfo *rInfo, bool show_relname,
ExplainState *es);
static double elapsed_time(instr_time *starttime);
static bool ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used);
-static void ExplainNode(PlanState *planstate, List *ancestors,
- const char *relationship, const char *plan_name,
- ExplainState *es);
static void show_plan_tlist(PlanState *planstate, List *ancestors,
ExplainState *es);
static void show_expression(Node *node, const char *qlabel,
@@ -92,9 +89,6 @@ static void show_expression(Node *node, const char *qlabel,
static void show_qual(List *qual, const char *qlabel,
PlanState *planstate, List *ancestors,
bool useprefix, ExplainState *es);
-static void show_scan_qual(List *qual, const char *qlabel,
- PlanState *planstate, List *ancestors,
- ExplainState *es);
static void show_upper_qual(List *qual, const char *qlabel,
PlanState *planstate, List *ancestors,
ExplainState *es);
@@ -131,8 +125,6 @@ static void show_memoize_info(MemoizeState *mstate, List *ancestors,
static void show_hashagg_info(AggState *aggstate, ExplainState *es);
static void show_tidbitmap_info(BitmapHeapScanState *planstate,
ExplainState *es);
-static void show_instrumentation_count(const char *qlabel, int which,
- PlanState *planstate, ExplainState *es);
static void show_foreignscan_info(ForeignScanState *fsstate, ExplainState *es);
static const char *explain_get_index_name(Oid indexId);
static bool peek_buffer_usage(ExplainState *es, const BufferUsage *usage);
@@ -1363,7 +1355,7 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used)
* to the nesting depth of logical output groups, and therefore is controlled
* by ExplainOpenGroup/ExplainCloseGroup.
*/
-static void
+void
ExplainNode(PlanState *planstate, List *ancestors,
const char *relationship, const char *plan_name,
ExplainState *es)
@@ -2527,7 +2519,7 @@ show_qual(List *qual, const char *qlabel,
/*
* Show a qualifier expression for a scan plan node
*/
-static void
+void
show_scan_qual(List *qual, const char *qlabel,
PlanState *planstate, List *ancestors,
ExplainState *es)
@@ -3618,7 +3610,7 @@ show_tidbitmap_info(BitmapHeapScanState *planstate, ExplainState *es)
*
* "which" identifies which instrumentation counter to print
*/
-static void
+void
show_instrumentation_count(const char *qlabel, int which,
PlanState *planstate, ExplainState *es)
{
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index b987e023849..c8a926c0463 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -71,6 +71,7 @@
#include "utils/snapmgr.h"
#include "utils/syscache.h"
+GetDefaultOpClass_hook_type GetDefaultOpClass_hook = NULL;
/* non-export function prototypes */
static bool CompareOpclassOptions(const Datum *opts1, const Datum *opts2, int natts);
@@ -91,11 +92,7 @@ static void ComputeIndexAttrs(IndexInfo *indexInfo,
Oid ddl_userid,
int ddl_sec_context,
int *ddl_save_nestlevel);
-static char *ChooseIndexName(const char *tabname, Oid namespaceId,
- const List *colnames, const List *exclusionOpNames,
- bool primary, bool isconstraint);
static char *ChooseIndexNameAddition(const List *colnames);
-static List *ChooseIndexColumnNames(const List *indexElems);
static void ReindexIndex(const ReindexStmt *stmt, const ReindexParams *params,
bool isTopLevel);
static void RangeVarCallbackForReindexIndex(const RangeVar *relation,
@@ -223,7 +220,7 @@ CheckIndexCompatible(Oid oldId,
accessMethodName)));
accessMethodForm = (Form_pg_am) GETSTRUCT(tuple);
accessMethodId = accessMethodForm->oid;
- amRoutine = GetIndexAmRoutine(accessMethodForm->amhandler);
+ amRoutine = GetIndexAmRoutineExtended(oldId, accessMethodForm->amhandler);
ReleaseSysCache(tuple);
amcanorder = amRoutine->amcanorder;
@@ -844,7 +841,7 @@ DefineIndex(Oid tableId,
}
accessMethodForm = (Form_pg_am) GETSTRUCT(tuple);
accessMethodId = accessMethodForm->oid;
- amRoutine = GetIndexAmRoutine(accessMethodForm->amhandler);
+ amRoutine = GetIndexAmRoutineWithTableAM(rel->rd_rel->relam, accessMethodForm->amhandler);
pgstat_progress_update_param(PROGRESS_CREATEIDX_ACCESS_METHOD_OID,
accessMethodId);
@@ -2317,6 +2314,9 @@ GetDefaultOpClass(Oid type_id, Oid am_id)
/* If it's a domain, look at the base type instead */
type_id = getBaseType(type_id);
+ if (GetDefaultOpClass_hook)
+ return GetDefaultOpClass_hook(type_id, am_id);
+
tcategory = TypeCategory(type_id);
/*
@@ -2532,7 +2532,7 @@ ChooseRelationName(const char *name1, const char *name2,
*
* The argument list is pretty ad-hoc :-(
*/
-static char *
+char *
ChooseIndexName(const char *tabname, Oid namespaceId,
const List *colnames, const List *exclusionOpNames,
bool primary, bool isconstraint)
@@ -2621,7 +2621,7 @@ ChooseIndexNameAddition(const List *colnames)
*
* Returns a List of plain strings (char *, not String nodes).
*/
-static List *
+List *
ChooseIndexColumnNames(const List *indexElems)
{
List *result = NIL;
diff --git a/src/backend/commands/opclasscmds.c b/src/backend/commands/opclasscmds.c
index b8b5c147c5d..fe91b816c32 100644
--- a/src/backend/commands/opclasscmds.c
+++ b/src/backend/commands/opclasscmds.c
@@ -42,6 +42,7 @@
#include "parser/parse_oper.h"
#include "parser/parse_type.h"
#include "utils/acl.h"
+#include "postgres_ext.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
@@ -376,7 +377,7 @@ DefineOpClass(CreateOpClassStmt *stmt)
amform = (Form_pg_am) GETSTRUCT(tup);
amoid = amform->oid;
- amroutine = GetIndexAmRoutineByAmId(amoid, false);
+ amroutine = GetIndexAmRoutineByAmId(InvalidOid, amoid, false);
ReleaseSysCache(tup);
maxOpNumber = amroutine->amstrategies;
@@ -834,7 +835,7 @@ AlterOpFamily(AlterOpFamilyStmt *stmt)
amform = (Form_pg_am) GETSTRUCT(tup);
amoid = amform->oid;
- amroutine = GetIndexAmRoutineByAmId(amoid, false);
+ amroutine = GetIndexAmRoutineByAmId(InvalidOid, amoid, false);
ReleaseSysCache(tup);
maxOpNumber = amroutine->amstrategies;
@@ -881,7 +882,7 @@ AlterOpFamilyAdd(AlterOpFamilyStmt *stmt, Oid amoid, Oid opfamilyoid,
int maxOpNumber, int maxProcNumber, int optsProcNumber,
List *items)
{
- IndexAmRoutine *amroutine = GetIndexAmRoutineByAmId(amoid, false);
+ IndexAmRoutine *amroutine = GetIndexAmRoutineByAmId(InvalidOid, amoid, false);
List *operators; /* OpFamilyMember list for operators */
List *procedures; /* OpFamilyMember list for support procs */
ListCell *l;
@@ -1164,7 +1165,7 @@ assignOperTypes(OpFamilyMember *member, Oid amoid, Oid typeoid)
* the family has been created but not yet populated with the required
* operators.)
*/
- IndexAmRoutine *amroutine = GetIndexAmRoutineByAmId(amoid, false);
+ IndexAmRoutine *amroutine = GetIndexAmRoutineByAmId(InvalidOid, amoid, false);
if (!amroutine->amcanorderbyop)
ereport(ERROR,
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 0ecdecc2564..80e9048d6de 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -700,6 +700,7 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
ObjectAddress address;
LOCKMODE parentLockmode;
Oid accessMethodId = InvalidOid;
+ const TableAmRoutine *tableam = NULL;
/*
* Truncate relname to appropriate length (probably a waste of time, as
@@ -835,6 +836,29 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
if (!OidIsValid(ownerId))
ownerId = GetUserId();
+
+ /*
+ * For relations with table AM and partitioned tables, select access
+ * method to use: an explicitly indicated one, or (in the case of a
+ * partitioned table) the parent's, if it has one.
+ */
+ if (stmt->accessMethod != NULL)
+ {
+ Assert(RELKIND_HAS_TABLE_AM(relkind) || relkind == RELKIND_PARTITIONED_TABLE);
+ accessMethodId = get_table_am_oid(stmt->accessMethod, false);
+ }
+ else if (RELKIND_HAS_TABLE_AM(relkind) || relkind == RELKIND_PARTITIONED_TABLE)
+ {
+ if (stmt->partbound)
+ {
+ Assert(list_length(inheritOids) == 1);
+ accessMethodId = get_rel_relam(linitial_oid(inheritOids));
+ }
+
+ if (RELKIND_HAS_TABLE_AM(relkind) && !OidIsValid(accessMethodId))
+ accessMethodId = get_table_am_oid(default_table_access_method, false);
+ }
+
/*
* Parse and validate reloptions, if any.
*/
@@ -843,6 +867,12 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
switch (relkind)
{
+ case RELKIND_RELATION:
+ case RELKIND_TOASTVALUE:
+ case RELKIND_MATVIEW:
+ tableam = GetTableAmRoutineByAmOid(accessMethodId);
+ (void) tableam_reloptions(tableam, relkind, reloptions, true);
+ break;
case RELKIND_VIEW:
(void) view_reloptions(reloptions, true);
break;
@@ -851,6 +881,7 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
break;
default:
(void) heap_reloptions(relkind, reloptions, true);
+ break;
}
if (stmt->ofTypename)
@@ -941,28 +972,6 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
}
}
- /*
- * For relations with table AM and partitioned tables, select access
- * method to use: an explicitly indicated one, or (in the case of a
- * partitioned table) the parent's, if it has one.
- */
- if (stmt->accessMethod != NULL)
- {
- Assert(RELKIND_HAS_TABLE_AM(relkind) || relkind == RELKIND_PARTITIONED_TABLE);
- accessMethodId = get_table_am_oid(stmt->accessMethod, false);
- }
- else if (RELKIND_HAS_TABLE_AM(relkind) || relkind == RELKIND_PARTITIONED_TABLE)
- {
- if (stmt->partbound)
- {
- Assert(list_length(inheritOids) == 1);
- accessMethodId = get_rel_relam(linitial_oid(inheritOids));
- }
-
- if (RELKIND_HAS_TABLE_AM(relkind) && !OidIsValid(accessMethodId))
- accessMethodId = get_table_am_oid(default_table_access_method, false);
- }
-
/*
* Create the relation. Inherited defaults and constraints are passed in
* for immediate handling --- since they don't need parsing, they can be
@@ -6304,8 +6313,10 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
/* Write the tuple out to the new relation */
if (newrel)
+ {
table_tuple_insert(newrel, insertslot, mycid,
ti_options, bistate);
+ }
ResetExprContext(econtext);
@@ -14933,7 +14944,8 @@ ATExecSetRelOptions(Relation rel, List *defList, AlterTableType operation,
case RELKIND_RELATION:
case RELKIND_TOASTVALUE:
case RELKIND_MATVIEW:
- (void) heap_reloptions(rel->rd_rel->relkind, newOptions, true);
+ (void) table_reloptions(rel, rel->rd_rel->relkind,
+ newOptions, true);
break;
case RELKIND_PARTITIONED_TABLE:
(void) partitioned_table_reloptions(newOptions, true);
@@ -18629,12 +18641,14 @@ static void
AttachPartitionEnsureIndexes(List **wqueue, Relation rel, Relation attachrel)
{
List *idxes;
+ List *buildIdxes = NIL;
List *attachRelIdxs;
Relation *attachrelIdxRels;
IndexInfo **attachInfos;
ListCell *cell;
MemoryContext cxt;
MemoryContext oldcxt;
+ AttrMap *attmap;
cxt = AllocSetContextCreate(CurrentMemoryContext,
"AttachPartitionEnsureIndexes",
@@ -18683,6 +18697,10 @@ AttachPartitionEnsureIndexes(List **wqueue, Relation rel, Relation attachrel)
goto out;
}
+ attmap = build_attrmap_by_name(RelationGetDescr(attachrel),
+ RelationGetDescr(rel),
+ false);
+
/*
* For each index on the partitioned table, find a matching one in the
* partition-to-be; if one is not found, create one.
@@ -18692,7 +18710,6 @@ AttachPartitionEnsureIndexes(List **wqueue, Relation rel, Relation attachrel)
Oid idx = lfirst_oid(cell);
Relation idxRel = index_open(idx, AccessShareLock);
IndexInfo *info;
- AttrMap *attmap;
bool found = false;
Oid constraintOid;
@@ -18708,9 +18725,6 @@ AttachPartitionEnsureIndexes(List **wqueue, Relation rel, Relation attachrel)
/* construct an indexinfo to compare existing indexes against */
info = BuildIndexInfo(idxRel);
- attmap = build_attrmap_by_name(RelationGetDescr(attachrel),
- RelationGetDescr(rel),
- false);
constraintOid = get_relation_idx_constraint_oid(RelationGetRelid(rel), idx);
/*
@@ -18776,19 +18790,7 @@ AttachPartitionEnsureIndexes(List **wqueue, Relation rel, Relation attachrel)
* now.
*/
if (!found)
- {
- IndexStmt *stmt;
- Oid conOid;
-
- stmt = generateClonedIndexStmt(NULL,
- idxRel, attmap,
- &conOid);
- DefineIndex(RelationGetRelid(attachrel), stmt, InvalidOid,
- RelationGetRelid(idxRel),
- conOid,
- -1,
- true, false, false, false, false);
- }
+ buildIdxes = lappend_oid(buildIdxes, RelationGetRelid(idxRel));
index_close(idxRel, AccessShareLock);
}
@@ -18797,6 +18799,25 @@ AttachPartitionEnsureIndexes(List **wqueue, Relation rel, Relation attachrel)
/* Clean up. */
for (int i = 0; i < list_length(attachRelIdxs); i++)
index_close(attachrelIdxRels[i], AccessShareLock);
+
+ foreach(cell, buildIdxes)
+ {
+ Oid idx = lfirst_oid(cell);
+ Relation idxRel = index_open(idx, AccessShareLock);
+ IndexStmt *stmt;
+ Oid conOid;
+
+ stmt = generateClonedIndexStmt(NULL,
+ idxRel, attmap,
+ &conOid);
+ DefineIndex(RelationGetRelid(attachrel), stmt, InvalidOid,
+ RelationGetRelid(idxRel),
+ conOid,
+ -1,
+ true, false, false, false, false);
+ index_close(idxRel, AccessShareLock);
+ }
+
MemoryContextSwitchTo(oldcxt);
MemoryContextDelete(cxt);
}
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index 58b7fc5bbd5..1aee9d64212 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -76,7 +76,7 @@ static void SetTriggerFlags(TriggerDesc *trigdesc, Trigger *trigger);
static bool GetTupleForTrigger(EState *estate,
EPQState *epqstate,
ResultRelInfo *relinfo,
- ItemPointer tid,
+ Datum tupleid,
LockTupleMode lockmode,
TupleTableSlot *oldslot,
TupleTableSlot **epqslot,
@@ -2681,7 +2681,7 @@ ExecASDeleteTriggers(EState *estate, ResultRelInfo *relinfo,
bool
ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
ResultRelInfo *relinfo,
- ItemPointer tupleid,
+ Datum tupleid,
HeapTuple fdw_trigtuple,
TupleTableSlot **epqslot,
TM_Result *tmresult,
@@ -2695,7 +2695,7 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
bool should_free = false;
int i;
- Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid));
+ Assert(HeapTupleIsValid(fdw_trigtuple) ^ (DatumGetPointer(tupleid) != NULL));
if (fdw_trigtuple == NULL)
{
TupleTableSlot *epqslot_candidate = NULL;
@@ -2772,8 +2772,8 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
void
ExecARDeleteTriggers(EState *estate,
ResultRelInfo *relinfo,
- ItemPointer tupleid,
HeapTuple fdw_trigtuple,
+ TupleTableSlot *slot,
TransitionCaptureState *transition_capture,
bool is_crosspart_update)
{
@@ -2782,20 +2782,11 @@ ExecARDeleteTriggers(EState *estate,
if ((trigdesc && trigdesc->trig_delete_after_row) ||
(transition_capture && transition_capture->tcs_delete_old_table))
{
- TupleTableSlot *slot = ExecGetTriggerOldSlot(estate, relinfo);
-
- Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid));
- if (fdw_trigtuple == NULL)
- GetTupleForTrigger(estate,
- NULL,
- relinfo,
- tupleid,
- LockTupleExclusive,
- slot,
- NULL,
- NULL,
- NULL);
- else
+ /*
+ * Put the FDW old tuple into the slot. Otherwise, the caller is
+ * expected to have the old tuple already fetched into the slot.
+ */
+ if (fdw_trigtuple != NULL)
ExecForceStoreHeapTuple(fdw_trigtuple, slot, false);
AfterTriggerSaveEvent(estate, relinfo, NULL, NULL,
@@ -2932,7 +2923,7 @@ ExecASUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
bool
ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
ResultRelInfo *relinfo,
- ItemPointer tupleid,
+ Datum tupleid,
HeapTuple fdw_trigtuple,
TupleTableSlot *newslot,
TM_Result *tmresult,
@@ -2952,7 +2943,7 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
/* Determine lock mode to use */
lockmode = ExecUpdateLockMode(estate, relinfo);
- Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid));
+ Assert(HeapTupleIsValid(fdw_trigtuple) ^ (DatumGetPointer(tupleid) != NULL));
if (fdw_trigtuple == NULL)
{
TupleTableSlot *epqslot_candidate = NULL;
@@ -3086,18 +3077,17 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
* Note: 'src_partinfo' and 'dst_partinfo', when non-NULL, refer to the source
* and destination partitions, respectively, of a cross-partition update of
* the root partitioned table mentioned in the query, given by 'relinfo'.
- * 'tupleid' in that case refers to the ctid of the "old" tuple in the source
- * partition, and 'newslot' contains the "new" tuple in the destination
- * partition. This interface allows to support the requirements of
- * ExecCrossPartitionUpdateForeignKey(); is_crosspart_update must be true in
- * that case.
+ * 'oldslot' contains the "old" tuple in the source partition, and 'newslot'
+ * contains the "new" tuple in the destination partition. This interface
+ * allows to support the requirements of ExecCrossPartitionUpdateForeignKey();
+ * is_crosspart_update must be true in that case.
*/
void
ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
ResultRelInfo *src_partinfo,
ResultRelInfo *dst_partinfo,
- ItemPointer tupleid,
HeapTuple fdw_trigtuple,
+ TupleTableSlot *oldslot,
TupleTableSlot *newslot,
List *recheckIndexes,
TransitionCaptureState *transition_capture,
@@ -3116,29 +3106,14 @@ ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
* separately for DELETE and INSERT to capture transition table rows.
* In such case, either old tuple or new tuple can be NULL.
*/
- TupleTableSlot *oldslot;
- ResultRelInfo *tupsrc;
-
Assert((src_partinfo != NULL && dst_partinfo != NULL) ||
!is_crosspart_update);
- tupsrc = src_partinfo ? src_partinfo : relinfo;
- oldslot = ExecGetTriggerOldSlot(estate, tupsrc);
-
- if (fdw_trigtuple == NULL && ItemPointerIsValid(tupleid))
- GetTupleForTrigger(estate,
- NULL,
- tupsrc,
- tupleid,
- LockTupleExclusive,
- oldslot,
- NULL,
- NULL,
- NULL);
- else if (fdw_trigtuple != NULL)
+ if (fdw_trigtuple != NULL)
+ {
+ Assert(oldslot);
ExecForceStoreHeapTuple(fdw_trigtuple, oldslot, false);
- else
- ExecClearTuple(oldslot);
+ }
AfterTriggerSaveEvent(estate, relinfo,
src_partinfo, dst_partinfo,
@@ -3285,7 +3260,7 @@ static bool
GetTupleForTrigger(EState *estate,
EPQState *epqstate,
ResultRelInfo *relinfo,
- ItemPointer tid,
+ Datum tupleid,
LockTupleMode lockmode,
TupleTableSlot *oldslot,
TupleTableSlot **epqslot,
@@ -3310,7 +3285,9 @@ GetTupleForTrigger(EState *estate,
*/
if (!IsolationUsesXactSnapshot())
lockflags |= TUPLE_LOCK_FLAG_FIND_LAST_VERSION;
- test = table_tuple_lock(relation, tid, estate->es_snapshot, oldslot,
+
+ test = table_tuple_lock(relation, tupleid,
+ estate->es_snapshot, oldslot,
estate->es_output_cid,
lockmode, LockWaitBlock,
lockflags,
@@ -3406,8 +3383,8 @@ GetTupleForTrigger(EState *estate,
* We expect the tuple to be present, thus very simple error handling
* suffices.
*/
- if (!table_tuple_fetch_row_version(relation, tid, SnapshotAny,
- oldslot))
+ if (!table_tuple_fetch_row_version(relation, tupleid,
+ SnapshotAny, oldslot))
elog(ERROR, "failed to fetch tuple for trigger");
}
@@ -3613,18 +3590,22 @@ typedef SetConstraintStateData *SetConstraintState;
* cycles. So we need only ensure that ats_firing_id is zero when attaching
* a new event to an existing AfterTriggerSharedData record.
*/
-typedef uint32 TriggerFlags;
+typedef uint64 TriggerFlags;
-#define AFTER_TRIGGER_OFFSET 0x07FFFFFF /* must be low-order bits */
-#define AFTER_TRIGGER_DONE 0x80000000
-#define AFTER_TRIGGER_IN_PROGRESS 0x40000000
+#define AFTER_TRIGGER_SIZE UINT64CONST(0xFFFF000000000) /* must be high-order bits */
+#define AFTER_TRIGGER_SIZE_SHIFT (36)
+#define AFTER_TRIGGER_OFFSET UINT64CONST(0x000000FFFFFFF) /* must be low-order bits */
+#define AFTER_TRIGGER_DONE UINT64CONST(0x0000800000000)
+#define AFTER_TRIGGER_IN_PROGRESS UINT64CONST(0x0000400000000)
/* bits describing the size and tuple sources of this event */
-#define AFTER_TRIGGER_FDW_REUSE 0x00000000
-#define AFTER_TRIGGER_FDW_FETCH 0x20000000
-#define AFTER_TRIGGER_1CTID 0x10000000
-#define AFTER_TRIGGER_2CTID 0x30000000
-#define AFTER_TRIGGER_CP_UPDATE 0x08000000
-#define AFTER_TRIGGER_TUP_BITS 0x38000000
+#define AFTER_TRIGGER_FDW_REUSE UINT64CONST(0x0000000000000)
+#define AFTER_TRIGGER_FDW_FETCH UINT64CONST(0x0000200000000)
+#define AFTER_TRIGGER_1CTID UINT64CONST(0x0000100000000)
+#define AFTER_TRIGGER_ROWID1 UINT64CONST(0x0000010000000)
+#define AFTER_TRIGGER_2CTID UINT64CONST(0x0000300000000)
+#define AFTER_TRIGGER_ROWID2 UINT64CONST(0x0000020000000)
+#define AFTER_TRIGGER_CP_UPDATE UINT64CONST(0x0000080000000)
+#define AFTER_TRIGGER_TUP_BITS UINT64CONST(0x0000380000000)
typedef struct AfterTriggerSharedData *AfterTriggerShared;
typedef struct AfterTriggerSharedData
@@ -3676,6 +3657,9 @@ typedef struct AfterTriggerEventDataZeroCtids
} AfterTriggerEventDataZeroCtids;
#define SizeofTriggerEvent(evt) \
+ (((evt)->ate_flags & AFTER_TRIGGER_SIZE) >> AFTER_TRIGGER_SIZE_SHIFT)
+
+#define BasicSizeofTriggerEvent(evt) \
(((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_CP_UPDATE ? \
sizeof(AfterTriggerEventData) : \
(((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_2CTID ? \
@@ -4028,14 +4012,34 @@ afterTriggerCopyBitmap(Bitmapset *src)
*/
static void
afterTriggerAddEvent(AfterTriggerEventList *events,
- AfterTriggerEvent event, AfterTriggerShared evtshared)
+ AfterTriggerEvent event, AfterTriggerShared evtshared,
+ bytea *rowid1, bytea *rowid2)
{
- Size eventsize = SizeofTriggerEvent(event);
- Size needed = eventsize + sizeof(AfterTriggerSharedData);
+ Size basiceventsize = MAXALIGN(BasicSizeofTriggerEvent(event));
+ Size eventsize;
+ Size needed;
AfterTriggerEventChunk *chunk;
AfterTriggerShared newshared;
AfterTriggerEvent newevent;
+ if (SizeofTriggerEvent(event) == 0)
+ {
+ eventsize = basiceventsize;
+ if (event->ate_flags & AFTER_TRIGGER_ROWID1)
+ eventsize += MAXALIGN(VARSIZE(rowid1));
+
+ if (event->ate_flags & AFTER_TRIGGER_ROWID2)
+ eventsize += MAXALIGN(VARSIZE(rowid2));
+
+ event->ate_flags |= eventsize << AFTER_TRIGGER_SIZE_SHIFT;
+ }
+ else
+ {
+ eventsize = SizeofTriggerEvent(event);
+ }
+
+ needed = eventsize + sizeof(AfterTriggerSharedData);
+
/*
* If empty list or not enough room in the tail chunk, make a new chunk.
* We assume here that a new shared record will always be needed.
@@ -4068,7 +4072,7 @@ afterTriggerAddEvent(AfterTriggerEventList *events,
* sizes used should be MAXALIGN multiples, to ensure that the shared
* records will be aligned safely.
*/
-#define MIN_CHUNK_SIZE 1024
+#define MIN_CHUNK_SIZE (1024*4)
#define MAX_CHUNK_SIZE (1024*1024)
#if MAX_CHUNK_SIZE > (AFTER_TRIGGER_OFFSET+1)
@@ -4087,6 +4091,7 @@ afterTriggerAddEvent(AfterTriggerEventList *events,
chunksize *= 2; /* okay, double it */
else
chunksize /= 2; /* too many shared records */
+ chunksize = Max(chunksize, MIN_CHUNK_SIZE);
chunksize = Min(chunksize, MAX_CHUNK_SIZE);
}
chunk = MemoryContextAlloc(afterTriggers.event_cxt, chunksize);
@@ -4127,7 +4132,26 @@ afterTriggerAddEvent(AfterTriggerEventList *events,
/* Insert the data */
newevent = (AfterTriggerEvent) chunk->freeptr;
- memcpy(newevent, event, eventsize);
+ if (!rowid1 && !rowid2)
+ {
+ memcpy(newevent, event, eventsize);
+ }
+ else
+ {
+ Pointer ptr = chunk->freeptr;
+
+ memcpy(newevent, event, basiceventsize);
+ ptr += basiceventsize;
+
+ if (event->ate_flags & AFTER_TRIGGER_ROWID1)
+ {
+ memcpy(ptr, rowid1, MAXALIGN(VARSIZE(rowid1)));
+ ptr += MAXALIGN(VARSIZE(rowid1));
+ }
+
+ if (event->ate_flags & AFTER_TRIGGER_ROWID2)
+ memcpy(ptr, rowid2, MAXALIGN(VARSIZE(rowid2)));
+ }
/* ... and link the new event to its shared record */
newevent->ate_flags &= ~AFTER_TRIGGER_OFFSET;
newevent->ate_flags |= (char *) newshared - (char *) newevent;
@@ -4287,6 +4311,7 @@ AfterTriggerExecute(EState *estate,
int tgindx;
bool should_free_trig = false;
bool should_free_new = false;
+ Pointer ptr;
/*
* Locate trigger in trigdesc. It might not be present, and in fact the
@@ -4322,15 +4347,17 @@ AfterTriggerExecute(EState *estate,
{
Tuplestorestate *fdw_tuplestore = GetCurrentFDWTuplestore();
- if (!tuplestore_gettupleslot(fdw_tuplestore, true, false,
- trig_tuple_slot1))
+ if (!tuplestore_force_gettupleslot(fdw_tuplestore, true, false,
+ trig_tuple_slot1))
elog(ERROR, "failed to fetch tuple1 for AFTER trigger");
if ((evtshared->ats_event & TRIGGER_EVENT_OPMASK) ==
TRIGGER_EVENT_UPDATE &&
- !tuplestore_gettupleslot(fdw_tuplestore, true, false,
- trig_tuple_slot2))
+ !tuplestore_force_gettupleslot(fdw_tuplestore, true, false,
+ trig_tuple_slot2))
elog(ERROR, "failed to fetch tuple2 for AFTER trigger");
+ trig_tuple_slot1->tts_tid = event->ate_ctid1;
+ trig_tuple_slot2->tts_tid = event->ate_ctid2;
}
/* fall through */
case AFTER_TRIGGER_FDW_REUSE:
@@ -4362,13 +4389,26 @@ AfterTriggerExecute(EState *estate,
break;
default:
- if (ItemPointerIsValid(&(event->ate_ctid1)))
+ ptr = (Pointer) event + MAXALIGN(BasicSizeofTriggerEvent(event));
+ if (ItemPointerIsValid(&(event->ate_ctid1)) ||
+ (event->ate_flags & AFTER_TRIGGER_ROWID1))
{
+ Datum tupleid;
+
TupleTableSlot *src_slot = ExecGetTriggerOldSlot(estate,
src_relInfo);
- if (!table_tuple_fetch_row_version(src_rel,
- &(event->ate_ctid1),
+ if (event->ate_flags & AFTER_TRIGGER_ROWID1)
+ {
+ tupleid = PointerGetDatum(ptr);
+ ptr += MAXALIGN(VARSIZE(ptr));
+ }
+ else
+ {
+ tupleid = PointerGetDatum(&(event->ate_ctid1));
+ }
+
+ if (!table_tuple_fetch_row_version(src_rel, tupleid,
SnapshotAny,
src_slot))
elog(ERROR, "failed to fetch tuple1 for AFTER trigger");
@@ -4404,13 +4444,23 @@ AfterTriggerExecute(EState *estate,
/* don't touch ctid2 if not there */
if (((event->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_2CTID ||
(event->ate_flags & AFTER_TRIGGER_CP_UPDATE)) &&
- ItemPointerIsValid(&(event->ate_ctid2)))
+ (ItemPointerIsValid(&(event->ate_ctid2)) ||
+ (event->ate_flags & AFTER_TRIGGER_ROWID2)))
{
+ Datum tupleid;
+
TupleTableSlot *dst_slot = ExecGetTriggerNewSlot(estate,
dst_relInfo);
- if (!table_tuple_fetch_row_version(dst_rel,
- &(event->ate_ctid2),
+ if (event->ate_flags & AFTER_TRIGGER_ROWID2)
+ {
+ tupleid = PointerGetDatum(ptr);
+ }
+ else
+ {
+ tupleid = PointerGetDatum(&(event->ate_ctid2));
+ }
+ if (!table_tuple_fetch_row_version(dst_rel, tupleid,
SnapshotAny,
dst_slot))
elog(ERROR, "failed to fetch tuple2 for AFTER trigger");
@@ -4584,7 +4634,7 @@ afterTriggerMarkEvents(AfterTriggerEventList *events,
{
deferred_found = true;
/* add it to move_list */
- afterTriggerAddEvent(move_list, event, evtshared);
+ afterTriggerAddEvent(move_list, event, evtshared, NULL, NULL);
/* mark original copy "done" so we don't do it again */
event->ate_flags |= AFTER_TRIGGER_DONE;
}
@@ -4688,6 +4738,7 @@ afterTriggerInvokeEvents(AfterTriggerEventList *events,
/* caution: trigdesc could be NULL here */
finfo = rInfo->ri_TrigFunctions;
instr = rInfo->ri_TrigInstrument;
+
if (slot1 != NULL)
{
ExecDropSingleTupleTableSlot(slot1);
@@ -6077,6 +6128,8 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
int tgtype_level;
int i;
Tuplestorestate *fdw_tuplestore = NULL;
+ bytea *rowId1 = NULL;
+ bytea *rowId2 = NULL;
/*
* Check state. We use a normal test not Assert because it is possible to
@@ -6170,6 +6223,21 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
* if so. This preserves the behavior that statement-level triggers fire
* just once per statement and fire after row-level triggers.
*/
+
+ /* Determine flags */
+ if (!(relkind == RELKIND_FOREIGN_TABLE && row_trigger))
+ {
+ if (row_trigger && event == TRIGGER_EVENT_UPDATE)
+ {
+ if (relkind == RELKIND_PARTITIONED_TABLE)
+ new_event.ate_flags = AFTER_TRIGGER_CP_UPDATE;
+ else
+ new_event.ate_flags = AFTER_TRIGGER_2CTID;
+ }
+ else
+ new_event.ate_flags = AFTER_TRIGGER_1CTID;
+ }
+
switch (event)
{
case TRIGGER_EVENT_INSERT:
@@ -6180,6 +6248,13 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
Assert(newslot != NULL);
ItemPointerCopy(&(newslot->tts_tid), &(new_event.ate_ctid1));
ItemPointerSetInvalid(&(new_event.ate_ctid2));
+ if (table_get_row_ref_type(rel) == ROW_REF_ROWID)
+ {
+ bool isnull;
+ rowId1 = DatumGetByteaP(slot_getsysattr(newslot, RowIdAttributeNumber, &isnull));
+ new_event.ate_flags |= AFTER_TRIGGER_ROWID1;
+ Assert(!isnull);
+ }
}
else
{
@@ -6199,6 +6274,13 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
Assert(newslot == NULL);
ItemPointerCopy(&(oldslot->tts_tid), &(new_event.ate_ctid1));
ItemPointerSetInvalid(&(new_event.ate_ctid2));
+ if (table_get_row_ref_type(rel) == ROW_REF_ROWID)
+ {
+ bool isnull;
+ rowId1 = DatumGetByteaP(slot_getsysattr(oldslot, RowIdAttributeNumber, &isnull));
+ new_event.ate_flags |= AFTER_TRIGGER_ROWID1;
+ Assert(!isnull);
+ }
}
else
{
@@ -6214,10 +6296,54 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
tgtype_event = TRIGGER_TYPE_UPDATE;
if (row_trigger)
{
+ bool src_rowid = false,
+ dst_rowid = false;
Assert(oldslot != NULL);
Assert(newslot != NULL);
ItemPointerCopy(&(oldslot->tts_tid), &(new_event.ate_ctid1));
ItemPointerCopy(&(newslot->tts_tid), &(new_event.ate_ctid2));
+ if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ {
+ Relation src_rel = src_partinfo->ri_RelationDesc;
+ Relation dst_rel = dst_partinfo->ri_RelationDesc;
+
+ src_rowid = table_get_row_ref_type(src_rel) ==
+ ROW_REF_ROWID;
+ dst_rowid = table_get_row_ref_type(dst_rel) ==
+ ROW_REF_ROWID;
+ }
+ else
+ {
+ if (table_get_row_ref_type(rel) == ROW_REF_ROWID)
+ {
+ src_rowid = true;
+ dst_rowid = true;
+ }
+ }
+
+ if (src_rowid)
+ {
+ Datum val;
+ bool isnull;
+ val = slot_getsysattr(oldslot,
+ RowIdAttributeNumber,
+ &isnull);
+ rowId1 = DatumGetByteaP(val);
+ Assert(!isnull);
+ new_event.ate_flags |= AFTER_TRIGGER_ROWID1;
+ }
+
+ if (dst_rowid)
+ {
+ Datum val;
+ bool isnull;
+ val = slot_getsysattr(newslot,
+ RowIdAttributeNumber,
+ &isnull);
+ rowId2 = DatumGetByteaP(val);
+ Assert(!isnull);
+ new_event.ate_flags |= AFTER_TRIGGER_ROWID2;
+ }
/*
* Also remember the OIDs of partitions to fetch these tuples
@@ -6255,20 +6381,6 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
break;
}
- /* Determine flags */
- if (!(relkind == RELKIND_FOREIGN_TABLE && row_trigger))
- {
- if (row_trigger && event == TRIGGER_EVENT_UPDATE)
- {
- if (relkind == RELKIND_PARTITIONED_TABLE)
- new_event.ate_flags = AFTER_TRIGGER_CP_UPDATE;
- else
- new_event.ate_flags = AFTER_TRIGGER_2CTID;
- }
- else
- new_event.ate_flags = AFTER_TRIGGER_1CTID;
- }
-
/* else, we'll initialize ate_flags for each trigger */
tgtype_level = (row_trigger ? TRIGGER_TYPE_ROW : TRIGGER_TYPE_STATEMENT);
@@ -6434,7 +6546,7 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
new_shared.ats_modifiedcols = afterTriggerCopyBitmap(modifiedCols);
afterTriggerAddEvent(&afterTriggers.query_stack[afterTriggers.query_depth].events,
- &new_event, &new_shared);
+ &new_event, &new_shared, rowId1, rowId2);
}
/*
diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c
index 3289e3e0219..1a7f6ae2c9b 100644
--- a/src/backend/executor/execAmi.c
+++ b/src/backend/executor/execAmi.c
@@ -613,7 +613,7 @@ IndexSupportsBackwardScan(Oid indexid)
idxrelrec = (Form_pg_class) GETSTRUCT(ht_idxrel);
/* Fetch the index AM's API struct */
- amroutine = GetIndexAmRoutineByAmId(idxrelrec->relam, false);
+ amroutine = GetIndexAmRoutineByAmId(indexid, idxrelrec->relam, false);
result = amroutine->amcanbackward;
diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index a5395536a13..6913e3b7a6d 100644
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -48,6 +48,8 @@
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/jsonfuncs.h"
+#include "utils/json.h"
+#include "utils/jsonb.h"
#include "utils/jsonpath.h"
#include "utils/lsyscache.h"
#include "utils/typcache.h"
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index aa68c115ba9..d830006d61b 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -4994,7 +4994,9 @@ ExecEvalSysVar(ExprState *state, ExprEvalStep *op, ExprContext *econtext,
op->resnull);
*op->resvalue = d;
/* this ought to be unreachable, but it's cheap enough to check */
- if (unlikely(*op->resnull))
+ if (op->d.var.attnum != RowIdAttributeNumber &&
+ op->d.var.attnum != SelfItemPointerAttributeNumber &&
+ unlikely(*op->resnull))
elog(ERROR, "failed to fetch attribute from slot");
}
diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c
index 9f05b3654c1..9e09ef1cf1f 100644
--- a/src/backend/executor/execIndexing.c
+++ b/src/backend/executor/execIndexing.c
@@ -304,7 +304,6 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
List *arbiterIndexes,
bool onlySummarizing)
{
- ItemPointer tupleid = &slot->tts_tid;
List *result = NIL;
int i;
int numIndices;
@@ -314,8 +313,20 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
ExprContext *econtext;
Datum values[INDEX_MAX_KEYS];
bool isnull[INDEX_MAX_KEYS];
+ ItemPointer tupleid;
- Assert(ItemPointerIsValid(tupleid));
+
+ if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) == ROW_REF_ROWID)
+ {
+ bool isnull;
+ tupleid = DatumGetItemPointer(slot_getsysattr(slot, RowIdAttributeNumber, &isnull));
+ Assert(!isnull);
+ }
+ else
+ {
+ Assert(ItemPointerIsValid(&slot->tts_tid));
+ tupleid = &slot->tts_tid;
+ }
/*
* Get information from the result relation info structure.
@@ -506,6 +517,406 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
return result;
}
+List *
+ExecUpdateIndexTuples(ResultRelInfo *resultRelInfo,
+ TupleTableSlot *slot,
+ TupleTableSlot *oldSlot,
+ EState *estate,
+ bool noDupErr,
+ bool *specConflict,
+ List *arbiterIndexes,
+ bool onlySummarizing)
+{
+ List *result = NIL;
+ int i;
+ int numIndices;
+ RelationPtr relationDescs;
+ Relation heapRelation;
+ IndexInfo **indexInfoArray;
+ ExprContext *econtext;
+ Datum values[INDEX_MAX_KEYS];
+ bool isnull[INDEX_MAX_KEYS];
+ ItemPointer tupleid;
+
+ if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) == ROW_REF_ROWID)
+ {
+ bool isnull;
+ tupleid = DatumGetItemPointer(slot_getsysattr(slot, RowIdAttributeNumber, &isnull));
+ Assert(!isnull);
+ }
+ else
+ {
+ Assert(ItemPointerIsValid(&slot->tts_tid));
+ tupleid = &slot->tts_tid;
+ }
+
+ /*
+ * Get information from the result relation info structure.
+ */
+ numIndices = resultRelInfo->ri_NumIndices;
+ relationDescs = resultRelInfo->ri_IndexRelationDescs;
+ indexInfoArray = resultRelInfo->ri_IndexRelationInfo;
+ heapRelation = resultRelInfo->ri_RelationDesc;
+
+ /* Sanity check: slot must belong to the same rel as the resultRelInfo. */
+ Assert(slot->tts_tableOid == RelationGetRelid(heapRelation));
+
+ /*
+ * for each index, form and update the index tuple
+ */
+ for (i = 0; i < numIndices; i++)
+ {
+ Relation indexRelation = relationDescs[i];
+ IndexInfo *indexInfo;
+ bool applyNoDupErr;
+ IndexUniqueCheck checkUnique;
+ bool satisfiesConstraint;
+ bool new_valid = true;
+
+ if (indexRelation == NULL)
+ continue;
+
+ indexInfo = indexInfoArray[i];
+
+ /* If the index is marked as read-only, ignore it */
+ if (!indexInfo->ii_ReadyForInserts)
+ continue;
+
+ /*
+ * Skip processing of non-summarizing indexes if we only update
+ * summarizing indexes
+ */
+ if (onlySummarizing && !indexInfo->ii_Summarizing)
+ continue;
+
+ /*
+ * We will use the EState's per-tuple context for evaluating predicates
+ * and index expressions (creating it if it's not already there).
+ */
+ econtext = GetPerTupleExprContext(estate);
+
+ /* Arrange for econtext's scan tuple to be the tuple under test */
+ econtext->ecxt_scantuple = slot;
+
+ /* Check for partial index */
+ if (indexInfo->ii_Predicate != NIL)
+ {
+ ExprState *predicate;
+
+ /*
+ * If predicate state not set up yet, create it (in the estate's
+ * per-query context)
+ */
+ predicate = indexInfo->ii_PredicateState;
+ if (predicate == NULL)
+ {
+ predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
+ indexInfo->ii_PredicateState = predicate;
+ }
+
+ /* Predicate not satisfied: skip, or mark new tuple invalid for MVCC-aware AMs */
+ if (!ExecQual(predicate, econtext))
+ {
+ if (!indexRelation->rd_indam->ammvccaware)
+ continue;
+ new_valid = false;
+ }
+ }
+
+ /*
+ * FormIndexDatum fills in its values and isnull parameters with the
+ * appropriate values for the column(s) of the index.
+ */
+ FormIndexDatum(indexInfo,
+ slot,
+ estate,
+ values,
+ isnull);
+
+ /* Check whether to apply noDupErr to this index */
+ applyNoDupErr = noDupErr &&
+ (arbiterIndexes == NIL ||
+ list_member_oid(arbiterIndexes,
+ indexRelation->rd_index->indexrelid));
+
+ /*
+ * The index AM does the actual insertion, plus uniqueness checking.
+ *
+ * For an immediate-mode unique index, we just tell the index AM to
+ * throw error if not unique.
+ *
+ * For a deferrable unique index, we tell the index AM to just detect
+ * possible non-uniqueness, and we add the index OID to the result
+ * list if further checking is needed.
+ *
+ * For a speculative insertion (used by INSERT ... ON CONFLICT), do
+ * the same as for a deferrable unique index.
+ */
+ if (!indexRelation->rd_index->indisunique)
+ checkUnique = UNIQUE_CHECK_NO;
+ else if (applyNoDupErr)
+ checkUnique = UNIQUE_CHECK_PARTIAL;
+ else if (indexRelation->rd_index->indimmediate)
+ checkUnique = UNIQUE_CHECK_YES;
+ else
+ checkUnique = UNIQUE_CHECK_PARTIAL;
+
+ if (indexRelation->rd_indam->ammvccaware)
+ {
+ Datum valuesOld[INDEX_MAX_KEYS];
+ bool isnullOld[INDEX_MAX_KEYS];
+ Datum oldTupleid;
+ bool old_valid = true;
+
+ if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) == ROW_REF_ROWID)
+ {
+ bool isnull;
+ oldTupleid = slot_getsysattr(oldSlot, RowIdAttributeNumber, &isnull);
+ Assert(!isnull);
+ }
+ else
+ {
+ Assert(ItemPointerIsValid(&oldSlot->tts_tid));
+ oldTupleid = PointerGetDatum(&oldSlot->tts_tid);
+ }
+
+ econtext = GetPerTupleExprContext(estate);
+ econtext->ecxt_scantuple = oldSlot;
+
+ /* Check for partial index */
+ if (indexInfo->ii_Predicate != NIL)
+ {
+ ExprState *predicate;
+
+ /*
+ * If predicate state not set up yet, create it (in the estate's
+ * per-query context)
+ */
+ predicate = indexInfo->ii_PredicateState;
+ if (predicate == NULL)
+ {
+ predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
+ indexInfo->ii_PredicateState = predicate;
+ }
+
+ /* Skip this index-update if the predicate isn't satisfied */
+ if (!ExecQual(predicate, econtext))
+ old_valid = false;
+ }
+
+ FormIndexDatum(indexInfo,
+ oldSlot,
+ estate,
+ valuesOld,
+ isnullOld);
+
+ satisfiesConstraint =
+ index_update(indexRelation, /* index relation */
+ new_valid,
+ old_valid,
+ values, /* array of index Datums */
+ isnull, /* null flags */
+ ItemPointerGetDatum(tupleid), /* tid of heap tuple */
+ valuesOld,
+ isnullOld,
+ oldTupleid,
+ heapRelation, /* heap relation */
+ checkUnique, /* type of uniqueness check to do */
+ indexInfo); /* index AM may need this */
+
+ }
+ else
+ {
+ bool indexUnchanged;
+ /*
+ * There's definitely going to be an index_insert() call for this
+ * index. If we're being called as part of an UPDATE statement,
+ * consider if the 'indexUnchanged' = true hint should be passed.
+ */
+ indexUnchanged = index_unchanged_by_update(resultRelInfo,
+ estate,
+ indexInfo,
+ indexRelation);
+
+ satisfiesConstraint =
+ index_insert(indexRelation, /* index relation */
+ values, /* array of index Datums */
+ isnull, /* null flags */
+ tupleid, /* tid of heap tuple */
+ heapRelation, /* heap relation */
+ checkUnique, /* type of uniqueness check to do */
+ indexUnchanged, /* UPDATE without logical change? */
+ indexInfo); /* index AM may need this */
+ }
+
+ /*
+ * If the index has an associated exclusion constraint, check that.
+ * This is simpler than the process for uniqueness checks since we
+ * always insert first and then check. If the constraint is deferred,
+ * we check now anyway, but don't throw error on violation or wait for
+ * a conclusive outcome from a concurrent insertion; instead we'll
+ * queue a recheck event. Similarly, noDupErr callers (speculative
+ * inserters) will recheck later, and wait for a conclusive outcome
+ * then.
+ *
+ * An index for an exclusion constraint can't also be UNIQUE (not an
+ * essential property, we just don't allow it in the grammar), so no
+ * need to preserve the prior state of satisfiesConstraint.
+ */
+ if (indexInfo->ii_ExclusionOps != NULL)
+ {
+ bool violationOK;
+ CEOUC_WAIT_MODE waitMode;
+
+ if (applyNoDupErr)
+ {
+ violationOK = true;
+ waitMode = CEOUC_LIVELOCK_PREVENTING_WAIT;
+ }
+ else if (!indexRelation->rd_index->indimmediate)
+ {
+ violationOK = true;
+ waitMode = CEOUC_NOWAIT;
+ }
+ else
+ {
+ violationOK = false;
+ waitMode = CEOUC_WAIT;
+ }
+
+ satisfiesConstraint =
+ check_exclusion_or_unique_constraint(heapRelation,
+ indexRelation, indexInfo,
+ tupleid, values, isnull,
+ estate, false,
+ waitMode, violationOK, NULL);
+ }
+
+ if ((checkUnique == UNIQUE_CHECK_PARTIAL ||
+ indexInfo->ii_ExclusionOps != NULL) &&
+ !satisfiesConstraint)
+ {
+ /*
+ * The tuple potentially violates the uniqueness or exclusion
+ * constraint, so make a note of the index so that we can re-check
+ * it later. Speculative inserters are told if there was a
+ * speculative conflict, since that always requires a restart.
+ */
+ result = lappend_oid(result, RelationGetRelid(indexRelation));
+ if (indexRelation->rd_index->indimmediate && specConflict)
+ *specConflict = true;
+ }
+ }
+
+ return result;
+}
+
+void
+ExecDeleteIndexTuples(ResultRelInfo *resultRelInfo, TupleTableSlot *slot,
+ EState *estate)
+{
+ int i;
+ int numIndices;
+ RelationPtr relationDescs;
+ Relation heapRelation;
+ IndexInfo **indexInfoArray;
+ ExprContext *econtext;
+ Datum values[INDEX_MAX_KEYS];
+ bool isnull[INDEX_MAX_KEYS];
+ Datum tupleid;
+
+ if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) == ROW_REF_ROWID)
+ {
+ bool isnull;
+ tupleid = slot_getsysattr(slot, RowIdAttributeNumber, &isnull);
+ Assert(!isnull);
+ }
+ else
+ {
+ Assert(ItemPointerIsValid(&slot->tts_tid));
+ tupleid = PointerGetDatum(&slot->tts_tid);
+ }
+
+ /*
+ * Get information from the result relation info structure.
+ */
+ numIndices = resultRelInfo->ri_NumIndices;
+ relationDescs = resultRelInfo->ri_IndexRelationDescs;
+ indexInfoArray = resultRelInfo->ri_IndexRelationInfo;
+ heapRelation = resultRelInfo->ri_RelationDesc;
+
+ /* Sanity check: slot must belong to the same rel as the resultRelInfo. */
+ Assert(slot->tts_tableOid == RelationGetRelid(heapRelation));
+
+ /*
+ * for each index, form and delete the index tuple
+ */
+ for (i = 0; i < numIndices; i++)
+ {
+ Relation indexRelation = relationDescs[i];
+ IndexInfo *indexInfo;
+
+ if (indexRelation == NULL)
+ continue;
+
+ indexInfo = indexInfoArray[i];
+
+ /* If the index is marked as read-only, ignore it */
+ if (!indexInfo->ii_ReadyForInserts)
+ continue;
+
+ if (!indexRelation->rd_indam->ammvccaware)
+ continue;
+
+ /*
+ * We will use the EState's per-tuple context for evaluating predicates
+ * and index expressions (creating it if it's not already there).
+ */
+ econtext = GetPerTupleExprContext(estate);
+
+ /* Arrange for econtext's scan tuple to be the tuple under test */
+ econtext->ecxt_scantuple = slot;
+
+ /* Check for partial index */
+ if (indexInfo->ii_Predicate != NIL)
+ {
+ ExprState *predicate;
+
+ /*
+ * If predicate state not set up yet, create it (in the estate's
+ * per-query context)
+ */
+ predicate = indexInfo->ii_PredicateState;
+ if (predicate == NULL)
+ {
+ predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
+ indexInfo->ii_PredicateState = predicate;
+ }
+
+ /* Skip this index-update if the predicate isn't satisfied */
+ if (!ExecQual(predicate, econtext))
+ continue;
+ }
+
+ /*
+ * FormIndexDatum fills in its values and isnull parameters with the
+ * appropriate values for the column(s) of the index.
+ */
+ FormIndexDatum(indexInfo,
+ slot,
+ estate,
+ values,
+ isnull);
+
+ index_delete(indexRelation, /* index relation */
+ values, /* array of index Datums */
+ isnull, /* null flags */
+ tupleid, /* tid of heap tuple */
+ heapRelation, /* heap relation */
+ indexInfo); /* index AM may need this */
+ }
+}
+
/* ----------------------------------------------------------------
* ExecCheckIndexConstraints
*
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index 2365c6861be..f0dfccd9fab 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -150,7 +150,7 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
Assert(queryDesc->estate == NULL);
/* caller must ensure the query's snapshot is active */
- Assert(GetActiveSnapshot() == queryDesc->snapshot);
+ Assert((ActiveSnapshotSet() ? GetActiveSnapshot() : InvalidSnapshot) == queryDesc->snapshot);
/*
* If the transaction is read-only, we need to check if any writes are
@@ -325,7 +325,7 @@ standard_ExecutorRun(QueryDesc *queryDesc,
Assert(!(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));
/* caller must ensure the query's snapshot is active */
- Assert(GetActiveSnapshot() == estate->es_snapshot);
+ Assert((ActiveSnapshotSet() ? GetActiveSnapshot() : InvalidSnapshot) == estate->es_snapshot);
/*
* Switch into per-query memory context
@@ -869,13 +869,15 @@ InitPlan(QueryDesc *queryDesc, int eflags)
Oid relid;
Relation relation;
ExecRowMark *erm;
+ RangeTblEntry *rangeEntry;
/* ignore "parent" rowmarks; they are irrelevant at runtime */
if (rc->isParent)
continue;
/* get relation's OID (will produce InvalidOid if subquery) */
- relid = exec_rt_fetch(rc->rti, estate)->relid;
+ rangeEntry = exec_rt_fetch(rc->rti, estate);
+ relid = rangeEntry->relid;
/* open relation, if we need to access it for this mark type */
switch (rc->markType)
@@ -908,6 +910,10 @@ InitPlan(QueryDesc *queryDesc, int eflags)
erm->prti = rc->prti;
erm->rowmarkId = rc->rowmarkId;
erm->markType = rc->markType;
+ if (erm->markType == ROW_MARK_COPY)
+ erm->refType = ROW_REF_COPY;
+ else
+ erm->refType = rangeEntry->reftype;
erm->strength = rc->strength;
erm->waitPolicy = rc->waitPolicy;
erm->ermActive = false;
@@ -1273,6 +1279,8 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo,
resultRelInfo->ri_ChildToRootMap = NULL;
resultRelInfo->ri_ChildToRootMapValid = false;
resultRelInfo->ri_CopyMultiInsertBuffer = NULL;
+
+ resultRelInfo->ri_RowRefType = table_get_row_ref_type(resultRelationDesc);
}
/*
@@ -2407,17 +2415,28 @@ ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist)
aerm->rowmark = erm;
/* Look up the resjunk columns associated with this rowmark */
- if (erm->markType != ROW_MARK_COPY)
+ if (erm->refType == ROW_REF_TID)
{
+ Assert(erm->markType != ROW_MARK_COPY);
/* need ctid for all methods other than COPY */
snprintf(resname, sizeof(resname), "ctid%u", erm->rowmarkId);
aerm->ctidAttNo = ExecFindJunkAttributeInTlist(targetlist,
resname);
if (!AttributeNumberIsValid(aerm->ctidAttNo))
elog(ERROR, "could not find junk %s column", resname);
+ } else if (erm->refType == ROW_REF_ROWID)
+ {
+ Assert(erm->markType != ROW_MARK_COPY);
+ /* need the rowid junk column for ROW_REF_ROWID relations */
+ snprintf(resname, sizeof(resname), "rowid%u", erm->rowmarkId);
+ aerm->ctidAttNo = ExecFindJunkAttributeInTlist(targetlist,
+ resname);
+ if (!AttributeNumberIsValid(aerm->ctidAttNo))
+ elog(ERROR, "could not find junk %s column", resname);
}
else
{
+ Assert(erm->markType == ROW_MARK_COPY);
/* need wholerow if COPY */
snprintf(resname, sizeof(resname), "wholerow%u", erm->rowmarkId);
aerm->wholeAttNo = ExecFindJunkAttributeInTlist(targetlist,
@@ -2705,8 +2724,9 @@ EvalPlanQualFetchRowMark(EPQState *epqstate, Index rti, TupleTableSlot *slot)
{
/* ordinary table, fetch the tuple */
if (!table_tuple_fetch_row_version(erm->relation,
- (ItemPointer) DatumGetPointer(datum),
- SnapshotAny, slot))
+ datum,
+ SnapshotAny,
+ slot))
elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
return true;
}
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index d0a89cd5778..252efe51738 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -166,6 +166,25 @@ build_replindex_scan_key(ScanKey skey, Relation rel, Relation idxrel,
return skey_attoff;
}
+static Datum
+slot_get_tupleid(Relation rel, TupleTableSlot *slot)
+{
+ Datum tupleid;
+
+ if (table_get_row_ref_type(rel) == ROW_REF_ROWID)
+ {
+ bool isnull;
+ tupleid = slot_getsysattr(slot, RowIdAttributeNumber, &isnull);
+ Assert(!isnull);
+ }
+ else
+ {
+ tupleid = PointerGetDatum(&slot->tts_tid);
+ }
+
+ return tupleid;
+}
+
/*
* Search the relation 'rel' for tuple using the index.
*
@@ -250,7 +269,8 @@ RelationFindReplTupleByIndex(Relation rel, Oid idxoid,
PushActiveSnapshot(GetLatestSnapshot());
- res = table_tuple_lock(rel, &(outslot->tts_tid), GetLatestSnapshot(),
+ res = table_tuple_lock(rel, slot_get_tupleid(rel, outslot),
+ GetLatestSnapshot(),
outslot,
GetCurrentCommandId(false),
lockmode,
@@ -434,7 +454,8 @@ RelationFindReplTupleSeq(Relation rel, LockTupleMode lockmode,
PushActiveSnapshot(GetLatestSnapshot());
- res = table_tuple_lock(rel, &(outslot->tts_tid), GetLatestSnapshot(),
+ res = table_tuple_lock(rel, slot_get_tupleid(rel, outslot),
+ GetLatestSnapshot(),
outslot,
GetCurrentCommandId(false),
lockmode,
@@ -557,7 +578,7 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
{
bool skip_tuple = false;
Relation rel = resultRelInfo->ri_RelationDesc;
- ItemPointer tid = &(searchslot->tts_tid);
+ Datum tupleid = slot_get_tupleid(rel, searchslot);
/* For now we support only tables. */
Assert(rel->rd_rel->relkind == RELKIND_RELATION);
@@ -569,7 +590,7 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
resultRelInfo->ri_TrigDesc->trig_update_before_row)
{
if (!ExecBRUpdateTriggers(estate, epqstate, resultRelInfo,
- tid, NULL, slot, NULL, NULL))
+ tupleid, NULL, slot, NULL, NULL))
skip_tuple = true; /* "do nothing" */
}
@@ -577,6 +598,7 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
{
List *recheckIndexes = NIL;
TU_UpdateIndexes update_indexes;
+ TupleTableSlot *oldSlot = NULL;
/* Compute stored generated columns */
if (rel->rd_att->constr &&
@@ -590,19 +612,24 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
if (rel->rd_rel->relispartition)
ExecPartitionCheck(resultRelInfo, slot, estate, true);
- simple_table_tuple_update(rel, tid, slot, estate->es_snapshot,
- &update_indexes);
+ oldSlot = ExecGetTriggerOldSlot(estate, resultRelInfo);
+
+ simple_table_tuple_update(rel, tupleid, slot, estate->es_snapshot,
+ &update_indexes, oldSlot);
if (resultRelInfo->ri_NumIndices > 0 && (update_indexes != TU_None))
- recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
- slot, estate, true, false,
+ recheckIndexes = ExecUpdateIndexTuples(resultRelInfo,
+ slot,
+ oldSlot,
+ estate,
+ false,
NULL, NIL,
(update_indexes == TU_Summarizing));
/* AFTER ROW UPDATE Triggers */
ExecARUpdateTriggers(estate, resultRelInfo,
NULL, NULL,
- tid, NULL, slot,
+ NULL, oldSlot, slot,
recheckIndexes, NULL, false);
list_free(recheckIndexes);
@@ -622,7 +649,7 @@ ExecSimpleRelationDelete(ResultRelInfo *resultRelInfo,
{
bool skip_tuple = false;
Relation rel = resultRelInfo->ri_RelationDesc;
- ItemPointer tid = &searchslot->tts_tid;
+ Datum tupleid = slot_get_tupleid(rel, searchslot);
CheckCmdReplicaIdentity(rel, CMD_DELETE);
@@ -631,17 +658,25 @@ ExecSimpleRelationDelete(ResultRelInfo *resultRelInfo,
resultRelInfo->ri_TrigDesc->trig_delete_before_row)
{
skip_tuple = !ExecBRDeleteTriggers(estate, epqstate, resultRelInfo,
- tid, NULL, NULL, NULL, NULL);
+ tupleid, NULL, NULL, NULL, NULL);
}
if (!skip_tuple)
{
+ TupleTableSlot *oldSlot = NULL;
+
+ oldSlot = ExecGetTriggerOldSlot(estate, resultRelInfo);
+
/* OK, delete the tuple */
- simple_table_tuple_delete(rel, tid, estate->es_snapshot);
+ simple_table_tuple_delete(rel, tupleid, estate->es_snapshot, oldSlot);
+
+ /* delete index entries if necessary */
+ if (resultRelInfo->ri_NumIndices > 0)
+ ExecDeleteIndexTuples(resultRelInfo, oldSlot, estate);
/* AFTER ROW DELETE Triggers */
ExecARDeleteTriggers(estate, resultRelInfo,
- tid, NULL, NULL, false);
+ NULL, oldSlot, NULL, false);
}
}
diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c
index 5737f9f4ebd..5cbe3bf46d1 100644
--- a/src/backend/executor/execUtils.c
+++ b/src/backend/executor/execUtils.c
@@ -1211,9 +1211,19 @@ ExecGetChildToRootMap(ResultRelInfo *resultRelInfo)
ResultRelInfo *rootRelInfo = resultRelInfo->ri_RootResultRelInfo;
if (rootRelInfo)
- resultRelInfo->ri_ChildToRootMap =
- convert_tuples_by_name(RelationGetDescr(resultRelInfo->ri_RelationDesc),
- RelationGetDescr(rootRelInfo->ri_RelationDesc));
+ {
+ TupleDesc indesc = RelationGetDescr(resultRelInfo->ri_RelationDesc);
+ TupleDesc outdesc = RelationGetDescr(rootRelInfo->ri_RelationDesc);
+ AttrMap *attrMap;
+
+ if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) != ROW_REF_ROWID)
+ attrMap = build_attrmap_by_name_if_req(indesc, outdesc, false);
+ else
+ attrMap = build_attrmap_by_name(indesc, outdesc, false);
+ if (attrMap)
+ resultRelInfo->ri_ChildToRootMap =
+ convert_tuples_by_name_attrmap(indesc, outdesc, attrMap);
+ }
else /* this isn't a child result rel */
resultRelInfo->ri_ChildToRootMap = NULL;
@@ -1250,8 +1260,10 @@ ExecGetRootToChildMap(ResultRelInfo *resultRelInfo, EState *estate)
* to ignore by passing true for missing_ok.
*/
oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
- attrMap = build_attrmap_by_name_if_req(indesc, outdesc,
- !childrel->rd_rel->relispartition);
+ if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) != ROW_REF_ROWID)
+ attrMap = build_attrmap_by_name_if_req(indesc, outdesc, !childrel->rd_rel->relispartition);
+ else
+ attrMap = build_attrmap_by_name(indesc, outdesc, !childrel->rd_rel->relispartition);
if (attrMap)
resultRelInfo->ri_RootToChildMap =
convert_tuples_by_name_attrmap(indesc, outdesc, attrMap);
diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c
index b49194c0167..a8424922ccc 100644
--- a/src/backend/executor/nodeIndexonlyscan.c
+++ b/src/backend/executor/nodeIndexonlyscan.c
@@ -65,7 +65,7 @@ IndexOnlyNext(IndexOnlyScanState *node)
ScanDirection direction;
IndexScanDesc scandesc;
TupleTableSlot *slot;
- ItemPointer tid;
+ ItemPointer tid = NULL;
/*
* extract necessary information from index scan node
@@ -117,12 +117,36 @@ IndexOnlyNext(IndexOnlyScanState *node)
/*
* OK, now that we have what we need, fetch the next tuple.
*/
- while ((tid = index_getnext_tid(scandesc, direction)) != NULL)
+ while (true)
{
bool tuple_from_heap = false;
CHECK_FOR_INTERRUPTS();
+ if (scandesc->xs_want_rowid)
+ {
+ NullableDatum rowid;
+ /* Time to fetch the next TID from the index */
+ rowid = index_getnext_rowid(scandesc, direction);
+
+ /* If we're out of index entries, we're done */
+ if (rowid.isnull)
+ break;
+
+ /* Assert(RowidEquals(rowid, &scan->xs_rowid)); */
+ }
+ else
+ {
+ /* Time to fetch the next TID from the index */
+ tid = index_getnext_tid(scandesc, direction);
+
+ /* If we're out of index entries, we're done */
+ if (tid == NULL)
+ break;
+
+ Assert(ItemPointerEquals(tid, &scandesc->xs_heaptid));
+ }
+
/*
* We can skip the heap fetch if the TID references a heap page on
* which all tuples are known visible to everybody. In any case,
@@ -157,7 +181,8 @@ IndexOnlyNext(IndexOnlyScanState *node)
* It's worth going through this complexity to avoid needing to lock
* the VM buffer, which could cause significant contention.
*/
- if (!VM_ALL_VISIBLE(scandesc->heapRelation,
+ if (!scandesc->xs_want_rowid &&
+ !VM_ALL_VISIBLE(scandesc->heapRelation,
ItemPointerGetBlockNumber(tid),
&node->ioss_VMBuffer))
{
@@ -242,7 +267,7 @@ IndexOnlyNext(IndexOnlyScanState *node)
* If we didn't access the heap, then we'll need to take a predicate
* lock explicitly, as if we had. For now we do that at page level.
*/
- if (!tuple_from_heap)
+ if (!tuple_from_heap && !scandesc->xs_want_rowid)
PredicateLockPage(scandesc->heapRelation,
ItemPointerGetBlockNumber(tid),
estate->es_snapshot);
diff --git a/src/backend/executor/nodeLockRows.c b/src/backend/executor/nodeLockRows.c
index 41754ddfea9..ac401d7a470 100644
--- a/src/backend/executor/nodeLockRows.c
+++ b/src/backend/executor/nodeLockRows.c
@@ -27,6 +27,7 @@
#include "executor/nodeLockRows.h"
#include "foreign/fdwapi.h"
#include "miscadmin.h"
+#include "utils/datum.h"
#include "utils/rel.h"
@@ -157,7 +158,16 @@ ExecLockRows(PlanState *pstate)
}
/* okay, try to lock (and fetch) the tuple */
- tid = *((ItemPointer) DatumGetPointer(datum));
+ if (erm->refType == ROW_REF_TID)
+ {
+ tid = *((ItemPointer) DatumGetPointer(datum));
+ datum = PointerGetDatum(&tid);
+ }
+ else
+ {
+ Assert(erm->refType == ROW_REF_ROWID);
+ datum = datumCopy(datum, false, -1);
+ }
switch (erm->markType)
{
case ROW_MARK_EXCLUSIVE:
@@ -182,12 +192,15 @@ ExecLockRows(PlanState *pstate)
if (!IsolationUsesXactSnapshot())
lockflags |= TUPLE_LOCK_FLAG_FIND_LAST_VERSION;
- test = table_tuple_lock(erm->relation, &tid, estate->es_snapshot,
+ test = table_tuple_lock(erm->relation, datum, estate->es_snapshot,
markSlot, estate->es_output_cid,
lockmode, erm->waitPolicy,
lockflags,
&tmfd);
+ if (erm->refType == ROW_REF_ROWID)
+ pfree(DatumGetPointer(datum));
+
switch (test)
{
case TM_WouldBlock:
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 4913e493199..fb0997af2d4 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -135,12 +135,11 @@ static void ExecPendingInserts(EState *estate);
static void ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
ResultRelInfo *sourcePartInfo,
ResultRelInfo *destPartInfo,
- ItemPointer tupleid,
- TupleTableSlot *oldslot,
+ Datum tupleid,
+ TupleTableSlot *oldSlot,
TupleTableSlot *newslot);
static bool ExecOnConflictUpdate(ModifyTableContext *context,
ResultRelInfo *resultRelInfo,
- ItemPointer conflictTid,
TupleTableSlot *excludedSlot,
bool canSetTag,
TupleTableSlot **returning);
@@ -153,13 +152,13 @@ static TupleTableSlot *ExecPrepareTupleRouting(ModifyTableState *mtstate,
static TupleTableSlot *ExecMerge(ModifyTableContext *context,
ResultRelInfo *resultRelInfo,
- ItemPointer tupleid,
+ Datum tupleid,
HeapTuple oldtuple,
bool canSetTag);
static void ExecInitMerge(ModifyTableState *mtstate, EState *estate);
static TupleTableSlot *ExecMergeMatched(ModifyTableContext *context,
ResultRelInfo *resultRelInfo,
- ItemPointer tupleid,
+ Datum tupleid,
HeapTuple oldtuple,
bool canSetTag,
bool *matched);
@@ -167,7 +166,6 @@ static TupleTableSlot *ExecMergeNotMatched(ModifyTableContext *context,
ResultRelInfo *resultRelInfo,
bool canSetTag);
-
/*
* Verify that the tuples to be produced by INSERT match the
* target relation's rowtype
@@ -276,66 +274,6 @@ ExecProcessReturning(ResultRelInfo *resultRelInfo,
return ExecProject(projectReturning);
}
-/*
- * ExecCheckTupleVisible -- verify tuple is visible
- *
- * It would not be consistent with guarantees of the higher isolation levels to
- * proceed with avoiding insertion (taking speculative insertion's alternative
- * path) on the basis of another tuple that is not visible to MVCC snapshot.
- * Check for the need to raise a serialization failure, and do so as necessary.
- */
-static void
-ExecCheckTupleVisible(EState *estate,
- Relation rel,
- TupleTableSlot *slot)
-{
- if (!IsolationUsesXactSnapshot())
- return;
-
- if (!table_tuple_satisfies_snapshot(rel, slot, estate->es_snapshot))
- {
- Datum xminDatum;
- TransactionId xmin;
- bool isnull;
-
- xminDatum = slot_getsysattr(slot, MinTransactionIdAttributeNumber, &isnull);
- Assert(!isnull);
- xmin = DatumGetTransactionId(xminDatum);
-
- /*
- * We should not raise a serialization failure if the conflict is
- * against a tuple inserted by our own transaction, even if it's not
- * visible to our snapshot. (This would happen, for example, if
- * conflicting keys are proposed for insertion in a single command.)
- */
- if (!TransactionIdIsCurrentTransactionId(xmin))
- ereport(ERROR,
- (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
- errmsg("could not serialize access due to concurrent update")));
- }
-}
-
-/*
- * ExecCheckTIDVisible -- convenience variant of ExecCheckTupleVisible()
- */
-static void
-ExecCheckTIDVisible(EState *estate,
- ResultRelInfo *relinfo,
- ItemPointer tid,
- TupleTableSlot *tempSlot)
-{
- Relation rel = relinfo->ri_RelationDesc;
-
- /* Redundantly check isolation level */
- if (!IsolationUsesXactSnapshot())
- return;
-
- if (!table_tuple_fetch_row_version(rel, tid, SnapshotAny, tempSlot))
- elog(ERROR, "failed to fetch conflicting tuple for ON CONFLICT");
- ExecCheckTupleVisible(estate, rel, tempSlot);
- ExecClearTuple(tempSlot);
-}
-
/*
* Initialize to compute stored generated columns for a tuple
*
@@ -576,6 +514,10 @@ ExecInitInsertProjection(ModifyTableState *mtstate,
resultRelInfo->ri_newTupleSlot =
table_slot_create(resultRelInfo->ri_RelationDesc,
&estate->es_tupleTable);
+ if (node->onConflictAction == ONCONFLICT_UPDATE)
+ resultRelInfo->ri_oldTupleSlot =
+ table_slot_create(resultRelInfo->ri_RelationDesc,
+ &estate->es_tupleTable);
/* Build ProjectionInfo if needed (it probably isn't). */
if (need_projection)
@@ -1017,12 +959,19 @@ ExecInsert(ModifyTableContext *context,
if (onconflict != ONCONFLICT_NONE && resultRelInfo->ri_NumIndices > 0)
{
/* Perform a speculative insertion. */
- uint32 specToken;
- ItemPointerData conflictTid;
- bool specConflict;
List *arbiterIndexes;
+ TupleTableSlot *existing = NULL,
+ *returningSlot,
+ *inserted;
+ LockTupleMode lockmode = LockTupleExclusive;
arbiterIndexes = resultRelInfo->ri_onConflictArbiterIndexes;
+ returningSlot = ExecGetReturningSlot(estate, resultRelInfo);
+ if (onconflict == ONCONFLICT_UPDATE)
+ {
+ lockmode = ExecUpdateLockMode(estate, resultRelInfo);
+ existing = resultRelInfo->ri_onConflict->oc_Existing;
+ }
/*
* Do a non-conclusive check for conflicts first.
@@ -1039,23 +988,29 @@ ExecInsert(ModifyTableContext *context,
*/
vlock:
CHECK_FOR_INTERRUPTS();
- specConflict = false;
- if (!ExecCheckIndexConstraints(resultRelInfo, slot, estate,
- &conflictTid, arbiterIndexes))
+
+ inserted = table_tuple_insert_with_arbiter(resultRelInfo,
+ slot, estate->es_output_cid,
+ 0, NULL, arbiterIndexes, estate,
+ lockmode, existing, returningSlot);
+ if (!inserted)
{
- /* committed conflict tuple found */
if (onconflict == ONCONFLICT_UPDATE)
{
+ TupleTableSlot *returning = NULL;
+
+ if (TTS_EMPTY(existing))
+ goto vlock;
+
/*
* In case of ON CONFLICT DO UPDATE, execute the UPDATE
* part. Be prepared to retry if the UPDATE fails because
* of another concurrent UPDATE/DELETE to the conflict
* tuple.
*/
- TupleTableSlot *returning = NULL;
if (ExecOnConflictUpdate(context, resultRelInfo,
- &conflictTid, slot, canSetTag,
+ slot, canSetTag,
&returning))
{
InstrCountTuples2(&mtstate->ps, 1);
@@ -1078,57 +1033,13 @@ ExecInsert(ModifyTableContext *context,
* ExecGetReturningSlot() in the DO NOTHING case...
*/
Assert(onconflict == ONCONFLICT_NOTHING);
- ExecCheckTIDVisible(estate, resultRelInfo, &conflictTid,
- ExecGetReturningSlot(estate, resultRelInfo));
InstrCountTuples2(&mtstate->ps, 1);
return NULL;
}
}
-
- /*
- * Before we start insertion proper, acquire our "speculative
- * insertion lock". Others can use that to wait for us to decide
- * if we're going to go ahead with the insertion, instead of
- * waiting for the whole transaction to complete.
- */
- specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());
-
- /* insert the tuple, with the speculative token */
- table_tuple_insert_speculative(resultRelationDesc, slot,
- estate->es_output_cid,
- 0,
- NULL,
- specToken);
-
- /* insert index entries for tuple */
- recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
- slot, estate, false, true,
- &specConflict,
- arbiterIndexes,
- false);
-
- /* adjust the tuple's state accordingly */
- table_tuple_complete_speculative(resultRelationDesc, slot,
- specToken, !specConflict);
-
- /*
- * Wake up anyone waiting for our decision. They will re-check
- * the tuple, see that it's no longer speculative, and wait on our
- * XID as if this was a regularly inserted tuple all along. Or if
- * we killed the tuple, they will see it's dead, and proceed as if
- * the tuple never existed.
- */
- SpeculativeInsertionLockRelease(GetCurrentTransactionId());
-
- /*
- * If there was a conflict, start from the beginning. We'll do
- * the pre-check again, which will now find the conflicting tuple
- * (unless it aborts before we get there).
- */
- if (specConflict)
+ else
{
- list_free(recheckIndexes);
- goto vlock;
+ slot = inserted;
}
/* Since there was no insertion conflict, we're done */
@@ -1136,9 +1047,9 @@ ExecInsert(ModifyTableContext *context,
else
{
/* insert the tuple normally */
- table_tuple_insert(resultRelationDesc, slot,
- estate->es_output_cid,
- 0, NULL);
+ slot = table_tuple_insert(resultRelationDesc, slot,
+ estate->es_output_cid,
+ 0, NULL);
/* insert index entries for tuple */
if (resultRelInfo->ri_NumIndices > 0)
@@ -1165,7 +1076,7 @@ ExecInsert(ModifyTableContext *context,
ExecARUpdateTriggers(estate, resultRelInfo,
NULL, NULL,
NULL,
- NULL,
+ resultRelInfo->ri_oldTupleSlot,
slot,
NULL,
mtstate->mt_transition_capture,
@@ -1314,12 +1225,20 @@ ExecPendingInserts(EState *estate)
*/
static bool
ExecDeletePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
- ItemPointer tupleid, HeapTuple oldtuple,
+ Datum tupleid, HeapTuple oldtuple,
TupleTableSlot **epqreturnslot, TM_Result *result)
{
if (result)
*result = TM_Ok;
+ /*
+ * Open the table's indexes, if we have not done so already, so that we
+ * can delete index entries.
+ */
+ if (resultRelInfo->ri_RelationDesc->rd_rel->relhasindex &&
+ resultRelInfo->ri_IndexRelationDescs == NULL)
+ ExecOpenIndices(resultRelInfo, false);
+
/* BEFORE ROW DELETE triggers */
if (resultRelInfo->ri_TrigDesc &&
resultRelInfo->ri_TrigDesc->trig_delete_before_row)
@@ -1345,7 +1264,8 @@ ExecDeletePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
*/
static TM_Result
ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
- ItemPointer tupleid, bool changingPart)
+ Datum tupleid, bool changingPart, int options,
+ TupleTableSlot *oldSlot)
{
EState *estate = context->estate;
@@ -1353,9 +1273,10 @@ ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
estate->es_output_cid,
estate->es_snapshot,
estate->es_crosscheck_snapshot,
- true /* wait for commit */ ,
+ options /* wait policy and other modify flags */ ,
&context->tmfd,
- changingPart);
+ changingPart,
+ oldSlot);
}
/*
@@ -1367,12 +1288,17 @@ ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
*/
static void
ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
- ItemPointer tupleid, HeapTuple oldtuple, bool changingPart)
+ HeapTuple oldtuple,
+ TupleTableSlot *slot, bool changingPart)
{
ModifyTableState *mtstate = context->mtstate;
EState *estate = context->estate;
TransitionCaptureState *ar_delete_trig_tcs;
+ /* delete index entries if necessary */
+ if (resultRelInfo->ri_NumIndices > 0)
+ ExecDeleteIndexTuples(resultRelInfo, slot, context->estate);
+
/*
* If this delete is the result of a partition key update that moved the
* tuple to a new partition, put this row into the transition OLD TABLE,
@@ -1385,8 +1311,8 @@ ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
{
ExecARUpdateTriggers(estate, resultRelInfo,
NULL, NULL,
- tupleid, oldtuple,
- NULL, NULL, mtstate->mt_transition_capture,
+ oldtuple,
+ slot, NULL, NULL, mtstate->mt_transition_capture,
false);
/*
@@ -1397,10 +1323,30 @@ ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
}
/* AFTER ROW DELETE Triggers */
- ExecARDeleteTriggers(estate, resultRelInfo, tupleid, oldtuple,
+ ExecARDeleteTriggers(estate, resultRelInfo, oldtuple, slot,
ar_delete_trig_tcs, changingPart);
}
+/*
+ * Initializes the tuple slot in a ResultRelInfo for DELETE action.
+ *
+ * We mark 'projectNewInfoValid' even though the projections themselves
+ * are not initialized here.
+ */
+static void
+ExecInitDeleteTupleSlot(ModifyTableState *mtstate,
+ ResultRelInfo *resultRelInfo)
+{
+ EState *estate = mtstate->ps.state;
+
+ Assert(!resultRelInfo->ri_projectNewInfoValid);
+
+ resultRelInfo->ri_oldTupleSlot =
+ table_slot_create(resultRelInfo->ri_RelationDesc,
+ &estate->es_tupleTable);
+ resultRelInfo->ri_projectNewInfoValid = true;
+}
+
/* ----------------------------------------------------------------
* ExecDelete
*
@@ -1426,8 +1372,9 @@ ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
static TupleTableSlot *
ExecDelete(ModifyTableContext *context,
ResultRelInfo *resultRelInfo,
- ItemPointer tupleid,
+ Datum tupleid,
HeapTuple oldtuple,
+ TupleTableSlot *oldSlot,
bool processReturning,
bool changingPart,
bool canSetTag,
@@ -1491,6 +1438,11 @@ ExecDelete(ModifyTableContext *context,
}
else
{
+ int options = TABLE_MODIFY_WAIT | TABLE_MODIFY_FETCH_OLD_TUPLE;
+
+ if (!IsolationUsesXactSnapshot())
+ options |= TABLE_MODIFY_LOCK_UPDATED;
+
/*
* delete the tuple
*
@@ -1501,7 +1453,8 @@ ExecDelete(ModifyTableContext *context,
* transaction-snapshot mode transactions.
*/
ldelete:
- result = ExecDeleteAct(context, resultRelInfo, tupleid, changingPart);
+ result = ExecDeleteAct(context, resultRelInfo, tupleid, changingPart,
+ options, oldSlot);
if (tmresult)
*tmresult = result;
@@ -1548,7 +1501,6 @@ ExecDelete(ModifyTableContext *context,
case TM_Updated:
{
- TupleTableSlot *inputslot;
TupleTableSlot *epqslot;
if (IsolationUsesXactSnapshot())
@@ -1557,87 +1509,29 @@ ExecDelete(ModifyTableContext *context,
errmsg("could not serialize access due to concurrent update")));
/*
- * Already know that we're going to need to do EPQ, so
- * fetch tuple directly into the right slot.
+ * We need to do EPQ. The latest tuple is already found
+ * and locked as a result of TABLE_MODIFY_LOCK_UPDATED.
*/
- EvalPlanQualBegin(context->epqstate);
- inputslot = EvalPlanQualSlot(context->epqstate, resultRelationDesc,
- resultRelInfo->ri_RangeTableIndex);
+ Assert(context->tmfd.traversed);
+ epqslot = EvalPlanQual(context->epqstate,
+ resultRelationDesc,
+ resultRelInfo->ri_RangeTableIndex,
+ oldSlot);
+ if (TupIsNull(epqslot))
+ /* Tuple not passing quals anymore, exiting... */
+ return NULL;
- result = table_tuple_lock(resultRelationDesc, tupleid,
- estate->es_snapshot,
- inputslot, estate->es_output_cid,
- LockTupleExclusive, LockWaitBlock,
- TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
- &context->tmfd);
-
- switch (result)
+ /*
+ * If requested, skip delete and pass back the updated
+ * row.
+ */
+ if (epqreturnslot)
{
- case TM_Ok:
- Assert(context->tmfd.traversed);
- epqslot = EvalPlanQual(context->epqstate,
- resultRelationDesc,
- resultRelInfo->ri_RangeTableIndex,
- inputslot);
- if (TupIsNull(epqslot))
- /* Tuple not passing quals anymore, exiting... */
- return NULL;
-
- /*
- * If requested, skip delete and pass back the
- * updated row.
- */
- if (epqreturnslot)
- {
- *epqreturnslot = epqslot;
- return NULL;
- }
- else
- goto ldelete;
-
- case TM_SelfModified:
-
- /*
- * This can be reached when following an update
- * chain from a tuple updated by another session,
- * reaching a tuple that was already updated in
- * this transaction. If previously updated by this
- * command, ignore the delete, otherwise error
- * out.
- *
- * See also TM_SelfModified response to
- * table_tuple_delete() above.
- */
- if (context->tmfd.cmax != estate->es_output_cid)
- ereport(ERROR,
- (errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
- errmsg("tuple to be deleted was already modified by an operation triggered by the current command"),
- errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
- return NULL;
-
- case TM_Deleted:
- /* tuple already deleted; nothing to do */
- return NULL;
-
- default:
-
- /*
- * TM_Invisible should be impossible because we're
- * waiting for updated row versions, and would
- * already have errored out if the first version
- * is invisible.
- *
- * TM_Updated should be impossible, because we're
- * locking the latest version via
- * TUPLE_LOCK_FLAG_FIND_LAST_VERSION.
- */
- elog(ERROR, "unexpected table_tuple_lock status: %u",
- result);
- return NULL;
+ *epqreturnslot = epqslot;
+ return NULL;
}
-
- Assert(false);
- break;
+ else
+ goto ldelete;
}
case TM_Deleted:
@@ -1671,7 +1565,8 @@ ExecDelete(ModifyTableContext *context,
if (tupleDeleted)
*tupleDeleted = true;
- ExecDeleteEpilogue(context, resultRelInfo, tupleid, oldtuple, changingPart);
+ ExecDeleteEpilogue(context, resultRelInfo, oldtuple,
+ oldSlot, changingPart);
/* Process RETURNING if present and if requested */
if (processReturning && resultRelInfo->ri_projectReturning)
@@ -1687,19 +1582,15 @@ ExecDelete(ModifyTableContext *context,
/* FDW must have provided a slot containing the deleted row */
Assert(!TupIsNull(slot));
}
- else
+ else if (!slot || TupIsNull(slot))
{
+ /* Copy old tuple to the returning slot */
slot = ExecGetReturningSlot(estate, resultRelInfo);
if (oldtuple != NULL)
- {
ExecForceStoreHeapTuple(oldtuple, slot, false);
- }
else
- {
- if (!table_tuple_fetch_row_version(resultRelationDesc, tupleid,
- SnapshotAny, slot))
- elog(ERROR, "failed to fetch deleted tuple for DELETE RETURNING");
- }
+ ExecCopySlot(slot, oldSlot);
+ Assert(!TupIsNull(slot));
}
rslot = ExecProcessReturning(resultRelInfo, slot, context->planSlot);
@@ -1740,7 +1631,7 @@ ExecDelete(ModifyTableContext *context,
static bool
ExecCrossPartitionUpdate(ModifyTableContext *context,
ResultRelInfo *resultRelInfo,
- ItemPointer tupleid, HeapTuple oldtuple,
+ Datum tupleid, HeapTuple oldtuple,
TupleTableSlot *slot,
bool canSetTag,
UpdateContext *updateCxt,
@@ -1799,12 +1690,16 @@ ExecCrossPartitionUpdate(ModifyTableContext *context,
MemoryContextSwitchTo(oldcxt);
}
+ /* Make sure ri_oldTupleSlot is initialized. */
+ if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
+ ExecInitUpdateProjection(mtstate, resultRelInfo);
+
/*
* Row movement, part 1. Delete the tuple, but skip RETURNING processing.
* We want to return rows from INSERT.
*/
ExecDelete(context, resultRelInfo,
- tupleid, oldtuple,
+ tupleid, oldtuple, resultRelInfo->ri_oldTupleSlot,
false, /* processReturning */
true, /* changingPart */
false, /* canSetTag */
@@ -1845,21 +1740,13 @@ ExecCrossPartitionUpdate(ModifyTableContext *context,
return true;
else
{
- /* Fetch the most recent version of old tuple. */
- TupleTableSlot *oldSlot;
-
- /* ... but first, make sure ri_oldTupleSlot is initialized. */
- if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
- ExecInitUpdateProjection(mtstate, resultRelInfo);
- oldSlot = resultRelInfo->ri_oldTupleSlot;
- if (!table_tuple_fetch_row_version(resultRelInfo->ri_RelationDesc,
- tupleid,
- SnapshotAny,
- oldSlot))
- elog(ERROR, "failed to fetch tuple being updated");
- /* and project the new tuple to retry the UPDATE with */
+ /*
+ * ExecDelete already fetches the most recent version of old tuple
+ * to resultRelInfo->ri_oldTupleSlot. So, just project the new
+ * tuple to retry the UPDATE with.
+ */
*retry_slot = ExecGetUpdateNewTuple(resultRelInfo, epqslot,
- oldSlot);
+ resultRelInfo->ri_oldTupleSlot);
return false;
}
}
@@ -1877,8 +1764,8 @@ ExecCrossPartitionUpdate(ModifyTableContext *context,
/* Tuple routing starts from the root table. */
context->cpUpdateReturningSlot =
- ExecInsert(context, mtstate->rootResultRelInfo, slot, canSetTag,
- inserted_tuple, insert_destrel);
+ ExecInsert(context, mtstate->rootResultRelInfo,
+ slot, canSetTag, inserted_tuple, insert_destrel);
/*
* Reset the transition state that may possibly have been written by
@@ -1900,7 +1787,7 @@ ExecCrossPartitionUpdate(ModifyTableContext *context,
*/
static bool
ExecUpdatePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
- ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
+ Datum tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
TM_Result *result)
{
Relation resultRelationDesc = resultRelInfo->ri_RelationDesc;
@@ -1977,8 +1864,9 @@ ExecUpdatePrepareSlot(ResultRelInfo *resultRelInfo,
*/
static TM_Result
ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
- ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
- bool canSetTag, UpdateContext *updateCxt)
+ Datum tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
+ bool canSetTag, int options, TupleTableSlot *oldSlot,
+ UpdateContext *updateCxt)
{
EState *estate = context->estate;
Relation resultRelationDesc = resultRelInfo->ri_RelationDesc;
@@ -2070,7 +1958,8 @@ ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
ExecCrossPartitionUpdateForeignKey(context,
resultRelInfo,
insert_destrel,
- tupleid, slot,
+ tupleid,
+ resultRelInfo->ri_oldTupleSlot,
inserted_tuple);
return TM_Ok;
@@ -2113,10 +2002,10 @@ ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
estate->es_output_cid,
estate->es_snapshot,
estate->es_crosscheck_snapshot,
- true /* wait for commit */ ,
+ options /* wait policy and other modify flags */ ,
&context->tmfd, &updateCxt->lockmode,
- &updateCxt->updateIndexes);
-
+ &updateCxt->updateIndexes,
+ oldSlot);
return result;
}
@@ -2128,24 +2017,29 @@ ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
*/
static void
ExecUpdateEpilogue(ModifyTableContext *context, UpdateContext *updateCxt,
- ResultRelInfo *resultRelInfo, ItemPointer tupleid,
- HeapTuple oldtuple, TupleTableSlot *slot)
+ ResultRelInfo *resultRelInfo,
+ HeapTuple oldtuple, TupleTableSlot *slot,
+ TupleTableSlot *oldSlot)
{
ModifyTableState *mtstate = context->mtstate;
List *recheckIndexes = NIL;
/* insert index entries for tuple if necessary */
if (resultRelInfo->ri_NumIndices > 0 && (updateCxt->updateIndexes != TU_None))
- recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
- slot, context->estate,
- true, false,
+ {
+ recheckIndexes = ExecUpdateIndexTuples(resultRelInfo,
+ slot,
+ oldSlot,
+ context->estate,
+ false,
NULL, NIL,
(updateCxt->updateIndexes == TU_Summarizing));
+ }
/* AFTER ROW UPDATE Triggers */
ExecARUpdateTriggers(context->estate, resultRelInfo,
NULL, NULL,
- tupleid, oldtuple, slot,
+ oldtuple, oldSlot, slot,
recheckIndexes,
mtstate->operation == CMD_INSERT ?
mtstate->mt_oc_transition_capture :
@@ -2177,7 +2071,7 @@ static void
ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
ResultRelInfo *sourcePartInfo,
ResultRelInfo *destPartInfo,
- ItemPointer tupleid,
+ Datum tupleid,
TupleTableSlot *oldslot,
TupleTableSlot *newslot)
{
@@ -2234,7 +2128,7 @@ ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
/* Perform the root table's triggers. */
ExecARUpdateTriggers(context->estate,
rootRelInfo, sourcePartInfo, destPartInfo,
- tupleid, NULL, newslot, NIL, NULL, true);
+ NULL, oldslot, newslot, NIL, NULL, true);
}
/* ----------------------------------------------------------------
@@ -2256,6 +2150,7 @@ ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
* NULL when the foreign table has no relevant triggers.
*
* slot contains the new tuple value to be stored.
+ * oldSlot is the slot to store the old tuple.
* planSlot is the output of the ModifyTable's subplan; we use it
* to access values from other input tables (for RETURNING),
* row-ID junk columns, etc.
@@ -2267,8 +2162,8 @@ ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
*/
static TupleTableSlot *
ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
- ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
- bool canSetTag)
+ Datum tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
+ TupleTableSlot *oldSlot, bool canSetTag, bool locked)
{
EState *estate = context->estate;
Relation resultRelationDesc = resultRelInfo->ri_RelationDesc;
@@ -2321,6 +2216,15 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
}
else
{
+ int options = TABLE_MODIFY_WAIT;
+
+ if (!locked)
+ {
+ options |= TABLE_MODIFY_FETCH_OLD_TUPLE;
+ if (!IsolationUsesXactSnapshot())
+ options |= TABLE_MODIFY_LOCK_UPDATED;
+ }
+
/*
* If we generate a new candidate tuple after EvalPlanQual testing, we
* must loop back here to try again. (We don't need to redo triggers,
@@ -2330,7 +2234,7 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
*/
redo_act:
result = ExecUpdateAct(context, resultRelInfo, tupleid, oldtuple, slot,
- canSetTag, &updateCxt);
+ canSetTag, options, oldSlot, &updateCxt);
/*
* If ExecUpdateAct reports that a cross-partition update was done,
@@ -2381,88 +2285,30 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
case TM_Updated:
{
- TupleTableSlot *inputslot;
TupleTableSlot *epqslot;
- TupleTableSlot *oldSlot;
if (IsolationUsesXactSnapshot())
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
+ Assert(!locked);
/*
- * Already know that we're going to need to do EPQ, so
- * fetch tuple directly into the right slot.
+ * We need to do EPQ. The latest tuple is already found
+ * and locked as a result of TABLE_MODIFY_LOCK_UPDATED.
*/
- inputslot = EvalPlanQualSlot(context->epqstate, resultRelationDesc,
- resultRelInfo->ri_RangeTableIndex);
-
- result = table_tuple_lock(resultRelationDesc, tupleid,
- estate->es_snapshot,
- inputslot, estate->es_output_cid,
- updateCxt.lockmode, LockWaitBlock,
- TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
- &context->tmfd);
-
- switch (result)
- {
- case TM_Ok:
- Assert(context->tmfd.traversed);
-
- epqslot = EvalPlanQual(context->epqstate,
- resultRelationDesc,
- resultRelInfo->ri_RangeTableIndex,
- inputslot);
- if (TupIsNull(epqslot))
- /* Tuple not passing quals anymore, exiting... */
- return NULL;
-
- /* Make sure ri_oldTupleSlot is initialized. */
- if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
- ExecInitUpdateProjection(context->mtstate,
- resultRelInfo);
-
- /* Fetch the most recent version of old tuple. */
- oldSlot = resultRelInfo->ri_oldTupleSlot;
- if (!table_tuple_fetch_row_version(resultRelationDesc,
- tupleid,
- SnapshotAny,
- oldSlot))
- elog(ERROR, "failed to fetch tuple being updated");
- slot = ExecGetUpdateNewTuple(resultRelInfo,
- epqslot, oldSlot);
- goto redo_act;
-
- case TM_Deleted:
- /* tuple already deleted; nothing to do */
- return NULL;
-
- case TM_SelfModified:
-
- /*
- * This can be reached when following an update
- * chain from a tuple updated by another session,
- * reaching a tuple that was already updated in
- * this transaction. If previously modified by
- * this command, ignore the redundant update,
- * otherwise error out.
- *
- * See also TM_SelfModified response to
- * table_tuple_update() above.
- */
- if (context->tmfd.cmax != estate->es_output_cid)
- ereport(ERROR,
- (errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
- errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
- errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
- return NULL;
-
- default:
- /* see table_tuple_lock call in ExecDelete() */
- elog(ERROR, "unexpected table_tuple_lock status: %u",
- result);
- return NULL;
- }
+ Assert(context->tmfd.traversed);
+ epqslot = EvalPlanQual(context->epqstate,
+ resultRelationDesc,
+ resultRelInfo->ri_RangeTableIndex,
+ oldSlot);
+ if (TupIsNull(epqslot))
+ /* Tuple not passing quals anymore, exiting... */
+ return NULL;
+ slot = ExecGetUpdateNewTuple(resultRelInfo,
+ epqslot,
+ oldSlot);
+ goto redo_act;
}
break;
@@ -2485,8 +2331,8 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
if (canSetTag)
(estate->es_processed)++;
- ExecUpdateEpilogue(context, &updateCxt, resultRelInfo, tupleid, oldtuple,
- slot);
+ ExecUpdateEpilogue(context, &updateCxt, resultRelInfo, oldtuple,
+ slot, oldSlot);
/* Process RETURNING if present */
if (resultRelInfo->ri_projectReturning)
@@ -2509,144 +2355,26 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
static bool
ExecOnConflictUpdate(ModifyTableContext *context,
ResultRelInfo *resultRelInfo,
- ItemPointer conflictTid,
TupleTableSlot *excludedSlot,
bool canSetTag,
TupleTableSlot **returning)
{
ModifyTableState *mtstate = context->mtstate;
ExprContext *econtext = mtstate->ps.ps_ExprContext;
- Relation relation = resultRelInfo->ri_RelationDesc;
ExprState *onConflictSetWhere = resultRelInfo->ri_onConflict->oc_WhereClause;
TupleTableSlot *existing = resultRelInfo->ri_onConflict->oc_Existing;
- TM_FailureData tmfd;
- LockTupleMode lockmode;
- TM_Result test;
- Datum xminDatum;
- TransactionId xmin;
- bool isnull;
-
- /* Determine lock mode to use */
- lockmode = ExecUpdateLockMode(context->estate, resultRelInfo);
+ Datum tupleid;
- /*
- * Lock tuple for update. Don't follow updates when tuple cannot be
- * locked without doing so. A row locking conflict here means our
- * previous conclusion that the tuple is conclusively committed is not
- * true anymore.
- */
- test = table_tuple_lock(relation, conflictTid,
- context->estate->es_snapshot,
- existing, context->estate->es_output_cid,
- lockmode, LockWaitBlock, 0,
- &tmfd);
- switch (test)
+ if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) == ROW_REF_ROWID)
{
- case TM_Ok:
- /* success! */
- break;
-
- case TM_Invisible:
-
- /*
- * This can occur when a just inserted tuple is updated again in
- * the same command. E.g. because multiple rows with the same
- * conflicting key values are inserted.
- *
- * This is somewhat similar to the ExecUpdate() TM_SelfModified
- * case. We do not want to proceed because it would lead to the
- * same row being updated a second time in some unspecified order,
- * and in contrast to plain UPDATEs there's no historical behavior
- * to break.
- *
- * It is the user's responsibility to prevent this situation from
- * occurring. These problems are why the SQL standard similarly
- * specifies that for SQL MERGE, an exception must be raised in
- * the event of an attempt to update the same row twice.
- */
- xminDatum = slot_getsysattr(existing,
- MinTransactionIdAttributeNumber,
- &isnull);
- Assert(!isnull);
- xmin = DatumGetTransactionId(xminDatum);
-
- if (TransactionIdIsCurrentTransactionId(xmin))
- ereport(ERROR,
- (errcode(ERRCODE_CARDINALITY_VIOLATION),
- /* translator: %s is a SQL command name */
- errmsg("%s command cannot affect row a second time",
- "ON CONFLICT DO UPDATE"),
- errhint("Ensure that no rows proposed for insertion within the same command have duplicate constrained values.")));
-
- /* This shouldn't happen */
- elog(ERROR, "attempted to lock invisible tuple");
- break;
-
- case TM_SelfModified:
-
- /*
- * This state should never be reached. As a dirty snapshot is used
- * to find conflicting tuples, speculative insertion wouldn't have
- * seen this row to conflict with.
- */
- elog(ERROR, "unexpected self-updated tuple");
- break;
-
- case TM_Updated:
- if (IsolationUsesXactSnapshot())
- ereport(ERROR,
- (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
- errmsg("could not serialize access due to concurrent update")));
-
- /*
- * As long as we don't support an UPDATE of INSERT ON CONFLICT for
- * a partitioned table we shouldn't reach to a case where tuple to
- * be lock is moved to another partition due to concurrent update
- * of the partition key.
- */
- Assert(!ItemPointerIndicatesMovedPartitions(&tmfd.ctid));
-
- /*
- * Tell caller to try again from the very start.
- *
- * It does not make sense to use the usual EvalPlanQual() style
- * loop here, as the new version of the row might not conflict
- * anymore, or the conflicting tuple has actually been deleted.
- */
- ExecClearTuple(existing);
- return false;
-
- case TM_Deleted:
- if (IsolationUsesXactSnapshot())
- ereport(ERROR,
- (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
- errmsg("could not serialize access due to concurrent delete")));
-
- /* see TM_Updated case */
- Assert(!ItemPointerIndicatesMovedPartitions(&tmfd.ctid));
- ExecClearTuple(existing);
- return false;
-
- default:
- elog(ERROR, "unrecognized table_tuple_lock status: %u", test);
+ bool isnull;
+ tupleid = slot_getsysattr(existing, RowIdAttributeNumber, &isnull);
+ Assert(!isnull);
+ }
+ else
+ {
+ tupleid = PointerGetDatum(&existing->tts_tid);
}
-
- /* Success, the tuple is locked. */
-
- /*
- * Verify that the tuple is visible to our MVCC snapshot if the current
- * isolation level mandates that.
- *
- * It's not sufficient to rely on the check within ExecUpdate() as e.g.
- * CONFLICT ... WHERE clause may prevent us from reaching that.
- *
- * This means we only ever continue when a new command in the current
- * transaction could see the row, even though in READ COMMITTED mode the
- * tuple will not be visible according to the current statement's
- * snapshot. This is in line with the way UPDATE deals with newer tuple
- * versions.
- */
- ExecCheckTupleVisible(context->estate, relation, existing);
/*
* Make tuple and any needed join variables available to ExecQual and
@@ -2702,9 +2430,10 @@ ExecOnConflictUpdate(ModifyTableContext *context,
/* Execute UPDATE with projection */
*returning = ExecUpdate(context, resultRelInfo,
- conflictTid, NULL,
+ tupleid, NULL,
resultRelInfo->ri_onConflict->oc_ProjSlot,
- canSetTag);
+ existing,
+ canSetTag, true);
/*
* Clear out existing tuple, as there might not be another conflict among
@@ -2720,7 +2449,7 @@ ExecOnConflictUpdate(ModifyTableContext *context,
*/
static TupleTableSlot *
ExecMerge(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
- ItemPointer tupleid, HeapTuple oldtuple, bool canSetTag)
+ Datum tupleid, HeapTuple oldtuple, bool canSetTag)
{
TupleTableSlot *rslot = NULL;
bool matched;
@@ -2786,7 +2515,7 @@ ExecMerge(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
* update chain and we never switch from ExecMergeNotMatched() to
* ExecMergeMatched(), there is no risk of a livelock.
*/
- matched = tupleid != NULL || oldtuple != NULL;
+ matched = DatumGetPointer(tupleid) != NULL || oldtuple != NULL;
if (matched)
rslot = ExecMergeMatched(context, resultRelInfo, tupleid, oldtuple,
canSetTag, &matched);
@@ -2846,7 +2575,7 @@ ExecMerge(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
*/
static TupleTableSlot *
ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
- ItemPointer tupleid, HeapTuple oldtuple, bool canSetTag,
+ Datum tupleid, HeapTuple oldtuple, bool canSetTag,
bool *matched)
{
ModifyTableState *mtstate = context->mtstate;
@@ -2886,7 +2615,7 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
* either have the tupleid of the target row, or an old tuple from the
* target wholerow junk attr.
*/
- Assert(tupleid != NULL || oldtuple != NULL);
+ Assert(DatumGetPointer(tupleid) != NULL || oldtuple != NULL);
if (oldtuple != NULL)
ExecForceStoreHeapTuple(oldtuple, resultRelInfo->ri_oldTupleSlot,
false);
@@ -2985,7 +2714,7 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
Assert(oldtuple == NULL);
result = ExecUpdateAct(context, resultRelInfo, tupleid,
- NULL, newslot, canSetTag,
+ NULL, newslot, canSetTag, TABLE_MODIFY_WAIT, NULL,
&updateCxt);
/*
@@ -3007,7 +2736,8 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
if (result == TM_Ok)
{
ExecUpdateEpilogue(context, &updateCxt, resultRelInfo,
- tupleid, NULL, newslot);
+ NULL, newslot,
+ resultRelInfo->ri_oldTupleSlot);
mtstate->mt_merge_updated += 1;
}
break;
@@ -3037,13 +2767,13 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
Assert(oldtuple == NULL);
result = ExecDeleteAct(context, resultRelInfo, tupleid,
- false);
+ false, TABLE_MODIFY_WAIT, NULL);
}
if (result == TM_Ok)
{
- ExecDeleteEpilogue(context, resultRelInfo, tupleid, NULL,
- false);
+ ExecDeleteEpilogue(context, resultRelInfo, NULL,
+ resultRelInfo->ri_oldTupleSlot, false);
mtstate->mt_merge_deleted += 1;
}
break;
@@ -3154,7 +2884,6 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
switch (result)
{
case TM_Ok:
-
/*
* If the tuple was updated and migrated to
* another partition concurrently, the current
@@ -3196,9 +2925,13 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
* join quals no longer pass and we switch to
* the NOT MATCHED BY SOURCE case.
*/
- (void) ExecGetJunkAttribute(epqslot,
- resultRelInfo->ri_RowIdAttNo,
- &isNull);
+ /*
+ * Update tupleid to that of the new tuple, for
+ * the refetch we do at the top.
+ */
+ tupleid = ExecGetJunkAttribute(epqslot,
+ resultRelInfo->ri_RowIdAttNo,
+ &isNull);
if (isNull)
*matched = false;
@@ -3207,8 +2940,8 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
* we need to switch to the NOT MATCHED BY
* SOURCE case.
*/
- if (!table_tuple_fetch_row_version(resultRelationDesc,
- &context->tmfd.ctid,
+ if (!isNull && !table_tuple_fetch_row_version(resultRelationDesc,
+ tupleid,
SnapshotAny,
resultRelInfo->ri_oldTupleSlot))
elog(ERROR, "failed to fetch the target tuple");
@@ -3225,6 +2958,11 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
/*
* Loop back and process the MATCHED or NOT
* MATCHED BY SOURCE actions from the start.
+ * A non-NULL ctid means that we are still dealing
+ * with MATCHED case. Restart the loop so that we
+ * apply all the MATCHED rules again, to ensure
+ * that the first qualifying WHEN MATCHED action
+ * is executed.
*/
goto lmerge_matched;
@@ -3763,10 +3501,10 @@ ExecModifyTable(PlanState *pstate)
PlanState *subplanstate;
TupleTableSlot *slot;
TupleTableSlot *oldSlot;
+ Datum tupleid;
ItemPointerData tuple_ctid;
HeapTupleData oldtupdata;
HeapTuple oldtuple;
- ItemPointer tupleid;
CHECK_FOR_INTERRUPTS();
@@ -3815,6 +3553,8 @@ ExecModifyTable(PlanState *pstate)
*/
for (;;)
{
+ RowRefType refType;
+
/*
* Reset the per-output-tuple exprcontext. This is needed because
* triggers expect to use that context as workspace. It's a bit ugly
@@ -3890,7 +3630,7 @@ ExecModifyTable(PlanState *pstate)
EvalPlanQualSetSlot(&node->mt_epqstate, context.planSlot);
slot = ExecMerge(&context, node->resultRelInfo,
- NULL, NULL, node->canSetTag);
+ PointerGetDatum(NULL), NULL, node->canSetTag);
/*
* If we got a RETURNING result, return it to the caller.
@@ -3934,7 +3674,8 @@ ExecModifyTable(PlanState *pstate)
EvalPlanQualSetSlot(&node->mt_epqstate, context.planSlot);
slot = context.planSlot;
- tupleid = NULL;
+ refType = resultRelInfo->ri_RowRefType;
+ tupleid = PointerGetDatum(NULL);
oldtuple = NULL;
/*
@@ -3977,7 +3718,7 @@ ExecModifyTable(PlanState *pstate)
EvalPlanQualSetSlot(&node->mt_epqstate, context.planSlot);
slot = ExecMerge(&context, node->resultRelInfo,
- NULL, NULL, node->canSetTag);
+ PointerGetDatum(NULL), NULL, node->canSetTag);
/*
* If we got a RETURNING result, return it to the
@@ -3992,9 +3733,24 @@ ExecModifyTable(PlanState *pstate)
elog(ERROR, "ctid is NULL");
}
- tupleid = (ItemPointer) DatumGetPointer(datum);
- tuple_ctid = *tupleid; /* be sure we don't free ctid!! */
- tupleid = &tuple_ctid;
+ if (refType == ROW_REF_TID)
+ {
+ /* shouldn't ever get a null result... */
+ if (isNull)
+ elog(ERROR, "ctid is NULL");
+
+ tuple_ctid = *((ItemPointer) DatumGetPointer(datum)); /* be sure we don't free ctid!! */
+ tupleid = PointerGetDatum(&tuple_ctid);
+ }
+ else
+ {
+ Assert(refType == ROW_REF_ROWID);
+ /* shouldn't ever get a null result... */
+ if (isNull)
+ elog(ERROR, "rowid is NULL");
+
+ tupleid = datumCopy(datum, false, -1);
+ }
}
/*
@@ -4034,7 +3790,7 @@ ExecModifyTable(PlanState *pstate)
EvalPlanQualSetSlot(&node->mt_epqstate, context.planSlot);
slot = ExecMerge(&context, node->resultRelInfo,
- NULL, NULL, node->canSetTag);
+ PointerGetDatum(NULL), NULL, node->canSetTag);
/*
* If we got a RETURNING result, return it to the
@@ -4098,6 +3854,7 @@ ExecModifyTable(PlanState *pstate)
/* Fetch the most recent version of old tuple. */
Relation relation = resultRelInfo->ri_RelationDesc;
+ Assert(DatumGetPointer(tupleid) != NULL);
if (!table_tuple_fetch_row_version(relation, tupleid,
SnapshotAny,
oldSlot))
@@ -4108,12 +3865,18 @@ ExecModifyTable(PlanState *pstate)
/* Now apply the update. */
slot = ExecUpdate(&context, resultRelInfo, tupleid, oldtuple,
- slot, node->canSetTag);
+ slot, resultRelInfo->ri_oldTupleSlot,
+ node->canSetTag, false);
break;
case CMD_DELETE:
+ /* Initialize slot for DELETE to fetch the old tuple */
+ if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
+ ExecInitDeleteTupleSlot(node, resultRelInfo);
+
slot = ExecDelete(&context, resultRelInfo, tupleid, oldtuple,
- true, false, node->canSetTag, NULL, NULL, NULL);
+ resultRelInfo->ri_oldTupleSlot, true, false,
+ node->canSetTag, NULL, NULL, NULL);
break;
case CMD_MERGE:
@@ -4126,6 +3889,9 @@ ExecModifyTable(PlanState *pstate)
break;
}
+ if (refType == ROW_REF_ROWID && DatumGetPointer(tupleid) != NULL)
+ pfree(DatumGetPointer(tupleid));
+
/*
* If we got a RETURNING result, return it to caller. We'll continue
* the work on next call.
@@ -4370,10 +4136,20 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
relkind == RELKIND_MATVIEW ||
relkind == RELKIND_PARTITIONED_TABLE)
{
- resultRelInfo->ri_RowIdAttNo =
- ExecFindJunkAttributeInTlist(subplan->targetlist, "ctid");
- if (!AttributeNumberIsValid(resultRelInfo->ri_RowIdAttNo))
- elog(ERROR, "could not find junk ctid column");
+ if (resultRelInfo->ri_RowRefType == ROW_REF_TID)
+ {
+ resultRelInfo->ri_RowIdAttNo =
+ ExecFindJunkAttributeInTlist(subplan->targetlist, "ctid");
+ if (!AttributeNumberIsValid(resultRelInfo->ri_RowIdAttNo))
+ elog(ERROR, "could not find junk ctid column");
+ }
+ else
+ {
+ resultRelInfo->ri_RowIdAttNo =
+ ExecFindJunkAttributeInTlist(subplan->targetlist, "rowid");
+ if (!AttributeNumberIsValid(resultRelInfo->ri_RowIdAttNo))
+ elog(ERROR, "could not find junk rowid column");
+ }
}
else if (relkind == RELKIND_FOREIGN_TABLE)
{
@@ -4683,6 +4459,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
estate->es_auxmodifytables = lcons(mtstate,
estate->es_auxmodifytables);
+
+
return mtstate;
}
diff --git a/src/backend/executor/nodeTidscan.c b/src/backend/executor/nodeTidscan.c
index 864a9013b62..f4a124ac4eb 100644
--- a/src/backend/executor/nodeTidscan.c
+++ b/src/backend/executor/nodeTidscan.c
@@ -377,7 +377,7 @@ TidNext(TidScanState *node)
if (node->tss_isCurrentOf)
table_tuple_get_latest_tid(scan, &tid);
- if (table_tuple_fetch_row_version(heapRelation, &tid, snapshot, slot))
+ if (table_tuple_fetch_row_version(heapRelation, PointerGetDatum(&tid), snapshot, slot))
return slot;
/* Bad TID or failed snapshot qual; try next */
diff --git a/src/backend/nodes/read.c b/src/backend/nodes/read.c
index 4eb42445c52..ffa147ee4c8 100644
--- a/src/backend/nodes/read.c
+++ b/src/backend/nodes/read.c
@@ -205,6 +205,17 @@ pg_strtok(int *length)
return ret_str;
}
+bool
+pg_str_hasfield(void)
+{
+ const char *local_str = pg_strtok_ptr;
+
+ while (*local_str == ' ' || *local_str == '\n' || *local_str == '\t')
+ local_str++;
+
+ return (*local_str == ':');
+}
+
/*
* debackslash -
* create a palloc'd string holding the given token.
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 4895cee9944..7e02b670931 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -83,6 +83,7 @@ int min_parallel_index_scan_size;
/* Hook for plugins to get control in set_rel_pathlist() */
set_rel_pathlist_hook_type set_rel_pathlist_hook = NULL;
+set_plain_rel_pathlist_hook_type set_plain_rel_pathlist_hook = NULL;
/* Hook for plugins to replace standard_join_search() */
join_search_hook_type join_search_hook = NULL;
@@ -772,8 +773,10 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
*/
required_outer = rel->lateral_relids;
- /* Consider sequential scan */
- add_path(rel, create_seqscan_path(root, rel, required_outer, 0));
+ if (!set_plain_rel_pathlist_hook ||
+ set_plain_rel_pathlist_hook(root, rel, rte))
+ /* Consider sequential scan */
+ add_path(rel, create_seqscan_path(root, rel, required_outer, 0));
/* If appropriate, consider parallel sequential scan */
if (rel->consider_parallel && required_outer == NULL)
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index c0fcc7d78df..a698f888d71 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -20,6 +20,7 @@
#include "access/stratnum.h"
#include "access/sysattr.h"
#include "catalog/pg_am.h"
+#include "catalog/pg_amop.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_opfamily.h"
#include "catalog/pg_type.h"
@@ -32,8 +33,10 @@
#include "optimizer/paths.h"
#include "optimizer/prep.h"
#include "optimizer/restrictinfo.h"
+#include "utils/array.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
+#include "utils/syscache.h"
/* XXX see PartCollMatchesExprColl */
@@ -48,14 +51,6 @@ typedef enum
ST_ANYSCAN, /* either is okay */
} ScanTypeControl;
-/* Data structure for collecting qual clauses that match an index */
-typedef struct
-{
- bool nonempty; /* True if lists are not all empty */
- /* Lists of IndexClause nodes, one list per index column */
- List *indexclauses[INDEX_MAX_KEYS];
-} IndexClauseSet;
-
/* Per-path data used within choose_bitmap_and() */
typedef struct
{
@@ -129,9 +124,6 @@ static double adjust_rowcount_for_semijoins(PlannerInfo *root,
Index outer_relid,
double rowcount);
static double approximate_joinrel_size(PlannerInfo *root, Relids relids);
-static void match_restriction_clauses_to_index(PlannerInfo *root,
- IndexOptInfo *index,
- IndexClauseSet *clauseset);
static void match_join_clauses_to_index(PlannerInfo *root,
RelOptInfo *rel, IndexOptInfo *index,
IndexClauseSet *clauseset,
@@ -177,6 +169,10 @@ static IndexClause *match_rowcompare_to_indexcol(PlannerInfo *root,
RestrictInfo *rinfo,
int indexcol,
IndexOptInfo *index);
+static IndexClause *match_orclause_to_indexcol(PlannerInfo *root,
+ RestrictInfo *rinfo,
+ int indexcol,
+ IndexOptInfo *index);
static IndexClause *expand_indexqual_rowcompare(PlannerInfo *root,
RestrictInfo *rinfo,
int indexcol,
@@ -1166,6 +1162,386 @@ build_paths_for_OR(PlannerInfo *root, RelOptInfo *rel,
return result;
}
+/*
+ * Utility structure used to group similar OR-clause arguments in
+ * group_similar_or_args(). It represents information about the OR-clause
+ * argument and its matching index key.
+ */
+typedef struct
+{
+ int indexnum; /* index of the matching index, or -1 if no
+ * matching index */
+ int colnum; /* index of the matching column, or -1 if no
+ * matching index */
+ Oid opno; /* OID of the OpClause operator, or InvalidOid
+ * if not an OpExpr */
+ Oid inputcollid; /* OID of the OpClause input collation */
+ int argindex; /* index of the clause in the list of
+ * arguments */
+} OrArgIndexMatch;
+
+/*
+ * Comparison function for OrArgIndexMatch which provides sort order placing
+ * similar OR-clause arguments together.
+ */
+static int
+or_arg_index_match_cmp(const void *a, const void *b)
+{
+ const OrArgIndexMatch *match_a = (const OrArgIndexMatch *) a;
+ const OrArgIndexMatch *match_b = (const OrArgIndexMatch *) b;
+
+ if (match_a->indexnum < match_b->indexnum)
+ return -1;
+ else if (match_a->indexnum > match_b->indexnum)
+ return 1;
+
+ if (match_a->colnum < match_b->colnum)
+ return -1;
+ else if (match_a->colnum > match_b->colnum)
+ return 1;
+
+ if (match_a->opno < match_b->opno)
+ return -1;
+ else if (match_a->opno > match_b->opno)
+ return 1;
+
+ if (match_a->inputcollid < match_b->inputcollid)
+ return -1;
+ else if (match_a->inputcollid > match_b->inputcollid)
+ return 1;
+
+ if (match_a->argindex < match_b->argindex)
+ return -1;
+ else if (match_a->argindex > match_b->argindex)
+ return 1;
+
+ return 0;
+}
+
+/*
+ * group_similar_or_args
+ * Transform incoming OR-restrictinfo into a list of sub-restrictinfos,
+ * each of them containing a subset of OR-clauses from the source rinfo
+ * matching the same index column with the same operator and collation,
+ * It may be employed later, during the match_clause_to_indexcol() to
+ * transform whole OR-sub-rinfo to an SAOP clause.
+ *
+ * Similar arguments clauses of form "indexkey op constant" having same
+ * indexkey, operator, and collation. Constant may comprise either Const
+ * or Param.
+ *
+ * Returns the processed list of arguments.
+ */
+static List *
+group_similar_or_args(PlannerInfo *root, RelOptInfo *rel, RestrictInfo *rinfo)
+{
+ int n;
+ int i;
+ int group_start;
+ OrArgIndexMatch *matches;
+ bool matched = false;
+ ListCell *lc;
+ ListCell *lc2;
+ List *orargs;
+ List *result = NIL;
+
+ Assert(IsA(rinfo->orclause, BoolExpr));
+ orargs = ((BoolExpr *) rinfo->orclause)->args;
+ n = list_length(orargs);
+
+ /*
+ * To avoid N^2 behavior, take utility pass along the list of OR-clause
+ * arguments. For each argument, fill the OrArgIndexMatch structure,
+ * which will be used to sort these arguments at the next step.
+ */
+ i = -1;
+ matches = (OrArgIndexMatch *) palloc(sizeof(OrArgIndexMatch) * n);
+ foreach(lc, orargs)
+ {
+ Node *arg = lfirst(lc);
+ RestrictInfo *argrinfo;
+ OpExpr *clause;
+ Oid opno;
+ Node *leftop,
+ *rightop;
+ Node *nonConstExpr;
+ int indexnum;
+ int colnum;
+
+ i++;
+ matches[i].argindex = i;
+ matches[i].indexnum = -1;
+ matches[i].colnum = -1;
+ matches[i].opno = InvalidOid;
+ matches[i].inputcollid = InvalidOid;
+
+ if (!IsA(arg, RestrictInfo))
+ continue;
+
+ argrinfo = castNode(RestrictInfo, arg);
+
+ /* Only operator clauses can match */
+ if (!IsA(argrinfo->clause, OpExpr))
+ continue;
+
+ clause = (OpExpr *) argrinfo->clause;
+ opno = clause->opno;
+
+ /* Only binary operators can match */
+ if (list_length(clause->args) != 2)
+ continue;
+
+ /*
+ * Ignore any RelabelType node above the operands. This is needed to
+ * be able to apply indexscanning in binary-compatible-operator cases.
+ * Note: we can assume there is at most one RelabelType node;
+ * eval_const_expressions() will have simplified if more than one.
+ */
+ leftop = get_leftop(clause);
+ if (IsA(leftop, RelabelType))
+ leftop = (Node *) ((RelabelType *) leftop)->arg;
+
+ rightop = get_rightop(clause);
+ if (IsA(rightop, RelabelType))
+ rightop = (Node *) ((RelabelType *) rightop)->arg;
+
+ /*
+ * Check for clauses of the form: (indexkey operator constant) or
+ * (constant operator indexkey). But we don't know a particular index
+ * yet. First check for a constant, which must be Const or Param.
+ * That's cheaper than search for an index key among all indexes.
+ */
+ if (IsA(leftop, Const) || IsA(leftop, Param))
+ {
+ opno = get_commutator(opno);
+
+ if (!OidIsValid(opno))
+ {
+ /* commutator doesn't exist, we can't reverse the order */
+ continue;
+ }
+ nonConstExpr = rightop;
+ }
+ else if (IsA(rightop, Const) || IsA(rightop, Param))
+ {
+ nonConstExpr = leftop;
+ }
+ else
+ {
+ continue;
+ }
+
+ /*
+ * Match non-constant part to the index key. It's possible that a
+ * single non-constant part matches multiple index keys. That's OK: we
+ * just stop at the first matching index key. Given that this choice is
+ * determined the same for every clause, we will group similar clauses
+ * together anyway.
+ */
+ indexnum = 0;
+ foreach(lc2, rel->indexlist)
+ {
+ IndexOptInfo *index = (IndexOptInfo *) lfirst(lc2);
+
+ /*
+ * Ignore index if it doesn't support bitmap scans or SAOP
+ * clauses.
+ */
+ if (!index->amhasgetbitmap || !index->amsearcharray)
+ continue;
+
+ for (colnum = 0; colnum < index->nkeycolumns; colnum++)
+ {
+ if (match_index_to_operand(nonConstExpr, colnum, index))
+ {
+ matches[i].indexnum = indexnum;
+ matches[i].colnum = colnum;
+ matches[i].opno = opno;
+ matches[i].inputcollid = clause->inputcollid;
+ matched = true;
+ break;
+ }
+ }
+
+ /*
+ * Stop looping through the indexes, if we managed to match
+ * nonConstExpr to any index column.
+ */
+ if (matches[i].indexnum >= 0)
+ break;
+ indexnum++;
+ }
+ }
+
+ /*
+ * Fast-path check: if no clause is matching to the index column, we can
+ * just give up at this stage and return the clause list as-is.
+ */
+ if (!matched)
+ {
+ pfree(matches);
+ return orargs;
+ }
+
+ /* Sort clauses to make similar clauses go together */
+ qsort(matches, n, sizeof(OrArgIndexMatch), or_arg_index_match_cmp);
+
+ /*
+ * Group similar clauses into single sub-restrictinfo. Side effect: the
+ * resulting list of restrictions will be sorted by indexnum and colnum.
+ */
+ group_start = 0;
+ for (i = 1; i <= n; i++)
+ {
+ /* Check if it's a group boundary */
+ if (group_start >= 0 &&
+ (i == n ||
+ matches[i].indexnum != matches[group_start].indexnum ||
+ matches[i].colnum != matches[group_start].colnum ||
+ matches[i].opno != matches[group_start].opno ||
+ matches[i].inputcollid != matches[group_start].inputcollid ||
+ matches[i].indexnum == -1))
+ {
+ /*
+ * One clause in group: add it "as is" to the upper-level OR.
+ */
+ if (i - group_start == 1)
+ {
+ result = lappend(result,
+ list_nth(orargs,
+ matches[group_start].argindex));
+ }
+ else
+ {
+ /*
+ * Two or more clauses in a group: create a nested OR.
+ */
+ List *args = NIL;
+ List *rargs = NIL;
+ RestrictInfo *subrinfo;
+ int j;
+
+ Assert(i - group_start >= 2);
+
+ /* Construct the list of nested OR arguments */
+ for (j = group_start; j < i; j++)
+ {
+ Node *arg = list_nth(orargs, matches[j].argindex);
+
+ rargs = lappend(rargs, arg);
+ if (IsA(arg, RestrictInfo))
+ args = lappend(args, ((RestrictInfo *) arg)->clause);
+ else
+ args = lappend(args, arg);
+ }
+
+ /* Construct the nested OR and wrap it with RestrictInfo */
+ subrinfo = make_plain_restrictinfo(root,
+ make_orclause(args),
+ make_orclause(rargs),
+ rinfo->is_pushed_down,
+ rinfo->has_clone,
+ rinfo->is_clone,
+ rinfo->pseudoconstant,
+ rinfo->security_level,
+ rinfo->required_relids,
+ rinfo->incompatible_relids,
+ rinfo->outer_relids);
+ result = lappend(result, subrinfo);
+ }
+
+ group_start = i;
+ }
+ }
+ pfree(matches);
+ return result;
+}
+
+/*
+ * make_bitmap_paths_for_or_group
+ * Generate bitmap paths for a group of similar OR-clause arguments
+ * produced by group_similar_or_args().
+ *
+ * This function considers two cases: (1) matching a group of clauses to
+ * the index as a whole, and (2) matching the individual clauses one-by-one.
+ * (1) is typically the optimal solution. If not, (2) is typically a
+ * fair alternative.
+ *
+ * Ideally, we could consider all arbitrary splits of arguments into
+ * subgroups, but that could lead to unacceptable computational complexity.
+ * This is why we only consider the two cases above.
+ */
+static List *
+make_bitmap_paths_for_or_group(PlannerInfo *root, RelOptInfo *rel,
+ RestrictInfo *ri, List *other_clauses)
+{
+ List *jointlist = NIL;
+ List *splitlist = NIL;
+ ListCell *lc;
+ List *orargs;
+ List *args = ((BoolExpr *) ri->orclause)->args;
+ Cost jointcost = 0.0,
+ splitcost = 0.0;
+ Path *bitmapqual;
+ List *indlist;
+
+ /*
+ * First, try to match the whole group to the one index.
+ */
+ orargs = list_make1(ri);
+ indlist = build_paths_for_OR(root, rel,
+ orargs,
+ other_clauses);
+ if (indlist != NIL)
+ {
+ bitmapqual = choose_bitmap_and(root, rel, indlist);
+ jointcost = bitmapqual->total_cost;
+ jointlist = list_make1(bitmapqual);
+ }
+
+ /*
+ * If we manage to find a bitmap scan, which uses the group of OR-clause
+ * arguments as a whole, we can skip matching OR-clause arguments
+ * one-by-one as long as there are no other clauses, which can bring more
+ * efficiency to the one-by-one case.
+ */
+ if (jointlist != NIL && other_clauses == NIL)
+ return jointlist;
+
+ /*
+ * Also try to match all containing clauses one-by-one.
+ */
+ foreach(lc, args)
+ {
+ orargs = list_make1(lfirst(lc));
+
+ indlist = build_paths_for_OR(root, rel,
+ orargs,
+ other_clauses);
+
+ if (indlist == NIL)
+ {
+ splitlist = NIL;
+ break;
+ }
+
+ bitmapqual = choose_bitmap_and(root, rel, indlist);
+ splitcost += bitmapqual->total_cost;
+ splitlist = lappend(splitlist, bitmapqual);
+ }
+
+ /*
+ * Pick the best option.
+ */
+ if (splitlist == NIL)
+ return jointlist;
+ else if (jointlist == NIL)
+ return splitlist;
+ else
+ return (jointcost < splitcost) ? jointlist : splitlist;
+}
+
+
/*
* generate_bitmap_or_paths
* Look through the list of clauses to find OR clauses, and generate
@@ -1196,6 +1572,8 @@ generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
List *pathlist;
Path *bitmapqual;
ListCell *j;
+ List *groupedArgs;
+ List *inner_other_clauses = NIL;
/* Ignore RestrictInfos that aren't ORs */
if (!restriction_is_or_clause(rinfo))
@@ -1206,7 +1584,28 @@ generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
* the OR, else we can't use it.
*/
pathlist = NIL;
- foreach(j, ((BoolExpr *) rinfo->orclause)->args)
+
+ /*
+ * Group the similar OR-clause arguments into dedicated RestrictInfos,
+ * because those RestrictInfos might match to the index as a whole.
+ */
+ groupedArgs = group_similar_or_args(root, rel, rinfo);
+
+ if (groupedArgs != ((BoolExpr *) rinfo->orclause)->args)
+ {
+ /*
+ * Some parts of the rinfo were grouped. In this case, we have a
+ * set of sub-rinfos that together are an exact duplicate of
+ * rinfo. Thus, we need to remove the rinfo from other clauses.
+ * match_clauses_to_index detects duplicated iclauses by comparing
+ * pointers to original rinfos that would be different. So, we
+ * must delete rinfo to avoid de-facto duplicated clauses in the
+ * index clauses list.
+ */
+ inner_other_clauses = list_delete(list_copy(all_clauses), rinfo);
+ }
+
+ foreach(j, groupedArgs)
{
Node *orarg = (Node *) lfirst(j);
List *indlist;
@@ -1226,12 +1625,34 @@ generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
andargs,
all_clauses));
}
+ else if (restriction_is_or_clause(castNode(RestrictInfo, orarg)))
+ {
+ RestrictInfo *ri = castNode(RestrictInfo, orarg);
+
+ /*
+ * Generate bitmap paths for the group of similar OR-clause
+ * arguments.
+ */
+ indlist = make_bitmap_paths_for_or_group(root,
+ rel, ri,
+ inner_other_clauses);
+
+ if (indlist == NIL)
+ {
+ pathlist = NIL;
+ break;
+ }
+ else
+ {
+ pathlist = list_concat(pathlist, indlist);
+ continue;
+ }
+ }
else
{
RestrictInfo *ri = castNode(RestrictInfo, orarg);
List *orargs;
- Assert(!restriction_is_or_clause(ri));
orargs = list_make1(ri);
indlist = build_paths_for_OR(root, rel,
@@ -1257,6 +1678,9 @@ generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
pathlist = lappend(pathlist, bitmapqual);
}
+ if (inner_other_clauses != NIL)
+ list_free(inner_other_clauses);
+
/*
* If we have a match for every arm, then turn them into a
* BitmapOrPath, and add to result list.
@@ -1964,7 +2388,7 @@ approximate_joinrel_size(PlannerInfo *root, Relids relids)
* Identify restriction clauses for the rel that match the index.
* Matching clauses are added to *clauseset.
*/
-static void
+void
match_restriction_clauses_to_index(PlannerInfo *root,
IndexOptInfo *index,
IndexClauseSet *clauseset)
@@ -2149,7 +2573,10 @@ match_clause_to_index(PlannerInfo *root,
* (3) must match the collation of the index, if collation is relevant.
*
* Our definition of "const" is exceedingly liberal: we allow anything that
- * doesn't involve a volatile function or a Var of the index's relation.
+ * doesn't involve a volatile function or a Var of the index's relation
+ * except for a boolean OR expression input: due to a trade-off between the
+ * expected execution speedup and planning complexity, we limit the OR->SAOP
+ * transformation to obvious cases where an index scan can profit.
* In particular, Vars belonging to other relations of the query are
* accepted here, since a clause of that form can be used in a
* parameterized indexscan. It's the responsibility of higher code levels
@@ -2179,6 +2606,10 @@ match_clause_to_index(PlannerInfo *root,
* It is also possible to match ScalarArrayOpExpr clauses to indexes, when
* the clause is of the form "indexkey op ANY (arrayconst)".
*
+ * It is also possible to match a list of OR clauses if it might be
+ * transformed into a single ScalarArrayOpExpr clause. On success,
+ * the returned index clause will contain a transformed clause.
+ *
* For boolean indexes, it is also possible to match the clause directly
* to the indexkey; or perhaps the clause is (NOT indexkey).
*
@@ -2228,9 +2659,9 @@ match_clause_to_indexcol(PlannerInfo *root,
}
/*
- * Clause must be an opclause, funcclause, ScalarArrayOpExpr, or
- * RowCompareExpr. Or, if the index supports it, we can handle IS
- * NULL/NOT NULL clauses.
+ * Clause must be an opclause, funcclause, ScalarArrayOpExpr,
+ * RowCompareExpr, or OR-clause that could be converted to SAOP. Or, if
+ * the index supports it, we can handle IS NULL/NOT NULL clauses.
*/
if (IsA(clause, OpExpr))
{
@@ -2248,6 +2679,10 @@ match_clause_to_indexcol(PlannerInfo *root,
{
return match_rowcompare_to_indexcol(root, rinfo, indexcol, index);
}
+ else if (restriction_is_or_clause(rinfo))
+ {
+ return match_orclause_to_indexcol(root, rinfo, indexcol, index);
+ }
else if (index->amsearchnulls && IsA(clause, NullTest))
{
NullTest *nt = (NullTest *) clause;
@@ -2423,7 +2858,7 @@ match_opclause_to_indexcol(PlannerInfo *root,
/*
* Check for clauses of the form: (indexkey operator constant) or
- * (constant operator indexkey). See match_clause_to_indexcol's notes
+ * (constant operator indexkey). See match_clause_to_indexcol()'s notes
* about const-ness.
*
* Note that we don't ask the support function about clauses that don't
@@ -2771,6 +3206,269 @@ match_rowcompare_to_indexcol(PlannerInfo *root,
return NULL;
}
+/*
+ * match_orclause_to_indexcol()
+ * Handles the OR-expr case for match_clause_to_indexcol() in the case
+ * when it could be transformed to ScalarArrayOpExpr.
+ *
+ * Given a list of OR-clause args, attempts to transform this BoolExpr into
+ * a single SAOP expression. On success, returns an IndexClause containing
+ * the transformed expression; returns NULL on failure.
+ */
+static IndexClause *
+match_orclause_to_indexcol(PlannerInfo *root,
+ RestrictInfo *rinfo,
+ int indexcol,
+ IndexOptInfo *index)
+{
+ ListCell *lc;
+ BoolExpr *orclause = (BoolExpr *) rinfo->orclause;
+ Node *indexExpr = NULL;
+ List *consts = NIL;
+ Node *arrayNode = NULL;
+ ScalarArrayOpExpr *saopexpr = NULL;
+ Oid matchOpno = InvalidOid;
+ IndexClause *iclause;
+ Oid consttype = InvalidOid;
+ Oid arraytype = InvalidOid;
+ Oid inputcollid = InvalidOid;
+ bool firstTime = true;
+ bool have_param = false;
+
+ Assert(IsA(orclause, BoolExpr));
+ Assert(orclause->boolop == OR_EXPR);
+
+ /* Ignore index if it doesn't support SAOP clauses */
+ if(!index->amsearcharray)
+ return NULL;
+
+ /*
+ * Try to convert a list of OR-clauses to a single SAOP expression. Each
+ * OR entry must be in the form: (indexkey operator constant) or (constant
+ * operator indexkey). Operators of all the entries must match. Constant
+ * might be either Const or Param. To be effective, give up on the first
+ * non-matching entry. Exit is implemented as a break from the loop, which
+ * is caught afterwards.
+ */
+ foreach(lc, orclause->args)
+ {
+ RestrictInfo *subRinfo;
+ OpExpr *subClause;
+ Oid opno;
+ Node *leftop,
+ *rightop;
+ Node *constExpr;
+
+ if (!IsA(lfirst(lc), RestrictInfo))
+ break;
+
+ subRinfo = (RestrictInfo *) lfirst(lc);
+
+ /* Only operator clauses can match */
+ if (!IsA(subRinfo->clause, OpExpr))
+ break;
+
+ subClause = (OpExpr *) subRinfo->clause;
+ opno = subClause->opno;
+
+ /* Only binary operators can match */
+ if (list_length(subClause->args) != 2)
+ break;
+
+ /*
+ * The parameters below must match between sub-rinfo and its parent as
+ * make_restrictinfo() fills them with the same values, and further
+ * modifications are also the same for the whole subtree. However,
+ * still make a sanity check.
+ */
+ Assert(subRinfo->is_pushed_down == rinfo->is_pushed_down);
+ Assert(subRinfo->is_clone == rinfo->is_clone);
+ Assert(subRinfo->security_level == rinfo->security_level);
+ Assert(bms_equal(subRinfo->incompatible_relids, rinfo->incompatible_relids));
+ Assert(bms_equal(subRinfo->outer_relids, rinfo->outer_relids));
+
+ /*
+ * Also, check that required_relids in sub-rinfo is subset of parent's
+ * required_relids.
+ */
+ Assert(bms_is_subset(subRinfo->required_relids, rinfo->required_relids));
+
+ /* Only operator returning boolean suits the transformation */
+ if (get_op_rettype(opno) != BOOLOID)
+ break;
+
+ /*
+ * Check for clauses of the form: (indexkey operator constant) or
+ * (constant operator indexkey). Determine indexkey side first, check
+ * the constant later.
+ */
+ leftop = (Node *) linitial(subClause->args);
+ rightop = (Node *) lsecond(subClause->args);
+ if (match_index_to_operand(leftop, indexcol, index))
+ {
+ indexExpr = leftop;
+ constExpr = rightop;
+ }
+ else if (match_index_to_operand(rightop, indexcol, index))
+ {
+ opno = get_commutator(opno);
+ if (!OidIsValid(opno))
+ {
+ /* commutator doesn't exist, we can't reverse the order */
+ break;
+ }
+ indexExpr = rightop;
+ constExpr = leftop;
+ }
+ else
+ {
+ break;
+ }
+
+ /*
+ * Ignore any RelabelType node above the operands. This is needed to
+ * be able to apply indexscanning in binary-compatible-operator cases.
+ * Note: we can assume there is at most one RelabelType node;
+ * eval_const_expressions() will have simplified if more than one.
+ */
+ if (IsA(constExpr, RelabelType))
+ constExpr = (Node *) ((RelabelType *) constExpr)->arg;
+ if (IsA(indexExpr, RelabelType))
+ indexExpr = (Node *) ((RelabelType *) indexExpr)->arg;
+
+ /* We allow constant to be Const or Param */
+ if (!IsA(constExpr, Const) && !IsA(constExpr, Param))
+ break;
+
+ /* Forbid transformation for composite types, records. */
+ if (type_is_rowtype(exprType(constExpr)) ||
+ type_is_rowtype(exprType(indexExpr)))
+ break;
+
+ /*
+ * Save information about the operator, type, and collation for the
+ * first matching qual. Then, check that subsequent quals match the
+ * first.
+ */
+ if (firstTime)
+ {
+ matchOpno = opno;
+ consttype = exprType(constExpr);
+ arraytype = get_array_type(consttype);
+ inputcollid = subClause->inputcollid;
+
+ /*
+ * Check that the operator is presented in the opfamily and that
+ * the expression collation matches the index collation. Also,
+ * there must be an array type to construct an array later.
+ */
+ if (!IndexCollMatchesExprColl(index->indexcollations[indexcol], inputcollid) ||
+ !op_in_opfamily(matchOpno, index->opfamily[indexcol]) ||
+ !OidIsValid(arraytype))
+ break;
+ firstTime = false;
+ }
+ else
+ {
+ if (opno != matchOpno ||
+ inputcollid != subClause->inputcollid ||
+ consttype != exprType(constExpr))
+ break;
+ }
+
+ if (IsA(constExpr, Param))
+ have_param = true;
+ consts = lappend(consts, constExpr);
+ }
+
+ /*
+ * Catch the break from the loop above. Normally, a foreach() loop ends
+ * up with a NULL list cell. A non-NULL list cell indicates a break from
+ * the foreach() loop. In that case, free the consts list and return NULL.
+ */
+ if (lc != NULL)
+ {
+ list_free(consts);
+ return NULL;
+ }
+
+ /*
+ * Assemble an array from the list of constants. It seems more profitable
+ * to build a const array. But in the presence of parameters, we don't
+ * have a specific value here and must employ an ArrayExpr instead.
+ */
+
+ if (have_param)
+ {
+ ArrayExpr *arrayExpr = makeNode(ArrayExpr);
+
+ /* array_collid will be set by parse_collate.c */
+ arrayExpr->element_typeid = consttype;
+ arrayExpr->array_typeid = arraytype;
+ arrayExpr->multidims = false;
+ arrayExpr->elements = consts;
+ arrayExpr->location = -1;
+
+ arrayNode = (Node *) arrayExpr;
+ }
+ else
+ {
+ int16 typlen;
+ bool typbyval;
+ char typalign;
+ Datum *elems;
+ int i = 0;
+ ArrayType *arrayConst;
+
+ get_typlenbyvalalign(consttype, &typlen, &typbyval, &typalign);
+
+ elems = (Datum *) palloc(sizeof(Datum) * list_length(consts));
+ foreach_node(Const, value, consts)
+ {
+ Assert(!value->constisnull);
+
+ elems[i++] = value->constvalue;
+ }
+
+ arrayConst = construct_array(elems, i, consttype,
+ typlen, typbyval, typalign);
+ arrayNode = (Node *) makeConst(arraytype, -1, inputcollid,
+ -1, PointerGetDatum(arrayConst),
+ false, false);
+
+ pfree(elems);
+ list_free(consts);
+ }
+
+ /* Build the SAOP expression node */
+ saopexpr = makeNode(ScalarArrayOpExpr);
+ saopexpr->opno = matchOpno;
+ saopexpr->opfuncid = get_opcode(matchOpno);
+ saopexpr->hashfuncid = InvalidOid;
+ saopexpr->negfuncid = InvalidOid;
+ saopexpr->useOr = true;
+ saopexpr->inputcollid = inputcollid;
+ saopexpr->args = list_make2(indexExpr, arrayNode);
+ saopexpr->location = -1;
+
+ /*
+ * Finally, build an IndexClause based on the SAOP node. Use
+ * make_simple_restrictinfo() to get RestrictInfo with clean selectivity
+ * estimations because it may differ from the estimation made for an OR
+ * clause. Although it is not a lossy expression, keep the old version of
+ * rinfo in iclause->rinfo to detect duplicates and recheck the original
+ * clause.
+ */
+ iclause = makeNode(IndexClause);
+ iclause->rinfo = rinfo;
+ iclause->indexquals = list_make1(make_simple_restrictinfo(root,
+ &saopexpr->xpr));
+ iclause->lossy = false;
+ iclause->indexcol = indexcol;
+ iclause->indexcols = NIL;
+ return iclause;
+}
+
/*
* expand_indexqual_rowcompare --- expand a single indexqual condition
* that is a RowCompareExpr
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index c0af10ebd34..4bb56f50c16 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -165,16 +165,12 @@ static MergeJoin *create_mergejoin_plan(PlannerInfo *root, MergePath *best_path)
static HashJoin *create_hashjoin_plan(PlannerInfo *root, HashPath *best_path);
static Node *replace_nestloop_params(PlannerInfo *root, Node *expr);
static Node *replace_nestloop_params_mutator(Node *node, PlannerInfo *root);
-static void fix_indexqual_references(PlannerInfo *root, IndexPath *index_path,
- List **stripped_indexquals_p,
- List **fixed_indexquals_p);
static List *fix_indexorderby_references(PlannerInfo *root, IndexPath *index_path);
static Node *fix_indexqual_clause(PlannerInfo *root,
IndexOptInfo *index, int indexcol,
Node *clause, List *indexcolnos);
static Node *fix_indexqual_operand(Node *node, IndexOptInfo *index, int indexcol);
static List *get_switched_clauses(List *clauses, Relids outerrelids);
-static List *order_qual_clauses(PlannerInfo *root, List *clauses);
static void copy_generic_path_info(Plan *dest, Path *src);
static void copy_plan_costsize(Plan *dest, Plan *src);
static void label_sort_with_costsize(PlannerInfo *root, Sort *plan,
@@ -4939,6 +4935,14 @@ replace_nestloop_params(PlannerInfo *root, Node *expr)
return replace_nestloop_params_mutator(expr, root);
}
+Node *
+replace_nestloop_params_compat(PlannerInfo *root, Node *expr)
+{
+ /* No setup needed for tree walk, so away we go */
+ return replace_nestloop_params_mutator(expr, root);
+}
+
+
static Node *
replace_nestloop_params_mutator(Node *node, PlannerInfo *root)
{
@@ -5019,7 +5023,7 @@ replace_nestloop_params_mutator(Node *node, PlannerInfo *root)
* are subplans in it (we need two separate copies of the subplan tree, or
* things will go awry).
*/
-static void
+void
fix_indexqual_references(PlannerInfo *root, IndexPath *index_path,
List **stripped_indexquals_p, List **fixed_indexquals_p)
{
@@ -5312,7 +5316,7 @@ get_switched_clauses(List *clauses, Relids outerrelids)
* instead of bare clauses. This is another reason why trying to consider
* selectivity in the ordering would likely do the wrong thing.
*/
-static List *
+List *
order_qual_clauses(PlannerInfo *root, List *clauses)
{
typedef struct
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 0c7273b9ccd..2d5b3978ca0 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -2314,6 +2314,7 @@ preprocess_rowmarks(PlannerInfo *root)
RowMarkClause *rc = lfirst_node(RowMarkClause, l);
RangeTblEntry *rte = rt_fetch(rc->rti, parse->rtable);
PlanRowMark *newrc;
+ RowRefType refType;
/*
* Currently, it is syntactically impossible to have FOR UPDATE et al
@@ -2336,8 +2337,8 @@ preprocess_rowmarks(PlannerInfo *root)
newrc = makeNode(PlanRowMark);
newrc->rti = newrc->prti = rc->rti;
newrc->rowmarkId = ++(root->glob->lastRowMarkId);
- newrc->markType = select_rowmark_type(rte, rc->strength);
- newrc->allMarkTypes = (1 << newrc->markType);
+ newrc->markType = select_rowmark_type(rte, rc->strength, &refType);
+ newrc->allRefTypes = (1 << refType);
newrc->strength = rc->strength;
newrc->waitPolicy = rc->waitPolicy;
newrc->isParent = false;
@@ -2353,6 +2354,7 @@ preprocess_rowmarks(PlannerInfo *root)
{
RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
PlanRowMark *newrc;
+ RowRefType refType = ROW_REF_TID;
i++;
if (!bms_is_member(i, rels))
@@ -2361,8 +2363,8 @@ preprocess_rowmarks(PlannerInfo *root)
newrc = makeNode(PlanRowMark);
newrc->rti = newrc->prti = i;
newrc->rowmarkId = ++(root->glob->lastRowMarkId);
- newrc->markType = select_rowmark_type(rte, LCS_NONE);
- newrc->allMarkTypes = (1 << newrc->markType);
+ newrc->markType = select_rowmark_type(rte, LCS_NONE, &refType);
+ newrc->allRefTypes = (1 << refType);
newrc->strength = LCS_NONE;
newrc->waitPolicy = LockWaitBlock; /* doesn't matter */
newrc->isParent = false;
@@ -2377,11 +2379,13 @@ preprocess_rowmarks(PlannerInfo *root)
* Select RowMarkType to use for a given table
*/
RowMarkType
-select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength)
+select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength,
+ RowRefType *refType)
{
if (rte->rtekind != RTE_RELATION)
{
/* If it's not a table at all, use ROW_MARK_COPY */
+ *refType = ROW_REF_COPY;
return ROW_MARK_COPY;
}
else if (rte->relkind == RELKIND_FOREIGN_TABLE)
@@ -2392,10 +2396,12 @@ select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength)
if (fdwroutine->GetForeignRowMarkType != NULL)
return fdwroutine->GetForeignRowMarkType(rte, strength);
/* Otherwise, use ROW_MARK_COPY by default */
+ *refType = ROW_REF_COPY;
return ROW_MARK_COPY;
}
else
{
+ *refType = rte->reftype;
/* Regular table, apply the appropriate lock type */
switch (strength)
{
diff --git a/src/backend/optimizer/prep/preptlist.c b/src/backend/optimizer/prep/preptlist.c
index 931b9c09bda..9c4671c817e 100644
--- a/src/backend/optimizer/prep/preptlist.c
+++ b/src/backend/optimizer/prep/preptlist.c
@@ -234,7 +234,7 @@ preprocess_targetlist(PlannerInfo *root)
if (rc->rti != rc->prti)
continue;
- if (rc->allMarkTypes & ~(1 << ROW_MARK_COPY))
+ if (rc->allRefTypes & (1 << ROW_REF_TID))
{
/* Need to fetch TID */
var = makeVar(rc->rti,
@@ -250,7 +250,23 @@ preprocess_targetlist(PlannerInfo *root)
true);
tlist = lappend(tlist, tle);
}
- if (rc->allMarkTypes & (1 << ROW_MARK_COPY))
+ if (rc->allRefTypes & (1 << ROW_REF_ROWID))
+ {
+ /* Need to fetch row ID */
+ var = makeVar(rc->rti,
+ RowIdAttributeNumber,
+ BYTEAOID,
+ -1,
+ InvalidOid,
+ 0);
+ snprintf(resname, sizeof(resname), "rowid%u", rc->rowmarkId);
+ tle = makeTargetEntry((Expr *) var,
+ list_length(tlist) + 1,
+ pstrdup(resname),
+ true);
+ tlist = lappend(tlist, tle);
+ }
+ if (rc->allRefTypes & (1 << ROW_REF_COPY))
{
/* Need the whole row as a junk var */
var = makeWholeRowVar(rt_fetch(rc->rti, range_table),
diff --git a/src/backend/optimizer/util/appendinfo.c b/src/backend/optimizer/util/appendinfo.c
index 6ba4eba224a..ea012b2c164 100644
--- a/src/backend/optimizer/util/appendinfo.c
+++ b/src/backend/optimizer/util/appendinfo.c
@@ -895,17 +895,35 @@ add_row_identity_columns(PlannerInfo *root, Index rtindex,
relkind == RELKIND_MATVIEW ||
relkind == RELKIND_PARTITIONED_TABLE)
{
+ RowRefType refType = ROW_REF_TID;
+
+ refType = table_get_row_ref_type(target_relation);
+
/*
* Emit CTID so that executor can find the row to merge, update or
* delete.
*/
- var = makeVar(rtindex,
- SelfItemPointerAttributeNumber,
- TIDOID,
- -1,
- InvalidOid,
- 0);
- add_row_identity_var(root, var, rtindex, "ctid");
+ if (refType == ROW_REF_TID)
+ {
+ var = makeVar(rtindex,
+ SelfItemPointerAttributeNumber,
+ TIDOID,
+ -1,
+ InvalidOid,
+ 0);
+ add_row_identity_var(root, var, rtindex, "ctid");
+ }
+ else
+ {
+ Assert(refType == ROW_REF_ROWID);
+ var = makeVar(rtindex,
+ RowIdAttributeNumber,
+ BYTEAOID,
+ -1,
+ InvalidOid,
+ 0);
+ add_row_identity_var(root, var, rtindex, "rowid");
+ }
}
else if (relkind == RELKIND_FOREIGN_TABLE)
{
diff --git a/src/backend/optimizer/util/inherit.c b/src/backend/optimizer/util/inherit.c
index c5b906a9d43..17c36c03202 100644
--- a/src/backend/optimizer/util/inherit.c
+++ b/src/backend/optimizer/util/inherit.c
@@ -16,6 +16,7 @@
#include "access/sysattr.h"
#include "access/table.h"
+#include "access/tableam.h"
#include "catalog/partition.h"
#include "catalog/pg_inherits.h"
#include "catalog/pg_type.h"
@@ -91,7 +92,7 @@ expand_inherited_rtentry(PlannerInfo *root, RelOptInfo *rel,
LOCKMODE lockmode;
PlanRowMark *oldrc;
bool old_isParent = false;
- int old_allMarkTypes = 0;
+ int old_allRefTypes = 0;
Assert(rte->inh); /* else caller error */
@@ -131,8 +132,8 @@ expand_inherited_rtentry(PlannerInfo *root, RelOptInfo *rel,
{
old_isParent = oldrc->isParent;
oldrc->isParent = true;
- /* Save initial value of allMarkTypes before children add to it */
- old_allMarkTypes = oldrc->allMarkTypes;
+ /* Save initial value of allRefTypes before children add to it */
+ old_allRefTypes = oldrc->allRefTypes;
}
/* Scan the inheritance set and expand it */
@@ -239,15 +240,15 @@ expand_inherited_rtentry(PlannerInfo *root, RelOptInfo *rel,
*/
if (oldrc)
{
- int new_allMarkTypes = oldrc->allMarkTypes;
+ int new_allRefTypes = oldrc->allRefTypes;
Var *var;
TargetEntry *tle;
char resname[32];
List *newvars = NIL;
/* Add TID junk Var if needed, unless we had it already */
- if (new_allMarkTypes & ~(1 << ROW_MARK_COPY) &&
- !(old_allMarkTypes & ~(1 << ROW_MARK_COPY)))
+ if (new_allRefTypes & (1 << ROW_REF_TID) &&
+ !(old_allRefTypes & (1 << ROW_REF_TID)))
{
/* Need to fetch TID */
var = makeVar(oldrc->rti,
@@ -266,8 +267,8 @@ expand_inherited_rtentry(PlannerInfo *root, RelOptInfo *rel,
}
/* Add whole-row junk Var if needed, unless we had it already */
- if ((new_allMarkTypes & (1 << ROW_MARK_COPY)) &&
- !(old_allMarkTypes & (1 << ROW_MARK_COPY)))
+ if ((new_allRefTypes & (1 << ROW_REF_COPY)) &&
+ !(old_allRefTypes & (1 << ROW_REF_COPY)))
{
var = makeWholeRowVar(planner_rt_fetch(oldrc->rti, root),
oldrc->rti,
@@ -282,6 +283,24 @@ expand_inherited_rtentry(PlannerInfo *root, RelOptInfo *rel,
newvars = lappend(newvars, var);
}
+ if ((new_allRefTypes & (1 << ROW_REF_ROWID)) &&
+ !(old_allRefTypes & (1 << ROW_REF_ROWID)))
+ {
+ var = makeVar(oldrc->rti,
+ RowIdAttributeNumber,
+ BYTEAOID,
+ -1,
+ InvalidOid,
+ 0);
+ snprintf(resname, sizeof(resname), "rowid%u", oldrc->rowmarkId);
+ tle = makeTargetEntry((Expr *) var,
+ list_length(root->processed_tlist) + 1,
+ pstrdup(resname),
+ true);
+ root->processed_tlist = lappend(root->processed_tlist, tle);
+ newvars = lappend(newvars, var);
+ }
+
/* Add tableoid junk Var, unless we had it already */
if (!old_isParent)
{
@@ -450,7 +469,7 @@ expand_partitioned_rtentry(PlannerInfo *root, RelOptInfo *relinfo,
* where the hierarchy is flattened during RTE expansion.)
*
* PlanRowMarks still carry the top-parent's RTI, and the top-parent's
- * allMarkTypes field still accumulates values from all descendents.
+ * allRefTypes field still accumulates values from all descendents.
*
* "parentrte" and "parentRTindex" are immediate parent's RTE and
* RTI. "top_parentrc" is top parent's PlanRowMark.
@@ -494,6 +513,7 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte,
Assert(parentrte->rtekind == RTE_RELATION); /* else this is dubious */
childrte->relid = childOID;
childrte->relkind = childrel->rd_rel->relkind;
+ childrte->reftype = table_get_row_ref_type(childrel);
/* A partitioned child will need to be expanded further. */
if (childrte->relkind == RELKIND_PARTITIONED_TABLE)
{
@@ -583,14 +603,16 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte,
if (top_parentrc)
{
PlanRowMark *childrc = makeNode(PlanRowMark);
+ RowRefType refType;
childrc->rti = childRTindex;
childrc->prti = top_parentrc->rti;
childrc->rowmarkId = top_parentrc->rowmarkId;
/* Reselect rowmark type, because relkind might not match parent */
childrc->markType = select_rowmark_type(childrte,
- top_parentrc->strength);
- childrc->allMarkTypes = (1 << childrc->markType);
+ top_parentrc->strength,
+ &refType);
+ childrc->allRefTypes = (1 << refType);
childrc->strength = top_parentrc->strength;
childrc->waitPolicy = top_parentrc->waitPolicy;
@@ -601,8 +623,8 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte,
*/
childrc->isParent = (childrte->relkind == RELKIND_PARTITIONED_TABLE);
- /* Include child's rowmark type in top parent's allMarkTypes */
- top_parentrc->allMarkTypes |= childrc->allMarkTypes;
+ /* Include child's rowmark type in top parent's allRefTypes */
+ top_parentrc->allRefTypes |= childrc->allRefTypes;
root->rowMarks = lappend(root->rowMarks, childrc);
}
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 86655f05dc8..6a45058cbbd 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -58,6 +58,7 @@ int constraint_exclusion = CONSTRAINT_EXCLUSION_PARTITION;
/* Hook for plugins to get control in get_relation_info() */
get_relation_info_hook_type get_relation_info_hook = NULL;
+skip_tree_height_hook_type skip_tree_height_hook = NULL;
static void get_relation_foreign_keys(PlannerInfo *root, RelOptInfo *rel,
@@ -485,7 +486,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
info->tuples = rel->tuples;
}
- if (info->relam == BTREE_AM_OID)
+ if (info->relam == BTREE_AM_OID && (!skip_tree_height_hook || !skip_tree_height_hook(indexRelation)))
{
/*
* For btrees, get tree height while we have the index
diff --git a/src/backend/optimizer/util/restrictinfo.c b/src/backend/optimizer/util/restrictinfo.c
index 0b406e93342..9e1458401c2 100644
--- a/src/backend/optimizer/util/restrictinfo.c
+++ b/src/backend/optimizer/util/restrictinfo.c
@@ -21,17 +21,6 @@
#include "optimizer/restrictinfo.h"
-static RestrictInfo *make_restrictinfo_internal(PlannerInfo *root,
- Expr *clause,
- Expr *orclause,
- bool is_pushed_down,
- bool has_clone,
- bool is_clone,
- bool pseudoconstant,
- Index security_level,
- Relids required_relids,
- Relids incompatible_relids,
- Relids outer_relids);
static Expr *make_sub_restrictinfos(PlannerInfo *root,
Expr *clause,
bool is_pushed_down,
@@ -90,36 +79,38 @@ make_restrictinfo(PlannerInfo *root,
/* Shouldn't be an AND clause, else AND/OR flattening messed up */
Assert(!is_andclause(clause));
- return make_restrictinfo_internal(root,
- clause,
- NULL,
- is_pushed_down,
- has_clone,
- is_clone,
- pseudoconstant,
- security_level,
- required_relids,
- incompatible_relids,
- outer_relids);
+ return make_plain_restrictinfo(root,
+ clause,
+ NULL,
+ is_pushed_down,
+ has_clone,
+ is_clone,
+ pseudoconstant,
+ security_level,
+ required_relids,
+ incompatible_relids,
+ outer_relids);
}
/*
- * make_restrictinfo_internal
+ * make_plain_restrictinfo
*
- * Common code for the main entry points and the recursive cases.
+ * Common code for the main entry points and the recursive cases. Also
+ * useful while constructing RestrictInfos on top of an OR clause whose
+ * subclauses already have their own RestrictInfos.
*/
-static RestrictInfo *
-make_restrictinfo_internal(PlannerInfo *root,
- Expr *clause,
- Expr *orclause,
- bool is_pushed_down,
- bool has_clone,
- bool is_clone,
- bool pseudoconstant,
- Index security_level,
- Relids required_relids,
- Relids incompatible_relids,
- Relids outer_relids)
+RestrictInfo *
+make_plain_restrictinfo(PlannerInfo *root,
+ Expr *clause,
+ Expr *orclause,
+ bool is_pushed_down,
+ bool has_clone,
+ bool is_clone,
+ bool pseudoconstant,
+ Index security_level,
+ Relids required_relids,
+ Relids incompatible_relids,
+ Relids outer_relids)
{
RestrictInfo *restrictinfo = makeNode(RestrictInfo);
Relids baserels;
@@ -296,17 +287,17 @@ make_sub_restrictinfos(PlannerInfo *root,
NULL,
incompatible_relids,
outer_relids));
- return (Expr *) make_restrictinfo_internal(root,
- clause,
- make_orclause(orlist),
- is_pushed_down,
- has_clone,
- is_clone,
- pseudoconstant,
- security_level,
- required_relids,
- incompatible_relids,
- outer_relids);
+ return (Expr *) make_plain_restrictinfo(root,
+ clause,
+ make_orclause(orlist),
+ is_pushed_down,
+ has_clone,
+ is_clone,
+ pseudoconstant,
+ security_level,
+ required_relids,
+ incompatible_relids,
+ outer_relids);
}
else if (is_andclause(clause))
{
@@ -328,17 +319,17 @@ make_sub_restrictinfos(PlannerInfo *root,
return make_andclause(andlist);
}
else
- return (Expr *) make_restrictinfo_internal(root,
- clause,
- NULL,
- is_pushed_down,
- has_clone,
- is_clone,
- pseudoconstant,
- security_level,
- required_relids,
- incompatible_relids,
- outer_relids);
+ return (Expr *) make_plain_restrictinfo(root,
+ clause,
+ NULL,
+ is_pushed_down,
+ has_clone,
+ is_clone,
+ pseudoconstant,
+ security_level,
+ required_relids,
+ incompatible_relids,
+ outer_relids);
}
/*
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index bca627c5463..b437e0f7dff 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -373,6 +373,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
%type OptSchemaEltList parameter_name_list
%type am_type
+%type opt_for_tableam
%type TriggerForSpec TriggerForType
%type TriggerActionTime
@@ -5870,17 +5871,21 @@ row_security_cmd:
/*****************************************************************************
*
* QUERY:
- * CREATE ACCESS METHOD name HANDLER handler_name
+ * CREATE ACCESS METHOD name TYPE am_type
+ * [FOR tableam_name]
+ * HANDLER handler_name
*
*****************************************************************************/
-CreateAmStmt: CREATE ACCESS METHOD name TYPE_P am_type HANDLER handler_name
+CreateAmStmt: CREATE ACCESS METHOD name TYPE_P am_type
+ opt_for_tableam HANDLER handler_name
{
CreateAmStmt *n = makeNode(CreateAmStmt);
n->amname = $4;
- n->handler_name = $8;
n->amtype = $6;
+ n->tableam_name = $7;
+ n->handler_name = $9;
$$ = (Node *) n;
}
;
@@ -5890,6 +5895,11 @@ am_type:
| TABLE { $$ = AMTYPE_TABLE; }
;
+opt_for_tableam:
+ FOR name { $$ = $2; }
+ | /*EMPTY*/ { $$ = NULL; }
+ ;
+
/*****************************************************************************
*
* QUERIES :
diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c
index 2f64eaf0e37..37d9b072b38 100644
--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -20,6 +20,7 @@
#include "access/relation.h"
#include "access/sysattr.h"
#include "access/table.h"
+#include "access/tableam.h"
#include "catalog/heap.h"
#include "catalog/namespace.h"
#include "catalog/pg_type.h"
@@ -1503,6 +1504,7 @@ addRangeTableEntry(ParseState *pstate,
rte->inh = inh;
rte->relkind = rel->rd_rel->relkind;
rte->rellockmode = lockmode;
+ rte->reftype = table_get_row_ref_type(rel);
/*
* Build the list of effective column names using user-supplied aliases
@@ -1588,6 +1590,7 @@ addRangeTableEntryForRelation(ParseState *pstate,
rte->inh = inh;
rte->relkind = rel->rd_rel->relkind;
rte->rellockmode = lockmode;
+ rte->reftype = table_get_row_ref_type(rel);
/*
* Build the list of effective column names using user-supplied aliases
@@ -1656,6 +1659,7 @@ addRangeTableEntryForSubquery(ParseState *pstate,
rte->rtekind = RTE_SUBQUERY;
rte->subquery = subquery;
rte->alias = alias;
+ rte->reftype = ROW_REF_COPY;
eref = alias ? copyObject(alias) : makeAlias("unnamed_subquery", NIL);
numaliases = list_length(eref->colnames);
@@ -1763,6 +1767,7 @@ addRangeTableEntryForFunction(ParseState *pstate,
rte->functions = NIL; /* we'll fill this list below */
rte->funcordinality = rangefunc->ordinality;
rte->alias = alias;
+ rte->reftype = ROW_REF_COPY;
/*
* Choose the RTE alias name. We default to using the first function's
@@ -2079,6 +2084,7 @@ addRangeTableEntryForTableFunc(ParseState *pstate,
rte->coltypmods = tf->coltypmods;
rte->colcollations = tf->colcollations;
rte->alias = alias;
+ rte->reftype = ROW_REF_COPY;
refname = alias ? alias->aliasname :
pstrdup(tf->functype == TFT_XMLTABLE ? "xmltable" : "json_table");
@@ -2156,6 +2162,7 @@ addRangeTableEntryForValues(ParseState *pstate,
rte->coltypmods = coltypmods;
rte->colcollations = colcollations;
rte->alias = alias;
+ rte->reftype = ROW_REF_COPY;
eref = alias ? copyObject(alias) : makeAlias(refname, NIL);
@@ -2252,6 +2259,7 @@ addRangeTableEntryForJoin(ParseState *pstate,
rte->joinrightcols = rightcols;
rte->join_using_alias = join_using_alias;
rte->alias = alias;
+ rte->reftype = ROW_REF_COPY;
eref = alias ? copyObject(alias) : makeAlias("unnamed_join", NIL);
numaliases = list_length(eref->colnames);
@@ -2332,6 +2340,7 @@ addRangeTableEntryForCTE(ParseState *pstate,
rte->rtekind = RTE_CTE;
rte->ctename = cte->ctename;
rte->ctelevelsup = levelsup;
+ rte->reftype = ROW_REF_COPY;
/* Self-reference if and only if CTE's parse analysis isn't completed */
rte->self_reference = !IsA(cte->ctequery, Query);
@@ -2494,6 +2503,7 @@ addRangeTableEntryForENR(ParseState *pstate,
* if they access transition tables linked to a table that is altered.
*/
rte->relid = enrmd->reliddesc;
+ rte->reftype = ROW_REF_COPY;
/*
* Build the list of effective column names using user-supplied aliases
@@ -3262,6 +3272,9 @@ get_rte_attribute_name(RangeTblEntry *rte, AttrNumber attnum)
attnum > 0 && attnum <= list_length(rte->alias->colnames))
return strVal(list_nth(rte->alias->colnames, attnum - 1));
+ if (attnum == RowIdAttributeNumber)
+ return "rowid";
+
/*
* If the RTE is a relation, go to the system catalogs not the
* eref->colnames list. This is a little slower but it will give the
diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c
index eaf46ab6871..ad207acae60 100644
--- a/src/backend/parser/parse_utilcmd.c
+++ b/src/backend/parser/parse_utilcmd.c
@@ -2310,19 +2310,6 @@ transformIndexConstraint(Constraint *constraint, CreateStmtContext *cxt)
errdetail("Cannot create a non-deferrable constraint using a deferrable index."),
parser_errposition(cxt->pstate, constraint->location)));
- /*
- * Insist on it being a btree. That's the only kind that supports
- * uniqueness at the moment anyway; but we must have an index that
- * exactly matches what you'd get from plain ADD CONSTRAINT syntax,
- * else dump and reload will produce a different index (breaking
- * pg_upgrade in particular).
- */
- if (index_rel->rd_rel->relam != get_index_am_oid(DEFAULT_INDEX_TYPE, false))
- ereport(ERROR,
- (errcode(ERRCODE_WRONG_OBJECT_TYPE),
- errmsg("index \"%s\" is not a btree", index_name),
- parser_errposition(cxt->pstate, constraint->location)));
-
/* Must get indclass the hard way */
indclassDatum = SysCacheGetAttrNotNull(INDEXRELID,
index_rel->rd_indextuple,
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 8f27026d193..0f85dc13407 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -458,6 +458,7 @@ AutoVacLauncherMain(char *startup_data, size_t startup_data_len)
* transaction.
*/
LWLockReleaseAll();
+ CustomErrorCleanup();
pgstat_report_wait_end();
UnlockBuffers();
/* this is probably dead code, but let's be safe: */
@@ -2680,7 +2681,9 @@ extract_autovac_opts(HeapTuple tup, TupleDesc pg_class_desc)
((Form_pg_class) GETSTRUCT(tup))->relkind == RELKIND_MATVIEW ||
((Form_pg_class) GETSTRUCT(tup))->relkind == RELKIND_TOASTVALUE);
- relopts = extractRelOptions(tup, pg_class_desc, NULL);
+ relopts = extractRelOptions(tup, pg_class_desc,
+ GetTableAmRoutineByAmOid(((Form_pg_class) GETSTRUCT(tup))->relam),
+ NULL);
if (relopts == NULL)
return NULL;
diff --git a/src/backend/postmaster/auxprocess.c b/src/backend/postmaster/auxprocess.c
index 78f4263eeb1..4dae7ce9c3c 100644
--- a/src/backend/postmaster/auxprocess.c
+++ b/src/backend/postmaster/auxprocess.c
@@ -101,6 +101,7 @@ static void
ShutdownAuxiliaryProcess(int code, Datum arg)
{
LWLockReleaseAll();
+ CustomErrorCleanup();
ConditionVariableCancelSleep();
pgstat_report_wait_end();
}
diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c
index 0f75548759a..74cc63cc89f 100644
--- a/src/backend/postmaster/bgwriter.c
+++ b/src/backend/postmaster/bgwriter.c
@@ -167,6 +167,7 @@ BackgroundWriterMain(char *startup_data, size_t startup_data_len)
* about in bgwriter, but we do have LWLocks, buffers, and temp files.
*/
LWLockReleaseAll();
+ CustomErrorCleanup();
ConditionVariableCancelSleep();
UnlockBuffers();
ReleaseAuxProcessResources(false);
diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index 199f008bcda..b6767a39911 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -54,11 +54,20 @@
#include "storage/proc.h"
#include "storage/procsignal.h"
#include "storage/shmem.h"
+#include "storage/sinvaladt.h"
#include "storage/smgr.h"
#include "storage/spin.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/resowner.h"
+#include "utils/syscache.h"
+
+/*
+ * Included for the InitializeTimeouts and RegisterTimeout functions that
+ * are needed for the correct operation of the OrioleDB checkpoint.
+ * See comment for InitializeTimeouts call in CheckpointerMain for details.
+ */
+#include "utils/timeout.h"
/*----------
@@ -204,6 +213,21 @@ CheckpointerMain(char *startup_data, size_t startup_data_len)
*/
pqsignal(SIGCHLD, SIG_DFL);
+ /*
+ * To use OrioleDB checkpoint, we must initialize the data for the primary
+ * lock mechanism (lock.h) to work correctly. Because locks of this type are
+ * needed by the OrioleDB module for debug events and relation locks, but
+ * they are not used by the postgres checkpointer and are not initialized
+ * for it.
+ */
+ InitializeTimeouts(); /* establishes SIGALRM handler */
+ InitDeadLockChecking();
+ RegisterTimeout(DEADLOCK_TIMEOUT, CheckDeadLockAlert);
+ RelationCacheInitialize();
+ InitCatalogCache();
+ SharedInvalBackendInit(false);
+
+
/*
* Initialize so that first time-driven event happens at the correct time.
*/
@@ -266,6 +290,7 @@ CheckpointerMain(char *startup_data, size_t startup_data_len)
* files.
*/
LWLockReleaseAll();
+ CustomErrorCleanup();
ConditionVariableCancelSleep();
pgstat_report_wait_end();
UnlockBuffers();
diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c
index 02f91431f5f..35af55cd678 100644
--- a/src/backend/postmaster/pgarch.c
+++ b/src/backend/postmaster/pgarch.c
@@ -760,6 +760,22 @@ pgarch_readyXlog(char *xlog)
for (int i = 0; i < arch_files->arch_files_size; i++)
arch_files->arch_files[i] = DatumGetCString(binaryheap_remove_first(arch_files->arch_heap));
+ /*
+ * Preload the WAL files if the relevant callback is provided.
+ */
+ if (ArchiveCallbacks->archive_preload_file_cb)
+ {
+ for (int i = 0; i < arch_files->arch_files_size; i++)
+ {
+ char *xlog1 = arch_files->arch_files[i];
+ char pathname[MAXPGPATH];
+
+ snprintf(pathname, MAXPGPATH, XLOGDIR "/%s", xlog1);
+ ArchiveCallbacks->archive_preload_file_cb(archive_module_state,
+ xlog1, pathname);
+ }
+ }
+
/* Return the highest priority file. */
arch_files->arch_files_size--;
strcpy(xlog, arch_files->arch_files[arch_files->arch_files_size]);
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index bf0241aed0c..16fde3e8ec4 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -85,10 +85,6 @@
#include
#endif
-#ifdef HAVE_PTHREAD_IS_THREADED_NP
-#include
-#endif
-
#include "access/xlog.h"
#include "access/xlogrecovery.h"
#include "common/file_perm.h"
@@ -137,7 +133,8 @@
#define BACKEND_TYPE_AUTOVAC 0x0002 /* autovacuum worker process */
#define BACKEND_TYPE_WALSND 0x0004 /* walsender process */
#define BACKEND_TYPE_BGWORKER 0x0008 /* bgworker process */
-#define BACKEND_TYPE_ALL 0x000F /* OR of all the above */
+#define BACKEND_TYPE_SYSTEM_BGWORKER 0x0010 /* system bgworker process */
+#define BACKEND_TYPE_ALL 0x001F /* OR of all the above */
/*
* List of active backends (or child processes anyway; we don't actually
@@ -439,7 +436,7 @@ static void MaybeStartSlotSyncWorker(void);
* even during recovery.
*/
#define PgArchStartupAllowed() \
- (((XLogArchivingActive() && pmState == PM_RUN) || \
+ (((XLogArchivingActive() && (pmState == PM_RUN || pmState == PM_SHUTDOWN)) || \
(XLogArchivingAlways() && \
(pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY))) && \
PgArchCanRestart())
@@ -482,6 +479,12 @@ int postmaster_alive_fds[2] = {-1, -1};
HANDLE PostmasterHandle;
#endif
+bool
+IsFatalError(void)
+{
+ return FatalError;
+}
+
/*
* Postmaster main entry point
*/
@@ -1323,24 +1326,6 @@ PostmasterMain(int argc, char *argv[])
*/
}
-#ifdef HAVE_PTHREAD_IS_THREADED_NP
-
- /*
- * On macOS, libintl replaces setlocale() with a version that calls
- * CFLocaleCopyCurrent() when its second argument is "" and every relevant
- * environment variable is unset or empty. CFLocaleCopyCurrent() makes
- * the process multithreaded. The postmaster calls sigprocmask() and
- * calls fork() without an immediate exec(), both of which have undefined
- * behavior in a multithreaded program. A multithreaded postmaster is the
- * normal case on Windows, which offers neither fork() nor sigprocmask().
- */
- if (pthread_is_threaded_np() != 0)
- ereport(FATAL,
- (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
- errmsg("postmaster became multithreaded during startup"),
- errhint("Set the LC_ALL environment variable to a valid locale.")));
-#endif
-
/*
* Remember postmaster startup time
*/
@@ -1749,15 +1734,6 @@ ServerLoop(void)
if (StartWorkerNeeded || HaveCrashedWorker)
maybe_start_bgworkers();
-#ifdef HAVE_PTHREAD_IS_THREADED_NP
-
- /*
- * With assertions enabled, check regularly for appearance of
- * additional threads. All builds check at start and exit.
- */
- Assert(pthread_is_threaded_np() == 0);
-#endif
-
/*
* Lastly, check to see if it's time to do some things that we don't
* want to do every single time through the loop, because they're a
@@ -1888,8 +1864,9 @@ processCancelRequest(int backendPID, int32 cancelAuthCode)
/*
* canAcceptConnections --- check to see if database state allows connections
* of the specified type. backend_type can be BACKEND_TYPE_NORMAL,
- * BACKEND_TYPE_AUTOVAC, or BACKEND_TYPE_BGWORKER. (Note that we don't yet
- * know whether a NORMAL connection might turn into a walsender.)
+ * BACKEND_TYPE_AUTOVAC, BACKEND_TYPE_BGWORKER or BACKEND_TYPE_SYSTEM_BGWORKER.
+ * (Note that we don't yet know whether a NORMAL connection might turn into
+ * a walsender.)
*/
static CAC_state
canAcceptConnections(int backend_type)
@@ -1903,7 +1880,8 @@ canAcceptConnections(int backend_type)
* bgworker_should_start_now() decided whether the DB state allows them.
*/
if (pmState != PM_RUN && pmState != PM_HOT_STANDBY &&
- backend_type != BACKEND_TYPE_BGWORKER)
+ backend_type != BACKEND_TYPE_BGWORKER &&
+ backend_type != BACKEND_TYPE_SYSTEM_BGWORKER)
{
if (Shutdown > NoShutdown)
return CAC_SHUTDOWN; /* shutdown is pending */
@@ -2534,6 +2512,13 @@ process_pm_child_exit(void)
if (PgArchPID != 0)
signal_child(PgArchPID, SIGUSR2);
+ /*
+ * Terminate system background workers since checkpoint is
+ * complete.
+ */
+ SignalSomeChildren(SIGTERM,
+ BACKEND_TYPE_SYSTEM_BGWORKER);
+
/*
* Waken walsenders for the last time. No regular backends
* should be around anymore.
@@ -2965,7 +2950,8 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
* Background workers were already processed above; ignore them
* here.
*/
- if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
+ if (bp->bkend_type == BACKEND_TYPE_BGWORKER ||
+ bp->bkend_type == BACKEND_TYPE_SYSTEM_BGWORKER)
continue;
if (take_action)
@@ -3156,7 +3142,7 @@ PostmasterStateMachine(void)
/* Signal all backend children except walsenders */
SignalSomeChildren(SIGTERM,
- BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND);
+ BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND - BACKEND_TYPE_SYSTEM_BGWORKER);
/* and the autovac launcher too */
if (AutoVacPID != 0)
signal_child(AutoVacPID, SIGTERM);
@@ -3198,7 +3184,7 @@ PostmasterStateMachine(void)
* here. Walsenders and archiver are also disregarded, they will be
* terminated later after writing the checkpoint record.
*/
- if (CountChildren(BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND) == 0 &&
+ if (CountChildren(BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND - BACKEND_TYPE_SYSTEM_BGWORKER) == 0 &&
StartupPID == 0 &&
WalReceiverPID == 0 &&
WalSummarizerPID == 0 &&
@@ -3667,21 +3653,6 @@ report_fork_failure_to_client(ClientSocket *client_sock, int errnum)
static void
ExitPostmaster(int status)
{
-#ifdef HAVE_PTHREAD_IS_THREADED_NP
-
- /*
- * There is no known cause for a postmaster to become multithreaded after
- * startup. Recheck to account for the possibility of unknown causes.
- * This message uses LOG level, because an unclean shutdown at this point
- * would usually not look much different from a clean shutdown.
- */
- if (pthread_is_threaded_np() != 0)
- ereport(LOG,
- (errcode(ERRCODE_INTERNAL_ERROR),
- errmsg_internal("postmaster became multithreaded"),
- errdetail("Please report this to <%s>.", PACKAGE_BUGREPORT)));
-#endif
-
/* should cleanup shared memory and kill all backends */
/*
@@ -4300,16 +4271,20 @@ do_start_bgworker(RegisteredBgWorker *rw)
* specified start_time?
*/
static bool
-bgworker_should_start_now(BgWorkerStartTime start_time)
+bgworker_should_start_now(BgWorkerStartTime start_time, int flags)
{
switch (pmState)
{
case PM_NO_CHILDREN:
case PM_WAIT_DEAD_END:
case PM_SHUTDOWN_2:
+ break;
+
case PM_SHUTDOWN:
case PM_WAIT_BACKENDS:
case PM_STOP_BACKENDS:
+ if (flags & BGWORKER_CLASS_SYSTEM)
+ return true;
break;
case PM_RUN:
@@ -4384,7 +4359,10 @@ assign_backendlist_entry(RegisteredBgWorker *rw)
bn->cancel_key = MyCancelKey;
bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
- bn->bkend_type = BACKEND_TYPE_BGWORKER;
+ if (rw->rw_worker.bgw_flags & BGWORKER_CLASS_SYSTEM)
+ bn->bkend_type = BACKEND_TYPE_SYSTEM_BGWORKER;
+ else
+ bn->bkend_type = BACKEND_TYPE_BGWORKER;
bn->dead_end = false;
bn->bgworker_notify = false;
@@ -4482,7 +4460,8 @@ maybe_start_bgworkers(void)
}
}
- if (bgworker_should_start_now(rw->rw_worker.bgw_start_time))
+ if (bgworker_should_start_now(rw->rw_worker.bgw_start_time,
+ rw->rw_worker.bgw_flags))
{
/* reset crash time before trying to start worker */
rw->rw_crashed_at = 0;
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index ef6f98ebcd7..5cea0f97a30 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -75,6 +75,8 @@ static volatile sig_atomic_t startup_progress_timer_expired = false;
*/
int log_startup_progress_interval = 10000; /* 10 sec */
+HandleStartupProcInterrupts_hook_type HandleStartupProcInterrupts_hook = NULL;
+
/* Signal handlers */
static void StartupProcTriggerHandler(SIGNAL_ARGS);
static void StartupProcSigHupHandler(SIGNAL_ARGS);
@@ -157,6 +159,9 @@ HandleStartupProcInterrupts(void)
static uint32 postmaster_poll_count = 0;
#endif
+ if (HandleStartupProcInterrupts_hook)
+ HandleStartupProcInterrupts_hook();
+
/*
* Process any requests or signals received recently.
*/
diff --git a/src/backend/postmaster/walwriter.c b/src/backend/postmaster/walwriter.c
index 6e7918a78d4..3cb439d377a 100644
--- a/src/backend/postmaster/walwriter.c
+++ b/src/backend/postmaster/walwriter.c
@@ -164,6 +164,7 @@ WalWriterMain(char *startup_data, size_t startup_data_len)
* about in walwriter, but we do have LWLocks, and perhaps buffers?
*/
LWLockReleaseAll();
+ CustomErrorCleanup();
ConditionVariableCancelSleep();
pgstat_report_wait_end();
UnlockBuffers();
diff --git a/src/backend/replication/logical/proto.c b/src/backend/replication/logical/proto.c
index 95c09c95167..db41c955ec1 100644
--- a/src/backend/replication/logical/proto.c
+++ b/src/backend/replication/logical/proto.c
@@ -814,7 +814,7 @@ logicalrep_write_tuple(StringInfo out, Relation rel, TupleTableSlot *slot,
continue;
}
- if (att->attlen == -1 && VARATT_IS_EXTERNAL_ONDISK(values[i]))
+ if (att->attlen == -1 && (VARATT_IS_EXTERNAL_ONDISK(values[i]) || VARATT_IS_EXTERNAL_ORIOLEDB(values[i])))
{
/*
* Unchanged toasted datum. (Note that we don't promise to detect
diff --git a/src/backend/replication/logical/relation.c b/src/backend/replication/logical/relation.c
index f139e7b01e9..4429127c434 100644
--- a/src/backend/replication/logical/relation.c
+++ b/src/backend/replication/logical/relation.c
@@ -834,7 +834,7 @@ IsIndexUsableForReplicaIdentityFull(IndexInfo *indexInfo, AttrMap *attrmap)
IndexAmRoutine *amroutine;
/* The given index access method must implement amgettuple. */
- amroutine = GetIndexAmRoutineByAmId(indexInfo->ii_Am, false);
+ amroutine = GetIndexAmRoutineByAmId(InvalidOid, indexInfo->ii_Am, false);
Assert(amroutine->amgettuple != NULL);
}
#endif
diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c
index ae676145e60..12ebfdb4702 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -219,6 +219,8 @@ struct SnapBuild
*/
TransactionId next_phase_at;
+ CSNSnapshotData csnSnapshotData;
+
/*
* Array of transactions which could have catalog changes that committed
* between xmin and xmax.
@@ -418,6 +420,17 @@ SnapBuildCurrentState(SnapBuild *builder)
return builder->state;
}
+/*
+ * At which transaction id will the next phase of initial snapshot
+ * building happen?
+ */
+TransactionId
+SnapBuildNextPhaseAt(SnapBuild *builder)
+{
+ return builder->next_phase_at;
+}
+
+
/*
* Return the LSN at which the two-phase decoding was first enabled.
*/
@@ -565,6 +578,8 @@ SnapBuildBuildSnapshot(SnapBuild *builder)
snapshot->regd_count = 0;
snapshot->snapXactCompletionCount = 0;
+ snapshot->csnSnapshotData = builder->csnSnapshotData;
+
return snapshot;
}
@@ -662,6 +677,7 @@ SnapBuildInitialSnapshot(SnapBuild *builder)
snap->snapshot_type = SNAPSHOT_MVCC;
snap->xcnt = newxcnt;
snap->xip = newxip;
+ snap->csnSnapshotData = builder->csnSnapshotData;
return snap;
}
@@ -1042,6 +1058,8 @@ SnapBuildCommitTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid,
TransactionId xmax = xid;
+ builder->csnSnapshotData.xlogptr = lsn;
+
/*
* Transactions preceding BUILDING_SNAPSHOT will neither be decoded, nor
* will they be part of a snapshot. So we don't need to record anything.
@@ -1229,6 +1247,10 @@ SnapBuildProcessRunningXacts(SnapBuild *builder, XLogRecPtr lsn, xl_running_xact
ReorderBufferTXN *txn;
TransactionId xmin;
+ builder->csnSnapshotData.snapshotcsn = running->csn;
+ builder->csnSnapshotData.xmin = 0;
+ builder->csnSnapshotData.xlogptr = lsn;
+
/*
* If we're not consistent yet, inspect the record to see whether it
* allows to get closer to being consistent. If we are consistent, dump
@@ -1256,6 +1278,9 @@ SnapBuildProcessRunningXacts(SnapBuild *builder, XLogRecPtr lsn, xl_running_xact
* we hit fast paths in heapam_visibility.c.
*/
builder->xmin = running->oldestRunningXid;
+ builder->csnSnapshotData.snapshotcsn = running->csn;
+ builder->csnSnapshotData.xmin = 0;
+ builder->csnSnapshotData.xlogptr = lsn;
/* Remove transactions we don't need to keep track off anymore */
SnapBuildPurgeOlderTxn(builder);
@@ -2174,3 +2199,10 @@ SnapBuildSnapshotExists(XLogRecPtr lsn)
return ret == 0;
}
+
+void
+SnapBuildUpdateCSNSnaphot(SnapBuild *builder,
+ CSNSnapshotData *csnSnapshotData)
+{
+ builder->csnSnapshotData = *csnSnapshotData;
+}
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index d091a1dd27c..f5a83e52eef 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -2417,9 +2417,8 @@ apply_handle_insert(StringInfo s)
/* Initialize the executor state. */
edata = create_edata_for_relation(rel);
estate = edata->estate;
- remoteslot = ExecInitExtraTupleSlot(estate,
- RelationGetDescr(rel->localrel),
- &TTSOpsVirtual);
+ remoteslot = table_slot_create(rel->localrel,
+ &estate->es_tupleTable);
/* Process and store remote tuple in the slot */
oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
@@ -2573,9 +2572,8 @@ apply_handle_update(StringInfo s)
/* Initialize the executor state. */
edata = create_edata_for_relation(rel);
estate = edata->estate;
- remoteslot = ExecInitExtraTupleSlot(estate,
- RelationGetDescr(rel->localrel),
- &TTSOpsVirtual);
+ remoteslot = table_slot_create(rel->localrel,
+ &estate->es_tupleTable);
/*
* Populate updatedCols so that per-column triggers can fire, and so
@@ -2753,9 +2751,8 @@ apply_handle_delete(StringInfo s)
/* Initialize the executor state. */
edata = create_edata_for_relation(rel);
estate = edata->estate;
- remoteslot = ExecInitExtraTupleSlot(estate,
- RelationGetDescr(rel->localrel),
- &TTSOpsVirtual);
+ remoteslot = table_slot_create(rel->localrel,
+ &estate->es_tupleTable);
/* Build the search tuple. */
oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
diff --git a/src/backend/replication/pgoutput/pgoutput.c b/src/backend/replication/pgoutput/pgoutput.c
index 00e7024563e..e6a4f0063a1 100644
--- a/src/backend/replication/pgoutput/pgoutput.c
+++ b/src/backend/replication/pgoutput/pgoutput.c
@@ -1320,8 +1320,8 @@ pgoutput_row_filter(Relation relation, TupleTableSlot *old_slot,
* VARTAG_INDIRECT. See ReorderBufferToastReplace.
*/
if (att->attlen == -1 &&
- VARATT_IS_EXTERNAL_ONDISK(new_slot->tts_values[i]) &&
- !VARATT_IS_EXTERNAL_ONDISK(old_slot->tts_values[i]))
+ (VARATT_IS_EXTERNAL_ONDISK(new_slot->tts_values[i]) || VARATT_IS_EXTERNAL_ORIOLEDB(new_slot->tts_values[i])) &&
+ !(VARATT_IS_EXTERNAL_ONDISK(old_slot->tts_values[i]) || VARATT_IS_EXTERNAL_ORIOLEDB(old_slot->tts_values[i])) )
{
if (!tmp_new_slot)
{
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index c3181e3295e..71be0e15f61 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -327,6 +327,7 @@ void
WalSndErrorCleanup(void)
{
LWLockReleaseAll();
+ CustomErrorCleanup();
ConditionVariableCancelSleep();
pgstat_report_wait_end();
diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c
index 6d59a2bb8dc..e9696b52d9f 100644
--- a/src/backend/rewrite/rewriteHandler.c
+++ b/src/backend/rewrite/rewriteHandler.c
@@ -23,6 +23,7 @@
#include "access/relation.h"
#include "access/sysattr.h"
#include "access/table.h"
+#include "access/tableam.h"
#include "catalog/dependency.h"
#include "commands/trigger.h"
#include "executor/executor.h"
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 61816730955..e179056de9f 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -3086,6 +3086,7 @@ BufferSync(int flags)
BufferDesc *bufHdr = NULL;
CkptTsStatus *ts_stat = (CkptTsStatus *)
DatumGetPointer(binaryheap_first(ts_heap));
+ double progress;
buf_id = CkptBufferIds[ts_stat->index].buf_id;
Assert(buf_id != -1);
@@ -3140,7 +3141,10 @@ BufferSync(int flags)
*
* (This will check for barrier events even if it doesn't sleep.)
*/
- CheckpointWriteDelay(flags, (double) num_processed / num_to_scan);
+ progress = (double) num_processed / num_to_scan;
+ progress = CheckPointProgress + progress * (1 - CheckPointProgress);
+
+ CheckpointWriteDelay(flags, progress);
}
/*
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index 9fc930e98f8..e7b083ee7a7 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -306,6 +306,8 @@ static GlobalVisState GlobalVisTempRels;
*/
static TransactionId ComputeXidHorizonsResultLastXmin;
+snapshot_hook_type snapshot_hook = NULL;
+
#ifdef XIDCACHE_DEBUG
/* counters for XidCache measurement */
@@ -749,6 +751,7 @@ ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId latestXid)
proc->delayChkptFlags = 0;
proc->recoveryConflictPending = false;
+ proc->lastCommittedCSN = pg_atomic_fetch_add_u64(&TransamVariables->nextCommitSeqNo, 1);
/* must be cleared with xid/xmin: */
/* avoid unnecessarily dirtying shared cachelines */
@@ -2234,6 +2237,8 @@ GetSnapshotData(Snapshot snapshot)
if (GetSnapshotDataReuse(snapshot))
{
+ if (snapshot_hook)
+ snapshot_hook(snapshot);
LWLockRelease(ProcArrayLock);
return snapshot;
}
@@ -2415,6 +2420,9 @@ GetSnapshotData(Snapshot snapshot)
if (!TransactionIdIsValid(MyProc->xmin))
MyProc->xmin = TransactionXmin = xmin;
+ if (snapshot_hook)
+ snapshot_hook(snapshot);
+
LWLockRelease(ProcArrayLock);
/* maintain state for GlobalVis* */
@@ -2850,6 +2858,7 @@ GetRunningTransactionData(void)
CurrentRunningXacts->oldestRunningXid = oldestRunningXid;
CurrentRunningXacts->oldestDatabaseRunningXid = oldestDatabaseRunningXid;
CurrentRunningXacts->latestCompletedXid = latestCompletedXid;
+ CurrentRunningXacts->csn = pg_atomic_read_u64(&TransamVariables->nextCommitSeqNo);
Assert(TransactionIdIsValid(CurrentRunningXacts->nextXid));
Assert(TransactionIdIsValid(CurrentRunningXacts->oldestRunningXid));
diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c
index 872679ca447..17ddeb893c6 100644
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -1353,6 +1353,7 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
xlrec.nextXid = CurrRunningXacts->nextXid;
xlrec.oldestRunningXid = CurrRunningXacts->oldestRunningXid;
xlrec.latestCompletedXid = CurrRunningXacts->latestCompletedXid;
+ xlrec.csn = CurrRunningXacts->csn;
/* Header */
XLogBeginInsert();
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index 0400a507779..98421b6dda5 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -635,6 +635,27 @@ GetLockMethodLocalHash(void)
}
#endif
+/*
+ * Returns true if any LOCKMODE lock with the given locktag exists in LockMethodLocalHash.
+ */
+bool
+DoLocalLockExist(const LOCKTAG *locktag)
+{
+ HASH_SEQ_STATUS scan_status;
+ LOCALLOCK* locallock;
+
+ hash_seq_init(&scan_status, LockMethodLocalHash);
+ while ((locallock = (LOCALLOCK *) hash_seq_search(&scan_status)) != NULL)
+ {
+ if (memcmp(&locallock->tag.lock, locktag, sizeof(LOCKTAG)) == 0)
+ {
+ hash_seq_term(&scan_status);
+ return true;
+ }
+ }
+ return false;
+}
+
/*
* LockHasWaiters -- look up 'locktag' and check if releasing this
* lock would wake up other processes waiting for it.
@@ -784,7 +805,7 @@ LockAcquireExtended(const LOCKTAG *locktag,
bool reportMemoryError,
LOCALLOCK **locallockp)
{
- LOCKMETHODID lockmethodid = locktag->locktag_lockmethodid;
+ LOCKMETHODID lockmethodid;
LockMethod lockMethodTable;
LOCALLOCKTAG localtag;
LOCALLOCK *locallock;
@@ -796,6 +817,15 @@ LockAcquireExtended(const LOCKTAG *locktag,
LWLock *partitionLock;
bool found_conflict;
bool log_lock = false;
+ bool no_log_lock = false;
+
+ if (locktag->locktag_lockmethodid == NO_LOG_LOCKMETHOD)
+ {
+ ((LOCKTAG *)locktag)->locktag_lockmethodid = DEFAULT_LOCKMETHOD;
+ no_log_lock = true;
+ }
+
+ lockmethodid = locktag->locktag_lockmethodid;
if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
elog(ERROR, "unrecognized lock method: %d", lockmethodid);
@@ -910,7 +940,8 @@ LockAcquireExtended(const LOCKTAG *locktag,
if (lockmode >= AccessExclusiveLock &&
locktag->locktag_type == LOCKTAG_RELATION &&
!RecoveryInProgress() &&
- XLogStandbyInfoActive())
+ XLogStandbyInfoActive() &&
+ !no_log_lock)
{
LogAccessExclusiveLockPrepare();
log_lock = true;
@@ -1087,6 +1118,8 @@ LockAcquireExtended(const LOCKTAG *locktag,
*/
if (!(proclock->holdMask & LOCKBIT_ON(lockmode)))
{
+ int i;
+
AbortStrongLockAcquire();
if (dontWait)
@@ -1136,7 +1169,27 @@ LockAcquireExtended(const LOCKTAG *locktag,
PROCLOCK_PRINT("LockAcquire: INCONSISTENT", proclock);
LOCK_PRINT("LockAcquire: INCONSISTENT", lock, lockmode);
LWLockRelease(partitionLock);
- elog(ERROR, "LockAcquire failed");
+ /*
+ * We've been removed from the queue without obtaining a lock.
+ * That's OK, we're going to return LOCKACQUIRE_NOT_AVAIL, but
+ * need to release a local lock first.
+ */
+ locallock->nLocks--;
+ for (i = 0; i < locallock->numLockOwners; i++)
+ {
+ if (locallock->lockOwners[i].owner == owner)
+ {
+ locallock->lockOwners[i].nLocks--;
+ if (locallock->lockOwners[i].nLocks == 0)
+ {
+ ResourceOwnerForgetLock(owner, locallock);
+ locallock->lockOwners[i] = locallock->lockOwners[--locallock->numLockOwners];
+ }
+ break;
+ }
+ }
+
+ return LOCKACQUIRE_NOT_AVAIL;
}
}
PROCLOCK_PRINT("LockAcquire: granted", proclock);
@@ -4646,8 +4699,8 @@ VirtualXactLock(VirtualTransactionId vxid, bool wait)
LWLockRelease(&proc->fpInfoLock);
/* Time to wait. */
- (void) LockAcquire(&tag, ShareLock, false, false);
-
+ if (LockAcquire(&tag, ShareLock, false, false) == LOCKACQUIRE_NOT_AVAIL)
+ return false;
LockRelease(&tag, ShareLock, false);
return XactLockForVirtualXact(vxid, xid, wait);
}
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index ce29da90121..bbfafd2a73e 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -861,6 +861,7 @@ ProcKill(int code, Datum arg)
* facility by releasing our PGPROC ...
*/
LWLockReleaseAll();
+ CustomErrorCleanup();
/* Cancel any pending condition variable sleep, too */
ConditionVariableCancelSleep();
@@ -982,6 +983,7 @@ AuxiliaryProcKill(int code, Datum arg)
/* Release any LW locks I am holding (see notes above) */
LWLockReleaseAll();
+ CustomErrorCleanup();
/* Cancel any pending condition variable sleep, too */
ConditionVariableCancelSleep();
@@ -1251,7 +1253,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable, bool dontWait)
* If InHotStandby we set lock waits slightly later for clarity with other
* code.
*/
- if (!InHotStandby)
+ if (!InHotStandby && !InRecovery)
{
if (LockTimeout > 0)
{
@@ -1611,7 +1613,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable, bool dontWait)
* already caused QueryCancelPending to become set, we want the cancel to
* be reported as a lock timeout, not a user cancel.
*/
- if (!InHotStandby)
+ if (!InHotStandby && !InRecovery)
{
if (LockTimeout > 0)
{
diff --git a/src/backend/utils/adt/amutils.c b/src/backend/utils/adt/amutils.c
index dd39a994c8d..b7ebe6a5f76 100644
--- a/src/backend/utils/adt/amutils.c
+++ b/src/backend/utils/adt/amutils.c
@@ -195,7 +195,7 @@ indexam_property(FunctionCallInfo fcinfo,
/*
* Get AM information. If we don't have a valid AM OID, return NULL.
*/
- routine = GetIndexAmRoutineByAmId(amoid, true);
+ routine = GetIndexAmRoutineByAmId(index_oid, amoid, true);
if (routine == NULL)
PG_RETURN_NULL();
@@ -455,7 +455,7 @@ pg_indexam_progress_phasename(PG_FUNCTION_ARGS)
IndexAmRoutine *routine;
char *name;
- routine = GetIndexAmRoutineByAmId(amoid, true);
+ routine = GetIndexAmRoutineByAmId(InvalidOid, amoid, true);
if (routine == NULL || !routine->ambuildphasename)
PG_RETURN_NULL();
diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c
index e790f856ab3..b26e51246c1 100644
--- a/src/backend/utils/adt/lockfuncs.c
+++ b/src/backend/utils/adt/lockfuncs.c
@@ -16,8 +16,11 @@
#include "funcapi.h"
#include "miscadmin.h"
#include "storage/predicate_internals.h"
+#include "storage/proc.h"
+#include "storage/procarray.h"
#include "utils/array.h"
#include "utils/builtins.h"
+#include "utils/wait_event.h"
/*
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 1c57f12695e..b3b396b6a78 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -133,6 +133,7 @@ typedef struct
static HTAB *collation_cache = NULL;
+pg_newlocale_from_collation_hook_type pg_newlocale_from_collation_hook = NULL;
#if defined(WIN32) && defined(LC_MESSAGES)
static char *IsoLocaleName(const char *);
@@ -1673,6 +1674,7 @@ pg_newlocale_from_collation(Oid collid)
{
char *actual_versionstr;
char *collversionstr;
+ int level = WARNING;
collversionstr = TextDatumGetCString(datum);
@@ -1695,8 +1697,11 @@ pg_newlocale_from_collation(Oid collid)
NameStr(collform->collname))));
}
+ if (pg_newlocale_from_collation_hook && pg_newlocale_from_collation_hook())
+ level = ERROR;
+
if (strcmp(actual_versionstr, collversionstr) != 0)
- ereport(WARNING,
+ ereport(level,
(errmsg("collation \"%s\" has version mismatch",
NameStr(collform->collname)),
errdetail("The collation in the database was created using version %s, "
diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c
index 62601a6d80c..9760febe7cc 100644
--- a/src/backend/utils/adt/ri_triggers.c
+++ b/src/backend/utils/adt/ri_triggers.c
@@ -244,6 +244,7 @@ RI_FKey_check(TriggerData *trigdata)
TupleTableSlot *newslot;
RI_QueryKey qkey;
SPIPlanPtr qplan;
+ Relation rel = trigdata->tg_relation;
riinfo = ri_FetchConstraintInfo(trigdata->tg_trigger,
trigdata->tg_relation, false);
@@ -261,7 +262,7 @@ RI_FKey_check(TriggerData *trigdata)
* and lock on the buffer to call HeapTupleSatisfiesVisibility. Caller
* should be holding pin, but not lock.
*/
- if (!table_tuple_satisfies_snapshot(trigdata->tg_relation, newslot, SnapshotSelf))
+ if (!table_tuple_satisfies_snapshot(rel, newslot, SnapshotSelf))
return PointerGetDatum(NULL);
/*
@@ -1327,7 +1328,7 @@ RI_FKey_fk_upd_check_required(Trigger *trigger, Relation fk_rel,
* this if we knew the INSERT trigger already fired, but there is no easy
* way to know that.)
*/
- if (slot_is_current_xact_tuple(oldslot))
+ if (table_tuple_is_current(fk_rel, oldslot))
return true;
/* If all old and new key values are equal, no check is needed */
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index cd9c3eddd1d..216db91f335 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -1323,7 +1323,7 @@ pg_get_indexdef_worker(Oid indexrelid, int colno,
amrec = (Form_pg_am) GETSTRUCT(ht_am);
/* Fetch the index AM's API struct */
- amroutine = GetIndexAmRoutine(amrec->amhandler);
+ amroutine = GetIndexAmRoutineExtended(indexrelid, amrec->amhandler);
/*
* Get the index expressions, if any. (NOTE: we do not use the relcache
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 5f5d7959d8e..884d12da88c 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -6339,12 +6339,32 @@ get_actual_variable_endpoint(Relation heapRel,
index_scan->xs_want_itup = true;
index_rescan(index_scan, scankeys, 1, NULL, 0);
- /* Fetch first/next tuple in specified direction */
- while ((tid = index_getnext_tid(index_scan, indexscandir)) != NULL)
+ while (true)
{
- BlockNumber block = ItemPointerGetBlockNumber(tid);
+ BlockNumber block = InvalidBlockNumber;
- if (!VM_ALL_VISIBLE(heapRel,
+ /* Fetch first/next tuple in specified direction */
+ if (index_scan->xs_want_rowid)
+ {
+ NullableDatum rowid;
+ rowid = index_getnext_rowid(index_scan, indexscandir);
+
+ if (rowid.isnull)
+ break;
+ }
+ else
+ {
+ tid = index_getnext_tid(index_scan, indexscandir);
+
+ if (tid == NULL)
+ break;
+
+ Assert(ItemPointerEquals(tid, &index_scan->xs_heaptid));
+ block = ItemPointerGetBlockNumber(tid);
+ }
+
+ if (!index_scan->xs_want_rowid &&
+ !VM_ALL_VISIBLE(heapRel,
block,
&vmbuffer))
{
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index d2e2e9bbba0..66625735b21 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -5139,7 +5139,7 @@ pg_column_toast_chunk_id(PG_FUNCTION_ARGS)
attr = (struct varlena *) DatumGetPointer(PG_GETARG_DATUM(0));
- if (!VARATT_IS_EXTERNAL_ONDISK(attr))
+ if (!(VARATT_IS_EXTERNAL_ONDISK(attr) || VARATT_IS_EXTERNAL_ORIOLEDB(attr)))
PG_RETURN_NULL();
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
diff --git a/src/backend/utils/adt/waitfuncs.c b/src/backend/utils/adt/waitfuncs.c
index e135c9e5e45..c68b36121e3 100644
--- a/src/backend/utils/adt/waitfuncs.c
+++ b/src/backend/utils/adt/waitfuncs.c
@@ -38,6 +38,7 @@
Datum
pg_isolation_test_session_is_blocked(PG_FUNCTION_ARGS)
{
+ PGPROC *blocked_proc;
int blocked_pid = PG_GETARG_INT32(0);
ArrayType *interesting_pids_a = PG_GETARG_ARRAYTYPE_P(1);
PGPROC *proc;
@@ -109,5 +110,9 @@ pg_isolation_test_session_is_blocked(PG_FUNCTION_ARGS)
if (GetSafeSnapshotBlockingPids(blocked_pid, &dummy, 1) > 0)
PG_RETURN_BOOL(true);
+ blocked_proc = BackendPidGetProc(blocked_pid);
+ if ((blocked_proc->wait_event_info & 0xFF000000) == PG_WAIT_EXTENSION)
+ PG_RETURN_BOOL(true);
+
PG_RETURN_BOOL(false);
}
diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c
index 111d8a280a0..1b2ad91424c 100644
--- a/src/backend/utils/cache/catcache.c
+++ b/src/backend/utils/cache/catcache.c
@@ -38,6 +38,7 @@
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/resowner.h"
+#include "utils/resowner_private.h"
#include "utils/syscache.h"
@@ -64,6 +65,10 @@
/* Cache management header --- pointer is NULL until created */
static CatCacheHeader *CacheHdr = NULL;
+SearchCatCacheInternal_hook_type SearchCatCacheInternal_hook = NULL;
+SearchCatCacheList_hook_type SearchCatCacheList_hook = NULL;
+GetCatCacheHashValue_hook_type GetCatCacheHashValue_hook = NULL;
+
static inline HeapTuple SearchCatCacheInternal(CatCache *cache,
int nkeys,
Datum v1, Datum v2,
@@ -137,7 +142,7 @@ static const ResourceOwnerDesc catlistref_resowner_desc =
};
/* Convenience wrappers over ResourceOwnerRemember/Forget */
-static inline void
+void
ResourceOwnerRememberCatCacheRef(ResourceOwner owner, HeapTuple tuple)
{
ResourceOwnerRemember(owner, PointerGetDatum(tuple), &catcache_resowner_desc);
@@ -147,7 +152,7 @@ ResourceOwnerForgetCatCacheRef(ResourceOwner owner, HeapTuple tuple)
{
ResourceOwnerForget(owner, PointerGetDatum(tuple), &catcache_resowner_desc);
}
-static inline void
+void
ResourceOwnerRememberCatCacheListRef(ResourceOwner owner, CatCList *list)
{
ResourceOwnerRemember(owner, PointerGetDatum(list), &catlistref_resowner_desc);
@@ -1324,6 +1329,14 @@ SearchCatCacheInternal(CatCache *cache,
dlist_head *bucket;
CatCTup *ct;
+ if (SearchCatCacheInternal_hook)
+ {
+ ct = SearchCatCacheInternal_hook(cache, nkeys, v1, v2, v3, v4);
+
+ if (ct)
+ return &ct->tuple;
+ }
+
/* Make sure we're in an xact, even if this ends up being a cache hit */
Assert(IsTransactionState());
@@ -1616,6 +1629,11 @@ GetCatCacheHashValue(CatCache *cache,
Datum v3,
Datum v4)
{
+ if (GetCatCacheHashValue_hook)
+ {
+ return GetCatCacheHashValue_hook(cache, cache->cc_nkeys,
+ v1, v2, v3, v4);
+ }
/*
* one-time startup overhead for each cache
*/
@@ -1666,6 +1684,14 @@ SearchCatCacheList(CatCache *cache,
MemoryContext oldcxt;
int i;
+ if (SearchCatCacheList_hook)
+ {
+ cl = SearchCatCacheList_hook(cache, nkeys, v1, v2, v3);
+
+ if (cl)
+ return cl;
+ }
+
/*
* one-time startup overhead for each cache
*/
diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c
index 603aa4157be..4b779ccd951 100644
--- a/src/backend/utils/cache/inval.c
+++ b/src/backend/utils/cache/inval.c
@@ -251,6 +251,7 @@ int debug_discard_caches = 0;
#define MAX_SYSCACHE_CALLBACKS 64
#define MAX_RELCACHE_CALLBACKS 10
+#define MAX_USERCACHE_CALLBACKS 10
static struct SYSCACHECALLBACK
{
@@ -272,6 +273,14 @@ static struct RELCACHECALLBACK
static int relcache_callback_count = 0;
+static struct USERCACHECALLBACK
+{
+ UsercacheCallbackFunction function;
+ Datum arg;
+} usercache_callback_list[MAX_USERCACHE_CALLBACKS];
+
+static int usercache_callback_count = 0;
+
/* ----------------------------------------------------------------
* Invalidation subgroup support functions
* ----------------------------------------------------------------
@@ -692,6 +701,16 @@ InvalidateSystemCachesExtended(bool debug_discard)
ccitem->function(ccitem->arg, InvalidOid);
}
+
+ for (i = 0; i < usercache_callback_count; i++)
+ {
+ struct USERCACHECALLBACK *ccitem = usercache_callback_list + i;
+
+ ccitem->function(ccitem->arg,
+ InvalidOid,
+ InvalidOid,
+ InvalidOid);
+ }
}
/*
@@ -773,6 +792,19 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
else if (msg->sn.dbId == MyDatabaseId)
InvalidateCatalogSnapshot();
}
+ else if (msg->id == SHAREDINVALUSERCACHE_ID)
+ {
+ int i;
+ for (i = 0; i < usercache_callback_count; i++)
+ {
+ struct USERCACHECALLBACK *ccitem = usercache_callback_list + i;
+
+ ccitem->function(ccitem->arg,
+ msg->usr.arg1,
+ msg->usr.arg2,
+ msg->usr.arg3);
+ }
+ }
else
elog(FATAL, "unrecognized SI message ID: %d", msg->id);
}
@@ -1429,6 +1461,25 @@ CacheInvalidateRelcacheByRelid(Oid relid)
ReleaseSysCache(tup);
}
+/*
+ * CacheInvalidateRelcacheByDbidRelid
+ */
+void
+CacheInvalidateRelcacheByDbidRelid(Oid dbid, Oid relid)
+{
+ SharedInvalidationMessage msg;
+
+ PrepareInvalidationState();
+
+ msg.rc.id = SHAREDINVALRELCACHE_ID;
+ msg.rc.dbId = dbid;
+ msg.rc.relId = relid;
+ /* check AddCatcacheInvalidationMessage() for an explanation */
+ VALGRIND_MAKE_MEM_DEFINED(&msg, sizeof(msg));
+
+ SendSharedInvalidMessages(&msg, 1);
+}
+
/*
* CacheInvalidateSmgr
@@ -1567,6 +1618,22 @@ CacheRegisterRelcacheCallback(RelcacheCallbackFunction func,
++relcache_callback_count;
}
+/*
+ * CacheRegisterUsercacheCallback
+ */
+void
+CacheRegisterUsercacheCallback(UsercacheCallbackFunction func,
+ Datum arg)
+{
+ if (usercache_callback_count >= MAX_USERCACHE_CALLBACKS)
+ elog(FATAL, "out of usercache_callback_list slots");
+
+ usercache_callback_list[usercache_callback_count].function = func;
+ usercache_callback_list[usercache_callback_count].arg = arg;
+
+ ++usercache_callback_count;
+}
+
/*
* CallSyscacheCallbacks
*
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 930cc03ee20..33bd7bcda8f 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -33,6 +33,7 @@
#include "access/htup_details.h"
#include "access/multixact.h"
#include "access/parallel.h"
+#include "access/relation.h"
#include "access/reloptions.h"
#include "access/sysattr.h"
#include "access/table.h"
@@ -319,6 +320,7 @@ static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
StrategyNumber numSupport);
static void RelationCacheInitFileRemoveInDir(const char *tblspcpath);
static void unlink_initfile(const char *initfilename, int elevel);
+static void release_rd_amcache(Relation rel);
/*
@@ -463,8 +465,9 @@ AllocateRelationDesc(Form_pg_class relp)
static void
RelationParseRelOptions(Relation relation, HeapTuple tuple)
{
- bytea *options;
- amoptions_function amoptsfn;
+ bytea *options;
+ amoptions_function amoptsfn;
+ const TableAmRoutine *tableam = NULL;
relation->rd_options = NULL;
@@ -476,9 +479,10 @@ RelationParseRelOptions(Relation relation, HeapTuple tuple)
{
case RELKIND_RELATION:
case RELKIND_TOASTVALUE:
- case RELKIND_VIEW:
case RELKIND_MATVIEW:
+ case RELKIND_VIEW:
case RELKIND_PARTITIONED_TABLE:
+ tableam = relation->rd_tableam;
amoptsfn = NULL;
break;
case RELKIND_INDEX:
@@ -490,11 +494,12 @@ RelationParseRelOptions(Relation relation, HeapTuple tuple)
}
/*
- * Fetch reloptions from tuple; have to use a hardwired descriptor because
- * we might not have any other for pg_class yet (consider executing this
- * code for pg_class itself)
- */
- options = extractRelOptions(tuple, GetPgClassDescriptor(), amoptsfn);
+ * Fetch reloptions from tuple; have to use a hardwired descriptor because
+ * we might not have any other for pg_class yet (consider executing this
+ * code for pg_class itself)
+ */
+ options = extractRelOptions(tuple, GetPgClassDescriptor(),
+ tableam, amoptsfn);
/*
* Copy parsed data into CacheMemoryContext. To guard against the
@@ -1408,7 +1413,7 @@ InitIndexAmRoutine(Relation relation)
* Call the amhandler in current, short-lived memory context, just in case
* it leaks anything (it probably won't, but let's be paranoid).
*/
- tmp = GetIndexAmRoutine(relation->rd_amhandler);
+ tmp = GetIndexAmRoutineExtended(relation->rd_id, relation->rd_amhandler);
/* OK, now transfer the data into relation's rd_indexcxt. */
cached = (IndexAmRoutine *) MemoryContextAlloc(relation->rd_indexcxt,
@@ -2270,9 +2275,7 @@ RelationReloadIndexInfo(Relation relation)
RelationCloseSmgr(relation);
/* Must free any AM cached data upon relcache flush */
- if (relation->rd_amcache)
- pfree(relation->rd_amcache);
- relation->rd_amcache = NULL;
+ release_rd_amcache(relation);
/*
* If it's a shared index, we might be called before backend startup has
@@ -2492,8 +2495,7 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc)
pfree(relation->rd_options);
if (relation->rd_indextuple)
pfree(relation->rd_indextuple);
- if (relation->rd_amcache)
- pfree(relation->rd_amcache);
+ release_rd_amcache(relation);
if (relation->rd_fdwroutine)
pfree(relation->rd_fdwroutine);
if (relation->rd_indexcxt)
@@ -2580,9 +2582,7 @@ RelationClearRelation(Relation relation, bool rebuild)
RelationCloseSmgr(relation);
/* Free AM cached data, if any */
- if (relation->rd_amcache)
- pfree(relation->rd_amcache);
- relation->rd_amcache = NULL;
+ release_rd_amcache(relation);
/*
* Treat nailed-in system relations separately, they always need to be
@@ -6896,3 +6896,9 @@ ResOwnerReleaseRelation(Datum res)
RelationCloseCleanup((Relation) res);
}
+
+static void
+release_rd_amcache(Relation rel)
+{
+ table_free_rd_amcache(rel);
+}
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c
index 3e03dfc9910..802ec4b218f 100644
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -94,6 +94,7 @@ static int SysCacheSupportingRelOidSize;
static int oid_compare(const void *a, const void *b);
+SysCacheGetAttr_hook_type SysCacheGetAttr_hook = NULL;
/*
* InitCatalogCache - initialize the caches
@@ -480,6 +481,7 @@ SysCacheGetAttr(int cacheId, HeapTuple tup,
AttrNumber attributeNumber,
bool *isNull)
{
+ TupleDesc cc_tupdesc = SysCache[cacheId]->cc_tupdesc;
/*
* We just need to get the TupleDesc out of the cache entry, and then we
* can apply heap_getattr(). Normally the cache control data is already
@@ -489,14 +491,18 @@ SysCacheGetAttr(int cacheId, HeapTuple tup,
if (cacheId < 0 || cacheId >= SysCacheSize ||
!PointerIsValid(SysCache[cacheId]))
elog(ERROR, "invalid cache ID: %d", cacheId);
- if (!PointerIsValid(SysCache[cacheId]->cc_tupdesc))
+
+ if (!PointerIsValid(cc_tupdesc) && SysCacheGetAttr_hook)
+ cc_tupdesc = SysCacheGetAttr_hook(SysCache[cacheId]);
+ if (!PointerIsValid(cc_tupdesc))
{
InitCatCachePhase2(SysCache[cacheId], false);
Assert(PointerIsValid(SysCache[cacheId]->cc_tupdesc));
+ cc_tupdesc = SysCache[cacheId]->cc_tupdesc;
}
return heap_getattr(tup, attributeNumber,
- SysCache[cacheId]->cc_tupdesc,
+ cc_tupdesc,
isNull);
}
diff --git a/src/backend/utils/cache/typcache.c b/src/backend/utils/cache/typcache.c
index aa4720cb598..b18e50df27d 100644
--- a/src/backend/utils/cache/typcache.c
+++ b/src/backend/utils/cache/typcache.c
@@ -292,6 +292,8 @@ static int32 NextRecordTypmod = 0; /* number of entries used */
* as identifiers, so we start the counter at INVALID_TUPLEDESC_IDENTIFIER.
*/
static uint64 tupledesc_id_counter = INVALID_TUPLEDESC_IDENTIFIER;
+load_typcache_tupdesc_hook_type load_typcache_tupdesc_hook = NULL;
+load_enum_cache_data_hook_type load_enum_cache_data_hook = NULL;
static void load_typcache_tupdesc(TypeCacheEntry *typentry);
static void load_rangetype_info(TypeCacheEntry *typentry);
@@ -881,6 +883,12 @@ load_typcache_tupdesc(TypeCacheEntry *typentry)
{
Relation rel;
+ if (load_typcache_tupdesc_hook)
+ {
+ load_typcache_tupdesc_hook(typentry);
+ return;
+ }
+
if (!OidIsValid(typentry->typrelid)) /* should not happen */
elog(ERROR, "invalid typrelid for composite type %u",
typentry->type_id);
@@ -2563,6 +2571,12 @@ load_enum_cache_data(TypeCacheEntry *tcache)
int bm_size,
start_pos;
+ if (load_enum_cache_data_hook)
+ {
+ load_enum_cache_data_hook(tcache);
+ return;
+ }
+
/* Check that this is actually an enum */
if (tcache->typtype != TYPTYPE_ENUM)
ereport(ERROR,
diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c
index b924b524d0b..9524530282e 100644
--- a/src/backend/utils/error/elog.c
+++ b/src/backend/utils/error/elog.c
@@ -3770,3 +3770,12 @@ write_stderr(const char *fmt,...)
#endif
va_end(ap);
}
+
+CustomErrorCleanupHookType CustomErrorCleanupHook = NULL;
+
+void
+CustomErrorCleanup(void)
+{
+ if (CustomErrorCleanupHook)
+ CustomErrorCleanupHook();
+}
diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c
index e48a86be54b..5b7888c705f 100644
--- a/src/backend/utils/fmgr/fmgr.c
+++ b/src/backend/utils/fmgr/fmgr.c
@@ -72,7 +72,7 @@ extern Datum fmgr_security_definer(PG_FUNCTION_ARGS);
* or name, but search by Oid is much faster.
*/
-static const FmgrBuiltin *
+const FmgrBuiltin *
fmgr_isbuiltin(Oid id)
{
uint16 index;
@@ -97,7 +97,7 @@ fmgr_isbuiltin(Oid id)
* the array with the same name, but they should all point to the same
* routine.
*/
-static const FmgrBuiltin *
+const FmgrBuiltin *
fmgr_lookupByName(const char *name)
{
int i;
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 0805398e24d..e4f9e14a91e 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -81,7 +81,7 @@ static void ClientCheckTimeoutHandler(void);
static bool ThereIsAtLeastOneRole(void);
static void process_startup_options(Port *port, bool am_superuser);
static void process_settings(Oid databaseid, Oid roleid);
-
+base_init_startup_hook_type base_init_startup_hook = NULL;
/*** InitPostgres support ***/
@@ -657,6 +657,9 @@ BaseInit(void)
*/
InitFileAccess();
+ if (base_init_startup_hook)
+ base_init_startup_hook();
+
/*
* Initialize statistics reporting. This needs to happen early to ensure
* that pgstat's shutdown callback runs after the shutdown callbacks of
diff --git a/src/backend/utils/sort/tuplestore.c b/src/backend/utils/sort/tuplestore.c
index 947a868e569..d3a41533552 100644
--- a/src/backend/utils/sort/tuplestore.c
+++ b/src/backend/utils/sort/tuplestore.c
@@ -1100,6 +1100,36 @@ tuplestore_gettupleslot(Tuplestorestate *state, bool forward,
}
}
+/*
+ * Same as tuplestore_gettupleslot(), but forces tuple storage to slot. Thus,
+ * it can work with slot types different from minimal tuple.
+ */
+bool
+tuplestore_force_gettupleslot(Tuplestorestate *state, bool forward,
+ bool copy, TupleTableSlot *slot)
+{
+ MinimalTuple tuple;
+ bool should_free;
+
+ tuple = (MinimalTuple) tuplestore_gettuple(state, forward, &should_free);
+
+ if (tuple)
+ {
+ if (copy && !should_free)
+ {
+ tuple = heap_copy_minimal_tuple(tuple);
+ should_free = true;
+ }
+ ExecForceStoreMinimalTuple(tuple, slot, should_free);
+ return true;
+ }
+ else
+ {
+ ExecClearTuple(slot);
+ return false;
+ }
+}
+
/*
* tuplestore_advance - exported function to adjust position without fetching
*
diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index 7d2b34d4f20..df9f4394f07 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -101,6 +101,10 @@ TransactionId RecentXmin = FirstNormalTransactionId;
/* (table, ctid) => (cmin, cmax) mapping during timetravel */
static HTAB *tuplecid_data = NULL;
+snapshot_hook_type snapshot_register_hook = NULL;
+snapshot_hook_type snapshot_deregister_hook = NULL;
+reset_xmin_hook_type reset_xmin_hook = NULL;
+
/*
* Elements of the active snapshot stack.
*
@@ -201,6 +205,11 @@ typedef struct SerializedSnapshotData
CommandId curcid;
TimestampTz whenTaken;
XLogRecPtr lsn;
+ CSNSnapshotData csnSnapshotData;
+ uint64 undoRegularLocation;
+ uint64 undoRegularXmin;
+ uint64 undoSystemLocation;
+ uint64 undoSystemXmin;
} SerializedSnapshotData;
/*
@@ -263,6 +272,8 @@ GetTransactionSnapshot(void)
/* Mark it as "registered" in FirstXactSnapshot */
FirstXactSnapshot->regd_count++;
pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
+ if (snapshot_register_hook)
+ snapshot_register_hook(FirstXactSnapshot);
}
else
CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
@@ -403,6 +414,8 @@ GetNonHistoricCatalogSnapshot(Oid relid)
* CatalogSnapshot pointer is already valid.
*/
pairingheap_add(&RegisteredSnapshots, &CatalogSnapshot->ph_node);
+ if (snapshot_register_hook)
+ snapshot_register_hook(CatalogSnapshot);
}
return CatalogSnapshot;
@@ -424,6 +437,8 @@ InvalidateCatalogSnapshot(void)
if (CatalogSnapshot)
{
pairingheap_remove(&RegisteredSnapshots, &CatalogSnapshot->ph_node);
+ if (snapshot_deregister_hook)
+ snapshot_deregister_hook(CatalogSnapshot);
CatalogSnapshot = NULL;
SnapshotResetXmin();
}
@@ -501,6 +516,7 @@ SetTransactionSnapshot(Snapshot sourcesnap, VirtualTransactionId *sourcevxid,
CurrentSnapshot->xmin = sourcesnap->xmin;
CurrentSnapshot->xmax = sourcesnap->xmax;
CurrentSnapshot->xcnt = sourcesnap->xcnt;
+ CurrentSnapshot->csnSnapshotData = sourcesnap->csnSnapshotData;
Assert(sourcesnap->xcnt <= GetMaxSnapshotXidCount());
if (sourcesnap->xcnt > 0)
memcpy(CurrentSnapshot->xip, sourcesnap->xip,
@@ -558,6 +574,8 @@ SetTransactionSnapshot(Snapshot sourcesnap, VirtualTransactionId *sourcevxid,
/* Mark it as "registered" in FirstXactSnapshot */
FirstXactSnapshot->regd_count++;
pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
+ if (snapshot_register_hook)
+ snapshot_register_hook(FirstXactSnapshot);
}
FirstSnapshotSet = true;
@@ -820,7 +838,11 @@ RegisterSnapshotOnOwner(Snapshot snapshot, ResourceOwner owner)
ResourceOwnerRememberSnapshot(owner, snap);
if (snap->regd_count == 1)
+ {
pairingheap_add(&RegisteredSnapshots, &snap->ph_node);
+ if (snapshot_register_hook)
+ snapshot_register_hook(snap);
+ }
return snap;
}
@@ -863,7 +885,11 @@ UnregisterSnapshotNoOwner(Snapshot snapshot)
snapshot->regd_count--;
if (snapshot->regd_count == 0)
+ {
pairingheap_remove(&RegisteredSnapshots, &snapshot->ph_node);
+ if (snapshot_deregister_hook)
+ snapshot_deregister_hook(snapshot);
+ }
if (snapshot->regd_count == 0 && snapshot->active_count == 0)
{
@@ -915,6 +941,9 @@ SnapshotResetXmin(void)
{
Snapshot minSnapshot;
+ if (reset_xmin_hook)
+ reset_xmin_hook();
+
if (ActiveSnapshot != NULL)
return;
@@ -1008,6 +1037,8 @@ AtEOXact_Snapshot(bool isCommit, bool resetXmin)
Assert(FirstXactSnapshot->regd_count > 0);
Assert(!pairingheap_is_empty(&RegisteredSnapshots));
pairingheap_remove(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
+ if (snapshot_deregister_hook)
+ snapshot_deregister_hook(FirstXactSnapshot);
}
FirstXactSnapshot = NULL;
@@ -1039,6 +1070,8 @@ AtEOXact_Snapshot(bool isCommit, bool resetXmin)
pairingheap_remove(&RegisteredSnapshots,
&esnap->snapshot->ph_node);
+ if (snapshot_deregister_hook)
+ snapshot_deregister_hook(esnap->snapshot);
}
exportedSnapshots = NIL;
@@ -1167,6 +1200,8 @@ ExportSnapshot(Snapshot snapshot)
snapshot->regd_count++;
pairingheap_add(&RegisteredSnapshots, &snapshot->ph_node);
+ if (snapshot_register_hook)
+ snapshot_register_hook(snapshot);
/*
* Fill buf with a text serialization of the snapshot, plus identification
@@ -1729,6 +1764,13 @@ SerializeSnapshot(Snapshot snapshot, char *start_address)
serialized_snapshot.curcid = snapshot->curcid;
serialized_snapshot.whenTaken = snapshot->whenTaken;
serialized_snapshot.lsn = snapshot->lsn;
+ serialized_snapshot.csnSnapshotData.xmin = snapshot->csnSnapshotData.xmin;
+ serialized_snapshot.csnSnapshotData.snapshotcsn = snapshot->csnSnapshotData.snapshotcsn;
+ serialized_snapshot.csnSnapshotData.xlogptr = snapshot->csnSnapshotData.xlogptr;
+ serialized_snapshot.undoRegularXmin = snapshot->undoRegularLocationPhNode.xmin;
+ serialized_snapshot.undoRegularLocation = snapshot->undoRegularLocationPhNode.undoLocation;
+ serialized_snapshot.undoSystemXmin = snapshot->undoSystemLocationPhNode.xmin;
+ serialized_snapshot.undoSystemLocation = snapshot->undoSystemLocationPhNode.undoLocation;
/*
* Ignore the SubXID array if it has overflowed, unless the snapshot was
@@ -1804,6 +1846,13 @@ RestoreSnapshot(char *start_address)
snapshot->whenTaken = serialized_snapshot.whenTaken;
snapshot->lsn = serialized_snapshot.lsn;
snapshot->snapXactCompletionCount = 0;
+ snapshot->csnSnapshotData.xmin = serialized_snapshot.csnSnapshotData.xmin;
+ snapshot->csnSnapshotData.snapshotcsn = serialized_snapshot.csnSnapshotData.snapshotcsn;
+ snapshot->csnSnapshotData.xlogptr = serialized_snapshot.csnSnapshotData.xlogptr;
+ snapshot->undoRegularLocationPhNode.xmin = serialized_snapshot.undoRegularXmin;
+ snapshot->undoRegularLocationPhNode.undoLocation = serialized_snapshot.undoRegularLocation;
+ snapshot->undoSystemLocationPhNode.xmin = serialized_snapshot.undoSystemXmin;
+ snapshot->undoSystemLocationPhNode.undoLocation = serialized_snapshot.undoSystemLocation;
/* Copy XIDs, if present. */
if (serialized_snapshot.xcnt > 0)
diff --git a/src/bin/pg_rewind/Makefile b/src/bin/pg_rewind/Makefile
index 12b138b2f2c..4f93864cf7e 100644
--- a/src/bin/pg_rewind/Makefile
+++ b/src/bin/pg_rewind/Makefile
@@ -21,6 +21,7 @@ LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport)
OBJS = \
$(WIN32RES) \
datapagemap.o \
+ extension.o \
file_ops.o \
filemap.o \
libpq_source.o \
@@ -35,19 +36,21 @@ EXTRA_CLEAN = xlogreader.c
all: pg_rewind
pg_rewind: $(OBJS) | submake-libpq submake-libpgport
- $(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
+ $(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LDFLAGS_EX_BE) $(LIBS) -o $@$(X)
xlogreader.c: % : $(top_srcdir)/src/backend/access/transam/%
rm -f $@ && $(LN_S) $< .
install: all installdirs
$(INSTALL_PROGRAM) pg_rewind$(X) '$(DESTDIR)$(bindir)/pg_rewind$(X)'
+ $(INSTALL_DATA) $(srcdir)/pg_rewind_ext.h '$(DESTDIR)$(includedir)'
installdirs:
- $(MKDIR_P) '$(DESTDIR)$(bindir)'
+ $(MKDIR_P) '$(DESTDIR)$(bindir)' '$(DESTDIR)$(includedir)'
uninstall:
rm -f '$(DESTDIR)$(bindir)/pg_rewind$(X)'
+ rm -f '$(DESTDIR)$(includedir)/pg_rewind_ext.h'
clean distclean:
rm -f pg_rewind$(X) $(OBJS) xlogreader.c
diff --git a/src/bin/pg_rewind/extension.c b/src/bin/pg_rewind/extension.c
new file mode 100644
index 00000000000..29ec4b5a6f6
--- /dev/null
+++ b/src/bin/pg_rewind/extension.c
@@ -0,0 +1,132 @@
+/*-------------------------------------------------------------------------
+ *
+ * extension.c
+ * Functions for processing shared libraries loaded by pg_rewind.
+ *
+ * Copyright (c) 2013-2023, PostgreSQL Global Development Group
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres_fe.h"
+
+#ifndef WIN32
+#include <dlfcn.h>
+
+/*
+ * On macOS, <dlfcn.h> insists on including <stdbool.h>. If we're not
+ * using stdbool, undef bool to undo the damage.
+ */
+#ifndef PG_USE_STDBOOL
+#ifdef bool
+#undef bool
+#endif
+#endif
+#endif /* !WIN32 */
+
+#include <sys/stat.h>
+
+#include "access/xlog_internal.h"
+#include "pg_rewind.h"
+
+/* signature for pg_rewind extension library rewind function */
+typedef void (*PG_rewind_t) (const char *datadir_target, char *datadir_source,
+ char *connstr_source, XLogRecPtr startpoint,
+ int tliIndex, XLogRecPtr endpoint,
+ const char *restoreCommand, const char *argv0,
+ bool debug);
+
+static bool
+file_exists(const char *argv0, const char *name)
+{
+ struct stat st;
+
+ Assert(name != NULL);
+
+ if (stat(name, &st) == 0)
+ return !S_ISDIR(st.st_mode);
+ else if (!(errno == ENOENT || errno == ENOTDIR || errno == EACCES))
+ {
+ const char *progname;
+
+ progname = get_progname(argv0);
+ pg_log_error("could not access file \"%s\": %m", name);
+ pg_log_error_hint("Try \"%s --help\" for more information.", progname);
+ exit(1);
+ }
+
+ return false;
+}
+
+static char *
+expand_dynamic_library_name(const char *argv0, const char *name)
+{
+ char *full;
+ char my_exec_path[MAXPGPATH];
+ char pkglib_path[MAXPGPATH];
+
+ Assert(name);
+
+ if (find_my_exec(argv0, my_exec_path) < 0)
+ pg_fatal("%s: could not locate my own executable path", argv0);
+ get_pkglib_path(my_exec_path, pkglib_path);
+ full = palloc(strlen(pkglib_path) + 1 + strlen(name) + 1);
+ sprintf(full, "%s/%s", pkglib_path, name);
+ if (file_exists(argv0, full))
+ return full;
+ pfree(full);
+
+ full = palloc(strlen(pkglib_path) + 1 + strlen(name) + 1 +
+ strlen(DLSUFFIX) + 1);
+ sprintf(full, "%s/%s%s", pkglib_path, name, DLSUFFIX);
+ if (file_exists(argv0, full))
+ return full;
+ pfree(full);
+
+ return pstrdup(name);
+}
+
+void
+process_extensions(SimpleStringList *extensions, const char *datadir_target,
+ char *datadir_source, char *connstr_source,
+ XLogRecPtr startpoint, int tliIndex, XLogRecPtr endpoint,
+ const char *restoreCommand, const char *argv0,
+ bool debug)
+{
+ SimpleStringListCell *cell;
+
+ if (extensions->head == NULL)
+ return; /* nothing to do */
+
+ for (cell = extensions->head; cell; cell = cell->next)
+ {
+ char *filename = cell->val;
+ char *fullname;
+ void *lib_handle;
+ PG_rewind_t PG_rewind;
+ char *load_error;
+
+ fullname = expand_dynamic_library_name(argv0, filename);
+
+ lib_handle = dlopen(fullname, RTLD_NOW | RTLD_GLOBAL);
+ if (lib_handle == NULL)
+ {
+ load_error = dlerror();
+ pg_fatal("could not load library \"%s\": %s", fullname, load_error);
+ }
+
+ PG_rewind = dlsym(lib_handle, "_PG_rewind");
+
+ if (PG_rewind == NULL)
+ pg_fatal("could not find function \"_PG_rewind\" in \"%s\"",
+ fullname);
+ pfree(fullname);
+
+ if (showprogress)
+ pg_log_info("performing rewind for '%s' extension", filename);
+ PG_rewind(datadir_target, datadir_source, connstr_source, startpoint,
+ tliIndex, endpoint, restoreCommand, argv0, debug);
+
+ pg_log_debug("loaded library \"%s\"", filename);
+ }
+}
diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c
index 4458324c9d8..83a2476a7e1 100644
--- a/src/bin/pg_rewind/filemap.c
+++ b/src/bin/pg_rewind/filemap.c
@@ -53,6 +53,7 @@
#define FILEHASH_INITIAL_SIZE 1000
static filehash_hash *filehash;
+static SimpleStringList extensions_exclude = {NULL, NULL};
static bool isRelDataFile(const char *path);
static char *datasegpath(RelFileLocator rlocator, ForkNumber forknum,
@@ -260,6 +261,8 @@ process_target_file(const char *path, file_type_t type, size_t size,
* from the target data folder all paths which have been filtered out from
* the source data folder when processing the source files.
*/
+ if (check_file_excluded(path, false))
+ return;
/*
* Like in process_source_file, pretend that pg_wal is always a directory.
@@ -404,6 +407,31 @@ check_file_excluded(const char *path, bool is_source)
}
}
+ /*
+ * Exclude extensions directories
+ */
+ if (extensions_exclude.head != NULL)
+ {
+ SimpleStringListCell *cell;
+
+ for (cell = extensions_exclude.head; cell; cell = cell->next)
+ {
+ char *exclude_dir = cell->val;
+
+ snprintf(localpath, sizeof(localpath), "%s/", exclude_dir);
+ if (strstr(path, localpath) == path)
+ {
+ if (is_source)
+ pg_log_debug("entry \"%s\" excluded from source file list",
+ path);
+ else
+ pg_log_debug("entry \"%s\" excluded from target file list",
+ path);
+ return true;
+ }
+ }
+ }
+
return false;
}
@@ -820,3 +848,15 @@ decide_file_actions(void)
return filemap;
}
+
+void
+extensions_exclude_add(char **exclude_dirs)
+{
+ int i;
+
+ for (i = 0; exclude_dirs[i] != NULL; i++)
+ {
+ simple_string_list_append(&extensions_exclude,
+ pstrdup(exclude_dirs[i]));
+ }
+}
diff --git a/src/bin/pg_rewind/meson.build b/src/bin/pg_rewind/meson.build
index e0f88bde221..e56d5ae24f6 100644
--- a/src/bin/pg_rewind/meson.build
+++ b/src/bin/pg_rewind/meson.build
@@ -2,6 +2,7 @@
pg_rewind_sources = files(
'datapagemap.c',
+ 'extension.c',
'file_ops.c',
'filemap.c',
'libpq_source.c',
@@ -23,6 +24,7 @@ pg_rewind = executable('pg_rewind',
pg_rewind_sources,
dependencies: [frontend_code, libpq, lz4, zstd],
c_args: ['-DFRONTEND'], # needed for xlogreader et al
+ export_dynamic: true,
kwargs: default_bin_args,
)
bin_targets += pg_rewind
@@ -48,3 +50,7 @@ tests += {
}
subdir('po', if_found: libintl)
+
+install_headers(
+ 'pg_rewind_ext.h'
+)
\ No newline at end of file
diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c
index 22f7351fdcd..ca8ec05220e 100644
--- a/src/bin/pg_rewind/parsexlog.c
+++ b/src/bin/pg_rewind/parsexlog.c
@@ -38,7 +38,7 @@ static const char *const RmgrNames[RM_MAX_ID + 1] = {
#define RmgrName(rmid) (((rmid) <= RM_MAX_BUILTIN_ID) ? \
RmgrNames[rmid] : "custom")
-static void extractPageInfo(XLogReaderState *record);
+static void extractPageInfo(XLogReaderState *record, void *arg);
static int xlogreadfd = -1;
static XLogSegNo xlogreadsegno = 0;
@@ -54,17 +54,11 @@ static int SimpleXLogPageRead(XLogReaderState *xlogreader,
XLogRecPtr targetPagePtr,
int reqLen, XLogRecPtr targetRecPtr, char *readBuf);
-/*
- * Read WAL from the datadir/pg_wal, starting from 'startpoint' on timeline
- * index 'tliIndex' in target timeline history, until 'endpoint'. Make note of
- * the data blocks touched by the WAL records, and return them in a page map.
- *
- * 'endpoint' is the end of the last record to read. The record starting at
- * 'endpoint' is the first one that is not read.
- */
void
-extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex,
- XLogRecPtr endpoint, const char *restoreCommand)
+SimpleXLogRead(const char *datadir, XLogRecPtr startpoint, int tliIndex,
+ XLogRecPtr endpoint, const char *restoreCommand,
+ void (*page_callback) (XLogReaderState *, void *arg),
+ void *arg)
{
XLogRecord *record;
XLogReaderState *xlogreader;
@@ -97,7 +91,7 @@ extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex,
LSN_FORMAT_ARGS(errptr));
}
- extractPageInfo(xlogreader);
+ page_callback(xlogreader, arg);
} while (xlogreader->EndRecPtr < endpoint);
/*
@@ -116,6 +110,22 @@ extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex,
}
}
+/*
+ * Read WAL from the datadir/pg_wal, starting from 'startpoint' on timeline
+ * index 'tliIndex' in target timeline history, until 'endpoint'. Make note of
+ * the data blocks touched by the WAL records, and return them in a page map.
+ *
+ * 'endpoint' is the end of the last record to read. The record starting at
+ * 'endpoint' is the first one that is not read.
+ */
+void
+extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex,
+ XLogRecPtr endpoint, const char *restoreCommand)
+{
+ SimpleXLogRead(datadir, startpoint, tliIndex, endpoint, restoreCommand,
+ extractPageInfo, NULL);
+}
+
/*
* Reads one WAL record. Returns the end position of the record, without
* doing anything with the record itself.
@@ -365,7 +375,7 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
* Extract information on which blocks the current record modifies.
*/
static void
-extractPageInfo(XLogReaderState *record)
+extractPageInfo(XLogReaderState *record, void *arg)
{
int block_id;
RmgrId rmid = XLogRecGetRmid(record);
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index 052c83b8757..50873c5d2cb 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -77,6 +77,8 @@ bool do_sync = true;
bool restore_wal = false;
DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
+static SimpleStringList extensions = {NULL, NULL};
+
/* Target history */
TimeLineHistoryEntry *targetHistory;
int targetNentries;
@@ -110,6 +112,7 @@ usage(const char *progname)
printf(_(" --debug write a lot of debug messages\n"));
printf(_(" --no-ensure-shutdown do not automatically fix unclean shutdown\n"));
printf(_(" --sync-method=METHOD set method for syncing files to disk\n"));
+ printf(_(" -e, --extension=PATH path to library performing rewind for extension\n"));
printf(_(" -V, --version output version information, then exit\n"));
printf(_(" -?, --help show this help, then exit\n"));
printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
@@ -135,6 +138,7 @@ main(int argc, char **argv)
{"progress", no_argument, NULL, 'P'},
{"debug", no_argument, NULL, 3},
{"sync-method", required_argument, NULL, 6},
+ {"extension", required_argument, NULL, 'e'},
{NULL, 0, NULL, 0}
};
int option_index;
@@ -173,7 +177,7 @@ main(int argc, char **argv)
}
}
- while ((c = getopt_long(argc, argv, "cD:nNPR", long_options, &option_index)) != -1)
+ while ((c = getopt_long(argc, argv, "cD:nNPRe:", long_options, &option_index)) != -1)
{
switch (c)
{
@@ -227,6 +231,10 @@ main(int argc, char **argv)
exit(1);
break;
+ case 'e': /* -e or --extension */
+ simple_string_list_append(&extensions, optarg);
+ break;
+
default:
/* getopt_long already emitted a complaint */
pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -463,6 +471,12 @@ main(int argc, char **argv)
/* Initialize the hash table to track the status of each file */
filehash_init();
+ if (extensions.head != NULL)
+ process_extensions(&extensions, datadir_target, datadir_source,
+ connstr_source, chkptrec, lastcommontliIndex,
+ target_wal_endrec, restore_command, argv[0],
+ debug);
+
/*
* Collect information about all files in the both data directories.
*/
diff --git a/src/bin/pg_rewind/pg_rewind.h b/src/bin/pg_rewind/pg_rewind.h
index ec43cbe2c67..4397259e0d0 100644
--- a/src/bin/pg_rewind/pg_rewind.h
+++ b/src/bin/pg_rewind/pg_rewind.h
@@ -15,7 +15,9 @@
#include "common/logging.h"
#include "common/file_utils.h"
#include "datapagemap.h"
+#include "fe_utils/simple_list.h"
#include "libpq-fe.h"
+#include "pg_rewind_ext.h"
#include "storage/block.h"
#include "storage/relfilelocator.h"
@@ -55,4 +57,12 @@ extern TimeLineHistoryEntry *rewind_parseTimeLineHistory(char *buffer,
TimeLineID targetTLI,
int *nentries);
+/* in extension.c */
+extern void process_extensions(SimpleStringList *extensions,
+ const char *datadir_target, char *datadir_source,
+ char *connstr_source, XLogRecPtr startpoint,
+ int tliIndex, XLogRecPtr endpoint,
+ const char *restoreCommand, const char *argv0,
+ bool debug);
+
#endif /* PG_REWIND_H */
diff --git a/src/bin/pg_rewind/pg_rewind_ext.h b/src/bin/pg_rewind/pg_rewind_ext.h
new file mode 100644
index 00000000000..3616d94f588
--- /dev/null
+++ b/src/bin/pg_rewind/pg_rewind_ext.h
@@ -0,0 +1,44 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_rewind_ext.h
+ *
+ *
+ * Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_REWIND_EXT_H
+#define PG_REWIND_EXT_H
+
+#include "access/xlogreader.h"
+
+/* in parsexlog.c */
+/*
+ * Read WAL from the datadir/pg_wal, starting from 'startpoint' on timeline
+ * index 'tliIndex' in target timeline history, until 'endpoint'.
+ * Pass all WAL records to 'page_callback'.
+ *
+ * 'endpoint' is the end of the last record to read. The record starting at
+ * 'endpoint' is the first one that is not read.
+ */
+extern void SimpleXLogRead(const char *datadir, XLogRecPtr startpoint,
+ int tliIndex, XLogRecPtr endpoint,
+ const char *restoreCommand,
+ void (*page_callback) (XLogReaderState *,
+ void *arg),
+ void *arg);
+
+
+/* in filemap.c */
+/* Add NULL-terminated list of dirs that pg_rewind can skip copying */
+extern void extensions_exclude_add(char **exclude_dirs);
+
+/* signature for pg_rewind extension library rewind function */
+extern PGDLLEXPORT void _PG_rewind(const char *datadir_target,
+ char *datadir_source, char *connstr_source,
+ XLogRecPtr startpoint, int tliIndex,
+ XLogRecPtr endpoint,
+ const char *restoreCommand,
+ const char *argv0, bool debug);
+
+#endif /* PG_REWIND_EXT_H */
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 86ffb3c8683..a53cd9fd236 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -210,6 +210,11 @@ double throttle_delay = 0;
*/
int64 latency_limit = 0;
+/*
+ * tableam selection
+ */
+char *tableam = NULL;
+
/*
* tablespace selection
*/
@@ -893,6 +898,7 @@ usage(void)
" --partition-method=(range|hash)\n"
" partition pgbench_accounts with this method (default: range)\n"
" --partitions=NUM partition pgbench_accounts into NUM parts (default: 0)\n"
+ " --tableam=TABLEAM create tables using the specified tableam\n"
" --tablespace=TABLESPACE create tables in the specified tablespace\n"
" --unlogged-tables create tables as unlogged tables\n"
"\nOptions to select what to run:\n"
@@ -4778,14 +4784,34 @@ createPartitions(PGconn *con)
appendPQExpBufferStr(&query, "maxvalue");
appendPQExpBufferChar(&query, ')');
+
+ if (tableam != NULL)
+ {
+ char *escape_tableam;
+
+ escape_tableam = PQescapeIdentifier(con, tableam, strlen(tableam));
+ appendPQExpBuffer(&query, " using %s", escape_tableam);
+ PQfreemem(escape_tableam);
+ }
}
else if (partition_method == PART_HASH)
+ {
printfPQExpBuffer(&query,
"create%s table pgbench_accounts_%d\n"
" partition of pgbench_accounts\n"
" for values with (modulus %d, remainder %d)",
unlogged_tables ? " unlogged" : "", p,
partitions, p - 1);
+
+ if (tableam != NULL)
+ {
+ char *escape_tableam;
+
+ escape_tableam = PQescapeIdentifier(con, tableam, strlen(tableam));
+ appendPQExpBuffer(&query, " using %s", escape_tableam);
+ PQfreemem(escape_tableam);
+ }
+ }
else /* cannot get there */
Assert(0);
@@ -4872,10 +4898,20 @@ initCreateTables(PGconn *con)
if (partition_method != PART_NONE && strcmp(ddl->table, "pgbench_accounts") == 0)
appendPQExpBuffer(&query,
" partition by %s (aid)", PARTITION_METHOD[partition_method]);
- else if (ddl->declare_fillfactor)
+ else
{
+ if (tableam != NULL)
+ {
+ char *escape_tableam;
+
+ escape_tableam = PQescapeIdentifier(con, tableam, strlen(tableam));
+ appendPQExpBuffer(&query, " using %s", escape_tableam);
+ PQfreemem(escape_tableam);
+ }
+
/* fillfactor is only expected on actual tables */
- appendPQExpBuffer(&query, " with (fillfactor=%d)", fillfactor);
+ if (ddl->declare_fillfactor)
+ appendPQExpBuffer(&query, " with (fillfactor=%d)", fillfactor);
}
if (tablespace != NULL)
@@ -6663,6 +6699,7 @@ main(int argc, char **argv)
{"verbose-errors", no_argument, NULL, 15},
{"exit-on-abort", no_argument, NULL, 16},
{"debug", no_argument, NULL, 17},
+ {"tableam", required_argument, NULL, 18},
{NULL, 0, NULL, 0}
};
@@ -7003,6 +7040,10 @@ main(int argc, char **argv)
case 17: /* debug */
pg_logging_increase_verbosity();
break;
+ case 18: /* tableam */
+ initialization_option_set = true;
+ tableam = pg_strdup(optarg);
+ break;
default:
/* getopt_long already emitted a complaint */
pg_log_error_hint("Try \"%s --help\" for more information.", progname);
diff --git a/src/include/access/amapi.h b/src/include/access/amapi.h
index f25c9d58a7d..c6f57f7d192 100644
--- a/src/include/access/amapi.h
+++ b/src/include/access/amapi.h
@@ -107,12 +107,42 @@ typedef void (*ambuildempty_function) (Relation indexRelation);
typedef bool (*aminsert_function) (Relation indexRelation,
Datum *values,
bool *isnull,
- ItemPointer heap_tid,
+ ItemPointer tupleid,
Relation heapRelation,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
struct IndexInfo *indexInfo);
+/* extended version of aminsert taking Datum tupleid */
+typedef bool (*aminsert_extended_function) (Relation indexRelation,
+ Datum *values,
+ bool *isnull,
+ Datum tupleid,
+ Relation heapRelation,
+ IndexUniqueCheck checkUnique,
+ bool indexUnchanged,
+ struct IndexInfo *indexInfo);
+
+/* update this tuple */
+typedef bool (*amupdate_function) (Relation indexRelation,
+ bool new_valid,
+ bool old_valid,
+ Datum *values,
+ bool *isnull,
+ Datum tupleid,
+ Datum *valuesOld,
+ bool *isnullOld,
+ Datum oldTupleid,
+ Relation heapRelation,
+ IndexUniqueCheck checkUnique,
+ struct IndexInfo *indexInfo);
+/* delete this tuple */
+typedef bool (*amdelete_function) (Relation indexRelation,
+ Datum *values, bool *isnull,
+ Datum tupleid,
+ Relation heapRelation,
+ struct IndexInfo *indexInfo);
+
/* cleanup after insert */
typedef void (*aminsertcleanup_function) (Relation indexRelation,
struct IndexInfo *indexInfo);
@@ -252,6 +282,8 @@ typedef struct IndexAmRoutine
bool amusemaintenanceworkmem;
/* does AM store tuple information only at block granularity? */
bool amsummarizing;
+ /* can AM provide MVCC? */
+ bool ammvccaware;
/* OR of parallel vacuum flags. See vacuum.h for flags. */
uint8 amparallelvacuumoptions;
/* type of data stored in index, or InvalidOid if variable */
@@ -267,7 +299,10 @@ typedef struct IndexAmRoutine
ambuild_function ambuild;
ambuildempty_function ambuildempty;
aminsert_function aminsert;
+ aminsert_extended_function aminsertextended;
aminsertcleanup_function aminsertcleanup;
+ amupdate_function amupdate;
+ amdelete_function amdelete;
ambulkdelete_function ambulkdelete;
amvacuumcleanup_function amvacuumcleanup;
amcanreturn_function amcanreturn; /* can be NULL */
@@ -293,7 +328,13 @@ typedef struct IndexAmRoutine
/* Functions in access/index/amapi.c */
+extern IndexAmRoutine *GetIndexAmRoutineWithTableAM(Oid tamoid, Oid amhandler);
extern IndexAmRoutine *GetIndexAmRoutine(Oid amhandler);
-extern IndexAmRoutine *GetIndexAmRoutineByAmId(Oid amoid, bool noerror);
+extern IndexAmRoutine *GetIndexAmRoutineExtended(Oid indoid, Oid amhandler);
+extern IndexAmRoutine *GetIndexAmRoutineByAmId(Oid indoid, Oid amoid, bool noerror);
+
+typedef IndexAmRoutine *(*IndexAMRoutineHookType) (Oid tamoid, Oid amhandler);
+
+extern IndexAMRoutineHookType IndexAMRoutineHook;
#endif /* AMAPI_H */
diff --git a/src/include/access/brin_internal.h b/src/include/access/brin_internal.h
index a5a9772621c..442d2c96b7b 100644
--- a/src/include/access/brin_internal.h
+++ b/src/include/access/brin_internal.h
@@ -92,7 +92,7 @@ extern IndexBuildResult *brinbuild(Relation heap, Relation index,
struct IndexInfo *indexInfo);
extern void brinbuildempty(Relation index);
extern bool brininsert(Relation idxRel, Datum *values, bool *nulls,
- ItemPointer heaptid, Relation heapRel,
+ Datum tupleid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
struct IndexInfo *indexInfo);
diff --git a/src/include/access/detoast.h b/src/include/access/detoast.h
index 12d8cdb356a..9d78980e986 100644
--- a/src/include/access/detoast.h
+++ b/src/include/access/detoast.h
@@ -63,6 +63,13 @@ extern struct varlena *detoast_attr_slice(struct varlena *attr,
int32 sliceoffset,
int32 slicelength);
+/* ----------
+ * toast_decompress_datum -
+ *
+ * Decompress a compressed version of a varlena datum
+ */
+extern struct varlena *toast_decompress_datum(struct varlena *attr);
+
/* ----------
* toast_raw_datum_size -
*
@@ -79,4 +86,11 @@ extern Size toast_raw_datum_size(Datum value);
*/
extern Size toast_datum_size(Datum value);
+/*
+ * for in_memory module
+ */
+typedef struct varlena* (*ToastFunc) (struct varlena *attr);
+extern void register_o_detoast_func(ToastFunc func);
+extern void deregister_o_detoast_func(void);
+
#endif /* DETOAST_H */
diff --git a/src/include/access/genam.h b/src/include/access/genam.h
index fdcfbe8db74..5752a3cf1ef 100644
--- a/src/include/access/genam.h
+++ b/src/include/access/genam.h
@@ -144,13 +144,28 @@ extern void index_close(Relation relation, LOCKMODE lockmode);
extern bool index_insert(Relation indexRelation,
Datum *values, bool *isnull,
- ItemPointer heap_t_ctid,
+ ItemPointer tupleid,
Relation heapRelation,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
struct IndexInfo *indexInfo);
extern void index_insert_cleanup(Relation indexRelation,
struct IndexInfo *indexInfo);
+extern bool index_update(Relation indexRelation,
+ bool new_valid,
+ bool old_valid,
+ Datum *values,
+ bool *isnull,
+ Datum tupleid,
+ Datum *valuesOld,
+ bool *isnullOld,
+ Datum oldTupleid,
+ Relation heapRelation,
+ IndexUniqueCheck checkUnique,
+ struct IndexInfo *indexInfo);
+extern bool index_delete(Relation indexRelation, Datum *values, bool *isnull,
+ Datum tupleid, Relation heapRelation,
+ struct IndexInfo *indexInfo);
extern IndexScanDesc index_beginscan(Relation heapRelation,
Relation indexRelation,
@@ -176,6 +191,9 @@ extern IndexScanDesc index_beginscan_parallel(Relation heaprel,
ParallelIndexScanDesc pscan);
extern ItemPointer index_getnext_tid(IndexScanDesc scan,
ScanDirection direction);
+extern NullableDatum index_getnext_rowid(IndexScanDesc scan,
+ ScanDirection direction);
+extern Datum index_getnext_tupleid(IndexScanDesc scan, ScanDirection direction);
struct TupleTableSlot;
extern bool index_fetch_heap(IndexScanDesc scan, struct TupleTableSlot *slot);
extern bool index_getnext_slot(IndexScanDesc scan, ScanDirection direction,
diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h
index 3013a44bae1..2e81017f014 100644
--- a/src/include/access/gin_private.h
+++ b/src/include/access/gin_private.h
@@ -115,7 +115,7 @@ extern IndexBuildResult *ginbuild(Relation heap, Relation index,
struct IndexInfo *indexInfo);
extern void ginbuildempty(Relation index);
extern bool gininsert(Relation index, Datum *values, bool *isnull,
- ItemPointer ht_ctid, Relation heapRel,
+ Datum tupleid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
struct IndexInfo *indexInfo);
diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h
index 7b8749c8db0..284fb49c517 100644
--- a/src/include/access/gist_private.h
+++ b/src/include/access/gist_private.h
@@ -401,7 +401,7 @@ typedef struct GiSTOptions
/* gist.c */
extern void gistbuildempty(Relation index);
extern bool gistinsert(Relation r, Datum *values, bool *isnull,
- ItemPointer ht_ctid, Relation heapRel,
+ Datum tupleid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
struct IndexInfo *indexInfo);
diff --git a/src/include/access/hash.h b/src/include/access/hash.h
index 9c7d81525b4..e787974a3cf 100644
--- a/src/include/access/hash.h
+++ b/src/include/access/hash.h
@@ -364,7 +364,7 @@ extern IndexBuildResult *hashbuild(Relation heap, Relation index,
struct IndexInfo *indexInfo);
extern void hashbuildempty(Relation index);
extern bool hashinsert(Relation rel, Datum *values, bool *isnull,
- ItemPointer ht_ctid, Relation heapRel,
+ Datum tupleid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
struct IndexInfo *indexInfo);
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 9e9aec88a62..871c640c8db 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -322,19 +322,22 @@ extern void heap_multi_insert(Relation relation, struct TupleTableSlot **slots,
int ntuples, CommandId cid, int options,
BulkInsertState bistate);
extern TM_Result heap_delete(Relation relation, ItemPointer tid,
- CommandId cid, Snapshot crosscheck, bool wait,
- struct TM_FailureData *tmfd, bool changingPart);
+ CommandId cid, Snapshot crosscheck, int options,
+ struct TM_FailureData *tmfd, bool changingPart,
+ TupleTableSlot *oldSlot);
extern void heap_finish_speculative(Relation relation, ItemPointer tid);
extern void heap_abort_speculative(Relation relation, ItemPointer tid);
extern TM_Result heap_update(Relation relation, ItemPointer otid,
HeapTuple newtup,
- CommandId cid, Snapshot crosscheck, bool wait,
+ CommandId cid, Snapshot crosscheck, int options,
struct TM_FailureData *tmfd, LockTupleMode *lockmode,
- TU_UpdateIndexes *update_indexes);
-extern TM_Result heap_lock_tuple(Relation relation, HeapTuple tuple,
- CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy,
- bool follow_updates,
- Buffer *buffer, struct TM_FailureData *tmfd);
+ TU_UpdateIndexes *update_indexes,
+ TupleTableSlot *oldSlot);
+extern TM_Result heap_lock_tuple(Relation relation, ItemPointer tid,
+ TupleTableSlot *slot,
+ CommandId cid, LockTupleMode mode,
+ LockWaitPolicy wait_policy, bool follow_updates,
+ struct TM_FailureData *tmfd);
extern void heap_inplace_update(Relation relation, HeapTuple tuple);
extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple,
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index 74930433480..9ba149aa47d 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -1164,7 +1164,7 @@ typedef struct BTOptions
*/
extern void btbuildempty(Relation index);
extern bool btinsert(Relation rel, Datum *values, bool *isnull,
- ItemPointer ht_ctid, Relation heapRel,
+ Datum tupleid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
struct IndexInfo *indexInfo);
@@ -1288,6 +1288,7 @@ extern BTScanInsert _bt_mkscankey(Relation rel, IndexTuple itup);
extern void _bt_freestack(BTStack stack);
extern bool _bt_start_prim_scan(IndexScanDesc scan, ScanDirection dir);
extern void _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir);
+extern bool _bt_advance_array_keys_increment(IndexScanDesc scan, ScanDirection dir);
extern void _bt_preprocess_keys(IndexScanDesc scan);
extern bool _bt_checkkeys(IndexScanDesc scan, BTReadPageState *pstate, bool arrayKeys,
IndexTuple tuple, int tupnatts);
diff --git a/src/include/access/reloptions.h b/src/include/access/reloptions.h
index 81829b8270a..8ddc75df287 100644
--- a/src/include/access/reloptions.h
+++ b/src/include/access/reloptions.h
@@ -21,6 +21,7 @@
#include "access/amapi.h"
#include "access/htup.h"
+#include "access/tableam.h"
#include "access/tupdesc.h"
#include "nodes/pg_list.h"
#include "storage/lock.h"
@@ -224,6 +225,7 @@ extern Datum transformRelOptions(Datum oldOptions, List *defList,
bool acceptOidsOff, bool isReset);
extern List *untransformRelOptions(Datum options);
extern bytea *extractRelOptions(HeapTuple tuple, TupleDesc tupdesc,
+ const TableAmRoutine *tableam,
amoptions_function amoptions);
extern void *build_reloptions(Datum reloptions, bool validate,
relopt_kind kind,
diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h
index 521043304ab..24b04709012 100644
--- a/src/include/access/relscan.h
+++ b/src/include/access/relscan.h
@@ -122,6 +122,7 @@ typedef struct IndexScanDescData
struct ScanKeyData *keyData; /* array of index qualifier descriptors */
struct ScanKeyData *orderByData; /* array of ordering op descriptors */
bool xs_want_itup; /* caller requests index tuples */
+ bool xs_want_rowid; /* caller requests rowid */
bool xs_temp_snap; /* unregister snapshot at scan end? */
/* signaling to index AM about killing index tuples */
@@ -145,6 +146,7 @@ typedef struct IndexScanDescData
struct TupleDescData *xs_hitupdesc; /* rowtype descriptor of xs_hitup */
ItemPointerData xs_heaptid; /* result */
+ NullableDatum xs_rowid; /* result if xs_want_rowid */
bool xs_heap_continue; /* T if must keep walking, potential
* further results */
IndexFetchTableData *xs_heapfetch;
diff --git a/src/include/access/spgist.h b/src/include/access/spgist.h
index d6a49531200..b9cc48aba37 100644
--- a/src/include/access/spgist.h
+++ b/src/include/access/spgist.h
@@ -197,7 +197,7 @@ extern IndexBuildResult *spgbuild(Relation heap, Relation index,
struct IndexInfo *indexInfo);
extern void spgbuildempty(Relation index);
extern bool spginsert(Relation index, Datum *values, bool *isnull,
- ItemPointer ht_ctid, Relation heapRel,
+ Datum tupleid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
struct IndexInfo *indexInfo);
diff --git a/src/include/access/sysattr.h b/src/include/access/sysattr.h
index e88dec71ee9..867b5eb489e 100644
--- a/src/include/access/sysattr.h
+++ b/src/include/access/sysattr.h
@@ -24,6 +24,7 @@
#define MaxTransactionIdAttributeNumber (-4)
#define MaxCommandIdAttributeNumber (-5)
#define TableOidAttributeNumber (-6)
-#define FirstLowInvalidHeapAttributeNumber (-7)
+#define RowIdAttributeNumber (-7)
+#define FirstLowInvalidHeapAttributeNumber (-8)
#endif /* SYSATTR_H */
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index da661289c1f..573a2576935 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -17,11 +17,15 @@
#ifndef TABLEAM_H
#define TABLEAM_H
+#include "access/amapi.h"
#include "access/relscan.h"
#include "access/sdir.h"
#include "access/xact.h"
#include "executor/tuptable.h"
#include "storage/read_stream.h"
+#include "nodes/execnodes.h"
+#include "storage/bufmgr.h"
+#include "utils/guc.h"
#include "utils/rel.h"
#include "utils/snapshot.h"
@@ -40,6 +44,16 @@ struct TBMIterateResult;
struct VacuumParams;
struct ValidateIndexState;
+typedef int (*AcquireSampleRowsFunc) (Relation relation, int elevel,
+ HeapTuple *rows, int targrows,
+ double *totalrows,
+ double *totaldeadrows);
+
+/* in commands/analyze.c */
+extern int acquire_sample_rows(Relation onerel, int elevel,
+ HeapTuple *rows, int targrows,
+ double *totalrows, double *totaldeadrows);
+
/*
* Bitmask values for the flags argument to the scan_begin callback.
*/
@@ -267,6 +281,11 @@ typedef struct TM_IndexDeleteOp
/* Follow update chain and lock latest version of tuple */
#define TUPLE_LOCK_FLAG_FIND_LAST_VERSION (1 << 1)
+/* "options" flag bits for table_tuple_update and table_tuple_delete */
+#define TABLE_MODIFY_WAIT 0x0001
+#define TABLE_MODIFY_FETCH_OLD_TUPLE 0x0002
+#define TABLE_MODIFY_LOCK_UPDATED 0x0004
+
/* Typedef for callback function for table_index_build_scan */
typedef void (*IndexBuildCallback) (Relation index,
@@ -303,6 +322,9 @@ typedef struct TableAmRoutine
*/
const TupleTableSlotOps *(*slot_callbacks) (Relation rel);
+ RowRefType (*get_row_ref_type) (Relation rel);
+
+ void (*free_rd_amcache) (Relation rel);
/* ------------------------------------------------------------------------
* Table scan callbacks.
@@ -455,7 +477,7 @@ typedef struct TableAmRoutine
* future searches.
*/
bool (*index_fetch_tuple) (struct IndexFetchTableData *scan,
- ItemPointer tid,
+ Datum tupleid,
Snapshot snapshot,
TupleTableSlot *slot,
bool *call_again, bool *all_dead);
@@ -472,7 +494,7 @@ typedef struct TableAmRoutine
* test, returns true, false otherwise.
*/
bool (*tuple_fetch_row_version) (Relation rel,
- ItemPointer tid,
+ Datum tupleid,
Snapshot snapshot,
TupleTableSlot *slot);
@@ -508,23 +530,19 @@ typedef struct TableAmRoutine
*/
/* see table_tuple_insert() for reference about parameters */
- void (*tuple_insert) (Relation rel, TupleTableSlot *slot,
+ TupleTableSlot *(*tuple_insert) (Relation rel, TupleTableSlot *slot,
CommandId cid, int options,
struct BulkInsertStateData *bistate);
- /* see table_tuple_insert_speculative() for reference about parameters */
- void (*tuple_insert_speculative) (Relation rel,
- TupleTableSlot *slot,
- CommandId cid,
- int options,
- struct BulkInsertStateData *bistate,
- uint32 specToken);
-
- /* see table_tuple_complete_speculative() for reference about parameters */
- void (*tuple_complete_speculative) (Relation rel,
- TupleTableSlot *slot,
- uint32 specToken,
- bool succeeded);
+ TupleTableSlot *(*tuple_insert_with_arbiter) (ResultRelInfo *resultRelInfo,
+ TupleTableSlot *slot,
+ CommandId cid, int options,
+ struct BulkInsertStateData *bistate,
+ List *arbiterIndexes,
+ EState *estate,
+ LockTupleMode lockmode,
+ TupleTableSlot *lockedSlot,
+ TupleTableSlot *tempSlot);
/* see table_multi_insert() for reference about parameters */
void (*multi_insert) (Relation rel, TupleTableSlot **slots, int nslots,
@@ -532,29 +550,31 @@ typedef struct TableAmRoutine
/* see table_tuple_delete() for reference about parameters */
TM_Result (*tuple_delete) (Relation rel,
- ItemPointer tid,
+ Datum tupleid,
CommandId cid,
Snapshot snapshot,
Snapshot crosscheck,
- bool wait,
+ int options,
TM_FailureData *tmfd,
- bool changingPart);
+ bool changingPart,
+ TupleTableSlot *oldSlot);
/* see table_tuple_update() for reference about parameters */
TM_Result (*tuple_update) (Relation rel,
- ItemPointer otid,
+ Datum tupleid,
TupleTableSlot *slot,
CommandId cid,
Snapshot snapshot,
Snapshot crosscheck,
- bool wait,
+ int options,
TM_FailureData *tmfd,
LockTupleMode *lockmode,
- TU_UpdateIndexes *update_indexes);
+ TU_UpdateIndexes *update_indexes,
+ TupleTableSlot *oldSlot);
/* see table_tuple_lock() for reference about parameters */
TM_Result (*tuple_lock) (Relation rel,
- ItemPointer tid,
+ Datum tupleid,
Snapshot snapshot,
TupleTableSlot *slot,
CommandId cid,
@@ -873,6 +893,14 @@ typedef struct TableAmRoutine
struct SampleScanState *scanstate,
TupleTableSlot *slot);
+ /* Check if tuple in the slot belongs to the current transaction */
+ bool (*tuple_is_current) (Relation rel, TupleTableSlot *slot);
+
+ void (*analyze_table) (Relation relation,
+ AcquireSampleRowsFunc *func,
+ BlockNumber *totalpages);
+
+ bytea *(*reloptions) (char relkind, Datum reloptions, bool validate);
} TableAmRoutine;
@@ -1239,7 +1267,7 @@ table_index_fetch_end(struct IndexFetchTableData *scan)
*/
static inline bool
table_index_fetch_tuple(struct IndexFetchTableData *scan,
- ItemPointer tid,
+ Datum tupleid,
Snapshot snapshot,
TupleTableSlot *slot,
bool *call_again, bool *all_dead)
@@ -1252,7 +1280,7 @@ table_index_fetch_tuple(struct IndexFetchTableData *scan,
if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
elog(ERROR, "unexpected table_index_fetch_tuple call during logical decoding");
- return scan->rel->rd_tableam->index_fetch_tuple(scan, tid, snapshot,
+ return scan->rel->rd_tableam->index_fetch_tuple(scan, tupleid, snapshot,
slot, call_again,
all_dead);
}
@@ -1286,7 +1314,7 @@ extern bool table_index_fetch_tuple_check(Relation rel,
*/
static inline bool
table_tuple_fetch_row_version(Relation rel,
- ItemPointer tid,
+ Datum tupleid,
Snapshot snapshot,
TupleTableSlot *slot)
{
@@ -1298,7 +1326,7 @@ table_tuple_fetch_row_version(Relation rel,
if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
elog(ERROR, "unexpected table_tuple_fetch_row_version call during logical decoding");
- return rel->rd_tableam->tuple_fetch_row_version(rel, tid, snapshot, slot);
+ return rel->rd_tableam->tuple_fetch_row_version(rel, tupleid, snapshot, slot);
}
/*
@@ -1398,45 +1426,32 @@ table_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate)
* insertion. But note that any toasting of fields within the slot is NOT
* reflected in the slots contents.
*/
-static inline void
+static inline TupleTableSlot *
table_tuple_insert(Relation rel, TupleTableSlot *slot, CommandId cid,
int options, struct BulkInsertStateData *bistate)
{
- rel->rd_tableam->tuple_insert(rel, slot, cid, options,
- bistate);
+ return rel->rd_tableam->tuple_insert(rel, slot, cid, options, bistate);
}
-/*
- * Perform a "speculative insertion". These can be backed out afterwards
- * without aborting the whole transaction. Other sessions can wait for the
- * speculative insertion to be confirmed, turning it into a regular tuple, or
- * aborted, as if it never existed. Speculatively inserted tuples behave as
- * "value locks" of short duration, used to implement INSERT .. ON CONFLICT.
- *
- * A transaction having performed a speculative insertion has to either abort,
- * or finish the speculative insertion with
- * table_tuple_complete_speculative(succeeded = ...).
- */
-static inline void
-table_tuple_insert_speculative(Relation rel, TupleTableSlot *slot,
- CommandId cid, int options,
- struct BulkInsertStateData *bistate,
- uint32 specToken)
-{
- rel->rd_tableam->tuple_insert_speculative(rel, slot, cid, options,
- bistate, specToken);
-}
-
-/*
- * Complete "speculative insertion" started in the same transaction. If
- * succeeded is true, the tuple is fully inserted, if false, it's removed.
- */
-static inline void
-table_tuple_complete_speculative(Relation rel, TupleTableSlot *slot,
- uint32 specToken, bool succeeded)
+static inline TupleTableSlot *
+table_tuple_insert_with_arbiter(ResultRelInfo *resultRelInfo,
+ TupleTableSlot *slot,
+ CommandId cid, int options,
+ struct BulkInsertStateData *bistate,
+ List *arbiterIndexes,
+ EState *estate,
+ LockTupleMode lockmode,
+ TupleTableSlot *lockedSlot,
+ TupleTableSlot *tempSlot)
{
- rel->rd_tableam->tuple_complete_speculative(rel, slot, specToken,
- succeeded);
+ Relation rel = resultRelInfo->ri_RelationDesc;
+
+ return rel->rd_tableam->tuple_insert_with_arbiter(resultRelInfo,
+ slot, cid, options,
+ bistate, arbiterIndexes,
+ estate,
+ lockmode, lockedSlot,
+ tempSlot);
}
/*
@@ -1462,7 +1477,7 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
}
/*
- * Delete a tuple.
+ * Delete a tuple (and optionally lock the last tuple version).
*
* NB: do not call this directly unless prepared to deal with
* concurrent-update conditions. Use simple_table_tuple_delete instead.
@@ -1473,11 +1488,21 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
* cid - delete command ID (used for visibility test, and stored into
* cmax if successful)
* crosscheck - if not InvalidSnapshot, also check tuple against this
- * wait - true if should wait for any conflicting update to commit/abort
+ * options:
+ * If TABLE_MODIFY_WAIT, wait for any conflicting update to commit/abort.
+ * If TABLE_MODIFY_FETCH_OLD_TUPLE option is given, the existing tuple is
+ * fetched into oldSlot when the update is successful.
+ * If TABLE_MODIFY_LOCK_UPDATED option is given and the tuple is
+ * concurrently updated, then the last tuple version is locked and fetched
+ * into oldSlot.
+ *
* Output parameters:
* tmfd - filled in failure cases (see below)
* changingPart - true iff the tuple is being moved to another partition
* table due to an update of the partition key. Otherwise, false.
+ * oldSlot - slot to save the deleted or locked tuple. Can be NULL if none of
+ * TABLE_MODIFY_FETCH_OLD_TUPLE or TABLE_MODIFY_LOCK_UPDATED options
+ * is specified.
*
* Normal, successful return value is TM_Ok, which means we did actually
* delete it. Failure return codes are TM_SelfModified, TM_Updated, and
@@ -1488,17 +1513,19 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
* TM_FailureData for additional info.
*/
static inline TM_Result
-table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
- Snapshot snapshot, Snapshot crosscheck, bool wait,
- TM_FailureData *tmfd, bool changingPart)
+table_tuple_delete(Relation rel, Datum tupleid, CommandId cid,
+ Snapshot snapshot, Snapshot crosscheck, int options,
+ TM_FailureData *tmfd, bool changingPart,
+ TupleTableSlot *oldSlot)
{
- return rel->rd_tableam->tuple_delete(rel, tid, cid,
+ return rel->rd_tableam->tuple_delete(rel, tupleid, cid,
snapshot, crosscheck,
- wait, tmfd, changingPart);
+ options, tmfd, changingPart,
+ oldSlot);
}
/*
- * Update a tuple.
+ * Update a tuple (and optionally lock the last tuple version).
*
* NB: do not call this directly unless you are prepared to deal with
* concurrent-update conditions. Use simple_table_tuple_update instead.
@@ -1510,13 +1537,23 @@ table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
* cid - update command ID (used for visibility test, and stored into
* cmax/cmin if successful)
* crosscheck - if not InvalidSnapshot, also check old tuple against this
- * wait - true if should wait for any conflicting update to commit/abort
+ * options:
+ * If TABLE_MODIFY_WAIT, wait for any conflicting update to commit/abort.
+ * If TABLE_MODIFY_FETCH_OLD_TUPLE option is given, the existing tuple is
+ * fetched into oldSlot when the update is successful.
+ * If TABLE_MODIFY_LOCK_UPDATED option is given and the tuple is
+ * concurrently updated, then the last tuple version is locked and fetched
+ * into oldSlot.
+ *
* Output parameters:
* tmfd - filled in failure cases (see below)
* lockmode - filled with lock mode acquired on tuple
* update_indexes - in success cases this is set to true if new index entries
* are required for this tuple
- *
+ * oldSlot - slot to save the deleted or locked tuple. Can be NULL if none of
+ * TABLE_MODIFY_FETCH_OLD_TUPLE or TABLE_MODIFY_LOCK_UPDATED options
+ * is specified.
+ *
* Normal, successful return value is TM_Ok, which means we did actually
* update it. Failure return codes are TM_SelfModified, TM_Updated, and
* TM_BeingModified (the last only possible if wait == false).
@@ -1532,15 +1569,17 @@ table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
* for additional info.
*/
static inline TM_Result
-table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot,
+table_tuple_update(Relation rel, Datum tupleid, TupleTableSlot *slot,
CommandId cid, Snapshot snapshot, Snapshot crosscheck,
- bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode,
- TU_UpdateIndexes *update_indexes)
+ int options, TM_FailureData *tmfd, LockTupleMode *lockmode,
+ TU_UpdateIndexes *update_indexes,
+ TupleTableSlot *oldSlot)
{
- return rel->rd_tableam->tuple_update(rel, otid, slot,
+ return rel->rd_tableam->tuple_update(rel, tupleid, slot,
cid, snapshot, crosscheck,
- wait, tmfd,
- lockmode, update_indexes);
+ options, tmfd,
+ lockmode, update_indexes,
+ oldSlot);
}
/*
@@ -1577,12 +1616,12 @@ table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot,
* comments for struct TM_FailureData for additional info.
*/
static inline TM_Result
-table_tuple_lock(Relation rel, ItemPointer tid, Snapshot snapshot,
+table_tuple_lock(Relation rel, Datum tupleid, Snapshot snapshot,
TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
LockWaitPolicy wait_policy, uint8 flags,
TM_FailureData *tmfd)
{
- return rel->rd_tableam->tuple_lock(rel, tid, snapshot, slot,
+ return rel->rd_tableam->tuple_lock(rel, tupleid, snapshot, slot,
cid, mode, wait_policy,
flags, tmfd);
}
@@ -2046,6 +2085,11 @@ table_scan_sample_next_tuple(TableScanDesc scan,
slot);
}
+static inline bool
+table_tuple_is_current(Relation rel, TupleTableSlot *slot)
+{
+ return rel->rd_tableam->tuple_is_current(rel, slot);
+}
/* ----------------------------------------------------------------------------
* Functions to make modifications a bit simpler.
@@ -2053,11 +2097,13 @@ table_scan_sample_next_tuple(TableScanDesc scan,
*/
extern void simple_table_tuple_insert(Relation rel, TupleTableSlot *slot);
-extern void simple_table_tuple_delete(Relation rel, ItemPointer tid,
- Snapshot snapshot);
-extern void simple_table_tuple_update(Relation rel, ItemPointer otid,
+extern void simple_table_tuple_delete(Relation rel, Datum tupleid,
+ Snapshot snapshot,
+ TupleTableSlot *oldSlot);
+extern void simple_table_tuple_update(Relation rel, Datum tupleid,
TupleTableSlot *slot, Snapshot snapshot,
- TU_UpdateIndexes *update_indexes);
+ TU_UpdateIndexes *update_indexes,
+ TupleTableSlot *oldSlot);
/* ----------------------------------------------------------------------------
@@ -2098,12 +2144,60 @@ extern void table_block_relation_estimate_size(Relation rel,
*/
extern const TableAmRoutine *GetTableAmRoutine(Oid amhandler);
+extern const TableAmRoutine *GetTableAmRoutineByAmOid(Oid amoid);
+extern const TableAmRoutine *GetHeapamTableAmRoutine(void);
-/* ----------------------------------------------------------------------------
- * Functions in heapam_handler.c
- * ----------------------------------------------------------------------------
- */
+static inline RowRefType
+table_get_row_ref_type(Relation rel)
+{
+ if (rel->rd_tableam)
+ return rel->rd_tableam->get_row_ref_type(rel);
+ else
+ return ROW_REF_TID;
+}
-extern const TableAmRoutine *GetHeapamTableAmRoutine(void);
+static inline void
+table_free_rd_amcache(Relation rel)
+{
+ if (rel->rd_tableam)
+ {
+ rel->rd_tableam->free_rd_amcache(rel);
+ }
+ else
+ {
+ if (rel->rd_amcache)
+ pfree(rel->rd_amcache);
+ rel->rd_amcache = NULL;
+ }
+}
+
+static inline void
+table_analyze(Relation relation, AcquireSampleRowsFunc *func,
+ BlockNumber *totalpages)
+{
+ if (relation->rd_tableam->analyze_table)
+ {
+ relation->rd_tableam->analyze_table(relation, func, totalpages);
+ }
+ else
+ {
+ *func = acquire_sample_rows;
+ *totalpages = RelationGetNumberOfBlocks(relation);
+ }
+}
+
+static inline bytea *
+table_reloptions(Relation rel, char relkind,
+ Datum reloptions, bool validate)
+{
+ return rel->rd_tableam->reloptions(relkind, reloptions, validate);
+}
+
+static inline bytea *
+tableam_reloptions(const TableAmRoutine *tableam, char relkind,
+ Datum reloptions, bool validate)
+{
+ return tableam->reloptions(relkind, reloptions, validate);
+}
#endif /* TABLEAM_H */
diff --git a/src/include/access/transam.h b/src/include/access/transam.h
index 28a2d287fd5..bd6430c2865 100644
--- a/src/include/access/transam.h
+++ b/src/include/access/transam.h
@@ -15,7 +15,9 @@
#define TRANSAM_H
#include "access/xlogdefs.h"
-
+#ifndef FRONTEND
+#include "port/atomics.h"
+#endif
/* ----------------
* Special transaction ID values
@@ -196,6 +198,22 @@ FullTransactionIdAdvance(FullTransactionId *dest)
#define FirstUnpinnedObjectId 12000
#define FirstNormalObjectId 16384
+#define COMMITSEQNO_INPROGRESS UINT64CONST(0x0)
+#define COMMITSEQNO_NON_DELETED UINT64CONST(0x1)
+#define COMMITSEQNO_ABORTED UINT64CONST(0x2)
+#define COMMITSEQNO_FROZEN UINT64CONST(0x3)
+#define COMMITSEQNO_COMMITTING UINT64CONST(0x4)
+#define COMMITSEQNO_FIRST_NORMAL UINT64CONST(0x5)
+#define COMMITSEQNO_MAX_NORMAL UINT64CONST(0x7FFFFFFFFFFFFFFF)
+
+#define COMMITSEQNO_IS_INPROGRESS(csn) ((csn) == COMMITSEQNO_INPROGRESS || (csn) == COMMITSEQNO_NON_DELETED)
+#define COMMITSEQNO_IS_NON_DELETED(csn) ((csn) == COMMITSEQNO_NON_DELETED)
+#define COMMITSEQNO_IS_ABORTED(csn) ((csn) == COMMITSEQNO_ABORTED)
+#define COMMITSEQNO_IS_FROZEN(csn) ((csn) == COMMITSEQNO_FROZEN)
+#define COMMITSEQNO_IS_NORMAL(csn) ((csn) >= COMMITSEQNO_FIRST_NORMAL)
+#define COMMITSEQNO_IS_COMMITTING(csn) ((csn) == COMMITSEQNO_COMMITTING)
+#define COMMITSEQNO_IS_COMMITTED(csn) ((csn) >= COMMITSEQNO_FROZEN)
+
/*
* TransamVariables is a data structure in shared memory that is used to track
* OID and XID assignment state. For largely historical reasons, there is
@@ -252,9 +270,13 @@ typedef struct TransamVariablesData
*/
TransactionId oldestClogXid; /* oldest it's safe to look up in clog */
+#ifndef FRONTEND
+ pg_atomic_uint64 nextCommitSeqNo;
+#else
+ CommitSeqNo nextCommitSeqNo;
+#endif
} TransamVariablesData;
-
/* ----------------
* extern declarations
* ----------------
@@ -294,6 +316,7 @@ extern void AdvanceOldestClogXid(TransactionId oldest_datfrozenxid);
extern bool ForceTransactionIdLimitUpdate(void);
extern Oid GetNewObjectId(void);
extern void StopGeneratingPinnedObjectIds(void);
+extern CommitSeqNo GetCurrentCSN(void);
#ifdef USE_ASSERT_CHECKING
extern void AssertTransactionIdInAllowableRange(TransactionId xid);
diff --git a/src/include/access/xact.h b/src/include/access/xact.h
index 6d4439f0524..327328da54c 100644
--- a/src/include/access/xact.h
+++ b/src/include/access/xact.h
@@ -527,4 +527,7 @@ extern void EnterParallelMode(void);
extern void ExitParallelMode(void);
extern bool IsInParallelMode(void);
+typedef void (*xact_redo_hook_type) (TransactionId xid, XLogRecPtr lsn);
+extern xact_redo_hook_type xact_redo_hook;
+
#endif /* XACT_H */
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 2c507ea618c..da077b00ee1 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -56,6 +56,7 @@ extern PGDLLIMPORT bool track_wal_io_timing;
extern PGDLLIMPORT int wal_decode_buffer_size;
extern PGDLLIMPORT int CheckPointSegments;
+extern PGDLLIMPORT CommitSeqNo startupCommitSeqNo;
/* Archive modes */
typedef enum ArchiveMode
@@ -292,6 +293,7 @@ extern void do_pg_backup_start(const char *backupidstr, bool fast,
StringInfo tblspcmapfile);
extern void do_pg_backup_stop(BackupState *state, bool waitforarchive);
extern void do_pg_abort_backup(int code, Datum arg);
+extern bool have_backup_in_progress(void);
extern void register_persistent_abort_backup_handler(void);
extern SessionBackupState get_backup_status(void);
@@ -307,4 +309,14 @@ extern SessionBackupState get_backup_status(void);
/* files to signal promotion to primary */
#define PROMOTE_SIGNAL_FILE "promote"
+typedef void (*CheckPoint_hook_type) (XLogRecPtr checkPointRedo, int flags);
+extern PGDLLIMPORT CheckPoint_hook_type CheckPoint_hook;
+extern double CheckPointProgress;
+typedef void (*after_checkpoint_cleanup_hook_type)(XLogRecPtr checkPointRedo,
+ int flags);
+extern PGDLLIMPORT after_checkpoint_cleanup_hook_type
+ after_checkpoint_cleanup_hook;
+
+extern void (*RedoShutdownHook) (void);
+
#endif /* XLOG_H */
diff --git a/src/include/archive/archive_module.h b/src/include/archive/archive_module.h
index 763af76e542..d73b9661a4f 100644
--- a/src/include/archive/archive_module.h
+++ b/src/include/archive/archive_module.h
@@ -37,13 +37,17 @@ typedef struct ArchiveModuleState
*/
typedef void (*ArchiveStartupCB) (ArchiveModuleState *state);
typedef bool (*ArchiveCheckConfiguredCB) (ArchiveModuleState *state);
-typedef bool (*ArchiveFileCB) (ArchiveModuleState *state, const char *file, const char *path);
+typedef void (*ArchivePreloadFileCB) (ArchiveModuleState *state,
+ const char *file, const char *path);
+typedef bool (*ArchiveFileCB) (ArchiveModuleState *state,
+ const char *file, const char *path);
typedef void (*ArchiveShutdownCB) (ArchiveModuleState *state);
typedef struct ArchiveModuleCallbacks
{
ArchiveStartupCB startup_cb;
ArchiveCheckConfiguredCB check_configured_cb;
+ ArchivePreloadFileCB archive_preload_file_cb;
ArchiveFileCB archive_file_cb;
ArchiveShutdownCB shutdown_cb;
} ArchiveModuleCallbacks;
diff --git a/src/include/c.h b/src/include/c.h
index dc1841346cd..b8f75ac8329 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -637,7 +637,7 @@ typedef double float8;
/*
* Oid, RegProcedure, TransactionId, SubTransactionId, MultiXactId,
- * CommandId
+ * CommandId, CommitSeqNo
*/
/* typedef Oid is in postgres_ext.h */
@@ -668,6 +668,8 @@ typedef uint32 CommandId;
#define FirstCommandId ((CommandId) 0)
#define InvalidCommandId (~(CommandId)0)
+typedef uint64 CommitSeqNo;
+
/* ----------------
* Variable-length datatypes all share the 'struct varlena' header.
diff --git a/src/include/catalog/dependency.h b/src/include/catalog/dependency.h
index 6908ca7180a..c9b59706373 100644
--- a/src/include/catalog/dependency.h
+++ b/src/include/catalog/dependency.h
@@ -96,6 +96,8 @@ typedef struct ObjectAddresses ObjectAddresses;
#define PERFORM_DELETION_SKIP_EXTENSIONS 0x0010 /* keep extensions */
#define PERFORM_DELETION_CONCURRENT_LOCK 0x0020 /* normal drop with
* concurrent lock mode */
+#define PERFORM_DELETION_OF_RELATION 0x0040 /* used for orioledb
+ * extension */
/* in dependency.c */
diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h
index 7d434f8e653..0beab397c79 100644
--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -215,4 +215,6 @@ itemptr_decode(ItemPointer itemptr, int64 encoded)
ItemPointerSet(itemptr, block, offset);
}
+extern void index_update_stats(Relation rel, bool hasindex, double reltuples);
+
#endif /* INDEX_H */
diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h
index 29c511e3196..628e43dc33f 100644
--- a/src/include/commands/defrem.h
+++ b/src/include/commands/defrem.h
@@ -41,6 +41,10 @@ extern char *makeObjectName(const char *name1, const char *name2,
extern char *ChooseRelationName(const char *name1, const char *name2,
const char *label, Oid namespaceid,
bool isconstraint);
+extern List *ChooseIndexColumnNames(const List *indexElems);
+extern char *ChooseIndexName(const char *tabname, Oid namespaceId,
+ const List *colnames, const List *exclusionOpNames,
+ bool primary, bool isconstraint);
extern bool CheckIndexCompatible(Oid oldId,
const char *accessMethodName,
const List *attributeList,
@@ -158,4 +162,7 @@ extern int defGetTypeLength(DefElem *def);
extern List *defGetStringList(DefElem *def);
extern void errorConflictingDefElem(DefElem *defel, ParseState *pstate) pg_attribute_noreturn();
+typedef Oid (*GetDefaultOpClass_hook_type)(Oid type_id, Oid am_id);
+extern PGDLLIMPORT GetDefaultOpClass_hook_type GetDefaultOpClass_hook;
+
#endif /* DEFREM_H */
diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h
index 9b8b351d9a2..5a6fabe8ed9 100644
--- a/src/include/commands/explain.h
+++ b/src/include/commands/explain.h
@@ -107,6 +107,14 @@ extern void ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into,
const instr_time *planduration,
const BufferUsage *bufusage,
const MemoryContextCounters *mem_counters);
+extern void ExplainNode(PlanState *planstate, List *ancestors,
+ const char *relationship, const char *plan_name,
+ ExplainState *es);
+extern void show_scan_qual(List *qual, const char *qlabel,
+ PlanState *planstate, List *ancestors,
+ ExplainState *es);
+extern void show_instrumentation_count(const char *qlabel, int which,
+ PlanState *planstate, ExplainState *es);
extern void ExplainPrintPlan(ExplainState *es, QueryDesc *queryDesc);
extern void ExplainPrintTriggers(ExplainState *es, QueryDesc *queryDesc);
diff --git a/src/include/commands/trigger.h b/src/include/commands/trigger.h
index 8a5a9fe6422..c16e6b6e5a0 100644
--- a/src/include/commands/trigger.h
+++ b/src/include/commands/trigger.h
@@ -209,15 +209,15 @@ extern void ExecASDeleteTriggers(EState *estate,
extern bool ExecBRDeleteTriggers(EState *estate,
EPQState *epqstate,
ResultRelInfo *relinfo,
- ItemPointer tupleid,
+ Datum tupleid,
HeapTuple fdw_trigtuple,
TupleTableSlot **epqslot,
TM_Result *tmresult,
TM_FailureData *tmfd);
extern void ExecARDeleteTriggers(EState *estate,
ResultRelInfo *relinfo,
- ItemPointer tupleid,
HeapTuple fdw_trigtuple,
+ TupleTableSlot *slot,
TransitionCaptureState *transition_capture,
bool is_crosspart_update);
extern bool ExecIRDeleteTriggers(EState *estate,
@@ -231,7 +231,7 @@ extern void ExecASUpdateTriggers(EState *estate,
extern bool ExecBRUpdateTriggers(EState *estate,
EPQState *epqstate,
ResultRelInfo *relinfo,
- ItemPointer tupleid,
+ Datum tupleid,
HeapTuple fdw_trigtuple,
TupleTableSlot *newslot,
TM_Result *tmresult,
@@ -240,8 +240,8 @@ extern void ExecARUpdateTriggers(EState *estate,
ResultRelInfo *relinfo,
ResultRelInfo *src_partinfo,
ResultRelInfo *dst_partinfo,
- ItemPointer tupleid,
HeapTuple fdw_trigtuple,
+ TupleTableSlot *oldslot,
TupleTableSlot *newslot,
List *recheckIndexes,
TransitionCaptureState *transition_capture,
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index 759f9a87d38..dfea1e93e33 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -378,6 +378,9 @@ extern void parallel_vacuum_main(dsm_segment *seg, shm_toc *toc);
extern void analyze_rel(Oid relid, RangeVar *relation,
VacuumParams *params, List *va_cols, bool in_outer_xact,
BufferAccessStrategy bstrategy);
+extern int acquire_sample_rows(Relation onerel, int elevel,
+ HeapTuple *rows, int targrows,
+ double *totalrows, double *totaldeadrows);
extern bool std_typanalyze(VacAttrStats *stats);
/* in utils/misc/sampling.c --- duplicate of declarations in utils/sampling.h */
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 9770752ea3c..1833f4d84b1 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -633,6 +633,16 @@ extern List *ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
bool noDupErr,
bool *specConflict, List *arbiterIndexes,
bool onlySummarizing);
+extern List *ExecUpdateIndexTuples(ResultRelInfo *resultRelInfo,
+ TupleTableSlot *slot,
+ TupleTableSlot *oldSlot,
+ EState *estate,
+ bool noDupErr,
+ bool *specConflict, List *arbiterIndexes,
+ bool onlySummarizing);
+extern void ExecDeleteIndexTuples(ResultRelInfo *resultRelInfo,
+ TupleTableSlot *slot,
+ EState *estate);
extern bool ExecCheckIndexConstraints(ResultRelInfo *resultRelInfo,
TupleTableSlot *slot,
EState *estate, ItemPointer conflictTid,
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index fcde3876b28..777e59c86e9 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -13,6 +13,7 @@
#define FDWAPI_H
#include "access/parallel.h"
+#include "access/tableam.h"
#include "nodes/execnodes.h"
#include "nodes/pathnodes.h"
@@ -148,11 +149,6 @@ typedef void (*ExplainForeignModify_function) (ModifyTableState *mtstate,
typedef void (*ExplainDirectModify_function) (ForeignScanState *node,
struct ExplainState *es);
-typedef int (*AcquireSampleRowsFunc) (Relation relation, int elevel,
- HeapTuple *rows, int targrows,
- double *totalrows,
- double *totaldeadrows);
-
typedef bool (*AnalyzeForeignTable_function) (Relation relation,
AcquireSampleRowsFunc *func,
BlockNumber *totalpages);
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index cd1b16296b5..48c7fec14ac 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -457,6 +457,8 @@ typedef struct ResultRelInfo
/* relation descriptor for result relation */
Relation ri_RelationDesc;
+ RowRefType ri_RowRefType;
+
/* # of indices existing on result relation */
int ri_NumIndices;
@@ -754,6 +756,7 @@ typedef struct ExecRowMark
Index prti; /* parent range table index, if child */
Index rowmarkId; /* unique identifier for resjunk columns */
RowMarkType markType; /* see enum in nodes/plannodes.h */
+ RowRefType refType;
LockClauseStrength strength; /* LockingClause's strength, or LCS_NONE */
LockWaitPolicy waitPolicy; /* NOWAIT and SKIP LOCKED */
bool ermActive; /* is this mark relevant for current tuple? */
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 67c90a2bd32..82443390a85 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -1103,6 +1103,7 @@ typedef struct RangeTblEntry
Index perminfoindex pg_node_attr(query_jumble_ignore);
/* sampling info, or NULL */
struct TableSampleClause *tablesample;
+ RowRefType reftype;
/*
* Fields valid for a subquery RTE (else NULL):
@@ -2992,6 +2993,7 @@ typedef struct CreateAmStmt
char *amname; /* access method name */
List *handler_name; /* handler function name */
char amtype; /* type of access method */
+ char *tableam_name; /* table AM name */
} CreateAmStmt;
/* ----------------------
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 1aeeaec95e1..9b41e298b0b 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -1353,7 +1353,7 @@ typedef enum RowMarkType
* child relations will also have entries with isParent = true. The child
* entries have rti == child rel's RT index and prti == top parent's RT index,
* and can therefore be recognized as children by the fact that prti != rti.
- * The parent's allMarkTypes field gets the OR of (1<nextXid */
TransactionId oldestRunningXid; /* *not* oldestXmin */
TransactionId latestCompletedXid; /* so we can set xmax */
+ CommitSeqNo csn; /* current csn */
TransactionId xids[FLEXIBLE_ARRAY_MEMBER];
} xl_running_xacts;
diff --git a/src/include/utils/catcache.h b/src/include/utils/catcache.h
index 3fb9647b87c..8b692cafea1 100644
--- a/src/include/utils/catcache.h
+++ b/src/include/utils/catcache.h
@@ -227,4 +227,28 @@ extern void PrepareToInvalidateCacheTuple(Relation relation,
HeapTuple newtuple,
void (*function) (int, uint32, Oid));
+typedef CatCTup *(*SearchCatCacheInternal_hook_type)(CatCache *cache,
+ int nkeys,
+ Datum v1, Datum v2,
+ Datum v3, Datum v4);
+extern SearchCatCacheInternal_hook_type SearchCatCacheInternal_hook;
+
+typedef CatCList *(*SearchCatCacheList_hook_type)(CatCache *cache,
+ int nkeys,
+ Datum v1,
+ Datum v2,
+ Datum v3);
+extern SearchCatCacheList_hook_type SearchCatCacheList_hook;
+
+typedef TupleDesc (*SysCacheGetAttr_hook_type)(CatCache *SysCache);
+extern SysCacheGetAttr_hook_type SysCacheGetAttr_hook;
+
+typedef uint32 (*GetCatCacheHashValue_hook_type)(CatCache *cache,
+ int nkeys,
+ Datum v1,
+ Datum v2,
+ Datum v3,
+ Datum v4);
+extern GetCatCacheHashValue_hook_type GetCatCacheHashValue_hook;
+
#endif /* CATCACHE_H */
diff --git a/src/include/utils/elog.h b/src/include/utils/elog.h
index e54eca5b489..f583eca37ee 100644
--- a/src/include/utils/elog.h
+++ b/src/include/utils/elog.h
@@ -537,4 +537,10 @@ extern void write_jsonlog(ErrorData *edata);
*/
extern void write_stderr(const char *fmt,...) pg_attribute_printf(1, 2);
+typedef void (*CustomErrorCleanupHookType) (void);
+
+extern CustomErrorCleanupHookType CustomErrorCleanupHook;
+
+extern void CustomErrorCleanup(void);
+
#endif /* ELOG_H */
diff --git a/src/include/utils/fmgrtab.h b/src/include/utils/fmgrtab.h
index 151dd74055d..f8666ba7087 100644
--- a/src/include/utils/fmgrtab.h
+++ b/src/include/utils/fmgrtab.h
@@ -46,4 +46,7 @@ extern PGDLLIMPORT const Oid fmgr_last_builtin_oid; /* highest function OID in
#define InvalidOidBuiltinMapping PG_UINT16_MAX
extern PGDLLIMPORT const uint16 fmgr_builtin_oid_index[];
+extern const FmgrBuiltin *fmgr_isbuiltin(Oid id);
+extern const FmgrBuiltin *fmgr_lookupByName(const char *name);
+
#endif /* FMGRTAB_H */
diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h
index 24695facf22..69498b9f77f 100644
--- a/src/include/utils/inval.h
+++ b/src/include/utils/inval.h
@@ -22,6 +22,7 @@ extern PGDLLIMPORT int debug_discard_caches;
typedef void (*SyscacheCallbackFunction) (Datum arg, int cacheid, uint32 hashvalue);
typedef void (*RelcacheCallbackFunction) (Datum arg, Oid relid);
+typedef void (*UsercacheCallbackFunction) (Datum arg, Oid arg1, Oid arg2, Oid arg3);
extern void AcceptInvalidationMessages(void);
@@ -48,6 +49,8 @@ extern void CacheInvalidateRelcacheByTuple(HeapTuple classTuple);
extern void CacheInvalidateRelcacheByRelid(Oid relid);
+extern void CacheInvalidateRelcacheByDbidRelid(Oid dbid, Oid relid);
+
extern void CacheInvalidateSmgr(RelFileLocatorBackend rlocator);
extern void CacheInvalidateRelmap(Oid databaseId);
@@ -59,6 +62,9 @@ extern void CacheRegisterSyscacheCallback(int cacheid,
extern void CacheRegisterRelcacheCallback(RelcacheCallbackFunction func,
Datum arg);
+extern void CacheRegisterUsercacheCallback(UsercacheCallbackFunction func,
+ Datum arg);
+
extern void CallSyscacheCallbacks(int cacheid, uint32 hashvalue);
extern void InvalidateSystemCaches(void);
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 040968d6ff2..9ce2a266dce 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -101,6 +101,8 @@ extern void make_icu_collator(const char *iculocstr,
extern bool pg_locale_deterministic(pg_locale_t locale);
extern pg_locale_t pg_newlocale_from_collation(Oid collid);
+typedef bool (*pg_newlocale_from_collation_hook_type)();
+extern pg_newlocale_from_collation_hook_type pg_newlocale_from_collation_hook;
extern char *get_collation_actual_version(char collprovider, const char *collcollate);
extern int pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale);
diff --git a/src/include/utils/resowner_private.h b/src/include/utils/resowner_private.h
new file mode 100644
index 00000000000..d32a3a42ef0
--- /dev/null
+++ b/src/include/utils/resowner_private.h
@@ -0,0 +1,33 @@
+/*-------------------------------------------------------------------------
+ *
+ * resowner_private.h
+ * POSTGRES resource owner private definitions.
+ *
+ * See utils/resowner/README for more info.
+ *
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/utils/resowner_private.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef RESOWNER_PRIVATE_H
+#define RESOWNER_PRIVATE_H
+
+#include "storage/dsm.h"
+#include "storage/fd.h"
+#include "storage/lock.h"
+#include "utils/catcache.h"
+#include "utils/plancache.h"
+#include "utils/resowner.h"
+#include "utils/snapshot.h"
+
+
+extern void ResourceOwnerRememberCatCacheRef(ResourceOwner owner,
+ HeapTuple tuple);
+extern void ResourceOwnerRememberCatCacheListRef(ResourceOwner owner,
+ CatCList *list);
+
+#endif /* RESOWNER_PRIVATE_H */
diff --git a/src/include/utils/snapmgr.h b/src/include/utils/snapmgr.h
index 9398a84051c..3f6952d9895 100644
--- a/src/include/utils/snapmgr.h
+++ b/src/include/utils/snapmgr.h
@@ -18,6 +18,9 @@
#include "utils/resowner.h"
#include "utils/snapshot.h"
+#ifndef SNAPSHOT_H
+typedef void (*snapshot_hook_type) (Snapshot snapshot);
+#endif
extern PGDLLIMPORT bool FirstSnapshotSet;
@@ -78,7 +81,7 @@ extern void PushActiveSnapshotWithLevel(Snapshot snapshot, int snap_level);
extern void PushCopiedSnapshot(Snapshot snapshot);
extern void UpdateActiveSnapshotCommandId(void);
extern void PopActiveSnapshot(void);
-extern Snapshot GetActiveSnapshot(void);
+extern PGDLLIMPORT Snapshot GetActiveSnapshot(void);
extern bool ActiveSnapshotSet(void);
extern Snapshot RegisterSnapshot(Snapshot snapshot);
@@ -127,4 +130,10 @@ extern void SerializeSnapshot(Snapshot snapshot, char *start_address);
extern Snapshot RestoreSnapshot(char *start_address);
extern void RestoreTransactionSnapshot(Snapshot snapshot, void *source_pgproc);
+typedef void (*reset_xmin_hook_type) (void);
+
+extern snapshot_hook_type snapshot_register_hook;
+extern snapshot_hook_type snapshot_deregister_hook;
+extern reset_xmin_hook_type reset_xmin_hook;
+
#endif /* SNAPMGR_H */
diff --git a/src/include/utils/snapshot.h b/src/include/utils/snapshot.h
index 8d1e31e888e..9eec035622d 100644
--- a/src/include/utils/snapshot.h
+++ b/src/include/utils/snapshot.h
@@ -122,6 +122,20 @@ typedef struct SnapshotData *Snapshot;
#define InvalidSnapshot ((Snapshot) NULL)
+typedef struct
+{
+ uint64 undoLocation; /* undo log location retained by this snapshot */
+ uint64 xmin;
+ pairingheap_node ph_node;
+} RetainUndoLocationPHNode;
+
+typedef struct CSNSnapshotData
+{
+ uint64 xmin;
+ CommitSeqNo snapshotcsn;
+ XLogRecPtr xlogptr;
+} CSNSnapshotData;
+
/*
* Struct representing all kind of possible snapshots.
*
@@ -214,6 +228,12 @@ typedef struct SnapshotData
* transactions completed since the last GetSnapshotData().
*/
uint64 snapXactCompletionCount;
+
+ RetainUndoLocationPHNode undoRegularLocationPhNode;
+ RetainUndoLocationPHNode undoSystemLocationPhNode;
+ CSNSnapshotData csnSnapshotData;
} SnapshotData;
+typedef void (*snapshot_hook_type) (Snapshot snapshot);
+
#endif /* SNAPSHOT_H */
diff --git a/src/include/utils/tuplestore.h b/src/include/utils/tuplestore.h
index 419613c17ba..cf291a0d17a 100644
--- a/src/include/utils/tuplestore.h
+++ b/src/include/utils/tuplestore.h
@@ -70,6 +70,9 @@ extern bool tuplestore_in_memory(Tuplestorestate *state);
extern bool tuplestore_gettupleslot(Tuplestorestate *state, bool forward,
bool copy, TupleTableSlot *slot);
+extern bool tuplestore_force_gettupleslot(Tuplestorestate *state, bool forward,
+ bool copy, TupleTableSlot *slot);
+
extern bool tuplestore_advance(Tuplestorestate *state, bool forward);
extern bool tuplestore_skiptuples(Tuplestorestate *state,
diff --git a/src/include/utils/typcache.h b/src/include/utils/typcache.h
index f506cc4aa35..7c84978b7fa 100644
--- a/src/include/utils/typcache.h
+++ b/src/include/utils/typcache.h
@@ -207,4 +207,9 @@ extern void SharedRecordTypmodRegistryInit(SharedRecordTypmodRegistry *,
extern void SharedRecordTypmodRegistryAttach(SharedRecordTypmodRegistry *);
+typedef void (*load_typcache_tupdesc_hook_type)(TypeCacheEntry *typentry);
+extern PGDLLIMPORT load_typcache_tupdesc_hook_type load_typcache_tupdesc_hook;
+typedef void (*load_enum_cache_data_hook_type)(TypeCacheEntry *tcache);
+extern PGDLLIMPORT load_enum_cache_data_hook_type load_enum_cache_data_hook;
+
#endif /* TYPCACHE_H */
diff --git a/src/include/varatt.h b/src/include/varatt.h
index f04435e9ef3..0b4c09e639d 100644
--- a/src/include/varatt.h
+++ b/src/include/varatt.h
@@ -38,6 +38,25 @@ typedef struct varatt_external
Oid va_toastrelid; /* RelID of TOAST table containing it */
} varatt_external;
+typedef struct OToastExternal
+{
+ uint16 data_size; /* length of OToastExternal data */
+ int16 attnum;
+ int32 raw_size; /* original data size */
+ int32 toasted_size; /* compressed original data size */
+ /* for fetching data from TOAST tree */
+ CommitSeqNo csn;
+ /* for finding TOAST tree */
+ Oid datoid;
+ Oid relid;
+ Oid relnode;
+ /* for storing primary index tuple */
+ uint8 formatFlags; /* primary index tuple flags */
+ char data[FLEXIBLE_ARRAY_MEMBER]; /* data (primary index tuple) */
+} OToastExternal;
+
+#define ORIOLEDB_EXT_FORMAT_FLAGS_BITS 6
+
/*
* These macros define the "saved size" portion of va_extinfo. Its remaining
* two high-order bits identify the compression method.
@@ -86,17 +105,21 @@ typedef enum vartag_external
VARTAG_INDIRECT = 1,
VARTAG_EXPANDED_RO = 2,
VARTAG_EXPANDED_RW = 3,
- VARTAG_ONDISK = 18
+ VARTAG_ONDISK = 18,
+ VARTAG_ORIOLEDB = 34
} vartag_external;
/* this test relies on the specific tag values above */
#define VARTAG_IS_EXPANDED(tag) \
(((tag) & ~1) == VARTAG_EXPANDED_RO)
+#define O_TOAST_EXTERNAL_SZ offsetof(OToastExternal, data)
+
#define VARTAG_SIZE(tag) \
((tag) == VARTAG_INDIRECT ? sizeof(varatt_indirect) : \
VARTAG_IS_EXPANDED(tag) ? sizeof(varatt_expanded) : \
(tag) == VARTAG_ONDISK ? sizeof(varatt_external) : \
+ (tag) == VARTAG_ORIOLEDB ? O_TOAST_EXTERNAL_SZ : \
(AssertMacro(false), 0))
/*
@@ -282,11 +305,16 @@ typedef struct
#define VARDATA_SHORT(PTR) VARDATA_1B(PTR)
#define VARTAG_EXTERNAL(PTR) VARTAG_1B_E(PTR)
-#define VARSIZE_EXTERNAL(PTR) (VARHDRSZ_EXTERNAL + VARTAG_SIZE(VARTAG_EXTERNAL(PTR)))
+#define VARSIZE_EXTERNAL(PTR) (VARHDRSZ_EXTERNAL + VARTAG_SIZE(VARTAG_EXTERNAL(PTR)) \
+ + (VARATT_IS_EXTERNAL_ORIOLEDB(PTR) ? \
+ *((uint16 *) VARDATA_1B_E(PTR)) \
+ : 0))
+
#define VARDATA_EXTERNAL(PTR) VARDATA_1B_E(PTR)
#define VARATT_IS_COMPRESSED(PTR) VARATT_IS_4B_C(PTR)
#define VARATT_IS_EXTERNAL(PTR) VARATT_IS_1B_E(PTR)
+
#define VARATT_IS_EXTERNAL_ONDISK(PTR) \
(VARATT_IS_EXTERNAL(PTR) && VARTAG_EXTERNAL(PTR) == VARTAG_ONDISK)
#define VARATT_IS_EXTERNAL_INDIRECT(PTR) \
@@ -299,6 +327,9 @@ typedef struct
(VARATT_IS_EXTERNAL(PTR) && VARTAG_IS_EXPANDED(VARTAG_EXTERNAL(PTR)))
#define VARATT_IS_EXTERNAL_NON_EXPANDED(PTR) \
(VARATT_IS_EXTERNAL(PTR) && !VARTAG_IS_EXPANDED(VARTAG_EXTERNAL(PTR)))
+#define VARATT_IS_EXTERNAL_ORIOLEDB(PTR) \
+ (VARATT_IS_EXTERNAL(PTR) && VARTAG_EXTERNAL(PTR) == VARTAG_ORIOLEDB)
+
#define VARATT_IS_SHORT(PTR) VARATT_IS_1B(PTR)
#define VARATT_IS_EXTENDED(PTR) (!VARATT_IS_4B_U(PTR))
diff --git a/src/makefiles/meson.build b/src/makefiles/meson.build
index 5618050b306..192d3303f55 100644
--- a/src/makefiles/meson.build
+++ b/src/makefiles/meson.build
@@ -37,6 +37,7 @@ pgxs_kv = {
'PACKAGE_VERSION': pg_version,
'PG_MAJORVERSION': pg_version_major,
'PG_VERSION_NUM': pg_version_num,
+ 'ORIOLEDB_PATCHSET_VERSION': orioledb_patchset_version,
'configure_input': 'meson',
'vpath_build': 'yes',
diff --git a/src/test/isolation/expected/eval-plan-qual-2.out b/src/test/isolation/expected/eval-plan-qual-2.out
new file mode 100644
index 00000000000..117a3d3be8d
--- /dev/null
+++ b/src/test/isolation/expected/eval-plan-qual-2.out
@@ -0,0 +1,37 @@
+Parsed test spec with 3 sessions
+
+starting permutation: read_u wx2 wb1 c2 c1 read_u read
+step read_u: SELECT * FROM accounts;
+accountid|balance|balance2
+---------+-------+--------
+checking | 600| 1200
+savings | 600| 1200
+(2 rows)
+
+step wx2: UPDATE accounts SET balance = balance + 450 WHERE accountid = 'checking' RETURNING balance;
+balance
+-------
+ 1050
+(1 row)
+
+step wb1: DELETE FROM accounts WHERE balance = 600 RETURNING *;
+step c2: COMMIT;
+step wb1: <... completed>
+accountid|balance|balance2
+---------+-------+--------
+savings | 600| 1200
+(1 row)
+
+step c1: COMMIT;
+step read_u: SELECT * FROM accounts;
+accountid|balance|balance2
+---------+-------+--------
+checking | 1050| 2100
+(1 row)
+
+step read: SELECT * FROM accounts ORDER BY accountid;
+accountid|balance|balance2
+---------+-------+--------
+checking | 1050| 2100
+(1 row)
+
diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule
index 143109aa4da..f4df2146488 100644
--- a/src/test/isolation/isolation_schedule
+++ b/src/test/isolation/isolation_schedule
@@ -36,6 +36,7 @@ test: fk-partitioned-2
test: fk-snapshot
test: subxid-overflow
test: eval-plan-qual
+test: eval-plan-qual-2
test: eval-plan-qual-trigger
test: inplace-inval
test: intra-grant-inplace
diff --git a/src/test/isolation/specs/eval-plan-qual-2.spec b/src/test/isolation/specs/eval-plan-qual-2.spec
new file mode 100644
index 00000000000..30447bef24a
--- /dev/null
+++ b/src/test/isolation/specs/eval-plan-qual-2.spec
@@ -0,0 +1,30 @@
+setup
+{
+ CREATE TABLE accounts (accountid text PRIMARY KEY, balance numeric not null,
+ balance2 numeric GENERATED ALWAYS AS (balance * 2) STORED);
+ INSERT INTO accounts VALUES ('checking', 600), ('savings', 600);
+}
+
+teardown
+{
+ DROP TABLE accounts;
+}
+
+session s1
+setup { BEGIN ISOLATION LEVEL READ COMMITTED; }
+step wb1 { DELETE FROM accounts WHERE balance = 600 RETURNING *; }
+step c1 { COMMIT; }
+
+session s2
+setup { BEGIN ISOLATION LEVEL READ COMMITTED; }
+step wx2 { UPDATE accounts SET balance = balance + 450 WHERE accountid = 'checking' RETURNING balance; }
+step c2 { COMMIT; }
+
+session s3
+setup { BEGIN ISOLATION LEVEL READ COMMITTED; }
+step read { SELECT * FROM accounts ORDER BY accountid; }
+step read_u { SELECT * FROM accounts; }
+
+teardown { COMMIT; }
+
+permutation read_u wx2 wb1 c2 c1 read_u read
diff --git a/src/test/modules/dummy_index_am/dummy_index_am.c b/src/test/modules/dummy_index_am/dummy_index_am.c
index 18185d02067..1c6825f391a 100644
--- a/src/test/modules/dummy_index_am/dummy_index_am.c
+++ b/src/test/modules/dummy_index_am/dummy_index_am.c
@@ -164,7 +164,7 @@ dibuildempty(Relation index)
*/
static bool
diinsert(Relation index, Datum *values, bool *isnull,
- ItemPointer ht_ctid, Relation heapRel,
+ Datum tupleid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
IndexInfo *indexInfo)
@@ -303,7 +303,8 @@ dihandler(PG_FUNCTION_ARGS)
amroutine->ambuild = dibuild;
amroutine->ambuildempty = dibuildempty;
- amroutine->aminsert = diinsert;
+ amroutine->aminsert = NULL;
+ amroutine->aminsertextended = diinsert;
amroutine->ambulkdelete = dibulkdelete;
amroutine->amvacuumcleanup = divacuumcleanup;
amroutine->amcanreturn = NULL;
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index cf6eac57349..1f74afeca8f 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -1233,6 +1233,24 @@ SELECT count(*) FROM tenk1 WHERE stringu1 = 'TVAAAA';
14
(1 row)
+-- OR-clauses shouldn't be transformed into SAOP because hash indexes don't
+-- support SAOP scans.
+SET enable_seqscan = off;
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) FROM tenk1 WHERE stringu1 = 'TVAAAA' OR stringu1 = 'TVAAAB';
+ QUERY PLAN
+------------------------------------------------------------------------------------
+ Aggregate
+ -> Bitmap Heap Scan on tenk1
+ Recheck Cond: ((stringu1 = 'TVAAAA'::name) OR (stringu1 = 'TVAAAB'::name))
+ -> BitmapOr
+ -> Bitmap Index Scan on hash_tuplesort_idx
+ Index Cond: (stringu1 = 'TVAAAA'::name)
+ -> Bitmap Index Scan on hash_tuplesort_idx
+ Index Cond: (stringu1 = 'TVAAAB'::name)
+(8 rows)
+
+RESET enable_seqscan;
DROP INDEX hash_tuplesort_idx;
RESET maintenance_work_mem;
--
@@ -1843,19 +1861,122 @@ DROP TABLE onek_with_null;
--
EXPLAIN (COSTS OFF)
SELECT * FROM tenk1
- WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42);
- QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------
+ WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42 OR tenthous = 0);
+ QUERY PLAN
+--------------------------------------------------------------------------------
+ Index Scan using tenk1_thous_tenthous on tenk1
+ Index Cond: ((thousand = 42) AND (tenthous = ANY ('{1,3,42,0}'::integer[])))
+(2 rows)
+
+SELECT * FROM tenk1
+ WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42 OR tenthous = 0);
+ unique1 | unique2 | two | four | ten | twenty | hundred | thousand | twothousand | fivethous | tenthous | odd | even | stringu1 | stringu2 | string4
+---------+---------+-----+------+-----+--------+---------+----------+-------------+-----------+----------+-----+------+----------+----------+---------
+ 42 | 5530 | 0 | 2 | 2 | 2 | 42 | 42 | 42 | 42 | 42 | 84 | 85 | QBAAAA | SEIAAA | OOOOxx
+(1 row)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1
+ WHERE thousand = 42 AND (tenthous = 1 OR tenthous = (SELECT 1 + 2) OR tenthous = 42);
+ QUERY PLAN
+----------------------------------------------------------------------------------------
+ Index Scan using tenk1_thous_tenthous on tenk1
+ Index Cond: ((thousand = 42) AND (tenthous = ANY (ARRAY[1, (InitPlan 1).col1, 42])))
+ InitPlan 1
+ -> Result
+(4 rows)
+
+SELECT * FROM tenk1
+ WHERE thousand = 42 AND (tenthous = 1 OR tenthous = (SELECT 1 + 2) OR tenthous = 42);
+ unique1 | unique2 | two | four | ten | twenty | hundred | thousand | twothousand | fivethous | tenthous | odd | even | stringu1 | stringu2 | string4
+---------+---------+-----+------+-----+--------+---------+----------+-------------+-----------+----------+-----+------+----------+----------+---------
+ 42 | 5530 | 0 | 2 | 2 | 2 | 42 | 42 | 42 | 42 | 42 | 84 | 85 | QBAAAA | SEIAAA | OOOOxx
+(1 row)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1
+ WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42 OR tenthous IS NULL);
+ QUERY PLAN
+-------------------------------------------------------------------------------------------------------------------------------------------
Bitmap Heap Scan on tenk1
- Recheck Cond: (((thousand = 42) AND (tenthous = 1)) OR ((thousand = 42) AND (tenthous = 3)) OR ((thousand = 42) AND (tenthous = 42)))
+ Recheck Cond: (((thousand = 42) AND (tenthous IS NULL)) OR ((thousand = 42) AND ((tenthous = 1) OR (tenthous = 3) OR (tenthous = 42))))
+ Filter: ((tenthous = 1) OR (tenthous = 3) OR (tenthous = 42) OR (tenthous IS NULL))
-> BitmapOr
-> Bitmap Index Scan on tenk1_thous_tenthous
- Index Cond: ((thousand = 42) AND (tenthous = 1))
+ Index Cond: ((thousand = 42) AND (tenthous IS NULL))
+ -> Bitmap Index Scan on tenk1_thous_tenthous
+ Index Cond: ((thousand = 42) AND (tenthous = ANY ('{1,3,42}'::integer[])))
+(8 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1
+ WHERE thousand = 42 AND (tenthous = 1::int2 OR tenthous::int2 = 3::int8 OR tenthous = 42::int8);
+ QUERY PLAN
+-------------------------------------------------------------------------------------------------------------
+ Bitmap Heap Scan on tenk1
+ Recheck Cond: (thousand = 42)
+ Filter: ((tenthous = '1'::smallint) OR ((tenthous)::smallint = '3'::bigint) OR (tenthous = '42'::bigint))
+ -> Bitmap Index Scan on tenk1_thous_tenthous
+ Index Cond: (thousand = 42)
+(5 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1
+ WHERE thousand = 42 AND (tenthous = 1::int2 OR tenthous::int2 = 3::int8 OR tenthous::int2 = 42::int8);
+ QUERY PLAN
+-------------------------------------------------------------------------------------------------------------------------
+ Bitmap Heap Scan on tenk1
+ Recheck Cond: (thousand = 42)
+ Filter: ((tenthous = '1'::smallint) OR ((tenthous)::smallint = '3'::bigint) OR ((tenthous)::smallint = '42'::bigint))
+ -> Bitmap Index Scan on tenk1_thous_tenthous
+ Index Cond: (thousand = 42)
+(5 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1
+ WHERE thousand = 42 AND (tenthous = 1::int2 OR tenthous = 3::int8 OR tenthous = 42::int8);
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------------------------------------------------------------
+ Bitmap Heap Scan on tenk1
+ Recheck Cond: (((thousand = 42) AND ((tenthous = '3'::bigint) OR (tenthous = '42'::bigint))) OR ((thousand = 42) AND (tenthous = '1'::smallint)))
+ Filter: ((tenthous = '1'::smallint) OR (tenthous = '3'::bigint) OR (tenthous = '42'::bigint))
+ -> BitmapOr
-> Bitmap Index Scan on tenk1_thous_tenthous
- Index Cond: ((thousand = 42) AND (tenthous = 3))
+ Index Cond: ((thousand = 42) AND (tenthous = ANY ('{3,42}'::bigint[])))
-> Bitmap Index Scan on tenk1_thous_tenthous
- Index Cond: ((thousand = 42) AND (tenthous = 42))
-(9 rows)
+ Index Cond: ((thousand = 42) AND (tenthous = '1'::smallint))
+(8 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM tenk1
+ WHERE hundred = 42 AND (thousand = 42 OR thousand = 99);
+ QUERY PLAN
+---------------------------------------------------------------------------------
+ Aggregate
+ -> Bitmap Heap Scan on tenk1
+ Recheck Cond: ((hundred = 42) AND ((thousand = 42) OR (thousand = 99)))
+ -> BitmapAnd
+ -> Bitmap Index Scan on tenk1_hundred
+ Index Cond: (hundred = 42)
+ -> Bitmap Index Scan on tenk1_thous_tenthous
+ Index Cond: (thousand = ANY ('{42,99}'::integer[]))
+(8 rows)
+
+SELECT count(*) FROM tenk1
+ WHERE hundred = 42 AND (thousand = 42 OR thousand = 99);
+ count
+-------
+ 10
+(1 row)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1
+ WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42);
+ QUERY PLAN
+------------------------------------------------------------------------------
+ Index Scan using tenk1_thous_tenthous on tenk1
+ Index Cond: ((thousand = 42) AND (tenthous = ANY ('{1,3,42}'::integer[])))
+(2 rows)
SELECT * FROM tenk1
WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42);
@@ -1864,6 +1985,27 @@ SELECT * FROM tenk1
42 | 5530 | 0 | 2 | 2 | 2 | 42 | 42 | 42 | 42 | 42 | 84 | 85 | QBAAAA | SEIAAA | OOOOxx
(1 row)
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1
+ WHERE thousand = 42 AND (tenthous = 1::numeric OR tenthous = 3::int4 OR tenthous = 42::numeric);
+ QUERY PLAN
+-------------------------------------------------------------------------------------------------------------
+ Bitmap Heap Scan on tenk1
+ Recheck Cond: (thousand = 42)
+ Filter: (((tenthous)::numeric = '1'::numeric) OR (tenthous = 3) OR ((tenthous)::numeric = '42'::numeric))
+ -> Bitmap Index Scan on tenk1_thous_tenthous
+ Index Cond: (thousand = 42)
+(5 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1
+ WHERE tenthous = 1::numeric OR tenthous = 3::int4 OR tenthous = 42::numeric;
+ QUERY PLAN
+-------------------------------------------------------------------------------------------------------------
+ Seq Scan on tenk1
+ Filter: (((tenthous)::numeric = '1'::numeric) OR (tenthous = 3) OR ((tenthous)::numeric = '42'::numeric))
+(2 rows)
+
EXPLAIN (COSTS OFF)
SELECT count(*) FROM tenk1
WHERE hundred = 42 AND (thousand = 42 OR thousand = 99);
@@ -1872,23 +2014,191 @@ SELECT count(*) FROM tenk1
Aggregate
-> Bitmap Heap Scan on tenk1
Recheck Cond: ((hundred = 42) AND ((thousand = 42) OR (thousand = 99)))
+ -> BitmapAnd
+ -> Bitmap Index Scan on tenk1_hundred
+ Index Cond: (hundred = 42)
+ -> Bitmap Index Scan on tenk1_thous_tenthous
+ Index Cond: (thousand = ANY ('{42,99}'::integer[]))
+(8 rows)
+
+SELECT count(*) FROM tenk1
+ WHERE hundred = 42 AND (thousand = 42 OR thousand = 99);
+ count
+-------
+ 10
+(1 row)
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM tenk1
+ WHERE hundred = 42 AND (thousand < 42 OR thousand < 99 OR 43 > thousand OR 42 > thousand);
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------------------------------
+ Aggregate
+ -> Bitmap Heap Scan on tenk1
+ Recheck Cond: ((hundred = 42) AND ((thousand < 42) OR (thousand < 99) OR (43 > thousand) OR (42 > thousand)))
+ -> BitmapAnd
+ -> Bitmap Index Scan on tenk1_hundred
+ Index Cond: (hundred = 42)
+ -> Bitmap Index Scan on tenk1_thous_tenthous
+ Index Cond: (thousand < ANY ('{42,99,43,42}'::integer[]))
+(8 rows)
+
+SELECT count(*) FROM tenk1
+ WHERE hundred = 42 AND (thousand < 42 OR thousand < 99 OR 43 > thousand OR 42 > thousand);
+ count
+-------
+ 10
+(1 row)
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM tenk1
+ WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3) OR thousand = 41;
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------------
+ Aggregate
+ -> Bitmap Heap Scan on tenk1
+ Recheck Cond: (((thousand = 42) AND ((tenthous = 1) OR (tenthous = 3))) OR (thousand = 41))
+ -> BitmapOr
+ -> Bitmap Index Scan on tenk1_thous_tenthous
+ Index Cond: ((thousand = 42) AND (tenthous = ANY ('{1,3}'::integer[])))
+ -> Bitmap Index Scan on tenk1_thous_tenthous
+ Index Cond: (thousand = 41)
+(8 rows)
+
+SELECT count(*) FROM tenk1
+ WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3) OR thousand = 41;
+ count
+-------
+ 10
+(1 row)
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM tenk1
+ WHERE hundred = 42 AND (thousand = 42 OR thousand = 99 OR tenthous < 2) OR thousand = 41;
+ QUERY PLAN
+--------------------------------------------------------------------------------------------------------------------------
+ Aggregate
+ -> Bitmap Heap Scan on tenk1
+ Recheck Cond: (((hundred = 42) AND (((thousand = 42) OR (thousand = 99)) OR (tenthous < 2))) OR (thousand = 41))
+ Filter: (((hundred = 42) AND ((thousand = 42) OR (thousand = 99) OR (tenthous < 2))) OR (thousand = 41))
+ -> BitmapOr
+ -> BitmapAnd
+ -> Bitmap Index Scan on tenk1_hundred
+ Index Cond: (hundred = 42)
+ -> BitmapOr
+ -> Bitmap Index Scan on tenk1_thous_tenthous
+ Index Cond: (thousand = ANY ('{42,99}'::integer[]))
+ -> Bitmap Index Scan on tenk1_thous_tenthous
+ Index Cond: (tenthous < 2)
+ -> Bitmap Index Scan on tenk1_thous_tenthous
+ Index Cond: (thousand = 41)
+(15 rows)
+
+SELECT count(*) FROM tenk1
+ WHERE hundred = 42 AND (thousand = 42 OR thousand = 99 OR tenthous < 2) OR thousand = 41;
+ count
+-------
+ 20
+(1 row)
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM tenk1
+ WHERE hundred = 42 AND (thousand = 42 OR thousand = 41 OR thousand = 99 AND tenthous = 2);
+ QUERY PLAN
+---------------------------------------------------------------------------------------------------------------------------
+ Aggregate
+ -> Bitmap Heap Scan on tenk1
+ Recheck Cond: ((hundred = 42) AND (((thousand = 99) AND (tenthous = 2)) OR ((thousand = 42) OR (thousand = 41))))
+ Filter: ((thousand = 42) OR (thousand = 41) OR ((thousand = 99) AND (tenthous = 2)))
-> BitmapAnd
-> Bitmap Index Scan on tenk1_hundred
Index Cond: (hundred = 42)
-> BitmapOr
-> Bitmap Index Scan on tenk1_thous_tenthous
- Index Cond: (thousand = 42)
+ Index Cond: ((thousand = 99) AND (tenthous = 2))
-> Bitmap Index Scan on tenk1_thous_tenthous
- Index Cond: (thousand = 99)
-(11 rows)
+ Index Cond: (thousand = ANY ('{42,41}'::integer[]))
+(12 rows)
SELECT count(*) FROM tenk1
- WHERE hundred = 42 AND (thousand = 42 OR thousand = 99);
+ WHERE hundred = 42 AND (thousand = 42 OR thousand = 41 OR thousand = 99 AND tenthous = 2);
count
-------
10
(1 row)
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM tenk1, tenk2
+ WHERE tenk1.hundred = 42 AND (tenk2.thousand = 42 OR tenk1.thousand = 41 OR tenk2.tenthous = 2) AND
+ tenk2.hundred = tenk1.hundred;
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------
+ Aggregate
+ -> Nested Loop
+ Join Filter: ((tenk2.thousand = 42) OR (tenk1.thousand = 41) OR (tenk2.tenthous = 2))
+ -> Bitmap Heap Scan on tenk1
+ Recheck Cond: (hundred = 42)
+ -> Bitmap Index Scan on tenk1_hundred
+ Index Cond: (hundred = 42)
+ -> Materialize
+ -> Bitmap Heap Scan on tenk2
+ Recheck Cond: (hundred = 42)
+ -> Bitmap Index Scan on tenk2_hundred
+ Index Cond: (hundred = 42)
+(12 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM tenk1, tenk2
+ WHERE tenk1.hundred = 42 AND (tenk2.thousand = 42 OR tenk2.thousand = 41 OR tenk2.tenthous = 2) AND
+ tenk2.hundred = tenk1.hundred;
+ QUERY PLAN
+------------------------------------------------------------------------------
+ Aggregate
+ -> Nested Loop
+ -> Bitmap Heap Scan on tenk2
+ Recheck Cond: (hundred = 42)
+ Filter: ((thousand = 42) OR (thousand = 41) OR (tenthous = 2))
+ -> Bitmap Index Scan on tenk2_hundred
+ Index Cond: (hundred = 42)
+ -> Index Only Scan using tenk1_hundred on tenk1
+ Index Cond: (hundred = 42)
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM tenk1 JOIN tenk2 ON
+ tenk1.hundred = 42 AND (tenk2.thousand = 42 OR tenk2.thousand = 41 OR tenk2.tenthous = 2) AND
+ tenk2.hundred = tenk1.hundred;
+ QUERY PLAN
+------------------------------------------------------------------------------
+ Aggregate
+ -> Nested Loop
+ -> Bitmap Heap Scan on tenk2
+ Recheck Cond: (hundred = 42)
+ Filter: ((thousand = 42) OR (thousand = 41) OR (tenthous = 2))
+ -> Bitmap Index Scan on tenk2_hundred
+ Index Cond: (hundred = 42)
+ -> Index Only Scan using tenk1_hundred on tenk1
+ Index Cond: (hundred = 42)
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM tenk1 LEFT JOIN tenk2 ON
+ tenk1.hundred = 42 AND (tenk2.thousand = 42 OR tenk2.thousand = 41 OR tenk2.tenthous = 2) AND
+ tenk2.hundred = tenk1.hundred;
+ QUERY PLAN
+------------------------------------------------------------------------------------
+ Aggregate
+ -> Nested Loop Left Join
+ Join Filter: (tenk1.hundred = 42)
+ -> Index Only Scan using tenk1_hundred on tenk1
+ -> Memoize
+ Cache Key: tenk1.hundred
+ Cache Mode: logical
+ -> Index Scan using tenk2_hundred on tenk2
+ Index Cond: (hundred = tenk1.hundred)
+ Filter: ((thousand = 42) OR (thousand = 41) OR (tenthous = 2))
+(10 rows)
+
--
-- Check behavior with duplicate index column contents
--
@@ -2904,6 +3214,49 @@ SELECT b.relname,
(2 rows)
DROP TABLE concur_temp_tab_1, concur_temp_tab_2, reindex_temp_before;
+-- Check bitmap scan can consider similar OR arguments separately without
+-- grouping them into SAOP.
+CREATE TABLE bitmap_split_or (a int NOT NULL, b int NOT NULL, c int NOT NULL);
+INSERT INTO bitmap_split_or (SELECT 1, 1, i FROM generate_series(1, 1000) i);
+INSERT INTO bitmap_split_or (select i, 2, 2 FROM generate_series(1, 1000) i);
+VACUUM ANALYZE bitmap_split_or;
+CREATE INDEX t_b_partial_1_idx ON bitmap_split_or (b) WHERE a = 1;
+CREATE INDEX t_b_partial_2_idx ON bitmap_split_or (b) WHERE a = 2;
+EXPLAIN (COSTS OFF)
+SELECT * FROM bitmap_split_or WHERE (a = 1 OR a = 2) AND b = 2;
+ QUERY PLAN
+------------------------------------------------------------------
+ Bitmap Heap Scan on bitmap_split_or
+ Recheck Cond: (((b = 2) AND (a = 1)) OR ((b = 2) AND (a = 2)))
+ -> BitmapOr
+ -> Bitmap Index Scan on t_b_partial_1_idx
+ Index Cond: (b = 2)
+ -> Bitmap Index Scan on t_b_partial_2_idx
+ Index Cond: (b = 2)
+(7 rows)
+
+DROP INDEX t_b_partial_1_idx;
+DROP INDEX t_b_partial_2_idx;
+CREATE INDEX t_a_b_idx ON bitmap_split_or (a, b);
+CREATE INDEX t_b_c_idx ON bitmap_split_or (b, c);
+CREATE STATISTICS t_a_b_stat (mcv) ON a, b FROM bitmap_split_or;
+CREATE STATISTICS t_b_c_stat (mcv) ON b, c FROM bitmap_split_or;
+ANALYZE bitmap_split_or;
+EXPLAIN (COSTS OFF)
+SELECT * FROM bitmap_split_or WHERE a = 1 AND (b = 1 OR b = 2) AND c = 2;
+ QUERY PLAN
+------------------------------------------------------------------
+ Bitmap Heap Scan on bitmap_split_or
+ Recheck Cond: (((b = 1) AND (c = 2)) OR ((a = 1) AND (b = 2)))
+ Filter: ((a = 1) AND (c = 2))
+ -> BitmapOr
+ -> Bitmap Index Scan on t_b_c_idx
+ Index Cond: ((b = 1) AND (c = 2))
+ -> Bitmap Index Scan on t_a_b_idx
+ Index Cond: ((a = 1) AND (b = 2))
+(8 rows)
+
+DROP TABLE bitmap_split_or;
--
-- REINDEX SCHEMA
--
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 8d1d3ec1dcf..f1664516bf7 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -4225,20 +4225,20 @@ select * from tenk1 a join tenk1 b on
Nested Loop
Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR ((a.unique2 = 3) AND (b.hundred = 4)))
-> Bitmap Heap Scan on tenk1 b
- Recheck Cond: ((unique1 = 2) OR (hundred = 4))
+ Recheck Cond: ((hundred = 4) OR (unique1 = 2))
-> BitmapOr
- -> Bitmap Index Scan on tenk1_unique1
- Index Cond: (unique1 = 2)
-> Bitmap Index Scan on tenk1_hundred
Index Cond: (hundred = 4)
+ -> Bitmap Index Scan on tenk1_unique1
+ Index Cond: (unique1 = 2)
-> Materialize
-> Bitmap Heap Scan on tenk1 a
- Recheck Cond: ((unique1 = 1) OR (unique2 = 3))
+ Recheck Cond: ((unique2 = 3) OR (unique1 = 1))
-> BitmapOr
- -> Bitmap Index Scan on tenk1_unique1
- Index Cond: (unique1 = 1)
-> Bitmap Index Scan on tenk1_unique2
Index Cond: (unique2 = 3)
+ -> Bitmap Index Scan on tenk1_unique1
+ Index Cond: (unique1 = 1)
(17 rows)
explain (costs off)
@@ -4252,12 +4252,12 @@ select * from tenk1 a join tenk1 b on
Filter: ((unique1 = 2) OR (ten = 4))
-> Materialize
-> Bitmap Heap Scan on tenk1 a
- Recheck Cond: ((unique1 = 1) OR (unique2 = 3))
+ Recheck Cond: ((unique2 = 3) OR (unique1 = 1))
-> BitmapOr
- -> Bitmap Index Scan on tenk1_unique1
- Index Cond: (unique1 = 1)
-> Bitmap Index Scan on tenk1_unique2
Index Cond: (unique2 = 3)
+ -> Bitmap Index Scan on tenk1_unique1
+ Index Cond: (unique1 = 1)
(12 rows)
explain (costs off)
@@ -4269,23 +4269,70 @@ select * from tenk1 a join tenk1 b on
Nested Loop
Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR (((a.unique2 = 3) OR (a.unique2 = 7)) AND (b.hundred = 4)))
-> Bitmap Heap Scan on tenk1 b
- Recheck Cond: ((unique1 = 2) OR (hundred = 4))
+ Recheck Cond: ((hundred = 4) OR (unique1 = 2))
-> BitmapOr
+ -> Bitmap Index Scan on tenk1_hundred
+ Index Cond: (hundred = 4)
-> Bitmap Index Scan on tenk1_unique1
Index Cond: (unique1 = 2)
+ -> Materialize
+ -> Bitmap Heap Scan on tenk1 a
+ Recheck Cond: (((unique2 = 3) OR (unique2 = 7)) OR (unique1 = 1))
+ Filter: ((unique1 = 1) OR (unique2 = 3) OR (unique2 = 7))
+ -> BitmapOr
+ -> Bitmap Index Scan on tenk1_unique2
+ Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
+ -> Bitmap Index Scan on tenk1_unique1
+ Index Cond: (unique1 = 1)
+(18 rows)
+
+explain (costs off)
+select * from tenk1 a join tenk1 b on
+ (a.unique1 = 1 and b.unique1 = 2) or
+ ((a.unique2 = 3 or a.unique2 = 7) and b.hundred = 4);
+ QUERY PLAN
+----------------------------------------------------------------------------------------------------------------------
+ Nested Loop
+ Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR (((a.unique2 = 3) OR (a.unique2 = 7)) AND (b.hundred = 4)))
+ -> Bitmap Heap Scan on tenk1 b
+ Recheck Cond: ((hundred = 4) OR (unique1 = 2))
+ -> BitmapOr
-> Bitmap Index Scan on tenk1_hundred
Index Cond: (hundred = 4)
+ -> Bitmap Index Scan on tenk1_unique1
+ Index Cond: (unique1 = 2)
-> Materialize
-> Bitmap Heap Scan on tenk1 a
- Recheck Cond: ((unique1 = 1) OR (unique2 = 3) OR (unique2 = 7))
+ Recheck Cond: (((unique2 = 3) OR (unique2 = 7)) OR (unique1 = 1))
+ Filter: ((unique1 = 1) OR (unique2 = 3) OR (unique2 = 7))
-> BitmapOr
+ -> Bitmap Index Scan on tenk1_unique2
+ Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
-> Bitmap Index Scan on tenk1_unique1
Index Cond: (unique1 = 1)
+(18 rows)
+
+explain (costs off)
+select * from tenk1 a join tenk1 b on
+ (a.unique1 < 20 or a.unique1 = 3 or a.unique1 = 1 and b.unique1 = 2) or
+ ((a.unique2 = 3 or a.unique2 = 7) and b.hundred = 4);
+ QUERY PLAN
+-------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Nested Loop
+ Join Filter: ((a.unique1 < 20) OR (a.unique1 = 3) OR ((a.unique1 = 1) AND (b.unique1 = 2)) OR (((a.unique2 = 3) OR (a.unique2 = 7)) AND (b.hundred = 4)))
+ -> Seq Scan on tenk1 b
+ -> Materialize
+ -> Bitmap Heap Scan on tenk1 a
+ Recheck Cond: (((unique2 = 3) OR (unique2 = 7)) OR ((unique1 = 3) OR (unique1 = 1)) OR (unique1 < 20))
+ Filter: ((unique1 < 20) OR (unique1 = 3) OR (unique1 = 1) OR (unique2 = 3) OR (unique2 = 7))
+ -> BitmapOr
-> Bitmap Index Scan on tenk1_unique2
- Index Cond: (unique2 = 3)
- -> Bitmap Index Scan on tenk1_unique2
- Index Cond: (unique2 = 7)
-(19 rows)
+ Index Cond: (unique2 = ANY ('{3,7}'::integer[]))
+ -> Bitmap Index Scan on tenk1_unique1
+ Index Cond: (unique1 = ANY ('{3,1}'::integer[]))
+ -> Bitmap Index Scan on tenk1_unique1
+ Index Cond: (unique1 < 20)
+(14 rows)
--
-- test placement of movable quals in a parameterized join tree
diff --git a/src/test/regress/expected/jsonb_jsonpath.out b/src/test/regress/expected/jsonb_jsonpath.out
index 57c117ea580..8cf6ecfc7f8 100644
--- a/src/test/regress/expected/jsonb_jsonpath.out
+++ b/src/test/regress/expected/jsonb_jsonpath.out
@@ -2634,12 +2634,16 @@ select jsonb_path_query('"12:34:56 +5:30"', '$.time_tz().string()');
"12:34:56+05:30"
(1 row)
+-- this timetz usage will absorb the UTC offset of the current timezone setting
+begin;
+set local timezone = 'UTC-10';
select jsonb_path_query_tz('"12:34:56"', '$.time_tz().string()');
jsonb_path_query_tz
---------------------
- "12:34:56-07:00"
+ "12:34:56+10:00"
(1 row)
+rollback;
select jsonb_path_query('"12:34:56"', '$.time().string()');
jsonb_path_query
------------------
diff --git a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out
index 319190855bd..ef890b96cc6 100644
--- a/src/test/regress/expected/rowsecurity.out
+++ b/src/test/regress/expected/rowsecurity.out
@@ -4492,6 +4492,13 @@ SELECT * FROM rls_tbl WHERE a <<< 1000;
---
(0 rows)
+EXPLAIN (COSTS OFF) SELECT * FROM rls_tbl WHERE a <<< 1000 or a <<< 900;
+ QUERY PLAN
+--------------------------
+ Result
+ One-Time Filter: false
+(2 rows)
+
DROP OPERATOR <<< (int, int);
DROP FUNCTION op_leak(int, int);
RESET SESSION AUTHORIZATION;
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index 8c4da955084..a4c7be487ef 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -3254,6 +3254,8 @@ CREATE OPERATOR <<< (procedure = op_leak, leftarg = int, rightarg = int,
restrict = scalarltsel);
SELECT * FROM tststats.priv_test_tbl WHERE a <<< 0 AND b <<< 0; -- Permission denied
ERROR: permission denied for table priv_test_tbl
+SELECT * FROM tststats.priv_test_tbl WHERE a <<< 0 OR b <<< 0;
+ERROR: permission denied for table priv_test_tbl
DELETE FROM tststats.priv_test_tbl WHERE a <<< 0 AND b <<< 0; -- Permission denied
ERROR: permission denied for table priv_test_tbl
-- Grant access via a security barrier view, but hide all data
@@ -3268,6 +3270,11 @@ SELECT * FROM tststats.priv_test_view WHERE a <<< 0 AND b <<< 0; -- Should not l
---+---
(0 rows)
+SELECT * FROM tststats.priv_test_view WHERE a <<< 0 OR b <<< 0; -- Should not leak
+ a | b
+---+---
+(0 rows)
+
DELETE FROM tststats.priv_test_view WHERE a <<< 0 AND b <<< 0; -- Should not leak
-- Grant table access, but hide all data with RLS
RESET SESSION AUTHORIZATION;
@@ -3280,6 +3287,11 @@ SELECT * FROM tststats.priv_test_tbl WHERE a <<< 0 AND b <<< 0; -- Should not le
---+---
(0 rows)
+SELECT * FROM tststats.priv_test_tbl WHERE a <<< 0 OR b <<< 0;
+ a | b
+---+---
+(0 rows)
+
DELETE FROM tststats.priv_test_tbl WHERE a <<< 0 AND b <<< 0; -- Should not leak
-- privilege checks for pg_stats_ext and pg_stats_ext_exprs
RESET SESSION AUTHORIZATION;
diff --git a/src/test/regress/expected/uuid.out b/src/test/regress/expected/uuid.out
index 6026e15ed31..8f4ef0d7a6a 100644
--- a/src/test/regress/expected/uuid.out
+++ b/src/test/regress/expected/uuid.out
@@ -129,6 +129,37 @@ CREATE INDEX guid1_btree ON guid1 USING BTREE (guid_field);
CREATE INDEX guid1_hash ON guid1 USING HASH (guid_field);
-- unique index test
CREATE UNIQUE INDEX guid1_unique_BTREE ON guid1 USING BTREE (guid_field);
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) FROM guid1 WHERE guid_field <> '11111111111111111111111111111111' OR
+ guid_field <> '3f3e3c3b-3a30-3938-3736-353433a2313e';
+ QUERY PLAN
+------------------------------------------------------------------------------------------------------------------------------------------------
+ Aggregate
+ -> Seq Scan on guid1
+ Filter: ((guid_field <> '11111111-1111-1111-1111-111111111111'::uuid) OR (guid_field <> '3f3e3c3b-3a30-3938-3736-353433a2313e'::uuid))
+(3 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) FROM guid1 WHERE guid_field <= '22222222-2222-2222-2222-222222222222' OR
+ guid_field <= '11111111111111111111111111111111' OR
+ guid_field <= '3f3e3c3b-3a30-3938-3736-353433a2313e';
+ QUERY PLAN
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Aggregate
+ -> Seq Scan on guid1
+ Filter: ((guid_field <= '22222222-2222-2222-2222-222222222222'::uuid) OR (guid_field <= '11111111-1111-1111-1111-111111111111'::uuid) OR (guid_field <= '3f3e3c3b-3a30-3938-3736-353433a2313e'::uuid))
+(3 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) FROM guid1 WHERE guid_field = '3f3e3c3b-3a30-3938-3736-353433a2313e' OR
+ guid_field = '11111111111111111111111111111111';
+ QUERY PLAN
+----------------------------------------------------------------------------------------------------------------------------------------------
+ Aggregate
+ -> Seq Scan on guid1
+ Filter: ((guid_field = '3f3e3c3b-3a30-3938-3736-353433a2313e'::uuid) OR (guid_field = '11111111-1111-1111-1111-111111111111'::uuid))
+(3 rows)
+
-- should fail
INSERT INTO guid1(guid_field) VALUES('11111111-1111-1111-1111-111111111111');
ERROR: duplicate key value violates unique constraint "guid1_unique_btree"
diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c
index 45a6ad3c49e..7b8e91d07b3 100644
--- a/src/test/regress/regress.c
+++ b/src/test/regress/regress.c
@@ -606,7 +606,7 @@ make_tuple_indirect(PG_FUNCTION_ARGS)
continue;
/* copy datum, so it still lives later */
- if (VARATT_IS_EXTERNAL_ONDISK(attr))
+ if (VARATT_IS_EXTERNAL_ONDISK(attr) || VARATT_IS_EXTERNAL_ORIOLEDB(attr))
attr = detoast_external_attr(attr);
else
{
diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql
index e296891cab8..6b683da30f9 100644
--- a/src/test/regress/sql/create_index.sql
+++ b/src/test/regress/sql/create_index.sql
@@ -372,6 +372,12 @@ CREATE INDEX hash_tuplesort_idx ON tenk1 USING hash (stringu1 name_ops) WITH (fi
EXPLAIN (COSTS OFF)
SELECT count(*) FROM tenk1 WHERE stringu1 = 'TVAAAA';
SELECT count(*) FROM tenk1 WHERE stringu1 = 'TVAAAA';
+-- OR-clauses shouldn't be transformed into SAOP because hash indexes don't
+-- support SAOP scans.
+SET enable_seqscan = off;
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) FROM tenk1 WHERE stringu1 = 'TVAAAA' OR stringu1 = 'TVAAAB';
+RESET enable_seqscan;
DROP INDEX hash_tuplesort_idx;
RESET maintenance_work_mem;
@@ -726,18 +732,104 @@ DROP TABLE onek_with_null;
-- Check bitmap index path planning
--
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1
+ WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42 OR tenthous = 0);
+SELECT * FROM tenk1
+ WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42 OR tenthous = 0);
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1
+ WHERE thousand = 42 AND (tenthous = 1 OR tenthous = (SELECT 1 + 2) OR tenthous = 42);
+SELECT * FROM tenk1
+ WHERE thousand = 42 AND (tenthous = 1 OR tenthous = (SELECT 1 + 2) OR tenthous = 42);
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1
+ WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42 OR tenthous IS NULL);
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1
+ WHERE thousand = 42 AND (tenthous = 1::int2 OR tenthous::int2 = 3::int8 OR tenthous = 42::int8);
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1
+ WHERE thousand = 42 AND (tenthous = 1::int2 OR tenthous::int2 = 3::int8 OR tenthous::int2 = 42::int8);
+
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1
+ WHERE thousand = 42 AND (tenthous = 1::int2 OR tenthous = 3::int8 OR tenthous = 42::int8);
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM tenk1
+ WHERE hundred = 42 AND (thousand = 42 OR thousand = 99);
+SELECT count(*) FROM tenk1
+ WHERE hundred = 42 AND (thousand = 42 OR thousand = 99);
+
EXPLAIN (COSTS OFF)
SELECT * FROM tenk1
WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42);
SELECT * FROM tenk1
WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42);
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1
+ WHERE thousand = 42 AND (tenthous = 1::numeric OR tenthous = 3::int4 OR tenthous = 42::numeric);
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM tenk1
+ WHERE tenthous = 1::numeric OR tenthous = 3::int4 OR tenthous = 42::numeric;
+
EXPLAIN (COSTS OFF)
SELECT count(*) FROM tenk1
WHERE hundred = 42 AND (thousand = 42 OR thousand = 99);
SELECT count(*) FROM tenk1
WHERE hundred = 42 AND (thousand = 42 OR thousand = 99);
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM tenk1
+ WHERE hundred = 42 AND (thousand < 42 OR thousand < 99 OR 43 > thousand OR 42 > thousand);
+SELECT count(*) FROM tenk1
+ WHERE hundred = 42 AND (thousand < 42 OR thousand < 99 OR 43 > thousand OR 42 > thousand);
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM tenk1
+ WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3) OR thousand = 41;
+SELECT count(*) FROM tenk1
+ WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3) OR thousand = 41;
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM tenk1
+ WHERE hundred = 42 AND (thousand = 42 OR thousand = 99 OR tenthous < 2) OR thousand = 41;
+SELECT count(*) FROM tenk1
+ WHERE hundred = 42 AND (thousand = 42 OR thousand = 99 OR tenthous < 2) OR thousand = 41;
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM tenk1
+ WHERE hundred = 42 AND (thousand = 42 OR thousand = 41 OR thousand = 99 AND tenthous = 2);
+SELECT count(*) FROM tenk1
+ WHERE hundred = 42 AND (thousand = 42 OR thousand = 41 OR thousand = 99 AND tenthous = 2);
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM tenk1, tenk2
+ WHERE tenk1.hundred = 42 AND (tenk2.thousand = 42 OR tenk1.thousand = 41 OR tenk2.tenthous = 2) AND
+ tenk2.hundred = tenk1.hundred;
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM tenk1, tenk2
+ WHERE tenk1.hundred = 42 AND (tenk2.thousand = 42 OR tenk2.thousand = 41 OR tenk2.tenthous = 2) AND
+ tenk2.hundred = tenk1.hundred;
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM tenk1 JOIN tenk2 ON
+ tenk1.hundred = 42 AND (tenk2.thousand = 42 OR tenk2.thousand = 41 OR tenk2.tenthous = 2) AND
+ tenk2.hundred = tenk1.hundred;
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM tenk1 LEFT JOIN tenk2 ON
+ tenk1.hundred = 42 AND (tenk2.thousand = 42 OR tenk2.thousand = 41 OR tenk2.tenthous = 2) AND
+ tenk2.hundred = tenk1.hundred;
--
-- Check behavior with duplicate index column contents
--
@@ -1252,6 +1344,27 @@ SELECT b.relname,
ORDER BY 1;
DROP TABLE concur_temp_tab_1, concur_temp_tab_2, reindex_temp_before;
+-- Check bitmap scan can consider similar OR arguments separately without
+-- grouping them into SAOP.
+CREATE TABLE bitmap_split_or (a int NOT NULL, b int NOT NULL, c int NOT NULL);
+INSERT INTO bitmap_split_or (SELECT 1, 1, i FROM generate_series(1, 1000) i);
+INSERT INTO bitmap_split_or (select i, 2, 2 FROM generate_series(1, 1000) i);
+VACUUM ANALYZE bitmap_split_or;
+CREATE INDEX t_b_partial_1_idx ON bitmap_split_or (b) WHERE a = 1;
+CREATE INDEX t_b_partial_2_idx ON bitmap_split_or (b) WHERE a = 2;
+EXPLAIN (COSTS OFF)
+SELECT * FROM bitmap_split_or WHERE (a = 1 OR a = 2) AND b = 2;
+DROP INDEX t_b_partial_1_idx;
+DROP INDEX t_b_partial_2_idx;
+CREATE INDEX t_a_b_idx ON bitmap_split_or (a, b);
+CREATE INDEX t_b_c_idx ON bitmap_split_or (b, c);
+CREATE STATISTICS t_a_b_stat (mcv) ON a, b FROM bitmap_split_or;
+CREATE STATISTICS t_b_c_stat (mcv) ON b, c FROM bitmap_split_or;
+ANALYZE bitmap_split_or;
+EXPLAIN (COSTS OFF)
+SELECT * FROM bitmap_split_or WHERE a = 1 AND (b = 1 OR b = 2) AND c = 2;
+DROP TABLE bitmap_split_or;
+
--
-- REINDEX SCHEMA
--
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 8281bbd8ef8..b67b4caef23 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -1433,6 +1433,15 @@ select * from tenk1 a join tenk1 b on
(a.unique1 = 1 and b.unique1 = 2) or
((a.unique2 = 3 or a.unique2 = 7) and b.hundred = 4);
+explain (costs off)
+select * from tenk1 a join tenk1 b on
+ (a.unique1 = 1 and b.unique1 = 2) or
+ ((a.unique2 = 3 or a.unique2 = 7) and b.hundred = 4);
+explain (costs off)
+select * from tenk1 a join tenk1 b on
+ (a.unique1 < 20 or a.unique1 = 3 or a.unique1 = 1 and b.unique1 = 2) or
+ ((a.unique2 = 3 or a.unique2 = 7) and b.hundred = 4);
+
--
-- test placement of movable quals in a parameterized join tree
--
diff --git a/src/test/regress/sql/jsonb_jsonpath.sql b/src/test/regress/sql/jsonb_jsonpath.sql
index c647af55e94..acb508c0dd2 100644
--- a/src/test/regress/sql/jsonb_jsonpath.sql
+++ b/src/test/regress/sql/jsonb_jsonpath.sql
@@ -596,7 +596,11 @@ select jsonb_path_query_tz('"2023-08-15 12:34:56"', '$.timestamp_tz().string()')
select jsonb_path_query('"2023-08-15 12:34:56 +5:30"', '$.timestamp_tz().string()');
select jsonb_path_query('"2023-08-15 12:34:56"', '$.timestamp().string()');
select jsonb_path_query('"12:34:56 +5:30"', '$.time_tz().string()');
+-- this timetz usage will absorb the UTC offset of the current timezone setting
+begin;
+set local timezone = 'UTC-10';
select jsonb_path_query_tz('"12:34:56"', '$.time_tz().string()');
+rollback;
select jsonb_path_query('"12:34:56"', '$.time().string()');
select jsonb_path_query('"2023-08-15"', '$.date().string()');
diff --git a/src/test/regress/sql/rowsecurity.sql b/src/test/regress/sql/rowsecurity.sql
index 3011d71b12b..6d2414b6044 100644
--- a/src/test/regress/sql/rowsecurity.sql
+++ b/src/test/regress/sql/rowsecurity.sql
@@ -2177,6 +2177,7 @@ CREATE FUNCTION op_leak(int, int) RETURNS bool
CREATE OPERATOR <<< (procedure = op_leak, leftarg = int, rightarg = int,
restrict = scalarltsel);
SELECT * FROM rls_tbl WHERE a <<< 1000;
+EXPLAIN (COSTS OFF) SELECT * FROM rls_tbl WHERE a <<< 1000 or a <<< 900;
DROP OPERATOR <<< (int, int);
DROP FUNCTION op_leak(int, int);
RESET SESSION AUTHORIZATION;
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index 0c08a6cc42e..5c786b16c6f 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -1634,6 +1634,7 @@ CREATE FUNCTION op_leak(int, int) RETURNS bool
CREATE OPERATOR <<< (procedure = op_leak, leftarg = int, rightarg = int,
restrict = scalarltsel);
SELECT * FROM tststats.priv_test_tbl WHERE a <<< 0 AND b <<< 0; -- Permission denied
+SELECT * FROM tststats.priv_test_tbl WHERE a <<< 0 OR b <<< 0;
DELETE FROM tststats.priv_test_tbl WHERE a <<< 0 AND b <<< 0; -- Permission denied
-- Grant access via a security barrier view, but hide all data
@@ -1645,6 +1646,7 @@ GRANT SELECT, DELETE ON tststats.priv_test_view TO regress_stats_user1;
-- Should now have access via the view, but see nothing and leak nothing
SET SESSION AUTHORIZATION regress_stats_user1;
SELECT * FROM tststats.priv_test_view WHERE a <<< 0 AND b <<< 0; -- Should not leak
+SELECT * FROM tststats.priv_test_view WHERE a <<< 0 OR b <<< 0; -- Should not leak
DELETE FROM tststats.priv_test_view WHERE a <<< 0 AND b <<< 0; -- Should not leak
-- Grant table access, but hide all data with RLS
@@ -1655,6 +1657,7 @@ GRANT SELECT, DELETE ON tststats.priv_test_tbl TO regress_stats_user1;
-- Should now have direct table access, but see nothing and leak nothing
SET SESSION AUTHORIZATION regress_stats_user1;
SELECT * FROM tststats.priv_test_tbl WHERE a <<< 0 AND b <<< 0; -- Should not leak
+SELECT * FROM tststats.priv_test_tbl WHERE a <<< 0 OR b <<< 0;
DELETE FROM tststats.priv_test_tbl WHERE a <<< 0 AND b <<< 0; -- Should not leak
-- privilege checks for pg_stats_ext and pg_stats_ext_exprs
diff --git a/src/test/regress/sql/uuid.sql b/src/test/regress/sql/uuid.sql
index c88f6d087a7..75ee966ded0 100644
--- a/src/test/regress/sql/uuid.sql
+++ b/src/test/regress/sql/uuid.sql
@@ -63,6 +63,18 @@ CREATE INDEX guid1_hash ON guid1 USING HASH (guid_field);
-- unique index test
CREATE UNIQUE INDEX guid1_unique_BTREE ON guid1 USING BTREE (guid_field);
+
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) FROM guid1 WHERE guid_field <> '11111111111111111111111111111111' OR
+ guid_field <> '3f3e3c3b-3a30-3938-3736-353433a2313e';
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) FROM guid1 WHERE guid_field <= '22222222-2222-2222-2222-222222222222' OR
+ guid_field <= '11111111111111111111111111111111' OR
+ guid_field <= '3f3e3c3b-3a30-3938-3736-353433a2313e';
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) FROM guid1 WHERE guid_field = '3f3e3c3b-3a30-3938-3736-353433a2313e' OR
+ guid_field = '11111111111111111111111111111111';
+
-- should fail
INSERT INTO guid1(guid_field) VALUES('11111111-1111-1111-1111-111111111111');
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index d4e9515e9f4..0131f9a8d43 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1761,6 +1761,7 @@ OprCacheKey
OprInfo
OprProofCacheEntry
OprProofCacheKey
+OrArgIndexMatch
OuterJoinClauseInfo
OutputPluginCallbacks
OutputPluginOptions
@@ -3266,6 +3267,7 @@ amgetbitmap_function
amgettuple_function
aminitparallelscan_function
aminsert_function
+aminsert_extended_function
aminsertcleanup_function
ammarkpos_function
amoptions_function