From 4fde8501878ba781c4b680974e5d42fc10ec20a6 Mon Sep 17 00:00:00 2001 From: Konstantina Skovola Date: Thu, 14 Mar 2024 11:33:11 +0200 Subject: [PATCH] Fix plantime chunk exclusion for OSM chunk --- src/dimension_slice.h | 2 + src/hypertable.c | 2 +- src/hypertable_restrict_info.c | 28 ++++++++---- tsl/test/expected/chunk_utils_internal.out | 53 +++++++++++++++++++--- tsl/test/sql/chunk_utils_internal.sql | 7 +++ 5 files changed, 75 insertions(+), 17 deletions(-) diff --git a/src/dimension_slice.h b/src/dimension_slice.h index 6c5fac6ce4b..83c2935b23c 100644 --- a/src/dimension_slice.h +++ b/src/dimension_slice.h @@ -114,3 +114,5 @@ extern int ts_dimension_slice_update_by_id(int32 dimension_slice_id, #define dimension_slice_collision_scan(dimension_id, range_start, range_end) \ ts_dimension_slice_collision_scan_limit(dimension_id, range_start, range_end, 0) + +DimensionSlice *ts_chunk_get_osm_slice_and_lock(int32 osm_chunk_id, int32 time_dim_id); diff --git a/src/hypertable.c b/src/hypertable.c index 1199ae96d63..dcbf04291ad 100644 --- a/src/hypertable.c +++ b/src/hypertable.c @@ -2431,7 +2431,7 @@ hypertable_update_status_osm(Hypertable *ht) return success; } -static DimensionSlice * +DimensionSlice * ts_chunk_get_osm_slice_and_lock(int32 osm_chunk_id, int32 time_dim_id) { ChunkConstraints *constraints = diff --git a/src/hypertable_restrict_info.c b/src/hypertable_restrict_info.c index b00c38f20fd..d9877cf8711 100644 --- a/src/hypertable_restrict_info.c +++ b/src/hypertable_restrict_info.c @@ -692,14 +692,6 @@ ts_hypertable_restrict_info_get_chunks(HypertableRestrictInfo *hri, Hypertable * chunk_ids = ts_chunk_id_find_in_subspace(ht, dimension_vectors); } - /* - * Always include the OSM chunk if we have one and OSM reads are - * enabled. It has some virtual dimension slices (at the moment, - * (+inf, +inf) slice for time, but it used to be different and might - * change again.) So sometimes it will match and sometimes it won't, - * so we have to check if it's already there not to add a duplicate. - * Similarly if OSM reads are disabled then we exclude the OSM chunk. - */ int32 osm_chunk_id = ts_chunk_get_osm_chunk_id(ht->fd.id); if (osm_chunk_id != INVALID_CHUNK_ID) @@ -710,7 +702,25 @@ ts_hypertable_restrict_info_get_chunks(HypertableRestrictInfo *hri, Hypertable * } else { - chunk_ids = list_append_unique_int(chunk_ids, osm_chunk_id); + /* + * At this point the OSM chunk was either: + * 1. added to the list because it has a valid range that agrees with the restrictions; + * 2. not added because it has a valid range and it was excluded; + * 3. not added because it has an invalid range and it was excluded. + * If the chunk's range is invalid, only then should we consider adding it, otherwise + * the exclusion logic should have correctly included or excluded it from the list. + * Also, if the range is invalid but the NONCONTIGUOUS flag is not set, indicating that the + * chunk is empty, we don't need to do a scan so we do not add it either. + */ + const Dimension *time_dim = hyperspace_get_open_dimension(ht->space, 0); + DimensionSlice *slice = + ts_chunk_get_osm_slice_and_lock(osm_chunk_id, time_dim->fd.id); + bool range_invalid = + ts_osm_chunk_range_is_invalid(slice->fd.range_start, slice->fd.range_end); + + if (range_invalid && + ts_flags_are_set_32(ht->fd.status, HYPERTABLE_STATUS_OSM_CHUNK_NONCONTIGUOUS)) + chunk_ids = list_append_unique_int(chunk_ids, osm_chunk_id); } } } diff --git a/tsl/test/expected/chunk_utils_internal.out b/tsl/test/expected/chunk_utils_internal.out index 6536bd44ec7..b5dee1ccec5 100644 --- a/tsl/test/expected/chunk_utils_internal.out +++ b/tsl/test/expected/chunk_utils_internal.out @@ -758,14 +758,13 @@ EXPLAIN (COSTS OFF) SELECT * from ht_try; -> Seq Scan on _hyper_5_10_chunk (3 rows) +-- foreign chunk contains data from Jan 2020, so it is skipped during planning EXPLAIN (COSTS OFF) SELECT * from ht_try WHERE timec > '2022-01-01 01:00'; - QUERY PLAN ----------------------------------------------------------------------------------------- - Append - -> Foreign Scan on child_fdw_table - -> Index Scan using _hyper_5_10_chunk_ht_try_timec_idx on _hyper_5_10_chunk - Index Cond: (timec > 'Sat Jan 01 01:00:00 2022 PST'::timestamp with time zone) -(4 rows) + QUERY PLAN +---------------------------------------------------------------------------------- + Index Scan using _hyper_5_10_chunk_ht_try_timec_idx on _hyper_5_10_chunk + Index Cond: (timec > 'Sat Jan 01 01:00:00 2022 PST'::timestamp with time zone) +(2 rows) EXPLAIN (COSTS OFF) SELECT * from ht_try WHERE timec < '2023-01-01 01:00'; QUERY PLAN @@ -1323,6 +1322,27 @@ WHERE c.hypertable_id = :htid AND cc.chunk_id = c.id AND ds.id = cc.dimension_sl 28 | test_chunkapp_fdw_child | 0 | t | 25 | 9223372036854775806 | 9223372036854775807 (3 rows) +-- but also, OSM chunk should be included in the scan, since range is invalid and chunk is not empty +EXPLAIN SELECT * FROM test_chunkapp WHERE time < '2023-01-01' ORDER BY 1; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------- + Merge Append (cost=100.33..234.79 rows=2118 width=12) + Sort Key: _hyper_16_26_chunk."time" + -> Index Scan Backward using _hyper_16_26_chunk_test_chunkapp_time_idx on _hyper_16_26_chunk (cost=0.15..23.05 rows=680 width=12) + Index Cond: ("time" < 'Sun Jan 01 00:00:00 2023 PST'::timestamp with time zone) + -> Index Scan Backward using _hyper_16_27_chunk_test_chunkapp_time_idx on _hyper_16_27_chunk (cost=0.15..23.05 rows=680 width=12) + Index Cond: ("time" < 'Sun Jan 01 00:00:00 2023 PST'::timestamp with time zone) + -> Foreign Scan on test_chunkapp_fdw_child (cost=100.00..161.29 rows=758 width=12) +(7 rows) + +SELECT * FROM test_chunkapp WHERE time < '2023-01-01' ORDER BY 1; + time | a +------------------------------+--- + Wed Jan 01 01:00:00 2020 PST | 1 + Thu Jan 02 01:00:00 2020 PST | 2 + Fri Jan 03 02:00:00 2020 PST | 3 +(3 rows) + -- now set empty to true, should ordered append \c postgres_fdw_db :ROLE_4; DELETE FROM test_chunkapp_fdw; @@ -1350,6 +1370,25 @@ SELECT * FROM test_chunkapp ORDER BY 1; Thu Jan 02 01:00:00 2020 PST | 2 (2 rows) +-- should exclude the OSM chunk this time since it is empty +EXPLAIN SELECT * FROM test_chunkapp WHERE time < '2023-01-01' ORDER BY 1; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------- + Custom Scan (ChunkAppend) on test_chunkapp (cost=0.15..46.11 rows=1360 width=12) + Order: test_chunkapp."time" + -> Index Scan Backward using _hyper_16_26_chunk_test_chunkapp_time_idx on _hyper_16_26_chunk (cost=0.15..23.05 rows=680 width=12) + Index Cond: ("time" < 'Sun Jan 01 00:00:00 2023 PST'::timestamp with time zone) + -> Index Scan Backward using _hyper_16_27_chunk_test_chunkapp_time_idx on _hyper_16_27_chunk (cost=0.15..23.05 rows=680 width=12) + Index Cond: ("time" < 'Sun Jan 01 00:00:00 2023 PST'::timestamp with time zone) +(6 rows) + +SELECT * FROM test_chunkapp WHERE time < '2023-01-01' ORDER BY 1; + time | a +------------------------------+--- + Wed Jan 01 01:00:00 2020 PST | 1 + Thu Jan 02 01:00:00 2020 PST | 2 +(2 rows) + \set ON_ERROR_STOP 0 -- test adding constraint directly on OSM chunk is blocked ALTER TABLE test_chunkapp_fdw_child ADD CHECK (a > 0); -- non-dimensional diff --git a/tsl/test/sql/chunk_utils_internal.sql b/tsl/test/sql/chunk_utils_internal.sql index 9c2dde243d2..dc12a524493 100644 --- a/tsl/test/sql/chunk_utils_internal.sql +++ b/tsl/test/sql/chunk_utils_internal.sql @@ -409,6 +409,7 @@ EXPLAIN (COSTS OFF) SELECT * from ht_try WHERE timec > '2022-01-01 01:00'; EXPLAIN (COSTS OFF) SELECT * from ht_try WHERE timec < '2023-01-01 01:00'; SET timescaledb.enable_tiered_reads=true; EXPLAIN (COSTS OFF) SELECT * from ht_try; +-- foreign chunk contains data from Jan 2020, so it is skipped during planning EXPLAIN (COSTS OFF) SELECT * from ht_try WHERE timec > '2022-01-01 01:00'; EXPLAIN (COSTS OFF) SELECT * from ht_try WHERE timec < '2023-01-01 01:00'; @@ -702,6 +703,9 @@ SELECT * FROM test_chunkapp ORDER BY 1; SELECT cc.chunk_id, c.table_name, c.status, c.osm_chunk, cc.dimension_slice_id, ds.range_start, ds.range_end FROM _timescaledb_catalog.chunk c, _timescaledb_catalog.chunk_constraint cc, _timescaledb_catalog.dimension_slice ds WHERE c.hypertable_id = :htid AND cc.chunk_id = c.id AND ds.id = cc.dimension_slice_id ORDER BY cc.chunk_id; +-- but also, OSM chunk should be included in the scan, since range is invalid and chunk is not empty +EXPLAIN SELECT * FROM test_chunkapp WHERE time < '2023-01-01' ORDER BY 1; +SELECT * FROM test_chunkapp WHERE time < '2023-01-01' ORDER BY 1; -- now set empty to true, should ordered append \c postgres_fdw_db :ROLE_4; DELETE FROM test_chunkapp_fdw; @@ -709,6 +713,9 @@ DELETE FROM test_chunkapp_fdw; SELECT _timescaledb_functions.hypertable_osm_range_update('test_chunkapp', NULL::timestamptz, NULL, empty => true); EXPLAIN SELECT * FROM test_chunkapp ORDER BY 1; SELECT * FROM test_chunkapp ORDER BY 1; +-- should exclude the OSM chunk this time since it is empty +EXPLAIN SELECT * FROM test_chunkapp WHERE time < '2023-01-01' ORDER BY 1; +SELECT * FROM test_chunkapp WHERE time < '2023-01-01' ORDER BY 1; \set ON_ERROR_STOP 0 -- test adding constraint directly on OSM chunk is blocked