Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use numrows_pre_compression in approx row count #6365

Merged
merged 1 commit into from
Dec 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .unreleased/fix_6365
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixes: #6365 Use numrows_pre_compression in approximate row count
35 changes: 19 additions & 16 deletions sql/size_utils.sql
Original file line number Diff line number Diff line change
Expand Up @@ -453,14 +453,15 @@ BEGIN
IF local_compressed_hypertable_id IS NOT NULL THEN
uncompressed_row_count = _timescaledb_functions.get_approx_row_count(relation);

WITH compressed_hypertable AS (SELECT table_name, schema_name FROM _timescaledb_catalog.hypertable ht
WHERE ht.id = local_compressed_hypertable_id)
SELECT c.oid INTO compressed_hypertable_oid FROM pg_class c
INNER JOIN compressed_hypertable h ON (c.relname = h.table_name)
INNER JOIN pg_namespace n ON (n.nspname = h.schema_name);

compressed_row_count = _timescaledb_functions.get_approx_row_count(compressed_hypertable_oid);
RETURN (uncompressed_row_count + (compressed_row_count * max_compressed_row_count));
-- use the compression_chunk_size stats to fetch precompressed num rows
SELECT COALESCE(SUM(numrows_pre_compression), 0) FROM _timescaledb_catalog.chunk srcch,
_timescaledb_catalog.compression_chunk_size map, _timescaledb_catalog.hypertable srcht
INTO compressed_row_count
WHERE map.chunk_id = srcch.id
AND srcht.id = srcch.hypertable_id AND srcht.table_name = local_table_name
AND srcht.schema_name = local_schema_name;

RETURN (uncompressed_row_count + compressed_row_count);
ELSE
uncompressed_row_count = _timescaledb_functions.get_approx_row_count(relation);
RETURN uncompressed_row_count;
Expand All @@ -477,22 +478,24 @@ BEGIN
-- 'input is chunk #1';
IF is_compressed_chunk IS NULL AND local_compressed_chunk_id IS NOT NULL THEN
-- 'Include both uncompressed and compressed chunk #2';
WITH compressed_ns_oid AS ( SELECT table_name, oid FROM _timescaledb_catalog.chunk ch INNER JOIN pg_namespace ns ON
(ch.id = local_compressed_chunk_id and ch.schema_name = ns.nspname))
SELECT c.oid FROM pg_class c INNER JOIN compressed_ns_oid
ON ( c.relnamespace = compressed_ns_oid.oid AND c.relname = compressed_ns_oid.table_name)
INTO local_compressed_chunk_oid;
-- use the compression_chunk_size stats to fetch precompressed num rows
SELECT COALESCE(numrows_pre_compression, 0) FROM _timescaledb_catalog.compression_chunk_size
INTO compressed_row_count
WHERE compressed_chunk_id = local_compressed_chunk_id;

uncompressed_row_count = _timescaledb_functions.get_approx_row_count(relation);
compressed_row_count = _timescaledb_functions.get_approx_row_count(local_compressed_chunk_oid);
RETURN uncompressed_row_count + (compressed_row_count * max_compressed_row_count);
RETURN (uncompressed_row_count + compressed_row_count);
ELSIF is_compressed_chunk IS NULL AND local_compressed_chunk_id IS NULL THEN
-- 'input relation is uncompressed chunk #3';
uncompressed_row_count = _timescaledb_functions.get_approx_row_count(relation);
RETURN uncompressed_row_count;
ELSE
-- 'compressed chunk only #4';
compressed_row_count = _timescaledb_functions.get_approx_row_count(relation) * max_compressed_row_count;
-- use the compression_chunk_size stats to fetch precompressed num rows
SELECT COALESCE(SUM(numrows_pre_compression), 0) FROM _timescaledb_catalog.chunk srcch,
_timescaledb_catalog.compression_chunk_size map INTO compressed_row_count
WHERE map.compressed_chunk_id = srcch.id
AND srcch.table_name = local_table_name AND srcch.schema_name = local_schema_name;
RETURN compressed_row_count;
END IF;
END IF;
Expand Down
82 changes: 80 additions & 2 deletions tsl/test/expected/compression.out
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,20 @@ insert into foo values( 3 , 16 , 20, NULL);
insert into foo values( 10 , 10 , 20, NULL);
insert into foo values( 20 , 11 , 20, NULL);
insert into foo values( 30 , 12 , 20, NULL);
analyze foo;
-- check that approximate_row_count works with a regular table
SELECT approximate_row_count('foo');
approximate_row_count
-----------------------
4
(1 row)

SELECT count(*) from foo;
count
-------
4
(1 row)

alter table foo set (timescaledb.compress, timescaledb.compress_segmentby = 'a,b', timescaledb.compress_orderby = 'c desc, d asc nulls last');
--test self-referencing updates
SET timescaledb.enable_transparent_decompression to ON;
Expand Down Expand Up @@ -1259,6 +1273,7 @@ SELECT * FROM pg_stats WHERE tablename = :statchunk;
(0 rows)

ALTER TABLE stattest SET (timescaledb.compress);
-- check that approximate_row_count works with all normal chunks
SELECT approximate_row_count('stattest');
approximate_row_count
-----------------------
Expand All @@ -1271,10 +1286,18 @@ SELECT compress_chunk(c) FROM show_chunks('stattest') c;
_timescaledb_internal._hyper_27_55_chunk
(1 row)

-- check that approximate_row_count works with all compressed chunks
SELECT approximate_row_count('stattest');
approximate_row_count
-----------------------
0
26
Comment on lines 1290 to +1293
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You might want to add a test that compares this with the actual number of rows counted explicitly. For cases where you have serial execution, you should get the same value.

The test also seems to be missing some cases where you have a mix of uncompressed and compressed rows in a chunk, so might be good to verify that.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mkindahl many additional tests added to this PR now

(1 row)

-- the actual row count should match the approximate count above
SELECT count(*) from stattest;
count
-------
26
(1 row)

-- Uncompressed chunk table is empty since we just compressed the chunk and moved everything to compressed chunk table.
Expand Down Expand Up @@ -1330,6 +1353,13 @@ SELECT relpages, reltuples FROM pg_class WHERE relname = :'STAT_COMP_CHUNK_NAME'
1 | 1
(1 row)

-- verify that approximate_row_count works fine on a chunk with compressed data
SELECT approximate_row_count('_timescaledb_internal.' || :'STAT_COMP_CHUNK_NAME');
approximate_row_count
-----------------------
26
(1 row)

-- Verify partial chunk stats are handled correctly when analyzing
-- for both uncompressed and compressed chunk tables
INSERT INTO stattest SELECT '2020/02/20 01:00'::TIMESTAMPTZ + ('1 hour'::interval * v), 250 * v FROM generate_series(25,50) v;
Expand Down Expand Up @@ -1362,6 +1392,28 @@ SELECT relpages, reltuples FROM pg_class WHERE relname = :'STAT_COMP_CHUNK_NAME'
1 | 1
(1 row)

-- verify that approximate_row_count works fine on a chunk with a mix of uncompressed
-- and compressed data
SELECT table_name as "STAT_CHUNK_NAME" from temptable \gset
SELECT approximate_row_count('_timescaledb_internal.' || :'STAT_CHUNK_NAME');
approximate_row_count
-----------------------
52
(1 row)

-- should match the count obtained from the hypertable after in-memory decompression
SELECT count(*) from stattest;
count
-------
52
(1 row)

SELECT count(*) from show_chunks('stattest');
count
-------
1
(1 row)

-- Verify that decompressing the chunk restores autoanalyze to the hypertable's setting
SELECT reloptions FROM pg_class WHERE relname = :statchunk;
reloptions
Expand Down Expand Up @@ -1423,7 +1475,6 @@ SELECT relpages, reltuples FROM pg_class WHERE relname = :statchunk;
1 | 52
(1 row)

DROP TABLE stattest;
--- Test that analyze on compression internal table updates stats on original chunks
CREATE TABLE stattest2(time TIMESTAMPTZ NOT NULL, c1 int, c2 int);
SELECT create_hypertable('stattest2', 'time', chunk_time_interval=>'1 day'::interval);
Expand Down Expand Up @@ -2626,3 +2677,30 @@ SELECT * FROM sensor_data_compressed ORDER BY time DESC LIMIT 5;
(70 rows)

RESET timescaledb.enable_decompression_sorted_merge;
-- create another chunk
INSERT INTO stattest SELECT '2021/02/20 01:00'::TIMESTAMPTZ + ('1 hour'::interval * v), 250 * v FROM generate_series(125,140) v;
ANALYZE stattest;
SELECT count(*) from show_chunks('stattest');
count
-------
2
(1 row)

SELECT table_name INTO TEMPORARY temptable FROM _timescaledb_catalog.chunk WHERE hypertable_id = (SELECT id FROM _timescaledb_catalog.hypertable WHERE table_name = 'stattest') ORDER BY creation_time desc limit 1;
SELECT table_name as "STAT_CHUNK2_NAME" FROM temptable \gset
-- verify that approximate_row_count works correctly on uncompressed chunks
SELECT approximate_row_count('_timescaledb_internal.' || :'STAT_CHUNK2_NAME');
approximate_row_count
-----------------------
16
(1 row)

-- verify that approximate_row_count works fine on a hypertable with a mix of uncompressed
-- and compressed data
SELECT approximate_row_count('stattest');
approximate_row_count
-----------------------
68
(1 row)

DROP TABLE stattest;
33 changes: 31 additions & 2 deletions tsl/test/sql/compression.sql
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ insert into foo values( 3 , 16 , 20, NULL);
insert into foo values( 10 , 10 , 20, NULL);
insert into foo values( 20 , 11 , 20, NULL);
insert into foo values( 30 , 12 , 20, NULL);
analyze foo;
-- check that approximate_row_count works with a regular table
SELECT approximate_row_count('foo');
SELECT count(*) from foo;

alter table foo set (timescaledb.compress, timescaledb.compress_segmentby = 'a,b', timescaledb.compress_orderby = 'c desc, d asc nulls last');

Expand Down Expand Up @@ -538,9 +542,13 @@ SELECT table_name INTO TEMPORARY temptable FROM _timescaledb_catalog.chunk WHERE
SELECT * FROM pg_stats WHERE tablename = :statchunk;

ALTER TABLE stattest SET (timescaledb.compress);
-- check that approximate_row_count works with all normal chunks
SELECT approximate_row_count('stattest');
SELECT compress_chunk(c) FROM show_chunks('stattest') c;
-- check that approximate_row_count works with all compressed chunks
SELECT approximate_row_count('stattest');
-- the actual row count should match the approximate count above
SELECT count(*) from stattest;
-- Uncompressed chunk table is empty since we just compressed the chunk and moved everything to compressed chunk table.
-- reltuples is initially -1 on PG14 before VACUUM/ANALYZE was run
SELECT relpages, CASE WHEN reltuples > 0 THEN reltuples ELSE 0 END as reltuples FROM pg_class WHERE relname = :statchunk;
Expand All @@ -564,6 +572,8 @@ SELECT relpages, reltuples FROM pg_class WHERE relname = :statchunk;

-- verify that corresponding compressed chunk table stats is updated as well.
SELECT relpages, reltuples FROM pg_class WHERE relname = :'STAT_COMP_CHUNK_NAME';
-- verify that approximate_row_count works fine on a chunk with compressed data
SELECT approximate_row_count('_timescaledb_internal.' || :'STAT_COMP_CHUNK_NAME');

-- Verify partial chunk stats are handled correctly when analyzing
-- for both uncompressed and compressed chunk tables
Expand All @@ -577,7 +587,13 @@ SELECT relpages, reltuples FROM pg_class WHERE relname = :statchunk;
-- verify that corresponding compressed chunk table stats have not changed since
-- we didn't compress anything new.
SELECT relpages, reltuples FROM pg_class WHERE relname = :'STAT_COMP_CHUNK_NAME';

-- verify that approximate_row_count works fine on a chunk with a mix of uncompressed
-- and compressed data
SELECT table_name as "STAT_CHUNK_NAME" from temptable \gset
SELECT approximate_row_count('_timescaledb_internal.' || :'STAT_CHUNK_NAME');
-- should match the count obtained from the hypertable after in-memory decompression
SELECT count(*) from stattest;
SELECT count(*) from show_chunks('stattest');

-- Verify that decompressing the chunk restores autoanalyze to the hypertable's setting
SELECT reloptions FROM pg_class WHERE relname = :statchunk;
Expand All @@ -597,7 +613,6 @@ SET client_min_messages TO NOTICE;
SELECT histogram_bounds FROM pg_stats WHERE tablename = :statchunk and attname = 'c1';
SELECT relpages, reltuples FROM pg_class WHERE relname = :statchunk;

DROP TABLE stattest;

--- Test that analyze on compression internal table updates stats on original chunks
CREATE TABLE stattest2(time TIMESTAMPTZ NOT NULL, c1 int, c2 int);
Expand Down Expand Up @@ -1052,3 +1067,17 @@ SET timescaledb.enable_decompression_sorted_merge = FALSE;
:PREFIX
SELECT * FROM sensor_data_compressed ORDER BY time DESC LIMIT 5;
RESET timescaledb.enable_decompression_sorted_merge;

-- create another chunk
INSERT INTO stattest SELECT '2021/02/20 01:00'::TIMESTAMPTZ + ('1 hour'::interval * v), 250 * v FROM generate_series(125,140) v;
ANALYZE stattest;
SELECT count(*) from show_chunks('stattest');
SELECT table_name INTO TEMPORARY temptable FROM _timescaledb_catalog.chunk WHERE hypertable_id = (SELECT id FROM _timescaledb_catalog.hypertable WHERE table_name = 'stattest') ORDER BY creation_time desc limit 1;
SELECT table_name as "STAT_CHUNK2_NAME" FROM temptable \gset
-- verify that approximate_row_count works correctly on uncompressed chunks
SELECT approximate_row_count('_timescaledb_internal.' || :'STAT_CHUNK2_NAME');
-- verify that approximate_row_count works fine on a hypertable with a mix of uncompressed
-- and compressed data
SELECT approximate_row_count('stattest');

DROP TABLE stattest;
Loading