From 4cebdd9a4b42a72e360eee7080576164fc302d87 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 9 Jul 2021 17:12:29 +0100 Subject: [PATCH 1/5] Reduce likelihood of Postgres table scanning `state_groups_state`. The postgres statistics collector sometimes massively underestimates the number of distinct state groups in the `state_groups_state` table, which can cause postgres to use table scans for queries for multiple state groups. We fix this by manually setting `n_distinct` on the column. --- ...state_groups_state_n_distinct.sql.postgres | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 synapse/storage/schema/main/delta/61/02state_groups_state_n_distinct.sql.postgres diff --git a/synapse/storage/schema/main/delta/61/02state_groups_state_n_distinct.sql.postgres b/synapse/storage/schema/main/delta/61/02state_groups_state_n_distinct.sql.postgres new file mode 100644 index 000000000000..04e09661539a --- /dev/null +++ b/synapse/storage/schema/main/delta/61/02state_groups_state_n_distinct.sql.postgres @@ -0,0 +1,34 @@ +/* Copyright 2021 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +-- By default the postgres statistics collector massively underestimates the +-- number of distinct state groups are in the `state_groups_state`, which can +-- cause postgres to use table scans for queries for multiple state groups. 
+-- +-- To work around this we can manually tell postgres the number of distint state +-- groups there are by setting `n_distinct` (a negative value here is the number +-- of distinct values divided by the number of rows, so -0.02 means on average +-- there are 50 rows per disinct value). We don't need a particularly +-- accurate number here, as a) we just want it to always use index scans and b) +-- our estimate is going to be better than the one made by the statistics +-- collector. + +ALTER TABLE state_groups_state ALTER COLUMN state_group SET (n_dinstict = -0.02) + +-- Ideally we'd do an `ANALYZE state_groups_state (state_group)` here so that +-- the above gets picked up immediately, but that can take a bit of time so we +-- rely on the autovacuum eventually getting run and doing that in the +-- background for us. From 9d483170aabdee095cfc3eadc333f26716f0d8a9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 9 Jul 2021 17:19:59 +0100 Subject: [PATCH 2/5] Newsfile --- changelog.d/10359.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/10359.bugfix diff --git a/changelog.d/10359.bugfix b/changelog.d/10359.bugfix new file mode 100644 index 000000000000..d318f8fa088d --- /dev/null +++ b/changelog.d/10359.bugfix @@ -0,0 +1 @@ +Fix PostgreSQL sometimes using table scans for queries against `state_groups_state` table, taking a long time and a large amount of IO. 
From 64b48b7884b2e5c341a5b79d0bbbffb703862fd8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 12 Jul 2021 10:04:53 +0100 Subject: [PATCH 3/5] Fix update: correct misspelled `n_distinct` option and add missing semicolon --- .../main/delta/61/02state_groups_state_n_distinct.sql.postgres | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/schema/main/delta/61/02state_groups_state_n_distinct.sql.postgres b/synapse/storage/schema/main/delta/61/02state_groups_state_n_distinct.sql.postgres index 04e09661539a..bd71e6f9bef5 100644 --- a/synapse/storage/schema/main/delta/61/02state_groups_state_n_distinct.sql.postgres +++ b/synapse/storage/schema/main/delta/61/02state_groups_state_n_distinct.sql.postgres @@ -26,7 +26,7 @@ -- our estimate is going to be better than the one made by the statistics -- collector. -ALTER TABLE state_groups_state ALTER COLUMN state_group SET (n_dinstict = -0.02) +ALTER TABLE state_groups_state ALTER COLUMN state_group SET (n_distinct = -0.02); -- Ideally we'd do an `ANALYZE state_groups_state (state_group)` here so that -- the above gets picked up immediately, but that can take a bit of time so we From 40e2bc0195008178901a46257da87c34303d855c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Jul 2021 11:33:40 +0100 Subject: [PATCH 4/5] Apply suggestions from code review Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- .../delta/61/02state_groups_state_n_distinct.sql.postgres | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/schema/main/delta/61/02state_groups_state_n_distinct.sql.postgres b/synapse/storage/schema/main/delta/61/02state_groups_state_n_distinct.sql.postgres index bd71e6f9bef5..35a153da7b92 100644 --- a/synapse/storage/schema/main/delta/61/02state_groups_state_n_distinct.sql.postgres +++ b/synapse/storage/schema/main/delta/61/02state_groups_state_n_distinct.sql.postgres @@ -18,10 +18,10 @@ -- number of distinct state groups are in the `state_groups_state`, which can -- cause postgres 
to use table scans for queries for multiple state groups. -- --- To work around this we can manually tell postgres the number of distint state +-- To work around this we can manually tell postgres the number of distinct state -- groups there are by setting `n_distinct` (a negative value here is the number -- of distinct values divided by the number of rows, so -0.02 means on average --- there are 50 rows per disinct value). We don't need a particularly +-- there are 50 rows per distinct value). We don't need a particularly -- accurate number here, as a) we just want it to always use index scans and b) -- our estimate is going to be better than the one made by the statistics -- collector. From 52729bded64478adc50df234aed8fc476786393f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Jul 2021 15:34:48 +0100 Subject: [PATCH 5/5] `state_groups_state` table is in the state DB --- .../delta/61/02state_groups_state_n_distinct.sql.postgres | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename synapse/storage/schema/{main => state}/delta/61/02state_groups_state_n_distinct.sql.postgres (100%) diff --git a/synapse/storage/schema/main/delta/61/02state_groups_state_n_distinct.sql.postgres b/synapse/storage/schema/state/delta/61/02state_groups_state_n_distinct.sql.postgres similarity index 100% rename from synapse/storage/schema/main/delta/61/02state_groups_state_n_distinct.sql.postgres rename to synapse/storage/schema/state/delta/61/02state_groups_state_n_distinct.sql.postgres