Skip to content

Commit

Permalink
Append mode: Do not try to delete objects that can't exist in middle
Browse files Browse the repository at this point in the history
When osm2pgsql runs in append mode, it first deletes from the middle
tables every object for which it gets a new version, and only then adds
the new version. For a typical diff many of these deletes will be unnecessary
because the objects are new. With this commit the behaviour changes
slightly: We first get the maximum id from the nodes/ways/relations
middle tables. This operation is fast, because the PostgreSQL max()
function is aware of the btree index on those tables. Later, before we
delete an object, we check its id against that maximum id; if it is
larger, the object can't be in the table and we skip the delete.

(Note that in theory we could use the fact that an object has version
number 1 to figure out that it must be new. But this is much less robust
than what we are doing here, for instance when the diff overlaps with
the original import.)
  • Loading branch information
joto committed Jul 17, 2023
1 parent 1cfb05b commit d276b6e
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 5 deletions.
37 changes: 32 additions & 5 deletions src/middle-pgsql.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,18 @@ void middle_pgsql_t::table_desc::build_index(std::string const &conninfo) const
db_connection.exec(m_create_fw_dep_indexes);
}

/**
 * Query the largest id stored in this table and remember it in m_max_id.
 *
 * If the query result is NULL, m_max_id is left untouched.
 *
 * @param db_connection Connection used to run the query.
 */
void middle_pgsql_t::table_desc::init_max_id(pg_conn_t const &db_connection)
{
    auto const table = qualified_name(schema(), name());
    auto const result = db_connection.exec("SELECT max(id) FROM {}", table);

    // max() yields NULL when there is nothing to aggregate; keep the
    // current m_max_id in that case.
    if (!result.is_null(0, 0)) {
        m_max_id = osmium::string_to_object_id(result.get_value(0, 0));
    }
}

/**
* Decode item in an array literal from PostgreSQL to the next delimiter.
*
Expand Down Expand Up @@ -776,7 +788,7 @@ void middle_pgsql_t::node_delete(osmid_t osm_id)
m_persistent_cache->set(osm_id, osmium::Location{});
}

if (m_store_options.nodes) {
if (m_store_options.nodes && osm_id <= m_tables.nodes().max_id()) {
m_db_copy.new_line(m_tables.nodes().copy_target());
m_db_copy.delete_object(osm_id);
}
Expand Down Expand Up @@ -1035,8 +1047,11 @@ middle_query_pgsql_t::rel_members_get(osmium::Relation const &rel,
/**
 * Queue the deletion of a way from the ways middle table.
 *
 * Must only be called in append mode (asserted). The delete is queued
 * only when osm_id is not larger than the maximum id remembered for the
 * ways table; an id above that maximum can't be in the table, so there is
 * nothing to delete.
 *
 * @param osm_id Id of the way to delete.
 */
void middle_pgsql_t::way_delete(osmid_t osm_id)
{
    assert(m_options->append);

    // Ids above the remembered maximum can't be in the table: skip the
    // delete entirely. There must be no unconditional delete before this
    // check, otherwise the guard is pointless and ids that are in the
    // table get queued for deletion twice.
    if (osm_id > m_tables.ways().max_id()) {
        return;
    }

    m_db_copy.new_line(m_tables.ways().copy_target());
    m_db_copy.delete_object(osm_id);
}

void middle_pgsql_t::relation_set_format1(osmium::Relation const &rel)
Expand Down Expand Up @@ -1175,8 +1190,10 @@ void middle_pgsql_t::relation_delete(osmid_t osm_id)
{
assert(m_options->append);

m_db_copy.new_line(m_tables.relations().copy_target());
m_db_copy.delete_object(osm_id);
if (osm_id <= m_tables.relations().max_id()) {
m_db_copy.new_line(m_tables.relations().copy_target());
m_db_copy.delete_object(osm_id);
}
}

void middle_pgsql_t::after_nodes()
Expand Down Expand Up @@ -1269,6 +1286,16 @@ void middle_pgsql_t::start()
// problems when accessing the intarrays.
m_db_connection.set_config("jit_above_cost", "-1");
m_db_connection.set_config("max_parallel_workers_per_gather", "0");

// Remember the maximum OSM ids in the middle tables. This is a very
// fast operation due to the index on the table. Later when we need
// to delete entries, we don't have to bother with entries that are
// definitely not in the table.
if (m_store_options.nodes) {
m_tables.nodes().init_max_id(m_db_connection);
}
m_tables.ways().init_max_id(m_db_connection);
m_tables.relations().init_max_id(m_db_connection);
} else {
if (m_store_options.db_format == 2) {
table_setup(m_db_connection, m_users_table);
Expand Down
7 changes: 7 additions & 0 deletions src/middle-pgsql.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,9 +160,16 @@ struct middle_pgsql_t : public middle_t

/// Wait on this table's task result and return whatever its wait()
/// call reports, as microseconds.
std::chrono::microseconds task_wait()
{
    return m_task_result.wait();
}

/// Query the maximum id currently in this table over the given
/// connection and store it so that max_id() can return it. The stored
/// value is left unchanged if the query yields NULL.
void init_max_id(pg_conn_t const &db_connection);

/// The maximum id remembered for this table (the value of m_max_id).
osmid_t max_id() const noexcept
{
    return m_max_id;
}

private:
std::shared_ptr<db_target_descr_t> m_copy_target;
task_result_t m_task_result;

/// The maximum id in the table (used only in append mode)
osmid_t m_max_id = 0;
};

std::shared_ptr<middle_query_t> get_query_instance() override;
Expand Down

0 comments on commit d276b6e

Please sign in to comment.