Skip to content

Commit

Permalink
Improved node location store without need for freeze()
Browse files Browse the repository at this point in the history
Store ids and locations as they come in without going through a
temporary block first. Slightly simpler code and we don't need the
freeze() function any more.

See also osm2pgsql-dev#1466.
  • Loading branch information
joto committed May 13, 2021
1 parent efb67a1 commit 4b1d4f2
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 89 deletions.
2 changes: 0 additions & 2 deletions src/middle-ram.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,6 @@ void middle_ram_t::relation(osmium::Relation const &relation)
}
}

void middle_ram_t::after_nodes() { m_node_locations.freeze(); }

std::size_t middle_ram_t::nodes_get_list(osmium::WayNodeList *nodes) const
{
assert(nodes);
Expand Down
2 changes: 0 additions & 2 deletions src/middle-ram.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,6 @@ class middle_ram_t : public middle_t, public middle_query_t
void way(osmium::Way const &way) override;
void relation(osmium::Relation const &) override;

void after_nodes() override;

std::size_t nodes_get_list(osmium::WayNodeList *nodes) const override;

bool way_get(osmid_t id, osmium::memory::Buffer *buffer) const override;
Expand Down
85 changes: 24 additions & 61 deletions src/node-locations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
#include "node-locations.hpp"

#include <osmium/osm/location.hpp>
#include <osmium/util/delta.hpp>

// Workaround: This must be included before buffer_string.hpp due to a missing
// include in the upstream code. https://github.com/mapbox/protozero/pull/104
Expand All @@ -24,13 +23,20 @@

void node_locations_t::set(osmid_t id, osmium::Location location)
{
assert(block_index() == 0 || m_block[block_index() - 1].first < id);
if (first_entry_in_block()) {
m_did.clear();
m_dx.clear();
m_dy.clear();
m_index.add(id, m_data.size());
}

protozero::add_varint_to_buffer(&m_data, m_did.update(id));
protozero::add_varint_to_buffer(
&m_data, protozero::encode_zigzag64(m_dx.update(location.x())));
protozero::add_varint_to_buffer(
&m_data, protozero::encode_zigzag64(m_dy.update(location.y())));

m_block[block_index()] = {id, location};
++m_count;
if (block_index() == 0) {
freeze();
}
}

osmium::Location node_locations_t::get(osmid_t id) const
Expand All @@ -46,72 +52,29 @@ osmium::Location node_locations_t::get(osmid_t id) const
char const *const end = m_data.data() + m_data.size();

osmium::DeltaDecode<osmid_t> did;
std::size_t num = block_size;
for (std::size_t n = 0; n < block_size; ++n) {
auto bid = did.update(protozero::decode_varint(&begin, end));
if (bid == id) {
num = n;
}
if (bid > id && num == block_size) {
return osmium::Location{};
}
}
if (num == block_size) {
return osmium::Location{};
}

osmium::DeltaDecode<int64_t> dx;
osmium::DeltaDecode<int64_t> dy;
int32_t x = 0;
int32_t y = 0;
for (std::size_t n = 0; n <= num; ++n) {
x = dx.update(

for (std::size_t n = 0; n < block_size && begin != end; ++n) {
auto const bid = did.update(protozero::decode_varint(&begin, end));
int32_t const x = dx.update(
protozero::decode_zigzag64(protozero::decode_varint(&begin, end)));
y = dy.update(
int32_t const y = dy.update(
protozero::decode_zigzag64(protozero::decode_varint(&begin, end)));
if (bid == id) {
return osmium::Location{x, y};
}
if (bid > id) {
break;
}
}

return osmium::Location{x, y};
}

void node_locations_t::freeze()
{
encode_block();
clear_block();
return osmium::Location{};
}

void node_locations_t::clear()
{
m_data.clear();
m_data.shrink_to_fit();
m_index.clear();
clear_block();
m_count = 0;
}

void node_locations_t::encode_block()
{
auto const offset = m_data.size();
osmium::DeltaEncode<osmid_t> did;
osmium::DeltaEncode<int64_t> dx;
osmium::DeltaEncode<int64_t> dy;
for (auto const &nl : m_block) {
protozero::add_varint_to_buffer(&m_data, did.update(nl.first));
}
for (auto const &nl : m_block) {
protozero::add_varint_to_buffer(
&m_data, protozero::encode_zigzag64(dx.update(nl.second.x())));
protozero::add_varint_to_buffer(
&m_data, protozero::encode_zigzag64(dy.update(nl.second.y())));
}
m_index.add(m_block[0].first, offset);
}

void node_locations_t::clear_block()
{
for (auto &nl : m_block) {
nl.first = std::numeric_limits<osmid_t>::max();
nl.second = osmium::Location{};
}
}

35 changes: 15 additions & 20 deletions src/node-locations.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
#include "ordered-index.hpp"
#include "osmtypes.hpp"

#include <osmium/util/delta.hpp>

#include <array>
#include <cstddef>
#include <string>
Expand All @@ -25,18 +27,14 @@
*
* Internally nodes are stored in blocks of `block_size` (id, location) pairs.
* Ids inside a block and the x and y coordinates of each location are first
* delta encoded and then stored as varints. To access a stored location a
* full block must be decoded.
* delta encoded and then stored as varints. To access a stored location the
* block must be decoded until the id is found.
*
* Ids must be added in strictly ascending order. After all ids are stored,
* the `freeze()` function must be called. Only after that can the store
* be queried.
* Ids must be added in strictly ascending order.
*/
class node_locations_t
{
public:
node_locations_t() { clear_block(); }

/**
* Store a node location.
*
Expand All @@ -60,35 +58,32 @@ class node_locations_t
}

/**
* Freeze storage. Muste be called after set()ing all the ids and before
* get()ing the first one.
*/
void freeze();

/**
* Clear the memory used by this object. The object can not be reused
* after that.
* Clear the memory used by this object. The object can be reused after
* that.
*/
void clear();

private:
std::size_t block_index() const noexcept { return m_count % block_size; }

void encode_block();
void clear_block();
bool first_entry_in_block() const noexcept
{
return m_count % block_size == 0;
}

/**
* The block size used for internal blocks. The larger the block size
* the less memory is consumed but the more expensive the access is.
*/
static constexpr const std::size_t block_size = 32;

std::array<std::pair<osmid_t, osmium::Location>, block_size> m_block;
ordered_index_t m_index;
std::string m_data;

/// The number of (id, location) pairs stored.
std::size_t m_count = 0;

osmium::DeltaEncode<osmid_t> m_did;
osmium::DeltaEncode<int64_t> m_dx;
osmium::DeltaEncode<int64_t> m_dy;
}; // class node_locations_t

#endif // OSM2PGSQL_NODE_LOCATIONS_HPP
4 changes: 0 additions & 4 deletions tests/test-node-locations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,6 @@ TEST_CASE("node locations basics", "[NoDB]")

REQUIRE(nl.size() == 2);

nl.freeze();
REQUIRE(nl.size() == 2);

REQUIRE(nl.get(1) == osmium::Location{});
REQUIRE(nl.get(4) == osmium::Location{});
REQUIRE(nl.get(6) == osmium::Location{});
Expand Down Expand Up @@ -70,7 +67,6 @@ TEST_CASE("node locations in more than one block", "[NoDB]")
nl.set(id, {id + 0.1, id + 0.2});
}

nl.freeze();
REQUIRE(nl.size() == max_id);

for (std::size_t id = 1; id <= max_id; ++id) {
Expand Down

0 comments on commit 4b1d4f2

Please sign in to comment.