Skip to content

Use planet-dump-ng for planet dump generation #368

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions compose/db-backup-restore.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
version: '3'
services:
#####################################################
## OSM Database backup and restore section
#####################################################
db-backup-restore:
image: osmseed-backup-restore:v1
image: rub21/osmseed-backup-restore:v1
build:
context: ../images/backup-restore
dockerfile: Dockerfile
Expand Down
15 changes: 8 additions & 7 deletions images/backup-restore/start.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,25 +35,26 @@ cloudStorageOps() {
}

backupDB() {
local LOCAL_BACKUP_FILE=${BACKUP_CLOUD_FILE}.sql.gz
local CLOUD_BACKUP_FILE="${BACKUP_CLOUD_FOLDER}/${BACKUP_CLOUD_FILE}.sql.gz"
local LOCAL_BACKUP_FILE=${BACKUP_CLOUD_FILE}.dump
local CLOUD_BACKUP_FILE="${BACKUP_CLOUD_FOLDER}/${BACKUP_CLOUD_FILE}.dump"
if [ "$SET_DATE_AT_NAME" == "true" ]; then
local CURRENT_DATE=$(date '+%Y%m%d-%H%M')
LOCAL_BACKUP_FILE="${BACKUP_CLOUD_FILE}-${CURRENT_DATE}.sql.gz"
CLOUD_BACKUP_FILE="${BACKUP_CLOUD_FOLDER}/${BACKUP_CLOUD_FILE}-${CURRENT_DATE}.sql.gz"
LOCAL_BACKUP_FILE="${BACKUP_CLOUD_FILE}-${CURRENT_DATE}.dump"
CLOUD_BACKUP_FILE="${BACKUP_CLOUD_FOLDER}/${BACKUP_CLOUD_FILE}-${CURRENT_DATE}.dump"
fi

# Backup database with max compression
echo "Backing up DB ${POSTGRES_DB} into ${LOCAL_BACKUP_FILE}"
pg_dump -h ${POSTGRES_HOST} -U ${POSTGRES_USER} ${POSTGRES_DB} | gzip -9 >${LOCAL_BACKUP_FILE}
# pg_dump -h ${POSTGRES_HOST} -U ${POSTGRES_USER} ${POSTGRES_DB} | gzip -9 >${LOCAL_BACKUP_FILE}
pg_dump -h ${POSTGRES_HOST} -U ${POSTGRES_USER} -Fc -f ${LOCAL_BACKUP_FILE} ${POSTGRES_DB}

# Handle cloud storage based on the provider
cloudStorageOps "${LOCAL_BACKUP_FILE}" "${CLOUD_BACKUP_FILE}"
}

restoreDB() {
local CURRENT_DATE=$(date '+%Y%m%d-%H%M')
local RESTORE_FILE="backup.sql.gz"
local RESTORE_FILE="backup.dump"
local LOG_RESULT_FILE="restore_results-${CURRENT_DATE}.log"
local flag=true

Expand All @@ -62,7 +63,7 @@ restoreDB() {
flag=false
wget -O ${RESTORE_FILE} ${RESTORE_URL_FILE}
echo "Restoring ${RESTORE_URL_FILE} in ${POSTGRES_DB}"
gunzip -c <${RESTORE_FILE} | psql -h ${POSTGRES_HOST} -U ${POSTGRES_USER} -d ${POSTGRES_DB} | tee ${LOG_RESULT_FILE}
pg_restore -h ${POSTGRES_HOST} -U ${POSTGRES_USER} -d ${POSTGRES_DB} --create --no-owner ${RESTORE_FILE} | tee ${LOG_RESULT_FILE}
# aws s3 cp ${LOG_RESULT_FILE} s3://${AWS_S3_BUCKET}/${LOG_RESULT_FILE}
echo "Import data to ${POSTGRES_DB} has finished ..."
done
Expand Down
4 changes: 2 additions & 2 deletions images/full-history/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
FROM developmentseed/osmseed-osm-processor:0.1.0-n802.h0d9f574
FROM developmentseed/osmseed-osm-processor:0.1.0-0.dev.git.964.h5e9b11b

VOLUME /mnt/data
COPY ./start.sh /
CMD /start.sh
CMD /start.sh
139 changes: 77 additions & 62 deletions images/full-history/start.sh
Original file line number Diff line number Diff line change
@@ -1,82 +1,97 @@
#!/usr/bin/env bash
set -e
export VOLUME_DIR=/mnt/data

# osmosis tuning: https://wiki.openstreetmap.org/wiki/Osmosis/Tuning,https://lists.openstreetmap.org/pipermail/talk/2012-October/064771.html
if [ -z "$MEMORY_JAVACMD_OPTIONS" ]; then
echo JAVACMD_OPTIONS=\"-server\" >~/.osmosis
echo JAVACMD_OPTIONS="-server" >~/.osmosis
else
memory="${MEMORY_JAVACMD_OPTIONS//i/}"
echo JAVACMD_OPTIONS=\"-server -Xmx$memory\" >~/.osmosis
echo JAVACMD_OPTIONS="-server -Xmx$memory" >~/.osmosis
fi

# Fixing name for historical file
export VOLUME_DIR=/mnt/data
export PLANET_EPOCH_DATE="${PLANET_EPOCH_DATE:-2004-01-01}"
date=$(date '+%y%m%d_%H%M')
local_fullHistoryFile=$VOLUME_DIR/history-${date}.osh.pbf
cloud_fullHistoryFile=planet/full-history/history-${date}.osh.pbf

# If enabled, overwrite the fixed "latest" file instead of a dated one
if [ "$OVERWRITE_FHISTORY_FILE" == "true" ]; then
local_fullHistoryFile=$VOLUME_DIR/history-latest.osh.pbf
cloud_fullHistoryFile=planet/full-history/history-latest.osh.pbf
local_planetHistoryPBFFile=$VOLUME_DIR/planet-history-${date}.osm.pbf
cloud_planetHistoryPBFFile=planet/full-history/planet-history-${date}.osm.pbf
stateFile="$VOLUME_DIR/state.txt"
dumpFile="$VOLUME_DIR/input-latest.dump"


# If overwrite flag is enabled, use fixed filenames
if [ "$OVERWRITE_PLANET_FILE" == "true" ]; then
local_planetHistoryPBFFile=$VOLUME_DIR/planet-history-latest.osm.pbf
cloud_planetHistoryPBFFile=planet/planet-history-latest.osm.pbf
fi

# State file name
stateFile="$VOLUME_DIR/state.txt"
osm_tmp_file="osm_tmp.osm"
# ===============================
# Download db .dump file
# ===============================
download_dump_file() {
  # Fetch the database .dump file that planet-dump-ng will read into $dumpFile.
  #
  # Globals read: CLOUDPROVIDER, DUMP_CLOUD_URL, VOLUME_DIR, dumpFile.
  # If DUMP_CLOUD_URL ends in .txt, that file is treated as a pointer: its
  # first line is the URL of the actual dump object (a "latest" indirection).
  # Returns: 0 on success; 1 when CLOUDPROVIDER is unsupported.
  echo "Downloading db .dump file from cloud..."
  case "$CLOUDPROVIDER" in
    aws)
      if [[ "$DUMP_CLOUD_URL" == *.txt ]]; then
        local temp_txt="$VOLUME_DIR/tmp_dump_url.txt"
        local first_line
        aws s3 cp "$DUMP_CLOUD_URL" "$temp_txt"
        first_line=$(head -n 1 "$temp_txt")
        aws s3 cp "$first_line" "$dumpFile"
      else
        aws s3 cp "$DUMP_CLOUD_URL" "$dumpFile"
      fi
      ;;
    gcp)
      gsutil cp "$DUMP_CLOUD_URL" "$dumpFile"
      ;;
    *)
      # Previously fell through silently, leaving $dumpFile absent and causing
      # planet-dump-ng to fail later with a confusing error. Fail fast instead.
      echo "Error: unsupported CLOUDPROVIDER '${CLOUDPROVIDER:-}' for dump download." >&2
      return 1
      ;;
  esac
}

# Creating full history
osmosis --read-apidb-change \
host=$POSTGRES_HOST \
database=$POSTGRES_DB \
user=$POSTGRES_USER \
password=$POSTGRES_PASSWORD \
validateSchemaVersion=no \
readFullHistory=yes \
--write-xml-change \
compressionMethod=auto \
$osm_tmp_file
# ===============================
# Upload planet + state
# ===============================
upload_planet_file() {
echo "Uploading history planet file and updating state.txt..."

# Convert file to PBF file
osmium cat $osm_tmp_file -o $local_fullHistoryFile
osmium fileinfo $local_fullHistoryFile
if [ "$CLOUDPROVIDER" == "aws" ]; then
AWS_URL=${AWS_S3_BUCKET/s3:\/\//http:\/\/}
echo "$AWS_URL.s3.amazonaws.com/$cloud_planetHistoryPBFFile" > "$stateFile"
aws s3 cp "$local_planetHistoryPBFFile" "$AWS_S3_BUCKET/$cloud_planetHistoryPBFFile" --acl public-read
aws s3 cp "$stateFile" "$AWS_S3_BUCKET/planet/state.txt" --acl public-read

# Remove full-history osm file, keep only history-latest.osh.pbf files
rm $osm_tmp_file
elif [ "$CLOUDPROVIDER" == "gcp" ]; then
echo "https://storage.cloud.google.com/$GCP_STORAGE_BUCKET/$cloud_planetHistoryPBFFile" > "$stateFile"
gsutil cp -a public-read "$local_planetHistoryPBFFile" "$GCP_STORAGE_BUCKET/$cloud_planetHistoryPBFFile"
gsutil cp -a public-read "$stateFile" "$GCP_STORAGE_BUCKET/planet/state.txt"
fi
}

# AWS
if [ $CLOUDPROVIDER == "aws" ]; then
AWS_URL=${AWS_S3_BUCKET/s3:\/\//http:\/\/}
echo "$AWS_URL.s3.amazonaws.com/$cloud_fullHistoryFile" >$stateFile
# Upload history-planet.osm.pbf
aws s3 cp $local_fullHistoryFile $AWS_S3_BUCKET/$cloud_fullHistoryFile --acl public-read
# Upload state.txt
aws s3 cp $stateFile $AWS_S3_BUCKET/planet/full-history/state.txt --acl public-read
fi
# ===============================
# Generate planet file
# ===============================

# Google Storage
if [ $CLOUDPROVIDER == "gcp" ]; then
echo "https://storage.cloud.google.com/$GCP_STORAGE_BUCKET/$cloud_fullHistoryFile" >$stateFile
# Upload history-planet.osm.pbf
gsutil cp -a public-read $local_fullHistoryFile $GCP_STORAGE_BUCKET/$cloud_fullHistoryFile
# Upload state.txt
gsutil cp -a public-read $stateFile $GCP_STORAGE_BUCKET/planet/full-history/state.txt
fi
if [ "$PLANET_EXPORT_METHOD" == "planet-dump-ng" ]; then
download_dump_file
echo "Generating history planet file with planet-dump-ng..."
export PLANET_EPOCH_DATE="$PLANET_EPOCH_DATE"
planet-dump-ng \
--dump-file "$dumpFile" \
--history-pbf "$local_planetHistoryPBFFile"

# Azure
if [ $CLOUDPROVIDER == "azure" ]; then
# Save the path file
echo "https://$AZURE_STORAGE_ACCOUNT.blob.core.windows.net/$AZURE_CONTAINER_NAME/$cloud_fullHistoryFile" >$stateFile
# Upload history-planet.osm.pbf
az storage blob upload \
--container-name $AZURE_CONTAINER_NAME \
--file $local_fullHistoryFile \
--name $cloud_fullHistoryFile \
--output table
# Upload state.txt
az storage blob upload \
--container-name $AZURE_CONTAINER_NAME \
--file $stateFile \
--name planet/full-history/state.txt \
--output table
elif [ "$PLANET_EXPORT_METHOD" == "osmosis" ]; then
echo "Generating history planet file with osmosis..."
# Creating full history
osmosis --read-apidb-change \
host=$POSTGRES_HOST \
database=$POSTGRES_DB \
user=$POSTGRES_USER \
password=$POSTGRES_PASSWORD \
validateSchemaVersion=no \
readFullHistory=yes \
--write-xml-change \
compressionMethod=auto \
$local_planetHistoryPBFFile
else
echo "Error: Unknown PLANET_EXPORT_METHOD value. Use 'planet-dump-ng' or 'osmosis'."
exit 1
fi

# Upload results
upload_planet_file
70 changes: 53 additions & 17 deletions images/osm-processor/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,23 +1,59 @@
# Stage 1: builder
FROM debian:bookworm-slim AS builder
WORKDIR /opt/planet-dump-ng

RUN set -ex \
&& apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
ca-certificates \
build-essential \
automake \
autoconf \
libxml2-dev \
libboost-dev \
libboost-program-options-dev \
libboost-date-time-dev \
libboost-filesystem-dev \
libboost-thread-dev \
libboost-iostreams-dev \
libosmpbf-dev \
osmpbf-bin \
libprotobuf-dev \
pkg-config \
git \
&& git clone -b planet_epoch_date https://github.com/OpenHistoricalMap/planet-dump-ng.git . \
&& ./autogen.sh \
&& ./configure \
&& make \
&& strip planet-dump-ng

FROM debian:bookworm-slim
ENV workdir /mnt/data
WORKDIR $workdir

# Installs osmosis v0.48.3, osmium-tool v1.15.0, and PostgreSQL client
RUN set -ex \
&& apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install \
-y --no-install-recommends \
"osmosis" \
"osmium-tool" \
# Cloud provider CLIs
"awscli" \
"gsutil" \
"azure-cli" \
# PostgreSQL client
"postgresql-client" \
# Other useful packages
"rsync" \
"pyosmium" \
"tmux" \
"zsh" \
&& rm -rf /var/lib/apt/lists/*
&& DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
osmosis \
osmium-tool \
awscli \
gsutil \
azure-cli \
postgresql-client \
rsync \
pyosmium \
tmux \
zsh \
git \
libxml2 \
libboost-filesystem1.74.0 \
libboost-program-options1.74.0 \
libboost-thread1.74.0 \
libboost-iostreams1.74.0 \
libboost-date-time1.74.0 \
libprotobuf32 \
libprotobuf-lite32 \
libosmpbf1 \
&& rm -rf /var/lib/apt/lists/*

COPY --from=builder /opt/planet-dump-ng/planet-dump-ng /usr/local/bin/planet-dump-ng
2 changes: 1 addition & 1 deletion images/planet-dump/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM developmentseed/osmseed-osm-processor:0.1.0-n802.h0d9f574
FROM developmentseed/osmseed-osm-processor:0.1.0-0.dev.git.964.h5e9b11b

VOLUME /mnt/data
COPY ./start.sh /
Expand Down
Loading
Loading