Use planet-dump-ng for planet dump generation #368

Merged · 8 commits · Jul 8, 2025
3 changes: 1 addition & 2 deletions compose/db-backup-restore.yml
@@ -1,10 +1,9 @@
-version: '3'
 services:
   #####################################################
   ## OSM Database backup and restore section
   #####################################################
   db-backup-restore:
-    image: osmseed-backup-restore:v1
+    image: rub21/osmseed-backup-restore:v1
     build:
       context: ../images/backup-restore
       dockerfile: Dockerfile
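For reviewers who want to exercise this service locally, a minimal sketch using Docker Compose (the env file contents, and the variable that selects backup vs. restore inside start.sh, are assumptions, not part of this diff):

# Build the image from the local build context instead of pulling rub21/osmseed-backup-restore:v1.
docker compose -f compose/db-backup-restore.yml build db-backup-restore

# Run a one-off container; start.sh picks backup or restore from its environment
# (commonly a DB_ACTION-style variable in osm-seed; treat the name as an assumption).
docker compose -f compose/db-backup-restore.yml run --rm db-backup-restore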
30 changes: 16 additions & 14 deletions images/backup-restore/start.sh
@@ -35,25 +35,27 @@ cloudStorageOps() {
 }

 backupDB() {
-  local LOCAL_BACKUP_FILE=${BACKUP_CLOUD_FILE}.sql.gz
-  local CLOUD_BACKUP_FILE="${BACKUP_CLOUD_FOLDER}/${BACKUP_CLOUD_FILE}.sql.gz"
-  if [ "$SET_DATE_AT_NAME" == "true" ]; then
-    local CURRENT_DATE=$(date '+%Y%m%d-%H%M')
-    LOCAL_BACKUP_FILE="${BACKUP_CLOUD_FILE}-${CURRENT_DATE}.sql.gz"
-    CLOUD_BACKUP_FILE="${BACKUP_CLOUD_FOLDER}/${BACKUP_CLOUD_FILE}-${CURRENT_DATE}.sql.gz"
-  fi
+  local LOCAL_BACKUP_FILE="${BACKUP_CLOUD_FILE}.dump"
+  local LOCAL_BACKUP_FILE_GZIP="${BACKUP_CLOUD_FILE}.dump.gz"
+  local CLOUD_BACKUP_FILE="${BACKUP_CLOUD_FOLDER}/${BACKUP_CLOUD_FILE}.dump.gz"

-  # Backup database with max compression
-  echo "Backing up DB ${POSTGRES_DB} into ${LOCAL_BACKUP_FILE}"
-  pg_dump -h ${POSTGRES_HOST} -U ${POSTGRES_USER} ${POSTGRES_DB} | gzip -9 >${LOCAL_BACKUP_FILE}
+  if [ "$SET_DATE_AT_NAME" == "true" ]; then
+    local CURRENT_DATE
+    CURRENT_DATE=$(date '+%Y%m%d-%H%M')
+    LOCAL_BACKUP_FILE="${BACKUP_CLOUD_FILE}-${CURRENT_DATE}.dump"
+    LOCAL_BACKUP_FILE_GZIP="${BACKUP_CLOUD_FILE}-${CURRENT_DATE}.dump.gz"
+    CLOUD_BACKUP_FILE="${BACKUP_CLOUD_FOLDER}/${BACKUP_CLOUD_FILE}-${CURRENT_DATE}.dump.gz"
+  fi

-  # Handle cloud storage based on the provider
-  cloudStorageOps "${LOCAL_BACKUP_FILE}" "${CLOUD_BACKUP_FILE}"
+  # Backup database with pg_dump custom format (-Fc) + gzip
+  echo "Backing up DB ${POSTGRES_DB} into ${LOCAL_BACKUP_FILE_GZIP}"
+  pg_dump -h "${POSTGRES_HOST}" -U "${POSTGRES_USER}" -Fc "${POSTGRES_DB}" | gzip -9 > "${LOCAL_BACKUP_FILE}.gz"
+  cloudStorageOps "${LOCAL_BACKUP_FILE_GZIP}" "${CLOUD_BACKUP_FILE}"
 }

 restoreDB() {
   local CURRENT_DATE=$(date '+%Y%m%d-%H%M')
-  local RESTORE_FILE="backup.sql.gz"
+  local RESTORE_FILE="backup.dump"
   local LOG_RESULT_FILE="restore_results-${CURRENT_DATE}.log"
   local flag=true

@@ -62,7 +64,7 @@ restoreDB() {
     flag=false
     wget -O ${RESTORE_FILE} ${RESTORE_URL_FILE}
     echo "Restoring ${RESTORE_URL_FILE} in ${POSTGRES_DB}"
-    gunzip -c <${RESTORE_FILE} | psql -h ${POSTGRES_HOST} -U ${POSTGRES_USER} -d ${POSTGRES_DB} | tee ${LOG_RESULT_FILE}
+    pg_restore -h ${POSTGRES_HOST} -U ${POSTGRES_USER} -d ${POSTGRES_DB} --create --no-owner ${RESTORE_FILE} | tee ${LOG_RESULT_FILE}
     # aws s3 cp ${LOG_RESULT_FILE} s3://${AWS_S3_BUCKET}/${LOG_RESULT_FILE}
     echo "Import data to ${POSTGRES_DB} has finished ..."
   done
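With this change, backups are gzip-compressed pg_dump custom-format archives (.dump.gz) rather than plain SQL, so a manual restore goes through pg_restore instead of psql. A minimal sketch, with illustrative file and connection values:

#!/usr/bin/env bash
set -euo pipefail

# Illustrative artifact name; substitute the file produced by backupDB().
BACKUP_FILE=osm-db-20250708-1200.dump.gz

# pg_restore does not read gzip directly, so decompress first (-k keeps the .gz).
gunzip -kf "$BACKUP_FILE"

# --create recreates the dumped database (connect to a maintenance DB such as
# "postgres"); --no-owner skips original ownership so another role can restore.
pg_restore -h localhost -U postgres -d postgres \
  --create --no-owner "${BACKUP_FILE%.gz}"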
4 changes: 2 additions & 2 deletions images/full-history/Dockerfile
@@ -1,5 +1,5 @@
-FROM developmentseed/osmseed-osm-processor:0.1.0-n802.h0d9f574
+FROM developmentseed/osmseed-osm-processor:0.1.0-0.dev.git.964.h5e9b11b

 VOLUME /mnt/data
 COPY ./start.sh /
-CMD /start.sh
+CMD /start.sh
180 changes: 114 additions & 66 deletions images/full-history/start.sh
@@ -1,82 +1,130 @@
 #!/usr/bin/env bash
 set -e
-export VOLUME_DIR=/mnt/data

 # osmosis tuning: https://wiki.openstreetmap.org/wiki/Osmosis/Tuning,https://lists.openstreetmap.org/pipermail/talk/2012-October/064771.html
 if [ -z "$MEMORY_JAVACMD_OPTIONS" ]; then
-  echo JAVACMD_OPTIONS=\"-server\" >~/.osmosis
+  echo JAVACMD_OPTIONS="-server" >~/.osmosis
 else
   memory="${MEMORY_JAVACMD_OPTIONS//i/}"
-  echo JAVACMD_OPTIONS=\"-server -Xmx$memory\" >~/.osmosis
+  echo JAVACMD_OPTIONS="-server -Xmx$memory" >~/.osmosis
 fi

-# Fixing name for historical file
+export VOLUME_DIR=/mnt/data
+export PLANET_EPOCH_DATE="${PLANET_EPOCH_DATE:-2004-01-01}"
 date=$(date '+%y%m%d_%H%M')
-local_fullHistoryFile=$VOLUME_DIR/history-${date}.osh.pbf
-cloud_fullHistoryFile=planet/full-history/history-${date}.osh.pbf
-
-# In case overwrite the file
-if [ "$OVERWRITE_FHISTORY_FILE" == "true" ]; then
-  local_fullHistoryFile=$VOLUME_DIR/history-latest.osh.pbf
-  cloud_fullHistoryFile=planet/full-history/history-latest.osh.pbf
-fi
-
-# State file nname
+local_planetHistoryPBFFile=$VOLUME_DIR/planet-history-${date}.osm.pbf
+cloud_planetHistoryPBFFile=planet/full-history/planet-history-${date}.osm.pbf
 stateFile="$VOLUME_DIR/state.txt"
-osm_tmp_file="osm_tmp.osm"
-
-# Creating full history
-osmosis --read-apidb-change \
-  host=$POSTGRES_HOST \
-  database=$POSTGRES_DB \
-  user=$POSTGRES_USER \
-  password=$POSTGRES_PASSWORD \
-  validateSchemaVersion=no \
-  readFullHistory=yes \
-  --write-xml-change \
-  compressionMethod=auto \
-  $osm_tmp_file
-
-# Convert file to PBF file
-osmium cat $osm_tmp_file -o $local_fullHistoryFile
-osmium fileinfo $local_fullHistoryFile
-
-# Remove full-hitory osm file, keep only history-latest.osh.pbf files
-rm $osm_tmp_file
-
-# AWS
-if [ $CLOUDPROVIDER == "aws" ]; then
-  AWS_URL=${AWS_S3_BUCKET/s3:\/\//http:\/\/}
-  echo "$AWS_URL.s3.amazonaws.com/$cloud_fullHistoryFile" >$stateFile
-  # Upload history-planet.osm.pbf
-  aws s3 cp $local_fullHistoryFile $AWS_S3_BUCKET/$cloud_fullHistoryFile --acl public-read
-  # Upload state.txt
-  aws s3 cp $stateFile $AWS_S3_BUCKET/planet/full-history/state.txt --acl public-read
-fi
+dumpFile="$VOLUME_DIR/input-latest.dump"

-# Google Storage
-if [ $CLOUDPROVIDER == "gcp" ]; then
-  echo "https://storage.cloud.google.com/$GCP_STORAGE_BUCKET/$cloud_fullHistoryFile" >$stateFile
-  # Upload history-planet.osm.pbf
-  gsutil cp -a public-read $local_fullHistoryFile $GCP_STORAGE_BUCKET/$cloud_fullHistoryFile
-  # Upload state.txt
-  gsutil cp -a public-read $stateFile $GCP_STORAGE_BUCKET/planet/full-history/state.txt
-fi
+# If overwrite flag is enabled, use fixed filenames
+if [ "$OVERWRITE_PLANET_FILE" == "true" ]; then
+  local_planetHistoryPBFFile=$VOLUME_DIR/planet-history-latest.osm.pbf
+  cloud_planetHistoryPBFFile=planet/planet-history-latest.osm.pbf
+fi

-# Azure
-if [ $CLOUDPROVIDER == "azure" ]; then
-  # Save the path file
-  echo "https://$AZURE_STORAGE_ACCOUNT.blob.core.windows.net/$AZURE_CONTAINER_NAME/$cloud_fullHistoryFile" >$stateFile
-  # Upload history-planet.osm.pbf
-  az storage blob upload \
-    --container-name $AZURE_CONTAINER_NAME \
-    --file $local_fullHistoryFile \
-    --name $cloud_fullHistoryFile \
-    --output table
-  # Upload state.txt
-  az storage blob upload \
-    --container-name $AZURE_CONTAINER_NAME \
-    --file $stateFile \
-    --name planet/full-history/state.txt \
-    --output table
-fi

+# ===============================
+# Download db .dump file
+# ===============================
+download_dump_file() {
+  echo "Downloading db .dump file from cloud..."
+
+  if [ "$CLOUDPROVIDER" == "aws" ]; then
+    if [[ "$DUMP_CLOUD_URL" == *.txt ]]; then
+      temp_txt="$VOLUME_DIR/tmp_dump_url.txt"
+      aws s3 cp "$DUMP_CLOUD_URL" "$temp_txt"
+
+      # Get the first line (S3 URL to the .dump or .dump.gz file)
+      first_line=$(head -n 1 "$temp_txt")
+      echo "Found dump URL in txt: $first_line"
+
+      # Set dump file name based on extension
+      if [[ "$first_line" == *.gz ]]; then
+        dumpFile="${dumpFile}.gz"
+      fi
+
+      aws s3 cp "$first_line" "$dumpFile"
+      if [[ "$dumpFile" == *.gz ]]; then
+        echo "Decompressing gzip file..."
+        gunzip -f "$dumpFile"
+        dumpFile="${dumpFile%.gz}"
+      fi
+      rm -f "$temp_txt"
+
+    else
+      # Set dump file name based on extension
+      if [[ "$DUMP_CLOUD_URL" == *.gz ]]; then
+        dumpFile="${dumpFile}.gz"
+      fi
+      aws s3 cp "$DUMP_CLOUD_URL" "$dumpFile"
+      if [[ "$dumpFile" == *.gz ]]; then
+        echo "Decompressing gzip file..."
+        gunzip -f "$dumpFile"
+        dumpFile="${dumpFile%.gz}"
+      fi
+    fi
+
+  elif [ "$CLOUDPROVIDER" == "gcp" ]; then
+    gsutil cp "$DUMP_CLOUD_URL" "$dumpFile"
+  else
+    echo "Unsupported CLOUDPROVIDER: $CLOUDPROVIDER"
+    exit 1
+  fi
+
+  echo "Dump file ready at: $dumpFile"
+}
+
+# ===============================
+# Upload planet + state
+# ===============================
+upload_planet_file() {
+  echo "Uploading history planet file and updating state.txt..."
+
+  if [ "$CLOUDPROVIDER" == "aws" ]; then
+    AWS_URL=${AWS_S3_BUCKET/s3:\/\//http:\/\/}
+    echo "$AWS_URL.s3.amazonaws.com/$cloud_planetHistoryPBFFile" > "$stateFile"
+    aws s3 cp "$local_planetHistoryPBFFile" "$AWS_S3_BUCKET/$cloud_planetHistoryPBFFile" --acl public-read
+    aws s3 cp "$stateFile" "$AWS_S3_BUCKET/planet/state.txt" --acl public-read
+
+  elif [ "$CLOUDPROVIDER" == "gcp" ]; then
+    echo "https://storage.cloud.google.com/$GCP_STORAGE_BUCKET/$cloud_planetHistoryPBFFile" > "$stateFile"
+    gsutil cp -a public-read "$local_planetHistoryPBFFile" "$GCP_STORAGE_BUCKET/$cloud_planetHistoryPBFFile"
+    gsutil cp -a public-read "$stateFile" "$GCP_STORAGE_BUCKET/planet/state.txt"
+  fi
+}
+
+# ===============================
+# Generate planet file
+# ===============================
+
+if [ "$PLANET_EXPORT_METHOD" == "planet-dump-ng" ]; then
+  download_dump_file
+  echo "Generating history planet file with planet-dump-ng..."
+  export PLANET_EPOCH_DATE="$PLANET_EPOCH_DATE"
+  planet-dump-ng \
+    --dump-file "$dumpFile" \
+    --history-pbf "$local_planetHistoryPBFFile"
+
+elif [ "$PLANET_EXPORT_METHOD" == "osmosis" ]; then
+  echo "Generating history planet file with osmosis..."
+  # Creating full history
+  osmosis --read-apidb-change \
+    host=$POSTGRES_HOST \
+    database=$POSTGRES_DB \
+    user=$POSTGRES_USER \
+    password=$POSTGRES_PASSWORD \
+    validateSchemaVersion=no \
+    readFullHistory=yes \
+    --write-xml-change \
+    compressionMethod=auto \
+    $local_planetHistoryPBFFile
+else
+  echo "Error: Unknown PLANET_EXPORT_METHOD value. Use 'planet-dump-ng' or 'osmosis'."
+  exit 1
+fi
+
+# Upload results
+upload_planet_file
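A usage sketch for the new path (all values illustrative): PLANET_EXPORT_METHOD selects the generator, DUMP_CLOUD_URL points at a pg_dump custom-format .dump or .dump.gz (or a .txt file whose first line is that S3 URL), and PLANET_EPOCH_DATE is passed through to the patched planet-dump-ng branch:

# Example environment for the planet-dump-ng path; bucket and object names are assumptions.
export CLOUDPROVIDER=aws
export AWS_S3_BUCKET=s3://my-osm-bucket
export PLANET_EXPORT_METHOD=planet-dump-ng
export DUMP_CLOUD_URL=s3://my-osm-bucket/backups/osm-db.dump.gz
export PLANET_EPOCH_DATE=2004-01-01   # script default when unset
export OVERWRITE_PLANET_FILE=true     # write planet-history-latest.osm.pbf instead of dated names

./start.sh   # downloads the dump, runs planet-dump-ng, uploads the PBF and state.txt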
70 changes: 53 additions & 17 deletions images/osm-processor/Dockerfile
@@ -1,23 +1,59 @@
+# Stage 1: builder
+FROM debian:bookworm-slim AS builder
+WORKDIR /opt/planet-dump-ng
+
+RUN set -ex \
+  && apt-get update \
+  && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+  ca-certificates \
+  build-essential \
+  automake \
+  autoconf \
+  libxml2-dev \
+  libboost-dev \
+  libboost-program-options-dev \
+  libboost-date-time-dev \
+  libboost-filesystem-dev \
+  libboost-thread-dev \
+  libboost-iostreams-dev \
+  libosmpbf-dev \
+  osmpbf-bin \
+  libprotobuf-dev \
+  pkg-config \
+  git \
+  && git clone -b planet_epoch_date https://github.com/OpenHistoricalMap/planet-dump-ng.git . \
+  && ./autogen.sh \
+  && ./configure \
+  && make \
+  && strip planet-dump-ng
+
 FROM debian:bookworm-slim
 ENV workdir /mnt/data
 WORKDIR $workdir

-# Installs osmosis v0.48.3, osmium-tool v1.15.0, and PostgreSQL client
 RUN set -ex \
   && apt-get update \
-  && DEBIAN_FRONTEND=noninteractive apt-get install \
-  -y --no-install-recommends \
-  "osmosis" \
-  "osmium-tool" \
-  # Cloud provider CLIs
-  "awscli" \
-  "gsutil" \
-  "azure-cli" \
-  # PostgreSQL client
-  "postgresql-client" \
-  # Other useful packages
-  "rsync" \
-  "pyosmium" \
-  "tmux" \
-  "zsh" \
-  && rm -rf /var/lib/apt/lists/*
+  && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+  osmosis \
+  osmium-tool \
+  awscli \
+  gsutil \
+  azure-cli \
+  postgresql-client \
+  rsync \
+  pyosmium \
+  tmux \
+  zsh \
+  git \
+  libxml2 \
+  libboost-filesystem1.74.0 \
+  libboost-program-options1.74.0 \
+  libboost-thread1.74.0 \
+  libboost-iostreams1.74.0 \
+  libboost-date-time1.74.0 \
+  libprotobuf32 \
+  libprotobuf-lite32 \
+  libosmpbf1 \
+  && rm -rf /var/lib/apt/lists/*
+
+COPY --from=builder /opt/planet-dump-ng/planet-dump-ng /usr/local/bin/planet-dump-ng
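A quick smoke test of the two-stage build (image tag illustrative; --help is assumed to be available from planet-dump-ng's option parser):

# Build the processor image: stage 1 compiles planet-dump-ng from the
# planet_epoch_date branch, stage 2 keeps only runtime libs plus the stripped binary.
docker build -t osmseed-osm-processor:dev images/osm-processor

# Verify the binary copied from the builder stage is on PATH and executes.
docker run --rm osmseed-osm-processor:dev planet-dump-ng --help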
2 changes: 1 addition & 1 deletion images/planet-dump/Dockerfile
@@ -1,4 +1,4 @@
-FROM developmentseed/osmseed-osm-processor:0.1.0-n802.h0d9f574
+FROM developmentseed/osmseed-osm-processor:0.1.0-0.dev.git.964.h5e9b11b

 VOLUME /mnt/data
 COPY ./start.sh /