Skip to content

Commit

Permalink
Increase performance and reliability of the compact script
Browse files Browse the repository at this point in the history
  • Loading branch information
srosset81 committed Apr 17, 2024
1 parent 454d234 commit 9520203
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 14 deletions.
6 changes: 3 additions & 3 deletions src/jena/fuseki-docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ ENV JAVA_MX_RAM="4G"
ENV FUSEKI_SHA512 2b92f3304743da335f648c1be7b5d7c3a94725ed0a9b5123362c89a50986690114dcef0813e328126b14240f321f740b608cc353417e485c9235476f059bd380
ENV FUSEKI_VERSION 3.17.0
# Tip: No need for https as we've coded the sha512 above
ENV ASF_MIRROR_EU http://www.eu.apache.org/dist/
ENV ASF_MIRROR_EU https://www.eu.apache.org/dist/
ENV ASF_MIRROR_US https://downloads.apache.org/
ENV ASF_ARCHIVE http://archive.apache.org/dist/
ENV ASF_ARCHIVE https://archive.apache.org/dist/
#

ENV JENA_SHA512 321c763fa3b3532fa06bb146363722e58e10289194f622f2e29117b610521e62e7ea51b9d06cd366570ed143f2ebbeded22e5302d2375b49da253b7ddef86d34
Expand All @@ -47,7 +47,7 @@ WORKDIR /tmp
# sha512 checksum
RUN echo "$FUSEKI_SHA512 fuseki.tar.gz" > fuseki.tar.gz.sha512
# Download/check/unpack/move in one go (to reduce image size)
RUN curl -sS --fail $ASF_ARCHIVE/jena/binaries/apache-jena-fuseki-$FUSEKI_VERSION.tar.gz > fuseki.tar.gz && \
RUN curl -sSL --fail $ASF_ARCHIVE/jena/binaries/apache-jena-fuseki-$FUSEKI_VERSION.tar.gz > fuseki.tar.gz && \
sha512sum -c fuseki.tar.gz.sha512 && \
tar zxf fuseki.tar.gz && \
mv apache-jena-fuseki* $FUSEKI_HOME && \
Expand Down
37 changes: 26 additions & 11 deletions src/jena/fuseki-docker/docker-compact-entrypoint.sh
Original file line number Diff line number Diff line change
@@ -1,18 +1,33 @@
#!/bin/bash
# Compacts Fuseki TDB2 databases and then removes superseded Data* directories.
#
# NOTE(review): this span appears to be a rendered diff (hunk "@@ -1,18 +1,33 @@")
# whose +/- markers were lost, so lines of the previous and the new version of
# the script are interleaved. The tags used below are inferred from the
# "26 additions & 11 deletions" summary and should be confirmed against the
# repository:
#   [old] = line that belonged to the previous version (deleted by the commit)
#   [new] = line introduced by the commit
# Untagged lines are common to both versions.

# Stop at the first command that fails (outside of tested conditions).
set -e

# [old] All work was done relative to the databases root.
cd /fuseki/databases
# [new] Two passes, in this order: because the operation loop is the outermost
# one, every dataset is compacted before any old directory is deleted.
declare -a operations=("compact" "deleteOld")
# [new] Suffixes appended to each dataset name to form a database directory
# name: "<dataset>", "<dataset>Acl" and "<dataset>Mirror".
declare -a graphs=("" "Acl" "Mirror")

# [old] Loop over every sub-directory of the current directory; the glob keeps
# the trailing slash, which ${dir::-1} strips again.
# Go through all directories in the /fuseki/databases
for dir in */; do
echo "Compacting /fuseki/databases/${dir::-1}..."
# [new] Outer loop: operation, then graph suffix, then configuration file.
for operation in "${operations[@]}"; do
for graph in "${graphs[@]}"; do
# [new] Datasets are discovered from the .ttl files in the configuration
# directory rather than from the directories present under databases.
# Go through all files in the /fuseki/configuration
for filepath in /fuseki/configuration/*.ttl; do
filename=$(basename -- "$filepath")

# [old] Compact the database found in the current sub-directory.
/jena-fuseki/bin/tdb2.tdbcompact --loc=/fuseki/databases/${dir::-1}
# [new] ${filename%.*} drops the shortest trailing ".something".
# Remove .ttl extension
dataset="${filename%.*}"

# [old] Fixed pause between compacting and deleting.
# Wait 5 seconds to ensure the compacting is finished (this is usually done in less than 2 seconds)
sleep 5
# [new] Absolute path of the database directory for this dataset/suffix pair.
dir="/fuseki/databases/${dataset}${graph}"

# [old] Enter the database directory and delete every Data* directory except
# the one the inner find/sort/tail pipeline prints last.
# NOTE(review): the $(...) is unquoted, so an empty or multi-word result
# changes the argument list that the outer find receives.
# Remove the old Data directory
cd /fuseki/databases/${dir::-1}
find . -iname 'Data*' ! -wholename $(find . -iname 'Data*' -type d | sort -n | tail -n 1) -type d -exec rm -rf {} +
# [old] End of the per-directory loop of the previous version.
done
# [new] Only act on database directories that actually exist; others are
# reported and skipped.
if [ -d "$dir" ]; then
if [ "$operation" == "compact" ]; then
echo "Compacting ${dir}..."
# TODO use --deleteOld command available in higher Fuseki versions
# NOTE(review): ${dir} is unquoted here, so a path containing whitespace
# would be split into several arguments.
/jena-fuseki/bin/tdb2.tdbcompact --loc=${dir}
else
echo "Deleting old directories from ${dir}..."
# The working directory stays changed after this cd; later iterations are
# unaffected because they build absolute paths.
cd "${dir}"
# Same deletion command as the [old] one above: keep the Data* directory
# printed last by the inner pipeline and remove the other Data* directories.
# NOTE(review): the unquoted $(...) has the same caveat as above.
find . -iname 'Data*' ! -wholename $(find . -iname 'Data*' -type d | sort -n | tail -n 1) -type d -exec rm -rf {} +
fi
else
echo "Directory ${dir} does not exist, skipping..."
fi
# [new] Close the configuration-file, graph and operation loops.
done
done
done

0 comments on commit 9520203

Please sign in to comment.