restore/cleanup: implement static-file -> /slowdisk offload + free it on removal

restore-volumes.sh: pre-create static-file symlinks from the backup's .txt manifest so
the immutable ancient/freezer dirs land on /slowdisk (SSD) and extract THROUGH the
symlinks via tar --keep-directory-symlink (was --dereference, which clobbered them);
hot state stays on the primary disk. Cleans stale /slowdisk targets first (no leak on
re-restore). Safe fallbacks: no /slowdisk / no manifest / no static paths -> normal
extract. Reth excluded (reth dropped whole-dir static-file symlinks).

volume-utils.sh: add delete_slowdisk_targets_for_key() — follows a volume's symlinks and
sweeps the rpc_<key>__data_ pattern under /slowdisk (matches delete-volumes.sh).

cleanup-volumes.sh: free the /slowdisk static data before docker volume rm (was leaking),
and replace the fragile substring used/unused match with an exact name match.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-15 03:05:04 +00:00
parent f39e09dac0
commit 345538954d
3 changed files with 110 additions and 52 deletions

View File

@@ -1,3 +1,8 @@
#!/bin/bash
# List (default) or, with --remove-from-disk, delete the docker volumes that are NOT
# referenced by the current COMPOSE_FILE. Removal also frees the static-file data that was
# offloaded to /slowdisk behind the volume's symlinks (delete_slowdisk_targets_for_key),
# so nothing leaks on /slowdisk.
BASEPATH="$(dirname "$0")" BASEPATH="$(dirname "$0")"
source $BASEPATH/.env source $BASEPATH/.env
source $BASEPATH/volume-utils.sh source $BASEPATH/volume-utils.sh
@@ -9,7 +14,7 @@ used_volumes=()
for part in "${parts[@]}"; do for part in "${parts[@]}"; do
volumes=$(get_volume_keys "$BASEPATH/$part") volumes=$(get_volume_keys "$BASEPATH/$part")
# Convert volumes to an array # Prefix each compose volume key with rpc_ to match docker's volume names.
prefix="rpc_" prefix="rpc_"
IFS=$'\n' read -r -d '' -a volumes_array <<< "$(printf "%s\n" "${volumes[@]}" | sed "/^$/! s/^/$prefix/")" IFS=$'\n' read -r -d '' -a volumes_array <<< "$(printf "%s\n" "${volumes[@]}" | sed "/^$/! s/^/$prefix/")"
@@ -18,21 +23,29 @@ done
on_disk=($(docker volume ls --format '{{.Name}}' | grep '^rpc_')) on_disk=($(docker volume ls --format '{{.Name}}' | grep '^rpc_'))
unused_volumes=() # A volume counts as "used" only on an EXACT name match. The previous substring test
# ([[ "${used_volumes[@]}" =~ "$element" ]]) could mis-classify a volume whose name is a
# substring of another (e.g. ...-pruned vs ...-pruned-trace) and wrongly purge a live one.
is_used() {
  # Exact-name membership test against the global used_volumes array.
  #   $1 - volume name to look up
  # Returns 0 when some array entry is string-equal to $1, otherwise 1
  # (which is also the result for an empty array).
  local wanted=$1
  local entry
  for entry in "${used_volumes[@]}"; do
    if [[ "$entry" == "$wanted" ]]; then
      return 0
    fi
  done
  return 1
}
unused_volumes=()
for element in "${on_disk[@]}"; do for element in "${on_disk[@]}"; do
# Check if the element exists in array2 is_used "$element" || unused_volumes+=("$element")
if [[ ! "${used_volumes[@]}" =~ "$element" ]]; then
# If not, add it to the difference array
unused_volumes+=("$element")
fi
done done
if [ "$1" = "--remove-from-disk" ]; then if [ "$1" = "--remove-from-disk" ]; then
# Iterate over volumes in the difference array and remove them from disk # Remove each unused volume AND the /slowdisk static data behind its symlinks.
for volume in "${unused_volumes[@]}"; do for volume in "${unused_volumes[@]}"; do
docker volume rm "$volume" echo "removing unused volume: $volume"
done delete_slowdisk_targets_for_key "${volume#rpc_}"
docker volume rm "$volume"
done
else else
printf '%s\n' "${unused_volumes[@]}" printf '%s\n' "${unused_volumes[@]}"
fi fi

View File

@@ -19,74 +19,92 @@ if [ ! -d "$volume_dir" ]; then
exit 1 exit 1
fi fi
# Pre-create static-file -> /slowdisk symlinks from a backup's ".txt" manifest, so the
# immutable "ancient"/freezer dirs land on the (SSD) /slowdisk during extraction while the
# hot/dynamic state stays on the primary disk. tar then extracts THROUGH the symlinks via
# --keep-directory-symlink (it keeps the dir-symlinks instead of clobbering them).
# Target naming is <root>/rpc_<key>__data_<relpath with every "/" replaced by "_">, the
# pattern delete-volumes.sh / delete_slowdisk_targets_for_key clean up.
# Arguments:
#   $1 key       - volume key (without the rpc_ prefix)
#   $2 meta      - path to the manifest file
#   $3 data_dir  - the volume's _data directory the symlinks are created in
#   $4 slow_root - optional offload root, defaults to /slowdisk
# Outputs: progress / skip / WARN lines on stdout.
# Returns: always 0 (best effort). Safe fallbacks (just extract normally): no offload
#          root, no manifest, or no static paths.
prep_static_offload() {
  local key=$1 meta=$2 data_dir=$3 slow_root=${4:-/slowdisk}
  local rel target link
  [ -d "$slow_root" ] || { echo " $slow_root absent — no static offload"; return 0; }
  [ -f "$meta" ] || { echo " no manifest ($meta) — no static offload"; return 0; }
  # manifest data lines (after the 3-line header) are "<size> <relpath>"; only the last
  # whitespace-separated field is read, so a relpath containing spaces is not supported.
  while IFS= read -r rel; do
    rel="${rel#/}"
    # Check for emptiness AFTER stripping the leading slash: an entry of "/" would
    # otherwise become an empty relpath and produce a bogus "rpc_<key>__data_" target.
    [ -z "$rel" ] && continue
    case "$rel" in *..*) echo " skip unsafe static path '$rel'"; continue;; esac
    target="${slow_root}/rpc_${key}__data_${rel//\//_}"
    link="$data_dir/$rel"
    # ln -sfn onto an existing REAL directory would silently create the link inside that
    # directory instead of replacing it, so refuse rather than pretend to offload.
    if [ -e "$link" ] && [ ! -L "$link" ]; then
      echo " WARN: '$link' exists and is not a symlink, skipping offload of '$rel'"
      continue
    fi
    echo " offload static '$rel' -> $target"
    mkdir -p "$target" "$(dirname "$link")" || { echo " WARN: mkdir failed for '$rel', skipping"; continue; }
    ln -sfn "$target" "$link" || echo " WARN: symlink failed for '$rel'"
  done < <(awk 'NR>3 && NF>=2 {print $NF}' "$meta")
  return 0
}
# Read the JSON input and extract the list of keys # Read the JSON input and extract the list of keys
keys=$(get_persistent_volume_keys "$dir/$1.yml" | grep -E '^[0-9a-z]') keys=$(get_persistent_volume_keys "$dir/$1.yml" | grep -E '^[0-9a-z]')
restore_files=()
cleanup_folders=()
echo "$keys" echo "$keys"
while IFS= read -r key; do while IFS= read -r key; do
volume_name="rpc_$key" [ -z "$key" ] && continue
data_dir="$volume_dir/rpc_$key/_data"
declare newest_file declare newest_file
if [[ -n "$remote_source" ]]; then if [[ -n "$remote_source" ]]; then
volume_name="rpc_$key-20" # needs to be followed by a date 2024 newest_file=$($dir/list-backups.sh "$remote_source" | grep "rpc_$key-20" | sort | tail -n 1)
newest_file=$($dir/list-backups.sh "$remote_source" | grep "${volume_name}" | sort | tail -n 1)
else else
newest_file=$(ls -1 "$backup_dir"/"${volume_name}"-[0-9]*G.tar.zst 2>/dev/null | sort | tail -n 1) newest_file=$(ls -1 "$backup_dir"/"rpc_$key"-[0-9]*G.tar.zst 2>/dev/null | sort | tail -n 1)
fi fi
directory="$volume_dir/rpc_$key/_data/"
if [ -z "$newest_file" ]; then if [ -z "$newest_file" ]; then
echo "Error: No backup found for volume '$volume_name'" echo "Error: No backup found for volume 'rpc_$key'"
exit 1 exit 1
else
restore_files+=("$newest_file")
cleanup_folders+=("$directory")
fi fi
done <<< "$keys" meta_file="${newest_file%.tar.zst}.txt"
echo "=== restoring rpc_$key <- $newest_file ==="
echo "${cleanup_folders[@]}" # 1) wipe live data AND any /slowdisk static targets for this key (no leak on re-restore)
delete_slowdisk_targets_for_key "$key"
[ -d "$data_dir" ] && rm -rf "$data_dir"/*
mkdir -p "$data_dir"
for folder in "${cleanup_folders[@]}"; do # 2) obtain the manifest (fetch the sidecar .txt for remote restores) and, unless this is
echo "delete '$folder'" # a reth node (reth dropped whole-dir static-file symlinks), pre-create the offload.
[ -d "$folder" ] && rm -rf "$folder"/* local_meta="$meta_file"
done if [[ -n "$remote_source" ]]; then
local_meta="$backup_dir/$(basename "$meta_file")"
echo "done cleanup" [ -d "$backup_dir" ] || local_meta="/tmp/$(basename "$meta_file")"
if [ ! -f "$local_meta" ]; then
for file in "${restore_files[@]}"; do curl --ipv4 -fsS "${remote_source}${meta_file}" -o "$local_meta" 2>/dev/null || local_meta=""
echo "Processing: $file" fi
fi
if [[ "$1" == *reth* ]]; then
echo " reth node: static-file symlink offload disabled (reth broke whole-dir symlinks)"
elif [ -n "$local_meta" ]; then
prep_static_offload "$key" "$local_meta" "$data_dir"
fi
# 3) extract THROUGH the pre-created symlinks (keep them, don't clobber)
if [[ -n "$remote_source" ]]; then if [[ -n "$remote_source" ]]; then
if [ ! -d "$backup_dir" ]; then if [ ! -d "$backup_dir" ]; then
echo "Error: /backup directory does not exist. download from http and extract directly to /var/lib/docker" echo "No /backup cache: streaming + extracting $newest_file directly"
curl --ipv4 -# "${remote_source}${newest_file}" | zstd -d | tar -xf - --keep-directory-symlink -C /
curl --ipv4 -# "${remote_source}${file}" | zstd -d | tar -xvf - --dereference -C /
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
echo "Error processing $file" echo "Error processing $newest_file"
exit 1 exit 1
else
echo "$file successfully processed."
fi fi
else else
echo "have backup dir to cache... $file" if [ ! -e "$backup_dir/$(basename "$newest_file")" ]; then
if [ ! -e "$backup_dir/$(basename $file)" ]; then aria2c -c -Z -x8 -j8 -s8 -d "$backup_dir" "${remote_source}${newest_file}"
aria2c -c -Z -x8 -j8 -s8 -d "$backup_dir" "${remote_source}${file}"
fi fi
tar -I zstd -xf "$backup_dir/$(basename $file)" --dereference -C / tar -I zstd -xf "$backup_dir/$(basename "$newest_file")" --keep-directory-symlink -C /
echo "Backup '$file' processed"
fi fi
else else
tar -I zstd -xf "$file" --dereference -C / tar -I zstd -xf "$newest_file" --keep-directory-symlink -C /
echo "Backup '$file' restored"
fi fi
done echo "Backup '$newest_file' restored"
done <<< "$keys"
"$dir/delete-node-keys.sh" "$1" "$dir/delete-node-keys.sh" "$1"

View File

@@ -71,3 +71,30 @@ is_local_backup_url() {
return 1 return 1
} }
# Delete the /slowdisk static-file offload targets for a single volume key, so removing a
# volume also frees the static data behind the symlinks (otherwise it leaks on /slowdisk).
# Safe: scoped to the key and to the slowdisk root, and only removes dirs matching the
# rpc_<key>__data_ naming this repo creates (the same pattern delete-volumes.sh deletes).
# Arguments:
#   $1 key          - volume key (without the rpc_ prefix); empty -> no-op
#   $2 slow_root    - optional offload root, defaults to /slowdisk
#   $3 volumes_root - optional docker volumes root, defaults to /var/lib/docker/volumes
# Outputs: one line per removed directory on stdout.
# Returns: always 0. Leaves the caller's shell options and variables untouched.
delete_slowdisk_targets_for_key() {
  local key=$1 slow_root=${2:-/slowdisk} volumes_root=${3:-/var/lib/docker/volumes}
  [[ -z "$key" ]] && return 0
  [[ -d "$slow_root" ]] || return 0
  local data_dir="${volumes_root}/rpc_${key}/_data"
  # Literal prefix every target of this key starts with. It is always expanded QUOTED so a
  # key can never act as a glob pattern or be word-split.
  local prefix="${slow_root}/rpc_${key}__data_"
  local link t
  # 1) follow live symlinks in the volume and delete their slowdisk targets
  if [[ -d "$data_dir" ]]; then
    while IFS= read -r -d '' link; do
      t=$(readlink -f "$link" 2>/dev/null)
      if [[ -n "$t" && -d "$t" && "$t" == "$prefix"* ]]; then
        echo " rm slowdisk static target $t"; rm -rf -- "$t"
      fi
    done < <(find "$data_dir" -type l -print0 2>/dev/null)
  fi
  # 2) sweep any orphaned targets for this key (re-restore / dangling-symlink safety).
  # No nullglob toggling: an unmatched glob stays a literal string that fails the -d test,
  # and a caller that enabled nullglob keeps it enabled.
  for t in "$prefix"*; do
    [[ -d "$t" ]] && { echo " rm orphan slowdisk static target $t"; rm -rf -- "$t"; }
  done
  return 0
}