fix(cleanup-backups): trash-based retention instead of mass rm

The old script grouped backups with cut -d'-' -f1-3, which mis-grouped
volume keys and permanently deleted ~360 archives on 2026-06-11. Replace
with safe retention: group by full volume key regex, keep 3 newest per key,
move excess to /backup/trash, and protect uploading-*, broken_*, and
deprecated-* prefixes.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
2026-06-11 14:53:03 +02:00
parent 12e5ff8ee1
commit 83328a5ab5

View File

@@ -1,21 +1,260 @@
#!/bin/bash
# Retain the N newest backups per volume key and move excess to trash.
#
# Usage:
#   ./cleanup-backups.sh [--dry-run]
#
# Environment:
#   BACKUP_DIR=/backup                         Backup root directory
#   TRASH_DIR=/backup/trash                    Where removed backups are staged
#   KEEP_COUNT=3                               Backups to keep per volume key
#   UPLOADING_MAX_AGE_HOURS=72                 Trash uploading-* only when older than this
#   EXCLUDE_DIRS=graveyard:quarantained:trash  Colon-separated subdirs to skip

set -euo pipefail

# Directory this script lives in.
BASEPATH="$(cd "$(dirname "$0")" && pwd)"

# Settings: each one may be overridden from the environment.
BACKUP_DIR="${BACKUP_DIR:-/backup}"
TRASH_DIR="${TRASH_DIR:-$BACKUP_DIR/trash}"
KEEP_COUNT="${KEEP_COUNT:-3}"
UPLOADING_MAX_AGE_HOURS="${UPLOADING_MAX_AGE_HOURS:-72}"
EXCLUDE_DIRS="${EXCLUDE_DIRS:-graveyard:quarantained:trash}"

# Both numeric settings feed shell arithmetic that decides what gets trashed.
# Reject anything that is not a plain decimal integer (no sign, no leading
# zeros, which arithmetic would read as octal) before any file is touched.
for numeric_setting in KEEP_COUNT UPLOADING_MAX_AGE_HOURS; do
  if [[ ! "${!numeric_setting}" =~ ^(0|[1-9][0-9]*)$ ]]; then
    echo "Error: $numeric_setting must be a non-negative integer, got: ${!numeric_setting}" >&2
    exit 1
  fi
done
unset numeric_setting

# The only accepted argument is --dry-run; anything else prints usage.
DRY_RUN=false
if [[ "${1:-}" == "--dry-run" ]]; then
  DRY_RUN=true
elif [[ -n "${1:-}" ]]; then
  echo "Usage: $0 [--dry-run]" >&2
  exit 1
fi

# <volume key>-<YYYY-MM-DD-HH-MM-SS>-<size>G.tar.zst
# Capture groups: 1 = volume key, 2 = timestamp, 3 = size in G.
BACKUP_PATTERN='^(rpc_.+)-([0-9]{4}-[0-9]{2}-[0-9]{2}-[0-9]{2}-[0-9]{2}-[0-9]{2})-([0-9]+)G\.tar\.zst$'
# Print a timestamped message on stderr.
# Stdout is left untouched, so functions that report results on stdout can
# log freely.
# Arguments: $* - message text
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" >&2
}
# Running total, in bytes, of everything staged for trash during this run.
bytes_trashed=0

# Add the size of a regular file to bytes_trashed.
# Must be called while the file is still at its original path; a path that
# is not a regular file contributes nothing.
# Globals:   bytes_trashed (written)
# Arguments: $1 - path of the file about to be trashed
add_trashed_bytes() {
  local file=$1
  if [[ -f "$file" ]]; then
    local size
    # Best effort: a stat failure counts as 0 bytes instead of aborting.
    size=$(stat -c%s "$file" 2>/dev/null || echo 0)
    bytes_trashed=$((bytes_trashed + size))
  fi
}
# Move one file into TRASH_DIR, or only log the intent in dry-run mode.
# Globals:   TRASH_DIR (read), DRY_RUN (read)
# Arguments: $1 - file to trash
#            $2 - human-readable reason, used in the log line
# Returns:   0 when the file was moved, would be moved (dry-run), or is not
#            a regular file; 1 when the destination already exists or the
#            move failed. The source is left in place on failure.
trash_file() {
  local file=$1
  local reason=$2

  if [[ ! -f "$file" ]]; then
    return 0
  fi

  # Declared separately so a basename failure is not masked by 'local'.
  local dest
  dest="$TRASH_DIR/$(basename -- "$file")"

  # Never overwrite something already staged in the trash.
  if [[ -e "$dest" ]]; then
    log "Error: trash destination already exists, skipping: $dest"
    return 1
  fi

  if $DRY_RUN; then
    log "DRY-RUN would trash ($reason): $file -> $dest"
    return 0
  fi

  # Check mv explicitly: when a caller invokes this function from a
  # conditional context, errexit is suppressed, and an unchecked failure
  # would fall through to the "Trashed" log line and return success.
  if ! mv -- "$file" "$dest"; then
    log "Error: failed to move to trash: $file -> $dest"
    return 1
  fi
  log "Trashed ($reason): $file -> $dest"
  return 0
}
# Decide whether a path must never be touched by the cleanup.
# A path is excluded when, relative to BACKUP_DIR, it starts with broken_ or
# deprecated-, or when it is (or lies inside) one of the EXCLUDE_DIRS.
# Globals:   BACKUP_DIR (read), EXCLUDE_DIRS (read, colon-separated)
# Arguments: $1 - path to test
# Returns:   0 if the path is excluded, 1 otherwise
is_excluded_path() {
  local path=$1

  # Drop trailing slashes from the root so BACKUP_DIR=/backup/ behaves like
  # /backup; otherwise the prefix strip below never matches and no exclusion
  # rule can fire. A root of "/" becomes the empty string.
  local root=${BACKUP_DIR%"${BACKUP_DIR##*[!/]}"}

  local rel=$path
  if [[ "$path" == "$root"/* ]]; then
    rel=${path#"$root"/}
    # Tolerate doubled separators such as /backup//broken_x.
    while [[ "$rel" == /* ]]; do
      rel=${rel#/}
    done
  fi

  if [[ "$rel" == broken_* || "$rel" == deprecated-* ]]; then
    return 0
  fi

  # Local array so repeated calls do not leak a global into the script.
  local -a skip_dirs=()
  local dir
  IFS=':' read -ra skip_dirs <<< "$EXCLUDE_DIRS"
  for dir in "${skip_dirs[@]}"; do
    # Empty entries (e.g. from "a::b") are ignored.
    if [[ -n "$dir" && ( "$rel" == "$dir" || "$rel" == "$dir"/* ) ]]; then
      return 0
    fi
  done
  return 1
}
# Extract the volume key from a backup file name.
# Globals:   BACKUP_PATTERN (read) - regex whose first group is the key
# Arguments: $1 - file name without directory
# Outputs:   the volume key on stdout when the name matches
# Returns:   0 on match, 1 otherwise
parse_volume_key() {
  local candidate=$1

  # Guard clause: names that do not match produce no output.
  [[ "$candidate" =~ $BACKUP_PATTERN ]] || return 1

  printf '%s\n' "${BASH_REMATCH[1]}"
  return 0
}
# Map an archive path to the path of its metadata file by swapping a
# trailing .tar.zst for .txt. A path without that suffix just gets .txt
# appended.
# Arguments: $1 - archive path
# Outputs:   metadata path on stdout
metadata_for_archive() {
  local archive_path=$1
  local stem=${archive_path%.tar.zst}
  printf '%s\n' "${stem}.txt"
}
# Report whether a file appears to be in use.
# Tries lsof first, then fuser; a tool that is not installed is skipped.
# Arguments: $1 - file to probe
# Returns:   0 if either available tool exits successfully for the file,
#            1 otherwise (including when neither tool is installed)
file_has_open_handles() {
  local target=$1
  local probe

  for probe in lsof fuser; do
    command -v "$probe" >/dev/null 2>&1 || continue
    if "$probe" -- "$target" >/dev/null 2>&1; then
      return 0
    fi
  done
  return 1
}
# Trash uploading-* files directly under BACKUP_DIR that are older than
# UPLOADING_MAX_AGE_HOURS and have no open handles.
# A file that cannot be trashed is logged and left in place; it does not
# abort the run.
# Globals:   BACKUP_DIR, UPLOADING_MAX_AGE_HOURS, DRY_RUN (read);
#            bytes_trashed (written)
# Returns:   0
cleanup_stale_uploads() {
  local upload_file mtime age now bytes_before
  local max_age_seconds=$((UPLOADING_MAX_AGE_HOURS * 3600))
  now=$(date +%s)

  while IFS= read -r -d '' upload_file; do
    if is_excluded_path "$upload_file"; then
      continue
    fi

    # An upload may be renamed or removed between the find scan and this
    # stat; skip it instead of letting errexit abort the whole run.
    if ! mtime=$(stat -c %Y -- "$upload_file" 2>/dev/null); then
      log "Skipping upload that disappeared during scan: $upload_file"
      continue
    fi

    age=$((now - mtime))
    if (( age < max_age_seconds )); then
      log "Keeping in-progress upload ($((age / 3600))h old): $upload_file"
      continue
    fi
    if file_has_open_handles "$upload_file"; then
      log "Keeping upload with open handles ($((age / 3600))h old): $upload_file"
      continue
    fi

    # The size has to be read before the move, so snapshot the counter and
    # roll it back if the file ends up staying where it is.
    bytes_before=$bytes_trashed
    add_trashed_bytes "$upload_file"
    # trash_file runs with errexit suppressed in this conditional, so also
    # verify that the file really left its original path (except in dry-run,
    # where it is expected to stay).
    if trash_file "$upload_file" "stale upload older than ${UPLOADING_MAX_AGE_HOURS}h" \
      && { $DRY_RUN || [[ ! -e "$upload_file" ]]; }; then
      continue
    fi
    bytes_trashed=$bytes_before
    log "Warning: could not trash stale upload, leaving in place: $upload_file"
  done < <(find "$BACKUP_DIR" -maxdepth 1 -type f -name 'uploading-*' -print0 2>/dev/null)

  return 0
}
# Keep the KEEP_COUNT newest archives per volume key and trash the rest,
# together with each trashed archive's metadata file.
# Archives are grouped by the key returned by parse_volume_key. Within a
# group the paths differ only from the timestamp onwards, so a byte-wise
# sort orders them oldest first.
# An archive that cannot be trashed is logged, counted as kept, and its
# metadata is left next to it.
# Globals:   BACKUP_DIR, KEEP_COUNT, DRY_RUN (read); bytes_trashed (written)
# Outputs:   "<trashed> <kept>" on stdout (logging goes to stderr via log)
cleanup_volume_backups() {
  local -A volume_files=()
  local -a files=()
  local file name volume_key

  while IFS= read -r -d '' file; do
    if is_excluded_path "$file"; then
      continue
    fi
    name=$(basename "$file")
    if ! volume_key=$(parse_volume_key "$name"); then
      log "Skipping unrecognized backup filename: $file"
      continue
    fi
    # One newline-terminated path per archive, accumulated per volume key.
    volume_files["$volume_key"]+="$file"$'\n'
  done < <(find "$BACKUP_DIR" -maxdepth 1 -type f -name 'rpc_*-*.tar.zst' -print0 2>/dev/null)

  local volume archive metadata bytes_before
  local total delete_count i
  local trashed=0 kept=0

  for volume in "${!volume_files[@]}"; do
    # LC_ALL=C makes the ordering independent of the caller's locale.
    mapfile -t files < <(printf '%s' "${volume_files[$volume]}" | sed '/^$/d' | LC_ALL=C sort)
    total=${#files[@]}
    if (( total <= KEEP_COUNT )); then
      kept=$((kept + total))
      continue
    fi

    delete_count=$((total - KEEP_COUNT))
    kept=$((kept + KEEP_COUNT))

    # The first delete_count entries are the oldest archives.
    for ((i = 0; i < delete_count; i++)); do
      archive=${files[i]}
      metadata=$(metadata_for_archive "$archive")

      # The size must be read before the move; roll the counter back if the
      # archive stays in place.
      bytes_before=$bytes_trashed
      add_trashed_bytes "$archive"
      # trash_file runs with errexit suppressed in this conditional, so also
      # verify the archive really left its original path (except in dry-run).
      if ! { trash_file "$archive" "keeping ${KEEP_COUNT}/${total} for $volume" \
        && { $DRY_RUN || [[ ! -e "$archive" ]]; }; }; then
        bytes_trashed=$bytes_before
        kept=$((kept + 1))
        log "Warning: could not trash archive, leaving in place: $archive"
        continue
      fi
      trashed=$((trashed + 1))

      if [[ -f "$metadata" ]]; then
        bytes_before=$bytes_trashed
        add_trashed_bytes "$metadata"
        if ! { trash_file "$metadata" "orphaned metadata for $volume" \
          && { $DRY_RUN || [[ ! -e "$metadata" ]]; }; }; then
          bytes_trashed=$bytes_before
          log "Warning: could not trash metadata, leaving in place: $metadata"
        fi
      fi
    done
  done

  echo "$trashed $kept"
}
# Trash rpc_*.txt metadata files directly under BACKUP_DIR whose matching
# .tar.zst archive is no longer next to them.
# The log reason distinguishes metadata whose archive is already in
# TRASH_DIR from metadata with no archive at all. A file that cannot be
# trashed is logged, left in place, and not counted.
# Globals:   BACKUP_DIR, TRASH_DIR, DRY_RUN (read); bytes_trashed (written)
# Outputs:   number of metadata files trashed, on stdout
cleanup_orphan_metadata() {
  local metadata archive name reason bytes_before
  local orphans=0

  while IFS= read -r -d '' metadata; do
    if is_excluded_path "$metadata"; then
      continue
    fi
    name=$(basename "$metadata")
    if [[ ! "$name" =~ ^rpc_.+\.txt$ ]]; then
      continue
    fi

    archive="${metadata%.txt}.tar.zst"
    if [[ -f "$archive" ]]; then
      # Archive is still present: the metadata is not orphaned.
      continue
    fi

    if [[ -f "$TRASH_DIR/$(basename "$archive")" ]]; then
      reason="metadata for archived-in-trash backup"
    else
      reason="orphaned metadata without archive"
    fi

    # The size must be read before the move; roll the counter back if the
    # file stays in place.
    bytes_before=$bytes_trashed
    add_trashed_bytes "$metadata"
    # trash_file runs with errexit suppressed in this conditional, so also
    # verify the file really left its original path (except in dry-run).
    if trash_file "$metadata" "$reason" \
      && { $DRY_RUN || [[ ! -e "$metadata" ]]; }; then
      orphans=$((orphans + 1))
    else
      bytes_trashed=$bytes_before
      log "Warning: could not trash metadata, leaving in place: $metadata"
    fi
  done < <(find "$BACKUP_DIR" -maxdepth 1 -type f -name 'rpc_*.txt' -print0 2>/dev/null)

  echo "$orphans"
}
# Entry point: validate the backup directory, run the three cleanup passes,
# and log a one-line summary.
# Globals:   BACKUP_DIR, TRASH_DIR, KEEP_COUNT, UPLOADING_MAX_AGE_HOURS,
#            DRY_RUN, bytes_trashed (read)
# Returns:   exits 1 when BACKUP_DIR does not exist
main() {
  if [[ ! -d "$BACKUP_DIR" ]]; then
    log "Error: backup directory does not exist: $BACKUP_DIR"
    exit 1
  fi

  # A dry run must not create anything on disk.
  if ! $DRY_RUN; then
    mkdir -p "$TRASH_DIR"
  fi

  log "Starting backup cleanup in $BACKUP_DIR (trash_dir=$TRASH_DIR, keep_count=$KEEP_COUNT, uploading_max_age=${UPLOADING_MAX_AGE_HOURS}h, dry_run=$DRY_RUN)"

  cleanup_stale_uploads

  # The two passes below report their counts on stdout. They must run in
  # THIS shell: inside $(...) or <(...) they would run in a subshell and
  # every update they make to bytes_trashed would be lost, leaving the
  # summary's staged size covering stale uploads only. Their stdout is
  # therefore sent through a temporary file.
  local results
  results=$(mktemp)

  local trashed="" kept="" orphan_metadata=""
  cleanup_volume_backups > "$results"
  # read returns non-zero on empty output; fall back to defaults below.
  read -r trashed kept < "$results" || true
  cleanup_orphan_metadata > "$results"
  read -r orphan_metadata < "$results" || true
  rm -f -- "$results"

  trashed=${trashed:-0}
  kept=${kept:-0}
  orphan_metadata=${orphan_metadata:-0}

  local trashed_human
  # numfmt is optional; fall back to a raw byte count.
  trashed_human=$(numfmt --to=iec-i --suffix=B "$bytes_trashed" 2>/dev/null || echo "${bytes_trashed}B")
  log "Cleanup complete: trashed=$trashed kept=$kept orphan_metadata=$orphan_metadata staged≈$trashed_human"
}
main "$@"