#!/bin/bash
# Retain the N newest backups per volume key and move excess to trash.
#
# Three passes run over the top level of BACKUP_DIR (no recursion):
#   1. uploading-* files older than UPLOADING_MAX_AGE_HOURS with no open
#      handles are moved to trash.
#   2. rpc_<key>-<timestamp>-<size>G.tar.zst archives are grouped by <key>;
#      all but the KEEP_COUNT newest per key are moved to trash together with
#      their sibling .txt metadata file.
#   3. rpc_*.txt metadata files whose archive no longer exists next to them
#      are moved to trash.
#
# Usage:
#   ./cleanup-backups.sh [--dry-run]
#
# Environment:
#   BACKUP_DIR=/backup                         Backup root directory
#   TRASH_DIR=$BACKUP_DIR/trash                Where removed backups are staged
#   KEEP_COUNT=3                               Backups to keep per volume key
#   UPLOADING_MAX_AGE_HOURS=72                 Trash uploading-* only when older than this
#   EXCLUDE_DIRS=graveyard:quarantained:trash  Colon-separated subdirs to skip
#
# Exit status:
#   0  every selected file was staged (or would be, with --dry-run)
#   1  bad usage or configuration, BACKUP_DIR missing, or at least one file
#      could not be moved to trash
#
# Requires GNU stat (-c), find (-print0) and sort (-z); numfmt is optional.

set -euo pipefail

# Directory containing this script (currently not referenced below).
# shellcheck disable=SC2034
BASEPATH="$(cd "$(dirname "$0")" && pwd)"
BACKUP_DIR="${BACKUP_DIR:-/backup}"
TRASH_DIR="${TRASH_DIR:-$BACKUP_DIR/trash}"
KEEP_COUNT="${KEEP_COUNT:-3}"
UPLOADING_MAX_AGE_HOURS="${UPLOADING_MAX_AGE_HOURS:-72}"
EXCLUDE_DIRS="${EXCLUDE_DIRS:-graveyard:quarantained:trash}"

DRY_RUN=false
if [[ "${1:-}" == "--dry-run" ]]; then
  DRY_RUN=true
elif [[ -n "${1:-}" ]]; then
  echo "Usage: $0 [--dry-run]" >&2
  exit 1
fi

# Capture groups: 1 = volume key, 2 = timestamp (YYYY-MM-DD-HH-MM-SS), 3 = size.
readonly BACKUP_PATTERN='^(rpc_.+)-([0-9]{4}-[0-9]{2}-[0-9]{2}-[0-9]{2}-[0-9]{2}-[0-9]{2})-([0-9]+)G\.tar\.zst$'

# Run-wide counters. Every cleanup pass runs in the main shell (never inside a
# command or process substitution) so these updates survive until the summary.
bytes_trashed=0   # bytes staged, or that would be staged in dry-run
trashed_count=0   # archives moved out by the retention pass
kept_count=0      # archives retained by the retention pass
orphan_count=0    # metadata files moved out by the orphan pass
failure_count=0   # files selected for trashing that could not be moved

#######################################
# Write a timestamped message to stderr.
# Arguments: message words
#######################################
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" >&2
}

#######################################
# Abort with exit status 1 unless a setting is a plain decimal integer >= 0.
# Arguments: $1 - setting name (for the message), $2 - value to check
#######################################
require_non_negative_int() {
  local name=$1
  local value=$2
  if [[ ! "$value" =~ ^[0-9]+$ ]]; then
    log "Error: $name must be a non-negative integer, got: $value"
    exit 1
  fi
}

#######################################
# Move one file into TRASH_DIR, or only log the move in dry-run mode, and add
# its size to bytes_trashed once the move succeeded (or would have).
# Globals:   TRASH_DIR, DRY_RUN (read); bytes_trashed (written)
# Arguments: $1 - file to stage, $2 - human-readable reason for the log
# Returns:   0 if staged, would be staged, or $1 is not a regular file;
#            1 if the destination already exists or mv failed.
# Callers invoke this in a conditional context, which disables errexit inside
# the function, so every failure is checked and reported explicitly here.
#######################################
trash_file() {
  local file=$1
  local reason=$2

  if [[ ! -f "$file" ]]; then
    return 0
  fi

  local dest="$TRASH_DIR/${file##*/}"
  if [[ -e "$dest" ]]; then
    log "Error: trash destination already exists, skipping: $dest"
    return 1
  fi

  # Measure before moving; an unreadable size only affects the summary figure.
  local size
  size=$(stat -c %s -- "$file" 2>/dev/null) || size=0

  if $DRY_RUN; then
    log "DRY-RUN would trash ($reason): $file -> $dest"
  else
    if ! mv -- "$file" "$dest"; then
      log "Error: failed to move to trash: $file -> $dest"
      return 1
    fi
    log "Trashed ($reason): $file -> $dest"
  fi

  bytes_trashed=$((bytes_trashed + size))
  return 0
}

#######################################
# Decide whether a path must be left alone.
# A path is excluded when, relative to BACKUP_DIR, it starts with "broken_" or
# "deprecated-", or equals / lies under one of the EXCLUDE_DIRS entries.
# Globals:   BACKUP_DIR, EXCLUDE_DIRS (read)
# Arguments: $1 - path to test
# Returns:   0 if excluded, 1 otherwise
#######################################
is_excluded_path() {
  local path=$1
  # Drop one trailing slash so BACKUP_DIR=/backup/ still strips as a prefix.
  local root=${BACKUP_DIR%/}
  local rel=${path#"$root"/}

  if [[ "$rel" == broken_* || "$rel" == deprecated-* ]]; then
    return 0
  fi

  local dir
  local -a excluded_dirs=()
  IFS=':' read -ra excluded_dirs <<< "$EXCLUDE_DIRS"
  # The ${arr[@]+...} form keeps an empty list from tripping `set -u` on
  # bash releases older than 4.4.
  for dir in ${excluded_dirs[@]+"${excluded_dirs[@]}"}; do
    if [[ -n "$dir" && ( "$rel" == "$dir" || "$rel" == "$dir"/* ) ]]; then
      return 0
    fi
  done

  return 1
}

#######################################
# Extract the volume key from a backup archive file name.
# Arguments: $1 - file name without directory
# Outputs:   the volume key on stdout when the name matches BACKUP_PATTERN
# Returns:   0 on match, 1 otherwise
#######################################
parse_volume_key() {
  local basename=$1
  if [[ "$basename" =~ $BACKUP_PATTERN ]]; then
    printf '%s\n' "${BASH_REMATCH[1]}"
    return 0
  fi
  return 1
}

#######################################
# Map an archive path to its sibling metadata path (.tar.zst -> .txt).
# Arguments: $1 - archive path
# Outputs:   metadata path on stdout
#######################################
metadata_for_archive() {
  local archive=$1
  printf '%s\n' "${archive%.tar.zst}.txt"
}

#######################################
# Report whether any process has the file open, using lsof and/or fuser when
# they are installed. With neither tool available the answer is "no".
# Arguments: $1 - file to probe
# Returns:   0 if an open handle was found, 1 otherwise
#######################################
file_has_open_handles() {
  local file=$1

  if command -v lsof >/dev/null 2>&1 && lsof -- "$file" >/dev/null 2>&1; then
    return 0
  fi
  if command -v fuser >/dev/null 2>&1 && fuser -- "$file" >/dev/null 2>&1; then
    return 0
  fi
  return 1
}

#######################################
# Stage uploading-* files that are older than UPLOADING_MAX_AGE_HOURS and are
# not held open by any process.
# Globals: BACKUP_DIR, UPLOADING_MAX_AGE_HOURS (read); failure_count (written)
#######################################
cleanup_stale_uploads() {
  local upload_file mtime age now
  local max_age_seconds=$((UPLOADING_MAX_AGE_HOURS * 3600))
  now=$(date +%s)

  while IFS= read -r -d '' upload_file; do
    if is_excluded_path "$upload_file"; then
      continue
    fi

    # The file can be renamed or removed between find and stat (for example
    # when an upload completes); that must not abort the whole run.
    if ! mtime=$(stat -c %Y -- "$upload_file" 2>/dev/null); then
      log "Skipping upload whose mtime could not be read: $upload_file"
      continue
    fi

    age=$((now - mtime))
    if (( age < max_age_seconds )); then
      log "Keeping in-progress upload ($((age / 3600))h old): $upload_file"
      continue
    fi

    if file_has_open_handles "$upload_file"; then
      log "Keeping upload with open handles ($((age / 3600))h old): $upload_file"
      continue
    fi

    trash_file "$upload_file" "stale upload older than ${UPLOADING_MAX_AGE_HOURS}h" \
      || failure_count=$((failure_count + 1))
  done < <(find "$BACKUP_DIR" -maxdepth 1 -type f -name 'uploading-*' -print0 2>/dev/null)
}

#######################################
# Keep the KEEP_COUNT newest archives of every volume key and stage the rest,
# each together with its metadata file when one exists.
# Globals: BACKUP_DIR, KEEP_COUNT (read);
#          trashed_count, kept_count (reset and written); failure_count (written)
#######################################
cleanup_volume_backups() {
  local -a archives=() archive_keys=()
  local -A totals=() seen=()
  local file volume_key count

  trashed_count=0
  kept_count=0

  # Pass 1: collect recognised archives in byte order. All archives of one
  # volume share the same prefix up to the fixed-width timestamp, so byte order
  # within a volume is oldest first. NUL delimiters end to end keep unusual
  # file names intact, and LC_ALL=C makes the order locale-independent.
  while IFS= read -r -d '' file; do
    if is_excluded_path "$file"; then
      continue
    fi
    if ! volume_key=$(parse_volume_key "${file##*/}"); then
      log "Skipping unrecognized backup filename: $file"
      continue
    fi
    archives+=("$file")
    archive_keys+=("$volume_key")
    count=${totals["$volume_key"]:-0}
    totals["$volume_key"]=$((count + 1))
  done < <(find "$BACKUP_DIR" -maxdepth 1 -type f -name 'rpc_*-*.tar.zst' -print0 2>/dev/null \
    | LC_ALL=C sort -z)

  # Pass 2: the first (total - KEEP_COUNT) archives of each volume are excess.
  local i position total metadata
  for ((i = 0; i < ${#archives[@]}; i++)); do
    file=${archives[$i]}
    volume_key=${archive_keys[$i]}
    total=${totals["$volume_key"]}
    position=${seen["$volume_key"]:-0}
    seen["$volume_key"]=$((position + 1))

    if (( position >= total - KEEP_COUNT )); then
      kept_count=$((kept_count + 1))
      continue
    fi

    if ! trash_file "$file" "keeping ${KEEP_COUNT}/${total} for $volume_key"; then
      # The archive is still in place, so its metadata must stay with it.
      failure_count=$((failure_count + 1))
      continue
    fi
    trashed_count=$((trashed_count + 1))

    metadata=$(metadata_for_archive "$file")
    if [[ -f "$metadata" ]]; then
      trash_file "$metadata" "orphaned metadata for $volume_key" \
        || failure_count=$((failure_count + 1))
    fi
  done
}

#######################################
# Stage rpc_*.txt metadata files that have no archive next to them.
# Globals: BACKUP_DIR, TRASH_DIR (read);
#          orphan_count (reset and written); failure_count (written)
# NOTE(review): only "<name>.tar.zst" beside the metadata counts as its
# archive; confirm no metadata is written while its archive still carries an
# uploading-* name, otherwise it would be staged here.
#######################################
cleanup_orphan_metadata() {
  local metadata archive reason

  orphan_count=0

  while IFS= read -r -d '' metadata; do
    if is_excluded_path "$metadata"; then
      continue
    fi
    if [[ ! "${metadata##*/}" =~ ^rpc_.+\.txt$ ]]; then
      continue
    fi

    archive="${metadata%.txt}.tar.zst"
    if [[ -f "$archive" ]]; then
      continue
    fi

    # Both cases are staged; only the logged reason differs.
    if [[ -f "$TRASH_DIR/${archive##*/}" ]]; then
      reason="metadata for archived-in-trash backup"
    else
      reason="orphaned metadata without archive"
    fi

    if trash_file "$metadata" "$reason"; then
      orphan_count=$((orphan_count + 1))
    else
      failure_count=$((failure_count + 1))
    fi
  done < <(find "$BACKUP_DIR" -maxdepth 1 -type f -name 'rpc_*.txt' -print0 2>/dev/null)
}

#######################################
# Validate configuration, run the three cleanup passes and log a summary.
# Exits 1 on invalid configuration, a missing BACKUP_DIR, or when any file
# could not be staged.
#######################################
main() {
  require_non_negative_int KEEP_COUNT "$KEEP_COUNT"
  require_non_negative_int UPLOADING_MAX_AGE_HOURS "$UPLOADING_MAX_AGE_HOURS"
  # Force base 10 so values such as "08" are not rejected as invalid octal.
  KEEP_COUNT=$((10#$KEEP_COUNT))
  UPLOADING_MAX_AGE_HOURS=$((10#$UPLOADING_MAX_AGE_HOURS))

  if [[ ! -d "$BACKUP_DIR" ]]; then
    log "Error: backup directory does not exist: $BACKUP_DIR"
    exit 1
  fi

  if ! $DRY_RUN; then
    mkdir -p "$TRASH_DIR"
  fi

  log "Starting backup cleanup in $BACKUP_DIR (trash_dir=$TRASH_DIR, keep_count=$KEEP_COUNT, uploading_max_age=${UPLOADING_MAX_AGE_HOURS}h, dry_run=$DRY_RUN)"

  cleanup_stale_uploads
  cleanup_volume_backups
  cleanup_orphan_metadata

  local trashed_human
  trashed_human=$(numfmt --to=iec-i --suffix=B "$bytes_trashed" 2>/dev/null || echo "${bytes_trashed}B")

  log "Cleanup complete: trashed=$trashed_count kept=$kept_count orphan_metadata=$orphan_count staged≈$trashed_human"

  if (( failure_count > 0 )); then
    log "Error: $failure_count file(s) could not be moved to trash"
    exit 1
  fi
}

main "$@"