fix(cleanup-backups): trash-based retention instead of mass rm
The old script grouped backups with cut -d'-' -f1-3, which mis-grouped volume keys and permanently deleted ~360 archives on 2026-06-11. Replace with safe retention: group by full volume key regex, keep 3 newest per key, move excess to /backup/trash, and protect uploading-*, broken_*, and deprecated-* prefixes. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -1,21 +1,260 @@
|
||||
#!/bin/bash
# Retain the N newest backups per volume key and move excess to trash.
#
# Usage:
#   ./cleanup-backups.sh [--dry-run]
#
# Environment:
#   BACKUP_DIR=/backup                         Backup root directory
#   TRASH_DIR=/backup/trash                    Where removed backups are staged
#   KEEP_COUNT=3                               Backups to keep per volume key
#   UPLOADING_MAX_AGE_HOURS=72                 Trash uploading-* only when older than this
#   EXCLUDE_DIRS=graveyard:quarantained:trash  Colon-separated subdirs to skip
#
# The statements of the previous implementation (find + cut -d'-' -f1-3 +
# rm) are intentionally absent: they opened a for/if that was never closed
# and performed the permanent deletion this script replaces.

set -euo pipefail

# Directory containing this script (not referenced by the code below).
BASEPATH="$(cd "$(dirname "$0")" && pwd)"

# Configuration; every value can be overridden from the environment.
BACKUP_DIR="${BACKUP_DIR:-/backup}"
TRASH_DIR="${TRASH_DIR:-$BACKUP_DIR/trash}"
KEEP_COUNT="${KEEP_COUNT:-3}"
UPLOADING_MAX_AGE_HOURS="${UPLOADING_MAX_AGE_HOURS:-72}"
EXCLUDE_DIRS="${EXCLUDE_DIRS:-graveyard:quarantained:trash}"

# The only accepted argument is --dry-run; anything else prints usage.
DRY_RUN=false
if [[ "${1:-}" == "--dry-run" ]]; then
  DRY_RUN=true
elif [[ -n "${1:-}" ]]; then
  echo "Usage: $0 [--dry-run]" >&2
  exit 1
fi

# Archive file name: <volume key>-<YYYY-MM-DD-HH-MM-SS>-<size>G.tar.zst
# Capture group 1 is the volume key, 2 the timestamp, 3 the size in G.
BACKUP_PATTERN='^(rpc_.+)-([0-9]{4}-[0-9]{2}-[0-9]{2}-[0-9]{2}-[0-9]{2}-[0-9]{2})-([0-9]+)G\.tar\.zst$'
# Write a timestamped message to stderr.
# Arguments: all arguments are joined with spaces into one message
# Outputs:   "[YYYY-MM-DD HH:MM:SS] message" on stderr; nothing on stdout
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" >&2
}
# Running total, in bytes, of the files recorded by add_trashed_bytes.
bytes_trashed=0

# Add the size of a regular file to bytes_trashed.
# Globals:   bytes_trashed (written)
# Arguments: $1 - path of the file to account for
# Returns:   0 always; paths that are not regular files are ignored and a
#            file whose size cannot be read counts as 0 bytes
add_trashed_bytes() {
  local file=$1
  local size

  [[ -f "$file" ]] || return 0

  # Best effort: a stat failure (e.g. the file vanished) must not abort.
  size=$(stat -c%s -- "$file" 2>/dev/null) || size=0
  [[ "$size" =~ ^[0-9]+$ ]] || size=0

  bytes_trashed=$((bytes_trashed + size))
}
# Move one file into TRASH_DIR instead of deleting it.
# Globals:   TRASH_DIR (read), DRY_RUN (read)
# Arguments: $1 - path of the file to move
#            $2 - human-readable reason, used only in log messages
# Returns:   0 if the file was moved, would be moved (dry run), or is not a
#            regular file; 1 if the destination already exists or the move
#            failed (the source is left in place in both cases)
trash_file() {
  local file=$1
  local reason=$2
  local dest

  # Missing paths and non-regular files are silently ignored.
  [[ -f "$file" ]] || return 0

  dest="$TRASH_DIR/${file##*/}"

  # Never overwrite something already staged in the trash. -L also catches a
  # dangling symlink, which -e reports as absent but mv would replace.
  if [[ -e "$dest" || -L "$dest" ]]; then
    log "Error: trash destination already exists, skipping: $dest"
    return 1
  fi

  if $DRY_RUN; then
    log "DRY-RUN would trash ($reason): $file -> $dest"
    return 0
  fi

  # Check mv explicitly: `set -e` is ignored while this function runs as
  # part of an `if`/`||` condition, and a failed move must never be logged
  # as a success.
  if ! mv -- "$file" "$dest"; then
    log "Error: failed to move to trash: $file -> $dest"
    return 1
  fi

  log "Trashed ($reason): $file -> $dest"
}
# Decide whether a path must be left untouched by the cleanup.
# A path is excluded when, relative to BACKUP_DIR, it starts with
# "broken_" or "deprecated-", or when it is (or lies under) one of the
# directories named in EXCLUDE_DIRS.
# Globals:   BACKUP_DIR (read), EXCLUDE_DIRS (read, colon-separated)
# Arguments: $1 - path to check
# Returns:   0 if the path is excluded, 1 otherwise
is_excluded_path() {
  local path=$1
  # Drop a trailing slash from the root so BACKUP_DIR=/backup/ still strips
  # the prefix; otherwise the checks below would see the absolute path and
  # never match.
  local root="${BACKUP_DIR%/}"
  local rel="${path#"$root"/}"
  local -a excluded_dirs=()
  local dir

  if [[ "$rel" == broken_* || "$rel" == deprecated-* ]]; then
    return 0
  fi

  IFS=':' read -ra excluded_dirs <<< "$EXCLUDE_DIRS"
  # The ${arr[@]+...} form keeps an empty list from tripping `set -u` on
  # older bash releases.
  for dir in ${excluded_dirs[@]+"${excluded_dirs[@]}"}; do
    [[ -n "$dir" ]] || continue
    if [[ "$rel" == "$dir" || "$rel" == "$dir"/* ]]; then
      return 0
    fi
  done

  return 1
}
# Extract the volume key from a backup archive file name.
# Globals:   BACKUP_PATTERN (read) - regex whose first capture group is the key
# Arguments: $1 - file name without directory
# Outputs:   the volume key on stdout when the name matches
# Returns:   0 on a match, 1 (with no output) otherwise
parse_volume_key() {
  local basename=$1

  [[ "$basename" =~ $BACKUP_PATTERN ]] || return 1
  printf '%s\n' "${BASH_REMATCH[1]}"
}
# Derive the metadata file path that belongs to an archive path by
# replacing a trailing ".tar.zst" with ".txt" (".txt" is simply appended
# when the suffix is absent).
# Arguments: $1 - archive path
# Outputs:   the metadata path on stdout
metadata_for_archive() {
  local archive=$1
  printf '%s\n' "${archive%.tar.zst}.txt"
}
# Report whether some process currently has the file open.
# Probes with lsof first, then fuser; a tool that is not installed is
# skipped, and a tool that fails counts as "no open handle found".
# Arguments: $1 - path of the file to probe
# Returns:   0 if either tool reports the file as in use, 1 otherwise
#            (including when neither tool is installed)
file_has_open_handles() {
  local file=$1
  local tool

  for tool in lsof fuser; do
    command -v "$tool" >/dev/null 2>&1 || continue
    if "$tool" -- "$file" >/dev/null 2>&1; then
      return 0
    fi
  done

  return 1
}
# Move abandoned "uploading-*" files from the top level of BACKUP_DIR to
# the trash. An upload is kept when it is excluded, younger than
# UPLOADING_MAX_AGE_HOURS, or still held open by a process.
# Globals:   BACKUP_DIR, UPLOADING_MAX_AGE_HOURS, DRY_RUN (read);
#            bytes_trashed (written, via add_trashed_bytes)
# Outputs:   log messages only
cleanup_stale_uploads() {
  local upload_file mtime age before
  local age_seconds=$((UPLOADING_MAX_AGE_HOURS * 3600))
  local now
  now=$(date +%s)

  while IFS= read -r -d '' upload_file; do
    if is_excluded_path "$upload_file"; then
      continue
    fi

    # An upload can finish (and be renamed) between find and stat; treat a
    # vanished file as nothing to do instead of aborting under `set -e`.
    if ! mtime=$(stat -c %Y -- "$upload_file" 2>/dev/null); then
      log "Skipping upload that disappeared: $upload_file"
      continue
    fi
    age=$((now - mtime))

    if (( age < age_seconds )); then
      log "Keeping in-progress upload ($((age / 3600))h old): $upload_file"
      continue
    fi

    if file_has_open_handles "$upload_file"; then
      log "Keeping upload with open handles ($((age / 3600))h old): $upload_file"
      continue
    fi

    # Size must be read before the move; roll it back if nothing was moved.
    # The existence check also covers a trash_file that reports success
    # although the file is still in place.
    before=$bytes_trashed
    add_trashed_bytes "$upload_file"
    if ! trash_file "$upload_file" "stale upload older than ${UPLOADING_MAX_AGE_HOURS}h" \
      || [[ "$DRY_RUN" != true && -e "$upload_file" ]]; then
      bytes_trashed=$before
      log "Warning: stale upload left in place: $upload_file"
      continue
    fi
  done < <(find "$BACKUP_DIR" -maxdepth 1 -type f -name 'uploading-*' -print0 2>/dev/null)
}
cleanup_volume_backups() {
|
||||
declare -A volume_files=()
|
||||
local file basename volume_key
|
||||
|
||||
while IFS= read -r -d '' file; do
|
||||
if is_excluded_path "$file"; then
|
||||
continue
|
||||
fi
|
||||
|
||||
basename=$(basename "$file")
|
||||
if ! volume_key=$(parse_volume_key "$basename"); then
|
||||
log "Skipping unrecognized backup filename: $file"
|
||||
continue
|
||||
fi
|
||||
|
||||
volume_files["$volume_key"]+="$file"$'\n'
|
||||
done < <(find "$BACKUP_DIR" -maxdepth 1 -type f -name 'rpc_*-*.tar.zst' -print0 2>/dev/null)
|
||||
|
||||
local volume trashed=0 kept=0
|
||||
for volume in "${!volume_files[@]}"; do
|
||||
mapfile -t files < <(printf '%s' "${volume_files[$volume]}" | sed '/^$/d' | sort)
|
||||
|
||||
local total=${#files[@]}
|
||||
if (( total <= KEEP_COUNT )); then
|
||||
kept=$((kept + total))
|
||||
continue
|
||||
fi
|
||||
|
||||
local delete_count=$((total - KEEP_COUNT))
|
||||
local i
|
||||
for ((i = 0; i < delete_count; i++)); do
|
||||
local archive="${files[$i]}"
|
||||
local metadata
|
||||
metadata=$(metadata_for_archive "$archive")
|
||||
|
||||
add_trashed_bytes "$archive"
|
||||
trash_file "$archive" "keeping ${KEEP_COUNT}/${total} for $volume"
|
||||
trashed=$((trashed + 1))
|
||||
|
||||
if [[ -f "$metadata" ]]; then
|
||||
add_trashed_bytes "$metadata"
|
||||
trash_file "$metadata" "orphaned metadata for $volume"
|
||||
fi
|
||||
done
|
||||
|
||||
kept=$((kept + KEEP_COUNT))
|
||||
done
|
||||
|
||||
echo "$trashed $kept"
|
||||
}
|
||||
|
||||
# Move rpc_*.txt metadata files whose archive is no longer next to them
# from the top level of BACKUP_DIR to the trash.
# Globals:   BACKUP_DIR, TRASH_DIR, DRY_RUN (read);
#            bytes_trashed (written, via add_trashed_bytes)
# Outputs:   number of metadata files trashed on stdout; the rest via log
# NOTE(review): a metadata file written before its archive has its final
# name would look orphaned here — confirm against the upload flow.
cleanup_orphan_metadata() {
  local metadata archive name reason before
  local orphans=0
  local -r metadata_re='^rpc_.+\.txt$'

  while IFS= read -r -d '' metadata; do
    if is_excluded_path "$metadata"; then
      continue
    fi

    name=${metadata##*/}
    [[ "$name" =~ $metadata_re ]] || continue

    # Metadata is only an orphan when its archive is gone from BACKUP_DIR.
    archive="${metadata%.txt}.tar.zst"
    if [[ -f "$archive" ]]; then
      continue
    fi

    # Same action either way; only the logged reason differs.
    if [[ -f "$TRASH_DIR/${archive##*/}" ]]; then
      reason="metadata for archived-in-trash backup"
    else
      reason="orphaned metadata without archive"
    fi

    # Size must be read before the move; roll it back if nothing moved.
    # A failed move skips this file instead of aborting the whole run.
    before=$bytes_trashed
    add_trashed_bytes "$metadata"
    if ! trash_file "$metadata" "$reason" \
      || [[ "$DRY_RUN" != true && -e "$metadata" ]]; then
      bytes_trashed=$before
      log "Warning: metadata left in place: $metadata"
      continue
    fi
    orphans=$((orphans + 1))
  done < <(find "$BACKUP_DIR" -maxdepth 1 -type f -name 'rpc_*.txt' -print0 2>/dev/null)

  printf '%s\n' "$orphans"
}
# Run the whole cleanup: stale uploads, per-volume retention, orphaned
# metadata, then log a summary.
# Globals:   BACKUP_DIR, TRASH_DIR, KEEP_COUNT, UPLOADING_MAX_AGE_HOURS,
#            DRY_RUN (read); bytes_trashed (read after the cleanup steps)
# Returns:   exits 1 when BACKUP_DIR is missing or a numeric setting is not
#            a non-negative integer
main() {
  if [[ ! -d "$BACKUP_DIR" ]]; then
    log "Error: backup directory does not exist: $BACKUP_DIR"
    exit 1
  fi

  # Both settings feed shell arithmetic; reject anything that is not a
  # plain non-negative integer (a leading zero would be read as octal).
  local setting
  for setting in KEEP_COUNT UPLOADING_MAX_AGE_HOURS; do
    if [[ ! "${!setting}" =~ ^(0|[1-9][0-9]*)$ ]]; then
      log "Error: $setting must be a non-negative integer, got: ${!setting}"
      exit 1
    fi
  done

  if ! $DRY_RUN; then
    mkdir -p "$TRASH_DIR"
  fi

  log "Starting backup cleanup in $BACKUP_DIR (trash_dir=$TRASH_DIR, keep_count=$KEEP_COUNT, uploading_max_age=${UPLOADING_MAX_AGE_HOURS}h, dry_run=$DRY_RUN)"

  cleanup_stale_uploads

  # The cleanup functions report their counts on stdout and update
  # bytes_trashed as a side effect. Reading them through `< <(...)` would
  # run them in a subshell and lose that update, so their output goes to a
  # scratch file while they run in this shell.
  local trashed kept orphan_metadata counts_file
  counts_file=$(mktemp)
  # Path is expanded now on purpose: the local is out of scope at exit.
  # shellcheck disable=SC2064
  trap "rm -f -- '$counts_file'" EXIT

  cleanup_volume_backups > "$counts_file"
  read -r trashed kept < "$counts_file"

  cleanup_orphan_metadata > "$counts_file"
  read -r orphan_metadata < "$counts_file"

  local trashed_human
  trashed_human=$(numfmt --to=iec-i --suffix=B "$bytes_trashed" 2>/dev/null || echo "${bytes_trashed}B")

  log "Cleanup complete: trashed=$trashed kept=$kept orphan_metadata=$orphan_metadata staged≈$trashed_human"
}
# Entry point: run main with the script's command-line arguments.
main "$@"
|
||||
|
||||
Reference in New Issue
Block a user