#!/bin/bash
#
# Reth static-file pruning: setup, discovery and grouping.
#
# Finds files named static_file_{group}_{start}_{end}[.conf|.off] directly
# inside STATIC_FILES_DIR, reduces them to unique base paths (one per block
# range) and buckets those base paths by group, so that the retention steps
# which follow can decide per group what to keep.
#
# Environment (all optional):
#   DATA_DIR          root data directory         (default: /data)
#   STATIC_FILES_DIR  directory that is scanned   (default: $DATA_DIR/static_files)
#   DELETE_DIR        created up front (mkdir -p) (default: $DATA_DIR/static_files/delete_me)
#                     NOTE(review): presumably the destination for pruned
#                     files in the later steps -- confirm against the mv logic.
#
# Exits 0 early when no matching file is found.
#
# The interpreter line above must stay on a line of its own: anything that
# follows "#!/bin/bash" on the same line is not executed as script code.

set -e # Exit immediately if a command exits with a non-zero status.
set -u # Treat unset variables as an error when substituting.

DATA_DIR="${DATA_DIR:-/data}"
STATIC_FILES_DIR="${STATIC_FILES_DIR:-$DATA_DIR/static_files}"
DELETE_DIR="${DELETE_DIR:-$DATA_DIR/static_files/delete_me}"

echo "Starting Reth pruning process for static files in $STATIC_FILES_DIR..."
mkdir -p "$DELETE_DIR"

# Step 1: Find all unique base filenames (static_file_{group}_{start}_{end}).
echo "Finding unique static file base names..."
# -maxdepth 1 / -type f: only regular files directly inside STATIC_FILES_DIR
# are considered, so sub-directories (such as the default DELETE_DIR) are not
# scanned. Stripping the .conf/.off suffix collapses a data file and its
# companions into a single base path; the full path is kept on purpose.
unique_base_files=$(
  find "$STATIC_FILES_DIR" -maxdepth 1 -type f -name 'static_file_*_*_*' \
    | sed -E 's/\.(conf|off)$//' \
    | sort -u
)

if [[ -z "$unique_base_files" ]]; then
  echo "No static files found matching the pattern 'static_file_*_*_*'."
  exit 0
fi

# One base path per line -> one array element per base path.
readarray -t unique_base_files_array <<< "$unique_base_files"
echo "Found ${#unique_base_files_array[@]} unique base file ranges across all groups."

# Step 2: Group files by group_name (headers, receipts, transactions).
# groups[<group>] holds a space-separated list of full base paths (each entry
# is followed by one space), so base paths must not contain spaces.
declare -A groups=()
echo "Grouping files by type (headers, receipts, transactions)..."
for base in "${unique_base_files_array[@]}"; do
  # Parameter expansion instead of basename/cut: no process is forked per file.
  filename=${base##*/}                 # last path component
  group_name=${filename#static_file_}  # drop the fixed prefix ...
  group_name=${group_name%%_*}         # ... keep text up to the next '_'

  case "$group_name" in
    headers | receipts | transactions)
      groups["$group_name"]+="$base "
      ;;
    *)
      echo "Warning: Skipping file with unexpected group name: $base"
      ;;
  esac
done

# Step 3: Process each group according to retention rules.
moved_count=0
# The expected groups, in the order in which they are processed.
declare -a group_names=("headers" "receipts" "transactions")
echo "Processing file groups..."
for group_name in "${group_names[@]}"; do # Get the space-separated list of base paths for the current group, default to empty string if group doesn't exist group_bases_str="${groups[$group_name]:-}" if [ -z "$group_bases_str" ]; then echo "No files found for group '$group_name'." echo "--- Finished processing group '$group_name' ---" continue fi # Sort base names within the group numerically by start block # Use process substitution, awk for extraction/sorting, and readarray readarray -t sorted_bases < <( \ echo "$group_bases_str" | tr ' ' '\n' | \ awk -F_ '{ # Extract filename from full path if necessary split($0, path_parts, "/"); filename = path_parts[length(path_parts)]; # Split filename by underscore and get the start block (4th field) split(filename, name_parts, "_"); start_block = name_parts[4]; # Print start block (as number) and the original full base path print start_block+0, $0 }' | \ sort -n | \ cut -d' ' -f2- \ ) num_files=${#sorted_bases[@]} echo "Processing group '$group_name' with $num_files ranges." # Use an associative array to track which base paths to keep (for efficient lookup) declare -A files_to_keep # Use a standard array to store base paths to move files_to_move=() # --- Apply Retention Rules --- # Rule 1: Always keep the _0_499999 range if it exists first_range_kept=false for base in "${sorted_bases[@]}"; do filename=$(basename "$base") if [[ "$filename" == *"_0_499999" ]]; then # Check if the key for this base path is already set in files_to_keep # Use -v for safe check with set -u if ! [[ -v files_to_keep[$base] ]]; then echo "Marking first range '$filename' to keep for group '$group_name'." files_to_keep["$base"]=1 # Mark this base path for keeping first_range_kept=true fi # Don't break here; let it potentially be kept by Rule 2 as well if it's one of the last two fi done # Add a warning if the expected first range wasn't found (and there were files) if ! 
$first_range_kept && [[ $num_files -gt 0 ]]; then echo "Warning: Did not find the expected first range (_0_499999) for group '$group_name'." fi # Rule 2: Keep the last two ranges (sorted by start block) keep_last_count=2 # Determine how many to actually keep (can't keep 2 if only 0 or 1 exist) num_to_keep_last=$((num_files < keep_last_count ? num_files : keep_last_count)) if [[ $num_to_keep_last -gt 0 ]]; then echo "Marking last $num_to_keep_last range(s) to keep for group '$group_name':" # Calculate the starting index for the last 'num_to_keep_last' elements start_index=$((num_files - num_to_keep_last)) # Loop through the indices of the ranges to keep for (( i=start_index; i