Files
ethereum-rpc-docker/clone-node.sh

568 lines
20 KiB
Bash
Executable File

#!/bin/bash
# Fixed version that handles missing netstat
#
# Usage: <script> <config-name> <destination>
#   $1 - configuration name; main reads the volume keys from /root/rpc/$1.yml
#   $2 - short destination host name; ".stakesquid.eu" is appended to it
#
# Both arguments are validated here, before any remote or local state is
# touched, and usage errors are written to stderr.
BASEPATH="$(dirname "$0")"

if [[ -n "${2:-}" ]]; then
  DEST_HOST="$2.stakesquid.eu"
  echo "Setting up optimized transfer to $DEST_HOST"
else
  echo "Error: No destination provided" >&2
  echo "Usage: $0 <config-name> <destination>" >&2
  exit 1
fi

if [[ -z "${1:-}" ]]; then
  echo "Error: No configuration name provided" >&2
  echo "Usage: $0 <config-name> <destination>" >&2
  exit 1
fi

# Configuration
# NOTE(review): BASE_PORT is not referenced by the functions visible in this
# file; kept in case something else relies on it.
BASE_PORT=9000
# Inclusive range of remote ports probed by find_available_port.
PORT_RANGE_START=9000
PORT_RANGE_END=9100

# Global array to track used ports
declare -a USED_PORTS=()
# Open a background SSH master connection to DEST_HOST and export SSH_CMD,
# the ssh command line that later calls expand (unquoted) to reach the host.
# Globals:  DEST_HOST (read), SSH_CMD (exported)
# Outputs:  status messages on stdout; ssh's own stderr is discarded
# When the master connection cannot be opened, SSH_CMD falls back to a plain
# ssh invocation without a ControlPath.
setup_ssh_multiplex() {
  local -a master_opts=(
    -nNf
    -o ControlMaster=yes
    -o StrictHostKeyChecking=no
    -o ControlPath=/tmp/ssh-mux-%h-%p-%r
    -o ControlPersist=600
    -o Compression=no
  )

  echo "Setting up SSH control connection..."
  if ssh "${master_opts[@]}" "$DEST_HOST" 2>/dev/null; then
    echo "SSH control connection established"
    export SSH_CMD="ssh -o StrictHostKeyChecking=no -o ControlPath=/tmp/ssh-mux-%h-%p-%r"
    return
  fi

  echo "Failed to setup SSH multiplexing, using direct SSH"
  export SSH_CMD="ssh -o StrictHostKeyChecking=no"
}
# Ask DEST_HOST whether something is listening on a TCP port.
# Globals:   SSH_CMD, DEST_HOST (read)
# Arguments: $1 - port number (expanded locally into the remote script)
# Returns:   the exit status of the remote probe (0 = a listener was found)
check_port_listening() {
  local probe_port=$1

  # The remote side uses the first available tool of ss, netstat, lsof and,
  # failing all of those, tries to open a TCP connection to the port.
  local remote_probe="
    if command -v ss >/dev/null 2>&1; then
      ss -tln | grep -q ':$probe_port '
    elif command -v netstat >/dev/null 2>&1; then
      netstat -tln | grep -q ':$probe_port '
    elif command -v lsof >/dev/null 2>&1; then
      lsof -i :$probe_port >/dev/null 2>&1
    else
      # If no tools available, just try to connect to the port
      timeout 1 bash -c 'cat < /dev/null > /dev/tcp/localhost/$probe_port' 2>/dev/null
    fi
  "

  # SSH_CMD is intentionally unquoted: it holds a command plus its options.
  $SSH_CMD "$DEST_HOST" "$remote_probe"
}
# Find the first port in PORT_RANGE_START..PORT_RANGE_END (inclusive) that is
# neither recorded in USED_PORTS nor listening on the remote host.
# Globals:   PORT_RANGE_START, PORT_RANGE_END (read); USED_PORTS (appended)
# Outputs:   the chosen port on stdout; an error message on stderr when the
#            range is exhausted
# Returns:   0 when a port was found, 1 otherwise
#
# NOTE: when this function is called through command substitution
# ("$(find_available_port)"), it runs in a subshell, so the USED_PORTS update
# made here is not visible to the caller; the caller has to record the port
# itself.
find_available_port() {
  # All loop variables are local so nothing leaks into (or overwrites a
  # same-named variable of) the caller's scope.
  local candidate claimed in_use

  for ((candidate = PORT_RANGE_START; candidate <= PORT_RANGE_END; candidate++)); do
    # Check if port is already used by this script
    in_use=false
    for claimed in "${USED_PORTS[@]}"; do
      if [[ "$candidate" -eq "$claimed" ]]; then
        in_use=true
        break
      fi
    done

    # Only probe the remote host for ports this script has not claimed
    if [[ "$in_use" == "false" ]] && ! check_port_listening "$candidate"; then
      USED_PORTS+=("$candidate")
      echo "$candidate"
      return 0
    fi
  done

  echo "Error: No available ports in range $PORT_RANGE_START-$PORT_RANGE_END" >&2
  return 1
}
# Remove a port from the USED_PORTS bookkeeping array.
# Globals:   USED_PORTS (rewritten without the given port)
# Arguments: $1 - port to release; releasing an unknown port is a no-op
release_port() {
  local port=$1
  local -a remaining=()
  # Local loop variable: the function must not leak or overwrite a
  # same-named variable in the caller's scope.
  local claimed

  for claimed in "${USED_PORTS[@]}"; do
    # String comparison avoids arithmetic evaluation of the argument.
    if [[ "$claimed" != "$port" ]]; then
      remaining+=("$claimed")
    fi
  done

  USED_PORTS=("${remaining[@]}")
}
# Report whether the target host has SLOWDISK mode switched on, i.e. whether
# /root/rpc/.env on DEST_HOST contains a line starting with SLOWDISK=true.
# Globals:  SSH_CMD, DEST_HOST (read)
# Returns:  the exit status of the remote grep (0 = enabled)
check_slowdisk_enabled() {
  local remote_check="grep -q '^SLOWDISK=true' /root/rpc/.env 2>/dev/null"
  $SSH_CMD "$DEST_HOST" "$remote_check"
}
# Detect which of the known "static" paths exist in a source volume.
# Globals:   BASEPATH (read) - directory holding static-file-path-list.txt
# Arguments: $1 - volume key
#            $2 - optional source folder; defaults to
#                 /var/lib/docker/volumes/rpc_<key>/_data
# Outputs:   first line: summed size of the matched paths in KiB
#            following lines: one matched relative path per line
# Returns:   0 when at least one path matched; 1 when the list file is
#            missing or nothing matched (nothing is printed in that case)
detect_static_files() {
  local key=$1
  local source_folder=${2:-/var/lib/docker/volumes/rpc_${key}/_data}
  local static_file_list="$BASEPATH/static-file-path-list.txt"
  local static_size_kb=0
  local -a static_paths=()
  local path full_path size

  if [[ ! -f "$static_file_list" ]]; then
    return 1
  fi

  # "|| [[ -n $path ]]" also processes a final line without a newline.
  while IFS= read -r path || [[ -n "$path" ]]; do
    # A blank entry would resolve to the volume root itself, adding the size
    # of the whole volume and an empty path to the result, so skip it.
    if [[ -z "${path//[[:space:]]/}" ]]; then
      continue
    fi

    full_path="$source_folder/$path"
    if [[ ! -e "$full_path" ]]; then
      continue
    fi

    # -k pins the unit to KiB regardless of the environment; -L follows
    # symlinks so linked data is counted.
    size=$(du -skL "$full_path" 2>/dev/null | awk '{print $1}')
    if [[ "$size" =~ ^[0-9]+$ ]]; then
      static_size_kb=$((static_size_kb + size))
      static_paths+=("$path")
    fi
  done < "$static_file_list"

  if [[ ${#static_paths[@]} -eq 0 ]]; then
    return 1
  fi

  echo "$static_size_kb"
  printf '%s\n' "${static_paths[@]}"
}
# Prepare the target host so that the static paths of a volume live under
# /slowdisk and are reachable from the volume through symlinks.
# Globals:   SSH_CMD, DEST_HOST (read)
# Arguments: $1  - volume key
#            $2  - total size of the static paths in KiB
#            $3… - static paths, relative to the volume's _data directory
# Outputs:   progress, warnings and errors on stdout
# Returns:   0 when every symlink was created; 1 when no paths were given,
#            /slowdisk lacks space, or any remote step fails
# NOTE: paths are embedded in single quotes in the remote commands, so a path
# containing a single quote is not supported.
setup_slowdisk_structure() {
  local key=$1
  local static_size_kb=$2
  shift 2
  local -a static_paths=("$@")
  local volume_path="/var/lib/docker/volumes/rpc_$key/_data"
  local slowdisk_base="/slowdisk"
  local slowdisk_available docker_available
  local path slowdisk_path target_path target_dir

  if [[ ${#static_paths[@]} -eq 0 ]]; then
    echo "No static paths provided"
    return 1
  fi

  echo "Setting up SLOWDISK structure for $key"
  echo " Static size: $((static_size_kb / 1024))MB"
  echo " Static paths: ${#static_paths[@]}"

  # Check available space on /slowdisk. Anything that is not a plain number
  # (empty output, df failure) is treated as "not enough space".
  slowdisk_available=$($SSH_CMD "$DEST_HOST" "df -BK /slowdisk 2>/dev/null | tail -1 | awk '{print \$4}' | sed 's/K//'")
  if [[ ! "$slowdisk_available" =~ ^[0-9]+$ ]] || [[ "$slowdisk_available" -lt "$static_size_kb" ]]; then
    echo "Error: Not enough space on /slowdisk (available: ${slowdisk_available}KB, needed: ${static_size_kb}KB)"
    return 1
  fi

  # Check available space on /var/lib/docker/volumes (for dynamic data).
  # The dynamic size is not known here, so this only warns below 1GB.
  docker_available=$($SSH_CMD "$DEST_HOST" "df -BK /var/lib/docker/volumes 2>/dev/null | tail -1 | awk '{print \$4}' | sed 's/K//'")
  if [[ ! "$docker_available" =~ ^[0-9]+$ ]] || [[ "$docker_available" -lt 1048576 ]]; then
    echo "Warning: Limited space on /var/lib/docker/volumes (available: ${docker_available}KB)"
  fi

  # Create volume directory
  if ! $SSH_CMD "$DEST_HOST" "mkdir -p '$volume_path'"; then
    echo "Error: Could not create $volume_path on $DEST_HOST"
    return 1
  fi

  for path in "${static_paths[@]}"; do
    # Convert path to slowdisk path: /slowdisk/rpc_key__data_path
    # (every / in the relative path is replaced with __)
    slowdisk_path="${slowdisk_base}/rpc_${key}__data_${path//\//__}"
    target_path="${volume_path}/${path}"
    target_dir=$(dirname "$target_path")

    # One round-trip per path: create the parent and slowdisk directories,
    # remove whatever currently occupies the target, then link it.
    # -L is tested as well because -e is false for a dangling symlink, which
    # would otherwise make ln fail with "File exists".
    if ! $SSH_CMD "$DEST_HOST" "
      mkdir -p '$target_dir' '$slowdisk_path' || exit 1
      if [[ -e '$target_path' || -L '$target_path' ]]; then
        rm -rf '$target_path' || exit 1
      fi
      ln -s '$slowdisk_path' '$target_path'
    "; then
      echo "Error: Failed to create symlink $target_path -> $slowdisk_path"
      return 1
    fi
    echo " Created symlink: $target_path -> $slowdisk_path"
  done

  echo "SLOWDISK structure setup complete"
  return 0
}
# Kill whatever is still listening on the remote ports recorded in USED_PORTS
# and clear the array. Installed by main as a trap handler.
# Globals:  USED_PORTS (read, then emptied); SSH_CMD, DEST_HOST (read)
# Outputs:  progress messages on stdout; remote errors are discarded because
#           this is best-effort cleanup
cleanup_all_ports() {
  # Local loop variable: without it, running from a trap could overwrite a
  # caller's variable named "port".
  local port

  echo "Cleaning up all used ports..."
  for port in "${USED_PORTS[@]}"; do
    echo "Releasing port $port"
    # Fall back to plain ssh if SSH_CMD was never set, so the host name is
    # not executed as a command.
    ${SSH_CMD:-ssh} "$DEST_HOST" "
      # Kill any processes on this port
      lsof -i :$port 2>/dev/null | grep LISTEN | awk '{print \$2}' | xargs -r kill 2>/dev/null
    " 2>/dev/null
  done
  USED_PORTS=()
}
# Transfer one docker volume to DEST_HOST over a raw netcat stream.
#
# A listener (nc | zstd -d | tar) is started on the remote host, inside a
# screen session when screen is installed and under nohup otherwise. The
# local side then streams tar | pv | zstd into nc. The remote listener writes
# tar's exit status to /tmp/transfer_<key>.done, which is polled afterwards.
#
# Globals:   SSH_CMD, DEST_HOST (read); USED_PORTS (port added and released)
# Arguments: $1 - volume key; the source is
#                 /var/lib/docker/volumes/rpc_<key>/_data
# Outputs:   progress and error messages on stdout
# Returns:   0 when the remote extraction reported status 0, 1 otherwise
transfer_volume() {
  local key=$1
  local source_folder="/var/lib/docker/volumes/rpc_${key}/_data"
  if [[ ! -d "$source_folder" ]]; then
    echo "Warning: $source_folder does not exist, skipping"
    return 1
  fi

  # Fall back to 0 when du yields nothing so the arithmetic below stays valid.
  local folder_size
  folder_size=$(du -sb "$source_folder" 2>/dev/null | awk '{print $1}')
  folder_size=${folder_size:-0}
  local tar_extract_opts="-xf - -C /"

  # Check for SLOWDISK mode and detect static files
  if check_slowdisk_enabled; then
    echo "SLOWDISK mode enabled, detecting static files in source volume..."
    local static_output static_size_kb static_path
    local -a static_paths=()
    static_output=$(detect_static_files "$key")
    if [[ -n "$static_output" ]]; then
      # First line is the size in KB, every following line is one path.
      static_size_kb=$(head -n 1 <<<"$static_output")
      # Read line by line (no word splitting or globbing) and drop empty
      # entries, which would otherwise address the volume root itself.
      while IFS= read -r static_path; do
        if [[ -n "$static_path" ]]; then
          static_paths+=("$static_path")
        fi
      done < <(tail -n +2 <<<"$static_output")

      if [[ ${#static_paths[@]} -gt 0 ]] && [[ -n "$static_size_kb" ]]; then
        echo "Found ${#static_paths[@]} static paths (total: $((static_size_kb / 1024))MB)"
        echo "Setting up SLOWDISK structure on target..."
        # Setup slowdisk structure on remote
        if setup_slowdisk_structure "$key" "$static_size_kb" "${static_paths[@]}"; then
          # Use --skip-old-files to avoid overwriting existing symlinks/directories
          tar_extract_opts="-xf - -C / --skip-old-files"
          echo "SLOWDISK structure ready, will extract respecting symlinks"
        else
          echo "Warning: Failed to setup SLOWDISK structure, falling back to normal extraction"
        fi
      else
        echo "No static files detected, using normal extraction"
      fi
    else
      echo "Could not detect static files, using normal extraction"
    fi
  fi

  # Find an available port. The declaration is kept separate from the
  # assignment: "local port=$(...)" would always report success and hide a
  # failure of find_available_port.
  local port
  if ! port=$(find_available_port); then
    echo "Error: Could not find available port for $key"
    return 1
  fi
  # find_available_port ran in a subshell, so its own bookkeeping is lost;
  # record the port here so release_port/cleanup_all_ports can see it.
  USED_PORTS+=("$port")

  echo "Transferring volume $key (size: $((folder_size / 1048576))MB) on port $port"

  # Clean up any existing transfer for this specific key only
  $SSH_CMD "$DEST_HOST" "
    # Kill any existing screen session for this specific transfer
    screen -S transfer_${key} -X quit 2>/dev/null
    # Clean up old files for this specific transfer
    rm -f /tmp/transfer_${key}.* 2>/dev/null
  "

  # Check if screen is available
  if $SSH_CMD "$DEST_HOST" "command -v screen" >/dev/null 2>&1; then
    echo "Starting screen listener on port $port..."
    # Start listener in screen session with proper escaping
    # Use tar_extract_opts which may include --skip-old-files for SLOWDISK mode
    $SSH_CMD "$DEST_HOST" "
      screen -dmS transfer_${key} bash -c '
        nc -l -p $port | zstd -d | tar $tar_extract_opts 2>/tmp/transfer_${key}.err
        echo \$? > /tmp/transfer_${key}.done
      '
    "
    # Give it time to start
    sleep 2
    # Check if screen session is running
    if ! $SSH_CMD "$DEST_HOST" "screen -list | grep -q transfer_${key}"; then
      echo "Error: Screen session failed to start"
      release_port "$port"
      return 1
    fi
  else
    echo "Screen not available, using nohup method..."
    # Use nohup with proper backgrounding
    # Use tar_extract_opts which may include --skip-old-files for SLOWDISK mode
    $SSH_CMD "$DEST_HOST" "
      nohup bash -c '
        nc -l -p $port | zstd -d | tar $tar_extract_opts 2>/tmp/transfer_${key}.err
        echo \$? > /tmp/transfer_${key}.done
      ' > /tmp/transfer_${key}.log 2>&1 < /dev/null &
      echo \$! > /tmp/transfer_${key}.pid
    "
    sleep 2
    # Verify process is running
    if ! $SSH_CMD "$DEST_HOST" "[[ -f /tmp/transfer_${key}.pid ]] && kill -0 \$(cat /tmp/transfer_${key}.pid) 2>/dev/null"; then
      echo "Error: Listener process failed to start"
      release_port "$port"
      return 1
    fi
  fi

  # Optional: Check if port is listening (may fail if tools aren't available)
  echo "Checking if port $port is ready..."
  if check_port_listening "$port"; then
    echo "Port $port is listening, starting transfer..."
  else
    echo "Cannot verify port status, proceeding with transfer anyway..."
  fi

  # Send the data. The status checked below is that of nc, the last command
  # in the pipeline.
  echo "Sending data to ${DEST_HOST}:${port}..."
  tar -cf - --dereference "$source_folder" 2>/dev/null | \
    pv -pterb -s "$folder_size" -N "$key" | \
    zstd -3 -T0 | \
    nc -w 60 "$DEST_HOST" "$port"
  local transfer_status=$?

  if [[ $transfer_status -ne 0 ]]; then
    echo "✗ Transfer failed with status $transfer_status"
    # Release the port on transfer failure
    release_port "$port"
    return 1
  fi

  echo "Transfer complete, waiting for extraction to finish..."
  # Wait for done flag with timeout
  local attempts=0
  local max_attempts=60 # Wait up to 2 minutes
  local remote_status
  while [[ $attempts -lt $max_attempts ]]; do
    if $SSH_CMD "$DEST_HOST" "[[ -f /tmp/transfer_${key}.done ]]" 2>/dev/null; then
      remote_status=$($SSH_CMD "$DEST_HOST" "cat /tmp/transfer_${key}.done 2>/dev/null || echo 1")
      if [[ "$remote_status" == "0" ]]; then
        echo "✓ Volume $key transferred and extracted successfully"
        # Cleanup - only for this specific transfer. The pid file is read
        # before the transfer_<key>.* files (which include it) are removed.
        $SSH_CMD "$DEST_HOST" "
          screen -S transfer_${key} -X quit 2>/dev/null
          [[ -f /tmp/transfer_${key}.pid ]] && kill \$(cat /tmp/transfer_${key}.pid) 2>/dev/null
          rm -f /tmp/transfer_${key}.* 2>/dev/null
        "
        # Release the port for reuse
        release_port "$port"
        return 0
      fi

      echo "✗ Extraction failed with status $remote_status"
      echo "Error log:"
      $SSH_CMD "$DEST_HOST" "cat /tmp/transfer_${key}.err 2>/dev/null || echo 'No error log'"
      # Release the port even on failure
      release_port "$port"
      return 1
    fi

    # Show progress
    if [[ $((attempts % 5)) -eq 0 ]]; then
      echo "Still waiting for extraction to complete... ($attempts/$max_attempts)"
    fi
    sleep 2
    attempts=$((attempts + 1))
  done

  echo "⚠ Timeout waiting for extraction to complete"
  # Release the port on timeout
  release_port "$port"
  return 1
}
# Fallback: transfer one docker volume by piping tar | pv | zstd straight
# through SSH into "zstd -d | tar" on the remote side.
# Globals:   SSH_CMD, DEST_HOST (read)
# Arguments: $1 - volume key; the source is
#                 /var/lib/docker/volumes/rpc_<key>/_data
# Outputs:   progress and error messages on stdout
# Returns:   0 when the ssh command at the end of the pipeline succeeded,
#            1 otherwise (including a missing source folder)
transfer_volume_ssh() {
  local key=$1
  local source_folder="/var/lib/docker/volumes/rpc_${key}/_data"
  if [[ ! -d "$source_folder" ]]; then
    echo "Warning: $source_folder does not exist, skipping"
    return 1
  fi

  # Fall back to 0 when du yields nothing so the arithmetic below stays valid.
  local folder_size
  folder_size=$(du -sb "$source_folder" 2>/dev/null | awk '{print $1}')
  folder_size=${folder_size:-0}
  local tar_extract_opts="-xf - -C /"

  # Check for SLOWDISK mode and detect static files (same logic as transfer_volume)
  if check_slowdisk_enabled; then
    echo "SLOWDISK mode enabled, detecting static files in source volume..."
    local static_output static_size_kb static_path
    local -a static_paths=()
    static_output=$(detect_static_files "$key")
    if [[ -n "$static_output" ]]; then
      # First line is the size in KB, every following line is one path.
      static_size_kb=$(head -n 1 <<<"$static_output")
      # Read line by line (no word splitting or globbing) and drop empty
      # entries, which would otherwise address the volume root itself.
      while IFS= read -r static_path; do
        if [[ -n "$static_path" ]]; then
          static_paths+=("$static_path")
        fi
      done < <(tail -n +2 <<<"$static_output")

      if [[ ${#static_paths[@]} -gt 0 ]] && [[ -n "$static_size_kb" ]]; then
        echo "Found ${#static_paths[@]} static paths (total: $((static_size_kb / 1024))MB)"
        echo "Setting up SLOWDISK structure on target..."
        # Setup slowdisk structure on remote
        if setup_slowdisk_structure "$key" "$static_size_kb" "${static_paths[@]}"; then
          tar_extract_opts="-xf - -C / --skip-old-files"
          echo "SLOWDISK structure ready, will extract respecting symlinks"
        else
          echo "Warning: Failed to setup SLOWDISK structure, falling back to normal extraction"
        fi
      else
        echo "No static files detected, using normal extraction"
      fi
    else
      echo "Could not detect static files, using normal extraction"
    fi
  fi

  echo "Using direct SSH transfer for $key (size: $((folder_size / 1048576))MB)"
  # The pipeline's status is that of the final ssh command.
  if tar -cf - --dereference "$source_folder" 2>/dev/null | \
    pv -pterb -s "$folder_size" -N "$key" | \
    zstd -3 -T0 | \
    $SSH_CMD -c chacha20-poly1305@openssh.com "$DEST_HOST" \
      "zstd -d | tar $tar_extract_opts"; then
    echo "✓ Volume $key transferred successfully"
    return 0
  fi

  echo "✗ Transfer failed"
  return 1
}
# Raise the TCP buffer limits and switch congestion control on both the
# destination host and the local machine for the duration of the transfer.
#
# Rationale kept from the original author: on high-bandwidth, high-latency
# links the default buffers were so small that throughput was limited to
# roughly 200 MBit/s, and it seemed important that remote and local buffers
# match. The settings are applied with "sysctl -w" and are therefore not
# persistent. The author was unsure how much this helps and noted that the
# whole section may be removed.
_apply_transfer_sysctls() {
  ssh "$DEST_HOST" "
    sudo sysctl -w net.core.rmem_max=67108864
    sudo sysctl -w net.core.wmem_max=67108864
    sudo sysctl -w net.ipv4.tcp_rmem='4096 87380 67108864'
    sudo sysctl -w net.ipv4.tcp_wmem='4096 87380 67108864'
    sudo sysctl -w net.ipv4.tcp_congestion_control=bbr
    sudo sysctl -w net.core.default_qdisc=fq
  "
  sudo sysctl -w net.ipv4.tcp_slow_start_after_idle=0
  sudo sysctl -w net.ipv4.tcp_congestion_control=bbr
  sudo sysctl -w net.core.default_qdisc=fq
  sudo sysctl -w net.ipv4.tcp_window_scaling=1
  sudo sysctl -w net.ipv4.tcp_syncookies=1
  # CRITICAL CHANGES - Increase buffers for 154ms RTT
  # Need at least 20MB for 1Gbps at 154ms, using 64MB for headroom
  sudo sysctl -w net.core.rmem_max=67108864 # Was 8MB, now 64MB
  sudo sysctl -w net.core.wmem_max=67108864 # Was 8MB, now 64MB
  sudo sysctl -w net.ipv4.tcp_rmem='4096 87380 67108864' # Was 8MB max, now 64MB
  sudo sysctl -w net.ipv4.tcp_wmem='4096 87380 67108864' # Was 8MB max, now 64MB
  # Optional but helpful for high-latency
  sudo sysctl -w net.ipv4.tcp_mtu_probing=1
  sudo sysctl -w net.ipv4.tcp_no_metrics_save=1
}

# Put network buffer and congestion control settings back to fixed values
# suited to a low-latency (0.2-1.4ms) environment, remotely and locally.
# Note that these are hard-coded values, not the values that were active
# before _apply_transfer_sysctls ran.
_restore_default_sysctls() {
  ssh "$DEST_HOST" "
    # These are better for your 0.2-1.4ms environment
    sudo sysctl -w net.core.rmem_max=2097152 # 2MB
    sudo sysctl -w net.core.wmem_max=2097152
    sudo sysctl -w net.ipv4.tcp_rmem='4096 87380 2097152'
    sudo sysctl -w net.ipv4.tcp_wmem='4096 87380 2097152'
    sudo sysctl -w net.ipv4.tcp_congestion_control=cubic
    sudo sysctl -w net.core.default_qdisc=fq_codel
    sudo sysctl -w net.ipv4.tcp_slow_start_after_idle=0
    sudo sysctl -w net.ipv4.tcp_tw_reuse=1
  "
  sudo sysctl -w net.core.rmem_max=2097152 # 2MB
  sudo sysctl -w net.core.wmem_max=2097152
  sudo sysctl -w net.ipv4.tcp_rmem='4096 87380 2097152'
  sudo sysctl -w net.ipv4.tcp_wmem='4096 87380 2097152'
  sudo sysctl -w net.ipv4.tcp_congestion_control=cubic
  sudo sysctl -w net.core.default_qdisc=fq_codel
  sudo sysctl -w net.ipv4.tcp_slow_start_after_idle=0
  sudo sysctl -w net.ipv4.tcp_tw_reuse=1
}

# EXIT handler: restore the sysctls if the script stops while they are still
# tuned (error exit or interrupt), then release any remote ports.
_clone_node_on_exit() {
  if [[ "${_CLONE_NODE_SYSCTLS_TUNED:-0}" == "1" ]]; then
    _CLONE_NODE_SYSCTLS_TUNED=0
    _restore_default_sysctls
  fi
  cleanup_all_ports
}

# Main execution: transfer every volume listed in /root/rpc/<config>.yml.
# Arguments: $1 - configuration name (file /root/rpc/$1.yml)
# Globals:   DEST_HOST, SSH_CMD (read)
# Exits:     0 when every volume was transferred, 1 otherwise; 130/143 when
#            interrupted by INT/TERM
main() {
  local keys key volume_count
  local success_count=0
  local failed_volumes=""
  local -a volume_keys=()

  # Set up cleanup trap. INT/TERM exit explicitly so that an interrupt stops
  # the script instead of merely running the handler and carrying on; the
  # EXIT trap then performs the cleanup.
  trap _clone_node_on_exit EXIT
  trap 'exit 130' INT
  trap 'exit 143' TERM

  # Validate the configuration before touching SSH or any sysctl, so a bad
  # config name cannot leave the hosts with modified network settings.
  echo "Reading volume configuration from $1.yml..."
  keys=$(yaml2json - < "/root/rpc/$1.yml" | jq -r '.volumes | keys[]')
  if [[ -z "$keys" ]]; then
    echo "Error: No volumes found in configuration"
    exit 1
  fi
  # One key per line; no word splitting or globbing on the key names.
  mapfile -t volume_keys <<<"$keys"
  volume_count=${#volume_keys[@]}

  setup_ssh_multiplex

  _CLONE_NODE_SYSCTLS_TUNED=1
  _apply_transfer_sysctls

  echo "Found $volume_count volumes to transfer"
  echo "----------------------------------------"

  for key in "${volume_keys[@]}"; do
    # Try nc method first, then fall back to the direct SSH pipe
    if transfer_volume "$key"; then
      success_count=$((success_count + 1))
    else
      echo "NC transfer failed, trying direct SSH..."
      if transfer_volume_ssh "$key"; then
        success_count=$((success_count + 1))
      else
        failed_volumes="$failed_volumes $key"
      fi
    fi
    echo "----------------------------------------"
  done

  echo ""
  echo "Transfer Summary:"
  echo " Successful: $success_count/$volume_count"
  if [[ -n "$failed_volumes" ]]; then
    echo " Failed:$failed_volumes"
  fi

  # Restore Network buffer and congestion control settings.
  _restore_default_sysctls
  _CLONE_NODE_SYSCTLS_TUNED=0

  # Close the SSH control connection (errors ignored if none exists)
  $SSH_CMD -O exit "$DEST_HOST" 2>/dev/null

  # Exit with appropriate status (cleanup will be handled by trap)
  if [[ $success_count -eq $volume_count ]]; then
    exit 0
  fi
  exit 1
}
# Script entry point: run main with all command-line arguments forwarded
# unchanged.
main "$@"