sync-status: dRPC-homogeneous block-lag status + fix never-used reference fallbacks
Match the dRPC gateway's per-chain "how many blocks behind is ok" model instead of a
fixed 2s/5s timestamp tolerance:
- check-health.sh: compare the reference head vs local head by BLOCK NUMBER and classify
with the chain's dRPC lag thresholds (LAGGING_LAG/SYNCING_LAG, in blocks, from
chains.yaml). dRPC uses the two thresholds inconsistently across chains (sometimes
lagging<syncing, sometimes the reverse) so the smaller is the online boundary and the
larger the syncing/drop boundary. Defaults 2/6 when a chain has no thresholds.
- multicurl.sh: also skip responses with result:null (a lagging endpoint lacking the
requested block) so the fallback reference URLs are actually tried. Previously the first
endpoint's {"result":null} was accepted as success -> fallbacks never ran, and the null
reference hash made check-health report false "forked" (the online/forked flapping).
- sync-status.sh: resolve the lag thresholds (by drpc slug or chain id) and export
LAGGING_LAG/SYNCING_LAG.
- reference-rpc-endpoint.sh: add --lags and --block-time-ms lookups.
- reference-rpc-endpoint.json: regenerated with per-chain block_time_ms + lagging_lag +
syncing_lag (additive).
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -189,12 +189,12 @@ if [ $? -eq 0 ]; then
|
||||
response3=$(cat "$response_file3")
|
||||
|
||||
if $is_starknet; then
|
||||
latest_block_timestamp_decimal3=$(echo "$response3" | jq -r '.result.timestamp')
|
||||
ref_num=$(echo "$response3" | jq -r '.result.block_number // empty')
|
||||
elif $is_aztec; then
|
||||
latest_block_timestamp_decimal3=$(echo "$response3" | jq -r '.result.header.globalVariables.timestamp')
|
||||
ref_num=$(echo "$response3" | jq -r '.result.header.globalVariables.blockNumber // empty')
|
||||
else
|
||||
latest_block_timestamp3=$(echo "$response3" | jq -r '.result.timestamp')
|
||||
latest_block_timestamp_decimal3=$((16#${latest_block_timestamp3#0x}))
|
||||
ref_num_hex=$(echo "$response3" | jq -r '.result.number // empty')
|
||||
ref_num=$([ -n "$ref_num_hex" ] && printf '%d' "$ref_num_hex" 2>/dev/null)
|
||||
fi
|
||||
|
||||
# echo "refer: $latest_block_timestamp_decimal3"
|
||||
@@ -205,25 +205,35 @@ if [ $? -eq 0 ]; then
|
||||
response4=$(cat "$response_file4")
|
||||
|
||||
if $is_starknet; then
|
||||
latest_block_timestamp_decimal4=$(echo "$response4" | jq -r '.result.timestamp')
|
||||
local_num=$(echo "$response4" | jq -r '.result.block_number // empty')
|
||||
elif $is_aztec; then
|
||||
latest_block_timestamp_decimal4=$(echo "$response4" | jq -r '.result.header.globalVariables.timestamp')
|
||||
local_num=$(echo "$response4" | jq -r '.result.header.globalVariables.blockNumber // empty')
|
||||
else
|
||||
latest_block_timestamp4=$(echo "$response4" | jq -r '.result.timestamp')
|
||||
latest_block_timestamp_decimal4=$((16#${latest_block_timestamp4#0x}))
|
||||
local_num_hex=$(echo "$response4" | jq -r '.result.number // empty')
|
||||
local_num=$([ -n "$local_num_hex" ] && printf '%d' "$local_num_hex" 2>/dev/null)
|
||||
fi
|
||||
|
||||
#echo "local: $latest_block_timestamp_decimal4"
|
||||
rm "$response_file4"
|
||||
|
||||
time_difference3=$(echo "scale=6; (${latest_block_timestamp_decimal3} - ${request_time3}) - (${latest_block_timestamp_decimal4} - ${request_time4})" | bc)
|
||||
|
||||
#echo "diff after network latency: $time_difference3 s"
|
||||
|
||||
if (( $(echo "$time_difference3 < 2" | bc -l) )); then
|
||||
# Lag in BLOCKS between the reference head and the local head
|
||||
# (positive => local behind). Compare against dRPC's own per-chain
|
||||
# thresholds (LAGGING_LAG / SYNCING_LAG from chains.yaml via
|
||||
# sync-status.sh) so our status matches the dRPC gateway's view.
|
||||
# dRPC uses the two thresholds inconsistently (sometimes
|
||||
# lagging<syncing, sometimes the reverse), so treat the smaller as
|
||||
# the online boundary and the larger as the syncing/drop boundary.
|
||||
if [ -z "$ref_num" ] || [ -z "$local_num" ]; then
|
||||
echo "error"
|
||||
exit 1
|
||||
fi
|
||||
lag=$(( ref_num - local_num ))
|
||||
lo=${LAGGING_LAG:-2}; hi=${SYNCING_LAG:-6}
|
||||
if [ "$lo" -gt "$hi" ]; then tmp=$lo; lo=$hi; hi=$tmp; fi
|
||||
if [ "$lag" -le "$lo" ]; then
|
||||
echo "online"
|
||||
exit 0
|
||||
elif (( $(echo "$time_difference3 < 5" | bc -l) )); then
|
||||
elif [ "$lag" -le "$hi" ]; then
|
||||
echo "lagging"
|
||||
exit 0
|
||||
else
|
||||
|
||||
@@ -43,7 +43,11 @@ for url in "${urls[@]}"; do
|
||||
output=$(eval "curl -s ${options[@]@Q} '$url' --fail")
|
||||
if [[ $? -eq 0 ]]; then
|
||||
|
||||
if cat "$temp_file" | jq -e 'has("error")' > /dev/null 2>&1; then
|
||||
# Skip and try the next reference URL when the response is a JSON-RPC error OR has a
|
||||
# null result (a lagging endpoint that doesn't have the requested block/data yet).
|
||||
# Without the result==null check the first endpoint's {"result":null} was accepted as
|
||||
# success and the remaining fallback URLs were never tried.
|
||||
if cat "$temp_file" | jq -e 'has("error") or (.result == null)' > /dev/null 2>&1; then
|
||||
continue # Try the next URL
|
||||
fi
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -44,6 +44,51 @@ if [ "$1" = "--protocol" ]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Look up the expected block time (milliseconds) for a registry key (drpc slug) or chain id.
# Used by sync-status.sh / check-health.sh to scale the lag thresholds per chain.
#
# Usage:   $0 --block-time-ms <slug|chainid>
# Reads:   $json_file - registry JSON whose entries may carry .id and .block_time_ms
# Outputs: the block time on stdout
# Returns: 0 when a value was found; 1 on a missing argument or when nothing matches
if [ "$1" = "--block-time-ms" ]; then
  if [ $# -lt 2 ]; then
    echo "Usage: $0 --block-time-ms <slug|chainid>"
    exit 1
  fi
  key="$2"

  # Try by slug first
  bt=$(jq -r --arg k "$key" '.[$k].block_time_ms // empty' "$json_file" 2>/dev/null)

  if [ -z "$bt" ]; then
    # Fall back to lookup by chain id (decimal; convert hex)
    idk="$key"
    if [[ "$idk" == 0x* ]]; then
      # printf prints "0" AND fails on malformed hex (e.g. 0xzz); drop the value
      # in that case so a bad key is not silently looked up as chain id 0.
      idk=$(printf '%d' "$idk" 2>/dev/null) || idk=""
    fi
    if [[ "$idk" =~ ^[0-9]+$ ]]; then
      # Filter out null block times BEFORE first(), so an entry without a
      # block_time_ms cannot hide a later entry that shares the same id
      # (same approach as the --lags lookup).
      bt=$(jq -r --arg id "$idk" \
        'first(.[] | select(.id == ($id | tonumber)) | .block_time_ms | select(. != null)) // empty' \
        "$json_file" 2>/dev/null)
    fi
  fi

  [ -z "$bt" ] && exit 1
  echo "$bt"
  exit 0
fi
|
||||
|
||||
# Look up the dRPC lag thresholds (in BLOCKS) for a registry key (slug) or chain id.
# Prints "<lagging_lag> <syncing_lag>". Used by sync-status.sh -> check-health.sh so our
# online/lagging/syncing status matches the dRPC gateway's per-chain lag model.
#
# Usage:   $0 --lags <slug|chainid>
# Reads:   $json_file - registry JSON whose entries may carry .id, .lagging_lag, .syncing_lag
# Returns: 0 when both thresholds were found; 1 on a missing argument or when nothing matches
if [ "$1" = "--lags" ]; then
  if [ $# -lt 2 ]; then
    echo "Usage: $0 --lags <slug|chainid>"
    exit 1
  fi
  key="$2"

  # Slug lookup: only accept an entry that defines BOTH thresholds.
  lags=$(jq -r --arg k "$key" \
    'if (.[$k].lagging_lag != null and .[$k].syncing_lag != null) then "\(.[$k].lagging_lag) \(.[$k].syncing_lag)" else empty end' \
    "$json_file" 2>/dev/null)

  if [ -z "$lags" ]; then
    # Fall back to lookup by chain id (decimal; convert hex)
    idk="$key"
    if [[ "$idk" == 0x* ]]; then
      # printf prints "0" AND fails on malformed hex (e.g. 0xzz); drop the value
      # in that case so a bad key is not silently looked up as chain id 0.
      idk=$(printf '%d' "$idk" 2>/dev/null) || idk=""
    fi
    if [[ "$idk" =~ ^[0-9]+$ ]]; then
      # First entry with this id that defines both thresholds.
      lags=$(jq -r --arg id "$idk" \
        'first(.[] | select(.id == ($id | tonumber)) | select(.lagging_lag != null and .syncing_lag != null) | "\(.lagging_lag) \(.syncing_lag)") // empty' \
        "$json_file" 2>/dev/null)
    fi
  fi

  [ -z "$lags" ] && exit 1
  echo "$lags"
  exit 0
fi
|
||||
|
||||
# Look up by rollup_version (for Aztec: version from result.header.globalVariables.version)
|
||||
if [ "$1" = "--rollup-version" ]; then
|
||||
if [ $# -lt 2 ]; then
|
||||
|
||||
@@ -158,6 +158,24 @@ for path in $pathlist; do
|
||||
fi
|
||||
fi
|
||||
|
||||
# Per-chain dRPC lag thresholds (in blocks) so check-health.sh reports
# online/lagging/syncing the same way the dRPC gateway does (homogeneous status).
# Resolve by drpc slug first, then by chain id; empty => check-health uses its
# built-in defaults.
# Reads:  chain_slug, chain_id_decimal, BASEPATH
# Writes: LAGGING_LAG / SYNCING_LAG (exported when resolved, otherwise unset)
lags=""
if [ -n "$chain_slug" ]; then
  # BASEPATH is quoted so a base directory containing whitespace still resolves.
  lags=$("$BASEPATH/reference-rpc-endpoint.sh" --lags "$chain_slug" 2>/dev/null) || lags=""
fi
if [ -z "$lags" ] && [ -n "$chain_id_decimal" ]; then
  lags=$("$BASEPATH/reference-rpc-endpoint.sh" --lags "$chain_id_decimal" 2>/dev/null) || lags=""
fi

# The lookup prints "<lagging_lag> <syncing_lag>"; split on the first/last space.
lagging_lag="${lags%% *}"
syncing_lag="${lags##* }"

# The thresholds are consumed with integer tests downstream, so anything that is
# not a plain non-negative integer is treated as "not resolved" rather than
# exported and left to break those comparisons.
case "$lagging_lag" in '' | *[!0-9]*) lags="" ;; esac
case "$syncing_lag" in '' | *[!0-9]*) lags="" ;; esac

if [ -n "$lags" ]; then
  export LAGGING_LAG="$lagging_lag"
  export SYNCING_LAG="$syncing_lag"
else
  # Clear values left over from the environment or a previous resolution.
  unset LAGGING_LAG SYNCING_LAG
fi
|
||||
|
||||
# Call the health check script with RPC_URL, ref, and chain-type flag
|
||||
if $is_aztec; then
|
||||
$BASEPATH/check-health.sh "$RPC_URL" --aztec $ref
|
||||
|
||||
Reference in New Issue
Block a user