package main

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"log"
	"net/http"
	"os"
	"sort"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/gorilla/websocket"
)
// JSONRPCRequest extracts just the method field from a JSON-RPC request
type JSONRPCRequest struct {
Method string `json:"method"`
}
// BatchInfo contains information about a batch request
type BatchInfo struct {
IsBatch bool
Methods []string
RequestCount int
HasStateful bool
}
// parseBatchInfo analyzes the request body to extract method information
func parseBatchInfo(body []byte) (*BatchInfo, error) {
// Check for empty body
if len(body) == 0 {
return nil, fmt.Errorf("empty request body")
}
// Try parsing as array first (batch request)
var batchReqs []JSONRPCRequest
if err := json.Unmarshal(body, &batchReqs); err == nil {
// It's a batch request
info := &BatchInfo{
IsBatch: true,
RequestCount: len(batchReqs),
Methods: make([]string, 0, len(batchReqs)),
}
// Extract methods and check for stateful ones
for _, req := range batchReqs {
if req.Method != "" {
info.Methods = append(info.Methods, req.Method)
if isStatefulMethod(req.Method) {
info.HasStateful = true
}
}
}
return info, nil
}
// Try parsing as single request
var singleReq JSONRPCRequest
if err := json.Unmarshal(body, &singleReq); err == nil {
return &BatchInfo{
IsBatch: false,
Methods: []string{singleReq.Method},
RequestCount: 1,
HasStateful: isStatefulMethod(singleReq.Method),
}, nil
}
// Neither batch nor single request
return nil, fmt.Errorf("invalid JSON-RPC request format")
}
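
// Hypothetical, uncalled sketch showing parseBatchInfo on a two-request
// batch; the payload below is illustrative.
func exampleParseBatchInfo() {
	body := []byte(`[{"jsonrpc":"2.0","method":"eth_blockNumber","id":1},` +
		`{"jsonrpc":"2.0","method":"eth_newFilter","id":2}]`)
	info, err := parseBatchInfo(body)
	if err != nil {
		log.Printf("parse error: %v", err)
		return
	}
	// IsBatch=true, RequestCount=2, HasStateful=true (eth_newFilter is stateful)
	log.Printf("batch=%v count=%d stateful=%v methods=%v",
		info.IsBatch, info.RequestCount, info.HasStateful, info.Methods)
}
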
// calculateBatchDelay determines the appropriate delay for a batch request
func calculateBatchDelay(methods []string, probe *SecondaryProbe, stats *StatsCollector) time.Duration {
var maxDelay time.Duration
for _, method := range methods {
var delay time.Duration
if probe != nil {
delay = probe.getDelayForMethod(method)
} else {
delay = stats.GetPrimaryP75ForMethod(method)
}
if delay > maxDelay {
maxDelay = delay
}
}
// If no methods or all unknown, use a default
if maxDelay == 0 {
if probe != nil {
return probe.minResponseTime + probe.minDelayBuffer
}
return 15 * time.Millisecond // Default fallback
}
return maxDelay
}
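
// Hypothetical, uncalled sketch: with no probe configured, calculateBatchDelay
// falls back to the primary's per-method p75, and a batch waits for the
// slowest of its methods. Durations below are illustrative.
func exampleCalculateBatchDelay() {
	sc := &StatsCollector{
		methodStats: map[string][]time.Duration{
			"eth_call": {8 * time.Millisecond, 9 * time.Millisecond,
				10 * time.Millisecond, 11 * time.Millisecond, 12 * time.Millisecond},
			"eth_blockNumber": {2 * time.Millisecond, 2 * time.Millisecond,
				3 * time.Millisecond, 3 * time.Millisecond, 4 * time.Millisecond},
		},
	}
	delay := calculateBatchDelay([]string{"eth_call", "eth_blockNumber"}, nil, sc)
	log.Printf("batch delay: %s", delay) // p75 of eth_call dominates: 11ms
}
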
// formatMethodList creates a readable string from method list for logging
func formatMethodList(methods []string) string {
if len(methods) == 0 {
return "[]"
}
if len(methods) <= 3 {
return fmt.Sprintf("%v", methods)
}
// Show first 3 methods + count of remaining
return fmt.Sprintf("[%s, %s, %s, ... +%d more]",
methods[0], methods[1], methods[2], len(methods)-3)
}
type Backend struct {
URL string
Name string
Role string
}
type ResponseStats struct {
Backend string
StatusCode int
Duration time.Duration
Error error
Method string // JSON-RPC method that produced this response
}
// WebSocketStats tracks information about websocket connections
type WebSocketStats struct {
Backend string
Error error
ConnectTime time.Duration
IsActive bool
MessagesSent int
MessagesReceived int
}
// CUDataPoint represents a historical CU data point with timestamp
type CUDataPoint struct {
Timestamp time.Time // End time of the interval
CU int
}
// StatsCollector maintains statistics for periodic summaries
type StatsCollector struct {
mu sync.Mutex
requestStats []ResponseStats
methodStats map[string][]time.Duration // Track durations by method
backendMethodStats map[string]map[string][]time.Duration // Track durations by backend and method
backendWins map[string]int // Track how many times each backend responded first
methodBackendWins map[string]map[string]int // Track wins per method per backend
firstResponseDurations []time.Duration // Track durations of first successful responses (from winning backend's perspective)
actualFirstResponseDurations []time.Duration // Track actual user-experienced durations
methodFirstResponseDurations map[string][]time.Duration // Track first response durations by method (winning backend's perspective)
methodActualFirstResponseDurations map[string][]time.Duration // Track actual user-experienced durations by method
totalRequests int
errorCount int
wsConnections []WebSocketStats // Track websocket connections
totalWsConnections int
appStartTime time.Time // Application start time (never reset)
intervalStartTime time.Time // Current interval start time (reset each interval)
summaryInterval time.Duration
methodCUPrices map[string]int // Map of method names to CU prices
totalCU int // Total CU earned
methodCU map[string]int // Track CU earned per method
historicalCU []CUDataPoint // Historical CU data for different time windows
hasSecondaryBackends bool // Track if secondary backends are configured
skippedSecondaryRequests int // Track how many secondary requests were skipped
secondaryProbe *SecondaryProbe // Reference to secondary probe
blockHeightTracker *BlockHeightTracker // Reference to block height tracker
}
// SecondaryProbe maintains latency information for secondary backends through active probing
type SecondaryProbe struct {
mu sync.RWMutex
backends []Backend
client *http.Client
minResponseTime time.Duration // Overall minimum response time
methodTimings map[string]time.Duration // Per-method minimum response times
backendTimings map[string]time.Duration // Per-backend minimum response times
backendAvailable map[string]bool // Track if backends are available (responding without errors)
backendErrorCount map[string]int // Track consecutive error count per backend
backendLastSuccess map[string]time.Time // Track last successful probe time per backend
lastProbeTime time.Time
probeInterval time.Duration
minDelayBuffer time.Duration // Buffer to add to minimum times
probeMethods []string
enableDetailedLogs bool
failureCount int // Track consecutive probe failures
lastSuccessTime time.Time // Last time probes succeeded
maxErrorThreshold int // Maximum consecutive errors before marking backend unavailable
recoveryThreshold int // Number of consecutive successes needed to mark backend available again
}
// BlockHeightTracker monitors block heights from different backends using WebSocket subscriptions
type BlockHeightTracker struct {
mu sync.RWMutex
backends []Backend
blockHeights map[string]uint64 // Backend name -> latest block number
lastUpdateTime map[string]time.Time // Backend name -> last successful update
connections map[string]*websocket.Conn // Active WebSocket connections
enableDetailedLogs bool
maxBlockBehind uint64 // Maximum blocks a secondary can be behind primary
stopChan chan struct{}
}
// NewBlockHeightTracker creates a new WebSocket-based block height tracker
func NewBlockHeightTracker(backends []Backend, client *http.Client, checkInterval time.Duration,
maxBlockBehind uint64, enableDetailedLogs bool) *BlockHeightTracker {
bht := &BlockHeightTracker{
backends: backends,
blockHeights: make(map[string]uint64),
lastUpdateTime: make(map[string]time.Time),
connections: make(map[string]*websocket.Conn),
enableDetailedLogs: enableDetailedLogs,
maxBlockBehind: maxBlockBehind,
stopChan: make(chan struct{}),
}
// Start WebSocket connections for all backends
go bht.startWebSocketConnections()
return bht
}
// startWebSocketConnections establishes WebSocket connections to all backends
func (bht *BlockHeightTracker) startWebSocketConnections() {
for _, backend := range bht.backends {
go bht.connectToBackend(backend)
}
}
// connectToBackend establishes and maintains a WebSocket connection to a single backend
func (bht *BlockHeightTracker) connectToBackend(backend Backend) {
for {
select {
case <-bht.stopChan:
return
default:
}
// Create WebSocket URL from HTTP URL
wsURL := strings.Replace(backend.URL, "http://", "ws://", 1)
wsURL = strings.Replace(wsURL, "https://", "wss://", 1)
if bht.enableDetailedLogs {
log.Printf("Block height tracker: connecting to %s at %s", backend.Name, wsURL)
}
// Connect to WebSocket
conn, _, err := websocket.DefaultDialer.Dial(wsURL, nil)
if err != nil {
if bht.enableDetailedLogs {
log.Printf("Block height tracker: failed to connect to %s: %v", backend.Name, err)
}
time.Sleep(10 * time.Second) // Wait before retry
continue
}
// Store connection
bht.mu.Lock()
bht.connections[backend.Name] = conn
bht.mu.Unlock()
// Subscribe to new block headers
subscribeMsg := map[string]interface{}{
"jsonrpc": "2.0",
"method": "eth_subscribe",
"params": []string{"newHeads"},
"id": 1,
}
if err := conn.WriteJSON(subscribeMsg); err != nil {
if bht.enableDetailedLogs {
log.Printf("Block height tracker: failed to subscribe to %s: %v", backend.Name, err)
}
conn.Close()
bht.mu.Lock()
delete(bht.connections, backend.Name)
bht.mu.Unlock()
time.Sleep(5 * time.Second)
continue
}
if bht.enableDetailedLogs {
log.Printf("Block height tracker: subscribed to newHeads for %s", backend.Name)
}
// Handle incoming messages
bht.handleWebSocketMessages(conn, backend)
// Connection closed, clean up
bht.mu.Lock()
delete(bht.connections, backend.Name)
bht.mu.Unlock()
if bht.enableDetailedLogs {
log.Printf("Block height tracker: connection to %s closed, will retry", backend.Name)
}
time.Sleep(5 * time.Second) // Wait before reconnecting
}
}
// handleWebSocketMessages processes incoming WebSocket messages from a backend
func (bht *BlockHeightTracker) handleWebSocketMessages(conn *websocket.Conn, backend Backend) {
defer conn.Close()
for {
select {
case <-bht.stopChan:
return
default:
}
var message map[string]interface{}
if err := conn.ReadJSON(&message); err != nil {
if bht.enableDetailedLogs {
log.Printf("Block height tracker: error reading from %s: %v", backend.Name, err)
}
return
}
// Check if this is a subscription notification
if method, exists := message["method"]; exists && method == "eth_subscription" {
bht.handleSubscriptionNotification(message, backend)
} else if result, exists := message["result"]; exists {
// This might be the subscription confirmation
if bht.enableDetailedLogs {
log.Printf("Block height tracker: subscription confirmed for %s: %v", backend.Name, result)
}
} else if errorObj, exists := message["error"]; exists {
if bht.enableDetailedLogs {
log.Printf("Block height tracker: error from %s: %v", backend.Name, errorObj)
}
}
}
}
// handleSubscriptionNotification processes a newHeads subscription notification
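// A typical notification (shape illustrative, fields abridged) looks like:
//
//	{"jsonrpc":"2.0","method":"eth_subscription",
//	 "params":{"subscription":"0xabc","result":{"number":"0x1b4","hash":"0x..."}}}
//
// Only the header's "number" field is consumed here; "0x1b4" parses to
// block height 436.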
func (bht *BlockHeightTracker) handleSubscriptionNotification(message map[string]interface{}, backend Backend) {
params, exists := message["params"]
if !exists {
return
}
paramsMap, ok := params.(map[string]interface{})
if !ok {
return
}
result, exists := paramsMap["result"]
if !exists {
return
}
resultMap, ok := result.(map[string]interface{})
if !ok {
return
}
// Extract block number from the header
blockNumberHex, exists := resultMap["number"]
if !exists {
return
}
blockNumberStr, ok := blockNumberHex.(string)
if !ok {
return
}
// Parse hex block number
blockHex := strings.TrimPrefix(blockNumberStr, "0x")
blockNum, err := strconv.ParseUint(blockHex, 16, 64)
if err != nil {
if bht.enableDetailedLogs {
log.Printf("Block height tracker: failed to parse block number for %s: %s", backend.Name, blockNumberStr)
}
return
}
// Update the block height
bht.mu.Lock()
oldHeight := bht.blockHeights[backend.Name]
bht.blockHeights[backend.Name] = blockNum
bht.lastUpdateTime[backend.Name] = time.Now()
bht.mu.Unlock()
if bht.enableDetailedLogs && oldHeight != blockNum {
log.Printf("Block height tracker: %s updated from %d to %d via WebSocket",
backend.Name, oldHeight, blockNum)
}
}
// isSecondaryBehind checks if a secondary backend is behind the primary by more than maxBlockBehind
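// Worked example (illustrative numbers): with maxBlockBehind=2, a primary at
// height 1000 and a secondary at 997 (3 behind) is reported as behind, while
// a secondary at 998 (2 behind) is still allowed.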
func (bht *BlockHeightTracker) isSecondaryBehind(secondaryName string) bool {
bht.mu.RLock()
defer bht.mu.RUnlock()
// Find primary backend block height
var primaryHeight uint64
var primaryFound bool
for name, height := range bht.blockHeights {
// Check if this is the primary backend (assuming it's named "primary")
if name == "primary" {
primaryHeight = height
primaryFound = true
break
}
}
if !primaryFound {
// If we can't find primary height, be conservative and allow secondary
return false
}
// Get secondary height
secondaryHeight, exists := bht.blockHeights[secondaryName]
if !exists {
// If we don't have secondary height data, be conservative and block it
return true
}
// Check if secondary is behind by more than the threshold
if primaryHeight > secondaryHeight {
blocksBehind := primaryHeight - secondaryHeight
return blocksBehind > bht.maxBlockBehind
}
// Secondary is not behind
return false
}
// getBlockHeightStatus returns a summary of current block heights for logging
func (bht *BlockHeightTracker) getBlockHeightStatus() map[string]uint64 {
bht.mu.RLock()
defer bht.mu.RUnlock()
result := make(map[string]uint64)
for name, height := range bht.blockHeights {
result[name] = height
}
return result
}
// Stop gracefully shuts down the block height tracker
func (bht *BlockHeightTracker) Stop() {
close(bht.stopChan)
// Close all WebSocket connections
bht.mu.Lock()
defer bht.mu.Unlock()
for name, conn := range bht.connections {
if bht.enableDetailedLogs {
log.Printf("Block height tracker: closing connection to %s", name)
}
conn.Close()
}
}
func NewStatsCollector(summaryInterval time.Duration, hasSecondaryBackends bool) *StatsCollector {
now := time.Now()
sc := &StatsCollector{
requestStats: make([]ResponseStats, 0, 1000),
methodStats: make(map[string][]time.Duration),
backendMethodStats: make(map[string]map[string][]time.Duration),
backendWins: make(map[string]int),
methodBackendWins: make(map[string]map[string]int),
firstResponseDurations: make([]time.Duration, 0, 1000),
actualFirstResponseDurations: make([]time.Duration, 0, 1000),
methodFirstResponseDurations: make(map[string][]time.Duration),
methodActualFirstResponseDurations: make(map[string][]time.Duration),
appStartTime: now,
intervalStartTime: now,
summaryInterval: summaryInterval,
methodCUPrices: initCUPrices(), // Initialize CU prices
methodCU: make(map[string]int),
historicalCU: make([]CUDataPoint, 0, 2000), // Store up to ~24 hours of 1-minute intervals
hasSecondaryBackends: hasSecondaryBackends,
}
// Start the periodic summary goroutine
go sc.periodicSummary()
return sc
}
// SetSecondaryProbe sets the secondary probe reference after stats collector is created
func (sc *StatsCollector) SetSecondaryProbe(probe *SecondaryProbe) {
sc.mu.Lock()
defer sc.mu.Unlock()
sc.secondaryProbe = probe
}
// SetBlockHeightTracker sets the block height tracker reference after stats collector is created
func (sc *StatsCollector) SetBlockHeightTracker(tracker *BlockHeightTracker) {
sc.mu.Lock()
defer sc.mu.Unlock()
sc.blockHeightTracker = tracker
}
// NewSecondaryProbe creates a new secondary probe instance
func NewSecondaryProbe(backends []Backend, client *http.Client, probeInterval time.Duration,
minDelayBuffer time.Duration, probeMethods []string, enableDetailedLogs bool) *SecondaryProbe {
// Filter only secondary backends
var secondaryBackends []Backend
for _, b := range backends {
if b.Role == "secondary" {
secondaryBackends = append(secondaryBackends, b)
}
}
if len(secondaryBackends) == 0 {
return nil
}
sp := &SecondaryProbe{
backends: secondaryBackends,
client: client,
minResponseTime: 15 * time.Millisecond, // Start with reasonable default
methodTimings: make(map[string]time.Duration),
backendTimings: make(map[string]time.Duration),
backendAvailable: make(map[string]bool),
backendErrorCount: make(map[string]int),
backendLastSuccess: make(map[string]time.Time),
probeInterval: probeInterval,
minDelayBuffer: minDelayBuffer,
probeMethods: probeMethods,
enableDetailedLogs: enableDetailedLogs,
lastSuccessTime: time.Now(),
maxErrorThreshold: 5, // Mark backend unavailable after 5 consecutive errors
recoveryThreshold: 3, // Require 3 consecutive successes to mark backend available again
}
// Initialize all backends as available initially
for _, backend := range secondaryBackends {
sp.backendAvailable[backend.Name] = true
sp.backendErrorCount[backend.Name] = 0
sp.backendLastSuccess[backend.Name] = time.Now()
}
// Run initial probe immediately
go func() {
sp.runProbe()
// Then start periodic probing
sp.startPeriodicProbing()
}()
return sp
}
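
// Hypothetical, uncalled sketch of wiring up the probe; URLs, names, and
// intervals are placeholders.
func exampleNewSecondaryProbe() {
	backends := []Backend{
		{URL: "http://primary:8545", Name: "primary", Role: "primary"},
		{URL: "http://secondary:8545", Name: "secondary-1", Role: "secondary"},
	}
	client := &http.Client{Timeout: 5 * time.Second}
	probe := NewSecondaryProbe(backends, client, 30*time.Second,
		5*time.Millisecond, []string{"eth_blockNumber", "eth_chainId"}, false)
	if probe != nil {
		// Before the first probe cycle completes, this returns the 15ms
		// default minimum plus the 5ms buffer.
		log.Printf("delay for eth_call: %s", probe.getDelayForMethod("eth_call"))
	}
}
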
// getDelayForMethod returns the appropriate delay for a given method
func (sp *SecondaryProbe) getDelayForMethod(method string) time.Duration {
sp.mu.RLock()
defer sp.mu.RUnlock()
// If probes have been failing, use a conservative fallback
if sp.failureCount > 3 && time.Since(sp.lastSuccessTime) > 5*time.Minute {
return 20 * time.Millisecond // Conservative fallback
}
// Use method-specific timing if available
if timing, exists := sp.methodTimings[method]; exists {
return timing + sp.minDelayBuffer
}
// Fall back to general minimum
return sp.minResponseTime + sp.minDelayBuffer
}
// getDelayForBackendAndMethod returns the appropriate delay for a specific backend and method
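// Worked example (illustrative numbers): with backendTimings["secondary-1"]=8ms,
// methodTimings["eth_getLogs"]=12ms, and minDelayBuffer=5ms, the method timing
// dominates and the returned delay is 12ms + 5ms = 17ms.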
func (sp *SecondaryProbe) getDelayForBackendAndMethod(backend, method string) time.Duration {
sp.mu.RLock()
defer sp.mu.RUnlock()
// Start with backend-specific timing
delay := sp.minResponseTime
if backendTiming, exists := sp.backendTimings[backend]; exists {
delay = backendTiming
}
// Use method-specific timing if it's longer
if methodTiming, exists := sp.methodTimings[method]; exists && methodTiming > delay {
delay = methodTiming
}
return delay + sp.minDelayBuffer
}
// runProbe performs a single probe cycle to all secondary backends
func (sp *SecondaryProbe) runProbe() {
newMethodTimings := make(map[string]time.Duration)
newBackendTimings := make(map[string]time.Duration)
successfulProbes := 0
for _, backend := range sp.backends {
backendMin := time.Hour // Start with large value
backendSuccessCount := 0 // Track successes for this backend across all methods
for _, method := range sp.probeMethods {
methodMin := time.Hour // Track minimum for this method on this backend
methodSuccesses := 0
// Perform 10 probes for this method and take the minimum
for probe := 0; probe < 10; probe++ {
reqBody := []byte(fmt.Sprintf(
`{"jsonrpc":"2.0","method":"%s","params":[],"id":"probe-%d-%d"}`,
method, time.Now().UnixNano(), probe,
))
req, err := http.NewRequest("POST", backend.URL, bytes.NewReader(reqBody))
if err != nil {
continue
}
req.Header.Set("Content-Type", "application/json")
// Keep-alive is the HTTP/1.1 default; set it explicitly for clarity
req.Header.Set("Connection", "keep-alive")
start := time.Now()
resp, err := sp.client.Do(req)
duration := time.Since(start)
if err == nil && resp != nil {
// Check for both network success and HTTP success
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
methodSuccesses++
successfulProbes++
backendSuccessCount++
// Track minimum for this method on this backend
if duration < methodMin {
methodMin = duration
}
if sp.enableDetailedLogs {
log.Printf("Probe %d/10: backend=%s method=%s duration=%s status=%d (min so far: %s)",
probe+1, backend.Name, method, duration, resp.StatusCode, methodMin)
}
} else {
// HTTP error status (4xx, 5xx)
if sp.enableDetailedLogs {
log.Printf("Probe %d/10: backend=%s method=%s HTTP error status=%d",
probe+1, backend.Name, method, resp.StatusCode)
}
}
// Drain the body so the keep-alive connection can be reused
io.Copy(io.Discard, resp.Body)
resp.Body.Close()
} else {
// Network error
if sp.enableDetailedLogs {
log.Printf("Probe %d/10: backend=%s method=%s network error: %v",
probe+1, backend.Name, method, err)
}
}
// Small delay between probes to avoid overwhelming the backend
if probe < 9 { // Don't delay after the last probe
time.Sleep(10 * time.Millisecond)
}
}
// Only use this method's timing if we had successful probes
if methodSuccesses > 0 && methodMin < time.Hour {
// Update method timing (use minimum across all backends)
if currentMin, exists := newMethodTimings[method]; !exists || methodMin < currentMin {
newMethodTimings[method] = methodMin
}
// Track backend minimum
if methodMin < backendMin {
backendMin = methodMin
}
if sp.enableDetailedLogs {
log.Printf("Method %s on backend %s: %d/10 successful probes, min duration: %s",
method, backend.Name, methodSuccesses, methodMin)
}
}
}
// Store backend minimum if we got any successful probes
if backendMin < time.Hour {
newBackendTimings[backend.Name] = backendMin
}
// Update backend health based on overall success rate for this probe cycle
// Consider a backend healthy if it had at least some successful responses
sp.mu.Lock()
sp.updateBackendHealth(backend.Name, backendSuccessCount > 0)
sp.mu.Unlock()
if sp.enableDetailedLogs {
sp.mu.RLock()
availability := "AVAILABLE"
if !sp.backendAvailable[backend.Name] {
availability = "UNAVAILABLE"
}
errorCount := sp.backendErrorCount[backend.Name]
sp.mu.RUnlock()
log.Printf("Backend %s probe cycle complete: %d/%d total successes, status: %s (error count: %d)",
backend.Name, backendSuccessCount, len(sp.probeMethods)*10, availability, errorCount)
}
}
// Update timings if we got successful probes
sp.mu.Lock()
defer sp.mu.Unlock()
if successfulProbes > 0 {
sp.failureCount = 0
sp.lastSuccessTime = time.Now()
// Update method timings
for method, timing := range newMethodTimings {
sp.methodTimings[method] = timing
}
// Update backend timings
for backend, timing := range newBackendTimings {
sp.backendTimings[backend] = timing
}
// Update overall minimum
overallMin := time.Hour
for _, timing := range newBackendTimings {
if timing < overallMin {
overallMin = timing
}
}
if overallMin < time.Hour {
sp.minResponseTime = overallMin
}
sp.lastProbeTime = time.Now()
if sp.enableDetailedLogs {
availableBackends := sp.getAvailableBackends()
log.Printf("Probe complete: min=%s methods=%v backends=%v available_backends=%v",
sp.minResponseTime, sp.methodTimings, sp.backendTimings, availableBackends)
}
} else {
sp.failureCount++
if sp.enableDetailedLogs {
availableBackends := sp.getAvailableBackends()
log.Printf("Probe failed: consecutive failures=%d available_backends=%v",
sp.failureCount, availableBackends)
}
}
}
// startPeriodicProbing runs probes at regular intervals
func (sp *SecondaryProbe) startPeriodicProbing() {
ticker := time.NewTicker(sp.probeInterval)
defer ticker.Stop()
for range ticker.C {
sp.runProbe()
}
}
// initCUPrices initializes the map of method names to their CU prices
func initCUPrices() map[string]int {
return map[string]int{
"debug_traceBlockByHash": 90,
"debug_traceBlockByNumber": 90,
"debug_traceCall": 90,
"debug_traceTransaction": 90,
"eth_accounts": 0,
"eth_blockNumber": 10,
"eth_call": 21,
"eth_chainId": 0,
"eth_coinbase": 0,
"eth_createAccessList": 30,
"eth_estimateGas": 60,
"eth_feeHistory": 15,
"eth_gasPrice": 15,
"eth_getBalance": 11,
"eth_getBlockByHash": 21,
"eth_getBlockByHash#full": 60,
"eth_getBlockByNumber": 24,
"eth_getBlockByNumber#full": 60,
"eth_getBlockReceipts": 80,
"eth_getBlockTransactionCountByHash": 15,
"eth_getBlockTransactionCountByNumber": 11,
"eth_getCode": 24,
"eth_getFilterChanges": 20,
"eth_getFilterLogs": 60,
"eth_getLogs": 60,
"eth_getProof": 11,
"eth_getStorageAt": 14,
"eth_getTransactionByBlockHashAndIndex": 19,
"eth_getTransactionByBlockNumberAndIndex": 13,
"eth_getTransactionByHash": 11,
"eth_getTransactionCount": 11,
"eth_getTransactionReceipt": 30,
"eth_getUncleByBlockHashAndIndex": 15,
"eth_getUncleByBlockNumberAndIndex": 15,
"eth_getUncleCountByBlockHash": 15,
"eth_getUncleCountByBlockNumber": 15,
"eth_hashrate": 0,
"eth_maxPriorityFeePerGas": 16,
"eth_mining": 0,
"eth_newBlockFilter": 20,
"eth_newFilter": 20,
"eth_newPendingTransactionFilter": 20,
"eth_protocolVersion": 0,
"eth_sendRawTransaction": 90,
"eth_syncing": 0,
"eth_subscribe": 10,
"eth_subscription": 25, // For "Notifications from the events you've subscribed to"
"eth_uninstallFilter": 10,
"eth_unsubscribe": 10,
"net_listening": 0,
"net_peerCount": 0,
"net_version": 0,
"trace_block": 90,
"trace_call": 60,
"trace_callMany": 90,
"trace_filter": 75,
"trace_get": 20,
"trace_rawTransaction": 75,
"trace_replayBlockTransactions": 90,
"trace_replayBlockTransactions#vmTrace": 300,
"trace_replayTransaction": 90,
"trace_replayTransaction#vmTrace": 300,
"trace_transaction": 90,
"txpool_content": 1000,
"web3_clientVersion": 0,
"web3_sha3": 10,
"bor_getAuthor": 10,
"bor_getCurrentProposer": 10,
"bor_getCurrentValidators": 10,
"bor_getRootHash": 10,
"bor_getSignersAtHash": 10,
}
}
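
// Hypothetical, uncalled sketch of the CU arithmetic: an eth_call (21 CU)
// plus an eth_getLogs (60 CU) in one batch earn 81 CU in total.
func exampleCUPricing() {
	prices := initCUPrices()
	total := prices["eth_call"] + prices["eth_getLogs"]
	fmt.Printf("batch CU: %d\n", total) // 81
}
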
func (sc *StatsCollector) AddStats(stats []ResponseStats, totalDuration time.Duration) {
sc.mu.Lock()
defer sc.mu.Unlock()
// Find the fastest successful response and actual first response
var fastestBackend string
fastestDuration := time.Hour // Sentinel: any real response will be faster
var actualFirstDuration time.Duration
var method string
var hasActualFirst bool
for _, stat := range stats {
if stat.Backend == "actual-first-response" {
actualFirstDuration = stat.Duration
hasActualFirst = true
method = stat.Method
} else if stat.Error == nil && stat.Duration < fastestDuration {
// Primary backend can always be fastest, secondary only if successful status
canBeWinner := false
if stat.Backend == "primary" {
// Primary can always be winner (regardless of status code)
canBeWinner = true
} else {
// Secondary backend can only be winner with successful status code
canBeWinner = stat.StatusCode < 400
}
if canBeWinner {
fastestDuration = stat.Duration
fastestBackend = stat.Backend
if method == "" {
method = stat.Method
}
}
}
}
// Track the win if we found a successful response
if fastestBackend != "" {
sc.backendWins[fastestBackend]++
// Track wins per method
if _, exists := sc.methodBackendWins[method]; !exists {
sc.methodBackendWins[method] = make(map[string]int)
}
sc.methodBackendWins[method][fastestBackend]++
// Track first response duration (from winning backend's perspective)
sc.firstResponseDurations = append(sc.firstResponseDurations, fastestDuration)
// Track first response duration by method
if _, exists := sc.methodFirstResponseDurations[method]; !exists {
sc.methodFirstResponseDurations[method] = make([]time.Duration, 0, 100)
}
sc.methodFirstResponseDurations[method] = append(sc.methodFirstResponseDurations[method], fastestDuration)
// Track actual first response duration if available
if hasActualFirst {
sc.actualFirstResponseDurations = append(sc.actualFirstResponseDurations, actualFirstDuration)
if _, exists := sc.methodActualFirstResponseDurations[method]; !exists {
sc.methodActualFirstResponseDurations[method] = make([]time.Duration, 0, 100)
}
sc.methodActualFirstResponseDurations[method] = append(sc.methodActualFirstResponseDurations[method], actualFirstDuration)
}
}
// Add stats to the collection (skip actual-first-response as it's synthetic)
for _, stat := range stats {
if stat.Backend == "actual-first-response" {
continue // Don't add synthetic entries to regular stats
}
sc.requestStats = append(sc.requestStats, stat)
if stat.Error != nil {
// Don't count skipped secondary backends as errors
if !strings.Contains(stat.Error.Error(), "skipped - primary responded") &&
!strings.Contains(stat.Error.Error(), "skipped - behind in block height") {
sc.errorCount++
} else {
// Track that we skipped a secondary request
sc.skippedSecondaryRequests++
}
}
// Track method-specific stats for all backends
if stat.Error == nil {
// Initialize backend map if not exists
if _, exists := sc.backendMethodStats[stat.Backend]; !exists {
sc.backendMethodStats[stat.Backend] = make(map[string][]time.Duration)
}
// Initialize method array if not exists
if _, exists := sc.backendMethodStats[stat.Backend][stat.Method]; !exists {
sc.backendMethodStats[stat.Backend][stat.Method] = make([]time.Duration, 0, 100)
}
// Add the duration
sc.backendMethodStats[stat.Backend][stat.Method] = append(
sc.backendMethodStats[stat.Backend][stat.Method], stat.Duration)
// Keep tracking primary backend in the old way for backward compatibility
if stat.Backend == "primary" {
// Handle batch requests specially for CU calculation
// Batch pseudo-methods ("batch[...]") are tracked separately via AddBatchStats
if !strings.HasPrefix(stat.Method, "batch[") {
if _, exists := sc.methodStats[stat.Method]; !exists {
sc.methodStats[stat.Method] = make([]time.Duration, 0, 100)
}
sc.methodStats[stat.Method] = append(sc.methodStats[stat.Method], stat.Duration)
// Add CU for this method
cuValue := sc.methodCUPrices[stat.Method]
sc.totalCU += cuValue
sc.methodCU[stat.Method] += cuValue
}
}
}
}
sc.totalRequests++
}
// AddBatchStats adds statistics for a batch request
func (sc *StatsCollector) AddBatchStats(methods []string, duration time.Duration, backend string) {
sc.mu.Lock()
defer sc.mu.Unlock()
// Calculate total CU for the batch
batchCU := 0
for _, method := range methods {
if method != "" {
cuValue := sc.methodCUPrices[method]
batchCU += cuValue
// Track individual method CU
sc.methodCU[method] += cuValue
// Track method durations (use batch duration for each method)
if _, exists := sc.methodStats[method]; !exists {
sc.methodStats[method] = make([]time.Duration, 0, 100)
}
sc.methodStats[method] = append(sc.methodStats[method], duration)
}
}
sc.totalCU += batchCU
}
func (sc *StatsCollector) AddWebSocketStats(stats WebSocketStats) {
sc.mu.Lock()
defer sc.mu.Unlock()
sc.wsConnections = append(sc.wsConnections, stats)
sc.totalWsConnections++
if stats.Error != nil {
sc.errorCount++
}
}
func (sc *StatsCollector) periodicSummary() {
ticker := time.NewTicker(sc.summaryInterval)
defer ticker.Stop()
for range ticker.C {
sc.printSummary()
}
}
// formatDuration formats a duration with at most 6 significant digits total
func formatDuration(d time.Duration) string {
// Convert to string with standard formatting
str := d.String()
// Find the decimal point if it exists
decimalIdx := strings.Index(str, ".")
if decimalIdx == -1 {
// No decimal point, return as is (already ≤ 6 digits or no need to truncate)
return str
}
// Find the unit suffix (ms, µs, etc.)
unitIdx := -1
for i := decimalIdx; i < len(str); i++ {
if !(str[i] >= '0' && str[i] <= '9') && str[i] != '.' {
unitIdx = i
break
}
}
if unitIdx == -1 {
unitIdx = len(str) // No unit suffix found
}
// Count digits before decimal (not including sign)
digitsBeforeDecimal := 0
for i := 0; i < decimalIdx; i++ {
if str[i] >= '0' && str[i] <= '9' {
digitsBeforeDecimal++
}
}
// Calculate how many decimal places we can keep (allowing for 6 total digits)
maxDecimalPlaces := 6 - digitsBeforeDecimal
if maxDecimalPlaces <= 0 {
// No room for decimal places
return str[:decimalIdx] + str[unitIdx:]
}
// Calculate end position for truncation
endPos := decimalIdx + 1 + maxDecimalPlaces
if endPos > unitIdx {
endPos = unitIdx
}
// Return truncated string
return str[:endPos] + str[unitIdx:]
}
func (sc *StatsCollector) printSummary() {
sc.mu.Lock()
defer sc.mu.Unlock()
uptime := time.Since(sc.appStartTime)
fmt.Printf("\n=== BENCHMARK PROXY SUMMARY ===\n")
fmt.Printf("Uptime: %s\n", uptime.Round(time.Second))
fmt.Printf("Total HTTP Requests: %d\n", sc.totalRequests)
fmt.Printf("Total WebSocket Connections: %d\n", sc.totalWsConnections)
fmt.Printf("Error Rate: %.2f%%\n", float64(sc.errorCount)/float64(sc.totalRequests+sc.totalWsConnections)*100)
// Display secondary probe information if available
if sc.secondaryProbe != nil {
sc.secondaryProbe.mu.RLock()
fmt.Printf("\n--- Secondary Probe Status ---\n")
fmt.Printf("Minimum Secondary Latency: %s\n", formatDuration(sc.secondaryProbe.minResponseTime))
fmt.Printf("Probe Buffer: %s\n", formatDuration(sc.secondaryProbe.minDelayBuffer))
fmt.Printf("Effective Delay Threshold: %s\n", formatDuration(sc.secondaryProbe.minResponseTime+sc.secondaryProbe.minDelayBuffer))
// Display backend availability status
fmt.Printf("Backend Health Status:\n")
backendNames := make([]string, 0, len(sc.secondaryProbe.backendAvailable))
for name := range sc.secondaryProbe.backendAvailable {
backendNames = append(backendNames, name)
}
sort.Strings(backendNames)
for _, name := range backendNames {
available := sc.secondaryProbe.backendAvailable[name]
errorCount := sc.secondaryProbe.backendErrorCount[name]
lastSuccess := sc.secondaryProbe.backendLastSuccess[name]
status := "AVAILABLE"
if !available {
status = "UNAVAILABLE"
}
timeSinceSuccess := time.Since(lastSuccess)
fmt.Printf(" %s: %s (errors: %d, last success: %s ago)\n",
name, status, errorCount, timeSinceSuccess.Round(time.Second))
}
if len(sc.secondaryProbe.methodTimings) > 0 {
fmt.Printf("Method-Specific Thresholds:\n")
// Sort methods for consistent output
var methods []string
for method := range sc.secondaryProbe.methodTimings {
methods = append(methods, method)
}
sort.Strings(methods)
for _, method := range methods {
timing := sc.secondaryProbe.methodTimings[method]
fmt.Printf(" %s: %s (+ %s buffer = %s)\n",
method,
formatDuration(timing),
formatDuration(sc.secondaryProbe.minDelayBuffer),
formatDuration(timing+sc.secondaryProbe.minDelayBuffer))
}
}
if sc.secondaryProbe.failureCount > 0 {
fmt.Printf("Probe Failures: %d consecutive\n", sc.secondaryProbe.failureCount)
}
sc.secondaryProbe.mu.RUnlock()
}
// Display block height information if available
if sc.blockHeightTracker != nil {
blockStatus := sc.blockHeightTracker.getBlockHeightStatus()
if len(blockStatus) > 0 {
fmt.Printf("\n--- Block Height Status ---\n")
// Find primary height for comparison
var primaryHeight uint64
var primaryFound bool
if height, exists := blockStatus["primary"]; exists {
primaryHeight = height
primaryFound = true
}
// Sort backend names for consistent output
var backendNames []string
for name := range blockStatus {
backendNames = append(backendNames, name)
}
sort.Strings(backendNames)
for _, name := range backendNames {
height := blockStatus[name]
if name == "primary" {
fmt.Printf(" %s: %d (primary)\n", name, height)
} else if primaryFound {
behind := int64(primaryHeight) - int64(height)
if behind > 0 {
fmt.Printf(" %s: %d (%d blocks behind)\n", name, height, behind)
} else if behind == 0 {
fmt.Printf(" %s: %d (synchronized)\n", name, height)
} else {
fmt.Printf(" %s: %d (%d blocks ahead)\n", name, height, -behind)
}
} else {
fmt.Printf(" %s: %d\n", name, height)
}
}
}
}
if sc.hasSecondaryBackends && sc.skippedSecondaryRequests > 0 {
fmt.Printf("Skipped Secondary Requests: %d (%.1f%% of requests)\n",
sc.skippedSecondaryRequests,
float64(sc.skippedSecondaryRequests)/float64(sc.totalRequests)*100)
}
fmt.Printf("Total Compute Units Earned (current interval): %d CU\n", sc.totalCU)
// Calculate and display CU for different time windows
timeWindows := []struct {
duration time.Duration
label string
}{
{10 * time.Minute, "Last 10 minutes"},
{1 * time.Hour, "Last hour"},
{3 * time.Hour, "Last 3 hours"},
{24 * time.Hour, "Last 24 hours"},
}
fmt.Printf("\nHistorical Compute Units:\n")
for _, window := range timeWindows {
actualCU, needsExtrapolation := sc.calculateCUForTimeWindow(window.duration)
if needsExtrapolation {
// Calculate actual data duration for extrapolation
now := time.Now()
cutoff := now.Add(-window.duration)
var oldestDataStartTime time.Time
hasData := false
// Check current interval
if sc.intervalStartTime.After(cutoff) {
oldestDataStartTime = sc.intervalStartTime
hasData = true
}
// Check historical data
for i := len(sc.historicalCU) - 1; i >= 0; i-- {
point := sc.historicalCU[i]
intervalStart := point.Timestamp.Add(-sc.summaryInterval)
if point.Timestamp.Before(cutoff) {
break
}
if !hasData || intervalStart.Before(oldestDataStartTime) {
oldestDataStartTime = intervalStart
}
hasData = true
}
var actualDuration time.Duration
if hasData {
actualDuration = now.Sub(oldestDataStartTime)
}
extrapolatedCU := sc.extrapolateCU(actualCU, actualDuration, window.duration)
fmt.Printf(" %s: %s\n", window.label, formatCUWithExtrapolation(extrapolatedCU, true))
} else {
fmt.Printf(" %s: %s\n", window.label, formatCUWithExtrapolation(actualCU, false))
}
}
// Calculate response time statistics for primary backend
var primaryDurations []time.Duration
for _, stat := range sc.requestStats {
if stat.Backend == "primary" && stat.Error == nil {
primaryDurations = append(primaryDurations, stat.Duration)
}
}
if len(primaryDurations) > 0 {
sort.Slice(primaryDurations, func(i, j int) bool {
return primaryDurations[i] < primaryDurations[j]
})
var sum time.Duration
for _, d := range primaryDurations {
sum += d
}
avg := sum / time.Duration(len(primaryDurations))
min := primaryDurations[0]
max := primaryDurations[len(primaryDurations)-1]
p50idx := len(primaryDurations) * 50 / 100
p90idx := len(primaryDurations) * 90 / 100
p99idx := minInt(len(primaryDurations)-1, len(primaryDurations)*99/100)
p50 := primaryDurations[p50idx]
p90 := primaryDurations[p90idx]
p99 := primaryDurations[p99idx]
fmt.Printf("\nPrimary Backend Response Times:\n")
fmt.Printf(" Min: %s\n", formatDuration(min))
fmt.Printf(" Avg: %s\n", formatDuration(avg))
fmt.Printf(" Max: %s\n", formatDuration(max))
fmt.Printf(" p50: %s\n", formatDuration(p50))
fmt.Printf(" p90: %s\n", formatDuration(p90))
fmt.Printf(" p99: %s\n", formatDuration(p99))
}
// Calculate response time statistics for ALL backends
backendDurations := make(map[string][]time.Duration)
for _, stat := range sc.requestStats {
if stat.Error == nil {
backendDurations[stat.Backend] = append(backendDurations[stat.Backend], stat.Duration)
}
}
// Sort backend names for consistent output
var backendNames []string
for backend := range backendDurations {
backendNames = append(backendNames, backend)
}
sort.Strings(backendNames)
// Print per-backend statistics
fmt.Printf("\nPer-Backend Response Time Comparison:\n")
fmt.Printf("Note: 'User Latency' = actual time users wait; 'Backend Time' = winning backend's response time\n")
fmt.Printf("%-20s %10s %10s %10s %10s %10s %10s %10s\n",
"Backend", "Count", "Min", "Avg", "Max", "p50", "p90", "p99")
fmt.Printf("%s\n", strings.Repeat("-", 100))
// First, show the actual user latency if available
if len(sc.actualFirstResponseDurations) > 0 {
actualDurations := make([]time.Duration, len(sc.actualFirstResponseDurations))
copy(actualDurations, sc.actualFirstResponseDurations)
sort.Slice(actualDurations, func(i, j int) bool {
return actualDurations[i] < actualDurations[j]
})
var sum time.Duration
for _, d := range actualDurations {
sum += d
}
avg := sum / time.Duration(len(actualDurations))
min := actualDurations[0]
max := actualDurations[len(actualDurations)-1]
p50idx := len(actualDurations) * 50 / 100
p90idx := len(actualDurations) * 90 / 100
p99idx := minInt(len(actualDurations)-1, len(actualDurations)*99/100)
p50 := actualDurations[p50idx]
p90 := actualDurations[p90idx]
p99 := actualDurations[p99idx]
fmt.Printf("%-20s %10d %10s %10s %10s %10s %10s %10s\n",
"User Latency", len(actualDurations),
formatDuration(min), formatDuration(avg), formatDuration(max),
formatDuration(p50), formatDuration(p90), formatDuration(p99))
}
// Then show the backend time (what backend actually took)
if len(sc.firstResponseDurations) > 0 {
firstRespDurations := make([]time.Duration, len(sc.firstResponseDurations))
copy(firstRespDurations, sc.firstResponseDurations)
sort.Slice(firstRespDurations, func(i, j int) bool {
return firstRespDurations[i] < firstRespDurations[j]
})
var sum time.Duration
for _, d := range firstRespDurations {
sum += d
}
avg := sum / time.Duration(len(firstRespDurations))
min := firstRespDurations[0]
max := firstRespDurations[len(firstRespDurations)-1]
p50idx := len(firstRespDurations) * 50 / 100
p90idx := len(firstRespDurations) * 90 / 100
p99idx := minInt(len(firstRespDurations)-1, len(firstRespDurations)*99/100)
p50 := firstRespDurations[p50idx]
p90 := firstRespDurations[p90idx]
p99 := firstRespDurations[p99idx]
fmt.Printf("%-20s %10d %10s %10s %10s %10s %10s %10s\n",
"Backend Time", len(firstRespDurations),
formatDuration(min), formatDuration(avg), formatDuration(max),
formatDuration(p50), formatDuration(p90), formatDuration(p99))
fmt.Printf("%s\n", strings.Repeat("-", 100))
}
for _, backend := range backendNames {
durations := backendDurations[backend]
if len(durations) == 0 {
continue
}
sort.Slice(durations, func(i, j int) bool {
return durations[i] < durations[j]
})
var sum time.Duration
for _, d := range durations {
sum += d
}
avg := sum / time.Duration(len(durations))
min := durations[0]
max := durations[len(durations)-1]
p50idx := len(durations) * 50 / 100
p90idx := len(durations) * 90 / 100
p99idx := minInt(len(durations)-1, len(durations)*99/100)
p50 := durations[p50idx]
p90 := durations[p90idx]
p99 := durations[p99idx]
fmt.Printf("%-20s %10d %10s %10s %10s %10s %10s %10s\n",
backend, len(durations),
formatDuration(min), formatDuration(avg), formatDuration(max),
formatDuration(p50), formatDuration(p90), formatDuration(p99))
}
// Print backend wins statistics
fmt.Printf("\nBackend First Response Wins:\n")
fmt.Printf("%-20s %10s %10s\n", "Backend", "Wins", "Win %")
fmt.Printf("%s\n", strings.Repeat("-", 42))
totalWins := 0
for _, wins := range sc.backendWins {
totalWins += wins
}
// Sort backends by wins for consistent output
type backendWin struct {
backend string
wins int
}
var winList []backendWin
for backend, wins := range sc.backendWins {
winList = append(winList, backendWin{backend, wins})
}
sort.Slice(winList, func(i, j int) bool {
return winList[i].wins > winList[j].wins
})
for _, bw := range winList {
winPercentage := float64(bw.wins) / float64(totalWins) * 100
fmt.Printf("%-20s %10d %9.1f%%\n", bw.backend, bw.wins, winPercentage)
}
// Print per-method statistics
if len(sc.methodStats) > 0 {
fmt.Printf("\nPer-Method Statistics (Primary Backend):\n")
// Sort methods by name for consistent output
methods := make([]string, 0, len(sc.methodStats))
for method := range sc.methodStats {
methods = append(methods, method)
}
sort.Strings(methods)
for _, method := range methods {
var durations []time.Duration
var displayLabel string
// If secondary backends are configured and we have actual user latency data, use that
if sc.hasSecondaryBackends {
if actualDurations, exists := sc.methodActualFirstResponseDurations[method]; exists && len(actualDurations) > 0 {
durations = make([]time.Duration, len(actualDurations))
copy(durations, actualDurations)
displayLabel = method + " (User Latency)"
} else {
// Fall back to primary backend times if no actual latency data
durations = sc.methodStats[method]
displayLabel = method + " (Primary Backend)"
}
} else {
// No secondary backends, use primary backend times
durations = sc.methodStats[method]
displayLabel = method
}
if len(durations) == 0 {
continue
}
sort.Slice(durations, func(i, j int) bool {
return durations[i] < durations[j]
})
var sum time.Duration
for _, d := range durations {
sum += d
}
avg := sum / time.Duration(len(durations))
minDuration := durations[0]
max := durations[len(durations)-1]
// Only calculate percentiles if we have enough samples
p50 := minDuration
p90 := minDuration
p99 := minDuration
if len(durations) >= 2 {
p50idx := len(durations) * 50 / 100
p90idx := len(durations) * 90 / 100
p99idx := minInt(len(durations)-1, len(durations)*99/100)
p50 = durations[p50idx]
p90 = durations[p90idx]
p99 = durations[p99idx]
}
// Add CU information to the output
cuPrice := sc.methodCUPrices[method]
cuEarned := sc.methodCU[method]
fmt.Printf(" %-50s Count: %-5d Avg: %-10s Min: %-10s Max: %-10s p50: %-10s p90: %-10s p99: %-10s CU: %d x %d = %d\n",
displayLabel, len(durations),
formatDuration(avg), formatDuration(minDuration), formatDuration(max),
formatDuration(p50), formatDuration(p90), formatDuration(p99),
cuPrice, len(durations), cuEarned)
}
}
// Print per-method statistics for ALL backends
if len(sc.backendMethodStats) > 0 {
fmt.Printf("\nPer-Method Backend Comparison (Top 3 Methods):\n")
// Collect all unique methods across all backends with their total counts
methodCounts := make(map[string]int)
for _, methods := range sc.backendMethodStats {
for method, durations := range methods {
methodCounts[method] += len(durations)
}
}
// Sort methods by total count (descending)
type methodCount struct {
method string
count int
}
var methodList []methodCount
for method, count := range methodCounts {
methodList = append(methodList, methodCount{method, count})
}
sort.Slice(methodList, func(i, j int) bool {
return methodList[i].count > methodList[j].count
})
// Only show top 3 methods
maxMethods := 3
if len(methodList) < maxMethods {
maxMethods = len(methodList)
}
// For each of the top methods, show stats from all backends
for i := 0; i < maxMethods; i++ {
method := methodList[i].method
fmt.Printf("\n Method: %s (Total requests: %d)\n", method, methodList[i].count)
// Check if this is a stateful method
if isStatefulMethod(method) {
fmt.Printf(" Note: Stateful method - only sent to primary backend\n")
}
// Show wins for this method if available
if methodWins, exists := sc.methodBackendWins[method]; exists {
fmt.Printf(" First Response Wins: ")
totalMethodWins := 0
for _, wins := range methodWins {
totalMethodWins += wins
}
// Sort backends by wins for this method
var methodWinList []backendWin
for backend, wins := range methodWins {
methodWinList = append(methodWinList, backendWin{backend, wins})
}
sort.Slice(methodWinList, func(i, j int) bool {
return methodWinList[i].wins > methodWinList[j].wins
})
// Print wins inline
for idx, bw := range methodWinList {
if idx > 0 {
fmt.Printf(", ")
}
winPercentage := float64(bw.wins) / float64(totalMethodWins) * 100
fmt.Printf("%s: %d (%.1f%%)", bw.backend, bw.wins, winPercentage)
}
fmt.Printf("\n")
}
fmt.Printf(" %-20s %10s %10s %10s %10s %10s %10s %10s\n",
"Backend", "Count", "Min", "Avg", "Max", "p50", "p90", "p99")
fmt.Printf(" %s\n", strings.Repeat("-", 98))
for _, backend := range backendNames {
durations, exists := sc.backendMethodStats[backend][method]
if !exists || len(durations) == 0 {
continue
}
sort.Slice(durations, func(i, j int) bool {
return durations[i] < durations[j]
})
var sum time.Duration
for _, d := range durations {
sum += d
}
avg := sum / time.Duration(len(durations))
min := durations[0]
max := durations[len(durations)-1]
p50 := min
p90 := min
p99 := min
if len(durations) >= 2 {
p50idx := len(durations) * 50 / 100
p90idx := len(durations) * 90 / 100
p99idx := minInt(len(durations)-1, len(durations)*99/100)
p50 = durations[p50idx]
p90 = durations[p90idx]
p99 = durations[p99idx]
}
fmt.Printf(" %-20s %10d %10s %10s %10s %10s %10s %10s\n",
backend, len(durations),
formatDuration(min), formatDuration(avg), formatDuration(max),
formatDuration(p50), formatDuration(p90), formatDuration(p99))
}
// Show User Latency for this method if available
if methodActualDurations, exists := sc.methodActualFirstResponseDurations[method]; exists && len(methodActualDurations) > 0 {
// Make a copy and sort
durations := make([]time.Duration, len(methodActualDurations))
copy(durations, methodActualDurations)
sort.Slice(durations, func(i, j int) bool {
return durations[i] < durations[j]
})
var sum time.Duration
for _, d := range durations {
sum += d
}
avg := sum / time.Duration(len(durations))
min := durations[0]
max := durations[len(durations)-1]
p50 := min
p90 := min
p99 := min
if len(durations) >= 2 {
p50idx := len(durations) * 50 / 100
p90idx := len(durations) * 90 / 100
p99idx := minInt(len(durations)-1, len(durations)*99/100)
p50 = durations[p50idx]
p90 = durations[p90idx]
p99 = durations[p99idx]
}
fmt.Printf(" %-20s %10d %10s %10s %10s %10s %10s %10s\n",
"User Latency", len(durations),
formatDuration(min), formatDuration(avg), formatDuration(max),
formatDuration(p50), formatDuration(p90), formatDuration(p99))
}
// Show Backend Time statistics for this method
if methodFirstDurations, exists := sc.methodFirstResponseDurations[method]; exists && len(methodFirstDurations) > 0 {
// Make a copy and sort
durations := make([]time.Duration, len(methodFirstDurations))
copy(durations, methodFirstDurations)
sort.Slice(durations, func(i, j int) bool {
return durations[i] < durations[j]
})
var sum time.Duration
for _, d := range durations {
sum += d
}
avg := sum / time.Duration(len(durations))
min := durations[0]
max := durations[len(durations)-1]
p50 := min
p90 := min
p99 := min
if len(durations) >= 2 {
p50idx := len(durations) * 50 / 100
p90idx := len(durations) * 90 / 100
p99idx := minInt(len(durations)-1, len(durations)*99/100)
p50 = durations[p50idx]
p90 = durations[p90idx]
p99 = durations[p99idx]
}
fmt.Printf(" %-20s %10d %10s %10s %10s %10s %10s %10s\n",
"Backend Time", len(durations),
formatDuration(min), formatDuration(avg), formatDuration(max),
formatDuration(p50), formatDuration(p90), formatDuration(p99))
fmt.Printf(" %s\n", strings.Repeat("-", 98))
}
}
}
fmt.Printf("================================\n\n")
// Store current interval's CU data in historical data before resetting
if sc.totalCU > 0 {
sc.historicalCU = append(sc.historicalCU, CUDataPoint{
Timestamp: time.Now(), // Store the end time of the interval
CU: sc.totalCU,
})
}
// Clean up old historical data (keep only last 24 hours + some buffer)
cutoff := time.Now().Add(-25 * time.Hour)
newStart := len(sc.historicalCU) // If nothing is recent enough, drop all points
for i, point := range sc.historicalCU {
if point.Timestamp.After(cutoff) {
newStart = i
break
}
}
if newStart > 0 {
sc.historicalCU = sc.historicalCU[newStart:]
}
// Reset statistics for the next interval
// Keep only the last 1000 requests to prevent unlimited memory growth
if len(sc.requestStats) > 1000 {
sc.requestStats = sc.requestStats[len(sc.requestStats)-1000:]
}
// Reset method-specific statistics
for method := range sc.methodStats {
sc.methodStats[method] = sc.methodStats[method][:0]
}
// Reset backend method-specific statistics
for backend := range sc.backendMethodStats {
for method := range sc.backendMethodStats[backend] {
sc.backendMethodStats[backend][method] = sc.backendMethodStats[backend][method][:0]
}
}
// Reset backend wins statistics
sc.backendWins = make(map[string]int)
sc.methodBackendWins = make(map[string]map[string]int)
// Reset first response statistics
if len(sc.firstResponseDurations) > 1000 {
sc.firstResponseDurations = sc.firstResponseDurations[len(sc.firstResponseDurations)-1000:]
} else {
sc.firstResponseDurations = sc.firstResponseDurations[:0]
}
if len(sc.actualFirstResponseDurations) > 1000 {
sc.actualFirstResponseDurations = sc.actualFirstResponseDurations[len(sc.actualFirstResponseDurations)-1000:]
} else {
sc.actualFirstResponseDurations = sc.actualFirstResponseDurations[:0]
}
for method := range sc.methodFirstResponseDurations {
sc.methodFirstResponseDurations[method] = sc.methodFirstResponseDurations[method][:0]
}
for method := range sc.methodActualFirstResponseDurations {
sc.methodActualFirstResponseDurations[method] = sc.methodActualFirstResponseDurations[method][:0]
}
// Reset CU counters for the next interval
sc.totalCU = 0
sc.methodCU = make(map[string]int)
// Reset error count for the next interval
sc.errorCount = 0
sc.skippedSecondaryRequests = 0
sc.totalRequests = 0
sc.totalWsConnections = 0
// Reset WebSocket connections to prevent memory leak
sc.wsConnections = sc.wsConnections[:0]
// Reset the interval start time for the next interval
sc.intervalStartTime = time.Now()
}
// minInt returns the smaller of two ints; used to clamp percentile indexes
func minInt(a, b int) int {
if a < b {
return a
}
return b
}
// calculateCUForTimeWindow calculates total CU for a given time window
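// Worked example (illustrative): with a 1-minute summary interval and a
// 10-minute window, the sum covers up to ten historical points (whose
// timestamps mark interval ends) plus the in-progress interval; if the proxy
// has only been up for 4 minutes, needsExtrapolation is true.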
func (sc *StatsCollector) calculateCUForTimeWindow(window time.Duration) (int, bool) {
now := time.Now()
cutoff := now.Add(-window)
totalCU := 0
var oldestDataStartTime time.Time
hasData := false
// Add current interval's CU if it started within the window
if sc.intervalStartTime.After(cutoff) {
totalCU += sc.totalCU
oldestDataStartTime = sc.intervalStartTime
hasData = true
}
// Add historical CU data within the window
// Historical timestamps represent the END of intervals
for i := len(sc.historicalCU) - 1; i >= 0; i-- {
point := sc.historicalCU[i]
// Calculate the start time of this historical interval
intervalStart := point.Timestamp.Add(-sc.summaryInterval)
// Skip if the interval ended before our cutoff
if point.Timestamp.Before(cutoff) {
break
}
// Include this interval's CU
totalCU += point.CU
// Track the oldest data start time
if !hasData || intervalStart.Before(oldestDataStartTime) {
oldestDataStartTime = intervalStart
}
hasData = true
}
// Calculate actual data span
var actualDataDuration time.Duration
if hasData {
actualDataDuration = now.Sub(oldestDataStartTime)
}
// We need extrapolation if we don't have enough historical data to cover the window
needsExtrapolation := hasData && actualDataDuration < window
return totalCU, needsExtrapolation
}
// extrapolateCU extrapolates CU data when there's insufficient historical data
func (sc *StatsCollector) extrapolateCU(actualCU int, actualDuration, targetDuration time.Duration) int {
if actualDuration <= 0 {
return 0
}
// Calculate CU per second rate
cuPerSecond := float64(actualCU) / actualDuration.Seconds()
// Extrapolate to target duration
extrapolatedCU := cuPerSecond * targetDuration.Seconds()
return int(extrapolatedCU)
}
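
// Hypothetical, uncalled sketch of the extrapolation arithmetic: 500 CU
// earned over the last 10 minutes projects to 3000 CU over a full hour.
func exampleExtrapolateCU() {
	sc := &StatsCollector{}
	cu := sc.extrapolateCU(500, 10*time.Minute, time.Hour)
	fmt.Printf("extrapolated: %d CU\n", cu) // 3000
}
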
// formatCUWithExtrapolation formats CU value with extrapolation indicator
func formatCUWithExtrapolation(cu int, isExtrapolated bool) string {
if isExtrapolated {
return fmt.Sprintf("%d CU (extrapolated)", cu)
}
return fmt.Sprintf("%d CU", cu)
}
// GetPrimaryP50 calculates the current p50 latency for the primary backend
func (sc *StatsCollector) GetPrimaryP50() time.Duration {
sc.mu.Lock()
defer sc.mu.Unlock()
// Collect primary backend durations
var primaryDurations []time.Duration
for _, stat := range sc.requestStats {
if stat.Backend == "primary" && stat.Error == nil {
primaryDurations = append(primaryDurations, stat.Duration)
}
}
// If we don't have enough data, return a sensible default
if len(primaryDurations) < 10 {
return 10 * time.Millisecond // Default to 10ms
}
// Sort and find p50
sort.Slice(primaryDurations, func(i, j int) bool {
return primaryDurations[i] < primaryDurations[j]
})
p50idx := len(primaryDurations) * 50 / 100
return primaryDurations[p50idx]
}
// GetPrimaryP75ForMethod calculates the current p75 latency for a specific method on the primary backend
func (sc *StatsCollector) GetPrimaryP75ForMethod(method string) time.Duration {
sc.mu.Lock()
defer sc.mu.Unlock()
// Get method-specific durations for primary backend
if durations, exists := sc.methodStats[method]; exists && len(durations) >= 5 {
// Make a copy to avoid modifying the original
durationsCopy := make([]time.Duration, len(durations))
copy(durationsCopy, durations)
// Sort and find p75
sort.Slice(durationsCopy, func(i, j int) bool {
return durationsCopy[i] < durationsCopy[j]
})
p75idx := len(durationsCopy) * 75 / 100
if p75idx >= len(durationsCopy) {
p75idx = len(durationsCopy) - 1
}
return durationsCopy[p75idx]
}
// If we don't have enough method-specific data, calculate global p75 here
// (instead of calling GetPrimaryP50 which would cause nested mutex lock)
var primaryDurations []time.Duration
for _, stat := range sc.requestStats {
if stat.Backend == "primary" && stat.Error == nil {
primaryDurations = append(primaryDurations, stat.Duration)
}
}
// If we don't have enough data, return a sensible default
if len(primaryDurations) < 10 {
return 15 * time.Millisecond // Default to 15ms for p75
}
// Sort and find p75
sort.Slice(primaryDurations, func(i, j int) bool {
return primaryDurations[i] < primaryDurations[j]
})
p75idx := len(primaryDurations) * 75 / 100
if p75idx >= len(primaryDurations) {
p75idx = len(primaryDurations) - 1
}
return primaryDurations[p75idx]
}
// GetPrimaryP50ForMethod calculates the current p50 latency for a specific method on the primary backend
func (sc *StatsCollector) GetPrimaryP50ForMethod(method string) time.Duration {
sc.mu.Lock()
defer sc.mu.Unlock()
// Get method-specific durations for primary backend
if durations, exists := sc.methodStats[method]; exists && len(durations) >= 5 {
// Make a copy to avoid modifying the original
durationsCopy := make([]time.Duration, len(durations))
copy(durationsCopy, durations)
// Sort and find p50
sort.Slice(durationsCopy, func(i, j int) bool {
return durationsCopy[i] < durationsCopy[j]
})
p50idx := len(durationsCopy) * 50 / 100
return durationsCopy[p50idx]
}
// If we don't have enough method-specific data, calculate global p50 here
// (instead of calling GetPrimaryP50 which would cause nested mutex lock)
var primaryDurations []time.Duration
for _, stat := range sc.requestStats {
if stat.Backend == "primary" && stat.Error == nil {
primaryDurations = append(primaryDurations, stat.Duration)
}
}
// If we don't have enough data, return a sensible default
if len(primaryDurations) < 10 {
return 10 * time.Millisecond // Default to 10ms
}
// Sort and find p50
sort.Slice(primaryDurations, func(i, j int) bool {
return primaryDurations[i] < primaryDurations[j]
})
p50idx := len(primaryDurations) * 50 / 100
return primaryDurations[p50idx]
}
// isStatefulMethod returns true if the method requires session state and must always go to primary
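// For example, an eth_newFilter created on one backend returns a filter ID
// that exists only there; a later eth_getFilterChanges routed to a different
// backend would fail, so all of these must pin to the primary.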
func isStatefulMethod(method string) bool {
statefulMethods := map[string]bool{
// Filter methods - these create server-side state
"eth_newFilter": true,
"eth_newBlockFilter": true,
"eth_newPendingTransactionFilter": true,
"eth_getFilterChanges": true,
"eth_getFilterLogs": true,
"eth_uninstallFilter": true,
// Subscription methods (WebSocket) - maintain persistent connections
"eth_subscribe": true,
"eth_unsubscribe": true,
"eth_subscription": true, // Notification method
// Some debug/trace methods might maintain state depending on implementation
// But these are typically stateless, so not included here
}
return statefulMethods[method]
}
// isExpensiveMethod returns true if the method is computationally expensive and should be offloaded to secondary
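// This static list is independent of the stats-driven isExpensiveMethodByStats below;
// the two can disagree, e.g. for a debug_* call that happens to be cheap on this node.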
func isExpensiveMethod(method string) bool {
expensiveMethods := map[string]bool{
// Debug methods - typically very expensive
"debug_traceBlockByHash": true,
"debug_traceBlockByNumber": true,
"debug_traceCall": true,
"debug_traceTransaction": true,
"debug_storageRangeAt": true,
"debug_getModifiedAccountsByHash": true,
"debug_getModifiedAccountsByNumber": true,
// Trace methods - expensive computation
"trace_block": true,
"trace_call": true,
"trace_callMany": true,
"trace_filter": true,
"trace_get": true,
"trace_rawTransaction": true,
"trace_replayBlockTransactions": true,
"trace_replayTransaction": true,
"trace_transaction": true,
// VM trace variants - extremely expensive
"trace_replayBlockTransactions#vmTrace": true,
"trace_replayTransaction#vmTrace": true,
}
return expensiveMethods[method]
}
// isExpensiveMethodByStats determines if a method is expensive based on actual performance statistics
func (sc *StatsCollector) isExpensiveMethodByStats(method string) bool {
sc.mu.Lock()
defer sc.mu.Unlock()
// Need sufficient data to make a determination
const minSamplesForAnalysis = 10
const expensiveThresholdMultiplier = 5.0
// Get all method durations for primary backend to calculate average
var allDurations []time.Duration
totalMethods := 0
// Collect all durations from primary backend across all methods
if methodStats, exists := sc.backendMethodStats["primary"]; exists {
for _, durations := range methodStats {
if len(durations) >= minSamplesForAnalysis {
allDurations = append(allDurations, durations...)
totalMethods++
}
}
}
// Need sufficient methods and samples for meaningful analysis
if len(allDurations) < minSamplesForAnalysis*3 || totalMethods < 3 {
return false // Not enough data, be conservative
}
// Calculate overall average latency across all methods on primary
var totalDuration time.Duration
for _, duration := range allDurations {
totalDuration += duration
}
averageLatency := totalDuration / time.Duration(len(allDurations))
// Get this method's durations from primary backend
var methodDurations []time.Duration
if methodStats, exists := sc.backendMethodStats["primary"]; exists {
if durations, methodExists := methodStats[method]; methodExists {
methodDurations = durations
}
}
// Need sufficient samples for this specific method
if len(methodDurations) < minSamplesForAnalysis {
return false // Not enough data for this method
}
// Find minimum duration for this method (best case performance)
minDuration := methodDurations[0]
for _, duration := range methodDurations {
if duration < minDuration {
minDuration = duration
}
}
// Method is expensive if its minimum observed time is >= expensiveThresholdMultiplier x the overall average
expensiveThreshold := time.Duration(float64(averageLatency) * expensiveThresholdMultiplier)
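// Worked example (illustrative numbers): with a 20ms overall average the threshold is
// 5*20ms = 100ms, so a method whose fastest sample is 150ms is classified expensive,
// while one averaging 150ms with a 40ms best case is not. Comparing the minimum rather
// than the mean keeps the check robust to occasional slow outliers.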
return minDuration >= expensiveThreshold
}
// hasAvailableSecondaryAtChainHead checks if there are synchronized and available secondary backends
func hasAvailableSecondaryAtChainHead(backends []Backend, blockHeightTracker *BlockHeightTracker, secondaryProbe *SecondaryProbe) bool {
if blockHeightTracker == nil {
return false
}
for _, backend := range backends {
if backend.Role == "secondary" && !blockHeightTracker.isSecondaryBehind(backend.Name) {
// Also check if the backend is available (not marked unavailable due to errors)
if secondaryProbe != nil && !secondaryProbe.isBackendAvailable(backend.Name) {
continue // Skip unavailable backends
}
return true
}
}
return false
}
// selectBestSecondaryForExpensiveMethod intelligently selects which secondary backend to use for expensive queries
// It prefers backends whose probe latency doesn't exceed half the minimum response time for the expensive method
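// Worked example (illustrative numbers): secondaries probing at 5ms, 12ms and 40ms with a
// 60ms best primary time give a 30ms threshold; {5ms, 12ms} are acceptable and the slowest
// acceptable one (12ms) is chosen, leaving the 5ms backend free for latency-sensitive traffic.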
func (sc *StatsCollector) selectBestSecondaryForExpensiveMethod(method string, backends []Backend,
blockHeightTracker *BlockHeightTracker, secondaryProbe *SecondaryProbe) *Backend {
if blockHeightTracker == nil || secondaryProbe == nil {
return nil
}
// Get available synchronized secondary backends
var availableSecondaries []Backend
for _, backend := range backends {
if backend.Role == "secondary" &&
!blockHeightTracker.isSecondaryBehind(backend.Name) &&
secondaryProbe.isBackendAvailable(backend.Name) {
availableSecondaries = append(availableSecondaries, backend)
}
}
if len(availableSecondaries) == 0 {
return nil
}
// If only one secondary available, use it
if len(availableSecondaries) == 1 {
return &availableSecondaries[0]
}
// Get minimum response time for this expensive method on primary
sc.mu.Lock()
var methodMinTime time.Duration
if methodStats, exists := sc.backendMethodStats["primary"]; exists {
if durations, methodExists := methodStats[method]; methodExists && len(durations) > 0 {
methodMinTime = durations[0]
for _, duration := range durations {
if duration < methodMinTime {
methodMinTime = duration
}
}
}
}
sc.mu.Unlock()
// If we don't have stats for this method, use any available secondary
if methodMinTime == 0 {
return &availableSecondaries[0]
}
// Calculate the threshold: probe latency should not exceed half the expensive method's min time
probeLatencyThreshold := methodMinTime / 2
// Evaluate secondary backends based on their probe latencies
type secondaryCandidate struct {
backend Backend
probeLatency time.Duration
isAcceptable bool
}
var candidates []secondaryCandidate
secondaryProbe.mu.RLock()
for _, backend := range availableSecondaries {
probeLatency := secondaryProbe.minResponseTime // Default to overall minimum
// Use backend-specific timing if available
if backendTiming, exists := secondaryProbe.backendTimings[backend.Name]; exists {
probeLatency = backendTiming
}
candidate := secondaryCandidate{
backend: backend,
probeLatency: probeLatency,
isAcceptable: probeLatency <= probeLatencyThreshold,
}
candidates = append(candidates, candidate)
}
secondaryProbe.mu.RUnlock()
// Sort candidates by probe latency (ascending)
sort.Slice(candidates, func(i, j int) bool {
return candidates[i].probeLatency < candidates[j].probeLatency
})
// Strategy: Use the slowest backend among those that meet the criteria
// This preserves faster backends for quicker queries
var selectedCandidate *secondaryCandidate
// First, try to find acceptable candidates (probe latency <= threshold)
for i := len(candidates) - 1; i >= 0; i-- {
if candidates[i].isAcceptable {
selectedCandidate = &candidates[i]
break // Take the slowest acceptable one
}
}
// If no acceptable candidates, fall back to the slowest available
if selectedCandidate == nil {
selectedCandidate = &candidates[len(candidates)-1]
}
return &selectedCandidate.backend
}
// flushingResponseWriter wraps http.ResponseWriter to flush after every write
type flushingResponseWriter struct {
http.ResponseWriter
flusher http.Flusher
}
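// Write forwards to the underlying ResponseWriter and flushes after every successful
// write. Combined with io.Copy's default 32KB buffer this streams large JSON-RPC
// responses to the client in roughly 32KB flushed chunks instead of buffering them.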
func (f *flushingResponseWriter) Write(p []byte) (n int, err error) {
n, err = f.ResponseWriter.Write(p)
if err == nil && n > 0 {
f.flusher.Flush()
}
return
}
func main() {
// Get configuration from environment variables
listenAddr := getEnv("LISTEN_ADDR", ":8080")
primaryBackend := getEnv("PRIMARY_BACKEND", "http://localhost:8545")
secondaryBackendsStr := getEnv("SECONDARY_BACKENDS", "")
summaryIntervalStr := getEnv("SUMMARY_INTERVAL", "60") // Default 60 seconds
enableDetailedLogs := getEnv("ENABLE_DETAILED_LOGS", "false") == "true" // Default to disabled
// Secondary probe configuration
enableSecondaryProbing := getEnv("ENABLE_SECONDARY_PROBING", "true") == "true"
probeIntervalStr := getEnv("PROBE_INTERVAL", "10") // Default 10 seconds
minDelayBufferStr := getEnv("MIN_DELAY_BUFFER", "2") // Default 2ms buffer
probeMethodsStr := getEnv("PROBE_METHODS", "eth_blockNumber,net_version,eth_chainId")
// Block height tracking configuration
enableBlockHeightTracking := getEnv("ENABLE_BLOCK_HEIGHT_TRACKING", "true") == "true"
blockHeightCheckIntervalStr := getEnv("BLOCK_HEIGHT_CHECK_INTERVAL", "5") // Default 5 seconds (polling fallback; the WebSocket-based tracker doesn't poll)
maxBlockBehindStr := getEnv("MAX_BLOCKS_BEHIND", "1") // Default 1 block behind
// Expensive method routing configuration
enableExpensiveMethodRouting := getEnv("ENABLE_EXPENSIVE_METHOD_ROUTING", "true") == "true"
summaryInterval, err := strconv.Atoi(summaryIntervalStr)
if err != nil {
log.Printf("Invalid SUMMARY_INTERVAL, using default of 60 seconds")
summaryInterval = 60
}
probeInterval, err := strconv.Atoi(probeIntervalStr)
if err != nil {
log.Printf("Invalid PROBE_INTERVAL, using default of 10 seconds")
probeInterval = 10
}
minDelayBuffer, err := strconv.Atoi(minDelayBufferStr)
if err != nil {
log.Printf("Invalid MIN_DELAY_BUFFER, using default of 2ms")
minDelayBuffer = 2
}
blockHeightCheckInterval, err := strconv.Atoi(blockHeightCheckIntervalStr)
if err != nil {
log.Printf("Invalid BLOCK_HEIGHT_CHECK_INTERVAL, using default of 5 seconds")
blockHeightCheckInterval = 5
}
maxBlocksBehind, err := strconv.ParseUint(maxBlockBehindStr, 10, 64)
if err != nil {
log.Printf("Invalid MAX_BLOCKS_BEHIND, using default of 1 block")
maxBlocksBehind = 1
}
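// Example invocation (values illustrative):
//   LISTEN_ADDR=:8080 PRIMARY_BACKEND=http://geth:8545 \
//   SECONDARY_BACKENDS=http://erigon:8545 ENABLE_DETAILED_LOGS=true ./benchmark-proxy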
// Create stats collector for periodic summaries
statsCollector := NewStatsCollector(time.Duration(summaryInterval)*time.Second, secondaryBackendsStr != "")
// Configure backends
var backends []Backend
backends = append(backends, Backend{
URL: primaryBackend,
Name: "primary",
Role: "primary",
})
if secondaryBackendsStr != "" {
secondaryList := strings.Split(secondaryBackendsStr, ",")
for i, url := range secondaryList {
backends = append(backends, Backend{
URL: strings.TrimSpace(url),
Name: fmt.Sprintf("secondary-%d", i+1),
Role: "secondary",
})
}
}
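// Example (hostnames illustrative): SECONDARY_BACKENDS="http://erigon:8545, http://reth:8545"
// yields backends named secondary-1 and secondary-2 alongside the primary; whitespace
// around each URL is trimmed.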
log.Printf("Starting benchmark proxy on %s", listenAddr)
log.Printf("Primary backend: %s", primaryBackend)
log.Printf("Secondary backends: %s", secondaryBackendsStr)
if enableSecondaryProbing && secondaryBackendsStr != "" {
log.Printf("Secondary probing: enabled (interval: %ds, buffer: %dms)", probeInterval, minDelayBuffer)
}
if enableExpensiveMethodRouting && secondaryBackendsStr != "" {
log.Printf("Expensive method routing: enabled (trace/debug calls prefer secondary backends)")
}
// Set up HTTP client with reasonable timeouts
client := &http.Client{
Timeout: 30 * time.Second,
Transport: &http.Transport{
MaxIdleConns: 200, // Increased for better connection pooling
MaxIdleConnsPerHost: 50, // Increased per-host limit for probing
IdleConnTimeout: 120 * time.Second, // Longer idle timeout for connection reuse
DisableCompression: true, // Typically JSON-RPC doesn't benefit from compression
DisableKeepAlives: false, // Ensure keep-alives are enabled
MaxConnsPerHost: 50, // Limit concurrent connections per host
},
}
// Initialize secondary probe if enabled and we have secondary backends
var secondaryProbe *SecondaryProbe
if enableSecondaryProbing && secondaryBackendsStr != "" {
probeMethods := strings.Split(probeMethodsStr, ",")
for i := range probeMethods {
probeMethods[i] = strings.TrimSpace(probeMethods[i])
}
secondaryProbe = NewSecondaryProbe(
backends,
client,
time.Duration(probeInterval)*time.Second,
time.Duration(minDelayBuffer)*time.Millisecond,
probeMethods,
enableDetailedLogs,
)
if secondaryProbe == nil {
log.Printf("Secondary probe initialization failed - no secondary backends found")
} else {
// Set the probe in stats collector for display
statsCollector.SetSecondaryProbe(secondaryProbe)
}
}
// Initialize block height tracker if enabled and we have secondary backends
var blockHeightTracker *BlockHeightTracker
if enableBlockHeightTracking && secondaryBackendsStr != "" {
blockHeightTracker = NewBlockHeightTracker(
backends,
client,
time.Duration(blockHeightCheckInterval)*time.Second,
maxBlocksBehind,
enableDetailedLogs,
)
if blockHeightTracker == nil {
log.Printf("Block height tracker initialization failed")
} else {
log.Printf("Block height tracking: enabled (WebSocket-based, max blocks behind: %d)",
maxBlocksBehind)
// Set the tracker in stats collector for display
statsCollector.SetBlockHeightTracker(blockHeightTracker)
}
}
// Configure websocket upgrader with large I/O buffers.
// Note: gorilla/websocket's Read/WriteBufferSize set I/O buffer sizes, not hard frame limits;
// the 50MB message cap is enforced separately via SetReadLimit on each connection.
const (
maxFrameSize = 20 * 1024 * 1024 // 20MB
maxMessageSize = 50 * 1024 * 1024 // 50MB
)
upgrader := websocket.Upgrader{
ReadBufferSize: maxFrameSize,
WriteBufferSize: maxFrameSize,
// Allow all origins
CheckOrigin: func(r *http.Request) bool { return true },
}
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
// Check if this is a WebSocket upgrade request
if websocket.IsWebSocketUpgrade(r) {
handleWebSocketRequest(w, r, backends, client, &upgrader, statsCollector)
} else {
// Handle regular HTTP request
handleRequest(w, r, backends, client, enableDetailedLogs, statsCollector, secondaryProbe, blockHeightTracker, enableExpensiveMethodRouting)
}
})
log.Fatal(http.ListenAndServe(listenAddr, nil))
}
func handleRequest(w http.ResponseWriter, r *http.Request, backends []Backend, client *http.Client, enableDetailedLogs bool, statsCollector *StatsCollector, secondaryProbe *SecondaryProbe, blockHeightTracker *BlockHeightTracker, enableExpensiveMethodRouting bool) {
startTime := time.Now()
// Create a context that will cancel after 35 seconds (5s buffer over backend timeout)
ctx, cancel := context.WithTimeout(r.Context(), 35*time.Second)
defer cancel()
// Limit request body size to 10MB to prevent memory exhaustion
const maxBodySize = 10 * 1024 * 1024 // 10MB
r.Body = http.MaxBytesReader(w, r.Body, maxBodySize)
// Read the entire request body
body, err := io.ReadAll(r.Body)
if err != nil {
// Check if the error came from http.MaxBytesReader (its message contains "request body too large")
if strings.Contains(err.Error(), "request body too large") {
http.Error(w, "Request body too large (max 10MB)", http.StatusRequestEntityTooLarge)
} else {
http.Error(w, "Error reading request body", http.StatusBadRequest)
}
return
}
defer r.Body.Close()
// Parse request to extract method information (handles both single and batch)
batchInfo, err := parseBatchInfo(body)
if err != nil {
http.Error(w, "Invalid JSON-RPC request", http.StatusBadRequest)
return
}
// For logging and stats, use the first method or "batch" for batch requests
var displayMethod string
var isStateful bool
if batchInfo.IsBatch {
displayMethod = fmt.Sprintf("batch[%d]", batchInfo.RequestCount)
isStateful = batchInfo.HasStateful
} else {
displayMethod = batchInfo.Methods[0]
if displayMethod == "" {
displayMethod = "unknown"
}
isStateful = batchInfo.HasStateful
}
// Get delay threshold for secondary backends
var secondaryDelay time.Duration
if batchInfo.IsBatch {
// For batch requests, use the maximum delay of all methods
secondaryDelay = calculateBatchDelay(batchInfo.Methods, secondaryProbe, statsCollector)
} else {
// For single requests, use method-specific delay
method := batchInfo.Methods[0]
if secondaryProbe != nil {
secondaryDelay = secondaryProbe.getDelayForMethod(method)
} else {
secondaryDelay = statsCollector.GetPrimaryP75ForMethod(method)
}
}
if enableDetailedLogs {
if batchInfo.IsBatch {
log.Printf("Batch request: %d requests, methods: %s, max delay: %s (probe-based: %v)",
batchInfo.RequestCount, formatMethodList(batchInfo.Methods),
secondaryDelay, secondaryProbe != nil)
} else {
log.Printf("Method: %s, Secondary delay: %s (probe-based: %v)",
displayMethod, secondaryDelay, secondaryProbe != nil)
}
}
// Process backends with adaptive delay strategy
var wg sync.WaitGroup
var primaryWg sync.WaitGroup // Separate wait group for primary backend
statsChan := make(chan ResponseStats, len(backends))
responseChan := make(chan struct {
backend string
resp *http.Response
err error
}, len(backends))
// Track if we've already sent a response
var responseHandled atomic.Bool
var firstBackendStartTime atomic.Pointer[time.Time]
primaryResponseChan := make(chan struct{}, 1) // Signal when primary gets a response
primaryFailedFast := make(chan struct{}, 1) // Signal when primary fails immediately
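// Racing strategy used below: the primary starts immediately, while each secondary
// waits up to secondaryDelay. A fast, successful primary response cancels the
// secondaries via primaryResponseChan; an immediate primary failure releases them
// early via primaryFailedFast. responseHandled guarantees exactly one backend's
// response is written to the client.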
// Check if this is an expensive method that should be offloaded to secondary
var isExpensive bool
var hasExpensiveMethod bool
if batchInfo.IsBatch {
// Check if any method in the batch is expensive based on stats
for _, method := range batchInfo.Methods {
if statsCollector.isExpensiveMethodByStats(method) {
hasExpensiveMethod = true
if enableDetailedLogs {
log.Printf("Stats-based expensive method detected in batch: %s", method)
}
break
}
}
} else {
hasExpensiveMethod = statsCollector.isExpensiveMethodByStats(batchInfo.Methods[0])
if hasExpensiveMethod && enableDetailedLogs {
log.Printf("Stats-based expensive method detected: %s", batchInfo.Methods[0])
}
}
isExpensive = hasExpensiveMethod
// For expensive methods, intelligently select the best secondary backend
var preferSecondary bool
var selectedSecondary *Backend
if enableExpensiveMethodRouting && isExpensive && !isStateful {
// Determine the method name for selection (use first method for batches)
methodForSelection := displayMethod
if batchInfo.IsBatch && len(batchInfo.Methods) > 0 {
methodForSelection = batchInfo.Methods[0]
}
selectedSecondary = statsCollector.selectBestSecondaryForExpensiveMethod(
methodForSelection, backends, blockHeightTracker, secondaryProbe)
if selectedSecondary != nil {
preferSecondary = true
if enableDetailedLogs {
log.Printf("Expensive method detected (%s), selected secondary backend: %s",
displayMethod, selectedSecondary.Name)
}
}
}
for _, backend := range backends {
// Skip secondary backends for stateful methods
if isStateful && backend.Role != "primary" {
continue
}
// Skip secondary backends if they're behind in block height
if backend.Role != "primary" && blockHeightTracker != nil {
if blockHeightTracker.isSecondaryBehind(backend.Name) {
if enableDetailedLogs {
blockStatus := blockHeightTracker.getBlockHeightStatus()
log.Printf("Skipping secondary backend %s - behind in block height: %v", backend.Name, blockStatus)
}
// Record that we skipped this secondary backend due to block height
statsChan <- ResponseStats{
Backend: backend.Name,
Error: fmt.Errorf("skipped - behind in block height"),
Method: displayMethod,
Duration: 0, // No actual request made
}
continue
}
}
// Skip secondary backends if they're marked as unavailable due to errors
if backend.Role != "primary" && secondaryProbe != nil {
if !secondaryProbe.isBackendAvailable(backend.Name) {
if enableDetailedLogs {
log.Printf("Skipping secondary backend %s - marked as unavailable due to errors", backend.Name)
}
// Record that we skipped this secondary backend due to availability
statsChan <- ResponseStats{
Backend: backend.Name,
Error: fmt.Errorf("skipped - marked as unavailable"),
Method: displayMethod,
Duration: 0, // No actual request made
}
continue
}
}
// For expensive methods, only use the selected secondary backend
if preferSecondary && backend.Role != "primary" {
if selectedSecondary == nil || backend.Name != selectedSecondary.Name {
// Skip this secondary - it's not the selected one for expensive method
if enableDetailedLogs {
log.Printf("Skipping secondary backend %s - not selected for expensive method %s",
backend.Name, displayMethod)
}
statsChan <- ResponseStats{
Backend: backend.Name,
Error: fmt.Errorf("skipped - not selected for expensive method"),
Method: displayMethod,
Duration: 0,
}
continue
}
}
// For expensive methods with a selected secondary, keep the primary in the pool but delay it (see below) so the secondary gets priority
if preferSecondary && backend.Role == "primary" {
// Still add primary to the pool but with a delay to let secondaries try first
if enableDetailedLogs {
log.Printf("Delaying primary backend for expensive method %s to allow secondary backends priority", displayMethod)
}
}
wg.Add(1)
if backend.Role == "primary" {
primaryWg.Add(1)
}
go func(b Backend) {
defer wg.Done()
if b.Role == "primary" {
defer primaryWg.Done()
}
// Track when this goroutine actually starts processing
goroutineStartTime := time.Now()
// Record the first backend start time (should be primary)
if b.Role == "primary" {
t := goroutineStartTime
firstBackendStartTime.Store(&t)
}
// Delay logic based on backend role and method type
if b.Role != "primary" {
// Secondary backend: apply normal delay logic unless it's an expensive method
if !preferSecondary {
// Normal delay for secondary backends
delayTimer := time.NewTimer(secondaryDelay)
select {
case <-delayTimer.C:
// Timer expired, primary is slow, proceed with secondary request
case <-primaryResponseChan:
// Primary already got a response, skip secondary
delayTimer.Stop()
// Still record that we skipped this backend
statsChan <- ResponseStats{
Backend: b.Name,
Error: fmt.Errorf("skipped - primary responded quickly"),
Method: displayMethod,
Duration: time.Since(goroutineStartTime),
}
return
case <-primaryFailedFast:
// Primary failed immediately, start secondary now
delayTimer.Stop()
if enableDetailedLogs {
log.Printf("Primary failed fast for %s, starting secondary immediately", displayMethod)
}
}
} else {
// Expensive method: secondary backends start immediately
if enableDetailedLogs {
log.Printf("Starting secondary backend %s immediately for expensive method %s", b.Name, displayMethod)
}
}
} else if preferSecondary {
// Primary backend: delay for expensive methods to give secondaries priority
expensiveMethodDelay := 50 * time.Millisecond // Give secondaries a head start
delayTimer := time.NewTimer(expensiveMethodDelay)
select {
case <-delayTimer.C:
// Timer expired, proceed with primary request
if enableDetailedLogs {
log.Printf("Starting primary backend after delay for expensive method %s", displayMethod)
}
case <-primaryFailedFast:
// Defensive: primaryFailedFast shouldn't fire before the primary has sent its request, but don't block on it
delayTimer.Stop()
}
}
// Create a new request without the client's context, so a client disconnect doesn't cancel in-flight backend requests whose timings we still collect
backendReq, err := http.NewRequest(r.Method, b.URL, bytes.NewReader(body))
if err != nil {
statsChan <- ResponseStats{
Backend: b.Name,
Error: err,
Method: displayMethod,
Duration: time.Since(goroutineStartTime), // Include any wait time
}
return
}
// Copy headers
for name, values := range r.Header {
for _, value := range values {
backendReq.Header.Add(name, value)
}
}
// Send the request
reqStart := time.Now()
resp, err := client.Do(backendReq)
reqDuration := time.Since(reqStart)
if err != nil {
// If primary failed with connection error, signal secondary to start
if b.Role == "primary" {
select {
case primaryFailedFast <- struct{}{}:
default:
}
}
statsChan <- ResponseStats{
Backend: b.Name,
Duration: reqDuration, // Keep backend-specific duration
Error: err,
Method: displayMethod,
}
return
}
// Don't close resp.Body here - it will be closed by the winner or drained by losers
// Check if primary returned an error status
if b.Role == "primary" && resp.StatusCode >= 400 {
// Any 4xx or 5xx error should trigger immediate secondary
select {
case primaryFailedFast <- struct{}{}:
default:
}
}
// Signal primary response immediately for secondary backends to check
if b.Role == "primary" && resp.StatusCode < 400 {
select {
case primaryResponseChan <- struct{}{}:
default:
// Channel already has a signal
}
}
statsChan <- ResponseStats{
Backend: b.Name,
StatusCode: resp.StatusCode,
Duration: reqDuration, // This is the backend-specific duration
Method: displayMethod,
}
// Try to be the first to respond
// Primary backend always wins, secondary only wins with successful responses
shouldWin := false
if b.Role == "primary" {
// Primary backend always tries to win (whether success or error)
shouldWin = responseHandled.CompareAndSwap(false, true)
} else {
// Secondary backend only tries to win if response is successful
if resp.StatusCode < 400 {
shouldWin = responseHandled.CompareAndSwap(false, true)
} else {
// Secondary backend has error status, don't try to win
if enableDetailedLogs {
log.Printf("Secondary backend %s returned error status %d, not using response", b.Name, resp.StatusCode)
}
}
}
if shouldWin {
responseChan <- struct {
backend string
resp *http.Response
err error
}{b.Name, resp, nil}
} else {
// Not the winning response: drain and close the body so the Transport
// can reuse the keep-alive connection. Use a goroutine with a timeout to prevent hanging.
go func() {
defer resp.Body.Close()
// Create a deadline for draining
done := make(chan struct{})
go func() {
io.Copy(io.Discard, resp.Body)
close(done)
}()
select {
case <-done:
// Drained successfully
case <-time.After(5 * time.Second):
// Timeout draining, just close
if enableDetailedLogs {
log.Printf("Timeout draining response from backend %s", b.Name)
}
}
}()
}
}(backend)
}
// Wait for the first successful response
var response struct {
backend string
resp *http.Response
err error
}
var responseReceivedTime time.Time
select {
case response = <-responseChan:
// Got a response
responseReceivedTime = time.Now()
case <-time.After(30 * time.Second):
// Timeout
if !responseHandled.CompareAndSwap(false, true) {
// Someone else handled it
response = <-responseChan
responseReceivedTime = time.Now()
} else {
http.Error(w, "Timeout waiting for any backend", http.StatusGatewayTimeout)
// Always wait for primary backend to complete before collecting stats
go func() {
primaryWg.Wait() // Wait for primary first
wg.Wait() // Then wait for all
close(statsChan)
}()
// Collect stats
var stats []ResponseStats
for stat := range statsChan {
stats = append(stats, stat)
}
return
}
}
// Send the response to the client
if response.err == nil && response.resp != nil {
defer response.resp.Body.Close()
// Copy response headers
for name, values := range response.resp.Header {
for _, value := range values {
w.Header().Add(name, value)
}
}
// Ensure we flush data as it arrives for better streaming
flusher, canFlush := w.(http.Flusher)
w.WriteHeader(response.resp.StatusCode)
// Track when streaming started
streamingStartTime := time.Now()
// Stream the response body to the client with proper error handling
done := make(chan error, 1)
go func() {
// Use a wrapper that flushes after every write when the ResponseWriter supports it
var err error
if canFlush {
// io.Copy writes in ~32KB chunks by default; the wrapper flushes after each write
flushingWriter := &flushingResponseWriter{
ResponseWriter: w,
flusher: flusher,
}
_, err = io.Copy(flushingWriter, response.resp.Body)
} else {
_, err = io.Copy(w, response.resp.Body)
}
done <- err
}()
select {
case streamErr := <-done:
if streamErr != nil {
if enableDetailedLogs {
log.Printf("Error streaming response body: %v", streamErr)
}
// Connection is likely broken, can't send error to client
}
case <-ctx.Done():
// Context timeout - client connection might be gone
if enableDetailedLogs {
streamingDuration := time.Since(streamingStartTime)
totalRequestDuration := time.Since(startTime)
// Determine if it was a timeout or cancellation
reason := "timeout"
if ctx.Err() == context.Canceled {
reason = "client disconnection"
}
log.Printf("Context cancelled while streaming response from backend '%s' (method: %s) after streaming for %s (total request time: %s) - reason: %s",
response.backend, displayMethod, streamingDuration, totalRequestDuration, reason)
}
}
} else {
// No valid response received from any backend
http.Error(w, "All backends failed", http.StatusBadGateway)
}
// Collect stats asynchronously to avoid blocking the response
go func() {
// Always wait for primary backend to complete before collecting stats
// This ensures primary backend stats are always included
primaryWg.Wait() // Wait for primary backend to complete first
wg.Wait() // Then wait for all other backends
close(statsChan)
// Collect stats
var stats []ResponseStats
for stat := range statsChan {
stats = append(stats, stat)
}
// Log response times if enabled
totalDuration := time.Since(startTime)
if enableDetailedLogs {
logResponseStats(totalDuration, stats)
}
// Add the actual user-experienced duration for the winning response
if response.err == nil && response.backend != "" {
// Find the stat for the winning backend and update it with the actual user-experienced duration
for i := range stats {
if stats[i].Backend == response.backend && stats[i].Error == nil {
// Calculate user latency from when the first backend started processing
var userLatency time.Duration
if firstStart := firstBackendStartTime.Load(); firstStart != nil && !responseReceivedTime.IsZero() {
userLatency = responseReceivedTime.Sub(*firstStart)
} else {
// Fallback to original calculation if somehow we don't have the times
userLatency = time.Since(startTime)
}
// Create a special stat entry for the actual first response time
actualFirstResponseStat := ResponseStats{
Backend: "actual-first-response",
StatusCode: stats[i].StatusCode,
Duration: userLatency,
Error: nil,
Method: stats[i].Method,
}
stats = append(stats, actualFirstResponseStat)
break
}
}
}
// Send stats to collector
statsCollector.AddStats(stats, 0)
// If this was a successful batch request from primary, add batch stats for CU calculation
if batchInfo.IsBatch && response.err == nil && response.backend == "primary" {
// Find the primary backend stat with successful response
for _, stat := range stats {
if stat.Backend == "primary" && stat.Error == nil {
statsCollector.AddBatchStats(batchInfo.Methods, stat.Duration, "primary")
break
}
}
}
}()
// The response has already been streamed to the client; stats collection continues in the background.
}
func logResponseStats(totalDuration time.Duration, stats []ResponseStats) {
// Format: timestamp | total_time | method | backend1:time1 | backend2:time2 | ...
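// Example (illustrative values):
// 2025-01-02 15:04:05.000 | total:12.4ms | method:eth_call | primary:200:11.9ms | secondary-1:error:skipped - primary responded quickly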
var parts []string
parts = append(parts, time.Now().Format("2006-01-02 15:04:05.000"))
parts = append(parts, fmt.Sprintf("total:%s", totalDuration))
// Add method if available (use the first stat with a method)
method := "unknown"
for _, stat := range stats {
if stat.Method != "" {
method = stat.Method
break
}
}
parts = append(parts, fmt.Sprintf("method:%s", method))
for _, stat := range stats {
if stat.Error != nil {
parts = append(parts, fmt.Sprintf("%s:error:%s", stat.Backend, stat.Error))
} else {
parts = append(parts, fmt.Sprintf("%s:%d:%s", stat.Backend, stat.StatusCode, stat.Duration))
}
}
fmt.Println(strings.Join(parts, " | "))
}
// handleWebSocketRequest manages WebSocket proxying
func handleWebSocketRequest(w http.ResponseWriter, r *http.Request, backends []Backend,
httpClient *http.Client, upgrader *websocket.Upgrader,
statsCollector *StatsCollector) {
// Upgrade the client connection
clientConn, err := upgrader.Upgrade(w, r, nil)
if err != nil {
log.Printf("Failed to upgrade client connection: %v", err)
return
}
defer clientConn.Close()
// Set max message size for client connection
clientConn.SetReadLimit(50 * 1024 * 1024) // 50MB message size limit
// Connect to all backends
var wg sync.WaitGroup
primaryConnected := make(chan bool, 1) // Track if primary connected successfully
for _, backend := range backends {
wg.Add(1)
go func(b Backend) {
defer wg.Done()
// Create backend URL with ws/wss instead of http/https
backendURL := strings.Replace(b.URL, "http://", "ws://", 1)
backendURL = strings.Replace(backendURL, "https://", "wss://", 1)
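// e.g. http://geth:8545 -> ws://geth:8545, https://node.example.com -> wss://node.example.com
// (hosts illustrative; only the scheme changes)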
// Create a clean header map for the dialer
header := http.Header{}
// Only copy non-WebSocket headers
if origin := r.Header.Get("Origin"); origin != "" {
header.Set("Origin", origin)
}
if userAgent := r.Header.Get("User-Agent"); userAgent != "" {
header.Set("User-Agent", userAgent)
}
startTime := time.Now()
// Connect to backend WebSocket with larger buffer sizes
dialer := &websocket.Dialer{
ReadBufferSize: 20 * 1024 * 1024, // 20MB
WriteBufferSize: 20 * 1024 * 1024, // 20MB
}
backendConn, resp, err := dialer.Dial(backendURL, header)
connectDuration := time.Since(startTime)
stats := WebSocketStats{
Backend: b.Name,
ConnectTime: connectDuration,
IsActive: false,
}
if err != nil {
status := 0
if resp != nil {
status = resp.StatusCode
}
log.Printf("Failed to connect to backend %s: %v (status: %d)", b.Name, err, status)
stats.Error = err
statsCollector.AddWebSocketStats(stats)
// If primary failed to connect, signal that
if b.Role == "primary" {
select {
case primaryConnected <- false:
default:
}
}
return
}
defer backendConn.Close()
// Set max message size for backend connection
backendConn.SetReadLimit(50 * 1024 * 1024) // 50MB message size limit
stats.IsActive = true
statsCollector.AddWebSocketStats(stats)
// If this is the primary backend, signal successful connection
if b.Role == "primary" {
select {
case primaryConnected <- true:
default:
}
}
// If this is the primary backend, set up bidirectional proxying
if b.Role == "primary" {
// Channel to signal when primary connection fails
primaryFailed := make(chan struct{}, 2) // Buffered for 2 signals
// Forward messages from client to primary backend
go func() {
for {
messageType, message, err := clientConn.ReadMessage()
if err != nil {
log.Printf("Error reading from client: %v", err)
select {
case primaryFailed <- struct{}{}:
default:
}
return
}
err = backendConn.WriteMessage(messageType, message)
if err != nil {
log.Printf("Error writing to primary backend: %v", err)
select {
case primaryFailed <- struct{}{}:
default:
}
return
}
}
}()
// Forward messages from primary backend to client
go func() {
for {
messageType, message, err := backendConn.ReadMessage()
if err != nil {
log.Printf("Error reading from primary backend: %v", err)
select {
case primaryFailed <- struct{}{}:
default:
}
return
}
err = clientConn.WriteMessage(messageType, message)
if err != nil {
log.Printf("Error writing to client: %v", err)
select {
case primaryFailed <- struct{}{}:
default:
}
return
}
}
}()
// Wait for primary connection failure
<-primaryFailed
// Primary backend failed, close client connection with proper close message
log.Printf("Primary backend WebSocket failed, closing client connection")
closeMsg := websocket.FormatCloseMessage(websocket.CloseGoingAway,
"Primary backend unavailable")
clientConn.WriteControl(websocket.CloseMessage, closeMsg,
time.Now().Add(time.Second))
// Return to trigger cleanup
return
} else {
// For secondary backends, just read and discard messages
for {
_, _, err := backendConn.ReadMessage()
if err != nil {
log.Printf("Secondary backend %s connection closed: %v", b.Name, err)
return
}
}
}
}(backend)
}
// Wait for all connections to terminate
wg.Wait()
// Check if primary connected successfully
select {
case connected := <-primaryConnected:
if !connected {
// Primary failed to connect, close client connection
log.Printf("Primary backend WebSocket failed to connect, closing client connection")
closeMsg := websocket.FormatCloseMessage(websocket.CloseServiceRestart,
"Primary backend unavailable")
clientConn.WriteControl(websocket.CloseMessage, closeMsg,
time.Now().Add(time.Second))
}
default:
// No primary backend in the configuration (shouldn't happen)
log.Printf("Warning: No primary backend configured for WebSocket")
}
}
func getEnv(key, fallback string) string {
if value, exists := os.LookupEnv(key); exists {
return value
}
return fallback
}
// isBackendAvailable checks if a backend is currently marked as available
func (sp *SecondaryProbe) isBackendAvailable(backendName string) bool {
sp.mu.RLock()
defer sp.mu.RUnlock()
available, exists := sp.backendAvailable[backendName]
return exists && available
}
// getAvailableBackends returns a list of currently available backend names
func (sp *SecondaryProbe) getAvailableBackends() []string {
sp.mu.RLock()
defer sp.mu.RUnlock()
var available []string
for name, isAvailable := range sp.backendAvailable {
if isAvailable {
available = append(available, name)
}
}
return available
}
// updateBackendHealth updates the health status of a backend based on probe results
func (sp *SecondaryProbe) updateBackendHealth(backendName string, isSuccess bool) {
// This method should be called with the mutex already held (write lock)
if isSuccess {
// Reset error count and update last success time
sp.backendErrorCount[backendName] = 0
sp.backendLastSuccess[backendName] = time.Now()
// If backend was marked unavailable, check if we should mark it available again
if !sp.backendAvailable[backendName] {
// For now, mark available immediately on first success
// Could be enhanced to require multiple consecutive successes
sp.backendAvailable[backendName] = true
if sp.enableDetailedLogs {
log.Printf("Backend %s marked as AVAILABLE (recovered from errors)", backendName)
}
}
} else {
// Increment error count
sp.backendErrorCount[backendName]++
// Check if we should mark backend as unavailable
if sp.backendAvailable[backendName] && sp.backendErrorCount[backendName] >= sp.maxErrorThreshold {
sp.backendAvailable[backendName] = false
if sp.enableDetailedLogs {
log.Printf("Backend %s marked as UNAVAILABLE (consecutive errors: %d)",
backendName, sp.backendErrorCount[backendName])
}
}
}
}
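// Example lifecycle (threshold illustrative): with maxErrorThreshold = 3, three
// consecutive probe failures mark a backend unavailable and routing skips it; the
// next successful probe resets the error count and restores it immediately, giving
// fast recovery with modest flap protection.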