From f2b91ac1c0ae0a19010b2d4bcda3b60a784924c9 Mon Sep 17 00:00:00 2001
From: Para Dox
Date: Mon, 26 May 2025 01:58:33 +0700
Subject: [PATCH] benchmark-proxy: report CU over sliding time windows; add
 pinned multinode build targets

---
 benchmark-proxy/main.go | 132 +++++++++++++++++++++++++++++++++++++++-
 reth.Dockerfile         |  25 ++++++++
 2 files changed, 156 insertions(+), 1 deletion(-)

diff --git a/benchmark-proxy/main.go b/benchmark-proxy/main.go
index d631832b..7402b1f3 100644
--- a/benchmark-proxy/main.go
+++ b/benchmark-proxy/main.go
@@ -48,6 +48,12 @@ type WebSocketStats struct {
     MessagesReceived int
 }
 
+// CUDataPoint represents a historical CU data point with timestamp
+type CUDataPoint struct {
+    Timestamp time.Time
+    CU        int
+}
+
 // StatsCollector maintains statistics for periodic summaries
 type StatsCollector struct {
     mu sync.Mutex
@@ -62,6 +68,7 @@ type StatsCollector struct {
     methodCUPrices map[string]int // Map of method names to CU prices
     totalCU        int            // Total CU earned
     methodCU       map[string]int // Track CU earned per method
+    historicalCU   []CUDataPoint  // Historical CU data for different time windows
 }
 
 func NewStatsCollector(summaryInterval time.Duration) *StatsCollector {
@@ -72,6 +79,7 @@ func NewStatsCollector(summaryInterval time.Duration) *StatsCollector {
         summaryInterval: summaryInterval,
         methodCUPrices:  initCUPrices(), // Initialize CU prices
         methodCU:        make(map[string]int),
+        historicalCU:    make([]CUDataPoint, 0, 2000), // Store up to ~24 hours of 1-minute intervals
     }
 
     // Start the periodic summary goroutine
@@ -266,7 +274,49 @@ func (sc *StatsCollector) printSummary() {
     fmt.Printf("Total HTTP Requests: %d\n", sc.totalRequests)
     fmt.Printf("Total WebSocket Connections: %d\n", sc.totalWsConnections)
     fmt.Printf("Error Rate: %.2f%%\n", float64(sc.errorCount)/float64(sc.totalRequests+sc.totalWsConnections)*100)
-    fmt.Printf("Total Compute Units Earned: %d CU\n", sc.totalCU)
+    fmt.Printf("Total Compute Units Earned (current interval): %d CU\n", sc.totalCU)
+
+    // Calculate and display CU for different time windows
+    timeWindows := []struct {
+        duration time.Duration
+        label    string
+    }{
+        {10 * time.Minute, "Last 10 minutes"},
+        {1 * time.Hour, "Last hour"},
+        {3 * time.Hour, "Last 3 hours"},
+        {24 * time.Hour, "Last 24 hours"},
+    }
+
+    fmt.Printf("\nHistorical Compute Units:\n")
+    for _, window := range timeWindows {
+        actualCU, needsExtrapolation := sc.calculateCUForTimeWindow(window.duration)
+
+        if needsExtrapolation {
+            // Calculate actual data duration for extrapolation
+            now := time.Now()
+            cutoff := now.Add(-window.duration)
+            actualDuration := time.Duration(0)
+
+            // Check current interval
+            if sc.startTime.After(cutoff) {
+                actualDuration = now.Sub(sc.startTime)
+            }
+
+            // Check historical data
+            for i := len(sc.historicalCU) - 1; i >= 0; i-- {
+                point := sc.historicalCU[i]
+                if point.Timestamp.Before(cutoff) {
+                    break
+                }
+                actualDuration = now.Sub(point.Timestamp)
+            }
+
+            extrapolatedCU := sc.extrapolateCU(actualCU, actualDuration, window.duration)
+            fmt.Printf("  %s: %s\n", window.label, formatCUWithExtrapolation(extrapolatedCU, true))
+        } else {
+            fmt.Printf("  %s: %s\n", window.label, formatCUWithExtrapolation(actualCU, false))
+        }
+    }
 
     // Calculate response time statistics for primary backend
     var primaryDurations []time.Duration
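The display loop above scales observed CU up to the full window whenever the data coverage falls short. A minimal standalone sketch of that rate scaling, outside the patch (the function name and sample values are illustrative, not part of the proxy):

package main

import (
	"fmt"
	"time"
)

// scaleToWindow mirrors the extrapolation above: derive a CU-per-second
// rate from the span actually covered by data, then project that rate
// across the full window.
func scaleToWindow(observedCU int, covered, window time.Duration) int {
	if covered <= 0 {
		return 0 // no coverage, no rate to project
	}
	rate := float64(observedCU) / covered.Seconds()
	return int(rate * window.Seconds())
}

func main() {
	// 1200 CU observed over 10 minutes projects to 7200 CU over an hour.
	fmt.Println(scaleToWindow(1200, 10*time.Minute, time.Hour))
}

The patch continues with the bookkeeping that feeds this display: each finished summary interval is appended as a data point, and points older than 25 hours are pruned.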
@@ -366,6 +416,27 @@ func (sc *StatsCollector) printSummary() {
 
     fmt.Printf("================================\n\n")
 
+    // Store current interval's CU data in historical data before resetting
+    if sc.totalCU > 0 {
+        sc.historicalCU = append(sc.historicalCU, CUDataPoint{
+            Timestamp: time.Now(),
+            CU:        sc.totalCU,
+        })
+    }
+
+    // Clean up old historical data (keep only last 24 hours + some buffer)
+    cutoff := time.Now().Add(-25 * time.Hour)
+    newStart := 0
+    for i, point := range sc.historicalCU {
+        if point.Timestamp.After(cutoff) {
+            newStart = i
+            break
+        }
+    }
+    if newStart > 0 {
+        sc.historicalCU = sc.historicalCU[newStart:]
+    }
+
     // Reset statistics for the next interval
     // Keep only the last 1000 requests to prevent unlimited memory growth
     if len(sc.requestStats) > 1000 {
@@ -398,6 +469,65 @@ func minInt(a, b int) int {
     return b
 }
 
+// calculateCUForTimeWindow calculates total CU for a given time window
+func (sc *StatsCollector) calculateCUForTimeWindow(window time.Duration) (int, bool) {
+    now := time.Now()
+    cutoff := now.Add(-window)
+
+    totalCU := 0
+    actualDataDuration := time.Duration(0)
+
+    // First add the current interval's CU if it's within the window
+    if sc.startTime.After(cutoff) {
+        totalCU += sc.totalCU
+        actualDataDuration = now.Sub(sc.startTime)
+    }
+
+    // Add historical CU data within the window
+    for i := len(sc.historicalCU) - 1; i >= 0; i-- {
+        point := sc.historicalCU[i]
+        if point.Timestamp.Before(cutoff) {
+            break // Data is too old
+        }
+        totalCU += point.CU
+
+        // Update actual data duration
+        if actualDataDuration == 0 {
+            actualDataDuration = now.Sub(point.Timestamp)
+        } else {
+            actualDataDuration = now.Sub(point.Timestamp)
+        }
+    }
+
+    // Check if we need extrapolation
+    needsExtrapolation := actualDataDuration < window && actualDataDuration > 0
+
+    return totalCU, needsExtrapolation
+}
+
+// extrapolateCU extrapolates CU data when there's insufficient historical data
+func (sc *StatsCollector) extrapolateCU(actualCU int, actualDuration, targetDuration time.Duration) int {
+    if actualDuration <= 0 {
+        return 0
+    }
+
+    // Calculate CU per second rate
+    cuPerSecond := float64(actualCU) / actualDuration.Seconds()
+
+    // Extrapolate to target duration
+    extrapolatedCU := cuPerSecond * targetDuration.Seconds()
+
+    return int(extrapolatedCU)
+}
+
+// formatCUWithExtrapolation formats CU value with extrapolation indicator
+func formatCUWithExtrapolation(cu int, isExtrapolated bool) string {
+    if isExtrapolated {
+        return fmt.Sprintf("%d CU (extrapolated)", cu)
+    }
+    return fmt.Sprintf("%d CU", cu)
+}
+
 func main() {
     // Get configuration from environment variables
     listenAddr := getEnv("LISTEN_ADDR", ":8080")
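Taken together, the new helpers treat each summary interval as a point sample: calculateCUForTimeWindow sums the live interval plus every stored CUDataPoint newer than the window cutoff, and requests extrapolation whenever the covered span is shorter than the window. A standalone sketch of that newest-to-oldest walk (the point type and sample values are illustrative, not taken from the proxy):

package main

import (
	"fmt"
	"time"
)

type point struct {
	ts time.Time
	cu int
}

// sumWindow adds every point newer than now-window and reports the span
// actually covered, mirroring the reverse iteration in the patch.
func sumWindow(points []point, window time.Duration, now time.Time) (total int, covered time.Duration) {
	cutoff := now.Add(-window)
	for i := len(points) - 1; i >= 0; i-- {
		if points[i].ts.Before(cutoff) {
			break // points are in timestamp order, so the rest are older
		}
		total += points[i].cu
		covered = now.Sub(points[i].ts)
	}
	return total, covered
}

func main() {
	now := time.Now()
	pts := []point{
		{now.Add(-2 * time.Hour), 500}, // falls outside a 1-hour window
		{now.Add(-40 * time.Minute), 300},
		{now.Add(-5 * time.Minute), 100},
	}
	total, covered := sumWindow(pts, time.Hour, now)
	fmt.Println(total, covered.Round(time.Minute)) // 400 40m0s
}

Note that a stored point carries a whole interval's CU under a single timestamp, so a cutoff that lands inside an interval attributes that entire interval to one side of the boundary; at a one-minute summary cadence the error is bounded by one interval per window.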
diff --git a/reth.Dockerfile b/reth.Dockerfile
index cfba1776..2669e09b 100644
--- a/reth.Dockerfile
+++ b/reth.Dockerfile
@@ -174,26 +174,51 @@ RUN --mount=type=cache,target=/usr/local/cargo/registry \
         RUSTFLAGS="-C target-cpu=znver4 -C link-arg=-fuse-ld=/usr/local/bin/mold -C opt-level=3 -C llvm-args=-enable-machine-outliner" \
         CFLAGS="$CFLAGS_BASE -march=znver4 -mtune=znver4 --param l1-cache-line-size=64 --param l1-cache-size=32 --param l2-cache-size=512" \
         CXXFLAGS="$CXXFLAGS_BASE -march=znver4 -mtune=znver4 --param l1-cache-line-size=64 --param l1-cache-size=32 --param l2-cache-size=512"; \
+    elif [ "$ARCH_TARGET" = "multinode-zen4-pinned" ] || [ "$ARCH_TARGET" = "multinode-7950x-pinned" ]; then \
+        # Optimized for multiple nodes on Zen4/7950X with CPU pinning - dedicated cores
+        RUSTFLAGS="-C target-cpu=znver4 -C link-arg=-fuse-ld=/usr/local/bin/mold -C opt-level=3 -C llvm-args=-enable-machine-outliner -C llvm-args=-enable-gvn-hoist -C llvm-args=-slp-vectorize-hor-store" \
+        CFLAGS="$CFLAGS_BASE -march=znver4 -mtune=znver4 --param l1-cache-line-size=64 --param l1-cache-size=32 --param l2-cache-size=1024 --param l3-cache-size=32768" \
+        CXXFLAGS="$CXXFLAGS_BASE -march=znver4 -mtune=znver4 --param l1-cache-line-size=64 --param l1-cache-size=32 --param l2-cache-size=1024 --param l3-cache-size=32768"; \
     elif [ "$ARCH_TARGET" = "multinode-7900" ]; then \
         # Optimized for multiple nodes on AMD Ryzen 9 7900 (12 cores) - reduced cache assumptions
         RUSTFLAGS="-C target-cpu=znver4 -C link-arg=-fuse-ld=/usr/local/bin/mold -C opt-level=3 -C llvm-args=-enable-machine-outliner" \
         CFLAGS="$CFLAGS_BASE -march=znver4 -mtune=znver4 --param l1-cache-line-size=64 --param l1-cache-size=32 --param l2-cache-size=512" \
         CXXFLAGS="$CXXFLAGS_BASE -march=znver4 -mtune=znver4 --param l1-cache-line-size=64 --param l1-cache-size=32 --param l2-cache-size=512"; \
+    elif [ "$ARCH_TARGET" = "multinode-7900-pinned" ]; then \
+        # Optimized for multiple nodes on AMD Ryzen 9 7900 with CPU pinning - dedicated cores
+        RUSTFLAGS="-C target-cpu=znver4 -C link-arg=-fuse-ld=/usr/local/bin/mold -C opt-level=3 -C llvm-args=-enable-machine-outliner -C llvm-args=-enable-gvn-hoist -C llvm-args=-slp-vectorize-hor-store" \
+        CFLAGS="$CFLAGS_BASE -march=znver4 -mtune=znver4 --param l1-cache-line-size=64 --param l1-cache-size=32 --param l2-cache-size=1024 --param l3-cache-size=32768" \
+        CXXFLAGS="$CXXFLAGS_BASE -march=znver4 -mtune=znver4 --param l1-cache-line-size=64 --param l1-cache-size=32 --param l2-cache-size=1024 --param l3-cache-size=32768"; \
     elif [ "$ARCH_TARGET" = "multinode-zen5" ] || [ "$ARCH_TARGET" = "multinode-9950x" ]; then \
         # Optimized for multiple nodes on same machine - reduced cache assumptions
         RUSTFLAGS="-C target-cpu=znver5 -C link-arg=-fuse-ld=/usr/local/bin/mold -C opt-level=3 -C llvm-args=-enable-machine-outliner" \
         CFLAGS="$CFLAGS_BASE -march=znver5 -mtune=znver5 --param l1-cache-line-size=64 --param l1-cache-size=48 --param l2-cache-size=512" \
         CXXFLAGS="$CXXFLAGS_BASE -march=znver5 -mtune=znver5 --param l1-cache-line-size=64 --param l1-cache-size=48 --param l2-cache-size=512"; \
+    elif [ "$ARCH_TARGET" = "multinode-zen5-pinned" ] || [ "$ARCH_TARGET" = "multinode-9950x-pinned" ]; then \
+        # Optimized for multiple nodes on Zen5/9950X with CPU pinning - dedicated cores
+        RUSTFLAGS="-C target-cpu=znver5 -C link-arg=-fuse-ld=/usr/local/bin/mold -C opt-level=3 -C llvm-args=-enable-machine-outliner -C llvm-args=-enable-gvn-hoist -C llvm-args=-enable-dfa-jump-thread -C llvm-args=-slp-vectorize-hor-store" \
+        CFLAGS="$CFLAGS_BASE -march=znver5 -mtune=znver5 --param l1-cache-line-size=64 --param l1-cache-size=48 --param l2-cache-size=2048 --param l3-cache-size=32768" \
+        CXXFLAGS="$CXXFLAGS_BASE -march=znver5 -mtune=znver5 --param l1-cache-line-size=64 --param l1-cache-size=48 --param l2-cache-size=2048 --param l3-cache-size=32768"; \
     elif [ "$ARCH_TARGET" = "multinode-epyc" ]; then \
         # Optimized for multiple nodes on EPYC systems - assume shared resources
         RUSTFLAGS="-C target-cpu=znver4 -C link-arg=-fuse-ld=/usr/local/bin/mold -C opt-level=3" \
         CFLAGS="$CFLAGS_BASE -march=znver4 -mtune=znver4 --param l1-cache-line-size=64 --param l1-cache-size=32 --param l2-cache-size=256" \
         CXXFLAGS="$CXXFLAGS_BASE -march=znver4 -mtune=znver4 --param l1-cache-line-size=64 --param l1-cache-size=32 --param l2-cache-size=256"; \
+    elif [ "$ARCH_TARGET" = "multinode-epyc-pinned" ]; then \
+        # Optimized for multiple nodes on EPYC systems with CPU pinning - dedicated cores
+        RUSTFLAGS="-C target-cpu=znver4 -C link-arg=-fuse-ld=/usr/local/bin/mold -C opt-level=3 -C llvm-args=-enable-machine-outliner -C llvm-args=-enable-gvn-hoist -C llvm-args=-slp-vectorize-hor-store -C llvm-args=-data-sections -C llvm-args=-function-sections" \
+        CFLAGS="$CFLAGS_BASE -march=znver4 -mtune=znver4 --param l1-cache-line-size=64 --param l1-cache-size=32 --param l2-cache-size=1024 --param l3-cache-size=16384" \
+        CXXFLAGS="$CXXFLAGS_BASE -march=znver4 -mtune=znver4 --param l1-cache-line-size=64 --param l1-cache-size=32 --param l2-cache-size=1024 --param l3-cache-size=16384"; \
     elif [ "$ARCH_TARGET" = "multinode-generic" ]; then \
         # Generic multinode optimization - conservative cache assumptions
         RUSTFLAGS="-C target-cpu=x86-64-v3 -C link-arg=-fuse-ld=/usr/local/bin/mold -C opt-level=3" \
         CFLAGS="$CFLAGS_BASE -march=x86-64-v3 --param l1-cache-line-size=64 --param l1-cache-size=32 --param l2-cache-size=256" \
         CXXFLAGS="$CXXFLAGS_BASE -march=x86-64-v3 --param l1-cache-line-size=64 --param l1-cache-size=32 --param l2-cache-size=256"; \
+    elif [ "$ARCH_TARGET" = "multinode-generic-pinned" ]; then \
+        # Generic multinode optimization with CPU pinning - dedicated cores
+        RUSTFLAGS="-C target-cpu=x86-64-v3 -C link-arg=-fuse-ld=/usr/local/bin/mold -C opt-level=3 -C llvm-args=-enable-machine-outliner" \
+        CFLAGS="$CFLAGS_BASE -march=x86-64-v3 --param l1-cache-line-size=64 --param l1-cache-size=32 --param l2-cache-size=512 --param l3-cache-size=8192" \
+        CXXFLAGS="$CXXFLAGS_BASE -march=x86-64-v3 --param l1-cache-line-size=64 --param l1-cache-size=32 --param l2-cache-size=512 --param l3-cache-size=8192"; \
     else \
         RUSTFLAGS="-C target-cpu=$ARCH_TARGET -C link-arg=-fuse-ld=/usr/local/bin/mold -C opt-level=3" \
         CFLAGS="$CFLAGS_BASE -march=$ARCH_TARGET" \
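A design note on the new -pinned targets: each keeps the base ISA of its shared counterpart (znver4, znver5, x86-64-v3) but assumes a core owns a much larger slice of cache, raising the --param l2-cache-size hint (e.g. 512 to 1024 on Zen 4) and adding an explicit --param l3-cache-size, and enables extra LLVM passes through rustc (-enable-gvn-hoist and -slp-vectorize-hor-store on the AMD targets, -enable-dfa-jump-thread on Zen 5, -data-sections/-function-sections on EPYC, and the machine outliner on generic). Those hints only describe reality when each node is actually pinned to dedicated cores, with the variant presumably chosen at build time via ARCH_TARGET (how ARCH_TARGET is declared is not shown in this excerpt); on shared cores, the smaller cache assumptions of the non-pinned variants remain the safer default.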