From 51c432ae7813fbb2b9f66e3df0533a05fd7dc2d7 Mon Sep 17 00:00:00 2001 From: Your Name Date: Sun, 13 Feb 2022 20:49:01 +0100 Subject: [PATCH] refactor the compose files in modules and start to generate prometheus scrape targets. added haproxy. --- docker-compose.bsc.yml | 29 +++++++ docker-compose.monitoring.yml | 104 +++++++++++++++++++++++++ docker-compose.traefik.yml | 46 +++++++++++ haproxy/Dockerfile | 7 ++ haproxy/ethereum-healthcheck.sh | 25 ++++++ haproxy/haproxy.cfg.example | 83 ++++++++++++++++++++ prometheus/prometheus.autodiscover.yml | 34 ++++++++ 7 files changed, 328 insertions(+) create mode 100644 docker-compose.bsc.yml create mode 100644 docker-compose.monitoring.yml create mode 100644 docker-compose.traefik.yml create mode 100644 haproxy/Dockerfile create mode 100755 haproxy/ethereum-healthcheck.sh create mode 100644 haproxy/haproxy.cfg.example create mode 100644 prometheus/prometheus.autodiscover.yml diff --git a/docker-compose.bsc.yml b/docker-compose.bsc.yml new file mode 100644 index 00000000..26f6122b --- /dev/null +++ b/docker-compose.bsc.yml @@ -0,0 +1,29 @@ +version: '3.1' + +services: + + binance-smart-chain: + image: dysnix/bsc + expose: + - "8545" + - "8546" + - "30303" + volumes: + - /root/bsc-datadir/server1/dataseed:/datadir + - /root/bsc-config/config.toml:/config.toml + command: "--cache 18000 --rpc --rpcaddr=\"0.0.0.0\" --rpc.allow-unprotected-txs --txlookuplimit 0 --syncmode snap --config /config.toml --datadir /datadir" + restart: unless-stopped + networks: + - chains + labels: + - "traefik.enable=true" + - "traefik.http.middlewares.ipwhitelist.ipwhitelist.sourcerange=$WHITELIST" + - "traefik.http.middlewares.bsc-stripprefix.stripprefix.prefixes=/bsc" + - "traefik.http.services.bsc.loadbalancer.server.port=8545" + - "traefik.http.routers.bsc.entrypoints=websecure" + - "traefik.http.routers.bsc.tls.certresolver=myresolver" + - "traefik.http.routers.bsc.rule=Host(`$DOMAIN`) && PathPrefix(`/bsc`)" + - 
"traefik.http.routers.bsc.middlewares=bsc-stripprefix, ipwhitelist" + + + diff --git a/docker-compose.monitoring.yml b/docker-compose.monitoring.yml new file mode 100644 index 00000000..a3058fe3 --- /dev/null +++ b/docker-compose.monitoring.yml @@ -0,0 +1,104 @@ +version: '3.1' + +services: + +### WIREGUARD + + wireguard: + image: lscr.io/linuxserver/wireguard + container_name: wireguard + healthcheck: + test: [ "CMD", "ping", "-c", "1", "10.13.13.1" ] + timeout: 10s + interval: 5s + retries: 3 + cap_add: + - NET_ADMIN + - SYS_MODULE + environment: + - PUID=1000 + - PGID=1000 + volumes: + - ./wireguard/config/wg0.conf:/config/wg0.conf + - /lib/modules:/lib/modules + # Expose prometheus port + expose: + - 9090 + ports: + - 51820:51820/udp + sysctls: + - net.ipv4.conf.all.src_valid_mark=1 + restart: unless-stopped + + +### MONITORING + + prometheus: + image: prom/prometheus:v2.25.0 + container_name: prometheus + volumes: + - ./prometheus/prometheus.autodiscover.yml:/etc/prometheus/prometheus.yml + - prometheus_data:/prometheus + - prometheus-docker-sd:/prometheus-docker-sd:rw + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + - '--web.console.libraries=/etc/prometheus/console_libraries' + - '--web.console.templates=/etc/prometheus/consoles' + - '--storage.tsdb.retention.time=200h' + - '--web.enable-lifecycle' + restart: unless-stopped + network_mode: "service:wireguard" + labels: + org.label-schema.group: "monitoring" + depends_on: + - wireguard + + prometheus-docker-sd: + image: "stucky/prometheus-docker-sd:latest" + restart: unless-stopped + volumes: + - /var/run/docker.sock:/var/run/docker.sock + - prometheus-docker-sd:/prometheus-docker-sd:rw + + nodeexporter: + image: prom/node-exporter:v1.2.2 + container_name: nodeexporter + volumes: + - /proc:/host/proc:ro + - /sys:/host/sys:ro + - /:/rootfs:ro + command: + - '--path.procfs=/host/proc' + - '--path.rootfs=/rootfs' + - '--path.sysfs=/host/sys' + - 
'--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)' + restart: unless-stopped + expose: + - 9100 + labels: + org.label-schema.group: "monitoring" + + cadvisor: + image: gcr.io/cadvisor/cadvisor:v0.42.0 + container_name: cadvisor + privileged: true + devices: + - /dev/kmsg:/dev/kmsg + volumes: + - /:/rootfs:ro + - /var/run:/var/run:ro + - /sys:/sys:ro + - /var/lib/docker:/var/lib/docker:ro + #- /cgroup:/cgroup:ro #doesn't work on MacOS only for Linux + restart: unless-stopped + expose: + - 8080 + labels: + org.label-schema.group: "monitoring" + +### VOLUMES + +volumes: + prometheus_data: + prometheus-docker-sd: diff --git a/docker-compose.traefik.yml b/docker-compose.traefik.yml new file mode 100644 index 00000000..50d3e9fd --- /dev/null +++ b/docker-compose.traefik.yml @@ -0,0 +1,46 @@ +version: '3.1' + +services: + + traefik: + image: traefik:latest + container_name: traefik + restart: always + expose: + - "8082" + ports: + - "443:443" + - "127.0.0.1:8080:8080" + command: + - "--api=true" + - "--api.insecure=true" + - "--api.dashboard=true" + - "--log.level=DEBUG" + - "--providers.docker=true" + - "--providers.docker.exposedbydefault=false" + - "--entrypoints.websecure.address=:443" + - "--entryPoints.metrics.address=:8082" + - "--metrics.prometheus.entryPoint=metrics" + - "--certificatesresolvers.myresolver.acme.tlschallenge=true" + # TESTING + # - "--certificatesresolvers.myresolver.acme.caserver=https://acme-staging-v02.api.letsencrypt.org/directory" + - "--certificatesresolvers.myresolver.acme.email=$MAIL" + - "--certificatesresolvers.myresolver.acme.storage=/letsencrypt/acme.json" + volumes: + - "./traefik/letsencrypt:/letsencrypt" + - "/var/run/docker.sock:/var/run/docker.sock:ro" + networks: + - chains + #labels: + # - "traefik.enable=true" + # - "prometheus-scrape.enabled=true" + # - "prometheus-scrape.port=8082" + # - "prometheus-scrape.job_name=traefik" + + +networks: + chains: + driver: bridge + ipam: + config: + - subnet: 
192.168.0.0/27 \ No newline at end of file diff --git a/haproxy/Dockerfile b/haproxy/Dockerfile new file mode 100644 index 00000000..d1826f27 --- /dev/null +++ b/haproxy/Dockerfile @@ -0,0 +1,7 @@ +FROM haproxy:alpine + +USER root +RUN apk add --no-cache curl bash jq +USER haproxy + +CMD ["haproxy", "-f", "/usr/local/etc/haproxy/haproxy.cfg"] diff --git a/haproxy/ethereum-healthcheck.sh b/haproxy/ethereum-healthcheck.sh new file mode 100755 index 00000000..f830fa32 --- /dev/null +++ b/haproxy/ethereum-healthcheck.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +# $1 = Virtual Service IP (VIP) +# $2 = Virtual Service Port (VPT) +# $3 = Real Server IP (RIP) +# $4 = Real Server Port (RPT) +# $5 = Check Source IP + +PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin +VIP=$1 +VPT=$2 +RIP=$3 +RPT=$4 +# RPT=8545 + +# Run curl with appropriate options +curl -s -X POST -H "Content-Type: application/json" -m 2 -d '{"jsonrpc":"2.0","method":"eth_syncing","params": [],"id":1}' https://$HAPROXY_SERVER_NAME:$RPT/$HAPROXY_PROXY_NAME 2>/dev/null | jq '.result' -r | grep -q false +exit1=$? + +peers=$(curl -s -X POST -H "Content-Type: application/json" -m 2 -d '{"jsonrpc":"2.0","method":"net_peerCount","params": [],"id":1}' https://$HAPROXY_SERVER_NAME:$RPT/$HAPROXY_PROXY_NAME 2>/dev/null | jq '.result' -r) + +# If any of the above tests failed, then exit 1. 
+if [[ "$exit1" -ne 0 ]]; then exit 1; fi +if [[ `printf "%d" $peers` == "0" || `printf "%d" $peers` == "1" ]]; then exit 1; fi +exit 0 diff --git a/haproxy/haproxy.cfg.example b/haproxy/haproxy.cfg.example new file mode 100644 index 00000000..85c6f0a4 --- /dev/null +++ b/haproxy/haproxy.cfg.example @@ -0,0 +1,83 @@ +global + + #nbthread 2 + cpu-map auto:1/1-2 0-1 + + log /dev/log local0 + log /dev/log local1 notice + #chroot /var/lib/haproxy + user haproxy + group haproxy + daemon + external-check + insecure-fork-wanted + +defaults + mode http + log global + option httplog + option http-keep-alive + option dontlognull + option redispatch + option contstats + retries 3 + backlog 10000 + timeout client 50s + timeout connect 5s + timeout server 50s + timeout tunnel 3600s + timeout http-keep-alive 2s + timeout http-request 15s + timeout queue 30s + timeout tarpit 60s + default-server inter 3s rise 2 fall 3 + option forwardfor + + +listen stats + bind *:9600 + stats enable + stats uri /stats + stats realm Haproxy\ Statistics + stats auth pocket:P@ssw0rd00! 
+ +frontend stats2 + bind *:8404 + #option http-use-htx + http-request use-service prometheus-exporter if { path /metrics } + stats enable + stats uri /stats + stats refresh 10s + +frontend rpc-frontend + bind *:80 + acl host_is_erigon path_beg /erigon + #acl host_is_goerli path_beg /goerli + #acl host_is_avalanche path_beg /avalanche + #acl host_is_ropsten path_beg /ropsten + #acl host_is_geth path_beg /geth + #acl host_is_rinkeby path_beg /rinkeby + + use_backend erigon if host_is_erigon + #use_backend goerli if host_is_goerli + #use_backend avalanche if host_is_avalanche + #use_backend ropsten if host_is_ropsten + #use_backend geth if host_is_geth + #use_backend rinkeby if host_is_rinkeby + + default_backend backend-no-match + +backend backend-no-match + http-request deny deny_status 400 + +backend erigon + mode http + balance roundrobin + + option external-check + external-check path "/usr/bin:/bin" + external-check command /usr/local/etc/haproxy/ethereum-healthcheck.sh + http-send-name-header Host + + server rpc-de-1.stakesquid-db.ml rpc-de-1.stakesquid-db.ml:443 check inter 10000 fall 3 rise 2 maxconn 2000 ssl verify none + server rpc-fi-1.stakesquid-db.ml rpc-fi-1.stakesquid-db.ml:443 check inter 10000 fall 3 rise 2 maxconn 2000 ssl verify none backup diff --git a/prometheus/prometheus.autodiscover.yml b/prometheus/prometheus.autodiscover.yml new file mode 100644 index 00000000..1e5f561a --- /dev/null +++ b/prometheus/prometheus.autodiscover.yml @@ -0,0 +1,34 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + + # Attach these labels to any time series or alerts when communicating with + # external systems (federation, remote storage, Alertmanager). + external_labels: + monitor: 'docker-host-alpha' + +# Load and evaluate rules in this file every 'evaluation_interval' seconds. +rule_files: + - "alert.rules" + +# A scrape configuration containing exactly one endpoint to scrape. 
+scrape_configs: + - job_name: 'nodeexporter' + scrape_interval: 5s + static_configs: + - targets: ['nodeexporter:9100'] + + - job_name: 'cadvisor' + scrape_interval: 5s + static_configs: + - targets: ['cadvisor:8080'] + + - job_name: 'prometheus' + scrape_interval: 10s + static_configs: + - targets: ['localhost:9090'] + + - job_name: 'service_discovery' + file_sd_configs: + - files: + - /prometheus-docker-sd/docker-targets.json