Skip to content

Commit b28affe

Browse files
authored
[tmpnet] Misc cleanup for monitoring tooling (#3527)
1 parent 6217810 commit b28affe

File tree

5 files changed

+53
-31
lines changed

5 files changed

+53
-31
lines changed

scripts/configure-local-metrics-collection.sh

+6-1
Original file line numberDiff line numberDiff line change
@@ -51,4 +51,9 @@ echo "Wrote promtail configuration to ${PROMTAIL_CONFIG_FILE}"
5151

5252
echo "Metrics collection by prometheus can be started with ./scripts/run_prometheus.sh"
5353
echo "Log collection by promtail can be started with ./scripts/run_promtail.sh"
54-
echo "Grafana link: https://grafana-poc.avax-dev.network/d/kBQpRdWnk/avalanche-main-dashboard?var-filter=network_uuid%7C%3D%7C${METRICS_UUID}"
54+
55+
GRAFANA_LINK="https://grafana-poc.avax-dev.network/d/kBQpRdWnk/avalanche-main-dashboard?var-filter=network_uuid%7C%3D%7C${METRICS_UUID}"
56+
METRICS_PATH="${HOME}/.avalanchego/metrics.txt"
57+
echo "${GRAFANA_LINK}" > "${METRICS_PATH}"
58+
echo "Metrics and logs can be viewed at: ${GRAFANA_LINK}"
59+
echo "Link also saved to ${METRICS_PATH}"

scripts/run_prometheus.sh

+18-16
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,16 @@
22

33
set -euo pipefail
44

5-
# Starts a prometheus instance in agent-mode, forwarding to a central
6-
# instance. Intended to enable metrics collection from temporary networks running
7-
# locally and in CI.
5+
# - Starts a prometheus instance in agent-mode to collect metrics from nodes running
6+
# locally and in CI.
87
#
9-
# The prometheus instance will remain running in the background and will forward
10-
# metrics to the central instance for all tmpnet networks.
8+
# - prometheus will remain running in the background and will forward metrics to the
9+
# specified prometheus endpoint.
1110
#
12-
# To stop it:
13-
#
14-
# $ kill -9 `cat ~/.tmpnet/prometheus/run.pid` && rm ~/.tmpnet/prometheus/run.pid
11+
# - Each node is configured with a file written to ~/.tmpnet/prometheus/file_sd_configs
1512
#
13+
# - To stop the running instance:
14+
# $ kill -9 `cat ~/.tmpnet/prometheus/run.pid` && rm ~/.tmpnet/prometheus/run.pid
1615

1716
# e.g.,
1817
# PROMETHEUS_ID=<id> PROMETHEUS_PASSWORD=<password> ./scripts/run_prometheus.sh
@@ -45,7 +44,7 @@ fi
4544

4645
PROMETHEUS_PASSWORD="${PROMETHEUS_PASSWORD:-}"
4746
if [[ -z "${PROMETHEUS_PASSWORD}" ]]; then
48-
echo "Plase provide a value for PROMETHEUS_PASSWORD"
47+
echo "Please provide a value for PROMETHEUS_PASSWORD"
4948
exit 1
5049
fi
5150

@@ -64,13 +63,13 @@ if ! command -v "${CMD}" &> /dev/null; then
6463

6564
# Determine the arch
6665
if which sw_vers &> /dev/null; then
67-
echo "on macos, only amd64 binaries are available so rosetta is required on apple silicon machines."
68-
echo "to avoid using rosetta, install via homebrew: brew install prometheus"
66+
echo "On macos, only amd64 binaries are available so rosetta is required on apple silicon machines."
67+
echo "To avoid using rosetta, install via homebrew: brew install prometheus"
6968
DIST=darwin
7069
else
7170
ARCH="$(uname -i)"
7271
if [[ "${ARCH}" != "x86_64" ]]; then
73-
echo "on linux, only amd64 binaries are available. manual installation of prometheus is required."
72+
echo "On linux, only amd64 binaries are available. manual installation of prometheus is required."
7473
exit 1
7574
else
7675
DIST="linux"
@@ -90,8 +89,8 @@ fi
9089
FILE_SD_PATH="${PROMETHEUS_WORKING_DIR}/file_sd_configs"
9190
mkdir -p "${FILE_SD_PATH}"
9291

93-
echo "writing configuration..."
94-
cat >"${PROMETHEUS_WORKING_DIR}"/prometheus.yaml <<EOL
92+
CONFIG_PATH="${PROMETHEUS_WORKING_DIR}/prometheus.yaml"
93+
cat > "${CONFIG_PATH}" <<EOL
9594
# my global config
9695
global:
9796
# Make sure this value takes into account the network-shutdown-delay in tests/fixture/e2e/env.go
@@ -112,9 +111,12 @@ remote_write:
112111
username: "${PROMETHEUS_ID}"
113112
password: "${PROMETHEUS_PASSWORD}"
114113
EOL
114+
echo "Wrote configuration to ${CONFIG_PATH}"
115115

116-
echo "starting prometheus..."
116+
echo "Starting prometheus..."
117117
cd "${PROMETHEUS_WORKING_DIR}"
118118
nohup "${CMD}" --config.file=prometheus.yaml --web.listen-address=localhost:0 --enable-feature=agent > prometheus.log 2>&1 &
119119
echo $! > "${PIDFILE}"
120-
echo "running with pid $(cat "${PIDFILE}")"
120+
echo "prometheus started with pid $(cat "${PIDFILE}")"
121+
# shellcheck disable=SC2016
122+
echo 'To stop prometheus: "kill -SIGTERM `cat ~/.tmpnet/prometheus/run.pid` && rm ~/.tmpnet/prometheus/run.pid"'

scripts/run_promtail.sh

+14-12
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,15 @@
22

33
set -euo pipefail
44

5-
# Starts a promtail instance to collect logs from temporary networks
6-
# running locally and in CI.
5+
# - Starts a promtail instance to collect logs from nodes running locally and in CI.
76
#
8-
# The promtail instance will remain running in the background and will forward
9-
# logs to the central instance for all tmpnet networks.
7+
# - promtail will remain running in the background and will forward logs to the
8+
# specified Loki endpoint.
109
#
11-
# To stop it:
12-
#
13-
# $ kill -9 `cat ~/.tmpnet/promtail/run.pid` && rm ~/.tmpnet/promtail/run.pid
10+
# - Each node is configured with a file written to ~/.tmpnet/promtail/file_sd_configs/
1411
#
12+
# - To stop the running instance:
13+
# $ kill -9 `cat ~/.tmpnet/promtail/run.pid` && rm ~/.tmpnet/promtail/run.pid
1514

1615
# e.g.,
1716
# LOKI_ID=<id> LOKI_PASSWORD=<password> ./scripts/run_promtail.sh
@@ -44,7 +43,7 @@ fi
4443

4544
LOKI_PASSWORD="${LOKI_PASSWORD:-}"
4645
if [[ -z "${LOKI_PASSWORD}" ]]; then
47-
echo "Plase provide a value for LOKI_PASSWORD"
46+
echo "Please provide a value for LOKI_PASSWORD"
4847
exit 1
4948
fi
5049

@@ -86,8 +85,8 @@ fi
8685
FILE_SD_PATH="${PROMTAIL_WORKING_DIR}/file_sd_configs"
8786
mkdir -p "${FILE_SD_PATH}"
8887

89-
echo "writing configuration..."
90-
cat >"${PROMTAIL_WORKING_DIR}"/promtail.yaml <<EOL
88+
CONFIG_PATH="${PROMTAIL_WORKING_DIR}/promtail.yaml"
89+
cat > "${CONFIG_PATH}" <<EOL
9190
server:
9291
http_listen_port: 0
9392
grpc_listen_port: 0
@@ -107,9 +106,12 @@ scrape_configs:
107106
- files:
108107
- '${FILE_SD_PATH}/*.json'
109108
EOL
109+
echo "Wrote configuration to ${CONFIG_PATH}"
110110

111-
echo "starting promtail..."
111+
echo "Starting promtail..."
112112
cd "${PROMTAIL_WORKING_DIR}"
113113
nohup "${CMD}" -config.file=promtail.yaml > promtail.log 2>&1 &
114114
echo $! > "${PIDFILE}"
115-
echo "running with pid $(cat "${PIDFILE}")"
115+
echo "promtail started with pid $(cat "${PIDFILE}")"
116+
# shellcheck disable=SC2016
117+
echo 'To stop promtail: "kill -SIGTERM `cat ~/.tmpnet/promtail/run.pid` && rm ~/.tmpnet/promtail/run.pid"'

tests/fixture/tmpnet/README.md

+6
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ HOME
167167
│ └── config.json // Custom chain configuration for all nodes
168168
├── config.json // Common configuration (including defaults and pre-funded keys)
169169
├── genesis.json // Genesis for all nodes
170+
├── metrics.txt // Link for metrics and logs collected from the network (see: Monitoring)
170171
├── network.env // Sets network dir env var to simplify network usage
171172
└── subnets // Directory containing subnet config for both avalanchego and tmpnet
172173
├── subnet-a.json // tmpnet configuration for subnet-a and its chain(s)
@@ -269,6 +270,11 @@ LOKI_ID=<id> LOKI_PASSWORD=<password> ./scripts/run_promtail.sh
269270

270271
# Network start emits link to grafana displaying collected logs and metrics
271272
./build/tmpnetctl start-network
273+
274+
# Configure metrics collection from a local node binding to the default API
275+
# port of 9650 and storing its logs in ~/.avalanchego/logs. The script will
276+
# also emit a link to grafana.
277+
./scripts/configure-local-metrics-collection.sh
272278
```
273279

274280
### Metrics collection

tests/fixture/tmpnet/network.go

+9-2
Original file line numberDiff line numberDiff line change
@@ -373,10 +373,17 @@ func (n *Network) StartNodes(ctx context.Context, w io.Writer, nodesToStart ...*
373373
if _, err := fmt.Fprintf(w, "\nStarted network %s (UUID: %s)\n", n.Dir, n.UUID); err != nil {
374374
return err
375375
}
376-
// Provide a link to the main dashboard filtered by the uuid and showing results from now till whenever the link is viewed
376+
377+
// Generate a link to the main dashboard filtered by the uuid and showing results from now till whenever the link is viewed
377378
startTimeStr := strconv.FormatInt(startTime.UnixMilli(), 10)
378379
metricsURL := MetricsLinkForNetwork(n.UUID, startTimeStr, "")
379-
if _, err := fmt.Fprintf(w, "\nMetrics: %s\n", metricsURL); err != nil {
380+
381+
// Write link to the network path and to stdout
382+
metricsPath := filepath.Join(n.Dir, "metrics.txt")
383+
if err := os.WriteFile(metricsPath, []byte(metricsURL+"\n"), perms.ReadWrite); err != nil {
384+
return fmt.Errorf("failed to write metrics link to %s: %w", metricsPath, err)
385+
}
386+
if _, err := fmt.Fprintf(w, "\nMetrics: %s\nLink also saved to %s\n", metricsURL, metricsPath); err != nil {
380387
return err
381388
}
382389

0 commit comments

Comments
 (0)