Skip to content

Commit d71fad6

Browse files
committed
Add OTel kuttl tests.
1 parent eaa2aa2 commit d71fad6

24 files changed

+1256
-2
lines changed

.github/workflows/test.yaml

+3-2
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ jobs:
111111
registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi8-16.8-3.4-0
112112
registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi8-17.4-0
113113
registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi8-17.4-3.4-0
114+
registry.developers.crunchydata.com/crunchydata/postgres-operator:latest
114115
- run: go mod download
115116
- name: Build executable
116117
run: PGO_VERSION='${{ github.sha }}' make build-postgres-operator
@@ -143,8 +144,8 @@ jobs:
143144
--env 'RELATED_IMAGE_POSTGRES_17=registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi8-17.4-0' \
144145
--env 'RELATED_IMAGE_POSTGRES_17_GIS_3.4=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi8-17.4-3.4-0' \
145146
--env 'RELATED_IMAGE_STANDALONE_PGADMIN=registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi8-8.14-2' \
146-
--env 'RELATED_IMAGE_COLLECTOR=ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector-contrib:0.119.0' \
147-
--env 'PGO_FEATURE_GATES=TablespaceVolumes=true' \
147+
--env 'RELATED_IMAGE_COLLECTOR=registry.developers.crunchydata.com/crunchydata/postgres-operator:latest' \
148+
--env 'PGO_FEATURE_GATES=TablespaceVolumes=true,OpenTelemetryLogs=true,OpenTelemetryMetrics=true' \
148149
--name 'postgres-operator' ubuntu \
149150
postgres-operator
150151
- name: Install kuttl
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
apiVersion: kuttl.dev/v1beta1
2+
kind: TestStep
3+
apply:
4+
- files/00--create-cluster.yaml
5+
assert:
6+
- files/00-cluster-created.yaml
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
apiVersion: kuttl.dev/v1beta1
2+
kind: TestStep
3+
apply:
4+
- files/01--add-instrumentation.yaml
5+
assert:
6+
- files/01-instrumentation-added.yaml
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
apiVersion: kuttl.dev/v1beta1
2+
kind: TestAssert
3+
commands:
4+
# First, check that all containers in the instance pod are ready.
5+
# Then, grab the collector metrics output and check that metrics from both 5m
6+
# and 5s queries are present, as well as patroni metrics.
7+
# Then, check the collector logs for patroni, pgbackrest, and postgres logs.
8+
# Finally, ensure the monitoring user exists and is configured.
9+
- script: |
10+
retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; }
11+
check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; }
12+
contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; }
13+
14+
pod=$(kubectl get pods -o name -n "${NAMESPACE}" \
15+
-l postgres-operator.crunchydata.com/cluster=otel-cluster,postgres-operator.crunchydata.com/data=postgres)
16+
[ "$pod" = "" ] && retry "Pod not found" && exit 1
17+
18+
condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}")
19+
[ "$condition_json" = "" ] && retry "conditions not found" && exit 1
20+
{ check_containers_ready "$condition_json"; } || {
21+
retry "containers not ready"
22+
exit 1
23+
}
24+
25+
scrape_metrics=$(kubectl exec "${pod}" -c collector -n "${NAMESPACE}" -- \
26+
curl --insecure --silent http://localhost:9187/metrics)
27+
{ contains "${scrape_metrics}" 'ccp_connection_stats_active'; } || {
28+
retry "5 second metric not found"
29+
exit 1
30+
}
31+
{ contains "${scrape_metrics}" 'ccp_database_size_bytes'; } || {
32+
retry "5 minute metric not found"
33+
exit 1
34+
}
35+
{ contains "${scrape_metrics}" 'patroni_postgres_running'; } || {
36+
retry "patroni metric not found"
37+
exit 1
38+
}
39+
40+
logs=$(kubectl logs "${pod}" --namespace "${NAMESPACE}" -c collector | grep InstrumentationScope)
41+
{ contains "${logs}" 'InstrumentationScope patroni'; } || {
42+
retry "patroni logs not found"
43+
exit 1
44+
}
45+
{ contains "${logs}" 'InstrumentationScope pgbackrest'; } || {
46+
retry "pgbackrest logs not found"
47+
exit 1
48+
}
49+
{ contains "${logs}" 'InstrumentationScope postgres'; } || {
50+
retry "postgres logs not found"
51+
exit 1
52+
}
53+
54+
kubectl exec --stdin "${pod}" --namespace "${NAMESPACE}" -c database \
55+
-- psql -qb --set ON_ERROR_STOP=1 --file=- <<'SQL'
56+
DO $$
57+
DECLARE
58+
result record;
59+
BEGIN
60+
SELECT * INTO result FROM pg_catalog.pg_roles WHERE rolname = 'ccp_monitoring';
61+
ASSERT FOUND, 'user not found';
62+
END $$
63+
SQL
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
apiVersion: kuttl.dev/v1beta1
2+
kind: TestAssert
3+
commands:
4+
# First, check that all containers in the pgbouncer pod are ready.
5+
# Then, scrape the collector metrics and check that pgbouncer metrics are present.
6+
# Then, check the collector logs for pgbouncer logs.
7+
- script: |
8+
retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; }
9+
check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; }
10+
contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; }
11+
12+
pod=$(kubectl get pods -o name -n "${NAMESPACE}" \
13+
-l postgres-operator.crunchydata.com/cluster=otel-cluster,postgres-operator.crunchydata.com/role=pgbouncer)
14+
[ "$pod" = "" ] && retry "Pod not found" && exit 1
15+
16+
condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}")
17+
[ "$condition_json" = "" ] && retry "conditions not found" && exit 1
18+
{ check_containers_ready "$condition_json"; } || {
19+
retry "containers not ready"
20+
exit 1
21+
}
22+
23+
scrape_metrics=$(kubectl exec "${pod}" -c collector -n "${NAMESPACE}" -- \
24+
curl --insecure --silent http://localhost:9187/metrics)
25+
{ contains "${scrape_metrics}" 'ccp_pgbouncer_clients_wait_seconds'; } || {
26+
retry "pgbouncer metric not found"
27+
exit 1
28+
}
29+
30+
logs=$(kubectl logs "${pod}" --namespace "${NAMESPACE}" -c collector | grep InstrumentationScope)
31+
{ contains "${logs}" 'InstrumentationScope pgbouncer'; } || {
32+
retry "pgbouncer logs not found"
33+
exit 1
34+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
apiVersion: kuttl.dev/v1beta1
2+
kind: TestAssert
3+
commands:
4+
# First, check that all containers in the pgadmin pod are ready.
5+
# Then, check the collector logs for pgadmin and gunicorn logs.
6+
- script: |
7+
retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; }
8+
check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; }
9+
contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; }
10+
11+
pod=$(kubectl get pods -o name -n "${NAMESPACE}" \
12+
-l postgres-operator.crunchydata.com/pgadmin=otel-pgadmin)
13+
[ "$pod" = "" ] && retry "Pod not found" && exit 1
14+
15+
condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}")
16+
[ "$condition_json" = "" ] && retry "conditions not found" && exit 1
17+
{ check_containers_ready "$condition_json"; } || {
18+
retry "containers not ready"
19+
exit 1
20+
}
21+
22+
logs=$(kubectl logs "${pod}" --namespace "${NAMESPACE}" -c collector | grep InstrumentationScope)
23+
{ contains "${logs}" 'InstrumentationScope pgadmin'; } || {
24+
retry "pgadmin logs not found"
25+
exit 1
26+
}
27+
{ contains "${logs}" 'InstrumentationScope gunicorn.access'; } || {
28+
retry "gunicorn logs not found"
29+
exit 1
30+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
apiVersion: kuttl.dev/v1beta1
2+
kind: TestAssert
3+
commands:
4+
# First, check that all containers in the repo host pod are ready.
5+
# Then, ensure that the collector logs for the repo-host do not contain any
6+
# pgbackrest logs as the backup completed before the collector started up and we
7+
# have the collector configured to ingest only log records written after it starts up.
8+
- script: |
9+
retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; }
10+
check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; }
11+
contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; }
12+
13+
pod=$(kubectl get pods -o name -n "${NAMESPACE}" \
14+
-l postgres-operator.crunchydata.com/cluster=otel-cluster,postgres-operator.crunchydata.com/data=pgbackrest)
15+
[ "$pod" = "" ] && retry "Pod not found" && exit 1
16+
17+
condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}")
18+
[ "$condition_json" = "" ] && retry "conditions not found" && exit 1
19+
{ check_containers_ready "$condition_json"; } || {
20+
retry "containers not ready"
21+
exit 1
22+
}
23+
24+
logs=$(kubectl logs "${pod}" --namespace "${NAMESPACE}" -c collector | grep InstrumentationScope)
25+
{ !(contains "${logs}" 'InstrumentationScope pgbackrest') } || {
26+
retry "pgbackrest logs were found when we did not expect any"
27+
exit 1
28+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
apiVersion: kuttl.dev/v1beta1
2+
kind: TestStep
3+
apply:
4+
- files/06--annotate-cluster.yaml
5+
assert:
6+
- files/06-backup-completed.yaml
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
apiVersion: kuttl.dev/v1beta1
2+
kind: TestAssert
3+
commands:
4+
# First, check that all containers in the repo host pod are ready.
5+
# Then, ensure that the repo-host collector logs have pgbackrest logs.
6+
- script: |
7+
retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; }
8+
check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; }
9+
contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; }
10+
11+
pod=$(kubectl get pods -o name -n "${NAMESPACE}" \
12+
-l postgres-operator.crunchydata.com/cluster=otel-cluster,postgres-operator.crunchydata.com/data=pgbackrest)
13+
[ "$pod" = "" ] && retry "Pod not found" && exit 1
14+
15+
condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}")
16+
[ "$condition_json" = "" ] && retry "conditions not found" && exit 1
17+
{ check_containers_ready "$condition_json"; } || {
18+
retry "containers not ready"
19+
exit 1
20+
}
21+
22+
logs=$(kubectl logs "${pod}" --namespace "${NAMESPACE}" -c collector | grep InstrumentationScope)
23+
{ contains "${logs}" 'InstrumentationScope pgbackrest'; } || {
24+
retry "pgbackrest logs were not found"
25+
exit 1
26+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
apiVersion: kuttl.dev/v1beta1
2+
kind: TestStep
3+
apply:
4+
- files/08--add-custom-queries.yaml
5+
assert:
6+
- files/08-custom-queries-added.yaml
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
apiVersion: kuttl.dev/v1beta1
2+
kind: TestAssert
3+
commands:
4+
# First, check that all containers in the instance pod are ready.
5+
# Then, grab the collector metrics output and check that the two metrics that we
6+
# checked for earlier are no longer there.
7+
# Then, check that the two custom metrics that we added are present.
8+
- script: |
9+
retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; }
10+
check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; }
11+
contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; }
12+
13+
pod=$(kubectl get pods -o name -n "${NAMESPACE}" \
14+
-l postgres-operator.crunchydata.com/cluster=otel-cluster,postgres-operator.crunchydata.com/data=postgres)
15+
[ "$pod" = "" ] && retry "Pod not found" && exit 1
16+
17+
condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}")
18+
[ "$condition_json" = "" ] && retry "conditions not found" && exit 1
19+
{ check_containers_ready "$condition_json"; } || {
20+
retry "containers not ready"
21+
exit 1
22+
}
23+
24+
scrape_metrics=$(kubectl exec "${pod}" -c collector -n "${NAMESPACE}" -- \
25+
curl --insecure --silent http://localhost:9187/metrics)
26+
{ !(contains "${scrape_metrics}" 'ccp_connection_stats_active') } || {
27+
retry "5 second metric still present"
28+
exit 1
29+
}
30+
{ !(contains "${scrape_metrics}" 'ccp_database_size_bytes') } || {
31+
retry "5 minute metric still present"
32+
exit 1
33+
}
34+
{ contains "${scrape_metrics}" 'custom_table_count'; } || {
35+
retry "fast custom metric not found"
36+
exit 1
37+
}
38+
{ contains "${scrape_metrics}" 'custom_pg_stat_statements_row_count'; } || {
39+
retry "slow custom metric not found"
40+
exit 1
41+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
apiVersion: kuttl.dev/v1beta1
2+
kind: TestStep
3+
apply:
4+
- files/10--add-logs-exporter.yaml
5+
assert:
6+
- files/10-logs-exporter-added.yaml
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
apiVersion: kuttl.dev/v1beta1
2+
kind: TestAssert
3+
commands:
4+
# First, check that the standalone otel-collector container is ready.
5+
# Then, check the standalone collector logs for logs from all six potential
6+
# sources: patroni, pgbackrest, postgres, pgbouncer, pgadmin, and gunicorn.
7+
- script: |
8+
retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; }
9+
check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; }
10+
contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; }
11+
12+
pod=$(kubectl get pods -o name -n "${NAMESPACE}" -l app=opentelemetry)
13+
[ "$pod" = "" ] && retry "Pod not found" && exit 1
14+
15+
condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}")
16+
[ "$condition_json" = "" ] && retry "conditions not found" && exit 1
17+
{ check_containers_ready "$condition_json"; } || {
18+
retry "containers not ready"
19+
exit 1
20+
}
21+
22+
logs=$(kubectl logs "${pod}" --namespace "${NAMESPACE}" -c otel-collector | grep InstrumentationScope)
23+
{ contains "${logs}" 'InstrumentationScope patroni'; } || {
24+
retry "patroni logs not found"
25+
exit 1
26+
}
27+
{ contains "${logs}" 'InstrumentationScope pgbackrest'; } || {
28+
retry "pgbackrest logs not found"
29+
exit 1
30+
}
31+
{ contains "${logs}" 'InstrumentationScope postgres'; } || {
32+
retry "postgres logs not found"
33+
exit 1
34+
}
35+
{ contains "${logs}" 'InstrumentationScope pgbouncer'; } || {
36+
retry "pgbouncer logs not found"
37+
exit 1
38+
}
39+
{ contains "${logs}" 'InstrumentationScope pgadmin'; } || {
40+
retry "pgadmin logs not found"
41+
exit 1
42+
}
43+
{ contains "${logs}" 'InstrumentationScope gunicorn.access'; } || {
44+
retry "gunicorn logs not found"
45+
exit 1
46+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Test OTel Logging and Metrics
2+
3+
## Assumptions
4+
5+
This test assumes that the operator has both the `OpenTelemetryLogs` and `OpenTelemetryMetrics` feature gates enabled and that you are using operator version 5.8 or greater.
6+
7+
## Process
8+
9+
1. Create a basic cluster with pgbouncer and pgadmin in place.
10+
1. Ensure cluster comes up, that all containers are running and ready, and that the initial backup is complete.
11+
2. Add the `instrumentation` spec to both PostgresCluster and PGAdmin manifests.
12+
1. Ensure that OTel collector containers and `crunchy-otel-collector` labels are added to the four pods (postgres instance, repo-host, pgbouncer, & pgadmin) and that the collector containers are running and ready.
13+
2. Assert that the instance pod collector is getting postgres and patroni metrics and postgres, patroni, and pgbackrest logs.
14+
3. Assert that the pgbouncer pod collector is getting pgbouncer metrics and logs.
15+
4. Assert that the pgAdmin pod collector is getting pgAdmin and gunicorn logs.
16+
5. Assert that the repo-host pod collector is NOT getting pgbackrest logs. We do not expect logs yet because the initial backup completed and wrote its log file before the collector started, and we configure the collector to ingest only log records written after it has started up.
17+
6. Create a manual backup and ensure that it completes successfully.
18+
7. Ensure that the repo-host pod collector is now getting pgbackrest logs.
19+
3. Add both "add" and "remove" custom queries to the PostgresCluster `instrumentation` spec and create a ConfigMap that holds the custom queries to add.
20+
1. Ensure that the ConfigMap is created.
21+
2. Assert that the metrics that were removed (which we checked for earlier) are in fact no longer present in the collector metrics.
22+
3. Assert that the custom metrics that were added are present in the collector metrics.
23+
4. Add an `otlp` exporter to both PostgresCluster and PGAdmin `instrumentation` specs and create a standalone OTel collector to receive data from our sidecar collectors.
24+
1. Ensure that the ConfigMap, Service, and Deployment for the standalone OTel collector come up and that the collector container is running and ready.
25+
2. Assert that the standalone collector is receiving logs from all of our components (i.e. the standalone collector is getting logs for postgres, patroni, pgbackrest, pgbouncer, pgadmin, and gunicorn).
26+
27+
### NOTES
28+
29+
It is possible that this test could flake if, for some reason, a component is not producing any logs. If we start to see this happen, we could either add test steps that perform actions that should trigger logs or turn up the log levels (although the latter option could create more problems, as we have seen issues with the collector when the stream of logs is too voluminous).

0 commit comments

Comments
 (0)