Skip to content

Commit d1cdcf4

Browse files
committed
Fix for PGO-2380:
Only add logrotate volume mounts to instance pod when backups are enabled. Add kuttl tests to ensure that collector will run on postgres instance when backups are disabled.
1 parent d7d2a1d commit d1cdcf4

15 files changed

+283
-3
lines changed

internal/controller/postgrescluster/instance.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1218,11 +1218,12 @@ func (r *Reconciler) reconcileInstance(
12181218
}
12191219
}
12201220

1221-
// For now, we are not using logrotate to rotate postgres or patroni logs
1222-
// but we are using it for pgbackrest logs in the postgres pod
1221+
// For now, we are not using logrotate to rotate postgres or patroni logs,
1222+
// but we are using it for pgbackrest logs in the postgres pod, so we will
1223+
// set includeLogrotate to true, but only if backups are enabled.
12231224
collector.AddToPod(ctx, cluster.Spec.Instrumentation, cluster.Spec.ImagePullPolicy, instanceConfigMap, &instance.Spec.Template,
12241225
[]corev1.VolumeMount{postgres.DataVolumeMount()}, pgPassword,
1225-
[]string{naming.PGBackRestPGDataLogPath}, true, true)
1226+
[]string{naming.PGBackRestPGDataLogPath}, backupsSpecFound, true)
12261227
}
12271228

12281229
// Add postgres-exporter to the instance Pod spec
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
apiVersion: kuttl.dev/v1beta1
2+
kind: TestStep
3+
apply:
4+
- files/12--create-cluster.yaml
5+
assert:
6+
- files/12-cluster-created.yaml
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
apiVersion: kuttl.dev/v1beta1
2+
kind: TestAssert
3+
commands:
4+
# First, check that all containers in the instance pod are ready.
5+
# Then, grab the collector metrics output and check that a postgres
6+
# metric is present, as well as a patroni metric.
7+
# Then, check the collector logs for patroni, and postgres logs.
8+
# Finally, ensure the monitoring user exists and is configured.
9+
- script: |
10+
retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; }
11+
check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; }
12+
contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; }
13+
14+
pod=$(kubectl get pods -o name -n "${NAMESPACE}" \
15+
-l postgres-operator.crunchydata.com/cluster=otel-cluster-no-backups,postgres-operator.crunchydata.com/data=postgres)
16+
[ "$pod" = "" ] && retry "Pod not found" && exit 1
17+
18+
condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}")
19+
[ "$condition_json" = "" ] && retry "conditions not found" && exit 1
20+
{ check_containers_ready "$condition_json"; } || {
21+
retry "containers not ready"
22+
exit 1
23+
}
24+
25+
scrape_metrics=$(kubectl exec "${pod}" -c collector -n "${NAMESPACE}" -- \
26+
curl --insecure --silent http://localhost:9187/metrics)
27+
{ contains "${scrape_metrics}" 'ccp_connection_stats_active'; } || {
28+
retry "5 second metric not found"
29+
exit 1
30+
}
31+
{ contains "${scrape_metrics}" 'patroni_postgres_running'; } || {
32+
retry "patroni metric not found"
33+
exit 1
34+
}
35+
36+
logs=$(kubectl logs "${pod}" --namespace "${NAMESPACE}" -c collector | grep InstrumentationScope)
37+
{ contains "${logs}" 'InstrumentationScope patroni'; } || {
38+
retry "patroni logs not found"
39+
exit 1
40+
}
41+
{ contains "${logs}" 'InstrumentationScope postgres'; } || {
42+
retry "postgres logs not found"
43+
exit 1
44+
}
45+
46+
kubectl exec --stdin "${pod}" --namespace "${NAMESPACE}" -c database \
47+
-- psql -qb --set ON_ERROR_STOP=1 --file=- <<'SQL'
48+
DO $$
49+
DECLARE
50+
result record;
51+
BEGIN
52+
SELECT * INTO result FROM pg_catalog.pg_roles WHERE rolname = 'ccp_monitoring';
53+
ASSERT FOUND, 'user not found';
54+
END $$
55+
SQL
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
apiVersion: kuttl.dev/v1beta1
2+
kind: TestStep
3+
apply:
4+
- files/14--add-backups.yaml
5+
assert:
6+
- files/14-backups-added.yaml
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
apiVersion: kuttl.dev/v1beta1
2+
kind: TestStep
3+
commands:
4+
- command: |-
5+
kubectl patch postgrescluster otel-cluster-no-backups --type 'merge' -p '{"spec":{"backups": null}}'
6+
namespaced: true
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
apiVersion: kuttl.dev/v1beta1
2+
kind: TestStep
3+
commands:
4+
- command: kubectl annotate postgrescluster otel-cluster-no-backups postgres-operator.crunchydata.com/authorizeBackupRemoval="true"
5+
namespaced: true
6+
assert:
7+
- files/16-backups-removed.yaml

testing/kuttl/e2e/otel-logging-and-metrics/README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,12 @@ This test assumes that the operator has both OpenTelemetryLogs and OpenTelemetry
2323
4. Add an `otlp` exporter to both PostgresCluster and PGAdmin `instrumentation` specs and create a standalone OTel collector to receive data from our sidecar collectors.
2424
1. Ensure that the ConfigMap, Service, and Deployment for the standalone OTel collector come up and that the collector container is running and ready.
2525
2. Assert that the standalone collector is receiving logs from all of our components (i.e. the standalone collector is getting logs for postgres, patroni, pgbackrest, pgbouncer, pgadmin, and gunicorn).
26+
5. Create a new cluster with `instrumentation` spec in place, but no `backups` spec to test the OTel features with optional backups.
27+
1. Ensure that the cluster comes up and the database and collector containers are running and ready.
28+
2. Add a backups spec to the new cluster and ensure that pgbackrest is added to the instance pod, a repo-host pod is created, and the collector runs on both pods.
29+
3. Remove the backups spec from the new cluster.
30+
4. Annotate the cluster to allow backups to be removed.
31+
5. Ensure that the repo-host pod is destroyed, pgbackrest is removed from the instance pod, and the collector continues to run on the instance pod.
2632

2733
### NOTES
2834

testing/kuttl/e2e/otel-logging-and-metrics/files/01-instrumentation-added.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ metadata:
4646
labels:
4747
postgres-operator.crunchydata.com/data: pgbackrest
4848
postgres-operator.crunchydata.com/cluster: otel-cluster
49+
postgres-operator.crunchydata.com/crunchy-otel-collector: "true"
4950
status:
5051
containerStatuses:
5152
- name: collector

testing/kuttl/e2e/otel-logging-and-metrics/files/08-custom-queries-added.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ metadata:
4646
labels:
4747
postgres-operator.crunchydata.com/data: pgbackrest
4848
postgres-operator.crunchydata.com/cluster: otel-cluster
49+
postgres-operator.crunchydata.com/crunchy-otel-collector: "true"
4950
status:
5051
containerStatuses:
5152
- name: collector

testing/kuttl/e2e/otel-logging-and-metrics/files/10-logs-exporter-added.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ metadata:
4646
labels:
4747
postgres-operator.crunchydata.com/data: pgbackrest
4848
postgres-operator.crunchydata.com/cluster: otel-cluster
49+
postgres-operator.crunchydata.com/crunchy-otel-collector: "true"
4950
status:
5051
containerStatuses:
5152
- name: collector
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
---
2+
apiVersion: postgres-operator.crunchydata.com/v1beta1
3+
kind: PostgresCluster
4+
metadata:
5+
name: otel-cluster-no-backups
6+
spec:
7+
postgresVersion: ${KUTTL_PG_VERSION}
8+
instances:
9+
- name: instance1
10+
dataVolumeClaimSpec:
11+
accessModes:
12+
- "ReadWriteOnce"
13+
resources:
14+
requests:
15+
storage: 1Gi
16+
instrumentation: {}
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
apiVersion: postgres-operator.crunchydata.com/v1beta1
2+
kind: PostgresCluster
3+
metadata:
4+
name: otel-cluster-no-backups
5+
status:
6+
instances:
7+
- name: instance1
8+
readyReplicas: 1
9+
replicas: 1
10+
updatedReplicas: 1
11+
---
12+
apiVersion: v1
13+
kind: Pod
14+
metadata:
15+
labels:
16+
postgres-operator.crunchydata.com/data: postgres
17+
postgres-operator.crunchydata.com/role: master
18+
postgres-operator.crunchydata.com/cluster: otel-cluster-no-backups
19+
postgres-operator.crunchydata.com/crunchy-otel-collector: "true"
20+
status:
21+
containerStatuses:
22+
- name: collector
23+
ready: true
24+
started: true
25+
- name: database
26+
ready: true
27+
started: true
28+
- name: replication-cert-copy
29+
ready: true
30+
started: true
31+
phase: Running
32+
---
33+
apiVersion: v1
34+
kind: Service
35+
metadata:
36+
name: otel-cluster-no-backups-primary
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
---
2+
apiVersion: postgres-operator.crunchydata.com/v1beta1
3+
kind: PostgresCluster
4+
metadata:
5+
name: otel-cluster-no-backups
6+
spec:
7+
postgresVersion: ${KUTTL_PG_VERSION}
8+
instances:
9+
- name: instance1
10+
dataVolumeClaimSpec:
11+
accessModes:
12+
- "ReadWriteOnce"
13+
resources:
14+
requests:
15+
storage: 1Gi
16+
backups:
17+
pgbackrest:
18+
manual:
19+
repoName: repo1
20+
options:
21+
- --type=diff
22+
repos:
23+
- name: repo1
24+
volume:
25+
volumeClaimSpec:
26+
accessModes:
27+
- "ReadWriteOnce"
28+
resources:
29+
requests:
30+
storage: 1Gi
31+
instrumentation: {}
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
apiVersion: postgres-operator.crunchydata.com/v1beta1
2+
kind: PostgresCluster
3+
metadata:
4+
name: otel-cluster-no-backups
5+
status:
6+
instances:
7+
- name: instance1
8+
readyReplicas: 1
9+
replicas: 1
10+
updatedReplicas: 1
11+
---
12+
apiVersion: v1
13+
kind: Pod
14+
metadata:
15+
labels:
16+
postgres-operator.crunchydata.com/data: postgres
17+
postgres-operator.crunchydata.com/role: master
18+
postgres-operator.crunchydata.com/cluster: otel-cluster-no-backups
19+
postgres-operator.crunchydata.com/crunchy-otel-collector: "true"
20+
status:
21+
containerStatuses:
22+
- name: collector
23+
ready: true
24+
started: true
25+
- name: database
26+
ready: true
27+
started: true
28+
- name: pgbackrest
29+
ready: true
30+
started: true
31+
- name: pgbackrest-config
32+
ready: true
33+
started: true
34+
- name: replication-cert-copy
35+
ready: true
36+
started: true
37+
phase: Running
38+
---
39+
apiVersion: v1
40+
kind: Pod
41+
metadata:
42+
labels:
43+
postgres-operator.crunchydata.com/data: pgbackrest
44+
postgres-operator.crunchydata.com/cluster: otel-cluster-no-backups
45+
postgres-operator.crunchydata.com/crunchy-otel-collector: "true"
46+
status:
47+
containerStatuses:
48+
- name: collector
49+
ready: true
50+
started: true
51+
- name: pgbackrest
52+
ready: true
53+
started: true
54+
- name: pgbackrest-config
55+
ready: true
56+
started: true
57+
phase: Running
58+
---
59+
apiVersion: batch/v1
60+
kind: Job
61+
metadata:
62+
labels:
63+
postgres-operator.crunchydata.com/cluster: otel-cluster-no-backups
64+
postgres-operator.crunchydata.com/pgbackrest-backup: replica-create
65+
status:
66+
succeeded: 1
67+
---
68+
apiVersion: v1
69+
kind: Service
70+
metadata:
71+
name: otel-cluster-no-backups-primary
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
apiVersion: postgres-operator.crunchydata.com/v1beta1
2+
kind: PostgresCluster
3+
metadata:
4+
name: otel-cluster-no-backups
5+
status:
6+
instances:
7+
- name: instance1
8+
readyReplicas: 1
9+
replicas: 1
10+
updatedReplicas: 1
11+
---
12+
apiVersion: v1
13+
kind: Pod
14+
metadata:
15+
labels:
16+
postgres-operator.crunchydata.com/data: postgres
17+
postgres-operator.crunchydata.com/role: master
18+
postgres-operator.crunchydata.com/cluster: otel-cluster-no-backups
19+
postgres-operator.crunchydata.com/crunchy-otel-collector: "true"
20+
status:
21+
containerStatuses:
22+
- name: collector
23+
ready: true
24+
started: true
25+
- name: database
26+
ready: true
27+
started: true
28+
- name: replication-cert-copy
29+
ready: true
30+
started: true
31+
phase: Running
32+
---
33+
apiVersion: v1
34+
kind: Service
35+
metadata:
36+
name: otel-cluster-no-backups-primary

0 commit comments

Comments
 (0)