
Commit 000e6af

Take snapshots of pgdata using a dedicated volume. Whenever a backup finishes successfully, do a delta restore into the dedicated volume and then snapshot the volume.
Add/adjust tests for snapshots. Co-authored-by: Anthony Landreth <[email protected]>
1 parent 6707a99 commit 000e6af

File tree

15 files changed: +1736 −439 lines changed

config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml

Lines changed: 1 addition & 0 deletions

```diff
@@ -4330,6 +4330,7 @@ spec:
           volumeSnapshotClassName:
             description: Name of the VolumeSnapshotClass that should be
               used by VolumeSnapshots
+            minLength: 1
             type: string
         required:
         - volumeSnapshotClassName
```
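The field was already required; with minLength: 1 an empty class name is now also rejected at admission. As a hedged sketch of populating it from Go — the `v1beta1.VolumeSnapshots` type name is inferred from the `Spec.Backups.Snapshots` field used elsewhere in this commit, and the class name is a placeholder:

```go
package main

import (
	"fmt"

	"github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1"
)

func main() {
	cluster := v1beta1.PostgresCluster{}
	// minLength: 1 is enforced by the CRD, so this must be non-empty.
	cluster.Spec.Backups.Snapshots = &v1beta1.VolumeSnapshots{
		VolumeSnapshotClassName: "example-snapshotclass", // placeholder name
	}
	fmt.Println(cluster.Spec.Backups.Snapshots.VolumeSnapshotClassName)
}
```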

internal/controller/postgrescluster/controller.go

Lines changed: 5 additions & 1 deletion

```diff
@@ -168,6 +168,7 @@ func (r *Reconciler) Reconcile(
 		err                          error
 		backupsSpecFound             bool
 		backupsReconciliationAllowed bool
+		dedicatedSnapshotPVC         *corev1.PersistentVolumeClaim
 	)

 	patchClusterStatus := func() error {
@@ -364,7 +365,10 @@ func (r *Reconciler) Reconcile(
 		}
 	}
 	if err == nil {
-		err = r.reconcileVolumeSnapshots(ctx, cluster, instances, clusterVolumes)
+		dedicatedSnapshotPVC, err = r.reconcileDedicatedSnapshotVolume(ctx, cluster, clusterVolumes)
+	}
+	if err == nil {
+		err = r.reconcileVolumeSnapshots(ctx, cluster, dedicatedSnapshotPVC)
 	}
 	if err == nil {
 		err = r.reconcilePGBouncer(ctx, cluster, instances, primaryCertificate, rootCA)
```
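This hunk splits snapshotting into two chained steps: first reconcile the dedicated snapshot PVC (the delta-restore target described in the commit message), then reconcile VolumeSnapshots against that PVC. A minimal standalone sketch of the pattern follows; the function bodies are placeholders, and only the shape of the calls comes from this hunk:

```go
package main

import (
	"context"
	"fmt"

	corev1 "k8s.io/api/core/v1"
)

// Placeholder: the real method ensures the dedicated snapshot PVC exists and
// has been delta-restored from the most recent successful backup.
func reconcileDedicatedSnapshotVolume(ctx context.Context) (*corev1.PersistentVolumeClaim, error) {
	return &corev1.PersistentVolumeClaim{}, nil
}

// Placeholder: the real method creates or prunes VolumeSnapshots of that PVC.
func reconcileVolumeSnapshots(ctx context.Context, pvc *corev1.PersistentVolumeClaim) error {
	return nil
}

func main() {
	ctx := context.Background()
	var (
		err                  error
		dedicatedSnapshotPVC *corev1.PersistentVolumeClaim
	)
	// Each step runs only if every prior step succeeded, matching the
	// `if err == nil` chaining in Reconcile.
	if err == nil {
		dedicatedSnapshotPVC, err = reconcileDedicatedSnapshotVolume(ctx)
	}
	if err == nil {
		err = reconcileVolumeSnapshots(ctx, dedicatedSnapshotPVC)
	}
	fmt.Println(dedicatedSnapshotPVC != nil, err)
}
```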

internal/controller/postgrescluster/helpers_test.go

Lines changed: 55 additions & 0 deletions

```diff
@@ -11,6 +11,7 @@ import (
 	"testing"
 	"time"

+	batchv1 "k8s.io/api/batch/v1"
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -21,6 +22,7 @@ import (

 	"github.com/crunchydata/postgres-operator/internal/controller/runtime"
 	"github.com/crunchydata/postgres-operator/internal/initialize"
+	"github.com/crunchydata/postgres-operator/internal/naming"
 	"github.com/crunchydata/postgres-operator/internal/testing/require"
 	"github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1"
 )
@@ -99,6 +101,7 @@ func testVolumeClaimSpec() corev1.PersistentVolumeClaimSpec {
 		},
 	}
 }
+
 func testCluster() *v1beta1.PostgresCluster {
 	// Defines a base cluster spec that can be used by tests to generate a
 	// cluster with an expected number of instances
@@ -138,6 +141,58 @@ func testCluster() *v1beta1.PostgresCluster {
 	return cluster.DeepCopy()
 }

+func testBackupJob(cluster *v1beta1.PostgresCluster) *batchv1.Job {
+	job := batchv1.Job{
+		TypeMeta: metav1.TypeMeta{
+			APIVersion: batchv1.SchemeGroupVersion.String(),
+			Kind:       "Job",
+		},
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "backup-job-1",
+			Namespace: cluster.Namespace,
+			Labels: map[string]string{
+				naming.LabelCluster:          cluster.Name,
+				naming.LabelPGBackRestBackup: "",
+				naming.LabelPGBackRestRepo:   "repo1",
+			},
+		},
+		Spec: batchv1.JobSpec{
+			Template: corev1.PodTemplateSpec{
+				Spec: corev1.PodSpec{
+					Containers:    []corev1.Container{{Name: "test", Image: "test"}},
+					RestartPolicy: corev1.RestartPolicyNever,
+				},
+			},
+		},
+	}
+
+	return job.DeepCopy()
+}
+
+func testRestoreJob(cluster *v1beta1.PostgresCluster) *batchv1.Job {
+	job := batchv1.Job{
+		TypeMeta: metav1.TypeMeta{
+			APIVersion: batchv1.SchemeGroupVersion.String(),
+			Kind:       "Job",
+		},
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "restore-job-1",
+			Namespace: cluster.Namespace,
+			Labels:    naming.PGBackRestRestoreJobLabels(cluster.Name),
+		},
+		Spec: batchv1.JobSpec{
+			Template: corev1.PodTemplateSpec{
+				Spec: corev1.PodSpec{
+					Containers:    []corev1.Container{{Name: "test", Image: "test"}},
+					RestartPolicy: corev1.RestartPolicyNever,
+				},
+			},
+		},
+	}
+
+	return job.DeepCopy()
+}
+
 // setupManager creates the runtime manager used during controller testing
 func setupManager(t *testing.T, cfg *rest.Config,
 	controllerSetup func(mgr manager.Manager)) (context.Context, context.CancelFunc) {
```
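These helpers return minimal, valid Jobs labeled the way the pgBackRest code expects, so snapshot tests can fabricate backup and restore history without running real jobs. A hypothetical sketch of how a test in this package might mark a helper-built backup Job as finished (the state the snapshot logic keys off); the test name and namespace here are made up, and the imports are those already present in the hunks above:

```go
func TestBackupJobCompletionSketch(t *testing.T) {
	cluster := testCluster()
	cluster.Namespace = "test-namespace" // hypothetical namespace

	job := testBackupJob(cluster)

	// Simulate a successfully completed backup.
	now := metav1.NewTime(time.Now())
	job.Status.Succeeded = 1
	job.Status.CompletionTime = &now

	if job.Status.CompletionTime == nil || job.Status.Succeeded != 1 {
		t.Fatal("expected the job to look complete")
	}
}
```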

internal/controller/postgrescluster/pgbackrest.go

Lines changed: 24 additions & 7 deletions

```diff
@@ -32,6 +32,7 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/reconcile"

 	"github.com/crunchydata/postgres-operator/internal/config"
+	"github.com/crunchydata/postgres-operator/internal/feature"
 	"github.com/crunchydata/postgres-operator/internal/initialize"
 	"github.com/crunchydata/postgres-operator/internal/logging"
 	"github.com/crunchydata/postgres-operator/internal/naming"
@@ -197,7 +198,7 @@ func (r *Reconciler) applyRepoVolumeIntent(ctx context.Context,
 // getPGBackRestResources returns the existing pgBackRest resources that should utilized by the
 // PostgresCluster controller during reconciliation. Any items returned are verified to be owned
 // by the PostgresCluster controller and still applicable per the current PostgresCluster spec.
-// Additionally, and resources identified that no longer correspond to any current configuration
+// Additionally, any resources identified that no longer correspond to any current configuration
 // are deleted.
 func (r *Reconciler) getPGBackRestResources(ctx context.Context,
 	postgresCluster *v1beta1.PostgresCluster,
@@ -374,6 +375,15 @@ func (r *Reconciler) cleanupRepoResources(ctx context.Context,
 			if !backupsSpecFound {
 				break
 			}
+
+			// If the restore job has the PGBackRestBackupJobCompletion annotation, it is
+			// used for volume snapshots and should not be deleted (volume snapshots code
+			// will clean it up when appropriate).
+			if _, ok := owned.GetAnnotations()[naming.PGBackRestBackupJobCompletion]; ok {
+				ownedNoDelete = append(ownedNoDelete, owned)
+				delete = false
+			}
+
 			// When a cluster is prepared for restore, the system identifier is removed from status
 			// and the cluster is therefore no longer bootstrapped. Only once the restore Job is
 			// complete will the cluster then be bootstrapped again, which means by the time we
@@ -762,7 +772,7 @@ func (r *Reconciler) generateRepoVolumeIntent(postgresCluster *v1beta1.PostgresC
 }

 // generateBackupJobSpecIntent generates a JobSpec for a pgBackRest backup job
-func generateBackupJobSpecIntent(postgresCluster *v1beta1.PostgresCluster,
+func generateBackupJobSpecIntent(ctx context.Context, postgresCluster *v1beta1.PostgresCluster,
 	repo v1beta1.PGBackRestRepo, serviceAccountName string,
 	labels, annotations map[string]string, opts ...string) *batchv1.JobSpec {

@@ -771,6 +781,11 @@ func generateBackupJobSpecIntent(postgresCluster *v1beta1.PostgresCluster,
 		"--stanza=" + pgbackrest.DefaultStanzaName,
 		"--repo=" + repoIndex,
 	}
+	// If VolumeSnapshots are enabled, use archive-copy and archive-check options
+	if postgresCluster.Spec.Backups.Snapshots != nil && feature.Enabled(ctx, feature.VolumeSnapshots) {
+		cmdOpts = append(cmdOpts, "--archive-copy=y", "--archive-check=y")
+	}
+
 	cmdOpts = append(cmdOpts, opts...)

 	container := corev1.Container{
@@ -1634,6 +1649,9 @@ func (r *Reconciler) reconcilePostgresClusterDataSource(ctx context.Context,
 		return errors.WithStack(err)
 	}

+	// TODO(snapshots): If pgdata is being sourced by a VolumeSnapshot then don't perform a typical restore job;
+	// we only want to replay the WAL.
+
 	// reconcile the pgBackRest restore Job to populate the cluster's data directory
 	if err := r.reconcileRestoreJob(ctx, cluster, sourceCluster, pgdata, pgwal, pgtablespaces,
 		dataSource, instanceName, instanceSetName, configHash, pgbackrest.DefaultStanzaName); err != nil {
@@ -2362,7 +2380,7 @@ func (r *Reconciler) reconcileManualBackup(ctx context.Context,
 	backupJob.ObjectMeta.Labels = labels
 	backupJob.ObjectMeta.Annotations = annotations

-	spec := generateBackupJobSpecIntent(postgresCluster, repo,
+	spec := generateBackupJobSpecIntent(ctx, postgresCluster, repo,
 		serviceAccount.GetName(), labels, annotations, backupOpts...)

 	backupJob.Spec = *spec
@@ -2523,7 +2541,7 @@ func (r *Reconciler) reconcileReplicaCreateBackup(ctx context.Context,
 	backupJob.ObjectMeta.Labels = labels
 	backupJob.ObjectMeta.Annotations = annotations

-	spec := generateBackupJobSpecIntent(postgresCluster, replicaCreateRepo,
+	spec := generateBackupJobSpecIntent(ctx, postgresCluster, replicaCreateRepo,
 		serviceAccount.GetName(), labels, annotations)

 	backupJob.Spec = *spec
@@ -2886,8 +2904,7 @@ func (r *Reconciler) reconcilePGBackRestCronJob(
 	labels := naming.Merge(
 		cluster.Spec.Metadata.GetLabelsOrNil(),
 		cluster.Spec.Backups.PGBackRest.Metadata.GetLabelsOrNil(),
-		naming.PGBackRestCronJobLabels(cluster.Name, repo.Name, backupType),
-	)
+		naming.PGBackRestCronJobLabels(cluster.Name, repo.Name, backupType))
 	objectmeta := naming.PGBackRestCronJob(cluster, backupType, repo.Name)

 	// Look for an existing CronJob by the associated Labels. If one exists,
@@ -2951,7 +2968,7 @@ func (r *Reconciler) reconcilePGBackRestCronJob(
 	// set backup type (i.e. "full", "diff", "incr")
 	backupOpts := []string{"--type=" + backupType}

-	jobSpec := generateBackupJobSpecIntent(cluster, repo,
+	jobSpec := generateBackupJobSpecIntent(ctx, cluster, repo,
 		serviceAccount.GetName(), labels, annotations, backupOpts...)

 	// Suspend cronjobs when shutdown or read-only. Any jobs that have already
```
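Two behavior changes ride along with the ctx plumbing above: restore Jobs annotated with PGBackRestBackupJobCompletion are exempted from repo cleanup (the snapshot code owns their lifecycle), and backup Jobs gain pgBackRest's --archive-copy/--archive-check options when snapshots are configured and the VolumeSnapshots feature gate is on, so the backup itself carries the WAL needed to bring the delta-restored volume to consistency. A minimal sketch of that option assembly, with plain booleans standing in for the operator's spec and feature-gate checks:

```go
package main

import "fmt"

// buildBackupOpts mirrors the cmdOpts assembly in generateBackupJobSpecIntent:
// base options first, the snapshot-related archive options only when both
// conditions hold, then any caller-supplied options. snapshotsConfigured and
// gateEnabled are stand-ins for `Spec.Backups.Snapshots != nil` and
// `feature.Enabled(ctx, feature.VolumeSnapshots)`.
func buildBackupOpts(snapshotsConfigured, gateEnabled bool, opts ...string) []string {
	cmdOpts := []string{
		"--stanza=db", // the operator's default stanza name
		"--repo=1",    // example repo index
	}
	if snapshotsConfigured && gateEnabled {
		cmdOpts = append(cmdOpts, "--archive-copy=y", "--archive-check=y")
	}
	return append(cmdOpts, opts...)
}

func main() {
	fmt.Println(buildBackupOpts(true, true, "--type=full"))
	// [--stanza=db --repo=1 --archive-copy=y --archive-check=y --type=full]
}
```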

internal/controller/postgrescluster/pgbackrest_test.go

Lines changed: 12 additions & 11 deletions

```diff
@@ -2438,8 +2438,9 @@ func TestCopyConfigurationResources(t *testing.T) {
 }

 func TestGenerateBackupJobIntent(t *testing.T) {
+	ctx := context.Background()
 	t.Run("empty", func(t *testing.T) {
-		spec := generateBackupJobSpecIntent(
+		spec := generateBackupJobSpecIntent(ctx,
 			&v1beta1.PostgresCluster{}, v1beta1.PGBackRestRepo{},
 			"",
 			nil, nil,
@@ -2512,7 +2513,7 @@ volumes:
 				ImagePullPolicy: corev1.PullAlways,
 			},
 		}
-		job := generateBackupJobSpecIntent(
+		job := generateBackupJobSpecIntent(ctx,
 			cluster, v1beta1.PGBackRestRepo{},
 			"",
 			nil, nil,
@@ -2527,7 +2528,7 @@ volumes:
 		cluster.Spec.Backups = v1beta1.Backups{
 			PGBackRest: v1beta1.PGBackRestArchive{},
 		}
-		job := generateBackupJobSpecIntent(
+		job := generateBackupJobSpecIntent(ctx,
 			cluster, v1beta1.PGBackRestRepo{},
 			"",
 			nil, nil,
@@ -2544,7 +2545,7 @@ volumes:
 				},
 			},
 		}
-		job := generateBackupJobSpecIntent(
+		job := generateBackupJobSpecIntent(ctx,
 			cluster, v1beta1.PGBackRestRepo{},
 			"",
 			nil, nil,
@@ -2583,7 +2584,7 @@ volumes:
 				},
 			},
 		}
-		job := generateBackupJobSpecIntent(
+		job := generateBackupJobSpecIntent(ctx,
 			cluster, v1beta1.PGBackRestRepo{},
 			"",
 			nil, nil,
@@ -2596,7 +2597,7 @@ volumes:
 		cluster.Spec.Backups.PGBackRest.Jobs = &v1beta1.BackupJobs{
 			PriorityClassName: initialize.String("some-priority-class"),
 		}
-		job := generateBackupJobSpecIntent(
+		job := generateBackupJobSpecIntent(ctx,
 			cluster, v1beta1.PGBackRestRepo{},
 			"",
 			nil, nil,
@@ -2614,7 +2615,7 @@ volumes:
 		cluster.Spec.Backups.PGBackRest.Jobs = &v1beta1.BackupJobs{
 			Tolerations: tolerations,
 		}
-		job := generateBackupJobSpecIntent(
+		job := generateBackupJobSpecIntent(ctx,
 			cluster, v1beta1.PGBackRestRepo{},
 			"",
 			nil, nil,
@@ -2628,14 +2629,14 @@ volumes:
 	t.Run("Undefined", func(t *testing.T) {
 		cluster.Spec.Backups.PGBackRest.Jobs = nil

-		spec := generateBackupJobSpecIntent(
+		spec := generateBackupJobSpecIntent(ctx,
 			cluster, v1beta1.PGBackRestRepo{}, "", nil, nil,
 		)
 		assert.Assert(t, spec.TTLSecondsAfterFinished == nil)

 		cluster.Spec.Backups.PGBackRest.Jobs = &v1beta1.BackupJobs{}

-		spec = generateBackupJobSpecIntent(
+		spec = generateBackupJobSpecIntent(ctx,
 			cluster, v1beta1.PGBackRestRepo{}, "", nil, nil,
 		)
 		assert.Assert(t, spec.TTLSecondsAfterFinished == nil)
@@ -2646,7 +2647,7 @@ volumes:
 			TTLSecondsAfterFinished: initialize.Int32(0),
 		}

-		spec := generateBackupJobSpecIntent(
+		spec := generateBackupJobSpecIntent(ctx,
 			cluster, v1beta1.PGBackRestRepo{}, "", nil, nil,
 		)
 		if assert.Check(t, spec.TTLSecondsAfterFinished != nil) {
@@ -2659,7 +2660,7 @@ volumes:
 			TTLSecondsAfterFinished: initialize.Int32(100),
 		}

-		spec := generateBackupJobSpecIntent(
+		spec := generateBackupJobSpecIntent(ctx,
 			cluster, v1beta1.PGBackRestRepo{}, "", nil, nil,
 		)
 		if assert.Check(t, spec.TTLSecondsAfterFinished != nil) {
```
