
OADP-5782: Add hypershift-oadp-plugin E2E tests #1684


Merged · 1 commit · Apr 23, 2025
6 changes: 6 additions & 0 deletions Makefile
@@ -559,6 +559,7 @@ VELERO_INSTANCE_NAME ?= velero-test
ARTIFACT_DIR ?= /tmp
HCO_UPSTREAM ?= false
TEST_VIRT ?= false
TEST_HCP ?= false
TEST_UPGRADE ?= false
TEST_FILTER = (($(shell echo '! aws && ! gcp && ! azure && ! ibmcloud' | \
$(SED) -r "s/[&]* [!] $(CLUSTER_TYPE)|[!] $(CLUSTER_TYPE) [&]*//")) || $(CLUSTER_TYPE))
@@ -573,6 +574,11 @@ ifeq ($(TEST_UPGRADE),true)
else
TEST_FILTER += && (! upgrade)
endif
ifeq ($(TEST_HCP),true)
TEST_FILTER += && (hcp)
else
TEST_FILTER += && (! hcp)
endif

.PHONY: test-e2e
test-e2e: test-e2e-setup install-ginkgo ## Run E2E tests against OADP operator installed in cluster. For more information, check docs/developer/testing/TESTING.md
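The new TEST_HCP toggle follows the same pattern as TEST_VIRT and TEST_UPGRADE: when it is true the Ginkgo label filter gains `&& (hcp)`, so only specs labeled `hcp` (the new HCP suite added below) are selected; when it is false the filter gains `&& (! hcp)` and those specs are skipped. Assuming the usual make variable override behaviour described in docs/developer/testing/TESTING.md, a run such as `make test-e2e TEST_HCP=true` should therefore execute only the HCP backup and restore tests.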
3 changes: 2 additions & 1 deletion bundle/manifests/oadp-operator.clusterserviceversion.yaml
@@ -40,7 +40,8 @@ metadata:
"defaultPlugins": [
"openshift",
"aws",
"kubevirt"
"kubevirt",
"hypershift"
],
"disableFsBackup": false
}
@@ -10,6 +10,7 @@ spec:
- openshift
- aws
- kubevirt
- hypershift
nodeAgent:
enable: true
uploaderType: kopia
1 change: 1 addition & 0 deletions go.mod
@@ -12,6 +12,7 @@ require (
github.com/onsi/ginkgo/v2 v2.19.0
github.com/onsi/gomega v1.33.1
github.com/openshift/api v0.0.0-20240524162738-d899f8877d22 // release-4.12
github.com/openshift/hypershift/api v0.0.0-20241128081537-8326d865eaf5
github.com/operator-framework/api v0.10.7
github.com/operator-framework/operator-lib v0.9.0
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.51.2
2 changes: 2 additions & 0 deletions go.sum
@@ -615,6 +615,8 @@ github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk=
github.com/onsi/gomega v1.33.1/go.mod h1:U4R44UsT+9eLIaYRB2a5qajjtQYn0hauxvRm16AVYg0=
github.com/openshift/api v0.0.0-20240524162738-d899f8877d22 h1:AW8KUN4k7qR2egrCCe3x95URHQ3N188+a/b0qpRyAHg=
github.com/openshift/api v0.0.0-20240524162738-d899f8877d22/go.mod h1:7Hm1kLJGxWT6eysOpD2zUztdn+w91eiERn6KtI5o9aw=
github.com/openshift/hypershift/api v0.0.0-20241128081537-8326d865eaf5 h1:z8AkPjlJ/CPqED/EPtlgQKYEt8+Edc30ZR8eQWOEigA=
github.com/openshift/hypershift/api v0.0.0-20241128081537-8326d865eaf5/go.mod h1:3UlUlywmXBCEMF3GACTvMAOvv2lU5qzUDvTYFXeGbKU=
github.com/openshift/velero v0.10.2-0.20250313160323-584cf1148a74 h1:ZHO0O6g1Enel2O4rAk7VfWLHlQKYkOcdWGAmoiZ3fQw=
github.com/openshift/velero v0.10.2-0.20250313160323-584cf1148a74/go.mod h1:sASoDB9pLWqvIi1nD1ZFOpmj5JB+p10lHVm+f+Hp1oU=
github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o=
30 changes: 6 additions & 24 deletions tests/e2e/e2e_suite_test.go
@@ -8,26 +8,17 @@ import (
"testing"
"time"

volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v4/apis/volumesnapshot/v1"
"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
openshiftappsv1 "github.com/openshift/api/apps/v1"
openshiftbuildv1 "github.com/openshift/api/build/v1"
openshiftroutev1 "github.com/openshift/api/route/v1"
openshiftsecurityv1 "github.com/openshift/api/security/v1"
openshifttemplatev1 "github.com/openshift/api/template/v1"
operatorsv1 "github.com/operator-framework/api/pkg/operators/v1"
operatorsv1alpha1 "github.com/operator-framework/api/pkg/operators/v1alpha1"
velerov1 "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/uuid"
"k8s.io/client-go/dynamic"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/config"
"sigs.k8s.io/controller-runtime/pkg/log/zap"

oadpv1alpha1 "github.com/openshift/oadp-operator/api/v1alpha1"
"github.com/openshift/oadp-operator/tests/e2e/lib"
)

@@ -136,21 +127,9 @@ func TestOADPE2E(t *testing.T) {
kubernetesClientForSuiteRun, err = kubernetes.NewForConfig(kubeConfig)
gomega.Expect(err).NotTo(gomega.HaveOccurred())

runTimeClientForSuiteRun, err = client.New(kubeConfig, client.Options{})
runTimeClientForSuiteRun, err = client.New(kubeConfig, client.Options{Scheme: lib.Scheme})
gomega.Expect(err).NotTo(gomega.HaveOccurred())

oadpv1alpha1.AddToScheme(runTimeClientForSuiteRun.Scheme())
velerov1.AddToScheme(runTimeClientForSuiteRun.Scheme())
openshiftappsv1.AddToScheme(runTimeClientForSuiteRun.Scheme())
openshiftbuildv1.AddToScheme(runTimeClientForSuiteRun.Scheme())
openshiftsecurityv1.AddToScheme(runTimeClientForSuiteRun.Scheme())
openshifttemplatev1.AddToScheme(runTimeClientForSuiteRun.Scheme())
openshiftroutev1.AddToScheme(runTimeClientForSuiteRun.Scheme())
corev1.AddToScheme(runTimeClientForSuiteRun.Scheme())
volumesnapshotv1.AddToScheme(runTimeClientForSuiteRun.Scheme())
operatorsv1alpha1.AddToScheme(runTimeClientForSuiteRun.Scheme())
operatorsv1.AddToScheme(runTimeClientForSuiteRun.Scheme())

dynamicClientForSuiteRun, err = dynamic.NewForConfig(kubeConfig)
gomega.Expect(err).NotTo(gomega.HaveOccurred())

@@ -186,6 +165,9 @@ func TestOADPE2E(t *testing.T) {
}

var _ = ginkgo.BeforeSuite(func() {
// Initialize controller-runtime logger
ctrl.SetLogger(zap.New(zap.UseDevMode(true)))

// TODO create logger (hh:mm:ss message) to be used by all functions
log.Printf("Creating Secrets")
bslCredFileData, err := lib.ReadFile(bslCredFile)
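The per-suite AddToScheme calls removed above are replaced by a shared `lib.Scheme` passed to `client.New`. That file is not part of this diff, so the following is only a sketch of what such a centralized scheme could look like, reusing the exact API groups that were previously registered inline; the file layout and the use of `utilruntime.Must` are assumptions.

```go
// Hypothetical tests/e2e/lib scheme aggregation; names and structure are
// illustrative, but the registered API groups match the imports that the
// suite previously wired up by hand.
package lib

import (
	volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v4/apis/volumesnapshot/v1"
	openshiftappsv1 "github.com/openshift/api/apps/v1"
	openshiftbuildv1 "github.com/openshift/api/build/v1"
	openshiftroutev1 "github.com/openshift/api/route/v1"
	openshiftsecurityv1 "github.com/openshift/api/security/v1"
	openshifttemplatev1 "github.com/openshift/api/template/v1"
	operatorsv1 "github.com/operator-framework/api/pkg/operators/v1"
	operatorsv1alpha1 "github.com/operator-framework/api/pkg/operators/v1alpha1"
	velerov1 "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/runtime"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"

	oadpv1alpha1 "github.com/openshift/oadp-operator/api/v1alpha1"
)

// Scheme aggregates every API group the E2E suite needs, so clients can be
// constructed with client.Options{Scheme: lib.Scheme}.
var Scheme = runtime.NewScheme()

func init() {
	utilruntime.Must(oadpv1alpha1.AddToScheme(Scheme))
	utilruntime.Must(velerov1.AddToScheme(Scheme))
	utilruntime.Must(openshiftappsv1.AddToScheme(Scheme))
	utilruntime.Must(openshiftbuildv1.AddToScheme(Scheme))
	utilruntime.Must(openshiftsecurityv1.AddToScheme(Scheme))
	utilruntime.Must(openshifttemplatev1.AddToScheme(Scheme))
	utilruntime.Must(openshiftroutev1.AddToScheme(Scheme))
	utilruntime.Must(corev1.AddToScheme(Scheme))
	utilruntime.Must(volumesnapshotv1.AddToScheme(Scheme))
	utilruntime.Must(operatorsv1alpha1.AddToScheme(Scheme))
	utilruntime.Must(operatorsv1.AddToScheme(Scheme))
}
```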
236 changes: 236 additions & 0 deletions tests/e2e/hcp_backup_restore_suite_test.go
@@ -0,0 +1,236 @@
package e2e_test

import (
"context"
"fmt"
"log"
"time"

"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"

"github.com/openshift/oadp-operator/tests/e2e/lib"
libhcp "github.com/openshift/oadp-operator/tests/e2e/lib/hcp"
)

type HCPBackupRestoreCase struct {
BackupRestoreCase
Template string
Provider string
}

func runHCPBackupAndRestore(brCase HCPBackupRestoreCase, updateLastBRcase func(brCase HCPBackupRestoreCase), h *libhcp.HCHandler) {
updateLastBRcase(brCase)

log.Printf("Preparing backup and restore")
backupName, restoreName := prepareBackupAndRestore(brCase.BackupRestoreCase, func() {})

err := h.AddHCPPluginToDPA(dpaCR.Namespace, dpaCR.Name, false)
gomega.Expect(err).ToNot(gomega.HaveOccurred(), "failed to add HCP plugin to DPA: %v", err)
// TODO: move the wait for HC just after the DPA modification to allow reconciliation to go ahead without waiting for the HC to be created

// Wait for HCP plugin to be added
gomega.Eventually(libhcp.IsHCPPluginAdded(h.Client, dpaCR.Namespace, dpaCR.Name), 3*time.Minute, 1*time.Second).Should(gomega.BeTrue())

// Create the HostedCluster for the test
h.HCPNamespace = libhcp.GetHCPNamespace(brCase.BackupRestoreCase.Name, libhcp.ClustersNamespace)
h.HostedCluster, err = h.DeployHCManifest(brCase.Template, brCase.Provider, brCase.BackupRestoreCase.Name)
gomega.Expect(err).ToNot(gomega.HaveOccurred())

if brCase.PreBackupVerify != nil {
err := brCase.PreBackupVerify(runTimeClientForSuiteRun, brCase.Namespace)
gomega.Expect(err).ToNot(gomega.HaveOccurred(), "failed to run HCP pre-backup verification: %v", err)
}

// Backup HCP & HC
log.Printf("Backing up HC")
includedResources := libhcp.HCPIncludedResources
excludedResources := libhcp.HCPExcludedResources
includedNamespaces := append(libhcp.HCPIncludedNamespaces, libhcp.GetHCPNamespace(h.HostedCluster.Name, libhcp.ClustersNamespace))

nsRequiresResticDCWorkaround := runHCPBackup(brCase.BackupRestoreCase, backupName, h, includedNamespaces, includedResources, excludedResources)

// Delete everything in HCP namespace
log.Printf("Deleting HCP & HC")
err = h.RemoveHCP(libhcp.Wait10Min)
gomega.Expect(err).ToNot(gomega.HaveOccurred(), "failed to remove HCP: %v", err)

// Restore HC
log.Printf("Restoring HC")
runHCPRestore(brCase.BackupRestoreCase, backupName, restoreName, nsRequiresResticDCWorkaround)

// Wait for HCP to be restored
log.Printf("Validating HC")
err = libhcp.ValidateHCP(libhcp.ValidateHCPTimeout, libhcp.Wait10Min, []string{}, h.HCPNamespace)(h.Client, libhcp.ClustersNamespace)
gomega.Expect(err).ToNot(gomega.HaveOccurred(), "failed to run HCP post-restore verification: %v", err)
}

var _ = ginkgo.Describe("HCP Backup and Restore tests", ginkgo.Ordered, func() {
var (
lastInstallTime time.Time
lastBRCase HCPBackupRestoreCase
h *libhcp.HCHandler
err error
ctx = context.Background()
)

updateLastBRcase := func(brCase HCPBackupRestoreCase) {
lastBRCase = brCase
}

// Before All
var _ = ginkgo.BeforeAll(func() {
reqOperators := []libhcp.RequiredOperator{
{
Name: libhcp.MCEName,
Namespace: libhcp.MCENamespace,
OperatorGroup: libhcp.MCEOperatorGroup,
},
}

// Install MCE and Hypershift operators
h, err = libhcp.InstallRequiredOperators(ctx, runTimeClientForSuiteRun, reqOperators)
gomega.Expect(err).ToNot(gomega.HaveOccurred())
gomega.Expect(h).ToNot(gomega.BeNil())
gomega.Eventually(lib.IsDeploymentReady(h.Client, libhcp.MCENamespace, libhcp.MCEOperatorName), libhcp.Wait10Min, time.Second*5).Should(gomega.BeTrue())

// Deploy the MCE manifest
err = h.DeployMCEManifest()
gomega.Expect(err).ToNot(gomega.HaveOccurred())

// Deploy the MCE and wait for it to be ready
gomega.Eventually(lib.IsDeploymentReady(h.Client, libhcp.MCENamespace, libhcp.MCEOperatorName), libhcp.Wait10Min, time.Second*5).Should(gomega.BeTrue())
gomega.Expect(err).ToNot(gomega.HaveOccurred())

// Validate the Hypershift operator
gomega.Eventually(lib.IsDeploymentReady(h.Client, libhcp.HONamespace, libhcp.HypershiftOperatorName), libhcp.Wait10Min, time.Second*5).Should(gomega.BeTrue())
gomega.Expect(err).ToNot(gomega.HaveOccurred())
})

// After All
var _ = ginkgo.AfterAll(func() {
err := h.RemoveHCP(libhcp.Wait10Min)
gomega.Expect(err).ToNot(gomega.HaveOccurred(), "failed to remove HCP: %v", err)
})

// After Each
var _ = ginkgo.AfterEach(func(ctx ginkgo.SpecContext) {
h.RemoveHCP(libhcp.Wait10Min)
tearDownBackupAndRestore(lastBRCase.BackupRestoreCase, lastInstallTime, ctx.SpecReport())
})

ginkgo.DescribeTable("Basic HCP backup and restore test",
func(brCase HCPBackupRestoreCase, expectedErr error) {
if ginkgo.CurrentSpecReport().NumAttempts > 1 && !knownFlake {
ginkgo.Fail("No known FLAKE found in a previous run, marking test as failed.")
}
runHCPBackupAndRestore(brCase, updateLastBRcase, h)
},

// Test Cases
ginkgo.Entry("None HostedCluster backup and restore", ginkgo.Label("hcp"), HCPBackupRestoreCase{
Template: libhcp.HCPNoneManifest,
Provider: "None",
BackupRestoreCase: BackupRestoreCase{
Namespace: libhcp.GetHCPNamespace(fmt.Sprintf("%s-none", libhcp.HostedClusterPrefix), libhcp.ClustersNamespace),
Name: fmt.Sprintf("%s-none", libhcp.HostedClusterPrefix),
BackupRestoreType: lib.CSIDataMover,
PreBackupVerify: libhcp.ValidateHCP(libhcp.ValidateHCPTimeout, libhcp.Wait10Min, []string{}, libhcp.GetHCPNamespace(fmt.Sprintf("%s-none", libhcp.HostedClusterPrefix), libhcp.ClustersNamespace)),
PostRestoreVerify: libhcp.ValidateHCP(libhcp.ValidateHCPTimeout, libhcp.Wait10Min, []string{}, libhcp.GetHCPNamespace(fmt.Sprintf("%s-none", libhcp.HostedClusterPrefix), libhcp.ClustersNamespace)),
BackupTimeout: libhcp.HCPBackupTimeout,
},
}, nil),

ginkgo.Entry("Agent HostedCluster backup and restore", ginkgo.Label("hcp"), HCPBackupRestoreCase{
Template: libhcp.HCPAgentManifest,
Provider: "Agent",
BackupRestoreCase: BackupRestoreCase{
Namespace: libhcp.GetHCPNamespace(fmt.Sprintf("%s-agent", libhcp.HostedClusterPrefix), libhcp.ClustersNamespace),
Name: fmt.Sprintf("%s-agent", libhcp.HostedClusterPrefix),
BackupRestoreType: lib.CSIDataMover,
PreBackupVerify: libhcp.ValidateHCP(libhcp.ValidateHCPTimeout, libhcp.Wait10Min, []string{}, libhcp.GetHCPNamespace(fmt.Sprintf("%s-agent", libhcp.HostedClusterPrefix), libhcp.ClustersNamespace)),
PostRestoreVerify: libhcp.ValidateHCP(libhcp.ValidateHCPTimeout, libhcp.Wait10Min, []string{}, libhcp.GetHCPNamespace(fmt.Sprintf("%s-agent", libhcp.HostedClusterPrefix), libhcp.ClustersNamespace)),
BackupTimeout: libhcp.HCPBackupTimeout,
},
}, nil),
)
})

// TODO: Modify the runBackup function to inject the filtered error logs to avoid repeating code with this
func runHCPBackup(brCase BackupRestoreCase, backupName string, h *libhcp.HCHandler, namespaces []string, includedResources, excludedResources []string) bool {
nsRequiresResticDCWorkaround, err := lib.NamespaceRequiresResticDCWorkaround(h.Client, brCase.Namespace)
gomega.Expect(err).ToNot(gomega.HaveOccurred())

// create backup
log.Printf("Creating backup %s for case %s", backupName, brCase.Name)
err = lib.CreateCustomBackupForNamespaces(h.Client, namespace, backupName, namespaces, includedResources, excludedResources, brCase.BackupRestoreType == lib.RESTIC || brCase.BackupRestoreType == lib.KOPIA, brCase.BackupRestoreType == lib.CSIDataMover)
gomega.Expect(err).ToNot(gomega.HaveOccurred())

// wait for backup to not be running
gomega.Eventually(lib.IsBackupDone(h.Client, namespace, backupName), brCase.BackupTimeout, time.Second*10).Should(gomega.BeTrue())
// TODO only log on fail?
describeBackup := lib.DescribeBackup(h.Client, namespace, backupName)
ginkgo.GinkgoWriter.Println(describeBackup)

backupLogs := lib.BackupLogs(kubernetesClientForSuiteRun, h.Client, namespace, backupName)
backupErrorLogs := lib.BackupErrorLogs(kubernetesClientForSuiteRun, h.Client, namespace, backupName)
accumulatedTestLogs = append(accumulatedTestLogs, describeBackup, backupLogs)

// Check error logs for non-relevant errors
filteredBackupErrorLogs := libhcp.FilterErrorLogs(backupErrorLogs)

if !brCase.SkipVerifyLogs {
gomega.Expect(filteredBackupErrorLogs).Should(gomega.Equal([]string{}))
}

// check if backup succeeded
succeeded, err := lib.IsBackupCompletedSuccessfully(kubernetesClientForSuiteRun, h.Client, namespace, backupName)
gomega.Expect(err).ToNot(gomega.HaveOccurred())
gomega.Expect(succeeded).To(gomega.Equal(true))
log.Printf("Backup for case %s succeeded", brCase.Name)

if brCase.BackupRestoreType == lib.CSI {
// wait for volume snapshot to be Ready
gomega.Eventually(lib.AreVolumeSnapshotsReady(h.Client, backupName), time.Minute*4, time.Second*10).Should(gomega.BeTrue())
}

return nsRequiresResticDCWorkaround
}

// TODO: Modify the runRestore function to inject the filtered error logs to avoid repeating code with this
func runHCPRestore(brCase BackupRestoreCase, backupName string, restoreName string, nsRequiresResticDCWorkaround bool) {
log.Printf("Creating restore %s for case %s", restoreName, brCase.Name)
err := lib.CreateRestoreFromBackup(dpaCR.Client, namespace, backupName, restoreName)
gomega.Expect(err).ToNot(gomega.HaveOccurred())
gomega.Eventually(lib.IsRestoreDone(dpaCR.Client, namespace, restoreName), time.Minute*60, time.Second*10).Should(gomega.BeTrue())
// TODO only log on fail?
describeRestore := lib.DescribeRestore(dpaCR.Client, namespace, restoreName)
ginkgo.GinkgoWriter.Println(describeRestore)

restoreLogs := lib.RestoreLogs(kubernetesClientForSuiteRun, dpaCR.Client, namespace, restoreName)
restoreErrorLogs := lib.RestoreErrorLogs(kubernetesClientForSuiteRun, dpaCR.Client, namespace, restoreName)
accumulatedTestLogs = append(accumulatedTestLogs, describeRestore, restoreLogs)

// Check error logs for non-relevant errors
filteredRestoreErrorLogs := libhcp.FilterErrorLogs(restoreErrorLogs)

if !brCase.SkipVerifyLogs {
gomega.Expect(filteredRestoreErrorLogs).Should(gomega.Equal([]string{}))
}

// Check if restore succeeded
succeeded, err := lib.IsRestoreCompletedSuccessfully(kubernetesClientForSuiteRun, dpaCR.Client, namespace, restoreName)
gomega.Expect(err).ToNot(gomega.HaveOccurred())
gomega.Expect(succeeded).To(gomega.Equal(true))

if nsRequiresResticDCWorkaround {
// We run the dc-post-restore.sh script for both restic and
// kopia backups and for any DCs with attached volumes,
// regardless of whether it was restic or kopia backup.
// The script is designed to work with labels set by the
// openshift-velero-plugin and can be run without pre-conditions.
log.Printf("Running dc-post-restore.sh script.")
err = lib.RunDcPostRestoreScript(restoreName)
gomega.Expect(err).ToNot(gomega.HaveOccurred())
}
}
25 changes: 25 additions & 0 deletions tests/e2e/lib/apps.go
@@ -285,6 +285,7 @@ func IsDeploymentReady(ocClient client.Client, namespace, dName string) wait.Con
if err != nil {
return false, err
}
log.Printf("Deployment %s status: %v", dName, deployment.Status)
if deployment.Status.AvailableReplicas != deployment.Status.Replicas || deployment.Status.Replicas == 0 {
for _, condition := range deployment.Status.Conditions {
if len(condition.Message) > 0 {
@@ -297,6 +298,30 @@ }
}
}

// IsStatefulSetReady checks if a StatefulSet is ready
func IsStatefulSetReady(ocClient client.Client, namespace, name string) wait.ConditionFunc {
return func() (bool, error) {
sts := &appsv1.StatefulSet{}
err := ocClient.Get(context.Background(), client.ObjectKey{
Namespace: namespace,
Name: name,
}, sts)
if err != nil {
return false, err
}
log.Printf("StatefulSet %s status: %v", name, sts.Status)
if sts.Status.ReadyReplicas != sts.Status.Replicas || sts.Status.Replicas == 0 {
for _, condition := range sts.Status.Conditions {
if len(condition.Message) > 0 {
ginkgo.GinkgoWriter.Write([]byte(fmt.Sprintf("statefulset not available with condition: %s\n", condition.Message)))
}
}
return false, errors.New("statefulset is not in a ready state")
}
return true, nil
}
}

func AreApplicationPodsRunning(c *kubernetes.Clientset, namespace string) wait.ConditionFunc {
return func() (bool, error) {
podList, err := GetAllPodsWithLabel(c, namespace, e2eAppLabel)
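The new IsStatefulSetReady helper mirrors IsDeploymentReady and returns a wait.ConditionFunc, so it can be polled the same way the HCP suite polls deployment readiness. A hypothetical usage sketch follows; it assumes the same suite-level imports (gomega, lib, libhcp, time) and an *libhcp.HCHandler named h, and the namespace and StatefulSet name are placeholders.

```go
// Hypothetical polling of the new helper, following the Eventually pattern
// used for IsDeploymentReady in the HCP suite; "example-namespace" and
// "example-statefulset" are placeholders, not names from this PR.
func waitForExampleStatefulSet(h *libhcp.HCHandler) {
	gomega.Eventually(
		lib.IsStatefulSetReady(h.Client, "example-namespace", "example-statefulset"),
		libhcp.Wait10Min, time.Second*5,
	).Should(gomega.BeTrue())
}
```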