Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[release-4.15] NO-JIRA:Fix the failures in qe ci jobs #389

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion hack/ci-integration.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ go run ./vendor/github.com/onsi/ginkgo/v2/ginkgo \
-v \
--timeout=115m \
--grace-period=5m \
--fail-fast \
--fail-fast=false \
--no-color \
--junit-report="junit_cluster_api_actuator_pkg_e2e.xml" \
--output-dir="${OUTPUT_DIR}" \
Expand Down
17 changes: 17 additions & 0 deletions pkg/framework/framework.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@ import (
"fmt"
"os"
"path/filepath"
"strings"
"time"

. "github.com/onsi/gomega"
configv1 "github.com/openshift/api/config/v1"
cov1helpers "github.com/openshift/library-go/pkg/config/clusteroperator/v1helpers"
admissionregistrationv1 "k8s.io/api/admissionregistration/v1"
Expand Down Expand Up @@ -271,3 +273,18 @@ func NewGatherer() (*gatherer.StateGatherer, error) {

return gatherer.NewStateGatherer(context.Background(), cli, time.Now()), nil
}

// IsCustomerVPC check if cluster is customer vpc cluster.
func IsCustomerVPC(oc *gatherer.CLI) bool {
installConfig, err := oc.WithoutNamespace().Run("get").Args("cm", "cluster-config-v1", "-n", "kube-system", "-o=jsonpath={.data.install-config}").Output()
Expect(err).NotTo(HaveOccurred(), "Failed to get install-config")

switch platform {
case configv1.AWSPlatformType:
return strings.Contains(installConfig, "subnets:")
case configv1.AzurePlatformType:
return strings.Contains(installConfig, "virtualNetwork:")
default:
return false
}
}
30 changes: 23 additions & 7 deletions pkg/framework/machinesets.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ func BuildPerArchMachineSetParamsList(ctx context.Context, client runtimeclient.
var params MachineSetParams

for _, worker := range workers {
if arch, err = getArchitectureFromMachineSetNodes(ctx, client, worker); err != nil {
if arch, err = GetArchitectureFromMachineSetNodes(ctx, client, worker); err != nil {
klog.Warningf("unable to get the architecture for the machine set %s: %v", worker.Name, err)
continue
}
Expand Down Expand Up @@ -176,7 +176,7 @@ func CreateMachineSet(c runtimeclient.Client, params MachineSetParams) (*machine
}

// BuildMachineSetParamsList creates a list of MachineSetParams based on the given machineSetParams with modified instance type.
func BuildAlternativeMachineSetParams(machineSetParams MachineSetParams, platform configv1.PlatformType) ([]MachineSetParams, error) {
func BuildAlternativeMachineSetParams(machineSetParams MachineSetParams, platform configv1.PlatformType, arch string) ([]MachineSetParams, error) {
baseMachineSetParams := machineSetParams
baseProviderSpec := baseMachineSetParams.ProviderSpec.DeepCopy()

Expand All @@ -185,7 +185,15 @@ func BuildAlternativeMachineSetParams(machineSetParams MachineSetParams, platfor
switch platform {
case configv1.AWSPlatformType:
// Using cheapest compute optimized instances that meet openshift minimum requirements (4 vCPU, 8GiB RAM)
alternativeInstanceTypes := []string{"c5.xlarge", "c5a.xlarge", "m5.xlarge"}
var alternativeInstanceTypes []string

switch arch {
case "arm64":
alternativeInstanceTypes = []string{"m6g.large", "t4g.nano", "t4g.micro", "m6gd.xlarge"}
default:
alternativeInstanceTypes = []string{"c5.xlarge", "c5a.xlarge", "m5.xlarge"}
}

for _, instanceType := range alternativeInstanceTypes {
updatedProviderSpec, err := updateProviderSpecAWSInstanceType(baseProviderSpec, instanceType)
if err != nil {
Expand All @@ -196,7 +204,15 @@ func BuildAlternativeMachineSetParams(machineSetParams MachineSetParams, platfor
output = append(output, baseMachineSetParams)
}
case configv1.AzurePlatformType:
alternativeVMSizes := []string{"Standard_F4s_v2", "Standard_D4as_v5", "Standard_D4as_v4"}
var alternativeVMSizes []string

switch arch {
case "arm64":
alternativeVMSizes = []string{"Standard_D2ps_v5", "Standard_D3ps_v5", "Standard_D4ps_v5"}
default:
alternativeVMSizes = []string{"Standard_F4s_v2", "Standard_D4as_v5", "Standard_D4as_v4"}
}

for _, VMSize := range alternativeVMSizes {
updatedProviderSpec, err := updateProviderSpecAzureVMSize(baseProviderSpec, VMSize)
if err != nil {
Expand Down Expand Up @@ -334,13 +350,13 @@ func GetWorkerMachineSets(ctx context.Context, client runtimeclient.Client) ([]*
return result, nil
}

// getArchitectureFromMachineSetNodes returns the architecture of the nodes controlled by the given machineSet's machines.
func getArchitectureFromMachineSetNodes(ctx context.Context, client runtimeclient.Client, machineSet *machinev1.MachineSet) (string, error) {
// GetArchitectureFromMachineSetNodes returns the architecture of the nodes controlled by the given machineSet's machines.
func GetArchitectureFromMachineSetNodes(ctx context.Context, client runtimeclient.Client, machineSet *machinev1.MachineSet) (string, error) {
nodes, err := GetNodesFromMachineSet(ctx, client, machineSet)
if err != nil || len(nodes) == 0 {
klog.Warningf("error getting the machineSet's nodes or no nodes associated with %s. Using the capacity annotation", machineSet.Name)

for _, kv := range strings.Split(machineSet.Labels[labelsKey], ",") {
for _, kv := range strings.Split(machineSet.Annotations[labelsKey], ",") {
if strings.Contains(kv, "kubernetes.io/arch") {
return strings.Split(kv, "=")[1], nil
}
Expand Down
20 changes: 16 additions & 4 deletions pkg/infra/spot.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ var _ = Describe("Running on Spot", framework.LabelMAPI, framework.LabelDisrupti
var client runtimeclient.Client
var machineSet *machinev1.MachineSet
var platform configv1.PlatformType

var arch string
var delObjects map[string]runtimeclient.Object

var gatherer *gatherer.StateGatherer
Expand Down Expand Up @@ -97,11 +97,22 @@ var _ = Describe("Running on Spot", framework.LabelMAPI, framework.LabelDisrupti
default:
Skip(fmt.Sprintf("Platform %s does not support Spot, skipping.", platform))
}
oc, _ := framework.NewCLI()
if framework.IsCustomerVPC(oc) {
//The termination-simulator will hit network error on customer vpc cluster, cannot mark the node as terminating, skip for now.
Skip("Skip this test on customer vpc cluster.")
}

By("Creating a Spot backed MachineSet", func() {
machineSetReady := false
machineSetParams := framework.BuildMachineSetParams(ctx, client, machinesCount)
machineSetParamsList, err := framework.BuildAlternativeMachineSetParams(machineSetParams, platform)

workers, err := framework.GetWorkerMachineSets(ctx, client)
Expect(err).ToNot(HaveOccurred(), "listing Worker MachineSets should not error.")

arch, err = framework.GetArchitectureFromMachineSetNodes(ctx, client, workers[0])
Expect(err).NotTo(HaveOccurred(), "unable to get the architecture for the machine set")
machineSetParamsList, err := framework.BuildAlternativeMachineSetParams(machineSetParams, platform, arch)
Expect(err).ToNot(HaveOccurred(), "Should be able to build list of MachineSet parameters")
for i, machineSetParams := range machineSetParamsList {
if i >= spotMachineSetMaxProvisioningRetryCount {
Expand Down Expand Up @@ -376,8 +387,9 @@ func getMetadataMockDeployment(platform configv1.PlatformType) *appsv1.Deploymen
Spec: corev1.PodSpec{
Containers: []corev1.Container{
{
Name: "metadata-mock",
Image: "golang:1.14",
Name: "metadata-mock",
// This is a golang:1.22 image which is mirrored in https://quay.io/repository/openshifttest/golang, so that disconnected cluster can access.
Image: "quay.io/openshifttest/golang@sha256:8f1c43387f0a107535906c7ee918a9d46079cc7be5e80a18424e8558d8afc702",
Command: []string{"/usr/local/go/bin/go"},
Args: []string{
"run",
Expand Down
24 changes: 24 additions & 0 deletions pkg/infra/webhooks.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/klog"
runtimeclient "sigs.k8s.io/controller-runtime/pkg/client"

configv1 "github.com/openshift/api/config/v1"
Expand Down Expand Up @@ -112,6 +113,21 @@ var _ = Describe("Webhooks", framework.LabelMAPI, framework.LabelDisruptive, fun
if err != nil {
return err
}

failed := framework.FilterMachines([]*machinev1beta1.Machine{m}, framework.MachinePhaseFailed)
if len(failed) > 0 {
reason := "failureReason not present in Machine.status"
if m.Status.ErrorReason != nil {
reason = string(*m.Status.ErrorReason)
}
message := "failureMessage not present in Machine.status"
if m.Status.ErrorMessage != nil {
message = *m.Status.ErrorMessage
}
klog.Errorf("Failed machine: %s, Reason: %s, Message: %s", m.Name, reason, message)
}
Expect(len(failed)).To(Equal(0), "zero machines should be in a Failed phase")

running := framework.FilterRunningMachines([]*machinev1beta1.Machine{m})
if len(running) == 0 {
return fmt.Errorf("machine not yet running")
Expand Down Expand Up @@ -252,6 +268,9 @@ func minimalAzureProviderSpec(ps *machinev1beta1.ProviderSpec) (*machinev1beta1.
OSDisk: machinev1beta1.OSDisk{
DiskSizeGB: fullProviderSpec.OSDisk.DiskSizeGB,
},
Vnet: fullProviderSpec.Vnet,
Subnet: fullProviderSpec.Subnet,
NetworkResourceGroup: fullProviderSpec.NetworkResourceGroup,
},
},
}, nil
Expand All @@ -270,6 +289,11 @@ func minimalGCPProviderSpec(ps *machinev1beta1.ProviderSpec) (*machinev1beta1.Pr
Region: fullProviderSpec.Region,
Zone: fullProviderSpec.Zone,
ServiceAccounts: fullProviderSpec.ServiceAccounts,
NetworkInterfaces: []*machinev1beta1.GCPNetworkInterface{{
Network: fullProviderSpec.NetworkInterfaces[0].Network,
Subnetwork: fullProviderSpec.NetworkInterfaces[0].Subnetwork,
ProjectID: fullProviderSpec.NetworkInterfaces[0].ProjectID,
}},
},
},
}, nil
Expand Down