Skip to content

Commit 36b69f6

Browse files
sunzhaohua2openshift-cherrypick-robot
authored and
openshift-cherrypick-robot
committed Apr 1, 2025 ·
Fix the failures in qe ci jobs
1 parent 2b89bea commit 36b69f6

File tree

5 files changed

+81
-12
lines changed

5 files changed

+81
-12
lines changed
 

‎hack/ci-integration.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ go run ./vendor/github.com/onsi/ginkgo/v2/ginkgo \
77
-v \
88
--timeout=115m \
99
--grace-period=5m \
10-
--fail-fast \
10+
--fail-fast=false \
1111
--no-color \
1212
--junit-report="junit_cluster_api_actuator_pkg_e2e.xml" \
1313
--output-dir="${OUTPUT_DIR}" \

‎pkg/framework/framework.go

+17
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,10 @@ import (
66
"fmt"
77
"os"
88
"path/filepath"
9+
"strings"
910
"time"
1011

12+
. "github.com/onsi/gomega"
1113
configv1 "github.com/openshift/api/config/v1"
1214
cov1helpers "github.com/openshift/library-go/pkg/config/clusteroperator/v1helpers"
1315
admissionregistrationv1 "k8s.io/api/admissionregistration/v1"
@@ -271,3 +273,18 @@ func NewGatherer() (*gatherer.StateGatherer, error) {
271273

272274
return gatherer.NewStateGatherer(context.Background(), cli, time.Now()), nil
273275
}
276+
277+
// IsCustomerVPC checks whether the cluster is a customer VPC cluster.
278+
func IsCustomerVPC(oc *gatherer.CLI) bool {
279+
installConfig, err := oc.WithoutNamespace().Run("get").Args("cm", "cluster-config-v1", "-n", "kube-system", "-o=jsonpath={.data.install-config}").Output()
280+
Expect(err).NotTo(HaveOccurred(), "Failed to get install-config")
281+
282+
switch platform {
283+
case configv1.AWSPlatformType:
284+
return strings.Contains(installConfig, "subnets:")
285+
case configv1.AzurePlatformType:
286+
return strings.Contains(installConfig, "virtualNetwork:")
287+
default:
288+
return false
289+
}
290+
}

‎pkg/framework/machinesets.go

+23-7
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ func BuildPerArchMachineSetParamsList(ctx context.Context, client runtimeclient.
7474
var params MachineSetParams
7575

7676
for _, worker := range workers {
77-
if arch, err = getArchitectureFromMachineSetNodes(ctx, client, worker); err != nil {
77+
if arch, err = GetArchitectureFromMachineSetNodes(ctx, client, worker); err != nil {
7878
klog.Warningf("unable to get the architecture for the machine set %s: %v", worker.Name, err)
7979
continue
8080
}
@@ -176,7 +176,7 @@ func CreateMachineSet(c runtimeclient.Client, params MachineSetParams) (*machine
176176
}
177177

178178
// BuildAlternativeMachineSetParams creates a list of MachineSetParams based on the given machineSetParams with modified instance types.
179-
func BuildAlternativeMachineSetParams(machineSetParams MachineSetParams, platform configv1.PlatformType) ([]MachineSetParams, error) {
179+
func BuildAlternativeMachineSetParams(machineSetParams MachineSetParams, platform configv1.PlatformType, arch string) ([]MachineSetParams, error) {
180180
baseMachineSetParams := machineSetParams
181181
baseProviderSpec := baseMachineSetParams.ProviderSpec.DeepCopy()
182182

@@ -185,7 +185,15 @@ func BuildAlternativeMachineSetParams(machineSetParams MachineSetParams, platfor
185185
switch platform {
186186
case configv1.AWSPlatformType:
187187
// Using cheapest compute optimized instances that meet openshift minimum requirements (4 vCPU, 8GiB RAM)
188-
alternativeInstanceTypes := []string{"c5.xlarge", "c5a.xlarge", "m5.xlarge"}
188+
var alternativeInstanceTypes []string
189+
190+
switch arch {
191+
case "arm64":
192+
alternativeInstanceTypes = []string{"m6g.large", "t4g.nano", "t4g.micro", "m6gd.xlarge"}
193+
default:
194+
alternativeInstanceTypes = []string{"c5.xlarge", "c5a.xlarge", "m5.xlarge"}
195+
}
196+
189197
for _, instanceType := range alternativeInstanceTypes {
190198
updatedProviderSpec, err := updateProviderSpecAWSInstanceType(baseProviderSpec, instanceType)
191199
if err != nil {
@@ -196,7 +204,15 @@ func BuildAlternativeMachineSetParams(machineSetParams MachineSetParams, platfor
196204
output = append(output, baseMachineSetParams)
197205
}
198206
case configv1.AzurePlatformType:
199-
alternativeVMSizes := []string{"Standard_F4s_v2", "Standard_D4as_v5", "Standard_D4as_v4"}
207+
var alternativeVMSizes []string
208+
209+
switch arch {
210+
case "arm64":
211+
alternativeVMSizes = []string{"Standard_D2ps_v5", "Standard_D3ps_v5", "Standard_D4ps_v5"}
212+
default:
213+
alternativeVMSizes = []string{"Standard_F4s_v2", "Standard_D4as_v5", "Standard_D4as_v4"}
214+
}
215+
200216
for _, VMSize := range alternativeVMSizes {
201217
updatedProviderSpec, err := updateProviderSpecAzureVMSize(baseProviderSpec, VMSize)
202218
if err != nil {
@@ -334,13 +350,13 @@ func GetWorkerMachineSets(ctx context.Context, client runtimeclient.Client) ([]*
334350
return result, nil
335351
}
336352

337-
// getArchitectureFromMachineSetNodes returns the architecture of the nodes controlled by the given machineSet's machines.
338-
func getArchitectureFromMachineSetNodes(ctx context.Context, client runtimeclient.Client, machineSet *machinev1.MachineSet) (string, error) {
353+
// GetArchitectureFromMachineSetNodes returns the architecture of the nodes controlled by the given machineSet's machines.
354+
func GetArchitectureFromMachineSetNodes(ctx context.Context, client runtimeclient.Client, machineSet *machinev1.MachineSet) (string, error) {
339355
nodes, err := GetNodesFromMachineSet(ctx, client, machineSet)
340356
if err != nil || len(nodes) == 0 {
341357
klog.Warningf("error getting the machineSet's nodes or no nodes associated with %s. Using the capacity annotation", machineSet.Name)
342358

343-
for _, kv := range strings.Split(machineSet.Labels[labelsKey], ",") {
359+
for _, kv := range strings.Split(machineSet.Annotations[labelsKey], ",") {
344360
if strings.Contains(kv, "kubernetes.io/arch") {
345361
return strings.Split(kv, "=")[1], nil
346362
}

‎pkg/infra/spot.go

+16-4
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ var _ = Describe("Running on Spot", framework.LabelMAPI, framework.LabelDisrupti
4444
var client runtimeclient.Client
4545
var machineSet *machinev1.MachineSet
4646
var platform configv1.PlatformType
47-
47+
var arch string
4848
var delObjects map[string]runtimeclient.Object
4949

5050
var gatherer *gatherer.StateGatherer
@@ -75,11 +75,22 @@ var _ = Describe("Running on Spot", framework.LabelMAPI, framework.LabelDisrupti
7575
default:
7676
Skip(fmt.Sprintf("Platform %s does not support Spot, skipping.", platform))
7777
}
78+
oc, _ := framework.NewCLI()
79+
if framework.IsCustomerVPC(oc) {
80+
// The termination-simulator hits a network error on customer VPC clusters and cannot mark the node as terminating, so skip this test for now.
81+
Skip("Skip this test on customer vpc cluster.")
82+
}
7883

7984
By("Creating a Spot backed MachineSet", func() {
8085
machineSetReady := false
8186
machineSetParams := framework.BuildMachineSetParams(ctx, client, machinesCount)
82-
machineSetParamsList, err := framework.BuildAlternativeMachineSetParams(machineSetParams, platform)
87+
88+
workers, err := framework.GetWorkerMachineSets(ctx, client)
89+
Expect(err).ToNot(HaveOccurred(), "listing Worker MachineSets should not error.")
90+
91+
arch, err = framework.GetArchitectureFromMachineSetNodes(ctx, client, workers[0])
92+
Expect(err).NotTo(HaveOccurred(), "unable to get the architecture for the machine set")
93+
machineSetParamsList, err := framework.BuildAlternativeMachineSetParams(machineSetParams, platform, arch)
8394
Expect(err).ToNot(HaveOccurred(), "Should be able to build list of MachineSet parameters")
8495
for i, machineSetParams := range machineSetParamsList {
8596
if i >= spotMachineSetMaxProvisioningRetryCount {
@@ -373,8 +384,9 @@ func getMetadataMockDeployment(platform configv1.PlatformType) *appsv1.Deploymen
373384
Spec: corev1.PodSpec{
374385
Containers: []corev1.Container{
375386
{
376-
Name: "metadata-mock",
377-
Image: "golang:1.14",
387+
Name: "metadata-mock",
388+
// This is a golang:1.22 image mirrored at https://quay.io/repository/openshifttest/golang so that disconnected clusters can access it.
389+
Image: "quay.io/openshifttest/golang@sha256:8f1c43387f0a107535906c7ee918a9d46079cc7be5e80a18424e8558d8afc702",
378390
Command: []string{"/usr/local/go/bin/go"},
379391
Args: []string{
380392
"run",

‎pkg/infra/webhooks.go

+24
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1313
"k8s.io/apimachinery/pkg/runtime"
1414
"k8s.io/apimachinery/pkg/util/intstr"
15+
"k8s.io/klog"
1516
runtimeclient "sigs.k8s.io/controller-runtime/pkg/client"
1617

1718
configv1 "github.com/openshift/api/config/v1"
@@ -109,6 +110,21 @@ var _ = Describe("Webhooks", framework.LabelMAPI, framework.LabelDisruptive, fun
109110
if err != nil {
110111
return err
111112
}
113+
114+
failed := framework.FilterMachines([]*machinev1beta1.Machine{m}, framework.MachinePhaseFailed)
115+
if len(failed) > 0 {
116+
reason := "failureReason not present in Machine.status"
117+
if m.Status.ErrorReason != nil {
118+
reason = string(*m.Status.ErrorReason)
119+
}
120+
message := "failureMessage not present in Machine.status"
121+
if m.Status.ErrorMessage != nil {
122+
message = *m.Status.ErrorMessage
123+
}
124+
klog.Errorf("Failed machine: %s, Reason: %s, Message: %s", m.Name, reason, message)
125+
}
126+
Expect(len(failed)).To(Equal(0), "zero machines should be in a Failed phase")
127+
112128
running := framework.FilterRunningMachines([]*machinev1beta1.Machine{m})
113129
if len(running) == 0 {
114130
return fmt.Errorf("machine not yet running")
@@ -249,6 +265,9 @@ func minimalAzureProviderSpec(ps *machinev1beta1.ProviderSpec) (*machinev1beta1.
249265
OSDisk: machinev1beta1.OSDisk{
250266
DiskSizeGB: fullProviderSpec.OSDisk.DiskSizeGB,
251267
},
268+
Vnet: fullProviderSpec.Vnet,
269+
Subnet: fullProviderSpec.Subnet,
270+
NetworkResourceGroup: fullProviderSpec.NetworkResourceGroup,
252271
},
253272
},
254273
}, nil
@@ -267,6 +286,11 @@ func minimalGCPProviderSpec(ps *machinev1beta1.ProviderSpec) (*machinev1beta1.Pr
267286
Region: fullProviderSpec.Region,
268287
Zone: fullProviderSpec.Zone,
269288
ServiceAccounts: fullProviderSpec.ServiceAccounts,
289+
NetworkInterfaces: []*machinev1beta1.GCPNetworkInterface{{
290+
Network: fullProviderSpec.NetworkInterfaces[0].Network,
291+
Subnetwork: fullProviderSpec.NetworkInterfaces[0].Subnetwork,
292+
ProjectID: fullProviderSpec.NetworkInterfaces[0].ProjectID,
293+
}},
270294
},
271295
},
272296
}, nil

0 commit comments

Comments
 (0)
Please sign in to comment.