Skip to content

Commit ddbbd8d

Browse files
sunzhaohua2openshift-cherrypick-robot
authored and
openshift-cherrypick-robot
committed
Fix the failures in qe ci jobs
1 parent 61f5a84 commit ddbbd8d

File tree

5 files changed

+81
-12
lines changed

5 files changed

+81
-12
lines changed

hack/ci-integration.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ go run ./vendor/github.com/onsi/ginkgo/v2/ginkgo \
77
-v \
88
--timeout=115m \
99
--grace-period=5m \
10-
--fail-fast \
10+
--fail-fast=false \
1111
--no-color \
1212
--junit-report="junit_cluster_api_actuator_pkg_e2e.xml" \
1313
--output-dir="${OUTPUT_DIR}" \

pkg/framework/framework.go

+17
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,10 @@ import (
66
"fmt"
77
"os"
88
"path/filepath"
9+
"strings"
910
"time"
1011

12+
. "github.com/onsi/gomega"
1113
configv1 "github.com/openshift/api/config/v1"
1214
cov1helpers "github.com/openshift/library-go/pkg/config/clusteroperator/v1helpers"
1315
admissionregistrationv1 "k8s.io/api/admissionregistration/v1"
@@ -271,3 +273,18 @@ func NewGatherer() (*gatherer.StateGatherer, error) {
271273

272274
return gatherer.NewStateGatherer(context.Background(), cli, time.Now()), nil
273275
}
276+
277+
// IsCustomerVPC checks whether the cluster is a customer VPC cluster.
278+
func IsCustomerVPC(oc *gatherer.CLI) bool {
279+
installConfig, err := oc.WithoutNamespace().Run("get").Args("cm", "cluster-config-v1", "-n", "kube-system", "-o=jsonpath={.data.install-config}").Output()
280+
Expect(err).NotTo(HaveOccurred(), "Failed to get install-config")
281+
282+
switch platform {
283+
case configv1.AWSPlatformType:
284+
return strings.Contains(installConfig, "subnets:")
285+
case configv1.AzurePlatformType:
286+
return strings.Contains(installConfig, "virtualNetwork:")
287+
default:
288+
return false
289+
}
290+
}

pkg/framework/machinesets.go

+23-7
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ func BuildPerArchMachineSetParamsList(ctx context.Context, client runtimeclient.
7474
var params MachineSetParams
7575

7676
for _, worker := range workers {
77-
if arch, err = getArchitectureFromMachineSetNodes(ctx, client, worker); err != nil {
77+
if arch, err = GetArchitectureFromMachineSetNodes(ctx, client, worker); err != nil {
7878
klog.Warningf("unable to get the architecture for the machine set %s: %v", worker.Name, err)
7979
continue
8080
}
@@ -176,7 +176,7 @@ func CreateMachineSet(c runtimeclient.Client, params MachineSetParams) (*machine
176176
}
177177

178178
// BuildAlternativeMachineSetParams creates a list of MachineSetParams based on the given machineSetParams with modified instance type.
179-
func BuildAlternativeMachineSetParams(machineSetParams MachineSetParams, platform configv1.PlatformType) ([]MachineSetParams, error) {
179+
func BuildAlternativeMachineSetParams(machineSetParams MachineSetParams, platform configv1.PlatformType, arch string) ([]MachineSetParams, error) {
180180
baseMachineSetParams := machineSetParams
181181
baseProviderSpec := baseMachineSetParams.ProviderSpec.DeepCopy()
182182

@@ -185,7 +185,15 @@ func BuildAlternativeMachineSetParams(machineSetParams MachineSetParams, platfor
185185
switch platform {
186186
case configv1.AWSPlatformType:
187187
// Using cheapest compute optimized instances that meet openshift minimum requirements (4 vCPU, 8GiB RAM)
188-
alternativeInstanceTypes := []string{"c5.xlarge", "c5a.xlarge", "m5.xlarge"}
188+
var alternativeInstanceTypes []string
189+
190+
switch arch {
191+
case "arm64":
192+
alternativeInstanceTypes = []string{"m6g.large", "t4g.nano", "t4g.micro", "m6gd.xlarge"}
193+
default:
194+
alternativeInstanceTypes = []string{"c5.xlarge", "c5a.xlarge", "m5.xlarge"}
195+
}
196+
189197
for _, instanceType := range alternativeInstanceTypes {
190198
updatedProviderSpec, err := updateProviderSpecAWSInstanceType(baseProviderSpec, instanceType)
191199
if err != nil {
@@ -196,7 +204,15 @@ func BuildAlternativeMachineSetParams(machineSetParams MachineSetParams, platfor
196204
output = append(output, baseMachineSetParams)
197205
}
198206
case configv1.AzurePlatformType:
199-
alternativeVMSizes := []string{"Standard_F4s_v2", "Standard_D4as_v5", "Standard_D4as_v4"}
207+
var alternativeVMSizes []string
208+
209+
switch arch {
210+
case "arm64":
211+
alternativeVMSizes = []string{"Standard_D2ps_v5", "Standard_D3ps_v5", "Standard_D4ps_v5"}
212+
default:
213+
alternativeVMSizes = []string{"Standard_F4s_v2", "Standard_D4as_v5", "Standard_D4as_v4"}
214+
}
215+
200216
for _, VMSize := range alternativeVMSizes {
201217
updatedProviderSpec, err := updateProviderSpecAzureVMSize(baseProviderSpec, VMSize)
202218
if err != nil {
@@ -334,13 +350,13 @@ func GetWorkerMachineSets(ctx context.Context, client runtimeclient.Client) ([]*
334350
return result, nil
335351
}
336352

337-
// getArchitectureFromMachineSetNodes returns the architecture of the nodes controlled by the given machineSet's machines.
338-
func getArchitectureFromMachineSetNodes(ctx context.Context, client runtimeclient.Client, machineSet *machinev1.MachineSet) (string, error) {
353+
// GetArchitectureFromMachineSetNodes returns the architecture of the nodes controlled by the given machineSet's machines.
354+
func GetArchitectureFromMachineSetNodes(ctx context.Context, client runtimeclient.Client, machineSet *machinev1.MachineSet) (string, error) {
339355
nodes, err := GetNodesFromMachineSet(ctx, client, machineSet)
340356
if err != nil || len(nodes) == 0 {
341357
klog.Warningf("error getting the machineSet's nodes or no nodes associated with %s. Using the capacity annotation", machineSet.Name)
342358

343-
for _, kv := range strings.Split(machineSet.Labels[labelsKey], ",") {
359+
for _, kv := range strings.Split(machineSet.Annotations[labelsKey], ",") {
344360
if strings.Contains(kv, "kubernetes.io/arch") {
345361
return strings.Split(kv, "=")[1], nil
346362
}

pkg/infra/spot.go

+16-4
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ var _ = Describe("Running on Spot", framework.LabelMAPI, framework.LabelDisrupti
4444
var client runtimeclient.Client
4545
var machineSet *machinev1.MachineSet
4646
var platform configv1.PlatformType
47-
47+
var arch string
4848
var delObjects map[string]runtimeclient.Object
4949

5050
var gatherer *gatherer.StateGatherer
@@ -97,11 +97,22 @@ var _ = Describe("Running on Spot", framework.LabelMAPI, framework.LabelDisrupti
9797
default:
9898
Skip(fmt.Sprintf("Platform %s does not support Spot, skipping.", platform))
9999
}
100+
oc, _ := framework.NewCLI()
101+
if framework.IsCustomerVPC(oc) {
102+
// The termination-simulator hits a network error on customer VPC clusters and cannot mark the node as terminating, so skip for now.
103+
Skip("Skip this test on customer vpc cluster.")
104+
}
100105

101106
By("Creating a Spot backed MachineSet", func() {
102107
machineSetReady := false
103108
machineSetParams := framework.BuildMachineSetParams(ctx, client, machinesCount)
104-
machineSetParamsList, err := framework.BuildAlternativeMachineSetParams(machineSetParams, platform)
109+
110+
workers, err := framework.GetWorkerMachineSets(ctx, client)
111+
Expect(err).ToNot(HaveOccurred(), "listing Worker MachineSets should not error.")
112+
113+
arch, err = framework.GetArchitectureFromMachineSetNodes(ctx, client, workers[0])
114+
Expect(err).NotTo(HaveOccurred(), "unable to get the architecture for the machine set")
115+
machineSetParamsList, err := framework.BuildAlternativeMachineSetParams(machineSetParams, platform, arch)
105116
Expect(err).ToNot(HaveOccurred(), "Should be able to build list of MachineSet parameters")
106117
for i, machineSetParams := range machineSetParamsList {
107118
if i >= spotMachineSetMaxProvisioningRetryCount {
@@ -376,8 +387,9 @@ func getMetadataMockDeployment(platform configv1.PlatformType) *appsv1.Deploymen
376387
Spec: corev1.PodSpec{
377388
Containers: []corev1.Container{
378389
{
379-
Name: "metadata-mock",
380-
Image: "golang:1.14",
390+
Name: "metadata-mock",
391+
// This is a golang:1.22 image which is mirrored in https://quay.io/repository/openshifttest/golang, so that disconnected clusters can access it.
392+
Image: "quay.io/openshifttest/golang@sha256:8f1c43387f0a107535906c7ee918a9d46079cc7be5e80a18424e8558d8afc702",
381393
Command: []string{"/usr/local/go/bin/go"},
382394
Args: []string{
383395
"run",

pkg/infra/webhooks.go

+24
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1313
"k8s.io/apimachinery/pkg/runtime"
1414
"k8s.io/apimachinery/pkg/util/intstr"
15+
"k8s.io/klog"
1516
runtimeclient "sigs.k8s.io/controller-runtime/pkg/client"
1617

1718
configv1 "github.com/openshift/api/config/v1"
@@ -112,6 +113,21 @@ var _ = Describe("Webhooks", framework.LabelMAPI, framework.LabelDisruptive, fun
112113
if err != nil {
113114
return err
114115
}
116+
117+
failed := framework.FilterMachines([]*machinev1beta1.Machine{m}, framework.MachinePhaseFailed)
118+
if len(failed) > 0 {
119+
reason := "failureReason not present in Machine.status"
120+
if m.Status.ErrorReason != nil {
121+
reason = string(*m.Status.ErrorReason)
122+
}
123+
message := "failureMessage not present in Machine.status"
124+
if m.Status.ErrorMessage != nil {
125+
message = *m.Status.ErrorMessage
126+
}
127+
klog.Errorf("Failed machine: %s, Reason: %s, Message: %s", m.Name, reason, message)
128+
}
129+
Expect(len(failed)).To(Equal(0), "zero machines should be in a Failed phase")
130+
115131
running := framework.FilterRunningMachines([]*machinev1beta1.Machine{m})
116132
if len(running) == 0 {
117133
return fmt.Errorf("machine not yet running")
@@ -252,6 +268,9 @@ func minimalAzureProviderSpec(ps *machinev1beta1.ProviderSpec) (*machinev1beta1.
252268
OSDisk: machinev1beta1.OSDisk{
253269
DiskSizeGB: fullProviderSpec.OSDisk.DiskSizeGB,
254270
},
271+
Vnet: fullProviderSpec.Vnet,
272+
Subnet: fullProviderSpec.Subnet,
273+
NetworkResourceGroup: fullProviderSpec.NetworkResourceGroup,
255274
},
256275
},
257276
}, nil
@@ -270,6 +289,11 @@ func minimalGCPProviderSpec(ps *machinev1beta1.ProviderSpec) (*machinev1beta1.Pr
270289
Region: fullProviderSpec.Region,
271290
Zone: fullProviderSpec.Zone,
272291
ServiceAccounts: fullProviderSpec.ServiceAccounts,
292+
NetworkInterfaces: []*machinev1beta1.GCPNetworkInterface{{
293+
Network: fullProviderSpec.NetworkInterfaces[0].Network,
294+
Subnetwork: fullProviderSpec.NetworkInterfaces[0].Subnetwork,
295+
ProjectID: fullProviderSpec.NetworkInterfaces[0].ProjectID,
296+
}},
273297
},
274298
},
275299
}, nil

0 commit comments

Comments
 (0)