Skip to content

Commit 135fde7

Browse files
Merge pull request #374 from sunzhaohua2/fix-failure
OCPQE-27285: Fix the failures in qe ci jobs
2 parents 7b14200 + 5f891b6 commit 135fde7

File tree

5 files changed

+80
-12
lines changed

5 files changed

+80
-12
lines changed

hack/ci-integration.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ go run ./vendor/github.com/onsi/ginkgo/v2/ginkgo \
77
-v \
88
--timeout=115m \
99
--grace-period=5m \
10-
--fail-fast \
10+
--fail-fast=false \
1111
--no-color \
1212
--junit-report="junit_cluster_api_actuator_pkg_e2e.xml" \
1313
--output-dir="${OUTPUT_DIR}" \

pkg/framework/framework.go

+16
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"fmt"
88
"os"
99
"path/filepath"
10+
"strings"
1011
"time"
1112

1213
. "github.com/onsi/ginkgo/v2"
@@ -313,3 +314,18 @@ func GetCredentialsFromCluster(oc *gatherer.CLI) ([]byte, []byte, string) {
313314

314315
return accessKeyID, secureKey, clusterRegion
315316
}
317+
318+
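// IsCustomerVPC reports whether the cluster was installed into a customer-provided network, detected by "subnets:" (AWS) or "virtualNetwork:" (Azure) appearing in the install-config; it returns false for other platforms.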
319+
func IsCustomerVPC(oc *gatherer.CLI) bool {
320+
installConfig, err := oc.WithoutNamespace().Run("get").Args("cm", "cluster-config-v1", "-n", "kube-system", "-o=jsonpath={.data.install-config}").Output()
321+
Expect(err).NotTo(HaveOccurred(), "Failed to get install-config")
322+
323+
switch platform {
324+
case configv1.AWSPlatformType:
325+
return strings.Contains(installConfig, "subnets:")
326+
case configv1.AzurePlatformType:
327+
return strings.Contains(installConfig, "virtualNetwork:")
328+
default:
329+
return false
330+
}
331+
}

pkg/framework/machinesets.go

+23-7
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ func BuildPerArchMachineSetParamsList(ctx context.Context, client runtimeclient.
7777
var params MachineSetParams
7878

7979
for _, worker := range workers {
80-
if arch, err = getArchitectureFromMachineSetNodes(ctx, client, worker); err != nil {
80+
if arch, err = GetArchitectureFromMachineSetNodes(ctx, client, worker); err != nil {
8181
klog.Warningf("unable to get the architecture for the machine set %s: %v", worker.Name, err)
8282
continue
8383
}
@@ -180,7 +180,7 @@ func CreateMachineSet(c runtimeclient.Client, params MachineSetParams) (*machine
180180
}
181181

182182
// BuildAlternativeMachineSetParams creates a list of MachineSetParams based on the given machineSetParams, each with a modified instance type.
183-
func BuildAlternativeMachineSetParams(machineSetParams MachineSetParams, platform configv1.PlatformType) ([]MachineSetParams, error) {
183+
func BuildAlternativeMachineSetParams(machineSetParams MachineSetParams, platform configv1.PlatformType, arch string) ([]MachineSetParams, error) {
184184
baseMachineSetParams := machineSetParams
185185
baseProviderSpec := baseMachineSetParams.ProviderSpec.DeepCopy()
186186

@@ -189,7 +189,15 @@ func BuildAlternativeMachineSetParams(machineSetParams MachineSetParams, platfor
189189
switch platform {
190190
case configv1.AWSPlatformType:
191191
// Using cheapest compute optimized instances that meet openshift minimum requirements (4 vCPU, 8GiB RAM)
192-
alternativeInstanceTypes := []string{"c5.xlarge", "c5a.xlarge", "m5.xlarge"}
192+
var alternativeInstanceTypes []string
193+
194+
switch arch {
195+
case "arm64":
196+
alternativeInstanceTypes = []string{"m6g.large", "t4g.nano", "t4g.micro", "m6gd.xlarge"}
197+
default:
198+
alternativeInstanceTypes = []string{"c5.xlarge", "c5a.xlarge", "m5.xlarge"}
199+
}
200+
193201
for _, instanceType := range alternativeInstanceTypes {
194202
updatedProviderSpec, err := updateProviderSpecAWSInstanceType(baseProviderSpec, instanceType)
195203
if err != nil {
@@ -200,7 +208,15 @@ func BuildAlternativeMachineSetParams(machineSetParams MachineSetParams, platfor
200208
output = append(output, baseMachineSetParams)
201209
}
202210
case configv1.AzurePlatformType:
203-
alternativeVMSizes := []string{"Standard_F4s_v2", "Standard_D4as_v5", "Standard_D4as_v4"}
211+
var alternativeVMSizes []string
212+
213+
switch arch {
214+
case "arm64":
215+
alternativeVMSizes = []string{"Standard_D2ps_v5", "Standard_D3ps_v5", "Standard_D4ps_v5"}
216+
default:
217+
alternativeVMSizes = []string{"Standard_F4s_v2", "Standard_D4as_v5", "Standard_D4as_v4"}
218+
}
219+
204220
for _, VMSize := range alternativeVMSizes {
205221
updatedProviderSpec, err := updateProviderSpecAzureVMSize(baseProviderSpec, VMSize)
206222
if err != nil {
@@ -338,13 +354,13 @@ func GetWorkerMachineSets(ctx context.Context, client runtimeclient.Client) ([]*
338354
return result, nil
339355
}
340356

341-
// getArchitectureFromMachineSetNodes returns the architecture of the nodes controlled by the given machineSet's machines.
342-
func getArchitectureFromMachineSetNodes(ctx context.Context, client runtimeclient.Client, machineSet *machinev1.MachineSet) (string, error) {
357+
// GetArchitectureFromMachineSetNodes returns the architecture of the nodes controlled by the given machineSet's machines.
358+
func GetArchitectureFromMachineSetNodes(ctx context.Context, client runtimeclient.Client, machineSet *machinev1.MachineSet) (string, error) {
343359
nodes, err := GetNodesFromMachineSet(ctx, client, machineSet)
344360
if err != nil || len(nodes) == 0 {
345361
klog.Warningf("error getting the machineSet's nodes or no nodes associated with %s. Using the capacity annotation", machineSet.Name)
346362

347-
for _, kv := range strings.Split(machineSet.Labels[labelsKey], ",") {
363+
for _, kv := range strings.Split(machineSet.Annotations[labelsKey], ",") {
348364
if strings.Contains(kv, "kubernetes.io/arch") {
349365
return strings.Split(kv, "=")[1], nil
350366
}

pkg/infra/spot.go

+16-4
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ var _ = Describe("Running on Spot", framework.LabelMAPI, framework.LabelDisrupti
4444
var client runtimeclient.Client
4545
var machineSet *machinev1.MachineSet
4646
var platform configv1.PlatformType
47-
47+
var arch string
4848
var delObjects map[string]runtimeclient.Object
4949

5050
var gatherer *gatherer.StateGatherer
@@ -97,11 +97,22 @@ var _ = Describe("Running on Spot", framework.LabelMAPI, framework.LabelDisrupti
9797
default:
9898
Skip(fmt.Sprintf("Platform %s does not support Spot, skipping.", platform))
9999
}
100+
oc, _ := framework.NewCLI()
101+
if framework.IsCustomerVPC(oc) {
102+
// The termination-simulator hits a network error on customer VPC clusters and cannot mark the node as terminating, so skip this test for now.
103+
Skip("Skip this test on customer vpc cluster.")
104+
}
100105

101106
By("Creating a Spot backed MachineSet", func() {
102107
machineSetReady := false
103108
machineSetParams := framework.BuildMachineSetParams(ctx, client, machinesCount)
104-
machineSetParamsList, err := framework.BuildAlternativeMachineSetParams(machineSetParams, platform)
109+
110+
workers, err := framework.GetWorkerMachineSets(ctx, client)
111+
Expect(err).ToNot(HaveOccurred(), "listing Worker MachineSets should not error.")
112+
113+
arch, err = framework.GetArchitectureFromMachineSetNodes(ctx, client, workers[0])
114+
Expect(err).NotTo(HaveOccurred(), "unable to get the architecture for the machine set")
115+
machineSetParamsList, err := framework.BuildAlternativeMachineSetParams(machineSetParams, platform, arch)
105116
Expect(err).ToNot(HaveOccurred(), "Should be able to build list of MachineSet parameters")
106117
for i, machineSetParams := range machineSetParamsList {
107118
if i >= spotMachineSetMaxProvisioningRetryCount {
@@ -376,8 +387,9 @@ func getMetadataMockDeployment(platform configv1.PlatformType) *appsv1.Deploymen
376387
Spec: corev1.PodSpec{
377388
Containers: []corev1.Container{
378389
{
379-
Name: "metadata-mock",
380-
Image: "golang:1.14",
390+
Name: "metadata-mock",
391+
// This is a golang:1.22 image mirrored at https://quay.io/repository/openshifttest/golang so that disconnected clusters can access it.
392+
Image: "quay.io/openshifttest/golang@sha256:8f1c43387f0a107535906c7ee918a9d46079cc7be5e80a18424e8558d8afc702",
381393
Command: []string{"/usr/local/go/bin/go"},
382394
Args: []string{
383395
"run",

pkg/infra/webhooks.go

+24
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1313
"k8s.io/apimachinery/pkg/runtime"
1414
"k8s.io/apimachinery/pkg/util/intstr"
15+
"k8s.io/klog"
1516
runtimeclient "sigs.k8s.io/controller-runtime/pkg/client"
1617

1718
configv1 "github.com/openshift/api/config/v1"
@@ -112,6 +113,21 @@ var _ = Describe("Webhooks", framework.LabelMAPI, framework.LabelDisruptive, fun
112113
if err != nil {
113114
return err
114115
}
116+
117+
failed := framework.FilterMachines([]*machinev1beta1.Machine{m}, framework.MachinePhaseFailed)
118+
if len(failed) > 0 {
119+
reason := "failureReason not present in Machine.status"
120+
if m.Status.ErrorReason != nil {
121+
reason = string(*m.Status.ErrorReason)
122+
}
123+
message := "failureMessage not present in Machine.status"
124+
if m.Status.ErrorMessage != nil {
125+
message = *m.Status.ErrorMessage
126+
}
127+
klog.Errorf("Failed machine: %s, Reason: %s, Message: %s", m.Name, reason, message)
128+
}
129+
Expect(len(failed)).To(Equal(0), "zero machines should be in a Failed phase")
130+
115131
running := framework.FilterRunningMachines([]*machinev1beta1.Machine{m})
116132
if len(running) == 0 {
117133
return fmt.Errorf("machine not yet running")
@@ -252,6 +268,9 @@ func minimalAzureProviderSpec(ps *machinev1beta1.ProviderSpec) (*machinev1beta1.
252268
OSDisk: machinev1beta1.OSDisk{
253269
DiskSizeGB: fullProviderSpec.OSDisk.DiskSizeGB,
254270
},
271+
Vnet: fullProviderSpec.Vnet,
272+
Subnet: fullProviderSpec.Subnet,
273+
NetworkResourceGroup: fullProviderSpec.NetworkResourceGroup,
255274
},
256275
},
257276
}, nil
@@ -270,6 +289,11 @@ func minimalGCPProviderSpec(ps *machinev1beta1.ProviderSpec) (*machinev1beta1.Pr
270289
Region: fullProviderSpec.Region,
271290
Zone: fullProviderSpec.Zone,
272291
ServiceAccounts: fullProviderSpec.ServiceAccounts,
292+
NetworkInterfaces: []*machinev1beta1.GCPNetworkInterface{{
293+
Network: fullProviderSpec.NetworkInterfaces[0].Network,
294+
Subnetwork: fullProviderSpec.NetworkInterfaces[0].Subnetwork,
295+
ProjectID: fullProviderSpec.NetworkInterfaces[0].ProjectID,
296+
}},
273297
},
274298
},
275299
}, nil

0 commit comments

Comments
 (0)