Skip to content

Commit 8e12d3f

Browse files
committed
Fix the failures in qe ci jobs
1 parent e0af696 commit 8e12d3f

File tree

4 files changed

+75
-12
lines changed

4 files changed

+75
-12
lines changed

Diff for: hack/ci-integration.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ go run ./vendor/github.com/onsi/ginkgo/v2/ginkgo \
77
-v \
88
--timeout=115m \
99
--grace-period=5m \
10-
--fail-fast \
10+
--fail-fast=false \
1111
--no-color \
1212
--junit-report="junit_cluster_api_actuator_pkg_e2e.xml" \
1313
--output-dir="${OUTPUT_DIR}" \

Diff for: pkg/framework/machinesets.go

+23-7
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ func BuildPerArchMachineSetParamsList(ctx context.Context, client runtimeclient.
7777
var params MachineSetParams
7878

7979
for _, worker := range workers {
80-
if arch, err = getArchitectureFromMachineSetNodes(ctx, client, worker); err != nil {
80+
if arch, err = GetArchitectureFromMachineSetNodes(ctx, client, worker); err != nil {
8181
klog.Warningf("unable to get the architecture for the machine set %s: %v", worker.Name, err)
8282
continue
8383
}
@@ -180,7 +180,7 @@ func CreateMachineSet(c runtimeclient.Client, params MachineSetParams) (*machine
180180
}
181181

182182
// BuildAlternativeMachineSetParams creates a list of MachineSetParams based on the given machineSetParams with modified instance type.
183-
func BuildAlternativeMachineSetParams(machineSetParams MachineSetParams, platform configv1.PlatformType) ([]MachineSetParams, error) {
183+
func BuildAlternativeMachineSetParams(machineSetParams MachineSetParams, platform configv1.PlatformType, arch string) ([]MachineSetParams, error) {
184184
baseMachineSetParams := machineSetParams
185185
baseProviderSpec := baseMachineSetParams.ProviderSpec.DeepCopy()
186186

@@ -189,7 +189,15 @@ func BuildAlternativeMachineSetParams(machineSetParams MachineSetParams, platfor
189189
switch platform {
190190
case configv1.AWSPlatformType:
191191
// Using cheapest compute optimized instances that meet openshift minimum requirements (4 vCPU, 8GiB RAM)
192-
alternativeInstanceTypes := []string{"c5.xlarge", "c5a.xlarge", "m5.xlarge"}
192+
var alternativeInstanceTypes []string
193+
194+
switch arch {
195+
case "arm64":
196+
alternativeInstanceTypes = []string{"m6g.large", "t4g.nano", "t4g.micro", "m6gd.xlarge"}
197+
default:
198+
alternativeInstanceTypes = []string{"c5.xlarge", "c5a.xlarge", "m5.xlarge"}
199+
}
200+
193201
for _, instanceType := range alternativeInstanceTypes {
194202
updatedProviderSpec, err := updateProviderSpecAWSInstanceType(baseProviderSpec, instanceType)
195203
if err != nil {
@@ -200,7 +208,15 @@ func BuildAlternativeMachineSetParams(machineSetParams MachineSetParams, platfor
200208
output = append(output, baseMachineSetParams)
201209
}
202210
case configv1.AzurePlatformType:
203-
alternativeVMSizes := []string{"Standard_F4s_v2", "Standard_D4as_v5", "Standard_D4as_v4"}
211+
var alternativeVMSizes []string
212+
213+
switch arch {
214+
case "arm64":
215+
alternativeVMSizes = []string{"Standard_D2ps_v5", "Standard_D3ps_v5", "Standard_D4ps_v5"}
216+
default:
217+
alternativeVMSizes = []string{"Standard_F4s_v2", "Standard_D4as_v5", "Standard_D4as_v4"}
218+
}
219+
204220
for _, VMSize := range alternativeVMSizes {
205221
updatedProviderSpec, err := updateProviderSpecAzureVMSize(baseProviderSpec, VMSize)
206222
if err != nil {
@@ -338,13 +354,13 @@ func GetWorkerMachineSets(ctx context.Context, client runtimeclient.Client) ([]*
338354
return result, nil
339355
}
340356

341-
// getArchitectureFromMachineSetNodes returns the architecture of the nodes controlled by the given machineSet's machines.
342-
func getArchitectureFromMachineSetNodes(ctx context.Context, client runtimeclient.Client, machineSet *machinev1.MachineSet) (string, error) {
357+
// GetArchitectureFromMachineSetNodes returns the architecture of the nodes controlled by the given machineSet's machines.
358+
func GetArchitectureFromMachineSetNodes(ctx context.Context, client runtimeclient.Client, machineSet *machinev1.MachineSet) (string, error) {
343359
nodes, err := GetNodesFromMachineSet(ctx, client, machineSet)
344360
if err != nil || len(nodes) == 0 {
345361
klog.Warningf("error getting the machineSet's nodes or no nodes associated with %s. Using the capacity annotation", machineSet.Name)
346362

347-
for _, kv := range strings.Split(machineSet.Labels[labelsKey], ",") {
363+
for _, kv := range strings.Split(machineSet.Annotations[labelsKey], ",") {
348364
if strings.Contains(kv, "kubernetes.io/arch") {
349365
return strings.Split(kv, "=")[1], nil
350366
}

Diff for: pkg/infra/spot.go

+28-4
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"fmt"
88
"math/rand"
99
"os"
10+
"strings"
1011
"time"
1112

1213
. "github.com/onsi/ginkgo/v2"
@@ -44,7 +45,7 @@ var _ = Describe("Running on Spot", framework.LabelMAPI, framework.LabelDisrupti
4445
var client runtimeclient.Client
4546
var machineSet *machinev1.MachineSet
4647
var platform configv1.PlatformType
47-
48+
var arch string
4849
var delObjects map[string]runtimeclient.Object
4950

5051
var gatherer *gatherer.StateGatherer
@@ -101,7 +102,13 @@ var _ = Describe("Running on Spot", framework.LabelMAPI, framework.LabelDisrupti
101102
By("Creating a Spot backed MachineSet", func() {
102103
machineSetReady := false
103104
machineSetParams := framework.BuildMachineSetParams(ctx, client, machinesCount)
104-
machineSetParamsList, err := framework.BuildAlternativeMachineSetParams(machineSetParams, platform)
105+
106+
workers, err := framework.GetWorkerMachineSets(ctx, client)
107+
Expect(err).ToNot(HaveOccurred(), "listing Worker MachineSets should not error.")
108+
109+
arch, err = framework.GetArchitectureFromMachineSetNodes(ctx, client, workers[0])
110+
Expect(err).NotTo(HaveOccurred(), "unable to get the architecture for the machine set")
111+
machineSetParamsList, err := framework.BuildAlternativeMachineSetParams(machineSetParams, platform, arch)
105112
Expect(err).ToNot(HaveOccurred(), "Should be able to build list of MachineSet parameters")
106113
for i, machineSetParams := range machineSetParamsList {
107114
if i >= spotMachineSetMaxProvisioningRetryCount {
@@ -258,6 +265,22 @@ var _ = Describe("Running on Spot", framework.LabelMAPI, framework.LabelDisrupti
258265
delObjects[job.Name] = job
259266
})
260267

268+
By("Skip case on disconnected cluster")
269+
oc, _ := framework.NewCLI()
270+
Eventually(func() string {
271+
podStatus, err := oc.WithoutNamespace().Run("get").Args("pod", "-l", "job-name=termination-simulator", "-n", "openshift-machine-api").Output()
272+
Expect(err).ToNot(HaveOccurred(), "Should be able to get termination-simulator pod")
273+
fmt.Println(podStatus)
274+
if strings.Contains(podStatus, "ImagePullBackOff") {
275+
Skip("Skip as termination-simulator pod can not be deployed on a disconnected cluster!")
276+
}
277+
278+
return podStatus
279+
}, framework.WaitMedium, framework.RetryMedium).Should(SatisfyAny(
280+
ContainSubstring("Completed"),
281+
ContainSubstring("Running"),
282+
))
283+
261284
// If the job deploys correctly, the Machine will go away
262285
By(fmt.Sprintf("Waiting for machine %q to be deleted", machine.Name), func() {
263286
framework.WaitForMachinesDeleted(client, machine)
@@ -376,8 +399,9 @@ func getMetadataMockDeployment(platform configv1.PlatformType) *appsv1.Deploymen
376399
Spec: corev1.PodSpec{
377400
Containers: []corev1.Container{
378401
{
379-
Name: "metadata-mock",
380-
Image: "golang:1.14",
402+
Name: "metadata-mock",
403+
// This is a golang image mirrored at https://quay.io/repository/openshifttest/golang so that disconnected clusters can pull it.
404+
Image: "quay.io/openshifttest/golang@sha256:8f1c43387f0a107535906c7ee918a9d46079cc7be5e80a18424e8558d8afc702",
381405
Command: []string{"/usr/local/go/bin/go"},
382406
Args: []string{
383407
"run",

Diff for: pkg/infra/webhooks.go

+23
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1313
"k8s.io/apimachinery/pkg/runtime"
1414
"k8s.io/apimachinery/pkg/util/intstr"
15+
"k8s.io/klog"
1516
runtimeclient "sigs.k8s.io/controller-runtime/pkg/client"
1617

1718
configv1 "github.com/openshift/api/config/v1"
@@ -112,6 +113,21 @@ var _ = Describe("Webhooks", framework.LabelMAPI, framework.LabelDisruptive, fun
112113
if err != nil {
113114
return err
114115
}
116+
117+
failed := framework.FilterMachines([]*machinev1beta1.Machine{m}, framework.MachinePhaseFailed)
118+
if len(failed) > 0 {
119+
reason := "failureReason not present in Machine.status"
120+
if m.Status.ErrorReason != nil {
121+
reason = string(*m.Status.ErrorReason)
122+
}
123+
message := "failureMessage not present in Machine.status"
124+
if m.Status.ErrorMessage != nil {
125+
message = *m.Status.ErrorMessage
126+
}
127+
klog.Errorf("Failed machine: %s, Reason: %s, Message: %s", m.Name, reason, message)
128+
}
129+
Expect(len(failed)).To(Equal(0), "zero machines should be in a Failed phase")
130+
115131
running := framework.FilterRunningMachines([]*machinev1beta1.Machine{m})
116132
if len(running) == 0 {
117133
return fmt.Errorf("machine not yet running")
@@ -252,6 +268,9 @@ func minimalAzureProviderSpec(ps *machinev1beta1.ProviderSpec) (*machinev1beta1.
252268
OSDisk: machinev1beta1.OSDisk{
253269
DiskSizeGB: fullProviderSpec.OSDisk.DiskSizeGB,
254270
},
271+
Vnet: fullProviderSpec.Vnet,
272+
Subnet: fullProviderSpec.Subnet,
273+
NetworkResourceGroup: fullProviderSpec.NetworkResourceGroup,
255274
},
256275
},
257276
}, nil
@@ -270,6 +289,10 @@ func minimalGCPProviderSpec(ps *machinev1beta1.ProviderSpec) (*machinev1beta1.Pr
270289
Region: fullProviderSpec.Region,
271290
Zone: fullProviderSpec.Zone,
272291
ServiceAccounts: fullProviderSpec.ServiceAccounts,
292+
NetworkInterfaces: []*machinev1beta1.GCPNetworkInterface{{
293+
Network: fullProviderSpec.NetworkInterfaces[0].Network,
294+
Subnetwork: fullProviderSpec.NetworkInterfaces[0].Subnetwork,
295+
}},
273296
},
274297
},
275298
}, nil

0 commit comments

Comments
 (0)