@@ -47,19 +47,154 @@ import (
47
47
"k8s.io/client-go/restmapper"
48
48
"k8s.io/client-go/util/retry"
49
49
featuregatetesting "k8s.io/component-base/featuregate/testing"
50
+ basemetrics "k8s.io/component-base/metrics"
51
+ "k8s.io/component-base/metrics/testutil"
50
52
"k8s.io/controller-manager/pkg/informerfactory"
51
53
"k8s.io/klog/v2"
52
54
kubeapiservertesting "k8s.io/kubernetes/cmd/kube-apiserver/app/testing"
53
55
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
54
56
"k8s.io/kubernetes/pkg/controller/garbagecollector"
55
57
jobcontroller "k8s.io/kubernetes/pkg/controller/job"
58
+ "k8s.io/kubernetes/pkg/controller/job/metrics"
56
59
"k8s.io/kubernetes/pkg/features"
57
60
"k8s.io/kubernetes/test/integration/framework"
58
61
"k8s.io/utils/pointer"
59
62
)
60
63
61
64
// waitInterval is a one-second duration constant.
// NOTE(review): its uses are outside this excerpt; presumably it is the polling
// interval for the wait helpers in this file — confirm against callers.
const waitInterval = time .Second
62
65
66
// metricLabelsWithValue pairs a set of metric label values with an integer
// counter value. The tests use it both for a snapshot read from a counter and
// for the delta a test case expects.
type metricLabelsWithValue struct {
	// Labels are the label values used to select one counter of a CounterVec.
	Labels []string
	// Value is the counter reading (or expected delta) for Labels.
	Value int
}
70
+
71
+ func TestMetrics (t * testing.T ) {
72
+ nonIndexedCompletion := batchv1 .NonIndexedCompletion
73
+ indexedCompletion := batchv1 .IndexedCompletion
74
+ wFinalizers := true
75
+ defer featuregatetesting .SetFeatureGateDuringTest (t , feature .DefaultFeatureGate , features .JobTrackingWithFinalizers , wFinalizers )()
76
+
77
+ // setup the job controller
78
+ closeFn , restConfig , clientSet , ns := setup (t , "simple" )
79
+ defer closeFn ()
80
+ ctx , cancel := startJobControllerAndWaitForCaches (restConfig )
81
+ defer func () {
82
+ cancel ()
83
+ }()
84
+
85
+ testCases := map [string ]struct {
86
+ job * batchv1.Job
87
+ wantJobFinishedNumMetricDelta metricLabelsWithValue
88
+ wantJobPodsFinishedMetricDelta metricLabelsWithValue
89
+ }{
90
+ "non-indexed job" : {
91
+ job : & batchv1.Job {
92
+ Spec : batchv1.JobSpec {
93
+ Completions : pointer .Int32 (2 ),
94
+ Parallelism : pointer .Int32 (2 ),
95
+ CompletionMode : & nonIndexedCompletion ,
96
+ },
97
+ },
98
+ wantJobFinishedNumMetricDelta : metricLabelsWithValue {
99
+ Labels : []string {"NonIndexed" , "succeeded" },
100
+ Value : 1 ,
101
+ },
102
+ wantJobPodsFinishedMetricDelta : metricLabelsWithValue {
103
+ Labels : []string {"NonIndexed" , "succeeded" },
104
+ Value : 2 ,
105
+ },
106
+ },
107
+ "indexed job" : {
108
+ job : & batchv1.Job {
109
+ Spec : batchv1.JobSpec {
110
+ Completions : pointer .Int32 (2 ),
111
+ Parallelism : pointer .Int32 (2 ),
112
+ CompletionMode : & indexedCompletion ,
113
+ },
114
+ },
115
+ wantJobFinishedNumMetricDelta : metricLabelsWithValue {
116
+ Labels : []string {"Indexed" , "succeeded" },
117
+ Value : 1 ,
118
+ },
119
+ wantJobPodsFinishedMetricDelta : metricLabelsWithValue {
120
+ Labels : []string {"Indexed" , "succeeded" },
121
+ Value : 2 ,
122
+ },
123
+ },
124
+ }
125
+ job_index := 0 // job index to avoid collisions between job names created by different test cases
126
+ for name , tc := range testCases {
127
+ t .Run (name , func (t * testing.T ) {
128
+
129
+ // record the metrics after the job is created
130
+ jobFinishedNumBefore , err := getCounterMetricValueForLabels (metrics .JobFinishedNum , tc .wantJobFinishedNumMetricDelta .Labels )
131
+ if err != nil {
132
+ t .Fatalf ("Failed to collect the JobFinishedNum metric before creating the job: %q" , err )
133
+ }
134
+ jobPodsFinishedBefore , err := getCounterMetricValueForLabels (metrics .JobPodsFinished , tc .wantJobPodsFinishedMetricDelta .Labels )
135
+ if err != nil {
136
+ t .Fatalf ("Failed to collect the JobPodsFinished metric before creating the job: %q" , err )
137
+ }
138
+
139
+ // create a single job and wait for its completion
140
+ job := tc .job .DeepCopy ()
141
+ job .Name = fmt .Sprintf ("job-%v" , job_index )
142
+ job_index ++
143
+ jobObj , err := createJobWithDefaults (ctx , clientSet , ns .Name , job )
144
+ if err != nil {
145
+ t .Fatalf ("Failed to create Job: %v" , err )
146
+ }
147
+ validateJobPodsStatus (ctx , t , clientSet , jobObj , podsByStatus {
148
+ Active : int (* jobObj .Spec .Parallelism ),
149
+ Ready : pointer .Int32 (0 ),
150
+ }, wFinalizers )
151
+ if err , _ := setJobPodsPhase (ctx , clientSet , jobObj , v1 .PodSucceeded , int (* jobObj .Spec .Parallelism )); err != nil {
152
+ t .Fatalf ("Failed setting phase %s on Job Pod: %v" , v1 .PodSucceeded , err )
153
+ }
154
+ validateJobSucceeded (ctx , t , clientSet , jobObj )
155
+
156
+ // verify metric values after the job is finished
157
+ validateMetricValueDeltas (t , metrics .JobFinishedNum , tc .wantJobFinishedNumMetricDelta , jobFinishedNumBefore )
158
+ validateMetricValueDeltas (t , metrics .JobPodsFinished , tc .wantJobPodsFinishedMetricDelta , jobPodsFinishedBefore )
159
+ })
160
+ }
161
+ }
162
+
163
+ func validateMetricValueDeltas (t * testing.T , counterVer * basemetrics.CounterVec , wantMetricDelta metricLabelsWithValue , metricValuesBefore metricLabelsWithValue ) {
164
+ t .Helper ()
165
+ var cmpErr error
166
+ err := wait .PollImmediate (10 * time .Millisecond , 10 * time .Second , func () (bool , error ) {
167
+ cmpErr = nil
168
+ metricValuesAfter , err := getCounterMetricValueForLabels (counterVer , wantMetricDelta .Labels )
169
+ if err != nil {
170
+ return true , fmt .Errorf ("Failed to collect the %q metric after the job is finished: %q" , counterVer .Name , err )
171
+ }
172
+ wantDelta := wantMetricDelta .Value
173
+ gotDelta := metricValuesAfter .Value - metricValuesBefore .Value
174
+ if wantDelta != gotDelta {
175
+ cmpErr = fmt .Errorf ("Unexepected metric delta for %q metric with labels %q. want: %v, got: %v" , counterVer .Name , wantMetricDelta .Labels , wantDelta , gotDelta )
176
+ return false , nil
177
+ }
178
+ return true , nil
179
+ })
180
+ if err != nil {
181
+ t .Errorf ("Failed waiting for expected metric delta: %q" , err )
182
+ }
183
+ if cmpErr != nil {
184
+ t .Error (cmpErr )
185
+ }
186
+ }
187
+
188
+ func getCounterMetricValueForLabels (counterVec * basemetrics.CounterVec , labels []string ) (metricLabelsWithValue , error ) {
189
+ var result metricLabelsWithValue = metricLabelsWithValue {Labels : labels }
190
+ value , err := testutil .GetCounterMetricValue (counterVec .WithLabelValues (labels ... ))
191
+ if err != nil {
192
+ return result , err
193
+ }
194
+ result .Value = int (value )
195
+ return result , nil
196
+ }
197
+
63
198
// TestJobPodFailurePolicyWithFailedPodDeletedDuringControllerRestart verifies that the job is properly marked as Failed
64
199
// in a scenario when the job controller crashes between removing pod finalizers and marking the job as Failed (based on
65
200
// the pod failure policy). After the finalizer for the failed pod is removed we remove the failed pod. This step is
0 commit comments