@@ -71,21 +71,31 @@ public JobVertexScaler(AutoScalerEventHandler<KEY, Context> autoScalerEventHandl
         this.autoScalerEventHandler = autoScalerEventHandler;
     }

-    public int computeScaleTargetParallelism(
+    public VertexScalingResult computeScaleTargetParallelism(
             Context context,
             JobVertexID vertex,
             Collection<ShipStrategy> inputShipStrategies,
             Map<ScalingMetric, EvaluatedScalingMetric> evaluatedMetrics,
             SortedMap<Instant, ScalingSummary> history,
-            Duration restartTime) {
+            Duration restartTime,
+            double backpropagationScaleFactor) {
         var conf = context.getConfiguration();
+
+        boolean excluded =
+                conf.get(AutoScalerOptions.VERTEX_EXCLUDE_IDS).contains(vertex.toHexString());
+        if (excluded) {
+            LOG.debug(
+                    "Vertex {} is part of the `vertex.exclude.ids` config, checking for bottleneck but not scaling",
+                    vertex);
+        }
+
         var currentParallelism = (int) evaluatedMetrics.get(PARALLELISM).getCurrent();
         double averageTrueProcessingRate = evaluatedMetrics.get(TRUE_PROCESSING_RATE).getAverage();
         if (Double.isNaN(averageTrueProcessingRate)) {
             LOG.warn(
                     "True processing rate is not available for {}, cannot compute new parallelism",
                     vertex);
-            return currentParallelism;
+            return VertexScalingResult.normalScaling(currentParallelism);
         }

         double targetCapacity =
@@ -95,9 +105,11 @@ public int computeScaleTargetParallelism(
             LOG.warn(
                     "Target data rate is not available for {}, cannot compute new parallelism",
                     vertex);
-            return currentParallelism;
+            return VertexScalingResult.normalScaling(currentParallelism);
         }

+        targetCapacity *= backpropagationScaleFactor;
+
         LOG.debug("Target processing capacity for {} is {}", vertex, targetCapacity);
         double scaleFactor = targetCapacity / averageTrueProcessingRate;
         double minScaleFactor = 1 - conf.get(MAX_SCALE_DOWN_FACTOR);
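The new backpropagationScaleFactor parameter shrinks the vertex's target capacity before the scale factor is derived, so a downstream bottleneck can reduce how aggressively upstream vertices are scaled up. A minimal standalone sketch of that arithmetic with made-up numbers (in the real code the factor comes from the bottleneck backpropagation pass, it is not hard-coded):

// Hypothetical, self-contained illustration of the capacity adjustment above.
public class BackpropagationExample {
    public static void main(String[] args) {
        double targetCapacity = 1_000.0;          // records/s the vertex should handle
        double averageTrueProcessingRate = 250.0; // records/s at current parallelism
        double backpropagationScaleFactor = 0.5;  // downstream can only absorb half the rate

        targetCapacity *= backpropagationScaleFactor;                    // 1000 -> 500
        double scaleFactor = targetCapacity / averageTrueProcessingRate; // 2.0 instead of 4.0

        System.out.printf("scaleFactor = %.1f%n", scaleFactor);
    }
}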
@@ -122,32 +134,44 @@ public int computeScaleTargetParallelism(
         double cappedTargetCapacity = averageTrueProcessingRate * scaleFactor;
         LOG.debug("Capped target processing capacity for {} is {}", vertex, cappedTargetCapacity);

-        int newParallelism =
+        int parallelismLowerLimit =
+                excluded
+                        ? currentParallelism
+                        : Math.min(currentParallelism, conf.getInteger(VERTEX_MIN_PARALLELISM));
+        int parallelismUpperLimit =
+                excluded
+                        ? currentParallelism
+                        : Math.max(currentParallelism, conf.getInteger(VERTEX_MAX_PARALLELISM));
+
+        var scalingResult =
                 scale(
                         currentParallelism,
                         inputShipStrategies,
                         (int) evaluatedMetrics.get(MAX_PARALLELISM).getCurrent(),
                         scaleFactor,
-                        Math.min(currentParallelism, conf.getInteger(VERTEX_MIN_PARALLELISM)),
-                        Math.max(currentParallelism, conf.getInteger(VERTEX_MAX_PARALLELISM)));
+                        parallelismLowerLimit,
+                        parallelismUpperLimit);

-        if (newParallelism == currentParallelism
+        if (scalingResult.getParallelism() == currentParallelism
                 || blockScalingBasedOnPastActions(
                         context,
                         vertex,
                         conf,
                         evaluatedMetrics,
                         history,
                         currentParallelism,
-                        newParallelism)) {
-            return currentParallelism;
+                        scalingResult.getParallelism())) {
+            return new VertexScalingResult(
+                    currentParallelism,
+                    scalingResult.getBottleneckScaleFactor(),
+                    scalingResult.isBottleneck());
         }

         // We record our expectations for this scaling operation
         evaluatedMetrics.put(
                 ScalingMetric.EXPECTED_PROCESSING_RATE,
                 EvaluatedScalingMetric.of(cappedTargetCapacity));
-        return newParallelism;
+        return scalingResult;
     }

     private boolean blockScalingBasedOnPastActions(
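The VertexScalingResult return type is not defined anywhere in this diff. A minimal sketch of what such a class could look like, inferred only from the calls above (the three-argument constructor, normalScaling, getParallelism, getBottleneckScaleFactor, isBottleneck); the field names are assumptions and the actual class in the PR may differ:

// Hypothetical reconstruction of the result type used by computeScaleTargetParallelism and scale.
public class VertexScalingResult {
    private final int parallelism;
    private final double bottleneckScaleFactor;
    private final boolean bottleneck;

    public VertexScalingResult(int parallelism, double bottleneckScaleFactor, boolean bottleneck) {
        this.parallelism = parallelism;
        this.bottleneckScaleFactor = bottleneckScaleFactor;
        this.bottleneck = bottleneck;
    }

    /** Result for a vertex that is neither capped nor a bottleneck. */
    public static VertexScalingResult normalScaling(int parallelism) {
        return new VertexScalingResult(parallelism, 1.0, false);
    }

    public int getParallelism() {
        return parallelism;
    }

    public double getBottleneckScaleFactor() {
        return bottleneckScaleFactor;
    }

    public boolean isBottleneck() {
        return bottleneck;
    }
}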
@@ -249,9 +273,12 @@ private boolean detectIneffectiveScaleUp(
      * <p>Also, in order to ensure the data is evenly spread across subtasks, we try to adjust the
      * parallelism for source and keyed vertex such that it divides the maxParallelism without a
      * remainder.
+     *
+     * <p>If newParallelism exceeds min(parallelismUpperLimit, maxParallelism), the job vertex is
+     * considered to be a bottleneck.
      */
     @VisibleForTesting
-    protected static int scale(
+    protected static VertexScalingResult scale(
             int currentParallelism,
             Collection<ShipStrategy> inputShipStrategies,
             int maxParallelism,
@@ -284,26 +311,36 @@ protected static int scale(
         // parallelism upper limit
         final int upperBound = Math.min(maxParallelism, parallelismUpperLimit);

+        boolean isBottleneck = false;
+        double bottleneckScaleFactor = 1.0;
+
+        // If the required parallelism is higher than the upper bound, the vertex is a bottleneck
+        if (newParallelism > upperBound) {
+            isBottleneck = true;
+            bottleneckScaleFactor = (double) upperBound / newParallelism;
+            newParallelism = upperBound;
+        }
+
         // Apply min/max parallelism
-        newParallelism = Math.min(Math.max(parallelismLowerLimit, newParallelism), upperBound);
+        newParallelism = Math.max(parallelismLowerLimit, newParallelism);

         var adjustByMaxParallelism =
                 inputShipStrategies.isEmpty() || inputShipStrategies.contains(HASH);
         if (!adjustByMaxParallelism) {
-            return newParallelism;
+            return new VertexScalingResult(newParallelism, bottleneckScaleFactor, isBottleneck);
         }

         // When the shuffle type of vertex inputs contains keyBy or vertex is a source, we try to
         // adjust the parallelism such that it divides the maxParallelism without a remainder
         // => data is evenly spread across subtasks
         for (int p = newParallelism; p <= maxParallelism / 2 && p <= upperBound; p++) {
             if (maxParallelism % p == 0) {
-                return p;
+                return new VertexScalingResult(p, bottleneckScaleFactor, isBottleneck);
             }
         }

         // If parallelism adjustment fails, use originally computed parallelism
-        return newParallelism;
+        return new VertexScalingResult(newParallelism, bottleneckScaleFactor, isBottleneck);
     }

     @VisibleForTesting
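To make the new bottleneck branch in scale(...) concrete: when the demanded parallelism exceeds the upper bound, the vertex is capped at that bound and the ratio upperBound / newParallelism is reported so the caller can feed it back upstream as a backpropagation scale factor. A small worked example with assumed numbers:

// Hypothetical numbers illustrating the bottleneck branch above.
public class BottleneckExample {
    public static void main(String[] args) {
        int newParallelism = 24; // parallelism demanded by the scale factor
        int upperBound = 16;     // min(maxParallelism, parallelismUpperLimit)

        boolean isBottleneck = false;
        double bottleneckScaleFactor = 1.0;
        if (newParallelism > upperBound) {
            isBottleneck = true;
            bottleneckScaleFactor = (double) upperBound / newParallelism; // 16 / 24 = 0.666...
            newParallelism = upperBound;                                  // capped at 16
        }
        System.out.printf(
                "parallelism=%d, bottleneck=%b, factor=%.3f%n",
                newParallelism, isBottleneck, bottleneckScaleFactor);
    }
}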