24
24
import org .elasticsearch .cluster .DiskUsage ;
25
25
import org .elasticsearch .cluster .routing .RoutingNode ;
26
26
import org .elasticsearch .cluster .routing .ShardRouting ;
27
+ import org .elasticsearch .cluster .routing .ShardRoutingState ;
27
28
import org .elasticsearch .cluster .routing .allocation .RoutingAllocation ;
28
29
import org .elasticsearch .common .inject .Inject ;
29
30
import org .elasticsearch .common .settings .Settings ;
30
31
import org .elasticsearch .common .unit .ByteSizeValue ;
31
32
import org .elasticsearch .common .unit .RatioValue ;
32
33
import org .elasticsearch .node .settings .NodeSettingsService ;
33
34
35
+ import java .util .List ;
34
36
import java .util .Map ;
35
37
36
38
import static org .elasticsearch .cluster .InternalClusterInfoService .shardIdentifierFromRouting ;
@@ -66,24 +68,32 @@ public class DiskThresholdDecider extends AllocationDecider {
66
68
private volatile Double freeDiskThresholdHigh ;
67
69
private volatile ByteSizeValue freeBytesThresholdLow ;
68
70
private volatile ByteSizeValue freeBytesThresholdHigh ;
71
+ private volatile boolean includeRelocations ;
69
72
private volatile boolean enabled ;
70
73
71
74
public static final String CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED = "cluster.routing.allocation.disk.threshold_enabled" ;
72
75
public static final String CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK = "cluster.routing.allocation.disk.watermark.low" ;
73
76
public static final String CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK = "cluster.routing.allocation.disk.watermark.high" ;
77
+ public static final String CLUSTER_ROUTING_ALLOCATION_INCLUDE_RELOCATIONS = "cluster.routing.allocation.disk.include_relocations" ;
74
78
75
79
class ApplySettings implements NodeSettingsService .Listener {
76
80
@ Override
77
81
public void onRefreshSettings (Settings settings ) {
78
82
String newLowWatermark = settings .get (CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK , null );
79
83
String newHighWatermark = settings .get (CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK , null );
84
+ Boolean newRelocationsSetting = settings .getAsBoolean (CLUSTER_ROUTING_ALLOCATION_INCLUDE_RELOCATIONS , null );
80
85
Boolean newEnableSetting = settings .getAsBoolean (CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED , null );
81
86
82
87
if (newEnableSetting != null ) {
83
88
logger .info ("updating [{}] from [{}] to [{}]" , CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED ,
84
89
DiskThresholdDecider .this .enabled , newEnableSetting );
85
90
DiskThresholdDecider .this .enabled = newEnableSetting ;
86
91
}
92
+ if (newRelocationsSetting != null ) {
93
+ logger .info ("updating [{}] from [{}] to [{}]" , CLUSTER_ROUTING_ALLOCATION_INCLUDE_RELOCATIONS ,
94
+ DiskThresholdDecider .this .includeRelocations , newRelocationsSetting );
95
+ DiskThresholdDecider .this .includeRelocations = newRelocationsSetting ;
96
+ }
87
97
if (newLowWatermark != null ) {
88
98
if (!validWatermarkSetting (newLowWatermark )) {
89
99
throw new ElasticsearchParseException ("Unable to parse low watermark: [" + newLowWatermark + "]" );
@@ -125,11 +135,29 @@ public DiskThresholdDecider(Settings settings, NodeSettingsService nodeSettingsS
125
135
126
136
this .freeBytesThresholdLow = thresholdBytesFromWatermark (lowWatermark );
127
137
this .freeBytesThresholdHigh = thresholdBytesFromWatermark (highWatermark );
138
+ this .includeRelocations = settings .getAsBoolean (CLUSTER_ROUTING_ALLOCATION_INCLUDE_RELOCATIONS , true );
128
139
129
140
this .enabled = settings .getAsBoolean (CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED , true );
130
141
nodeSettingsService .addListener (new ApplySettings ());
131
142
}
132
143
144
+ /**
145
+ * Returns the size of all shards that are currently being relocated to
146
+ * the node, but may not be finished transfering yet.
147
+ */
148
+ public long sizeOfRelocatingShards (RoutingNode node , RoutingAllocation allocation , Map <String , Long > shardSizes ) {
149
+ List <ShardRouting > relocatingShards = allocation .routingTable ().shardsWithState (ShardRoutingState .RELOCATING );
150
+ long totalSize = 0 ;
151
+ for (ShardRouting routing : relocatingShards ) {
152
+ if (routing .relocatingNodeId ().equals (node .nodeId ())) {
153
+ Long shardSize = shardSizes .get (shardIdentifierFromRouting (routing ));
154
+ shardSize = shardSize == null ? 0 : shardSize ;
155
+ totalSize += shardSize ;
156
+ }
157
+ }
158
+ return totalSize ;
159
+ }
160
+
133
161
public Decision canAllocate (ShardRouting shardRouting , RoutingNode node , RoutingAllocation allocation ) {
134
162
135
163
// Always allow allocation if the decider is disabled
@@ -175,6 +203,16 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing
175
203
}
176
204
}
177
205
206
+ if (includeRelocations ) {
207
+ long relocatingShardsSize = sizeOfRelocatingShards (node , allocation , shardSizes );
208
+ DiskUsage usageIncludingRelocations = new DiskUsage (node .nodeId (), usage .getTotalBytes (), usage .getFreeBytes () - relocatingShardsSize );
209
+ if (logger .isDebugEnabled ()) {
210
+ logger .debug ("usage without relocations: {}" , usage );
211
+ logger .debug ("usage with relocations: [{} bytes] {}" , relocatingShardsSize , usageIncludingRelocations );
212
+ }
213
+ usage = usageIncludingRelocations ;
214
+ }
215
+
178
216
// First, check that the node currently over the low watermark
179
217
double freeDiskPercentage = usage .getFreeDiskAsPercentage ();
180
218
long freeBytes = usage .getFreeBytes ();
@@ -226,7 +264,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing
226
264
freeDiskThresholdLow , freeDiskPercentage , node .nodeId ());
227
265
}
228
266
return allocation .decision (Decision .NO , NAME , "less than required [%s%%] free disk on node, free: [%s%%]" ,
229
- freeDiskThresholdLow , freeDiskThresholdLow );
267
+ freeDiskThresholdLow , freeDiskPercentage );
230
268
} else if (freeDiskPercentage > freeDiskThresholdHigh ) {
231
269
// Allow the shard to be allocated because it is primary that
232
270
// has never been allocated if it's under the high watermark
@@ -245,7 +283,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing
245
283
freeDiskThresholdHigh , freeDiskPercentage , node .nodeId ());
246
284
}
247
285
return allocation .decision (Decision .NO , NAME , "less than required [%s%%] free disk on node, free: [%s%%]" ,
248
- freeDiskThresholdLow , freeDiskThresholdLow );
286
+ freeDiskThresholdLow , freeDiskPercentage );
249
287
}
250
288
}
251
289
@@ -306,6 +344,17 @@ public Decision canRemain(ShardRouting shardRouting, RoutingNode node, RoutingAl
306
344
}
307
345
}
308
346
347
+ if (includeRelocations ) {
348
+ Map <String , Long > shardSizes = clusterInfo .getShardSizes ();
349
+ long relocatingShardsSize = sizeOfRelocatingShards (node , allocation , shardSizes );
350
+ DiskUsage usageIncludingRelocations = new DiskUsage (node .nodeId (), usage .getTotalBytes (), usage .getFreeBytes () - relocatingShardsSize );
351
+ if (logger .isDebugEnabled ()) {
352
+ logger .debug ("usage without relocations: {}" , usage );
353
+ logger .debug ("usage with relocations: [{} bytes] {}" , relocatingShardsSize , usageIncludingRelocations );
354
+ }
355
+ usage = usageIncludingRelocations ;
356
+ }
357
+
309
358
// If this node is already above the high threshold, the shard cannot remain (get it off!)
310
359
double freeDiskPercentage = usage .getFreeDiskAsPercentage ();
311
360
long freeBytes = usage .getFreeBytes ();
0 commit comments