@@ -64,7 +64,6 @@ public class DatafeedManager {
64
64
private final DatafeedJobBuilder datafeedJobBuilder ;
65
65
private final TaskRunner taskRunner = new TaskRunner ();
66
66
private final AutodetectProcessManager autodetectProcessManager ;
67
- private volatile boolean isolated ;
68
67
69
68
public DatafeedManager (ThreadPool threadPool , Client client , ClusterService clusterService , DatafeedJobBuilder datafeedJobBuilder ,
70
69
Supplier <Long > currentTimeSupplier , Auditor auditor , AutodetectProcessManager autodetectProcessManager ) {
@@ -130,18 +129,20 @@ public void stopAllDatafeedsOnThisNode(String reason) {
130
129
* This is used before the JVM is killed. It differs from stopAllDatafeedsOnThisNode in that it leaves
131
130
* the datafeed tasks in the "started" state, so that they get restarted on a different node.
132
131
*/
133
- public void isolateAllDatafeedsOnThisNode () {
134
- isolated = true ;
132
+ public void isolateAllDatafeedsOnThisNodeBeforeShutdown () {
135
133
Iterator <Holder > iter = runningDatafeedsOnThisNode .values ().iterator ();
136
134
while (iter .hasNext ()) {
137
135
Holder next = iter .next ();
138
136
next .isolateDatafeed ();
139
- next .setRelocating ();
137
+ // TODO: it's not ideal that this "isolate" method does something a bit different to the one below
138
+ next .setNodeIsShuttingDown ();
140
139
iter .remove ();
141
140
}
142
141
}
143
142
144
143
public void isolateDatafeed (long allocationId ) {
144
+ // This calls get() rather than remove() because we expect that the persistent task will
145
+ // be removed shortly afterwards and that operation needs to be able to find the holder
145
146
Holder holder = runningDatafeedsOnThisNode .get (allocationId );
146
147
if (holder != null ) {
147
148
holder .isolateDatafeed ();
@@ -195,7 +196,7 @@ protected void doRun() {
195
196
holder .stop ("general_lookback_failure" , TimeValue .timeValueSeconds (20 ), e );
196
197
return ;
197
198
}
198
- if (isolated == false ) {
199
+ if (holder . isIsolated () == false ) {
199
200
if (next != null ) {
200
201
doDatafeedRealtime (next , holder .datafeedJob .getJobId (), holder );
201
202
} else {
@@ -298,7 +299,7 @@ public class Holder {
298
299
private final ProblemTracker problemTracker ;
299
300
private final Consumer <Exception > finishHandler ;
300
301
volatile Scheduler .Cancellable cancellable ;
301
- private volatile boolean isRelocating ;
302
+ private volatile boolean isNodeShuttingDown ;
302
303
303
304
Holder (TransportStartDatafeedAction .DatafeedTask task , String datafeedId , DatafeedJob datafeedJob ,
304
305
ProblemTracker problemTracker , Consumer <Exception > finishHandler ) {
@@ -324,7 +325,7 @@ boolean isIsolated() {
324
325
}
325
326
326
327
public void stop (String source , TimeValue timeout , Exception e ) {
327
- if (isRelocating ) {
328
+ if (isNodeShuttingDown ) {
328
329
return ;
329
330
}
330
331
@@ -344,11 +345,12 @@ public void stop(String source, TimeValue timeout, Exception e) {
344
345
if (cancellable != null ) {
345
346
cancellable .cancel ();
346
347
}
347
- auditor .info (datafeedJob .getJobId (), Messages .getMessage (Messages .JOB_AUDIT_DATAFEED_STOPPED ));
348
+ auditor .info (datafeedJob .getJobId (),
349
+ Messages .getMessage (isIsolated () ? Messages .JOB_AUDIT_DATAFEED_ISOLATED : Messages .JOB_AUDIT_DATAFEED_STOPPED ));
348
350
finishHandler .accept (e );
349
351
logger .info ("[{}] datafeed [{}] for job [{}] has been stopped{}" , source , datafeedId , datafeedJob .getJobId (),
350
352
acquired ? "" : ", but there may be pending tasks as the timeout [" + timeout .getStringRep () + "] expired" );
351
- if (autoCloseJob ) {
353
+ if (autoCloseJob && isIsolated () == false ) {
352
354
closeJob ();
353
355
}
354
356
if (acquired ) {
@@ -361,16 +363,18 @@ public void stop(String source, TimeValue timeout, Exception e) {
361
363
}
362
364
363
365
/**
364
- * This stops a datafeed WITHOUT updating the corresponding persistent task. It must ONLY be called
365
- * immediately prior to shutting down a node. Then the datafeed task can remain "started", and be
366
- * relocated to a different node. Calling this method at any other time will ruin the datafeed.
366
+ * This stops a datafeed WITHOUT updating the corresponding persistent task. When called it
367
+ * will stop the datafeed from sending data to its job as quickly as possible. The caller
368
+ * must do something sensible with the corresponding persistent task. If the node is shutting
369
+ * down the task will automatically get reassigned. Otherwise the caller must take action to
370
+ * remove or reassign the persistent task, or the datafeed will be left in limbo.
367
371
*/
368
372
public void isolateDatafeed () {
369
373
datafeedJob .isolate ();
370
374
}
371
375
372
- public void setRelocating () {
373
- isRelocating = true ;
376
+ public void setNodeIsShuttingDown () {
377
+ isNodeShuttingDown = true ;
374
378
}
375
379
376
380
private Long executeLookBack (long startTime , Long endTime ) throws Exception {
0 commit comments