Skip to content

Commit c5d5cad

Browse files
mcdoncmnaberez
authored andcommitted
work towards faster start and stop (#131)
Conflicts: supervisor/rpcinterface.py
1 parent dad9f8d commit c5d5cad

File tree

3 files changed

+118
-88
lines changed

3 files changed

+118
-88
lines changed

supervisor/rpcinterface.py

+105-78
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import time
33
import datetime
44
import errno
5+
import types
56

67
from supervisor.options import readFile
78
from supervisor.options import tailFile
@@ -23,7 +24,10 @@
2324
from supervisor.states import getSupervisorStateDescription
2425
from supervisor.states import ProcessStates
2526
from supervisor.states import getProcessStateDescription
26-
from supervisor.states import RUNNING_STATES
27+
from supervisor.states import (
28+
RUNNING_STATES,
29+
STOPPED_STATES,
30+
)
2731

2832
API_VERSION = '3.0'
2933

@@ -269,49 +273,56 @@ def startProcess(self, name, wait=True):
269273
except (NotExecutable, NoPermission), why:
270274
raise RPCError(Faults.NOT_EXECUTABLE, why.args[0])
271275

272-
started = []
276+
if process.get_state() in RUNNING_STATES:
277+
raise RPCError(Faults.ALREADY_STARTED, name)
273278

274-
startsecs = process.config.startsecs
279+
process.spawn()
275280

276-
def startit():
277-
if not started:
281+
# We call reap() in order to more quickly obtain the side effects of
282+
# process.finish(), which reap() eventually ends up calling. This
283+
# might be the case if the spawn() was successful but then the process
284+
# died before its startsecs elapsed or it exited with an unexpected
285+
# exit code. In particular, finish() may set spawnerr, which we can
286+
# check and immediately raise an RPCError, avoiding the need to
287+
# defer by returning a callback.
278288

279-
if process.get_state() in RUNNING_STATES:
280-
raise RPCError(Faults.ALREADY_STARTED, name)
289+
self.supervisord.reap()
281290

282-
process.spawn()
291+
if process.spawnerr:
292+
raise RPCError(Faults.SPAWN_ERROR, name)
283293

294+
# We call process.transition() in order to more quickly obtain its
295+
# side effects. In particular, it might set the process' state from
296+
# STARTING->RUNNING if the process has a startsecs==0.
297+
process.transition()
298+
299+
if wait and process.get_state() != ProcessStates.RUNNING:
300+
# by default, this branch will almost always be hit for processes
301+
# with default startsecs configurations, because the default number
302+
# of startsecs for a process is "1", and the process will not have
303+
# entered the RUNNING state yet even though we've called
304+
# transition() on it. This is because a process is not considered
305+
# RUNNING until it has stayed up > startsecs.
306+
307+
def onwait():
284308
if process.spawnerr:
285309
raise RPCError(Faults.SPAWN_ERROR, name)
286310

287-
# we use a list here to fake out lexical scoping;
288-
# using a direct assignment to 'started' in the
289-
# function appears to not work (symptom: 2nd or 3rd
290-
# call through, it forgets about 'started', claiming
291-
# it's undeclared).
292-
started.append(time.time())
311+
state = process.get_state()
293312

294-
if not wait or not startsecs:
295-
return True
313+
if state not in (ProcessStates.STARTING, ProcessStates.RUNNING):
314+
raise RPCError(Faults.ABNORMAL_TERMINATION, name)
296315

297-
t = time.time()
298-
runtime = (t - started[0])
299-
state = process.get_state()
300-
301-
if state not in (ProcessStates.STARTING, ProcessStates.RUNNING):
302-
raise RPCError(Faults.ABNORMAL_TERMINATION, name)
316+
if state == ProcessStates.RUNNING:
317+
return True
303318

304-
if runtime < startsecs:
305319
return NOT_DONE_YET
306320

307-
if state == ProcessStates.RUNNING:
308-
return True
309-
310-
raise RPCError(Faults.ABNORMAL_TERMINATION, name)
321+
onwait.delay = 0.05
322+
onwait.rpcinterface = self
323+
return onwait # deferred
311324

312-
startit.delay = 0.05
313-
startit.rpcinterface = self
314-
return startit # deferred
325+
return True
315326

316327
def startProcessGroup(self, name, wait=True):
317328
""" Start all processes in the group named 'name'
@@ -369,36 +380,43 @@ def stopProcess(self, name, wait=True):
369380
group_name, process_name = split_namespec(name)
370381
return self.stopProcessGroup(group_name, wait)
371382

372-
stopped = []
373-
called = []
374-
375-
def killit():
376-
if not called:
377-
if process.get_state() not in RUNNING_STATES:
378-
raise RPCError(Faults.NOT_RUNNING)
379-
# use a mutable for lexical scoping; see startProcess
380-
called.append(1)
381-
382-
if not stopped:
383-
msg = process.stop()
384-
if msg is not None:
385-
raise RPCError(Faults.FAILED, msg)
386-
stopped.append(1)
387-
388-
if wait:
383+
if process.get_state() not in RUNNING_STATES:
384+
raise RPCError(Faults.NOT_RUNNING)
385+
386+
msg = process.stop()
387+
if msg is not None:
388+
raise RPCError(Faults.FAILED, msg)
389+
390+
# We'll try to reap any killed child. FWIW, reap calls waitpid, and
391+
# then, if waitpid returns a pid, calls finish() on the process with
392+
# that pid, which drains any I/O from the process' dispatchers and
393+
# changes the process' state. I chose to call reap without once=True
394+
# because we don't really care if we reap more than one child. Even if
395+
# we only reap one child. we may not even be reaping the child that we
396+
# just stopped (this is all async, and process.stop() may not work, and
397+
# we'll need to wait for SIGKILL during process.transition() as the
398+
# result of normal select looping).
399+
400+
self.supervisord.reap()
401+
402+
if wait and process.get_state() not in STOPPED_STATES:
403+
404+
def onwait():
405+
# process will eventually enter a stopped state by
406+
# virtue of the supervisord.reap() method being called
407+
# during normal operations
408+
self.supervisord.options.logger.info(
409+
'waiting for %s to stop' % process.config.name
410+
)
411+
if process.get_state() not in STOPPED_STATES:
389412
return NOT_DONE_YET
390-
else:
391-
return True
392-
393-
if process.get_state() not in (ProcessStates.STOPPED,
394-
ProcessStates.EXITED):
395-
return NOT_DONE_YET
396-
else:
397413
return True
398414

399-
killit.delay = 0.2
400-
killit.rpcinterface = self
401-
return killit # deferred
415+
onwait.delay = 0
416+
onwait.rpcinterface = self
417+
return onwait # deferred
418+
419+
return True
402420

403421
def stopProcessGroup(self, name, wait=True):
404422
""" Stop all processes in the process group named 'name'
@@ -794,46 +812,55 @@ def allfunc(
794812
callbacks=callbacks, # used only to fool scoping, never passed by caller
795813
results=results, # used only to fool scoping, never passed by caller
796814
):
815+
797816
if not callbacks:
798817

799818
for group, process in processes:
800819
name = make_namespec(group.config.name, process.config.name)
801820
if predicate(process):
802821
try:
803822
callback = func(name, **extra_kwargs)
804-
callbacks.append((group, process, callback))
805823
except RPCError, e:
806824
results.append({'name':process.config.name,
807825
'group':group.config.name,
808826
'status':e.code,
809827
'description':e.text})
810828
continue
829+
if isinstance(callback, types.FunctionType):
830+
callbacks.append((group, process, callback))
831+
else:
832+
results.append(
833+
{'name':process.config.name,
834+
'group':group.config.name,
835+
'status':Faults.SUCCESS,
836+
'description':'OK'}
837+
)
811838

812839
if not callbacks:
813840
return results
814841

815-
group, process, callback = callbacks.pop(0)
842+
for struct in callbacks[:]:
816843

817-
try:
818-
value = callback()
819-
except RPCError, e:
820-
results.append(
821-
{'name':process.config.name,
822-
'group':group.config.name,
823-
'status':e.code,
824-
'description':e.text})
825-
return NOT_DONE_YET
844+
group, process, cb = struct
826845

827-
if value is NOT_DONE_YET:
828-
# push it back into the queue; it will finish eventually
829-
callbacks.append((group, process, callback))
830-
else:
831-
results.append(
832-
{'name':process.config.name,
833-
'group':group.config.name,
834-
'status':Faults.SUCCESS,
835-
'description':'OK'}
836-
)
846+
try:
847+
value = cb()
848+
except RPCError, e:
849+
results.append(
850+
{'name':process.config.name,
851+
'group':group.config.name,
852+
'status':e.code,
853+
'description':e.text})
854+
value = None
855+
856+
if value is not NOT_DONE_YET:
857+
results.append(
858+
{'name':process.config.name,
859+
'group':group.config.name,
860+
'status':Faults.SUCCESS,
861+
'description':'OK'}
862+
)
863+
callbacks.remove(struct)
837864

838865
if callbacks:
839866
return NOT_DONE_YET

supervisor/tests/base.py

+3
Original file line numberDiff line numberDiff line change
@@ -983,6 +983,9 @@ def set_procattr(self, process_name, attr_name, val, group_name=None):
983983
process = self.process_groups[group_name].processes[process_name]
984984
setattr(process, attr_name, val)
985985

986+
def reap(self):
987+
self.reaped = True
988+
986989
class DummyDispatcher:
987990
write_event_handled = False
988991
read_event_handled = False

supervisor/tests/test_rpcinterfaces.py

+10-10
Original file line numberDiff line numberDiff line change
@@ -315,9 +315,10 @@ def test_startProcess_already_started(self):
315315
supervisord = PopulatedDummySupervisor(options, 'foo', pconfig)
316316
supervisord.set_procattr('foo', 'pid', 10)
317317
interface = self._makeOne(supervisord)
318-
callback = interface.startProcess('foo')
319-
self._assertRPCError(xmlrpc.Faults.ALREADY_STARTED,
320-
callback)
318+
self._assertRPCError(
319+
xmlrpc.Faults.ALREADY_STARTED,
320+
interface.startProcess, 'foo'
321+
)
321322

322323
def test_startProcess_bad_group_name(self):
323324
options = DummyOptions()
@@ -371,26 +372,25 @@ def test_startProcess_spawnerr(self):
371372
process = supervisord.process_groups['foo'].processes['foo']
372373
process.spawnerr = 'abc'
373374
interface = self._makeOne(supervisord)
374-
callback = interface.startProcess('foo')
375-
self._assertRPCError(xmlrpc.Faults.SPAWN_ERROR, callback)
375+
self._assertRPCError(
376+
xmlrpc.Faults.SPAWN_ERROR,
377+
interface.startProcess,
378+
'foo'
379+
)
376380

377381
def test_startProcess(self):
378-
from supervisor import http
379382
options = DummyOptions()
380383
pconfig = DummyPConfig(options, 'foo', __file__, autostart=False,
381384
startsecs=.01)
382385
from supervisor.process import ProcessStates
383386
supervisord = PopulatedDummySupervisor(options, 'foo', pconfig)
384387
supervisord.set_procattr('foo', 'state', ProcessStates.STOPPED)
385388
interface = self._makeOne(supervisord)
386-
callback = interface.startProcess('foo')
387-
self.assertEqual(callback(), http.NOT_DONE_YET)
389+
result = interface.startProcess('foo')
388390
process = supervisord.process_groups['foo'].processes['foo']
389391
self.assertEqual(process.spawned, True)
390392
self.assertEqual(interface.update_text, 'startProcess')
391393
process.state = ProcessStates.RUNNING
392-
time.sleep(.02)
393-
result = callback()
394394
self.assertEqual(result, True)
395395

396396
def test_startProcess_nowait(self):

0 commit comments

Comments
 (0)