1
1
import logging
2
+ from pdb import Pdb
2
3
import sys
3
4
import time
4
5
from pathlib import Path
10
11
11
12
logger = logging.getLogger(__name__)

# Folder containing the launched script when run as __main__, otherwise the
# folder containing this module.
current_dir = Path(sys.argv[0] if __name__ == "__main__" else __file__).resolve().parent

# Polling parameters used while waiting for the swarm services.
WAIT_TIME_SECS = 20  # pause between two consecutive checks of a service
RETRY_COUNT = 7  # maximum number of checks per service
MAX_WAIT_TIME = 240  # NOTE(review): not referenced in the visible code -- confirm it is still needed

# Task states in which a service is still considered to be starting, see
# https://docs.docker.com/engine/swarm/how-swarm-mode-works/swarm-task-states/
pre_states = ["NEW", "PENDING", "ASSIGNED", "PREPARING", "STARTING"]
27
22
28
23
failed_states = [
29
24
"COMPLETE" ,
32
27
"REJECTED" ,
33
28
"ORPHANED" ,
34
29
"REMOVE" ,
35
- "CREATED"
30
+ "CREATED" ,
36
31
]
37
32
# UTILS --------------------------------
38
33
34
+
39
35
def get_tasks_summary(tasks):
    """Return a printable summary with one entry per task in *tasks*.

    Each task must be a mapping with an ``"ID"`` key and a ``"Status"``
    mapping that holds at least ``"State"``.  A missing ``"Err"`` entry is
    rendered as an empty string.  The task mappings are not modified.

    Returns an empty string when *tasks* is empty.
    """
    entries = []
    for task in tasks:
        status = task["Status"]
        task_id = task["ID"]
        state = status["State"]
        # read-only lookup: do not inject a default "Err" into the caller's data
        error = status.get("Err", "")
        entries.append(f"- task ID:{task_id}, STATE: {state}, ERROR: '{error}' \n ")
    return "".join(entries)
46
43
47
44
48
45
def get_failed_tasks_logs(service, docker_client):
    """Return the concatenated container logs of the failed tasks of *service*.

    A task counts as failed when its upper-cased ``Status.State`` is listed
    in the module-level ``failed_states``.  The section of every failed task
    is framed by a BEGIN and an END banner carrying the service name and the
    task ID.  When the task has no container ID, a "log unavailable" line is
    emitted instead of the logs.

    Args:
        service: object exposing ``name`` and ``tasks()``; each task is a
            mapping with ``"ID"`` and ``"Status"`` entries.
        docker_client: object exposing ``containers.get(container_id)`` whose
            result provides ``logs()`` returning bytes.

    Returns:
        The collected text, or an empty string when no task has failed.
    """
    banner = "=" * 10
    chunks = []
    for task in service.tasks():
        status = task["Status"]
        if status["State"].upper() not in failed_states:
            continue

        # A task may carry no (or an empty) "ContainerStatus"; that case is
        # reported through the "log unavailable" branch below.
        container_id = (status.get("ContainerStatus") or {}).get("ContainerID")

        chunks.append(
            "{2} {0} - {1} BEGIN {2}\n ".format(service.name, task["ID"], banner)
        )
        if container_id:
            container = docker_client.containers.get(container_id)
            chunks.append(container.logs().decode("utf-8"))
        else:
            chunks.append(" log unavailable. container does not exists\n ")
        chunks.append(
            "{2} {0} - {1} END {2}\n ".format(service.name, task["ID"], banner)
        )

    return "".join(chunks)
63
+
64
+
64
65
# --------------------------------------------------------------------------------
65
66
67
+
66
68
def osparc_simcore_root_dir () -> Path :
67
69
WILDCARD = "services/web/server"
68
70
@@ -81,46 +83,66 @@ def osparc_simcore_root_dir() -> Path:
81
83
def core_docker_compose_file() -> Path:
    """Return the path ``.stack-simcore-version.yml`` under ``osparc_simcore_root_dir()``."""
    repo_root = osparc_simcore_root_dir()
    return repo_root / ".stack-simcore-version.yml"
83
85
86
+
84
87
def core_services() -> List[str]:
    """Return the keys of the ``services`` section of the core compose file.

    The file located by ``core_docker_compose_file()`` is parsed with
    ``yaml.safe_load``.
    """
    with core_docker_compose_file().open() as compose_fp:
        compose_specs = yaml.safe_load(compose_fp)
    # iterating a mapping yields its keys; no comprehension needed
    return list(compose_specs["services"])
88
91
92
+
89
93
def ops_docker_compose_file() -> Path:
    """Return the path ``.stack-ops.yml`` under ``osparc_simcore_root_dir()``."""
    repo_root = osparc_simcore_root_dir()
    return repo_root / ".stack-ops.yml"
91
95
96
+
92
97
def ops_services() -> List[str]:
    """Return the keys of the ``services`` section of the ops compose file.

    The file located by ``ops_docker_compose_file()`` is parsed with
    ``yaml.safe_load``.
    """
    with ops_docker_compose_file().open() as compose_fp:
        compose_specs = yaml.safe_load(compose_fp)
    # iterating a mapping yields its keys; no comprehension needed
    return list(compose_specs["services"])
96
101
97
def _latest_task(service):
    """Return the task of *service* with the greatest ``UpdatedAt``, or None if it has no task."""
    return max(service.tasks(), key=lambda t: t["UpdatedAt"], default=None)


def wait_for_services() -> None:
    """Wait until every core and ops service has its latest task RUNNING.

    The expected service names come from ``core_services()`` and
    ``ops_services()``.  Swarm services are matched against them using the
    last ``_``-separated chunk of their name.  Each matched service is polled
    up to ``RETRY_COUNT`` times, sleeping ``WAIT_TIME_SECS`` between polls,
    for as long as its most recently updated task is in one of
    ``pre_states`` or ``failed_states`` (or no task exists yet).

    Raises:
        AssertionError: when no service is found, when the number of matched
            services differs from the number of expected ones, or when the
            latest task of a service is not RUNNING after the polling.
    """
    # get all services
    services = core_services() + ops_services()

    client = docker.from_env()
    running_services = [
        x for x in client.services.list() if x.name.split("_")[-1] in services
    ]

    # check all services are in
    assert running_services, "no services started!"
    assert len(services) == len(
        running_services
    ), f"Some services are missing:\n expected: {services} \n got: {running_services} "

    # now check they are in running mode
    for service in running_services:
        task = None
        for n in range(RETRY_COUNT):
            # get last updated task
            task = _latest_task(service)

            if task is None:
                # the service exists but no task has been created for it yet
                print(f"Waiting [{n}/{RETRY_COUNT}] for {service.name}: no task yet")
            else:
                state = task["Status"]["State"].upper()
                if state in pre_states:
                    print(
                        "Waiting [{}/{}] for {}...\n {}".format(
                            n, RETRY_COUNT, service.name, get_tasks_summary(service.tasks())
                        )
                    )
                elif state in failed_states:
                    print(
                        f"Waiting [{n} /{RETRY_COUNT} ] Service {service.name} failed once...\n {get_tasks_summary(service.tasks())} "
                    )
                else:
                    break
            time.sleep(WAIT_TIME_SECS)
        else:
            # Retries exhausted: the state may have changed during the last
            # sleep, so look once more instead of judging a stale snapshot.
            task = _latest_task(service)

        assert task, f"Service {service.name} has no task"
        assert (
            task["Status"]["State"].upper() == "RUNNING"
        ), "Expected running, got \n {}\n {}".format(
            pformat(task), get_tasks_summary(service.tasks())
        )
        # NOTE: get_failed_tasks_logs(service, client) can be appended to the
        # message above when more detail is needed.
124
146
125
147
126
148
if __name__ == "__main__" :
0 commit comments