@@ -43,7 +43,7 @@ case "$op" in
# inside the container I get (from journald):
# Nov 02 11:44:36 hostname cluster-18f3852f-e067-6394-8159-66a7b8da2ecc[1088457]: Error: Cannot open an HTTP server: socket.error reported -2
# Nov 02 11:44:36 hostname cluster-18f3852f-e067-6394-8159-66a7b8da2ecc[1088457]: For help, use /nix/store/izqhlj5i1x9ldyn43d02kcy4mafmj3ci-python3.9-supervisor-4.2.4/bin/supervisord -h
- setenvjqstr 'supervisord_url' "http://127.0.0.1:9001"
+ setenvjqstr 'supervisord_url' "unix:///tmp/supervisor.sock"
# Look up `cluster` OCI image's name and tag (also Nix profile).
setenvjqstr 'oci_image_name' ${WB_OCI_IMAGE_NAME:-$(cat "$profile_dir/clusterImageName")}
setenvjqstr 'oci_image_tag' ${WB_OCI_IMAGE_TAG:-$(cat "$profile_dir/clusterImageTag")}
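The unix socket URL only works if the supervisord instance inside the image exposes a matching `[unix_http_server]` socket. A minimal sketch of the sections that URL relies on (illustrative only: the file name is hypothetical, the real supervisord.conf is generated by the workbench, and the /tmp/supervisor.sock path is taken from the URL above):

cat > supervisord.conf <<EOF
[unix_http_server]
file=/tmp/supervisor.sock

[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface

[supervisorctl]
serverurl=unix:///tmp/supervisor.sock
EOF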
@@ -169,6 +169,7 @@ case "$op" in
# constraints, resource exhaustion, etc), then the exit code will be 2.
# Any other errors, including client connection issues or internal
# errors, are indicated by exit code 1.
+ # FIXME: Timeout for "Deployment "XXX" in progress..."
nomad job run -verbose "$dir/nomad/job-cluster.hcl"
# Assuming that `nomad` placement is enough of a wait.
local nomad_alloc_id=$(nomad job allocs -json cluster | jq -r '.[0].ID')
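One way to address the FIXME would be to poll the allocation state with an explicit timeout instead of assuming placement is fast enough. A rough sketch, assuming the allocation stub's `.ClientStatus` field is the thing to watch (the 60-second budget is arbitrary, not part of this change):

  local timeout=60
  while test "$timeout" -gt 0
  do
    test "$(nomad job allocs -json cluster | jq -r '.[0].ClientStatus')" = "running" && break
    sleep 1
    timeout=$((timeout - 1))
  done
  test "$timeout" -gt 0 || msg "Timed out waiting for the cluster allocation to reach \"running\""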
@@ -180,9 +181,9 @@ case "$op" in
local container_supervisord_conf=$(envjqr 'container_supervisord_conf')
msg "Supervisor status inside container ..."
# Print the command used for debugging purposes.
- msg "'nomad alloc --task cluster exec --task cluster \"$nomad_alloc_id\" \"$container_supervisor_nix\"/bin/supervisorctl --serverurl \"$supervisord_url\" --configuration \"$container_supervisord_conf\" status'"
+ msg "'nomad alloc exec --task node-0 \"$nomad_alloc_id\" \"$container_supervisor_nix\"/bin/supervisorctl --serverurl \"$supervisord_url\" --configuration \"$container_supervisord_conf\" status'"
# Execute the actual command.
- nomad alloc exec --task cluster "$nomad_alloc_id" "$container_supervisor_nix"/bin/supervisorctl --serverurl "$supervisord_url" --configuration "$container_supervisord_conf" status || true
+ nomad alloc exec --task node-0 "$nomad_alloc_id" "$container_supervisor_nix"/bin/supervisorctl --serverurl "$supervisord_url" --configuration "$container_supervisord_conf" status || true
;;
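Rather than hard-coding `node-0`, the task names present in the allocation could be discovered at runtime; a hedged sketch, assuming `nomad alloc status -json` exposes the allocation's `TaskStates` map:

  # Lists e.g. node-0, node-1, ..., tracer, generator
  nomad alloc status -json "$nomad_alloc_id" | jq -r '.TaskStates | keys[]'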
describe-run )
@@ -198,7 +199,7 @@ case "$op" in
local dir=${1:?$usage}; shift
local service=${1:?$usage}; shift

- backend_nomad nomad-alloc-exec-supervisorctl "$dir" cluster start "$service"
+ backend_nomad nomad-alloc-exec-supervisorctl "$dir" "$service" start "$service"
;;

# Nomad-specific
@@ -207,7 +208,7 @@ case "$op" in
local dir=${1:?$usage}; shift
local service=${1:?$usage}; shift

- backend_nomad nomad-alloc-exec-supervisorctl "$dir" cluster stop "$service"
+ backend_nomad nomad-alloc-exec-supervisorctl "$dir" "$service" stop "$service"
;;

# Nomad-specific
@@ -216,7 +217,7 @@ case "$op" in
local dir=${1:?$usage}; shift
local service=${1:?$usage}; shift

- backend_nomad nomad-alloc-exec-supervisorctl "$dir" cluster status "$service" > /dev/null && true
+ backend_nomad nomad-alloc-exec-supervisorctl "$dir" "$service" status "$service" > /dev/null && true
;;
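With these three changes the supervisord of each service is reached inside the Nomad task that carries the same name, instead of always going through a single `cluster` task. Hypothetical invocations of the shared helper (service and task names taken from elsewhere in this diff):

  backend_nomad nomad-alloc-exec-supervisorctl "$dir" node-0 start node-0
  backend_nomad nomad-alloc-exec-supervisorctl "$dir" tracer status tracer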
# Nomad-specific
@@ -388,11 +389,12 @@ case "$op" in
local usage="USAGE: wb nomad $op RUN-DIR"
local dir=${1:?$usage}; shift

- local nomad_alloc_id=$(envjqr 'nomad_alloc_id')
- local supervisord_url=$(envjqr 'supervisord_url')
- local container_supervisor_nix=$(envjqr 'container_supervisor_nix')
- local container_supervisord_conf=$(envjqr 'container_supervisord_conf')
- nomad alloc exec --task cluster "$nomad_alloc_id" "$container_supervisor_nix"/bin/supervisorctl --serverurl "$supervisord_url" --configuration "$container_supervisord_conf" stop all || true > /dev/null
+ backend_nomad nomad-alloc-exec-supervisorctl "$dir" generator stop all
+ backend_nomad nomad-alloc-exec-supervisorctl "$dir" tracer stop all
+ for node in $(jq_tolist 'keys' "$dir"/node-specs.json)
+ do
+ backend_nomad nomad-alloc-exec-supervisorctl "$dir" "$node" stop all
+ done

nomad job stop -global -no-shutdown-delay -purge -yes cluster
local nomad_pid=$(envjqr 'nomad_pid')
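`jq_tolist` is a workbench helper; my assumption about its behaviour here, consistent with how it is used in this loop, is roughly:

  # Emits the node names (the keys of node-specs.json) as a shell word list, e.g. "node-0 node-1"
  jq -r 'keys | join(" ")' "$dir"/node-specs.json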
@@ -477,6 +479,10 @@ nomad_create_folders_and_config() {
volumes {
enabled = true
}
+ recover_stopped = false
+ gc {
+ container = false
+ }
}
}
EOF
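For context, these two options belong to the podman task driver's plugin configuration in the generated Nomad agent config; as I understand the driver, `recover_stopped = false` keeps it from reattaching to previously stopped containers after a client restart, and `gc { container = false }` leaves containers in place after the task exits, which helps debugging. A sketch of the enclosing stanza, inferred from the snippet above rather than copied from the generated file:

  plugin "nomad-driver-podman" {
    config {
      volumes {
        enabled = true
      }
      recover_stopped = false
      gc {
        container = false
      }
    }
  }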
@@ -583,16 +589,30 @@ job "cluster" {
mode = "host"
}
EOF
- local task_stanza_name="cluster"
- local task_stanza_file="$dir/nomad/job-cluster-task-$task_stanza_name.hcl"
- nomad_create_task_stanza "$task_stanza_file" "$task_stanza_name" "$podman_volumes"
- cat "$task_stanza_file" >> "$dir/nomad/job-cluster.hcl"
-
- local task_stanza_name="cluster2"
- local task_stanza_file="$dir/nomad/job-cluster-task-$task_stanza_name.hcl"
- nomad_create_task_stanza "$task_stanza_file" "$task_stanza_name" "$podman_volumes"
+ # Cluster
+ # local task_stanza_name_c="cluster"
+ # local task_stanza_file_c="$dir/nomad/job-cluster-task-$task_stanza_name_c.hcl"
+ # nomad_create_task_stanza "$task_stanza_file_c" "$task_stanza_name_c" "$podman_volumes"
+ # cat "$task_stanza_file_c" >> "$dir/nomad/job-cluster.hcl"
+ # Nodes
+ for node in $(jq_tolist 'keys' "$dir"/node-specs.json)
+ do
+ local task_stanza_name="$node"
+ local task_stanza_file="$dir/nomad/job-cluster-task-$task_stanza_name.hcl"
+ nomad_create_task_stanza "$task_stanza_file" "$task_stanza_name" "$podman_volumes"
cat "$task_stanza_file" >> "$dir/nomad/job-cluster.hcl"
-
+ done
+ # Tracer
+ local task_stanza_name_t="tracer"
+ local task_stanza_file_t="$dir/nomad/job-cluster-task-$task_stanza_name_t.hcl"
+ nomad_create_task_stanza "$task_stanza_file_t" "$task_stanza_name_t" "$podman_volumes"
+ cat "$task_stanza_file_t" >> "$dir/nomad/job-cluster.hcl"
+ # Generator
+ local task_stanza_name_g="generator"
+ local task_stanza_file_g="$dir/nomad/job-cluster-task-$task_stanza_name_g.hcl"
+ nomad_create_task_stanza "$task_stanza_file_g" "$task_stanza_name_g" "$podman_volumes"
+ cat "$task_stanza_file_g" >> "$dir/nomad/job-cluster.hcl"
+ # The end.
cat >> "$dir/nomad/job-cluster.hcl" <<- EOF
}
}
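With this loop the single job group now receives one task per entry in node-specs.json plus a tracer and a generator task; a rough outline of the HCL the script assembles (illustrative only, not the actual generated file):

  job "cluster" {
    group "..." {
      task "node-0"    { ... }
      task "node-1"    { ... }
      task "tracer"    { ... }
      task "generator" { ... }
    }
  }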
@@ -622,12 +642,13 @@ task "$name" {
#logging = {
# driver = "nomad"
#}
+ hostname = "$name"
+ network_mode = "host"
tmpfs = [
"/tmp"
]
volumes = ${podman_volumes}
working_dir = "${container_workdir}"
- hostname = "$name"
}
env = {
SUPERVISOR_NIX = "${container_supervisor_nix}"