Skip to content

Commit a316730

Browse files
committed
workbench: make nomad backend use multiple containers
1 parent 8a9fde8 commit a316730

File tree

4 files changed

+59
-24
lines changed

4 files changed

+59
-24
lines changed

nix/workbench/backend/nomad.nix

+1
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ let
4444
pkgs lib stateDir
4545
basePort
4646
extraBackendConfig;
47+
unixHttpServerPort = "/tmp/supervisor.sock";
4748
};
4849
nomadConf =
4950
import ./nomad-conf.nix

nix/workbench/backend/nomad.sh

+42-21
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ case "$op" in
4343
# inside the container I get (from journald):
4444
# Nov 02 11:44:36 hostname cluster-18f3852f-e067-6394-8159-66a7b8da2ecc[1088457]: Error: Cannot open an HTTP server: socket.error reported -2
4545
# Nov 02 11:44:36 hostname cluster-18f3852f-e067-6394-8159-66a7b8da2ecc[1088457]: For help, use /nix/store/izqhlj5i1x9ldyn43d02kcy4mafmj3ci-python3.9-supervisor-4.2.4/bin/supervisord -h
46-
setenvjqstr 'supervisord_url' "http://127.0.0.1:9001"
46+
setenvjqstr 'supervisord_url' "unix:///tmp/supervisor.sock"
4747
# Look up `cluster` OCI image's name and tag (also Nix profile).
4848
setenvjqstr 'oci_image_name' ${WB_OCI_IMAGE_NAME:-$(cat "$profile_dir/clusterImageName")}
4949
setenvjqstr 'oci_image_tag' ${WB_OCI_IMAGE_TAG:-$(cat "$profile_dir/clusterImageTag")}
@@ -169,6 +169,7 @@ case "$op" in
169169
# constraints, resource exhaustion, etc), then the exit code will be 2.
170170
# Any other errors, including client connection issues or internal
171171
# errors, are indicated by exit code 1.
172+
# FIXME: Add a timeout in case this blocks at 'Deployment "XXX" in progress...'
172173
nomad job run -verbose "$dir/nomad/job-cluster.hcl"
173174
# Assume that waiting for `nomad` placement is a long enough wait.
174175
local nomad_alloc_id=$(nomad job allocs -json cluster | jq -r '.[0].ID')
@@ -180,9 +181,9 @@ case "$op" in
180181
local container_supervisord_conf=$(envjqr 'container_supervisord_conf')
181182
msg "Supervisor status inside container ..."
182183
# Print the command used for debugging purposes.
183-
msg "'nomad alloc --task cluster exec --task cluster \"$nomad_alloc_id\" \"$container_supervisor_nix\"/bin/supervisorctl --serverurl \"$supervisord_url\" --configuration \"$container_supervisord_conf\" status'"
184+
msg "'nomad alloc exec --task node-0 \"$nomad_alloc_id\" \"$container_supervisor_nix\"/bin/supervisorctl --serverurl \"$supervisord_url\" --configuration \"$container_supervisord_conf\" status'"
184185
# Execute the actual command.
185-
nomad alloc exec --task cluster "$nomad_alloc_id" "$container_supervisor_nix"/bin/supervisorctl --serverurl "$supervisord_url" --configuration "$container_supervisord_conf" status || true
186+
nomad alloc exec --task node-0 "$nomad_alloc_id" "$container_supervisor_nix"/bin/supervisorctl --serverurl "$supervisord_url" --configuration "$container_supervisord_conf" status || true
186187
;;
187188

188189
describe-run )
@@ -198,7 +199,7 @@ case "$op" in
198199
local dir=${1:?$usage}; shift
199200
local service=${1:?$usage}; shift
200201

201-
backend_nomad nomad-alloc-exec-supervisorctl "$dir" cluster start "$service"
202+
backend_nomad nomad-alloc-exec-supervisorctl "$dir" "$service" start "$service"
202203
;;
203204

204205
# Nomad-specific
@@ -207,7 +208,7 @@ case "$op" in
207208
local dir=${1:?$usage}; shift
208209
local service=${1:?$usage}; shift
209210

210-
backend_nomad nomad-alloc-exec-supervisorctl "$dir" cluster stop "$service"
211+
backend_nomad nomad-alloc-exec-supervisorctl "$dir" "$service" stop "$service"
211212
;;
212213

213214
# Nomad-specific
@@ -216,7 +217,7 @@ case "$op" in
216217
local dir=${1:?$usage}; shift
217218
local service=${1:?$usage}; shift
218219

219-
backend_nomad nomad-alloc-exec-supervisorctl "$dir" cluster status "$service" > /dev/null && true
220+
backend_nomad nomad-alloc-exec-supervisorctl "$dir" "$service" status "$service" > /dev/null && true
220221
;;
221222

222223
# Nomad-specific
@@ -388,11 +389,12 @@ case "$op" in
388389
local usage="USAGE: wb nomad $op RUN-DIR"
389390
local dir=${1:?$usage}; shift
390391

391-
local nomad_alloc_id=$(envjqr 'nomad_alloc_id')
392-
local supervisord_url=$(envjqr 'supervisord_url')
393-
local container_supervisor_nix=$(envjqr 'container_supervisor_nix')
394-
local container_supervisord_conf=$(envjqr 'container_supervisord_conf')
395-
nomad alloc exec --task cluster "$nomad_alloc_id" "$container_supervisor_nix"/bin/supervisorctl --serverurl "$supervisord_url" --configuration "$container_supervisord_conf" stop all || true > /dev/null
392+
backend_nomad nomad-alloc-exec-supervisorctl "$dir" generator stop all
393+
backend_nomad nomad-alloc-exec-supervisorctl "$dir" tracer stop all
394+
for node in $(jq_tolist 'keys' "$dir"/node-specs.json)
395+
do
396+
backend_nomad nomad-alloc-exec-supervisorctl "$dir" "$node" stop all
397+
done
396398

397399
nomad job stop -global -no-shutdown-delay -purge -yes cluster
398400
local nomad_pid=$(envjqr 'nomad_pid')
@@ -477,6 +479,10 @@ nomad_create_folders_and_config() {
477479
volumes {
478480
enabled = true
479481
}
482+
recover_stopped = false
483+
gc {
484+
container = false
485+
}
480486
}
481487
}
482488
EOF
@@ -583,16 +589,30 @@ job "cluster" {
583589
mode = "host"
584590
}
585591
EOF
586-
local task_stanza_name="cluster"
587-
local task_stanza_file="$dir/nomad/job-cluster-task-$task_stanza_name.hcl"
588-
nomad_create_task_stanza "$task_stanza_file" "$task_stanza_name" "$podman_volumes"
589-
cat "$task_stanza_file" >> "$dir/nomad/job-cluster.hcl"
590-
591-
local task_stanza_name="cluster2"
592-
local task_stanza_file="$dir/nomad/job-cluster-task-$task_stanza_name.hcl"
593-
nomad_create_task_stanza "$task_stanza_file" "$task_stanza_name" "$podman_volumes"
592+
# Cluster
593+
# local task_stanza_name_c="cluster"
594+
# local task_stanza_file_c="$dir/nomad/job-cluster-task-$task_stanza_name_c.hcl"
595+
# nomad_create_task_stanza "$task_stanza_file_c" "$task_stanza_name_c" "$podman_volumes"
596+
#cat "$task_stanza_file_c" >> "$dir/nomad/job-cluster.hcl"
597+
# Nodes
598+
for node in $(jq_tolist 'keys' "$dir"/node-specs.json)
599+
do
600+
local task_stanza_name="$node"
601+
local task_stanza_file="$dir/nomad/job-cluster-task-$task_stanza_name.hcl"
602+
nomad_create_task_stanza "$task_stanza_file" "$task_stanza_name" "$podman_volumes"
594603
cat "$task_stanza_file" >> "$dir/nomad/job-cluster.hcl"
595-
604+
done
605+
# Tracer
606+
local task_stanza_name_t="tracer"
607+
local task_stanza_file_t="$dir/nomad/job-cluster-task-$task_stanza_name_t.hcl"
608+
nomad_create_task_stanza "$task_stanza_file_t" "$task_stanza_name_t" "$podman_volumes"
609+
cat "$task_stanza_file_t" >> "$dir/nomad/job-cluster.hcl"
610+
# Generator
611+
local task_stanza_name_g="generator"
612+
local task_stanza_file_g="$dir/nomad/job-cluster-task-$task_stanza_name_g.hcl"
613+
nomad_create_task_stanza "$task_stanza_file_g" "$task_stanza_name_g" "$podman_volumes"
614+
cat "$task_stanza_file_g" >> "$dir/nomad/job-cluster.hcl"
615+
# The end.
596616
cat >> "$dir/nomad/job-cluster.hcl" <<- EOF
597617
}
598618
}
@@ -622,12 +642,13 @@ task "$name" {
622642
#logging = {
623643
# driver = "nomad"
624644
#}
645+
hostname = "$name"
646+
network_mode = "host"
625647
tmpfs = [
626648
"/tmp"
627649
]
628650
volumes = ${podman_volumes}
629651
working_dir = "${container_workdir}"
630-
hostname = "$name"
631652
}
632653
env = {
633654
SUPERVISOR_NIX = "${container_supervisor_nix}"

nix/workbench/backend/supervisor-conf.nix

+15-3
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
, stateDir
44
, basePort
55
, node-services
6+
, unixHttpServerPort ? null
7+
, inetHttpServerPort ? null
68
## Last-moment overrides:
79
, extraBackendConfig
810
}:
@@ -23,14 +25,24 @@ let
2325
strip_ansi = true;
2426
};
2527
supervisorctl = {};
26-
inet_http_server = {
27-
port = "127.0.0.1:9001";
28-
};
2928
"rpcinterface:supervisor" = {
3029
"supervisor.rpcinterface_factory" = "supervisor.rpcinterface:make_main_rpcinterface";
3130
};
3231
}
3332
//
33+
lib.attrsets.optionalAttrs (unixHttpServerPort != null) {
34+
unix_http_server = {
35+
file = unixHttpServerPort;
36+
chmod = "0777";
37+
};
38+
}
39+
//
40+
lib.attrsets.optionalAttrs (inetHttpServerPort != null) {
41+
inet_http_server = {
42+
port = inetHttpServerPort;
43+
};
44+
}
45+
//
3446
listToAttrs
3547
(mapAttrsToList (_: nodeSvcSupervisorProgram) node-services)
3648
//

nix/workbench/backend/supervisor.nix

+1
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ let
5555
pkgs lib stateDir
5656
basePort
5757
extraBackendConfig;
58+
inetHttpServerPort = "127.0.0.1:9001";
5859
};
5960
}
6061
''

0 commit comments

Comments
 (0)