Skip to content

Commit 5bb2b76

Browse files
committed
workbench: distributed nomad clusters
1 parent e7d85bb commit 5bb2b76

File tree

1 file changed

+99
-105
lines changed

1 file changed

+99
-105
lines changed

Diff for: nix/workbench/backend/nomad-job.nix

+99-105
Original file line numberDiff line numberDiff line change
@@ -192,101 +192,96 @@ let
192192
# on the same client (host). All tasks within a group will be
193193
# placed on the same host.
194194
# https://developer.hashicorp.com/nomad/docs/job-specification/group
195-
group."workbench-cluster-job-group" = groupDefaults // {
196-
197-
# Specifies the number of instances that should be running under for this
198-
# group. This value must be non-negative. This defaults to the min value
199-
# specified in the scaling block, if present; otherwise, this defaults to
200-
# 1
201-
count = 1;
202-
203-
# The reschedule stanza specifies the group's rescheduling strategy. If
204-
# specified at the job level, the configuration will apply to all groups
205-
# within the job. If the reschedule stanza is present on both the job and
206-
# the group, they are merged with the group stanza taking the highest
207-
# precedence and then the job.
208-
# To disable rescheduling, set the attempts parameter to zero and unlimited
209-
# to false.
210-
reschedule = {
211-
# Specifies the number of reschedule attempts allowed in the configured
212-
# interval. Defaults vary by job type.
213-
attempts = 0;
214-
# Enables unlimited reschedule attempts. If this is set to true the
215-
# attempts and interval fields are not used.
216-
unlimited = false;
217-
};
218-
219-
# Specifies the restart policy for all tasks in this group. If omitted, a
220-
# default policy exists for each job type, which can be found in the restart
221-
# stanza documentation.
222-
restart = {
223-
attempts = 0;
224-
mode = "fail";
225-
};
226-
227-
# Specifies a key-value map that annotates with user-defined metadata.
228-
# Used as a "template" to generate the envars passed to the container.
229-
# This makes it easier to change them using `jq` inside the workbench!
230-
meta = null;
231-
232-
# The network stanza specifies the networking requirements for the task
233-
# group, including the network mode and port allocations.
234-
# https://developer.hashicorp.com/nomad/docs/job-specification/network
235-
# TODO: Use "bridge" mode and port allocations ?
236-
network = {
237-
# FIXME: "bridge" right now is not working. Client error is:
238-
# {"@level":"error","@message":"prerun failed","@module":"client.alloc_runner","@timestamp":"2023-02-01T13:52:24.948596Z","alloc_id":"03faca46-0fdc-4ba0-01e9-50f67c088f99","error":"pre-run hook \"network\" failed: failed to create network for alloc: mkdir /var/run/netns: permission denied"}
239-
# {"@level":"info","@message":"waiting for task to exit","@module":"client.alloc_runner","@timestamp":"2023-02-01T13:52:24.983021Z","alloc_id":"03faca46-0fdc-4ba0-01e9-50f67c088f99","task":"tracer"}
240-
# {"@level":"info","@message":"marking allocation for GC","@module":"client.gc","@timestamp":"2023-02-01T13:52:24.983055Z","alloc_id":"03faca46-0fdc-4ba0-01e9-50f67c088f99"}
241-
# {"@level":"info","@message":"node registration complete","@module":"client","@timestamp":"2023-02-01T13:52:27.489795Z"}
242-
mode = "host";
243-
port = lib.listToAttrs (
244-
# If not oneTracerPerNode, an individual tracer task is needed (instead
245-
# of running a tracer alongside a node with supervisor)
246-
lib.optionals (profileNix.value.node.tracer && !oneTracerPerNode) [
247-
# TODO: Leave empty or invent one?
248-
{name = "tracer"; value = {};}
249-
]
250-
++
251-
(lib.mapAttrsToList
252-
(_: nodeSpec: {
253-
# All names of the form node#, without the "-", instead of node-#
254-
name = "node" + (toString nodeSpec.i);
255-
value =
256-
# The "podman" driver accepts "Mapped Ports", but not the "exec" driver
257-
# https://developer.hashicorp.com/nomad/docs/job-specification/network#mapped-ports
258-
# If you use a network in bridge mode you can use "Mapped Ports"
259-
# https://developer.hashicorp.com/nomad/docs/job-specification/network#bridge-mode
260-
if execTaskDriver
261-
then {
262-
to = ''${toString nodeSpec.port}'';
263-
static = ''${toString nodeSpec.port}'';
264-
}
265-
else {
266-
to = ''${toString nodeSpec.port}'';
267-
};
268-
})
269-
(profileNix.node-specs.value)
270-
)
271-
);
272-
};
273-
274-
# TODO:
275-
# Specifies the volumes that are required by tasks within the group.
276-
# volume
277-
278-
# The Consul namespace in which group and task-level services within the
279-
# group will be registered. Use of template to access Consul KV will read
280-
# from the specified Consul namespace. Specifying namespace takes
281-
# precedence over the -consul-namespace command line argument in job run.
282-
# namespace = "";
283-
# Not available as the documentation says: Extraneous JSON object property; No argument or block type is named "namespace".
284-
285-
# The task stanza creates an individual unit of work, such as a Docker
286-
# container, web application, or batch processing.
287-
# https://developer.hashicorp.com/nomad/docs/job-specification/task
288-
task = let
289-
valueF = (taskName: serviceName: portName: nodeSpec: (taskDefaults // {
195+
group = let
196+
valueF = (taskName: serviceName: portName: portNum: nodeSpec: (groupDefaults // {
197+
198+
# Specifies the number of instances that should be running under for
199+
# this group. This value must be non-negative. This defaults to the min
200+
# value specified in the scaling block, if present; otherwise, this
201+
# defaults to 1
202+
count = 1;
203+
204+
# The reschedule stanza specifies the group's rescheduling strategy. If
205+
# specified at the job level, the configuration will apply to all groups
206+
# within the job. If the reschedule stanza is present on both the job
207+
# and the group, they are merged with the group stanza taking the highest
208+
# precedence and then the job.
209+
# To disable rescheduling, set the attempts parameter to zero and
210+
# unlimited to false.
211+
reschedule = {
212+
# Specifies the number of reschedule attempts allowed in the
213+
# configured interval. Defaults vary by job type.
214+
attempts = 0;
215+
# Enables unlimited reschedule attempts. If this is set to true the
216+
# attempts and interval fields are not used.
217+
unlimited = false;
218+
};
219+
220+
# Specifies the restart policy for all tasks in this group. If omitted,
221+
# a default policy exists for each job type, which can be found in the
222+
# restart stanza documentation.
223+
restart = {
224+
attempts = 0;
225+
mode = "fail";
226+
};
227+
228+
# Specifies a key-value map that annotates with user-defined metadata.
229+
# Used as a "template" to generate the envars passed to the container.
230+
# This makes it easier to change them using `jq` inside the workbench!
231+
meta = null;
232+
233+
# The network stanza specifies the networking requirements for the task
234+
# group, including the network mode and port allocations.
235+
# https://developer.hashicorp.com/nomad/docs/job-specification/network
236+
# TODO: Use "bridge" mode and port allocations ?
237+
network = {
238+
# FIXME: "bridge" right now is not working. Client error is:
239+
# {"@level":"error","@message":"prerun failed","@module":"client.alloc_runner","@timestamp":"2023-02-01T13:52:24.948596Z","alloc_id":"03faca46-0fdc-4ba0-01e9-50f67c088f99","error":"pre-run hook \"network\" failed: failed to create network for alloc: mkdir /var/run/netns: permission denied"}
240+
# {"@level":"info","@message":"waiting for task to exit","@module":"client.alloc_runner","@timestamp":"2023-02-01T13:52:24.983021Z","alloc_id":"03faca46-0fdc-4ba0-01e9-50f67c088f99","task":"tracer"}
241+
# {"@level":"info","@message":"marking allocation for GC","@module":"client.gc","@timestamp":"2023-02-01T13:52:24.983055Z","alloc_id":"03faca46-0fdc-4ba0-01e9-50f67c088f99"}
242+
# {"@level":"info","@message":"node registration complete","@module":"client","@timestamp":"2023-02-01T13:52:27.489795Z"}
243+
mode = "host";
244+
port = lib.listToAttrs (
245+
# If not oneTracerPerNode, an individual tracer task is needed (instead
246+
# of running a tracer alongside a node with supervisor)
247+
lib.optionals (profileNix.value.node.tracer && !oneTracerPerNode) [
248+
# TODO: Leave empty or invent one?
249+
{name = "tracer"; value = {};}
250+
]
251+
++
252+
[
253+
{
254+
# All names of the form node#, without the "-", instead of node-#
255+
name = portName;
256+
value =
257+
# The "podman" driver accepts "Mapped Ports", but not the "exec" driver
258+
# https://developer.hashicorp.com/nomad/docs/job-specification/network#mapped-ports
259+
# If you use a network in bridge mode you can use "Mapped Ports"
260+
# https://developer.hashicorp.com/nomad/docs/job-specification/network#bridge-mode
261+
if execTaskDriver
262+
then {
263+
to = ''${toString portNum}'';
264+
static = ''${toString portNum}'';
265+
}
266+
else {
267+
to = ''${toString portNum}'';
268+
};
269+
}
270+
]
271+
);
272+
};
273+
274+
# The Consul namespace in which group and task-level services within the
275+
# group will be registered. Use of template to access Consul KV will read
276+
# from the specified Consul namespace. Specifying namespace takes
277+
# precedence over the -consul-namespace command line argument in job run.
278+
# namespace = "";
279+
# Not available as the documentation says: Extraneous JSON object property; No argument or block type is named "namespace".
280+
281+
# The task stanza creates an individual unit of work, such as a Docker
282+
# container, web application, or batch processing.
283+
# https://developer.hashicorp.com/nomad/docs/job-specification/task
284+
task.${taskName} = taskDefaults // {
290285

291286
# The meta stanza allows for user-defined arbitrary key-value pairs.
292287
# It is possible to use the meta stanza at the job, group, or task
@@ -572,9 +567,7 @@ let
572567
;
573568

574569
};
575-
576-
}
577-
else {
570+
} else {
578571
driver = "podman";
579572

580573
# Specifies the driver configuration, which is passed directly to the
@@ -636,9 +629,8 @@ let
636629

637630
};
638631
}
639-
)
640-
)
641-
);
632+
);
633+
}));
642634
in lib.listToAttrs (
643635
# If not oneTracerPerNode, an individual tracer task is needed (instead
644636
# of running a tracer alongside a node with supervisor)
@@ -649,6 +641,7 @@ let
649641
"tracer"
650642
"perf-tracer"
651643
"tracer"
644+
0
652645
{};
653646
}
654647
]
@@ -664,9 +657,10 @@ let
664657
*/
665658
name = nodeSpec.name;
666659
value = valueF
667-
nodeSpec.name
668-
("perf-node-" + (toString nodeSpec.i))
669-
("node" + (toString nodeSpec.i))
660+
nodeSpec.name # taskName
661+
("perf-node-" + (toString nodeSpec.i)) # serviceName
662+
("node" + (toString nodeSpec.i)) # portName
663+
nodeSpec.port # portNum
670664
nodeSpec;
671665
})
672666
(profileNix.node-specs.value)
@@ -675,7 +669,7 @@ let
675669

676670
};
677671

678-
};};
672+
};
679673

680674
jobDefaults = {
681675
########################################

0 commit comments

Comments
 (0)