Skip to content

Commit f8f7497

Browse files
authored
Merge pull request #13 from malaval/plugin-v2
Correct bug when node ID > 9
2 parents 238249f + 67d8e37 commit f8f7497

File tree

5 files changed

+13
-6
lines changed

5 files changed

+13
-6
lines changed

.DS_Store

6 KB
Binary file not shown.

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ This JSON file specifies the groups of nodes and associated partitions that Slur
134134
* `Partitions`: List of partitions
135135
* `PartitionName`: Name of the partition. Must match the pattern `^[a-zA-Z0-9_]+$`.
136136
* `NodeGroups`: List of node groups for this partition. A node group is a set of nodes that share the same specifications.
137-
* `NodeGroupName`: Name of the node group. Must match the pattern `^[a-zA-Z0-9_]+$`.
137+
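* `NodeGroupName`: Name of the node group. Must match the pattern `^[a-zA-Z0-9_]+[a-zA-Z_]$`, i.e. it must be at least two characters long and must not end with a digit, so that the numeric node ID appended to the node group name can be parsed unambiguously.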
138138
* `MaxNodes`: Maximum number of nodes that Slurm can launch for this node group. For each node group, `generate_conf.py` will issue a line with `NodeName=[partition_name]-[nodegroup_name][0-(max_nodes-1)]`
139139
* `Region`: Name of the AWS region where to launch EC2 instances for this node group. Example: `us-east-1`.
140140
* [OPTIONAL] `ProfileName`: Name of the AWS CLI profile to use to authenticate AWS requests. If you don't specify a profile name, the default profile or the EC2 instance metadata credentials are used.

common.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -82,14 +82,14 @@ def validate_partitions(data):
8282

8383
for i_partition, partition in enumerate(data['Partitions']):
8484
assert 'PartitionName' in partition, 'Missing "PartitionName" in root["Partitions"][%s]' %i_partition
85-
assert re.match('^[a-zA-Z0-9_]+$', partition['PartitionName']), 'root["Partitions"][%s]["PartitionName"] does not match ^[a-zA-Z0-9-]+$' %i_partition
85+
assert re.match('^[a-zA-Z0-9_]+$', partition['PartitionName']), 'root["Partitions"][%s]["PartitionName"] does not match ^[a-zA-Z0-9_]+$' %i_partition
8686

8787
assert 'NodeGroups' in partition, 'Missing "NodeGroups" in root["Partitions"][%s]' %i_partition
8888
assert isinstance(partition['NodeGroups'], list), 'root["Partitions"][%s]["NodeGroups"] is not an array' %i_partition
8989

9090
for i_nodegroup, nodegroup in enumerate(partition['NodeGroups']):
9191
assert 'NodeGroupName' in nodegroup, 'Missing "NodeGroupName" in root["Partitions"][%s]["NodeGroups"][%s]' %(i_partition, i_nodegroup)
92-
assert re.match('^[a-zA-Z0-9_]+$', nodegroup['NodeGroupName']), 'root["Partitions"][%s]["NodeGroups"][%s]["NodeGroupName"] does not match ^[a-zA-Z0-9-]+$' %(i_partition, i_nodegroup)
92+
assert re.match('^[a-zA-Z0-9_]+[a-zA-Z_]$', nodegroup['NodeGroupName']), 'root["Partitions"][%s]["NodeGroups"][%s]["NodeGroupName"] does not match ^[a-zA-Z0-9_]+[a-zA-Z_]$' %(i_partition, i_nodegroup)
9393

9494
assert 'MaxNodes' in nodegroup, 'Missing "MaxNodes" in root["Partitions"][%s]["NodeGroups"][%s]' %(i_partition, i_nodegroup)
9595
assert isinstance(nodegroup['MaxNodes'], int), 'root["Partitions"][%s]["NodeGroups"][%s]["MaxNodes"] is not a number' %(i_partition, i_nodegroup)
@@ -238,7 +238,7 @@ def parse_node_names(node_names):
238238
for node_name in node_names:
239239

240240
# For each node: extract partition name, node group name and node id
241-
pattern = '^([a-zA-Z0-9_]+)-([a-zA-Z0-9_]+)([0-9]+)$'
241+
pattern = '^([a-zA-Z0-9_]+)-([a-zA-Z0-9_]+[a-zA-Z_])([0-9]+)$'
242242
match = re.match(pattern, node_name)
243243
if match:
244244
partition_name, nodegroup_name, node_id = match.groups()

resume.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,14 @@
3030

3131
nb_nodes_to_resume = len(node_ids)
3232
nodegroup = common.get_partition_nodegroup(partition_name, nodegroup_name)
33-
client = common.get_ec2_client(nodegroup)
3433

3534
# Ignore if the partition and the node group are not in partitions.json
3635
if nodegroup is None:
37-
logger.debug('Skipping partition=%s nodegroup=%s: not in partition.json' %(partition_name, nodegroup_name))
36+
logger.warning('Skipping partition=%s nodegroup=%s: not in partition.json' %(partition_name, nodegroup_name))
3837
continue
3938

39+
client = common.get_ec2_client(nodegroup)
40+
4041
# Create a dict for the EC2 CreateFleet request
4142
request_fleet = {
4243
'LaunchTemplateConfigs': [

suspend.py

+6
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,12 @@
2727
for nodegroup_name, node_ids in nodegroups.items():
2828

2929
nodegroup = common.get_partition_nodegroup(partition_name, nodegroup_name)
30+
31+
# Ignore if the partition and the node group are not in partitions.json
32+
if nodegroup is None:
33+
logger.warning('Skipping partition=%s nodegroup=%s: not in partition.json' %(partition_name, nodegroup_name))
34+
continue
35+
3036
client = common.get_ec2_client(nodegroup)
3137

3238
# Retrieve the list of instances to terminate based on the tag Name

0 commit comments

Comments
 (0)