Skip to content

Commit 0c94361

Browse files
author
malaval
authored
Merge pull request #22 from TexasDex/plugin-v2
Adds a PartitionOptions config parameter to allow arbitrary Slurm options
2 parents 6b39f6e + 3ce51d1 commit 0c94361

File tree

3 files changed

+32
-9
lines changed

3 files changed

+32
-9
lines changed

README.md

+23-7
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,11 @@ This JSON file specifies the groups of nodes and associated partitions that Slur
124124
]
125125
},
126126
...
127-
]
127+
],
128+
"PartitionOptions": {
129+
"Option1": "STRING",
130+
"Option2": "STRING"
131+
}
128132
},
129133
...
130134
]
@@ -148,6 +152,7 @@ This JSON file specifies the groups of nodes and associated partitions that Slur
148152
* `Tags`: List of tags applied to the EC2 instances launched for this node group.
149153
* A tag `Name` is automatically added at launch, whose value is the name of the node `[partition_name]-[nodegroup_name]-[id]`. You should not delete or override this tag, because the script `suspend.py` uses it to find which instance is associated with the node to suspend.
150154
* If you use the sequence `{ip_address}` in the value of a tag, it will be replaced with the IP address. Similarly, `{node_name}` will be replaced with the name of the node, and `{hostname}` with the EC2 hostname.
155+
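* `PartitionOptions`: Dictionary of Slurm configuration attributes for the partition (optional). Each key/value pair is appended as `key=value` to the partition line of the generated Slurm configuration.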
151156

152157
Refer to the section **Examples of `partitions.json`** for examples of file content.
153158

@@ -399,7 +404,10 @@ Single `aws` partition with 2 node groups:
399404
}
400405
]
401406
}
402-
]
407+
],
408+
"PartitionOptions": {
409+
"TRESBillingWeights": "cpu=4"
410+
}
403411
}
404412
]
405413
}
@@ -513,7 +521,7 @@ Single `aws` partition with 3 node groups:
513521

514522
### Example 3
515523

516-
Two partitions `aws` and `awsspot` with one node group in each. You could use Slurm access permissions to allow "standard" users to use only Spot instances, and "VIP" users to use Spot and On-demand instances.
524+
Two partitions `aws` and `awsspot` with one node group in each. It uses Slurm access permissions to allow users in the "standard" account to use only Spot instances, and "VIP" account users to use Spot and On-demand instances, but weights the on-demand instances more heavily for accounting purposes.
517525

518526
```
519527
{
@@ -523,9 +531,9 @@ Two partitions `aws` and `awsspot` with one node group in each. You could use Sl
523531
"NodeGroups": [
524532
{
525533
"NodeGroupName": "node",
526-
"MaxNodes: 100,
534+
"MaxNodes": 100,
527535
"Region": "us-east-1",
528-
"SlurmSpecifications: {
536+
"SlurmSpecifications": {
529537
"CPUs": "4",
530538
"Weight": "1"
531539
},
@@ -550,16 +558,20 @@ Two partitions `aws` and `awsspot` with one node group in each. You could use Sl
550558
"subnet-22222222"
551559
]
552560
}
561+
],
562+
"PartitionOptions": {
563+
"TRESBillingWeights": "cpu=30",
564+
"AllowAccounts": "standard,VIP"
553565
}
554566
},
555567
{
556568
"PartitionName": "awsspot",
557569
"NodeGroups": [
558570
{
559571
"NodeGroupName": "node",
560-
"MaxNodes: 100,
572+
"MaxNodes": 100,
561573
"Region": "us-east-1",
562-
"SlurmSpecifications: {
574+
"SlurmSpecifications": {
563575
"CPUs": "4",
564576
"Weight": "1"
565577
},
@@ -584,6 +596,10 @@ Two partitions `aws` and `awsspot` with one node group in each. You could use Sl
584596
"subnet-22222222"
585597
]
586598
}
599+
],
600+
"PartitionOptions": {
601+
"TRESBillingWeights": "cpu=10",
602+
"AllowAccounts": "standard"
587603
}
588604
}
589605
]

common.py

+2
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ def validate_partitions(data):
8585
assert re.match('^[a-zA-Z0-9]+$', partition['PartitionName']), 'root["Partitions"][%s]["PartitionName"] does not match ^[a-zA-Z0-9]+$' %i_partition
8686

8787
assert 'NodeGroups' in partition, 'Missing "NodeGroups" in root["Partitions"][%s]' %i_partition
88+
if 'PartitionOptions' in partition:
89+
assert isinstance(partition['PartitionOptions'], dict), 'root["Paritions"][%s]["PartitionOptions"] is not a dict' %(i_partition)
8890
assert isinstance(partition['NodeGroups'], list), 'root["Partitions"][%s]["NodeGroups"] is not an array' %i_partition
8991

9092
for i_nodegroup, nodegroup in enumerate(partition['NodeGroups']):

generate_conf.py

100644100755
+7-2
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,14 @@
2828
# Write a line for each node group
2929
line = 'NodeName=%s State=CLOUD %s' %(nodes, ' '.join(nodegroup_specs))
3030
f.write('%s\n' %line)
31-
31+
32+
part_options = ()
33+
if 'PartitionOptions' in partition:
34+
for key, value in partition['PartitionOptions'].items():
35+
part_options += '%s=%s' %(key, value),
36+
3237
# Write a line for each partition
33-
line = 'PartitionName=%s Nodes=%s Default=No MaxTime=INFINITE State=UP' %(partition['PartitionName'], ','.join(partition_nodes))
38+
line = 'PartitionName=%s Nodes=%s Default=No MaxTime=INFINITE State=UP %s' %(partition['PartitionName'], ','.join(partition_nodes), ' '.join(part_options))
3439
f.write('%s\n\n' %line)
3540

3641
logger.info('Output file: %s' %filename)

0 commit comments

Comments
 (0)