Skip to content

Commit d7f97e8

Browse files
committed
prepare for lua support in pcluster
1 parent 3221f94 commit d7f97e8

6 files changed

+119
-12
lines changed

Diff for: modules/03.configure.slurm.acct.headnode.sh

+105-3
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ set -e
2121

2222
configureFederatedSlurmDBD(){
2323
# slurm accounting must be preinstalled in the VPC.
24-
# slurm accouting secrets must be defined
24+
# slurm accounting secrets must be defined
2525
aws s3 cp --quiet "${post_install_base}/sacct/slurm/slurm_fed_sacct.conf" /tmp/ --region "${cfn_region}" || exit 1
2626
aws s3 cp --quiet "${post_install_base}/sacct/slurm/munge.key.gpg" /tmp/ --region "${cfn_region}" || exit 1
2727
export SLURM_FED_DBD_HOST="$(aws secretsmanager get-secret-value --secret-id "SLURM_FED_DBD_HOST" --query SecretString --output text --region "${cfn_region}")"
@@ -37,9 +37,111 @@ configureFederatedSlurmDBD(){
3737

3838
patchSlurmConfig() {
3939
sed -i "s/ClusterName=parallelcluster.*/ClusterName=parallelcluster-${stack_name}/" "/opt/slurm/etc/slurm.conf"
40-
sed -i "s/SlurmctldPort=6820-6829/SlurmctldPort=6820-6849/" "/opt/slurm/etc/slurm.conf"
40+
#sed -i "s/SlurmctldPort=6820-6829/SlurmctldPort=6820-6849/" "/opt/slurm/etc/slurm.conf"
4141
rm -f /var/spool/slurm.state/clustername
42-
ifconfig eth0 txqueuelen 512
42+
#ifconfig eth0 txqueuelen 512
43+
}
44+
45+
installLuaSubmit() {
46+
yum install -y lua-devel luarocks redis
47+
luarocks install redis-lua
48+
luarocks install lua-cjson
49+
export token="$(aws secretsmanager get-secret-value --secret-id "ADtokenPSU" --query SecretString --output text --region "${cfn_region}")"
50+
cat > /opt/slurm/etc/job_submit.lua << EOF
51+
local redis = require 'redis'
52+
local client = redis.connect('127.0.0.1', 6379)
53+
local socket = require("socket")
54+
local http = require("socket.http")
55+
local ltn12 = require("ltn12")
56+
local json = require('cjson')
57+
58+
-- Check with the authorization service whether a user may run under a project.
--
-- Parameters:
--   user    : name of the submitting user (part of the request and cache key).
--   project : project/account name the job is charged to.
--   ngpu    : GPU count sent to the service as "numGpus"; values that are not
--             numeric are sent as 0.
--
-- Returns a table with the fields "result" and "message". "result" is the
-- verdict string from the service, or from the redis cache when the service
-- gave no usable answer, and is "rejected" whenever no verdict is available.
-- "message" may be nil when neither the service nor the cache supplied one.
function apiCall(user,project,ngpu)
    local path = "http://internal-Int-AD-API-2115331254.us-east-1.elb.amazonaws.com/auth"
    local general_error = "[error] General error encountered in the authorization system. Please try again later."
    local tab = { }

    -- Without both identifiers there is nothing to ask for or to look up.
    if user == nil or project == nil then
        tab.result = "rejected"
        tab.message = general_error
        return tab
    end
    user = tostring(user)
    project = tostring(project)

    -- Encode the body with cjson instead of concatenating strings: the project
    -- name is chosen by the submitting user, and a quote inside it must not be
    -- able to change the structure of the request.
    local payload = json.encode({
        user = user,
        parameters = { project = project },
        numGpus = tonumber(ngpu) or 0
    })
    local response_body = { }
    local res, code = http.request({
        url = path,
        method = "POST",
        headers =
        {
            -- This value is filled in by the installing shell when the file
            -- is generated from its heredoc.
            ["Authorization"] = "$token",
            ["Content-Type"] = "application/json",
            ["Content-Length"] = tostring(payload:len())
        },
        source = ltn12.source.string(payload),
        sink = ltn12.sink.table(response_body),
        create=function()
            -- Separate per-operation ('b') and total ('t') timeouts on the socket.
            local req_sock = socket.tcp()
            req_sock:settimeout(3, 'b')
            req_sock:settimeout(7, 't')
            return req_sock
        end
    })

    -- json.decode raises on a body that is not JSON (for example an error
    -- page); treat that exactly like an endpoint that did not answer.
    local reply = nil
    if res ~= nil then
        local ok, decoded = pcall(json.decode, table.concat(response_body))
        if ok and type(decoded) == "table" then
            reply = decoded
        end
    end

    local result_key = user..':'..project..':authorization'
    local message_key = user..':'..project..':message'
    if reply ~= nil and code == 200 and type(reply.result) == "string" then
        tab.result = reply.result
        if type(reply.message) == "string" then
            tab.message = reply.message
        end
        -- The cache is best effort: a redis failure must not discard a
        -- verdict the service has just given.
        pcall(function()
            client:set(result_key, tab.result)
            if tab.message ~= nil then
                client:set(message_key, tab.message)
            end
        end)
    else
        -- No usable answer from the endpoint: fall back to the last cached
        -- verdict. If redis is unavailable too, tab.result stays nil and the
        -- request is rejected below.
        pcall(function()
            tab.result = client:get(result_key)
            tab.message = client:get(message_key)
        end)
    end

    if tab.result == nil then
        tab.result = "rejected"
        tab.message = general_error
    end
    return tab
end
105+
-- Slurm job_submit hook: require a project on every new job and check it with
-- the authorization service before the job is accepted.
--
-- Parameters (supplied by Slurm):
--   job_desc   : job request; "account", "comment" and "user_name" are read,
--                and "account" is set from "comment" when it was not given.
--   part_list  : not used here.
--   submit_uid : not used here.
--
-- Returns slurm.SUCCESS when the job may proceed, and
-- slurm.ESLURM_INVALID_ACCOUNT when no project was given, the project was
-- rejected, or the authorization lookup itself failed.
function slurm_job_submit(job_desc, part_list, submit_uid)
    local general_error = "[error] General error encountered in the authorization system. Please try again later."

    if job_desc.account == nil then
        if job_desc.comment == nil then
            slurm.log_user("You need to specify a project. Use '--account projectname'. Please be aware that '--comment projectname' will be deprecated.")
            slurm.log_user("You can find your allocated projects by running 'id --name --groups'")
            return slurm.ESLURM_INVALID_ACCOUNT
        end
        -- Legacy path: the project was passed through --comment.
        job_desc.account = job_desc.comment
    end

    -- The lookup talks to the network and to redis. Catch anything it raises
    -- so the user gets a readable rejection instead of an opaque plugin error.
    local ok, tab = pcall(apiCall, job_desc.user_name, job_desc.account, 0)
    if not ok or type(tab) ~= "table" then
        slurm.log_error("job_submit.lua: authorization lookup failed: %s", tostring(tab))
        slurm.log_user("%s", general_error)
        return slurm.ESLURM_INVALID_ACCOUNT
    end

    if tab.result=="rejected" then
        -- The text comes from the remote service. Pass it as an argument, not
        -- as the format string, so a percent sign in it cannot break the call.
        slurm.log_user("%s", tab.message or general_error)
        return slurm.ESLURM_INVALID_ACCOUNT
    end
    return slurm.SUCCESS
end
121+
-- Slurm job_modify hook: re-check project authorization when a job is changed.
--
-- Parameters (supplied by Slurm):
--   job_desc   : the requested changes; "account", "comment" and "user_name"
--                are read, and "account" is set from "comment" when only the
--                comment was given.
--   job_rec    : the existing job; its "account" and "user_name" are used when
--                the request does not carry them.
--                NOTE(review): assumes job_rec exposes both fields - confirm
--                against the Slurm version in use.
--   part_list  : not used here.
--   modify_uid : not used here.
--
-- Returns slurm.SUCCESS when the change may proceed, and
-- slurm.ESLURM_INVALID_ACCOUNT when no project can be determined, the project
-- was rejected, or the authorization lookup itself failed.
function slurm_job_modify(job_desc, job_rec, part_list, modify_uid)
    local general_error = "[error] General error encountered in the authorization system. Please try again later."

    -- A modify request normally carries only the fields being changed, so an
    -- update that does not touch the project has no account in job_desc.
    -- Resolve it from the request first, then from the job being modified.
    local account = job_desc.account
    if account == nil and job_desc.comment ~= nil then
        -- Legacy path: the project was passed through --comment.
        account = job_desc.comment
        job_desc.account = account
    end
    if account == nil and job_rec ~= nil then
        account = job_rec.account
    end
    if account == nil then
        slurm.log_user("[warning] You need to specify a project. Use '--account projectname'. Please be aware that '--comment projectname' will be deprecated.")
        slurm.log_user("[warning] You can find your allocated projects by running 'id --name --groups'")
        return slurm.ESLURM_INVALID_ACCOUNT
    end

    local user = job_desc.user_name
    if user == nil and job_rec ~= nil then
        user = job_rec.user_name
    end

    -- The lookup talks to the network and to redis. Catch anything it raises
    -- so the user gets a readable rejection instead of an opaque plugin error.
    local ok, tab = pcall(apiCall, user, account, 0)
    if not ok or type(tab) ~= "table" then
        slurm.log_error("job_submit.lua: authorization lookup failed: %s", tostring(tab))
        slurm.log_user("%s", general_error)
        return slurm.ESLURM_INVALID_ACCOUNT
    end

    if tab.result=="rejected" then
        -- The text comes from the remote service. Pass it as an argument, not
        -- as the format string, so a percent sign in it cannot break the call.
        slurm.log_user("%s", tab.message or general_error)
        return slurm.ESLURM_INVALID_ACCOUNT
    end
    return slurm.SUCCESS
end
137+
return slurm.SUCCESS
138+
EOF
139+
140+
echo 'JobSubmitPlugins=lua' >> /opt/slurm/etc/slurm.conf
141+
142+
cat > /etc/sudoers.d/100-AD-admins << EOF
143+
144+
EOF
43145
}
44146

45147
restartSlurmDaemons() {

Diff for: modules/33.install.cost.control.sh

+5-7
Original file line numberDiff line numberDiff line change
@@ -66,14 +66,12 @@ if [ "$saved" != "$current" ]; then
6666
fi
6767
6868
if [ ${update} -eq 1 ]; then
69+
# Instance ID
70+
MyInstID=$(curl -H "X-aws-ec2-metadata-token: $TOKEN" -v http://169.254.169.254/latest/meta-data/instance-id)
6971
70-
# Instance ID
71-
MyInstID=$(curl -H "X-aws-ec2-metadata-token: $TOKEN" -v -s http://169.254.169.254/latest/meta-data/instance-id)
72-
tag_project=$(cat /tmp/jobs/tag_project)
73-
aws ec2 create-tags --resources ${MyInstID} --tags Key=aws-parallelcluster-username,Value="${active_users}"
74-
aws ec2 create-tags --resources ${MyInstID} --tags Key=aws-parallelcluster-jobid,Value="${active_jobs}"
75-
aws ec2 create-tags --resources ${MyInstID} --tags Key=aws-parallelcluster-project,Value="${active_projects}"
76-
72+
aws ec2 create-tags --resources ${MyInstID} --tags Key=aws-parallelcluster-username,Value="${active_users}"
73+
aws ec2 create-tags --resources ${MyInstID} --tags Key=aws-parallelcluster-jobid,Value="${active_jobs}"
74+
aws ec2 create-tags --resources ${MyInstID} --tags Key=aws-parallelcluster-project,Value="${active_projects}"
7775
fi
7876
EOF
7977

Diff for: parallelcluster/config.us-east-1.sample.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ Scheduling:
112112
- Policy: arn:aws:iam::aws:policy/AWSCloudFormationReadOnlyAccess
113113
#- Policy: arn:aws:iam::aws:policy/AWSSupportAccess
114114
- Policy: arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore
115-
#- Policy: arn:aws:iam::${AWS_ACCOUNT}:policy/pclusterTagsAndBudget #make sure this policy is created https://pcluster.cloud/02-tutorials/04-cost-tracking.html
115+
- Policy: arn:aws:iam::${AWS_ACCOUNT}:policy/pclusterTagsAndBudget #make sure this policy is created https://pcluster.cloud/02-tutorials/04-cost-tracking.html
116116
S3Access:
117117
- BucketName: '*'
118118
EnableWriteAccess: true

Diff for: parallelcluster/config.us-east-2.sample.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ Scheduling:
9494
- Policy: arn:aws:iam::aws:policy/AmazonSSMFullAccess
9595
- Policy: arn:aws:iam::aws:policy/AWSCloudFormationReadOnlyAccess
9696
- Policy: arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore
97+
- Policy: arn:aws:iam::${AWS_ACCOUNT}:policy/pclusterTagsAndBudget #make sure this policy is created https://pcluster.cloud/02-tutorials/04-cost-tracking.html
9798
S3Access:
9899
- BucketName: '*'
99100
EnableWriteAccess: true

Diff for: parallelcluster/config.us-west-2.sample.yaml

+6
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ Scheduling:
110110
- Policy: arn:aws:iam::aws:policy/AWSPriceListServiceFullAccess
111111
- Policy: arn:aws:iam::aws:policy/AmazonSSMFullAccess
112112
- Policy: arn:aws:iam::aws:policy/AWSCloudFormationReadOnlyAccess
113+
- Policy: arn:aws:iam::${AWS_ACCOUNT}:policy/pclusterTagsAndBudget #make sure this policy is created https://pcluster.cloud/02-tutorials/04-cost-tracking.html
113114
S3Access:
114115
- BucketName: '*'
115116
EnableWriteAccess: true
@@ -163,6 +164,7 @@ Scheduling:
163164
- Policy: arn:aws:iam::aws:policy/AWSPriceListServiceFullAccess
164165
- Policy: arn:aws:iam::aws:policy/AmazonSSMFullAccess
165166
- Policy: arn:aws:iam::aws:policy/AWSCloudFormationReadOnlyAccess
167+
- Policy: arn:aws:iam::${AWS_ACCOUNT}:policy/pclusterTagsAndBudget #make sure this policy is created https://pcluster.cloud/02-tutorials/04-cost-tracking.html
166168
S3Access:
167169
- BucketName: '*'
168170
EnableWriteAccess: true
@@ -221,6 +223,7 @@ Scheduling:
221223
- Policy: arn:aws:iam::aws:policy/AWSPriceListServiceFullAccess
222224
- Policy: arn:aws:iam::aws:policy/AmazonSSMFullAccess
223225
- Policy: arn:aws:iam::aws:policy/AWSCloudFormationReadOnlyAccess
226+
- Policy: arn:aws:iam::${AWS_ACCOUNT}:policy/pclusterTagsAndBudget #make sure this policy is created https://pcluster.cloud/02-tutorials/04-cost-tracking.html
224227
S3Access:
225228
- BucketName: '*'
226229
EnableWriteAccess: true
@@ -274,6 +277,7 @@ Scheduling:
274277
- Policy: arn:aws:iam::aws:policy/AWSPriceListServiceFullAccess
275278
- Policy: arn:aws:iam::aws:policy/AmazonSSMFullAccess
276279
- Policy: arn:aws:iam::aws:policy/AWSCloudFormationReadOnlyAccess
280+
- Policy: arn:aws:iam::${AWS_ACCOUNT}:policy/pclusterTagsAndBudget #make sure this policy is created https://pcluster.cloud/02-tutorials/04-cost-tracking.html
277281
S3Access:
278282
- BucketName: '*'
279283
EnableWriteAccess: true
@@ -317,6 +321,7 @@ Scheduling:
317321
- Policy: arn:aws:iam::aws:policy/AWSPriceListServiceFullAccess
318322
- Policy: arn:aws:iam::aws:policy/AmazonSSMFullAccess
319323
- Policy: arn:aws:iam::aws:policy/AWSCloudFormationReadOnlyAccess
324+
- Policy: arn:aws:iam::${AWS_ACCOUNT}:policy/pclusterTagsAndBudget #make sure this policy is created https://pcluster.cloud/02-tutorials/04-cost-tracking.html
320325
S3Access:
321326
- BucketName: '*'
322327
EnableWriteAccess: true
@@ -370,6 +375,7 @@ Scheduling:
370375
- Policy: arn:aws:iam::aws:policy/AWSPriceListServiceFullAccess
371376
- Policy: arn:aws:iam::aws:policy/AmazonSSMFullAccess
372377
- Policy: arn:aws:iam::aws:policy/AWSCloudFormationReadOnlyAccess
378+
- Policy: arn:aws:iam::${AWS_ACCOUNT}:policy/pclusterTagsAndBudget #make sure this policy is created https://pcluster.cloud/02-tutorials/04-cost-tracking.html
373379
S3Access:
374380
- BucketName: '*'
375381
EnableWriteAccess: true

Diff for: sacct/slurm/slurm_fed_sacct.conf

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,4 @@ AccountingStorageHost=$SLURM_FED_DBD_HOST
1010
AccountingStorageUser=admin
1111
AccountingStoragePort=6819
1212
AccountingStorageTRES=gres/gpu
13-
#AccountingStorageEnforce=qos,limits
13+
AccountingStorageEnforce=qos,limits

0 commit comments

Comments
 (0)