Skip to content

Commit 06f8e93

Browse files
committed
add manual provisioning scripts
1 parent 4bb9378 commit 06f8e93

File tree

3 files changed

+125
-0
lines changed

3 files changed

+125
-0
lines changed

Diff for: scripts/manual-provisioning/cluster_env

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#!/bin/bash
#
# Account-level settings shared by the manual provisioning scripts.
# installcluster sources this file; CLUSTER_NAME must already be set at that
# point because S3_BUCKET and KEY_PAIR are derived from it.
# The xxxx / yyyy / 123456789012 values look like placeholders to be replaced.

# Directory service endpoints and credentials.
NLB_PUBLIC_DNS_NAME='ldaps-xxxxxxxxxxxxxxx.elb.us-west-2.amazonaws.com'
SECRET_ARN="arn:aws:secretsmanager:us-west-2:123456789012:secret:ADpasswordAdmin-xxxxxx"
AWS_PROFILE='your-aws-profile'

# Each expansion of RANDOM yields a new number, so S3_BUCKET and KEY_PAIR get
# different suffixes and both change every time this file is sourced.
S3_BUCKET="${CLUSTER_NAME}-${RANDOM}"

# Region and account.
AWS_DEFAULT_REGION='us-west-2'
AWS_REGION_NAME='us-west-2'
AWS_ACCOUNT='123456789012'

# Cluster-wide options.
MAP_MIGRATED='mmtoken'
CUSTOMAD='true'
AD_ID='d-xxxxxxxxxx'

# Networking.
PUBLIC_SUBNET_ID='subnet-xxxxxxxxxxxxxx'
PRIVATE_SUBNET_ID='subnet-yyyyyyyyyyyyyy'

ODCRGROUP='AUTO'
POST_INSTALL='stability-hpc/scripts/post.install.sh'
KEY_PAIR="${CLUSTER_NAME}-${RANDOM}"

# Publish everything to child processes in one go.
export NLB_PUBLIC_DNS_NAME SECRET_ARN AWS_PROFILE S3_BUCKET \
  AWS_DEFAULT_REGION AWS_REGION_NAME AWS_ACCOUNT MAP_MIGRATED CUSTOMAD \
  AD_ID PUBLIC_SUBNET_ID PRIVATE_SUBNET_ID ODCRGROUP POST_INSTALL KEY_PAIR

Diff for: scripts/manual-provisioning/clustername.conf

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#!/bin/bash
#
# Per-cluster parameters. installcluster sources "<cluster name>.conf" after
# cluster_env, so values set here take precedence over that file.

# Name of the cluster, must be unique
CLUSTER_NAME='clustername'

# true for private networks (the original author notes: this is wrong, but it works)
# NOTE(review): installcluster re-exports USE_PUBLIC_IPS after sourcing this
# file, based on PRIVATE_SUBNET_ID, so this value may not be the one used.
USE_PUBLIC_IPS="true"

# FSX ID or AUTO
FSX_ID='fs-xxxxxxxxxxxxx'

# FSX ID or AUTO
ADMINFSX_ID='fs-xxxxxxxxxxxxxx'

# Path to the cluster config template
TEMPLATE='config.us-west-2.cpu.yaml'

export CLUSTER_NAME USE_PUBLIC_IPS FSX_ID ADMINFSX_ID TEMPLATE

Diff for: scripts/manual-provisioning/installcluster

+101
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
#!/bin/bash
#
# installcluster - manually provision a cluster with AWS ParallelCluster.
#
# Usage: installcluster <cluster-name>
#
# Expects to be run from a directory containing `cluster_env` and
# `<cluster-name>.conf`; a fresh copy of the stability-hpc repository is
# cloned into that directory on every run.

# Trace every command so the run can be followed from the console output.
set -x

# Everything below is keyed on the cluster name (settings file, key file,
# rendered config), so refuse to continue without one.
CLUSTER_NAME=${1:-}
if [[ -z "${CLUSTER_NAME}" ]]; then
  echo "usage: ${0##*/} <cluster-name>" >&2
  exit 1
fi

# Always start from a clean checkout; a stale tree would make `git clone` fail.
rm -rf stability-hpc
if ! git clone "https://github.com/stability-ai/stability-hpc"; then
  echo "error: could not clone stability-hpc" >&2
  exit 1
fi

# Best effort: an already-installed pcluster may still be usable if the
# upgrade fails, so only warn.
if ! python3 -m pip install "aws-parallelcluster" --user --upgrade --quiet; then
  echo "warning: could not install/upgrade aws-parallelcluster" >&2
fi
10+
11+
#source cluster profile, then the cluster parameters
# Order matters: cluster_env derives values from CLUSTER_NAME, and the
# per-cluster file is read second so it can override the shared settings.
# The explicit "./" keeps `.` from searching PATH first, and a missing file
# aborts the run instead of continuing with unset variables.
for settings_file in cluster_env "${CLUSTER_NAME}.conf"; do
  if [[ ! -r "./${settings_file}" ]]; then
    echo "error: cannot read settings file ./${settings_file}" >&2
    exit 1
  fi
  # shellcheck source=/dev/null
  . "./${settings_file}"
done
unset settings_file
16+
17+
#create the S3 bucket for the cluster
# us-east-1 is the default S3 location and rejects an explicit
# LocationConstraint, so only pass one for the other regions.
bucket_args=(--bucket "${S3_BUCKET}" --region "${AWS_REGION_NAME}")
if [[ "${AWS_REGION_NAME}" != "us-east-1" ]]; then
  bucket_args+=(--create-bucket-configuration "LocationConstraint=${AWS_REGION_NAME}")
fi

# Bucket creation stays best effort (the AWS CLI output is still discarded),
# but a failure is now at least reported.
if ! aws s3api create-bucket "${bucket_args[@]}" > /dev/null 2>&1; then
  echo "warning: could not create bucket ${S3_BUCKET}" >&2
fi

# Upload the freshly cloned repository under the 1click-hpc/ prefix.
aws s3 cp --quiet --recursive stability-hpc "s3://${S3_BUCKET}/1click-hpc" --region "${AWS_REGION_NAME}"
20+
21+
#needed to config the domain
# Look up the name of the directory identified by AD_ID.
ADName=$(
  aws ds describe-directories \
    --directory-id "${AD_ID}" \
    --query 'DirectoryDescriptions[*].Name' \
    --output text
)

# Split the dotted name into its first three labels; a label that is not
# present comes out as an empty string.
DC0=$(awk -F'.' '{print $1}' <<<"${ADName}")
DC1=$(awk -F'.' '{print $2}' <<<"${ADName}")
DC2=$(awk -F'.' '{print $3}' <<<"${ADName}")
OU="AD-Manage"
export DC0 DC1 DC2 OU
28+
29+
# Build the YAML fragments describing the two FSx for Lustre mounts:
# FSX for /fsx and ADMINFSX for /admin. For each one, the value "AUTO"
# selects a fragment that defines a new file system (StorageCapacity 1200,
# SCRATCH_2, LZ4); any other value is used as the FileSystemId of an
# existing file system.
# Note that pcluster will refuse to create two Lustre file systems if both
# values are AUTO.
# NOTE(review): the indentation inside the here-documents below looks like it
# was stripped in this view; the text is emitted verbatim, so confirm it
# against the repository copy before editing.
30+
31+
# Fragment for the /fsx mount.
if [[ $FSX_ID == "AUTO" ]];then
32+
FSX=$(cat <<EOF
33+
- MountDir: /fsx
34+
Name: newfsx
35+
StorageType: FsxLustre
36+
FsxLustreSettings:
37+
StorageCapacity: 1200
38+
DeploymentType: SCRATCH_2
39+
DataCompressionType: LZ4
40+
EOF
41+
)
42+
else
43+
FSX=$(cat <<EOF
44+
- MountDir: /fsx
45+
Name: existingfsx
46+
StorageType: FsxLustre
47+
FsxLustreSettings:
48+
FileSystemId: ${FSX_ID}
49+
EOF
50+
)
51+
fi
52+
53+
# Fragment for the /admin mount; same structure as the /fsx one.
if [[ $ADMINFSX_ID == "AUTO" ]];then
54+
ADMINFSX=$(cat <<EOF
55+
- MountDir: /admin
56+
Name: newadmin
57+
StorageType: FsxLustre
58+
FsxLustreSettings:
59+
StorageCapacity: 1200
60+
DeploymentType: SCRATCH_2
61+
DataCompressionType: LZ4
62+
EOF
63+
)
64+
else
65+
ADMINFSX=$(cat <<EOF
66+
- MountDir: /admin
67+
Name: existingadmin
68+
StorageType: FsxLustre
69+
FsxLustreSettings:
70+
FileSystemId: ${ADMINFSX_ID}
71+
EOF
72+
)
73+
fi
74+
75+
# Export both fragments to the environment; presumably the config template
# references them when it is passed through envsubst later in this script
# (the template is not visible here -- confirm).
export FSX=${FSX}
76+
export ADMINFSX=${ADMINFSX}
77+
78+
# Choose the subnets for the cluster. HN_SUBNET_ID is the public subnet in
# every case; SUBNET_ID and USE_PUBLIC_IPS depend on whether a private subnet
# was configured (the literal value NONE means there is none).
# NOTE(review): this overwrites any USE_PUBLIC_IPS value sourced earlier.
HN_SUBNET_ID="${PUBLIC_SUBNET_ID}"
case "${PRIVATE_SUBNET_ID}" in
  NONE)
    SUBNET_ID="${PUBLIC_SUBNET_ID}"
    USE_PUBLIC_IPS='true'
    ;;
  *)
    SUBNET_ID="${PRIVATE_SUBNET_ID}"
    USE_PUBLIC_IPS='false'
    ;;
esac
export SUBNET_ID USE_PUBLIC_IPS HN_SUBNET_ID
87+
88+
# Render the cluster configuration: envsubst replaces the environment
# variable references in the selected template with the values exported above.
# Stop here if the template is missing or rendering fails, so that cluster
# creation never runs against an absent or stale config file.
template_path="stability-hpc/parallelcluster/${TEMPLATE}"
if [[ ! -r "${template_path}" ]]; then
  echo "error: cluster template not found: ${template_path}" >&2
  exit 1
fi
if ! /usr/bin/envsubst < "${template_path}" > "config.${CLUSTER_NAME}.yaml"; then
  echo "error: could not render config.${CLUSTER_NAME}.yaml" >&2
  exit 1
fi
89+
90+
# Create the EC2 key pair and store its private key for this cluster.
key_file="/home/ec2-user/.ssh/id_rsa_${CLUSTER_NAME}"

# Tighten the umask while the key is written so a newly created file is
# never readable by other users, not even briefly.
saved_umask=$(umask)
umask 077
if ! aws ec2 create-key-pair --key-name "${KEY_PAIR}" --query KeyMaterial --output text > "${key_file}"; then
  # Presumably a key pair with this name already exists: drop it and retry once.
  aws ec2 delete-key-pair --key-name "${KEY_PAIR}"
  if ! aws ec2 create-key-pair --key-name "${KEY_PAIR}" --query KeyMaterial --output text > "${key_file}"; then
    umask "${saved_umask}"
    echo "error: could not create key pair ${KEY_PAIR}" >&2
    exit 1
  fi
fi
umask "${saved_umask}"

# A pre-existing file keeps its old mode when truncated, so still enforce 600.
# The file was just written by this user, so sudo is not needed.
chmod 600 "${key_file}"
96+
97+
#Create the cluster and wait
# The cluster is named "stability-<cluster name>"; pcluster's standard output
# is appended to bootstrap.log in the current directory.
/home/ec2-user/.local/bin/pcluster create-cluster --cluster-name "stability-${CLUSTER_NAME}" --cluster-configuration "config.${CLUSTER_NAME}.yaml" --rollback-on-failure false --wait >> bootstrap.log

# Keep a copy of the install log and of the rendered configuration in the
# cluster bucket. The rendered file is config.<cluster name>.yaml; the
# destination key keeps its previous region-based name.
aws s3 cp --quiet bootstrap.log "s3://${S3_BUCKET}/install.log" --region "${AWS_REGION_NAME}"
aws s3 cp --quiet "config.${CLUSTER_NAME}.yaml" "s3://${S3_BUCKET}/config.${AWS_REGION_NAME}.yaml" --region "${AWS_REGION_NAME}"

0 commit comments

Comments
 (0)