Skip to content

Commit 7dd49ae

Browse files
committed
test: update dashboard test input 1 to have updated pod-stats data (with ray head node)
1 parent a8680ae commit 7dd49ae

File tree

8 files changed

+1476
-1413
lines changed

8 files changed

+1476
-1413
lines changed

Diff for: tests/plugin-codeflare/dashboard/inputs/1/choices.json

+6-2
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
"madwizard/apriori/arch": "x64",
44
"madwizard/apriori/platform": "darwin",
55
"madwizard/apriori/mac-installer": "Homebrew",
6-
"madwizard/apriori/in-terminal": "HTML",
6+
"madwizard/apriori/in-terminal": "Text",
77
"Training####Fine Tuning": "Fine Tuning",
88
"GLUE": "GLUE",
99
"AWS####IBM": "AWS",
@@ -17,5 +17,9 @@
1717
"Choose your Model File.expand([ -n \"$MC_CONFIG_DIR\" ] && [ -n \"$S3_FILEPATH\" ] && [ -n \"$S3_FILEPATH${S3_BUCKET_SUFFIX}\" ] && mc -q --config-dir ${MC_CONFIG_DIR} ls \"s3/$S3_FILEPATH${S3_BUCKET_SUFFIX}\" | awk '{print $NF}', S3 Objects)": "roberta-base",
1818
"Choose your Glue Data File.expand([ -n \"$MC_CONFIG_DIR\" ] && [ -n \"$S3_FILEPATH\" ] && [ -n \"$S3_FILEPATH${S3_BUCKET_SUFFIX}\" ] && mc -q --config-dir ${MC_CONFIG_DIR} ls \"s3/$S3_FILEPATH${S3_BUCKET_SUFFIX}\" | awk '{print $NF}', S3 Objects)": "glue_data",
1919
"BERT": "BERT",
20-
"Example: Using Ray Tasks to Parallelize a Function####Example: Using Ray Actors to Parallelize a Class####Example: Creating and Transforming Datasets####Example: Training Using PyTorch####Example: Hyperparameter Tuning####Example: Serving a scikit-learn gradient boosting classifier": "Example: Using Ray Tasks to Parallelize a Function"
20+
"Example: Using Ray Tasks to Parallelize a Function####Example: Using Ray Actors to Parallelize a Class####Example: Creating and Transforming Datasets####Example: Training Using PyTorch####Example: Hyperparameter Tuning####Example: Serving a scikit-learn gradient boosting classifier": "Example: Using Ray Tasks to Parallelize a Function",
21+
"Number of CPUs####Number of GPUs": "{\"Number of CPUs\":4,\"Number of GPUs\":3}",
22+
"expand(echo ${A-error} ; echo ${B-4} ; echo ${C-5})": "3",
23+
"XXXXXX.11111####222222": "11111",
24+
"YYYYYY.11111####222222": "222222"
2125
}
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,13 @@
11
LAST SEEN TYPE REASON OBJECT MESSAGE
2-
0s Normal Scheduled pod/mycluster-ray-head-type-k64tk Successfully assigned nvidia-gpu-operator/mycluster-ray-head-type-k64tk to ip-10-0-129-173.ec2.internal
3-
0s Normal AddedInterface pod/mycluster-ray-head-type-k64tk Add eth0 [10.131.34.18/23] from openshift-sdn
4-
0s Normal Pulling pod/mycluster-ray-head-type-k64tk Pulling image "rayproject/ray-ml:1.12.2-py37-gpu"
5-
0s Normal Pulled pod/mycluster-ray-head-type-k64tk Successfully pulled image "rayproject/ray-ml:1.12.2-py37-gpu" in 5m15.335982782s
6-
0s Normal Created pod/mycluster-ray-head-type-k64tk Created container ray-node
7-
0s Normal Started pod/mycluster-ray-head-type-k64tk Started container ray-node
8-
0s Normal Scheduled pod/mycluster-ray-worker-type-lbd4g Successfully assigned nvidia-gpu-operator/mycluster-ray-worker-type-lbd4g to ip-10-0-135-132.ec2.internal
9-
0s Normal AddedInterface pod/mycluster-ray-worker-type-lbd4g Add eth0 [10.128.36.32/23] from openshift-sdn
10-
0s Normal Pulling pod/mycluster-ray-worker-type-lbd4g Pulling image "rayproject/ray-ml:1.12.2-py37-gpu"
11-
0s Normal Pulled pod/mycluster-ray-worker-type-lbd4g Successfully pulled image "rayproject/ray-ml:1.12.2-py37-gpu" in 6m10.715997692s
12-
0s Normal Created pod/mycluster-ray-worker-type-lbd4g Created container ray-node
13-
0s Normal Started pod/mycluster-ray-worker-type-lbd4g Started container ray-node
2+
0s Normal Scheduled pod/mycluster-ray-head-type-krlr4 Successfully assigned nvidia-gpu-operator/mycluster-ray-head-type-krlr4 to ip-10-0-128-169.ec2.internal
3+
0s Normal AddedInterface pod/mycluster-ray-head-type-krlr4 Add eth0 [10.128.44.144/23] from openshift-sdn
4+
0s Normal Pulling pod/mycluster-ray-head-type-krlr4 Pulling image "rayproject/ray-ml:1.13.0-py37-gpu"
5+
0s Normal Pulled pod/mycluster-ray-head-type-krlr4 Successfully pulled image "rayproject/ray-ml:1.13.0-py37-gpu" in 6m48.700535275s
6+
0s Normal Created pod/mycluster-ray-head-type-krlr4 Created container ray-node
7+
0s Normal Started pod/mycluster-ray-head-type-krlr4 Started container ray-node
8+
0s Normal Scheduled pod/mycluster-ray-worker-type-6r7hp Successfully assigned nvidia-gpu-operator/mycluster-ray-worker-type-6r7hp to ip-10-0-133-106.ec2.internal
9+
0s Normal AddedInterface pod/mycluster-ray-worker-type-6r7hp Add eth0 [10.131.42.42/23] from openshift-sdn
10+
0s Normal Pulling pod/mycluster-ray-worker-type-6r7hp Pulling image "rayproject/ray-ml:1.13.0-py37-gpu"
11+
0s Normal Pulled pod/mycluster-ray-worker-type-6r7hp Successfully pulled image "rayproject/ray-ml:1.13.0-py37-gpu" in 6m14.380152399s
12+
0s Normal Created pod/mycluster-ray-worker-type-6r7hp Created container ray-node
13+
0s Normal Started pod/mycluster-ray-worker-type-6r7hp Started container ray-node

Diff for: tests/plugin-codeflare/dashboard/inputs/1/job.json

+19-14
Original file line number | Diff line number | Diff line change
@@ -1,27 +1,28 @@
11
{
2-
"jobid": "84d94981-8197-4bfd-a0a3-a93dd67dc2b7",
2+
"jobid": "87ed37bc-a837-4f20-aad0-0a12754452f4",
33
"cmdline": {
4-
"appPart": "python3 tmp-3113-RmwfLb44cTU3 -v -b ${S3_BUCKET} -m ${S3_OBJECTMODEL} -g ${S3_OBJECTGLUEDATA} -t WNLI -M -s 40 41 42 43",
5-
"systemPart": "ray job submit --runtime-env=/var/folders/2k/7mgd1tq55gdbghf0xkl2t_l80000gp/T/tmp-3113-8BRkUtu0rNmo --job-id ${JOB_ID} --no-wait"
4+
"appPart": "python3 tmp-39715-vOKRmyNRUiZ6 -v -b ${S3_BUCKET} -m ${S3_OBJECTMODEL} -g ${S3_OBJECTGLUEDATA} -t WNLI -M -s 40 41 42 43",
5+
"systemPart": "ray job submit --runtime-env=/var/folders/2k/7mgd1tq55gdbghf0xkl2t_l80000gp/T/tmp-39715-KyWwb7w9Tliq --job-id ${JOB_ID} --no-wait"
66
},
77
"runtimeEnv": {
88
"env_vars": {
9-
"JOB_ID": "84d94981-8197-4bfd-a0a3-a93dd67dc2b7",
9+
"JOB_ID": "87ed37bc-a837-4f20-aad0-0a12754452f4",
10+
"S3_PROVIDER": "aws",
1011
"S3_ENDPOINT": "https://s3.amazonaws.com",
1112
"S3_ACCESS_KEY_ID": "********",
1213
"AWS_ACCESS_KEY_ID": "********",
1314
"S3_SECRET_ACCESS_KEY": "********",
1415
"AWS_SECRET_ACCESS_KEY": "********",
15-
"MC_CONFIG_DIR": "/var/folders/2k/7mgd1tq55gdbghf0xkl2t_l80000gp/T/tmp.VAPQnMBX",
16+
"MC_CONFIG_DIR": "/var/folders/2k/7mgd1tq55gdbghf0xkl2t_l80000gp/T/tmp.NOeQ5szv",
1617
"S3_BUCKETRAYLOGS": "browsey",
1718
"S3_FILEPATHRAYLOGS": "RAYLOGS",
18-
"S3_LOGDIR": "browsey/codeflare/84d94981-8197-4bfd-a0a3-a93dd67dc2b7",
19-
"LOGDIR_STAGE": "/var/folders/2k/7mgd1tq55gdbghf0xkl2t_l80000gp/T/logdir-stage.Vqva5wI0",
20-
"LOGDIR_URI": "s3://browsey/codeflare/84d94981-8197-4bfd-a0a3-a93dd67dc2b7",
21-
"LOGDIR_MC": "s3/browsey/codeflare/84d94981-8197-4bfd-a0a3-a93dd67dc2b7",
22-
"STREAMCONSUMER_LOGS": "/var/folders/2k/7mgd1tq55gdbghf0xkl2t_l80000gp/T/logdir-stage.Vqva5wI0/logs/",
23-
"STREAMCONSUMER_EVENTS": "/var/folders/2k/7mgd1tq55gdbghf0xkl2t_l80000gp/T/logdir-stage.Vqva5wI0/events/",
24-
"STREAMCONSUMER_RESOURCES": "/var/folders/2k/7mgd1tq55gdbghf0xkl2t_l80000gp/T/logdir-stage.Vqva5wI0/resources/",
19+
"S3_LOGDIR": "browsey/codeflare/87ed37bc-a837-4f20-aad0-0a12754452f4",
20+
"LOGDIR_STAGE": "/var/folders/2k/7mgd1tq55gdbghf0xkl2t_l80000gp/T/logdir-stage.n2ngzgZg",
21+
"LOGDIR_URI": "s3://browsey/codeflare/87ed37bc-a837-4f20-aad0-0a12754452f4",
22+
"LOGDIR_MC": "s3/browsey/codeflare/87ed37bc-a837-4f20-aad0-0a12754452f4",
23+
"STREAMCONSUMER_LOGS": "/var/folders/2k/7mgd1tq55gdbghf0xkl2t_l80000gp/T/logdir-stage.n2ngzgZg/logs/",
24+
"STREAMCONSUMER_EVENTS": "/var/folders/2k/7mgd1tq55gdbghf0xkl2t_l80000gp/T/logdir-stage.n2ngzgZg/events/",
25+
"STREAMCONSUMER_RESOURCES": "/var/folders/2k/7mgd1tq55gdbghf0xkl2t_l80000gp/T/logdir-stage.n2ngzgZg/resources/",
2526
"KUBE_CONTEXT": "default/api-codeflare-train-v11-codeflare-openshift-com:6443/kube:admin",
2627
"KUBE_NS": "nvidia-gpu-operator",
2728
"NUM_CPUS": "1",
@@ -30,10 +31,14 @@
3031
"MAX_WORKERS": "1",
3132
"WORKER_MEMORY": "32Gi",
3233
"HEAD_MEMORY": "32Gi",
34+
"RAY_OPERATOR_IMAGE": "rayproject/ray:1.13.0-py37",
35+
"RAY_IMAGE": "rayproject/ray-ml:1.13.0-py37-gpu",
36+
"HELM_CLONE_TEMPDIR": "/var/folders/2k/7mgd1tq55gdbghf0xkl2t_l80000gp/T/tmp.VerQFNF6",
3337
"RAY_KUBE_CLUSTER_NAME": "mycluster",
38+
"KUBE_POD_LABEL_SELECTOR": "ray-user-node-type=rayWorkerType",
39+
"KUBE_PODFULL_LABEL_SELECTOR": "ray-node-type",
3440
"RAY_KUBE_PORT": "8266",
3541
"RAY_ADDRESS": "http://127.0.0.1:8266",
36-
"KUBE_POD_LABEL_SELECTOR": "ray-user-node-type=rayWorkerType",
3742
"S3_BUCKET": "browsey",
3843
"S3_FILEPATH": "browsey",
3944
"S3_OBJECTMODEL": "roberta-base",
@@ -43,7 +48,7 @@
4348
"WANDB_CONFIG_DIR": "/tmp",
4449
"WANDB_DISABLED": "true"
4550
},
46-
"working_dir": "/var/folders/2k/7mgd1tq55gdbghf0xkl2t_l80000gp/T/tmp-3113-ma7eFXGAJZjS",
51+
"working_dir": "/var/folders/2k/7mgd1tq55gdbghf0xkl2t_l80000gp/T/tmp-39715-QAaimTMZf5wC",
4752
"pip": ["boto3", "ray[default]", "ray_lightning", "pytorch_lightning", "torchvision", "transformers==3.0.2"]
4853
},
4954
"language": "python",

Diff for: tests/plugin-codeflare/dashboard/inputs/1/jobid.txt

+1-1
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
84d94981-8197-4bfd-a0a3-a93dd67dc2b7
1+
87ed37bc-a837-4f20-aad0-0a12754452f4

0 commit comments

Comments
 (0)