
Commit fdd57c4

Merge pull request #1 from bigscience-workshop/lumi_eval
Add LUMI eval compat
2 parents: 277e1d3 + 32f039c

File tree: 5 files changed (+117, −2 lines)


examples/run_evalharness_deepspeed.md

Lines changed: 1 addition & 0 deletions
@@ -15,6 +15,7 @@ Get lm-eval harness (https://github.com/EleutherAI/lm-evaluation-harness) and `b
 start-prod
 pip install best-download==0.0.7
 pip install git+https://github.com/EleutherAI/lm-evaluation-harness
+pip install --upgrade scipy
 ```
 
 2. Pre-download needed datasets
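
The pre-download step is handled by tasks/eval_harness/download.py (patched below). For orientation, a minimal sketch of the same idea using the harness API directly, run on a node with internet access; it assumes the classic (pre-0.4) lm_eval interface that this guide installs, and borrows the cache path from the LUMI script below:

import os

# Point the datasets cache at shared storage before anything imports `datasets`
# (path taken from run_evalharness_lumi.sh; adjust for your project).
os.environ["HF_DATASETS_CACHE"] = "/scratch/project_462000119/ds_cache"

from lm_eval import tasks

# Building the task objects forces each task's HF dataset into the cache.
tasks.get_task_dict(["copa", "piqa", "rte", "winogrande"])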

examples/run_evalharness_lumi.sh

Lines changed: 113 additions & 0 deletions
@@ -0,0 +1,113 @@
+#!/bin/bash
+#SBATCH --exclude=nid005159
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --cpus-per-task=32
+#SBATCH --mem=256G
+#SBATCH -p eap
+#SBATCH -t 2-0:00:00
+#SBATCH --gpus-per-node=mi250:1
+#SBATCH --exclusive=user
+#SBATCH --hint=nomultithread
+#SBATCH --account=project_462000119
+#SBATCH -o logs/%j.out
+#SBATCH -e logs/%j.err
+
+# if run without sbatch, invoke here
+if [ -z $SLURM_JOB_ID ]; then
+    mkdir -p logs
+    sbatch "$0"
+    exit
+fi
+
+set -euo pipefail
+
+# symlink logs/latest_eval.out and logs/latest_eval.err
+ln -f -s $SLURM_JOB_ID.out logs/latest_eval.out
+ln -f -s $SLURM_JOB_ID.err logs/latest_eval.err
+
+# Data
+CHECKPOINT_PATH=/scratch/project_462000119/muennighoff/nov-2022-optimization/checkpoints/global_step10
+VARIANT=global_step10
+
+export HF_DATASETS_OFFLINE=1
+export HF_DATASETS_CACHE=/scratch/project_462000119/ds_cache
+
+VOCAB_FILE="gpt2/vocab.json"
+MERGE_FILE="gpt2/merges.txt"
+
+PP_SIZE=1
+TP_SIZE=1
+# different from the training MICRO_BATCH_SIZE - no optim memory, so can do bigger BS
+# make as big as it can fit into gpu w/o OOM, but not too close to 100%
+EVAL_MICRO_BATCH_SIZE=1
+MICRO_BS_MULTIPLIER=1
+
+# Model parameters
+SEQ_LEN=2048
+
+# Dummy arguments
+MEGATRON_REQUIRED_ARGS=" \
+    --num-layers -1 \
+    --hidden-size -1 \
+    --num-attention-heads -1 \
+    --seq-length -1 \
+    --max-position-embeddings -1 \
+"
+
+ZERO_STAGE=0
+
+mkdir -p ds_configs
+DS_CONFIG_PATH="ds_configs/$SLURM_JOB_ID.json"
+
+cat <<EOF > $DS_CONFIG_PATH
+{
+    "train_micro_batch_size_per_gpu": 1,
+    "train_batch_size": 1,
+    "gradient_clipping": 1.0,
+    "zero_optimization": {
+        "stage": $ZERO_STAGE
+    },
+    "bf16": {
+        "enabled": true
+    },
+    "steps_per_print": 2000,
+    "wall_clock_breakdown": false
+}
+EOF
+
+DEEPSPEED_ARGS=" \
+    --deepspeed \
+    --deepspeed_config $DS_CONFIG_PATH \
+    --zero-stage $ZERO_STAGE \
+"
+
+CMD="Megatron-DeepSpeed/tasks/eval_harness/evaluate.py \
+    --load $CHECKPOINT_PATH \
+    --results_path $VARIANT-results.json \
+    --tensor-model-parallel-size $TP_SIZE \
+    --pipeline-model-parallel-size $PP_SIZE \
+    --vocab-file $VOCAB_FILE \
+    --merge-file $MERGE_FILE \
+    --micro-batch-size $EVAL_MICRO_BATCH_SIZE \
+    --no-load-optim \
+    --no-load-rng \
+    --bf16 \
+    --inference \
+    --seq-length $SEQ_LEN \
+    --task_list copa,piqa,rte,winogrande,hendrycksTest-abstract_algebra,hendrycksTest-anatomy,hendrycksTest-astronomy,hendrycksTest-business_ethics,hendrycksTest-clinical_knowledge,hendrycksTest-college_biology,hendrycksTest-college_chemistry,hendrycksTest-college_computer_science,hendrycksTest-college_mathematics,hendrycksTest-college_medicine,hendrycksTest-college_physics,hendrycksTest-computer_security,hendrycksTest-conceptual_physics,hendrycksTest-econometrics,hendrycksTest-electrical_engineering,hendrycksTest-elementary_mathematics,hendrycksTest-formal_logic,hendrycksTest-global_facts,hendrycksTest-high_school_biology,hendrycksTest-high_school_chemistry,hendrycksTest-high_school_computer_science,hendrycksTest-high_school_european_history,hendrycksTest-high_school_geography,hendrycksTest-high_school_government_and_politics,hendrycksTest-high_school_macroeconomics,hendrycksTest-high_school_mathematics,hendrycksTest-high_school_microeconomics,hendrycksTest-high_school_physics,hendrycksTest-high_school_psychology,hendrycksTest-high_school_statistics,hendrycksTest-high_school_us_history,hendrycksTest-high_school_world_history,hendrycksTest-human_aging,hendrycksTest-human_sexuality,hendrycksTest-international_law,hendrycksTest-jurisprudence,hendrycksTest-logical_fallacies,hendrycksTest-machine_learning,hendrycksTest-management,hendrycksTest-marketing,hendrycksTest-medical_genetics,hendrycksTest-miscellaneous,hendrycksTest-moral_disputes,hendrycksTest-moral_scenarios,hendrycksTest-nutrition,hendrycksTest-philosophy,hendrycksTest-prehistory,hendrycksTest-professional_accounting,hendrycksTest-professional_law,hendrycksTest-professional_medicine,hendrycksTest-professional_psychology,hendrycksTest-public_relations,hendrycksTest-security_studies,hendrycksTest-sociology,hendrycksTest-us_foreign_policy,hendrycksTest-virology,hendrycksTest-world_religions \
+    --intermed_results \
+    --adaptive_seq_len \
+    --micro_bs_multiplier $MICRO_BS_MULTIPLIER \
+    $MEGATRON_REQUIRED_ARGS \
+    $DEEPSPEED_ARGS \
+    "
+
+echo $CMD
+
+echo "START $SLURM_JOBID: $(date)"
+
+srun --label launch.sh $CMD
+
+echo "END $SLURM_JOBID: $(date)"
+
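
The job writes its scores to $VARIANT-results.json via --results_path (with --intermed_results apparently adding per-task intermediate dumps). A small sketch for summarizing the final file afterwards, assuming it follows lm-eval's usual {"results": {task: {metric: value}}} layout, which this diff does not itself confirm:

import json

# Hypothetical reader for global_step10-results.json; the layout is assumed.
with open("global_step10-results.json") as f:
    results = json.load(f)["results"]

for task, metrics in sorted(results.items()):
    shown = ", ".join(f"{name}={value:.4f}" for name, value in metrics.items()
                      if isinstance(value, float))
    print(f"{task}: {shown}")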

tasks/eval_harness/download.py

Lines changed: 1 addition & 0 deletions
@@ -1,6 +1,7 @@
 # Downloads the specified tasks in the evaluation harness
 # This is particularly useful when running in environments where the GPU nodes
 # do not have internet access. This way we can pre-download them and use the cached dataset during evaluation.
+# May want to set a cache before, e.g. export HF_DATASETS_CACHE=/scratch/project_462000119/ds_cache
 
 from lm_eval import tasks
 from lm_eval.tasks import ALL_TASKS
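
Because the GPU nodes are offline, the LUMI script above also sets HF_DATASETS_OFFLINE=1, so a missing dataset fails fast instead of hanging on a network call. A quick pre-flight check from a login node, assuming a standard `datasets` install (the dataset name here is illustrative):

import os

# Reproduce the environment the eval job will see on a GPU node.
os.environ["HF_DATASETS_OFFLINE"] = "1"
os.environ["HF_DATASETS_CACHE"] = "/scratch/project_462000119/ds_cache"

from datasets import load_dataset

# Succeeds only if the dataset is already in the cache; raises otherwise.
load_dataset("piqa")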

tasks/eval_harness/evaluate.py

Lines changed: 1 addition & 1 deletion
@@ -260,7 +260,7 @@ def tokenizer_encode(self, text):
 from megatron.initialize import initialize_megatron
 import megatron
 
-from tools.convert_checkpoint.deepspeed_checkpoint import DeepSpeedCheckpoint
+from deepspeed.checkpoint.deepspeed_checkpoint import DeepSpeedCheckpoint
 from tools.convert_checkpoint.deepspeed_to_megatron import _create_rank_checkpoint
 
 def override_args(args, override_args, skip_keys, skip_if_specified_keys):
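
This switches to the DeepSpeedCheckpoint class bundled with DeepSpeed itself rather than the copy vendored under tools/convert_checkpoint. If the script also had to run against older DeepSpeed wheels that predate the deepspeed.checkpoint module, a hedged fallback import is one option (a sketch, not part of this commit):

try:
    # Newer DeepSpeed releases bundle the checkpoint-inspection helpers.
    from deepspeed.checkpoint.deepspeed_checkpoint import DeepSpeedCheckpoint
except ImportError:
    # Fall back to the copy vendored in this repo for older installs.
    from tools.convert_checkpoint.deepspeed_checkpoint import DeepSpeedCheckpoint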

tools/convert_checkpoint/deepspeed_to_megatron.py

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@
 import os
 import torch
 from collections import OrderedDict
-from .deepspeed_checkpoint import ARGS_KEY, DeepSpeedCheckpoint
+from deepspeed.checkpoint.deepspeed_checkpoint import DeepSpeedCheckpoint
 
 MODEL_KEY = 'model'
 ARGS_KEY = 'args'
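
Same relocation as in evaluate.py; dropping ARGS_KEY from the import is safe because the module defines its own ARGS_KEY = 'args' just below. For orientation, a sketch of loading a checkpoint through the relocated class, with the constructor arguments and get_args accessor assumed from DeepSpeed's checkpoint module rather than confirmed by this diff:

from deepspeed.checkpoint.deepspeed_checkpoint import DeepSpeedCheckpoint

# Assumed signature: DeepSpeedCheckpoint(checkpoint_dir, tp_degree, pp_degree);
# path and degrees mirror CHECKPOINT_PATH, TP_SIZE, and PP_SIZE above.
ds_checkpoint = DeepSpeedCheckpoint(
    "/scratch/project_462000119/muennighoff/nov-2022-optimization/checkpoints/global_step10",
    tp_degree=1,
    pp_degree=1,
)
print(ds_checkpoint.get_args())  # the training args stored under ARGS_KEY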
