#!/bin/bash

#SBATCH --job-name=EVALUATE_Llama-3.2-1B-Instruct
#SBATCH --account=brb@h100
#SBATCH --output=evaluation.log
#SBATCH --error=evaluation.log
#SBATCH --nodes=2
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=96
#SBATCH --gres=gpu:4
#SBATCH --hint=nomultithread
#SBATCH --constraint=h100
#SBATCH --time=02:00:00
#SBATCH --exclusive
#SBATCH --parsable
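# Resource note: 2 nodes x 4 H100 GPUs = 8 GPUs in total, which matches the
# --tensor-parallel-size 8 passed to vllm serve below. If you change the node
# or GPU count, adjust the tensor-parallel size accordingly.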

set -e
set -x

module purge
module load miniforge/24.9.0
conda activate $WORKDIR/lighteval-h100
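# Assumption: the conda environment at $WORKDIR/lighteval-h100 already provides
# lighteval, vllm and ray, and $WORKDIR is defined by the site environment.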

function find_available_port {
    printf "%s" "$(python -c 'import socket; s=socket.socket(); s.bind(("", 0)); print(s.getsockname()[1]); s.close()')"
}
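# Binding a socket to port 0 lets the kernel pick a free ephemeral port; the
# port is released immediately, so there is a small race between choosing it
# here and Ray/vLLM actually binding it below.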

PORT=$(find_available_port)
NODELIST=($(scontrol show hostnames $SLURM_JOB_NODELIST))
MASTER=${NODELIST[0]}
MASTER_IP=$(hostname --ip-address)
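# The batch script itself runs on the first allocated node, so this IP is the
# Ray head node's address. Note: on hosts with several network interfaces,
# `hostname --ip-address` may print more than one address; pick one explicitly
# if that is the case on your cluster.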

export HF_HOME=$WORKDIR/HF_HOME
export MODEL_DIRECTORY=$WORKDIR/HuggingFace_Models/meta-llama/Llama-3.2-1B-Instruct
export TASK_FILE=$WORKDIR/community_tasks/french_eval.py
export HF_HUB_OFFLINE=1
export VLLM_WORKER_MULTIPROC_METHOD=spawn
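# HF_HUB_OFFLINE=1 disables Hugging Face Hub lookups (the compute nodes are
# assumed to have no internet access), so the weights and tokenizer must
# already exist under $MODEL_DIRECTORY. The spawn start method avoids
# re-initialising CUDA in forked vLLM worker processes.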

function set_VLLM_HOST_IP {
    export VLLM_HOST_IP=$(hostname --ip-address)
}
export -f set_VLLM_HOST_IP
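# `export -f` publishes the function through the environment, so the `bash -c`
# commands launched by srun below can call it and each node exports its own
# VLLM_HOST_IP before joining the Ray cluster.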

srun -N1 -n1 -c $(( SLURM_CPUS_PER_TASK/2 )) -w $MASTER bash -c "set_VLLM_HOST_IP; ray start --head --port=$PORT --block" &
sleep 5

if [[ $SLURM_NNODES -gt 1 ]]; then
    srun -N $(( SLURM_NNODES-1 )) --ntasks-per-node=1 -c $(( SLURM_CPUS_PER_TASK/2 )) -x $MASTER bash -c "set_VLLM_HOST_IP; ray start --address=$MASTER_IP:$PORT --block" &
    sleep 5
fi
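# Optional sanity check (not required for the run): query the Ray head to
# confirm that all $SLURM_NNODES nodes registered before starting vLLM.
# srun -N1 -n1 -w $MASTER ray status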

set_VLLM_HOST_IP

MODEL_NAME="Llama-3.2-1B-Instruct"
SERVER_PORT=$(find_available_port)
export OPENAI_API_KEY="I-love-vllm-serve"
export OPENAI_BASE_URL="http://localhost:$SERVER_PORT/v1"
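# vLLM exposes an OpenAI-compatible HTTP API; the key is just an arbitrary
# shared secret enforced via --api-key. OPENAI_API_KEY and OPENAI_BASE_URL are
# the standard variables read by OpenAI-style clients, which is how the
# lighteval openai endpoint below is expected to reach the local server.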

vllm serve $MODEL_DIRECTORY \
    --served-model-name $MODEL_NAME \
    --api-key $OPENAI_API_KEY \
    --enforce-eager \
    --port $SERVER_PORT \
    --tensor-parallel-size 8 \
    --dtype bfloat16 \
    --max-model-len 16384 \
    --gpu-memory-utilization 0.8 \
    --disable-custom-all-reduce \
    1>vllm.stdout 2>vllm.stderr &
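# With only 4 GPUs per node, a tensor-parallel size of 8 has to span both
# nodes; vLLM is expected to pick up the Ray cluster started above as its
# distributed executor for that. --enforce-eager skips CUDA graph capture,
# trading some speed for lower memory use and a faster start-up.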

ATTEMPT=0
DELAY=5
MAX_ATTEMPTS=60
until curl -s -o /dev/null -w "%{http_code}" $OPENAI_BASE_URL/models -H "Authorization: Bearer $OPENAI_API_KEY" | grep -E "^2[0-9]{2}$"; do
    ATTEMPT=$((ATTEMPT + 1))
    echo "$ATTEMPT attempts"
    if [ "$ATTEMPT" -ge "$MAX_ATTEMPTS" ]; then
        echo "Failed: the server did not become ready after $MAX_ATTEMPTS attempts."
        exit 1
    fi
    echo "vllm serve is not ready yet"
    sleep $DELAY
done
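# Optional smoke test (illustrative only, not part of the evaluation): ask the
# OpenAI-compatible endpoint for a short completion to confirm the model
# answers end to end.
# curl -s "$OPENAI_BASE_URL/chat/completions" \
#     -H "Authorization: Bearer $OPENAI_API_KEY" \
#     -H "Content-Type: application/json" \
#     -d "{\"model\": \"$MODEL_NAME\", \"messages\": [{\"role\": \"user\", \"content\": \"Bonjour\"}], \"max_tokens\": 8}"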

export TOKENIZER_PATH="$MODEL_DIRECTORY"

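# lighteval task spec: suite|task|num_fewshot|truncation_flag, comma-separated
# for several tasks (the last field is understood to toggle automatic few-shot
# truncation when the prompt would be too long). Both community tasks below are
# defined in $TASK_FILE.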
TASK_ARGS="community|gpqa-fr|0|0,community|ifeval-fr|0|0"

lighteval endpoint openai "$MODEL_NAME" "$TASK_ARGS" --custom-tasks $TASK_FILE
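# When lighteval finishes, the batch script exits and Slurm is expected to tear
# down the remaining job steps (the background vLLM server and the Ray
# daemons), so no explicit `ray stop` is issued here.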