From f88741dc0eeb8fb6e1bb41818fcf70167ad46a9b Mon Sep 17 00:00:00 2001 From: "wangang.wa" Date: Tue, 25 Jun 2024 15:52:01 +0800 Subject: [PATCH 1/2] add USE_TORCH_XLA=0 flag --- .../scripts/torchacc/baichuan2_13b_chat/swift_lora_sft.sh | 3 ++- .../llm/scripts/torchacc/chatglm3_6b/swift_lora_sft.sh | 4 ++-- .../llm/scripts/torchacc/llama2_13b_chat/swift_lora_sft.sh | 2 +- .../torchacc/llama3_8b_instruct/acc_lora_fsdp_sft.sh | 2 +- .../scripts/torchacc/llama3_8b_instruct/swift_lora_sft.sh | 2 +- .../scripts/torchacc/qwen1half_14b_chat/swift_lora_sft.sh | 2 +- .../scripts/torchacc/qwen1half_32b_chat/swift_lora_sft.sh | 2 +- .../llm/scripts/torchacc/qwen_72b_chat/swift_lora_sft.sh | 6 +++--- .../llm/scripts/torchacc/yi_34b_chat/swift_lora_sft.sh | 2 +- 9 files changed, 13 insertions(+), 12 deletions(-) diff --git a/examples/pytorch/llm/scripts/torchacc/baichuan2_13b_chat/swift_lora_sft.sh b/examples/pytorch/llm/scripts/torchacc/baichuan2_13b_chat/swift_lora_sft.sh index c72771dbbf..72d54086e0 100644 --- a/examples/pytorch/llm/scripts/torchacc/baichuan2_13b_chat/swift_lora_sft.sh +++ b/examples/pytorch/llm/scripts/torchacc/baichuan2_13b_chat/swift_lora_sft.sh @@ -2,7 +2,8 @@ # 80GB GPU memory # Note: TorchAcc is currently only available internally. -# MASTER_ADDR=127.0.0.1 \ + +export USE_TORCH_XLA=0 NPROC_PER_NODE=2 \ CUDA_VISIBLE_DEVICES=0,1 \ diff --git a/examples/pytorch/llm/scripts/torchacc/chatglm3_6b/swift_lora_sft.sh b/examples/pytorch/llm/scripts/torchacc/chatglm3_6b/swift_lora_sft.sh index 0fd4e5d4b6..427ca158ba 100644 --- a/examples/pytorch/llm/scripts/torchacc/chatglm3_6b/swift_lora_sft.sh +++ b/examples/pytorch/llm/scripts/torchacc/chatglm3_6b/swift_lora_sft.sh @@ -2,8 +2,8 @@ # 80GB GPU memory # Note: TorchAcc is currently only available internally. -# MASTER_ADDR=127.0.0.1 \ -# MASTER_PORT=12356 \ +export USE_TORCH_XLA=0 + NPROC_PER_NODE=2 \ CUDA_VISIBLE_DEVICES=0,1 \ swift sft \ diff --git a/examples/pytorch/llm/scripts/torchacc/llama2_13b_chat/swift_lora_sft.sh b/examples/pytorch/llm/scripts/torchacc/llama2_13b_chat/swift_lora_sft.sh index 3fc24e19e1..ad0789a9cb 100644 --- a/examples/pytorch/llm/scripts/torchacc/llama2_13b_chat/swift_lora_sft.sh +++ b/examples/pytorch/llm/scripts/torchacc/llama2_13b_chat/swift_lora_sft.sh @@ -2,7 +2,7 @@ # 80GB GPU memory # Note: TorchAcc is currently only available internally. -# MASTER_ADDR=127.0.0.1 \ +export USE_TORCH_XLA=0 NPROC_PER_NODE=2 \ CUDA_VISIBLE_DEVICES=0,1 \ diff --git a/examples/pytorch/llm/scripts/torchacc/llama3_8b_instruct/acc_lora_fsdp_sft.sh b/examples/pytorch/llm/scripts/torchacc/llama3_8b_instruct/acc_lora_fsdp_sft.sh index d1c983e1b1..274182055e 100644 --- a/examples/pytorch/llm/scripts/torchacc/llama3_8b_instruct/acc_lora_fsdp_sft.sh +++ b/examples/pytorch/llm/scripts/torchacc/llama3_8b_instruct/acc_lora_fsdp_sft.sh @@ -2,7 +2,7 @@ # 80GB GPU memory # Note: TorchAcc is currently only available internally. export USE_TORCHACC=1 -export TORCHACC_TRIM_GRAPH=1 +#export TORCHACC_TRIM_GRAPH=1 export XLA_IR_SHAPE_CACHE_SIZE=100000000 export XLA_ALLOCATOR_FRACTION=0.95 export XLA_EXPERIMENTAL=nonzero:masked_select diff --git a/examples/pytorch/llm/scripts/torchacc/llama3_8b_instruct/swift_lora_sft.sh b/examples/pytorch/llm/scripts/torchacc/llama3_8b_instruct/swift_lora_sft.sh index 3454bdb26e..bae2e2a8bd 100644 --- a/examples/pytorch/llm/scripts/torchacc/llama3_8b_instruct/swift_lora_sft.sh +++ b/examples/pytorch/llm/scripts/torchacc/llama3_8b_instruct/swift_lora_sft.sh @@ -2,7 +2,7 @@ # 80GB GPU memory # Note: TorchAcc is currently only available internally. -# MASTER_ADDR=127.0.0.1 \ +export USE_TORCH_XLA=0 NPROC_PER_NODE=2 \ CUDA_VISIBLE_DEVICES=0,1 \ diff --git a/examples/pytorch/llm/scripts/torchacc/qwen1half_14b_chat/swift_lora_sft.sh b/examples/pytorch/llm/scripts/torchacc/qwen1half_14b_chat/swift_lora_sft.sh index da4d328e04..baef03bbc9 100644 --- a/examples/pytorch/llm/scripts/torchacc/qwen1half_14b_chat/swift_lora_sft.sh +++ b/examples/pytorch/llm/scripts/torchacc/qwen1half_14b_chat/swift_lora_sft.sh @@ -2,7 +2,7 @@ # 80GB GPU memory # Note: TorchAcc is currently only available internally. -# MASTER_ADDR=127.0.0.1 \ +export USE_TORCH_XLA=0 NPROC_PER_NODE=2 \ CUDA_VISIBLE_DEVICES=0,1 \ diff --git a/examples/pytorch/llm/scripts/torchacc/qwen1half_32b_chat/swift_lora_sft.sh b/examples/pytorch/llm/scripts/torchacc/qwen1half_32b_chat/swift_lora_sft.sh index 25a1accc7d..4d4ae91172 100644 --- a/examples/pytorch/llm/scripts/torchacc/qwen1half_32b_chat/swift_lora_sft.sh +++ b/examples/pytorch/llm/scripts/torchacc/qwen1half_32b_chat/swift_lora_sft.sh @@ -2,7 +2,7 @@ # 80GB GPU memory # Note: TorchAcc is currently only available internally. -# MASTER_ADDR=127.0.0.1 \ +export USE_TORCH_XLA=0 NPROC_PER_NODE=2 \ CUDA_VISIBLE_DEVICES=0,1,2,3 \ diff --git a/examples/pytorch/llm/scripts/torchacc/qwen_72b_chat/swift_lora_sft.sh b/examples/pytorch/llm/scripts/torchacc/qwen_72b_chat/swift_lora_sft.sh index 30347822fc..d13cd75da4 100644 --- a/examples/pytorch/llm/scripts/torchacc/qwen_72b_chat/swift_lora_sft.sh +++ b/examples/pytorch/llm/scripts/torchacc/qwen_72b_chat/swift_lora_sft.sh @@ -2,10 +2,10 @@ # 80GB GPU memory # Note: TorchAcc is currently only available internally. -# MASTER_ADDR=127.0.0.1 \ +export USE_TORCH_XLA=0 -NPROC_PER_NODE=1 \ -CUDA_VISIBLE_DEVICES=7 \ +NPROC_PER_NODE=2 \ +CUDA_VISIBLE_DEVICES=0,1,2,3 \ swift sft \ --model_id_or_path qwen/Qwen-72B-Chat \ --dataset codefuse-python-en \ diff --git a/examples/pytorch/llm/scripts/torchacc/yi_34b_chat/swift_lora_sft.sh b/examples/pytorch/llm/scripts/torchacc/yi_34b_chat/swift_lora_sft.sh index 623177e41b..d9f9ef8d65 100644 --- a/examples/pytorch/llm/scripts/torchacc/yi_34b_chat/swift_lora_sft.sh +++ b/examples/pytorch/llm/scripts/torchacc/yi_34b_chat/swift_lora_sft.sh @@ -2,7 +2,7 @@ # 80GB GPU memory # Note: TorchAcc is currently only available internally. -# MASTER_ADDR=127.0.0.1 \ +export USE_TORCH_XLA=0 NPROC_PER_NODE=2 \ CUDA_VISIBLE_DEVICES=0,1,2,3 \ From 858bd352bb4ff921c9100ab3c31d78313f7e165e Mon Sep 17 00:00:00 2001 From: "wangang.wa" Date: Tue, 25 Jun 2024 15:53:39 +0800 Subject: [PATCH 2/2] remove trim graph --- .../llm/scripts/torchacc/llama3_8b_instruct/acc_lora_fsdp_sft.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/pytorch/llm/scripts/torchacc/llama3_8b_instruct/acc_lora_fsdp_sft.sh b/examples/pytorch/llm/scripts/torchacc/llama3_8b_instruct/acc_lora_fsdp_sft.sh index 274182055e..73bd3735d6 100644 --- a/examples/pytorch/llm/scripts/torchacc/llama3_8b_instruct/acc_lora_fsdp_sft.sh +++ b/examples/pytorch/llm/scripts/torchacc/llama3_8b_instruct/acc_lora_fsdp_sft.sh @@ -2,7 +2,6 @@ # 80GB GPU memory # Note: TorchAcc is currently only available internally. export USE_TORCHACC=1 -#export TORCHACC_TRIM_GRAPH=1 export XLA_IR_SHAPE_CACHE_SIZE=100000000 export XLA_ALLOCATOR_FRACTION=0.95 export XLA_EXPERIMENTAL=nonzero:masked_select