modelscope · baoleai · Jun 25, 2024 · Jun 25, 2024 · Jun 25, 2024
diff --git a/examples/pytorch/llm/scripts/torchacc/baichuan2_13b_chat/swift_lora_sft.sh b/examples/pytorch/llm/scripts/torchacc/baichuan2_13b_chat/swift_lora_sft.sh
@@ -2,7 +2,8 @@
 # 80GB GPU memory
 # Note: TorchAcc is currently only available internally.
 
-# MASTER_ADDR=127.0.0.1 \
+
+export USE_TORCH_XLA=0
 
 NPROC_PER_NODE=2 \
 CUDA_VISIBLE_DEVICES=0,1 \

diff --git a/examples/pytorch/llm/scripts/torchacc/chatglm3_6b/swift_lora_sft.sh b/examples/pytorch/llm/scripts/torchacc/chatglm3_6b/swift_lora_sft.sh
@@ -2,8 +2,8 @@
 # 80GB GPU memory
 # Note: TorchAcc is currently only available internally.
 
-# MASTER_ADDR=127.0.0.1 \
-# MASTER_PORT=12356 \
+export USE_TORCH_XLA=0
+
 NPROC_PER_NODE=2 \
 CUDA_VISIBLE_DEVICES=0,1 \
 swift sft \

diff --git a/examples/pytorch/llm/scripts/torchacc/llama2_13b_chat/swift_lora_sft.sh b/examples/pytorch/llm/scripts/torchacc/llama2_13b_chat/swift_lora_sft.sh
@@ -2,7 +2,7 @@
 # 80GB GPU memory
 # Note: TorchAcc is currently only available internally.
 
-# MASTER_ADDR=127.0.0.1 \
+export USE_TORCH_XLA=0
 
 NPROC_PER_NODE=2 \
 CUDA_VISIBLE_DEVICES=0,1 \

diff --git a/examples/pytorch/llm/scripts/torchacc/llama3_8b_instruct/acc_lora_fsdp_sft.sh b/examples/pytorch/llm/scripts/torchacc/llama3_8b_instruct/acc_lora_fsdp_sft.sh
@@ -2,7 +2,6 @@
 # 80GB GPU memory
 # Note: TorchAcc is currently only available internally.
 export USE_TORCHACC=1
-export TORCHACC_TRIM_GRAPH=1
 export XLA_IR_SHAPE_CACHE_SIZE=100000000
 export XLA_ALLOCATOR_FRACTION=0.95
 export XLA_EXPERIMENTAL=nonzero:masked_select

diff --git a/examples/pytorch/llm/scripts/torchacc/llama3_8b_instruct/swift_lora_sft.sh b/examples/pytorch/llm/scripts/torchacc/llama3_8b_instruct/swift_lora_sft.sh
@@ -2,7 +2,7 @@
 # 80GB GPU memory
 # Note: TorchAcc is currently only available internally.
 
-# MASTER_ADDR=127.0.0.1 \
+export USE_TORCH_XLA=0
 
 NPROC_PER_NODE=2 \
 CUDA_VISIBLE_DEVICES=0,1 \

diff --git a/examples/pytorch/llm/scripts/torchacc/qwen1half_14b_chat/swift_lora_sft.sh b/examples/pytorch/llm/scripts/torchacc/qwen1half_14b_chat/swift_lora_sft.sh
@@ -2,7 +2,7 @@
 # 80GB GPU memory
 # Note: TorchAcc is currently only available internally.
 
-# MASTER_ADDR=127.0.0.1 \
+export USE_TORCH_XLA=0
 
 NPROC_PER_NODE=2 \
 CUDA_VISIBLE_DEVICES=0,1 \

diff --git a/examples/pytorch/llm/scripts/torchacc/qwen1half_32b_chat/swift_lora_sft.sh b/examples/pytorch/llm/scripts/torchacc/qwen1half_32b_chat/swift_lora_sft.sh
@@ -2,7 +2,7 @@
 # 80GB GPU memory
 # Note: TorchAcc is currently only available internally.
 
-# MASTER_ADDR=127.0.0.1 \
+export USE_TORCH_XLA=0
 
 NPROC_PER_NODE=2 \
 CUDA_VISIBLE_DEVICES=0,1,2,3 \

diff --git a/examples/pytorch/llm/scripts/torchacc/qwen_72b_chat/swift_lora_sft.sh b/examples/pytorch/llm/scripts/torchacc/qwen_72b_chat/swift_lora_sft.sh
@@ -2,10 +2,10 @@
 # 80GB GPU memory
 # Note: TorchAcc is currently only available internally.
 
-# MASTER_ADDR=127.0.0.1 \
+export USE_TORCH_XLA=0
 
-NPROC_PER_NODE=1 \
-CUDA_VISIBLE_DEVICES=7 \
+NPROC_PER_NODE=2 \
+CUDA_VISIBLE_DEVICES=0,1,2,3 \
 swift sft \
   --model_id_or_path qwen/Qwen-72B-Chat \
   --dataset codefuse-python-en \

diff --git a/examples/pytorch/llm/scripts/torchacc/yi_34b_chat/swift_lora_sft.sh b/examples/pytorch/llm/scripts/torchacc/yi_34b_chat/swift_lora_sft.sh
@@ -2,7 +2,7 @@
 # 80GB GPU memory
 # Note: TorchAcc is currently only available internally.
 
-# MASTER_ADDR=127.0.0.1 \
+export USE_TORCH_XLA=0
 
 NPROC_PER_NODE=2 \
 CUDA_VISIBLE_DEVICES=0,1,2,3 \