From f88741dc0eeb8fb6e1bb41818fcf70167ad46a9b Mon Sep 17 00:00:00 2001
From: "wangang.wa" <wangang.wa@alibaba-inc.com>
Date: Tue, 25 Jun 2024 15:52:01 +0800
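Subject: [PATCH 1/2] add USE_TORCH_XLA=0 flag

Export USE_TORCH_XLA=0 in the torchacc swift_lora_sft.sh example scripts,
replacing the commented-out MASTER_ADDR/MASTER_PORT lines. Also comment
out TORCHACC_TRIM_GRAPH in llama3_8b_instruct/acc_lora_fsdp_sft.sh and
switch the qwen_72b_chat script from NPROC_PER_NODE=1 with
CUDA_VISIBLE_DEVICES=7 to NPROC_PER_NODE=2 with
CUDA_VISIBLE_DEVICES=0,1,2,3.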

---
 .../scripts/torchacc/baichuan2_13b_chat/swift_lora_sft.sh   | 3 ++-
 .../llm/scripts/torchacc/chatglm3_6b/swift_lora_sft.sh      | 4 ++--
 .../llm/scripts/torchacc/llama2_13b_chat/swift_lora_sft.sh  | 2 +-
 .../torchacc/llama3_8b_instruct/acc_lora_fsdp_sft.sh        | 2 +-
 .../scripts/torchacc/llama3_8b_instruct/swift_lora_sft.sh   | 2 +-
 .../scripts/torchacc/qwen1half_14b_chat/swift_lora_sft.sh   | 2 +-
 .../scripts/torchacc/qwen1half_32b_chat/swift_lora_sft.sh   | 2 +-
 .../llm/scripts/torchacc/qwen_72b_chat/swift_lora_sft.sh    | 6 +++---
 .../llm/scripts/torchacc/yi_34b_chat/swift_lora_sft.sh      | 2 +-
 9 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/examples/pytorch/llm/scripts/torchacc/baichuan2_13b_chat/swift_lora_sft.sh b/examples/pytorch/llm/scripts/torchacc/baichuan2_13b_chat/swift_lora_sft.sh
index c72771dbbf..72d54086e0 100644
--- a/examples/pytorch/llm/scripts/torchacc/baichuan2_13b_chat/swift_lora_sft.sh
+++ b/examples/pytorch/llm/scripts/torchacc/baichuan2_13b_chat/swift_lora_sft.sh
@@ -2,7 +2,8 @@
 # 80GB GPU memory
 # Note: TorchAcc is currently only available internally.
 
-# MASTER_ADDR=127.0.0.1 \
+
+export USE_TORCH_XLA=0
 
 NPROC_PER_NODE=2 \
 CUDA_VISIBLE_DEVICES=0,1 \
diff --git a/examples/pytorch/llm/scripts/torchacc/chatglm3_6b/swift_lora_sft.sh b/examples/pytorch/llm/scripts/torchacc/chatglm3_6b/swift_lora_sft.sh
index 0fd4e5d4b6..427ca158ba 100644
--- a/examples/pytorch/llm/scripts/torchacc/chatglm3_6b/swift_lora_sft.sh
+++ b/examples/pytorch/llm/scripts/torchacc/chatglm3_6b/swift_lora_sft.sh
@@ -2,8 +2,8 @@
 # 80GB GPU memory
 # Note: TorchAcc is currently only available internally.
 
-# MASTER_ADDR=127.0.0.1 \
-# MASTER_PORT=12356 \
+export USE_TORCH_XLA=0
+
 NPROC_PER_NODE=2 \
 CUDA_VISIBLE_DEVICES=0,1 \
 swift sft \
diff --git a/examples/pytorch/llm/scripts/torchacc/llama2_13b_chat/swift_lora_sft.sh b/examples/pytorch/llm/scripts/torchacc/llama2_13b_chat/swift_lora_sft.sh
index 3fc24e19e1..ad0789a9cb 100644
--- a/examples/pytorch/llm/scripts/torchacc/llama2_13b_chat/swift_lora_sft.sh
+++ b/examples/pytorch/llm/scripts/torchacc/llama2_13b_chat/swift_lora_sft.sh
@@ -2,7 +2,7 @@
 # 80GB GPU memory
 # Note: TorchAcc is currently only available internally.
 
-# MASTER_ADDR=127.0.0.1 \
+export USE_TORCH_XLA=0
 
 NPROC_PER_NODE=2 \
 CUDA_VISIBLE_DEVICES=0,1 \
diff --git a/examples/pytorch/llm/scripts/torchacc/llama3_8b_instruct/acc_lora_fsdp_sft.sh b/examples/pytorch/llm/scripts/torchacc/llama3_8b_instruct/acc_lora_fsdp_sft.sh
index d1c983e1b1..274182055e 100644
--- a/examples/pytorch/llm/scripts/torchacc/llama3_8b_instruct/acc_lora_fsdp_sft.sh
+++ b/examples/pytorch/llm/scripts/torchacc/llama3_8b_instruct/acc_lora_fsdp_sft.sh
@@ -2,7 +2,7 @@
 # 80GB GPU memory
 # Note: TorchAcc is currently only available internally.
 export USE_TORCHACC=1
-export TORCHACC_TRIM_GRAPH=1
+#export TORCHACC_TRIM_GRAPH=1
 export XLA_IR_SHAPE_CACHE_SIZE=100000000
 export XLA_ALLOCATOR_FRACTION=0.95
 export XLA_EXPERIMENTAL=nonzero:masked_select
diff --git a/examples/pytorch/llm/scripts/torchacc/llama3_8b_instruct/swift_lora_sft.sh b/examples/pytorch/llm/scripts/torchacc/llama3_8b_instruct/swift_lora_sft.sh
index 3454bdb26e..bae2e2a8bd 100644
--- a/examples/pytorch/llm/scripts/torchacc/llama3_8b_instruct/swift_lora_sft.sh
+++ b/examples/pytorch/llm/scripts/torchacc/llama3_8b_instruct/swift_lora_sft.sh
@@ -2,7 +2,7 @@
 # 80GB GPU memory
 # Note: TorchAcc is currently only available internally.
 
-# MASTER_ADDR=127.0.0.1 \
+export USE_TORCH_XLA=0
 
 NPROC_PER_NODE=2 \
 CUDA_VISIBLE_DEVICES=0,1 \
diff --git a/examples/pytorch/llm/scripts/torchacc/qwen1half_14b_chat/swift_lora_sft.sh b/examples/pytorch/llm/scripts/torchacc/qwen1half_14b_chat/swift_lora_sft.sh
index da4d328e04..baef03bbc9 100644
--- a/examples/pytorch/llm/scripts/torchacc/qwen1half_14b_chat/swift_lora_sft.sh
+++ b/examples/pytorch/llm/scripts/torchacc/qwen1half_14b_chat/swift_lora_sft.sh
@@ -2,7 +2,7 @@
 # 80GB GPU memory
 # Note: TorchAcc is currently only available internally.
 
-# MASTER_ADDR=127.0.0.1 \
+export USE_TORCH_XLA=0
 
 NPROC_PER_NODE=2 \
 CUDA_VISIBLE_DEVICES=0,1 \
diff --git a/examples/pytorch/llm/scripts/torchacc/qwen1half_32b_chat/swift_lora_sft.sh b/examples/pytorch/llm/scripts/torchacc/qwen1half_32b_chat/swift_lora_sft.sh
index 25a1accc7d..4d4ae91172 100644
--- a/examples/pytorch/llm/scripts/torchacc/qwen1half_32b_chat/swift_lora_sft.sh
+++ b/examples/pytorch/llm/scripts/torchacc/qwen1half_32b_chat/swift_lora_sft.sh
@@ -2,7 +2,7 @@
 # 80GB GPU memory
 # Note: TorchAcc is currently only available internally.
 
-# MASTER_ADDR=127.0.0.1 \
+export USE_TORCH_XLA=0
 
 NPROC_PER_NODE=2 \
 CUDA_VISIBLE_DEVICES=0,1,2,3 \
diff --git a/examples/pytorch/llm/scripts/torchacc/qwen_72b_chat/swift_lora_sft.sh b/examples/pytorch/llm/scripts/torchacc/qwen_72b_chat/swift_lora_sft.sh
index 30347822fc..d13cd75da4 100644
--- a/examples/pytorch/llm/scripts/torchacc/qwen_72b_chat/swift_lora_sft.sh
+++ b/examples/pytorch/llm/scripts/torchacc/qwen_72b_chat/swift_lora_sft.sh
@@ -2,10 +2,10 @@
 # 80GB GPU memory
 # Note: TorchAcc is currently only available internally.
 
-# MASTER_ADDR=127.0.0.1 \
+export USE_TORCH_XLA=0
 
-NPROC_PER_NODE=1 \
-CUDA_VISIBLE_DEVICES=7 \
+NPROC_PER_NODE=2 \
+CUDA_VISIBLE_DEVICES=0,1,2,3 \
 swift sft \
   --model_id_or_path qwen/Qwen-72B-Chat \
   --dataset codefuse-python-en \
diff --git a/examples/pytorch/llm/scripts/torchacc/yi_34b_chat/swift_lora_sft.sh b/examples/pytorch/llm/scripts/torchacc/yi_34b_chat/swift_lora_sft.sh
index 623177e41b..d9f9ef8d65 100644
--- a/examples/pytorch/llm/scripts/torchacc/yi_34b_chat/swift_lora_sft.sh
+++ b/examples/pytorch/llm/scripts/torchacc/yi_34b_chat/swift_lora_sft.sh
@@ -2,7 +2,7 @@
 # 80GB GPU memory
 # Note: TorchAcc is currently only available internally.
 
-# MASTER_ADDR=127.0.0.1 \
+export USE_TORCH_XLA=0
 
 NPROC_PER_NODE=2 \
 CUDA_VISIBLE_DEVICES=0,1,2,3 \

From 858bd352bb4ff921c9100ab3c31d78313f7e165e Mon Sep 17 00:00:00 2001
From: "wangang.wa" <wangang.wa@alibaba-inc.com>
Date: Tue, 25 Jun 2024 15:53:39 +0800
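Subject: [PATCH 2/2] remove trim graph

Delete the commented-out "export TORCHACC_TRIM_GRAPH=1" line from
llama3_8b_instruct/acc_lora_fsdp_sft.sh.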

---
 .../llm/scripts/torchacc/llama3_8b_instruct/acc_lora_fsdp_sft.sh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/examples/pytorch/llm/scripts/torchacc/llama3_8b_instruct/acc_lora_fsdp_sft.sh b/examples/pytorch/llm/scripts/torchacc/llama3_8b_instruct/acc_lora_fsdp_sft.sh
index 274182055e..73bd3735d6 100644
--- a/examples/pytorch/llm/scripts/torchacc/llama3_8b_instruct/acc_lora_fsdp_sft.sh
+++ b/examples/pytorch/llm/scripts/torchacc/llama3_8b_instruct/acc_lora_fsdp_sft.sh
@@ -2,7 +2,6 @@
 # 80GB GPU memory
 # Note: TorchAcc is currently only available internally.
 export USE_TORCHACC=1
-#export TORCHACC_TRIM_GRAPH=1
 export XLA_IR_SHAPE_CACHE_SIZE=100000000
 export XLA_ALLOCATOR_FRACTION=0.95
 export XLA_EXPERIMENTAL=nonzero:masked_select