Skip to content

Commit b0c6ff5

Browse files
we1559 and SunMarc
authored
fix issue that some examples with no trainer use accelerator.end_training in a wrong way (#37435)
* fix issue that some examples with no trainer use accelerator.end_training in a wrong way
* reformat code
Co-authored-by: Marc Sun <[email protected]>
1 parent 6f5014a commit b0c6ff5

File tree

12 files changed

+36
-36
lines changed

12 files changed

+36
-36
lines changed

examples/pytorch/image-classification/run_image_classification_no_trainer.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -617,9 +617,6 @@ def collate_fn(examples):
617617
output_dir = os.path.join(args.output_dir, output_dir)
618618
accelerator.save_state(output_dir)
619619

620-
if args.with_tracking:
621-
accelerator.end_training()
622-
623620
if args.output_dir is not None:
624621
accelerator.wait_for_everyone()
625622
unwrapped_model = accelerator.unwrap_model(model)
@@ -640,6 +637,9 @@ def collate_fn(examples):
640637
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
641638
json.dump(all_results, f)
642639

640+
accelerator.wait_for_everyone()
641+
accelerator.end_training()
642+
643643

644644
if __name__ == "__main__":
645645
main()

examples/pytorch/image-pretraining/run_mim_no_trainer.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -778,9 +778,6 @@ def preprocess_images(examples):
778778
output_dir = os.path.join(args.output_dir, output_dir)
779779
accelerator.save_state(output_dir)
780780

781-
if args.with_tracking:
782-
accelerator.end_training()
783-
784781
if args.output_dir is not None:
785782
accelerator.wait_for_everyone()
786783
unwrapped_model = accelerator.unwrap_model(model)
@@ -798,6 +795,9 @@ def preprocess_images(examples):
798795
token=args.hub_token,
799796
)
800797

798+
accelerator.wait_for_everyone()
799+
accelerator.end_training()
800+
801801

802802
if __name__ == "__main__":
803803
main()

examples/pytorch/instance-segmentation/run_instance_segmentation_no_trainer.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -714,9 +714,6 @@ def main():
714714

715715
logger.info(f"Test metrics: {metrics}")
716716

717-
if args.with_tracking:
718-
accelerator.end_training()
719-
720717
if args.output_dir is not None:
721718
accelerator.wait_for_everyone()
722719
unwrapped_model = accelerator.unwrap_model(model)
@@ -739,6 +736,9 @@ def main():
739736
ignore_patterns=["epoch_*"],
740737
)
741738

739+
accelerator.wait_for_everyone()
740+
accelerator.end_training()
741+
742742

743743
if __name__ == "__main__":
744744
main()

examples/pytorch/language-modeling/run_clm_no_trainer.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -697,9 +697,6 @@ def group_texts(examples):
697697
output_dir = os.path.join(args.output_dir, output_dir)
698698
accelerator.save_state(output_dir)
699699

700-
if args.with_tracking:
701-
accelerator.end_training()
702-
703700
if args.output_dir is not None:
704701
accelerator.wait_for_everyone()
705702
unwrapped_model = accelerator.unwrap_model(model)
@@ -719,6 +716,9 @@ def group_texts(examples):
719716
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
720717
json.dump({"perplexity": perplexity}, f)
721718

719+
accelerator.wait_for_everyone()
720+
accelerator.end_training()
721+
722722

723723
if __name__ == "__main__":
724724
main()

examples/pytorch/language-modeling/run_fim_no_trainer.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -891,9 +891,6 @@ def apply_fim(examples):
891891
output_dir = os.path.join(args.output_dir, output_dir)
892892
accelerator.save_state(output_dir)
893893

894-
if args.with_tracking:
895-
accelerator.end_training()
896-
897894
if args.output_dir is not None:
898895
accelerator.wait_for_everyone()
899896
unwrapped_model = accelerator.unwrap_model(model)
@@ -908,6 +905,9 @@ def apply_fim(examples):
908905
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
909906
json.dump({"perplexity": perplexity}, f)
910907

908+
accelerator.wait_for_everyone()
909+
accelerator.end_training()
910+
911911

912912
if __name__ == "__main__":
913913
main()

examples/pytorch/language-modeling/run_mlm_no_trainer.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -735,9 +735,6 @@ def group_texts(examples):
735735
output_dir = os.path.join(args.output_dir, output_dir)
736736
accelerator.save_state(output_dir)
737737

738-
if args.with_tracking:
739-
accelerator.end_training()
740-
741738
if args.output_dir is not None:
742739
accelerator.wait_for_everyone()
743740
unwrapped_model = accelerator.unwrap_model(model)
@@ -757,6 +754,9 @@ def group_texts(examples):
757754
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
758755
json.dump({"perplexity": perplexity}, f)
759756

757+
accelerator.wait_for_everyone()
758+
accelerator.end_training()
759+
760760

761761
if __name__ == "__main__":
762762
main()

examples/pytorch/multiple-choice/run_swag_no_trainer.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -622,9 +622,6 @@ def preprocess_function(examples):
622622
output_dir = os.path.join(args.output_dir, output_dir)
623623
accelerator.save_state(output_dir)
624624

625-
if args.with_tracking:
626-
accelerator.end_training()
627-
628625
if args.output_dir is not None:
629626
accelerator.wait_for_everyone()
630627
unwrapped_model = accelerator.unwrap_model(model)
@@ -645,6 +642,9 @@ def preprocess_function(examples):
645642
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
646643
json.dump(all_results, f)
647644

645+
accelerator.wait_for_everyone()
646+
accelerator.end_training()
647+
648648

649649
if __name__ == "__main__":
650650
main()

examples/pytorch/object-detection/run_object_detection_no_trainer.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -759,9 +759,6 @@ def main():
759759

760760
logger.info(f"Test metrics: {metrics}")
761761

762-
if args.with_tracking:
763-
accelerator.end_training()
764-
765762
if args.output_dir is not None:
766763
accelerator.wait_for_everyone()
767764
unwrapped_model = accelerator.unwrap_model(model)
@@ -784,6 +781,9 @@ def main():
784781
ignore_patterns=["epoch_*"],
785782
)
786783

784+
accelerator.wait_for_everyone()
785+
accelerator.end_training()
786+
787787

788788
if __name__ == "__main__":
789789
main()

examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -602,9 +602,6 @@ def preprocess_batch(example_batch, transforms: A.Compose):
602602
output_dir = os.path.join(args.output_dir, output_dir)
603603
accelerator.save_state(output_dir)
604604

605-
if args.with_tracking:
606-
accelerator.end_training()
607-
608605
if args.output_dir is not None:
609606
accelerator.wait_for_everyone()
610607
unwrapped_model = accelerator.unwrap_model(model)
@@ -628,6 +625,9 @@ def preprocess_batch(example_batch, transforms: A.Compose):
628625
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
629626
json.dump(all_results, f, indent=2)
630627

628+
accelerator.wait_for_everyone()
629+
accelerator.end_training()
630+
631631

632632
if __name__ == "__main__":
633633
main()

examples/pytorch/text-classification/run_glue_no_trainer.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -634,9 +634,6 @@ def preprocess_function(examples):
634634
output_dir = os.path.join(args.output_dir, output_dir)
635635
accelerator.save_state(output_dir)
636636

637-
if args.with_tracking:
638-
accelerator.end_training()
639-
640637
if args.output_dir is not None:
641638
accelerator.wait_for_everyone()
642639
unwrapped_model = accelerator.unwrap_model(model)
@@ -679,6 +676,9 @@ def preprocess_function(examples):
679676
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
680677
json.dump(all_results, f)
681678

679+
accelerator.wait_for_everyone()
680+
accelerator.end_training()
681+
682682

683683
if __name__ == "__main__":
684684
main()

examples/pytorch/token-classification/run_ner_no_trainer.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -794,9 +794,6 @@ def compute_metrics():
794794
output_dir = os.path.join(args.output_dir, output_dir)
795795
accelerator.save_state(output_dir)
796796

797-
if args.with_tracking:
798-
accelerator.end_training()
799-
800797
if args.output_dir is not None:
801798
accelerator.wait_for_everyone()
802799
unwrapped_model = accelerator.unwrap_model(model)
@@ -826,6 +823,9 @@ def compute_metrics():
826823
all_results[key] = int(value)
827824
json.dump(all_results, f)
828825

826+
accelerator.wait_for_everyone()
827+
accelerator.end_training()
828+
829829

830830
if __name__ == "__main__":
831831
main()

examples/pytorch/translation/run_translation_no_trainer.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -762,9 +762,6 @@ def postprocess_text(preds, labels):
762762
output_dir = os.path.join(args.output_dir, output_dir)
763763
accelerator.save_state(output_dir)
764764

765-
if args.with_tracking:
766-
accelerator.end_training()
767-
768765
if args.output_dir is not None:
769766
accelerator.wait_for_everyone()
770767
unwrapped_model = accelerator.unwrap_model(model)
@@ -784,6 +781,9 @@ def postprocess_text(preds, labels):
784781
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
785782
json.dump({"eval_bleu": eval_metric["score"]}, f)
786783

784+
accelerator.wait_for_everyone()
785+
accelerator.end_training()
786+
787787

788788
if __name__ == "__main__":
789789
main()

0 commit comments

Comments (0)