
Commit 396f170

override for fix from huggingface/transformers#37162
1 parent 6c305bb commit 396f170

6 files changed (+78, -10)


src/axolotl/core/trainers/base.py

Lines changed: 4 additions & 1 deletion
@@ -28,6 +28,7 @@
 from axolotl.core.trainers.mixins import (
     OptimizerMixin,
+    RngLoaderMixin,
     SchedulerMixin,
     SequenceParallelMixin,
 )
@@ -40,7 +41,9 @@
 LOG = logging.getLogger(__name__)


-class AxolotlTrainer(SchedulerMixin, OptimizerMixin, SequenceParallelMixin, Trainer):
+class AxolotlTrainer(
+    SchedulerMixin, OptimizerMixin, RngLoaderMixin, SequenceParallelMixin, Trainer
+):
     """Extend the base Trainer for axolotl helpers"""

     args = None  # type: "AxolotlTrainingArguments"  # type: ignore[name-defined]
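
The mixin must appear before Trainer in the bases list so its method wins the MRO lookup. A minimal, self-contained sketch (stand-in classes, not the real transformers/axolotl ones) of why this ordering is sufficient to override _load_rng_state:

# MRO sketch with stand-in classes; not the actual Trainer or mixin.
class Trainer:
    def _load_rng_state(self, checkpoint):
        return "upstream implementation (with the bug)"

class RngLoaderMixin(Trainer):
    def _load_rng_state(self, checkpoint):
        return "patched implementation"

class AxolotlTrainer(RngLoaderMixin, Trainer):
    """Mixin listed before Trainer, so its override is found first."""

print([c.__name__ for c in AxolotlTrainer.__mro__])
# ['AxolotlTrainer', 'RngLoaderMixin', 'Trainer', 'object']
print(AxolotlTrainer()._load_rng_state(None))  # -> "patched implementation"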

src/axolotl/core/trainers/dpo/trainer.py

Lines changed: 2 additions & 2 deletions
@@ -13,7 +13,7 @@
 from transformers.utils import is_sagemaker_mp_enabled
 from trl import DPOTrainer

-from axolotl.core.trainers.mixins import SchedulerMixin
+from axolotl.core.trainers.mixins import RngLoaderMixin, SchedulerMixin
 from axolotl.core.trainers.utils import (
     sanitize_kwargs_for_ds_tagging,
     sanitize_kwargs_for_tagging,
@@ -23,7 +23,7 @@
     import smdistributed.modelparallel.torch as smp


-class AxolotlDPOTrainer(SchedulerMixin, DPOTrainer):
+class AxolotlDPOTrainer(RngLoaderMixin, SchedulerMixin, DPOTrainer):
     """
     Extend the base DPOTrainer for axolotl helpers
     """

src/axolotl/core/trainers/grpo/trainer.py

Lines changed: 2 additions & 2 deletions
@@ -8,13 +8,13 @@
 from trl import GRPOTrainer
 from trl.extras.profiling import profiling_decorator

-from axolotl.core.trainers.base import SchedulerMixin
+from axolotl.core.trainers.mixins import RngLoaderMixin, SchedulerMixin

 if is_deepspeed_available():
     import deepspeed


-class AxolotlGRPOTrainer(SchedulerMixin, GRPOTrainer):
+class AxolotlGRPOTrainer(RngLoaderMixin, SchedulerMixin, GRPOTrainer):
     """
     Extend the base GRPOTrainer for axolotl helpers
     """

src/axolotl/core/trainers/mixins/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -4,5 +4,6 @@
 # flake8: noqa

 from .optimizer import OptimizerMixin
+from .rng_state_loader import RngLoaderMixin
 from .scheduler import SchedulerMixin
 from .sequence_parallel import SequenceParallelMixin
src/axolotl/core/trainers/mixins/rng_state_loader.py

Lines changed: 63 additions & 0 deletions
@@ -0,0 +1,63 @@
+"""
+Temporary fix/override for bug in resume from checkpoint
+
+See https://github.com/huggingface/transformers/pull/37162
+"""
+
+import logging
+import os
+import random
+
+import numpy as np
+import torch
+from transformers import Trainer, is_torch_npu_available
+from transformers.trainer import safe_globals
+from transformers.trainer_pt_utils import set_rng_state_for_device
+from transformers.training_args import ParallelMode
+
+LOG = logging.getLogger(__name__)
+
+
+class RngLoaderMixin(Trainer):
+    """
+    mixin for method override to load RNG states from a checkpoint
+    """
+
+    def _load_rng_state(self, checkpoint):
+        # Load RNG states from `checkpoint`
+        if checkpoint is None:
+            return
+
+        if self.args.world_size > 1:
+            process_index = self.args.process_index
+            rng_file = os.path.join(checkpoint, f"rng_state_{process_index}.pth")
+            if not os.path.isfile(rng_file):
+                LOG.info(
+                    f"Didn't find an RNG file for process {process_index}, if you are resuming a training that "
+                    "wasn't launched in a distributed fashion, reproducibility is not guaranteed."
+                )
+                return
+        else:
+            rng_file = os.path.join(checkpoint, "rng_state.pth")
+            if not os.path.isfile(rng_file):
+                LOG.info(
+                    "Didn't find an RNG file, if you are resuming a training that was launched in a distributed "
+                    "fashion, reproducibility is not guaranteed."
+                )
+                return
+
+        with safe_globals():
+            checkpoint_rng_state = torch.load(rng_file)  # nosec B614
+        random.setstate(checkpoint_rng_state["python"])
+        np.random.set_state(checkpoint_rng_state["numpy"])
+        torch.random.set_rng_state(checkpoint_rng_state["cpu"])
+
+        is_distributed = self.args.parallel_mode == ParallelMode.DISTRIBUTED
+        if torch.cuda.is_available():
+            set_rng_state_for_device(
+                "CUDA", torch.cuda, checkpoint_rng_state, is_distributed
+            )
+        if is_torch_npu_available():
+            set_rng_state_for_device(
+                "NPU", torch.npu, checkpoint_rng_state, is_distributed
+            )
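
For reference, a sketch of the round-trip this override completes. The "python", "numpy", and "cpu" keys are taken from the loading code above; treating them as exactly what the upstream Trainer writes into rng_state.pth is an assumption here (device generators such as CUDA are handled separately via set_rng_state_for_device):

import random

import numpy as np
import torch

# Save: capture each RNG stream, mirroring what a checkpoint stores.
rng_state = {
    "python": random.getstate(),
    "numpy": np.random.get_state(),
    "cpu": torch.random.get_rng_state(),
}
torch.save(rng_state, "rng_state.pth")

# ... RNG streams advance as execution continues ...
random.random(); np.random.rand(); torch.rand(1)

# Restore: the same three calls the mixin makes after torch.load().
restored = torch.load("rng_state.pth")
random.setstate(restored["python"])
np.random.set_state(restored["numpy"])
torch.random.set_rng_state(restored["cpu"])

Restoring all three streams matters because data shuffling, dropout, and augmentation may each draw from a different generator.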

src/axolotl/core/trainers/trl.py

Lines changed: 6 additions & 5 deletions
@@ -13,6 +13,7 @@
     RewardTrainer,
 )

+from axolotl.core.trainers.mixins import RngLoaderMixin
 from axolotl.core.trainers.mixins.scheduler import SchedulerMixin


@@ -74,7 +75,7 @@ def train(
     )


-class AxolotlORPOTrainer(SchedulerMixin, ORPOTrainer):
+class AxolotlORPOTrainer(RngLoaderMixin, SchedulerMixin, ORPOTrainer):
     """
     Extend the base ORPOTrainer for axolotl helpers
     """
@@ -154,15 +155,15 @@ def get_batch_loss_metrics(
     return loss, metrics


-class AxolotlKTOTrainer(SchedulerMixin, KTOTrainer):
+class AxolotlKTOTrainer(RngLoaderMixin, SchedulerMixin, KTOTrainer):
     """
     Extend the base KTOTrainer for axolotl helpers
     """

     tag_names = ["axolotl", "kto"]


-class AxolotlCPOTrainer(SchedulerMixin, CPOTrainer):
+class AxolotlCPOTrainer(RngLoaderMixin, SchedulerMixin, CPOTrainer):
     """
     Extend the base CPOTrainer for axolotl helpers
     """
@@ -244,15 +245,15 @@ def get_batch_loss_metrics(
     return loss, metrics


-class AxolotlRewardTrainer(SchedulerMixin, RewardTrainer):
+class AxolotlRewardTrainer(RngLoaderMixin, SchedulerMixin, RewardTrainer):
     """
     Extend the base RewardTrainer for axolotl helpers
     """

     tag_names = ["axolotl", "reward"]


-class AxolotlPRMTrainer(SchedulerMixin, PRMTrainer):
+class AxolotlPRMTrainer(RngLoaderMixin, SchedulerMixin, PRMTrainer):
     """
     Extend the base trl.PRMTrainer for axolotl helpers
     """
