Commit f7115c2

Fix PyTorch stateful RNN/LSTM gradient computation error; resolves #20875 (#20916)
* Fix PyTorch stateful RNN gradient computation error
* Updates post feedback
1 parent 7a7bca6 commit f7115c2

File tree

1 file changed: +6 -0 lines changed

keras/src/layers/rnn/rnn.py

@@ -331,6 +331,12 @@ def inner_loop(self, sequences, initial_state, mask, training=False):
             cell_kwargs["training"] = training
 
         def step(inputs, states):
+            # Create new tensor copies when using PyTorch backend
+            # with stateful=True. This prevents in-place modifications
+            # that would otherwise break PyTorch's autograd functionality
+            # by modifying tensors needed for gradient computation.
+            if backend.backend() == "torch" and self.stateful:
+                states = tree.map_structure(ops.copy, states)
             output, new_states = self.cell(inputs, states, **cell_kwargs)
             if not tree.is_nested(new_states):
                 new_states = [new_states]
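
For readers unfamiliar with the failure mode this patch works around, below is a minimal, standalone sketch in plain PyTorch (not the Keras code path) showing why mutating a tensor that autograd has saved for the backward pass raises an error, and why operating on a copy, analogous to the tree.map_structure(ops.copy, states) call above, avoids it. The variable names and shapes are illustrative only.

import torch

# A weight that requires gradients and a "state" tensor that gets
# updated in place, mimicking a stateful RNN state buffer.
w = torch.ones(3, requires_grad=True)

# Safe pattern: compute on a copy of the state, so the later in-place
# update does not touch the tensor autograd saved for backward.
state = torch.zeros(3)
out = (w * state.clone()).sum()
state.add_(1.0)          # mutating the original is now harmless
out.backward()           # succeeds

# Failing pattern: autograd saves `state2` to compute d(out2)/dw, and the
# in-place update invalidates that saved tensor.
state2 = torch.zeros(3)
out2 = (w * state2).sum()
state2.add_(1.0)         # in-place modification of a saved tensor
try:
    out2.backward()
except RuntimeError as err:
    # "one of the variables needed for gradient computation has been
    # modified by an inplace operation"
    print("autograd error:", err)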
