diff --git a/beginner_source/transformer_tutorial.py b/beginner_source/transformer_tutorial.py index 57d1f8d8591..cce52eefdb3 100644 --- a/beginner_source/transformer_tutorial.py +++ b/beginner_source/transformer_tutorial.py @@ -149,7 +149,7 @@ def forward(self, x: Tensor) -> Tensor: # into ``batch_size`` columns. If the data does not divide evenly into # ``batch_size`` columns, then the data is trimmed to fit. For instance, with # the alphabet as the data (total length of 26) and ``batch_size=4``, we would -# divide the alphabet into 4 sequences of length 6: +# divide the alphabet into sequences of length 6, resulting in 4 such sequences. # # .. math:: # \begin{bmatrix}