Skip to content

Commit d23243a

Browse files
Merge pull request #743 from mlcommons/dev
dev -> main
2 parents 576d5e3 + 784b915 commit d23243a

File tree

19 files changed

+170
-96
lines changed

19 files changed

+170
-96
lines changed

algorithmic_efficiency/random_utils.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,11 @@
2626

2727
def _signed_to_unsigned(seed: SeedType) -> SeedType:
2828
if isinstance(seed, int):
29-
return seed + 2**32 if seed < 0 else seed
29+
return seed % 2**32
3030
if isinstance(seed, list):
31-
return [s + 2**32 if s < 0 else s for s in seed]
31+
return [s % 2**32 for s in seed]
3232
if isinstance(seed, np.ndarray):
33-
return np.array([s + 2**32 if s < 0 else s for s in seed.tolist()])
33+
return np.array([s % 2**32 for s in seed.tolist()])
3434

3535

3636
def _fold_in(seed: SeedType, data: Any) -> List[Union[SeedType, Any]]:

algorithmic_efficiency/workloads/criteo1tb/criteo1tb_jax/workload.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ def use_layer_norm(self) -> bool:
173173

174174
@property
175175
def validation_target_value(self) -> float:
176-
return 0.123744
176+
return 0.123757
177177

178178
@property
179179
def test_target_value(self) -> float:
@@ -191,23 +191,23 @@ def use_resnet(self) -> bool:
191191

192192
@property
193193
def validation_target_value(self) -> float:
194-
return 0.124027
194+
return 0.12415
195195

196196
@property
197197
def test_target_value(self) -> float:
198-
return 0.126468
198+
return 0.12648
199199

200200

201201
class Criteo1TbDlrmSmallEmbedInitWorkload(Criteo1TbDlrmSmallWorkload):
202202

203203
@property
204204
def validation_target_value(self) -> float:
205-
return 0.124286
205+
return 0.129657
206206

207207
@property
208208
def test_target_value(self) -> float:
209209
# Todo
210-
return 0.126725
210+
return 0.131967
211211

212212
@property
213213
def embedding_init_multiplier(self) -> float:

algorithmic_efficiency/workloads/criteo1tb/criteo1tb_pytorch/workload.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,7 @@ def use_layer_norm(self) -> bool:
254254

255255
@property
256256
def validation_target_value(self) -> float:
257-
return 0.123744
257+
return 0.123757
258258

259259
@property
260260
def test_target_value(self) -> float:
@@ -272,23 +272,23 @@ def use_resnet(self) -> bool:
272272

273273
@property
274274
def validation_target_value(self) -> float:
275-
return 0.124027
275+
return 0.12415
276276

277277
@property
278278
def test_target_value(self) -> float:
279-
return 0.126468
279+
return 0.12648
280280

281281

282282
class Criteo1TbDlrmSmallEmbedInitWorkload(Criteo1TbDlrmSmallWorkload):
283283

284284
@property
285285
def validation_target_value(self) -> float:
286-
return 0.124286
286+
return 0.129657
287287

288288
@property
289289
def test_target_value(self) -> float:
290290
# Todo
291-
return 0.126725
291+
return 0.131967
292292

293293
@property
294294
def embedding_init_multiplier(self) -> float:

algorithmic_efficiency/workloads/imagenet_resnet/imagenet_jax/workload.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -272,11 +272,11 @@ def use_silu(self) -> bool:
272272

273273
@property
274274
def validation_target_value(self) -> float:
275-
return 1 - 0.22009
275+
return 0.75445
276276

277277
@property
278278
def test_target_value(self) -> float:
279-
return 1 - 0.3426
279+
return 0.6323
280280

281281

282282
class ImagenetResNetGELUWorkload(ImagenetResNetWorkload):
@@ -287,11 +287,11 @@ def use_gelu(self) -> bool:
287287

288288
@property
289289
def validation_target_value(self) -> float:
290-
return 1 - 0.22077
290+
return 0.76765
291291

292292
@property
293293
def test_target_value(self) -> float:
294-
return 1 - 0.3402
294+
return 0.6519
295295

296296

297297
class ImagenetResNetLargeBNScaleWorkload(ImagenetResNetWorkload):
@@ -302,8 +302,8 @@ def bn_init_scale(self) -> float:
302302

303303
@property
304304
def validation_target_value(self) -> float:
305-
return 1 - 0.23474
305+
return 0.76526
306306

307307
@property
308308
def test_target_value(self) -> float:
309-
return 1 - 0.3577
309+
return 0.6423

algorithmic_efficiency/workloads/imagenet_resnet/imagenet_pytorch/workload.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -326,11 +326,11 @@ def use_silu(self) -> bool:
326326

327327
@property
328328
def validation_target_value(self) -> float:
329-
return 1 - 0.22009
329+
return 0.75445
330330

331331
@property
332332
def test_target_value(self) -> float:
333-
return 1 - 0.342
333+
return 0.6323
334334

335335

336336
class ImagenetResNetGELUWorkload(ImagenetResNetWorkload):
@@ -341,11 +341,11 @@ def use_gelu(self) -> bool:
341341

342342
@property
343343
def validation_target_value(self) -> float:
344-
return 1 - 0.22077
344+
return 0.76765
345345

346346
@property
347347
def test_target_value(self) -> float:
348-
return 1 - 0.3402
348+
return 0.6519
349349

350350

351351
class ImagenetResNetLargeBNScaleWorkload(ImagenetResNetWorkload):
@@ -356,8 +356,8 @@ def bn_init_scale(self) -> float:
356356

357357
@property
358358
def validation_target_value(self) -> float:
359-
return 1 - 0.23474
359+
return 0.76526
360360

361361
@property
362362
def test_target_value(self) -> float:
363-
return 1 - 0.3577
363+
return 0.6423

algorithmic_efficiency/workloads/imagenet_vit/imagenet_jax/workload.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -99,11 +99,11 @@ def use_glu(self) -> bool:
9999

100100
@property
101101
def validation_target_value(self) -> float:
102-
return 1 - 0.2233
102+
return 0.75738
103103

104104
@property
105105
def test_target_value(self) -> float:
106-
return 1 - 0.3455
106+
return 0.6359
107107

108108

109109
class ImagenetVitPostLNWorkload(ImagenetVitWorkload):
@@ -114,11 +114,11 @@ def use_post_layer_norm(self) -> bool:
114114

115115
@property
116116
def validation_target_value(self) -> float:
117-
return 1 - 0.24688
117+
return 0.75312
118118

119119
@property
120120
def test_target_value(self) -> float:
121-
return 1 - 0.3714
121+
return 0.6286
122122

123123

124124
class ImagenetVitMapWorkload(ImagenetVitWorkload):
@@ -129,8 +129,8 @@ def use_map(self) -> bool:
129129

130130
@property
131131
def validation_target_value(self) -> float:
132-
return 1 - 0.22886
132+
return 0.77113
133133

134134
@property
135135
def test_target_value(self) -> float:
136-
return 1 - 0.3477
136+
return 0.6523

algorithmic_efficiency/workloads/imagenet_vit/imagenet_pytorch/workload.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,11 @@ def use_glu(self) -> bool:
9090

9191
@property
9292
def validation_target_value(self) -> float:
93-
return 1 - 0.2233
93+
return 0.75738
9494

9595
@property
9696
def test_target_value(self) -> float:
97-
return 1 - 0.3455
97+
return 0.6359
9898

9999

100100
class ImagenetVitPostLNWorkload(ImagenetVitWorkload):
@@ -105,11 +105,11 @@ def use_post_layer_norm(self) -> bool:
105105

106106
@property
107107
def validation_target_value(self) -> float:
108-
return 1 - 0.24688
108+
return 0.75312
109109

110110
@property
111111
def test_target_value(self) -> float:
112-
return 1 - 0.3714
112+
return 0.6286
113113

114114

115115
class ImagenetVitMapWorkload(ImagenetVitWorkload):
@@ -120,8 +120,8 @@ def use_map(self) -> bool:
120120

121121
@property
122122
def validation_target_value(self) -> float:
123-
return 1 - 0.22886
123+
return 0.77113
124124

125125
@property
126126
def test_target_value(self) -> float:
127-
return 1 - 0.3477
127+
return 0.6523

algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -388,11 +388,11 @@ def attention_temperature(self) -> float:
388388

389389
@property
390390
def validation_target_value(self) -> float:
391-
return 0.082665
391+
return 0.109977
392392

393393
@property
394394
def test_target_value(self) -> float:
395-
return 0.50168
395+
return 0.068065
396396

397397

398398
class LibriSpeechConformerLayerNormWorkload(LibriSpeechConformerWorkload):
@@ -403,11 +403,11 @@ def use_post_layer_norm(self) -> bool:
403403

404404
@property
405405
def validation_target_value(self) -> float:
406-
return 0.085371
406+
return 0.09731
407407

408408
@property
409409
def test_target_value(self) -> float:
410-
return 0.053096
410+
return 0.05996
411411

412412

413413
class LibriSpeechConformerGeluWorkload(LibriSpeechConformerWorkload):
@@ -418,8 +418,8 @@ def use_gelu(self) -> bool:
418418

419419
@property
420420
def validation_target_value(self) -> float:
421-
return 0.077958
421+
return 0.094114
422422

423423
@property
424424
def test_target_value(self) -> float:
425-
return 0.047643
425+
return 0.056629

algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ def __init__(self,
9393
out_features=self.encoder_dim,
9494
bias=True)
9595
self.pos_encode = AddPositionalEmbedding(embedding_dim=self.encoder_dim)
96-
self.dropout = nn.Dropout(p=self.input_dropout_rate)
96+
self.dropout = nn.Dropout(p=self.input_dropout_rate, inplace=True)
9797

9898
def forward(self, inputs, input_paddings):
9999
output_paddings = input_paddings
@@ -195,7 +195,7 @@ def __init__(self, config: ConformerConfig):
195195
in_features=config.encoder_dim,
196196
out_features=config.encoder_dim * config.feed_forward_expansion_factor,
197197
bias=True)
198-
self.dropout1 = nn.Dropout(p=config.feed_forward_dropout_rate)
198+
self.dropout1 = nn.Dropout(p=config.feed_forward_dropout_rate, inplace=True)
199199
self.linear2 = nn.Linear(
200200
in_features=config.encoder_dim * config.feed_forward_expansion_factor,
201201
out_features=config.encoder_dim,
@@ -206,7 +206,8 @@ def __init__(self, config: ConformerConfig):
206206
else:
207207
feed_forward_residual_dropout_rate = (
208208
config.feed_forward_residual_dropout_rate)
209-
self.dropout2 = nn.Dropout(p=feed_forward_residual_dropout_rate)
209+
self.dropout2 = nn.Dropout(
210+
p=feed_forward_residual_dropout_rate, inplace=True)
210211

211212
def forward(self, inputs, padding_mask):
212213
inputs = self.ln(inputs)
@@ -316,7 +317,7 @@ def __init__(self, config: ConformerConfig):
316317
attention_residual_dropout_rate = 0.1
317318
else:
318319
attention_residual_dropout_rate = config.attention_residual_dropout_rate
319-
self.dropout = nn.Dropout(p=attention_residual_dropout_rate)
320+
self.dropout = nn.Dropout(p=attention_residual_dropout_rate, inplace=True)
320321

321322
def forward(self, outputs, paddings):
322323
outputs = self.ln(outputs)
@@ -407,7 +408,7 @@ def __init__(self, config):
407408
conv_residual_dropout_rate = 0.0
408409
else:
409410
conv_residual_dropout_rate = config.conv_residual_dropout_rate
410-
self.dropout = nn.Dropout(p=conv_residual_dropout_rate)
411+
self.dropout = nn.Dropout(p=conv_residual_dropout_rate, inplace=True)
411412

412413
def forward(self, inputs, input_paddings):
413414
inputs = self.ln(inputs)

algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -354,11 +354,11 @@ def attention_temperature(self) -> float:
354354

355355
@property
356356
def validation_target_value(self) -> float:
357-
return 0.082665
357+
return 0.109977
358358

359359
@property
360360
def test_target_value(self) -> float:
361-
return 0.050168
361+
return 0.068065
362362

363363

364364
class LibriSpeechConformerLayerNormWorkload(LibriSpeechConformerWorkload):
@@ -369,11 +369,11 @@ def use_post_layer_norm(self) -> bool:
369369

370370
@property
371371
def validation_target_value(self) -> float:
372-
return 0.085371
372+
return 0.09731
373373

374374
@property
375375
def test_target_value(self) -> float:
376-
return 0.053096
376+
return 0.05996
377377

378378

379379
class LibriSpeechConformerGeluWorkload(LibriSpeechConformerWorkload):
@@ -384,8 +384,8 @@ def use_gelu(self) -> bool:
384384

385385
@property
386386
def validation_target_value(self) -> float:
387-
return 0.077958
387+
return 0.094114
388388

389389
@property
390390
def test_target_value(self) -> float:
391-
return 0.047643
391+
return 0.056629

0 commit comments

Comments (0)