Skip to content

Commit d23243a

Browse files
Merge pull request #743 from mlcommons/dev
dev -> main
2 parents 576d5e3 + 784b915 commit d23243a

File tree

19 files changed

+170
-96
lines changed

19 files changed

+170
-96
lines changed

algorithmic_efficiency/random_utils.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,11 @@
2626

2727
def _signed_to_unsigned(seed: SeedType) -> SeedType:
2828
if isinstance(seed, int):
29-
return seed + 2**32 if seed < 0 else seed
29+
return seed % 2**32
3030
if isinstance(seed, list):
31-
return [s + 2**32 if s < 0 else s for s in seed]
31+
return [s % 2**32 for s in seed]
3232
if isinstance(seed, np.ndarray):
33-
return np.array([s + 2**32 if s < 0 else s for s in seed.tolist()])
33+
return np.array([s % 2**32 for s in seed.tolist()])
3434

3535

3636
def _fold_in(seed: SeedType, data: Any) -> List[Union[SeedType, Any]]:

algorithmic_efficiency/workloads/criteo1tb/criteo1tb_jax/workload.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ def use_layer_norm(self) -> bool:
173173

174174
@property
175175
def validation_target_value(self) -> float:
176-
return 0.123744
176+
return 0.123757
177177

178178
@property
179179
def test_target_value(self) -> float:
@@ -191,23 +191,23 @@ def use_resnet(self) -> bool:
191191

192192
@property
193193
def validation_target_value(self) -> float:
194-
return 0.124027
194+
return 0.12415
195195

196196
@property
197197
def test_target_value(self) -> float:
198-
return 0.126468
198+
return 0.12648
199199

200200

201201
class Criteo1TbDlrmSmallEmbedInitWorkload(Criteo1TbDlrmSmallWorkload):
202202

203203
@property
204204
def validation_target_value(self) -> float:
205-
return 0.124286
205+
return 0.129657
206206

207207
@property
208208
def test_target_value(self) -> float:
209209
# Todo
210-
return 0.126725
210+
return 0.131967
211211

212212
@property
213213
def embedding_init_multiplier(self) -> float:

algorithmic_efficiency/workloads/criteo1tb/criteo1tb_pytorch/workload.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,7 @@ def use_layer_norm(self) -> bool:
254254

255255
@property
256256
def validation_target_value(self) -> float:
257-
return 0.123744
257+
return 0.123757
258258

259259
@property
260260
def test_target_value(self) -> float:
@@ -272,23 +272,23 @@ def use_resnet(self) -> bool:
272272

273273
@property
274274
def validation_target_value(self) -> float:
275-
return 0.124027
275+
return 0.12415
276276

277277
@property
278278
def test_target_value(self) -> float:
279-
return 0.126468
279+
return 0.12648
280280

281281

282282
class Criteo1TbDlrmSmallEmbedInitWorkload(Criteo1TbDlrmSmallWorkload):
283283

284284
@property
285285
def validation_target_value(self) -> float:
286-
return 0.124286
286+
return 0.129657
287287

288288
@property
289289
def test_target_value(self) -> float:
290290
# Todo
291-
return 0.126725
291+
return 0.131967
292292

293293
@property
294294
def embedding_init_multiplier(self) -> float:

algorithmic_efficiency/workloads/imagenet_resnet/imagenet_jax/workload.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -272,11 +272,11 @@ def use_silu(self) -> bool:
272272

273273
@property
274274
def validation_target_value(self) -> float:
275-
return 1 - 0.22009
275+
return 0.75445
276276

277277
@property
278278
def test_target_value(self) -> float:
279-
return 1 - 0.3426
279+
return 0.6323
280280

281281

282282
class ImagenetResNetGELUWorkload(ImagenetResNetWorkload):
@@ -287,11 +287,11 @@ def use_gelu(self) -> bool:
287287

288288
@property
289289
def validation_target_value(self) -> float:
290-
return 1 - 0.22077
290+
return 0.76765
291291

292292
@property
293293
def test_target_value(self) -> float:
294-
return 1 - 0.3402
294+
return 0.6519
295295

296296

297297
class ImagenetResNetLargeBNScaleWorkload(ImagenetResNetWorkload):
@@ -302,8 +302,8 @@ def bn_init_scale(self) -> float:
302302

303303
@property
304304
def validation_target_value(self) -> float:
305-
return 1 - 0.23474
305+
return 0.76526
306306

307307
@property
308308
def test_target_value(self) -> float:
309-
return 1 - 0.3577
309+
return 0.6423

algorithmic_efficiency/workloads/imagenet_resnet/imagenet_pytorch/workload.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -326,11 +326,11 @@ def use_silu(self) -> bool:
326326

327327
@property
328328
def validation_target_value(self) -> float:
329-
return 1 - 0.22009
329+
return 0.75445
330330

331331
@property
332332
def test_target_value(self) -> float:
333-
return 1 - 0.342
333+
return 0.6323
334334

335335

336336
class ImagenetResNetGELUWorkload(ImagenetResNetWorkload):
@@ -341,11 +341,11 @@ def use_gelu(self) -> bool:
341341

342342
@property
343343
def validation_target_value(self) -> float:
344-
return 1 - 0.22077
344+
return 0.76765
345345

346346
@property
347347
def test_target_value(self) -> float:
348-
return 1 - 0.3402
348+
return 0.6519
349349

350350

351351
class ImagenetResNetLargeBNScaleWorkload(ImagenetResNetWorkload):
@@ -356,8 +356,8 @@ def bn_init_scale(self) -> float:
356356

357357
@property
358358
def validation_target_value(self) -> float:
359-
return 1 - 0.23474
359+
return 0.76526
360360

361361
@property
362362
def test_target_value(self) -> float:
363-
return 1 - 0.3577
363+
return 0.6423

algorithmic_efficiency/workloads/imagenet_vit/imagenet_jax/workload.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -99,11 +99,11 @@ def use_glu(self) -> bool:
9999

100100
@property
101101
def validation_target_value(self) -> float:
102-
return 1 - 0.2233
102+
return 0.75738
103103

104104
@property
105105
def test_target_value(self) -> float:
106-
return 1 - 0.3455
106+
return 0.6359
107107

108108

109109
class ImagenetVitPostLNWorkload(ImagenetVitWorkload):
@@ -114,11 +114,11 @@ def use_post_layer_norm(self) -> bool:
114114

115115
@property
116116
def validation_target_value(self) -> float:
117-
return 1 - 0.24688
117+
return 0.75312
118118

119119
@property
120120
def test_target_value(self) -> float:
121-
return 1 - 0.3714
121+
return 0.6286
122122

123123

124124
class ImagenetVitMapWorkload(ImagenetVitWorkload):
@@ -129,8 +129,8 @@ def use_map(self) -> bool:
129129

130130
@property
131131
def validation_target_value(self) -> float:
132-
return 1 - 0.22886
132+
return 0.77113
133133

134134
@property
135135
def test_target_value(self) -> float:
136-
return 1 - 0.3477
136+
return 0.6523

algorithmic_efficiency/workloads/imagenet_vit/imagenet_pytorch/workload.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,11 @@ def use_glu(self) -> bool:
9090

9191
@property
9292
def validation_target_value(self) -> float:
93-
return 1 - 0.2233
93+
return 0.75738
9494

9595
@property
9696
def test_target_value(self) -> float:
97-
return 1 - 0.3455
97+
return 0.6359
9898

9999

100100
class ImagenetVitPostLNWorkload(ImagenetVitWorkload):
@@ -105,11 +105,11 @@ def use_post_layer_norm(self) -> bool:
105105

106106
@property
107107
def validation_target_value(self) -> float:
108-
return 1 - 0.24688
108+
return 0.75312
109109

110110
@property
111111
def test_target_value(self) -> float:
112-
return 1 - 0.3714
112+
return 0.6286
113113

114114

115115
class ImagenetVitMapWorkload(ImagenetVitWorkload):
@@ -120,8 +120,8 @@ def use_map(self) -> bool:
120120

121121
@property
122122
def validation_target_value(self) -> float:
123-
return 1 - 0.22886
123+
return 0.77113
124124

125125
@property
126126
def test_target_value(self) -> float:
127-
return 1 - 0.3477
127+
return 0.6523

algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -388,11 +388,11 @@ def attention_temperature(self) -> float:
388388

389389
@property
390390
def validation_target_value(self) -> float:
391-
return 0.082665
391+
return 0.109977
392392

393393
@property
394394
def test_target_value(self) -> float:
395-
return 0.50168
395+
return 0.068065
396396

397397

398398
class LibriSpeechConformerLayerNormWorkload(LibriSpeechConformerWorkload):
@@ -403,11 +403,11 @@ def use_post_layer_norm(self) -> bool:
403403

404404
@property
405405
def validation_target_value(self) -> float:
406-
return 0.085371
406+
return 0.09731
407407

408408
@property
409409
def test_target_value(self) -> float:
410-
return 0.053096
410+
return 0.05996
411411

412412

413413
class LibriSpeechConformerGeluWorkload(LibriSpeechConformerWorkload):
@@ -418,8 +418,8 @@ def use_gelu(self) -> bool:
418418

419419
@property
420420
def validation_target_value(self) -> float:
421-
return 0.077958
421+
return 0.094114
422422

423423
@property
424424
def test_target_value(self) -> float:
425-
return 0.047643
425+
return 0.056629

algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/models.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ def __init__(self,
9393
out_features=self.encoder_dim,
9494
bias=True)
9595
self.pos_encode = AddPositionalEmbedding(embedding_dim=self.encoder_dim)
96-
self.dropout = nn.Dropout(p=self.input_dropout_rate)
96+
self.dropout = nn.Dropout(p=self.input_dropout_rate, inplace=True)
9797

9898
def forward(self, inputs, input_paddings):
9999
output_paddings = input_paddings
@@ -195,7 +195,7 @@ def __init__(self, config: ConformerConfig):
195195
in_features=config.encoder_dim,
196196
out_features=config.encoder_dim * config.feed_forward_expansion_factor,
197197
bias=True)
198-
self.dropout1 = nn.Dropout(p=config.feed_forward_dropout_rate)
198+
self.dropout1 = nn.Dropout(p=config.feed_forward_dropout_rate, inplace=True)
199199
self.linear2 = nn.Linear(
200200
in_features=config.encoder_dim * config.feed_forward_expansion_factor,
201201
out_features=config.encoder_dim,
@@ -206,7 +206,8 @@ def __init__(self, config: ConformerConfig):
206206
else:
207207
feed_forward_residual_dropout_rate = (
208208
config.feed_forward_residual_dropout_rate)
209-
self.dropout2 = nn.Dropout(p=feed_forward_residual_dropout_rate)
209+
self.dropout2 = nn.Dropout(
210+
p=feed_forward_residual_dropout_rate, inplace=True)
210211

211212
def forward(self, inputs, padding_mask):
212213
inputs = self.ln(inputs)
@@ -316,7 +317,7 @@ def __init__(self, config: ConformerConfig):
316317
attention_residual_dropout_rate = 0.1
317318
else:
318319
attention_residual_dropout_rate = config.attention_residual_dropout_rate
319-
self.dropout = nn.Dropout(p=attention_residual_dropout_rate)
320+
self.dropout = nn.Dropout(p=attention_residual_dropout_rate, inplace=True)
320321

321322
def forward(self, outputs, paddings):
322323
outputs = self.ln(outputs)
@@ -407,7 +408,7 @@ def __init__(self, config):
407408
conv_residual_dropout_rate = 0.0
408409
else:
409410
conv_residual_dropout_rate = config.conv_residual_dropout_rate
410-
self.dropout = nn.Dropout(p=conv_residual_dropout_rate)
411+
self.dropout = nn.Dropout(p=conv_residual_dropout_rate, inplace=True)
411412

412413
def forward(self, inputs, input_paddings):
413414
inputs = self.ln(inputs)

algorithmic_efficiency/workloads/librispeech_conformer/librispeech_pytorch/workload.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -354,11 +354,11 @@ def attention_temperature(self) -> float:
354354

355355
@property
356356
def validation_target_value(self) -> float:
357-
return 0.082665
357+
return 0.109977
358358

359359
@property
360360
def test_target_value(self) -> float:
361-
return 0.050168
361+
return 0.068065
362362

363363

364364
class LibriSpeechConformerLayerNormWorkload(LibriSpeechConformerWorkload):
@@ -369,11 +369,11 @@ def use_post_layer_norm(self) -> bool:
369369

370370
@property
371371
def validation_target_value(self) -> float:
372-
return 0.085371
372+
return 0.09731
373373

374374
@property
375375
def test_target_value(self) -> float:
376-
return 0.053096
376+
return 0.05996
377377

378378

379379
class LibriSpeechConformerGeluWorkload(LibriSpeechConformerWorkload):
@@ -384,8 +384,8 @@ def use_gelu(self) -> bool:
384384

385385
@property
386386
def validation_target_value(self) -> float:
387-
return 0.077958
387+
return 0.094114
388388

389389
@property
390390
def test_target_value(self) -> float:
391-
return 0.047643
391+
return 0.056629

0 commit comments

Comments (0)