[KV-Cache Injection][MPT] Update config (#1801)

dbogunowicz · bfineran · commit 4741e8e01aa2 · 2023-11-16T16:47:36.000-05:00
* Update export.py

* quality

* Update configs.py

* add comment regarding MPT version
diff --git a/src/sparseml/exporters/transforms/kv_cache/configs.py b/src/sparseml/exporters/transforms/kv_cache/configs.py
@@ -108,13 +108,17 @@ class Config:
     multiply_batch_by_num_att_heads=False,
 )
 
+# The KV-cache injection config for MPT is compatible
+# with the MPT model in the HF Hub repo 'mosaicml/mpt-7b'
+# at the state corresponding to the commit
+# `68e1a8e0ebb9b30f3c45c1ef6195980f29063ae2`
 MPT_CONFIG = KeyValueCacheConfig(
     model_name="mpt",
     additional_transforms=AdditionalTransformsMPT,
     key_num_attention_heads="n_heads",
     key_num_embedding_hidden_size="d_model",
-    transpose_value_input=None,
-    transpose_key_input=(0, 1, 3, 2),
+    transpose_value_input=(0, 2, 1, 3),
+    transpose_key_input=(0, 2, 1, 3),
     multiply_batch_by_num_att_heads=False,
 )