[Codegen][ORT][Static Seq Length] TextGenerationPipeline #946

Merged
2 changes: 2 additions & 0 deletions src/deepsparse/tasks.py
@@ -82,6 +82,7 @@ class SupportedTasks:
             "token_classification",
             "zero_shot_text_classification",
             "transformers_embedding_extraction",
+            "text_generation",
         ],
     )(
         question_answering=AliasedTask("question_answering", ["qa"]),
@@ -93,6 +94,7 @@ class SupportedTasks:
         transformers_embedding_extraction=AliasedTask(
             "transformers_embedding_extraction", []
         ),
+        text_generation=AliasedTask("text_generation", ["codegen"]),
     )

     image_classification = namedtuple("image_classification", ["image_classification"])(
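
With the task name and its "codegen" alias registered above, the new pipeline becomes reachable through the standard factory. A minimal sketch, assuming an exported CodeGen ONNX model directory; the exact constructor arguments accepted by the new TextGenerationPipeline live in text_generation.py, which is not shown in this diff:

```python
from deepsparse import Pipeline

# "codegen" resolves to "text_generation" through the AliasedTask added above.
# The model_path is a placeholder; point it at a directory containing
# model.onnx plus the tokenizer and config files.
pipeline = Pipeline.create(
    task="codegen",
    model_path="/path/to/codegen-onnx-export",
)
```
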
6 changes: 4 additions & 2 deletions src/deepsparse/transformers/helpers.py
@@ -159,8 +159,10 @@ def overwrite_transformer_onnx_model_inputs(
     ]
     input_names = []
     for external_input in external_inputs:
-        external_input.type.tensor_type.shape.dim[0].dim_value = batch_size
-        external_input.type.tensor_type.shape.dim[1].dim_value = max_length
+        # Commenting this out for now, as it is not needed for the ORT backend
+        # Will be crucial for DeepSparse backend
+        # external_input.type.tensor_type.shape.dim[0].dim_value = batch_size
+        # external_input.type.tensor_type.shape.dim[1].dim_value = max_length
         input_names.append(external_input.name)

     # Save modified model

Review comment (Contributor): this is not to be merged to main right?

Reply (Contributor Author): This should not be here. Let me update the status of this PR.
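
For context, the helper above rewrites the ONNX graph's external inputs to static dimensions before the model is compiled. A standalone sketch of that logic (function and variable names are assumed, not taken from the repository), showing the two assignments this PR disables for the ORT backend:

```python
import onnx


def overwrite_static_input_shapes(model_path: str, batch_size: int, max_length: int):
    """Sketch of the shape-overwriting step. For ORT the two dim_value
    assignments stay commented out, as in the diff above; the DeepSparse
    engine would need them restored to get static shapes."""
    model = onnx.load(model_path)
    initializer_names = {init.name for init in model.graph.initializer}
    # External inputs are graph inputs that are not weight initializers
    external_inputs = [
        inp for inp in model.graph.input if inp.name not in initializer_names
    ]
    input_names = []
    for external_input in external_inputs:
        # Disabled for ORT in this PR; required for the DeepSparse backend:
        # external_input.type.tensor_type.shape.dim[0].dim_value = batch_size
        # external_input.type.tensor_type.shape.dim[1].dim_value = max_length
        input_names.append(external_input.name)
    return model, input_names
```
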
1 change: 1 addition & 0 deletions src/deepsparse/transformers/pipelines/__init__.py
@@ -21,3 +21,4 @@
 from .token_classification import *
 from .zero_shot_text_classification import *
 from .embedding_extraction import *
+from .text_generation import *
14 changes: 9 additions & 5 deletions src/deepsparse/transformers/pipelines/pipeline.py
@@ -109,7 +109,8 @@ def setup_onnx_file_path(self) -> str:
             config_path, finetuning_task=self.task if hasattr(self, "task") else None
         )
         self.tokenizer = AutoTokenizer.from_pretrained(
-            tokenizer_path, model_max_length=self.sequence_length
+            tokenizer_path,
+            model_max_length=self.sequence_length,
         )
         self.config_path = os.path.join(config_path, "config.json")
         self.tokenizer_config_path = os.path.join(tokenizer_path, "tokenizer.json")
@@ -126,19 +127,22 @@ def setup_onnx_file_path(self) -> str:
         return onnx_path

     def tokens_to_engine_input(
-        self, tokens: Mapping[Any, numpy.ndarray]
+        self,
+        tokens: Mapping[Any, numpy.ndarray],
+        onnx_input_names: Optional[List[str]] = None,
     ) -> List[numpy.ndarray]:
         """
         :param tokens: outputs of the pipeline tokenizer
         :return: list of numpy arrays in expected order for model input
         """
-        if not all(name in tokens for name in self.onnx_input_names):
+        onnx_input_names = onnx_input_names or self.onnx_input_names
+        if not all(name in tokens for name in onnx_input_names):
             raise ValueError(
-                f"pipeline expected arrays with names {self.onnx_input_names}, "
+                f"pipeline expected arrays with names {onnx_input_names}, "
                 f"received inputs: {list(tokens.keys())}"
             )

-        return [tokens[name] for name in self.onnx_input_names]
+        return [tokens[name] for name in onnx_input_names]

     @staticmethod
     def should_bucket(*args, **kwargs) -> bool:
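
The new optional onnx_input_names argument lets a caller, such as the text generation pipeline, override the input names and ordering expected by the engine while the default behavior stays unchanged. An illustrative call, assuming a pipeline instance whose model takes input_ids and attention_mask at sequence length 128 (names and shapes are assumptions, not from the diff):

```python
import numpy

# Tokenizer-style outputs keyed by ONNX input name (assumed names/shapes)
tokens = {
    "input_ids": numpy.zeros((1, 128), dtype=numpy.int64),
    "attention_mask": numpy.ones((1, 128), dtype=numpy.int64),
}

# Without the argument this falls back to self.onnx_input_names, exactly
# as before; passing it overrides the expected names and their order.
engine_inputs = pipeline.tokens_to_engine_input(
    tokens, onnx_input_names=["input_ids", "attention_mask"]
)
```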