neuralmagic
diff --git a/‎src/deepsparse/transformers/helpers.py
+80-1 b/‎src/deepsparse/transformers/helpers.py
+80-1
diff --git a/‎src/deepsparse/transformers/pipelines/pipeline.py
+1 b/‎src/deepsparse/transformers/pipelines/pipeline.py
+1
diff --git a/‎src/deepsparse/transformers/utils/helpers.py
+91-1 b/‎src/deepsparse/transformers/utils/helpers.py
+91-1
diff --git a/‎src/deepsparse/v2/__init__.py
+22 b/‎src/deepsparse/v2/__init__.py
+22
diff --git a/‎src/deepsparse/v2/image_classification/__init__.py
+20 b/‎src/deepsparse/v2/image_classification/__init__.py
+20
diff --git a/‎src/deepsparse/v2/image_classification/pipeline.py
+62 b/‎src/deepsparse/v2/image_classification/pipeline.py
+62
@@ -17,14 +17,16 @@
 """
 
 
+import logging
 import os
 import re
 from pathlib import Path
 from tempfile import NamedTemporaryFile
-from typing import List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple, Union
 
 import numpy
 import onnx
+import transformers
 from onnx import ModelProto
 
 from deepsparse.log import get_main_logger
@@ -38,6 +40,7 @@
 
 __all__ = [
     "get_deployment_path",
+    "setup_transformers_pipeline",
     "overwrite_transformer_onnx_model_inputs",
     "fix_numpy_types",
     "get_transformer_layer_init_names",
@@ -47,6 +50,82 @@
 _LOGGER = get_main_logger()
 
 
+def setup_transformers_pipeline(
+    model_path: str,
+    sequence_length: int,
+    tokenizer_padding_side: str = "left",
+    engine_kwargs: Optional[Dict] = None,
+) -> Tuple[
+    str, transformers.PretrainedConfig, transformers.PreTrainedTokenizer, Dict[str, Any]
+]:
+    """
+    A helper function that sets up the model path, config, tokenizer,
+    and engine kwargs for a transformers model.
+    :param model_path: The path to the model to load
+    :param sequence_length: The sequence length to use for the model
+    :param tokenizer_padding_side: The side to pad on for the tokenizer,
+        either "left" or "right"
+    :param engine_kwargs: The kwargs to pass to the engine
+    :return The model path, config, tokenizer, and engine kwargs
+    """
+    model_path, config, tokenizer = fetch_onnx_file_path(model_path, sequence_length)
+
+    tokenizer.padding_side = tokenizer_padding_side
+    if not tokenizer.pad_token:
+        tokenizer.pad_token = tokenizer.eos_token
+
+    engine_kwargs = engine_kwargs or {}
+    if engine_kwargs.get("model_path"):
+        raise ValueError(
+            "The engine kwargs already specify "
+            f"a model path: {engine_kwargs['model_path']}, "
+            f"but a model path was also provided: {model_path}. "
+            "Please only provide one."
+        )
+    engine_kwargs["model_path"] = model_path
+    return model_path, config, tokenizer, engine_kwargs
+
+
+def fetch_onnx_file_path(
+    model_path: str,
+    sequence_length: int,
+    task: Optional[str] = None,
+) -> Tuple[str, transformers.PretrainedConfig, transformers.PreTrainedTokenizer]:
+    """
+    Parses ONNX model from the `model_path` provided. It additionally
+    creates config and tokenizer objects from the `deployment path`,
+    derived from the `model_path` provided.
+    :param model_path: path to the model to be parsed
+    :param sequence_length: maximum sequence length of the model
+    :return: file path to the processed ONNX file for the engine to compile
+    """
+    deployment_path, onnx_path = get_deployment_path(model_path)
+
+    hf_logger = logging.getLogger("transformers")
+    hf_logger_level = hf_logger.level
+    hf_logger.setLevel(logging.ERROR)
+
+    config = transformers.PretrainedConfig.from_pretrained(
+        deployment_path, finetuning_task=task
+    )
+    hf_logger.setLevel(hf_logger_level)
+
+    trust_remote_code = False
+    tokenizer = transformers.AutoTokenizer.from_pretrained(
+        deployment_path,
+        trust_remote_code=trust_remote_code,
+        model_max_length=sequence_length,
+    )
+
+    if not config or not tokenizer:
+        raise RuntimeError(
+            "Invalid config or tokenizer provided. Please provide "
+            "paths to the files or ensure they exist in the `model_path` provided. "
+            "See `tokenizer` and `config` arguments for details."
+        )
+    return onnx_path, config, tokenizer
+
+
 def get_deployment_path(model_path: str) -> Tuple[str, str]:
     """
     Returns the path to the deployment directory
 
@@ -124,6 +124,7 @@ def setup_onnx_file_path(self) -> str:
 
         :return: file path to the processed ONNX file for the engine to compile
         """
+
         deployment_path, onnx_path = get_deployment_path(self.model_path)
         self._deployment_path = deployment_path
 
 
@@ -14,7 +14,7 @@
 import logging
 import pathlib
 import uuid
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
 import numpy
 from transformers import AutoTokenizer, GenerationConfig
@@ -33,6 +33,7 @@
     "override_config",
     "process_generation_config",
     "validate_session_ids",
+    "compute_engine_inputs",
     "set_generated_length",
 ]
 
@@ -82,6 +83,95 @@ def set_generated_length(
     )
 
 
+def compute_engine_inputs(onnx_input_names: str, **kwargs) -> List[numpy.ndarray]:
+    """
+    Given the names of the onnx inputs, compute the inputs
+    to the engine. The inputs will be calculating from the
+    passed kwargs. The information about the required kwargs
+    can be found in the docstring of the individual compute
+    functions.
+
+    :param onnx_input_names: The names of the onnx inputs
+    :param kwargs: The kwargs to compute the inputs from
+    :return: The computed inputs to the engine
+    """
+    engine_inputs = []
+    for input_name in onnx_input_names:
+        if input_name == "causal_mask":
+            # delay the computation of the causal mask
+            continue
+        # fetch the compute function for the
+        # given input_name
+        compute_func = _get_compute_func(input_name)
+        # compute the engine input from the kwargs
+        # and append it to the engine_inputs
+        engine_inputs.append(compute_func(**kwargs))
+
+    if "causal_mask" in onnx_input_names:
+        # compute the causal mask and append it to the engine_inputs
+        input_ids, attention_mask, *_ = engine_inputs
+        engine_inputs.append(create_causal_mask(input_ids, attention_mask))
+
+    return engine_inputs
+
+
+def _get_compute_func(input_name: str) -> Callable[..., numpy.ndarray]:
+    # given the input_name, return the appropriate compute function
+    compute_func = {
+        "input_ids": _compute_input_ids,
+        "attention_mask": _compute_attention_mask,
+        "positions": _compute_positions,
+    }.get(input_name)
+    if compute_func is None:
+        raise ValueError(
+            "Could not find compute function " f"for the input_name: {input_name}"
+        )
+    return compute_func
+
+
+def _compute_input_ids(token_batch: List[int], **kwargs) -> numpy.ndarray:
+    # convert the token_batch to a numpy array
+    return numpy.array([token_batch])
+
+
+def _compute_attention_mask(
+    sequence_length: int,
+    prompt_sequence_length: int,
+    num_total_processed_tokens: int,
+    **kwargs,
+) -> numpy.ndarray:
+    # create a fully masked attention mask with the appropriate
+    # shape (equal to the sequence_length)
+    attention_mask = numpy.zeros((1, sequence_length), dtype=numpy.int64)
+    # unmask the appropriate number of tokens, the sum of
+    # - the number of tokens already processed and cached (num_total_processed_tokens)
+    # - the number of tokens currently processed (prompt_sequence_length)
+    # the sum cannot exceed the maximum length of the attention_mask
+    num_attention_entries_to_unmask = min(
+        num_total_processed_tokens + prompt_sequence_length, sequence_length
+    )
+    # unmask the bits from the right-hand side
+    attention_mask[:, -num_attention_entries_to_unmask:] = 1
+    return attention_mask
+
+
+def _compute_positions(
+    num_total_processed_tokens: int, prompt_sequence_length: int, **kwargs
+):
+    # create the positions array with the appropriate shape
+    # positions count starts from the number of tokens already processed
+    # and ends at the number of tokens already processed + the number of tokens
+    # currently processed
+    return (
+        numpy.arange(
+            num_total_processed_tokens,
+            num_total_processed_tokens + prompt_sequence_length,
+        )
+        .reshape(1, -1)
+        .astype(numpy.int64)
+    )
+
+
 def validate_session_ids(
     session_ids: Optional[str], other_attributes: Dict[str, Any]
 ) -> Optional[List[str]]:
 
@@ -0,0 +1,22 @@
+# flake8: noqa
+
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .operators import *
+from .pipeline import *
+from .routers import *
+from .schedulers import *
+from .task import *
+from .utils import *
@@ -0,0 +1,20 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# flake8: noqa
+from .postprocess_operator import *
+from .preprocess_operator import *
+
+
+from .pipeline import *  # isort:skip
@@ -0,0 +1,62 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import warnings
+from typing import Dict, Optional, Tuple, Union
+
+from deepsparse.v2.image_classification.postprocess_operator import (
+    ImageClassificationPostProcess,
+)
+from deepsparse.v2.image_classification.preprocess_operator import (
+    ImageClassificationPreProcess,
+)
+from deepsparse.v2.operators.engine_operator import EngineOperator
+from deepsparse.v2.pipeline import Pipeline
+from deepsparse.v2.routers.router import LinearRouter
+from deepsparse.v2.schedulers.scheduler import OperatorScheduler
+
+
+_LOGGER = logging.getLogger(__name__)
+
+__all__ = ["ImageClassificationPipeline"]
+
+
+class ImageClassificationPipeline(Pipeline):
+    def __init__(
+        self,
+        model_path: str,
+        engine_kwargs: Optional[Dict] = None,
+        class_names: Union[None, str, Dict[str, str]] = None,
+        image_size: Optional[Tuple[int]] = None,
+        top_k: int = 1,
+    ):
+        if not engine_kwargs:
+            engine_kwargs = {}
+            engine_kwargs["model_path"] = model_path
+        elif engine_kwargs.get("model_path") != model_path:
+            warnings.warn(f"Updating engine_kwargs to include {model_path}")
+
+        engine = EngineOperator(**engine_kwargs)
+        preproces = ImageClassificationPreProcess(
+            model_path=engine.model_path, image_size=image_size
+        )
+        postprocess = ImageClassificationPostProcess(
+            top_k=top_k, class_names=class_names
+        )
+
+        ops = [preproces, engine, postprocess]
+        router = LinearRouter(end_route=len(ops))
+        scheduler = [OperatorScheduler()]
+        super().__init__(ops=ops, router=router, schedulers=scheduler)