[DeepSparse Evaluation API] UX Improvements (#1568)

dbogunowicz · web-flow · commit 59e0602ba1ac · 2024-02-05T10:56:34.000-05:00
* initial commit

* add some more tests for hardening

* Update src/deepsparse/evaluation/cli.py

* Update src/deepsparse/transformers/pipelines/text_generation/pipeline.py

* Apply suggestions from code review

* quality

* Update test_evaluator.py

* quality
diff --git a/src/deepsparse/evaluation/cli.py b/src/deepsparse/evaluation/cli.py
@@ -20,7 +20,8 @@
   Module for evaluating models on the various evaluation integrations
 
 OPTIONS:
-    --target TARGET     A path to a remote or local directory containing ONNX/torch model
+    --model_path MODEL_PATH
+                        A path to an ONNX model, local directory containing ONNX model
                         (including all the auxiliary files) or a SparseZoo stub
     -d DATASET, --dataset DATASET
                         The dataset to evaluate on. The user may pass multiple datasets
@@ -30,9 +31,7 @@
                         integration name that is registered in the evaluation registry
     -e ENGINE_TYPE, --engine_type ENGINE_TYPE
                         Inference engine to use for the evaluation. The default
-                        is the DeepSparse engine. If the evaluation should be run
-                        without initializing a pipeline (e.g. for the evaluation
-                        of a torch model), the engine type should be set to None
+                        is the DeepSparse engine.
     -s SAVE_PATH, --save_path SAVE_PATH
                         The path to save the evaluation results.
                         By default the results will be saved in the
@@ -90,10 +89,10 @@
     )
 )
 @click.option(
-    "--target",
+    "--model_path",
     type=click.Path(dir_okay=True, file_okay=True),
     required=True,
-    help="A path to a remote or local directory containing ONNX/torch model "
+    help="A path to an ONNX model, local directory containing ONNX model "
     "(including all the auxiliary files) or a SparseZoo stub",
 )
 @click.option(
@@ -118,9 +117,7 @@
     type=click.Choice([DEEPSPARSE_ENGINE, ORT_ENGINE, TORCHSCRIPT_ENGINE]),
     default=DEEPSPARSE_ENGINE,
     help="The engine to use for the evaluation. The default is the "
-    "DeepSparse engine. If the evaluation should be run without "
-    "initializing a pipeline (e.g. for the evaluation of a torch "
-    "model), the engine type should be set to None",
+    "DeepSparse engine. ",
 )
 @click.option(
     "-s",
@@ -167,7 +164,7 @@
 )
 @click.argument("integration_args", nargs=-1, type=click.UNPROCESSED)
 def main(
-    target,
+    model_path,
     dataset,
     integration,
     engine_type,
@@ -183,14 +180,9 @@ def main(
     # format kwargs to a  dict
     integration_args = args_to_dict(integration_args)
 
-    _LOGGER.info(f"Target to evaluate: {target}")
-    if engine_type:
-        _LOGGER.info(f"A pipeline with the engine type: {engine_type} will be created")
-    else:
-        _LOGGER.info(
-            "No engine type specified. The target "
-            "will be evaluated using the native framework"
-        )
+    _LOGGER.info(
+        f"Creating {engine_type} pipeline to evaluate from model path: {model_path}"
+    )
 
     _LOGGER.info(
         f"Datasets to evaluate on: {datasets}\n"
@@ -201,7 +193,7 @@ def main(
     )
 
     result: Result = evaluate(
-        target=target,
+        model=model_path,
         datasets=datasets,
         integration=integration,
         engine_type=engine_type,
diff --git a/src/deepsparse/evaluation/evaluator.py b/src/deepsparse/evaluation/evaluator.py
@@ -12,11 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-from typing import Any, List, Optional, Union
+from pathlib import Path
+from typing import List, Optional, Union
 
+from deepsparse import Pipeline
 from deepsparse.evaluation.registry import EvaluationRegistry
 from deepsparse.evaluation.results import Result
-from deepsparse.evaluation.utils import create_model_from_target
+from deepsparse.evaluation.utils import create_pipeline
 from deepsparse.operators.engine_operator import (
     DEEPSPARSE_ENGINE,
     ORT_ENGINE,
@@ -30,30 +32,38 @@
 
 
 def evaluate(
-    target: Any,
+    model: Union[Pipeline, Path, str],
     datasets: Union[str, List[str]],
     integration: Optional[str] = None,
     engine_type: Union[
-        DEEPSPARSE_ENGINE, ORT_ENGINE, TORCHSCRIPT_ENGINE, None
+        DEEPSPARSE_ENGINE, ORT_ENGINE, TORCHSCRIPT_ENGINE
     ] = DEEPSPARSE_ENGINE,
     batch_size: int = 1,
     splits: Union[List[str], str, None] = None,
     metrics: Union[List[str], str, None] = None,
     **kwargs,
 ) -> Result:
 
-    # if target is a string, turn it into an appropriate model/pipeline
-    # otherwise assume it is a model/pipeline
-    model = (
-        create_model_from_target(target, engine_type)
-        if isinstance(target, str)
-        else target
+    if isinstance(model, Pipeline):
+        _LOGGER.info(
+            "Passed a Pipeline object into evaluate function. This will "
+            "override the following arguments:"
+        )
+        batch_size = model.batch_size
+        _LOGGER.info(f"batch_size: {batch_size}")
+        engine_type = model.engine_type
+        _LOGGER.info(f"engine_type: {engine_type}")
+
+    # if model is a path (str or Path), turn it into an appropriate pipeline
+    # otherwise assume it is already an initialized pipeline
+    pipeline = (
+        create_pipeline(model, engine_type) if isinstance(model, (Path, str)) else model
     )
 
-    eval_integration = EvaluationRegistry.resolve(model, datasets, integration)
+    eval_integration = EvaluationRegistry.resolve(pipeline, datasets, integration)
 
     return eval_integration(
-        model=model,
+        pipeline=pipeline,
         datasets=datasets,
         engine_type=engine_type,
         batch_size=batch_size,
diff --git a/src/deepsparse/evaluation/registry.py b/src/deepsparse/evaluation/registry.py
@@ -15,8 +15,9 @@
 Implementation of a registry for evaluation functions
 """
 import logging
-from typing import Any, Callable, List, Optional, Union
+from typing import Callable, List, Optional, Union
 
+from deepsparse import Pipeline
 from sparsezoo.utils.registry import RegistryMixin
 
 
@@ -38,7 +39,7 @@ def load_from_registry(cls, name: str) -> Callable[..., "Result"]:  # noqa: F821
     @classmethod
     def resolve(
         cls,
-        model: Any,
+        pipeline: Pipeline,
         datasets: Union[str, List[str]],
         integration: Optional[str] = None,
     ) -> Callable[..., "Result"]:  # noqa: F821
@@ -59,12 +60,12 @@ def resolve(
                 "No integration specified, inferring the evaluation"
                 "function from the input arguments..."
             )
-            integration = resolve_integration(model, datasets)
+            integration = resolve_integration(pipeline, datasets)
 
             if integration is None:
                 raise ValueError(
                     "Unable to resolve an evaluation function for the given model. "
-                    "Specify an integration name or use a model that is supported "
+                    "Specify an integration name or use a pipeline that is supported "
                 )
             _LOGGER.info(f"Inferred the evaluation function: {integration}")
 
diff --git a/src/deepsparse/evaluation/utils.py b/src/deepsparse/evaluation/utils.py
@@ -15,21 +15,11 @@
 import os
 from typing import Any, Dict, List, Optional, Tuple, Union
 
-
-try:
-    from transformers import AutoModelForCausalLM, PreTrainedModel
-
-    transformers_error = None
-except ImportError as import_error:
-    transformers_error = import_error
-
-
 from deepsparse import Pipeline
-from deepsparse.operators.engine_operator import DEEPSPARSE_ENGINE, ORT_ENGINE
 
 
 __all__ = [
-    "create_model_from_target",
+    "create_pipeline",
     "get_save_path",
     "args_to_dict",
     "resolve_integration",
@@ -57,36 +47,36 @@ def potentially_check_dependency_import(integration_name: str) -> bool:
 
 
 def resolve_integration(
-    model: Union[Pipeline, "PreTrainedModel"], datasets: Union[str, List[str]]
+    pipeline: Pipeline, datasets: Union[str, List[str]]
 ) -> Union[str, None]:
     """
-    Given a model and dataset, infer the name of the evaluation integration
+    Given a pipeline and dataset, infer the name of the evaluation integration
     to use. If unable to infer a name, return None.
 
     Currently:
         if the model is a generative language model,
         default to 'lm-evaluation-harness' otherwise return None
 
-    :param model: The model to infer the integration for
+    :param pipeline: The pipeline to infer the integration for
     :param datasets: The datasets to infer the integration for
     :return: The name of the integration to use or None if unable to infer
     """
-    if if_generative_language_model(model):
+    if if_generative_language_model(pipeline):
         return LM_EVALUATION_HARNESS
     return None
 
 
-def if_generative_language_model(model: Any) -> bool:
+def if_generative_language_model(pipeline: Pipeline) -> bool:
     """
     Checks if the model is a generative language model.
     """
-    _check_transformers_dependency()
-    if isinstance(model, Pipeline):
-        return model.__class__.__name__ == "TextGenerationPipeline"
-    elif isinstance(model, PreTrainedModel):
-        return "CausalLM" in model.__class__.__name__
-    else:
-        return False
+    pipeline_name = pipeline.__class__.__name__
+    if pipeline_name == "TextGenerationPipeline" or (
+        pipeline_name == "TextGenerationPipelineNoKVCache"
+    ):
+        return True
+
+    return False
 
 
 def args_to_dict(args: Tuple[Any, ...]) -> Dict[str, Any]:
@@ -134,43 +124,30 @@ def get_save_path(
     return os.path.join(base_path, file_name)
 
 
-def create_model_from_target(
-    target: str,
+def create_pipeline(
+    model_path: str,
     engine_type: Optional[str] = None,
     **kwargs,
-) -> Union[Pipeline, "AutoModelForCausalLM"]:
+) -> Pipeline:
     """
-    Create a model or a pipeline from a target path.
+    Create a pipeline for evaluation
 
-    Note: This function is currently limited to:
-        - creating pipelines of type 'text-generation'
-        - creating dense huggingface models of type 'AutoModelForCausalLM'
-    This function will be expanded in the future to support more
-    model types and frameworks.
+    Note: This function is currently primarily
+    focused on creating pipelines of type 'text-generation'
+    This function will be expanded in the future to support
+    more tasks and models
 
-    :param target: The target path to initialize the
+    :param model_path: The target path to initialize the
         text generation model from. This can be a local
         or remote path to the model or a sparsezoo stub
     :param engine_type: The engine type to initialize the model with.
-    :return: The initialized model
+    :return: The initialized pipeline
     """
-    _check_transformers_dependency()
-
-    if engine_type in [DEEPSPARSE_ENGINE, ORT_ENGINE]:
-        return Pipeline.create(
-            task="text-generation",
-            model_path=target,
-            sequence_length=kwargs.pop("sequence_length", 2048),
-            engine_type=engine_type,
-            batch_size=kwargs.pop("batch_size", 1),
-            **kwargs,
-        )
-    else:
-        return AutoModelForCausalLM.from_pretrained(target, **kwargs)
-
-
-def _check_transformers_dependency():
-    if transformers_error:
-        raise ImportError(
-            "transformers is needed to use this module"
-        ) from transformers_error
+    return Pipeline.create(
+        task=kwargs.pop("task", "text-generation"),
+        model_path=model_path,
+        sequence_length=kwargs.pop("sequence_length", 2048),
+        engine_type=engine_type,
+        batch_size=kwargs.pop("batch_size", 1),
+        **kwargs,
+    )
diff --git a/src/deepsparse/transformers/pipelines/text_generation/pipeline.py b/src/deepsparse/transformers/pipelines/text_generation/pipeline.py
@@ -357,6 +357,14 @@ def sequence_length(self) -> int:
         """
         return self.ops["single_engine"].sequence_length
 
+    @property
+    def batch_size(self) -> int:
+        return self.ops["single_engine"].batch_size
+
+    @property
+    def engine_type(self) -> str:
+        return self.ops["single_engine"]._engine_type
+
     def _get_continuous_batching_scheduler(
         self, batch_sizes: List[int], engines: List[EngineOperator]
     ) -> ContinuousBatchingScheduler:
diff --git a/src/deepsparse/transformers/pipelines/text_generation/pipeline_no_kv_cache.py b/src/deepsparse/transformers/pipelines/text_generation/pipeline_no_kv_cache.py
@@ -127,3 +127,11 @@ def expand_inputs(self, items, batch_size):
         out, orig_batch_size = split_engine_inputs(items, batch_size)
         combined_batches = [{"input_ids": b[0], "attention_mask": b[1]} for b in out]
         return combined_batches, orig_batch_size
+
+    @property
+    def batch_size(self) -> int:
+        return self.ops["engine_operator"].batch_size
+
+    @property
+    def engine_type(self) -> str:
+        return self.ops["engine_operator"]._engine_type
diff --git a/tests/deepsparse/evaluation/integrations/test_lm_evaluation_harness.py b/tests/deepsparse/evaluation/integrations/test_lm_evaluation_harness.py
@@ -12,19 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from transformers import AutoModelForCausalLM
+
 import pytest
 from deepsparse.evaluation.integrations import try_import_lm_evaluation_harness
-from deepsparse.evaluation.utils import create_model_from_target
+from deepsparse.evaluation.utils import create_pipeline
 
 
 @pytest.mark.parametrize(
     "pipeline, model_torch",
     [
         (
-            create_model_from_target(
+            create_pipeline(
                 "hf:mgoin/TinyStories-1M-deepsparse", engine_type="onnxruntime"
             ),
-            create_model_from_target("roneneldan/TinyStories-1M"),
+            AutoModelForCausalLM.from_pretrained("roneneldan/TinyStories-1M"),
         )
     ],
 )
diff --git a/tests/deepsparse/evaluation/test_evaluator.py b/tests/deepsparse/evaluation/test_evaluator.py
diff --git a/tests/deepsparse/evaluation/test_utils.py b/tests/deepsparse/evaluation/test_utils.py

Original file line number	Diff line number	Diff line change
`@@ -12,19 +12,21 @@`
`12`	`12`	`# See the License for the specific language governing permissions and`
`13`	`13`	`# limitations under the License.`
`14`	`14`
	`15`	`+from transformers import AutoModelForCausalLM`
	`16`	`+`
`15`	`17`	`import pytest`
`16`	`18`	`from deepsparse.evaluation.integrations import try_import_lm_evaluation_harness`
`17`		`-from deepsparse.evaluation.utils import create_model_from_target`
	`19`	`+from deepsparse.evaluation.utils import create_pipeline`
`18`	`20`
`19`	`21`
`20`	`22`	`@pytest.mark.parametrize(`
`21`	`23`	`"pipeline, model_torch",`
`22`	`24`	`[`
`23`	`25`	`(`
`24`		`- create_model_from_target(`
	`26`	`+ create_pipeline(`
`25`	`27`	`"hf:mgoin/TinyStories-1M-deepsparse", engine_type="onnxruntime"`
`26`	`28`	`),`
`27`		`- create_model_from_target("roneneldan/TinyStories-1M"),`
	`29`	`+ AutoModelForCausalLM.from_pretrained("roneneldan/TinyStories-1M"),`
`28`	`30`	`)`
`29`	`31`	`],`
`30`	`32`	`)`