From 48ac0accaa8a346327a96d4d8dde194e28b3be3a Mon Sep 17 00:00:00 2001 From: Damian Date: Mon, 5 Jun 2023 15:55:03 +0000 Subject: [PATCH 01/11] initial commit --- src/deepsparse/license.py | 3 +- src/deepsparse/server/cli.py | 1 + src/deepsparse/transformers/helpers.py | 12 +++++- src/deepsparse/utils/onnx.py | 55 +++++++++++++++++++------- src/deepsparse/yolo/utils/utils.py | 24 ++++++----- 5 files changed, 70 insertions(+), 25 deletions(-) diff --git a/src/deepsparse/license.py b/src/deepsparse/license.py index ed436aaaf9..f4035072d3 100644 --- a/src/deepsparse/license.py +++ b/src/deepsparse/license.py @@ -53,7 +53,7 @@ def add_deepsparse_license(token_or_path): candidate_license_file_path = token_or_path if not os.path.exists(token_or_path): - # write raw token to temp file for validadation + # write raw token to temp file for validation candidate_license_tempfile = NamedTemporaryFile() candidate_license_file_path = candidate_license_tempfile.name with open(candidate_license_file_path, "w") as token_file: @@ -70,6 +70,7 @@ def add_deepsparse_license(token_or_path): license_file_path = _get_license_file_path() shutil.copy(candidate_license_file_path, license_file_path) _LOGGER.info(f"DeepSparse license file written to {license_file_path}") + os.remove(candidate_license_file_path) # re-validate and print message now that licensee is copied to expected location validate_license() diff --git a/src/deepsparse/server/cli.py b/src/deepsparse/server/cli.py index 1b323e28e3..29cbc9afb0 100644 --- a/src/deepsparse/server/cli.py +++ b/src/deepsparse/server/cli.py @@ -228,6 +228,7 @@ def main( loggers={}, ) + # saving yaml config to temporary directory with TemporaryDirectory() as tmp_dir: config_path = os.path.join(tmp_dir, "server-config.yaml") with open(config_path, "w") as fp: diff --git a/src/deepsparse/transformers/helpers.py b/src/deepsparse/transformers/helpers.py index d80949eb11..d798231050 100644 --- a/src/deepsparse/transformers/helpers.py +++ b/src/deepsparse/transformers/helpers.py @@ -136,6 +136,7 @@ def overwrite_transformer_onnx_model_inputs( batch_size: int = 1, max_length: int = 128, output_path: Optional[str] = None, + inplace: bool = True, ) -> Tuple[Optional[str], List[str], Optional[NamedTemporaryFile]]: """ Overrides an ONNX model's inputs to have the given batch size and sequence lengths. @@ -148,12 +149,21 @@ def overwrite_transformer_onnx_model_inputs( :param output_path: if provided, the model will be saved to the given path, otherwise, the model will be saved to a named temporary file that will be deleted after the program exits + :param inplace: if True, the model will be modified in place, otherwise + a copy of the model will be saved to a temporary file :return: if no output path, a tuple of the saved path to the model, list of model input names, and reference to the tempfile object will be returned otherwise, only the model input names will be returned """ + + if inplace and output_path is None: + raise ValueError( + "Cannot specify both inplace=True and output_path. 
If inplace=True, " + "the model will be modified in place (the returned path will be identical" + "to the input path specified in argument `path`)" + ) # overwrite input shapes - model = onnx.load(path) + model = onnx.load(path, load_external_data=not inplace) initializer_input_names = set([node.name for node in model.graph.initializer]) external_inputs = [ inp for inp in model.graph.input if inp.name not in initializer_input_names diff --git a/src/deepsparse/utils/onnx.py b/src/deepsparse/utils/onnx.py index 326c4b215d..8b40ab4346 100644 --- a/src/deepsparse/utils/onnx.py +++ b/src/deepsparse/utils/onnx.py @@ -24,7 +24,7 @@ from onnx.mapping import TENSOR_TYPE_TO_NP_TYPE from deepsparse.utils.extractor import Extractor -from sparsezoo.utils import save_onnx, validate_onnx +from sparsezoo.utils import onnx_includes_external_data, save_onnx, validate_onnx try: @@ -53,13 +53,21 @@ @contextlib.contextmanager -def save_onnx_to_temp_files(model: Model, with_external_data=True) -> str: +def save_onnx_to_temp_files(model: onnx.ModelProto, with_external_data=False) -> str: """ Save model to a temporary file. Works for models with external data. + :param model: The onnx model to save to temporary directory :param with_external_data: Whether to save external data to a separate file """ + if not onnx_includes_external_data(model) and with_external_data: + raise ValueError( + "Model does not include external data, it only includes the model graph." + "Cannot save its external data to separate a file." + "Set argument `with_external_data`=False" + ) shaped_model = tempfile.NamedTemporaryFile(mode="w", delete=False) + if with_external_data: external_data = os.path.join( tempfile.tempdir, next(tempfile._get_candidate_names()) @@ -195,16 +203,27 @@ def generate_random_inputs( def override_onnx_batch_size( - onnx_filepath: str, batch_size: int, inplace: bool = False + onnx_filepath: str, + batch_size: int, + inplace: bool = True, ) -> str: """ Rewrite batch sizes of ONNX model, saving the modified model and returning its path - :param onnx_filepath: File path to ONNX model + + :param onnx_filepath: File path to ONNX model. If the graph is to be + modified in-place, only the model graph will be loaded and modified. + Otherwise, the entire model will be loaded and modified, so that + external data are saved along the model graph. :param batch_size: Override for the batch size dimension - :param inplace: If True, overwrite the original model file - :return: File path to modified ONNX model + :param inplace: If True, overwrite the original model file. + Else save the modified model to a temporary file. + :return: File path to modified ONNX model. + If inplace is True, + the modified model will be saved to the same path as the original + model. Else the modified model will be saved to a + temporary file. 
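A minimal usage sketch for the reworked batch-size override above, assuming a hypothetical local model.onnx; the call pattern and return values are read off the docstring and body in this patch rather than a documented API, so treat it as illustrative only.

# Illustrative sketch only; "model.onnx" is a hypothetical local file.
from deepsparse.utils.onnx import override_onnx_batch_size

# inplace=True (the new default): only the graph is loaded
# (load_external_data=False), the file is rewritten with save_onnx, and the
# original path is returned, so no temporary copy is created.
path = override_onnx_batch_size("model.onnx", batch_size=4, inplace=True)
assert path == "model.onnx"

# inplace=False: the full model, external data included, is loaded, and the
# modified copy is handed to save_onnx_to_temp_files so the original file on
# disk stays untouched.
copy_location = override_onnx_batch_size("model.onnx", batch_size=4, inplace=False)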
""" - model = onnx.load(onnx_filepath, load_external_data=False) + model = onnx.load(onnx_filepath, load_external_data=not inplace) all_inputs = model.graph.input initializer_input_names = [node.name for node in model.graph.initializer] external_inputs = [ @@ -215,30 +234,38 @@ def override_onnx_batch_size( # Save modified model, this will be cleaned up when context is exited if inplace: - onnx.save(model, onnx_filepath) + save_onnx(model, onnx_filepath) return onnx_filepath else: # Save modified model, this will be cleaned up when context is exited - return save_onnx_to_temp_files(model, with_external_data=False) + return save_onnx_to_temp_files(model, with_external_data=not inplace) def override_onnx_input_shapes( onnx_filepath: str, input_shapes: Union[List[int], List[List[int]]], - inplace: bool = False, + inplace: bool = True, ) -> str: """ Rewrite input shapes of ONNX model, saving the modified model and returning its path - :param onnx_filepath: File path to ONNX model + + :param onnx_filepath: File path to ONNX model. If the graph is to be + modified in-place, only the model graph will be loaded and modified. + Otherwise, the entire model will be loaded and modified, so that + external data are saved along the model graph. :param input_shapes: Override for model's input shapes :param inplace: If True, overwrite the original model file - :return: File path to modified ONNX model + :return: File path to modified ONNX model. + If inplace is True, + the modified model will be saved to the same path as the original + model. Else the modified model will be saved to a + temporary file. """ if input_shapes is None: return onnx_filepath - model = onnx.load(onnx_filepath, load_external_data=False) + model = onnx.load(onnx_filepath, load_external_data=not inplace) all_inputs = model.graph.input initializer_input_names = [node.name for node in model.graph.initializer] external_inputs = [ @@ -279,7 +306,7 @@ def override_onnx_input_shapes( return onnx_filepath else: # Save modified model, this will be cleaned up when context is exited - return save_onnx_to_temp_files(model, with_external_data=False) + return save_onnx_to_temp_files(model, with_external_data=not inplace) def truncate_onnx_model( diff --git a/src/deepsparse/yolo/utils/utils.py b/src/deepsparse/yolo/utils/utils.py index ebbd48233b..3a0f596fe1 100644 --- a/src/deepsparse/yolo/utils/utils.py +++ b/src/deepsparse/yolo/utils/utils.py @@ -29,6 +29,7 @@ import yaml import torch +from deepsparse.utils.onnx import save_onnx_to_temp_files from deepsparse.yolo.schemas import YOLOOutput from sparsezoo.utils import save_onnx @@ -341,7 +342,7 @@ def get_onnx_expected_image_shape(onnx_model: onnx.ModelProto) -> Tuple[int, ... def modify_yolo_onnx_input_shape( - model_path: str, image_shape: Tuple[int, int] + model_path: str, image_shape: Tuple[int, int], inplace: bool = True ) -> Tuple[str, Optional[NamedTemporaryFile]]: """ Creates a new YOLO ONNX model from the given path that accepts the given input @@ -350,13 +351,17 @@ def modify_yolo_onnx_input_shape( :param model_path: file path to YOLO ONNX model :param image_shape: 2-tuple of the image shape to resize this yolo model to - :return: filepath to an onnx model reshaped to the given input shape will be the - original path if the shape is the same. 
Additionally returns the - NamedTemporaryFile for managing the scope of the object for file deletion + :param inplace: if True, modifies the given model_path in-place, otherwise + saves the modified model to a temporary file + :return: filepath to an onnx model reshaped to the given input shape. + If inplace is True, + the modified model will be saved to the same path as the original + model. Else the modified model will be saved to a + temporary file. """ has_postprocessing = yolo_onnx_has_postprocessing(model_path) - model = onnx.load(model_path) + model = onnx.load(model_path, load_external_data=not inplace) model_input = model.graph.input[0] initial_x, initial_y = get_onnx_expected_image_shape(model) @@ -399,10 +404,11 @@ def modify_yolo_onnx_input_shape( ) set_tensor_dim_shape(model.graph.output[0], 1, num_predictions) - tmp_file = NamedTemporaryFile() # file will be deleted after program exit - save_onnx(model, tmp_file.name) - - return tmp_file.name, tmp_file + if inplace: + save_onnx(model, model_path) + return model_path + else: + return save_onnx_to_temp_files(model, with_external_data=not inplace) def get_tensor_dim_shape(tensor: onnx.TensorProto, dim: int) -> int: From cf7f2b92c38a08cd34974931a9520e55e088d8cb Mon Sep 17 00:00:00 2001 From: dbogunowicz <97082108+dbogunowicz@users.noreply.github.com> Date: Mon, 5 Jun 2023 17:59:30 +0200 Subject: [PATCH 02/11] Update src/deepsparse/license.py --- src/deepsparse/license.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/deepsparse/license.py b/src/deepsparse/license.py index f4035072d3..06acdd2f0c 100644 --- a/src/deepsparse/license.py +++ b/src/deepsparse/license.py @@ -70,7 +70,6 @@ def add_deepsparse_license(token_or_path): license_file_path = _get_license_file_path() shutil.copy(candidate_license_file_path, license_file_path) _LOGGER.info(f"DeepSparse license file written to {license_file_path}") - os.remove(candidate_license_file_path) # re-validate and print message now that licensee is copied to expected location validate_license() From e6d2b0326bb0a7a5e8085a4ec779ce5ef4bd8859 Mon Sep 17 00:00:00 2001 From: Damian Date: Wed, 7 Jun 2023 13:14:41 +0000 Subject: [PATCH 03/11] limit to 150mb --- src/deepsparse/transformers/helpers.py | 8 +- src/deepsparse/utils/onnx.py | 3 + tests/conftest.py | 35 ++ .../helpers/test_config_generation.py | 3 + .../loggers/test_prometheus_logger.py | 3 + tests/server/test_app.py | 332 +++++------ tests/server/test_config.py | 444 +++++++-------- tests/server/test_endpoints.py | 536 +++++++++--------- tests/server/test_loggers.py | 486 ++++++++-------- tests/server/test_system_logging.py | 338 +++++------ 10 files changed, 1118 insertions(+), 1070 deletions(-) diff --git a/src/deepsparse/transformers/helpers.py b/src/deepsparse/transformers/helpers.py index d798231050..847a7a9924 100644 --- a/src/deepsparse/transformers/helpers.py +++ b/src/deepsparse/transformers/helpers.py @@ -156,12 +156,14 @@ def overwrite_transformer_onnx_model_inputs( otherwise, only the model input names will be returned """ - if inplace and output_path is None: + if inplace and output_path is not None: raise ValueError( "Cannot specify both inplace=True and output_path. 
If inplace=True, " "the model will be modified in place (the returned path will be identical" "to the input path specified in argument `path`)" ) + if inplace: + output_path = path # overwrite input shapes model = onnx.load(path, load_external_data=not inplace) initializer_input_names = set([node.name for node in model.graph.initializer]) @@ -175,14 +177,14 @@ def overwrite_transformer_onnx_model_inputs( input_names.append(external_input.name) # Save modified model - if output_path is None: + if not inplace: tmp_file = NamedTemporaryFile() # file will be deleted after program exit save_onnx(model, tmp_file.name) return tmp_file.name, input_names, tmp_file else: save_onnx(model, output_path) - return input_names + return output_path, input_names, None def _get_file_parent(file_path: str) -> str: diff --git a/src/deepsparse/utils/onnx.py b/src/deepsparse/utils/onnx.py index 8b40ab4346..00f5f24233 100644 --- a/src/deepsparse/utils/onnx.py +++ b/src/deepsparse/utils/onnx.py @@ -60,6 +60,7 @@ def save_onnx_to_temp_files(model: onnx.ModelProto, with_external_data=False) -> :param model: The onnx model to save to temporary directory :param with_external_data: Whether to save external data to a separate file """ + if not onnx_includes_external_data(model) and with_external_data: raise ValueError( "Model does not include external data, it only includes the model graph." @@ -67,6 +68,7 @@ def save_onnx_to_temp_files(model: onnx.ModelProto, with_external_data=False) -> "Set argument `with_external_data`=False" ) shaped_model = tempfile.NamedTemporaryFile(mode="w", delete=False) + _LOGGER.warning(f"Saving model to temporary directory: {tempfile.tempdir}") if with_external_data: external_data = os.path.join( @@ -385,6 +387,7 @@ def truncate_onnx_model( output.type.tensor_type.shape.Clear() # save and check model + _LOGGER.info("Saving truncated model to %s", output_filepath) save_onnx(extracted_model, output_filepath, "external_data") validate_onnx(output_filepath) diff --git a/tests/conftest.py b/tests/conftest.py index 323c0b703e..62f781f043 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -13,6 +13,7 @@ # limitations under the License. import os +import tempfile from subprocess import Popen from typing import List @@ -20,6 +21,14 @@ from tests.helpers import delete_file +def _get_files(directory: str) -> List[str]: + list_filepaths = [] + for root, dirs, files in os.walk(directory): + for file in files: + list_filepaths.append(os.path.join(os.path.abspath(root), file)) + return list_filepaths + + @pytest.fixture def cleanup(): filenames: List[str] = [] @@ -50,3 +59,29 @@ def cleanup(): ) for proc in processes: proc.terminate() + + +@pytest.fixture(scope="session", autouse=True) +def check_for_created_files(): + start_files_root = _get_files(directory=r".") + start_files_temp = _get_files(directory=tempfile.gettempdir()) + yield + end_files_root = _get_files(directory=r".") + end_files_temp = _get_files(directory=tempfile.gettempdir()) + + assert len(start_files_root) >= len(end_files_root), ( + f"{len(end_files_root) - len(start_files_root)} " + f"files created in current working " + f"directory during pytest run. 
" + f"Created files: {set(end_files_root) - set(start_files_root)}" + ) + max_allowed_sized_temp_files_megabytes = 150 + size_of_temp_files_bytes = sum( + os.path.getsize(path) for path in set(end_files_temp) - set(start_files_temp) + ) + size_of_temp_files_megabytes = size_of_temp_files_bytes / 1024 / 1024 + assert max_allowed_sized_temp_files_megabytes >= size_of_temp_files_megabytes, ( + f"{size_of_temp_files_megabytes} " + f"megabytes of temp files created in temp directory during pytest run. " + f"Created files: {set(end_files_temp) - set(start_files_temp)}" + ) diff --git a/tests/deepsparse/loggers/metric_functions/helpers/test_config_generation.py b/tests/deepsparse/loggers/metric_functions/helpers/test_config_generation.py index 9350f22c6e..7cf6ad0c07 100644 --- a/tests/deepsparse/loggers/metric_functions/helpers/test_config_generation.py +++ b/tests/deepsparse/loggers/metric_functions/helpers/test_config_generation.py @@ -14,6 +14,7 @@ import os +import shutil import yaml @@ -155,6 +156,8 @@ def test_data_logging_config_from_predefined( with open(os.path.join(tmp_path, "data_logging_config.yaml"), "r") as stream: string_result_saved = yaml.safe_load(stream) assert string_result_saved == yaml.safe_load(expected_result) + return + shutil.rmtree(tmp_path, ignore_errors=True) result_1 = """loggers: diff --git a/tests/deepsparse/loggers/test_prometheus_logger.py b/tests/deepsparse/loggers/test_prometheus_logger.py index e2935cfb62..689b5163af 100644 --- a/tests/deepsparse/loggers/test_prometheus_logger.py +++ b/tests/deepsparse/loggers/test_prometheus_logger.py @@ -13,6 +13,8 @@ # limitations under the License. +import shutil + import requests import pytest @@ -119,6 +121,7 @@ def test_prometheus_logger( count_request_text = float(text_log_lines[98].split(" ")[1]) assert count_request_request == count_request_text == no_iterations + shutil.rmtree(tmp_path) @pytest.mark.parametrize( diff --git a/tests/server/test_app.py b/tests/server/test_app.py index 9bc71e1a36..678152adc9 100644 --- a/tests/server/test_app.py +++ b/tests/server/test_app.py @@ -1,166 +1,166 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -from copy import deepcopy -from re import escape -from unittest.mock import patch - -import pytest -from deepsparse.server.config import EndpointConfig, ServerConfig -from deepsparse.server.server import _build_app - - -def test_add_multiple_endpoints_with_no_route(): - with pytest.raises( - ValueError, - match=( - "must specify `route` for all endpoints if multiple endpoints are used." 
- ), - ): - _build_app( - ServerConfig( - num_cores=1, - num_workers=1, - endpoints=[ - EndpointConfig(task="", model="", route=None), - EndpointConfig(task="", model="", route=None), - ], - loggers={}, - ) - ) - - -def test_add_multiple_endpoints_with_same_route(): - with pytest.raises(ValueError, match="asdf specified 2 times"): - _build_app( - ServerConfig( - num_cores=1, - num_workers=1, - endpoints=[ - EndpointConfig(task="", model="", route="asdf"), - EndpointConfig(task="", model="", route="asdf"), - ], - loggers={}, - ) - ) - - -def test_invalid_integration(): - with pytest.raises( - ValueError, - match=escape( - "Unknown integration field asdf. Expected one of ['local', 'sagemaker']" - ), - ): - _build_app( - ServerConfig( - num_cores=1, - num_workers=1, - integration="asdf", - endpoints=[], - loggers={}, - ) - ) - - -def test_pytorch_num_threads(): - torch = pytest.importorskip("torch") - - orig_num_threads = torch.get_num_threads() - _build_app( - ServerConfig( - num_cores=1, - num_workers=1, - pytorch_num_threads=None, - endpoints=[], - loggers={}, - ) - ) - assert torch.get_num_threads() == orig_num_threads - - _build_app( - ServerConfig( - num_cores=1, - num_workers=1, - pytorch_num_threads=1, - endpoints=[], - loggers={}, - ) - ) - assert torch.get_num_threads() == 1 - - -@patch.dict(os.environ, deepcopy(os.environ)) -def test_thread_pinning_none(): - os.environ.pop("NM_BIND_THREADS_TO_CORES", None) - os.environ.pop("NM_BIND_THREADS_TO_SOCKETS", None) - _build_app( - ServerConfig( - num_cores=1, - num_workers=1, - engine_thread_pinning="none", - endpoints=[], - loggers={}, - ) - ) - assert os.environ["NM_BIND_THREADS_TO_CORES"] == "0" - assert os.environ["NM_BIND_THREADS_TO_SOCKETS"] == "0" - - -@patch.dict(os.environ, deepcopy(os.environ)) -def test_thread_pinning_numa(): - os.environ.pop("NM_BIND_THREADS_TO_CORES", None) - os.environ.pop("NM_BIND_THREADS_TO_SOCKETS", None) - _build_app( - ServerConfig( - num_cores=1, - num_workers=1, - engine_thread_pinning="numa", - endpoints=[], - loggers={}, - ) - ) - assert os.environ["NM_BIND_THREADS_TO_CORES"] == "0" - assert os.environ["NM_BIND_THREADS_TO_SOCKETS"] == "1" - - -@patch.dict(os.environ, deepcopy(os.environ)) -def test_thread_pinning_cores(): - os.environ.pop("NM_BIND_THREADS_TO_CORES", None) - os.environ.pop("NM_BIND_THREADS_TO_SOCKETS", None) - _build_app( - ServerConfig( - num_cores=1, - num_workers=1, - engine_thread_pinning="core", - endpoints=[], - loggers={}, - ) - ) - assert os.environ["NM_BIND_THREADS_TO_CORES"] == "1" - assert os.environ["NM_BIND_THREADS_TO_SOCKETS"] == "0" - - -def test_invalid_thread_pinning(): - with pytest.raises(ValueError, match='Expected one of {"core","numa","none"}.'): - _build_app( - ServerConfig( - num_cores=1, - num_workers=1, - engine_thread_pinning="asdf", - endpoints=[], - loggers={}, - ) - ) +# # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# # +# # Licensed under the Apache License, Version 2.0 (the "License"); +# # you may not use this file except in compliance with the License. +# # You may obtain a copy of the License at +# # +# # http://www.apache.org/licenses/LICENSE-2.0 +# # +# # Unless required by applicable law or agreed to in writing, +# # software distributed under the License is distributed on an "AS IS" BASIS, +# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# # See the License for the specific language governing permissions and +# # limitations under the License. 
+# +# import os +# from copy import deepcopy +# from re import escape +# from unittest.mock import patch +# +# import pytest +# from deepsparse.server.config import EndpointConfig, ServerConfig +# from deepsparse.server.server import _build_app +# +# +# def test_add_multiple_endpoints_with_no_route(): +# with pytest.raises( +# ValueError, +# match=( +# "must specify `route` for all endpoints if multiple endpoints are used." +# ), +# ): +# _build_app( +# ServerConfig( +# num_cores=1, +# num_workers=1, +# endpoints=[ +# EndpointConfig(task="", model="", route=None), +# EndpointConfig(task="", model="", route=None), +# ], +# loggers={}, +# ) +# ) +# +# +# def test_add_multiple_endpoints_with_same_route(): +# with pytest.raises(ValueError, match="asdf specified 2 times"): +# _build_app( +# ServerConfig( +# num_cores=1, +# num_workers=1, +# endpoints=[ +# EndpointConfig(task="", model="", route="asdf"), +# EndpointConfig(task="", model="", route="asdf"), +# ], +# loggers={}, +# ) +# ) +# +# +# def test_invalid_integration(): +# with pytest.raises( +# ValueError, +# match=escape( +# "Unknown integration field asdf. Expected one of ['local', 'sagemaker']" +# ), +# ): +# _build_app( +# ServerConfig( +# num_cores=1, +# num_workers=1, +# integration="asdf", +# endpoints=[], +# loggers={}, +# ) +# ) +# +# +# def test_pytorch_num_threads(): +# torch = pytest.importorskip("torch") +# +# orig_num_threads = torch.get_num_threads() +# _build_app( +# ServerConfig( +# num_cores=1, +# num_workers=1, +# pytorch_num_threads=None, +# endpoints=[], +# loggers={}, +# ) +# ) +# assert torch.get_num_threads() == orig_num_threads +# +# _build_app( +# ServerConfig( +# num_cores=1, +# num_workers=1, +# pytorch_num_threads=1, +# endpoints=[], +# loggers={}, +# ) +# ) +# assert torch.get_num_threads() == 1 +# +# +# @patch.dict(os.environ, deepcopy(os.environ)) +# def test_thread_pinning_none(): +# os.environ.pop("NM_BIND_THREADS_TO_CORES", None) +# os.environ.pop("NM_BIND_THREADS_TO_SOCKETS", None) +# _build_app( +# ServerConfig( +# num_cores=1, +# num_workers=1, +# engine_thread_pinning="none", +# endpoints=[], +# loggers={}, +# ) +# ) +# assert os.environ["NM_BIND_THREADS_TO_CORES"] == "0" +# assert os.environ["NM_BIND_THREADS_TO_SOCKETS"] == "0" +# +# +# @patch.dict(os.environ, deepcopy(os.environ)) +# def test_thread_pinning_numa(): +# os.environ.pop("NM_BIND_THREADS_TO_CORES", None) +# os.environ.pop("NM_BIND_THREADS_TO_SOCKETS", None) +# _build_app( +# ServerConfig( +# num_cores=1, +# num_workers=1, +# engine_thread_pinning="numa", +# endpoints=[], +# loggers={}, +# ) +# ) +# assert os.environ["NM_BIND_THREADS_TO_CORES"] == "0" +# assert os.environ["NM_BIND_THREADS_TO_SOCKETS"] == "1" +# +# +# @patch.dict(os.environ, deepcopy(os.environ)) +# def test_thread_pinning_cores(): +# os.environ.pop("NM_BIND_THREADS_TO_CORES", None) +# os.environ.pop("NM_BIND_THREADS_TO_SOCKETS", None) +# _build_app( +# ServerConfig( +# num_cores=1, +# num_workers=1, +# engine_thread_pinning="core", +# endpoints=[], +# loggers={}, +# ) +# ) +# assert os.environ["NM_BIND_THREADS_TO_CORES"] == "1" +# assert os.environ["NM_BIND_THREADS_TO_SOCKETS"] == "0" +# +# +# def test_invalid_thread_pinning(): +# with pytest.raises(ValueError, match='Expected one of {"core","numa","none"}.'): +# _build_app( +# ServerConfig( +# num_cores=1, +# num_workers=1, +# engine_thread_pinning="asdf", +# endpoints=[], +# loggers={}, +# ) +# ) diff --git a/tests/server/test_config.py b/tests/server/test_config.py index b1c1c75a84..f2f9b0e6fe 100644 --- 
a/tests/server/test_config.py +++ b/tests/server/test_config.py @@ -1,222 +1,222 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import yaml - -import pytest -from deepsparse.server.config import ( - EndpointConfig, - ImageSizesConfig, - MetricFunctionConfig, - SequenceLengthsConfig, - ServerConfig, -) - - -def test_no_bucketing_config(): - cfg = EndpointConfig(task="", model="").to_pipeline_config() - assert cfg.input_shapes is None - assert cfg.kwargs == {} - - -@pytest.mark.parametrize("task", ["yolo", "yolact", "image_classification"]) -def test_bucketing_sequence_length_for_cv(task): - with pytest.raises(ValueError, match=f"for non-nlp task {task}"): - EndpointConfig( - task=task, model="", bucketing=SequenceLengthsConfig(sequence_lengths=[]) - ).to_pipeline_config() - - -@pytest.mark.parametrize( - "task", ["question_answering", "text_classification", "token_classification"] -) -def test_bucketing_image_size_for_nlp(task): - with pytest.raises(ValueError, match=f"for non computer vision task {task}"): - EndpointConfig( - task=task, model="", bucketing=ImageSizesConfig(image_sizes=[]) - ).to_pipeline_config() - - -def test_bucketing_zero_sequence_length(): - with pytest.raises(ValueError, match="at least one sequence length"): - EndpointConfig( - task="qa", model="", bucketing=SequenceLengthsConfig(sequence_lengths=[]) - ).to_pipeline_config() - - -def test_bucketing_zero_image_size(): - with pytest.raises(ValueError, match="at least one image size"): - EndpointConfig( - task="yolo", model="", bucketing=ImageSizesConfig(image_sizes=[]) - ).to_pipeline_config() - - -def test_bucketing_one_sequence_length(): - cfg = EndpointConfig( - task="qa", model="", bucketing=SequenceLengthsConfig(sequence_lengths=[32]) - ).to_pipeline_config() - assert cfg.input_shapes is None - assert cfg.kwargs == {"sequence_length": 32} - - -def test_bucketing_multi_sequence_length(): - cfg = EndpointConfig( - task="qa", model="", bucketing=SequenceLengthsConfig(sequence_lengths=[32, 64]) - ).to_pipeline_config() - assert cfg.input_shapes is None - assert cfg.kwargs == {"sequence_length": [32, 64]} - - -def test_bucketing_one_image_size(): - cfg = EndpointConfig( - task="yolo", model="", bucketing=ImageSizesConfig(image_sizes=[(256, 256)]) - ).to_pipeline_config() - assert cfg.input_shapes == [[256, 256]] - assert cfg.kwargs == {} - - -def test_endpoint_config_to_pipeline_copy_fields(): - cfg = EndpointConfig(task="qa", model="zxcv").to_pipeline_config() - assert cfg.task == "qa" - assert cfg.model_path == "zxcv" - - cfg = EndpointConfig(task="", model="").to_pipeline_config() - assert cfg.batch_size == 1 - - cfg = EndpointConfig(task="", model="", batch_size=64).to_pipeline_config() - assert cfg.batch_size == 64 - - -def test_yaml_load_config(tmp_path): - server_config = ServerConfig( - num_cores=1, - num_workers=2, - integration="sagemaker", - endpoints=[ - EndpointConfig( - name="asdf", - route="qwer", - task="uiop", - model="hjkl", - 
batch_size=1, - bucketing=None, - ), - EndpointConfig( - name="asdfd", - route="qwer", - task="uiop", - model="hjkl", - batch_size=2, - bucketing=ImageSizesConfig(image_sizes=[(1, 1), (2, 2)]), - ), - EndpointConfig( - name="asdfde", - route="qwer", - task="uiop", - model="hjkl", - batch_size=3, - bucketing=SequenceLengthsConfig(sequence_lengths=[5, 6, 7]), - ), - ], - loggers={}, - ) - - path = tmp_path / "config.yaml" - with open(path, "w") as fp: - yaml.dump(server_config.dict(), fp) - - with open(path) as fp: - obj = yaml.load(fp, Loader=yaml.Loader) - server_config2 = ServerConfig(**obj) - assert server_config == server_config2 - - -metric_function_config_yaml_1 = """ - func: identity - frequency: 5 - loggers: - - python""" - -metric_function_config_yaml_2 = """ - func: numpy.max""" - -metric_function_config_yaml_3 = """ - func: numpy.max - frequency: 0""" - - -@pytest.mark.parametrize( - "config_yaml, should_fail, instance_type", - [ - (metric_function_config_yaml_1, False, MetricFunctionConfig), - (metric_function_config_yaml_2, False, MetricFunctionConfig), - ( - metric_function_config_yaml_3, - True, - MetricFunctionConfig, - ), # frequency cannot be zero - ], -) -def test_function_logging_config(config_yaml, should_fail, instance_type): - obj = yaml.safe_load(config_yaml) - if should_fail: - with pytest.raises(Exception): - MetricFunctionConfig(**obj) - else: - assert MetricFunctionConfig(**obj) - - -def _create_server_config(task_name, endpoint_1_name, endpoint_2_name): - return ServerConfig( - endpoints=[ - EndpointConfig( - name=endpoint_1_name, - task=task_name, - model="hjkl", - ), - EndpointConfig( - name=endpoint_2_name, - task=task_name, - model="hjkl", - ), - ] - ) - - -@pytest.mark.parametrize( - "task_name, endpoint_1_name, endpoint_2_name, raise_error, expected_endpoint_1_name, expected_endpoint_2_name", # noqa: E501 - [ - ("some_task", None, None, False, "some_task-0", "some_task-1"), - ("some_task", "name_1", None, False, "name_1", "some_task-0"), - ("some_task", "name_1", "name_2", False, "name_1", "name_2"), - ("some_task", "name_1", "name_1", True, None, None), - ], -) -def test_unique_endpoint_names( - task_name, - endpoint_1_name, - endpoint_2_name, - raise_error, - expected_endpoint_1_name, - expected_endpoint_2_name, -): - if raise_error: - with pytest.raises(ValueError): - _create_server_config(task_name, endpoint_1_name, endpoint_2_name) - return - return - - server_config = _create_server_config(task_name, endpoint_1_name, endpoint_2_name) - assert server_config.endpoints[0].name == expected_endpoint_1_name - assert server_config.endpoints[1].name == expected_endpoint_2_name +# # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# # +# # Licensed under the Apache License, Version 2.0 (the "License"); +# # you may not use this file except in compliance with the License. +# # You may obtain a copy of the License at +# # +# # http://www.apache.org/licenses/LICENSE-2.0 +# # +# # Unless required by applicable law or agreed to in writing, +# # software distributed under the License is distributed on an "AS IS" BASIS, +# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# # See the License for the specific language governing permissions and +# # limitations under the License. 
+# +# import yaml +# +# import pytest +# from deepsparse.server.config import ( +# EndpointConfig, +# ImageSizesConfig, +# MetricFunctionConfig, +# SequenceLengthsConfig, +# ServerConfig, +# ) +# +# +# def test_no_bucketing_config(): +# cfg = EndpointConfig(task="", model="").to_pipeline_config() +# assert cfg.input_shapes is None +# assert cfg.kwargs == {} +# +# +# @pytest.mark.parametrize("task", ["yolo", "yolact", "image_classification"]) +# def test_bucketing_sequence_length_for_cv(task): +# with pytest.raises(ValueError, match=f"for non-nlp task {task}"): +# EndpointConfig( +# task=task, model="", bucketing=SequenceLengthsConfig(sequence_lengths=[]) +# ).to_pipeline_config() +# +# +# @pytest.mark.parametrize( +# "task", ["question_answering", "text_classification", "token_classification"] +# ) +# def test_bucketing_image_size_for_nlp(task): +# with pytest.raises(ValueError, match=f"for non computer vision task {task}"): +# EndpointConfig( +# task=task, model="", bucketing=ImageSizesConfig(image_sizes=[]) +# ).to_pipeline_config() +# +# +# def test_bucketing_zero_sequence_length(): +# with pytest.raises(ValueError, match="at least one sequence length"): +# EndpointConfig( +# task="qa", model="", bucketing=SequenceLengthsConfig(sequence_lengths=[]) +# ).to_pipeline_config() +# +# +# def test_bucketing_zero_image_size(): +# with pytest.raises(ValueError, match="at least one image size"): +# EndpointConfig( +# task="yolo", model="", bucketing=ImageSizesConfig(image_sizes=[]) +# ).to_pipeline_config() +# +# +# def test_bucketing_one_sequence_length(): +# cfg = EndpointConfig( +# task="qa", model="", bucketing=SequenceLengthsConfig(sequence_lengths=[32]) +# ).to_pipeline_config() +# assert cfg.input_shapes is None +# assert cfg.kwargs == {"sequence_length": 32} +# +# +# def test_bucketing_multi_sequence_length(): +# cfg = EndpointConfig( +# task="qa", model="", bucketing=SequenceLengthsConfig(sequence_lengths=[32, 64]) +# ).to_pipeline_config() +# assert cfg.input_shapes is None +# assert cfg.kwargs == {"sequence_length": [32, 64]} +# +# +# def test_bucketing_one_image_size(): +# cfg = EndpointConfig( +# task="yolo", model="", bucketing=ImageSizesConfig(image_sizes=[(256, 256)]) +# ).to_pipeline_config() +# assert cfg.input_shapes == [[256, 256]] +# assert cfg.kwargs == {} +# +# +# def test_endpoint_config_to_pipeline_copy_fields(): +# cfg = EndpointConfig(task="qa", model="zxcv").to_pipeline_config() +# assert cfg.task == "qa" +# assert cfg.model_path == "zxcv" +# +# cfg = EndpointConfig(task="", model="").to_pipeline_config() +# assert cfg.batch_size == 1 +# +# cfg = EndpointConfig(task="", model="", batch_size=64).to_pipeline_config() +# assert cfg.batch_size == 64 +# +# +# def test_yaml_load_config(tmp_path): +# server_config = ServerConfig( +# num_cores=1, +# num_workers=2, +# integration="sagemaker", +# endpoints=[ +# EndpointConfig( +# name="asdf", +# route="qwer", +# task="uiop", +# model="hjkl", +# batch_size=1, +# bucketing=None, +# ), +# EndpointConfig( +# name="asdfd", +# route="qwer", +# task="uiop", +# model="hjkl", +# batch_size=2, +# bucketing=ImageSizesConfig(image_sizes=[(1, 1), (2, 2)]), +# ), +# EndpointConfig( +# name="asdfde", +# route="qwer", +# task="uiop", +# model="hjkl", +# batch_size=3, +# bucketing=SequenceLengthsConfig(sequence_lengths=[5, 6, 7]), +# ), +# ], +# loggers={}, +# ) +# +# path = tmp_path / "config.yaml" +# with open(path, "w") as fp: +# yaml.dump(server_config.dict(), fp) +# +# with open(path) as fp: +# obj = yaml.load(fp, Loader=yaml.Loader) +# 
server_config2 = ServerConfig(**obj) +# assert server_config == server_config2 +# +# +# metric_function_config_yaml_1 = """ +# func: identity +# frequency: 5 +# loggers: +# - python""" +# +# metric_function_config_yaml_2 = """ +# func: numpy.max""" +# +# metric_function_config_yaml_3 = """ +# func: numpy.max +# frequency: 0""" +# +# +# @pytest.mark.parametrize( +# "config_yaml, should_fail, instance_type", +# [ +# (metric_function_config_yaml_1, False, MetricFunctionConfig), +# (metric_function_config_yaml_2, False, MetricFunctionConfig), +# ( +# metric_function_config_yaml_3, +# True, +# MetricFunctionConfig, +# ), # frequency cannot be zero +# ], +# ) +# def test_function_logging_config(config_yaml, should_fail, instance_type): +# obj = yaml.safe_load(config_yaml) +# if should_fail: +# with pytest.raises(Exception): +# MetricFunctionConfig(**obj) +# else: +# assert MetricFunctionConfig(**obj) +# +# +# def _create_server_config(task_name, endpoint_1_name, endpoint_2_name): +# return ServerConfig( +# endpoints=[ +# EndpointConfig( +# name=endpoint_1_name, +# task=task_name, +# model="hjkl", +# ), +# EndpointConfig( +# name=endpoint_2_name, +# task=task_name, +# model="hjkl", +# ), +# ] +# ) +# +# +# @pytest.mark.parametrize( +# "task_name, endpoint_1_name, endpoint_2_name, raise_error, expected_endpoint_1_name, expected_endpoint_2_name", # noqa: E501 +# [ +# ("some_task", None, None, False, "some_task-0", "some_task-1"), +# ("some_task", "name_1", None, False, "name_1", "some_task-0"), +# ("some_task", "name_1", "name_2", False, "name_1", "name_2"), +# ("some_task", "name_1", "name_1", True, None, None), +# ], +# ) +# def test_unique_endpoint_names( +# task_name, +# endpoint_1_name, +# endpoint_2_name, +# raise_error, +# expected_endpoint_1_name, +# expected_endpoint_2_name, +# ): +# if raise_error: +# with pytest.raises(ValueError): +# _create_server_config(task_name, endpoint_1_name, endpoint_2_name) +# return +# return +# +# server_config = _create_server_config(task_name, endpoint_1_name, endpoint_2_name) +# assert server_config.endpoints[0].name == expected_endpoint_1_name +# assert server_config.endpoints[1].name == expected_endpoint_2_name diff --git a/tests/server/test_endpoints.py b/tests/server/test_endpoints.py index f028b37e75..411fb46446 100644 --- a/tests/server/test_endpoints.py +++ b/tests/server/test_endpoints.py @@ -1,268 +1,268 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from typing import List -from unittest.mock import Mock - -from pydantic import BaseModel - -import pytest -from deepsparse.loggers import MultiLogger -from deepsparse.server.config import EndpointConfig, ServerConfig, SystemLoggingConfig -from deepsparse.server.server import _add_pipeline_endpoint, _build_app -from fastapi import FastAPI, UploadFile -from fastapi.testclient import TestClient -from tests.utils import mock_engine - - -class FromFilesSchema(BaseModel): - def from_files(self, f): - # do nothing - this method exists just to test files endpoint logic - ... - - -class StrSchema(BaseModel): - value: str - - -def parse(v: StrSchema) -> int: - return int(v.value) - - -class TestStatusEndpoints: - @pytest.fixture(scope="class") - def server_config(self): - server_config = ServerConfig( - num_cores=1, num_workers=1, endpoints=[], loggers={} - ) - yield server_config - - @pytest.fixture(scope="class") - def client(self, server_config): - yield TestClient(_build_app(server_config)) - - def test_config(self, server_config, client): - response = client.get("/config") - loaded = ServerConfig(**response.json()) - assert loaded == server_config - - @pytest.mark.parametrize("route", ["/ping", "/health", "/healthcheck", "/status"]) - def test_pings_exist(self, client, route): - response = client.get(route) - assert response.status_code == 200 - assert response.json() is True - - def test_docs_exist(self, client): - assert client.get("/docs").status_code == 200 - - def test_home_redirects_to_docs(self, client): - response = client.get("/") - assert response.status_code == 200 - assert response.request.path_url == "/docs" - assert len(response.history) > 0 - assert response.history[-1].is_redirect - - -class TestMockEndpoints: - @pytest.fixture(scope="class") - def server_config(self): - server_config = ServerConfig( - num_cores=1, num_workers=1, endpoints=[], loggers={} - ) - yield server_config - - @pytest.fixture(scope="class") - def app(self, server_config): - yield _build_app(server_config) - - @pytest.fixture(scope="class") - def client(self, app): - yield TestClient(app) - - def test_add_model_endpoint(self, app: FastAPI, client: TestClient): - mock_pipeline = Mock( - side_effect=parse, - input_schema=StrSchema, - output_schema=int, - logger=MultiLogger([]), - ) - _add_pipeline_endpoint( - app, - system_logging_config=SystemLoggingConfig(), - endpoint_config=Mock(route="/predict/parse_int"), - pipeline=mock_pipeline, - ) - assert app.routes[-1].path == "/predict/parse_int" - assert app.routes[-1].response_model is int - assert app.routes[-1].endpoint.__annotations__ == {"request": StrSchema} - assert app.routes[-1].methods == {"POST"} - - for v in ["1234", "5678"]: - response = client.post("/predict/parse_int", json=dict(value=v)) - assert response.status_code == 200 - assert response.json() == int(v) - - def test_add_model_endpoint_with_from_files(self, app): - _add_pipeline_endpoint( - app, - system_logging_config=Mock(), - endpoint_config=Mock(route="/predict/parse_int"), - pipeline=Mock(input_schema=FromFilesSchema, output_schema=int), - ) - assert app.routes[-2].path == "/predict/parse_int" - assert app.routes[-2].endpoint.__annotations__ == {"request": FromFilesSchema} - assert app.routes[-1].path == "/predict/parse_int/from_files" - assert app.routes[-1].endpoint.__annotations__ == {"request": List[UploadFile]} - assert app.routes[-1].response_model is int - assert app.routes[-1].methods == {"POST"} - - def test_sagemaker_only_adds_one_endpoint(self, app): - num_routes = 
len(app.routes) - _add_pipeline_endpoint( - app, - endpoint_config=Mock(route="/predict/parse_int"), - system_logging_config=Mock(), - pipeline=Mock(input_schema=FromFilesSchema, output_schema=int), - integration="sagemaker", - ) - assert len(app.routes) == num_routes + 1 - assert app.routes[-1].path == "/invocations" - assert app.routes[-1].endpoint.__annotations__ == {"request": List[UploadFile]} - - num_routes = len(app.routes) - _add_pipeline_endpoint( - app, - endpoint_config=Mock(route="/predict/parse_int"), - system_logging_config=Mock(), - pipeline=Mock(input_schema=StrSchema, output_schema=int), - integration="sagemaker", - ) - assert len(app.routes) == num_routes + 1 - assert app.routes[-1].path == "/invocations" - assert app.routes[-1].endpoint.__annotations__ == {"request": StrSchema} - - def test_add_endpoint_with_no_route_specified(self, app): - _add_pipeline_endpoint( - app, - endpoint_config=Mock(route=None), - system_logging_config=Mock(), - pipeline=Mock(input_schema=StrSchema, output_schema=int), - ) - assert app.routes[-1].path == "/predict" - - -class TestActualModelEndpoints: - @pytest.fixture(scope="class") - def client(self): - stub = ( - "zoo:nlp/text_classification/distilbert-none/" - "pytorch/huggingface/qqp/pruned80_quant-none-vnni" - ) - server_config = ServerConfig( - num_cores=1, - num_workers=1, - endpoints=[ - EndpointConfig( - route="/predict/dynamic-batch", - task="text-classification", - model=stub, - batch_size=1, - ), - EndpointConfig( - route="/predict/static-batch", - task="text-classification", - model=stub, - batch_size=2, - ), - ], - loggers={}, # do not instantiate any loggers - ) - with mock_engine(rng_seed=0): - app = _build_app(server_config) - yield TestClient(app) - - def test_static_batch_errors_on_wrong_batch_size(self, client): - with pytest.raises( - RuntimeError, - match=( - "batch size of 1 passed into pipeline is " - "not divisible by model batch size of 2" - ), - ): - client.post("/predict/static-batch", json={"sequences": "today is great"}) - - def test_static_batch_good_request(self, client): - response = client.post( - "/predict/static-batch", - json={"sequences": ["today is great", "today is terrible"]}, - ) - assert response.status_code == 200 - output = response.json() - assert len(output["labels"]) == 2 - assert len(output["scores"]) == 2 - - @pytest.mark.parametrize( - "seqs", - [ - ["today is great"], - ["today is great", "today is terrible"], - ["the first sentence", "the second sentence", "the third sentence"], - ], - ) - def test_dynamic_batch_any(self, client, seqs): - response = client.post("/predict/dynamic-batch", json={"sequences": seqs}) - assert response.status_code == 200 - output = response.json() - assert len(output["labels"]) == len(seqs) - assert len(output["scores"]) == len(seqs) - - -class TestDynamicEndpoints: - @pytest.fixture(scope="class") - def client(self): - server_config = ServerConfig( - num_cores=1, num_workers=1, endpoints=[], loggers=None - ) - with mock_engine(rng_seed=0): - app = _build_app(server_config) - yield TestClient(app) - - -@mock_engine(rng_seed=0) -def test_dynamic_add_and_remove_endpoint(engine_mock): - server_config = ServerConfig(num_cores=1, num_workers=1, endpoints=[], loggers={}) - app = _build_app(server_config) - client = TestClient(app) - - # assert /predict doesn't exist - assert 404 == client.post("/predict", json=dict(sequences="asdf")).status_code - - # add /predict - response = client.post( - "/endpoints", - json=EndpointConfig(task="text-classification", 
model="default").dict(), - ) - assert response.status_code == 200 - response = client.post("/predict", json=dict(sequences="asdf")) - assert response.status_code == 200 - - # remove /predict - response = client.delete( - "/endpoints", - json=EndpointConfig( - route="/predict", task="text-classification", model="default" - ).dict(), - ) - assert response.status_code == 200 - assert 404 == client.post("/predict", json=dict(sequences="asdf")).status_code +# # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# # +# # Licensed under the Apache License, Version 2.0 (the "License"); +# # you may not use this file except in compliance with the License. +# # You may obtain a copy of the License at +# # +# # http://www.apache.org/licenses/LICENSE-2.0 +# # +# # Unless required by applicable law or agreed to in writing, +# # software distributed under the License is distributed on an "AS IS" BASIS, +# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# # See the License for the specific language governing permissions and +# # limitations under the License. +# +# from typing import List +# from unittest.mock import Mock +# +# from pydantic import BaseModel +# +# import pytest +# from deepsparse.loggers import MultiLogger +# from deepsparse.server.config import EndpointConfig, ServerConfig, SystemLoggingConfig +# from deepsparse.server.server import _add_pipeline_endpoint, _build_app +# from fastapi import FastAPI, UploadFile +# from fastapi.testclient import TestClient +# from tests.utils import mock_engine +# +# +# class FromFilesSchema(BaseModel): +# def from_files(self, f): +# # do nothing - this method exists just to test files endpoint logic +# ... +# +# +# class StrSchema(BaseModel): +# value: str +# +# +# def parse(v: StrSchema) -> int: +# return int(v.value) +# +# +# class TestStatusEndpoints: +# @pytest.fixture(scope="class") +# def server_config(self): +# server_config = ServerConfig( +# num_cores=1, num_workers=1, endpoints=[], loggers={} +# ) +# yield server_config +# +# @pytest.fixture(scope="class") +# def client(self, server_config): +# yield TestClient(_build_app(server_config)) +# +# def test_config(self, server_config, client): +# response = client.get("/config") +# loaded = ServerConfig(**response.json()) +# assert loaded == server_config +# +# @pytest.mark.parametrize("route", ["/ping", "/health", "/healthcheck", "/status"]) +# def test_pings_exist(self, client, route): +# response = client.get(route) +# assert response.status_code == 200 +# assert response.json() is True +# +# def test_docs_exist(self, client): +# assert client.get("/docs").status_code == 200 +# +# def test_home_redirects_to_docs(self, client): +# response = client.get("/") +# assert response.status_code == 200 +# assert response.request.path_url == "/docs" +# assert len(response.history) > 0 +# assert response.history[-1].is_redirect +# +# +# class TestMockEndpoints: +# @pytest.fixture(scope="class") +# def server_config(self): +# server_config = ServerConfig( +# num_cores=1, num_workers=1, endpoints=[], loggers={} +# ) +# yield server_config +# +# @pytest.fixture(scope="class") +# def app(self, server_config): +# yield _build_app(server_config) +# +# @pytest.fixture(scope="class") +# def client(self, app): +# yield TestClient(app) +# +# def test_add_model_endpoint(self, app: FastAPI, client: TestClient): +# mock_pipeline = Mock( +# side_effect=parse, +# input_schema=StrSchema, +# output_schema=int, +# logger=MultiLogger([]), +# ) +# _add_pipeline_endpoint( +# app, 
+# system_logging_config=SystemLoggingConfig(), +# endpoint_config=Mock(route="/predict/parse_int"), +# pipeline=mock_pipeline, +# ) +# assert app.routes[-1].path == "/predict/parse_int" +# assert app.routes[-1].response_model is int +# assert app.routes[-1].endpoint.__annotations__ == {"request": StrSchema} +# assert app.routes[-1].methods == {"POST"} +# +# for v in ["1234", "5678"]: +# response = client.post("/predict/parse_int", json=dict(value=v)) +# assert response.status_code == 200 +# assert response.json() == int(v) +# +# def test_add_model_endpoint_with_from_files(self, app): +# _add_pipeline_endpoint( +# app, +# system_logging_config=Mock(), +# endpoint_config=Mock(route="/predict/parse_int"), +# pipeline=Mock(input_schema=FromFilesSchema, output_schema=int), +# ) +# assert app.routes[-2].path == "/predict/parse_int" +# assert app.routes[-2].endpoint.__annotations__ == {"request": FromFilesSchema} +# assert app.routes[-1].path == "/predict/parse_int/from_files" +# assert app.routes[-1].endpoint.__annotations__ == {"request": List[UploadFile]} +# assert app.routes[-1].response_model is int +# assert app.routes[-1].methods == {"POST"} +# +# def test_sagemaker_only_adds_one_endpoint(self, app): +# num_routes = len(app.routes) +# _add_pipeline_endpoint( +# app, +# endpoint_config=Mock(route="/predict/parse_int"), +# system_logging_config=Mock(), +# pipeline=Mock(input_schema=FromFilesSchema, output_schema=int), +# integration="sagemaker", +# ) +# assert len(app.routes) == num_routes + 1 +# assert app.routes[-1].path == "/invocations" +# assert app.routes[-1].endpoint.__annotations__ == {"request": List[UploadFile]} +# +# num_routes = len(app.routes) +# _add_pipeline_endpoint( +# app, +# endpoint_config=Mock(route="/predict/parse_int"), +# system_logging_config=Mock(), +# pipeline=Mock(input_schema=StrSchema, output_schema=int), +# integration="sagemaker", +# ) +# assert len(app.routes) == num_routes + 1 +# assert app.routes[-1].path == "/invocations" +# assert app.routes[-1].endpoint.__annotations__ == {"request": StrSchema} +# +# def test_add_endpoint_with_no_route_specified(self, app): +# _add_pipeline_endpoint( +# app, +# endpoint_config=Mock(route=None), +# system_logging_config=Mock(), +# pipeline=Mock(input_schema=StrSchema, output_schema=int), +# ) +# assert app.routes[-1].path == "/predict" +# +# +# class TestActualModelEndpoints: +# @pytest.fixture(scope="class") +# def client(self): +# stub = ( +# "zoo:nlp/text_classification/distilbert-none/" +# "pytorch/huggingface/qqp/pruned80_quant-none-vnni" +# ) +# server_config = ServerConfig( +# num_cores=1, +# num_workers=1, +# endpoints=[ +# EndpointConfig( +# route="/predict/dynamic-batch", +# task="text-classification", +# model=stub, +# batch_size=1, +# ), +# EndpointConfig( +# route="/predict/static-batch", +# task="text-classification", +# model=stub, +# batch_size=2, +# ), +# ], +# loggers={}, # do not instantiate any loggers +# ) +# with mock_engine(rng_seed=0): +# app = _build_app(server_config) +# yield TestClient(app) +# +# def test_static_batch_errors_on_wrong_batch_size(self, client): +# with pytest.raises( +# RuntimeError, +# match=( +# "batch size of 1 passed into pipeline is " +# "not divisible by model batch size of 2" +# ), +# ): +# client.post("/predict/static-batch", json={"sequences": "today is great"}) +# +# def test_static_batch_good_request(self, client): +# response = client.post( +# "/predict/static-batch", +# json={"sequences": ["today is great", "today is terrible"]}, +# ) +# assert response.status_code 
== 200 +# output = response.json() +# assert len(output["labels"]) == 2 +# assert len(output["scores"]) == 2 +# +# @pytest.mark.parametrize( +# "seqs", +# [ +# ["today is great"], +# ["today is great", "today is terrible"], +# ["the first sentence", "the second sentence", "the third sentence"], +# ], +# ) +# def test_dynamic_batch_any(self, client, seqs): +# response = client.post("/predict/dynamic-batch", json={"sequences": seqs}) +# assert response.status_code == 200 +# output = response.json() +# assert len(output["labels"]) == len(seqs) +# assert len(output["scores"]) == len(seqs) +# +# +# class TestDynamicEndpoints: +# @pytest.fixture(scope="class") +# def client(self): +# server_config = ServerConfig( +# num_cores=1, num_workers=1, endpoints=[], loggers=None +# ) +# with mock_engine(rng_seed=0): +# app = _build_app(server_config) +# yield TestClient(app) +# +# +# @mock_engine(rng_seed=0) +# def test_dynamic_add_and_remove_endpoint(engine_mock): +# server_config = ServerConfig(num_cores=1, num_workers=1, endpoints=[], loggers={}) +# app = _build_app(server_config) +# client = TestClient(app) +# +# # assert /predict doesn't exist +# assert 404 == client.post("/predict", json=dict(sequences="asdf")).status_code +# +# # add /predict +# response = client.post( +# "/endpoints", +# json=EndpointConfig(task="text-classification", model="default").dict(), +# ) +# assert response.status_code == 200 +# response = client.post("/predict", json=dict(sequences="asdf")) +# assert response.status_code == 200 +# +# # remove /predict +# response = client.delete( +# "/endpoints", +# json=EndpointConfig( +# route="/predict", task="text-classification", model="default" +# ).dict(), +# ) +# assert response.status_code == 200 +# assert 404 == client.post("/predict", json=dict(sequences="asdf")).status_code diff --git a/tests/server/test_loggers.py b/tests/server/test_loggers.py index 369215e9af..8802835381 100644 --- a/tests/server/test_loggers.py +++ b/tests/server/test_loggers.py @@ -11,7 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- +import os +import shutil from collections import Counter from unittest import mock @@ -57,246 +58,247 @@ def test_default_logger(): "deepsparse.server.server.server_logger_from_config", return_value=server_logger ), mock_engine(rng_seed=0): app = _build_app(server_config) - client = TestClient(app) - - for _ in range(2): - client.post("/predict", json={"sequences": "today is great"}) - assert isinstance(fetch_leaf_logger(server_logger), PythonLogger) - - -def test_data_logging_from_predefined(): - server_config = ServerConfig( - endpoints=[ - EndpointConfig( - task=task, - name="text_classification", - model=stub, - add_predefined=[MetricFunctionConfig(func="text_classification")], - ) - ], - loggers={"logger_1": {"path": logger_identifier}}, - ) - server_logger = server_logger_from_config(server_config) - with mock.patch( - "deepsparse.server.server.server_logger_from_config", return_value=server_logger - ), mock_engine(rng_seed=0): - app = _build_app(server_config) - client = TestClient(app) - client.post( - "/predict", - json={ - "sequences": [["Fun for adults and children.", "Fun for only children."]] - }, - ) - calls = fetch_leaf_logger(server_logger).calls - data_logging_logs = [call for call in calls if "DATA" in call] - with open( - "tests/deepsparse/loggers/metric_functions/predefined/predefined_logs/text_classification.txt", # noqa E501 - "r", - ) as f: - expected_logs = f.read().splitlines() - for log, expected_log in zip(data_logging_logs, expected_logs): - assert log == expected_log - - -@flaky(max_runs=4, min_passes=3) -def test_logging_only_system_info(): - server_config = ServerConfig( - endpoints=[EndpointConfig(task=task, name=name, model=stub)], - loggers={"logger_1": {"path": logger_identifier}}, - ) - server_logger = server_logger_from_config(server_config) - with mock.patch( - "deepsparse.server.server.server_logger_from_config", return_value=server_logger - ), mock_engine(rng_seed=0): - app = _build_app(server_config) - client = TestClient(app) - - for _ in range(2): - client.post("/predict", json={"sequences": "today is great"}) - _test_logger_contents( - fetch_leaf_logger(server_logger), - {"prediction_latency": 8}, - ) - - -def test_regex_target_logging(): - server_config = ServerConfig( - endpoints=[ - EndpointConfig( - task=task, - name=name, - data_logging={ - "re:.*pipeline*.": [MetricFunctionConfig(func="identity")] - }, - model=stub, - ) - ], - loggers={"logger_1": {"path": logger_identifier}}, - ) - server_logger = server_logger_from_config(server_config) - with mock.patch( - "deepsparse.server.server.server_logger_from_config", return_value=server_logger - ), mock_engine(rng_seed=0): - app = _build_app(server_config) - client = TestClient(app) - - for _ in range(2): - client.post("/predict", json={"sequences": "today is great"}) - _test_logger_contents( - fetch_leaf_logger(server_logger), - {"pipeline_inputs__identity": 2, "pipeline_outputs__identity": 2}, - ) - - -def test_multiple_targets_logging(): - server_config = ServerConfig( - endpoints=[ - EndpointConfig( - task=task, - name=name, - data_logging={ - "pipeline_inputs.sequences": [ - MetricFunctionConfig(func="identity") - ], - "engine_inputs": [MetricFunctionConfig(func="identity")], - }, - model=stub, - ) - ], - loggers={"logger_1": {"path": logger_identifier}}, - ) - server_logger = server_logger_from_config(server_config) - with mock.patch( - "deepsparse.server.server.server_logger_from_config", return_value=server_logger - ), mock_engine(rng_seed=0): - app = _build_app(server_config) - 
client = TestClient(app) - - for _ in range(2): - client.post("/predict", json={"sequences": "today is great"}) - _test_logger_contents( - fetch_leaf_logger(server_logger), - { - "pipeline_inputs.sequences__identity": 2, - "engine_inputs__identity": 2, - "prediction_latency": 8, - }, - ) - - -@flaky(max_runs=3, min_passes=2) -def test_function_metric_with_target_loggers(): - server_config = ServerConfig( - endpoints=[ - EndpointConfig( - task=task, - name=name, - data_logging={ - "pipeline_inputs.sequences[0]": [ - MetricFunctionConfig( - func="identity", target_loggers=["logger_1"] - ) - ], - "engine_inputs": [MetricFunctionConfig(func="identity")], - }, - model=stub, - ) - ], - loggers={ - "logger_1": {"path": logger_identifier}, - "logger_2": {"path": logger_identifier}, - }, - ) - server_logger = server_logger_from_config(server_config) - with mock.patch( - "deepsparse.server.server.server_logger_from_config", return_value=server_logger - ), mock_engine(rng_seed=0): - app = _build_app(server_config) - client = TestClient(app) + # client = TestClient(app) + # + # for _ in range(2): + # client.post("/predict", json={"sequences": "today is great"}) + # assert isinstance(fetch_leaf_logger(server_logger), PythonLogger) - for _ in range(2): - client.post("/predict", json={"sequences": "today is great"}) - - _test_logger_contents( - server_logger.logger.loggers[1].logger.loggers[0], - { - "pipeline_inputs.sequences__identity": 2, - "engine_inputs__identity": 2, - "prediction_latency": 8, - }, - ) - _test_logger_contents( - server_logger.logger.loggers[1].logger.loggers[1], - { - "pipeline_inputs.sequences__identity": 0, - "engine_inputs__identity": 2, - "prediction_latency": 8, - }, - ) - -@mock_engine(rng_seed=0) -def test_instantiate_prometheus(tmp_path): - client = TestClient( - _build_app( - ServerConfig( - endpoints=[EndpointConfig(task="text_classification", model="default")], - loggers=dict( - prometheus={ - "port": find_free_port(), - "text_log_save_dir": str(tmp_path), - "text_log_save_frequency": 30, - } - ), - ) - ) - ) - r = client.post("/predict", json=dict(sequences="asdf")) - assert r.status_code == 200 - - -@mock_engine(rng_seed=0) -def test_endpoint_system_logging(tmp_path): - server_config = ServerConfig( - system_logging=ServerSystemLoggingConfig( - request_details=SystemLoggingGroup(enable=True), - resource_utilization=SystemLoggingGroup(enable=True), - ), - endpoints=[ - EndpointConfig( - task="text_classification", - model="default", - route="/predict_text_classification", - logging_config=PipelineSystemLoggingConfig( - inference_details=SystemLoggingGroup(enable=True), - prediction_latency=SystemLoggingGroup(enable=True), - ), - ), - EndpointConfig( - task="question_answering", - model="default", - route="/predict_question_answering", - logging_config=PipelineSystemLoggingConfig( - inference_details=SystemLoggingGroup(enable=True), - prediction_latency=SystemLoggingGroup(enable=True), - ), - ), - ], - loggers={"logger_1": {"path": logger_identifier}}, - ) - server_logger = server_logger_from_config(server_config) - with mock.patch( - "deepsparse.server.server.server_logger_from_config", return_value=server_logger - ), mock_engine(rng_seed=0): - app = _build_app(server_config) - client = TestClient(app) - client.post("/predict_text_classification", json=dict(sequences="asdf")) - client.post( - "/predict_text_classification", json=dict(question="asdf", context="asdf") - ) - calls = server_logger.logger.loggers[0].logger.loggers[0].calls - - c = Counter([call.split(",")[0] 
for call in calls]) - - assert c == SAMPLE_LOGS_DICT +# def test_data_logging_from_predefined(): +# server_config = ServerConfig( +# endpoints=[ +# EndpointConfig( +# task=task, +# name="text_classification", +# model=stub, +# add_predefined=[MetricFunctionConfig(func="text_classification")], +# ) +# ], +# loggers={"logger_1": {"path": logger_identifier}}, +# ) +# server_logger = server_logger_from_config(server_config) +# with mock.patch( +# "deepsparse.server.server.server_logger_from_config", return_value=server_logger +# ), mock_engine(rng_seed=0): +# app = _build_app(server_config) +# client = TestClient(app) +# client.post( +# "/predict", +# json={ +# "sequences": [["Fun for adults and children.", "Fun for only children."]] +# }, +# ) +# calls = fetch_leaf_logger(server_logger).calls +# data_logging_logs = [call for call in calls if "DATA" in call] +# with open( +# "tests/deepsparse/loggers/metric_functions/predefined/predefined_logs/text_classification.txt", # noqa E501 +# "r", +# ) as f: +# expected_logs = f.read().splitlines() +# for log, expected_log in zip(data_logging_logs, expected_logs): +# assert log == expected_log +# +# +# @flaky(max_runs=4, min_passes=3) +# def test_logging_only_system_info(): +# server_config = ServerConfig( +# endpoints=[EndpointConfig(task=task, name=name, model=stub)], +# loggers={"logger_1": {"path": logger_identifier}}, +# ) +# server_logger = server_logger_from_config(server_config) +# with mock.patch( +# "deepsparse.server.server.server_logger_from_config", return_value=server_logger +# ), mock_engine(rng_seed=0): +# app = _build_app(server_config) +# client = TestClient(app) +# +# for _ in range(2): +# client.post("/predict", json={"sequences": "today is great"}) +# _test_logger_contents( +# fetch_leaf_logger(server_logger), +# {"prediction_latency": 8}, +# ) +# +# +# def test_regex_target_logging(): +# server_config = ServerConfig( +# endpoints=[ +# EndpointConfig( +# task=task, +# name=name, +# data_logging={ +# "re:.*pipeline*.": [MetricFunctionConfig(func="identity")] +# }, +# model=stub, +# ) +# ], +# loggers={"logger_1": {"path": logger_identifier}}, +# ) +# server_logger = server_logger_from_config(server_config) +# with mock.patch( +# "deepsparse.server.server.server_logger_from_config", return_value=server_logger +# ), mock_engine(rng_seed=0): +# app = _build_app(server_config) +# client = TestClient(app) +# +# for _ in range(2): +# client.post("/predict", json={"sequences": "today is great"}) +# _test_logger_contents( +# fetch_leaf_logger(server_logger), +# {"pipeline_inputs__identity": 2, "pipeline_outputs__identity": 2}, +# ) +# +# +# def test_multiple_targets_logging(): +# server_config = ServerConfig( +# endpoints=[ +# EndpointConfig( +# task=task, +# name=name, +# data_logging={ +# "pipeline_inputs.sequences": [ +# MetricFunctionConfig(func="identity") +# ], +# "engine_inputs": [MetricFunctionConfig(func="identity")], +# }, +# model=stub, +# ) +# ], +# loggers={"logger_1": {"path": logger_identifier}}, +# ) +# server_logger = server_logger_from_config(server_config) +# with mock.patch( +# "deepsparse.server.server.server_logger_from_config", return_value=server_logger +# ), mock_engine(rng_seed=0): +# app = _build_app(server_config) +# client = TestClient(app) +# +# for _ in range(2): +# client.post("/predict", json={"sequences": "today is great"}) +# _test_logger_contents( +# fetch_leaf_logger(server_logger), +# { +# "pipeline_inputs.sequences__identity": 2, +# "engine_inputs__identity": 2, +# "prediction_latency": 8, +# }, +# ) 
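
The tests commented out above all build the same kind of configuration: a ServerConfig whose endpoint declares data_logging targets, with the leaf logger pointed at the ListLogger helper so the emitted calls can be inspected. As a minimal sketch (using only the config classes, stub, and logger path that already appear in these tests), that setup reduces to roughly:

from deepsparse.server.config import (
    EndpointConfig,
    MetricFunctionConfig,
    ServerConfig,
)
from deepsparse.server.helpers import server_logger_from_config

logger_identifier = "tests/deepsparse/loggers/helpers.py:ListLogger"
stub = "zoo:nlp/text_classification/distilbert-none/pytorch/huggingface/qqp/pruned80_quant-none-vnni"  # noqa E501

server_config = ServerConfig(
    endpoints=[
        EndpointConfig(
            task="text-classification",
            name="endpoint_name",
            model=stub,
            # route both raw pipeline inputs and engine inputs through the
            # identity metric function
            data_logging={
                "pipeline_inputs.sequences": [MetricFunctionConfig(func="identity")],
                "engine_inputs": [MetricFunctionConfig(func="identity")],
            },
        )
    ],
    loggers={"logger_1": {"path": logger_identifier}},
)
server_logger = server_logger_from_config(server_config)

Each test then patches deepsparse.server.server.server_logger_from_config to return this logger, builds the app under mock_engine, posts to /predict, and asserts on the calls recorded by the leaf ListLogger.
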
+# +# +# @flaky(max_runs=3, min_passes=2) +# def test_function_metric_with_target_loggers(): +# server_config = ServerConfig( +# endpoints=[ +# EndpointConfig( +# task=task, +# name=name, +# data_logging={ +# "pipeline_inputs.sequences[0]": [ +# MetricFunctionConfig( +# func="identity", target_loggers=["logger_1"] +# ) +# ], +# "engine_inputs": [MetricFunctionConfig(func="identity")], +# }, +# model=stub, +# ) +# ], +# loggers={ +# "logger_1": {"path": logger_identifier}, +# "logger_2": {"path": logger_identifier}, +# }, +# ) +# server_logger = server_logger_from_config(server_config) +# with mock.patch( +# "deepsparse.server.server.server_logger_from_config", return_value=server_logger +# ), mock_engine(rng_seed=0): +# app = _build_app(server_config) +# client = TestClient(app) +# +# for _ in range(2): +# client.post("/predict", json={"sequences": "today is great"}) +# +# _test_logger_contents( +# server_logger.logger.loggers[1].logger.loggers[0], +# { +# "pipeline_inputs.sequences__identity": 2, +# "engine_inputs__identity": 2, +# "prediction_latency": 8, +# }, +# ) +# _test_logger_contents( +# server_logger.logger.loggers[1].logger.loggers[1], +# { +# "pipeline_inputs.sequences__identity": 0, +# "engine_inputs__identity": 2, +# "prediction_latency": 8, +# }, +# ) +# +# +# @mock_engine(rng_seed=0) +# def test_instantiate_prometheus(mock_engine, tmp_path): +# client = TestClient( +# _build_app( +# ServerConfig( +# endpoints=[EndpointConfig(task="text_classification", model="default")], +# loggers=dict( +# prometheus={ +# "port": find_free_port(), +# "text_log_save_dir": tmp_path.name, +# "text_log_save_frequency": 30, +# } +# ), +# ) +# ) +# ) +# r = client.post("/predict", json=dict(sequences="asdf")) +# assert r.status_code == 200 +# shutil.rmtree(tmp_path.name, ignore_errors=True) +# +# +# @mock_engine(rng_seed=0) +# def test_endpoint_system_logging(mock_engine): +# server_config = ServerConfig( +# system_logging=ServerSystemLoggingConfig( +# request_details=SystemLoggingGroup(enable=True), +# resource_utilization=SystemLoggingGroup(enable=True), +# ), +# endpoints=[ +# EndpointConfig( +# task="text_classification", +# model="default", +# route="/predict_text_classification", +# logging_config=PipelineSystemLoggingConfig( +# inference_details=SystemLoggingGroup(enable=True), +# prediction_latency=SystemLoggingGroup(enable=True), +# ), +# ), +# EndpointConfig( +# task="question_answering", +# model="default", +# route="/predict_question_answering", +# logging_config=PipelineSystemLoggingConfig( +# inference_details=SystemLoggingGroup(enable=True), +# prediction_latency=SystemLoggingGroup(enable=True), +# ), +# ), +# ], +# loggers={"logger_1": {"path": logger_identifier}}, +# ) +# server_logger = server_logger_from_config(server_config) +# with mock.patch( +# "deepsparse.server.server.server_logger_from_config", return_value=server_logger +# ), mock_engine: +# app = _build_app(server_config) +# client = TestClient(app) +# client.post("/predict_text_classification", json=dict(sequences="asdf")) +# client.post( +# "/predict_text_classification", json=dict(question="asdf", context="asdf") +# ) +# calls = server_logger.logger.loggers[0].logger.loggers[0].calls +# +# c = Counter([call.split(",")[0] for call in calls]) +# +# assert c == SAMPLE_LOGS_DICT diff --git a/tests/server/test_system_logging.py b/tests/server/test_system_logging.py index b6a3fbd2b6..bd0a8a3ae3 100644 --- a/tests/server/test_system_logging.py +++ b/tests/server/test_system_logging.py @@ -1,169 +1,169 @@ -# Copyright (c) 
2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from unittest import mock - -import pytest -from deepsparse.loggers.config import SystemLoggingGroup -from deepsparse.server.config import ( - EndpointConfig, - ServerConfig, - ServerSystemLoggingConfig, -) -from deepsparse.server.helpers import server_logger_from_config -from deepsparse.server.server import _build_app -from deepsparse.server.system_logging import log_resource_utilization -from fastapi.testclient import TestClient -from tests.deepsparse.loggers.helpers import ListLogger -from tests.utils import mock_engine - - -logger_identifier = "tests/deepsparse/loggers/helpers.py:ListLogger" -stub = "zoo:nlp/text_classification/distilbert-none/pytorch/huggingface/qqp/pruned80_quant-none-vnni" # noqa E501 -task = "text-classification" -name = "endpoint_name" - - -def _test_successful_requests(calls, successful_request): - relevant_call = [ - call - for call in calls - if call.startswith("identifier:request_details/successful_request_count") - ] - assert len(relevant_call) == 1 - relevant_call = relevant_call[0] - value = bool(int(relevant_call.split("value:")[1].split(",")[0])) - assert value == successful_request - - -def _test_response_msg(calls, response_msg): - relevant_call = [ - call - for call in calls - if call.startswith("identifier:request_details/response_message") - ] - assert len(relevant_call) == 1 - relevant_call = relevant_call[0] - value = relevant_call.split("value:")[1].split(",")[0] - assert value == response_msg - - -@pytest.mark.parametrize( - "json_payload, input_batch_size, successful_request, response_msg", - [ - ({"sequences": "today is great"}, 1, True, "Response status code: 200"), - ( - {"sequences": ["today is great", "today is great"]}, - 2, - True, - "Response status code: 200", - ), - ({"this": "is supposed to fail"}, 1, False, "Response status code: 422"), - ], -) -def test_log_request_details( - json_payload, input_batch_size, successful_request, response_msg -): - server_config = ServerConfig( - endpoints=[ - EndpointConfig( - task=task, name=name, model=stub, batch_size=input_batch_size - ) - ], - loggers={"logger_1": {"path": logger_identifier}}, - system_logging=ServerSystemLoggingConfig( - request_details=SystemLoggingGroup(enable=True) - ), - ) - server_logger = server_logger_from_config(server_config) - with mock.patch( - "deepsparse.server.server.server_logger_from_config", return_value=server_logger - ), mock_engine(rng_seed=0): - app = _build_app(server_config) - client = TestClient(app) - client.post("/predict", json=json_payload) - - calls = server_logger.logger.loggers[0].logger.loggers[0].calls - - _test_successful_requests(calls, successful_request) - _test_response_msg(calls, response_msg) - - -def _test_cpu_utilization(calls, num_iterations): - relevant_calls = [ - call - for call in calls - if call.startswith("identifier:resource_utilization/cpu_utilization_percent") - ] - assert len(relevant_calls) == num_iterations - - -def 
_test_memory_utilization(calls, num_iterations): - relevant_calls = [ - call - for call in calls - if call.startswith("identifier:resource_utilization/memory_utilization_percent") - ] - values = [float(call.split("value:")[1].split(",")[0]) for call in relevant_calls] - assert len(relevant_calls) == num_iterations - # memory utilization is a percentage, so it should be between 0 and 100 - assert all(0.0 < value < 100.0 for value in values) - - -def _test_total_memory_available(calls, num_iterations): - relevant_calls = [ - call - for call in calls - if call.startswith( - "identifier:resource_utilization/total_memory_available_bytes" - ) - ] - values = [float(call.split("value:")[1].split(",")[0]) for call in relevant_calls] - assert len(relevant_calls) == num_iterations - # assert all values are the same (total memory available is constant) - assert all(value == values[0] for value in values) - - -def _test_additional_items_to_log(calls, num_iterations): - relevant_calls = [ - call - for call in calls - if call.startswith("identifier:resource_utilization/test") - ] - values = [float(call.split("value:")[1].split(",")[0]) for call in relevant_calls] - assert len(relevant_calls) == num_iterations - # assert all values are the same ({"test" : 1} is constant) - assert all(value == 1 for value in values) - - -@pytest.mark.parametrize( - "num_iterations, additional_items_to_log", - [ - (5, {}), - (3, {"test": 1}), - ], -) -def test_log_resource_utilization(num_iterations, additional_items_to_log): - server_logger = ListLogger() - - for iter in range(num_iterations): - log_resource_utilization( - server_logger, prefix="resource_utilization", **additional_items_to_log - ) - - calls = server_logger.calls - - _test_cpu_utilization(calls, num_iterations) - _test_memory_utilization(calls, num_iterations) - _test_total_memory_available(calls, num_iterations) +# # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# # +# # Licensed under the Apache License, Version 2.0 (the "License"); +# # you may not use this file except in compliance with the License. +# # You may obtain a copy of the License at +# # +# # http://www.apache.org/licenses/LICENSE-2.0 +# # +# # Unless required by applicable law or agreed to in writing, +# # software distributed under the License is distributed on an "AS IS" BASIS, +# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# # See the License for the specific language governing permissions and +# # limitations under the License. 
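
Besides the request-details checks, the tests commented out in this file exercise log_resource_utilization directly: every call is expected to emit one CPU-utilization, one memory-utilization, and one total-memory identifier, plus any extra keyword arguments as additional identifier/value pairs. A minimal sketch of that behavior, using the ListLogger helper these tests already rely on:

from deepsparse.server.system_logging import log_resource_utilization
from tests.deepsparse.loggers.helpers import ListLogger

logger = ListLogger()
for _ in range(3):
    # `prefix` becomes the identifier namespace; extra kwargs such as test=1
    # are logged as additional identifier/value pairs
    log_resource_utilization(logger, prefix="resource_utilization", test=1)

# each recorded call is a string along the lines of
# "identifier:resource_utilization/cpu_utilization_percent, value:<percent>, ..."
cpu_calls = [c for c in logger.calls if "cpu_utilization_percent" in c]
assert len(cpu_calls) == 3
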
+# +# from unittest import mock +# +# import pytest +# from deepsparse.loggers.config import SystemLoggingGroup +# from deepsparse.server.config import ( +# EndpointConfig, +# ServerConfig, +# ServerSystemLoggingConfig, +# ) +# from deepsparse.server.helpers import server_logger_from_config +# from deepsparse.server.server import _build_app +# from deepsparse.server.system_logging import log_resource_utilization +# from fastapi.testclient import TestClient +# from tests.deepsparse.loggers.helpers import ListLogger +# from tests.utils import mock_engine +# +# +# logger_identifier = "tests/deepsparse/loggers/helpers.py:ListLogger" +# stub = "zoo:nlp/text_classification/distilbert-none/pytorch/huggingface/qqp/pruned80_quant-none-vnni" # noqa E501 +# task = "text-classification" +# name = "endpoint_name" +# +# +# def _test_successful_requests(calls, successful_request): +# relevant_call = [ +# call +# for call in calls +# if call.startswith("identifier:request_details/successful_request_count") +# ] +# assert len(relevant_call) == 1 +# relevant_call = relevant_call[0] +# value = bool(int(relevant_call.split("value:")[1].split(",")[0])) +# assert value == successful_request +# +# +# def _test_response_msg(calls, response_msg): +# relevant_call = [ +# call +# for call in calls +# if call.startswith("identifier:request_details/response_message") +# ] +# assert len(relevant_call) == 1 +# relevant_call = relevant_call[0] +# value = relevant_call.split("value:")[1].split(",")[0] +# assert value == response_msg +# +# +# @pytest.mark.parametrize( +# "json_payload, input_batch_size, successful_request, response_msg", +# [ +# ({"sequences": "today is great"}, 1, True, "Response status code: 200"), +# ( +# {"sequences": ["today is great", "today is great"]}, +# 2, +# True, +# "Response status code: 200", +# ), +# ({"this": "is supposed to fail"}, 1, False, "Response status code: 422"), +# ], +# ) +# def test_log_request_details( +# json_payload, input_batch_size, successful_request, response_msg +# ): +# server_config = ServerConfig( +# endpoints=[ +# EndpointConfig( +# task=task, name=name, model=stub, batch_size=input_batch_size +# ) +# ], +# loggers={"logger_1": {"path": logger_identifier}}, +# system_logging=ServerSystemLoggingConfig( +# request_details=SystemLoggingGroup(enable=True) +# ), +# ) +# server_logger = server_logger_from_config(server_config) +# with mock.patch( +# "deepsparse.server.server.server_logger_from_config", return_value=server_logger +# ), mock_engine(rng_seed=0): +# app = _build_app(server_config) +# client = TestClient(app) +# client.post("/predict", json=json_payload) +# +# calls = server_logger.logger.loggers[0].logger.loggers[0].calls +# +# _test_successful_requests(calls, successful_request) +# _test_response_msg(calls, response_msg) +# +# +# def _test_cpu_utilization(calls, num_iterations): +# relevant_calls = [ +# call +# for call in calls +# if call.startswith("identifier:resource_utilization/cpu_utilization_percent") +# ] +# assert len(relevant_calls) == num_iterations +# +# +# def _test_memory_utilization(calls, num_iterations): +# relevant_calls = [ +# call +# for call in calls +# if call.startswith("identifier:resource_utilization/memory_utilization_percent") +# ] +# values = [float(call.split("value:")[1].split(",")[0]) for call in relevant_calls] +# assert len(relevant_calls) == num_iterations +# # memory utilization is a percentage, so it should be between 0 and 100 +# assert all(0.0 < value < 100.0 for value in values) +# +# +# def 
_test_total_memory_available(calls, num_iterations): +# relevant_calls = [ +# call +# for call in calls +# if call.startswith( +# "identifier:resource_utilization/total_memory_available_bytes" +# ) +# ] +# values = [float(call.split("value:")[1].split(",")[0]) for call in relevant_calls] +# assert len(relevant_calls) == num_iterations +# # assert all values are the same (total memory available is constant) +# assert all(value == values[0] for value in values) +# +# +# def _test_additional_items_to_log(calls, num_iterations): +# relevant_calls = [ +# call +# for call in calls +# if call.startswith("identifier:resource_utilization/test") +# ] +# values = [float(call.split("value:")[1].split(",")[0]) for call in relevant_calls] +# assert len(relevant_calls) == num_iterations +# # assert all values are the same ({"test" : 1} is constant) +# assert all(value == 1 for value in values) +# +# +# @pytest.mark.parametrize( +# "num_iterations, additional_items_to_log", +# [ +# (5, {}), +# (3, {"test": 1}), +# ], +# ) +# def test_log_resource_utilization(num_iterations, additional_items_to_log): +# server_logger = ListLogger() +# +# for iter in range(num_iterations): +# log_resource_utilization( +# server_logger, prefix="resource_utilization", **additional_items_to_log +# ) +# +# calls = server_logger.calls +# +# _test_cpu_utilization(calls, num_iterations) +# _test_memory_utilization(calls, num_iterations) +# _test_total_memory_available(calls, num_iterations) From 7f9935b8ea4456bf691a54c6585109576da39656 Mon Sep 17 00:00:00 2001 From: Damian Date: Wed, 7 Jun 2023 16:14:46 +0000 Subject: [PATCH 04/11] ready to review --- src/deepsparse/transformers/helpers.py | 44 +- src/deepsparse/utils/onnx.py | 30 +- src/deepsparse/yolo/utils/utils.py | 7 + tests/server/test_app.py | 332 +++++++-------- tests/server/test_config.py | 444 ++++++++++---------- tests/server/test_endpoints.py | 536 ++++++++++++------------- tests/server/test_loggers.py | 486 +++++++++++----------- tests/server/test_system_logging.py | 338 ++++++++-------- 8 files changed, 1110 insertions(+), 1107 deletions(-) diff --git a/src/deepsparse/transformers/helpers.py b/src/deepsparse/transformers/helpers.py index 847a7a9924..83b519baa5 100644 --- a/src/deepsparse/transformers/helpers.py +++ b/src/deepsparse/transformers/helpers.py @@ -135,7 +135,6 @@ def overwrite_transformer_onnx_model_inputs( path: str, batch_size: int = 1, max_length: int = 128, - output_path: Optional[str] = None, inplace: bool = True, ) -> Tuple[Optional[str], List[str], Optional[NamedTemporaryFile]]: """ @@ -146,25 +145,16 @@ def overwrite_transformer_onnx_model_inputs( :param path: path to the ONNX model to override :param batch_size: batch size to set :param max_length: max sequence length to set - :param output_path: if provided, the model will be saved to the given path, - otherwise, the model will be saved to a named temporary file that will - be deleted after the program exits - :param inplace: if True, the model will be modified in place, otherwise - a copy of the model will be saved to a temporary file - :return: if no output path, a tuple of the saved path to the model, list of - model input names, and reference to the tempfile object will be returned - otherwise, only the model input names will be returned + :param inplace: if True, the model will be modified in place (its inputs will + be overwritten). 
Else, a copy of that model, with overwritten inputs, + will be saved to a temporary file + :return: tuple of (path to the overwritten model, list of input names that were + overwritten, and a temporary file containing the overwritten model if + `inplace=False`, else None) """ - - if inplace and output_path is not None: - raise ValueError( - "Cannot specify both inplace=True and output_path. If inplace=True, " - "the model will be modified in place (the returned path will be identical" - "to the input path specified in argument `path`)" - ) - if inplace: - output_path = path # overwrite input shapes + # if > 2Gb model is to be modified in-place, operate + # exclusively on the model graph model = onnx.load(path, load_external_data=not inplace) initializer_input_names = set([node.name for node in model.graph.initializer]) external_inputs = [ @@ -177,14 +167,20 @@ def overwrite_transformer_onnx_model_inputs( input_names.append(external_input.name) # Save modified model - if not inplace: - tmp_file = NamedTemporaryFile() # file will be deleted after program exit + if inplace: + _LOGGER.info( + f"Overwriting in-place the input shapes of the transformer model at {path}" + ) + save_onnx(model, path) + return path, input_names, None + else: + tmp_file = NamedTemporaryFile() + _LOGGER.info( + f"Saving a copy of the transformer model: {path} " + f"with overwritten input shapes to {tmp_file.name}" + ) save_onnx(model, tmp_file.name) return tmp_file.name, input_names, tmp_file - else: - save_onnx(model, output_path) - - return output_path, input_names, None def _get_file_parent(file_path: str) -> str: diff --git a/src/deepsparse/utils/onnx.py b/src/deepsparse/utils/onnx.py index 00f5f24233..eb31179bc9 100644 --- a/src/deepsparse/utils/onnx.py +++ b/src/deepsparse/utils/onnx.py @@ -24,7 +24,7 @@ from onnx.mapping import TENSOR_TYPE_TO_NP_TYPE from deepsparse.utils.extractor import Extractor -from sparsezoo.utils import onnx_includes_external_data, save_onnx, validate_onnx +from sparsezoo.utils import save_onnx, validate_onnx try: @@ -60,21 +60,15 @@ def save_onnx_to_temp_files(model: onnx.ModelProto, with_external_data=False) -> :param model: The onnx model to save to temporary directory :param with_external_data: Whether to save external data to a separate file """ - - if not onnx_includes_external_data(model) and with_external_data: - raise ValueError( - "Model does not include external data, it only includes the model graph." - "Cannot save its external data to separate a file." - "Set argument `with_external_data`=False" - ) shaped_model = tempfile.NamedTemporaryFile(mode="w", delete=False) - _LOGGER.warning(f"Saving model to temporary directory: {tempfile.tempdir}") + _LOGGER.info(f"Saving model to temporary directory: {tempfile.tempdir}") if with_external_data: external_data = os.path.join( tempfile.tempdir, next(tempfile._get_candidate_names()) ) has_external_data = save_onnx(model, shaped_model.name, external_data) + _LOGGER.info(f"Saving external data to temporary directory: {external_data}") else: has_external_data = save_onnx(model, shaped_model.name) try: @@ -218,7 +212,7 @@ def override_onnx_batch_size( external data are saved along the model graph. :param batch_size: Override for the batch size dimension :param inplace: If True, overwrite the original model file. - Else save the modified model to a temporary file. + Else, save the modified model to a temporary file. :return: File path to modified ONNX model. 
If inplace is True, the modified model will be saved to the same path as the original @@ -234,12 +228,13 @@ def override_onnx_batch_size( for external_input in external_inputs: external_input.type.tensor_type.shape.dim[0].dim_value = batch_size - # Save modified model, this will be cleaned up when context is exited if inplace: + _LOGGER.info( + f"Overwriting in-place the batch size of the model at {onnx_filepath}" + ) save_onnx(model, onnx_filepath) return onnx_filepath else: - # Save modified model, this will be cleaned up when context is exited return save_onnx_to_temp_files(model, with_external_data=not inplace) @@ -302,12 +297,17 @@ def override_onnx_input_shapes( for dim_idx, dim in enumerate(external_input.type.tensor_type.shape.dim): dim.dim_value = input_shapes[input_idx][dim_idx] - # Save modified model, this will be cleaned up when context is exited if inplace: + _LOGGER.info( + "Overwriting in-place the input shapes of the model " f"at {onnx_filepath}" + ) onnx.save(model, onnx_filepath) return onnx_filepath else: - # Save modified model, this will be cleaned up when context is exited + _LOGGER.info( + f"Saving the input shapes of the model at {onnx_filepath} " + f"to a temporary file" + ) return save_onnx_to_temp_files(model, with_external_data=not inplace) @@ -387,7 +387,7 @@ def truncate_onnx_model( output.type.tensor_type.shape.Clear() # save and check model - _LOGGER.info("Saving truncated model to %s", output_filepath) + _LOGGER.debug(f"Saving truncated model to {output_filepath}") save_onnx(extracted_model, output_filepath, "external_data") validate_onnx(output_filepath) diff --git a/src/deepsparse/yolo/utils/utils.py b/src/deepsparse/yolo/utils/utils.py index 3a0f596fe1..e778fabe17 100644 --- a/src/deepsparse/yolo/utils/utils.py +++ b/src/deepsparse/yolo/utils/utils.py @@ -405,9 +405,16 @@ def modify_yolo_onnx_input_shape( set_tensor_dim_shape(model.graph.output[0], 1, num_predictions) if inplace: + _LOGGER.info( + "Overwriting in-place the ONNX model " + f"at {model_path} with the new input shape" + ) save_onnx(model, model_path) return model_path else: + _LOGGER.info( + "Saving the ONNX model with the " "new input shape to a temporary file" + ) return save_onnx_to_temp_files(model, with_external_data=not inplace) diff --git a/tests/server/test_app.py b/tests/server/test_app.py index 678152adc9..9bc71e1a36 100644 --- a/tests/server/test_app.py +++ b/tests/server/test_app.py @@ -1,166 +1,166 @@ -# # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# # -# # Licensed under the Apache License, Version 2.0 (the "License"); -# # you may not use this file except in compliance with the License. -# # You may obtain a copy of the License at -# # -# # http://www.apache.org/licenses/LICENSE-2.0 -# # -# # Unless required by applicable law or agreed to in writing, -# # software distributed under the License is distributed on an "AS IS" BASIS, -# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# # See the License for the specific language governing permissions and -# # limitations under the License. -# -# import os -# from copy import deepcopy -# from re import escape -# from unittest.mock import patch -# -# import pytest -# from deepsparse.server.config import EndpointConfig, ServerConfig -# from deepsparse.server.server import _build_app -# -# -# def test_add_multiple_endpoints_with_no_route(): -# with pytest.raises( -# ValueError, -# match=( -# "must specify `route` for all endpoints if multiple endpoints are used." 
-# ), -# ): -# _build_app( -# ServerConfig( -# num_cores=1, -# num_workers=1, -# endpoints=[ -# EndpointConfig(task="", model="", route=None), -# EndpointConfig(task="", model="", route=None), -# ], -# loggers={}, -# ) -# ) -# -# -# def test_add_multiple_endpoints_with_same_route(): -# with pytest.raises(ValueError, match="asdf specified 2 times"): -# _build_app( -# ServerConfig( -# num_cores=1, -# num_workers=1, -# endpoints=[ -# EndpointConfig(task="", model="", route="asdf"), -# EndpointConfig(task="", model="", route="asdf"), -# ], -# loggers={}, -# ) -# ) -# -# -# def test_invalid_integration(): -# with pytest.raises( -# ValueError, -# match=escape( -# "Unknown integration field asdf. Expected one of ['local', 'sagemaker']" -# ), -# ): -# _build_app( -# ServerConfig( -# num_cores=1, -# num_workers=1, -# integration="asdf", -# endpoints=[], -# loggers={}, -# ) -# ) -# -# -# def test_pytorch_num_threads(): -# torch = pytest.importorskip("torch") -# -# orig_num_threads = torch.get_num_threads() -# _build_app( -# ServerConfig( -# num_cores=1, -# num_workers=1, -# pytorch_num_threads=None, -# endpoints=[], -# loggers={}, -# ) -# ) -# assert torch.get_num_threads() == orig_num_threads -# -# _build_app( -# ServerConfig( -# num_cores=1, -# num_workers=1, -# pytorch_num_threads=1, -# endpoints=[], -# loggers={}, -# ) -# ) -# assert torch.get_num_threads() == 1 -# -# -# @patch.dict(os.environ, deepcopy(os.environ)) -# def test_thread_pinning_none(): -# os.environ.pop("NM_BIND_THREADS_TO_CORES", None) -# os.environ.pop("NM_BIND_THREADS_TO_SOCKETS", None) -# _build_app( -# ServerConfig( -# num_cores=1, -# num_workers=1, -# engine_thread_pinning="none", -# endpoints=[], -# loggers={}, -# ) -# ) -# assert os.environ["NM_BIND_THREADS_TO_CORES"] == "0" -# assert os.environ["NM_BIND_THREADS_TO_SOCKETS"] == "0" -# -# -# @patch.dict(os.environ, deepcopy(os.environ)) -# def test_thread_pinning_numa(): -# os.environ.pop("NM_BIND_THREADS_TO_CORES", None) -# os.environ.pop("NM_BIND_THREADS_TO_SOCKETS", None) -# _build_app( -# ServerConfig( -# num_cores=1, -# num_workers=1, -# engine_thread_pinning="numa", -# endpoints=[], -# loggers={}, -# ) -# ) -# assert os.environ["NM_BIND_THREADS_TO_CORES"] == "0" -# assert os.environ["NM_BIND_THREADS_TO_SOCKETS"] == "1" -# -# -# @patch.dict(os.environ, deepcopy(os.environ)) -# def test_thread_pinning_cores(): -# os.environ.pop("NM_BIND_THREADS_TO_CORES", None) -# os.environ.pop("NM_BIND_THREADS_TO_SOCKETS", None) -# _build_app( -# ServerConfig( -# num_cores=1, -# num_workers=1, -# engine_thread_pinning="core", -# endpoints=[], -# loggers={}, -# ) -# ) -# assert os.environ["NM_BIND_THREADS_TO_CORES"] == "1" -# assert os.environ["NM_BIND_THREADS_TO_SOCKETS"] == "0" -# -# -# def test_invalid_thread_pinning(): -# with pytest.raises(ValueError, match='Expected one of {"core","numa","none"}.'): -# _build_app( -# ServerConfig( -# num_cores=1, -# num_workers=1, -# engine_thread_pinning="asdf", -# endpoints=[], -# loggers={}, -# ) -# ) +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from copy import deepcopy +from re import escape +from unittest.mock import patch + +import pytest +from deepsparse.server.config import EndpointConfig, ServerConfig +from deepsparse.server.server import _build_app + + +def test_add_multiple_endpoints_with_no_route(): + with pytest.raises( + ValueError, + match=( + "must specify `route` for all endpoints if multiple endpoints are used." + ), + ): + _build_app( + ServerConfig( + num_cores=1, + num_workers=1, + endpoints=[ + EndpointConfig(task="", model="", route=None), + EndpointConfig(task="", model="", route=None), + ], + loggers={}, + ) + ) + + +def test_add_multiple_endpoints_with_same_route(): + with pytest.raises(ValueError, match="asdf specified 2 times"): + _build_app( + ServerConfig( + num_cores=1, + num_workers=1, + endpoints=[ + EndpointConfig(task="", model="", route="asdf"), + EndpointConfig(task="", model="", route="asdf"), + ], + loggers={}, + ) + ) + + +def test_invalid_integration(): + with pytest.raises( + ValueError, + match=escape( + "Unknown integration field asdf. Expected one of ['local', 'sagemaker']" + ), + ): + _build_app( + ServerConfig( + num_cores=1, + num_workers=1, + integration="asdf", + endpoints=[], + loggers={}, + ) + ) + + +def test_pytorch_num_threads(): + torch = pytest.importorskip("torch") + + orig_num_threads = torch.get_num_threads() + _build_app( + ServerConfig( + num_cores=1, + num_workers=1, + pytorch_num_threads=None, + endpoints=[], + loggers={}, + ) + ) + assert torch.get_num_threads() == orig_num_threads + + _build_app( + ServerConfig( + num_cores=1, + num_workers=1, + pytorch_num_threads=1, + endpoints=[], + loggers={}, + ) + ) + assert torch.get_num_threads() == 1 + + +@patch.dict(os.environ, deepcopy(os.environ)) +def test_thread_pinning_none(): + os.environ.pop("NM_BIND_THREADS_TO_CORES", None) + os.environ.pop("NM_BIND_THREADS_TO_SOCKETS", None) + _build_app( + ServerConfig( + num_cores=1, + num_workers=1, + engine_thread_pinning="none", + endpoints=[], + loggers={}, + ) + ) + assert os.environ["NM_BIND_THREADS_TO_CORES"] == "0" + assert os.environ["NM_BIND_THREADS_TO_SOCKETS"] == "0" + + +@patch.dict(os.environ, deepcopy(os.environ)) +def test_thread_pinning_numa(): + os.environ.pop("NM_BIND_THREADS_TO_CORES", None) + os.environ.pop("NM_BIND_THREADS_TO_SOCKETS", None) + _build_app( + ServerConfig( + num_cores=1, + num_workers=1, + engine_thread_pinning="numa", + endpoints=[], + loggers={}, + ) + ) + assert os.environ["NM_BIND_THREADS_TO_CORES"] == "0" + assert os.environ["NM_BIND_THREADS_TO_SOCKETS"] == "1" + + +@patch.dict(os.environ, deepcopy(os.environ)) +def test_thread_pinning_cores(): + os.environ.pop("NM_BIND_THREADS_TO_CORES", None) + os.environ.pop("NM_BIND_THREADS_TO_SOCKETS", None) + _build_app( + ServerConfig( + num_cores=1, + num_workers=1, + engine_thread_pinning="core", + endpoints=[], + loggers={}, + ) + ) + assert os.environ["NM_BIND_THREADS_TO_CORES"] == "1" + assert os.environ["NM_BIND_THREADS_TO_SOCKETS"] == "0" + + +def test_invalid_thread_pinning(): + with pytest.raises(ValueError, match='Expected one of {"core","numa","none"}.'): + _build_app( + ServerConfig( + num_cores=1, + num_workers=1, + engine_thread_pinning="asdf", + endpoints=[], + loggers={}, + ) + ) diff --git a/tests/server/test_config.py b/tests/server/test_config.py index f2f9b0e6fe..b1c1c75a84 100644 --- a/tests/server/test_config.py +++ b/tests/server/test_config.py @@ -1,222 
+1,222 @@ -# # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# # -# # Licensed under the Apache License, Version 2.0 (the "License"); -# # you may not use this file except in compliance with the License. -# # You may obtain a copy of the License at -# # -# # http://www.apache.org/licenses/LICENSE-2.0 -# # -# # Unless required by applicable law or agreed to in writing, -# # software distributed under the License is distributed on an "AS IS" BASIS, -# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# # See the License for the specific language governing permissions and -# # limitations under the License. -# -# import yaml -# -# import pytest -# from deepsparse.server.config import ( -# EndpointConfig, -# ImageSizesConfig, -# MetricFunctionConfig, -# SequenceLengthsConfig, -# ServerConfig, -# ) -# -# -# def test_no_bucketing_config(): -# cfg = EndpointConfig(task="", model="").to_pipeline_config() -# assert cfg.input_shapes is None -# assert cfg.kwargs == {} -# -# -# @pytest.mark.parametrize("task", ["yolo", "yolact", "image_classification"]) -# def test_bucketing_sequence_length_for_cv(task): -# with pytest.raises(ValueError, match=f"for non-nlp task {task}"): -# EndpointConfig( -# task=task, model="", bucketing=SequenceLengthsConfig(sequence_lengths=[]) -# ).to_pipeline_config() -# -# -# @pytest.mark.parametrize( -# "task", ["question_answering", "text_classification", "token_classification"] -# ) -# def test_bucketing_image_size_for_nlp(task): -# with pytest.raises(ValueError, match=f"for non computer vision task {task}"): -# EndpointConfig( -# task=task, model="", bucketing=ImageSizesConfig(image_sizes=[]) -# ).to_pipeline_config() -# -# -# def test_bucketing_zero_sequence_length(): -# with pytest.raises(ValueError, match="at least one sequence length"): -# EndpointConfig( -# task="qa", model="", bucketing=SequenceLengthsConfig(sequence_lengths=[]) -# ).to_pipeline_config() -# -# -# def test_bucketing_zero_image_size(): -# with pytest.raises(ValueError, match="at least one image size"): -# EndpointConfig( -# task="yolo", model="", bucketing=ImageSizesConfig(image_sizes=[]) -# ).to_pipeline_config() -# -# -# def test_bucketing_one_sequence_length(): -# cfg = EndpointConfig( -# task="qa", model="", bucketing=SequenceLengthsConfig(sequence_lengths=[32]) -# ).to_pipeline_config() -# assert cfg.input_shapes is None -# assert cfg.kwargs == {"sequence_length": 32} -# -# -# def test_bucketing_multi_sequence_length(): -# cfg = EndpointConfig( -# task="qa", model="", bucketing=SequenceLengthsConfig(sequence_lengths=[32, 64]) -# ).to_pipeline_config() -# assert cfg.input_shapes is None -# assert cfg.kwargs == {"sequence_length": [32, 64]} -# -# -# def test_bucketing_one_image_size(): -# cfg = EndpointConfig( -# task="yolo", model="", bucketing=ImageSizesConfig(image_sizes=[(256, 256)]) -# ).to_pipeline_config() -# assert cfg.input_shapes == [[256, 256]] -# assert cfg.kwargs == {} -# -# -# def test_endpoint_config_to_pipeline_copy_fields(): -# cfg = EndpointConfig(task="qa", model="zxcv").to_pipeline_config() -# assert cfg.task == "qa" -# assert cfg.model_path == "zxcv" -# -# cfg = EndpointConfig(task="", model="").to_pipeline_config() -# assert cfg.batch_size == 1 -# -# cfg = EndpointConfig(task="", model="", batch_size=64).to_pipeline_config() -# assert cfg.batch_size == 64 -# -# -# def test_yaml_load_config(tmp_path): -# server_config = ServerConfig( -# num_cores=1, -# num_workers=2, -# integration="sagemaker", -# endpoints=[ -# EndpointConfig( 
-# name="asdf", -# route="qwer", -# task="uiop", -# model="hjkl", -# batch_size=1, -# bucketing=None, -# ), -# EndpointConfig( -# name="asdfd", -# route="qwer", -# task="uiop", -# model="hjkl", -# batch_size=2, -# bucketing=ImageSizesConfig(image_sizes=[(1, 1), (2, 2)]), -# ), -# EndpointConfig( -# name="asdfde", -# route="qwer", -# task="uiop", -# model="hjkl", -# batch_size=3, -# bucketing=SequenceLengthsConfig(sequence_lengths=[5, 6, 7]), -# ), -# ], -# loggers={}, -# ) -# -# path = tmp_path / "config.yaml" -# with open(path, "w") as fp: -# yaml.dump(server_config.dict(), fp) -# -# with open(path) as fp: -# obj = yaml.load(fp, Loader=yaml.Loader) -# server_config2 = ServerConfig(**obj) -# assert server_config == server_config2 -# -# -# metric_function_config_yaml_1 = """ -# func: identity -# frequency: 5 -# loggers: -# - python""" -# -# metric_function_config_yaml_2 = """ -# func: numpy.max""" -# -# metric_function_config_yaml_3 = """ -# func: numpy.max -# frequency: 0""" -# -# -# @pytest.mark.parametrize( -# "config_yaml, should_fail, instance_type", -# [ -# (metric_function_config_yaml_1, False, MetricFunctionConfig), -# (metric_function_config_yaml_2, False, MetricFunctionConfig), -# ( -# metric_function_config_yaml_3, -# True, -# MetricFunctionConfig, -# ), # frequency cannot be zero -# ], -# ) -# def test_function_logging_config(config_yaml, should_fail, instance_type): -# obj = yaml.safe_load(config_yaml) -# if should_fail: -# with pytest.raises(Exception): -# MetricFunctionConfig(**obj) -# else: -# assert MetricFunctionConfig(**obj) -# -# -# def _create_server_config(task_name, endpoint_1_name, endpoint_2_name): -# return ServerConfig( -# endpoints=[ -# EndpointConfig( -# name=endpoint_1_name, -# task=task_name, -# model="hjkl", -# ), -# EndpointConfig( -# name=endpoint_2_name, -# task=task_name, -# model="hjkl", -# ), -# ] -# ) -# -# -# @pytest.mark.parametrize( -# "task_name, endpoint_1_name, endpoint_2_name, raise_error, expected_endpoint_1_name, expected_endpoint_2_name", # noqa: E501 -# [ -# ("some_task", None, None, False, "some_task-0", "some_task-1"), -# ("some_task", "name_1", None, False, "name_1", "some_task-0"), -# ("some_task", "name_1", "name_2", False, "name_1", "name_2"), -# ("some_task", "name_1", "name_1", True, None, None), -# ], -# ) -# def test_unique_endpoint_names( -# task_name, -# endpoint_1_name, -# endpoint_2_name, -# raise_error, -# expected_endpoint_1_name, -# expected_endpoint_2_name, -# ): -# if raise_error: -# with pytest.raises(ValueError): -# _create_server_config(task_name, endpoint_1_name, endpoint_2_name) -# return -# return -# -# server_config = _create_server_config(task_name, endpoint_1_name, endpoint_2_name) -# assert server_config.endpoints[0].name == expected_endpoint_1_name -# assert server_config.endpoints[1].name == expected_endpoint_2_name +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
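
The bucketing tests restored below pin down how EndpointConfig.to_pipeline_config() translates bucketing options: sequence lengths turn into a sequence_length kwarg for NLP tasks, while image sizes turn into an input_shapes override for computer-vision tasks. In short, as a condensed sketch of the assertions that follow:

from deepsparse.server.config import (
    EndpointConfig,
    ImageSizesConfig,
    SequenceLengthsConfig,
)

# NLP bucketing -> pipeline kwargs
cfg = EndpointConfig(
    task="qa", model="", bucketing=SequenceLengthsConfig(sequence_lengths=[32, 64])
).to_pipeline_config()
assert cfg.input_shapes is None
assert cfg.kwargs == {"sequence_length": [32, 64]}

# CV bucketing -> input shape override
cfg = EndpointConfig(
    task="yolo", model="", bucketing=ImageSizesConfig(image_sizes=[(256, 256)])
).to_pipeline_config()
assert cfg.input_shapes == [[256, 256]]
assert cfg.kwargs == {}
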
+ +import yaml + +import pytest +from deepsparse.server.config import ( + EndpointConfig, + ImageSizesConfig, + MetricFunctionConfig, + SequenceLengthsConfig, + ServerConfig, +) + + +def test_no_bucketing_config(): + cfg = EndpointConfig(task="", model="").to_pipeline_config() + assert cfg.input_shapes is None + assert cfg.kwargs == {} + + +@pytest.mark.parametrize("task", ["yolo", "yolact", "image_classification"]) +def test_bucketing_sequence_length_for_cv(task): + with pytest.raises(ValueError, match=f"for non-nlp task {task}"): + EndpointConfig( + task=task, model="", bucketing=SequenceLengthsConfig(sequence_lengths=[]) + ).to_pipeline_config() + + +@pytest.mark.parametrize( + "task", ["question_answering", "text_classification", "token_classification"] +) +def test_bucketing_image_size_for_nlp(task): + with pytest.raises(ValueError, match=f"for non computer vision task {task}"): + EndpointConfig( + task=task, model="", bucketing=ImageSizesConfig(image_sizes=[]) + ).to_pipeline_config() + + +def test_bucketing_zero_sequence_length(): + with pytest.raises(ValueError, match="at least one sequence length"): + EndpointConfig( + task="qa", model="", bucketing=SequenceLengthsConfig(sequence_lengths=[]) + ).to_pipeline_config() + + +def test_bucketing_zero_image_size(): + with pytest.raises(ValueError, match="at least one image size"): + EndpointConfig( + task="yolo", model="", bucketing=ImageSizesConfig(image_sizes=[]) + ).to_pipeline_config() + + +def test_bucketing_one_sequence_length(): + cfg = EndpointConfig( + task="qa", model="", bucketing=SequenceLengthsConfig(sequence_lengths=[32]) + ).to_pipeline_config() + assert cfg.input_shapes is None + assert cfg.kwargs == {"sequence_length": 32} + + +def test_bucketing_multi_sequence_length(): + cfg = EndpointConfig( + task="qa", model="", bucketing=SequenceLengthsConfig(sequence_lengths=[32, 64]) + ).to_pipeline_config() + assert cfg.input_shapes is None + assert cfg.kwargs == {"sequence_length": [32, 64]} + + +def test_bucketing_one_image_size(): + cfg = EndpointConfig( + task="yolo", model="", bucketing=ImageSizesConfig(image_sizes=[(256, 256)]) + ).to_pipeline_config() + assert cfg.input_shapes == [[256, 256]] + assert cfg.kwargs == {} + + +def test_endpoint_config_to_pipeline_copy_fields(): + cfg = EndpointConfig(task="qa", model="zxcv").to_pipeline_config() + assert cfg.task == "qa" + assert cfg.model_path == "zxcv" + + cfg = EndpointConfig(task="", model="").to_pipeline_config() + assert cfg.batch_size == 1 + + cfg = EndpointConfig(task="", model="", batch_size=64).to_pipeline_config() + assert cfg.batch_size == 64 + + +def test_yaml_load_config(tmp_path): + server_config = ServerConfig( + num_cores=1, + num_workers=2, + integration="sagemaker", + endpoints=[ + EndpointConfig( + name="asdf", + route="qwer", + task="uiop", + model="hjkl", + batch_size=1, + bucketing=None, + ), + EndpointConfig( + name="asdfd", + route="qwer", + task="uiop", + model="hjkl", + batch_size=2, + bucketing=ImageSizesConfig(image_sizes=[(1, 1), (2, 2)]), + ), + EndpointConfig( + name="asdfde", + route="qwer", + task="uiop", + model="hjkl", + batch_size=3, + bucketing=SequenceLengthsConfig(sequence_lengths=[5, 6, 7]), + ), + ], + loggers={}, + ) + + path = tmp_path / "config.yaml" + with open(path, "w") as fp: + yaml.dump(server_config.dict(), fp) + + with open(path) as fp: + obj = yaml.load(fp, Loader=yaml.Loader) + server_config2 = ServerConfig(**obj) + assert server_config == server_config2 + + +metric_function_config_yaml_1 = """ + func: identity + 
frequency: 5 + loggers: + - python""" + +metric_function_config_yaml_2 = """ + func: numpy.max""" + +metric_function_config_yaml_3 = """ + func: numpy.max + frequency: 0""" + + +@pytest.mark.parametrize( + "config_yaml, should_fail, instance_type", + [ + (metric_function_config_yaml_1, False, MetricFunctionConfig), + (metric_function_config_yaml_2, False, MetricFunctionConfig), + ( + metric_function_config_yaml_3, + True, + MetricFunctionConfig, + ), # frequency cannot be zero + ], +) +def test_function_logging_config(config_yaml, should_fail, instance_type): + obj = yaml.safe_load(config_yaml) + if should_fail: + with pytest.raises(Exception): + MetricFunctionConfig(**obj) + else: + assert MetricFunctionConfig(**obj) + + +def _create_server_config(task_name, endpoint_1_name, endpoint_2_name): + return ServerConfig( + endpoints=[ + EndpointConfig( + name=endpoint_1_name, + task=task_name, + model="hjkl", + ), + EndpointConfig( + name=endpoint_2_name, + task=task_name, + model="hjkl", + ), + ] + ) + + +@pytest.mark.parametrize( + "task_name, endpoint_1_name, endpoint_2_name, raise_error, expected_endpoint_1_name, expected_endpoint_2_name", # noqa: E501 + [ + ("some_task", None, None, False, "some_task-0", "some_task-1"), + ("some_task", "name_1", None, False, "name_1", "some_task-0"), + ("some_task", "name_1", "name_2", False, "name_1", "name_2"), + ("some_task", "name_1", "name_1", True, None, None), + ], +) +def test_unique_endpoint_names( + task_name, + endpoint_1_name, + endpoint_2_name, + raise_error, + expected_endpoint_1_name, + expected_endpoint_2_name, +): + if raise_error: + with pytest.raises(ValueError): + _create_server_config(task_name, endpoint_1_name, endpoint_2_name) + return + return + + server_config = _create_server_config(task_name, endpoint_1_name, endpoint_2_name) + assert server_config.endpoints[0].name == expected_endpoint_1_name + assert server_config.endpoints[1].name == expected_endpoint_2_name diff --git a/tests/server/test_endpoints.py b/tests/server/test_endpoints.py index 411fb46446..f028b37e75 100644 --- a/tests/server/test_endpoints.py +++ b/tests/server/test_endpoints.py @@ -1,268 +1,268 @@ -# # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# # -# # Licensed under the Apache License, Version 2.0 (the "License"); -# # you may not use this file except in compliance with the License. -# # You may obtain a copy of the License at -# # -# # http://www.apache.org/licenses/LICENSE-2.0 -# # -# # Unless required by applicable law or agreed to in writing, -# # software distributed under the License is distributed on an "AS IS" BASIS, -# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# # See the License for the specific language governing permissions and -# # limitations under the License. -# -# from typing import List -# from unittest.mock import Mock -# -# from pydantic import BaseModel -# -# import pytest -# from deepsparse.loggers import MultiLogger -# from deepsparse.server.config import EndpointConfig, ServerConfig, SystemLoggingConfig -# from deepsparse.server.server import _add_pipeline_endpoint, _build_app -# from fastapi import FastAPI, UploadFile -# from fastapi.testclient import TestClient -# from tests.utils import mock_engine -# -# -# class FromFilesSchema(BaseModel): -# def from_files(self, f): -# # do nothing - this method exists just to test files endpoint logic -# ... 
-# -# -# class StrSchema(BaseModel): -# value: str -# -# -# def parse(v: StrSchema) -> int: -# return int(v.value) -# -# -# class TestStatusEndpoints: -# @pytest.fixture(scope="class") -# def server_config(self): -# server_config = ServerConfig( -# num_cores=1, num_workers=1, endpoints=[], loggers={} -# ) -# yield server_config -# -# @pytest.fixture(scope="class") -# def client(self, server_config): -# yield TestClient(_build_app(server_config)) -# -# def test_config(self, server_config, client): -# response = client.get("/config") -# loaded = ServerConfig(**response.json()) -# assert loaded == server_config -# -# @pytest.mark.parametrize("route", ["/ping", "/health", "/healthcheck", "/status"]) -# def test_pings_exist(self, client, route): -# response = client.get(route) -# assert response.status_code == 200 -# assert response.json() is True -# -# def test_docs_exist(self, client): -# assert client.get("/docs").status_code == 200 -# -# def test_home_redirects_to_docs(self, client): -# response = client.get("/") -# assert response.status_code == 200 -# assert response.request.path_url == "/docs" -# assert len(response.history) > 0 -# assert response.history[-1].is_redirect -# -# -# class TestMockEndpoints: -# @pytest.fixture(scope="class") -# def server_config(self): -# server_config = ServerConfig( -# num_cores=1, num_workers=1, endpoints=[], loggers={} -# ) -# yield server_config -# -# @pytest.fixture(scope="class") -# def app(self, server_config): -# yield _build_app(server_config) -# -# @pytest.fixture(scope="class") -# def client(self, app): -# yield TestClient(app) -# -# def test_add_model_endpoint(self, app: FastAPI, client: TestClient): -# mock_pipeline = Mock( -# side_effect=parse, -# input_schema=StrSchema, -# output_schema=int, -# logger=MultiLogger([]), -# ) -# _add_pipeline_endpoint( -# app, -# system_logging_config=SystemLoggingConfig(), -# endpoint_config=Mock(route="/predict/parse_int"), -# pipeline=mock_pipeline, -# ) -# assert app.routes[-1].path == "/predict/parse_int" -# assert app.routes[-1].response_model is int -# assert app.routes[-1].endpoint.__annotations__ == {"request": StrSchema} -# assert app.routes[-1].methods == {"POST"} -# -# for v in ["1234", "5678"]: -# response = client.post("/predict/parse_int", json=dict(value=v)) -# assert response.status_code == 200 -# assert response.json() == int(v) -# -# def test_add_model_endpoint_with_from_files(self, app): -# _add_pipeline_endpoint( -# app, -# system_logging_config=Mock(), -# endpoint_config=Mock(route="/predict/parse_int"), -# pipeline=Mock(input_schema=FromFilesSchema, output_schema=int), -# ) -# assert app.routes[-2].path == "/predict/parse_int" -# assert app.routes[-2].endpoint.__annotations__ == {"request": FromFilesSchema} -# assert app.routes[-1].path == "/predict/parse_int/from_files" -# assert app.routes[-1].endpoint.__annotations__ == {"request": List[UploadFile]} -# assert app.routes[-1].response_model is int -# assert app.routes[-1].methods == {"POST"} -# -# def test_sagemaker_only_adds_one_endpoint(self, app): -# num_routes = len(app.routes) -# _add_pipeline_endpoint( -# app, -# endpoint_config=Mock(route="/predict/parse_int"), -# system_logging_config=Mock(), -# pipeline=Mock(input_schema=FromFilesSchema, output_schema=int), -# integration="sagemaker", -# ) -# assert len(app.routes) == num_routes + 1 -# assert app.routes[-1].path == "/invocations" -# assert app.routes[-1].endpoint.__annotations__ == {"request": List[UploadFile]} -# -# num_routes = len(app.routes) -# _add_pipeline_endpoint( -# app, 
-# endpoint_config=Mock(route="/predict/parse_int"), -# system_logging_config=Mock(), -# pipeline=Mock(input_schema=StrSchema, output_schema=int), -# integration="sagemaker", -# ) -# assert len(app.routes) == num_routes + 1 -# assert app.routes[-1].path == "/invocations" -# assert app.routes[-1].endpoint.__annotations__ == {"request": StrSchema} -# -# def test_add_endpoint_with_no_route_specified(self, app): -# _add_pipeline_endpoint( -# app, -# endpoint_config=Mock(route=None), -# system_logging_config=Mock(), -# pipeline=Mock(input_schema=StrSchema, output_schema=int), -# ) -# assert app.routes[-1].path == "/predict" -# -# -# class TestActualModelEndpoints: -# @pytest.fixture(scope="class") -# def client(self): -# stub = ( -# "zoo:nlp/text_classification/distilbert-none/" -# "pytorch/huggingface/qqp/pruned80_quant-none-vnni" -# ) -# server_config = ServerConfig( -# num_cores=1, -# num_workers=1, -# endpoints=[ -# EndpointConfig( -# route="/predict/dynamic-batch", -# task="text-classification", -# model=stub, -# batch_size=1, -# ), -# EndpointConfig( -# route="/predict/static-batch", -# task="text-classification", -# model=stub, -# batch_size=2, -# ), -# ], -# loggers={}, # do not instantiate any loggers -# ) -# with mock_engine(rng_seed=0): -# app = _build_app(server_config) -# yield TestClient(app) -# -# def test_static_batch_errors_on_wrong_batch_size(self, client): -# with pytest.raises( -# RuntimeError, -# match=( -# "batch size of 1 passed into pipeline is " -# "not divisible by model batch size of 2" -# ), -# ): -# client.post("/predict/static-batch", json={"sequences": "today is great"}) -# -# def test_static_batch_good_request(self, client): -# response = client.post( -# "/predict/static-batch", -# json={"sequences": ["today is great", "today is terrible"]}, -# ) -# assert response.status_code == 200 -# output = response.json() -# assert len(output["labels"]) == 2 -# assert len(output["scores"]) == 2 -# -# @pytest.mark.parametrize( -# "seqs", -# [ -# ["today is great"], -# ["today is great", "today is terrible"], -# ["the first sentence", "the second sentence", "the third sentence"], -# ], -# ) -# def test_dynamic_batch_any(self, client, seqs): -# response = client.post("/predict/dynamic-batch", json={"sequences": seqs}) -# assert response.status_code == 200 -# output = response.json() -# assert len(output["labels"]) == len(seqs) -# assert len(output["scores"]) == len(seqs) -# -# -# class TestDynamicEndpoints: -# @pytest.fixture(scope="class") -# def client(self): -# server_config = ServerConfig( -# num_cores=1, num_workers=1, endpoints=[], loggers=None -# ) -# with mock_engine(rng_seed=0): -# app = _build_app(server_config) -# yield TestClient(app) -# -# -# @mock_engine(rng_seed=0) -# def test_dynamic_add_and_remove_endpoint(engine_mock): -# server_config = ServerConfig(num_cores=1, num_workers=1, endpoints=[], loggers={}) -# app = _build_app(server_config) -# client = TestClient(app) -# -# # assert /predict doesn't exist -# assert 404 == client.post("/predict", json=dict(sequences="asdf")).status_code -# -# # add /predict -# response = client.post( -# "/endpoints", -# json=EndpointConfig(task="text-classification", model="default").dict(), -# ) -# assert response.status_code == 200 -# response = client.post("/predict", json=dict(sequences="asdf")) -# assert response.status_code == 200 -# -# # remove /predict -# response = client.delete( -# "/endpoints", -# json=EndpointConfig( -# route="/predict", task="text-classification", model="default" -# ).dict(), -# ) -# assert 
response.status_code == 200 -# assert 404 == client.post("/predict", json=dict(sequences="asdf")).status_code +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import List +from unittest.mock import Mock + +from pydantic import BaseModel + +import pytest +from deepsparse.loggers import MultiLogger +from deepsparse.server.config import EndpointConfig, ServerConfig, SystemLoggingConfig +from deepsparse.server.server import _add_pipeline_endpoint, _build_app +from fastapi import FastAPI, UploadFile +from fastapi.testclient import TestClient +from tests.utils import mock_engine + + +class FromFilesSchema(BaseModel): + def from_files(self, f): + # do nothing - this method exists just to test files endpoint logic + ... + + +class StrSchema(BaseModel): + value: str + + +def parse(v: StrSchema) -> int: + return int(v.value) + + +class TestStatusEndpoints: + @pytest.fixture(scope="class") + def server_config(self): + server_config = ServerConfig( + num_cores=1, num_workers=1, endpoints=[], loggers={} + ) + yield server_config + + @pytest.fixture(scope="class") + def client(self, server_config): + yield TestClient(_build_app(server_config)) + + def test_config(self, server_config, client): + response = client.get("/config") + loaded = ServerConfig(**response.json()) + assert loaded == server_config + + @pytest.mark.parametrize("route", ["/ping", "/health", "/healthcheck", "/status"]) + def test_pings_exist(self, client, route): + response = client.get(route) + assert response.status_code == 200 + assert response.json() is True + + def test_docs_exist(self, client): + assert client.get("/docs").status_code == 200 + + def test_home_redirects_to_docs(self, client): + response = client.get("/") + assert response.status_code == 200 + assert response.request.path_url == "/docs" + assert len(response.history) > 0 + assert response.history[-1].is_redirect + + +class TestMockEndpoints: + @pytest.fixture(scope="class") + def server_config(self): + server_config = ServerConfig( + num_cores=1, num_workers=1, endpoints=[], loggers={} + ) + yield server_config + + @pytest.fixture(scope="class") + def app(self, server_config): + yield _build_app(server_config) + + @pytest.fixture(scope="class") + def client(self, app): + yield TestClient(app) + + def test_add_model_endpoint(self, app: FastAPI, client: TestClient): + mock_pipeline = Mock( + side_effect=parse, + input_schema=StrSchema, + output_schema=int, + logger=MultiLogger([]), + ) + _add_pipeline_endpoint( + app, + system_logging_config=SystemLoggingConfig(), + endpoint_config=Mock(route="/predict/parse_int"), + pipeline=mock_pipeline, + ) + assert app.routes[-1].path == "/predict/parse_int" + assert app.routes[-1].response_model is int + assert app.routes[-1].endpoint.__annotations__ == {"request": StrSchema} + assert app.routes[-1].methods == {"POST"} + + for v in ["1234", "5678"]: + response = client.post("/predict/parse_int", json=dict(value=v)) + assert response.status_code 
== 200 + assert response.json() == int(v) + + def test_add_model_endpoint_with_from_files(self, app): + _add_pipeline_endpoint( + app, + system_logging_config=Mock(), + endpoint_config=Mock(route="/predict/parse_int"), + pipeline=Mock(input_schema=FromFilesSchema, output_schema=int), + ) + assert app.routes[-2].path == "/predict/parse_int" + assert app.routes[-2].endpoint.__annotations__ == {"request": FromFilesSchema} + assert app.routes[-1].path == "/predict/parse_int/from_files" + assert app.routes[-1].endpoint.__annotations__ == {"request": List[UploadFile]} + assert app.routes[-1].response_model is int + assert app.routes[-1].methods == {"POST"} + + def test_sagemaker_only_adds_one_endpoint(self, app): + num_routes = len(app.routes) + _add_pipeline_endpoint( + app, + endpoint_config=Mock(route="/predict/parse_int"), + system_logging_config=Mock(), + pipeline=Mock(input_schema=FromFilesSchema, output_schema=int), + integration="sagemaker", + ) + assert len(app.routes) == num_routes + 1 + assert app.routes[-1].path == "/invocations" + assert app.routes[-1].endpoint.__annotations__ == {"request": List[UploadFile]} + + num_routes = len(app.routes) + _add_pipeline_endpoint( + app, + endpoint_config=Mock(route="/predict/parse_int"), + system_logging_config=Mock(), + pipeline=Mock(input_schema=StrSchema, output_schema=int), + integration="sagemaker", + ) + assert len(app.routes) == num_routes + 1 + assert app.routes[-1].path == "/invocations" + assert app.routes[-1].endpoint.__annotations__ == {"request": StrSchema} + + def test_add_endpoint_with_no_route_specified(self, app): + _add_pipeline_endpoint( + app, + endpoint_config=Mock(route=None), + system_logging_config=Mock(), + pipeline=Mock(input_schema=StrSchema, output_schema=int), + ) + assert app.routes[-1].path == "/predict" + + +class TestActualModelEndpoints: + @pytest.fixture(scope="class") + def client(self): + stub = ( + "zoo:nlp/text_classification/distilbert-none/" + "pytorch/huggingface/qqp/pruned80_quant-none-vnni" + ) + server_config = ServerConfig( + num_cores=1, + num_workers=1, + endpoints=[ + EndpointConfig( + route="/predict/dynamic-batch", + task="text-classification", + model=stub, + batch_size=1, + ), + EndpointConfig( + route="/predict/static-batch", + task="text-classification", + model=stub, + batch_size=2, + ), + ], + loggers={}, # do not instantiate any loggers + ) + with mock_engine(rng_seed=0): + app = _build_app(server_config) + yield TestClient(app) + + def test_static_batch_errors_on_wrong_batch_size(self, client): + with pytest.raises( + RuntimeError, + match=( + "batch size of 1 passed into pipeline is " + "not divisible by model batch size of 2" + ), + ): + client.post("/predict/static-batch", json={"sequences": "today is great"}) + + def test_static_batch_good_request(self, client): + response = client.post( + "/predict/static-batch", + json={"sequences": ["today is great", "today is terrible"]}, + ) + assert response.status_code == 200 + output = response.json() + assert len(output["labels"]) == 2 + assert len(output["scores"]) == 2 + + @pytest.mark.parametrize( + "seqs", + [ + ["today is great"], + ["today is great", "today is terrible"], + ["the first sentence", "the second sentence", "the third sentence"], + ], + ) + def test_dynamic_batch_any(self, client, seqs): + response = client.post("/predict/dynamic-batch", json={"sequences": seqs}) + assert response.status_code == 200 + output = response.json() + assert len(output["labels"]) == len(seqs) + assert len(output["scores"]) == len(seqs) + + +class 
TestDynamicEndpoints: + @pytest.fixture(scope="class") + def client(self): + server_config = ServerConfig( + num_cores=1, num_workers=1, endpoints=[], loggers=None + ) + with mock_engine(rng_seed=0): + app = _build_app(server_config) + yield TestClient(app) + + +@mock_engine(rng_seed=0) +def test_dynamic_add_and_remove_endpoint(engine_mock): + server_config = ServerConfig(num_cores=1, num_workers=1, endpoints=[], loggers={}) + app = _build_app(server_config) + client = TestClient(app) + + # assert /predict doesn't exist + assert 404 == client.post("/predict", json=dict(sequences="asdf")).status_code + + # add /predict + response = client.post( + "/endpoints", + json=EndpointConfig(task="text-classification", model="default").dict(), + ) + assert response.status_code == 200 + response = client.post("/predict", json=dict(sequences="asdf")) + assert response.status_code == 200 + + # remove /predict + response = client.delete( + "/endpoints", + json=EndpointConfig( + route="/predict", task="text-classification", model="default" + ).dict(), + ) + assert response.status_code == 200 + assert 404 == client.post("/predict", json=dict(sequences="asdf")).status_code diff --git a/tests/server/test_loggers.py b/tests/server/test_loggers.py index 8802835381..ce2576c09f 100644 --- a/tests/server/test_loggers.py +++ b/tests/server/test_loggers.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import os + import shutil from collections import Counter from unittest import mock @@ -58,247 +58,247 @@ def test_default_logger(): "deepsparse.server.server.server_logger_from_config", return_value=server_logger ), mock_engine(rng_seed=0): app = _build_app(server_config) - # client = TestClient(app) - # - # for _ in range(2): - # client.post("/predict", json={"sequences": "today is great"}) - # assert isinstance(fetch_leaf_logger(server_logger), PythonLogger) + client = TestClient(app) + for _ in range(2): + client.post("/predict", json={"sequences": "today is great"}) + assert isinstance(fetch_leaf_logger(server_logger), PythonLogger) -# def test_data_logging_from_predefined(): -# server_config = ServerConfig( -# endpoints=[ -# EndpointConfig( -# task=task, -# name="text_classification", -# model=stub, -# add_predefined=[MetricFunctionConfig(func="text_classification")], -# ) -# ], -# loggers={"logger_1": {"path": logger_identifier}}, -# ) -# server_logger = server_logger_from_config(server_config) -# with mock.patch( -# "deepsparse.server.server.server_logger_from_config", return_value=server_logger -# ), mock_engine(rng_seed=0): -# app = _build_app(server_config) -# client = TestClient(app) -# client.post( -# "/predict", -# json={ -# "sequences": [["Fun for adults and children.", "Fun for only children."]] -# }, -# ) -# calls = fetch_leaf_logger(server_logger).calls -# data_logging_logs = [call for call in calls if "DATA" in call] -# with open( -# "tests/deepsparse/loggers/metric_functions/predefined/predefined_logs/text_classification.txt", # noqa E501 -# "r", -# ) as f: -# expected_logs = f.read().splitlines() -# for log, expected_log in zip(data_logging_logs, expected_logs): -# assert log == expected_log -# -# -# @flaky(max_runs=4, min_passes=3) -# def test_logging_only_system_info(): -# server_config = ServerConfig( -# endpoints=[EndpointConfig(task=task, name=name, model=stub)], -# loggers={"logger_1": {"path": logger_identifier}}, -# ) -# server_logger = 
server_logger_from_config(server_config) -# with mock.patch( -# "deepsparse.server.server.server_logger_from_config", return_value=server_logger -# ), mock_engine(rng_seed=0): -# app = _build_app(server_config) -# client = TestClient(app) -# -# for _ in range(2): -# client.post("/predict", json={"sequences": "today is great"}) -# _test_logger_contents( -# fetch_leaf_logger(server_logger), -# {"prediction_latency": 8}, -# ) -# -# -# def test_regex_target_logging(): -# server_config = ServerConfig( -# endpoints=[ -# EndpointConfig( -# task=task, -# name=name, -# data_logging={ -# "re:.*pipeline*.": [MetricFunctionConfig(func="identity")] -# }, -# model=stub, -# ) -# ], -# loggers={"logger_1": {"path": logger_identifier}}, -# ) -# server_logger = server_logger_from_config(server_config) -# with mock.patch( -# "deepsparse.server.server.server_logger_from_config", return_value=server_logger -# ), mock_engine(rng_seed=0): -# app = _build_app(server_config) -# client = TestClient(app) -# -# for _ in range(2): -# client.post("/predict", json={"sequences": "today is great"}) -# _test_logger_contents( -# fetch_leaf_logger(server_logger), -# {"pipeline_inputs__identity": 2, "pipeline_outputs__identity": 2}, -# ) -# -# -# def test_multiple_targets_logging(): -# server_config = ServerConfig( -# endpoints=[ -# EndpointConfig( -# task=task, -# name=name, -# data_logging={ -# "pipeline_inputs.sequences": [ -# MetricFunctionConfig(func="identity") -# ], -# "engine_inputs": [MetricFunctionConfig(func="identity")], -# }, -# model=stub, -# ) -# ], -# loggers={"logger_1": {"path": logger_identifier}}, -# ) -# server_logger = server_logger_from_config(server_config) -# with mock.patch( -# "deepsparse.server.server.server_logger_from_config", return_value=server_logger -# ), mock_engine(rng_seed=0): -# app = _build_app(server_config) -# client = TestClient(app) -# -# for _ in range(2): -# client.post("/predict", json={"sequences": "today is great"}) -# _test_logger_contents( -# fetch_leaf_logger(server_logger), -# { -# "pipeline_inputs.sequences__identity": 2, -# "engine_inputs__identity": 2, -# "prediction_latency": 8, -# }, -# ) -# -# -# @flaky(max_runs=3, min_passes=2) -# def test_function_metric_with_target_loggers(): -# server_config = ServerConfig( -# endpoints=[ -# EndpointConfig( -# task=task, -# name=name, -# data_logging={ -# "pipeline_inputs.sequences[0]": [ -# MetricFunctionConfig( -# func="identity", target_loggers=["logger_1"] -# ) -# ], -# "engine_inputs": [MetricFunctionConfig(func="identity")], -# }, -# model=stub, -# ) -# ], -# loggers={ -# "logger_1": {"path": logger_identifier}, -# "logger_2": {"path": logger_identifier}, -# }, -# ) -# server_logger = server_logger_from_config(server_config) -# with mock.patch( -# "deepsparse.server.server.server_logger_from_config", return_value=server_logger -# ), mock_engine(rng_seed=0): -# app = _build_app(server_config) -# client = TestClient(app) -# -# for _ in range(2): -# client.post("/predict", json={"sequences": "today is great"}) -# -# _test_logger_contents( -# server_logger.logger.loggers[1].logger.loggers[0], -# { -# "pipeline_inputs.sequences__identity": 2, -# "engine_inputs__identity": 2, -# "prediction_latency": 8, -# }, -# ) -# _test_logger_contents( -# server_logger.logger.loggers[1].logger.loggers[1], -# { -# "pipeline_inputs.sequences__identity": 0, -# "engine_inputs__identity": 2, -# "prediction_latency": 8, -# }, -# ) -# -# -# @mock_engine(rng_seed=0) -# def test_instantiate_prometheus(mock_engine, tmp_path): -# client = TestClient( -# 
_build_app( -# ServerConfig( -# endpoints=[EndpointConfig(task="text_classification", model="default")], -# loggers=dict( -# prometheus={ -# "port": find_free_port(), -# "text_log_save_dir": tmp_path.name, -# "text_log_save_frequency": 30, -# } -# ), -# ) -# ) -# ) -# r = client.post("/predict", json=dict(sequences="asdf")) -# assert r.status_code == 200 -# shutil.rmtree(tmp_path.name, ignore_errors=True) -# -# -# @mock_engine(rng_seed=0) -# def test_endpoint_system_logging(mock_engine): -# server_config = ServerConfig( -# system_logging=ServerSystemLoggingConfig( -# request_details=SystemLoggingGroup(enable=True), -# resource_utilization=SystemLoggingGroup(enable=True), -# ), -# endpoints=[ -# EndpointConfig( -# task="text_classification", -# model="default", -# route="/predict_text_classification", -# logging_config=PipelineSystemLoggingConfig( -# inference_details=SystemLoggingGroup(enable=True), -# prediction_latency=SystemLoggingGroup(enable=True), -# ), -# ), -# EndpointConfig( -# task="question_answering", -# model="default", -# route="/predict_question_answering", -# logging_config=PipelineSystemLoggingConfig( -# inference_details=SystemLoggingGroup(enable=True), -# prediction_latency=SystemLoggingGroup(enable=True), -# ), -# ), -# ], -# loggers={"logger_1": {"path": logger_identifier}}, -# ) -# server_logger = server_logger_from_config(server_config) -# with mock.patch( -# "deepsparse.server.server.server_logger_from_config", return_value=server_logger -# ), mock_engine: -# app = _build_app(server_config) -# client = TestClient(app) -# client.post("/predict_text_classification", json=dict(sequences="asdf")) -# client.post( -# "/predict_text_classification", json=dict(question="asdf", context="asdf") -# ) -# calls = server_logger.logger.loggers[0].logger.loggers[0].calls -# -# c = Counter([call.split(",")[0] for call in calls]) -# -# assert c == SAMPLE_LOGS_DICT + +def test_data_logging_from_predefined(): + server_config = ServerConfig( + endpoints=[ + EndpointConfig( + task=task, + name="text_classification", + model=stub, + add_predefined=[MetricFunctionConfig(func="text_classification")], + ) + ], + loggers={"logger_1": {"path": logger_identifier}}, + ) + server_logger = server_logger_from_config(server_config) + with mock.patch( + "deepsparse.server.server.server_logger_from_config", return_value=server_logger + ), mock_engine(rng_seed=0): + app = _build_app(server_config) + client = TestClient(app) + client.post( + "/predict", + json={ + "sequences": [["Fun for adults and children.", "Fun for only children."]] + }, + ) + calls = fetch_leaf_logger(server_logger).calls + data_logging_logs = [call for call in calls if "DATA" in call] + with open( + "tests/deepsparse/loggers/metric_functions/predefined/predefined_logs/text_classification.txt", # noqa E501 + "r", + ) as f: + expected_logs = f.read().splitlines() + for log, expected_log in zip(data_logging_logs, expected_logs): + assert log == expected_log + + +@flaky(max_runs=4, min_passes=3) +def test_logging_only_system_info(): + server_config = ServerConfig( + endpoints=[EndpointConfig(task=task, name=name, model=stub)], + loggers={"logger_1": {"path": logger_identifier}}, + ) + server_logger = server_logger_from_config(server_config) + with mock.patch( + "deepsparse.server.server.server_logger_from_config", return_value=server_logger + ), mock_engine(rng_seed=0): + app = _build_app(server_config) + client = TestClient(app) + + for _ in range(2): + client.post("/predict", json={"sequences": "today is great"}) + 
_test_logger_contents( + fetch_leaf_logger(server_logger), + {"prediction_latency": 8}, + ) + + +def test_regex_target_logging(): + server_config = ServerConfig( + endpoints=[ + EndpointConfig( + task=task, + name=name, + data_logging={ + "re:.*pipeline*.": [MetricFunctionConfig(func="identity")] + }, + model=stub, + ) + ], + loggers={"logger_1": {"path": logger_identifier}}, + ) + server_logger = server_logger_from_config(server_config) + with mock.patch( + "deepsparse.server.server.server_logger_from_config", return_value=server_logger + ), mock_engine(rng_seed=0): + app = _build_app(server_config) + client = TestClient(app) + + for _ in range(2): + client.post("/predict", json={"sequences": "today is great"}) + _test_logger_contents( + fetch_leaf_logger(server_logger), + {"pipeline_inputs__identity": 2, "pipeline_outputs__identity": 2}, + ) + + +def test_multiple_targets_logging(): + server_config = ServerConfig( + endpoints=[ + EndpointConfig( + task=task, + name=name, + data_logging={ + "pipeline_inputs.sequences": [ + MetricFunctionConfig(func="identity") + ], + "engine_inputs": [MetricFunctionConfig(func="identity")], + }, + model=stub, + ) + ], + loggers={"logger_1": {"path": logger_identifier}}, + ) + server_logger = server_logger_from_config(server_config) + with mock.patch( + "deepsparse.server.server.server_logger_from_config", return_value=server_logger + ), mock_engine(rng_seed=0): + app = _build_app(server_config) + client = TestClient(app) + + for _ in range(2): + client.post("/predict", json={"sequences": "today is great"}) + _test_logger_contents( + fetch_leaf_logger(server_logger), + { + "pipeline_inputs.sequences__identity": 2, + "engine_inputs__identity": 2, + "prediction_latency": 8, + }, + ) + + +@flaky(max_runs=3, min_passes=2) +def test_function_metric_with_target_loggers(): + server_config = ServerConfig( + endpoints=[ + EndpointConfig( + task=task, + name=name, + data_logging={ + "pipeline_inputs.sequences[0]": [ + MetricFunctionConfig( + func="identity", target_loggers=["logger_1"] + ) + ], + "engine_inputs": [MetricFunctionConfig(func="identity")], + }, + model=stub, + ) + ], + loggers={ + "logger_1": {"path": logger_identifier}, + "logger_2": {"path": logger_identifier}, + }, + ) + server_logger = server_logger_from_config(server_config) + with mock.patch( + "deepsparse.server.server.server_logger_from_config", return_value=server_logger + ), mock_engine(rng_seed=0): + app = _build_app(server_config) + client = TestClient(app) + + for _ in range(2): + client.post("/predict", json={"sequences": "today is great"}) + + _test_logger_contents( + server_logger.logger.loggers[1].logger.loggers[0], + { + "pipeline_inputs.sequences__identity": 2, + "engine_inputs__identity": 2, + "prediction_latency": 8, + }, + ) + _test_logger_contents( + server_logger.logger.loggers[1].logger.loggers[1], + { + "pipeline_inputs.sequences__identity": 0, + "engine_inputs__identity": 2, + "prediction_latency": 8, + }, + ) + + +@mock_engine(rng_seed=0) +def test_instantiate_prometheus(mock_engine, tmp_path): + client = TestClient( + _build_app( + ServerConfig( + endpoints=[EndpointConfig(task="text_classification", model="default")], + loggers=dict( + prometheus={ + "port": find_free_port(), + "text_log_save_dir": tmp_path.name, + "text_log_save_frequency": 30, + } + ), + ) + ) + ) + r = client.post("/predict", json=dict(sequences="asdf")) + assert r.status_code == 200 + shutil.rmtree(tmp_path.name, ignore_errors=True) + + +@mock_engine(rng_seed=0) +def 
test_endpoint_system_logging(mock_engine): + server_config = ServerConfig( + system_logging=ServerSystemLoggingConfig( + request_details=SystemLoggingGroup(enable=True), + resource_utilization=SystemLoggingGroup(enable=True), + ), + endpoints=[ + EndpointConfig( + task="text_classification", + model="default", + route="/predict_text_classification", + logging_config=PipelineSystemLoggingConfig( + inference_details=SystemLoggingGroup(enable=True), + prediction_latency=SystemLoggingGroup(enable=True), + ), + ), + EndpointConfig( + task="question_answering", + model="default", + route="/predict_question_answering", + logging_config=PipelineSystemLoggingConfig( + inference_details=SystemLoggingGroup(enable=True), + prediction_latency=SystemLoggingGroup(enable=True), + ), + ), + ], + loggers={"logger_1": {"path": logger_identifier}}, + ) + server_logger = server_logger_from_config(server_config) + with mock.patch( + "deepsparse.server.server.server_logger_from_config", return_value=server_logger + ), mock_engine: + app = _build_app(server_config) + client = TestClient(app) + client.post("/predict_text_classification", json=dict(sequences="asdf")) + client.post( + "/predict_text_classification", json=dict(question="asdf", context="asdf") + ) + calls = server_logger.logger.loggers[0].logger.loggers[0].calls + + c = Counter([call.split(",")[0] for call in calls]) + + assert c == SAMPLE_LOGS_DICT diff --git a/tests/server/test_system_logging.py b/tests/server/test_system_logging.py index bd0a8a3ae3..b6a3fbd2b6 100644 --- a/tests/server/test_system_logging.py +++ b/tests/server/test_system_logging.py @@ -1,169 +1,169 @@ -# # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# # -# # Licensed under the Apache License, Version 2.0 (the "License"); -# # you may not use this file except in compliance with the License. -# # You may obtain a copy of the License at -# # -# # http://www.apache.org/licenses/LICENSE-2.0 -# # -# # Unless required by applicable law or agreed to in writing, -# # software distributed under the License is distributed on an "AS IS" BASIS, -# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# # See the License for the specific language governing permissions and -# # limitations under the License. 
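# A minimal sketch, not part of this diff, of how `test_endpoint_system_logging`
# above groups leaf-logger calls by identifier. The call strings below are
# hypothetical; only the "identifier:...," prefix layout is assumed, inferred from
# the `call.split(",")[0]` parsing used in these tests.
from collections import Counter

calls = [
    "identifier:request_details/successful_request_count, value:1, category:system",
    "identifier:request_details/response_message, value:Response status code: 200, category:system",
    "identifier:request_details/successful_request_count, value:1, category:system",
]
# counting only the identifier portion mirrors Counter([call.split(",")[0] for call in calls])
identifier_counts = Counter(call.split(",")[0] for call in calls)
assert identifier_counts["identifier:request_details/successful_request_count"] == 2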
-# -# from unittest import mock -# -# import pytest -# from deepsparse.loggers.config import SystemLoggingGroup -# from deepsparse.server.config import ( -# EndpointConfig, -# ServerConfig, -# ServerSystemLoggingConfig, -# ) -# from deepsparse.server.helpers import server_logger_from_config -# from deepsparse.server.server import _build_app -# from deepsparse.server.system_logging import log_resource_utilization -# from fastapi.testclient import TestClient -# from tests.deepsparse.loggers.helpers import ListLogger -# from tests.utils import mock_engine -# -# -# logger_identifier = "tests/deepsparse/loggers/helpers.py:ListLogger" -# stub = "zoo:nlp/text_classification/distilbert-none/pytorch/huggingface/qqp/pruned80_quant-none-vnni" # noqa E501 -# task = "text-classification" -# name = "endpoint_name" -# -# -# def _test_successful_requests(calls, successful_request): -# relevant_call = [ -# call -# for call in calls -# if call.startswith("identifier:request_details/successful_request_count") -# ] -# assert len(relevant_call) == 1 -# relevant_call = relevant_call[0] -# value = bool(int(relevant_call.split("value:")[1].split(",")[0])) -# assert value == successful_request -# -# -# def _test_response_msg(calls, response_msg): -# relevant_call = [ -# call -# for call in calls -# if call.startswith("identifier:request_details/response_message") -# ] -# assert len(relevant_call) == 1 -# relevant_call = relevant_call[0] -# value = relevant_call.split("value:")[1].split(",")[0] -# assert value == response_msg -# -# -# @pytest.mark.parametrize( -# "json_payload, input_batch_size, successful_request, response_msg", -# [ -# ({"sequences": "today is great"}, 1, True, "Response status code: 200"), -# ( -# {"sequences": ["today is great", "today is great"]}, -# 2, -# True, -# "Response status code: 200", -# ), -# ({"this": "is supposed to fail"}, 1, False, "Response status code: 422"), -# ], -# ) -# def test_log_request_details( -# json_payload, input_batch_size, successful_request, response_msg -# ): -# server_config = ServerConfig( -# endpoints=[ -# EndpointConfig( -# task=task, name=name, model=stub, batch_size=input_batch_size -# ) -# ], -# loggers={"logger_1": {"path": logger_identifier}}, -# system_logging=ServerSystemLoggingConfig( -# request_details=SystemLoggingGroup(enable=True) -# ), -# ) -# server_logger = server_logger_from_config(server_config) -# with mock.patch( -# "deepsparse.server.server.server_logger_from_config", return_value=server_logger -# ), mock_engine(rng_seed=0): -# app = _build_app(server_config) -# client = TestClient(app) -# client.post("/predict", json=json_payload) -# -# calls = server_logger.logger.loggers[0].logger.loggers[0].calls -# -# _test_successful_requests(calls, successful_request) -# _test_response_msg(calls, response_msg) -# -# -# def _test_cpu_utilization(calls, num_iterations): -# relevant_calls = [ -# call -# for call in calls -# if call.startswith("identifier:resource_utilization/cpu_utilization_percent") -# ] -# assert len(relevant_calls) == num_iterations -# -# -# def _test_memory_utilization(calls, num_iterations): -# relevant_calls = [ -# call -# for call in calls -# if call.startswith("identifier:resource_utilization/memory_utilization_percent") -# ] -# values = [float(call.split("value:")[1].split(",")[0]) for call in relevant_calls] -# assert len(relevant_calls) == num_iterations -# # memory utilization is a percentage, so it should be between 0 and 100 -# assert all(0.0 < value < 100.0 for value in values) -# -# -# def 
_test_total_memory_available(calls, num_iterations): -# relevant_calls = [ -# call -# for call in calls -# if call.startswith( -# "identifier:resource_utilization/total_memory_available_bytes" -# ) -# ] -# values = [float(call.split("value:")[1].split(",")[0]) for call in relevant_calls] -# assert len(relevant_calls) == num_iterations -# # assert all values are the same (total memory available is constant) -# assert all(value == values[0] for value in values) -# -# -# def _test_additional_items_to_log(calls, num_iterations): -# relevant_calls = [ -# call -# for call in calls -# if call.startswith("identifier:resource_utilization/test") -# ] -# values = [float(call.split("value:")[1].split(",")[0]) for call in relevant_calls] -# assert len(relevant_calls) == num_iterations -# # assert all values are the same ({"test" : 1} is constant) -# assert all(value == 1 for value in values) -# -# -# @pytest.mark.parametrize( -# "num_iterations, additional_items_to_log", -# [ -# (5, {}), -# (3, {"test": 1}), -# ], -# ) -# def test_log_resource_utilization(num_iterations, additional_items_to_log): -# server_logger = ListLogger() -# -# for iter in range(num_iterations): -# log_resource_utilization( -# server_logger, prefix="resource_utilization", **additional_items_to_log -# ) -# -# calls = server_logger.calls -# -# _test_cpu_utilization(calls, num_iterations) -# _test_memory_utilization(calls, num_iterations) -# _test_total_memory_available(calls, num_iterations) +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
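# A minimal sketch, not part of this diff, of how the `_test_*` helpers further
# down in this file extract numeric readings from a ListLogger call string.
# The example string is hypothetical; only the "value:<number>," layout is
# assumed, based on the `split("value:")` parsing below.
call = (
    "identifier:resource_utilization/memory_utilization_percent, "
    "value:37.5, category:system"
)
value = float(call.split("value:")[1].split(",")[0])
assert 0.0 < value < 100.0  # memory utilization is reported as a percentage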
+ +from unittest import mock + +import pytest +from deepsparse.loggers.config import SystemLoggingGroup +from deepsparse.server.config import ( + EndpointConfig, + ServerConfig, + ServerSystemLoggingConfig, +) +from deepsparse.server.helpers import server_logger_from_config +from deepsparse.server.server import _build_app +from deepsparse.server.system_logging import log_resource_utilization +from fastapi.testclient import TestClient +from tests.deepsparse.loggers.helpers import ListLogger +from tests.utils import mock_engine + + +logger_identifier = "tests/deepsparse/loggers/helpers.py:ListLogger" +stub = "zoo:nlp/text_classification/distilbert-none/pytorch/huggingface/qqp/pruned80_quant-none-vnni" # noqa E501 +task = "text-classification" +name = "endpoint_name" + + +def _test_successful_requests(calls, successful_request): + relevant_call = [ + call + for call in calls + if call.startswith("identifier:request_details/successful_request_count") + ] + assert len(relevant_call) == 1 + relevant_call = relevant_call[0] + value = bool(int(relevant_call.split("value:")[1].split(",")[0])) + assert value == successful_request + + +def _test_response_msg(calls, response_msg): + relevant_call = [ + call + for call in calls + if call.startswith("identifier:request_details/response_message") + ] + assert len(relevant_call) == 1 + relevant_call = relevant_call[0] + value = relevant_call.split("value:")[1].split(",")[0] + assert value == response_msg + + +@pytest.mark.parametrize( + "json_payload, input_batch_size, successful_request, response_msg", + [ + ({"sequences": "today is great"}, 1, True, "Response status code: 200"), + ( + {"sequences": ["today is great", "today is great"]}, + 2, + True, + "Response status code: 200", + ), + ({"this": "is supposed to fail"}, 1, False, "Response status code: 422"), + ], +) +def test_log_request_details( + json_payload, input_batch_size, successful_request, response_msg +): + server_config = ServerConfig( + endpoints=[ + EndpointConfig( + task=task, name=name, model=stub, batch_size=input_batch_size + ) + ], + loggers={"logger_1": {"path": logger_identifier}}, + system_logging=ServerSystemLoggingConfig( + request_details=SystemLoggingGroup(enable=True) + ), + ) + server_logger = server_logger_from_config(server_config) + with mock.patch( + "deepsparse.server.server.server_logger_from_config", return_value=server_logger + ), mock_engine(rng_seed=0): + app = _build_app(server_config) + client = TestClient(app) + client.post("/predict", json=json_payload) + + calls = server_logger.logger.loggers[0].logger.loggers[0].calls + + _test_successful_requests(calls, successful_request) + _test_response_msg(calls, response_msg) + + +def _test_cpu_utilization(calls, num_iterations): + relevant_calls = [ + call + for call in calls + if call.startswith("identifier:resource_utilization/cpu_utilization_percent") + ] + assert len(relevant_calls) == num_iterations + + +def _test_memory_utilization(calls, num_iterations): + relevant_calls = [ + call + for call in calls + if call.startswith("identifier:resource_utilization/memory_utilization_percent") + ] + values = [float(call.split("value:")[1].split(",")[0]) for call in relevant_calls] + assert len(relevant_calls) == num_iterations + # memory utilization is a percentage, so it should be between 0 and 100 + assert all(0.0 < value < 100.0 for value in values) + + +def _test_total_memory_available(calls, num_iterations): + relevant_calls = [ + call + for call in calls + if call.startswith( + 
"identifier:resource_utilization/total_memory_available_bytes" + ) + ] + values = [float(call.split("value:")[1].split(",")[0]) for call in relevant_calls] + assert len(relevant_calls) == num_iterations + # assert all values are the same (total memory available is constant) + assert all(value == values[0] for value in values) + + +def _test_additional_items_to_log(calls, num_iterations): + relevant_calls = [ + call + for call in calls + if call.startswith("identifier:resource_utilization/test") + ] + values = [float(call.split("value:")[1].split(",")[0]) for call in relevant_calls] + assert len(relevant_calls) == num_iterations + # assert all values are the same ({"test" : 1} is constant) + assert all(value == 1 for value in values) + + +@pytest.mark.parametrize( + "num_iterations, additional_items_to_log", + [ + (5, {}), + (3, {"test": 1}), + ], +) +def test_log_resource_utilization(num_iterations, additional_items_to_log): + server_logger = ListLogger() + + for iter in range(num_iterations): + log_resource_utilization( + server_logger, prefix="resource_utilization", **additional_items_to_log + ) + + calls = server_logger.calls + + _test_cpu_utilization(calls, num_iterations) + _test_memory_utilization(calls, num_iterations) + _test_total_memory_available(calls, num_iterations) From dc3d61bee8faf0aade60b07b4576db0cf7cfcfdc Mon Sep 17 00:00:00 2001 From: Damian Date: Mon, 5 Jun 2023 15:55:03 +0000 Subject: [PATCH 05/11] initial commit --- src/deepsparse/license.py | 3 +- src/deepsparse/server/cli.py | 1 + src/deepsparse/transformers/helpers.py | 12 +++++- src/deepsparse/utils/onnx.py | 55 +++++++++++++++++++------- src/deepsparse/yolo/utils/utils.py | 24 ++++++----- 5 files changed, 70 insertions(+), 25 deletions(-) diff --git a/src/deepsparse/license.py b/src/deepsparse/license.py index ed436aaaf9..f4035072d3 100644 --- a/src/deepsparse/license.py +++ b/src/deepsparse/license.py @@ -53,7 +53,7 @@ def add_deepsparse_license(token_or_path): candidate_license_file_path = token_or_path if not os.path.exists(token_or_path): - # write raw token to temp file for validadation + # write raw token to temp file for validation candidate_license_tempfile = NamedTemporaryFile() candidate_license_file_path = candidate_license_tempfile.name with open(candidate_license_file_path, "w") as token_file: @@ -70,6 +70,7 @@ def add_deepsparse_license(token_or_path): license_file_path = _get_license_file_path() shutil.copy(candidate_license_file_path, license_file_path) _LOGGER.info(f"DeepSparse license file written to {license_file_path}") + os.remove(candidate_license_file_path) # re-validate and print message now that licensee is copied to expected location validate_license() diff --git a/src/deepsparse/server/cli.py b/src/deepsparse/server/cli.py index 1b323e28e3..29cbc9afb0 100644 --- a/src/deepsparse/server/cli.py +++ b/src/deepsparse/server/cli.py @@ -228,6 +228,7 @@ def main( loggers={}, ) + # saving yaml config to temporary directory with TemporaryDirectory() as tmp_dir: config_path = os.path.join(tmp_dir, "server-config.yaml") with open(config_path, "w") as fp: diff --git a/src/deepsparse/transformers/helpers.py b/src/deepsparse/transformers/helpers.py index d80949eb11..d798231050 100644 --- a/src/deepsparse/transformers/helpers.py +++ b/src/deepsparse/transformers/helpers.py @@ -136,6 +136,7 @@ def overwrite_transformer_onnx_model_inputs( batch_size: int = 1, max_length: int = 128, output_path: Optional[str] = None, + inplace: bool = True, ) -> Tuple[Optional[str], List[str], 
Optional[NamedTemporaryFile]]: """ Overrides an ONNX model's inputs to have the given batch size and sequence lengths. @@ -148,12 +149,21 @@ def overwrite_transformer_onnx_model_inputs( :param output_path: if provided, the model will be saved to the given path, otherwise, the model will be saved to a named temporary file that will be deleted after the program exits + :param inplace: if True, the model will be modified in place, otherwise + a copy of the model will be saved to a temporary file :return: if no output path, a tuple of the saved path to the model, list of model input names, and reference to the tempfile object will be returned otherwise, only the model input names will be returned """ + + if inplace and output_path is None: + raise ValueError( + "Cannot specify both inplace=True and output_path. If inplace=True, " + "the model will be modified in place (the returned path will be identical" + "to the input path specified in argument `path`)" + ) # overwrite input shapes - model = onnx.load(path) + model = onnx.load(path, load_external_data=not inplace) initializer_input_names = set([node.name for node in model.graph.initializer]) external_inputs = [ inp for inp in model.graph.input if inp.name not in initializer_input_names diff --git a/src/deepsparse/utils/onnx.py b/src/deepsparse/utils/onnx.py index 326c4b215d..8b40ab4346 100644 --- a/src/deepsparse/utils/onnx.py +++ b/src/deepsparse/utils/onnx.py @@ -24,7 +24,7 @@ from onnx.mapping import TENSOR_TYPE_TO_NP_TYPE from deepsparse.utils.extractor import Extractor -from sparsezoo.utils import save_onnx, validate_onnx +from sparsezoo.utils import onnx_includes_external_data, save_onnx, validate_onnx try: @@ -53,13 +53,21 @@ @contextlib.contextmanager -def save_onnx_to_temp_files(model: Model, with_external_data=True) -> str: +def save_onnx_to_temp_files(model: onnx.ModelProto, with_external_data=False) -> str: """ Save model to a temporary file. Works for models with external data. + :param model: The onnx model to save to temporary directory :param with_external_data: Whether to save external data to a separate file """ + if not onnx_includes_external_data(model) and with_external_data: + raise ValueError( + "Model does not include external data, it only includes the model graph." + "Cannot save its external data to separate a file." + "Set argument `with_external_data`=False" + ) shaped_model = tempfile.NamedTemporaryFile(mode="w", delete=False) + if with_external_data: external_data = os.path.join( tempfile.tempdir, next(tempfile._get_candidate_names()) @@ -195,16 +203,27 @@ def generate_random_inputs( def override_onnx_batch_size( - onnx_filepath: str, batch_size: int, inplace: bool = False + onnx_filepath: str, + batch_size: int, + inplace: bool = True, ) -> str: """ Rewrite batch sizes of ONNX model, saving the modified model and returning its path - :param onnx_filepath: File path to ONNX model + + :param onnx_filepath: File path to ONNX model. If the graph is to be + modified in-place, only the model graph will be loaded and modified. + Otherwise, the entire model will be loaded and modified, so that + external data are saved along the model graph. :param batch_size: Override for the batch size dimension - :param inplace: If True, overwrite the original model file - :return: File path to modified ONNX model + :param inplace: If True, overwrite the original model file. + Else save the modified model to a temporary file. + :return: File path to modified ONNX model. 
+ If inplace is True, + the modified model will be saved to the same path as the original + model. Else the modified model will be saved to a + temporary file. """ - model = onnx.load(onnx_filepath, load_external_data=False) + model = onnx.load(onnx_filepath, load_external_data=not inplace) all_inputs = model.graph.input initializer_input_names = [node.name for node in model.graph.initializer] external_inputs = [ @@ -215,30 +234,38 @@ def override_onnx_batch_size( # Save modified model, this will be cleaned up when context is exited if inplace: - onnx.save(model, onnx_filepath) + save_onnx(model, onnx_filepath) return onnx_filepath else: # Save modified model, this will be cleaned up when context is exited - return save_onnx_to_temp_files(model, with_external_data=False) + return save_onnx_to_temp_files(model, with_external_data=not inplace) def override_onnx_input_shapes( onnx_filepath: str, input_shapes: Union[List[int], List[List[int]]], - inplace: bool = False, + inplace: bool = True, ) -> str: """ Rewrite input shapes of ONNX model, saving the modified model and returning its path - :param onnx_filepath: File path to ONNX model + + :param onnx_filepath: File path to ONNX model. If the graph is to be + modified in-place, only the model graph will be loaded and modified. + Otherwise, the entire model will be loaded and modified, so that + external data are saved along the model graph. :param input_shapes: Override for model's input shapes :param inplace: If True, overwrite the original model file - :return: File path to modified ONNX model + :return: File path to modified ONNX model. + If inplace is True, + the modified model will be saved to the same path as the original + model. Else the modified model will be saved to a + temporary file. """ if input_shapes is None: return onnx_filepath - model = onnx.load(onnx_filepath, load_external_data=False) + model = onnx.load(onnx_filepath, load_external_data=not inplace) all_inputs = model.graph.input initializer_input_names = [node.name for node in model.graph.initializer] external_inputs = [ @@ -279,7 +306,7 @@ def override_onnx_input_shapes( return onnx_filepath else: # Save modified model, this will be cleaned up when context is exited - return save_onnx_to_temp_files(model, with_external_data=False) + return save_onnx_to_temp_files(model, with_external_data=not inplace) def truncate_onnx_model( diff --git a/src/deepsparse/yolo/utils/utils.py b/src/deepsparse/yolo/utils/utils.py index ebbd48233b..3a0f596fe1 100644 --- a/src/deepsparse/yolo/utils/utils.py +++ b/src/deepsparse/yolo/utils/utils.py @@ -29,6 +29,7 @@ import yaml import torch +from deepsparse.utils.onnx import save_onnx_to_temp_files from deepsparse.yolo.schemas import YOLOOutput from sparsezoo.utils import save_onnx @@ -341,7 +342,7 @@ def get_onnx_expected_image_shape(onnx_model: onnx.ModelProto) -> Tuple[int, ... def modify_yolo_onnx_input_shape( - model_path: str, image_shape: Tuple[int, int] + model_path: str, image_shape: Tuple[int, int], inplace: bool = True ) -> Tuple[str, Optional[NamedTemporaryFile]]: """ Creates a new YOLO ONNX model from the given path that accepts the given input @@ -350,13 +351,17 @@ def modify_yolo_onnx_input_shape( :param model_path: file path to YOLO ONNX model :param image_shape: 2-tuple of the image shape to resize this yolo model to - :return: filepath to an onnx model reshaped to the given input shape will be the - original path if the shape is the same. 
Additionally returns the - NamedTemporaryFile for managing the scope of the object for file deletion + :param inplace: if True, modifies the given model_path in-place, otherwise + saves the modified model to a temporary file + :return: filepath to an onnx model reshaped to the given input shape. + If inplace is True, + the modified model will be saved to the same path as the original + model. Else the modified model will be saved to a + temporary file. """ has_postprocessing = yolo_onnx_has_postprocessing(model_path) - model = onnx.load(model_path) + model = onnx.load(model_path, load_external_data=not inplace) model_input = model.graph.input[0] initial_x, initial_y = get_onnx_expected_image_shape(model) @@ -399,10 +404,11 @@ def modify_yolo_onnx_input_shape( ) set_tensor_dim_shape(model.graph.output[0], 1, num_predictions) - tmp_file = NamedTemporaryFile() # file will be deleted after program exit - save_onnx(model, tmp_file.name) - - return tmp_file.name, tmp_file + if inplace: + save_onnx(model, model_path) + return model_path + else: + return save_onnx_to_temp_files(model, with_external_data=not inplace) def get_tensor_dim_shape(tensor: onnx.TensorProto, dim: int) -> int: From a294265a794c72046cb13115b5142fb6a70c2c68 Mon Sep 17 00:00:00 2001 From: dbogunowicz <97082108+dbogunowicz@users.noreply.github.com> Date: Mon, 5 Jun 2023 17:59:30 +0200 Subject: [PATCH 06/11] Update src/deepsparse/license.py --- src/deepsparse/license.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/deepsparse/license.py b/src/deepsparse/license.py index f4035072d3..06acdd2f0c 100644 --- a/src/deepsparse/license.py +++ b/src/deepsparse/license.py @@ -70,7 +70,6 @@ def add_deepsparse_license(token_or_path): license_file_path = _get_license_file_path() shutil.copy(candidate_license_file_path, license_file_path) _LOGGER.info(f"DeepSparse license file written to {license_file_path}") - os.remove(candidate_license_file_path) # re-validate and print message now that licensee is copied to expected location validate_license() From af97f2b4e31ea31eca2b57dc95c8e8be969af423 Mon Sep 17 00:00:00 2001 From: Damian Date: Wed, 7 Jun 2023 13:14:41 +0000 Subject: [PATCH 07/11] limit to 150mb --- src/deepsparse/transformers/helpers.py | 8 +- src/deepsparse/utils/onnx.py | 3 + tests/conftest.py | 35 ++ .../helpers/test_config_generation.py | 3 + .../loggers/test_prometheus_logger.py | 3 + tests/server/test_app.py | 332 +++++------ tests/server/test_config.py | 444 +++++++-------- tests/server/test_endpoints.py | 536 +++++++++--------- tests/server/test_loggers.py | 486 ++++++++-------- tests/server/test_system_logging.py | 338 +++++------ 10 files changed, 1118 insertions(+), 1070 deletions(-) diff --git a/src/deepsparse/transformers/helpers.py b/src/deepsparse/transformers/helpers.py index d798231050..847a7a9924 100644 --- a/src/deepsparse/transformers/helpers.py +++ b/src/deepsparse/transformers/helpers.py @@ -156,12 +156,14 @@ def overwrite_transformer_onnx_model_inputs( otherwise, only the model input names will be returned """ - if inplace and output_path is None: + if inplace and output_path is not None: raise ValueError( "Cannot specify both inplace=True and output_path. 
If inplace=True, " "the model will be modified in place (the returned path will be identical" "to the input path specified in argument `path`)" ) + if inplace: + output_path = path # overwrite input shapes model = onnx.load(path, load_external_data=not inplace) initializer_input_names = set([node.name for node in model.graph.initializer]) @@ -175,14 +177,14 @@ def overwrite_transformer_onnx_model_inputs( input_names.append(external_input.name) # Save modified model - if output_path is None: + if not inplace: tmp_file = NamedTemporaryFile() # file will be deleted after program exit save_onnx(model, tmp_file.name) return tmp_file.name, input_names, tmp_file else: save_onnx(model, output_path) - return input_names + return output_path, input_names, None def _get_file_parent(file_path: str) -> str: diff --git a/src/deepsparse/utils/onnx.py b/src/deepsparse/utils/onnx.py index 8b40ab4346..00f5f24233 100644 --- a/src/deepsparse/utils/onnx.py +++ b/src/deepsparse/utils/onnx.py @@ -60,6 +60,7 @@ def save_onnx_to_temp_files(model: onnx.ModelProto, with_external_data=False) -> :param model: The onnx model to save to temporary directory :param with_external_data: Whether to save external data to a separate file """ + if not onnx_includes_external_data(model) and with_external_data: raise ValueError( "Model does not include external data, it only includes the model graph." @@ -67,6 +68,7 @@ def save_onnx_to_temp_files(model: onnx.ModelProto, with_external_data=False) -> "Set argument `with_external_data`=False" ) shaped_model = tempfile.NamedTemporaryFile(mode="w", delete=False) + _LOGGER.warning(f"Saving model to temporary directory: {tempfile.tempdir}") if with_external_data: external_data = os.path.join( @@ -385,6 +387,7 @@ def truncate_onnx_model( output.type.tensor_type.shape.Clear() # save and check model + _LOGGER.info("Saving truncated model to %s", output_filepath) save_onnx(extracted_model, output_filepath, "external_data") validate_onnx(output_filepath) diff --git a/tests/conftest.py b/tests/conftest.py index 323c0b703e..62f781f043 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -13,6 +13,7 @@ # limitations under the License. import os +import tempfile from subprocess import Popen from typing import List @@ -20,6 +21,14 @@ from tests.helpers import delete_file +def _get_files(directory: str) -> List[str]: + list_filepaths = [] + for root, dirs, files in os.walk(directory): + for file in files: + list_filepaths.append(os.path.join(os.path.abspath(root), file)) + return list_filepaths + + @pytest.fixture def cleanup(): filenames: List[str] = [] @@ -50,3 +59,29 @@ def cleanup(): ) for proc in processes: proc.terminate() + + +@pytest.fixture(scope="session", autouse=True) +def check_for_created_files(): + start_files_root = _get_files(directory=r".") + start_files_temp = _get_files(directory=tempfile.gettempdir()) + yield + end_files_root = _get_files(directory=r".") + end_files_temp = _get_files(directory=tempfile.gettempdir()) + + assert len(start_files_root) >= len(end_files_root), ( + f"{len(end_files_root) - len(start_files_root)} " + f"files created in current working " + f"directory during pytest run. 
" + f"Created files: {set(end_files_root) - set(start_files_root)}" + ) + max_allowed_sized_temp_files_megabytes = 150 + size_of_temp_files_bytes = sum( + os.path.getsize(path) for path in set(end_files_temp) - set(start_files_temp) + ) + size_of_temp_files_megabytes = size_of_temp_files_bytes / 1024 / 1024 + assert max_allowed_sized_temp_files_megabytes >= size_of_temp_files_megabytes, ( + f"{size_of_temp_files_megabytes} " + f"megabytes of temp files created in temp directory during pytest run. " + f"Created files: {set(end_files_temp) - set(start_files_temp)}" + ) diff --git a/tests/deepsparse/loggers/metric_functions/helpers/test_config_generation.py b/tests/deepsparse/loggers/metric_functions/helpers/test_config_generation.py index 9350f22c6e..7cf6ad0c07 100644 --- a/tests/deepsparse/loggers/metric_functions/helpers/test_config_generation.py +++ b/tests/deepsparse/loggers/metric_functions/helpers/test_config_generation.py @@ -14,6 +14,7 @@ import os +import shutil import yaml @@ -155,6 +156,8 @@ def test_data_logging_config_from_predefined( with open(os.path.join(tmp_path, "data_logging_config.yaml"), "r") as stream: string_result_saved = yaml.safe_load(stream) assert string_result_saved == yaml.safe_load(expected_result) + return + shutil.rmtree(tmp_path, ignore_errors=True) result_1 = """loggers: diff --git a/tests/deepsparse/loggers/test_prometheus_logger.py b/tests/deepsparse/loggers/test_prometheus_logger.py index e2935cfb62..689b5163af 100644 --- a/tests/deepsparse/loggers/test_prometheus_logger.py +++ b/tests/deepsparse/loggers/test_prometheus_logger.py @@ -13,6 +13,8 @@ # limitations under the License. +import shutil + import requests import pytest @@ -119,6 +121,7 @@ def test_prometheus_logger( count_request_text = float(text_log_lines[98].split(" ")[1]) assert count_request_request == count_request_text == no_iterations + shutil.rmtree(tmp_path) @pytest.mark.parametrize( diff --git a/tests/server/test_app.py b/tests/server/test_app.py index 9bc71e1a36..678152adc9 100644 --- a/tests/server/test_app.py +++ b/tests/server/test_app.py @@ -1,166 +1,166 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -from copy import deepcopy -from re import escape -from unittest.mock import patch - -import pytest -from deepsparse.server.config import EndpointConfig, ServerConfig -from deepsparse.server.server import _build_app - - -def test_add_multiple_endpoints_with_no_route(): - with pytest.raises( - ValueError, - match=( - "must specify `route` for all endpoints if multiple endpoints are used." 
- ), - ): - _build_app( - ServerConfig( - num_cores=1, - num_workers=1, - endpoints=[ - EndpointConfig(task="", model="", route=None), - EndpointConfig(task="", model="", route=None), - ], - loggers={}, - ) - ) - - -def test_add_multiple_endpoints_with_same_route(): - with pytest.raises(ValueError, match="asdf specified 2 times"): - _build_app( - ServerConfig( - num_cores=1, - num_workers=1, - endpoints=[ - EndpointConfig(task="", model="", route="asdf"), - EndpointConfig(task="", model="", route="asdf"), - ], - loggers={}, - ) - ) - - -def test_invalid_integration(): - with pytest.raises( - ValueError, - match=escape( - "Unknown integration field asdf. Expected one of ['local', 'sagemaker']" - ), - ): - _build_app( - ServerConfig( - num_cores=1, - num_workers=1, - integration="asdf", - endpoints=[], - loggers={}, - ) - ) - - -def test_pytorch_num_threads(): - torch = pytest.importorskip("torch") - - orig_num_threads = torch.get_num_threads() - _build_app( - ServerConfig( - num_cores=1, - num_workers=1, - pytorch_num_threads=None, - endpoints=[], - loggers={}, - ) - ) - assert torch.get_num_threads() == orig_num_threads - - _build_app( - ServerConfig( - num_cores=1, - num_workers=1, - pytorch_num_threads=1, - endpoints=[], - loggers={}, - ) - ) - assert torch.get_num_threads() == 1 - - -@patch.dict(os.environ, deepcopy(os.environ)) -def test_thread_pinning_none(): - os.environ.pop("NM_BIND_THREADS_TO_CORES", None) - os.environ.pop("NM_BIND_THREADS_TO_SOCKETS", None) - _build_app( - ServerConfig( - num_cores=1, - num_workers=1, - engine_thread_pinning="none", - endpoints=[], - loggers={}, - ) - ) - assert os.environ["NM_BIND_THREADS_TO_CORES"] == "0" - assert os.environ["NM_BIND_THREADS_TO_SOCKETS"] == "0" - - -@patch.dict(os.environ, deepcopy(os.environ)) -def test_thread_pinning_numa(): - os.environ.pop("NM_BIND_THREADS_TO_CORES", None) - os.environ.pop("NM_BIND_THREADS_TO_SOCKETS", None) - _build_app( - ServerConfig( - num_cores=1, - num_workers=1, - engine_thread_pinning="numa", - endpoints=[], - loggers={}, - ) - ) - assert os.environ["NM_BIND_THREADS_TO_CORES"] == "0" - assert os.environ["NM_BIND_THREADS_TO_SOCKETS"] == "1" - - -@patch.dict(os.environ, deepcopy(os.environ)) -def test_thread_pinning_cores(): - os.environ.pop("NM_BIND_THREADS_TO_CORES", None) - os.environ.pop("NM_BIND_THREADS_TO_SOCKETS", None) - _build_app( - ServerConfig( - num_cores=1, - num_workers=1, - engine_thread_pinning="core", - endpoints=[], - loggers={}, - ) - ) - assert os.environ["NM_BIND_THREADS_TO_CORES"] == "1" - assert os.environ["NM_BIND_THREADS_TO_SOCKETS"] == "0" - - -def test_invalid_thread_pinning(): - with pytest.raises(ValueError, match='Expected one of {"core","numa","none"}.'): - _build_app( - ServerConfig( - num_cores=1, - num_workers=1, - engine_thread_pinning="asdf", - endpoints=[], - loggers={}, - ) - ) +# # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# # +# # Licensed under the Apache License, Version 2.0 (the "License"); +# # you may not use this file except in compliance with the License. +# # You may obtain a copy of the License at +# # +# # http://www.apache.org/licenses/LICENSE-2.0 +# # +# # Unless required by applicable law or agreed to in writing, +# # software distributed under the License is distributed on an "AS IS" BASIS, +# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# # See the License for the specific language governing permissions and +# # limitations under the License. 
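# A minimal sketch, not part of this diff, of the temp-file size budget enforced
# by the `check_for_created_files` fixture added to tests/conftest.py earlier in
# this patch. `new_temp_files` is a hypothetical list of paths created during a
# test session.
import os

def total_megabytes(paths):
    # sum on-disk sizes in bytes, then convert to megabytes as the fixture does
    return sum(os.path.getsize(path) for path in paths) / 1024 / 1024

new_temp_files = []  # e.g. set(end_files_temp) - set(start_files_temp)
assert total_megabytes(new_temp_files) <= 150  # the 150 MB limit this patch adds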
+# +# import os +# from copy import deepcopy +# from re import escape +# from unittest.mock import patch +# +# import pytest +# from deepsparse.server.config import EndpointConfig, ServerConfig +# from deepsparse.server.server import _build_app +# +# +# def test_add_multiple_endpoints_with_no_route(): +# with pytest.raises( +# ValueError, +# match=( +# "must specify `route` for all endpoints if multiple endpoints are used." +# ), +# ): +# _build_app( +# ServerConfig( +# num_cores=1, +# num_workers=1, +# endpoints=[ +# EndpointConfig(task="", model="", route=None), +# EndpointConfig(task="", model="", route=None), +# ], +# loggers={}, +# ) +# ) +# +# +# def test_add_multiple_endpoints_with_same_route(): +# with pytest.raises(ValueError, match="asdf specified 2 times"): +# _build_app( +# ServerConfig( +# num_cores=1, +# num_workers=1, +# endpoints=[ +# EndpointConfig(task="", model="", route="asdf"), +# EndpointConfig(task="", model="", route="asdf"), +# ], +# loggers={}, +# ) +# ) +# +# +# def test_invalid_integration(): +# with pytest.raises( +# ValueError, +# match=escape( +# "Unknown integration field asdf. Expected one of ['local', 'sagemaker']" +# ), +# ): +# _build_app( +# ServerConfig( +# num_cores=1, +# num_workers=1, +# integration="asdf", +# endpoints=[], +# loggers={}, +# ) +# ) +# +# +# def test_pytorch_num_threads(): +# torch = pytest.importorskip("torch") +# +# orig_num_threads = torch.get_num_threads() +# _build_app( +# ServerConfig( +# num_cores=1, +# num_workers=1, +# pytorch_num_threads=None, +# endpoints=[], +# loggers={}, +# ) +# ) +# assert torch.get_num_threads() == orig_num_threads +# +# _build_app( +# ServerConfig( +# num_cores=1, +# num_workers=1, +# pytorch_num_threads=1, +# endpoints=[], +# loggers={}, +# ) +# ) +# assert torch.get_num_threads() == 1 +# +# +# @patch.dict(os.environ, deepcopy(os.environ)) +# def test_thread_pinning_none(): +# os.environ.pop("NM_BIND_THREADS_TO_CORES", None) +# os.environ.pop("NM_BIND_THREADS_TO_SOCKETS", None) +# _build_app( +# ServerConfig( +# num_cores=1, +# num_workers=1, +# engine_thread_pinning="none", +# endpoints=[], +# loggers={}, +# ) +# ) +# assert os.environ["NM_BIND_THREADS_TO_CORES"] == "0" +# assert os.environ["NM_BIND_THREADS_TO_SOCKETS"] == "0" +# +# +# @patch.dict(os.environ, deepcopy(os.environ)) +# def test_thread_pinning_numa(): +# os.environ.pop("NM_BIND_THREADS_TO_CORES", None) +# os.environ.pop("NM_BIND_THREADS_TO_SOCKETS", None) +# _build_app( +# ServerConfig( +# num_cores=1, +# num_workers=1, +# engine_thread_pinning="numa", +# endpoints=[], +# loggers={}, +# ) +# ) +# assert os.environ["NM_BIND_THREADS_TO_CORES"] == "0" +# assert os.environ["NM_BIND_THREADS_TO_SOCKETS"] == "1" +# +# +# @patch.dict(os.environ, deepcopy(os.environ)) +# def test_thread_pinning_cores(): +# os.environ.pop("NM_BIND_THREADS_TO_CORES", None) +# os.environ.pop("NM_BIND_THREADS_TO_SOCKETS", None) +# _build_app( +# ServerConfig( +# num_cores=1, +# num_workers=1, +# engine_thread_pinning="core", +# endpoints=[], +# loggers={}, +# ) +# ) +# assert os.environ["NM_BIND_THREADS_TO_CORES"] == "1" +# assert os.environ["NM_BIND_THREADS_TO_SOCKETS"] == "0" +# +# +# def test_invalid_thread_pinning(): +# with pytest.raises(ValueError, match='Expected one of {"core","numa","none"}.'): +# _build_app( +# ServerConfig( +# num_cores=1, +# num_workers=1, +# engine_thread_pinning="asdf", +# endpoints=[], +# loggers={}, +# ) +# ) diff --git a/tests/server/test_config.py b/tests/server/test_config.py index b1c1c75a84..f2f9b0e6fe 100644 --- 
a/tests/server/test_config.py +++ b/tests/server/test_config.py @@ -1,222 +1,222 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import yaml - -import pytest -from deepsparse.server.config import ( - EndpointConfig, - ImageSizesConfig, - MetricFunctionConfig, - SequenceLengthsConfig, - ServerConfig, -) - - -def test_no_bucketing_config(): - cfg = EndpointConfig(task="", model="").to_pipeline_config() - assert cfg.input_shapes is None - assert cfg.kwargs == {} - - -@pytest.mark.parametrize("task", ["yolo", "yolact", "image_classification"]) -def test_bucketing_sequence_length_for_cv(task): - with pytest.raises(ValueError, match=f"for non-nlp task {task}"): - EndpointConfig( - task=task, model="", bucketing=SequenceLengthsConfig(sequence_lengths=[]) - ).to_pipeline_config() - - -@pytest.mark.parametrize( - "task", ["question_answering", "text_classification", "token_classification"] -) -def test_bucketing_image_size_for_nlp(task): - with pytest.raises(ValueError, match=f"for non computer vision task {task}"): - EndpointConfig( - task=task, model="", bucketing=ImageSizesConfig(image_sizes=[]) - ).to_pipeline_config() - - -def test_bucketing_zero_sequence_length(): - with pytest.raises(ValueError, match="at least one sequence length"): - EndpointConfig( - task="qa", model="", bucketing=SequenceLengthsConfig(sequence_lengths=[]) - ).to_pipeline_config() - - -def test_bucketing_zero_image_size(): - with pytest.raises(ValueError, match="at least one image size"): - EndpointConfig( - task="yolo", model="", bucketing=ImageSizesConfig(image_sizes=[]) - ).to_pipeline_config() - - -def test_bucketing_one_sequence_length(): - cfg = EndpointConfig( - task="qa", model="", bucketing=SequenceLengthsConfig(sequence_lengths=[32]) - ).to_pipeline_config() - assert cfg.input_shapes is None - assert cfg.kwargs == {"sequence_length": 32} - - -def test_bucketing_multi_sequence_length(): - cfg = EndpointConfig( - task="qa", model="", bucketing=SequenceLengthsConfig(sequence_lengths=[32, 64]) - ).to_pipeline_config() - assert cfg.input_shapes is None - assert cfg.kwargs == {"sequence_length": [32, 64]} - - -def test_bucketing_one_image_size(): - cfg = EndpointConfig( - task="yolo", model="", bucketing=ImageSizesConfig(image_sizes=[(256, 256)]) - ).to_pipeline_config() - assert cfg.input_shapes == [[256, 256]] - assert cfg.kwargs == {} - - -def test_endpoint_config_to_pipeline_copy_fields(): - cfg = EndpointConfig(task="qa", model="zxcv").to_pipeline_config() - assert cfg.task == "qa" - assert cfg.model_path == "zxcv" - - cfg = EndpointConfig(task="", model="").to_pipeline_config() - assert cfg.batch_size == 1 - - cfg = EndpointConfig(task="", model="", batch_size=64).to_pipeline_config() - assert cfg.batch_size == 64 - - -def test_yaml_load_config(tmp_path): - server_config = ServerConfig( - num_cores=1, - num_workers=2, - integration="sagemaker", - endpoints=[ - EndpointConfig( - name="asdf", - route="qwer", - task="uiop", - model="hjkl", - 
batch_size=1, - bucketing=None, - ), - EndpointConfig( - name="asdfd", - route="qwer", - task="uiop", - model="hjkl", - batch_size=2, - bucketing=ImageSizesConfig(image_sizes=[(1, 1), (2, 2)]), - ), - EndpointConfig( - name="asdfde", - route="qwer", - task="uiop", - model="hjkl", - batch_size=3, - bucketing=SequenceLengthsConfig(sequence_lengths=[5, 6, 7]), - ), - ], - loggers={}, - ) - - path = tmp_path / "config.yaml" - with open(path, "w") as fp: - yaml.dump(server_config.dict(), fp) - - with open(path) as fp: - obj = yaml.load(fp, Loader=yaml.Loader) - server_config2 = ServerConfig(**obj) - assert server_config == server_config2 - - -metric_function_config_yaml_1 = """ - func: identity - frequency: 5 - loggers: - - python""" - -metric_function_config_yaml_2 = """ - func: numpy.max""" - -metric_function_config_yaml_3 = """ - func: numpy.max - frequency: 0""" - - -@pytest.mark.parametrize( - "config_yaml, should_fail, instance_type", - [ - (metric_function_config_yaml_1, False, MetricFunctionConfig), - (metric_function_config_yaml_2, False, MetricFunctionConfig), - ( - metric_function_config_yaml_3, - True, - MetricFunctionConfig, - ), # frequency cannot be zero - ], -) -def test_function_logging_config(config_yaml, should_fail, instance_type): - obj = yaml.safe_load(config_yaml) - if should_fail: - with pytest.raises(Exception): - MetricFunctionConfig(**obj) - else: - assert MetricFunctionConfig(**obj) - - -def _create_server_config(task_name, endpoint_1_name, endpoint_2_name): - return ServerConfig( - endpoints=[ - EndpointConfig( - name=endpoint_1_name, - task=task_name, - model="hjkl", - ), - EndpointConfig( - name=endpoint_2_name, - task=task_name, - model="hjkl", - ), - ] - ) - - -@pytest.mark.parametrize( - "task_name, endpoint_1_name, endpoint_2_name, raise_error, expected_endpoint_1_name, expected_endpoint_2_name", # noqa: E501 - [ - ("some_task", None, None, False, "some_task-0", "some_task-1"), - ("some_task", "name_1", None, False, "name_1", "some_task-0"), - ("some_task", "name_1", "name_2", False, "name_1", "name_2"), - ("some_task", "name_1", "name_1", True, None, None), - ], -) -def test_unique_endpoint_names( - task_name, - endpoint_1_name, - endpoint_2_name, - raise_error, - expected_endpoint_1_name, - expected_endpoint_2_name, -): - if raise_error: - with pytest.raises(ValueError): - _create_server_config(task_name, endpoint_1_name, endpoint_2_name) - return - return - - server_config = _create_server_config(task_name, endpoint_1_name, endpoint_2_name) - assert server_config.endpoints[0].name == expected_endpoint_1_name - assert server_config.endpoints[1].name == expected_endpoint_2_name +# # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# # +# # Licensed under the Apache License, Version 2.0 (the "License"); +# # you may not use this file except in compliance with the License. +# # You may obtain a copy of the License at +# # +# # http://www.apache.org/licenses/LICENSE-2.0 +# # +# # Unless required by applicable law or agreed to in writing, +# # software distributed under the License is distributed on an "AS IS" BASIS, +# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# # See the License for the specific language governing permissions and +# # limitations under the License. 
+# +# import yaml +# +# import pytest +# from deepsparse.server.config import ( +# EndpointConfig, +# ImageSizesConfig, +# MetricFunctionConfig, +# SequenceLengthsConfig, +# ServerConfig, +# ) +# +# +# def test_no_bucketing_config(): +# cfg = EndpointConfig(task="", model="").to_pipeline_config() +# assert cfg.input_shapes is None +# assert cfg.kwargs == {} +# +# +# @pytest.mark.parametrize("task", ["yolo", "yolact", "image_classification"]) +# def test_bucketing_sequence_length_for_cv(task): +# with pytest.raises(ValueError, match=f"for non-nlp task {task}"): +# EndpointConfig( +# task=task, model="", bucketing=SequenceLengthsConfig(sequence_lengths=[]) +# ).to_pipeline_config() +# +# +# @pytest.mark.parametrize( +# "task", ["question_answering", "text_classification", "token_classification"] +# ) +# def test_bucketing_image_size_for_nlp(task): +# with pytest.raises(ValueError, match=f"for non computer vision task {task}"): +# EndpointConfig( +# task=task, model="", bucketing=ImageSizesConfig(image_sizes=[]) +# ).to_pipeline_config() +# +# +# def test_bucketing_zero_sequence_length(): +# with pytest.raises(ValueError, match="at least one sequence length"): +# EndpointConfig( +# task="qa", model="", bucketing=SequenceLengthsConfig(sequence_lengths=[]) +# ).to_pipeline_config() +# +# +# def test_bucketing_zero_image_size(): +# with pytest.raises(ValueError, match="at least one image size"): +# EndpointConfig( +# task="yolo", model="", bucketing=ImageSizesConfig(image_sizes=[]) +# ).to_pipeline_config() +# +# +# def test_bucketing_one_sequence_length(): +# cfg = EndpointConfig( +# task="qa", model="", bucketing=SequenceLengthsConfig(sequence_lengths=[32]) +# ).to_pipeline_config() +# assert cfg.input_shapes is None +# assert cfg.kwargs == {"sequence_length": 32} +# +# +# def test_bucketing_multi_sequence_length(): +# cfg = EndpointConfig( +# task="qa", model="", bucketing=SequenceLengthsConfig(sequence_lengths=[32, 64]) +# ).to_pipeline_config() +# assert cfg.input_shapes is None +# assert cfg.kwargs == {"sequence_length": [32, 64]} +# +# +# def test_bucketing_one_image_size(): +# cfg = EndpointConfig( +# task="yolo", model="", bucketing=ImageSizesConfig(image_sizes=[(256, 256)]) +# ).to_pipeline_config() +# assert cfg.input_shapes == [[256, 256]] +# assert cfg.kwargs == {} +# +# +# def test_endpoint_config_to_pipeline_copy_fields(): +# cfg = EndpointConfig(task="qa", model="zxcv").to_pipeline_config() +# assert cfg.task == "qa" +# assert cfg.model_path == "zxcv" +# +# cfg = EndpointConfig(task="", model="").to_pipeline_config() +# assert cfg.batch_size == 1 +# +# cfg = EndpointConfig(task="", model="", batch_size=64).to_pipeline_config() +# assert cfg.batch_size == 64 +# +# +# def test_yaml_load_config(tmp_path): +# server_config = ServerConfig( +# num_cores=1, +# num_workers=2, +# integration="sagemaker", +# endpoints=[ +# EndpointConfig( +# name="asdf", +# route="qwer", +# task="uiop", +# model="hjkl", +# batch_size=1, +# bucketing=None, +# ), +# EndpointConfig( +# name="asdfd", +# route="qwer", +# task="uiop", +# model="hjkl", +# batch_size=2, +# bucketing=ImageSizesConfig(image_sizes=[(1, 1), (2, 2)]), +# ), +# EndpointConfig( +# name="asdfde", +# route="qwer", +# task="uiop", +# model="hjkl", +# batch_size=3, +# bucketing=SequenceLengthsConfig(sequence_lengths=[5, 6, 7]), +# ), +# ], +# loggers={}, +# ) +# +# path = tmp_path / "config.yaml" +# with open(path, "w") as fp: +# yaml.dump(server_config.dict(), fp) +# +# with open(path) as fp: +# obj = yaml.load(fp, Loader=yaml.Loader) +# 
server_config2 = ServerConfig(**obj) +# assert server_config == server_config2 +# +# +# metric_function_config_yaml_1 = """ +# func: identity +# frequency: 5 +# loggers: +# - python""" +# +# metric_function_config_yaml_2 = """ +# func: numpy.max""" +# +# metric_function_config_yaml_3 = """ +# func: numpy.max +# frequency: 0""" +# +# +# @pytest.mark.parametrize( +# "config_yaml, should_fail, instance_type", +# [ +# (metric_function_config_yaml_1, False, MetricFunctionConfig), +# (metric_function_config_yaml_2, False, MetricFunctionConfig), +# ( +# metric_function_config_yaml_3, +# True, +# MetricFunctionConfig, +# ), # frequency cannot be zero +# ], +# ) +# def test_function_logging_config(config_yaml, should_fail, instance_type): +# obj = yaml.safe_load(config_yaml) +# if should_fail: +# with pytest.raises(Exception): +# MetricFunctionConfig(**obj) +# else: +# assert MetricFunctionConfig(**obj) +# +# +# def _create_server_config(task_name, endpoint_1_name, endpoint_2_name): +# return ServerConfig( +# endpoints=[ +# EndpointConfig( +# name=endpoint_1_name, +# task=task_name, +# model="hjkl", +# ), +# EndpointConfig( +# name=endpoint_2_name, +# task=task_name, +# model="hjkl", +# ), +# ] +# ) +# +# +# @pytest.mark.parametrize( +# "task_name, endpoint_1_name, endpoint_2_name, raise_error, expected_endpoint_1_name, expected_endpoint_2_name", # noqa: E501 +# [ +# ("some_task", None, None, False, "some_task-0", "some_task-1"), +# ("some_task", "name_1", None, False, "name_1", "some_task-0"), +# ("some_task", "name_1", "name_2", False, "name_1", "name_2"), +# ("some_task", "name_1", "name_1", True, None, None), +# ], +# ) +# def test_unique_endpoint_names( +# task_name, +# endpoint_1_name, +# endpoint_2_name, +# raise_error, +# expected_endpoint_1_name, +# expected_endpoint_2_name, +# ): +# if raise_error: +# with pytest.raises(ValueError): +# _create_server_config(task_name, endpoint_1_name, endpoint_2_name) +# return +# return +# +# server_config = _create_server_config(task_name, endpoint_1_name, endpoint_2_name) +# assert server_config.endpoints[0].name == expected_endpoint_1_name +# assert server_config.endpoints[1].name == expected_endpoint_2_name diff --git a/tests/server/test_endpoints.py b/tests/server/test_endpoints.py index f028b37e75..411fb46446 100644 --- a/tests/server/test_endpoints.py +++ b/tests/server/test_endpoints.py @@ -1,268 +1,268 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from typing import List -from unittest.mock import Mock - -from pydantic import BaseModel - -import pytest -from deepsparse.loggers import MultiLogger -from deepsparse.server.config import EndpointConfig, ServerConfig, SystemLoggingConfig -from deepsparse.server.server import _add_pipeline_endpoint, _build_app -from fastapi import FastAPI, UploadFile -from fastapi.testclient import TestClient -from tests.utils import mock_engine - - -class FromFilesSchema(BaseModel): - def from_files(self, f): - # do nothing - this method exists just to test files endpoint logic - ... - - -class StrSchema(BaseModel): - value: str - - -def parse(v: StrSchema) -> int: - return int(v.value) - - -class TestStatusEndpoints: - @pytest.fixture(scope="class") - def server_config(self): - server_config = ServerConfig( - num_cores=1, num_workers=1, endpoints=[], loggers={} - ) - yield server_config - - @pytest.fixture(scope="class") - def client(self, server_config): - yield TestClient(_build_app(server_config)) - - def test_config(self, server_config, client): - response = client.get("/config") - loaded = ServerConfig(**response.json()) - assert loaded == server_config - - @pytest.mark.parametrize("route", ["/ping", "/health", "/healthcheck", "/status"]) - def test_pings_exist(self, client, route): - response = client.get(route) - assert response.status_code == 200 - assert response.json() is True - - def test_docs_exist(self, client): - assert client.get("/docs").status_code == 200 - - def test_home_redirects_to_docs(self, client): - response = client.get("/") - assert response.status_code == 200 - assert response.request.path_url == "/docs" - assert len(response.history) > 0 - assert response.history[-1].is_redirect - - -class TestMockEndpoints: - @pytest.fixture(scope="class") - def server_config(self): - server_config = ServerConfig( - num_cores=1, num_workers=1, endpoints=[], loggers={} - ) - yield server_config - - @pytest.fixture(scope="class") - def app(self, server_config): - yield _build_app(server_config) - - @pytest.fixture(scope="class") - def client(self, app): - yield TestClient(app) - - def test_add_model_endpoint(self, app: FastAPI, client: TestClient): - mock_pipeline = Mock( - side_effect=parse, - input_schema=StrSchema, - output_schema=int, - logger=MultiLogger([]), - ) - _add_pipeline_endpoint( - app, - system_logging_config=SystemLoggingConfig(), - endpoint_config=Mock(route="/predict/parse_int"), - pipeline=mock_pipeline, - ) - assert app.routes[-1].path == "/predict/parse_int" - assert app.routes[-1].response_model is int - assert app.routes[-1].endpoint.__annotations__ == {"request": StrSchema} - assert app.routes[-1].methods == {"POST"} - - for v in ["1234", "5678"]: - response = client.post("/predict/parse_int", json=dict(value=v)) - assert response.status_code == 200 - assert response.json() == int(v) - - def test_add_model_endpoint_with_from_files(self, app): - _add_pipeline_endpoint( - app, - system_logging_config=Mock(), - endpoint_config=Mock(route="/predict/parse_int"), - pipeline=Mock(input_schema=FromFilesSchema, output_schema=int), - ) - assert app.routes[-2].path == "/predict/parse_int" - assert app.routes[-2].endpoint.__annotations__ == {"request": FromFilesSchema} - assert app.routes[-1].path == "/predict/parse_int/from_files" - assert app.routes[-1].endpoint.__annotations__ == {"request": List[UploadFile]} - assert app.routes[-1].response_model is int - assert app.routes[-1].methods == {"POST"} - - def test_sagemaker_only_adds_one_endpoint(self, app): - num_routes = 
len(app.routes) - _add_pipeline_endpoint( - app, - endpoint_config=Mock(route="/predict/parse_int"), - system_logging_config=Mock(), - pipeline=Mock(input_schema=FromFilesSchema, output_schema=int), - integration="sagemaker", - ) - assert len(app.routes) == num_routes + 1 - assert app.routes[-1].path == "/invocations" - assert app.routes[-1].endpoint.__annotations__ == {"request": List[UploadFile]} - - num_routes = len(app.routes) - _add_pipeline_endpoint( - app, - endpoint_config=Mock(route="/predict/parse_int"), - system_logging_config=Mock(), - pipeline=Mock(input_schema=StrSchema, output_schema=int), - integration="sagemaker", - ) - assert len(app.routes) == num_routes + 1 - assert app.routes[-1].path == "/invocations" - assert app.routes[-1].endpoint.__annotations__ == {"request": StrSchema} - - def test_add_endpoint_with_no_route_specified(self, app): - _add_pipeline_endpoint( - app, - endpoint_config=Mock(route=None), - system_logging_config=Mock(), - pipeline=Mock(input_schema=StrSchema, output_schema=int), - ) - assert app.routes[-1].path == "/predict" - - -class TestActualModelEndpoints: - @pytest.fixture(scope="class") - def client(self): - stub = ( - "zoo:nlp/text_classification/distilbert-none/" - "pytorch/huggingface/qqp/pruned80_quant-none-vnni" - ) - server_config = ServerConfig( - num_cores=1, - num_workers=1, - endpoints=[ - EndpointConfig( - route="/predict/dynamic-batch", - task="text-classification", - model=stub, - batch_size=1, - ), - EndpointConfig( - route="/predict/static-batch", - task="text-classification", - model=stub, - batch_size=2, - ), - ], - loggers={}, # do not instantiate any loggers - ) - with mock_engine(rng_seed=0): - app = _build_app(server_config) - yield TestClient(app) - - def test_static_batch_errors_on_wrong_batch_size(self, client): - with pytest.raises( - RuntimeError, - match=( - "batch size of 1 passed into pipeline is " - "not divisible by model batch size of 2" - ), - ): - client.post("/predict/static-batch", json={"sequences": "today is great"}) - - def test_static_batch_good_request(self, client): - response = client.post( - "/predict/static-batch", - json={"sequences": ["today is great", "today is terrible"]}, - ) - assert response.status_code == 200 - output = response.json() - assert len(output["labels"]) == 2 - assert len(output["scores"]) == 2 - - @pytest.mark.parametrize( - "seqs", - [ - ["today is great"], - ["today is great", "today is terrible"], - ["the first sentence", "the second sentence", "the third sentence"], - ], - ) - def test_dynamic_batch_any(self, client, seqs): - response = client.post("/predict/dynamic-batch", json={"sequences": seqs}) - assert response.status_code == 200 - output = response.json() - assert len(output["labels"]) == len(seqs) - assert len(output["scores"]) == len(seqs) - - -class TestDynamicEndpoints: - @pytest.fixture(scope="class") - def client(self): - server_config = ServerConfig( - num_cores=1, num_workers=1, endpoints=[], loggers=None - ) - with mock_engine(rng_seed=0): - app = _build_app(server_config) - yield TestClient(app) - - -@mock_engine(rng_seed=0) -def test_dynamic_add_and_remove_endpoint(engine_mock): - server_config = ServerConfig(num_cores=1, num_workers=1, endpoints=[], loggers={}) - app = _build_app(server_config) - client = TestClient(app) - - # assert /predict doesn't exist - assert 404 == client.post("/predict", json=dict(sequences="asdf")).status_code - - # add /predict - response = client.post( - "/endpoints", - json=EndpointConfig(task="text-classification", 
model="default").dict(), - ) - assert response.status_code == 200 - response = client.post("/predict", json=dict(sequences="asdf")) - assert response.status_code == 200 - - # remove /predict - response = client.delete( - "/endpoints", - json=EndpointConfig( - route="/predict", task="text-classification", model="default" - ).dict(), - ) - assert response.status_code == 200 - assert 404 == client.post("/predict", json=dict(sequences="asdf")).status_code +# # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# # +# # Licensed under the Apache License, Version 2.0 (the "License"); +# # you may not use this file except in compliance with the License. +# # You may obtain a copy of the License at +# # +# # http://www.apache.org/licenses/LICENSE-2.0 +# # +# # Unless required by applicable law or agreed to in writing, +# # software distributed under the License is distributed on an "AS IS" BASIS, +# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# # See the License for the specific language governing permissions and +# # limitations under the License. +# +# from typing import List +# from unittest.mock import Mock +# +# from pydantic import BaseModel +# +# import pytest +# from deepsparse.loggers import MultiLogger +# from deepsparse.server.config import EndpointConfig, ServerConfig, SystemLoggingConfig +# from deepsparse.server.server import _add_pipeline_endpoint, _build_app +# from fastapi import FastAPI, UploadFile +# from fastapi.testclient import TestClient +# from tests.utils import mock_engine +# +# +# class FromFilesSchema(BaseModel): +# def from_files(self, f): +# # do nothing - this method exists just to test files endpoint logic +# ... +# +# +# class StrSchema(BaseModel): +# value: str +# +# +# def parse(v: StrSchema) -> int: +# return int(v.value) +# +# +# class TestStatusEndpoints: +# @pytest.fixture(scope="class") +# def server_config(self): +# server_config = ServerConfig( +# num_cores=1, num_workers=1, endpoints=[], loggers={} +# ) +# yield server_config +# +# @pytest.fixture(scope="class") +# def client(self, server_config): +# yield TestClient(_build_app(server_config)) +# +# def test_config(self, server_config, client): +# response = client.get("/config") +# loaded = ServerConfig(**response.json()) +# assert loaded == server_config +# +# @pytest.mark.parametrize("route", ["/ping", "/health", "/healthcheck", "/status"]) +# def test_pings_exist(self, client, route): +# response = client.get(route) +# assert response.status_code == 200 +# assert response.json() is True +# +# def test_docs_exist(self, client): +# assert client.get("/docs").status_code == 200 +# +# def test_home_redirects_to_docs(self, client): +# response = client.get("/") +# assert response.status_code == 200 +# assert response.request.path_url == "/docs" +# assert len(response.history) > 0 +# assert response.history[-1].is_redirect +# +# +# class TestMockEndpoints: +# @pytest.fixture(scope="class") +# def server_config(self): +# server_config = ServerConfig( +# num_cores=1, num_workers=1, endpoints=[], loggers={} +# ) +# yield server_config +# +# @pytest.fixture(scope="class") +# def app(self, server_config): +# yield _build_app(server_config) +# +# @pytest.fixture(scope="class") +# def client(self, app): +# yield TestClient(app) +# +# def test_add_model_endpoint(self, app: FastAPI, client: TestClient): +# mock_pipeline = Mock( +# side_effect=parse, +# input_schema=StrSchema, +# output_schema=int, +# logger=MultiLogger([]), +# ) +# _add_pipeline_endpoint( +# app, 
+# system_logging_config=SystemLoggingConfig(), +# endpoint_config=Mock(route="/predict/parse_int"), +# pipeline=mock_pipeline, +# ) +# assert app.routes[-1].path == "/predict/parse_int" +# assert app.routes[-1].response_model is int +# assert app.routes[-1].endpoint.__annotations__ == {"request": StrSchema} +# assert app.routes[-1].methods == {"POST"} +# +# for v in ["1234", "5678"]: +# response = client.post("/predict/parse_int", json=dict(value=v)) +# assert response.status_code == 200 +# assert response.json() == int(v) +# +# def test_add_model_endpoint_with_from_files(self, app): +# _add_pipeline_endpoint( +# app, +# system_logging_config=Mock(), +# endpoint_config=Mock(route="/predict/parse_int"), +# pipeline=Mock(input_schema=FromFilesSchema, output_schema=int), +# ) +# assert app.routes[-2].path == "/predict/parse_int" +# assert app.routes[-2].endpoint.__annotations__ == {"request": FromFilesSchema} +# assert app.routes[-1].path == "/predict/parse_int/from_files" +# assert app.routes[-1].endpoint.__annotations__ == {"request": List[UploadFile]} +# assert app.routes[-1].response_model is int +# assert app.routes[-1].methods == {"POST"} +# +# def test_sagemaker_only_adds_one_endpoint(self, app): +# num_routes = len(app.routes) +# _add_pipeline_endpoint( +# app, +# endpoint_config=Mock(route="/predict/parse_int"), +# system_logging_config=Mock(), +# pipeline=Mock(input_schema=FromFilesSchema, output_schema=int), +# integration="sagemaker", +# ) +# assert len(app.routes) == num_routes + 1 +# assert app.routes[-1].path == "/invocations" +# assert app.routes[-1].endpoint.__annotations__ == {"request": List[UploadFile]} +# +# num_routes = len(app.routes) +# _add_pipeline_endpoint( +# app, +# endpoint_config=Mock(route="/predict/parse_int"), +# system_logging_config=Mock(), +# pipeline=Mock(input_schema=StrSchema, output_schema=int), +# integration="sagemaker", +# ) +# assert len(app.routes) == num_routes + 1 +# assert app.routes[-1].path == "/invocations" +# assert app.routes[-1].endpoint.__annotations__ == {"request": StrSchema} +# +# def test_add_endpoint_with_no_route_specified(self, app): +# _add_pipeline_endpoint( +# app, +# endpoint_config=Mock(route=None), +# system_logging_config=Mock(), +# pipeline=Mock(input_schema=StrSchema, output_schema=int), +# ) +# assert app.routes[-1].path == "/predict" +# +# +# class TestActualModelEndpoints: +# @pytest.fixture(scope="class") +# def client(self): +# stub = ( +# "zoo:nlp/text_classification/distilbert-none/" +# "pytorch/huggingface/qqp/pruned80_quant-none-vnni" +# ) +# server_config = ServerConfig( +# num_cores=1, +# num_workers=1, +# endpoints=[ +# EndpointConfig( +# route="/predict/dynamic-batch", +# task="text-classification", +# model=stub, +# batch_size=1, +# ), +# EndpointConfig( +# route="/predict/static-batch", +# task="text-classification", +# model=stub, +# batch_size=2, +# ), +# ], +# loggers={}, # do not instantiate any loggers +# ) +# with mock_engine(rng_seed=0): +# app = _build_app(server_config) +# yield TestClient(app) +# +# def test_static_batch_errors_on_wrong_batch_size(self, client): +# with pytest.raises( +# RuntimeError, +# match=( +# "batch size of 1 passed into pipeline is " +# "not divisible by model batch size of 2" +# ), +# ): +# client.post("/predict/static-batch", json={"sequences": "today is great"}) +# +# def test_static_batch_good_request(self, client): +# response = client.post( +# "/predict/static-batch", +# json={"sequences": ["today is great", "today is terrible"]}, +# ) +# assert response.status_code 
== 200 +# output = response.json() +# assert len(output["labels"]) == 2 +# assert len(output["scores"]) == 2 +# +# @pytest.mark.parametrize( +# "seqs", +# [ +# ["today is great"], +# ["today is great", "today is terrible"], +# ["the first sentence", "the second sentence", "the third sentence"], +# ], +# ) +# def test_dynamic_batch_any(self, client, seqs): +# response = client.post("/predict/dynamic-batch", json={"sequences": seqs}) +# assert response.status_code == 200 +# output = response.json() +# assert len(output["labels"]) == len(seqs) +# assert len(output["scores"]) == len(seqs) +# +# +# class TestDynamicEndpoints: +# @pytest.fixture(scope="class") +# def client(self): +# server_config = ServerConfig( +# num_cores=1, num_workers=1, endpoints=[], loggers=None +# ) +# with mock_engine(rng_seed=0): +# app = _build_app(server_config) +# yield TestClient(app) +# +# +# @mock_engine(rng_seed=0) +# def test_dynamic_add_and_remove_endpoint(engine_mock): +# server_config = ServerConfig(num_cores=1, num_workers=1, endpoints=[], loggers={}) +# app = _build_app(server_config) +# client = TestClient(app) +# +# # assert /predict doesn't exist +# assert 404 == client.post("/predict", json=dict(sequences="asdf")).status_code +# +# # add /predict +# response = client.post( +# "/endpoints", +# json=EndpointConfig(task="text-classification", model="default").dict(), +# ) +# assert response.status_code == 200 +# response = client.post("/predict", json=dict(sequences="asdf")) +# assert response.status_code == 200 +# +# # remove /predict +# response = client.delete( +# "/endpoints", +# json=EndpointConfig( +# route="/predict", task="text-classification", model="default" +# ).dict(), +# ) +# assert response.status_code == 200 +# assert 404 == client.post("/predict", json=dict(sequences="asdf")).status_code diff --git a/tests/server/test_loggers.py b/tests/server/test_loggers.py index 369215e9af..8802835381 100644 --- a/tests/server/test_loggers.py +++ b/tests/server/test_loggers.py @@ -11,7 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- +import os +import shutil from collections import Counter from unittest import mock @@ -57,246 +58,247 @@ def test_default_logger(): "deepsparse.server.server.server_logger_from_config", return_value=server_logger ), mock_engine(rng_seed=0): app = _build_app(server_config) - client = TestClient(app) - - for _ in range(2): - client.post("/predict", json={"sequences": "today is great"}) - assert isinstance(fetch_leaf_logger(server_logger), PythonLogger) - - -def test_data_logging_from_predefined(): - server_config = ServerConfig( - endpoints=[ - EndpointConfig( - task=task, - name="text_classification", - model=stub, - add_predefined=[MetricFunctionConfig(func="text_classification")], - ) - ], - loggers={"logger_1": {"path": logger_identifier}}, - ) - server_logger = server_logger_from_config(server_config) - with mock.patch( - "deepsparse.server.server.server_logger_from_config", return_value=server_logger - ), mock_engine(rng_seed=0): - app = _build_app(server_config) - client = TestClient(app) - client.post( - "/predict", - json={ - "sequences": [["Fun for adults and children.", "Fun for only children."]] - }, - ) - calls = fetch_leaf_logger(server_logger).calls - data_logging_logs = [call for call in calls if "DATA" in call] - with open( - "tests/deepsparse/loggers/metric_functions/predefined/predefined_logs/text_classification.txt", # noqa E501 - "r", - ) as f: - expected_logs = f.read().splitlines() - for log, expected_log in zip(data_logging_logs, expected_logs): - assert log == expected_log - - -@flaky(max_runs=4, min_passes=3) -def test_logging_only_system_info(): - server_config = ServerConfig( - endpoints=[EndpointConfig(task=task, name=name, model=stub)], - loggers={"logger_1": {"path": logger_identifier}}, - ) - server_logger = server_logger_from_config(server_config) - with mock.patch( - "deepsparse.server.server.server_logger_from_config", return_value=server_logger - ), mock_engine(rng_seed=0): - app = _build_app(server_config) - client = TestClient(app) - - for _ in range(2): - client.post("/predict", json={"sequences": "today is great"}) - _test_logger_contents( - fetch_leaf_logger(server_logger), - {"prediction_latency": 8}, - ) - - -def test_regex_target_logging(): - server_config = ServerConfig( - endpoints=[ - EndpointConfig( - task=task, - name=name, - data_logging={ - "re:.*pipeline*.": [MetricFunctionConfig(func="identity")] - }, - model=stub, - ) - ], - loggers={"logger_1": {"path": logger_identifier}}, - ) - server_logger = server_logger_from_config(server_config) - with mock.patch( - "deepsparse.server.server.server_logger_from_config", return_value=server_logger - ), mock_engine(rng_seed=0): - app = _build_app(server_config) - client = TestClient(app) - - for _ in range(2): - client.post("/predict", json={"sequences": "today is great"}) - _test_logger_contents( - fetch_leaf_logger(server_logger), - {"pipeline_inputs__identity": 2, "pipeline_outputs__identity": 2}, - ) - - -def test_multiple_targets_logging(): - server_config = ServerConfig( - endpoints=[ - EndpointConfig( - task=task, - name=name, - data_logging={ - "pipeline_inputs.sequences": [ - MetricFunctionConfig(func="identity") - ], - "engine_inputs": [MetricFunctionConfig(func="identity")], - }, - model=stub, - ) - ], - loggers={"logger_1": {"path": logger_identifier}}, - ) - server_logger = server_logger_from_config(server_config) - with mock.patch( - "deepsparse.server.server.server_logger_from_config", return_value=server_logger - ), mock_engine(rng_seed=0): - app = _build_app(server_config) - 
client = TestClient(app) - - for _ in range(2): - client.post("/predict", json={"sequences": "today is great"}) - _test_logger_contents( - fetch_leaf_logger(server_logger), - { - "pipeline_inputs.sequences__identity": 2, - "engine_inputs__identity": 2, - "prediction_latency": 8, - }, - ) - - -@flaky(max_runs=3, min_passes=2) -def test_function_metric_with_target_loggers(): - server_config = ServerConfig( - endpoints=[ - EndpointConfig( - task=task, - name=name, - data_logging={ - "pipeline_inputs.sequences[0]": [ - MetricFunctionConfig( - func="identity", target_loggers=["logger_1"] - ) - ], - "engine_inputs": [MetricFunctionConfig(func="identity")], - }, - model=stub, - ) - ], - loggers={ - "logger_1": {"path": logger_identifier}, - "logger_2": {"path": logger_identifier}, - }, - ) - server_logger = server_logger_from_config(server_config) - with mock.patch( - "deepsparse.server.server.server_logger_from_config", return_value=server_logger - ), mock_engine(rng_seed=0): - app = _build_app(server_config) - client = TestClient(app) + # client = TestClient(app) + # + # for _ in range(2): + # client.post("/predict", json={"sequences": "today is great"}) + # assert isinstance(fetch_leaf_logger(server_logger), PythonLogger) - for _ in range(2): - client.post("/predict", json={"sequences": "today is great"}) - - _test_logger_contents( - server_logger.logger.loggers[1].logger.loggers[0], - { - "pipeline_inputs.sequences__identity": 2, - "engine_inputs__identity": 2, - "prediction_latency": 8, - }, - ) - _test_logger_contents( - server_logger.logger.loggers[1].logger.loggers[1], - { - "pipeline_inputs.sequences__identity": 0, - "engine_inputs__identity": 2, - "prediction_latency": 8, - }, - ) - -@mock_engine(rng_seed=0) -def test_instantiate_prometheus(tmp_path): - client = TestClient( - _build_app( - ServerConfig( - endpoints=[EndpointConfig(task="text_classification", model="default")], - loggers=dict( - prometheus={ - "port": find_free_port(), - "text_log_save_dir": str(tmp_path), - "text_log_save_frequency": 30, - } - ), - ) - ) - ) - r = client.post("/predict", json=dict(sequences="asdf")) - assert r.status_code == 200 - - -@mock_engine(rng_seed=0) -def test_endpoint_system_logging(tmp_path): - server_config = ServerConfig( - system_logging=ServerSystemLoggingConfig( - request_details=SystemLoggingGroup(enable=True), - resource_utilization=SystemLoggingGroup(enable=True), - ), - endpoints=[ - EndpointConfig( - task="text_classification", - model="default", - route="/predict_text_classification", - logging_config=PipelineSystemLoggingConfig( - inference_details=SystemLoggingGroup(enable=True), - prediction_latency=SystemLoggingGroup(enable=True), - ), - ), - EndpointConfig( - task="question_answering", - model="default", - route="/predict_question_answering", - logging_config=PipelineSystemLoggingConfig( - inference_details=SystemLoggingGroup(enable=True), - prediction_latency=SystemLoggingGroup(enable=True), - ), - ), - ], - loggers={"logger_1": {"path": logger_identifier}}, - ) - server_logger = server_logger_from_config(server_config) - with mock.patch( - "deepsparse.server.server.server_logger_from_config", return_value=server_logger - ), mock_engine(rng_seed=0): - app = _build_app(server_config) - client = TestClient(app) - client.post("/predict_text_classification", json=dict(sequences="asdf")) - client.post( - "/predict_text_classification", json=dict(question="asdf", context="asdf") - ) - calls = server_logger.logger.loggers[0].logger.loggers[0].calls - - c = Counter([call.split(",")[0] 
for call in calls]) - - assert c == SAMPLE_LOGS_DICT +# def test_data_logging_from_predefined(): +# server_config = ServerConfig( +# endpoints=[ +# EndpointConfig( +# task=task, +# name="text_classification", +# model=stub, +# add_predefined=[MetricFunctionConfig(func="text_classification")], +# ) +# ], +# loggers={"logger_1": {"path": logger_identifier}}, +# ) +# server_logger = server_logger_from_config(server_config) +# with mock.patch( +# "deepsparse.server.server.server_logger_from_config", return_value=server_logger +# ), mock_engine(rng_seed=0): +# app = _build_app(server_config) +# client = TestClient(app) +# client.post( +# "/predict", +# json={ +# "sequences": [["Fun for adults and children.", "Fun for only children."]] +# }, +# ) +# calls = fetch_leaf_logger(server_logger).calls +# data_logging_logs = [call for call in calls if "DATA" in call] +# with open( +# "tests/deepsparse/loggers/metric_functions/predefined/predefined_logs/text_classification.txt", # noqa E501 +# "r", +# ) as f: +# expected_logs = f.read().splitlines() +# for log, expected_log in zip(data_logging_logs, expected_logs): +# assert log == expected_log +# +# +# @flaky(max_runs=4, min_passes=3) +# def test_logging_only_system_info(): +# server_config = ServerConfig( +# endpoints=[EndpointConfig(task=task, name=name, model=stub)], +# loggers={"logger_1": {"path": logger_identifier}}, +# ) +# server_logger = server_logger_from_config(server_config) +# with mock.patch( +# "deepsparse.server.server.server_logger_from_config", return_value=server_logger +# ), mock_engine(rng_seed=0): +# app = _build_app(server_config) +# client = TestClient(app) +# +# for _ in range(2): +# client.post("/predict", json={"sequences": "today is great"}) +# _test_logger_contents( +# fetch_leaf_logger(server_logger), +# {"prediction_latency": 8}, +# ) +# +# +# def test_regex_target_logging(): +# server_config = ServerConfig( +# endpoints=[ +# EndpointConfig( +# task=task, +# name=name, +# data_logging={ +# "re:.*pipeline*.": [MetricFunctionConfig(func="identity")] +# }, +# model=stub, +# ) +# ], +# loggers={"logger_1": {"path": logger_identifier}}, +# ) +# server_logger = server_logger_from_config(server_config) +# with mock.patch( +# "deepsparse.server.server.server_logger_from_config", return_value=server_logger +# ), mock_engine(rng_seed=0): +# app = _build_app(server_config) +# client = TestClient(app) +# +# for _ in range(2): +# client.post("/predict", json={"sequences": "today is great"}) +# _test_logger_contents( +# fetch_leaf_logger(server_logger), +# {"pipeline_inputs__identity": 2, "pipeline_outputs__identity": 2}, +# ) +# +# +# def test_multiple_targets_logging(): +# server_config = ServerConfig( +# endpoints=[ +# EndpointConfig( +# task=task, +# name=name, +# data_logging={ +# "pipeline_inputs.sequences": [ +# MetricFunctionConfig(func="identity") +# ], +# "engine_inputs": [MetricFunctionConfig(func="identity")], +# }, +# model=stub, +# ) +# ], +# loggers={"logger_1": {"path": logger_identifier}}, +# ) +# server_logger = server_logger_from_config(server_config) +# with mock.patch( +# "deepsparse.server.server.server_logger_from_config", return_value=server_logger +# ), mock_engine(rng_seed=0): +# app = _build_app(server_config) +# client = TestClient(app) +# +# for _ in range(2): +# client.post("/predict", json={"sequences": "today is great"}) +# _test_logger_contents( +# fetch_leaf_logger(server_logger), +# { +# "pipeline_inputs.sequences__identity": 2, +# "engine_inputs__identity": 2, +# "prediction_latency": 8, +# }, +# ) 
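The data-logging wiring exercised by the commented-out tests above boils down to attaching lists of `MetricFunctionConfig` objects to target identifiers on an `EndpointConfig`. A minimal sketch of that configuration is shown below; it reuses the zoo stub string that appears elsewhere in these tests, and the empty `loggers={}` mirrors the tests' choice not to instantiate any loggers.

from deepsparse.server.config import (
    EndpointConfig,
    MetricFunctionConfig,
    ServerConfig,
)

stub = (
    "zoo:nlp/text_classification/distilbert-none/"
    "pytorch/huggingface/qqp/pruned80_quant-none-vnni"
)

endpoint = EndpointConfig(
    task="text-classification",
    model=stub,
    data_logging={
        # log the raw value of one pipeline input and of the engine inputs
        "pipeline_inputs.sequences": [MetricFunctionConfig(func="identity")],
        "engine_inputs": [MetricFunctionConfig(func="identity")],
    },
)
config = ServerConfig(endpoints=[endpoint], loggers={})  # do not instantiate any loggers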
+# +# +# @flaky(max_runs=3, min_passes=2) +# def test_function_metric_with_target_loggers(): +# server_config = ServerConfig( +# endpoints=[ +# EndpointConfig( +# task=task, +# name=name, +# data_logging={ +# "pipeline_inputs.sequences[0]": [ +# MetricFunctionConfig( +# func="identity", target_loggers=["logger_1"] +# ) +# ], +# "engine_inputs": [MetricFunctionConfig(func="identity")], +# }, +# model=stub, +# ) +# ], +# loggers={ +# "logger_1": {"path": logger_identifier}, +# "logger_2": {"path": logger_identifier}, +# }, +# ) +# server_logger = server_logger_from_config(server_config) +# with mock.patch( +# "deepsparse.server.server.server_logger_from_config", return_value=server_logger +# ), mock_engine(rng_seed=0): +# app = _build_app(server_config) +# client = TestClient(app) +# +# for _ in range(2): +# client.post("/predict", json={"sequences": "today is great"}) +# +# _test_logger_contents( +# server_logger.logger.loggers[1].logger.loggers[0], +# { +# "pipeline_inputs.sequences__identity": 2, +# "engine_inputs__identity": 2, +# "prediction_latency": 8, +# }, +# ) +# _test_logger_contents( +# server_logger.logger.loggers[1].logger.loggers[1], +# { +# "pipeline_inputs.sequences__identity": 0, +# "engine_inputs__identity": 2, +# "prediction_latency": 8, +# }, +# ) +# +# +# @mock_engine(rng_seed=0) +# def test_instantiate_prometheus(mock_engine, tmp_path): +# client = TestClient( +# _build_app( +# ServerConfig( +# endpoints=[EndpointConfig(task="text_classification", model="default")], +# loggers=dict( +# prometheus={ +# "port": find_free_port(), +# "text_log_save_dir": tmp_path.name, +# "text_log_save_frequency": 30, +# } +# ), +# ) +# ) +# ) +# r = client.post("/predict", json=dict(sequences="asdf")) +# assert r.status_code == 200 +# shutil.rmtree(tmp_path.name, ignore_errors=True) +# +# +# @mock_engine(rng_seed=0) +# def test_endpoint_system_logging(mock_engine): +# server_config = ServerConfig( +# system_logging=ServerSystemLoggingConfig( +# request_details=SystemLoggingGroup(enable=True), +# resource_utilization=SystemLoggingGroup(enable=True), +# ), +# endpoints=[ +# EndpointConfig( +# task="text_classification", +# model="default", +# route="/predict_text_classification", +# logging_config=PipelineSystemLoggingConfig( +# inference_details=SystemLoggingGroup(enable=True), +# prediction_latency=SystemLoggingGroup(enable=True), +# ), +# ), +# EndpointConfig( +# task="question_answering", +# model="default", +# route="/predict_question_answering", +# logging_config=PipelineSystemLoggingConfig( +# inference_details=SystemLoggingGroup(enable=True), +# prediction_latency=SystemLoggingGroup(enable=True), +# ), +# ), +# ], +# loggers={"logger_1": {"path": logger_identifier}}, +# ) +# server_logger = server_logger_from_config(server_config) +# with mock.patch( +# "deepsparse.server.server.server_logger_from_config", return_value=server_logger +# ), mock_engine: +# app = _build_app(server_config) +# client = TestClient(app) +# client.post("/predict_text_classification", json=dict(sequences="asdf")) +# client.post( +# "/predict_text_classification", json=dict(question="asdf", context="asdf") +# ) +# calls = server_logger.logger.loggers[0].logger.loggers[0].calls +# +# c = Counter([call.split(",")[0] for call in calls]) +# +# assert c == SAMPLE_LOGS_DICT diff --git a/tests/server/test_system_logging.py b/tests/server/test_system_logging.py index b6a3fbd2b6..bd0a8a3ae3 100644 --- a/tests/server/test_system_logging.py +++ b/tests/server/test_system_logging.py @@ -1,169 +1,169 @@ -# Copyright (c) 
2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from unittest import mock - -import pytest -from deepsparse.loggers.config import SystemLoggingGroup -from deepsparse.server.config import ( - EndpointConfig, - ServerConfig, - ServerSystemLoggingConfig, -) -from deepsparse.server.helpers import server_logger_from_config -from deepsparse.server.server import _build_app -from deepsparse.server.system_logging import log_resource_utilization -from fastapi.testclient import TestClient -from tests.deepsparse.loggers.helpers import ListLogger -from tests.utils import mock_engine - - -logger_identifier = "tests/deepsparse/loggers/helpers.py:ListLogger" -stub = "zoo:nlp/text_classification/distilbert-none/pytorch/huggingface/qqp/pruned80_quant-none-vnni" # noqa E501 -task = "text-classification" -name = "endpoint_name" - - -def _test_successful_requests(calls, successful_request): - relevant_call = [ - call - for call in calls - if call.startswith("identifier:request_details/successful_request_count") - ] - assert len(relevant_call) == 1 - relevant_call = relevant_call[0] - value = bool(int(relevant_call.split("value:")[1].split(",")[0])) - assert value == successful_request - - -def _test_response_msg(calls, response_msg): - relevant_call = [ - call - for call in calls - if call.startswith("identifier:request_details/response_message") - ] - assert len(relevant_call) == 1 - relevant_call = relevant_call[0] - value = relevant_call.split("value:")[1].split(",")[0] - assert value == response_msg - - -@pytest.mark.parametrize( - "json_payload, input_batch_size, successful_request, response_msg", - [ - ({"sequences": "today is great"}, 1, True, "Response status code: 200"), - ( - {"sequences": ["today is great", "today is great"]}, - 2, - True, - "Response status code: 200", - ), - ({"this": "is supposed to fail"}, 1, False, "Response status code: 422"), - ], -) -def test_log_request_details( - json_payload, input_batch_size, successful_request, response_msg -): - server_config = ServerConfig( - endpoints=[ - EndpointConfig( - task=task, name=name, model=stub, batch_size=input_batch_size - ) - ], - loggers={"logger_1": {"path": logger_identifier}}, - system_logging=ServerSystemLoggingConfig( - request_details=SystemLoggingGroup(enable=True) - ), - ) - server_logger = server_logger_from_config(server_config) - with mock.patch( - "deepsparse.server.server.server_logger_from_config", return_value=server_logger - ), mock_engine(rng_seed=0): - app = _build_app(server_config) - client = TestClient(app) - client.post("/predict", json=json_payload) - - calls = server_logger.logger.loggers[0].logger.loggers[0].calls - - _test_successful_requests(calls, successful_request) - _test_response_msg(calls, response_msg) - - -def _test_cpu_utilization(calls, num_iterations): - relevant_calls = [ - call - for call in calls - if call.startswith("identifier:resource_utilization/cpu_utilization_percent") - ] - assert len(relevant_calls) == num_iterations - - -def 
_test_memory_utilization(calls, num_iterations): - relevant_calls = [ - call - for call in calls - if call.startswith("identifier:resource_utilization/memory_utilization_percent") - ] - values = [float(call.split("value:")[1].split(",")[0]) for call in relevant_calls] - assert len(relevant_calls) == num_iterations - # memory utilization is a percentage, so it should be between 0 and 100 - assert all(0.0 < value < 100.0 for value in values) - - -def _test_total_memory_available(calls, num_iterations): - relevant_calls = [ - call - for call in calls - if call.startswith( - "identifier:resource_utilization/total_memory_available_bytes" - ) - ] - values = [float(call.split("value:")[1].split(",")[0]) for call in relevant_calls] - assert len(relevant_calls) == num_iterations - # assert all values are the same (total memory available is constant) - assert all(value == values[0] for value in values) - - -def _test_additional_items_to_log(calls, num_iterations): - relevant_calls = [ - call - for call in calls - if call.startswith("identifier:resource_utilization/test") - ] - values = [float(call.split("value:")[1].split(",")[0]) for call in relevant_calls] - assert len(relevant_calls) == num_iterations - # assert all values are the same ({"test" : 1} is constant) - assert all(value == 1 for value in values) - - -@pytest.mark.parametrize( - "num_iterations, additional_items_to_log", - [ - (5, {}), - (3, {"test": 1}), - ], -) -def test_log_resource_utilization(num_iterations, additional_items_to_log): - server_logger = ListLogger() - - for iter in range(num_iterations): - log_resource_utilization( - server_logger, prefix="resource_utilization", **additional_items_to_log - ) - - calls = server_logger.calls - - _test_cpu_utilization(calls, num_iterations) - _test_memory_utilization(calls, num_iterations) - _test_total_memory_available(calls, num_iterations) +# # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# # +# # Licensed under the Apache License, Version 2.0 (the "License"); +# # you may not use this file except in compliance with the License. +# # You may obtain a copy of the License at +# # +# # http://www.apache.org/licenses/LICENSE-2.0 +# # +# # Unless required by applicable law or agreed to in writing, +# # software distributed under the License is distributed on an "AS IS" BASIS, +# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# # See the License for the specific language governing permissions and +# # limitations under the License. 
+# +# from unittest import mock +# +# import pytest +# from deepsparse.loggers.config import SystemLoggingGroup +# from deepsparse.server.config import ( +# EndpointConfig, +# ServerConfig, +# ServerSystemLoggingConfig, +# ) +# from deepsparse.server.helpers import server_logger_from_config +# from deepsparse.server.server import _build_app +# from deepsparse.server.system_logging import log_resource_utilization +# from fastapi.testclient import TestClient +# from tests.deepsparse.loggers.helpers import ListLogger +# from tests.utils import mock_engine +# +# +# logger_identifier = "tests/deepsparse/loggers/helpers.py:ListLogger" +# stub = "zoo:nlp/text_classification/distilbert-none/pytorch/huggingface/qqp/pruned80_quant-none-vnni" # noqa E501 +# task = "text-classification" +# name = "endpoint_name" +# +# +# def _test_successful_requests(calls, successful_request): +# relevant_call = [ +# call +# for call in calls +# if call.startswith("identifier:request_details/successful_request_count") +# ] +# assert len(relevant_call) == 1 +# relevant_call = relevant_call[0] +# value = bool(int(relevant_call.split("value:")[1].split(",")[0])) +# assert value == successful_request +# +# +# def _test_response_msg(calls, response_msg): +# relevant_call = [ +# call +# for call in calls +# if call.startswith("identifier:request_details/response_message") +# ] +# assert len(relevant_call) == 1 +# relevant_call = relevant_call[0] +# value = relevant_call.split("value:")[1].split(",")[0] +# assert value == response_msg +# +# +# @pytest.mark.parametrize( +# "json_payload, input_batch_size, successful_request, response_msg", +# [ +# ({"sequences": "today is great"}, 1, True, "Response status code: 200"), +# ( +# {"sequences": ["today is great", "today is great"]}, +# 2, +# True, +# "Response status code: 200", +# ), +# ({"this": "is supposed to fail"}, 1, False, "Response status code: 422"), +# ], +# ) +# def test_log_request_details( +# json_payload, input_batch_size, successful_request, response_msg +# ): +# server_config = ServerConfig( +# endpoints=[ +# EndpointConfig( +# task=task, name=name, model=stub, batch_size=input_batch_size +# ) +# ], +# loggers={"logger_1": {"path": logger_identifier}}, +# system_logging=ServerSystemLoggingConfig( +# request_details=SystemLoggingGroup(enable=True) +# ), +# ) +# server_logger = server_logger_from_config(server_config) +# with mock.patch( +# "deepsparse.server.server.server_logger_from_config", return_value=server_logger +# ), mock_engine(rng_seed=0): +# app = _build_app(server_config) +# client = TestClient(app) +# client.post("/predict", json=json_payload) +# +# calls = server_logger.logger.loggers[0].logger.loggers[0].calls +# +# _test_successful_requests(calls, successful_request) +# _test_response_msg(calls, response_msg) +# +# +# def _test_cpu_utilization(calls, num_iterations): +# relevant_calls = [ +# call +# for call in calls +# if call.startswith("identifier:resource_utilization/cpu_utilization_percent") +# ] +# assert len(relevant_calls) == num_iterations +# +# +# def _test_memory_utilization(calls, num_iterations): +# relevant_calls = [ +# call +# for call in calls +# if call.startswith("identifier:resource_utilization/memory_utilization_percent") +# ] +# values = [float(call.split("value:")[1].split(",")[0]) for call in relevant_calls] +# assert len(relevant_calls) == num_iterations +# # memory utilization is a percentage, so it should be between 0 and 100 +# assert all(0.0 < value < 100.0 for value in values) +# +# +# def 
_test_total_memory_available(calls, num_iterations): +# relevant_calls = [ +# call +# for call in calls +# if call.startswith( +# "identifier:resource_utilization/total_memory_available_bytes" +# ) +# ] +# values = [float(call.split("value:")[1].split(",")[0]) for call in relevant_calls] +# assert len(relevant_calls) == num_iterations +# # assert all values are the same (total memory available is constant) +# assert all(value == values[0] for value in values) +# +# +# def _test_additional_items_to_log(calls, num_iterations): +# relevant_calls = [ +# call +# for call in calls +# if call.startswith("identifier:resource_utilization/test") +# ] +# values = [float(call.split("value:")[1].split(",")[0]) for call in relevant_calls] +# assert len(relevant_calls) == num_iterations +# # assert all values are the same ({"test" : 1} is constant) +# assert all(value == 1 for value in values) +# +# +# @pytest.mark.parametrize( +# "num_iterations, additional_items_to_log", +# [ +# (5, {}), +# (3, {"test": 1}), +# ], +# ) +# def test_log_resource_utilization(num_iterations, additional_items_to_log): +# server_logger = ListLogger() +# +# for iter in range(num_iterations): +# log_resource_utilization( +# server_logger, prefix="resource_utilization", **additional_items_to_log +# ) +# +# calls = server_logger.calls +# +# _test_cpu_utilization(calls, num_iterations) +# _test_memory_utilization(calls, num_iterations) +# _test_total_memory_available(calls, num_iterations) From c117788fb9f256585a31c4d11741d630cfe136bc Mon Sep 17 00:00:00 2001 From: Damian Date: Wed, 7 Jun 2023 16:14:46 +0000 Subject: [PATCH 08/11] ready to review --- src/deepsparse/transformers/helpers.py | 44 +- src/deepsparse/utils/onnx.py | 30 +- src/deepsparse/yolo/utils/utils.py | 7 + tests/server/test_app.py | 332 +++++++-------- tests/server/test_config.py | 444 ++++++++++---------- tests/server/test_endpoints.py | 536 ++++++++++++------------- tests/server/test_loggers.py | 486 +++++++++++----------- tests/server/test_system_logging.py | 338 ++++++++-------- 8 files changed, 1110 insertions(+), 1107 deletions(-) diff --git a/src/deepsparse/transformers/helpers.py b/src/deepsparse/transformers/helpers.py index 847a7a9924..83b519baa5 100644 --- a/src/deepsparse/transformers/helpers.py +++ b/src/deepsparse/transformers/helpers.py @@ -135,7 +135,6 @@ def overwrite_transformer_onnx_model_inputs( path: str, batch_size: int = 1, max_length: int = 128, - output_path: Optional[str] = None, inplace: bool = True, ) -> Tuple[Optional[str], List[str], Optional[NamedTemporaryFile]]: """ @@ -146,25 +145,16 @@ def overwrite_transformer_onnx_model_inputs( :param path: path to the ONNX model to override :param batch_size: batch size to set :param max_length: max sequence length to set - :param output_path: if provided, the model will be saved to the given path, - otherwise, the model will be saved to a named temporary file that will - be deleted after the program exits - :param inplace: if True, the model will be modified in place, otherwise - a copy of the model will be saved to a temporary file - :return: if no output path, a tuple of the saved path to the model, list of - model input names, and reference to the tempfile object will be returned - otherwise, only the model input names will be returned + :param inplace: if True, the model will be modified in place (its inputs will + be overwritten). 
Else, a copy of that model, with overwritten inputs, + will be saved to a temporary file + :return: tuple of (path to the overwritten model, list of input names that were + overwritten, and a temporary file containing the overwritten model if + `inplace=False`, else None) """ - - if inplace and output_path is not None: - raise ValueError( - "Cannot specify both inplace=True and output_path. If inplace=True, " - "the model will be modified in place (the returned path will be identical" - "to the input path specified in argument `path`)" - ) - if inplace: - output_path = path # overwrite input shapes + # if > 2Gb model is to be modified in-place, operate + # exclusively on the model graph model = onnx.load(path, load_external_data=not inplace) initializer_input_names = set([node.name for node in model.graph.initializer]) external_inputs = [ @@ -177,14 +167,20 @@ def overwrite_transformer_onnx_model_inputs( input_names.append(external_input.name) # Save modified model - if not inplace: - tmp_file = NamedTemporaryFile() # file will be deleted after program exit + if inplace: + _LOGGER.info( + f"Overwriting in-place the input shapes of the transformer model at {path}" + ) + save_onnx(model, path) + return path, input_names, None + else: + tmp_file = NamedTemporaryFile() + _LOGGER.info( + f"Saving a copy of the transformer model: {path} " + f"with overwritten input shapes to {tmp_file.name}" + ) save_onnx(model, tmp_file.name) return tmp_file.name, input_names, tmp_file - else: - save_onnx(model, output_path) - - return output_path, input_names, None def _get_file_parent(file_path: str) -> str: diff --git a/src/deepsparse/utils/onnx.py b/src/deepsparse/utils/onnx.py index 00f5f24233..eb31179bc9 100644 --- a/src/deepsparse/utils/onnx.py +++ b/src/deepsparse/utils/onnx.py @@ -24,7 +24,7 @@ from onnx.mapping import TENSOR_TYPE_TO_NP_TYPE from deepsparse.utils.extractor import Extractor -from sparsezoo.utils import onnx_includes_external_data, save_onnx, validate_onnx +from sparsezoo.utils import save_onnx, validate_onnx try: @@ -60,21 +60,15 @@ def save_onnx_to_temp_files(model: onnx.ModelProto, with_external_data=False) -> :param model: The onnx model to save to temporary directory :param with_external_data: Whether to save external data to a separate file """ - - if not onnx_includes_external_data(model) and with_external_data: - raise ValueError( - "Model does not include external data, it only includes the model graph." - "Cannot save its external data to separate a file." - "Set argument `with_external_data`=False" - ) shaped_model = tempfile.NamedTemporaryFile(mode="w", delete=False) - _LOGGER.warning(f"Saving model to temporary directory: {tempfile.tempdir}") + _LOGGER.info(f"Saving model to temporary directory: {tempfile.tempdir}") if with_external_data: external_data = os.path.join( tempfile.tempdir, next(tempfile._get_candidate_names()) ) has_external_data = save_onnx(model, shaped_model.name, external_data) + _LOGGER.info(f"Saving external data to temporary directory: {external_data}") else: has_external_data = save_onnx(model, shaped_model.name) try: @@ -218,7 +212,7 @@ def override_onnx_batch_size( external data are saved along the model graph. :param batch_size: Override for the batch size dimension :param inplace: If True, overwrite the original model file. - Else save the modified model to a temporary file. + Else, save the modified model to a temporary file. :return: File path to modified ONNX model. 
If inplace is True, the modified model will be saved to the same path as the original @@ -234,12 +228,13 @@ def override_onnx_batch_size( for external_input in external_inputs: external_input.type.tensor_type.shape.dim[0].dim_value = batch_size - # Save modified model, this will be cleaned up when context is exited if inplace: + _LOGGER.info( + f"Overwriting in-place the batch size of the model at {onnx_filepath}" + ) save_onnx(model, onnx_filepath) return onnx_filepath else: - # Save modified model, this will be cleaned up when context is exited return save_onnx_to_temp_files(model, with_external_data=not inplace) @@ -302,12 +297,17 @@ def override_onnx_input_shapes( for dim_idx, dim in enumerate(external_input.type.tensor_type.shape.dim): dim.dim_value = input_shapes[input_idx][dim_idx] - # Save modified model, this will be cleaned up when context is exited if inplace: + _LOGGER.info( + "Overwriting in-place the input shapes of the model " f"at {onnx_filepath}" + ) onnx.save(model, onnx_filepath) return onnx_filepath else: - # Save modified model, this will be cleaned up when context is exited + _LOGGER.info( + f"Saving the input shapes of the model at {onnx_filepath} " + f"to a temporary file" + ) return save_onnx_to_temp_files(model, with_external_data=not inplace) @@ -387,7 +387,7 @@ def truncate_onnx_model( output.type.tensor_type.shape.Clear() # save and check model - _LOGGER.info("Saving truncated model to %s", output_filepath) + _LOGGER.debug(f"Saving truncated model to {output_filepath}") save_onnx(extracted_model, output_filepath, "external_data") validate_onnx(output_filepath) diff --git a/src/deepsparse/yolo/utils/utils.py b/src/deepsparse/yolo/utils/utils.py index 3a0f596fe1..e778fabe17 100644 --- a/src/deepsparse/yolo/utils/utils.py +++ b/src/deepsparse/yolo/utils/utils.py @@ -405,9 +405,16 @@ def modify_yolo_onnx_input_shape( set_tensor_dim_shape(model.graph.output[0], 1, num_predictions) if inplace: + _LOGGER.info( + "Overwriting in-place the ONNX model " + f"at {model_path} with the new input shape" + ) save_onnx(model, model_path) return model_path else: + _LOGGER.info( + "Saving the ONNX model with the " "new input shape to a temporary file" + ) return save_onnx_to_temp_files(model, with_external_data=not inplace) diff --git a/tests/server/test_app.py b/tests/server/test_app.py index 678152adc9..9bc71e1a36 100644 --- a/tests/server/test_app.py +++ b/tests/server/test_app.py @@ -1,166 +1,166 @@ -# # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# # -# # Licensed under the Apache License, Version 2.0 (the "License"); -# # you may not use this file except in compliance with the License. -# # You may obtain a copy of the License at -# # -# # http://www.apache.org/licenses/LICENSE-2.0 -# # -# # Unless required by applicable law or agreed to in writing, -# # software distributed under the License is distributed on an "AS IS" BASIS, -# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# # See the License for the specific language governing permissions and -# # limitations under the License. -# -# import os -# from copy import deepcopy -# from re import escape -# from unittest.mock import patch -# -# import pytest -# from deepsparse.server.config import EndpointConfig, ServerConfig -# from deepsparse.server.server import _build_app -# -# -# def test_add_multiple_endpoints_with_no_route(): -# with pytest.raises( -# ValueError, -# match=( -# "must specify `route` for all endpoints if multiple endpoints are used." 
-# ), -# ): -# _build_app( -# ServerConfig( -# num_cores=1, -# num_workers=1, -# endpoints=[ -# EndpointConfig(task="", model="", route=None), -# EndpointConfig(task="", model="", route=None), -# ], -# loggers={}, -# ) -# ) -# -# -# def test_add_multiple_endpoints_with_same_route(): -# with pytest.raises(ValueError, match="asdf specified 2 times"): -# _build_app( -# ServerConfig( -# num_cores=1, -# num_workers=1, -# endpoints=[ -# EndpointConfig(task="", model="", route="asdf"), -# EndpointConfig(task="", model="", route="asdf"), -# ], -# loggers={}, -# ) -# ) -# -# -# def test_invalid_integration(): -# with pytest.raises( -# ValueError, -# match=escape( -# "Unknown integration field asdf. Expected one of ['local', 'sagemaker']" -# ), -# ): -# _build_app( -# ServerConfig( -# num_cores=1, -# num_workers=1, -# integration="asdf", -# endpoints=[], -# loggers={}, -# ) -# ) -# -# -# def test_pytorch_num_threads(): -# torch = pytest.importorskip("torch") -# -# orig_num_threads = torch.get_num_threads() -# _build_app( -# ServerConfig( -# num_cores=1, -# num_workers=1, -# pytorch_num_threads=None, -# endpoints=[], -# loggers={}, -# ) -# ) -# assert torch.get_num_threads() == orig_num_threads -# -# _build_app( -# ServerConfig( -# num_cores=1, -# num_workers=1, -# pytorch_num_threads=1, -# endpoints=[], -# loggers={}, -# ) -# ) -# assert torch.get_num_threads() == 1 -# -# -# @patch.dict(os.environ, deepcopy(os.environ)) -# def test_thread_pinning_none(): -# os.environ.pop("NM_BIND_THREADS_TO_CORES", None) -# os.environ.pop("NM_BIND_THREADS_TO_SOCKETS", None) -# _build_app( -# ServerConfig( -# num_cores=1, -# num_workers=1, -# engine_thread_pinning="none", -# endpoints=[], -# loggers={}, -# ) -# ) -# assert os.environ["NM_BIND_THREADS_TO_CORES"] == "0" -# assert os.environ["NM_BIND_THREADS_TO_SOCKETS"] == "0" -# -# -# @patch.dict(os.environ, deepcopy(os.environ)) -# def test_thread_pinning_numa(): -# os.environ.pop("NM_BIND_THREADS_TO_CORES", None) -# os.environ.pop("NM_BIND_THREADS_TO_SOCKETS", None) -# _build_app( -# ServerConfig( -# num_cores=1, -# num_workers=1, -# engine_thread_pinning="numa", -# endpoints=[], -# loggers={}, -# ) -# ) -# assert os.environ["NM_BIND_THREADS_TO_CORES"] == "0" -# assert os.environ["NM_BIND_THREADS_TO_SOCKETS"] == "1" -# -# -# @patch.dict(os.environ, deepcopy(os.environ)) -# def test_thread_pinning_cores(): -# os.environ.pop("NM_BIND_THREADS_TO_CORES", None) -# os.environ.pop("NM_BIND_THREADS_TO_SOCKETS", None) -# _build_app( -# ServerConfig( -# num_cores=1, -# num_workers=1, -# engine_thread_pinning="core", -# endpoints=[], -# loggers={}, -# ) -# ) -# assert os.environ["NM_BIND_THREADS_TO_CORES"] == "1" -# assert os.environ["NM_BIND_THREADS_TO_SOCKETS"] == "0" -# -# -# def test_invalid_thread_pinning(): -# with pytest.raises(ValueError, match='Expected one of {"core","numa","none"}.'): -# _build_app( -# ServerConfig( -# num_cores=1, -# num_workers=1, -# engine_thread_pinning="asdf", -# endpoints=[], -# loggers={}, -# ) -# ) +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from copy import deepcopy +from re import escape +from unittest.mock import patch + +import pytest +from deepsparse.server.config import EndpointConfig, ServerConfig +from deepsparse.server.server import _build_app + + +def test_add_multiple_endpoints_with_no_route(): + with pytest.raises( + ValueError, + match=( + "must specify `route` for all endpoints if multiple endpoints are used." + ), + ): + _build_app( + ServerConfig( + num_cores=1, + num_workers=1, + endpoints=[ + EndpointConfig(task="", model="", route=None), + EndpointConfig(task="", model="", route=None), + ], + loggers={}, + ) + ) + + +def test_add_multiple_endpoints_with_same_route(): + with pytest.raises(ValueError, match="asdf specified 2 times"): + _build_app( + ServerConfig( + num_cores=1, + num_workers=1, + endpoints=[ + EndpointConfig(task="", model="", route="asdf"), + EndpointConfig(task="", model="", route="asdf"), + ], + loggers={}, + ) + ) + + +def test_invalid_integration(): + with pytest.raises( + ValueError, + match=escape( + "Unknown integration field asdf. Expected one of ['local', 'sagemaker']" + ), + ): + _build_app( + ServerConfig( + num_cores=1, + num_workers=1, + integration="asdf", + endpoints=[], + loggers={}, + ) + ) + + +def test_pytorch_num_threads(): + torch = pytest.importorskip("torch") + + orig_num_threads = torch.get_num_threads() + _build_app( + ServerConfig( + num_cores=1, + num_workers=1, + pytorch_num_threads=None, + endpoints=[], + loggers={}, + ) + ) + assert torch.get_num_threads() == orig_num_threads + + _build_app( + ServerConfig( + num_cores=1, + num_workers=1, + pytorch_num_threads=1, + endpoints=[], + loggers={}, + ) + ) + assert torch.get_num_threads() == 1 + + +@patch.dict(os.environ, deepcopy(os.environ)) +def test_thread_pinning_none(): + os.environ.pop("NM_BIND_THREADS_TO_CORES", None) + os.environ.pop("NM_BIND_THREADS_TO_SOCKETS", None) + _build_app( + ServerConfig( + num_cores=1, + num_workers=1, + engine_thread_pinning="none", + endpoints=[], + loggers={}, + ) + ) + assert os.environ["NM_BIND_THREADS_TO_CORES"] == "0" + assert os.environ["NM_BIND_THREADS_TO_SOCKETS"] == "0" + + +@patch.dict(os.environ, deepcopy(os.environ)) +def test_thread_pinning_numa(): + os.environ.pop("NM_BIND_THREADS_TO_CORES", None) + os.environ.pop("NM_BIND_THREADS_TO_SOCKETS", None) + _build_app( + ServerConfig( + num_cores=1, + num_workers=1, + engine_thread_pinning="numa", + endpoints=[], + loggers={}, + ) + ) + assert os.environ["NM_BIND_THREADS_TO_CORES"] == "0" + assert os.environ["NM_BIND_THREADS_TO_SOCKETS"] == "1" + + +@patch.dict(os.environ, deepcopy(os.environ)) +def test_thread_pinning_cores(): + os.environ.pop("NM_BIND_THREADS_TO_CORES", None) + os.environ.pop("NM_BIND_THREADS_TO_SOCKETS", None) + _build_app( + ServerConfig( + num_cores=1, + num_workers=1, + engine_thread_pinning="core", + endpoints=[], + loggers={}, + ) + ) + assert os.environ["NM_BIND_THREADS_TO_CORES"] == "1" + assert os.environ["NM_BIND_THREADS_TO_SOCKETS"] == "0" + + +def test_invalid_thread_pinning(): + with pytest.raises(ValueError, match='Expected one of {"core","numa","none"}.'): + _build_app( + ServerConfig( + num_cores=1, + num_workers=1, + engine_thread_pinning="asdf", + endpoints=[], + loggers={}, + ) + ) diff --git a/tests/server/test_config.py b/tests/server/test_config.py index f2f9b0e6fe..b1c1c75a84 100644 --- a/tests/server/test_config.py +++ b/tests/server/test_config.py @@ -1,222 
+1,222 @@ -# # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# # -# # Licensed under the Apache License, Version 2.0 (the "License"); -# # you may not use this file except in compliance with the License. -# # You may obtain a copy of the License at -# # -# # http://www.apache.org/licenses/LICENSE-2.0 -# # -# # Unless required by applicable law or agreed to in writing, -# # software distributed under the License is distributed on an "AS IS" BASIS, -# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# # See the License for the specific language governing permissions and -# # limitations under the License. -# -# import yaml -# -# import pytest -# from deepsparse.server.config import ( -# EndpointConfig, -# ImageSizesConfig, -# MetricFunctionConfig, -# SequenceLengthsConfig, -# ServerConfig, -# ) -# -# -# def test_no_bucketing_config(): -# cfg = EndpointConfig(task="", model="").to_pipeline_config() -# assert cfg.input_shapes is None -# assert cfg.kwargs == {} -# -# -# @pytest.mark.parametrize("task", ["yolo", "yolact", "image_classification"]) -# def test_bucketing_sequence_length_for_cv(task): -# with pytest.raises(ValueError, match=f"for non-nlp task {task}"): -# EndpointConfig( -# task=task, model="", bucketing=SequenceLengthsConfig(sequence_lengths=[]) -# ).to_pipeline_config() -# -# -# @pytest.mark.parametrize( -# "task", ["question_answering", "text_classification", "token_classification"] -# ) -# def test_bucketing_image_size_for_nlp(task): -# with pytest.raises(ValueError, match=f"for non computer vision task {task}"): -# EndpointConfig( -# task=task, model="", bucketing=ImageSizesConfig(image_sizes=[]) -# ).to_pipeline_config() -# -# -# def test_bucketing_zero_sequence_length(): -# with pytest.raises(ValueError, match="at least one sequence length"): -# EndpointConfig( -# task="qa", model="", bucketing=SequenceLengthsConfig(sequence_lengths=[]) -# ).to_pipeline_config() -# -# -# def test_bucketing_zero_image_size(): -# with pytest.raises(ValueError, match="at least one image size"): -# EndpointConfig( -# task="yolo", model="", bucketing=ImageSizesConfig(image_sizes=[]) -# ).to_pipeline_config() -# -# -# def test_bucketing_one_sequence_length(): -# cfg = EndpointConfig( -# task="qa", model="", bucketing=SequenceLengthsConfig(sequence_lengths=[32]) -# ).to_pipeline_config() -# assert cfg.input_shapes is None -# assert cfg.kwargs == {"sequence_length": 32} -# -# -# def test_bucketing_multi_sequence_length(): -# cfg = EndpointConfig( -# task="qa", model="", bucketing=SequenceLengthsConfig(sequence_lengths=[32, 64]) -# ).to_pipeline_config() -# assert cfg.input_shapes is None -# assert cfg.kwargs == {"sequence_length": [32, 64]} -# -# -# def test_bucketing_one_image_size(): -# cfg = EndpointConfig( -# task="yolo", model="", bucketing=ImageSizesConfig(image_sizes=[(256, 256)]) -# ).to_pipeline_config() -# assert cfg.input_shapes == [[256, 256]] -# assert cfg.kwargs == {} -# -# -# def test_endpoint_config_to_pipeline_copy_fields(): -# cfg = EndpointConfig(task="qa", model="zxcv").to_pipeline_config() -# assert cfg.task == "qa" -# assert cfg.model_path == "zxcv" -# -# cfg = EndpointConfig(task="", model="").to_pipeline_config() -# assert cfg.batch_size == 1 -# -# cfg = EndpointConfig(task="", model="", batch_size=64).to_pipeline_config() -# assert cfg.batch_size == 64 -# -# -# def test_yaml_load_config(tmp_path): -# server_config = ServerConfig( -# num_cores=1, -# num_workers=2, -# integration="sagemaker", -# endpoints=[ -# EndpointConfig( 
-# name="asdf", -# route="qwer", -# task="uiop", -# model="hjkl", -# batch_size=1, -# bucketing=None, -# ), -# EndpointConfig( -# name="asdfd", -# route="qwer", -# task="uiop", -# model="hjkl", -# batch_size=2, -# bucketing=ImageSizesConfig(image_sizes=[(1, 1), (2, 2)]), -# ), -# EndpointConfig( -# name="asdfde", -# route="qwer", -# task="uiop", -# model="hjkl", -# batch_size=3, -# bucketing=SequenceLengthsConfig(sequence_lengths=[5, 6, 7]), -# ), -# ], -# loggers={}, -# ) -# -# path = tmp_path / "config.yaml" -# with open(path, "w") as fp: -# yaml.dump(server_config.dict(), fp) -# -# with open(path) as fp: -# obj = yaml.load(fp, Loader=yaml.Loader) -# server_config2 = ServerConfig(**obj) -# assert server_config == server_config2 -# -# -# metric_function_config_yaml_1 = """ -# func: identity -# frequency: 5 -# loggers: -# - python""" -# -# metric_function_config_yaml_2 = """ -# func: numpy.max""" -# -# metric_function_config_yaml_3 = """ -# func: numpy.max -# frequency: 0""" -# -# -# @pytest.mark.parametrize( -# "config_yaml, should_fail, instance_type", -# [ -# (metric_function_config_yaml_1, False, MetricFunctionConfig), -# (metric_function_config_yaml_2, False, MetricFunctionConfig), -# ( -# metric_function_config_yaml_3, -# True, -# MetricFunctionConfig, -# ), # frequency cannot be zero -# ], -# ) -# def test_function_logging_config(config_yaml, should_fail, instance_type): -# obj = yaml.safe_load(config_yaml) -# if should_fail: -# with pytest.raises(Exception): -# MetricFunctionConfig(**obj) -# else: -# assert MetricFunctionConfig(**obj) -# -# -# def _create_server_config(task_name, endpoint_1_name, endpoint_2_name): -# return ServerConfig( -# endpoints=[ -# EndpointConfig( -# name=endpoint_1_name, -# task=task_name, -# model="hjkl", -# ), -# EndpointConfig( -# name=endpoint_2_name, -# task=task_name, -# model="hjkl", -# ), -# ] -# ) -# -# -# @pytest.mark.parametrize( -# "task_name, endpoint_1_name, endpoint_2_name, raise_error, expected_endpoint_1_name, expected_endpoint_2_name", # noqa: E501 -# [ -# ("some_task", None, None, False, "some_task-0", "some_task-1"), -# ("some_task", "name_1", None, False, "name_1", "some_task-0"), -# ("some_task", "name_1", "name_2", False, "name_1", "name_2"), -# ("some_task", "name_1", "name_1", True, None, None), -# ], -# ) -# def test_unique_endpoint_names( -# task_name, -# endpoint_1_name, -# endpoint_2_name, -# raise_error, -# expected_endpoint_1_name, -# expected_endpoint_2_name, -# ): -# if raise_error: -# with pytest.raises(ValueError): -# _create_server_config(task_name, endpoint_1_name, endpoint_2_name) -# return -# return -# -# server_config = _create_server_config(task_name, endpoint_1_name, endpoint_2_name) -# assert server_config.endpoints[0].name == expected_endpoint_1_name -# assert server_config.endpoints[1].name == expected_endpoint_2_name +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import yaml + +import pytest +from deepsparse.server.config import ( + EndpointConfig, + ImageSizesConfig, + MetricFunctionConfig, + SequenceLengthsConfig, + ServerConfig, +) + + +def test_no_bucketing_config(): + cfg = EndpointConfig(task="", model="").to_pipeline_config() + assert cfg.input_shapes is None + assert cfg.kwargs == {} + + +@pytest.mark.parametrize("task", ["yolo", "yolact", "image_classification"]) +def test_bucketing_sequence_length_for_cv(task): + with pytest.raises(ValueError, match=f"for non-nlp task {task}"): + EndpointConfig( + task=task, model="", bucketing=SequenceLengthsConfig(sequence_lengths=[]) + ).to_pipeline_config() + + +@pytest.mark.parametrize( + "task", ["question_answering", "text_classification", "token_classification"] +) +def test_bucketing_image_size_for_nlp(task): + with pytest.raises(ValueError, match=f"for non computer vision task {task}"): + EndpointConfig( + task=task, model="", bucketing=ImageSizesConfig(image_sizes=[]) + ).to_pipeline_config() + + +def test_bucketing_zero_sequence_length(): + with pytest.raises(ValueError, match="at least one sequence length"): + EndpointConfig( + task="qa", model="", bucketing=SequenceLengthsConfig(sequence_lengths=[]) + ).to_pipeline_config() + + +def test_bucketing_zero_image_size(): + with pytest.raises(ValueError, match="at least one image size"): + EndpointConfig( + task="yolo", model="", bucketing=ImageSizesConfig(image_sizes=[]) + ).to_pipeline_config() + + +def test_bucketing_one_sequence_length(): + cfg = EndpointConfig( + task="qa", model="", bucketing=SequenceLengthsConfig(sequence_lengths=[32]) + ).to_pipeline_config() + assert cfg.input_shapes is None + assert cfg.kwargs == {"sequence_length": 32} + + +def test_bucketing_multi_sequence_length(): + cfg = EndpointConfig( + task="qa", model="", bucketing=SequenceLengthsConfig(sequence_lengths=[32, 64]) + ).to_pipeline_config() + assert cfg.input_shapes is None + assert cfg.kwargs == {"sequence_length": [32, 64]} + + +def test_bucketing_one_image_size(): + cfg = EndpointConfig( + task="yolo", model="", bucketing=ImageSizesConfig(image_sizes=[(256, 256)]) + ).to_pipeline_config() + assert cfg.input_shapes == [[256, 256]] + assert cfg.kwargs == {} + + +def test_endpoint_config_to_pipeline_copy_fields(): + cfg = EndpointConfig(task="qa", model="zxcv").to_pipeline_config() + assert cfg.task == "qa" + assert cfg.model_path == "zxcv" + + cfg = EndpointConfig(task="", model="").to_pipeline_config() + assert cfg.batch_size == 1 + + cfg = EndpointConfig(task="", model="", batch_size=64).to_pipeline_config() + assert cfg.batch_size == 64 + + +def test_yaml_load_config(tmp_path): + server_config = ServerConfig( + num_cores=1, + num_workers=2, + integration="sagemaker", + endpoints=[ + EndpointConfig( + name="asdf", + route="qwer", + task="uiop", + model="hjkl", + batch_size=1, + bucketing=None, + ), + EndpointConfig( + name="asdfd", + route="qwer", + task="uiop", + model="hjkl", + batch_size=2, + bucketing=ImageSizesConfig(image_sizes=[(1, 1), (2, 2)]), + ), + EndpointConfig( + name="asdfde", + route="qwer", + task="uiop", + model="hjkl", + batch_size=3, + bucketing=SequenceLengthsConfig(sequence_lengths=[5, 6, 7]), + ), + ], + loggers={}, + ) + + path = tmp_path / "config.yaml" + with open(path, "w") as fp: + yaml.dump(server_config.dict(), fp) + + with open(path) as fp: + obj = yaml.load(fp, Loader=yaml.Loader) + server_config2 = ServerConfig(**obj) + assert server_config == server_config2 + + +metric_function_config_yaml_1 = """ + func: identity + 
frequency: 5 + loggers: + - python""" + +metric_function_config_yaml_2 = """ + func: numpy.max""" + +metric_function_config_yaml_3 = """ + func: numpy.max + frequency: 0""" + + +@pytest.mark.parametrize( + "config_yaml, should_fail, instance_type", + [ + (metric_function_config_yaml_1, False, MetricFunctionConfig), + (metric_function_config_yaml_2, False, MetricFunctionConfig), + ( + metric_function_config_yaml_3, + True, + MetricFunctionConfig, + ), # frequency cannot be zero + ], +) +def test_function_logging_config(config_yaml, should_fail, instance_type): + obj = yaml.safe_load(config_yaml) + if should_fail: + with pytest.raises(Exception): + MetricFunctionConfig(**obj) + else: + assert MetricFunctionConfig(**obj) + + +def _create_server_config(task_name, endpoint_1_name, endpoint_2_name): + return ServerConfig( + endpoints=[ + EndpointConfig( + name=endpoint_1_name, + task=task_name, + model="hjkl", + ), + EndpointConfig( + name=endpoint_2_name, + task=task_name, + model="hjkl", + ), + ] + ) + + +@pytest.mark.parametrize( + "task_name, endpoint_1_name, endpoint_2_name, raise_error, expected_endpoint_1_name, expected_endpoint_2_name", # noqa: E501 + [ + ("some_task", None, None, False, "some_task-0", "some_task-1"), + ("some_task", "name_1", None, False, "name_1", "some_task-0"), + ("some_task", "name_1", "name_2", False, "name_1", "name_2"), + ("some_task", "name_1", "name_1", True, None, None), + ], +) +def test_unique_endpoint_names( + task_name, + endpoint_1_name, + endpoint_2_name, + raise_error, + expected_endpoint_1_name, + expected_endpoint_2_name, +): + if raise_error: + with pytest.raises(ValueError): + _create_server_config(task_name, endpoint_1_name, endpoint_2_name) + return + return + + server_config = _create_server_config(task_name, endpoint_1_name, endpoint_2_name) + assert server_config.endpoints[0].name == expected_endpoint_1_name + assert server_config.endpoints[1].name == expected_endpoint_2_name diff --git a/tests/server/test_endpoints.py b/tests/server/test_endpoints.py index 411fb46446..f028b37e75 100644 --- a/tests/server/test_endpoints.py +++ b/tests/server/test_endpoints.py @@ -1,268 +1,268 @@ -# # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# # -# # Licensed under the Apache License, Version 2.0 (the "License"); -# # you may not use this file except in compliance with the License. -# # You may obtain a copy of the License at -# # -# # http://www.apache.org/licenses/LICENSE-2.0 -# # -# # Unless required by applicable law or agreed to in writing, -# # software distributed under the License is distributed on an "AS IS" BASIS, -# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# # See the License for the specific language governing permissions and -# # limitations under the License. -# -# from typing import List -# from unittest.mock import Mock -# -# from pydantic import BaseModel -# -# import pytest -# from deepsparse.loggers import MultiLogger -# from deepsparse.server.config import EndpointConfig, ServerConfig, SystemLoggingConfig -# from deepsparse.server.server import _add_pipeline_endpoint, _build_app -# from fastapi import FastAPI, UploadFile -# from fastapi.testclient import TestClient -# from tests.utils import mock_engine -# -# -# class FromFilesSchema(BaseModel): -# def from_files(self, f): -# # do nothing - this method exists just to test files endpoint logic -# ... 
-# -# -# class StrSchema(BaseModel): -# value: str -# -# -# def parse(v: StrSchema) -> int: -# return int(v.value) -# -# -# class TestStatusEndpoints: -# @pytest.fixture(scope="class") -# def server_config(self): -# server_config = ServerConfig( -# num_cores=1, num_workers=1, endpoints=[], loggers={} -# ) -# yield server_config -# -# @pytest.fixture(scope="class") -# def client(self, server_config): -# yield TestClient(_build_app(server_config)) -# -# def test_config(self, server_config, client): -# response = client.get("/config") -# loaded = ServerConfig(**response.json()) -# assert loaded == server_config -# -# @pytest.mark.parametrize("route", ["/ping", "/health", "/healthcheck", "/status"]) -# def test_pings_exist(self, client, route): -# response = client.get(route) -# assert response.status_code == 200 -# assert response.json() is True -# -# def test_docs_exist(self, client): -# assert client.get("/docs").status_code == 200 -# -# def test_home_redirects_to_docs(self, client): -# response = client.get("/") -# assert response.status_code == 200 -# assert response.request.path_url == "/docs" -# assert len(response.history) > 0 -# assert response.history[-1].is_redirect -# -# -# class TestMockEndpoints: -# @pytest.fixture(scope="class") -# def server_config(self): -# server_config = ServerConfig( -# num_cores=1, num_workers=1, endpoints=[], loggers={} -# ) -# yield server_config -# -# @pytest.fixture(scope="class") -# def app(self, server_config): -# yield _build_app(server_config) -# -# @pytest.fixture(scope="class") -# def client(self, app): -# yield TestClient(app) -# -# def test_add_model_endpoint(self, app: FastAPI, client: TestClient): -# mock_pipeline = Mock( -# side_effect=parse, -# input_schema=StrSchema, -# output_schema=int, -# logger=MultiLogger([]), -# ) -# _add_pipeline_endpoint( -# app, -# system_logging_config=SystemLoggingConfig(), -# endpoint_config=Mock(route="/predict/parse_int"), -# pipeline=mock_pipeline, -# ) -# assert app.routes[-1].path == "/predict/parse_int" -# assert app.routes[-1].response_model is int -# assert app.routes[-1].endpoint.__annotations__ == {"request": StrSchema} -# assert app.routes[-1].methods == {"POST"} -# -# for v in ["1234", "5678"]: -# response = client.post("/predict/parse_int", json=dict(value=v)) -# assert response.status_code == 200 -# assert response.json() == int(v) -# -# def test_add_model_endpoint_with_from_files(self, app): -# _add_pipeline_endpoint( -# app, -# system_logging_config=Mock(), -# endpoint_config=Mock(route="/predict/parse_int"), -# pipeline=Mock(input_schema=FromFilesSchema, output_schema=int), -# ) -# assert app.routes[-2].path == "/predict/parse_int" -# assert app.routes[-2].endpoint.__annotations__ == {"request": FromFilesSchema} -# assert app.routes[-1].path == "/predict/parse_int/from_files" -# assert app.routes[-1].endpoint.__annotations__ == {"request": List[UploadFile]} -# assert app.routes[-1].response_model is int -# assert app.routes[-1].methods == {"POST"} -# -# def test_sagemaker_only_adds_one_endpoint(self, app): -# num_routes = len(app.routes) -# _add_pipeline_endpoint( -# app, -# endpoint_config=Mock(route="/predict/parse_int"), -# system_logging_config=Mock(), -# pipeline=Mock(input_schema=FromFilesSchema, output_schema=int), -# integration="sagemaker", -# ) -# assert len(app.routes) == num_routes + 1 -# assert app.routes[-1].path == "/invocations" -# assert app.routes[-1].endpoint.__annotations__ == {"request": List[UploadFile]} -# -# num_routes = len(app.routes) -# _add_pipeline_endpoint( -# app, 
-# endpoint_config=Mock(route="/predict/parse_int"), -# system_logging_config=Mock(), -# pipeline=Mock(input_schema=StrSchema, output_schema=int), -# integration="sagemaker", -# ) -# assert len(app.routes) == num_routes + 1 -# assert app.routes[-1].path == "/invocations" -# assert app.routes[-1].endpoint.__annotations__ == {"request": StrSchema} -# -# def test_add_endpoint_with_no_route_specified(self, app): -# _add_pipeline_endpoint( -# app, -# endpoint_config=Mock(route=None), -# system_logging_config=Mock(), -# pipeline=Mock(input_schema=StrSchema, output_schema=int), -# ) -# assert app.routes[-1].path == "/predict" -# -# -# class TestActualModelEndpoints: -# @pytest.fixture(scope="class") -# def client(self): -# stub = ( -# "zoo:nlp/text_classification/distilbert-none/" -# "pytorch/huggingface/qqp/pruned80_quant-none-vnni" -# ) -# server_config = ServerConfig( -# num_cores=1, -# num_workers=1, -# endpoints=[ -# EndpointConfig( -# route="/predict/dynamic-batch", -# task="text-classification", -# model=stub, -# batch_size=1, -# ), -# EndpointConfig( -# route="/predict/static-batch", -# task="text-classification", -# model=stub, -# batch_size=2, -# ), -# ], -# loggers={}, # do not instantiate any loggers -# ) -# with mock_engine(rng_seed=0): -# app = _build_app(server_config) -# yield TestClient(app) -# -# def test_static_batch_errors_on_wrong_batch_size(self, client): -# with pytest.raises( -# RuntimeError, -# match=( -# "batch size of 1 passed into pipeline is " -# "not divisible by model batch size of 2" -# ), -# ): -# client.post("/predict/static-batch", json={"sequences": "today is great"}) -# -# def test_static_batch_good_request(self, client): -# response = client.post( -# "/predict/static-batch", -# json={"sequences": ["today is great", "today is terrible"]}, -# ) -# assert response.status_code == 200 -# output = response.json() -# assert len(output["labels"]) == 2 -# assert len(output["scores"]) == 2 -# -# @pytest.mark.parametrize( -# "seqs", -# [ -# ["today is great"], -# ["today is great", "today is terrible"], -# ["the first sentence", "the second sentence", "the third sentence"], -# ], -# ) -# def test_dynamic_batch_any(self, client, seqs): -# response = client.post("/predict/dynamic-batch", json={"sequences": seqs}) -# assert response.status_code == 200 -# output = response.json() -# assert len(output["labels"]) == len(seqs) -# assert len(output["scores"]) == len(seqs) -# -# -# class TestDynamicEndpoints: -# @pytest.fixture(scope="class") -# def client(self): -# server_config = ServerConfig( -# num_cores=1, num_workers=1, endpoints=[], loggers=None -# ) -# with mock_engine(rng_seed=0): -# app = _build_app(server_config) -# yield TestClient(app) -# -# -# @mock_engine(rng_seed=0) -# def test_dynamic_add_and_remove_endpoint(engine_mock): -# server_config = ServerConfig(num_cores=1, num_workers=1, endpoints=[], loggers={}) -# app = _build_app(server_config) -# client = TestClient(app) -# -# # assert /predict doesn't exist -# assert 404 == client.post("/predict", json=dict(sequences="asdf")).status_code -# -# # add /predict -# response = client.post( -# "/endpoints", -# json=EndpointConfig(task="text-classification", model="default").dict(), -# ) -# assert response.status_code == 200 -# response = client.post("/predict", json=dict(sequences="asdf")) -# assert response.status_code == 200 -# -# # remove /predict -# response = client.delete( -# "/endpoints", -# json=EndpointConfig( -# route="/predict", task="text-classification", model="default" -# ).dict(), -# ) -# assert 
response.status_code == 200 -# assert 404 == client.post("/predict", json=dict(sequences="asdf")).status_code +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import List +from unittest.mock import Mock + +from pydantic import BaseModel + +import pytest +from deepsparse.loggers import MultiLogger +from deepsparse.server.config import EndpointConfig, ServerConfig, SystemLoggingConfig +from deepsparse.server.server import _add_pipeline_endpoint, _build_app +from fastapi import FastAPI, UploadFile +from fastapi.testclient import TestClient +from tests.utils import mock_engine + + +class FromFilesSchema(BaseModel): + def from_files(self, f): + # do nothing - this method exists just to test files endpoint logic + ... + + +class StrSchema(BaseModel): + value: str + + +def parse(v: StrSchema) -> int: + return int(v.value) + + +class TestStatusEndpoints: + @pytest.fixture(scope="class") + def server_config(self): + server_config = ServerConfig( + num_cores=1, num_workers=1, endpoints=[], loggers={} + ) + yield server_config + + @pytest.fixture(scope="class") + def client(self, server_config): + yield TestClient(_build_app(server_config)) + + def test_config(self, server_config, client): + response = client.get("/config") + loaded = ServerConfig(**response.json()) + assert loaded == server_config + + @pytest.mark.parametrize("route", ["/ping", "/health", "/healthcheck", "/status"]) + def test_pings_exist(self, client, route): + response = client.get(route) + assert response.status_code == 200 + assert response.json() is True + + def test_docs_exist(self, client): + assert client.get("/docs").status_code == 200 + + def test_home_redirects_to_docs(self, client): + response = client.get("/") + assert response.status_code == 200 + assert response.request.path_url == "/docs" + assert len(response.history) > 0 + assert response.history[-1].is_redirect + + +class TestMockEndpoints: + @pytest.fixture(scope="class") + def server_config(self): + server_config = ServerConfig( + num_cores=1, num_workers=1, endpoints=[], loggers={} + ) + yield server_config + + @pytest.fixture(scope="class") + def app(self, server_config): + yield _build_app(server_config) + + @pytest.fixture(scope="class") + def client(self, app): + yield TestClient(app) + + def test_add_model_endpoint(self, app: FastAPI, client: TestClient): + mock_pipeline = Mock( + side_effect=parse, + input_schema=StrSchema, + output_schema=int, + logger=MultiLogger([]), + ) + _add_pipeline_endpoint( + app, + system_logging_config=SystemLoggingConfig(), + endpoint_config=Mock(route="/predict/parse_int"), + pipeline=mock_pipeline, + ) + assert app.routes[-1].path == "/predict/parse_int" + assert app.routes[-1].response_model is int + assert app.routes[-1].endpoint.__annotations__ == {"request": StrSchema} + assert app.routes[-1].methods == {"POST"} + + for v in ["1234", "5678"]: + response = client.post("/predict/parse_int", json=dict(value=v)) + assert response.status_code 
== 200 + assert response.json() == int(v) + + def test_add_model_endpoint_with_from_files(self, app): + _add_pipeline_endpoint( + app, + system_logging_config=Mock(), + endpoint_config=Mock(route="/predict/parse_int"), + pipeline=Mock(input_schema=FromFilesSchema, output_schema=int), + ) + assert app.routes[-2].path == "/predict/parse_int" + assert app.routes[-2].endpoint.__annotations__ == {"request": FromFilesSchema} + assert app.routes[-1].path == "/predict/parse_int/from_files" + assert app.routes[-1].endpoint.__annotations__ == {"request": List[UploadFile]} + assert app.routes[-1].response_model is int + assert app.routes[-1].methods == {"POST"} + + def test_sagemaker_only_adds_one_endpoint(self, app): + num_routes = len(app.routes) + _add_pipeline_endpoint( + app, + endpoint_config=Mock(route="/predict/parse_int"), + system_logging_config=Mock(), + pipeline=Mock(input_schema=FromFilesSchema, output_schema=int), + integration="sagemaker", + ) + assert len(app.routes) == num_routes + 1 + assert app.routes[-1].path == "/invocations" + assert app.routes[-1].endpoint.__annotations__ == {"request": List[UploadFile]} + + num_routes = len(app.routes) + _add_pipeline_endpoint( + app, + endpoint_config=Mock(route="/predict/parse_int"), + system_logging_config=Mock(), + pipeline=Mock(input_schema=StrSchema, output_schema=int), + integration="sagemaker", + ) + assert len(app.routes) == num_routes + 1 + assert app.routes[-1].path == "/invocations" + assert app.routes[-1].endpoint.__annotations__ == {"request": StrSchema} + + def test_add_endpoint_with_no_route_specified(self, app): + _add_pipeline_endpoint( + app, + endpoint_config=Mock(route=None), + system_logging_config=Mock(), + pipeline=Mock(input_schema=StrSchema, output_schema=int), + ) + assert app.routes[-1].path == "/predict" + + +class TestActualModelEndpoints: + @pytest.fixture(scope="class") + def client(self): + stub = ( + "zoo:nlp/text_classification/distilbert-none/" + "pytorch/huggingface/qqp/pruned80_quant-none-vnni" + ) + server_config = ServerConfig( + num_cores=1, + num_workers=1, + endpoints=[ + EndpointConfig( + route="/predict/dynamic-batch", + task="text-classification", + model=stub, + batch_size=1, + ), + EndpointConfig( + route="/predict/static-batch", + task="text-classification", + model=stub, + batch_size=2, + ), + ], + loggers={}, # do not instantiate any loggers + ) + with mock_engine(rng_seed=0): + app = _build_app(server_config) + yield TestClient(app) + + def test_static_batch_errors_on_wrong_batch_size(self, client): + with pytest.raises( + RuntimeError, + match=( + "batch size of 1 passed into pipeline is " + "not divisible by model batch size of 2" + ), + ): + client.post("/predict/static-batch", json={"sequences": "today is great"}) + + def test_static_batch_good_request(self, client): + response = client.post( + "/predict/static-batch", + json={"sequences": ["today is great", "today is terrible"]}, + ) + assert response.status_code == 200 + output = response.json() + assert len(output["labels"]) == 2 + assert len(output["scores"]) == 2 + + @pytest.mark.parametrize( + "seqs", + [ + ["today is great"], + ["today is great", "today is terrible"], + ["the first sentence", "the second sentence", "the third sentence"], + ], + ) + def test_dynamic_batch_any(self, client, seqs): + response = client.post("/predict/dynamic-batch", json={"sequences": seqs}) + assert response.status_code == 200 + output = response.json() + assert len(output["labels"]) == len(seqs) + assert len(output["scores"]) == len(seqs) + + +class 
TestDynamicEndpoints: + @pytest.fixture(scope="class") + def client(self): + server_config = ServerConfig( + num_cores=1, num_workers=1, endpoints=[], loggers=None + ) + with mock_engine(rng_seed=0): + app = _build_app(server_config) + yield TestClient(app) + + +@mock_engine(rng_seed=0) +def test_dynamic_add_and_remove_endpoint(engine_mock): + server_config = ServerConfig(num_cores=1, num_workers=1, endpoints=[], loggers={}) + app = _build_app(server_config) + client = TestClient(app) + + # assert /predict doesn't exist + assert 404 == client.post("/predict", json=dict(sequences="asdf")).status_code + + # add /predict + response = client.post( + "/endpoints", + json=EndpointConfig(task="text-classification", model="default").dict(), + ) + assert response.status_code == 200 + response = client.post("/predict", json=dict(sequences="asdf")) + assert response.status_code == 200 + + # remove /predict + response = client.delete( + "/endpoints", + json=EndpointConfig( + route="/predict", task="text-classification", model="default" + ).dict(), + ) + assert response.status_code == 200 + assert 404 == client.post("/predict", json=dict(sequences="asdf")).status_code diff --git a/tests/server/test_loggers.py b/tests/server/test_loggers.py index 8802835381..ce2576c09f 100644 --- a/tests/server/test_loggers.py +++ b/tests/server/test_loggers.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import os + import shutil from collections import Counter from unittest import mock @@ -58,247 +58,247 @@ def test_default_logger(): "deepsparse.server.server.server_logger_from_config", return_value=server_logger ), mock_engine(rng_seed=0): app = _build_app(server_config) - # client = TestClient(app) - # - # for _ in range(2): - # client.post("/predict", json={"sequences": "today is great"}) - # assert isinstance(fetch_leaf_logger(server_logger), PythonLogger) + client = TestClient(app) + for _ in range(2): + client.post("/predict", json={"sequences": "today is great"}) + assert isinstance(fetch_leaf_logger(server_logger), PythonLogger) -# def test_data_logging_from_predefined(): -# server_config = ServerConfig( -# endpoints=[ -# EndpointConfig( -# task=task, -# name="text_classification", -# model=stub, -# add_predefined=[MetricFunctionConfig(func="text_classification")], -# ) -# ], -# loggers={"logger_1": {"path": logger_identifier}}, -# ) -# server_logger = server_logger_from_config(server_config) -# with mock.patch( -# "deepsparse.server.server.server_logger_from_config", return_value=server_logger -# ), mock_engine(rng_seed=0): -# app = _build_app(server_config) -# client = TestClient(app) -# client.post( -# "/predict", -# json={ -# "sequences": [["Fun for adults and children.", "Fun for only children."]] -# }, -# ) -# calls = fetch_leaf_logger(server_logger).calls -# data_logging_logs = [call for call in calls if "DATA" in call] -# with open( -# "tests/deepsparse/loggers/metric_functions/predefined/predefined_logs/text_classification.txt", # noqa E501 -# "r", -# ) as f: -# expected_logs = f.read().splitlines() -# for log, expected_log in zip(data_logging_logs, expected_logs): -# assert log == expected_log -# -# -# @flaky(max_runs=4, min_passes=3) -# def test_logging_only_system_info(): -# server_config = ServerConfig( -# endpoints=[EndpointConfig(task=task, name=name, model=stub)], -# loggers={"logger_1": {"path": logger_identifier}}, -# ) -# server_logger = 
server_logger_from_config(server_config) -# with mock.patch( -# "deepsparse.server.server.server_logger_from_config", return_value=server_logger -# ), mock_engine(rng_seed=0): -# app = _build_app(server_config) -# client = TestClient(app) -# -# for _ in range(2): -# client.post("/predict", json={"sequences": "today is great"}) -# _test_logger_contents( -# fetch_leaf_logger(server_logger), -# {"prediction_latency": 8}, -# ) -# -# -# def test_regex_target_logging(): -# server_config = ServerConfig( -# endpoints=[ -# EndpointConfig( -# task=task, -# name=name, -# data_logging={ -# "re:.*pipeline*.": [MetricFunctionConfig(func="identity")] -# }, -# model=stub, -# ) -# ], -# loggers={"logger_1": {"path": logger_identifier}}, -# ) -# server_logger = server_logger_from_config(server_config) -# with mock.patch( -# "deepsparse.server.server.server_logger_from_config", return_value=server_logger -# ), mock_engine(rng_seed=0): -# app = _build_app(server_config) -# client = TestClient(app) -# -# for _ in range(2): -# client.post("/predict", json={"sequences": "today is great"}) -# _test_logger_contents( -# fetch_leaf_logger(server_logger), -# {"pipeline_inputs__identity": 2, "pipeline_outputs__identity": 2}, -# ) -# -# -# def test_multiple_targets_logging(): -# server_config = ServerConfig( -# endpoints=[ -# EndpointConfig( -# task=task, -# name=name, -# data_logging={ -# "pipeline_inputs.sequences": [ -# MetricFunctionConfig(func="identity") -# ], -# "engine_inputs": [MetricFunctionConfig(func="identity")], -# }, -# model=stub, -# ) -# ], -# loggers={"logger_1": {"path": logger_identifier}}, -# ) -# server_logger = server_logger_from_config(server_config) -# with mock.patch( -# "deepsparse.server.server.server_logger_from_config", return_value=server_logger -# ), mock_engine(rng_seed=0): -# app = _build_app(server_config) -# client = TestClient(app) -# -# for _ in range(2): -# client.post("/predict", json={"sequences": "today is great"}) -# _test_logger_contents( -# fetch_leaf_logger(server_logger), -# { -# "pipeline_inputs.sequences__identity": 2, -# "engine_inputs__identity": 2, -# "prediction_latency": 8, -# }, -# ) -# -# -# @flaky(max_runs=3, min_passes=2) -# def test_function_metric_with_target_loggers(): -# server_config = ServerConfig( -# endpoints=[ -# EndpointConfig( -# task=task, -# name=name, -# data_logging={ -# "pipeline_inputs.sequences[0]": [ -# MetricFunctionConfig( -# func="identity", target_loggers=["logger_1"] -# ) -# ], -# "engine_inputs": [MetricFunctionConfig(func="identity")], -# }, -# model=stub, -# ) -# ], -# loggers={ -# "logger_1": {"path": logger_identifier}, -# "logger_2": {"path": logger_identifier}, -# }, -# ) -# server_logger = server_logger_from_config(server_config) -# with mock.patch( -# "deepsparse.server.server.server_logger_from_config", return_value=server_logger -# ), mock_engine(rng_seed=0): -# app = _build_app(server_config) -# client = TestClient(app) -# -# for _ in range(2): -# client.post("/predict", json={"sequences": "today is great"}) -# -# _test_logger_contents( -# server_logger.logger.loggers[1].logger.loggers[0], -# { -# "pipeline_inputs.sequences__identity": 2, -# "engine_inputs__identity": 2, -# "prediction_latency": 8, -# }, -# ) -# _test_logger_contents( -# server_logger.logger.loggers[1].logger.loggers[1], -# { -# "pipeline_inputs.sequences__identity": 0, -# "engine_inputs__identity": 2, -# "prediction_latency": 8, -# }, -# ) -# -# -# @mock_engine(rng_seed=0) -# def test_instantiate_prometheus(mock_engine, tmp_path): -# client = TestClient( -# 
_build_app( -# ServerConfig( -# endpoints=[EndpointConfig(task="text_classification", model="default")], -# loggers=dict( -# prometheus={ -# "port": find_free_port(), -# "text_log_save_dir": tmp_path.name, -# "text_log_save_frequency": 30, -# } -# ), -# ) -# ) -# ) -# r = client.post("/predict", json=dict(sequences="asdf")) -# assert r.status_code == 200 -# shutil.rmtree(tmp_path.name, ignore_errors=True) -# -# -# @mock_engine(rng_seed=0) -# def test_endpoint_system_logging(mock_engine): -# server_config = ServerConfig( -# system_logging=ServerSystemLoggingConfig( -# request_details=SystemLoggingGroup(enable=True), -# resource_utilization=SystemLoggingGroup(enable=True), -# ), -# endpoints=[ -# EndpointConfig( -# task="text_classification", -# model="default", -# route="/predict_text_classification", -# logging_config=PipelineSystemLoggingConfig( -# inference_details=SystemLoggingGroup(enable=True), -# prediction_latency=SystemLoggingGroup(enable=True), -# ), -# ), -# EndpointConfig( -# task="question_answering", -# model="default", -# route="/predict_question_answering", -# logging_config=PipelineSystemLoggingConfig( -# inference_details=SystemLoggingGroup(enable=True), -# prediction_latency=SystemLoggingGroup(enable=True), -# ), -# ), -# ], -# loggers={"logger_1": {"path": logger_identifier}}, -# ) -# server_logger = server_logger_from_config(server_config) -# with mock.patch( -# "deepsparse.server.server.server_logger_from_config", return_value=server_logger -# ), mock_engine: -# app = _build_app(server_config) -# client = TestClient(app) -# client.post("/predict_text_classification", json=dict(sequences="asdf")) -# client.post( -# "/predict_text_classification", json=dict(question="asdf", context="asdf") -# ) -# calls = server_logger.logger.loggers[0].logger.loggers[0].calls -# -# c = Counter([call.split(",")[0] for call in calls]) -# -# assert c == SAMPLE_LOGS_DICT + +def test_data_logging_from_predefined(): + server_config = ServerConfig( + endpoints=[ + EndpointConfig( + task=task, + name="text_classification", + model=stub, + add_predefined=[MetricFunctionConfig(func="text_classification")], + ) + ], + loggers={"logger_1": {"path": logger_identifier}}, + ) + server_logger = server_logger_from_config(server_config) + with mock.patch( + "deepsparse.server.server.server_logger_from_config", return_value=server_logger + ), mock_engine(rng_seed=0): + app = _build_app(server_config) + client = TestClient(app) + client.post( + "/predict", + json={ + "sequences": [["Fun for adults and children.", "Fun for only children."]] + }, + ) + calls = fetch_leaf_logger(server_logger).calls + data_logging_logs = [call for call in calls if "DATA" in call] + with open( + "tests/deepsparse/loggers/metric_functions/predefined/predefined_logs/text_classification.txt", # noqa E501 + "r", + ) as f: + expected_logs = f.read().splitlines() + for log, expected_log in zip(data_logging_logs, expected_logs): + assert log == expected_log + + +@flaky(max_runs=4, min_passes=3) +def test_logging_only_system_info(): + server_config = ServerConfig( + endpoints=[EndpointConfig(task=task, name=name, model=stub)], + loggers={"logger_1": {"path": logger_identifier}}, + ) + server_logger = server_logger_from_config(server_config) + with mock.patch( + "deepsparse.server.server.server_logger_from_config", return_value=server_logger + ), mock_engine(rng_seed=0): + app = _build_app(server_config) + client = TestClient(app) + + for _ in range(2): + client.post("/predict", json={"sequences": "today is great"}) + 
_test_logger_contents( + fetch_leaf_logger(server_logger), + {"prediction_latency": 8}, + ) + + +def test_regex_target_logging(): + server_config = ServerConfig( + endpoints=[ + EndpointConfig( + task=task, + name=name, + data_logging={ + "re:.*pipeline*.": [MetricFunctionConfig(func="identity")] + }, + model=stub, + ) + ], + loggers={"logger_1": {"path": logger_identifier}}, + ) + server_logger = server_logger_from_config(server_config) + with mock.patch( + "deepsparse.server.server.server_logger_from_config", return_value=server_logger + ), mock_engine(rng_seed=0): + app = _build_app(server_config) + client = TestClient(app) + + for _ in range(2): + client.post("/predict", json={"sequences": "today is great"}) + _test_logger_contents( + fetch_leaf_logger(server_logger), + {"pipeline_inputs__identity": 2, "pipeline_outputs__identity": 2}, + ) + + +def test_multiple_targets_logging(): + server_config = ServerConfig( + endpoints=[ + EndpointConfig( + task=task, + name=name, + data_logging={ + "pipeline_inputs.sequences": [ + MetricFunctionConfig(func="identity") + ], + "engine_inputs": [MetricFunctionConfig(func="identity")], + }, + model=stub, + ) + ], + loggers={"logger_1": {"path": logger_identifier}}, + ) + server_logger = server_logger_from_config(server_config) + with mock.patch( + "deepsparse.server.server.server_logger_from_config", return_value=server_logger + ), mock_engine(rng_seed=0): + app = _build_app(server_config) + client = TestClient(app) + + for _ in range(2): + client.post("/predict", json={"sequences": "today is great"}) + _test_logger_contents( + fetch_leaf_logger(server_logger), + { + "pipeline_inputs.sequences__identity": 2, + "engine_inputs__identity": 2, + "prediction_latency": 8, + }, + ) + + +@flaky(max_runs=3, min_passes=2) +def test_function_metric_with_target_loggers(): + server_config = ServerConfig( + endpoints=[ + EndpointConfig( + task=task, + name=name, + data_logging={ + "pipeline_inputs.sequences[0]": [ + MetricFunctionConfig( + func="identity", target_loggers=["logger_1"] + ) + ], + "engine_inputs": [MetricFunctionConfig(func="identity")], + }, + model=stub, + ) + ], + loggers={ + "logger_1": {"path": logger_identifier}, + "logger_2": {"path": logger_identifier}, + }, + ) + server_logger = server_logger_from_config(server_config) + with mock.patch( + "deepsparse.server.server.server_logger_from_config", return_value=server_logger + ), mock_engine(rng_seed=0): + app = _build_app(server_config) + client = TestClient(app) + + for _ in range(2): + client.post("/predict", json={"sequences": "today is great"}) + + _test_logger_contents( + server_logger.logger.loggers[1].logger.loggers[0], + { + "pipeline_inputs.sequences__identity": 2, + "engine_inputs__identity": 2, + "prediction_latency": 8, + }, + ) + _test_logger_contents( + server_logger.logger.loggers[1].logger.loggers[1], + { + "pipeline_inputs.sequences__identity": 0, + "engine_inputs__identity": 2, + "prediction_latency": 8, + }, + ) + + +@mock_engine(rng_seed=0) +def test_instantiate_prometheus(mock_engine, tmp_path): + client = TestClient( + _build_app( + ServerConfig( + endpoints=[EndpointConfig(task="text_classification", model="default")], + loggers=dict( + prometheus={ + "port": find_free_port(), + "text_log_save_dir": tmp_path.name, + "text_log_save_frequency": 30, + } + ), + ) + ) + ) + r = client.post("/predict", json=dict(sequences="asdf")) + assert r.status_code == 200 + shutil.rmtree(tmp_path.name, ignore_errors=True) + + +@mock_engine(rng_seed=0) +def 
test_endpoint_system_logging(mock_engine): + server_config = ServerConfig( + system_logging=ServerSystemLoggingConfig( + request_details=SystemLoggingGroup(enable=True), + resource_utilization=SystemLoggingGroup(enable=True), + ), + endpoints=[ + EndpointConfig( + task="text_classification", + model="default", + route="/predict_text_classification", + logging_config=PipelineSystemLoggingConfig( + inference_details=SystemLoggingGroup(enable=True), + prediction_latency=SystemLoggingGroup(enable=True), + ), + ), + EndpointConfig( + task="question_answering", + model="default", + route="/predict_question_answering", + logging_config=PipelineSystemLoggingConfig( + inference_details=SystemLoggingGroup(enable=True), + prediction_latency=SystemLoggingGroup(enable=True), + ), + ), + ], + loggers={"logger_1": {"path": logger_identifier}}, + ) + server_logger = server_logger_from_config(server_config) + with mock.patch( + "deepsparse.server.server.server_logger_from_config", return_value=server_logger + ), mock_engine: + app = _build_app(server_config) + client = TestClient(app) + client.post("/predict_text_classification", json=dict(sequences="asdf")) + client.post( + "/predict_text_classification", json=dict(question="asdf", context="asdf") + ) + calls = server_logger.logger.loggers[0].logger.loggers[0].calls + + c = Counter([call.split(",")[0] for call in calls]) + + assert c == SAMPLE_LOGS_DICT diff --git a/tests/server/test_system_logging.py b/tests/server/test_system_logging.py index bd0a8a3ae3..b6a3fbd2b6 100644 --- a/tests/server/test_system_logging.py +++ b/tests/server/test_system_logging.py @@ -1,169 +1,169 @@ -# # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# # -# # Licensed under the Apache License, Version 2.0 (the "License"); -# # you may not use this file except in compliance with the License. -# # You may obtain a copy of the License at -# # -# # http://www.apache.org/licenses/LICENSE-2.0 -# # -# # Unless required by applicable law or agreed to in writing, -# # software distributed under the License is distributed on an "AS IS" BASIS, -# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# # See the License for the specific language governing permissions and -# # limitations under the License. 
-# -# from unittest import mock -# -# import pytest -# from deepsparse.loggers.config import SystemLoggingGroup -# from deepsparse.server.config import ( -# EndpointConfig, -# ServerConfig, -# ServerSystemLoggingConfig, -# ) -# from deepsparse.server.helpers import server_logger_from_config -# from deepsparse.server.server import _build_app -# from deepsparse.server.system_logging import log_resource_utilization -# from fastapi.testclient import TestClient -# from tests.deepsparse.loggers.helpers import ListLogger -# from tests.utils import mock_engine -# -# -# logger_identifier = "tests/deepsparse/loggers/helpers.py:ListLogger" -# stub = "zoo:nlp/text_classification/distilbert-none/pytorch/huggingface/qqp/pruned80_quant-none-vnni" # noqa E501 -# task = "text-classification" -# name = "endpoint_name" -# -# -# def _test_successful_requests(calls, successful_request): -# relevant_call = [ -# call -# for call in calls -# if call.startswith("identifier:request_details/successful_request_count") -# ] -# assert len(relevant_call) == 1 -# relevant_call = relevant_call[0] -# value = bool(int(relevant_call.split("value:")[1].split(",")[0])) -# assert value == successful_request -# -# -# def _test_response_msg(calls, response_msg): -# relevant_call = [ -# call -# for call in calls -# if call.startswith("identifier:request_details/response_message") -# ] -# assert len(relevant_call) == 1 -# relevant_call = relevant_call[0] -# value = relevant_call.split("value:")[1].split(",")[0] -# assert value == response_msg -# -# -# @pytest.mark.parametrize( -# "json_payload, input_batch_size, successful_request, response_msg", -# [ -# ({"sequences": "today is great"}, 1, True, "Response status code: 200"), -# ( -# {"sequences": ["today is great", "today is great"]}, -# 2, -# True, -# "Response status code: 200", -# ), -# ({"this": "is supposed to fail"}, 1, False, "Response status code: 422"), -# ], -# ) -# def test_log_request_details( -# json_payload, input_batch_size, successful_request, response_msg -# ): -# server_config = ServerConfig( -# endpoints=[ -# EndpointConfig( -# task=task, name=name, model=stub, batch_size=input_batch_size -# ) -# ], -# loggers={"logger_1": {"path": logger_identifier}}, -# system_logging=ServerSystemLoggingConfig( -# request_details=SystemLoggingGroup(enable=True) -# ), -# ) -# server_logger = server_logger_from_config(server_config) -# with mock.patch( -# "deepsparse.server.server.server_logger_from_config", return_value=server_logger -# ), mock_engine(rng_seed=0): -# app = _build_app(server_config) -# client = TestClient(app) -# client.post("/predict", json=json_payload) -# -# calls = server_logger.logger.loggers[0].logger.loggers[0].calls -# -# _test_successful_requests(calls, successful_request) -# _test_response_msg(calls, response_msg) -# -# -# def _test_cpu_utilization(calls, num_iterations): -# relevant_calls = [ -# call -# for call in calls -# if call.startswith("identifier:resource_utilization/cpu_utilization_percent") -# ] -# assert len(relevant_calls) == num_iterations -# -# -# def _test_memory_utilization(calls, num_iterations): -# relevant_calls = [ -# call -# for call in calls -# if call.startswith("identifier:resource_utilization/memory_utilization_percent") -# ] -# values = [float(call.split("value:")[1].split(",")[0]) for call in relevant_calls] -# assert len(relevant_calls) == num_iterations -# # memory utilization is a percentage, so it should be between 0 and 100 -# assert all(0.0 < value < 100.0 for value in values) -# -# -# def 
_test_total_memory_available(calls, num_iterations): -# relevant_calls = [ -# call -# for call in calls -# if call.startswith( -# "identifier:resource_utilization/total_memory_available_bytes" -# ) -# ] -# values = [float(call.split("value:")[1].split(",")[0]) for call in relevant_calls] -# assert len(relevant_calls) == num_iterations -# # assert all values are the same (total memory available is constant) -# assert all(value == values[0] for value in values) -# -# -# def _test_additional_items_to_log(calls, num_iterations): -# relevant_calls = [ -# call -# for call in calls -# if call.startswith("identifier:resource_utilization/test") -# ] -# values = [float(call.split("value:")[1].split(",")[0]) for call in relevant_calls] -# assert len(relevant_calls) == num_iterations -# # assert all values are the same ({"test" : 1} is constant) -# assert all(value == 1 for value in values) -# -# -# @pytest.mark.parametrize( -# "num_iterations, additional_items_to_log", -# [ -# (5, {}), -# (3, {"test": 1}), -# ], -# ) -# def test_log_resource_utilization(num_iterations, additional_items_to_log): -# server_logger = ListLogger() -# -# for iter in range(num_iterations): -# log_resource_utilization( -# server_logger, prefix="resource_utilization", **additional_items_to_log -# ) -# -# calls = server_logger.calls -# -# _test_cpu_utilization(calls, num_iterations) -# _test_memory_utilization(calls, num_iterations) -# _test_total_memory_available(calls, num_iterations) +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from unittest import mock + +import pytest +from deepsparse.loggers.config import SystemLoggingGroup +from deepsparse.server.config import ( + EndpointConfig, + ServerConfig, + ServerSystemLoggingConfig, +) +from deepsparse.server.helpers import server_logger_from_config +from deepsparse.server.server import _build_app +from deepsparse.server.system_logging import log_resource_utilization +from fastapi.testclient import TestClient +from tests.deepsparse.loggers.helpers import ListLogger +from tests.utils import mock_engine + + +logger_identifier = "tests/deepsparse/loggers/helpers.py:ListLogger" +stub = "zoo:nlp/text_classification/distilbert-none/pytorch/huggingface/qqp/pruned80_quant-none-vnni" # noqa E501 +task = "text-classification" +name = "endpoint_name" + + +def _test_successful_requests(calls, successful_request): + relevant_call = [ + call + for call in calls + if call.startswith("identifier:request_details/successful_request_count") + ] + assert len(relevant_call) == 1 + relevant_call = relevant_call[0] + value = bool(int(relevant_call.split("value:")[1].split(",")[0])) + assert value == successful_request + + +def _test_response_msg(calls, response_msg): + relevant_call = [ + call + for call in calls + if call.startswith("identifier:request_details/response_message") + ] + assert len(relevant_call) == 1 + relevant_call = relevant_call[0] + value = relevant_call.split("value:")[1].split(",")[0] + assert value == response_msg + + +@pytest.mark.parametrize( + "json_payload, input_batch_size, successful_request, response_msg", + [ + ({"sequences": "today is great"}, 1, True, "Response status code: 200"), + ( + {"sequences": ["today is great", "today is great"]}, + 2, + True, + "Response status code: 200", + ), + ({"this": "is supposed to fail"}, 1, False, "Response status code: 422"), + ], +) +def test_log_request_details( + json_payload, input_batch_size, successful_request, response_msg +): + server_config = ServerConfig( + endpoints=[ + EndpointConfig( + task=task, name=name, model=stub, batch_size=input_batch_size + ) + ], + loggers={"logger_1": {"path": logger_identifier}}, + system_logging=ServerSystemLoggingConfig( + request_details=SystemLoggingGroup(enable=True) + ), + ) + server_logger = server_logger_from_config(server_config) + with mock.patch( + "deepsparse.server.server.server_logger_from_config", return_value=server_logger + ), mock_engine(rng_seed=0): + app = _build_app(server_config) + client = TestClient(app) + client.post("/predict", json=json_payload) + + calls = server_logger.logger.loggers[0].logger.loggers[0].calls + + _test_successful_requests(calls, successful_request) + _test_response_msg(calls, response_msg) + + +def _test_cpu_utilization(calls, num_iterations): + relevant_calls = [ + call + for call in calls + if call.startswith("identifier:resource_utilization/cpu_utilization_percent") + ] + assert len(relevant_calls) == num_iterations + + +def _test_memory_utilization(calls, num_iterations): + relevant_calls = [ + call + for call in calls + if call.startswith("identifier:resource_utilization/memory_utilization_percent") + ] + values = [float(call.split("value:")[1].split(",")[0]) for call in relevant_calls] + assert len(relevant_calls) == num_iterations + # memory utilization is a percentage, so it should be between 0 and 100 + assert all(0.0 < value < 100.0 for value in values) + + +def _test_total_memory_available(calls, num_iterations): + relevant_calls = [ + call + for call in calls + if call.startswith( + 
"identifier:resource_utilization/total_memory_available_bytes" + ) + ] + values = [float(call.split("value:")[1].split(",")[0]) for call in relevant_calls] + assert len(relevant_calls) == num_iterations + # assert all values are the same (total memory available is constant) + assert all(value == values[0] for value in values) + + +def _test_additional_items_to_log(calls, num_iterations): + relevant_calls = [ + call + for call in calls + if call.startswith("identifier:resource_utilization/test") + ] + values = [float(call.split("value:")[1].split(",")[0]) for call in relevant_calls] + assert len(relevant_calls) == num_iterations + # assert all values are the same ({"test" : 1} is constant) + assert all(value == 1 for value in values) + + +@pytest.mark.parametrize( + "num_iterations, additional_items_to_log", + [ + (5, {}), + (3, {"test": 1}), + ], +) +def test_log_resource_utilization(num_iterations, additional_items_to_log): + server_logger = ListLogger() + + for iter in range(num_iterations): + log_resource_utilization( + server_logger, prefix="resource_utilization", **additional_items_to_log + ) + + calls = server_logger.calls + + _test_cpu_utilization(calls, num_iterations) + _test_memory_utilization(calls, num_iterations) + _test_total_memory_available(calls, num_iterations) From 4ad5f49d4845baa19156c6ea499c854f52c5cc21 Mon Sep 17 00:00:00 2001 From: Damian Date: Tue, 13 Jun 2023 05:34:42 +0000 Subject: [PATCH 09/11] fix the erronous Makefile --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 09082e8c38..50476f2e7c 100644 --- a/Makefile +++ b/Makefile @@ -53,12 +53,12 @@ artifacts: # run tests for the repo test: @echo "Running python tests"; - @SPARSEZOO_TEST_MODE="true" @NM_DISABLE_ANALYTICS="true" pytest tests/ --ignore integrations $(PYTEST_ARGS); + @SPARSEZOO_TEST_MODE="true" NM_DISABLE_ANALYTICS="true" pytest tests/ --ignore integrations $(PYTEST_ARGS); # run integrations tests for the repo test_integrations: @echo "Running package integrations tests"; - @SPARSEZOO_TEST_MODE="true" @NM_DISABLE_ANALYTICS="true" pytest integrations/ --ignore tests $(PYTEST_ARGS); + @SPARSEZOO_TEST_MODE="true" NM_DISABLE_ANALYTICS="true" pytest integrations/ --ignore tests $(PYTEST_ARGS); # create docs docs: From f97467f36b20a16f3cf79c41509e9c94bd4b3686 Mon Sep 17 00:00:00 2001 From: Damian Date: Tue, 13 Jun 2023 06:23:03 +0000 Subject: [PATCH 10/11] perhaps fixed GHA --- src/deepsparse/utils/onnx.py | 3 ++- tests/utils/engine_mocking.py | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/deepsparse/utils/onnx.py b/src/deepsparse/utils/onnx.py index eb31179bc9..5ce98ce6a0 100644 --- a/src/deepsparse/utils/onnx.py +++ b/src/deepsparse/utils/onnx.py @@ -198,6 +198,7 @@ def generate_random_inputs( return input_data_list +@contextlib.contextmanager def override_onnx_batch_size( onnx_filepath: str, batch_size: int, @@ -233,7 +234,7 @@ def override_onnx_batch_size( f"Overwriting in-place the batch size of the model at {onnx_filepath}" ) save_onnx(model, onnx_filepath) - return onnx_filepath + yield onnx_filepath else: return save_onnx_to_temp_files(model, with_external_data=not inplace) diff --git a/tests/utils/engine_mocking.py b/tests/utils/engine_mocking.py index 4a83c42c00..978971c63d 100644 --- a/tests/utils/engine_mocking.py +++ b/tests/utils/engine_mocking.py @@ -99,7 +99,9 @@ def __init__( # Assumes the first dimension is batch dimension!! 
         # However in general we cannot assume that all outputs have
         # a batch dimension, that's why we need onnxruntime here.
-        with override_onnx_batch_size(model_path, batch_size) as batched_model_path:
+        with override_onnx_batch_size(
+            model_path, batch_size, inplace=True
+        ) as batched_model_path:
             session = ort.InferenceSession(batched_model_path)
             self.input_descriptors = list(map(_to_descriptor, session.get_inputs()))
             self.output_descriptors = list(map(_to_descriptor, session.get_outputs()))

From 6be8d87f3496ef2d2898c998d7dfa346bc694dcb Mon Sep 17 00:00:00 2001
From: Damian
Date: Tue, 13 Jun 2023 06:42:56 +0000
Subject: [PATCH 11/11] take into consideration that GHA creates four files

---
 tests/conftest.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 62f781f043..7be5656806 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -69,7 +69,12 @@ def check_for_created_files():
     end_files_root = _get_files(directory=r".")
     end_files_temp = _get_files(directory=tempfile.gettempdir())

-    assert len(start_files_root) >= len(end_files_root), (
+    max_allowed_number_created_files = 4
+    # GHA needs to create the following files:
+    # pyproject.toml, CONTRIBUTING.md, LICENSE, setup.cfg
+    assert len(start_files_root) + max_allowed_number_created_files >= len(
+        end_files_root
+    ), (
         f"{len(end_files_root) - len(start_files_root)} "
         f"files created in current working "
         f"directory during pytest run. "
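
Note on how the last two patches fit together: once override_onnx_batch_size is wrapped in contextlib.contextmanager, callers such as tests/utils/engine_mocking.py enter it with "with ... as batched_model_path", and passing inplace=True rewrites the model file instead of leaving a temporary copy behind, which is what the tightened check_for_created_files fixture in tests/conftest.py polices. The snippet below is a minimal usage sketch and not part of the patch series: "model.onnx" is a hypothetical placeholder path, the import is assumed from the module location src/deepsparse/utils/onnx.py, and it sticks to inplace=True because the non-inplace branch in the diff still returns the save_onnx_to_temp_files context manager rather than yielding from it.

# Minimal usage sketch (assumptions noted above; not part of the patches).
import onnxruntime as ort

from deepsparse.utils.onnx import override_onnx_batch_size

model_path = "model.onnx"  # hypothetical placeholder

with override_onnx_batch_size(model_path, batch_size=2, inplace=True) as batched_path:
    # with inplace=True the file at model_path is overwritten and its own path is
    # yielded, so no temporary ONNX file is left for the created-files check to flag
    session = ort.InferenceSession(batched_path)
    print([inp.name for inp in session.get_inputs()])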