From f8bab8f375c40fb5f9893b374371cd734444c9e8 Mon Sep 17 00:00:00 2001 From: gs-olive <113141689+gs-olive@users.noreply.github.com> Date: Wed, 7 Jun 2023 15:43:27 -0700 Subject: [PATCH] fix: Repair argument passing in both Dynamo paths - Pass-through new TRT args in export - Pass-through build failures arg in compile - Remove deprecated options including `explicit_batch_dimension` and `explicit_precision` from Dynamo utilities and update references to those options in settings --- py/torch_tensorrt/dynamo/backend/__init__.py | 2 ++ py/torch_tensorrt/dynamo/backend/conversion.py | 1 - .../dynamo/fx_ts_compat/fx2trt.py | 16 ++++------------ py/torch_tensorrt/dynamo/fx_ts_compat/lower.py | 18 ++++++++++-------- .../dynamo/fx_ts_compat/lower_setting.py | 3 --- 5 files changed, 16 insertions(+), 24 deletions(-) diff --git a/py/torch_tensorrt/dynamo/backend/__init__.py b/py/torch_tensorrt/dynamo/backend/__init__.py index 037294965c..44797ba959 100644 --- a/py/torch_tensorrt/dynamo/backend/__init__.py +++ b/py/torch_tensorrt/dynamo/backend/__init__.py @@ -45,6 +45,7 @@ def compile( min_block_size=MIN_BLOCK_SIZE, torch_executed_ops=[], torch_executed_modules=[], + pass_through_build_failures=PASS_THROUGH_BUILD_FAILURES, **kwargs, ): if debug: @@ -86,6 +87,7 @@ def compile( workspace_size=workspace_size, min_block_size=min_block_size, torch_executed_ops=torch_executed_ops, + pass_through_build_failures=pass_through_build_failures, **kwargs, ) diff --git a/py/torch_tensorrt/dynamo/backend/conversion.py b/py/torch_tensorrt/dynamo/backend/conversion.py index f2631f0c87..310b6f86ce 100644 --- a/py/torch_tensorrt/dynamo/backend/conversion.py +++ b/py/torch_tensorrt/dynamo/backend/conversion.py @@ -36,7 +36,6 @@ def convert_module( interpreter = TRTInterpreter( module, InputTensorSpec.from_tensors(inputs), - explicit_batch_dimension=True, logger_level=(trt.Logger.VERBOSE if settings.debug else trt.Logger.WARNING), output_dtypes=output_dtypes, ) diff --git 
a/py/torch_tensorrt/dynamo/fx_ts_compat/fx2trt.py b/py/torch_tensorrt/dynamo/fx_ts_compat/fx2trt.py index 444efc0f4e..bfc8425a95 100644 --- a/py/torch_tensorrt/dynamo/fx_ts_compat/fx2trt.py +++ b/py/torch_tensorrt/dynamo/fx_ts_compat/fx2trt.py @@ -38,8 +38,6 @@ def __init__( self, module: torch.fx.GraphModule, input_specs: List[InputTensorSpec], - explicit_batch_dimension: bool = True, - explicit_precision: bool = False, logger_level=None, output_dtypes=None, ): @@ -49,17 +47,11 @@ def __init__( self.builder = trt.Builder(self.logger) flag = 0 - if explicit_batch_dimension: - EXPLICIT_BATCH = 1 << (int)( - trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH - ) - flag |= EXPLICIT_BATCH - if explicit_precision: - EXPLICIT_PRECISION = 1 << (int)( - trt.NetworkDefinitionCreationFlag.EXPLICIT_PRECISION - ) - flag |= EXPLICIT_PRECISION + # It is deprecated to not use this flag + EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + flag |= EXPLICIT_BATCH + self.network = self.builder.create_network(flag) missing_ops = self.validate_conversion() diff --git a/py/torch_tensorrt/dynamo/fx_ts_compat/lower.py b/py/torch_tensorrt/dynamo/fx_ts_compat/lower.py index 8131edb540..63477d894f 100644 --- a/py/torch_tensorrt/dynamo/fx_ts_compat/lower.py +++ b/py/torch_tensorrt/dynamo/fx_ts_compat/lower.py @@ -49,6 +49,9 @@ def compile( cuda_graph_batch_size=-1, is_aten=False, use_experimental_fx_rt=False, + max_aux_streams=None, + version_compatible=False, + optimization_level=None, num_avg_timing_iters=1, torch_executed_ops=[], torch_executed_modules=[], @@ -68,14 +71,12 @@ def compile( save_timing_cache: Update timing cache with current timing cache data if set to True. cuda_graph_batch_size: Cuda graph batch size, default to be -1. use_experimental_fx_rt: Uses the next generation TRTModule which supports both Python and TorchScript based execution (including in C++). 
+ max_aux_streams: maximum number of auxiliary streams to use + version_compatible: enable version compatible feature + optimization_level: builder optimization level Returns: A torch.nn.Module lowered by TensorRT. """ - if use_experimental_fx_rt and not explicit_batch_dimension: - raise ValueError( - "The experimental unifed runtime only supports explicit batch. Please make sure to set explicit_batch_dimension=True when use_experimental_fx_rt=True" - ) - logger.warn( "For ir=fx_ts_compat backend only the " + "following arguments are supported: " @@ -123,6 +124,9 @@ def compile( cuda_graph_batch_size=cuda_graph_batch_size, is_aten=is_aten, use_experimental_rt=use_experimental_fx_rt, + max_aux_streams=max_aux_streams, + version_compatible=version_compatible, + optimization_level=optimization_level, ) lowerer = Lowerer.create(lower_setting=lower_setting) return lowerer(module, inputs) @@ -162,8 +166,6 @@ def __call__(self, mod, input, split_name) -> TRTInterpreterResult: interpreter = TRTInterpreter( mod, input_specs=self.lower_setting.input_specs, - explicit_batch_dimension=self.lower_setting.explicit_batch_dimension, - explicit_precision=self.lower_setting.explicit_precision, logger_level=trt.Logger.VERBOSE if self.lower_setting.debug else trt.Logger.WARNING, @@ -198,7 +200,7 @@ def default_split_function( model: fx.GraphModule, inputs: Input, lower_setting: LowerSetting ) -> SplitResult: splitter_setting = TRTSplitterSetting() - splitter_setting.use_implicit_batch_dim = not lower_setting.explicit_batch_dimension + splitter_setting.use_implicit_batch_dim = False splitter_setting.min_block_size = lower_setting.min_block_size splitter_setting.use_experimental_rt = lower_setting.use_experimental_rt splitter = TRTSplitter(model, inputs, settings=splitter_setting) diff --git a/py/torch_tensorrt/dynamo/fx_ts_compat/lower_setting.py b/py/torch_tensorrt/dynamo/fx_ts_compat/lower_setting.py index 64fa1bf267..64a67d1cc2 100644 --- 
a/py/torch_tensorrt/dynamo/fx_ts_compat/lower_setting.py +++ b/py/torch_tensorrt/dynamo/fx_ts_compat/lower_setting.py @@ -44,7 +44,6 @@ class LowerSetting(LowerSettingBasic): Args: input_specs: Specs for inputs to engine, can either be a single size or a range defined by Min, Optimal, Max sizes. - explicit_precision: Use explicit precision during lowering. workspace_size: The maximum workspace size. The maximum GPU temporary memory which the TensorRT engine can use at execution time. strict_type_constraints: Require TensorRT engine to strictly follow data type @@ -76,8 +75,6 @@ class LowerSetting(LowerSettingBasic): """ input_specs: List[InputTensorSpec] = dc.field(default_factory=list) - explicit_batch_dimension: bool = True - explicit_precision: bool = False workspace_size: int = 0 strict_type_constraints: bool = False customized_fuse_pass: PassManager = dc.field(