Commit 54843d3

fix: Address review comments

- Fix typing issues, add dependencies to `setup.py`, and add qualified-name checking for the module registry
- Add a step-by-step tutorial to the sample module substitution, with detailed instructions for creating a new module substitution
- Update `custom_op` for the new Torch schema

1 parent b110e60 commit 54843d3

6 files changed: +84, -31 lines

Diff for: .circleci/config.yml

+1-1
@@ -258,7 +258,7 @@ commands:
           name: Set up python environment
           command: |
             pip3 install --upgrade pip
-            pip3 install wheel setuptools pyyaml
+            pip3 install wheel setuptools
             pip3 install nvidia-pyindex
             pip3 install tabulate
             pip3 install tensorrt==<< parameters.trt-version-long >> nvidia-cudnn-cu11==<< parameters.cudnn-version-long >>
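
The dropped `pyyaml` pin matches the commit message's note that dependencies were moved into `setup.py`. The `setup.py` diff itself is not shown in this extract; as a rough, hypothetical sketch of what such a declaration looks like (names and contents assumed, not taken from the commit):

# Hypothetical sketch -- the actual setup.py change is not visible on this page.
from setuptools import setup

setup(
    name="torch_tensorrt",
    install_requires=[
        "pyyaml",  # previously installed ad hoc in the CI environment step
        # ... remaining runtime dependencies
    ],
)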

Diff for: py/torch_tensorrt/__init__.py

+1-1
@@ -94,7 +94,7 @@ def _find_lib(name, paths):
 
 from torch_tensorrt import fx
 
-if version.parse(torch.__version__) >= version.parse("2.dev"):
+if version.parse(torch.__version__) >= version.parse("2.1.dev"):
     from torch_tensorrt import dynamo
     from torch_tensorrt.dynamo import backend
 
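
The version-gate fix matters because of how `packaging.version` orders pre-releases: "2.dev" sorts before every stable 2.x release, so the old bound let stable torch 2.0.x trigger the dynamo import. A quick standalone check (illustrative, not part of the commit):

from packaging import version

# "2.dev" parses as pre-release 2.dev0, which sorts *before* 2.0, so any
# stable torch 2.0.x satisfied the old check:
assert version.parse("2.0.1") >= version.parse("2.dev")

# "2.1.dev" (i.e. 2.1.dev0) sorts after every 2.0.x release, so the new
# bound admits only torch 2.1 pre-releases and newer:
assert not version.parse("2.0.1") >= version.parse("2.1.dev")
assert version.parse("2.1.0.dev20230601") >= version.parse("2.1.dev")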

Diff for: py/torch_tensorrt/dynamo/backend/backends.py

+1-1
@@ -69,7 +69,7 @@ def aot_torch_tensorrt_aten_backend(
 
     logger.debug("Pre-module replacement graph:\n" + str(gm.graph))
 
-    # Enable Pre-AOT Lowering for Module-Level Replacement
+    # Perform Pre-AOT Lowering for Module-Level Replacement
     gm = pre_aot_module_replacement(gm)
 
     logger.debug("Post-module replacement graph:\n" + str(gm.graph))

Diff for: py/torch_tensorrt/dynamo/backend/lowering/_partition.py

+1-1
@@ -16,7 +16,7 @@
 logger = logging.getLogger(__name__)
 
 DEFAULT_SINGLE_NODE_PARTITIONS: Set[str] = set(
-    "torch.ops." + str(module.new_operator)
+    _get_qualified_name(module.new_operator)
     for module in MODULE_SUBSTITUTION_REGISTRY.values()
 )
 
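
For reference, `_get_qualified_name` (imported from `torch.fx.node`) resolves a callable, including op overloads, to a fully qualified name carrying the `torch.ops` prefix that the replaced line concatenated by hand. A minimal sketch using a built-in op (the exact string format can vary across torch versions):

import torch
from torch.fx.node import _get_qualified_name

# Yields a fully qualified name such as "torch.ops.aten.add.Tensor"
# (exact formatting may differ between torch versions).
print(_get_qualified_name(torch.ops.aten.add.Tensor))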

Diff for: py/torch_tensorrt/dynamo/backend/lowering/_pre_aot_lowering.py

+5-3
@@ -1,5 +1,5 @@
 from dataclasses import dataclass
-from typing import Any, Callable, Dict
+from typing import Any, Callable, Dict, Type
 import torch
 import logging
 
@@ -23,11 +23,11 @@ class ModuleReplacement:
 
 
 # Dictionary mapping module to ModuleReplacement instance
-MODULE_SUBSTITUTION_REGISTRY: Dict[torch.nn.Module, ModuleReplacement] = dict()
+MODULE_SUBSTITUTION_REGISTRY: Dict[Type[torch.nn.Module], ModuleReplacement] = dict()
 
 
 def module_substitution(
-    module_to_replace: torch.nn.Module,
+    module_to_replace: Type[torch.nn.Module],
     new_operator: torch._ops.OpOverload,
     enabled: bool = True,
 ) -> Callable[[Any], Any]:
@@ -102,6 +102,7 @@ def pre_aot_module_replacement(gm: torch.fx.GraphModule):
             # Replace all original node uses and clean up graph
             n.replace_all_uses_with(new_node)
             gm.graph.eliminate_dead_code()
+            gm.graph.lint()
             gm.recompile()
 
     # A module replacement can fail in the event that the specific instance of the submodule cannot
@@ -115,5 +116,6 @@ def pre_aot_module_replacement(gm: torch.fx.GraphModule):
 
     # Perform cleanup and recompilation before returning module
     gm.graph.eliminate_dead_code()
+    gm.graph.lint()
     gm.recompile()
     return gm
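
Two notes on this hunk. The added `gm.graph.lint()` calls make the pass fail fast if a rewrite leaves the graph malformed: lint checks topological ordering and that node arguments reference nodes belonging to the graph. And the switch to `Type[torch.nn.Module]` reflects that the registry is keyed by module classes, not instances; a minimal standalone sketch of the pattern (registry name and value type hypothetical):

from typing import Dict, Type
import torch

# Hypothetical mini-registry mirroring the keying pattern above
REGISTRY: Dict[Type[torch.nn.Module], str] = {}
REGISTRY[torch.nn.MaxPool1d] = "tensorrt::maxpool1d"  # registered by class

pool = torch.nn.MaxPool1d(kernel_size=3)
assert type(pool) in REGISTRY  # instances are looked up via type(...)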
Diff for: [maxpool1d module-substitution example; file path not shown in this extract]

+75-24

@@ -1,6 +1,6 @@
 from typing import Dict, Tuple
 import torch
-from torch._custom_op import custom_op
+from torch._custom_op.impl import custom_op
 from torch.fx.node import Argument, Target
 
 from torch_tensorrt.fx.converter_registry import tensorrt_converter
@@ -10,30 +10,94 @@
 from torch_tensorrt.dynamo.backend.lowering import module_substitution
 
 
+# This file serves as an example and a tutorial for excluding custom modules from
+# torch.compile tracing. Each required step is labeled with a number indicating the
+# preferable implementation order.
+
+
+# 1. The Placeholder
+#
+# Specify the schema and namespace of the operator, as well as a placeholder function
+# representing the schema. The schema should be in torch JIT syntax, indicating input and output
+# types. The namespace, such as tensorrt, will cause the op to be registered as torch.ops.tensorrt.your_op
+# Then, create a placeholder function with no operations, but having the same schema and naming as that
+# used in the decorator
 @custom_op(
-    "(Tensor x, int[1] kernel_size, int[1] stride=[], int[1] padding=[], int[1] dilation=[], bool ceil_mode=False) -> Tensor",
-    ns="tensorrt",
+    qualname="tensorrt::maxpool1d",
+    manual_schema="(Tensor x, int[1] kernel_size, int[1] stride, int[1] padding, int[1] dilation, bool ceil_mode) -> Tensor",
 )
-def maxpool1d(x, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=False):
+def maxpool1d(x, kernel_size, stride, padding, dilation, ceil_mode):
     # Defines operator schema, name, namespace, and function header
     ...
 
 
+# 2. The Generic Implementation
+#
+# Define the default implementation of the operator in torch syntax. This is used for autograd
+# and other tracing functionality. Generally, the torch.nn.functional analog of the operator to replace
+# is desirable. If the operator to replace is a custom module you've written, then add its Torch
+# implementation here. Note that the function header to the generic function can have specific arguments
+# as in the above placeholder
 @maxpool1d.impl("cpu")
 @maxpool1d.impl("cuda")
 def maxpool1d_generic(
     *args,
     **kwargs,
 ):
-    # Defines a converter implementation for AOT Autograd to use for shape analysis/propagation
+    # Defines an implementation for AOT Autograd to use for shape analysis/propagation
     return torch.nn.functional.max_pool1d(
         *args,
         **kwargs,
     )
 
 
+# 3. The Module Substitution Function
+#
+# Define a function which can intercept a node of the kind to be replaced, extract
+# the relevant data from that node/submodule, and then re-package the information
+# for use by an accelerated implementation (to be implemented in step 4). This function
+# should use the operator defined in step 1 (for example torch.ops.tensorrt.maxpool1d).
+# It should refactor the args and kwargs as is needed by the accelerated implementation.
+#
+# If the submodule has weights or other Tensor fields which the accelerated implementation
+# needs, the function should insert the necessary nodes to access those weights. For example,
+# if the weight Tensor of a submodule is needed, one could write:
+#
+#     weights = gm.graph.get_attr(n.target + ".weight", torch.Tensor)
+#     bias = gm.graph.get_attr(n.target + ".bias", torch.Tensor)
+#     ...
+#     kwargs={"weight": weights,
+#             "bias": bias,
+#             ...
+#
+@module_substitution(torch.nn.MaxPool1d, torch.ops.tensorrt.maxpool1d)
+def maxpool1d_insertion_fn(
+    gm: torch.fx.GraphModule, submodule: torch.nn.Module, node: torch.fx.Node
+) -> torch.fx.Node:
+    # Defines insertion function for new node
+    new_node = gm.graph.call_function(
+        torch.ops.tensorrt.maxpool1d,
+        args=node.args,
+        kwargs={
+            "kernel_size": submodule.kernel_size,
+            "stride": submodule.stride,
+            "padding": submodule.padding,
+            "dilation": submodule.dilation,
+            "ceil_mode": submodule.ceil_mode,
+        },
+    )
+
+    return new_node
+
+
+# 4. The Accelerated Implementation
+#
+# Define an accelerated implementation of the operator, and register it as necessary.
+# This accelerated implementation should consume the args/kwargs specified in step 3.
+# One should expect that torch.compile will compress all kwargs into the args field in
+# the order specified in the schema written in step 1.
 @tensorrt_converter(torch.ops.tensorrt.maxpool1d.default)
-def aten_ops_maxpool1d(
+def tensorrt_maxpool1d(
     network: TRTNetwork,
     target: Target,
     args: Tuple[Argument, ...],
@@ -55,21 +119,8 @@ def aten_ops_maxpool1d(
     )
 
 
-@module_substitution(torch.nn.MaxPool1d, torch.ops.tensorrt.maxpool1d)
-def maxpool1d_insertion_fn(
-    gm: torch.fx.GraphModule, submodule: torch.nn.Module, node: torch.fx.Node
-) -> torch.fx.Node:
-    # Defines insertion function for new node
-    new_node = gm.graph.call_function(
-        torch.ops.tensorrt.maxpool1d,
-        args=node.args,
-        kwargs={
-            "kernel_size": submodule.kernel_size,
-            "stride": submodule.stride,
-            "padding": submodule.padding,
-            "dilation": submodule.dilation,
-            "ceil_mode": submodule.ceil_mode,
-        },
-    )
-
-    return new_node
+# 5. Add Imports
+#
+# Add your accelerated module file to the __init__.py in this directory, to ensure
+# all registrations are run. For instance, if the new module file is called new_mod.py,
+# one should add `from .new_mod import *` to the __init__.py
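
Once all five steps are in place, a model containing `torch.nn.MaxPool1d` compiled through the torch_tensorrt dynamo backend should have the submodule swapped for `torch.ops.tensorrt.maxpool1d` before AOT tracing. A rough usage sketch (backend name assumed to be the registered "torch_tensorrt" alias):

import torch

class Pooler(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.pool = torch.nn.MaxPool1d(kernel_size=3)

    def forward(self, x):
        return self.pool(x)

model = Pooler().eval().cuda()
x = torch.randn(4, 8, 32).cuda()

# Pre-AOT lowering replaces the MaxPool1d submodule with the custom op,
# which the converter registered in step 4 then implements in TensorRT.
compiled = torch.compile(model, backend="torch_tensorrt")
out = compiled(x)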
