
Commit 28de1ec

TensorRT backend initial implementation (#1)
* Build and simple model load
* Initial implementation
* Clean up
* Fix intermittent segfaults
1 parent f8fc67b commit 28de1ec

16 files changed, +6040 −0 lines changed

.clang-format

+37
@@ -0,0 +1,37 @@
---
BasedOnStyle: Google

IndentWidth: 2
ContinuationIndentWidth: 4
UseTab: Never
MaxEmptyLinesToKeep: 2

SortIncludes: true
CompactNamespaces: true
ReflowComments: true

DerivePointerAlignment: false
PointerAlignment: Left

AllowShortIfStatementsOnASingleLine: false
AllowShortBlocksOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline

AlwaysBreakAfterReturnType: TopLevelDefinitions
AlignAfterOpenBracket: AlwaysBreak
BreakBeforeBraces: Custom
BraceWrapping:
  AfterClass: false
  AfterControlStatement: false
  AfterEnum: false
  AfterFunction: true
  AfterNamespace: false
  AfterStruct: false
  AfterUnion: false
  BeforeCatch: true

BinPackArguments: true
BinPackParameters: true
ConstructorInitializerAllOnOneLineOrOnePerLine: false

IndentCaseLabels: true

CMakeLists.txt

+229
@@ -0,0 +1,229 @@
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

cmake_minimum_required(VERSION 3.17)

project(tritontensorrtbackend LANGUAGES C CXX)

#
# Options
#
option(TRITON_ENABLE_GPU "Enable GPU support in backend." ON)
option(TRITON_ENABLE_STATS "Include statistics collections in backend." ON)
set(TRITON_TENSORRT_LIB_PATHS "" CACHE PATH "Paths to TensorRT libraries. Multiple paths may be specified by separating them with a semicolon.")
set(TRITON_TENSORRT_INCLUDE_PATHS "" CACHE PATH "Paths to TensorRT includes. Multiple paths may be specified by separating them with a semicolon.")

set(TRITON_BACKEND_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/backend repo.")
set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo.")
set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo.")

if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
endif()

set(TRITON_TENSORRT_BACKEND_LIBNAME triton_tensorrt)
set(TRITON_TENSORRT_BACKEND_INSTALLDIR ${CMAKE_INSTALL_PREFIX}/backends/tensorrt)

#
# Dependencies
#
# FetchContent's composability isn't very good. We must include the
# transitive closure of all repos so that we can override the tag.
#
include(FetchContent)

FetchContent_Declare(
  repo-common
  GIT_REPOSITORY https://github.com/triton-inference-server/common.git
  GIT_TAG ${TRITON_COMMON_REPO_TAG}
  GIT_SHALLOW ON
)
FetchContent_Declare(
  repo-core
  GIT_REPOSITORY https://github.com/triton-inference-server/core.git
  GIT_TAG ${TRITON_CORE_REPO_TAG}
  GIT_SHALLOW ON
)
FetchContent_Declare(
  repo-backend
  GIT_REPOSITORY https://github.com/triton-inference-server/backend.git
  GIT_TAG ${TRITON_BACKEND_REPO_TAG}
  GIT_SHALLOW ON
)
FetchContent_MakeAvailable(repo-common repo-core repo-backend)

#
# CUDA
#
if(${TRITON_ENABLE_GPU})
  find_package(CUDAToolkit REQUIRED)
  message(STATUS "Using CUDA ${CUDA_VERSION}")
  set(CUDA_NVCC_FLAGS -std=c++11)

  if(CUDA_VERSION VERSION_GREATER "10.1" OR CUDA_VERSION VERSION_EQUAL "10.1")
    add_definitions(-DTRITON_ENABLE_CUDA_GRAPH=1)
  else()
    message(WARNING "CUDA ${CUDA_VERSION} does not support CUDA graphs.")
  endif()
else()
  message(FATAL_ERROR "TensorRT backend requires TRITON_ENABLE_GPU=1, CMake will exit.")
endif() # TRITON_ENABLE_GPU

#
# Shared library implementing the Triton Backend API
#
configure_file(src/libtriton_tensorrt.ldscript libtriton_tensorrt.ldscript COPYONLY)

add_library(
  triton-tensorrt-backend SHARED
  src/tensorrt.cc
  src/tensorrt_model.cc
  src/tensorrt_model.h
  src/tensorrt_model_instance.cc
  src/tensorrt_model_instance.h
  src/tensorrt_utils.cc
  src/tensorrt_utils.h
  src/loader.cc
  src/loader.h
  src/logging.cc
  src/logging.h
)

add_library(
  TritonTensorRTBackend::triton-tensorrt-backend ALIAS triton-tensorrt-backend
)

target_include_directories(
  triton-tensorrt-backend
  PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src
)

target_include_directories(
  triton-tensorrt-backend
  PRIVATE ${TRITON_TENSORRT_INCLUDE_PATHS}
)

target_compile_features(triton-tensorrt-backend PRIVATE cxx_std_11)
target_compile_options(
  triton-tensorrt-backend PRIVATE
  $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
    -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Werror>
)

# C/C++ defines that are used directly by this backend.
target_compile_definitions(
  triton-tensorrt-backend
  PRIVATE TRITON_ENABLE_GPU=1
)

set_target_properties(
  triton-tensorrt-backend
  PROPERTIES
    POSITION_INDEPENDENT_CODE ON
    OUTPUT_NAME ${TRITON_TENSORRT_BACKEND_LIBNAME}
    SKIP_BUILD_RPATH TRUE
    BUILD_WITH_INSTALL_RPATH TRUE
    INSTALL_RPATH_USE_LINK_PATH FALSE
    INSTALL_RPATH "$\{ORIGIN\}"
    LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libtriton_tensorrt.ldscript
    LINK_FLAGS "-Wl,--version-script libtriton_tensorrt.ldscript"
)

FOREACH(p ${TRITON_TENSORRT_LIB_PATHS})
  set(TRITON_TENSORRT_LDFLAGS ${TRITON_TENSORRT_LDFLAGS} "-L${p}")
ENDFOREACH(p)


target_link_libraries(
  triton-tensorrt-backend
  PRIVATE
    triton-core-serverapi   # from repo-core
    triton-core-serverstub  # from repo-core
    triton-backend-utils    # from repo-backend
    -lpthread
)

target_link_libraries(
  triton-tensorrt-backend
  PRIVATE ${TRITON_TENSORRT_LDFLAGS}
)

target_link_libraries(
  triton-tensorrt-backend
  PRIVATE
    CUDA::cudart
)


#
# Install
#
include(GNUInstallDirs)
set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/TritonTensorRTBackend)

install(
  TARGETS
    triton-tensorrt-backend
  EXPORT
    triton-tensorrt-backend-targets
  LIBRARY DESTINATION ${TRITON_TENSORRT_BACKEND_INSTALLDIR}
  ARCHIVE DESTINATION ${TRITON_TENSORRT_BACKEND_INSTALLDIR}
)

install(
  EXPORT
    triton-tensorrt-backend-targets
  FILE
    TritonTensorRTBackendTargets.cmake
  NAMESPACE
    TritonTensorRTBackend::
  DESTINATION
    ${INSTALL_CONFIGDIR}
)

include(CMakePackageConfigHelpers)
configure_package_config_file(
  ${CMAKE_CURRENT_LIST_DIR}/cmake/TritonTensorRTBackendConfig.cmake.in
  ${CMAKE_CURRENT_BINARY_DIR}/TritonTensorRTBackendConfig.cmake
  INSTALL_DESTINATION ${INSTALL_CONFIGDIR}
)

install(
  FILES
    ${CMAKE_CURRENT_BINARY_DIR}/TritonTensorRTBackendConfig.cmake
  DESTINATION ${INSTALL_CONFIGDIR}
)

#
# Export from build tree
#
export(
  EXPORT triton-tensorrt-backend-targets
  FILE ${CMAKE_CURRENT_BINARY_DIR}/TritonTensorRTBackendTargets.cmake
  NAMESPACE TritonTensorRTBackend::
)

export(PACKAGE TritonTensorRTBackend)

README.md

+63
@@ -1,7 +1,70 @@
<!--
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->

[![License](https://img.shields.io/badge/License-BSD3-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause)

# TensorRT Backend

**WORK IN PROGRESS: This repository tracks the development of the TensorRT
backend using the new TRITONBACKEND API and is not fit for use yet. The
source for the current TensorRT backend can be found
[here](https://github.com/triton-inference-server/server/tree/master/src/backends/tensorrt).**

The Triton backend for [TensorRT](https://github.com/NVIDIA/TensorRT).
You can learn more about Triton backends in the [backend
repo](https://github.com/triton-inference-server/backend). Ask
questions or report problems on the [issues
page](https://github.com/triton-inference-server/server/issues).
This backend is designed to run serialized [TensorRT engine](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#build_engine_c)
models using the TensorRT C++ API.
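
As a rough sketch (not part of this commit), a serialized engine is
typically deployed to Triton in a model repository laid out along the
following lines; `model_repository`, `my_tensorrt_model`, and the source
engine path are placeholders, while `model.plan` follows Triton's usual
naming convention for TensorRT models:

```
$ mkdir -p model_repository/my_tensorrt_model/1
$ cp /path/to/serialized.engine model_repository/my_tensorrt_model/1/model.plan
```

The model's `config.pbtxt` would then declare `platform: "tensorrt_plan"`
along with the model's inputs and outputs.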

Where can I ask general questions about Triton and Triton backends?
Be sure to read all the information below as well as the [general
Triton documentation](https://github.com/triton-inference-server/server#triton-inference-server)
available in the main [server](https://github.com/triton-inference-server/server)
repo. If you don't find your answer there you can ask questions on the
main Triton [issues page](https://github.com/triton-inference-server/server/issues).

## Build the TensorRT Backend

An appropriate version of TensorRT must be installed on the system. Check
the support matrix to find the correct version of TensorRT to install.

```
$ mkdir build
$ cd build
$ cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install ..
$ make install
```

The following required Triton repositories will be pulled and used in
the build. By default the "main" branch/tag will be used for each repo,
but the CMake argument listed below can be used to override it (see the
example after this list).

* triton-inference-server/backend: -DTRITON_BACKEND_REPO_TAG=[tag]
* triton-inference-server/core: -DTRITON_CORE_REPO_TAG=[tag]
* triton-inference-server/common: -DTRITON_COMMON_REPO_TAG=[tag]
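
For example, a build that pins all three repositories to a matching
release tag and points CMake at a non-default TensorRT installation
could look like the following; `<tag>` and the `/usr/local/tensorrt`
paths are placeholders, not values required by this backend:

```
$ mkdir build && cd build
$ cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install \
      -DTRITON_BACKEND_REPO_TAG=<tag> \
      -DTRITON_CORE_REPO_TAG=<tag> \
      -DTRITON_COMMON_REPO_TAG=<tag> \
      -DTRITON_TENSORRT_INCLUDE_PATHS=/usr/local/tensorrt/include \
      -DTRITON_TENSORRT_LIB_PATHS=/usr/local/tensorrt/lib ..
$ make install
```

Both `TRITON_TENSORRT_INCLUDE_PATHS` and `TRITON_TENSORRT_LIB_PATHS`
accept semicolon-separated lists of paths, as described by the
corresponding cache variables in `CMakeLists.txt`.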

cmake/TritonTensorRTBackendConfig.cmake.in

+39
@@ -0,0 +1,39 @@
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

include(CMakeFindDependencyMacro)

get_filename_component(
  TRITONTENSORRTBACKEND_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH
)

list(APPEND CMAKE_MODULE_PATH ${TRITONTENSORRTBACKEND_CMAKE_DIR})

if(NOT TARGET TritonTensorRTBackend::triton-tensorrt-backend)
  include("${TRITONTENSORRTBACKEND_CMAKE_DIR}/TritonTensorRTBackendTargets.cmake")
endif()

set(TRITONTENSORRTBACKEND_LIBRARIES TritonTensorRTBackend::triton-tensorrt-backend)
