Merge pull request #1397 from pytorch/minor_fix

peri044 · web-flow · commit a558e2a8da45 · 2022-10-11T15:52:26.000-07:00
chore: minor fixes
diff --git a/BUILD b/BUILD
@@ -22,9 +22,9 @@ pkg_tar(
         "//core/lowering:include",
         "//core/lowering/passes:include",
         "//core/partitioning:include",
-        "//core/partitioning/segmentedblock:include",
-        "//core/partitioning/partitioninginfo:include",
         "//core/partitioning/partitioningctx:include",
+        "//core/partitioning/partitioninginfo:include",
+        "//core/partitioning/segmentedblock:include",
         "//core/plugins:impl_include",
         "//core/plugins:include",
         "//core/runtime:include",
diff --git a/tests/core/partitioning/test_conditionals.cpp b/tests/core/partitioning/test_conditionals.cpp
@@ -71,5 +71,5 @@ TEST(Partitioning, FallbackInplaceOPInConditionalsCorrectly) {
   auto jit_results = mod.forward(jit_inputs_ivalues).toTensor();
   auto trt_mod = torch_tensorrt::core::CompileGraph(mod, cfg);
   auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor();
-  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results, trt_results, 2e-6));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results));
 }
diff --git a/tests/core/partitioning/test_fallback_graph_output.cpp b/tests/core/partitioning/test_fallback_graph_output.cpp
@@ -34,7 +34,7 @@ TEST(Partitioning, ComputeResNet50FallbackGraphCorrectly) {
   auto jit_results = mod.forward(jit_inputs_ivalues).toTensor();
   auto trt_mod = torch_tensorrt::core::CompileGraph(mod, cfg);
   auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor();
-  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results, 0.99));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results));
 }
 
 TEST(Partitioning, ComputeMobileNetFallbackGraphCorrectly) {
@@ -64,6 +64,6 @@ TEST(Partitioning, ComputeMobileNetFallbackGraphCorrectly) {
   auto jit_results = mod.forward(jit_inputs_ivalues).toTensor();
   auto trt_mod = torch_tensorrt::core::CompileGraph(mod, cfg);
   auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor();
-  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results, 0.99));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results));
 }
 #endif
diff --git a/tests/core/partitioning/test_loop_fallback.cpp b/tests/core/partitioning/test_loop_fallback.cpp
@@ -30,7 +30,7 @@ TEST(Partitioning, CheckLoopFallbackEvalCompilesCorrectly) {
   auto jit_results = mod.forward(jit_inputs_ivalues).toTensor();
   auto trt_mod = torch_tensorrt::core::CompileGraph(mod, cfg);
   auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor();
-  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results, trt_results, 2e-6));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results));
 }
 
 TEST(Partitioning, CheckLoopFallbackNoEvalCompilesCorrectly) {
@@ -58,5 +58,5 @@ TEST(Partitioning, CheckLoopFallbackNoEvalCompilesCorrectly) {
   auto jit_results = mod.forward(jit_inputs_ivalues).toTensor();
   auto trt_mod = torch_tensorrt::core::CompileGraph(mod, cfg);
   auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor();
-  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results, trt_results, 2e-6));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results));
 }
diff --git a/tests/core/partitioning/test_resolve_nontensor_inputs.cpp b/tests/core/partitioning/test_resolve_nontensor_inputs.cpp
@@ -87,7 +87,7 @@ TEST(Partitioning, ResolveNonTensorInputsForIFBlockCorrectly) {
   auto jit_results = mod.forward({jit_in0, jit_in1});
   auto trt_results = new_mod.forward({trt_in0, trt_in1});
 
-  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results.toTensor(), trt_results.toTensor(), 2e-6));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results.toTensor(), trt_results.toTensor()));
 }
 
 TEST(Partitioning, ResolveNonTensorInputsCorrectly) {
diff --git a/tests/cpp/cpp_api_test.h b/tests/cpp/cpp_api_test.h
@@ -6,7 +6,7 @@
 #include "torch/script.h"
 #include "torch_tensorrt/torch_tensorrt.h"
 
-using PathAndInput = std::tuple<std::string, std::vector<std::vector<int64_t>>, std::vector<c10::ScalarType>, float>;
+using PathAndInput = std::tuple<std::string, std::vector<std::vector<int64_t>>, std::vector<c10::ScalarType>>;
 
 class CppAPITests : public testing::TestWithParam<PathAndInput> {
  public:
@@ -22,7 +22,6 @@ class CppAPITests : public testing::TestWithParam<PathAndInput> {
     }
     input_shapes = std::get<1>(params);
     input_types = std::get<2>(params);
-    threshold = std::get<3>(params);
   }
 
   void TearDown() {
@@ -34,5 +33,4 @@ class CppAPITests : public testing::TestWithParam<PathAndInput> {
   torch::jit::script::Module mod;
   std::vector<std::vector<int64_t>> input_shapes;
   std::vector<c10::ScalarType> input_types;
-  float threshold;
 };
diff --git a/tests/cpp/test_collections.cpp b/tests/cpp/test_collections.cpp
@@ -42,7 +42,7 @@ TEST(CppAPITests, TestCollectionStandardTensorInput) {
   auto trt_mod = torch_tensorrt::torchscript::compile(mod, compile_settings);
   auto trt_out = trt_mod.forward(inputs_);
 
-  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(out.toTensor(), trt_out.toTensor(), 0.99));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(out.toTensor(), trt_out.toTensor()));
 }
 
 TEST(CppAPITests, TestCollectionTupleInput) {
@@ -85,7 +85,7 @@ TEST(CppAPITests, TestCollectionTupleInput) {
   auto trt_mod = torch_tensorrt::torchscript::compile(mod, compile_settings);
   auto trt_out = trt_mod.forward(complex_inputs);
 
-  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(out.toTensor(), trt_out.toTensor(), 0.99));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(out.toTensor(), trt_out.toTensor()));
 }
 
 TEST(CppAPITests, TestCollectionListInput) {
@@ -144,7 +144,7 @@ TEST(CppAPITests, TestCollectionListInput) {
   LOG_DEBUG("Finish compile");
   auto trt_out = trt_mod.forward(complex_inputs);
 
-  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(out.toTensor(), trt_out.toTensor(), 0.99));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(out.toTensor(), trt_out.toTensor()));
 }
 
 TEST(CppAPITests, TestCollectionTupleInputOutput) {
@@ -178,23 +178,20 @@ TEST(CppAPITests, TestCollectionTupleInputOutput) {
   torch::jit::IValue complex_input_shape(input_shape_tuple);
   std::tuple<torch::jit::IValue> input_tuple2(complex_input_shape);
   torch::jit::IValue complex_input_shape2(input_tuple2);
-  // torch::jit::IValue complex_input_shape(list);
 
   auto compile_settings = torch_tensorrt::ts::CompileSpec(complex_input_shape2);
   compile_settings.min_block_size = 1;
 
-  // compile_settings.torch_executed_ops.push_back("prim::TupleConstruct");
-
   // // FP16 execution
   compile_settings.enabled_precisions = {torch::kHalf};
   // // Compile module
   auto trt_mod = torch_tensorrt::torchscript::compile(mod, compile_settings);
   auto trt_out = trt_mod.forward(complex_inputs);
 
-  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
-      out.toTuple()->elements()[0].toTensor(), trt_out.toTuple()->elements()[0].toTensor(), 1e-5));
-  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
-      out.toTuple()->elements()[1].toTensor(), trt_out.toTuple()->elements()[1].toTensor(), 1e-5));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(
+      out.toTuple()->elements()[0].toTensor(), trt_out.toTuple()->elements()[0].toTensor()));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(
+      out.toTuple()->elements()[1].toTensor(), trt_out.toTuple()->elements()[1].toTensor()));
 }
 
 TEST(CppAPITests, TestCollectionListInputOutput) {
@@ -252,10 +249,10 @@ TEST(CppAPITests, TestCollectionListInputOutput) {
   auto trt_mod = torch_tensorrt::torchscript::compile(mod, compile_settings);
   auto trt_out = trt_mod.forward(complex_inputs);
 
-  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
-      out.toList().vec()[0].toTensor(), trt_out.toList().vec()[0].toTensor(), 1e-5));
-  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
-      out.toList().vec()[1].toTensor(), trt_out.toList().vec()[1].toTensor(), 1e-5));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(
+      out.toList().vec()[0].toTensor(), trt_out.toList().vec()[0].toTensor()));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(
+      out.toList().vec()[1].toTensor(), trt_out.toList().vec()[1].toTensor()));
 }
 
 TEST(CppAPITests, TestCollectionComplexModel) {
@@ -313,8 +310,8 @@ TEST(CppAPITests, TestCollectionComplexModel) {
   auto trt_mod = torch_tensorrt::torchscript::compile(mod, compile_settings);
   auto trt_out = trt_mod.forward(complex_inputs);
 
-  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
-      out.toTuple()->elements()[0].toTensor(), trt_out.toTuple()->elements()[0].toTensor(), 1e-5));
-  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
-      out.toTuple()->elements()[1].toTensor(), trt_out.toTuple()->elements()[1].toTensor(), 1e-5));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(
+      out.toTuple()->elements()[0].toTensor(), trt_out.toTuple()->elements()[0].toTensor()));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(
+      out.toTuple()->elements()[1].toTensor(), trt_out.toTuple()->elements()[1].toTensor()));
 }
diff --git a/tests/cpp/test_compiled_modules.cpp b/tests/cpp/test_compiled_modules.cpp
@@ -41,8 +41,7 @@ TEST_P(CppAPITests, CompiledModuleIsClose) {
   }
 
   for (size_t i = 0; i < trt_results.size(); i++) {
-    ASSERT_TRUE(
-        torch_tensorrt::tests::util::cosineSimEqual(jit_results[i], trt_results[i].reshape_as(jit_results[i]), 0.99));
+    ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results[i], trt_results[i].reshape_as(jit_results[i])));
   }
 }
 
@@ -52,10 +51,10 @@ INSTANTIATE_TEST_SUITE_P(
     CompiledModuleForwardIsCloseSuite,
     CppAPITests,
     testing::Values(
-        PathAndInput({"tests/modules/resnet18_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 2e-5}),
-        PathAndInput({"tests/modules/mobilenet_v2_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 2e-5}),
-        PathAndInput({"tests/modules/efficientnet_b0_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 8e-3}),
-        PathAndInput({"tests/modules/bert_base_uncased_traced.jit.pt", {{1, 14}, {1, 14}}, {at::kInt, at::kInt}, 8e-2}),
-        PathAndInput({"tests/modules/vit_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 8e-2})));
+        PathAndInput({"tests/modules/resnet18_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}}),
+        PathAndInput({"tests/modules/mobilenet_v2_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}}),
+        PathAndInput({"tests/modules/efficientnet_b0_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}}),
+        PathAndInput({"tests/modules/bert_base_uncased_traced.jit.pt", {{1, 14}, {1, 14}}, {at::kInt, at::kInt}}),
+        PathAndInput({"tests/modules/vit_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}})));
 
 #endif
diff --git a/tests/cpp/test_default_input_types.cpp b/tests/cpp/test_default_input_types.cpp
@@ -116,5 +116,4 @@ TEST_P(CppAPITests, InputsRespectUserSettingFP32WeightsFP16In) {
 INSTANTIATE_TEST_SUITE_P(
     CompiledModuleForwardIsCloseSuite,
     CppAPITests,
-    testing::Values(
-        PathAndInput({"tests/modules/resnet18_traced.jit.pt", {{1, 3, 224, 224}}, {at::kFloat} /*unused*/, 2e-5})));
+    testing::Values(PathAndInput({"tests/modules/resnet18_traced.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}})));
diff --git a/tests/cpp/test_example_tensors.cpp b/tests/cpp/test_example_tensors.cpp
@@ -21,4 +21,4 @@ TEST_P(CppAPITests, InputsFromTensors) {
 INSTANTIATE_TEST_SUITE_P(
     CompiledModuleForwardIsCloseSuite,
     CppAPITests,
-    testing::Values(PathAndInput({"tests/modules/resnet18_traced.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 2e-5})));
+    testing::Values(PathAndInput({"tests/modules/resnet18_traced.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}})));
diff --git a/tests/cpp/test_module_fallback.cpp b/tests/cpp/test_module_fallback.cpp
@@ -30,7 +30,7 @@ TEST(CppAPITest, ResNetModuleFallbacksCorrectly) {
   auto jit_results = mod.forward(jit_inputs_ivalues).toTensor();
   auto trt_mod = torch_tensorrt::ts::compile(mod, cfg);
   auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor();
-  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results, 0.99));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results));
 }
 
 TEST(CppAPITest, MobileNetModuleFallbacksCorrectlyWithOneEngine) {
@@ -69,6 +69,6 @@ TEST(CppAPITest, MobileNetModuleFallbacksCorrectlyWithOneEngine) {
   ASSERT_TRUE(trt_count == 1);
 
   auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor();
-  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results, 0.99));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results));
 }
 #endif
diff --git a/tests/cpp/test_modules_as_engines.cpp b/tests/cpp/test_modules_as_engines.cpp
@@ -14,8 +14,7 @@ TEST_P(CppAPITests, ModuleAsEngineIsClose) {
   jit_results.push_back(jit_results_ivalues.toTensor());
   auto trt_results = torch_tensorrt::tests::util::RunModuleForwardAsEngine(mod, inputs);
 
-  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(
-      jit_results[0], trt_results[0].reshape_as(jit_results[0]), threshold));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results[0], trt_results[0].reshape_as(jit_results[0])));
 }
 
 #ifndef DISABLE_TEST_IN_CI
@@ -24,8 +23,8 @@ INSTANTIATE_TEST_SUITE_P(
     ModuleAsEngineForwardIsCloseSuite,
     CppAPITests,
     testing::Values(
-        PathAndInput({"tests/modules/resnet18_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 0.99}),
-        PathAndInput({"tests/modules/mobilenet_v2_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 0.99}),
-        PathAndInput({"tests/modules/efficientnet_b0_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 0.99}),
-        PathAndInput({"tests/modules/vit_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 0.99})));
+        PathAndInput({"tests/modules/resnet18_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}}),
+        PathAndInput({"tests/modules/mobilenet_v2_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}}),
+        PathAndInput({"tests/modules/efficientnet_b0_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}}),
+        PathAndInput({"tests/modules/vit_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}})));
 #endif
diff --git a/tests/cpp/test_multi_gpu_serde.cpp b/tests/cpp/test_multi_gpu_serde.cpp
@@ -24,11 +24,11 @@ TEST_P(CppAPITests, CompiledModuleIsClose) {
 
   for (size_t i = 0; i < trt_results.size(); i++) {
     ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(
-        jit_results[i], trt_results[i].reshape_as(jit_results[i]).to(torch::Device("cuda:0")), threshold));
+        jit_results[i], trt_results[i].reshape_as(jit_results[i]).to(torch::Device("cuda:0"))));
   }
 }
 
 INSTANTIATE_TEST_SUITE_P(
     CompiledModuleForwardIsCloseSuite,
     CppAPITests,
-    testing::Values(PathAndInput({"tests/modules/resnet18_traced.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 0.99})));
+    testing::Values(PathAndInput({"tests/modules/resnet18_traced.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}})));
diff --git a/tests/cpp/test_multiple_registered_engines.cpp b/tests/cpp/test_multiple_registered_engines.cpp
@@ -56,13 +56,13 @@ TEST(CppAPITest, CanRunMultipleEngines) {
   trt2_results.push_back(trt2_results_ivalues.toTensor());
 
   for (size_t i = 0; i < trt1_results.size(); i++) {
-    ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(
-        jit1_results[i], trt1_results[i].reshape_as(jit1_results[i]), 0.99));
+    ASSERT_TRUE(
+        torch_tensorrt::tests::util::cosineSimEqual(jit1_results[i], trt1_results[i].reshape_as(jit1_results[i])));
   }
 
   for (size_t i = 0; i < trt2_results.size(); i++) {
-    ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(
-        jit2_results[i], trt2_results[i].reshape_as(jit2_results[i]), 0.99));
+    ASSERT_TRUE(
+        torch_tensorrt::tests::util::cosineSimEqual(jit2_results[i], trt2_results[i].reshape_as(jit2_results[i])));
   }
 }
 #endif
diff --git a/tests/cpp/test_runtime_thread_safety.cpp b/tests/cpp/test_runtime_thread_safety.cpp
@@ -78,7 +78,7 @@ TEST(CppAPITests, RuntimeThreadSafety) {
 
   bool flag = true;
   for (int i = 0; i < num_threads; i++) {
-    bool f = torch_tensorrt::tests::util::almostEqual(out_vec[i].toTensor(), trt_out_vec[i].toTensor(), 1e-2);
+    bool f = torch_tensorrt::tests::util::cosineSimEqual(out_vec[i].toTensor(), trt_out_vec[i].toTensor());
     flag = flag && f;
   }
   ASSERT_TRUE(flag);
diff --git a/tests/cpp/test_serialization.cpp b/tests/cpp/test_serialization.cpp
@@ -42,8 +42,8 @@ TEST_P(CppAPITests, SerializedModuleIsStillCorrect) {
   post_serialized_results.push_back(post_serialized_results_ivalues.toTensor());
 
   for (size_t i = 0; i < pre_serialized_results.size(); i++) {
-    ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
-        post_serialized_results[i], pre_serialized_results[i].reshape_as(post_serialized_results[i]), threshold));
+    ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(
+        post_serialized_results[i], pre_serialized_results[i].reshape_as(post_serialized_results[i])));
   }
 }
 
@@ -72,14 +72,14 @@ TEST_P(CppAPITests, SerializedDynamicModuleIsStillCorrect) {
   post_serialized_results.push_back(post_serialized_results_ivalues.toTensor());
 
   for (size_t i = 0; i < pre_serialized_results.size(); i++) {
-    ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
-        post_serialized_results[i], pre_serialized_results[i].reshape_as(post_serialized_results[i]), threshold));
+    ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(
+        post_serialized_results[i], pre_serialized_results[i].reshape_as(post_serialized_results[i])));
   }
 }
 
 INSTANTIATE_TEST_SUITE_P(
     CompiledModuleForwardIsCloseSuite,
     CppAPITests,
     testing::Values(
-        PathAndInput({"tests/modules/resnet18_traced.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 2e-5}),
-        PathAndInput({"tests/modules/pooling_traced.jit.pt", {{1, 3, 10, 10}}, {at::kFloat}, 2e-5})));
+        PathAndInput({"tests/modules/resnet18_traced.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}}),
+        PathAndInput({"tests/modules/pooling_traced.jit.pt", {{1, 3, 10, 10}}, {at::kFloat}})));
diff --git a/tests/modules/hub.py b/tests/modules/hub.py
@@ -46,12 +46,6 @@
         "model": torch.hub.load("pytorch/vision:v0.9.0", "resnet50", pretrained=True),
         "path": "both",
     },
-    "ssd": {
-        "model": torch.hub.load(
-            "NVIDIA/DeepLearningExamples:torchhub", "nvidia_ssd", model_math="fp32"
-        ),
-        "path": "trace",
-    },
     "efficientnet_b0": {
         "model": timm.create_model("efficientnet_b0", pretrained=True),
         "path": "script",
diff --git a/tests/util/util.cpp b/tests/util/util.cpp
@@ -1,3 +1,4 @@
+#include "util.h"
 #include "core/util/prelude.h"
 #include "torch/script.h"
 #include "torch/torch.h"
@@ -6,19 +7,7 @@ namespace torch_tensorrt {
 namespace tests {
 namespace util {
 
-bool cosineSimEqual(const at::Tensor& computed_tensor, const at::Tensor& gt_tensor, float threshold = 0.99f) {
-  torch::Tensor cosine_sim = torch::nn::functional::cosine_similarity(
-      computed_tensor.flatten(), gt_tensor.flatten(), torch::nn::functional::CosineSimilarityFuncOptions().dim(0));
-  std::ostringstream ss;
-  ss << computed_tensor << std::endl << gt_tensor << std::endl;
-  LOG_DEBUG(ss.str());
-  LOG_DEBUG(std::string("Cosine Similarity score: ") + std::to_string(cosine_sim.item<float>()));
-  LOG_DEBUG(std::string("Acceptable Threshold: ") + std::to_string(threshold));
-
-  return cosine_sim.item<float>() >= threshold;
-}
-
-bool almostEqual(const at::Tensor& computed_tensor, const at::Tensor& gt_tensor, float atol = 1e-8, float rtol = 1e-5) {
+bool almostEqual(const at::Tensor& computed_tensor, const at::Tensor& gt_tensor, float atol, float rtol) {
   std::ostringstream ss;
   ss << computed_tensor << std::endl << gt_tensor << std::endl;
   ss << " atol: " << atol << " rtol: " << rtol << std::endl;
@@ -37,6 +26,25 @@ bool almostEqual(const at::Tensor& computed_tensor, const at::Tensor& gt_tensor,
   return result <= threshold;
 }
 
+// Compares two tensors by the cosine similarity of their flattened values and
+// returns true when the score is at least `threshold`. Cosine similarity is not
+// meaningful for an all-zero tensor, so that case defers to almostEqual.
+bool cosineSimEqual(const at::Tensor& computed_tensor, const at::Tensor& gt_tensor, float threshold) {
+  std::ostringstream ss;
+  ss << computed_tensor << std::endl << gt_tensor << std::endl;
+  LOG_DEBUG(ss.str());
+  // Test for "no non-zero element" rather than sum() == 0: the sum is also zero
+  // for non-zero tensors whose positive and negative entries cancel out.
+  if (!computed_tensor.any().item<bool>() || !gt_tensor.any().item<bool>()) {
+    return almostEqual(computed_tensor, gt_tensor);
+  }
+  // Only compute the similarity once we know it will actually be used.
+  torch::Tensor cosine_sim = torch::nn::functional::cosine_similarity(
+      computed_tensor.flatten(), gt_tensor.flatten(), torch::nn::functional::CosineSimilarityFuncOptions().dim(0));
+  LOG_DEBUG(std::string("Cosine Similarity score: ") + std::to_string(cosine_sim.item<float>()));
+  LOG_DEBUG(std::string("Acceptable Threshold: ") + std::to_string(threshold));
+  return cosine_sim.item<float>() >= threshold;
+}
+
 bool exactlyEqual(const at::Tensor& computed_tensor, const at::Tensor& gt_tensor) {
   LOG_DEBUG(computed_tensor << std::endl << gt_tensor << std::endl);
   std::cout << "Max Difference: " << (computed_tensor - gt_tensor).abs().max().item<float>() << std::endl;
diff --git a/tests/util/util.h b/tests/util/util.h

Original file line number	Diff line number	Diff line change
`@@ -71,5 +71,5 @@ TEST(Partitioning, FallbackInplaceOPInConditionalsCorrectly) {`
`71`	`71`	`auto jit_results = mod.forward(jit_inputs_ivalues).toTensor();`
`72`	`72`	`auto trt_mod = torch_tensorrt::core::CompileGraph(mod, cfg);`
`73`	`73`	`auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor();`
`74`		`- ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results, trt_results, 2e-6));`
	`74`	`+ ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results));`
`75`	`75`	`}`
Original file line number	Diff line number	Diff line change
`@@ -30,7 +30,7 @@ TEST(Partitioning, CheckLoopFallbackEvalCompilesCorrectly) {`
`30`	`30`	`auto jit_results = mod.forward(jit_inputs_ivalues).toTensor();`
`31`	`31`	`auto trt_mod = torch_tensorrt::core::CompileGraph(mod, cfg);`
`32`	`32`	`auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor();`
`33`		`- ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results, trt_results, 2e-6));`
	`33`	`+ ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results));`
`34`	`34`	`}`
`35`	`35`
`36`	`36`	`TEST(Partitioning, CheckLoopFallbackNoEvalCompilesCorrectly) {`
`@@ -58,5 +58,5 @@ TEST(Partitioning, CheckLoopFallbackNoEvalCompilesCorrectly) {`
`58`	`58`	`auto jit_results = mod.forward(jit_inputs_ivalues).toTensor();`
`59`	`59`	`auto trt_mod = torch_tensorrt::core::CompileGraph(mod, cfg);`
`60`	`60`	`auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor();`
`61`		`- ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results, trt_results, 2e-6));`
	`61`	`+ ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results));`
`62`	`62`	`}`
Original file line number	Diff line number	Diff line change
`@@ -87,7 +87,7 @@ TEST(Partitioning, ResolveNonTensorInputsForIFBlockCorrectly) {`
`87`	`87`	`auto jit_results = mod.forward({jit_in0, jit_in1});`
`88`	`88`	`auto trt_results = new_mod.forward({trt_in0, trt_in1});`
`89`	`89`
`90`		`- ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results.toTensor(), trt_results.toTensor(), 2e-6));`
	`90`	`+ ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results.toTensor(), trt_results.toTensor()));`
`91`	`91`	`}`
`92`	`92`
`93`	`93`	`TEST(Partitioning, ResolveNonTensorInputsCorrectly) {`
Original file line number	Diff line number	Diff line change
`@@ -30,7 +30,7 @@ TEST(CppAPITest, ResNetModuleFallbacksCorrectly) {`
`30`	`30`	`auto jit_results = mod.forward(jit_inputs_ivalues).toTensor();`
`31`	`31`	`auto trt_mod = torch_tensorrt::ts::compile(mod, cfg);`
`32`	`32`	`auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor();`
`33`		`- ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results, 0.99));`
	`33`	`+ ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results));`
`34`	`34`	`}`
`35`	`35`
`36`	`36`	`TEST(CppAPITest, MobileNetModuleFallbacksCorrectlyWithOneEngine) {`
`@@ -69,6 +69,6 @@ TEST(CppAPITest, MobileNetModuleFallbacksCorrectlyWithOneEngine) {`
`69`	`69`	`ASSERT_TRUE(trt_count == 1);`
`70`	`70`
`71`	`71`	`auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor();`
`72`		`- ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results, 0.99));`
	`72`	`+ ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results));`
`73`	`73`	`}`
`74`	`74`	`#endif`
Original file line number	Diff line number	Diff line change
`@@ -24,11 +24,11 @@ TEST_P(CppAPITests, CompiledModuleIsClose) {`
`24`	`24`
`25`	`25`	`for (size_t i = 0; i < trt_results.size(); i++) {`
`26`	`26`	`ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(`
`27`		`- jit_results[i], trt_results[i].reshape_as(jit_results[i]).to(torch::Device("cuda:0")), threshold));`
	`27`	`+ jit_results[i], trt_results[i].reshape_as(jit_results[i]).to(torch::Device("cuda:0"))));`
`28`	`28`	`}`
`29`	`29`	`}`
`30`	`30`
`31`	`31`	`INSTANTIATE_TEST_SUITE_P(`
`32`	`32`	`CompiledModuleForwardIsCloseSuite,`
`33`	`33`	`CppAPITests,`
`34`		`- testing::Values(PathAndInput({"tests/modules/resnet18_traced.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 0.99})));`
	`34`	`+ testing::Values(PathAndInput({"tests/modules/resnet18_traced.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}})));`
Original file line number	Diff line number	Diff line change
`@@ -56,13 +56,13 @@ TEST(CppAPITest, CanRunMultipleEngines) {`
`56`	`56`	`trt2_results.push_back(trt2_results_ivalues.toTensor());`
`57`	`57`
`58`	`58`	`for (size_t i = 0; i < trt1_results.size(); i++) {`
`59`		`- ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(`
`60`		`- jit1_results[i], trt1_results[i].reshape_as(jit1_results[i]), 0.99));`
	`59`	`+ ASSERT_TRUE(`
	`60`	`+ torch_tensorrt::tests::util::cosineSimEqual(jit1_results[i], trt1_results[i].reshape_as(jit1_results[i])));`
`61`	`61`	`}`
`62`	`62`
`63`	`63`	`for (size_t i = 0; i < trt2_results.size(); i++) {`
`64`		`- ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(`
`65`		`- jit2_results[i], trt2_results[i].reshape_as(jit2_results[i]), 0.99));`
	`64`	`+ ASSERT_TRUE(`
	`65`	`+ torch_tensorrt::tests::util::cosineSimEqual(jit2_results[i], trt2_results[i].reshape_as(jit2_results[i])));`
`66`	`66`	`}`
`67`	`67`	`}`
`68`	`68`	`#endif`
Original file line number	Diff line number	Diff line change
`@@ -78,7 +78,7 @@ TEST(CppAPITests, RuntimeThreadSafety) {`
`78`	`78`
`79`	`79`	`bool flag = true;`
`80`	`80`	`for (int i = 0; i < num_threads; i++) {`
`81`		`- bool f = torch_tensorrt::tests::util::almostEqual(out_vec[i].toTensor(), trt_out_vec[i].toTensor(), 1e-2);`
	`81`	`+ bool f = torch_tensorrt::tests::util::cosineSimEqual(out_vec[i].toTensor(), trt_out_vec[i].toTensor());`
`82`	`82`	`flag = flag && f;`
`83`	`83`	`}`
`84`	`84`	`ASSERT_TRUE(flag);`