@@ -75,10 +75,8 @@ auto get_native(const SyclObjectT &Obj)
75
75
namespace detail {
76
76
class queue_impl ;
77
77
78
- #if __SYCL_USE_FALLBACK_ASSERT
79
78
inline event submitAssertCapture (queue &, event &, queue *,
80
79
const detail::code_location &);
81
- #endif
82
80
83
81
// Function to postprocess submitted command
84
82
// Arguments:
@@ -375,8 +373,9 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
375
373
std::enable_if_t <std::is_invocable_r_v<void , T, handler &>, event> submit (
376
374
T CGF,
377
375
const detail::code_location &CodeLoc = detail::code_location::current()) {
378
- return submit_with_event (
379
- sycl::ext::oneapi::experimental::empty_properties_t {}, CGF,
376
+ return submit_with_event<__SYCL_USE_FALLBACK_ASSERT>(
377
+ sycl::ext::oneapi::experimental::empty_properties_t {},
378
+ detail::type_erased_cgfo_ty{CGF},
380
379
/* SecondaryQueuePtr=*/ nullptr , CodeLoc);
381
380
}
382
381
@@ -395,9 +394,9 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
395
394
std::enable_if_t <std::is_invocable_r_v<void , T, handler &>, event> submit (
396
395
T CGF, queue &SecondaryQueue,
397
396
const detail::code_location &CodeLoc = detail::code_location::current()) {
398
- return submit_with_event (
399
- sycl::ext::oneapi::experimental::empty_properties_t {}, CGF,
400
- &SecondaryQueue, CodeLoc);
397
+ return submit_with_event<__SYCL_USE_FALLBACK_ASSERT> (
398
+ sycl::ext::oneapi::experimental::empty_properties_t {},
399
+ detail::type_erased_cgfo_ty{CGF}, &SecondaryQueue, CodeLoc);
401
400
}
402
401
403
402
// / Prevents any commands submitted afterward to this queue from executing
@@ -2786,6 +2785,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
2786
2785
2787
2786
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
2788
2787
// / TODO: Unused. Remove these when ABI-break window is open.
2788
+ // / Not using `type_erased_cgfo_ty` on purpose.
2789
2789
event submit_impl (std::function<void (handler &)> CGH,
2790
2790
const detail::code_location &CodeLoc);
2791
2791
event submit_impl (std::function<void (handler &)> CGH,
@@ -2815,16 +2815,28 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
2815
2815
std::function<void (handler &)> CGH, queue secondQueue,
2816
2816
const detail::code_location &CodeLoc,
2817
2817
const detail::SubmitPostProcessF &PostProcess, bool IsTopCodeLoc);
2818
+
2819
+ // Old version when `std::function<void(handler &)>` was used in place of
2820
+ // `detail::type_erased_cgfo_ty`.
2821
+ event submit_with_event_impl (std::function<void (handler &)> CGH,
2822
+ const detail::SubmissionInfo &SubmitInfo,
2823
+ const detail::code_location &CodeLoc,
2824
+ bool IsTopCodeLoc);
2825
+
2826
+ void submit_without_event_impl (std::function<void (handler &)> CGH,
2827
+ const detail::SubmissionInfo &SubmitInfo,
2828
+ const detail::code_location &CodeLoc,
2829
+ bool IsTopCodeLoc);
2818
2830
#endif // __INTEL_PREVIEW_BREAKING_CHANGES
2819
2831
2820
2832
// / A template-free version of submit_with_event.
2821
- event submit_with_event_impl (std::function< void (handler &)> CGH,
2833
+ event submit_with_event_impl (const detail::type_erased_cgfo_ty & CGH,
2822
2834
const detail::SubmissionInfo &SubmitInfo,
2823
2835
const detail::code_location &CodeLoc,
2824
2836
bool IsTopCodeLoc);
2825
2837
2826
2838
// / A template-free version of submit_without_event.
2827
- void submit_without_event_impl (std::function< void (handler &)> CGH,
2839
+ void submit_without_event_impl (const detail::type_erased_cgfo_ty & CGH,
2828
2840
const detail::SubmissionInfo &SubmitInfo,
2829
2841
const detail::code_location &CodeLoc,
2830
2842
bool IsTopCodeLoc);
@@ -2836,32 +2848,35 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
2836
2848
// / \param CGF is a function object containing command group.
2837
2849
// / \param CodeLoc is the code location of the submit call (default argument)
2838
2850
// / \return a SYCL event object for the submitted command group.
2839
- template <typename T, typename PropertiesT>
2840
- std::enable_if_t <std::is_invocable_r_v<void , T, handler &>, event>
2841
- submit_with_event (
2842
- PropertiesT Props, T CGF, queue *SecondaryQueuePtr,
2851
+ //
2852
+ // UseFallbackAssert as template param vs `#if` in function body is necessary
2853
+ // to prevent ODR-violation between TUs built with different fallback assert
2854
+ // modes.
2855
+ template <bool UseFallbackAssert, typename PropertiesT>
2856
+ event submit_with_event (
2857
+ PropertiesT Props, const detail::type_erased_cgfo_ty &CGF,
2858
+ queue *SecondaryQueuePtr,
2843
2859
const detail::code_location &CodeLoc = detail::code_location::current()) {
2844
2860
detail::tls_code_loc_t TlsCodeLocCapture (CodeLoc);
2845
2861
detail::SubmissionInfo SI{};
2846
2862
ProcessSubmitProperties (Props, SI);
2847
2863
if (SecondaryQueuePtr)
2848
2864
SI.SecondaryQueue () = detail::getSyclObjImpl (*SecondaryQueuePtr);
2849
- #if __SYCL_USE_FALLBACK_ASSERT
2850
- SI.PostProcessorFunc () =
2851
- [this , &SecondaryQueuePtr,
2852
- &TlsCodeLocCapture](bool IsKernel, bool KernelUsesAssert, event &E) {
2853
- if (IsKernel && !device_has (aspect::ext_oneapi_native_assert) &&
2854
- KernelUsesAssert && !device_has (aspect::accelerator)) {
2855
- // __devicelib_assert_fail isn't supported by Device-side Runtime
2856
- // Linking against fallback impl of __devicelib_assert_fail is
2857
- // performed by program manager class
2858
- // Fallback assert isn't supported for FPGA
2859
- submitAssertCapture (*this , E, SecondaryQueuePtr,
2860
- TlsCodeLocCapture.query ());
2861
- }
2862
- };
2863
- #endif // __SYCL_USE_FALLBACK_ASSERT
2864
- return submit_with_event_impl (std::move (CGF), SI, TlsCodeLocCapture.query (),
2865
+ if constexpr (UseFallbackAssert)
2866
+ SI.PostProcessorFunc () =
2867
+ [this , &SecondaryQueuePtr,
2868
+ &TlsCodeLocCapture](bool IsKernel, bool KernelUsesAssert, event &E) {
2869
+ if (IsKernel && !device_has (aspect::ext_oneapi_native_assert) &&
2870
+ KernelUsesAssert && !device_has (aspect::accelerator)) {
2871
+ // __devicelib_assert_fail isn't supported by Device-side Runtime
2872
+ // Linking against fallback impl of __devicelib_assert_fail is
2873
+ // performed by program manager class
2874
+ // Fallback assert isn't supported for FPGA
2875
+ submitAssertCapture (*this , E, SecondaryQueuePtr,
2876
+ TlsCodeLocCapture.query ());
2877
+ }
2878
+ };
2879
+ return submit_with_event_impl (CGF, SI, TlsCodeLocCapture.query (),
2865
2880
TlsCodeLocCapture.isToplevel ());
2866
2881
}
2867
2882
@@ -2871,21 +2886,25 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
2871
2886
// / \param Props is a property list with submission properties.
2872
2887
// / \param CGF is a function object containing command group.
2873
2888
// / \param CodeLoc is the code location of the submit call (default argument)
2874
- template <typename T, typename PropertiesT>
2875
- std::enable_if_t <std::is_invocable_r_v<void , T, handler &>, void >
2876
- submit_without_event (PropertiesT Props, T CGF,
2877
- const detail::code_location &CodeLoc) {
2878
- #if __SYCL_USE_FALLBACK_ASSERT
2879
- // If post-processing is needed, fall back to the regular submit.
2880
- // TODO: Revisit whether we can avoid this.
2881
- submit_with_event (Props, CGF, nullptr , CodeLoc);
2882
- #else
2883
- detail::tls_code_loc_t TlsCodeLocCapture (CodeLoc);
2884
- detail::SubmissionInfo SI{};
2885
- ProcessSubmitProperties (Props, SI);
2886
- submit_without_event_impl (CGF, SI, TlsCodeLocCapture.query (),
2887
- TlsCodeLocCapture.isToplevel ());
2888
- #endif // __SYCL_USE_FALLBACK_ASSERT
2889
+ //
2890
+ // UseFallbackAssert as template param vs `#if` in function body is necessary
2891
+ // to prevent ODR-violation between TUs built with different fallback assert
2892
+ // modes.
2893
+ template <bool UseFallbackAssert, typename PropertiesT>
2894
+ void submit_without_event (PropertiesT Props,
2895
+ const detail::type_erased_cgfo_ty &CGF,
2896
+ const detail::code_location &CodeLoc) {
2897
+ if constexpr (UseFallbackAssert) {
2898
+ // If post-processing is needed, fall back to the regular submit.
2899
+ // TODO: Revisit whether we can avoid this.
2900
+ submit_with_event<UseFallbackAssert>(Props, CGF, nullptr , CodeLoc);
2901
+ } else {
2902
+ detail::tls_code_loc_t TlsCodeLocCapture (CodeLoc);
2903
+ detail::SubmissionInfo SI{};
2904
+ ProcessSubmitProperties (Props, SI);
2905
+ submit_without_event_impl (CGF, SI, TlsCodeLocCapture.query (),
2906
+ TlsCodeLocCapture.isToplevel ());
2907
+ }
2889
2908
}
2890
2909
2891
2910
// / parallel_for_impl with a kernel represented as a lambda + range that
@@ -3114,10 +3133,10 @@ event submitAssertCapture(queue &Self, event &Event, queue *SecondaryQueue,
3114
3133
});
3115
3134
};
3116
3135
3117
- CopierEv = Self.submit_with_event (
3136
+ CopierEv = Self.submit_with_event < true > (
3118
3137
sycl::ext::oneapi::experimental::empty_properties_t {}, CopierCGF,
3119
3138
SecondaryQueue, CodeLoc);
3120
- CheckerEv = Self.submit_with_event (
3139
+ CheckerEv = Self.submit_with_event < true > (
3121
3140
sycl::ext::oneapi::experimental::empty_properties_t {}, CheckerCGF,
3122
3141
SecondaryQueue, CodeLoc);
3123
3142
0 commit comments