Skip to content

[SYCL][Fusion] Add kernel fusion extension API #7416

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Nov 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions sycl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,9 @@ install(DIRECTORY ${OpenCL_INCLUDE_DIR}/CL
DESTINATION ${SYCL_INCLUDE_DIR}/sycl
COMPONENT OpenCL-Headers)

# Option to enable online kernel fusion via a JIT compiler
option(SYCL_ENABLE_KERNEL_FUSION "Enable kernel fusion via JIT compiler" OFF)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pvchupin - Do we have a good place to document this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A future PR will add support for fusion in buildbot/configure.py, which would then set the value for this option.

Users using buildbot/configure.py would therefore not need to set this option manually.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In that case, I am okay with waiting with documentation for it until it has an option in buildbot/configure.py. I'll let @pvchupin have the last say in that though.


# Needed for feature_test.hpp
if ("cuda" IN_LIST SYCL_ENABLE_PLUGINS)
set(SYCL_BUILD_PI_CUDA ON)
Expand Down
7 changes: 6 additions & 1 deletion sycl/include/sycl/detail/property_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,13 @@ enum DataLessPropKind {
UseDefaultStream = 8,
DiscardEvents = 9,
DeviceReadOnly = 10,
FusionPromotePrivate = 11,
FusionPromoteLocal = 12,
FusionNoBarrier = 13,
FusionEnable = 14,
FusionForce = 15,
// Indicates the last known dataless property.
LastKnownDataLessPropKind = 10,
LastKnownDataLessPropKind = 15,
// Exceeding 32 may cause ABI breaking change on some of OSes.
DataLessPropKindSize = 32
};
Expand Down
108 changes: 108 additions & 0 deletions sycl/include/sycl/ext/codeplay/experimental/fusion_properties.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
//==----------- fusion_properties.hpp --- SYCL fusion properties -----------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once

#include <sycl/access/access.hpp>
#include <sycl/detail/property_helper.hpp>
#include <sycl/properties/property_traits.hpp>

namespace sycl {
__SYCL_INLINE_VER_NAMESPACE(_V1) {
namespace ext {
namespace codeplay {
namespace experimental {
namespace property {

class promote_private
: public detail::DataLessProperty<detail::FusionPromotePrivate> {};

class promote_local
: public detail::DataLessProperty<detail::FusionPromoteLocal> {};

class no_barriers : public detail::DataLessProperty<detail::FusionNoBarrier> {};

class force_fusion : public detail::DataLessProperty<detail::FusionForce> {};

namespace queue {
class enable_fusion : public detail::DataLessProperty<detail::FusionEnable> {};
} // namespace queue

} // namespace property
} // namespace experimental
} // namespace codeplay
} // namespace ext

// Forward declarations
template <typename T, int Dimensions, typename AllocatorT, typename Enable>
class buffer;

template <typename DataT, int Dimensions, access::mode AccessMode,
access::target AccessTarget, access::placeholder IsPlaceholder,
typename PropertyListT>
class accessor;

class queue;

// Property trait specializations.
template <>
struct is_property<ext::codeplay::experimental::property::promote_private>
: std::true_type {};

template <>
struct is_property<ext::codeplay::experimental::property::promote_local>
: std::true_type {};

template <>
struct is_property<ext::codeplay::experimental::property::no_barriers>
: std::true_type {};

template <>
struct is_property<ext::codeplay::experimental::property::force_fusion>
: std::true_type {};

template <>
struct is_property<ext::codeplay::experimental::property::queue::enable_fusion>
: std::true_type {};

// Buffer property trait specializations
template <typename T, int Dimensions, typename AllocatorT>
struct is_property_of<ext::codeplay::experimental::property::promote_private,
buffer<T, Dimensions, AllocatorT, void>>
: std::true_type {};

template <typename T, int Dimensions, typename AllocatorT>
struct is_property_of<ext::codeplay::experimental::property::promote_local,
buffer<T, Dimensions, AllocatorT, void>>
: std::true_type {};

// Accessor property trait specializations
template <typename DataT, int Dimensions, access::mode AccessMode,
access::target AccessTarget, access::placeholder IsPlaceholder,
typename PropertyListT>
struct is_property_of<ext::codeplay::experimental::property::promote_private,
accessor<DataT, Dimensions, AccessMode, AccessTarget,
IsPlaceholder, PropertyListT>> : std::true_type {
};

template <typename DataT, int Dimensions, access::mode AccessMode,
access::target AccessTarget, access::placeholder IsPlaceholder,
typename PropertyListT>
struct is_property_of<ext::codeplay::experimental::property::promote_local,
accessor<DataT, Dimensions, AccessMode, AccessTarget,
IsPlaceholder, PropertyListT>> : std::true_type {
};

// Queue property trait specializations
template <>
struct is_property_of<
ext::codeplay::experimental::property::queue::enable_fusion, queue>
: std::true_type {};

} // __SYCL_INLINE_VER_NAMESPACE(_V1)
} // namespace sycl
98 changes: 98 additions & 0 deletions sycl/include/sycl/ext/codeplay/experimental/fusion_wrapper.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
//==---- fusion_wrapper.hpp --- SYCL wrapper for queue for kernel fusion ---==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once

#include <sycl/queue.hpp>

namespace sycl {
__SYCL_INLINE_VER_NAMESPACE(_V1) {

namespace detail {
class fusion_wrapper_impl;
}

namespace ext {
namespace codeplay {
namespace experimental {

///
/// A wrapper wrapping a sycl::queue to provide access to the kernel fusion API,
/// allowing to manage kernel fusion on the wrapped queue.
class __SYCL_EXPORT fusion_wrapper {

public:
///
/// Wrap a queue to get access to the kernel fusion API.
///
/// @throw sycl::exception with errc::invalid if trying to construct a wrapper
/// on a queue which doesn't support fusion.
explicit fusion_wrapper(queue &q);

///
/// Access the queue wrapped by this fusion wrapper.
queue get_queue() const;

///
/// @brief Check whether the wrapped queue is in fusion mode or not.
bool is_in_fusion_mode() const;

///
/// @brief Set the wrapped queue into "fusion mode". This means that the
/// kernels that are submitted in subsequent calls to queue::submit() are not
/// submitted for execution right away, but rather added to a list of kernels
/// that should be fused.
///
/// @throw sycl::exception with errc::invalid if this operation is called on a
/// queue which is already in fusion mode.
void start_fusion();

///
/// @brief Cancel the fusion and submit all kernels submitted since the last
/// start_fusion() for immediate execution without fusion. The kernels are
/// executed in the same order as they were initially submitted to the wrapped
/// queue.
///
/// This operation is asynchronous, i.e., it may return after the previously
/// submitted kernels have been passed to the scheduler, but before any of the
/// previously submitted kernel starts or completes execution. The events
/// returned by submit() since the last call to start_fusion remain valid and
/// can be used for synchronization.
///
/// The queue is not in "fusion mode" anymore after this calls returns, until
/// the next start_fusion().
void cancel_fusion();

///
/// @brief Complete the fusion: JIT-compile a fused kernel from all kernels
/// submitted to the wrapped queue since the last start_fusion and submit the
/// fused kernel for execution. Inside the fused kernel, the per-work-item
/// effects are executed in the same order as the kernels were initially
/// submitted.
///
/// This operation is asynchronous, i.e., it may return after the JIT
/// compilation is executed and the fused kernel is passed to the scheduler,
/// but before the fused kernel starts or completes execution. The returned
/// event allows to synchronize with the execution of the fused kernel. All
/// events returned by queue::submit since the last call to start_fusion
/// remain valid.
///
/// The wrapped queue is not in "fusion mode" anymore after this calls
/// returns, until the next start_fusion().
///
/// @param properties Properties to take into account when performing fusion.
event complete_fusion(const property_list &propList = {});

private:
std::shared_ptr<detail::fusion_wrapper_impl> MImpl;
};
} // namespace experimental
} // namespace codeplay
} // namespace ext
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
} // namespace sycl
4 changes: 4 additions & 0 deletions sycl/include/sycl/feature_test.hpp.in
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ __SYCL_INLINE_VER_NAMESPACE(_V1) {
#if SYCL_BUILD_PI_HIP
#define SYCL_EXT_ONEAPI_BACKEND_HIP 1
#endif
#cmakedefine01 SYCL_ENABLE_KERNEL_FUSION
#if SYCL_ENABLE_KERNEL_FUSION
#define SYCL_EXT_CODEPLAY_KERNEL_FUSION 1
#endif

} // __SYCL_INLINE_VER_NAMESPACE(_V1)
} // namespace sycl
1 change: 1 addition & 0 deletions sycl/include/sycl/properties/all_properties.hpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#include <sycl/ext/codeplay/experimental/fusion_properties.hpp>
#include <sycl/properties/accessor_properties.hpp>
#include <sycl/properties/buffer_properties.hpp>
#include <sycl/properties/context_properties.hpp>
Expand Down
8 changes: 8 additions & 0 deletions sycl/include/sycl/queue.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1251,6 +1251,14 @@ class __SYCL_EXPORT queue {
} _CODELOCFW(CodeLoc));
}

/// @brief Returns true if the queue was created with the
/// ext::codeplay::experimental::property::queue::enable_fusion property.
///
/// Equivalent to
/// `has_property<ext::codeplay::experimental::property::queue::enable_fusion>()`.
///
bool ext_codeplay_supports_fusion() const;

// Clean KERNELFUNC macros.
#undef _KERNELFUNCPARAM

Expand Down
1 change: 1 addition & 0 deletions sycl/include/sycl/sycl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
#if SYCL_EXT_ONEAPI_BACKEND_LEVEL_ZERO
#include <sycl/ext/oneapi/backend/level_zero.hpp>
#endif
#include <sycl/ext/codeplay/experimental/fusion_wrapper.hpp>
#include <sycl/ext/oneapi/device_global/device_global.hpp>
#include <sycl/ext/oneapi/device_global/properties.hpp>
#include <sycl/ext/oneapi/experimental/builtins.hpp>
Expand Down
2 changes: 2 additions & 0 deletions sycl/source/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,8 @@ set(SYCL_SOURCES
"detail/error_handling/enqueue_kernel.cpp"
"detail/event_impl.cpp"
"detail/filter_selector_impl.cpp"
"detail/fusion/fusion_wrapper.cpp"
"detail/fusion/fusion_wrapper_impl.cpp"
"detail/global_handler.cpp"
"detail/helpers.cpp"
"detail/handler_proxy.cpp"
Expand Down
50 changes: 50 additions & 0 deletions sycl/source/detail/fusion/fusion_wrapper.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
//==------------ fusion_wrapper.cpp ----------------------------------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <sycl/ext/codeplay/experimental/fusion_wrapper.hpp>

#include <detail/fusion/fusion_wrapper_impl.hpp>
#include <detail/queue_impl.hpp>

namespace sycl {
__SYCL_INLINE_VER_NAMESPACE(_V1) {
namespace ext {
namespace codeplay {
namespace experimental {

fusion_wrapper::fusion_wrapper(queue &Queue) {
if (!Queue.ext_codeplay_supports_fusion()) {
throw sycl::exception(
sycl::errc::invalid,
"Cannot wrap a queue for fusion which doesn't support fusion");
}
MImpl = std::make_shared<detail::fusion_wrapper_impl>(
sycl::detail::getSyclObjImpl(Queue));
}

queue fusion_wrapper::get_queue() const {
return sycl::detail::createSyclObjFromImpl<sycl::queue>(MImpl->get_queue());
}

bool fusion_wrapper::is_in_fusion_mode() const {
return MImpl->is_in_fusion_mode();
}

void fusion_wrapper::start_fusion() { MImpl->start_fusion(); }

void fusion_wrapper::cancel_fusion() { MImpl->cancel_fusion(); }

event fusion_wrapper::complete_fusion(const property_list &PropList) {
return MImpl->complete_fusion(PropList);
}

} // namespace experimental
} // namespace codeplay
} // namespace ext
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
} // namespace sycl
43 changes: 43 additions & 0 deletions sycl/source/detail/fusion/fusion_wrapper_impl.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
//==------------ fusion_wrapper.cpp ----------------------------------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <detail/fusion/fusion_wrapper_impl.hpp>

namespace sycl {
__SYCL_INLINE_VER_NAMESPACE(_V1) {
namespace detail {

fusion_wrapper_impl::fusion_wrapper_impl(
std::shared_ptr<detail::queue_impl> Queue)
: MQueue{std::move(Queue)} {}

std::shared_ptr<detail::queue_impl> fusion_wrapper_impl::get_queue() const {
return MQueue;
}

bool fusion_wrapper_impl::is_in_fusion_mode() const { return false; }

void fusion_wrapper_impl::start_fusion() {
throw sycl::exception(sycl::errc::feature_not_supported,
"Fusion not yet implemented");
}

void fusion_wrapper_impl::cancel_fusion() {
throw sycl::exception(sycl::errc::feature_not_supported,
"Fusion not yet implemented");
}

event fusion_wrapper_impl::complete_fusion(const property_list &PropList) {
(void)PropList;
throw sycl::exception(sycl::errc::feature_not_supported,
"Fusion not yet implemented");
}

} // namespace detail
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
} // namespace sycl
Loading