|
39 | 39 | #include <syclcompat/math.hpp>
|
40 | 40 | #include <syclcompat/memory.hpp>
|
41 | 41 |
|
| 42 | +// TODO: Remove these function definitions once they exist in the DPC++ compiler |
| 43 | +#if defined(__SYCL_DEVICE_ONLY__) && defined(__INTEL_LLVM_COMPILER) |
| 44 | +template <typename T> |
| 45 | +__SYCL_CONVERGENT__ extern SYCL_EXTERNAL __SYCL_EXPORT |
| 46 | + __attribute__((noduplicate)) T |
| 47 | + __spirv_GroupNonUniformShuffle(__spv::Scope::Flag, T, unsigned) noexcept; |
| 48 | + |
| 49 | +template <typename T> |
| 50 | +__SYCL_CONVERGENT__ extern SYCL_EXTERNAL __SYCL_EXPORT |
| 51 | + __attribute__((noduplicate)) T |
| 52 | + __spirv_GroupNonUniformShuffleDown(__spv::Scope::Flag, T, |
| 53 | + unsigned) noexcept; |
| 54 | + |
| 55 | +template <typename T> |
| 56 | +__SYCL_CONVERGENT__ extern SYCL_EXTERNAL __SYCL_EXPORT |
| 57 | + __attribute__((noduplicate)) T |
| 58 | + __spirv_GroupNonUniformShuffleUp(__spv::Scope::Flag, T, unsigned) noexcept; |
| 59 | +#endif |
| 60 | + |
42 | 61 | namespace syclcompat {
|
43 | 62 |
|
44 | 63 | namespace detail {
|
@@ -269,6 +288,196 @@ T permute_sub_group_by_xor(sycl::sub_group g, T x, unsigned int mask,
|
269 | 288 | : id);
|
270 | 289 | }
|
271 | 290 |
|
| 291 | +namespace experimental { |
| 292 | +/// Masked version of select_from_sub_group, which execute masked sub-group |
| 293 | +/// operation. The parameter member_mask indicating the work-items participating |
| 294 | +/// the call. Whether the n-th bit is set to 1 representing whether the |
| 295 | +/// work-item with id n is participating the call. All work-items named in |
| 296 | +/// member_mask must be executed with the same member_mask, or the result is |
| 297 | +/// undefined. |
| 298 | +/// \tparam T Input value type |
| 299 | +/// \param [in] member_mask Input mask |
| 300 | +/// \param [in] g Input sub_group |
| 301 | +/// \param [in] x Input value |
| 302 | +/// \param [in] remote_local_id Input source work item id |
| 303 | +/// \param [in] logical_sub_group_size Input logical sub_group size |
| 304 | +/// \returns The result |
| 305 | +template <typename T> |
| 306 | +T select_from_sub_group(unsigned int member_mask, sycl::sub_group g, T x, |
| 307 | + int remote_local_id, int logical_sub_group_size = 32) { |
| 308 | + unsigned int start_index = |
| 309 | + g.get_local_linear_id() / logical_sub_group_size * logical_sub_group_size; |
| 310 | + unsigned logical_remote_id = |
| 311 | + start_index + remote_local_id % logical_sub_group_size; |
| 312 | +#if defined(__SYCL_DEVICE_ONLY__) && defined(__INTEL_LLVM_COMPILER) |
| 313 | +#if defined(__SPIR__) |
| 314 | + return __spirv_GroupNonUniformShuffle(__spv::Scope::Subgroup, x, |
| 315 | + logical_remote_id); |
| 316 | +#else |
| 317 | + // TODO: Check |
| 318 | + // https://github.com/intel/llvm/pull/12972#issuecomment-2034218881 for the |
| 319 | + // __NVPTX__ implementation |
| 320 | + throw sycl::exception(sycl::errc::runtime, |
| 321 | + "[SYCLcompat] Masked version of select_from_sub_group " |
| 322 | + "only supports SPIR-V backends."); |
| 323 | +#endif // __SPIR__ |
| 324 | +#else |
| 325 | + (void)g; |
| 326 | + (void)x; |
| 327 | + (void)remote_local_id; |
| 328 | + (void)logical_sub_group_size; |
| 329 | + (void)member_mask; |
| 330 | + throw sycl::exception( |
| 331 | + sycl::errc::runtime, |
| 332 | + "[SYCLcompat] Masked version of select_from_sub_group not " |
| 333 | + "supported on host device and none intel compiler."); |
| 334 | +#endif // __SYCL_DEVICE_ONLY__ && __INTEL_LLVM_COMPILER |
| 335 | +} |
| 336 | + |
| 337 | +/// Masked version of shift_sub_group_left, which execute masked sub-group |
| 338 | +/// operation. The parameter member_mask indicating the work-items participating |
| 339 | +/// the call. Whether the n-th bit is set to 1 representing whether the |
| 340 | +/// work-item with id n is participating the call. All work-items named in |
| 341 | +/// member_mask must be executed with the same member_mask, or the result is |
| 342 | +/// undefined. |
| 343 | +/// \tparam T Input value type |
| 344 | +/// \param [in] member_mask Input mask |
| 345 | +/// \param [in] g Input sub_group |
| 346 | +/// \param [in] x Input value |
| 347 | +/// \param [in] delta Input delta |
| 348 | +/// \param [in] logical_sub_group_size Input logical sub_group size |
| 349 | +/// \returns The result |
| 350 | +template <typename T> |
| 351 | +T shift_sub_group_left(unsigned int member_mask, sycl::sub_group g, T x, |
| 352 | + unsigned int delta, int logical_sub_group_size = 32) { |
| 353 | + unsigned int id = g.get_local_linear_id(); |
| 354 | + unsigned int end_index = |
| 355 | + (id / logical_sub_group_size + 1) * logical_sub_group_size; |
| 356 | +#if defined(__SYCL_DEVICE_ONLY__) && defined(__INTEL_LLVM_COMPILER) |
| 357 | +#if defined(__SPIR__) |
| 358 | + T result = |
| 359 | + __spirv_GroupNonUniformShuffleDown(__spv::Scope::Subgroup, x, delta); |
| 360 | + if ((id + delta) >= end_index) { |
| 361 | + result = x; |
| 362 | + } |
| 363 | + return result; |
| 364 | +#else |
| 365 | + // TODO: Check |
| 366 | + // https://github.com/intel/llvm/pull/12972#issuecomment-2034218881 for the |
| 367 | + // __NVPTX__ implementation |
| 368 | + throw sycl::exception(sycl::errc::runtime, |
| 369 | + "[SYCLcompat] Masked version of shift_sub_group_left " |
| 370 | + "only supports SPIR-V backends."); |
| 371 | +#endif // __SPIR__ |
| 372 | +#else |
| 373 | + (void)g; |
| 374 | + (void)x; |
| 375 | + (void)delta; |
| 376 | + (void)logical_sub_group_size; |
| 377 | + (void)member_mask; |
| 378 | + throw sycl::exception( |
| 379 | + sycl::errc::runtime, |
| 380 | + "[SYCLcompat] Masked version of select_from_sub_group not " |
| 381 | + "supported on host device and none intel compiler."); |
| 382 | +#endif // __SYCL_DEVICE_ONLY__ && __INTEL_LLVM_COMPILER |
| 383 | +} |
| 384 | + |
| 385 | +/// Masked version of shift_sub_group_right, which execute masked sub-group |
| 386 | +/// operation. The parameter member_mask indicating the work-items participating |
| 387 | +/// the call. Whether the n-th bit is set to 1 representing whether the |
| 388 | +/// work-item with id n is participating the call. All work-items named in |
| 389 | +/// member_mask must be executed with the same member_mask, or the result is |
| 390 | +/// undefined. |
| 391 | +/// \tparam T Input value type |
| 392 | +/// \param [in] member_mask Input mask |
| 393 | +/// \param [in] g Input sub_group |
| 394 | +/// \param [in] x Input value |
| 395 | +/// \param [in] delta Input delta |
| 396 | +/// \param [in] logical_sub_group_size Input logical sub_group size |
| 397 | +/// \returns The result |
| 398 | +template <typename T> |
| 399 | +T shift_sub_group_right(unsigned int member_mask, sycl::sub_group g, T x, |
| 400 | + unsigned int delta, int logical_sub_group_size = 32) { |
| 401 | + unsigned int id = g.get_local_linear_id(); |
| 402 | + unsigned int start_index = |
| 403 | + id / logical_sub_group_size * logical_sub_group_size; |
| 404 | +#if defined(__SYCL_DEVICE_ONLY__) && defined(__INTEL_LLVM_COMPILER) |
| 405 | +#if defined(__SPIR__) |
| 406 | + T result = __spirv_GroupNonUniformShuffleUp(__spv::Scope::Subgroup, x, delta); |
| 407 | + if ((id - start_index) < delta) { |
| 408 | + result = x; |
| 409 | + } |
| 410 | + return result; |
| 411 | +#else |
| 412 | + // TODO: Check |
| 413 | + // https://github.com/intel/llvm/pull/12972#issuecomment-2034218881 for the |
| 414 | + // __NVPTX__ implementation |
| 415 | + throw sycl::exception(sycl::errc::runtime, |
| 416 | + "Masked version of shift_sub_group_right " |
| 417 | + "only supports SPIR-V backends."); |
| 418 | +#endif // __SPIR__ |
| 419 | +#else |
| 420 | + (void)g; |
| 421 | + (void)x; |
| 422 | + (void)delta; |
| 423 | + (void)logical_sub_group_size; |
| 424 | + (void)member_mask; |
| 425 | + throw sycl::exception(sycl::errc::runtime, |
| 426 | + "Masked version of select_from_sub_group not " |
| 427 | + "supported on host device and none intel compiler."); |
| 428 | +#endif // __SYCL_DEVICE_ONLY && __INTEL_LLVM_COMPILER |
| 429 | +} |
| 430 | + |
| 431 | +/// Masked version of permute_sub_group_by_xor, which execute masked sub-group |
| 432 | +/// operation. The parameter member_mask indicating the work-items participating |
| 433 | +/// the call. Whether the n-th bit is set to 1 representing whether the |
| 434 | +/// work-item with id n is participating the call. All work-items named in |
| 435 | +/// member_mask must be executed with the same member_mask, or the result is |
| 436 | +/// undefined. |
| 437 | +/// \tparam T Input value type |
| 438 | +/// \param [in] member_mask Input mask |
| 439 | +/// \param [in] g Input sub_group |
| 440 | +/// \param [in] x Input value |
| 441 | +/// \param [in] mask Input mask |
| 442 | +/// \param [in] logical_sub_group_size Input logical sub_group size |
| 443 | +/// \returns The result |
| 444 | +template <typename T> |
| 445 | +T permute_sub_group_by_xor(unsigned int member_mask, sycl::sub_group g, T x, |
| 446 | + unsigned int mask, int logical_sub_group_size = 32) { |
| 447 | + unsigned int id = g.get_local_linear_id(); |
| 448 | + unsigned int start_index = |
| 449 | + id / logical_sub_group_size * logical_sub_group_size; |
| 450 | + unsigned int target_offset = (id % logical_sub_group_size) ^ mask; |
| 451 | + unsigned logical_remote_id = (target_offset < logical_sub_group_size) |
| 452 | + ? start_index + target_offset |
| 453 | + : id; |
| 454 | +#if defined(__SYCL_DEVICE_ONLY__) && defined(__INTEL_LLVM_COMPILER) |
| 455 | +#if defined(__SPIR__) |
| 456 | + return __spirv_GroupNonUniformShuffle(__spv::Scope::Subgroup, x, |
| 457 | + logical_remote_id); |
| 458 | +#else |
| 459 | + // TODO: Check |
| 460 | + // https://github.com/intel/llvm/pull/12972#issuecomment-2034218881 for the |
| 461 | + // __NVPTX__ implementation |
| 462 | + throw sycl::exception( |
| 463 | + sycl::errc::runtime, |
| 464 | + "[SYCLcompat] Masked version of permute_sub_group_by_xor " |
| 465 | + "only supports SPIR-V backends."); |
| 466 | +#endif // __SPIR__ |
| 467 | +#else |
| 468 | + (void)g; |
| 469 | + (void)x; |
| 470 | + (void)mask; |
| 471 | + (void)logical_sub_group_size; |
| 472 | + (void)member_mask; |
| 473 | + throw sycl::exception( |
| 474 | + sycl::errc::runtime, |
| 475 | + "[SYCLcompat]Masked version of select_from_sub_group not " |
| 476 | + "supported on host device and none intel compiler."); |
| 477 | +#endif // __SYCL_DEVICE_ONLY__ && __INTEL_LLVM_COMPILER |
| 478 | +} |
| 479 | +} // namespace experimental |
| 480 | + |
272 | 481 | /// Inherited from the original SYCLomatic compatibility headers.
|
273 | 482 | /// @return compiler's SYCL version if defined, 202000 otherwise.
|
274 | 483 | inline int get_sycl_language_version() {
|
|
0 commit comments