Skip to content

Commit a0a9861

Browse files
author
iclsrc
committed
Merge from 'sycl' to 'sycl-web'
2 parents 6f2e7d9 + 91a79d5 commit a0a9861

33 files changed

+663
-457
lines changed

.github/CODEOWNERS

+4-4
Original file line numberDiff line numberDiff line change
@@ -99,10 +99,10 @@ clang/tools/clang-offload-deps/ @sndmitriev @mlychkov @AlexeySachkov
9999
clang/tools/clang-offload-extract/ @sndmitriev @mlychkov @AlexeySachkov
100100

101101
# Explicit SIMD
102-
SYCLLowerIR/ @kbobrovs @DenisBakhvalov
103-
esimd/ @kbobrovs @DenisBakhvalov
104-
sycl/include/sycl/ext/intel/experimental/esimd.hpp @kbobrovs @DenisBakhvalov
105-
sycl/doc/extensions/ExplicitSIMD/ @kbobrovs
102+
SYCLLowerIR/ @kbobrovs @DenisBakhvalov @kychendev
103+
esimd/ @kbobrovs @DenisBakhvalov @kychendev
104+
sycl/include/sycl/ext/intel/experimental/esimd.hpp @kbobrovs @DenisBakhvalov @kychendev
105+
sycl/doc/extensions/ExplicitSIMD/ @kbobrovs @DenisBakhvalov @kychendev
106106

107107
# ITT annotations
108108
llvm/lib/Transforms/Instrumentation/SPIRITTAnnotations.cpp @MrSidims @vzakhari

.github/workflows/gh_pages.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
- name: Install deps
1717
run: |
1818
sudo apt-get install -y doxygen graphviz ssh ninja-build
19-
sudo pip3 install 'sphinx==3.0.0' 'recommonmark==0.6.0' 'sphinx_markdown_tables==0.0.12'
19+
sudo pip3 install 'sphinx==4.1.2' 'myst-parser==0.15.1'
2020
- name: Build Docs
2121
run: |
2222
mkdir -p $GITHUB_WORKSPACE/build

libclc/ptx-nvidiacl/libspirv/group/collectives.cl

+22-22
Original file line numberDiff line numberDiff line change
@@ -211,17 +211,17 @@ __CLC_SUBGROUP_COLLECTIVE(FAdd, __CLC_ADD, half, 0)
211211
__CLC_SUBGROUP_COLLECTIVE(FAdd, __CLC_ADD, float, 0)
212212
__CLC_SUBGROUP_COLLECTIVE(FAdd, __CLC_ADD, double, 0)
213213

214-
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, char, 0)
215-
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, uchar, 0)
216-
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, short, 0)
217-
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, ushort, 0)
218-
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, int, 0)
219-
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, uint, 0)
220-
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, long, 0)
221-
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, ulong, 0)
222-
__CLC_SUBGROUP_COLLECTIVE(FMul, __CLC_MUL, half, 0)
223-
__CLC_SUBGROUP_COLLECTIVE(FMul, __CLC_MUL, float, 0)
224-
__CLC_SUBGROUP_COLLECTIVE(FMul, __CLC_MUL, double, 0)
214+
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, char, 1)
215+
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, uchar, 1)
216+
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, short, 1)
217+
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, ushort, 1)
218+
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, int, 1)
219+
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, uint, 1)
220+
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, long, 1)
221+
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, ulong, 1)
222+
__CLC_SUBGROUP_COLLECTIVE(FMul, __CLC_MUL, half, 1)
223+
__CLC_SUBGROUP_COLLECTIVE(FMul, __CLC_MUL, float, 1)
224+
__CLC_SUBGROUP_COLLECTIVE(FMul, __CLC_MUL, double, 1)
225225

226226
__CLC_SUBGROUP_COLLECTIVE(SMin, __CLC_MIN, char, CHAR_MAX)
227227
__CLC_SUBGROUP_COLLECTIVE(UMin, __CLC_MIN, uchar, UCHAR_MAX)
@@ -332,17 +332,17 @@ __CLC_GROUP_COLLECTIVE(FAdd, __CLC_ADD, float, 0)
332332
__CLC_GROUP_COLLECTIVE(FAdd, __CLC_ADD, double, 0)
333333

334334
// There is no Mul group op in SPIR-V, use non-uniform variant instead.
335-
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, char, 0)
336-
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, uchar, 0)
337-
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, short, 0)
338-
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, ushort, 0)
339-
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, int, 0)
340-
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, uint, 0)
341-
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, long, 0)
342-
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, ulong, 0)
343-
__CLC_GROUP_COLLECTIVE(NonUniformFMul, FMul, __CLC_MUL, half, 0)
344-
__CLC_GROUP_COLLECTIVE(NonUniformFMul, FMul, __CLC_MUL, float, 0)
345-
__CLC_GROUP_COLLECTIVE(NonUniformFMul, FMul, __CLC_MUL, double, 0)
335+
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, char, 1)
336+
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, uchar, 1)
337+
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, short, 1)
338+
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, ushort, 1)
339+
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, int, 1)
340+
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, uint, 1)
341+
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, long, 1)
342+
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, ulong, 1)
343+
__CLC_GROUP_COLLECTIVE(NonUniformFMul, FMul, __CLC_MUL, half, 1)
344+
__CLC_GROUP_COLLECTIVE(NonUniformFMul, FMul, __CLC_MUL, float, 1)
345+
__CLC_GROUP_COLLECTIVE(NonUniformFMul, FMul, __CLC_MUL, double, 1)
346346

347347
__CLC_GROUP_COLLECTIVE(SMin, __CLC_MIN, char, CHAR_MAX)
348348
__CLC_GROUP_COLLECTIVE(UMin, __CLC_MIN, uchar, UCHAR_MAX)

sycl/doc/PreprocessorMacros.md

+24-24
Original file line numberDiff line numberDiff line change
@@ -2,38 +2,38 @@
22

33
This file describes macros that have effect on SYCL compiler and run-time.
44

5-
### `RESTRICT_WRITE_ACCESS_TO_CONSTANT_PTR`
5+
- **RESTRICT_WRITE_ACCESS_TO_CONSTANT_PTR**
66

7-
The spec assumes that the SYCL implementation does address space deduction.
8-
However, for our implementation, the deduction is performed in the middle end,
9-
where it's hard to provide user friendly diagnositcs.
10-
Due to these problems writing to raw pointers obtained from `constant_ptr` is
11-
not diagnosed now.
12-
The user can enable diagnostics upon writing to such pointers via enabling the
13-
`RESTRICT_WRITE_ACCESS_TO_CONSTANT_PTR` macro.
14-
This allows `constant_ptr` to use constant pointers as underlying
15-
pointer types. Thus, conversions from `constant_ptr` to raw pointers will return
16-
constant pointers and writing to const pointers will be diagnosed by the
17-
front-end.
18-
This behavior is not following the SYCL spec since `constant_ptr` conversions to
19-
the underlying pointer types return pointers without any additional qualifiers
20-
so it's disabled by default.
7+
The spec assumes that the SYCL implementation does address space deduction.
8+
However, for our implementation, the deduction is performed in the middle end,
9+
where it's hard to provide user friendly diagnositcs.
10+
Due to these problems writing to raw pointers obtained from `constant_ptr` is
11+
not diagnosed now.
12+
The user can enable diagnostics upon writing to such pointers via enabling the
13+
`RESTRICT_WRITE_ACCESS_TO_CONSTANT_PTR` macro.
14+
This allows `constant_ptr` to use constant pointers as underlying
15+
pointer types. Thus, conversions from `constant_ptr` to raw pointers will return
16+
constant pointers and writing to const pointers will be diagnosed by the
17+
front-end.
18+
This behavior is not following the SYCL spec since `constant_ptr` conversions to
19+
the underlying pointer types return pointers without any additional qualifiers
20+
so it's disabled by default.
2121

22-
### `DISABLE_SYCL_INSTRUMENTATION_METADATA`
22+
- **DISABLE_SYCL_INSTRUMENTATION_METADATA**
2323

24-
This macro is used to disable passing of code location information to public
25-
methods.
24+
This macro is used to disable passing of code location information to public
25+
methods.
2626

27-
### `SYCL2020_DISABLE_DEPRECATION_WARNINGS`
27+
- **SYCL2020_DISABLE_DEPRECATION_WARNINGS**
2828

29-
Disables warnings coming from usage of SYCL 1.2.1 APIs, that are deprecated in
30-
SYCL 2020.
29+
Disables warnings coming from usage of SYCL 1.2.1 APIs, that are deprecated in
30+
SYCL 2020.
3131

32-
### `SYCL_DISABLE_DEPRECATION_WARNINGS`
32+
- **SYCL_DISABLE_DEPRECATION_WARNINGS**
3333

34-
Disables all deprecation warnings in SYCL runtime headers, including SYCL 1.2.1 deprecations.
34+
Disables all deprecation warnings in SYCL runtime headers, including SYCL 1.2.1 deprecations.
3535

36-
### Version macros
36+
## Version macros
3737

3838
- `__LIBSYCL_MAJOR_VERSION` is set to SYCL runtime library major version.
3939
- `__LIBSYCL_MINOR_VERSION` is set to SYCL runtime library minor version.

sycl/doc/conf.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,7 @@
3232
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
3333
# ones.
3434
extensions = [
35-
'recommonmark',
36-
'sphinx_markdown_tables'
35+
'myst_parser'
3736
]
3837

3938
# The name of the Pygments (syntax highlighting) style to use.
@@ -49,13 +48,11 @@
4948
# Extensions are mostly in asciidoc which has poor support in Sphinx
5049
exclude_patterns = ['extensions/*']
5150

52-
source_parsers = {'.md': 'recommonmark.parser.CommonMarkParser'}
53-
5451
suppress_warnings = [ 'misc.highlighting_failure' ]
5552

5653
def on_missing_reference(app, env, node, contnode):
5754
if node['reftype'] == 'any':
58-
contnode['refuri'] = "https://github.com/intel/llvm/tree/sycl/sycl/doc/" + contnode['refuri']
55+
contnode['refuri'] = "https://github.com/intel/llvm/tree/sycl/sycl/doc/" + node['reftarget']
5956
return contnode
6057
else:
6158
return None

sycl/include/CL/sycl/buffer.hpp

+6-3
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,10 @@ class buffer {
263263
size_t get_count() const { return size(); }
264264
size_t size() const noexcept { return Range.size(); }
265265

266-
size_t get_size() const { return size() * sizeof(T); }
266+
__SYCL2020_DEPRECATED(
267+
"get_size() is deprecated, please use byte_size() instead")
268+
size_t get_size() const { return byte_size(); }
269+
size_t byte_size() const noexcept { return size() * sizeof(T); }
267270

268271
AllocatorT get_allocator() const {
269272
return impl->template get_allocator<AllocatorT>();
@@ -343,7 +346,7 @@ class buffer {
343346
template <typename ReinterpretT, int ReinterpretDim>
344347
buffer<ReinterpretT, ReinterpretDim, AllocatorT>
345348
reinterpret(range<ReinterpretDim> reinterpretRange) const {
346-
if (sizeof(ReinterpretT) * reinterpretRange.size() != get_size())
349+
if (sizeof(ReinterpretT) * reinterpretRange.size() != byte_size())
347350
throw cl::sycl::invalid_object_error(
348351
"Total size in bytes represented by the type and range of the "
349352
"reinterpreted SYCL buffer does not equal the total size in bytes "
@@ -369,7 +372,7 @@ class buffer {
369372
(sizeof(ReinterpretT) != sizeof(T))),
370373
buffer<ReinterpretT, ReinterpretDim, AllocatorT>>::type
371374
reinterpret() const {
372-
long sz = get_size(); // TODO: switch to byte_size() once implemented
375+
long sz = byte_size();
373376
if (sz % sizeof(ReinterpretT) != 0)
374377
throw cl::sycl::invalid_object_error(
375378
"Total byte size of buffer is not evenly divisible by the size of "

sycl/include/CL/sycl/group_algorithm.hpp

+18-12
Original file line numberDiff line numberDiff line change
@@ -624,14 +624,15 @@ joint_exclusive_scan(Group g, InPtr first, InPtr last, OutPtr result, T init,
624624
const ptrdiff_t &divisor) -> ptrdiff_t {
625625
return ((v + divisor - 1) / divisor) * divisor;
626626
};
627-
typename InPtr::element_type x;
628-
typename OutPtr::element_type carry = init;
627+
typename std::remove_const<typename detail::remove_pointer<InPtr>::type>::type
628+
x;
629+
typename detail::remove_pointer<OutPtr>::type carry = init;
629630
for (ptrdiff_t chunk = 0; chunk < roundup(N, stride); chunk += stride) {
630631
ptrdiff_t i = chunk + offset;
631632
if (i < N) {
632633
x = first[i];
633634
}
634-
typename OutPtr::element_type out =
635+
typename detail::remove_pointer<OutPtr>::type out =
635636
exclusive_scan_over_group(g, x, carry, binary_op);
636637
if (i < N) {
637638
result[i] = out;
@@ -664,13 +665,15 @@ joint_exclusive_scan(Group g, InPtr first, InPtr last, OutPtr result,
664665
// FIXME: Do not special-case for half precision
665666
static_assert(
666667
std::is_same<decltype(binary_op(*first, *first)),
667-
typename OutPtr::element_type>::value ||
668-
(std::is_same<typename OutPtr::element_type, half>::value &&
668+
typename detail::remove_pointer<OutPtr>::type>::value ||
669+
(std::is_same<typename detail::remove_pointer<OutPtr>::type,
670+
half>::value &&
669671
std::is_same<decltype(binary_op(*first, *first)), float>::value),
670672
"Result type of binary_op must match scan accumulation type.");
671673
return joint_exclusive_scan(
672674
g, first, last, result,
673-
sycl::known_identity_v<BinaryOperation, typename OutPtr::element_type>,
675+
sycl::known_identity_v<BinaryOperation,
676+
typename detail::remove_pointer<OutPtr>::type>,
674677
binary_op);
675678
}
676679

@@ -791,14 +794,15 @@ joint_inclusive_scan(Group g, InPtr first, InPtr last, OutPtr result,
791794
const ptrdiff_t &divisor) -> ptrdiff_t {
792795
return ((v + divisor - 1) / divisor) * divisor;
793796
};
794-
typename InPtr::element_type x;
795-
typename OutPtr::element_type carry = init;
797+
typename std::remove_const<typename detail::remove_pointer<InPtr>::type>::type
798+
x;
799+
typename detail::remove_pointer<OutPtr>::type carry = init;
796800
for (ptrdiff_t chunk = 0; chunk < roundup(N, stride); chunk += stride) {
797801
ptrdiff_t i = chunk + offset;
798802
if (i < N) {
799803
x = first[i];
800804
}
801-
typename OutPtr::element_type out =
805+
typename detail::remove_pointer<OutPtr>::type out =
802806
inclusive_scan_over_group(g, x, binary_op, carry);
803807
if (i < N) {
804808
result[i] = out;
@@ -830,13 +834,15 @@ joint_inclusive_scan(Group g, InPtr first, InPtr last, OutPtr result,
830834
// FIXME: Do not special-case for half precision
831835
static_assert(
832836
std::is_same<decltype(binary_op(*first, *first)),
833-
typename OutPtr::element_type>::value ||
834-
(std::is_same<typename OutPtr::element_type, half>::value &&
837+
typename detail::remove_pointer<OutPtr>::type>::value ||
838+
(std::is_same<typename detail::remove_pointer<OutPtr>::type,
839+
half>::value &&
835840
std::is_same<decltype(binary_op(*first, *first)), float>::value),
836841
"Result type of binary_op must match scan accumulation type.");
837842
return joint_inclusive_scan(
838843
g, first, last, result, binary_op,
839-
sycl::known_identity_v<BinaryOperation, typename OutPtr::element_type>);
844+
sycl::known_identity_v<BinaryOperation,
845+
typename detail::remove_pointer<OutPtr>::type>);
840846
}
841847

842848
namespace detail {

sycl/include/CL/sycl/types.hpp

+13-2
Original file line numberDiff line numberDiff line change
@@ -810,7 +810,10 @@ template <typename Type, int NumElements> class vec {
810810
__SYCL2020_DEPRECATED("get_count() is deprecated, please use size() instead")
811811
static constexpr size_t get_count() { return size(); }
812812
static constexpr size_t size() noexcept { return NumElements; }
813-
static constexpr size_t get_size() { return sizeof(m_Data); }
813+
__SYCL2020_DEPRECATED(
814+
"get_size() is deprecated, please use byte_size() instead")
815+
static constexpr size_t get_size() { return byte_size(); }
816+
static constexpr size_t byte_size() { return sizeof(m_Data); }
814817

815818
template <typename convertT,
816819
rounding_mode roundingMode = rounding_mode::automatic>
@@ -1408,7 +1411,15 @@ class SwizzleOp {
14081411
__SYCL2020_DEPRECATED("get_count() is deprecated, please use size() instead")
14091412
size_t get_count() const { return size(); }
14101413
size_t size() const noexcept { return getNumElements(); }
1411-
template <int Num = getNumElements()> size_t get_size() const {
1414+
1415+
template <int Num = getNumElements()>
1416+
__SYCL2020_DEPRECATED(
1417+
"get_size() is deprecated, please use byte_size() instead")
1418+
size_t get_size() const {
1419+
return byte_size<Num>();
1420+
}
1421+
1422+
template <int Num = getNumElements()> size_t byte_size() const noexcept {
14121423
return sizeof(DataT) * (Num == 3 ? 4 : Num);
14131424
}
14141425

sycl/include/sycl/ext/intel/experimental/esimd/detail/region.hpp

+1-2
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,7 @@ template <typename T, int SizeY, int StrideY, int SizeX, int StrideX>
5757
using region2d_t = region_base<true, T, SizeY, StrideY, SizeX, StrideX>;
5858

5959
// A region with a single element.
60-
template <bool Is2D, typename T, int StrideY, int StrideX>
61-
using region_base_1 = region_base<Is2D, T, 1, StrideY, 1, StrideX>;
60+
template <typename T> using region_base_1 = region_base<false, T, 1, 0, 1, 0>;
6261

6362
// simd_view forward declaration.
6463
template <typename BaseTy, typename RegionTy> class simd_view;

sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_view_impl.hpp

+8-3
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,9 @@ template <typename BaseTy, typename RegionTy> class simd_view_impl {
5959
: M_base(Base), M_region(Region) {}
6060

6161
public:
62-
// Disallow copy and move constructors.
63-
simd_view_impl(const simd_view_impl &Other) = delete;
64-
simd_view_impl(simd_view_impl &&Other) = delete;
62+
// Default copy and move constructors.
63+
simd_view_impl(const simd_view_impl &Other) = default;
64+
simd_view_impl(simd_view_impl &&Other) = default;
6565
/// @}
6666

6767
/// Conversion to simd type.
@@ -80,6 +80,11 @@ template <typename BaseTy, typename RegionTy> class simd_view_impl {
8080
simd_view_impl &operator=(const value_type &Val) { return write(Val); }
8181
/// @}
8282

83+
/// Move assignment operator.
84+
simd_view_impl &operator=(simd_view_impl &&Other) {
85+
return write(Other.read());
86+
}
87+
8388
/// @{
8489
/// Region accessors.
8590
static constexpr bool is1D() { return !ShapeTy::Is_2D; }

sycl/include/sycl/ext/intel/experimental/esimd/simd.hpp

+7-4
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ template <typename Ty, int N> class simd {
139139
}
140140

141141
/// View this simd object in a different element type.
142-
template <typename EltTy> auto bit_cast_view() & {
142+
template <typename EltTy> auto bit_cast_view() &[[clang::lifetimebound]] {
143143
using TopRegionTy = detail::compute_format_type_t<simd, EltTy>;
144144
using RetTy = simd_view<simd, TopRegionTy>;
145145
TopRegionTy R(0);
@@ -153,7 +153,8 @@ template <typename Ty, int N> class simd {
153153
}
154154

155155
/// View as a 2-dimensional simd_view.
156-
template <typename EltTy, int Height, int Width> auto bit_cast_view() & {
156+
template <typename EltTy, int Height, int Width>
157+
auto bit_cast_view() &[[clang::lifetimebound]] {
157158
using TopRegionTy =
158159
detail::compute_format_type_2d_t<simd, EltTy, Height, Width>;
159160
using RetTy = simd_view<simd, TopRegionTy>;
@@ -174,7 +175,8 @@ template <typename Ty, int N> class simd {
174175
/// \param Offset is the starting element offset.
175176
/// \return the representing region object.
176177
template <int Size, int Stride>
177-
simd_view<simd, region1d_t<Ty, Size, Stride>> select(uint16_t Offset = 0) & {
178+
simd_view<simd, region1d_t<Ty, Size, Stride>> select(uint16_t Offset = 0) &[
179+
[clang::lifetimebound]] {
178180
region1d_t<Ty, Size, Stride> Reg(Offset);
179181
return {*this, Reg};
180182
}
@@ -200,7 +202,8 @@ template <typename Ty, int N> class simd {
200202
Ty operator()(int i) const { return data()[i]; }
201203

202204
/// Return writable view of a single element.
203-
simd_view<simd, region1d_t<Ty, 1, 0>> operator[](int i) {
205+
simd_view<simd, region1d_t<Ty, 1, 0>> operator[](int i)
206+
[[clang::lifetimebound]] {
204207
return select<1, 0>(i);
205208
}
206209

0 commit comments

Comments
 (0)