Skip to content

Commit 107998a

Browse files
[SYCL][Joint Matrix] Add DMR to device_architecture and matrix extensions (#16543)
This patch adds support for the Diamond Rapids CPU to the `sycl_ext_oneapi_device_architecture` and `sycl_ext_oneapi_matrix` extensions.
1 parent 5515791 commit 107998a

File tree

5 files changed

+46
-7
lines changed

5 files changed

+46
-7
lines changed

sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1067,7 +1067,8 @@ XMX hardware. Note that these can be returned using
10671067

10681068
==== Intel AMX Supported Combinations
10691069
This is currently available in devices with the architecture
1070-
`architecture::intel_cpu_spr`, and `architecture::intel_cpu_gnr`.
1070+
`architecture::intel_cpu_spr`, `architecture::intel_cpu_gnr` and
1071+
`architecture::intel_cpu_dmr`.
10711072
In this architecture's implementation, the type of the C matrix must
10721073
be the same as the type of the D matrix. Therefore, that common type
10731074
is shown in a single column in the table below.
@@ -1078,21 +1079,29 @@ is shown in a single column in the table below.
10781079
| `matrix_type::uint8` | `matrix_type::uint8` |
10791080
`matrix_type::sint32` | +<=+ 16 | +<=+ 16 | +<=+ 64
10801081
|`architecture::intel_cpu_spr`, `architecture::intel_cpu_gnr`
1082+
, `architecture::intel_cpu_dmr`
10811083
| `matrix_type::uint8` | `matrix_type::sint8` |
10821084
`matrix_type::sint32` | +<=+ 16 | +<=+ 16 | +<=+ 64
10831085
|`architecture::intel_cpu_spr`, `architecture::intel_cpu_gnr`
1086+
, `architecture::intel_cpu_dmr`
10841087
| `matrix_type::sint8` | `matrix_type::uint8` |
10851088
`matrix_type::sint32` | +<=+ 16 | +<=+ 16 | +<=+ 64
10861089
|`architecture::intel_cpu_spr`, `architecture::intel_cpu_gnr`
1090+
, `architecture::intel_cpu_dmr`
10871091
| `matrix_type::sint8` | `matrix_type::sint8` |
10881092
`matrix_type::sint32` | +<=+ 16 | +<=+ 16 | +<=+ 64
10891093
|`architecture::intel_cpu_spr`, `architecture::intel_cpu_gnr`
1094+
, `architecture::intel_cpu_dmr`
10901095
| `matrix_type::bf16` | `matrix_type::bf16` |
10911096
`matrix_type::fp32` | +<=+ 16 | +<=+ 16 | +<=+ 32
10921097
|`architecture::intel_cpu_spr`, `architecture::intel_cpu_gnr`
1098+
, `architecture::intel_cpu_dmr`
10931099
| `matrix_type::fp16` | `matrix_type::fp16` |
10941100
`matrix_type::fp32` | +<=+ 16 | +<=+ 16 | +<=+ 32
1095-
|`architecture::intel_cpu_gnr`
1101+
|`architecture::intel_cpu_gnr`, `architecture::intel_cpu_dmr`
1102+
| `matrix_type::tf32` | `matrix_type::tf32` |
1103+
`matrix_type::fp32` | +<=+ 16 | +<=+ 16 | +<=+ 16
1104+
|`architecture::intel_cpu_dmr`
10961105
|======================
10971106

10981107
==== Intel XMX Supported Combinations

sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,17 @@ Intel Xeon processor codenamed Granite Rapids.
163163
The utility of this enumeration is currently limited.
164164
See the section "Limitations with the experimental version" for details.
165165

166+
a|
167+
[source]
168+
----
169+
intel_cpu_dmr
170+
----
171+
|-
172+
|
173+
Intel Xeon processor codenamed Diamond Rapids.
174+
The utility of this enumeration is currently limited.
175+
See the section "Limitations with the experimental version" for details.
176+
166177
3+^|*Intel GPU family*
167178

168179
a|
@@ -1130,7 +1141,7 @@ option.
11301141
These are the target names of the form "intel_gpu_*", "nvidia_gpu_*", or
11311142
"amd_gpu_*".
11321143

1133-
The architecture enumerations `intel_cpu_spr` and `intel_cpu_gnr` do
1144+
The architecture enumerations `intel_cpu_spr`, `intel_cpu_gnr` and `intel_cpu_dmr` do
11341145
not currently work with any of the APIs described in this extension.
11351146
They cannot be used with the `if_architecture_is` function, the
11361147
`device::ext_oneapi_architecture_is` function, or the

sycl/include/sycl/ext/oneapi/experimental/device_architecture.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ __SYCL_ARCHITECTURE(unknown, 0x9900000000000000)
3030
__SYCL_ARCHITECTURE(x86_64, 0x0300000000000000)
3131
__SYCL_ARCHITECTURE(intel_cpu_spr, 0x0300000000000800)
3232
__SYCL_ARCHITECTURE(intel_cpu_gnr, 0x0300000000000900)
33+
__SYCL_ARCHITECTURE(intel_cpu_dmr, 0x0300000000001000)
3334
//
3435
// Intel GPU architectures
3536
//

sycl/source/detail/device_impl.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -674,10 +674,10 @@ bool device_impl::has(aspect Aspect) const {
674674
using arch = sycl::ext::oneapi::experimental::architecture;
675675
const arch supported_archs[] = {
676676
arch::intel_cpu_spr, arch::intel_cpu_gnr,
677-
arch::intel_gpu_pvc, arch::intel_gpu_dg2_g10,
678-
arch::intel_gpu_dg2_g11, arch::intel_gpu_dg2_g12,
679-
arch::intel_gpu_bmg_g21, arch::intel_gpu_lnl_m,
680-
arch::intel_gpu_arl_h,
677+
arch::intel_cpu_dmr, arch::intel_gpu_pvc,
678+
arch::intel_gpu_dg2_g10, arch::intel_gpu_dg2_g11,
679+
arch::intel_gpu_dg2_g12, arch::intel_gpu_bmg_g21,
680+
arch::intel_gpu_lnl_m, arch::intel_gpu_arl_h,
681681
};
682682
try {
683683
return std::any_of(

sycl/source/detail/device_info.hpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -722,6 +722,7 @@ constexpr std::pair<const int, oneapi_exp_arch> IntelGPUArchitectures[] = {
722722
constexpr std::pair<const int, oneapi_exp_arch> IntelCPUArchitectures[] = {
723723
{8, oneapi_exp_arch::intel_cpu_spr},
724724
{9, oneapi_exp_arch::intel_cpu_gnr},
725+
{10, oneapi_exp_arch::intel_cpu_dmr},
725726
};
726727

727728
template <>
@@ -843,6 +844,23 @@ struct get_device_info_impl<
843844
{16, 16, 32, 0, 0, 0, matrix_type::fp16, matrix_type::fp16,
844845
matrix_type::fp32, matrix_type::fp32},
845846
};
847+
else if (architecture::intel_cpu_dmr == DeviceArch)
848+
return {
849+
{16, 16, 64, 0, 0, 0, matrix_type::uint8, matrix_type::uint8,
850+
matrix_type::sint32, matrix_type::sint32},
851+
{16, 16, 64, 0, 0, 0, matrix_type::uint8, matrix_type::sint8,
852+
matrix_type::sint32, matrix_type::sint32},
853+
{16, 16, 64, 0, 0, 0, matrix_type::sint8, matrix_type::uint8,
854+
matrix_type::sint32, matrix_type::sint32},
855+
{16, 16, 64, 0, 0, 0, matrix_type::sint8, matrix_type::sint8,
856+
matrix_type::sint32, matrix_type::sint32},
857+
{16, 16, 32, 0, 0, 0, matrix_type::bf16, matrix_type::bf16,
858+
matrix_type::fp32, matrix_type::fp32},
859+
{16, 16, 32, 0, 0, 0, matrix_type::fp16, matrix_type::fp16,
860+
matrix_type::fp32, matrix_type::fp32},
861+
{16, 16, 16, 0, 0, 0, matrix_type::tf32, matrix_type::tf32,
862+
matrix_type::fp32, matrix_type::fp32},
863+
};
846864
else if ((architecture::intel_gpu_pvc == DeviceArch) ||
847865
(architecture::intel_gpu_bmg_g21 == DeviceArch) ||
848866
(architecture::intel_gpu_lnl_m == DeviceArch)) {

0 commit comments

Comments
 (0)