-
Notifications
You must be signed in to change notification settings - Fork 61
[main] Combine SME slice parameters #225
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
rsandifo-arm
merged 1 commit into
ARM-software:main
from
rsandifo-arm:sme-merge-slice-arguments
Aug 17, 2023
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9001,9 +9001,21 @@ following it. --><span id="__arm_za_disable"></span> | |
|
||
The intrinsics in this section have the following properties in common: | ||
|
||
* Every argument named `tile`, `slice_offset` or `tile_mask` must | ||
be an integer constant expression in the range of the underlying | ||
instruction. | ||
* Every argument named `tile` or `tile_mask` must be an integer constant | ||
expression in the range of the underlying instruction. | ||
|
||
* Some SME instructions identify a slice of ZA using the sum of a 32-bit | ||
general-purpose register and an immediate offset. The intrinsics for | ||
these instructions have a 32-bit argument named `slice`, which is | ||
interpreted as follows: | ||
|
||
* If the intrinsic also has a `vnum` argument, the ZA slice number | ||
is calculated by adding `vnum` to `slice`. Both `slice` and `vnum` | ||
can be variable. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can both be variable -> can be variable. |
||
|
||
* Otherwise, `slice` specifies the ZA slice number directly; that is, | ||
it represents the sum of the 32-bit register and the immediate | ||
offset. `slice` can be variable. | ||
|
||
* ZA loads and stores do not use typed pointers, since there is | ||
no C or C++ type information associated with the contents of ZA. | ||
|
@@ -9017,74 +9029,85 @@ The intrinsics in this section have the following properties in common: | |
``` c | ||
// Also for _za16, _za32, _za64 and _za128 (with the same prototype). | ||
__attribute__((arm_streaming, arm_shared_za)) | ||
void svld1_hor_za8(uint64_t tile, uint32_t slice_base, | ||
uint64_t slice_offset, svbool_t pg, const void *ptr); | ||
void svld1_hor_za8(uint64_t tile, uint32_t slice, svbool_t pg, | ||
const void *ptr); | ||
|
||
// Synthetic intrinsic: adds vnum * svcntsb() to the address given by ptr. | ||
// Synthetic intrinsic: adds vnum to slice and vnum * svcntsb() to the | ||
// address given by ptr. | ||
// | ||
// Also for _za16, _za32, _za64 and _za128 (with the same prototype). | ||
__attribute__((arm_streaming, arm_shared_za)) | ||
void svld1_hor_vnum_za8(uint64_t tile, uint32_t slice_base, | ||
uint64_t slice_offset, svbool_t pg, | ||
void svld1_hor_vnum_za8(uint64_t tile, uint32_t slice, svbool_t pg, | ||
const void *ptr, int64_t vnum); | ||
|
||
// Also for _za16, _za32, _za64 and _za128 (with the same prototype). | ||
__attribute__((arm_streaming, arm_shared_za)) | ||
void svld1_ver_za8(uint64_t tile, uint32_t slice_base, | ||
uint64_t slice_offset, svbool_t pg, const void *ptr); | ||
void svld1_ver_za8(uint64_t tile, uint32_t slice, svbool_t pg, | ||
const void *ptr); | ||
|
||
// Synthetic intrinsic: adds vnum * svcntsb() to the address given by ptr. | ||
// Synthetic intrinsic: adds vnum to slice and vnum * svcntsb() to the | ||
// address given by ptr. | ||
// | ||
// Also for _za16, _za32, _za64 and _za128 (with the same prototype). | ||
__attribute__((arm_streaming, arm_shared_za)) | ||
void svld1_ver_vnum_za8(uint64_t tile, uint32_t slice_base, | ||
uint64_t slice_offset, svbool_t pg, | ||
void svld1_ver_vnum_za8(uint64_t tile, uint32_t slice, svbool_t pg, | ||
const void *ptr, int64_t vnum); | ||
``` | ||
|
||
#### LDR | ||
|
||
``` c | ||
// slice_offset fills the role of the usual vnum parameter. | ||
__attribute__((arm_streaming_compatible, arm_shared_za)) | ||
void svldr_vnum_za(uint32_t slice_base, uint64_t slice_offset, | ||
const void *ptr); | ||
void svldr_za(uint32_t slice, const void *ptr); | ||
|
||
// Adds vnum to slice and vnum * svcntsb() to the address given by ptr. | ||
// This can be done in a single instruction if vnum is a constant in the | ||
// range [0, 15]. The intrinsic is synthetic for other vnum parameters. | ||
__attribute__((arm_streaming_compatible, arm_shared_za)) | ||
void svldr_vnum_za(uint32_t slice, const void *ptr, int64_t vnum); | ||
rsandifo-arm marked this conversation as resolved.
Show resolved
Hide resolved
|
||
``` | ||
|
||
#### ST1B, ST1H, ST1W, ST1D, ST1Q | ||
|
||
``` c | ||
// Also for _za16, _za32, _za64 and _za128 (with the same prototype). | ||
__attribute__((arm_streaming, arm_shared_za, arm_preserves_za)) | ||
void svst1_hor_za8(uint64_t tile, uint32_t slice_base, | ||
uint64_t slice_offset, svbool_t pg, | ||
void svst1_hor_za8(uint64_t tile, uint32_t slice, svbool_t pg, | ||
void *ptr); | ||
|
||
// Synthetic intrinsic: adds vnum * svcntsb() to the address given by ptr. | ||
// Synthetic intrinsic: adds vnum to slice and vnum * svcntsb() to the | ||
// address given by ptr. | ||
// | ||
// Also for _za16, _za32, _za64 and _za128 (with the same prototype). | ||
__attribute__((arm_streaming, arm_shared_za, arm_preserves_za)) | ||
void svst1_hor_vnum_za8(uint64_t tile, uint32_t slice_base, | ||
uint64_t slice_offset, svbool_t pg, | ||
void svst1_hor_vnum_za8(uint64_t tile, uint32_t slice, svbool_t pg, | ||
void *ptr, int64_t vnum); | ||
|
||
// Also for _za16, _za32, _za64 and _za128 (with the same prototype). | ||
__attribute__((arm_streaming, arm_shared_za, arm_preserves_za)) | ||
void svst1_ver_za8(uint64_t tile, uint32_t slice_base, | ||
uint64_t slice_offset, svbool_t pg, | ||
void svst1_ver_za8(uint64_t tile, uint32_t slice, svbool_t pg, | ||
void *ptr); | ||
|
||
// Synthetic intrinsic: adds vnum * svcntsb() to the address given by ptr. | ||
// Synthetic intrinsic: adds vnum to slice and vnum * svcntsb() to the | ||
// address given by ptr. | ||
// | ||
// Also for _za16, _za32, _za64 and _za128 (with the same prototype). | ||
__attribute__((arm_streaming, arm_shared_za, arm_preserves_za)) | ||
void svst1_ver_vnum_za8(uint64_t tile, uint32_t slice_base, | ||
uint64_t slice_offset, svbool_t pg, | ||
void svst1_ver_vnum_za8(uint64_t tile, uint32_t slice, svbool_t pg, | ||
void *ptr, int64_t vnum); | ||
``` | ||
|
||
#### STR | ||
|
||
``` c | ||
// slice_offset fills the role of the usual vnum parameter. | ||
__attribute__((arm_streaming_compatible, arm_shared_za, arm_preserves_za)) | ||
void svstr_vnum_za(uint32_t slice_base, uint64_t slice_offset, void *ptr); | ||
void svstr_za(uint32_t slice, void *ptr); | ||
|
||
// Adds vnum to slice and vnum * svcntsb() to the address given by ptr. | ||
// This can be done in a single instruction if vnum is a constant in the | ||
// range [0, 15]. The intrinsic is synthetic for other vnum parameters. | ||
__attribute__((arm_streaming_compatible, arm_shared_za, arm_preserves_za)) | ||
void svstr_vnum_za(uint32_t slice, void *ptr, int64_t vnum); | ||
``` | ||
|
||
#### MOVA | ||
|
@@ -9098,32 +9121,27 @@ parameter both have type `svuint8_t`. | |
// And similarly for u8. | ||
__attribute__((arm_streaming, arm_shared_za, arm_preserves_za)) | ||
svint8_t svread_hor_za8[_s8]_m(svint8_t zd, svbool_t pg, | ||
uint64_t tile, uint32_t slice_base, | ||
uint64_t slice_offset); | ||
uint64_t tile, uint32_t slice); | ||
|
||
// And similarly for u16, bf16 and f16. | ||
__attribute__((arm_streaming, arm_shared_za, arm_preserves_za)) | ||
svint16_t svread_hor_za16[_s16]_m(svint16_t zd, svbool_t pg, | ||
uint64_t tile, uint32_t slice_base, | ||
uint64_t slice_offset); | ||
uint64_t tile, uint32_t slice); | ||
|
||
// And similarly for u32 and f32. | ||
__attribute__((arm_streaming, arm_shared_za, arm_preserves_za)) | ||
svint32_t svread_hor_za32[_s32]_m(svint32_t zd, svbool_t pg, | ||
uint64_t tile, uint32_t slice_base, | ||
uint64_t slice_offset); | ||
uint64_t tile, uint32_t slice); | ||
|
||
// And similarly for u64 and f64. | ||
__attribute__((arm_streaming, arm_shared_za, arm_preserves_za)) | ||
svint64_t svread_hor_za64[_s64]_m(svint64_t zd, svbool_t pg, | ||
uint64_t tile, uint32_t slice_base, | ||
uint64_t slice_offset); | ||
uint64_t tile, uint32_t slice); | ||
|
||
// And similarly for s16, s32, s64, u8, u16, u32, u64, bf16, f16, f32, f64. | ||
__attribute__((arm_streaming, arm_shared_za, arm_preserves_za)) | ||
svint8_t svread_hor_za128[_s8]_m(svint8_t zd, svbool_t pg, | ||
uint64_t tile, uint32_t slice_base, | ||
uint64_t slice_offset); | ||
uint64_t tile, uint32_t slice); | ||
``` | ||
|
||
Replacing `_hor` with `_ver` gives the associated vertical forms. | ||
|
@@ -9135,32 +9153,27 @@ the `zn` parameter to the `_u8` intrinsic has type `svuint8_t`. | |
``` c | ||
// And similarly for u8. | ||
__attribute__((arm_streaming, arm_shared_za)) | ||
void svwrite_hor_za8[_s8]_m(uint64_t tile, uint32_t slice_base, | ||
uint64_t slice_offset, svbool_t pg, | ||
void svwrite_hor_za8[_s8]_m(uint64_t tile, uint32_t slice, svbool_t pg, | ||
svint8_t zn); | ||
|
||
// And similarly for u16, bf16 and f16. | ||
__attribute__((arm_streaming, arm_shared_za)) | ||
void svwrite_hor_za16[_s16]_m(uint64_t tile, uint32_t slice_base, | ||
uint64_t slice_offset, svbool_t pg, | ||
void svwrite_hor_za16[_s16]_m(uint64_t tile, uint32_t slice, svbool_t pg, | ||
svint16_t zn); | ||
|
||
// And similarly for u32 and f32. | ||
__attribute__((arm_streaming, arm_shared_za)) | ||
void svwrite_hor_za32[_s32]_m(uint64_t tile, uint32_t slice_base, | ||
uint64_t slice_offset, svbool_t pg, | ||
void svwrite_hor_za32[_s32]_m(uint64_t tile, uint32_t slice, svbool_t pg, | ||
svint32_t zn); | ||
|
||
// And similarly for u64 and f64. | ||
__attribute__((arm_streaming, arm_shared_za)) | ||
void svwrite_hor_za64[_s64]_m(uint64_t tile, uint32_t slice_base, | ||
uint64_t slice_offset, svbool_t pg, | ||
void svwrite_hor_za64[_s64]_m(uint64_t tile, uint32_t slice, svbool_t pg, | ||
svint64_t zn); | ||
|
||
// And similarly for s16, s32, s64, u8, u16, u32, u64, bf16, f16, f32, f64. | ||
__attribute__((arm_streaming, arm_shared_za)) | ||
void svwrite_hor_za128[_s8]_m(uint64_t tile, uint32_t slice_base, | ||
uint64_t slice_offset, svbool_t pg, | ||
void svwrite_hor_za128[_s8]_m(uint64_t tile, uint32_t slice, svbool_t pg, | ||
svint8_t zn); | ||
``` | ||
|
||
|
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To be consistent, change "called" to "named" - as you did in the opening paragraph