Skip to content

Commit f7400c3

Browse files
committed
Auto merge of rust-lang#3895 - TDecking:gfni, r=RalfJung
Implement LLVM x86 gfni intrinsics
2 parents 6602a23 + d00b754 commit f7400c3

File tree

3 files changed

+722
-0
lines changed

3 files changed

+722
-0
lines changed

src/tools/miri/src/shims/x86/gfni.rs

+196
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
use rustc_span::Symbol;
2+
use rustc_target::spec::abi::Abi;
3+
4+
use crate::*;
5+
6+
impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
7+
pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
8+
fn emulate_x86_gfni_intrinsic(
9+
&mut self,
10+
link_name: Symbol,
11+
abi: Abi,
12+
args: &[OpTy<'tcx>],
13+
dest: &MPlaceTy<'tcx>,
14+
) -> InterpResult<'tcx, EmulateItemResult> {
15+
let this = self.eval_context_mut();
16+
17+
// Prefix should have already been checked.
18+
let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.").unwrap();
19+
20+
this.expect_target_feature_for_intrinsic(link_name, "gfni")?;
21+
if unprefixed_name.ends_with(".256") {
22+
this.expect_target_feature_for_intrinsic(link_name, "avx")?;
23+
} else if unprefixed_name.ends_with(".512") {
24+
this.expect_target_feature_for_intrinsic(link_name, "avx512f")?;
25+
}
26+
27+
match unprefixed_name {
28+
// Used to implement the `_mm{, 256, 512}_gf2p8affine_epi64_epi8` functions.
29+
// See `affine_transform` for details.
30+
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=gf2p8affine_
31+
"vgf2p8affineqb.128" | "vgf2p8affineqb.256" | "vgf2p8affineqb.512" => {
32+
let [left, right, imm8] =
33+
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
34+
affine_transform(this, left, right, imm8, dest, /* inverse */ false)?;
35+
}
36+
// Used to implement the `_mm{, 256, 512}_gf2p8affineinv_epi64_epi8` functions.
37+
// See `affine_transform` for details.
38+
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=gf2p8affineinv
39+
"vgf2p8affineinvqb.128" | "vgf2p8affineinvqb.256" | "vgf2p8affineinvqb.512" => {
40+
let [left, right, imm8] =
41+
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
42+
affine_transform(this, left, right, imm8, dest, /* inverse */ true)?;
43+
}
44+
// Used to implement the `_mm{, 256, 512}_gf2p8mul_epi8` functions.
45+
// Multiplies packed 8-bit integers in `left` and `right` in the finite field GF(2^8)
46+
// and store the results in `dst`. The field GF(2^8) is represented in
47+
// polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1.
48+
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=gf2p8mul
49+
"vgf2p8mulb.128" | "vgf2p8mulb.256" | "vgf2p8mulb.512" => {
50+
let [left, right] =
51+
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
52+
53+
let (left, left_len) = this.project_to_simd(left)?;
54+
let (right, right_len) = this.project_to_simd(right)?;
55+
let (dest, dest_len) = this.project_to_simd(dest)?;
56+
57+
assert_eq!(left_len, right_len);
58+
assert_eq!(dest_len, right_len);
59+
60+
for i in 0..dest_len {
61+
let left = this.read_scalar(&this.project_index(&left, i)?)?.to_u8()?;
62+
let right = this.read_scalar(&this.project_index(&right, i)?)?.to_u8()?;
63+
let dest = this.project_index(&dest, i)?;
64+
this.write_scalar(Scalar::from_u8(gf2p8_mul(left, right)), &dest)?;
65+
}
66+
}
67+
_ => return interp_ok(EmulateItemResult::NotSupported),
68+
}
69+
interp_ok(EmulateItemResult::NeedsReturn)
70+
}
71+
}
72+
73+
/// Calculates the affine transformation `right * left + imm8` inside the finite field GF(2^8).
74+
/// `right` is an 8x8 bit matrix, `left` and `imm8` are bit vectors.
75+
/// If `inverse` is set, then the inverse transformation with respect to the reduction polynomial
76+
/// x^8 + x^4 + x^3 + x + 1 is performed instead.
77+
fn affine_transform<'tcx>(
78+
this: &mut MiriInterpCx<'tcx>,
79+
left: &OpTy<'tcx>,
80+
right: &OpTy<'tcx>,
81+
imm8: &OpTy<'tcx>,
82+
dest: &MPlaceTy<'tcx>,
83+
inverse: bool,
84+
) -> InterpResult<'tcx, ()> {
85+
let (left, left_len) = this.project_to_simd(left)?;
86+
let (right, right_len) = this.project_to_simd(right)?;
87+
let (dest, dest_len) = this.project_to_simd(dest)?;
88+
89+
assert_eq!(dest_len, right_len);
90+
assert_eq!(dest_len, left_len);
91+
92+
let imm8 = this.read_scalar(imm8)?.to_u8()?;
93+
94+
// Each 8x8 bit matrix gets multiplied with eight bit vectors.
95+
// Therefore, the iteration is done in chunks of eight.
96+
for i in (0..dest_len).step_by(8) {
97+
// Get the bit matrix.
98+
let mut matrix = [0u8; 8];
99+
for j in 0..8 {
100+
matrix[usize::try_from(j).unwrap()] =
101+
this.read_scalar(&this.project_index(&right, i.wrapping_add(j))?)?.to_u8()?;
102+
}
103+
104+
// Multiply the matrix with the vector and perform the addition.
105+
for j in 0..8 {
106+
let index = i.wrapping_add(j);
107+
let left = this.read_scalar(&this.project_index(&left, index)?)?.to_u8()?;
108+
let left = if inverse { TABLE[usize::from(left)] } else { left };
109+
110+
let mut res = 0;
111+
112+
// Do the matrix multiplication.
113+
for bit in 0u8..8 {
114+
let mut b = matrix[usize::from(bit)] & left;
115+
116+
// Calculate the parity bit.
117+
b = (b & 0b1111) ^ (b >> 4);
118+
b = (b & 0b11) ^ (b >> 2);
119+
b = (b & 0b1) ^ (b >> 1);
120+
121+
res |= b << 7u8.wrapping_sub(bit);
122+
}
123+
124+
// Perform the addition.
125+
res ^= imm8;
126+
127+
let dest = this.project_index(&dest, index)?;
128+
this.write_scalar(Scalar::from_u8(res), &dest)?;
129+
}
130+
}
131+
132+
interp_ok(())
133+
}
134+
135+
/// A lookup table for computing the inverse byte for the inverse affine transformation.
136+
// This is a evaluated at compile time. Trait based conversion is not available.
137+
/// See <https://www.corsix.org/content/galois-field-instructions-2021-cpus> for the
138+
/// definition of `gf_inv` which was used for the creation of this table.
139+
#[allow(clippy::cast_possible_truncation)]
140+
static TABLE: [u8; 256] = {
141+
let mut array = [0; 256];
142+
143+
let mut i = 1;
144+
while i < 256 {
145+
let mut x = i as u8;
146+
let mut y = gf2p8_mul(x, x);
147+
x = y;
148+
let mut j = 2;
149+
while j < 8 {
150+
x = gf2p8_mul(x, x);
151+
y = gf2p8_mul(x, y);
152+
j += 1;
153+
}
154+
array[i] = y;
155+
i += 1;
156+
}
157+
158+
array
159+
};
160+
161+
/// Multiplies the two bytes `left` and `right` as elements of the finite field GF(2^8)
/// and returns the product. The field GF(2^8) is represented in polynomial
/// representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1.
/// See <https://www.corsix.org/content/galois-field-instructions-2021-cpus> for details.
// This is a const function. Trait based conversion is not available, so `as` casts are
// used: the widening casts are lossless and the final narrowing cast happens only after
// the value has been reduced below 2^8.
#[allow(clippy::cast_possible_truncation)]
const fn gf2p8_mul(left: u8, right: u8) -> u8 {
    // This implementation is based on the `gf2p8mul_byte` definition found inside the Intel intrinsics guide.
    // See https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=gf2p8mul
    // for more information.

    // Bit pattern of x^8 + x^4 + x^3 + x + 1.
    const POLYNOMIAL: u32 = 0x11b;

    let left = left as u32;
    let right = right as u32;

    let mut result = 0u32;

    // Carry-less multiplication: XOR together one shifted copy of `right` for every
    // set bit of `left`. The product has degree at most 14.
    let mut i = 0u32;
    while i < 8 {
        if left & (1 << i) != 0 {
            result ^= right << i;
        }
        i = i.wrapping_add(1);
    }

    // Reduction: clear the bits 14 down to 8, highest first, by XORing in the
    // reduction polynomial aligned to the bit being cleared.
    let mut i = 14u32;
    while i >= 8 {
        if result & (1 << i) != 0 {
            result ^= POLYNOMIAL << i.wrapping_sub(8);
        }
        i = i.wrapping_sub(1);
    }

    result as u8
}

src/tools/miri/src/shims/x86/mod.rs

+8
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ mod aesni;
1515
mod avx;
1616
mod avx2;
1717
mod bmi;
18+
mod gfni;
1819
mod sha;
1920
mod sse;
2021
mod sse2;
@@ -106,6 +107,13 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
106107
this, link_name, abi, args, dest,
107108
);
108109
}
110+
// The GFNI extension does not get its own namespace.
111+
// Check for instruction names instead.
112+
name if name.starts_with("vgf2p8affine") || name.starts_with("vgf2p8mulb") => {
113+
return gfni::EvalContextExt::emulate_x86_gfni_intrinsic(
114+
this, link_name, abi, args, dest,
115+
);
116+
}
109117
name if name.starts_with("sha") => {
110118
return sha::EvalContextExt::emulate_x86_sha_intrinsic(
111119
this, link_name, abi, args, dest,

0 commit comments

Comments
 (0)