|
| 1 | +use rustc_span::Symbol; |
| 2 | +use rustc_target::spec::abi::Abi; |
| 3 | + |
| 4 | +use crate::*; |
| 5 | + |
| 6 | +impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {} |
| 7 | +pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { |
| 8 | + fn emulate_x86_gfni_intrinsic( |
| 9 | + &mut self, |
| 10 | + link_name: Symbol, |
| 11 | + abi: Abi, |
| 12 | + args: &[OpTy<'tcx>], |
| 13 | + dest: &MPlaceTy<'tcx>, |
| 14 | + ) -> InterpResult<'tcx, EmulateItemResult> { |
| 15 | + let this = self.eval_context_mut(); |
| 16 | + |
| 17 | + // Prefix should have already been checked. |
| 18 | + let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.").unwrap(); |
| 19 | + |
| 20 | + this.expect_target_feature_for_intrinsic(link_name, "gfni")?; |
| 21 | + if unprefixed_name.ends_with(".256") { |
| 22 | + this.expect_target_feature_for_intrinsic(link_name, "avx")?; |
| 23 | + } else if unprefixed_name.ends_with(".512") { |
| 24 | + this.expect_target_feature_for_intrinsic(link_name, "avx512f")?; |
| 25 | + } |
| 26 | + |
| 27 | + match unprefixed_name { |
| 28 | + // Used to implement the `_mm{, 256, 512}_gf2p8affine_epi64_epi8` functions. |
| 29 | + // See `affine_transform` for details. |
| 30 | + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=gf2p8affine_ |
| 31 | + "vgf2p8affineqb.128" | "vgf2p8affineqb.256" | "vgf2p8affineqb.512" => { |
| 32 | + let [left, right, imm8] = |
| 33 | + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; |
| 34 | + affine_transform(this, left, right, imm8, dest, /* inverse */ false)?; |
| 35 | + } |
| 36 | + // Used to implement the `_mm{, 256, 512}_gf2p8affineinv_epi64_epi8` functions. |
| 37 | + // See `affine_transform` for details. |
| 38 | + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=gf2p8affineinv |
| 39 | + "vgf2p8affineinvqb.128" | "vgf2p8affineinvqb.256" | "vgf2p8affineinvqb.512" => { |
| 40 | + let [left, right, imm8] = |
| 41 | + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; |
| 42 | + affine_transform(this, left, right, imm8, dest, /* inverse */ true)?; |
| 43 | + } |
| 44 | + // Used to implement the `_mm{, 256, 512}_gf2p8mul_epi8` functions. |
| 45 | + // Multiplies packed 8-bit integers in `left` and `right` in the finite field GF(2^8) |
| 46 | + // and store the results in `dst`. The field GF(2^8) is represented in |
| 47 | + // polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1. |
| 48 | + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=gf2p8mul |
| 49 | + "vgf2p8mulb.128" | "vgf2p8mulb.256" | "vgf2p8mulb.512" => { |
| 50 | + let [left, right] = |
| 51 | + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; |
| 52 | + |
| 53 | + let (left, left_len) = this.project_to_simd(left)?; |
| 54 | + let (right, right_len) = this.project_to_simd(right)?; |
| 55 | + let (dest, dest_len) = this.project_to_simd(dest)?; |
| 56 | + |
| 57 | + assert_eq!(left_len, right_len); |
| 58 | + assert_eq!(dest_len, right_len); |
| 59 | + |
| 60 | + for i in 0..dest_len { |
| 61 | + let left = this.read_scalar(&this.project_index(&left, i)?)?.to_u8()?; |
| 62 | + let right = this.read_scalar(&this.project_index(&right, i)?)?.to_u8()?; |
| 63 | + let dest = this.project_index(&dest, i)?; |
| 64 | + this.write_scalar(Scalar::from_u8(gf2p8_mul(left, right)), &dest)?; |
| 65 | + } |
| 66 | + } |
| 67 | + _ => return interp_ok(EmulateItemResult::NotSupported), |
| 68 | + } |
| 69 | + interp_ok(EmulateItemResult::NeedsReturn) |
| 70 | + } |
| 71 | +} |
| 72 | + |
| 73 | +/// Calculates the affine transformation `right * left + imm8` inside the finite field GF(2^8). |
| 74 | +/// `right` is an 8x8 bit matrix, `left` and `imm8` are bit vectors. |
| 75 | +/// If `inverse` is set, then the inverse transformation with respect to the reduction polynomial |
| 76 | +/// x^8 + x^4 + x^3 + x + 1 is performed instead. |
| 77 | +fn affine_transform<'tcx>( |
| 78 | + this: &mut MiriInterpCx<'tcx>, |
| 79 | + left: &OpTy<'tcx>, |
| 80 | + right: &OpTy<'tcx>, |
| 81 | + imm8: &OpTy<'tcx>, |
| 82 | + dest: &MPlaceTy<'tcx>, |
| 83 | + inverse: bool, |
| 84 | +) -> InterpResult<'tcx, ()> { |
| 85 | + let (left, left_len) = this.project_to_simd(left)?; |
| 86 | + let (right, right_len) = this.project_to_simd(right)?; |
| 87 | + let (dest, dest_len) = this.project_to_simd(dest)?; |
| 88 | + |
| 89 | + assert_eq!(dest_len, right_len); |
| 90 | + assert_eq!(dest_len, left_len); |
| 91 | + |
| 92 | + let imm8 = this.read_scalar(imm8)?.to_u8()?; |
| 93 | + |
| 94 | + // Each 8x8 bit matrix gets multiplied with eight bit vectors. |
| 95 | + // Therefore, the iteration is done in chunks of eight. |
| 96 | + for i in (0..dest_len).step_by(8) { |
| 97 | + // Get the bit matrix. |
| 98 | + let mut matrix = [0u8; 8]; |
| 99 | + for j in 0..8 { |
| 100 | + matrix[usize::try_from(j).unwrap()] = |
| 101 | + this.read_scalar(&this.project_index(&right, i.wrapping_add(j))?)?.to_u8()?; |
| 102 | + } |
| 103 | + |
| 104 | + // Multiply the matrix with the vector and perform the addition. |
| 105 | + for j in 0..8 { |
| 106 | + let index = i.wrapping_add(j); |
| 107 | + let left = this.read_scalar(&this.project_index(&left, index)?)?.to_u8()?; |
| 108 | + let left = if inverse { TABLE[usize::from(left)] } else { left }; |
| 109 | + |
| 110 | + let mut res = 0; |
| 111 | + |
| 112 | + // Do the matrix multiplication. |
| 113 | + for bit in 0u8..8 { |
| 114 | + let mut b = matrix[usize::from(bit)] & left; |
| 115 | + |
| 116 | + // Calculate the parity bit. |
| 117 | + b = (b & 0b1111) ^ (b >> 4); |
| 118 | + b = (b & 0b11) ^ (b >> 2); |
| 119 | + b = (b & 0b1) ^ (b >> 1); |
| 120 | + |
| 121 | + res |= b << 7u8.wrapping_sub(bit); |
| 122 | + } |
| 123 | + |
| 124 | + // Perform the addition. |
| 125 | + res ^= imm8; |
| 126 | + |
| 127 | + let dest = this.project_index(&dest, index)?; |
| 128 | + this.write_scalar(Scalar::from_u8(res), &dest)?; |
| 129 | + } |
| 130 | + } |
| 131 | + |
| 132 | + interp_ok(()) |
| 133 | +} |
| 134 | + |
| 135 | +/// A lookup table for computing the inverse byte for the inverse affine transformation. |
| 136 | +// This is a evaluated at compile time. Trait based conversion is not available. |
| 137 | +/// See <https://www.corsix.org/content/galois-field-instructions-2021-cpus> for the |
| 138 | +/// definition of `gf_inv` which was used for the creation of this table. |
| 139 | +#[allow(clippy::cast_possible_truncation)] |
| 140 | +static TABLE: [u8; 256] = { |
| 141 | + let mut array = [0; 256]; |
| 142 | + |
| 143 | + let mut i = 1; |
| 144 | + while i < 256 { |
| 145 | + let mut x = i as u8; |
| 146 | + let mut y = gf2p8_mul(x, x); |
| 147 | + x = y; |
| 148 | + let mut j = 2; |
| 149 | + while j < 8 { |
| 150 | + x = gf2p8_mul(x, x); |
| 151 | + y = gf2p8_mul(x, y); |
| 152 | + j += 1; |
| 153 | + } |
| 154 | + array[i] = y; |
| 155 | + i += 1; |
| 156 | + } |
| 157 | + |
| 158 | + array |
| 159 | +}; |
| 160 | + |
/// Multiplies the two bytes `left` and `right` as elements of the finite field GF(2^8)
/// and returns the product. The field GF(2^8) is represented in
/// polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1.
/// See <https://www.corsix.org/content/galois-field-instructions-2021-cpus> for details.
// This is a const function. Trait based conversion is not available.
#[allow(clippy::cast_possible_truncation)]
const fn gf2p8_mul(left: u8, right: u8) -> u8 {
    // This implementation is based on the `gf2p8mul_byte` definition found inside the Intel intrinsics guide.
    // See https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=gf2p8mul
    // for more information.

    // Bit pattern of x^8 + x^4 + x^3 + x + 1.
    const REDUCTION_POLYNOMIAL: u32 = 0x11b;

    let multiplier = left as u32;
    let multiplicand = right as u32;

    // Step 1: carry-less multiplication. For every set bit of the multiplier, XOR in a
    // correspondingly shifted copy of the multiplicand. The result has degree at most 14.
    let mut product = 0u32;
    let mut bit = 0u32;
    while bit < 8 {
        if (multiplier >> bit) & 1 == 1 {
            product ^= multiplicand << bit;
        }
        bit = bit.wrapping_add(1);
    }

    // Step 2: reduction. Clear the terms of degree 14 down to 8, highest first, by
    // XORing in the reduction polynomial aligned to the term being cleared.
    let mut degree = 14u32;
    while degree > 7 {
        if (product >> degree) & 1 == 1 {
            product ^= REDUCTION_POLYNOMIAL << degree.wrapping_sub(8);
        }
        degree = degree.wrapping_sub(1);
    }

    // After the reduction only the low eight bits can be set.
    product as u8
}
0 commit comments