@@ -176,34 +176,70 @@ where
176
176
unsafe { & mut * ( self as * mut Self as * mut [ T ; N ] ) }
177
177
}
178
178
179
+ /// Load a vector from an array of `T`.
180
+ ///
181
+ /// This function is necessary since `repr(simd)` has padding for non-power-of-2 vectors (at the time of writing).
182
+ /// With padding, `read_unaligned` will read past the end of an array of N elements.
183
+ ///
184
+ /// # Safety
185
+ /// Reading `ptr` must be safe, as if by `<*const [T; N]>::read_unaligned`.
186
+ const unsafe fn load ( ptr : * const [ T ; N ] ) -> Self {
187
+ // There are potentially simpler ways to write this function, but this should result in
188
+ // LLVM `load <N x T>`
189
+
190
+ let mut tmp = core:: mem:: MaybeUninit :: < Self > :: uninit ( ) ;
191
+ // SAFETY: `Simd<T, N>` always contains `N` elements of type `T`. It may have padding
192
+ // which does not need to be initialized. The safety of reading `ptr` is ensured by the
193
+ // caller.
194
+ unsafe {
195
+ core:: ptr:: copy_nonoverlapping ( ptr, tmp. as_mut_ptr ( ) . cast ( ) , 1 ) ;
196
+ tmp. assume_init ( )
197
+ }
198
+ }
199
+
200
+ /// Store a vector to an array of `T`.
201
+ ///
202
+ /// See `load` as to why this function is necessary.
203
+ ///
204
+ /// # Safety
205
+ /// Writing to `ptr` must be safe, as if by `<*mut [T; N]>::write_unaligned`.
206
+ const unsafe fn store ( self , ptr : * mut [ T ; N ] ) {
207
+ // There are potentially simpler ways to write this function, but this should result in
208
+ // LLVM `store <N x T>`
209
+
210
+ // Creating a temporary helps LLVM turn the memcpy into a store.
211
+ let tmp = self ;
212
+ // SAFETY: `Simd<T, N>` always contains `N` elements of type `T`. The safety of writing
213
+ // `ptr` is ensured by the caller.
214
+ unsafe { core:: ptr:: copy_nonoverlapping ( tmp. as_array ( ) , ptr, 1 ) }
215
+ }
216
+
179
217
/// Converts an array to a SIMD vector.
180
218
pub const fn from_array ( array : [ T ; N ] ) -> Self {
181
- // SAFETY: Transmuting between `Simd<T, N>` and `[T; N]`
182
- // is always valid. We need to use `read_unaligned` here, since
183
- // the array may have a lower alignment than the vector.
219
+ // SAFETY: `&array` is safe to read.
184
220
//
185
- // FIXME: We currently use a pointer read instead of `transmute_copy` because
186
- // it results in better codegen with optimizations disabled, but we should
187
- // probably just use `transmute` once that works on const generic types.
221
+ // FIXME: We currently use a pointer load instead of `transmute_copy` because `repr(simd)`
222
+ // results in padding for non-power-of-2 vectors (so vectors are larger than arrays).
188
223
//
189
224
// NOTE: This deliberately doesn't just use `Self(array)`, see the comment
190
225
// on the struct definition for details.
191
- unsafe { ( & array as * const [ T ; N ] as * const Self ) . read_unaligned ( ) }
226
+ unsafe { Self :: load ( & array) }
192
227
}
193
228
194
229
/// Converts a SIMD vector to an array.
195
230
pub const fn to_array ( self ) -> [ T ; N ] {
196
- // SAFETY: Transmuting between `Simd<T, N>` and `[T; N]`
197
- // is always valid. No need to use `read_unaligned` here, since
198
- // the vector never has a lower alignment than the array.
231
+ let mut tmp = core:: mem:: MaybeUninit :: uninit ( ) ;
232
+ // SAFETY: writing to `tmp` is safe and initializes it.
199
233
//
200
- // FIXME: We currently use a pointer read instead of `transmute_copy` because
201
- // it results in better codegen with optimizations disabled, but we should
202
- // probably just use `transmute` once that works on const generic types.
234
+ // FIXME: We currently use a pointer store instead of `transmute_copy` because `repr(simd)`
235
+ // results in padding for non-power-of-2 vectors (so vectors are larger than arrays).
203
236
//
204
237
// NOTE: This deliberately doesn't just use `self.0`, see the comment
205
238
// on the struct definition for details.
206
- unsafe { ( & self as * const Self as * const [ T ; N ] ) . read ( ) }
239
+ unsafe {
240
+ self . store ( tmp. as_mut_ptr ( ) ) ;
241
+ tmp. assume_init ( )
242
+ }
207
243
}
208
244
209
245
/// Converts a slice to a SIMD vector containing `slice[..N]`.
0 commit comments