Skip to content

Commit 2e0033e

Browse files
committed
Removed out-of-bounds pointer access in the AddScalarU SSE and AVX intrinsics
1 parent 9383dd1 commit 2e0033e

File tree

2 files changed

+20
-14
lines changed

2 files changed

+20
-14
lines changed

src/Microsoft.ML.CpuMath/AvxIntrinsics.cs

+12-8
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,10 @@ internal static class AvxIntrinsics
2020
{
2121
private static readonly Vector256<float> _absMask256 = Avx.StaticCast<int, float>(Avx.SetAllVector256(0x7FFFFFFF));
2222

23+
// The count of 32-bit floats in Vector256<T>
24+
private const int AvxAlignment = 8;
25+
26+
// The count of bytes in Vector256<T>, corresponding to _cbAlign in AlignedArray
2327
private const int Vector256Alignment = 32;
2428

2529
[MethodImplAttribute(MethodImplOptions.AggressiveInlining)]
@@ -415,32 +419,32 @@ public static unsafe void AddScalarU(float scalar, Span<float> dst)
415419
{
416420
fixed (float* pdst = dst)
417421
{
418-
float* pDstEnd = pdst + dst.Length;
419-
float* pDstCurrent = pdst;
420-
421422
Vector256<float> scalarVector256 = Avx.SetAllVector256(scalar);
423+
int countAvx = Math.DivRem(dst.Length, AvxAlignment, out int remainderAvx);
424+
float* pDstCurrent = pdst;
422425

423-
while (pDstCurrent + 8 <= pDstEnd)
426+
for (int i = 0; i < countAvx; i++)
424427
{
425428
Vector256<float> dstVector = Avx.LoadVector256(pDstCurrent);
426429
dstVector = Avx.Add(dstVector, scalarVector256);
427430
Avx.Store(pDstCurrent, dstVector);
428431

429-
pDstCurrent += 8;
432+
pDstCurrent += AvxAlignment;
430433
}
431434

432435
Vector128<float> scalarVector128 = Sse.SetAllVector128(scalar);
436+
int countSse = Math.DivRem(remainderAvx, SseIntrinsics.SseAlignment, out int remainderSse);
433437

434-
if (pDstCurrent + 4 <= pDstEnd)
438+
if (countSse > 0)
435439
{
436440
Vector128<float> dstVector = Sse.LoadVector128(pDstCurrent);
437441
dstVector = Sse.Add(dstVector, scalarVector128);
438442
Sse.Store(pDstCurrent, dstVector);
439443

440-
pDstCurrent += 4;
444+
pDstCurrent += SseIntrinsics.SseAlignment;
441445
}
442446

443-
while (pDstCurrent < pDstEnd)
447+
for (int i = 0; i < remainderSse; i++)
444448
{
445449
Vector128<float> dstVector = Sse.LoadScalarVector128(pDstCurrent);
446450
dstVector = Sse.AddScalar(dstVector, scalarVector128);

src/Microsoft.ML.CpuMath/SseIntrinsics.cs

+8-6
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,9 @@ internal static class SseIntrinsics
2626
Sse.StaticCast<int, float>(Sse2.SetAllVector128(0x7FFFFFFF)) :
2727
Sse.SetAllVector128(BitConverter.Int32BitsToSingle(0x7FFFFFFF));
2828

29+
// The count of 32-bit floats in Vector128<T>
30+
internal const int SseAlignment = 4;
31+
2932
// The count of bytes in Vector128<T>, corresponding to _cbAlign in AlignedArray
3033
private const int Vector128Alignment = 16;
3134

@@ -412,21 +415,20 @@ public static unsafe void AddScalarU(float scalar, Span<float> dst)
412415
{
413416
fixed (float* pdst = dst)
414417
{
415-
float* pDstEnd = pdst + dst.Length;
416-
float* pDstCurrent = pdst;
417-
418418
Vector128<float> scalarVector = Sse.SetAllVector128(scalar);
419+
int count = Math.DivRem(dst.Length, SseAlignment, out int remainder);
420+
float* pDstCurrent = pdst;
419421

420-
while (pDstCurrent + 4 <= pDstEnd)
422+
for (int i = 0; i < count; i++)
421423
{
422424
Vector128<float> dstVector = Sse.LoadVector128(pDstCurrent);
423425
dstVector = Sse.Add(dstVector, scalarVector);
424426
Sse.Store(pDstCurrent, dstVector);
425427

426-
pDstCurrent += 4;
428+
pDstCurrent += SseAlignment;
427429
}
428430

429-
while (pDstCurrent < pDstEnd)
431+
for (int i = 0; i < remainder; i++)
430432
{
431433
Vector128<float> dstVector = Sse.LoadScalarVector128(pDstCurrent);
432434
dstVector = Sse.AddScalar(dstVector, scalarVector);

0 commit comments

Comments
 (0)