@@ -793,6 +793,7 @@ public static unsafe void AddScalarU(float scalar, Span<float> dst)
793
793
{
794
794
float * pDstEnd = pdst + dst . Length ;
795
795
float * pDstCurrent = pdst ;
796
+ // Last position at which a full 4-float (Vector128) step still fits before pDstEnd.
+ // Declared as float* because it is derived from, and compared against, float* values.
+ float * destinationEnd = pDstEnd - 4 ;
796
797
797
798
Vector256 < float > scalarVector256 = Avx . SetAllVector256 ( scalar ) ;
798
799
@@ -807,7 +808,7 @@ public static unsafe void AddScalarU(float scalar, Span<float> dst)
807
808
808
809
Vector128 < float > scalarVector128 = Sse . SetAllVector128 ( scalar ) ;
809
810
810
- if ( pDstCurrent + 4 <= pDstEnd )
811
+ if ( pDstCurrent <= destinationEnd )
811
812
{
812
813
Vector128 < float > dstVector = Sse . LoadVector128 ( pDstCurrent ) ;
813
814
dstVector = Sse . Add ( dstVector , scalarVector128 ) ;
@@ -956,6 +957,7 @@ public static unsafe void ScaleSrcU(float scale, ReadOnlySpan<float> src, Span<f
956
957
float * pDstEnd = pdst + count ;
957
958
float * pSrcCurrent = psrc ;
958
959
float * pDstCurrent = pdst ;
960
+ // Last destination position at which a full 4-float (Vector128) step still fits before pDstEnd.
+ // Declared as float* because it is derived from, and compared against, float* values.
+ float * destinationEnd = pDstEnd - 4 ;
959
961
960
962
Vector256 < float > scaleVector256 = Avx . SetAllVector256 ( scale ) ;
961
963
@@ -971,7 +973,7 @@ public static unsafe void ScaleSrcU(float scale, ReadOnlySpan<float> src, Span<f
971
973
972
974
Vector128 < float > scaleVector128 = Sse . SetAllVector128 ( scale ) ;
973
975
974
- if ( pDstCurrent + 4 <= pDstEnd )
976
+ if ( pDstCurrent <= destinationEnd )
975
977
{
976
978
Vector128 < float > srcVector = Sse . LoadVector128 ( pSrcCurrent ) ;
977
979
srcVector = Sse . Multiply ( srcVector , scaleVector128 ) ;
@@ -1000,6 +1002,7 @@ public static unsafe void ScaleAddU(float a, float b, Span<float> dst)
1000
1002
{
1001
1003
float * pDstEnd = pdst + dst . Length ;
1002
1004
float * pDstCurrent = pdst ;
1005
+ // Last position at which a full 4-float (Vector128) step still fits before pDstEnd.
+ // Declared as float* because it is derived from, and compared against, float* values.
+ float * destinationEnd = pDstEnd - 4 ;
1003
1006
1004
1007
Vector256 < float > a256 = Avx . SetAllVector256 ( a ) ;
1005
1008
Vector256 < float > b256 = Avx . SetAllVector256 ( b ) ;
@@ -1017,7 +1020,7 @@ public static unsafe void ScaleAddU(float a, float b, Span<float> dst)
1017
1020
Vector128 < float > a128 = Sse . SetAllVector128 ( a ) ;
1018
1021
Vector128 < float > b128 = Sse . SetAllVector128 ( b ) ;
1019
1022
1020
- if ( pDstCurrent + 4 <= pDstEnd )
1023
+ if ( pDstCurrent <= destinationEnd )
1021
1024
{
1022
1025
Vector128 < float > dstVector = Sse . LoadVector128 ( pDstCurrent ) ;
1023
1026
dstVector = Sse . Add ( dstVector , b128 ) ;
0 commit comments