Fix up more SSE implementations for nontrapping-fp (#22931)

dschuff · web-flow · commit 10f1a2c05c55 · 2024-11-15T11:40:31.000-08:00
Fixes lto2.test_sse1 and test_sse2 with checks similar to #22911 and #22893
diff --git a/system/include/compat/emmintrin.h b/system/include/compat/emmintrin.h
@@ -449,8 +449,7 @@ _mm_cvttsd_si32(__m128d __a)
 {
   // TODO: OPTIMIZE!
   float elem = __a[0];
-  if (isnan(elem) || elem > INT_MAX || elem < INT_MIN) return (int)0x80000000;
-  if (lrint(elem) != 0 || fabs(elem) < 2.0)
+  if ((lrint(elem) != 0 || fabs(elem) < 2.0) && !isnanf(elem) && elem <= INT_MAX && elem >= INT_MIN)
     // Use the trapping instruction here since we have explicit bounds checks
     // above.
     return __builtin_wasm_trunc_s_i32_f32(elem);
@@ -1008,9 +1007,10 @@ static __inline__ long long __attribute__((__always_inline__, __nodebug__))
 _mm_cvtsd_si64(__m128d __a)
 {
   // TODO: optimize
-  if (isnan(__a[0]) || isinf(__a[0])) return 0x8000000000000000LL;
-  long long x = llrint(__a[0]);
-  if (x != 0xFFFFFFFF00000000ULL && (x != 0 || fabs(__a[0]) < 2.f))
+  double e = __a[0];
+  if (isnan(e) || isinf(e)) return 0x8000000000000000LL;
+  long long x = llrint(e);
+  if (x != 0xFFFFFFFF00000000ULL && (x != 0 || fabs(e) < 2.f) && e <= LLONG_MAX && e >= LLONG_MIN)
     return x;
   else
     return 0x8000000000000000LL;
diff --git a/system/include/compat/xmmintrin.h b/system/include/compat/xmmintrin.h
@@ -596,8 +596,9 @@ _mm_cvtsi32_ss(__m128 __a, int __b)
 
 static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) _mm_cvtss_si32(__m128 __a)
 {
-  int x = lrint(((__f32x4)__a)[0]);
-  if (x != 0 || fabsf(((__f32x4)__a)[0]) < 2.f)
+  float e = ((__f32x4)__a)[0];
+  int x = lrint(e);
+  if ((x != 0 || fabsf(e) < 2.f) && !isnan(e) && e <= INT_MAX && e >= INT_MIN)
     return x;
   else
     return (int)0x80000000;
@@ -607,9 +608,8 @@ static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SL
 static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) _mm_cvttss_si32(__m128 __a)
 {
   float e = ((__f32x4)__a)[0];
-  if (isnanf(e) || e > INT_MAX || e < INT_MIN) return (int)0x80000000;
   int x = lrint(e);
-  if ((x != 0 || fabsf(e) < 2.f))
+  if ((x != 0 || fabsf(e) < 2.f) && !isnanf(e) && e <= INT_MAX && e >= INT_MIN)
     return (int)e;
   else
     return (int)0x80000000;
@@ -627,9 +627,9 @@ _mm_cvtsi64_ss(__m128 __a, long long __b)
 static __inline__ long long __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW))
 _mm_cvtss_si64(__m128 __a)
 {
-  if (isnan(((__f32x4)__a)[0]) || isinf(((__f32x4)__a)[0])) return 0x8000000000000000LL;
-  long long x = llrintf(((__f32x4)__a)[0]);
-  if (x != 0xFFFFFFFF00000000ULL && (x != 0 || fabsf(((__f32x4)__a)[0]) < 2.f))
+  float e = ((__f32x4)__a)[0];
+  long long x = llrintf(e);
+  if ((x != 0xFFFFFFFF00000000ULL && (x != 0 || fabsf(e) < 2.f)) && !isnanf(e) && e <= LLONG_MAX && e >= LLONG_MIN)
     return x;
   else
     return 0x8000000000000000LL;
@@ -639,9 +639,8 @@ static __inline__ long long __attribute__((__always_inline__, __nodebug__, DIAGN
 _mm_cvttss_si64(__m128 __a)
 {
   float e = ((__f32x4)__a)[0];
-  if (isnan(e) || isinf(e) || e > LLONG_MAX || e < LLONG_MIN) return 0x8000000000000000LL;
   long long x = llrintf(e);
-  if (x != 0xFFFFFFFF00000000ULL && (x != 0 || fabsf(e) < 2.f))
+  if (x != 0xFFFFFFFF00000000ULL && (x != 0 || fabsf(e) < 2.f) && !isnanf(e) && e <= LLONG_MAX && e >= LLONG_MIN)
     return (long long)e;
   else
     return 0x8000000000000000LL;