diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index e020710c7aa4f..35493c4a934d9 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -651,6 +651,9 @@ The integer elementwise intrinsics, including ``__builtin_elementwise_popcount``
 ``__builtin_elementwise_bitreverse``, ``__builtin_elementwise_add_sat``,
 ``__builtin_elementwise_sub_sat`` can be called in a ``constexpr`` context.
 
+No implicit promotion of integer types takes place. The mixing of integer types
+of different sizes and signs is forbidden in binary and ternary builtins.
+
 ============================================== ====================================================================== =========================================
          Name                                   Operation                                                             Supported element types
 ============================================== ====================================================================== =========================================
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index af59b7f38c71a..72bd7639bd1ad 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -2323,7 +2323,8 @@ class Sema final : public SemaBase {
                          const FunctionProtoType *Proto);
 
   /// \param FPOnly restricts the arguments to floating-point types.
-  bool BuiltinVectorMath(CallExpr *TheCall, QualType &Res, bool FPOnly = false);
+  std::optional<QualType> BuiltinVectorMath(CallExpr *TheCall,
+                                            bool FPOnly = false);
   bool BuiltinVectorToScalarMath(CallExpr *TheCall);
 
   void checkLifetimeCaptureBy(FunctionDecl *FDecl, bool IsMemberFunction,
@@ -7491,10 +7492,15 @@ class Sema final : public SemaBase {
     return K == ConditionKind::Switch ? Context.IntTy : Context.BoolTy;
   }
 
-  // UsualUnaryConversions - promotes integers (C99 6.3.1.1p2) and converts
-  // functions and arrays to their respective pointers (C99 6.3.2.1).
+  // UsualUnaryConversions - promotes integers (C99 6.3.1.1p2), converts
+  // functions and arrays to their respective pointers (C99 6.3.2.1), and
+  // promotes floating-piont types according to the language semantics.
   ExprResult UsualUnaryConversions(Expr *E);
 
+  // UsualUnaryFPConversions - promotes floating-point types according to the
+  // current language semantics.
+  ExprResult UsualUnaryFPConversions(Expr *E);
+
   /// CallExprUnaryConversions - a special case of an unary conversion
   /// performed on a function designator of a call expression.
   ExprResult CallExprUnaryConversions(Expr *E);
@@ -7557,6 +7563,11 @@ class Sema final : public SemaBase {
   ExprResult DefaultVariadicArgumentPromotion(Expr *E, VariadicCallType CT,
                                               FunctionDecl *FDecl);
 
+  // Check that the usual arithmetic conversions can be performed on this pair
+  // of expressions that might be of enumeration type.
+  void checkEnumArithmeticConversions(Expr *LHS, Expr *RHS, SourceLocation Loc,
+                                      Sema::ArithConvKind ACK);
+
   // UsualArithmeticConversions - performs the UsualUnaryConversions on it's
   // operands and then handles various conversions that are common to binary
   // operators (C99 6.3.1.8). If both operands aren't arithmetic, this
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index ce846ae88c38b..75bfdf9bc76af 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -14594,11 +14594,23 @@ void Sema::CheckAddressOfPackedMember(Expr *rhs) {
                      _2, _3, _4));
 }
 
+// Performs a similar job to Sema::UsualUnaryConversions, but without any
+// implicit promotion of integral/enumeration types.
+static ExprResult BuiltinVectorMathConversions(Sema &S, Expr *E) {
+  // First, convert to an r-value.
+  ExprResult Res = S.DefaultFunctionArrayLvalueConversion(E);
+  if (Res.isInvalid())
+    return ExprError();
+
+  // Promote floating-point types.
+  return S.UsualUnaryFPConversions(Res.get());
+}
+
 bool Sema::PrepareBuiltinElementwiseMathOneArgCall(CallExpr *TheCall) {
   if (checkArgCount(TheCall, 1))
     return true;
 
-  ExprResult A = UsualUnaryConversions(TheCall->getArg(0));
+  ExprResult A = BuiltinVectorMathConversions(*this, TheCall->getArg(0));
   if (A.isInvalid())
     return true;
 
@@ -14613,57 +14625,77 @@ bool Sema::PrepareBuiltinElementwiseMathOneArgCall(CallExpr *TheCall) {
 }
 
 bool Sema::BuiltinElementwiseMath(CallExpr *TheCall, bool FPOnly) {
-  QualType Res;
-  if (BuiltinVectorMath(TheCall, Res, FPOnly))
-    return true;
-  TheCall->setType(Res);
-  return false;
+  if (auto Res = BuiltinVectorMath(TheCall, FPOnly); Res.has_value()) {
+    TheCall->setType(*Res);
+    return false;
+  }
+  return true;
 }
 
 bool Sema::BuiltinVectorToScalarMath(CallExpr *TheCall) {
-  QualType Res;
-  if (BuiltinVectorMath(TheCall, Res))
+  std::optional<QualType> Res = BuiltinVectorMath(TheCall);
+  if (!Res)
     return true;
 
-  if (auto *VecTy0 = Res->getAs<VectorType>())
+  if (auto *VecTy0 = (*Res)->getAs<VectorType>())
     TheCall->setType(VecTy0->getElementType());
   else
-    TheCall->setType(Res);
+    TheCall->setType(*Res);
 
   return false;
 }
 
-bool Sema::BuiltinVectorMath(CallExpr *TheCall, QualType &Res, bool FPOnly) {
+static bool checkBuiltinVectorMathMixedEnums(Sema &S, Expr *LHS, Expr *RHS,
+                                             SourceLocation Loc) {
+  QualType L = LHS->getEnumCoercedType(S.Context),
+           R = RHS->getEnumCoercedType(S.Context);
+  if (L->isUnscopedEnumerationType() && R->isUnscopedEnumerationType() &&
+      !S.Context.hasSameUnqualifiedType(L, R)) {
+    return S.Diag(Loc, diag::err_conv_mixed_enum_types_cxx26)
+           << LHS->getSourceRange() << RHS->getSourceRange()
+           << /*Arithmetic Between*/ 0 << L << R;
+  }
+  return false;
+}
+
+std::optional<QualType> Sema::BuiltinVectorMath(CallExpr *TheCall,
+                                                bool FPOnly) {
   if (checkArgCount(TheCall, 2))
-    return true;
+    return std::nullopt;
 
-  ExprResult A = TheCall->getArg(0);
-  ExprResult B = TheCall->getArg(1);
-  // Do standard promotions between the two arguments, returning their common
-  // type.
-  Res = UsualArithmeticConversions(A, B, TheCall->getExprLoc(), ACK_Comparison);
-  if (A.isInvalid() || B.isInvalid())
-    return true;
+  if (checkBuiltinVectorMathMixedEnums(
+          *this, TheCall->getArg(0), TheCall->getArg(1), TheCall->getExprLoc()))
+    return std::nullopt;
 
-  QualType TyA = A.get()->getType();
-  QualType TyB = B.get()->getType();
+  Expr *Args[2];
+  for (int I = 0; I < 2; ++I) {
+    ExprResult Converted =
+        BuiltinVectorMathConversions(*this, TheCall->getArg(I));
+    if (Converted.isInvalid())
+      return std::nullopt;
+    Args[I] = Converted.get();
+  }
 
-  if (Res.isNull() || TyA.getCanonicalType() != TyB.getCanonicalType())
-    return Diag(A.get()->getBeginLoc(),
-                diag::err_typecheck_call_different_arg_types)
-           << TyA << TyB;
+  SourceLocation LocA = Args[0]->getBeginLoc();
+  QualType TyA = Args[0]->getType();
+  QualType TyB = Args[1]->getType();
+
+  if (TyA.getCanonicalType() != TyB.getCanonicalType()) {
+    Diag(LocA, diag::err_typecheck_call_different_arg_types) << TyA << TyB;
+    return std::nullopt;
+  }
 
   if (FPOnly) {
-    if (checkFPMathBuiltinElementType(*this, A.get()->getBeginLoc(), TyA, 1))
-      return true;
+    if (checkFPMathBuiltinElementType(*this, LocA, TyA, 1))
+      return std::nullopt;
   } else {
-    if (checkMathBuiltinElementType(*this, A.get()->getBeginLoc(), TyA, 1))
-      return true;
+    if (checkMathBuiltinElementType(*this, LocA, TyA, 1))
+      return std::nullopt;
   }
 
-  TheCall->setArg(0, A.get());
-  TheCall->setArg(1, B.get());
-  return false;
+  TheCall->setArg(0, Args[0]);
+  TheCall->setArg(1, Args[1]);
+  return TyA;
 }
 
 bool Sema::BuiltinElementwiseTernaryMath(CallExpr *TheCall,
@@ -14671,9 +14703,17 @@ bool Sema::BuiltinElementwiseTernaryMath(CallExpr *TheCall,
   if (checkArgCount(TheCall, 3))
     return true;
 
+  SourceLocation Loc = TheCall->getExprLoc();
+  if (checkBuiltinVectorMathMixedEnums(*this, TheCall->getArg(0),
+                                       TheCall->getArg(1), Loc) ||
+      checkBuiltinVectorMathMixedEnums(*this, TheCall->getArg(1),
+                                       TheCall->getArg(2), Loc))
+    return true;
+
   Expr *Args[3];
   for (int I = 0; I < 3; ++I) {
-    ExprResult Converted = UsualUnaryConversions(TheCall->getArg(I));
+    ExprResult Converted =
+        BuiltinVectorMathConversions(*this, TheCall->getArg(I));
     if (Converted.isInvalid())
       return true;
     Args[I] = Converted.get();
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 562c98c6babe0..b264b2a33f9e9 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -776,20 +776,11 @@ ExprResult Sema::CallExprUnaryConversions(Expr *E) {
   return Res.get();
 }
 
-/// UsualUnaryConversions - Performs various conversions that are common to most
-/// operators (C99 6.3). The conversions of array and function types are
-/// sometimes suppressed. For example, the array->pointer conversion doesn't
-/// apply if the array is an argument to the sizeof or address (&) operators.
-/// In these instances, this routine should *not* be called.
-ExprResult Sema::UsualUnaryConversions(Expr *E) {
-  // First, convert to an r-value.
-  ExprResult Res = DefaultFunctionArrayLvalueConversion(E);
-  if (Res.isInvalid())
-    return ExprError();
-  E = Res.get();
-
+/// UsualUnaryFPConversions - Promotes floating-point types according to the
+/// current language semantics.
+ExprResult Sema::UsualUnaryFPConversions(Expr *E) {
   QualType Ty = E->getType();
-  assert(!Ty.isNull() && "UsualUnaryConversions - missing type");
+  assert(!Ty.isNull() && "UsualUnaryFPConversions - missing type");
 
   LangOptions::FPEvalMethodKind EvalMethod = CurFPFeatures.getFPEvalMethod();
   if (EvalMethod != LangOptions::FEM_Source && Ty->isFloatingType() &&
@@ -827,7 +818,30 @@ ExprResult Sema::UsualUnaryConversions(Expr *E) {
 
   // Half FP have to be promoted to float unless it is natively supported
   if (Ty->isHalfType() && !getLangOpts().NativeHalfType)
-    return ImpCastExprToType(Res.get(), Context.FloatTy, CK_FloatingCast);
+    return ImpCastExprToType(E, Context.FloatTy, CK_FloatingCast);
+
+  return E;
+}
+
+/// UsualUnaryConversions - Performs various conversions that are common to most
+/// operators (C99 6.3). The conversions of array and function types are
+/// sometimes suppressed. For example, the array->pointer conversion doesn't
+/// apply if the array is an argument to the sizeof or address (&) operators.
+/// In these instances, this routine should *not* be called.
+ExprResult Sema::UsualUnaryConversions(Expr *E) {
+  // First, convert to an r-value.
+  ExprResult Res = DefaultFunctionArrayLvalueConversion(E);
+  if (Res.isInvalid())
+    return ExprError();
+
+  // Promote floating-point types.
+  Res = UsualUnaryFPConversions(Res.get());
+  if (Res.isInvalid())
+    return ExprError();
+  E = Res.get();
+
+  QualType Ty = E->getType();
+  assert(!Ty.isNull() && "UsualUnaryConversions - missing type");
 
   // Try to perform integral promotions if the object has a theoretically
   // promotable type.
@@ -1489,9 +1503,9 @@ static QualType handleFixedPointConversion(Sema &S, QualType LHSTy,
 
 /// Check that the usual arithmetic conversions can be performed on this pair of
 /// expressions that might be of enumeration type.
-static void checkEnumArithmeticConversions(Sema &S, Expr *LHS, Expr *RHS,
-                                           SourceLocation Loc,
-                                           Sema::ArithConvKind ACK) {
+void Sema::checkEnumArithmeticConversions(Expr *LHS, Expr *RHS,
+                                          SourceLocation Loc,
+                                          Sema::ArithConvKind ACK) {
   // C++2a [expr.arith.conv]p1:
   //   If one operand is of enumeration type and the other operand is of a
   //   different enumeration type or a floating-point type, this behavior is
@@ -1499,54 +1513,53 @@ static void checkEnumArithmeticConversions(Sema &S, Expr *LHS, Expr *RHS,
   //
   // Warn on this in all language modes. Produce a deprecation warning in C++20.
   // Eventually we will presumably reject these cases (in C++23 onwards?).
-  QualType L = LHS->getEnumCoercedType(S.Context),
-           R = RHS->getEnumCoercedType(S.Context);
+  QualType L = LHS->getEnumCoercedType(Context),
+           R = RHS->getEnumCoercedType(Context);
   bool LEnum = L->isUnscopedEnumerationType(),
        REnum = R->isUnscopedEnumerationType();
   bool IsCompAssign = ACK == Sema::ACK_CompAssign;
   if ((!IsCompAssign && LEnum && R->isFloatingType()) ||
       (REnum && L->isFloatingType())) {
-    S.Diag(Loc, S.getLangOpts().CPlusPlus26
-                    ? diag::err_arith_conv_enum_float_cxx26
-                : S.getLangOpts().CPlusPlus20
-                    ? diag::warn_arith_conv_enum_float_cxx20
-                    : diag::warn_arith_conv_enum_float)
+    Diag(Loc, getLangOpts().CPlusPlus26 ? diag::err_arith_conv_enum_float_cxx26
+              : getLangOpts().CPlusPlus20
+                  ? diag::warn_arith_conv_enum_float_cxx20
+                  : diag::warn_arith_conv_enum_float)
         << LHS->getSourceRange() << RHS->getSourceRange() << (int)ACK << LEnum
         << L << R;
   } else if (!IsCompAssign && LEnum && REnum &&
-             !S.Context.hasSameUnqualifiedType(L, R)) {
+             !Context.hasSameUnqualifiedType(L, R)) {
     unsigned DiagID;
     // In C++ 26, usual arithmetic conversions between 2 different enum types
     // are ill-formed.
-    if (S.getLangOpts().CPlusPlus26)
+    if (getLangOpts().CPlusPlus26)
       DiagID = diag::err_conv_mixed_enum_types_cxx26;
     else if (!L->castAs<EnumType>()->getDecl()->hasNameForLinkage() ||
              !R->castAs<EnumType>()->getDecl()->hasNameForLinkage()) {
       // If either enumeration type is unnamed, it's less likely that the
       // user cares about this, but this situation is still deprecated in
       // C++2a. Use a different warning group.
-      DiagID = S.getLangOpts().CPlusPlus20
-                    ? diag::warn_arith_conv_mixed_anon_enum_types_cxx20
-                    : diag::warn_arith_conv_mixed_anon_enum_types;
+      DiagID = getLangOpts().CPlusPlus20
+                   ? diag::warn_arith_conv_mixed_anon_enum_types_cxx20
+                   : diag::warn_arith_conv_mixed_anon_enum_types;
     } else if (ACK == Sema::ACK_Conditional) {
       // Conditional expressions are separated out because they have
       // historically had a different warning flag.
-      DiagID = S.getLangOpts().CPlusPlus20
+      DiagID = getLangOpts().CPlusPlus20
                    ? diag::warn_conditional_mixed_enum_types_cxx20
                    : diag::warn_conditional_mixed_enum_types;
     } else if (ACK == Sema::ACK_Comparison) {
       // Comparison expressions are separated out because they have
       // historically had a different warning flag.
-      DiagID = S.getLangOpts().CPlusPlus20
+      DiagID = getLangOpts().CPlusPlus20
                    ? diag::warn_comparison_mixed_enum_types_cxx20
                    : diag::warn_comparison_mixed_enum_types;
     } else {
-      DiagID = S.getLangOpts().CPlusPlus20
+      DiagID = getLangOpts().CPlusPlus20
                    ? diag::warn_arith_conv_mixed_enum_types_cxx20
                    : diag::warn_arith_conv_mixed_enum_types;
     }
-    S.Diag(Loc, DiagID) << LHS->getSourceRange() << RHS->getSourceRange()
-                        << (int)ACK << L << R;
+    Diag(Loc, DiagID) << LHS->getSourceRange() << RHS->getSourceRange()
+                      << (int)ACK << L << R;
   }
 }
 
@@ -1557,7 +1570,7 @@ static void checkEnumArithmeticConversions(Sema &S, Expr *LHS, Expr *RHS,
 QualType Sema::UsualArithmeticConversions(ExprResult &LHS, ExprResult &RHS,
                                           SourceLocation Loc,
                                           ArithConvKind ACK) {
-  checkEnumArithmeticConversions(*this, LHS.get(), RHS.get(), Loc, ACK);
+  checkEnumArithmeticConversions(LHS.get(), RHS.get(), Loc, ACK);
 
   if (ACK != ACK_CompAssign) {
     LHS = UsualUnaryConversions(LHS.get());
diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c
index 7f6b5f26eb930..5661bbc6008a0 100644
--- a/clang/test/CodeGen/builtins-elementwise-math.c
+++ b/clang/test/CodeGen/builtins-elementwise-math.c
@@ -14,10 +14,18 @@ __attribute__((address_space(1))) int int_as_one;
 typedef int bar;
 bar b;
 
+struct StructWithBitfield {
+  int i : 5;
+  short s : 3;
+  char c: 2;
+  long long int lli : 3;
+};
+
 void test_builtin_elementwise_abs(float f1, float f2, double d1, double d2,
                                   float4 vf1, float4 vf2, si8 vi1, si8 vi2,
                                   long long int i1, long long int i2, short si,
-                                  _BitInt(31) bi1, _BitInt(31) bi2) {
+                                  _BitInt(31) bi1, _BitInt(31) bi2, int i,
+                                  char ci) {
   // CHECK-LABEL: define void @test_builtin_elementwise_abs(
   // CHECK:      [[F1:%.+]] = load float, ptr %f1.addr, align 4
   // CHECK-NEXT:  call float @llvm.fabs.f32(float [[F1]])
@@ -35,6 +43,11 @@ void test_builtin_elementwise_abs(float f1, float f2, double d1, double d2,
   // CHECK-NEXT: call i64 @llvm.abs.i64(i64 [[I1]], i1 false)
   i2 = __builtin_elementwise_abs(i1);
 
+  // CHECK:      [[I1:%.+]] = load i64, ptr %i1.addr, align 8
+  // CHECK:      [[S1:%.+]] = trunc i64 [[I1]] to i16
+  // CHECK-NEXT: call i16 @llvm.abs.i16(i16 [[S1]], i1 false)
+  i1 = __builtin_elementwise_abs((short)i1);
+
   // CHECK:      [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16
   // CHECK-NEXT: call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[VI1]], i1 false)
   vi2 = __builtin_elementwise_abs(vi1);
@@ -57,10 +70,37 @@ void test_builtin_elementwise_abs(float f1, float f2, double d1, double d2,
   b = __builtin_elementwise_abs(-10);
 
   // CHECK:      [[SI:%.+]] = load i16, ptr %si.addr, align 2
-  // CHECK-NEXT: [[SI_EXT:%.+]] = sext i16 [[SI]] to i32
-  // CHECK-NEXT: [[RES:%.+]] = call i32 @llvm.abs.i32(i32 [[SI_EXT]], i1 false)
-  // CHECK-NEXT: = trunc i32 [[RES]] to i16
+  // CHECK-NEXT: [[RES:%.+]] = call i16 @llvm.abs.i16(i16 [[SI]], i1 false)
   si = __builtin_elementwise_abs(si);
+
+  struct StructWithBitfield t;
+
+  // CHECK:      [[BFLOAD:%.+]] = load i16, ptr %t, align 8
+  // CHECK-NEXT: [[BFSHL:%.+]] = shl i16 [[BFLOAD]], 11
+  // CHECK-NEXT: [[BFASHR:%.+]] = ashr i16 [[BFSHL]], 11
+  // CHECK-NEXT: [[BFCAST:%.+]] = sext i16 [[BFASHR]] to i32
+  // CHECK-NEXT: [[RES:%.+]] = call i32 @llvm.abs.i32(i32 [[BFCAST]], i1 false)
+  i = __builtin_elementwise_abs(t.i);
+
+  // CHECK:      [[BFLOAD:%.+]] = load i16, ptr %t, align 8
+  // CHECK-NEXT: [[BFSHL:%.+]] = shl i16 [[BFLOAD]], 8
+  // CHECK-NEXT: [[BFASHR:%.+]] = ashr i16 [[BFSHL]], 13
+  // CHECK-NEXT: [[RES:%.+]] = call i16 @llvm.abs.i16(i16 [[BFASHR]], i1 false)
+  si = __builtin_elementwise_abs(t.s);
+
+  // CHECK:      [[BFLOAD:%.+]] = load i16, ptr %t, align 8
+  // CHECK-NEXT: [[BFSHL:%.+]] = shl i16 [[BFLOAD]], 6
+  // CHECK-NEXT: [[BFASHR:%.+]] = ashr i16 [[BFSHL]], 14
+  // CHECK-NEXT: [[BFCAST:%.+]] = trunc i16 [[BFASHR]] to i8
+  // CHECK-NEXT: [[RES:%.+]] = call i8 @llvm.abs.i8(i8 [[BFCAST]], i1 false)
+  ci = __builtin_elementwise_abs(t.c);
+
+  // CHECK:      [[BFLOAD:%.+]] = load i16, ptr %t, align 8
+  // CHECK-NEXT: [[BFSHL:%.+]] = shl i16 [[BFLOAD]], 3
+  // CHECK-NEXT: [[BFASHR:%.+]] = ashr i16 [[BFSHL]], 13
+  // CHECK-NEXT: [[BFCAST:%.+]] = sext i16 [[BFASHR]] to i64
+  // CHECK-NEXT: [[RES:%.+]] = call i64 @llvm.abs.i64(i64 [[BFCAST]], i1 false)
+  i1 = __builtin_elementwise_abs(t.lli);
 }
 
 void test_builtin_elementwise_add_sat(float f1, float f2, double d1, double d2,
@@ -68,7 +108,10 @@ void test_builtin_elementwise_add_sat(float f1, float f2, double d1, double d2,
                                       long long int i2, si8 vi1, si8 vi2,
                                       unsigned u1, unsigned u2, u4 vu1, u4 vu2,
                                       _BitInt(31) bi1, _BitInt(31) bi2,
-                                      unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
+                                      unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2,
+                                      char c1, char c2, unsigned char uc1,
+                                      unsigned char uc2, short s1, short s2,
+                                      unsigned short us1, unsigned short us2) {
   // CHECK:      [[I1:%.+]] = load i64, ptr %i1.addr, align 8
   // CHECK-NEXT: [[I2:%.+]] = load i64, ptr %i2.addr, align 8
   // CHECK-NEXT: call i64 @llvm.sadd.sat.i64(i64 [[I1]], i64 [[I2]])
@@ -76,7 +119,7 @@ void test_builtin_elementwise_add_sat(float f1, float f2, double d1, double d2,
 
   // CHECK:      [[I1:%.+]] = load i64, ptr %i1.addr, align 8
   // CHECK-NEXT: call i64 @llvm.sadd.sat.i64(i64 [[I1]], i64 10)
-  i1 = __builtin_elementwise_add_sat(i1, 10);
+  i1 = __builtin_elementwise_add_sat(i1, 10ll);
 
   // CHECK:      [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16
   // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, ptr %vi2.addr, align 16
@@ -114,6 +157,33 @@ void test_builtin_elementwise_add_sat(float f1, float f2, double d1, double d2,
 
   // CHECK: store i64 98, ptr %i1.addr, align 8
   i1 = __builtin_elementwise_add_sat(1, 'a');
+
+  // CHECK:      [[C1:%.+]] = load i8, ptr %c1.addr, align 1
+  // CHECK-NEXT: [[C2:%.+]] = load i8, ptr %c2.addr, align 1
+  // CHECK-NEXT: call i8 @llvm.sadd.sat.i8(i8 [[C1]], i8 [[C2]])
+  c1 = __builtin_elementwise_add_sat(c1, c2);
+
+  // CHECK:      [[UC1:%.+]] = load i8, ptr %uc1.addr, align 1
+  // CHECK-NEXT: [[UC2:%.+]] = load i8, ptr %uc2.addr, align 1
+  // CHECK-NEXT: call i8 @llvm.uadd.sat.i8(i8 [[UC1]], i8 [[UC2]])
+  uc1 = __builtin_elementwise_add_sat(uc1, uc2);
+
+  // CHECK:      [[S1:%.+]] = load i16, ptr %s1.addr, align 2
+  // CHECK-NEXT: [[S2:%.+]] = load i16, ptr %s2.addr, align 2
+  // CHECK-NEXT: call i16 @llvm.sadd.sat.i16(i16 [[S1]], i16 [[S2]])
+  s1 = __builtin_elementwise_add_sat(s1, s2);
+
+  // CHECK:      [[S1:%.+]] = load i16, ptr %s1.addr, align 2
+  // CHECK:      [[I1:%.+]] = sext i16 [[S1]] to i32
+  // CHECK-NEXT: [[S2:%.+]] = load i16, ptr %s2.addr, align 2
+  // CHECK:      [[I2:%.+]] = sext i16 [[S2]] to i32
+  // CHECK-NEXT: call i32 @llvm.sadd.sat.i32(i32 [[I1]], i32 [[I2]])
+  s1 = __builtin_elementwise_add_sat((int)s1, (int)s2);
+
+  // CHECK:      [[US1:%.+]] = load i16, ptr %us1.addr, align 2
+  // CHECK-NEXT: [[US2:%.+]] = load i16, ptr %us2.addr, align 2
+  // CHECK-NEXT: call i16 @llvm.uadd.sat.i16(i16 [[US1]], i16 [[US2]])
+  us1 = __builtin_elementwise_add_sat(us1, us2);
 }
 
 void test_builtin_elementwise_sub_sat(float f1, float f2, double d1, double d2,
@@ -121,7 +191,10 @@ void test_builtin_elementwise_sub_sat(float f1, float f2, double d1, double d2,
                                       long long int i2, si8 vi1, si8 vi2,
                                       unsigned u1, unsigned u2, u4 vu1, u4 vu2,
                                       _BitInt(31) bi1, _BitInt(31) bi2,
-                                      unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
+                                      unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2,
+                                      char c1, char c2, unsigned char uc1,
+                                      unsigned char uc2, short s1, short s2,
+                                      unsigned short us1, unsigned short us2) {
   // CHECK:      [[I1:%.+]] = load i64, ptr %i1.addr, align 8
   // CHECK-NEXT: [[I2:%.+]] = load i64, ptr %i2.addr, align 8
   // CHECK-NEXT: call i64 @llvm.ssub.sat.i64(i64 [[I1]], i64 [[I2]])
@@ -129,7 +202,7 @@ void test_builtin_elementwise_sub_sat(float f1, float f2, double d1, double d2,
 
   // CHECK:      [[I1:%.+]] = load i64, ptr %i1.addr, align 8
   // CHECK-NEXT: call i64 @llvm.ssub.sat.i64(i64 [[I1]], i64 10)
-  i1 = __builtin_elementwise_sub_sat(i1, 10);
+  i1 = __builtin_elementwise_sub_sat(i1, 10ll);
 
   // CHECK:      [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16
   // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, ptr %vi2.addr, align 16
@@ -167,6 +240,26 @@ void test_builtin_elementwise_sub_sat(float f1, float f2, double d1, double d2,
 
   // CHECK: store i64 -96, ptr %i1.addr, align 8
   i1 = __builtin_elementwise_sub_sat(1, 'a');
+
+  // CHECK:      [[C1:%.+]] = load i8, ptr %c1.addr, align 1
+  // CHECK-NEXT: [[C2:%.+]] = load i8, ptr %c2.addr, align 1
+  // CHECK-NEXT: call i8 @llvm.ssub.sat.i8(i8 [[C1]], i8 [[C2]])
+  c1 = __builtin_elementwise_sub_sat(c1, c2);
+
+  // CHECK:      [[UC1:%.+]] = load i8, ptr %uc1.addr, align 1
+  // CHECK-NEXT: [[UC2:%.+]] = load i8, ptr %uc2.addr, align 1
+  // CHECK-NEXT: call i8 @llvm.usub.sat.i8(i8 [[UC1]], i8 [[UC2]])
+  uc1 = __builtin_elementwise_sub_sat(uc1, uc2);
+
+  // CHECK:      [[S1:%.+]] = load i16, ptr %s1.addr, align 2
+  // CHECK-NEXT: [[S2:%.+]] = load i16, ptr %s2.addr, align 2
+  // CHECK-NEXT: call i16 @llvm.ssub.sat.i16(i16 [[S1]], i16 [[S2]])
+  s1 = __builtin_elementwise_sub_sat(s1, s2);
+
+  // CHECK:      [[US1:%.+]] = load i16, ptr %us1.addr, align 2
+  // CHECK-NEXT: [[US2:%.+]] = load i16, ptr %us2.addr, align 2
+  // CHECK-NEXT: call i16 @llvm.usub.sat.i16(i16 [[US1]], i16 [[US2]])
+  us1 = __builtin_elementwise_sub_sat(us1, us2);
 }
 
 void test_builtin_elementwise_maximum(float f1, float f2, double d1, double d2,
@@ -278,7 +371,7 @@ void test_builtin_elementwise_max(float f1, float f2, double d1, double d2,
 
   // CHECK:      [[I1:%.+]] = load i64, ptr %i1.addr, align 8
   // CHECK-NEXT: call i64 @llvm.smax.i64(i64 [[I1]], i64 10)
-  i1 = __builtin_elementwise_max(i1, 10);
+  i1 = __builtin_elementwise_max(i1, 10ll);
 
   // CHECK:      [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16
   // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, ptr %vi2.addr, align 16
@@ -362,7 +455,14 @@ void test_builtin_elementwise_min(float f1, float f2, double d1, double d2,
 
   // CHECK:      [[I2:%.+]] = load i64, ptr %i2.addr, align 8
   // CHECK-NEXT: call i64 @llvm.smin.i64(i64 -11, i64 [[I2]])
-  i1 = __builtin_elementwise_min(-11, i2);
+  i1 = __builtin_elementwise_min(-11ll, i2);
+
+  // CHECK:      [[I1:%.+]] = load i64, ptr %i1.addr, align 8
+  // CHECK:      [[S1:%.+]] = trunc i64 [[I1]] to i16
+  // CHECK-NEXT: [[I2:%.+]] = load i64, ptr %i2.addr, align 8
+  // CHECK:      [[S2:%.+]] = trunc i64 [[I2]] to i16
+  // CHECK-NEXT: call i16 @llvm.smin.i16(i16 [[S1]], i16 [[S2]])
+  i1 = __builtin_elementwise_min((short)i1, (short)i2);
 
   // CHECK:      [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16
   // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, ptr %vi2.addr, align 16
@@ -374,12 +474,6 @@ void test_builtin_elementwise_min(float f1, float f2, double d1, double d2,
   // CHECK-NEXT: call i32 @llvm.umin.i32(i32 [[U1]], i32 [[U2]])
   u1 = __builtin_elementwise_min(u1, u2);
 
-  // CHECK:      [[U1:%.+]] = load i32, ptr %u1.addr, align 4
-  // CHECK-NEXT: [[ZEXT_U1:%.+]] = zext i32 [[U1]] to i64
-  // CHECK-NEXT: [[I2:%.+]] = load i64, ptr %i2.addr, align 8
-  // CHECK-NEXT: call i64 @llvm.smin.i64(i64 [[ZEXT_U1]], i64 [[I2]])
-  u1 = __builtin_elementwise_min(u1, i2);
-
   // CHECK:      [[VU1:%.+]] = load <4 x i32>, ptr %vu1.addr, align 16
   // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, ptr %vu2.addr, align 16
   // CHECK-NEXT: call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
@@ -418,7 +512,8 @@ void test_builtin_elementwise_min(float f1, float f2, double d1, double d2,
 
 void test_builtin_elementwise_bitreverse(si8 vi1, si8 vi2,
                                   long long int i1, long long int i2, short si,
-                                  _BitInt(31) bi1, _BitInt(31) bi2) {
+                                  _BitInt(31) bi1, _BitInt(31) bi2,
+                                  char ci) {
   
 
   // CHECK:      [[I1:%.+]] = load i64, ptr %i1.addr, align 8
@@ -447,10 +542,24 @@ void test_builtin_elementwise_bitreverse(si8 vi1, si8 vi2,
   b = __builtin_elementwise_bitreverse(-10);
 
   // CHECK:      [[SI:%.+]] = load i16, ptr %si.addr, align 2
-  // CHECK-NEXT: [[SI_EXT:%.+]] = sext i16 [[SI]] to i32
-  // CHECK-NEXT: [[RES:%.+]] = call i32 @llvm.bitreverse.i32(i32 [[SI_EXT]])
-  // CHECK-NEXT: = trunc i32 [[RES]] to i16
+  // CHECK-NEXT: [[RES:%.+]] = call i16 @llvm.bitreverse.i16(i16 [[SI]])
   si = __builtin_elementwise_bitreverse(si);
+
+  // CHECK:      store i16 28671, ptr %si.addr, align 2
+  si = __builtin_elementwise_bitreverse((short)-10);
+
+  // CHECK:      store i16 28671, ptr %si.addr, align 2
+  si = __builtin_elementwise_bitreverse((unsigned short)-10);
+
+  // CHECK:      [[CI:%.+]] = load i8, ptr %ci.addr, align 1
+  // CHECK-NEXT: [[RES:%.+]] = call i8 @llvm.bitreverse.i8(i8 [[CI]])
+  ci = __builtin_elementwise_bitreverse(ci);
+
+  // CHECK:      store i8 111, ptr %ci.addr, align 1
+  ci = __builtin_elementwise_bitreverse((unsigned char)-10);
+
+  // CHECK:      store i8 111, ptr %ci.addr, align 1
+  ci = __builtin_elementwise_bitreverse((char)-10);
 }
 
 void test_builtin_elementwise_ceil(float f1, float f2, double d1, double d2,
@@ -668,7 +777,8 @@ void test_builtin_elementwise_log2(float f1, float f2, double d1, double d2,
 
 void test_builtin_elementwise_popcount(si8 vi1, si8 vi2, long long int i1,
                                        long long int i2, short si,
-                                       _BitInt(31) bi1, _BitInt(31) bi2) {
+                                       _BitInt(31) bi1, _BitInt(31) bi2,
+                                       char ci) {
   // CHECK:      [[I1:%.+]] = load i64, ptr %i1.addr, align 8
   // CHECK-NEXT: call i64 @llvm.ctpop.i64(i64 [[I1]])
   i2 = __builtin_elementwise_popcount(i1);
@@ -695,10 +805,24 @@ void test_builtin_elementwise_popcount(si8 vi1, si8 vi2, long long int i1,
   b = __builtin_elementwise_popcount(-10);
 
   // CHECK:      [[SI:%.+]] = load i16, ptr %si.addr, align 2
-  // CHECK-NEXT: [[SI_EXT:%.+]] = sext i16 [[SI]] to i32
-  // CHECK-NEXT: [[RES:%.+]] = call i32 @llvm.ctpop.i32(i32 [[SI_EXT]])
-  // CHECK-NEXT: = trunc i32 [[RES]] to i16
+  // CHECK-NEXT: [[RES:%.+]] = call i16 @llvm.ctpop.i16(i16 [[SI]])
   si = __builtin_elementwise_popcount(si);
+
+  // CHECK:      store i16 3, ptr %si.addr, align 2
+  si = __builtin_elementwise_popcount((unsigned short)32771);
+
+  // CHECK:      store i16 3, ptr %si.addr, align 2
+  si = __builtin_elementwise_popcount((short)32771);
+
+  // CHECK:      [[CI:%.+]] = load i8, ptr %ci.addr, align 1
+  // CHECK-NEXT: [[RES:%.+]] = call i8 @llvm.ctpop.i8(i8 [[CI]])
+  ci = __builtin_elementwise_popcount(ci);
+
+  // CHECK:      store i8 2, ptr %ci.addr, align 1
+  ci = __builtin_elementwise_popcount((unsigned char)192);
+
+  // CHECK:      store i8 2, ptr %ci.addr, align 1
+  ci = __builtin_elementwise_popcount((char)192);
 }
 
 void test_builtin_elementwise_fmod(float f1, float f2, double d1, double d2,
diff --git a/clang/test/CodeGenHLSL/builtins/dot-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/dot-builtin.hlsl
index 482f089d4770f..f6a19257d94e2 100644
--- a/clang/test/CodeGenHLSL/builtins/dot-builtin.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/dot-builtin.hlsl
@@ -6,7 +6,7 @@
 // CHECK: %conv2 = fptrunc double %hlsl.dot to float
 // CHECK: ret float %conv2
 float builtin_bool_to_float_type_promotion ( float p0, bool p1 ) {
-  return __builtin_hlsl_dot ( p0, p1 );
+  return __builtin_hlsl_dot ( (double)p0, (double)p1 );
 }
 
 // CHECK-LABEL: builtin_bool_to_float_arg1_type_promotion
@@ -16,7 +16,7 @@ float builtin_bool_to_float_type_promotion ( float p0, bool p1 ) {
 // CHECK: %conv2 = fptrunc double %hlsl.dot to float
 // CHECK: ret float %conv2
 float builtin_bool_to_float_arg1_type_promotion ( bool p0, float p1 ) {
-  return __builtin_hlsl_dot ( p0, p1 );
+  return __builtin_hlsl_dot ( (double)p0, (double)p1 );
 }
 
 // CHECK-LABEL: builtin_dot_int_to_float_promotion
@@ -26,5 +26,5 @@ float builtin_bool_to_float_arg1_type_promotion ( bool p0, float p1 ) {
 // CHECK: %conv2 = fptrunc double %hlsl.dot to float
 // CHECK: ret float %conv2
 float builtin_dot_int_to_float_promotion ( float p0, int p1 ) {
-  return __builtin_hlsl_dot ( p0, p1 );
+  return __builtin_hlsl_dot ( (double)p0, (double)p1 );
 }
diff --git a/clang/test/CodeGenHLSL/implicit-norecurse-attrib.hlsl b/clang/test/CodeGenHLSL/implicit-norecurse-attrib.hlsl
index ca01960678175..a8ab6ce98ae7e 100644
--- a/clang/test/CodeGenHLSL/implicit-norecurse-attrib.hlsl
+++ b/clang/test/CodeGenHLSL/implicit-norecurse-attrib.hlsl
@@ -37,7 +37,7 @@ uint Find(Node SortedTree[MAX], uint key) {
 // Imagine the inout works
 export
 bool InitTree(/*inout*/ Node tree[MAX], RWBuffer<uint4> encodedTree, uint maxDepth) {
-  uint size = pow(2.f, maxDepth) - 1;
+  uint size = pow(2.f, (float)maxDepth) - 1;
   if (size > MAX) return false;
   for (uint i = 1; i < size; i++) {
     tree[i].value = encodedTree[i].x;
diff --git a/clang/test/Sema/builtins-elementwise-math.c b/clang/test/Sema/builtins-elementwise-math.c
index 6002e91f8ec6f..d6785f78340ca 100644
--- a/clang/test/Sema/builtins-elementwise-math.c
+++ b/clang/test/Sema/builtins-elementwise-math.c
@@ -69,14 +69,18 @@ void test_builtin_elementwise_add_sat(int i, short s, double d, float4 v, int3 i
   // expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}}
 
   s = __builtin_elementwise_add_sat(i, s);
+  // expected-error@-1 {{arguments are of different types ('int' vs 'short')}}
 
   enum e { one,
            two };
   i = __builtin_elementwise_add_sat(one, two);
 
+  i = __builtin_elementwise_add_sat(one, d);
+  // expected-error@-1 {{arguments are of different types ('int' vs 'double')}}
+
   enum f { three };
   enum f x = __builtin_elementwise_add_sat(one, three);
-  // expected-warning@-1 {{comparison of different enumeration types ('enum e' and 'enum f')}}
+  // expected-error@-1 {{invalid arithmetic between different enumeration types ('enum e' and 'enum f')}}
 
   _BitInt(32) ext; // expected-warning {{'_BitInt' in C17 and earlier is a Clang extension}}
   ext = __builtin_elementwise_add_sat(ext, ext);
@@ -128,14 +132,18 @@ void test_builtin_elementwise_sub_sat(int i, short s, double d, float4 v, int3 i
   // expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}}
 
   s = __builtin_elementwise_sub_sat(i, s);
+  // expected-error@-1 {{arguments are of different types ('int' vs 'short')}}
 
   enum e { one,
            two };
   i = __builtin_elementwise_sub_sat(one, two);
 
+  i = __builtin_elementwise_sub_sat(one, d);
+  // expected-error@-1 {{arguments are of different types ('int' vs 'double')}}
+
   enum f { three };
   enum f x = __builtin_elementwise_sub_sat(one, three);
-  // expected-warning@-1 {{comparison of different enumeration types ('enum e' and 'enum f')}}
+  // expected-error@-1 {{invalid arithmetic between different enumeration types ('enum e' and 'enum f')}}
 
   _BitInt(32) ext; // expected-warning {{'_BitInt' in C17 and earlier is a Clang extension}}
   ext = __builtin_elementwise_sub_sat(ext, ext);
@@ -184,14 +192,18 @@ void test_builtin_elementwise_max(int i, short s, double d, float4 v, int3 iv, u
   // expected-error@-1 {{arguments are of different types ('unsigned3' (vector of 3 'unsigned int' values) vs 'int3' (vector of 3 'int' values))}}
 
   s = __builtin_elementwise_max(i, s);
+  // expected-error@-1 {{arguments are of different types ('int' vs 'short')}}
 
   enum e { one,
            two };
   i = __builtin_elementwise_max(one, two);
 
+  i = __builtin_elementwise_max(one, d);
+  // expected-error@-1 {{arguments are of different types ('int' vs 'double')}}
+
   enum f { three };
   enum f x = __builtin_elementwise_max(one, three);
-  // expected-warning@-1 {{comparison of different enumeration types ('enum e' and 'enum f')}}
+  // expected-error@-1 {{invalid arithmetic between different enumeration types ('enum e' and 'enum f')}}
 
   _BitInt(32) ext; // expected-warning {{'_BitInt' in C17 and earlier is a Clang extension}}
   ext = __builtin_elementwise_max(ext, ext);
@@ -240,14 +252,18 @@ void test_builtin_elementwise_min(int i, short s, double d, float4 v, int3 iv, u
   // expected-error@-1 {{arguments are of different types ('unsigned3' (vector of 3 'unsigned int' values) vs 'int3' (vector of 3 'int' values))}}
 
   s = __builtin_elementwise_min(i, s);
+  // expected-error@-1 {{arguments are of different types ('int' vs 'short')}}
 
   enum e { one,
            two };
   i = __builtin_elementwise_min(one, two);
 
+  i = __builtin_elementwise_min(one, d);
+  // expected-error@-1 {{arguments are of different types ('int' vs 'double')}}
+
   enum f { three };
   enum f x = __builtin_elementwise_min(one, three);
-  // expected-warning@-1 {{comparison of different enumeration types ('enum e' and 'enum f')}}
+  // expected-error@-1 {{invalid arithmetic between different enumeration types ('enum e' and 'enum f')}}
 
   _BitInt(32) ext; // expected-warning {{'_BitInt' in C17 and earlier is a Clang extension}}
   ext = __builtin_elementwise_min(ext, ext);
@@ -273,7 +289,7 @@ void test_builtin_elementwise_min(int i, short s, double d, float4 v, int3 iv, u
   // expected-error@-1 {{1st argument must be a vector, integer or floating point type (was '_Complex float')}}
 }
 
-void test_builtin_elementwise_maximum(int i, short s, float f, double d, float4 v, int3 iv, unsigned3 uv, int *p) {
+void test_builtin_elementwise_maximum(int i, short s, float f, double d, float4 fv, double4 dv, int3 iv, unsigned3 uv, int *p) {
   i = __builtin_elementwise_maximum(p, d);
   // expected-error@-1 {{arguments are of different types ('int *' vs 'double')}}
 
@@ -289,15 +305,19 @@ void test_builtin_elementwise_maximum(int i, short s, float f, double d, float4
   i = __builtin_elementwise_maximum(i, i, i);
   // expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
 
-  i = __builtin_elementwise_maximum(v, iv);
+  i = __builtin_elementwise_maximum(fv, iv);
   // expected-error@-1 {{arguments are of different types ('float4' (vector of 4 'float' values) vs 'int3' (vector of 3 'int' values))}}
 
   i = __builtin_elementwise_maximum(uv, iv);
   // expected-error@-1 {{arguments are of different types ('unsigned3' (vector of 3 'unsigned int' values) vs 'int3' (vector of 3 'int' values))}}
 
+  dv = __builtin_elementwise_maximum(fv, dv);
+  // expected-error@-1 {{arguments are of different types ('float4' (vector of 4 'float' values) vs 'double4' (vector of 4 'double' values))}}
+
   d = __builtin_elementwise_maximum(f, d);
+  // expected-error@-1 {{arguments are of different types ('float' vs 'double')}}
 
-  v = __builtin_elementwise_maximum(v, v);
+  fv = __builtin_elementwise_maximum(fv, fv);
 
   i = __builtin_elementwise_maximum(iv, iv);
   // expected-error@-1 {{1st argument must be a floating point type (was 'int3' (vector of 3 'int' values))}}
@@ -314,7 +334,7 @@ void test_builtin_elementwise_maximum(int i, short s, float f, double d, float4
   // expected-error@-1 {{1st argument must be a floating point type (was '_Complex float')}}
 }
 
-void test_builtin_elementwise_minimum(int i, short s, float f, double d, float4 v, int3 iv, unsigned3 uv, int *p) {
+void test_builtin_elementwise_minimum(int i, short s, float f, double d, float4 fv, double4 dv, int3 iv, unsigned3 uv, int *p) {
   i = __builtin_elementwise_minimum(p, d);
   // expected-error@-1 {{arguments are of different types ('int *' vs 'double')}}
 
@@ -330,15 +350,19 @@ void test_builtin_elementwise_minimum(int i, short s, float f, double d, float4
   i = __builtin_elementwise_minimum(i, i, i);
   // expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
 
-  i = __builtin_elementwise_minimum(v, iv);
+  i = __builtin_elementwise_minimum(fv, iv);
   // expected-error@-1 {{arguments are of different types ('float4' (vector of 4 'float' values) vs 'int3' (vector of 3 'int' values))}}
 
   i = __builtin_elementwise_minimum(uv, iv);
   // expected-error@-1 {{arguments are of different types ('unsigned3' (vector of 3 'unsigned int' values) vs 'int3' (vector of 3 'int' values))}}
 
+  dv = __builtin_elementwise_minimum(fv, dv);
+  // expected-error@-1 {{arguments are of different types ('float4' (vector of 4 'float' values) vs 'double4' (vector of 4 'double' values))}}
+
   d = __builtin_elementwise_minimum(f, d);
+  // expected-error@-1 {{arguments are of different types ('float' vs 'double')}}
 
-  v = __builtin_elementwise_minimum(v, v);
+  fv = __builtin_elementwise_minimum(fv, fv);
 
   i = __builtin_elementwise_minimum(iv, iv);
   // expected-error@-1 {{1st argument must be a floating point type (was 'int3' (vector of 3 'int' values))}}
diff --git a/clang/test/SemaHLSL/BuiltIns/mad-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/mad-errors.hlsl
index ee4605528f410..b96761fb76cc0 100644
--- a/clang/test/SemaHLSL/BuiltIns/mad-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/mad-errors.hlsl
@@ -84,3 +84,10 @@ float builtin_mad_int_to_float_promotion(float p0, int p1) {
   return __builtin_hlsl_mad(p0, p0, p1);
   // expected-error@-1 {{3rd argument must be a floating point type (was 'int')}}
 }
+
+int builtin_mad_mixed_enums() {
+  enum e { one, two };
+  enum f { three };
+  return __builtin_hlsl_mad(one, two, three);
+  // expected-error@-1 {{invalid arithmetic between different enumeration types ('e' and 'f')}}
+}