diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 628ddba99857..f98ab30d04c6 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -2160,12 +2160,15 @@ static mlir::Value buildArmLdrexNon128Intrinsic(unsigned int builtinID,
   }
 }
 
-/// Given a vector of unsigned int type `vecTy`, return a vector type of
-/// signed int type with the same element type width and vector size.
-static mlir::cir::VectorType getSignedVectorType(CIRGenBuilderTy &builder,
-                                                 mlir::cir::VectorType vecTy) {
+/// Given a vector of integer type `vecTy`, return a vector type of
+/// the same element type width and vector size, but with the
+/// opposite signedness.
+static mlir::cir::VectorType
+getSignChangedVectorType(CIRGenBuilderTy &builder,
+                         mlir::cir::VectorType vecTy) {
   auto elemTy = mlir::cast<mlir::cir::IntType>(vecTy.getEltType());
-  elemTy = builder.getSIntNTy(elemTy.getWidth());
+  elemTy = elemTy.isSigned() ? builder.getUIntNTy(elemTy.getWidth())
+                             : builder.getSIntNTy(elemTy.getWidth());
   return mlir::cir::VectorType::get(builder.getContext(), elemTy,
                                     vecTy.getSize());
 }
@@ -2354,13 +2357,25 @@ mlir::Value CIRGenFunction::buildCommonNeonBuiltinExpr(
                          : "llvm.aarch64.neon.sqrdmulh.lane",
         resTy, getLoc(e->getExprLoc()));
   }
+  case NEON::BI__builtin_neon_vqshlu_n_v:
+  case NEON::BI__builtin_neon_vqshluq_n_v: {
+    // These intrinsics expect a signed vector type as input, but
+    // return an unsigned vector type.
+    mlir::cir::VectorType srcTy = getSignChangedVectorType(builder, vTy);
+    return buildNeonCall(
+        builder, {srcTy, srcTy}, ops, "llvm.aarch64.neon.sqshlu", vTy,
+        getLoc(e->getExprLoc()), false, /* not fp constrained op */
+        1, /* second arg is shift amount */
+        false /* leftshift */);
+  }
   case NEON::BI__builtin_neon_vrshr_n_v:
   case NEON::BI__builtin_neon_vrshrq_n_v: {
     return buildNeonCall(
-        builder, {vTy, isUnsigned ? getSignedVectorType(builder, vTy) : vTy},
-        ops, isUnsigned ? "llvm.aarch64.neon.urshl" : "llvm.aarch64.neon.srshl",
-        vTy, getLoc(e->getExprLoc()), false, /* not fp constrained op*/
-        1, /* second arg is shift amount */
+        builder,
+        {vTy, isUnsigned ? getSignChangedVectorType(builder, vTy) : vTy}, ops,
+        isUnsigned ? "llvm.aarch64.neon.urshl" : "llvm.aarch64.neon.srshl", vTy,
+        getLoc(e->getExprLoc()), false, /* not fp constrained op*/
+        1, /* second arg is shift amount */
         true /* rightshift */);
   }
   case NEON::BI__builtin_neon_vshl_n_v:
diff --git a/clang/test/CIR/CodeGen/AArch64/neon.c b/clang/test/CIR/CodeGen/AArch64/neon.c
index eb001fcc5262..053464b4f009 100644
--- a/clang/test/CIR/CodeGen/AArch64/neon.c
+++ b/clang/test/CIR/CodeGen/AArch64/neon.c
@@ -6038,64 +6038,117 @@ uint64x2_t test_vrshrq_n_u64(uint64x2_t a) {
 //   return vsliq_n_p16(a, b, 15);
 // }
 
-// NYI-LABEL: @test_vqshlu_n_s8(
-// NYI: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-// NYI: ret <8 x i8> [[VQSHLU_N]]
-// uint8x8_t test_vqshlu_n_s8(int8x8_t a) {
-//   return vqshlu_n_s8(a, 3);
-// }
+uint8x8_t test_vqshlu_n_s8(int8x8_t a) {
+  return vqshlu_n_s8(a, 3);
 
-// NYI-LABEL: @test_vqshlu_n_s16(
-// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// NYI: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// NYI: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
-// NYI: ret <4 x i16> [[VQSHLU_N1]]
-// uint16x4_t test_vqshlu_n_s16(int16x4_t a) {
-//   return vqshlu_n_s16(a, 3);
-// }
+  // CIR-LABEL: vqshlu_n_s8
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i,
+  // CIR-SAME: #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i]> : !cir.vector<!s8i x 8>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqshlu" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!s8i x 8>, !cir.vector<!s8i x 8>) -> !cir.vector<!u8i x 8>
 
-// NYI-LABEL: @test_vqshlu_n_s32(
-// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// NYI: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// NYI: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> <i32 3, i32 3>)
-// NYI: ret <2 x i32> [[VQSHLU_N1]]
-// uint32x2_t test_vqshlu_n_s32(int32x2_t a) {
-//   return vqshlu_n_s32(a, 3);
-// }
+  // LLVM: {{.*}}@test_vqshlu_n_s8(<8 x i8>{{.*}}[[A:%.*]])
+  // LLVM: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[A]], <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+  // LLVM: ret <8 x i8> [[VQSHLU_N]]
+}
 
-// NYI-LABEL: @test_vqshluq_n_s8(
-// NYI: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-// NYI: ret <16 x i8> [[VQSHLU_N]]
-// uint8x16_t test_vqshluq_n_s8(int8x16_t a) {
-//   return vqshluq_n_s8(a, 3);
-// }
+uint16x4_t test_vqshlu_n_s16(int16x4_t a) {
+  return vqshlu_n_s16(a, 3);
 
-// NYI-LABEL: @test_vqshluq_n_s16(
-// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// NYI: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// NYI: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-// NYI: ret <8 x i16> [[VQSHLU_N1]]
-// uint16x8_t test_vqshluq_n_s16(int16x8_t a) {
-//   return vqshluq_n_s16(a, 3);
-// }
+  // CIR-LABEL: vqshlu_n_s16
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s16i, #cir.int<3> : !s16i,
+  // CIR-SAME: #cir.int<3> : !s16i, #cir.int<3> : !s16i]> : !cir.vector<!s16i x 4>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqshlu" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!s16i x 4>, !cir.vector<!s16i x 4>) -> !cir.vector<!u16i x 4>
 
-// NYI-LABEL: @test_vqshluq_n_s32(
-// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// NYI: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// NYI: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
-// NYI: ret <4 x i32> [[VQSHLU_N1]]
-// uint32x4_t test_vqshluq_n_s32(int32x4_t a) {
-//   return vqshluq_n_s32(a, 3);
-// }
+  // LLVM: {{.*}}@test_vqshlu_n_s16(<4 x i16>{{.*}}[[A:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+  // LLVM: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+  // LLVM: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
+  // LLVM: ret <4 x i16> [[VQSHLU_N1]]
+}
 
-// NYI-LABEL: @test_vqshluq_n_s64(
-// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// NYI: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// NYI: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> <i64 3, i64 3>)
-// NYI: ret <2 x i64> [[VQSHLU_N1]]
-// uint64x2_t test_vqshluq_n_s64(int64x2_t a) {
-//   return vqshluq_n_s64(a, 3);
-// }
+uint32x2_t test_vqshlu_n_s32(int32x2_t a) {
+  return vqshlu_n_s32(a, 3);
+
+  // CIR-LABEL: vqshlu_n_s32
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s32i, #cir.int<3> : !s32i]> : !cir.vector<!s32i x 2>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqshlu" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!s32i x 2>, !cir.vector<!s32i x 2>) -> !cir.vector<!u32i x 2>
+
+  // LLVM: {{.*}}@test_vqshlu_n_s32(<2 x i32>{{.*}}[[A:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+  // LLVM: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+  // LLVM: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> <i32 3, i32 3>)
+}
+
+uint8x16_t test_vqshluq_n_s8(int8x16_t a) {
+  return vqshluq_n_s8(a, 3);
+
+  // CIR-LABEL: vqshluq_n_s8
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i,
+  // CIR-SAME: #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i,
+  // CIR-SAME: #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i,
+  // CIR-SAME: #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i]> : !cir.vector<!s8i x 16>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqshlu" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!s8i x 16>, !cir.vector<!s8i x 16>) -> !cir.vector<!u8i x 16>
+
+  // LLVM: {{.*}}@test_vqshluq_n_s8(<16 x i8>{{.*}}[[A:%.*]])
+  // LLVM: [[VQSHLUQ_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> [[A]],
+  // LLVM-SAME: <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+  // LLVM: ret <16 x i8> [[VQSHLUQ_N]]
+}
+
+uint16x8_t test_vqshluq_n_s16(int16x8_t a) {
+  return vqshluq_n_s16(a, 3);
+
+  // CIR-LABEL: vqshluq_n_s16
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s16i, #cir.int<3> : !s16i,
+  // CIR-SAME: #cir.int<3> : !s16i, #cir.int<3> : !s16i, #cir.int<3> : !s16i, #cir.int<3> : !s16i,
+  // CIR-SAME: #cir.int<3> : !s16i, #cir.int<3> : !s16i]> : !cir.vector<!s16i x 8>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqshlu" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!s16i x 8>, !cir.vector<!s16i x 8>) -> !cir.vector<!u16i x 8>
+
+  // LLVM: {{.*}}@test_vqshluq_n_s16(<8 x i16>{{.*}}[[A:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+  // LLVM: [[VQSHLUQ_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+  // LLVM: [[VQSHLUQ_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[VQSHLUQ_N]],
+  // LLVM-SAME: <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+  // LLVM: ret <8 x i16> [[VQSHLUQ_N1]]
+}
+
+uint32x4_t test_vqshluq_n_s32(int32x4_t a) {
+  return vqshluq_n_s32(a, 3);
+
+  // CIR-LABEL: vqshluq_n_s32
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s32i, #cir.int<3> : !s32i,
+  // CIR-SAME: #cir.int<3> : !s32i, #cir.int<3> : !s32i]> : !cir.vector<!s32i x 4>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqshlu" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!u32i x 4>
+
+  // LLVM: {{.*}}@test_vqshluq_n_s32(<4 x i32>{{.*}}[[A:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+  // LLVM: [[VQSHLUQ_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+  // LLVM: [[VQSHLUQ_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[VQSHLUQ_N]],
+  // LLVM-SAME: <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
+  // LLVM: ret <4 x i32> [[VQSHLUQ_N1]]
+}
+
+uint64x2_t test_vqshluq_n_s64(int64x2_t a) {
+  return vqshluq_n_s64(a, 3);
+
+  // CIR-LABEL: vqshluq_n_s64
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s64i, #cir.int<3> : !s64i]> : !cir.vector<!s64i x 2>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "llvm.aarch64.neon.sqshlu" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!s64i x 2>, !cir.vector<!s64i x 2>) -> !cir.vector<!u64i x 2>
+
+  // LLVM: {{.*}}@test_vqshluq_n_s64(<2 x i64>{{.*}}[[A:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+  // LLVM: [[VQSHLUQ_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+  // LLVM: [[VQSHLUQ_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VQSHLUQ_N]],
+  // LLVM-SAME: <2 x i64> <i64 3, i64 3>)
+  // LLVM: ret <2 x i64> [[VQSHLUQ_N1]]
+}
 
 // NYI-LABEL: @test_vshrn_n_s16(
 // NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
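
Beyond the type contract, the behavior these tests pin down is saturating: SQSHLU shifts
each signed lane left and clamps the result to the unsigned range, so negative lanes become
0 and overflowing lanes become the unsigned maximum. A small runnable sketch of that
semantics, assuming an AArch64 host (or cross-compiler) with arm_neon.h; the sample values
are illustrative only, not taken from the patch:

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
  int8x8_t in = {-128, -1, 0, 1, 15, 16, 100, 127};
  // Same operation the tests exercise: signed input, unsigned saturated
  // output, immediate shift of 3.
  uint8x8_t out = vqshlu_n_s8(in, 3);
  // Expected: -128 -> 0, -1 -> 0, 0 -> 0, 1 -> 8, 15 -> 120, 16 -> 128,
  // and 100/127 overflow 8 bits after << 3, so both saturate to 255.
  for (int i = 0; i < 8; ++i)
    printf("%4d -> %3u\n", (int)in[i], (unsigned)out[i]);
  return 0;
}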