From b2c4084ff5c8d59e691ae0c3638114706efa7c08 Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Tue, 17 Sep 2024 19:34:34 +0200 Subject: [PATCH 01/12] Initial experiments (with integer regs for fp16). Experiment with soft-promotion in FP regs (not working). Try to make f16 legal instead. Atomic loads/stores, spill/reload, tests for __fp16 and half vectors. Strict f16 with tests. Review. Make use of vector facility if present. --- clang/docs/LanguageExtensions.rst | 1 + clang/lib/Basic/Targets/SystemZ.h | 15 + clang/lib/CodeGen/Targets/SystemZ.cpp | 11 +- clang/test/CodeGen/SystemZ/Float16.c | 85 ++ clang/test/CodeGen/SystemZ/fp16.c | 39 + clang/test/CodeGen/SystemZ/systemz-abi.c | 44 + compiler-rt/test/builtins/CMakeLists.txt | 2 +- llvm/lib/IR/RuntimeLibcalls.cpp | 5 + .../SystemZ/AsmParser/SystemZAsmParser.cpp | 14 + .../MCTargetDesc/SystemZMCTargetDesc.cpp | 19 + .../MCTargetDesc/SystemZMCTargetDesc.h | 2 + llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp | 8 + llvm/lib/Target/SystemZ/SystemZCallingConv.td | 4 +- .../Target/SystemZ/SystemZISelDAGToDAG.cpp | 7 +- .../Target/SystemZ/SystemZISelLowering.cpp | 127 ++- llvm/lib/Target/SystemZ/SystemZISelLowering.h | 6 + llvm/lib/Target/SystemZ/SystemZInstrFP.td | 14 +- llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp | 52 ++ llvm/lib/Target/SystemZ/SystemZInstrVector.td | 2 + .../lib/Target/SystemZ/SystemZRegisterInfo.td | 25 +- llvm/lib/Target/SystemZ/SystemZScheduleZ13.td | 12 +- llvm/lib/Target/SystemZ/SystemZScheduleZ14.td | 12 +- llvm/lib/Target/SystemZ/SystemZScheduleZ15.td | 12 +- llvm/lib/Target/SystemZ/SystemZScheduleZ16.td | 12 +- .../lib/Target/SystemZ/SystemZScheduleZ196.td | 8 +- .../Target/SystemZ/SystemZScheduleZEC12.td | 8 +- llvm/test/CodeGen/SystemZ/atomic-load-10.ll | 22 + llvm/test/CodeGen/SystemZ/atomic-store-10.ll | 24 + llvm/test/CodeGen/SystemZ/fp-half-libcall.ll | 312 +++++++ llvm/test/CodeGen/SystemZ/fp-half-strict.ll | 209 +++++ llvm/test/CodeGen/SystemZ/fp-half-vector.ll | 797 ++++++++++++++++++ llvm/test/CodeGen/SystemZ/fp-half.ll | 627 ++++++++++++++ llvm/test/CodeGen/SystemZ/fp-round-03.ll | 15 +- llvm/test/CodeGen/SystemZ/spill-half-01.mir | 47 ++ llvm/test/CodeGen/SystemZ/spill-half-02.mir | 40 + llvm/test/CodeGen/SystemZ/twoaddr-kill.mir | 8 +- 36 files changed, 2590 insertions(+), 57 deletions(-) create mode 100644 clang/test/CodeGen/SystemZ/Float16.c create mode 100644 clang/test/CodeGen/SystemZ/fp16.c create mode 100644 llvm/test/CodeGen/SystemZ/atomic-load-10.ll create mode 100644 llvm/test/CodeGen/SystemZ/atomic-store-10.ll create mode 100644 llvm/test/CodeGen/SystemZ/fp-half-libcall.ll create mode 100644 llvm/test/CodeGen/SystemZ/fp-half-strict.ll create mode 100644 llvm/test/CodeGen/SystemZ/fp-half-vector.ll create mode 100644 llvm/test/CodeGen/SystemZ/fp-half.ll create mode 100644 llvm/test/CodeGen/SystemZ/spill-half-01.mir create mode 100644 llvm/test/CodeGen/SystemZ/spill-half-02.mir diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 971ab50cc9a69..7835eceadf660 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -1000,6 +1000,7 @@ to ``float``; see below for more information on this emulation.
* SPIR (natively) * X86 (if SSE2 is available; natively if AVX512-FP16 is also available) * RISC-V (natively if Zfh or Zhinx is available) + * SystemZ (emulated) * ``__bf16`` is supported on the following targets (currently never natively): diff --git a/clang/lib/Basic/Targets/SystemZ.h b/clang/lib/Basic/Targets/SystemZ.h index 4d1509b84e82b..1427c8e5e4e07 100644 --- a/clang/lib/Basic/Targets/SystemZ.h +++ b/clang/lib/Basic/Targets/SystemZ.h @@ -93,11 +93,26 @@ class LLVM_LIBRARY_VISIBILITY SystemZTargetInfo : public TargetInfo { "-v128:64-a:8:16-n32:64"); } MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 128; + + // True if the backend supports operations on the half LLVM IR type. + // By setting this to false, conversions will happen for _Float16 around + // a statement by default, with operations done in float. However, if + // -ffloat16-excess-precision=none is given, no conversions will be made + // and instead the backend will promote each half operation to float + // individually. + HasLegalHalfType = false; + // Support _Float16. + HasFloat16 = true; + HasStrictFP = true; } unsigned getMinGlobalAlign(uint64_t Size, bool HasNonWeakDef) const override; + bool useFP16ConversionIntrinsics() const override { + return false; + } + void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override; diff --git a/clang/lib/CodeGen/Targets/SystemZ.cpp b/clang/lib/CodeGen/Targets/SystemZ.cpp index 8a9fddace76d9..10f955c08188e 100644 --- a/clang/lib/CodeGen/Targets/SystemZ.cpp +++ b/clang/lib/CodeGen/Targets/SystemZ.cpp @@ -185,6 +185,7 @@ bool SystemZABIInfo::isFPArgumentType(QualType Ty) const { if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) switch (BT->getKind()) { + case BuiltinType::Float16: // _Float16 case BuiltinType::Float: case BuiltinType::Double: return true; @@ -277,7 +278,8 @@ RValue SystemZABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, } else { if (AI.getCoerceToType()) ArgTy = AI.getCoerceToType(); - InFPRs = (!IsSoftFloatABI && (ArgTy->isFloatTy() || ArgTy->isDoubleTy())); + InFPRs = (!IsSoftFloatABI && + (ArgTy->isHalfTy() || ArgTy->isFloatTy() || ArgTy->isDoubleTy())); IsVector = ArgTy->isVectorTy(); UnpaddedSize = TyInfo.Width; DirectAlign = TyInfo.Align; @@ -449,10 +451,11 @@ ABIArgInfo SystemZABIInfo::classifyArgumentType(QualType Ty) const { // The structure is passed as an unextended integer, a float, or a double. if (isFPArgumentType(SingleElementTy)) { - assert(Size == 32 || Size == 64); + assert(Size == 16 || Size == 32 || Size == 64); return ABIArgInfo::getDirect( - Size == 32 ? llvm::Type::getFloatTy(getVMContext()) - : llvm::Type::getDoubleTy(getVMContext())); + Size == 16 ? llvm::Type::getHalfTy(getVMContext()) + : Size == 32 ? llvm::Type::getFloatTy(getVMContext()) + : llvm::Type::getDoubleTy(getVMContext())); } else { llvm::IntegerType *PassTy = llvm::IntegerType::get(getVMContext(), Size); return Size <= 32 ?
ABIArgInfo::getNoExtend(PassTy) diff --git a/clang/test/CodeGen/SystemZ/Float16.c b/clang/test/CodeGen/SystemZ/Float16.c new file mode 100644 index 0000000000000..4444dbdcc23ca --- /dev/null +++ b/clang/test/CodeGen/SystemZ/Float16.c @@ -0,0 +1,85 @@ +// RUN: %clang_cc1 -triple s390x-linux-gnu \ +// RUN: -ffloat16-excess-precision=standard -emit-llvm -o - %s \ +// RUN: | FileCheck %s -check-prefix=STANDARD + +// RUN: %clang_cc1 -triple s390x-linux-gnu \ +// RUN: -ffloat16-excess-precision=none -emit-llvm -o - %s \ +// RUN: | FileCheck %s -check-prefix=NONE + +// RUN: %clang_cc1 -triple s390x-linux-gnu \ +// RUN: -ffloat16-excess-precision=fast -emit-llvm -o - %s \ +// RUN: | FileCheck %s -check-prefix=FAST + +_Float16 f(_Float16 a, _Float16 b, _Float16 c, _Float16 d) { + return a * b + c * d; +} + +// STANDARD-LABEL: define dso_local half @f(half noundef %a, half noundef %b, half noundef %c, half noundef %d) #0 { +// STANDARD-NEXT: entry: +// STANDARD-NEXT: %a.addr = alloca half, align 2 +// STANDARD-NEXT: %b.addr = alloca half, align 2 +// STANDARD-NEXT: %c.addr = alloca half, align 2 +// STANDARD-NEXT: %d.addr = alloca half, align 2 +// STANDARD-NEXT: store half %a, ptr %a.addr, align 2 +// STANDARD-NEXT: store half %b, ptr %b.addr, align 2 +// STANDARD-NEXT: store half %c, ptr %c.addr, align 2 +// STANDARD-NEXT: store half %d, ptr %d.addr, align 2 +// STANDARD-NEXT: %0 = load half, ptr %a.addr, align 2 +// STANDARD-NEXT: %ext = fpext half %0 to float +// STANDARD-NEXT: %1 = load half, ptr %b.addr, align 2 +// STANDARD-NEXT: %ext1 = fpext half %1 to float +// STANDARD-NEXT: %mul = fmul float %ext, %ext1 +// STANDARD-NEXT: %2 = load half, ptr %c.addr, align 2 +// STANDARD-NEXT: %ext2 = fpext half %2 to float +// STANDARD-NEXT: %3 = load half, ptr %d.addr, align 2 +// STANDARD-NEXT: %ext3 = fpext half %3 to float +// STANDARD-NEXT: %mul4 = fmul float %ext2, %ext3 +// STANDARD-NEXT: %add = fadd float %mul, %mul4 +// STANDARD-NEXT: %unpromotion = fptrunc float %add to half +// STANDARD-NEXT: ret half %unpromotion +// STANDARD-NEXT: } + +// NONE-LABEL: define dso_local half @f(half noundef %a, half noundef %b, half noundef %c, half noundef %d) #0 { +// NONE-NEXT: entry: +// NONE-NEXT: %a.addr = alloca half, align 2 +// NONE-NEXT: %b.addr = alloca half, align 2 +// NONE-NEXT: %c.addr = alloca half, align 2 +// NONE-NEXT: %d.addr = alloca half, align 2 +// NONE-NEXT: store half %a, ptr %a.addr, align 2 +// NONE-NEXT: store half %b, ptr %b.addr, align 2 +// NONE-NEXT: store half %c, ptr %c.addr, align 2 +// NONE-NEXT: store half %d, ptr %d.addr, align 2 +// NONE-NEXT: %0 = load half, ptr %a.addr, align 2 +// NONE-NEXT: %1 = load half, ptr %b.addr, align 2 +// NONE-NEXT: %mul = fmul half %0, %1 +// NONE-NEXT: %2 = load half, ptr %c.addr, align 2 +// NONE-NEXT: %3 = load half, ptr %d.addr, align 2 +// NONE-NEXT: %mul1 = fmul half %2, %3 +// NONE-NEXT: %add = fadd half %mul, %mul1 +// NONE-NEXT: ret half %add +// NONE-NEXT: } + +// FAST-LABEL: define dso_local half @f(half noundef %a, half noundef %b, half noundef %c, half noundef %d) #0 { +// FAST-NEXT: entry: +// FAST-NEXT: %a.addr = alloca half, align 2 +// FAST-NEXT: %b.addr = alloca half, align 2 +// FAST-NEXT: %c.addr = alloca half, align 2 +// FAST-NEXT: %d.addr = alloca half, align 2 +// FAST-NEXT: store half %a, ptr %a.addr, align 2 +// FAST-NEXT: store half %b, ptr %b.addr, align 2 +// FAST-NEXT: store half %c, ptr %c.addr, align 2 +// FAST-NEXT: store half %d, ptr %d.addr, align 2 +// FAST-NEXT: %0 = load half, ptr %a.addr, align 2 +// 
FAST-NEXT: %ext = fpext half %0 to float +// FAST-NEXT: %1 = load half, ptr %b.addr, align 2 +// FAST-NEXT: %ext1 = fpext half %1 to float +// FAST-NEXT: %mul = fmul float %ext, %ext1 +// FAST-NEXT: %2 = load half, ptr %c.addr, align 2 +// FAST-NEXT: %ext2 = fpext half %2 to float +// FAST-NEXT: %3 = load half, ptr %d.addr, align 2 +// FAST-NEXT: %ext3 = fpext half %3 to float +// FAST-NEXT: %mul4 = fmul float %ext2, %ext3 +// FAST-NEXT: %add = fadd float %mul, %mul4 +// FAST-NEXT: %unpromotion = fptrunc float %add to half +// FAST-NEXT: ret half %unpromotion +// FAST-NEXT: } diff --git a/clang/test/CodeGen/SystemZ/fp16.c b/clang/test/CodeGen/SystemZ/fp16.c new file mode 100644 index 0000000000000..430958b69a177 --- /dev/null +++ b/clang/test/CodeGen/SystemZ/fp16.c @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -triple s390x-linux-gnu -emit-llvm -o - %s \ +// RUN: | FileCheck %s + +void f(__fp16 *a, __fp16 *b, __fp16 *c, __fp16 *d, __fp16 *e) { + *e = (*a) * (*b) + (*c) * (*d); +} + +// CHECK-LABEL: define dso_local void @f(ptr noundef %a, ptr noundef %b, ptr noundef %c, ptr noundef %d, ptr noundef %e) #0 { +// CHECK-NEXT: entry: +// CHECK-NEXT: %a.addr = alloca ptr, align 8 +// CHECK-NEXT: %b.addr = alloca ptr, align 8 +// CHECK-NEXT: %c.addr = alloca ptr, align 8 +// CHECK-NEXT: %d.addr = alloca ptr, align 8 +// CHECK-NEXT: %e.addr = alloca ptr, align 8 +// CHECK-NEXT: store ptr %a, ptr %a.addr, align 8 +// CHECK-NEXT: store ptr %b, ptr %b.addr, align 8 +// CHECK-NEXT: store ptr %c, ptr %c.addr, align 8 +// CHECK-NEXT: store ptr %d, ptr %d.addr, align 8 +// CHECK-NEXT: store ptr %e, ptr %e.addr, align 8 +// CHECK-NEXT: %0 = load ptr, ptr %a.addr, align 8 +// CHECK-NEXT: %1 = load half, ptr %0, align 2 +// CHECK-NEXT: %conv = fpext half %1 to float +// CHECK-NEXT: %2 = load ptr, ptr %b.addr, align 8 +// CHECK-NEXT: %3 = load half, ptr %2, align 2 +// CHECK-NEXT: %conv1 = fpext half %3 to float +// CHECK-NEXT: %mul = fmul float %conv, %conv1 +// CHECK-NEXT: %4 = load ptr, ptr %c.addr, align 8 +// CHECK-NEXT: %5 = load half, ptr %4, align 2 +// CHECK-NEXT: %conv2 = fpext half %5 to float +// CHECK-NEXT: %6 = load ptr, ptr %d.addr, align 8 +// CHECK-NEXT: %7 = load half, ptr %6, align 2 +// CHECK-NEXT: %conv3 = fpext half %7 to float +// CHECK-NEXT: %mul4 = fmul float %conv2, %conv3 +// CHECK-NEXT: %add = fadd float %mul, %mul4 +// CHECK-NEXT: %8 = fptrunc float %add to half +// CHECK-NEXT: %9 = load ptr, ptr %e.addr, align 8 +// CHECK-NEXT: store half %8, ptr %9, align 2 +// CHECK-NEXT: ret void +// CHECK-NEXT: } diff --git a/clang/test/CodeGen/SystemZ/systemz-abi.c b/clang/test/CodeGen/SystemZ/systemz-abi.c index 7de425950e9fd..5e61c03672174 100644 --- a/clang/test/CodeGen/SystemZ/systemz-abi.c +++ b/clang/test/CodeGen/SystemZ/systemz-abi.c @@ -52,6 +52,9 @@ long long pass_longlong(long long arg) { return arg; } __int128 pass_int128(__int128 arg) { return arg; } // CHECK-LABEL: define{{.*}} void @pass_int128(ptr dead_on_unwind noalias writable sret(i128) align 8 %{{.*}}, ptr %0) +_Float16 pass__Float16(_Float16 arg) { return arg; } +// CHECK-LABEL: define{{.*}} half @pass__Float16(half %{{.*}}) + float pass_float(float arg) { return arg; } // CHECK-LABEL: define{{.*}} float @pass_float(float %{{.*}}) @@ -79,6 +82,9 @@ _Complex long pass_complex_long(_Complex long arg) { return arg; } _Complex long long pass_complex_longlong(_Complex long long arg) { return arg; } // CHECK-LABEL: define{{.*}} void @pass_complex_longlong(ptr dead_on_unwind noalias writable sret({ i64, i64 }) align 8 %{{.*}}, ptr 
%{{.*}}arg) +_Complex _Float16 pass_complex__Float16(_Complex _Float16 arg) { return arg; } +// CHECK-LABEL: define{{.*}} void @pass_complex__Float16(ptr dead_on_unwind noalias writable sret({ half, half }) align 2 %{{.*}}, ptr %{{.*}}arg) + _Complex float pass_complex_float(_Complex float arg) { return arg; } // CHECK-LABEL: define{{.*}} void @pass_complex_float(ptr dead_on_unwind noalias writable sret({ float, float }) align 4 %{{.*}}, ptr %{{.*}}arg) @@ -130,6 +136,11 @@ struct agg_16byte pass_agg_16byte(struct agg_16byte arg) { return arg; } // Float-like aggregate types +struct agg__Float16 { _Float16 a; }; +struct agg__Float16 pass_agg__Float16(struct agg__Float16 arg) { return arg; } +// HARD-FLOAT-LABEL: define{{.*}} void @pass_agg__Float16(ptr dead_on_unwind noalias writable sret(%struct.agg__Float16) align 2 %{{.*}}, half %{{.*}}) +// SOFT-FLOAT-LABEL: define{{.*}} void @pass_agg__Float16(ptr dead_on_unwind noalias writable sret(%struct.agg__Float16) align 2 %{{.*}}, i16 noext %{{.*}}) + struct agg_float { float a; }; struct agg_float pass_agg_float(struct agg_float arg) { return arg; } // HARD-FLOAT-LABEL: define{{.*}} void @pass_agg_float(ptr dead_on_unwind noalias writable sret(%struct.agg_float) align 4 %{{.*}}, float %{{.*}}) @@ -144,6 +155,11 @@ struct agg_longdouble { long double a; }; struct agg_longdouble pass_agg_longdouble(struct agg_longdouble arg) { return arg; } // CHECK-LABEL: define{{.*}} void @pass_agg_longdouble(ptr dead_on_unwind noalias writable sret(%struct.agg_longdouble) align 8 %{{.*}}, ptr %{{.*}}) +struct agg__Float16_a8 { _Float16 a __attribute__((aligned (8))); }; +struct agg__Float16_a8 pass_agg__Float16_a8(struct agg__Float16_a8 arg) { return arg; } +// HARD-FLOAT-LABEL: define{{.*}} void @pass_agg__Float16_a8(ptr dead_on_unwind noalias writable sret(%struct.agg__Float16_a8) align 8 %{{.*}}, double %{{.*}}) +// SOFT-FLOAT-LABEL: define{{.*}} void @pass_agg__Float16_a8(ptr dead_on_unwind noalias writable sret(%struct.agg__Float16_a8) align 8 %{{.*}}, i64 %{{.*}}) + struct agg_float_a8 { float a __attribute__((aligned (8))); }; struct agg_float_a8 pass_agg_float_a8(struct agg_float_a8 arg) { return arg; } // HARD-FLOAT-LABEL: define{{.*}} void @pass_agg_float_a8(ptr dead_on_unwind noalias writable sret(%struct.agg_float_a8) align 8 %{{.*}}, double %{{.*}}) @@ -171,6 +187,10 @@ struct agg_nofloat3 pass_agg_nofloat3(struct agg_nofloat3 arg) { return arg; } // Union types likewise are *not* float-like aggregate types +union union__Float16 { _Float16 a; }; +union union__Float16 pass_union__Float16(union union__Float16 arg) { return arg; } +// CHECK-LABEL: define{{.*}} void @pass_union__Float16(ptr dead_on_unwind noalias writable sret(%union.union__Float16) align 2 %{{.*}}, i16 noext %{{.*}}) + union union_float { float a; }; union union_float pass_union_float(union union_float arg) { return arg; } // CHECK-LABEL: define{{.*}} void @pass_union_float(ptr dead_on_unwind noalias writable sret(%union.union_float) align 4 %{{.*}}, i32 noext %{{.*}}) @@ -448,6 +468,30 @@ struct agg_8byte va_agg_8byte(__builtin_va_list l) { return __builtin_va_arg(l, // CHECK: [[VA_ARG_ADDR:%[^ ]+]] = phi ptr [ [[RAW_REG_ADDR]], %{{.*}} ], [ [[RAW_MEM_ADDR]], %{{.*}} ] // CHECK: ret void +struct agg__Float16 va_agg__Float16(__builtin_va_list l) { return __builtin_va_arg(l, struct agg__Float16); } +// CHECK-LABEL: define{{.*}} void @va_agg__Float16(ptr dead_on_unwind noalias writable sret(%struct.agg__Float16) align 2 %{{.*}}, ptr %{{.*}} +// HARD-FLOAT: [[REG_COUNT_PTR:%[^ ]+]] = 
getelementptr inbounds nuw %struct.__va_list_tag, ptr %{{.*}}, i32 0, i32 1 +// SOFT-FLOAT: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds nuw %struct.__va_list_tag, ptr %{{.*}}, i32 0, i32 0 +// CHECK: [[REG_COUNT:%[^ ]+]] = load i64, ptr [[REG_COUNT_PTR]] +// HARD-FLOAT: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 4 +// SOFT-FLOAT: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 5 +// CHECK: br i1 [[FITS_IN_REGS]], +// CHECK: [[SCALED_REG_COUNT:%[^ ]+]] = mul i64 [[REG_COUNT]], 8 +// HARD-FLOAT: [[REG_OFFSET:%[^ ]+]] = add i64 [[SCALED_REG_COUNT]], 128 +// SOFT-FLOAT: [[REG_OFFSET:%[^ ]+]] = add i64 [[SCALED_REG_COUNT]], 22 +// CHECK: [[REG_SAVE_AREA_PTR:%[^ ]+]] = getelementptr inbounds nuw %struct.__va_list_tag, ptr %{{.*}}, i32 0, i32 3 +// CHECK: [[REG_SAVE_AREA:%[^ ]+]] = load ptr, ptr [[REG_SAVE_AREA_PTR:[^ ]+]] +// CHECK: [[RAW_REG_ADDR:%[^ ]+]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i64 [[REG_OFFSET]] +// CHECK: [[REG_COUNT1:%[^ ]+]] = add i64 [[REG_COUNT]], 1 +// CHECK: store i64 [[REG_COUNT1]], ptr [[REG_COUNT_PTR]] +// CHECK: [[OVERFLOW_ARG_AREA_PTR:%[^ ]+]] = getelementptr inbounds nuw %struct.__va_list_tag, ptr %{{.*}}, i32 0, i32 2 +// CHECK: [[OVERFLOW_ARG_AREA:%[^ ]+]] = load ptr, ptr [[OVERFLOW_ARG_AREA_PTR]] +// CHECK: [[RAW_MEM_ADDR:%[^ ]+]] = getelementptr i8, ptr [[OVERFLOW_ARG_AREA]], i64 6 +// CHECK: [[OVERFLOW_ARG_AREA2:%[^ ]+]] = getelementptr i8, ptr [[OVERFLOW_ARG_AREA]], i64 8 +// CHECK: store ptr [[OVERFLOW_ARG_AREA2]], ptr [[OVERFLOW_ARG_AREA_PTR]] +// CHECK: [[VA_ARG_ADDR:%[^ ]+]] = phi ptr [ [[RAW_REG_ADDR]], %{{.*}} ], [ [[RAW_MEM_ADDR]], %{{.*}} ] +// CHECK: ret void + struct agg_float va_agg_float(__builtin_va_list l) { return __builtin_va_arg(l, struct agg_float); } // CHECK-LABEL: define{{.*}} void @va_agg_float(ptr dead_on_unwind noalias writable sret(%struct.agg_float) align 4 %{{.*}}, ptr %{{.*}} // HARD-FLOAT: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds nuw %struct.__va_list_tag, ptr %{{.*}}, i32 0, i32 1 diff --git a/compiler-rt/test/builtins/CMakeLists.txt b/compiler-rt/test/builtins/CMakeLists.txt index 8fdcec6029a2a..63f4c94605c90 100644 --- a/compiler-rt/test/builtins/CMakeLists.txt +++ b/compiler-rt/test/builtins/CMakeLists.txt @@ -56,7 +56,7 @@ foreach(arch ${BUILTIN_TEST_ARCH}) string(REPLACE ";" " " BUILTINS_TEST_TARGET_CFLAGS "${BUILTINS_TEST_TARGET_CFLAGS}") endif() else() - if (${arch} MATCHES "arm|armhf|aarch64|arm64|i?86|x86_64|AMD64|riscv32|riscv64" AND COMPILER_RT_HAS_${arch}_FLOAT16) + if (${arch} MATCHES "arm|armhf|aarch64|arm64|i?86|x86_64|AMD64|riscv32|riscv64|s390x" AND COMPILER_RT_HAS_${arch}_FLOAT16) list(APPEND BUILTINS_TEST_TARGET_CFLAGS -DCOMPILER_RT_HAS_FLOAT16) string(REPLACE ";" " " BUILTINS_TEST_TARGET_CFLAGS "${BUILTINS_TEST_TARGET_CFLAGS}") endif() diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp index 90c3bf0db0236..5ba1bd87e9518 100644 --- a/llvm/lib/IR/RuntimeLibcalls.cpp +++ b/llvm/lib/IR/RuntimeLibcalls.cpp @@ -253,4 +253,9 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT) { } setLibcallName(RTLIB::MULO_I128, nullptr); } + + if (TT.isSystemZ()) { + setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2"); + setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2"); + } } diff --git a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index 4fa5f026602ef..265fea11e15dd 100644 --- a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ 
b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -61,9 +61,11 @@ enum RegisterKind { GRH32Reg, GR64Reg, GR128Reg, + FP16Reg, FP32Reg, FP64Reg, FP128Reg, + VR16Reg, VR32Reg, VR64Reg, VR128Reg, @@ -365,9 +367,11 @@ class SystemZOperand : public MCParsedAsmOperand { bool isADDR32() const { return isReg(GR32Reg); } bool isADDR64() const { return isReg(GR64Reg); } bool isADDR128() const { return false; } + bool isFP16() const { return isReg(FP16Reg); } bool isFP32() const { return isReg(FP32Reg); } bool isFP64() const { return isReg(FP64Reg); } bool isFP128() const { return isReg(FP128Reg); } + bool isVR16() const { return isReg(VR16Reg); } bool isVR32() const { return isReg(VR32Reg); } bool isVR64() const { return isReg(VR64Reg); } bool isVF128() const { return false; } @@ -544,6 +548,9 @@ class SystemZAsmParser : public MCTargetAsmParser { ParseStatus parseADDR128(OperandVector &Operands) { llvm_unreachable("Shouldn't be used as an operand"); } + ParseStatus parseFP16(OperandVector &Operands) { + return parseRegister(Operands, FP16Reg); + } ParseStatus parseFP32(OperandVector &Operands) { return parseRegister(Operands, FP32Reg); } @@ -553,6 +560,9 @@ class SystemZAsmParser : public MCTargetAsmParser { ParseStatus parseFP128(OperandVector &Operands) { return parseRegister(Operands, FP128Reg); } + ParseStatus parseVR16(OperandVector &Operands) { + return parseRegister(Operands, VR16Reg); + } ParseStatus parseVR32(OperandVector &Operands) { return parseRegister(Operands, VR32Reg); } @@ -842,11 +852,13 @@ ParseStatus SystemZAsmParser::parseRegister(OperandVector &Operands, case GR128Reg: Group = RegGR; break; + case FP16Reg: case FP32Reg: case FP64Reg: case FP128Reg: Group = RegFP; break; + case VR16Reg: case VR32Reg: case VR64Reg: case VR128Reg: @@ -895,9 +907,11 @@ ParseStatus SystemZAsmParser::parseRegister(OperandVector &Operands, case GRH32Reg: Regs = SystemZMC::GRH32Regs; break; case GR64Reg: Regs = SystemZMC::GR64Regs; break; case GR128Reg: Regs = SystemZMC::GR128Regs; break; + case FP16Reg: Regs = SystemZMC::FP16Regs; break; case FP32Reg: Regs = SystemZMC::FP32Regs; break; case FP64Reg: Regs = SystemZMC::FP64Regs; break; case FP128Reg: Regs = SystemZMC::FP128Regs; break; + case VR16Reg: Regs = SystemZMC::VR16Regs; break; case VR32Reg: Regs = SystemZMC::VR32Regs; break; case VR64Reg: Regs = SystemZMC::VR64Regs; break; case VR128Reg: Regs = SystemZMC::VR128Regs; break; diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp index 2bef87696a913..493d6ea3b8cd4 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -64,6 +64,13 @@ const unsigned SystemZMC::GR128Regs[16] = { SystemZ::R0Q, 0, SystemZ::R2Q, 0, SystemZ::R4Q, 0, SystemZ::R6Q, 0, SystemZ::R8Q, 0, SystemZ::R10Q, 0, SystemZ::R12Q, 0, SystemZ::R14Q, 0}; +const unsigned SystemZMC::FP16Regs[16] = { + SystemZ::F0H, SystemZ::F1H, SystemZ::F2H, SystemZ::F3H, + SystemZ::F4H, SystemZ::F5H, SystemZ::F6H, SystemZ::F7H, + SystemZ::F8H, SystemZ::F9H, SystemZ::F10H, SystemZ::F11H, + SystemZ::F12H, SystemZ::F13H, SystemZ::F14H, SystemZ::F15H +}; + const unsigned SystemZMC::FP32Regs[16] = { SystemZ::F0S, SystemZ::F1S, SystemZ::F2S, SystemZ::F3S, SystemZ::F4S, SystemZ::F5S, SystemZ::F6S, SystemZ::F7S, @@ -80,6 +87,17 @@ const unsigned SystemZMC::FP128Regs[16] = { SystemZ::F0Q, SystemZ::F1Q, 0, 0, SystemZ::F4Q, SystemZ::F5Q, 0, 0, SystemZ::F8Q, SystemZ::F9Q, 0, 0, 
SystemZ::F12Q, SystemZ::F13Q, 0, 0}; +const unsigned SystemZMC::VR16Regs[32] = { + SystemZ::F0H, SystemZ::F1H, SystemZ::F2H, SystemZ::F3H, + SystemZ::F4H, SystemZ::F5H, SystemZ::F6H, SystemZ::F7H, + SystemZ::F8H, SystemZ::F9H, SystemZ::F10H, SystemZ::F11H, + SystemZ::F12H, SystemZ::F13H, SystemZ::F14H, SystemZ::F15H, + SystemZ::F16H, SystemZ::F17H, SystemZ::F18H, SystemZ::F19H, + SystemZ::F20H, SystemZ::F21H, SystemZ::F22H, SystemZ::F23H, + SystemZ::F24H, SystemZ::F25H, SystemZ::F26H, SystemZ::F27H, + SystemZ::F28H, SystemZ::F29H, SystemZ::F30H, SystemZ::F31H +}; + const unsigned SystemZMC::VR32Regs[32] = { SystemZ::F0S, SystemZ::F1S, SystemZ::F2S, SystemZ::F3S, SystemZ::F4S, SystemZ::F5S, SystemZ::F6S, SystemZ::F7S, SystemZ::F8S, SystemZ::F9S, @@ -132,6 +150,7 @@ unsigned SystemZMC::getFirstReg(unsigned Reg) { Map[AR32Regs[I]] = I; } for (unsigned I = 0; I < 32; ++I) { + Map[VR16Regs[I]] = I; Map[VR32Regs[I]] = I; Map[VR64Regs[I]] = I; Map[VR128Regs[I]] = I; diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h index 39c1836a13700..1db1b4b9da002 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h @@ -43,9 +43,11 @@ extern const unsigned GR32Regs[16]; extern const unsigned GRH32Regs[16]; extern const unsigned GR64Regs[16]; extern const unsigned GR128Regs[16]; +extern const unsigned FP16Regs[16]; extern const unsigned FP32Regs[16]; extern const unsigned FP64Regs[16]; extern const unsigned FP128Regs[16]; +extern const unsigned VR16Regs[32]; extern const unsigned VR32Regs[32]; extern const unsigned VR64Regs[32]; extern const unsigned VR128Regs[32]; diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp index b3efa579dfe0c..f679cc05f3c04 100644 --- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -549,6 +549,10 @@ void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) { lowerAlignmentHint(MI, LoweredMI, SystemZ::VSTMAlign); break; + case SystemZ::VL16: + LoweredMI = lowerSubvectorLoad(MI, SystemZ::VLREPH); + break; + case SystemZ::VL32: LoweredMI = lowerSubvectorLoad(MI, SystemZ::VLREPF); break; @@ -557,6 +561,10 @@ void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) { LoweredMI = lowerSubvectorLoad(MI, SystemZ::VLREPG); break; + case SystemZ::VST16: + LoweredMI = lowerSubvectorStore(MI, SystemZ::VSTEH); + break; + case SystemZ::VST32: LoweredMI = lowerSubvectorStore(MI, SystemZ::VSTEF); break; diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/llvm/lib/Target/SystemZ/SystemZCallingConv.td index 99bb697ce2014..0ad872bcb63a7 100644 --- a/llvm/lib/Target/SystemZ/SystemZCallingConv.td +++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.td @@ -50,6 +50,7 @@ def RetCC_SystemZ_ELF : CallingConv<[ // other floating-point argument registers available for code that // doesn't care about the ABI. All floating-point argument registers // are call-clobbered, so we can use all of them here. + CCIfType<[f16], CCAssignToReg<[F0H, F2H, F4H, F6H]>>, CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>, CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>, @@ -115,6 +116,7 @@ def CC_SystemZ_ELF : CallingConv<[ CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D, R6D]>>, // The first 4 float and double arguments are passed in even registers F0-F6. 
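// Half arguments use the 16-bit parts (F0H, F2H, F4H, F6H) of the same even registers.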
+ CCIfType<[f16], CCAssignToReg<[F0H, F2H, F4H, F6H]>>, CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>, CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>, @@ -138,7 +140,7 @@ def CC_SystemZ_ELF : CallingConv<[ CCAssignToStack<16, 8>>>, // Other arguments are passed in 8-byte-aligned 8-byte stack slots. - CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>> + CCIfType<[i32, i64, f16, f32, f64], CCAssignToStack<8, 8>> ]>; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index caf01ccd1ef7c..6f146b67f8566 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -1204,9 +1204,10 @@ void SystemZDAGToDAGISel::loadVectorConstant( SDValue BitCast = CurDAG->getNode(ISD::BITCAST, DL, VT, Op); ReplaceNode(Node, BitCast.getNode()); SelectCode(BitCast.getNode()); - } else { // float or double - unsigned SubRegIdx = - (VT.getSizeInBits() == 32 ? SystemZ::subreg_h32 : SystemZ::subreg_h64); + } else { // half, float or double + unsigned SubRegIdx = (VT.getSizeInBits() == 16 ? SystemZ::subreg_h16 + : VT.getSizeInBits() == 32 ? SystemZ::subreg_h32 + : SystemZ::subreg_h64); ReplaceNode( Node, CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, Op).getNode()); } diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 35cee7b39d143..7b2df9c64aaf0 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -103,9 +103,11 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass); if (!useSoftFloat()) { if (Subtarget.hasVector()) { + addRegisterClass(MVT::f16, &SystemZ::VR16BitRegClass); addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass); addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass); } else { + addRegisterClass(MVT::f16, &SystemZ::FP16BitRegClass); addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass); addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass); } @@ -548,11 +550,24 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, } // Handle floating-point types. + // Promote all f16 operations to float, with some exceptions below. + for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) + setOperationAction(Opc, MVT::f16, Promote); + setOperationAction(ISD::ConstantFP, MVT::f16, Expand); + for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) { + setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand); + setTruncStoreAction(VT, MVT::f16, Expand); + } + for (auto Op : {ISD::LOAD, ISD::ATOMIC_LOAD, ISD::STORE, ISD::ATOMIC_STORE}) + setOperationAction(Op, MVT::f16, Subtarget.hasVector() ? Legal : Custom); + setOperationAction(ISD::FP_ROUND, MVT::f16, LibCall); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, LibCall); + for (unsigned I = MVT::FIRST_FP_VALUETYPE; I <= MVT::LAST_FP_VALUETYPE; ++I) { MVT VT = MVT::SimpleValueType(I); - if (isTypeLegal(VT)) { + if (isTypeLegal(VT) && VT != MVT::f16) { // We can use FI for FRINT. 
setOperationAction(ISD::FRINT, VT, Legal); @@ -585,7 +600,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::STRICT_FSQRT, VT, Legal); setOperationAction(ISD::STRICT_FRINT, VT, Legal); setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal); - setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal); if (Subtarget.hasFPExtension()) { setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); setOperationAction(ISD::STRICT_FFLOOR, VT, Legal); @@ -594,6 +608,10 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::STRICT_FROUND, VT, Legal); setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal); } + + // Extension from f16 needs libcall. + setOperationAction(ISD::FP_EXTEND, VT, Custom); + setOperationAction(ISD::STRICT_FP_EXTEND, VT, Custom); } } @@ -816,6 +834,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, // Default to having -disable-strictnode-mutation on IsStrictFPEnabled = true; + setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2"); + setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2"); + if (Subtarget.isTargetzOS()) { struct RTLibCallMapping { RTLIB::Libcall Code; @@ -1941,6 +1962,10 @@ SDValue SystemZTargetLowering::LowerFormalArguments( NumFixedGPRs += 1; RC = &SystemZ::GR64BitRegClass; break; + case MVT::f16: + NumFixedFPRs += 1; + RC = &SystemZ::FP16BitRegClass; + break; case MVT::f32: NumFixedFPRs += 1; RC = &SystemZ::FP32BitRegClass; break; @@ -1985,9 +2010,12 @@ SDValue SystemZTargetLowering::LowerFormalArguments( // from this parameter. Unpromoted ints and floats are // passed as right-justified 8-byte values. SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) + if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32 || + VA.getLocVT() == MVT::f16) { + unsigned SlotOffs = VA.getLocVT() == MVT::f16 ? 6 : 4; FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, - DAG.getIntPtrConstant(4, DL)); + DAG.getIntPtrConstant(SlotOffs, DL)); + } ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN, MachinePointerInfo::getFixedStack(MF, FI)); } @@ -2300,6 +2328,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, VA.getLocMemOffset(); if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) Offset += 4; + else if (VA.getLocVT() == MVT::f16) + Offset += 6; SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, DAG.getIntPtrConstant(Offset, DL)); @@ -4973,6 +5003,22 @@ SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op, return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); } +SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op, + SelectionDAG &DAG) const { + MVT RegVT = Op.getSimpleValueType(); + if (RegVT.getSizeInBits() == 128) + return lowerATOMIC_LDST_I128(Op, DAG); + return lowerLoadF16(Op, DAG); +} + +SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op, + SelectionDAG &DAG) const { + auto *Node = cast<AtomicSDNode>(Op.getNode()); + if (Node->getMemoryVT().getSizeInBits() == 128) + return lowerATOMIC_LDST_I128(Op, DAG); + return lowerStoreF16(Op, DAG); +} + SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op, SelectionDAG &DAG) const { auto *Node = cast<AtomicSDNode>(Op.getNode()); @@ -6736,6 +6782,69 @@ static SDValue lowerAddrSpaceCast(SDValue Op, SelectionDAG &DAG) { return Op; } +SDValue SystemZTargetLowering::lowerFP_EXTEND(SDValue Op, + SelectionDAG &DAG) const { + SDValue In = Op.getOperand(Op->isStrictFPOpcode() ?
1 : 0); + if (In.getSimpleValueType() != MVT::f16) + return Op; // Legal + return SDValue(); // Let legalizer emit the libcall. +} + +SDValue SystemZTargetLowering::lowerLoadF16(SDValue Op, + SelectionDAG &DAG) const { + MVT RegVT = Op.getSimpleValueType(); + assert(RegVT == MVT::f16 && "Expected to lower an f16 load."); + + SDLoc DL(Op); + SDValue NewLd; + if (auto *AtomicLd = dyn_cast<AtomicSDNode>(Op.getNode())) { + assert(EVT(RegVT) == AtomicLd->getMemoryVT() && "Unhandled f16 load"); + NewLd = DAG.getAtomic(ISD::ATOMIC_LOAD, DL, MVT::i16, MVT::i32, + AtomicLd->getChain(), AtomicLd->getBasePtr(), + AtomicLd->getMemOperand()); + cast<AtomicSDNode>(NewLd)->setExtensionType(ISD::EXTLOAD); + } else { + LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode()); + assert(EVT(RegVT) == Ld->getMemoryVT() && "Unhandled f16 load"); + NewLd = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Ld->getChain(), + Ld->getBasePtr(), Ld->getPointerInfo(), + MVT::i16, Ld->getOriginalAlign(), + Ld->getMemOperand()->getFlags()); + } + // Load as integer, shift and then insert into upper 2 bytes of the FP + // register. + SDValue Shft = DAG.getNode(ISD::SHL, DL, MVT::i32, NewLd, + DAG.getConstant(16, DL, MVT::i32)); + SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Shft); + SDValue F16Val = DAG.getTargetExtractSubreg(SystemZ::subreg_h16, + DL, MVT::f16, BCast); + return DAG.getMergeValues({F16Val, NewLd.getValue(1)}, DL); +} + +SDValue SystemZTargetLowering::lowerStoreF16(SDValue Op, + SelectionDAG &DAG) const { + SDValue StoredVal = Op->getOperand(1); + MVT StoreVT = StoredVal.getSimpleValueType(); + assert(StoreVT == MVT::f16 && "Expected to lower an f16 store."); + + // Move into a GPR, shift and store the 2 bytes. + SDLoc DL(Op); + SDNode *U32 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f32); + SDValue In32 = DAG.getTargetInsertSubreg(SystemZ::subreg_h16, DL, + MVT::f32, SDValue(U32, 0), StoredVal); + SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::i32, In32); + SDValue Shft = DAG.getNode(ISD::SRL, DL, MVT::i32, BCast, + DAG.getConstant(16, DL, MVT::i32)); + + if (auto *AtomicSt = dyn_cast<AtomicSDNode>(Op.getNode())) + return DAG.getAtomic(ISD::ATOMIC_STORE, DL, MVT::i16, AtomicSt->getChain(), + Shft, AtomicSt->getBasePtr(), AtomicSt->getMemOperand()); + + StoreSDNode *St = cast<StoreSDNode>(Op.getNode()); + return DAG.getTruncStore(St->getChain(), DL, Shft, St->getBasePtr(), + MVT::i16, St->getMemOperand()); +} + SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); @@ -6859,8 +6968,9 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, case ISD::ATOMIC_SWAP: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW); case ISD::ATOMIC_STORE: + return lowerATOMIC_STORE(Op, DAG); case ISD::ATOMIC_LOAD: - return lowerATOMIC_LDST_I128(Op, DAG); + return lowerATOMIC_LOAD(Op, DAG); case ISD::ATOMIC_LOAD_ADD: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD); case ISD::ATOMIC_LOAD_SUB: @@ -6921,6 +7031,13 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerFSHL(Op, DAG); case ISD::FSHR: return lowerFSHR(Op, DAG); + case ISD::FP_EXTEND: + case ISD::STRICT_FP_EXTEND: + return lowerFP_EXTEND(Op, DAG); + case ISD::LOAD: + return lowerLoadF16(Op, DAG); + case ISD::STORE: + return lowerStoreF16(Op, DAG); case ISD::IS_FPCLASS: return lowerIS_FPCLASS(Op, DAG); case ISD::GET_ROUNDING: diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index 4763af75552da..241acdea77c5c 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -720,6 +720,8 @@ class SystemZTargetLowering : public TargetLowering { SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVECREDUCE_ADD(SDValue Op, SelectionDAG &DAG) const; SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerATOMIC_LDST_I128(SDValue Op, SelectionDAG &DAG) const; SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG, unsigned Opcode) const; @@ -743,6 +745,10 @@ class SystemZTargetLowering : public TargetLowering { SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const; SDValue lowerFSHL(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFSHR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerLoadF16(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerStoreF16(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const; SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; SDValue lowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td index bef38b9cb809b..b258ab49cc1e2 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrFP.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrFP.td @@ -36,6 +36,8 @@ defm CondStoreF64 : CondStores; def LZER : InherentRRE<"lzer", 0xB374, FP32, fpimm0>; def LZDR : InherentRRE<"lzdr", 0xB375, FP64, fpimm0>; def LZXR : InherentRRE<"lzxr", 0xB376, FP128, fpimm0>; @@ -47,8 +49,11 @@ let isMoveReg = 1 in { def LDR : UnaryRR <"ldr", 0x28, null_frag, FP64, FP64>; def LXR : UnaryRRE<"lxr", 0xB365, null_frag, FP128, FP128>; // For z13 we prefer LDR over LER to avoid partial register dependencies. - let isCodeGenOnly = 1 in - def LDR32 : UnaryRR<"ldr", 0x28, null_frag, FP32, FP32>; + let isCodeGenOnly = 1 in { + def LER16 : UnaryRR <"ler", 0x38, null_frag, FP16, FP16>; + def LDR16 : UnaryRR<"ldr", 0x28, null_frag, FP16, FP16>; + def LDR32 : UnaryRR<"ldr", 0x28, null_frag, FP32, FP32>; + } } @@ -333,8 +338,10 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { } // Generic form, which does not set CC. def LCDFR : UnaryRRE<"lcdfr", 0xB373, fneg, FP64, FP64>; -let isCodeGenOnly = 1 in +let isCodeGenOnly = 1 in { + def LCDFR_16 : UnaryRRE<"lcdfr", 0xB373, fneg, FP16, FP16>; def LCDFR_32 : UnaryRRE<"lcdfr", 0xB373, fneg, FP32, FP32>; +} // Absolute value (Load Positive). let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { @@ -600,6 +607,7 @@ let hasSideEffects = 1 in { // Peepholes //===----------------------------------------------------------------------===// +def : Pat<(f16 fpimmneg0), (LCDFR_16 (LZER_16))>; def : Pat<(f32 fpimmneg0), (LCDFR_32 (LZER))>; def : Pat<(f64 fpimmneg0), (LCDFR (LZDR))>; def : Pat<(f128 fpimmneg0), (LCXBR (LZXR))>; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index 91a4aa9c73010..faeec2ada9bf1 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -989,6 +989,8 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, unsigned Opcode; if (SystemZ::GR64BitRegClass.contains(DestReg, SrcReg)) Opcode = SystemZ::LGR; + else if (SystemZ::FP16BitRegClass.contains(DestReg, SrcReg)) + Opcode = STI.hasVector() ? 
SystemZ::LDR16 : SystemZ::LER16; else if (SystemZ::FP32BitRegClass.contains(DestReg, SrcReg)) // For z13 we prefer LDR over LER to avoid partial register dependencies. Opcode = STI.hasVector() ? SystemZ::LDR32 : SystemZ::LER; @@ -1022,8 +1024,31 @@ void SystemZInstrInfo::storeRegToStackSlot( bool isKill, int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags) const { + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + // Without vector support, there are no fp16 load/store instructions, so + // need to save/restore via GPR. + if (RC == &SystemZ::FP16BitRegClass && !STI.hasVector()) { + assert(!MRI.isSSA() && MRI.getNumVirtRegs() && + "Expected non-SSA form with virtual registers."); + Register GR64Reg = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass); + Register FP64Reg = MRI.createVirtualRegister(&SystemZ::FP64BitRegClass); + BuildMI(MBB, MBBI, DL, get(SystemZ::COPY)) + .addReg(FP64Reg, RegState::DefineNoRead, SystemZ::subreg_h16) + .addReg(SrcReg, getKillRegState(isKill)); + BuildMI(MBB, MBBI, DL, get(SystemZ::LGDR), GR64Reg) + .addReg(FP64Reg, RegState::Kill); + BuildMI(MBB, MBBI, DL, get(SystemZ::SRLG), GR64Reg) + .addReg(GR64Reg) + .addReg(0) + .addImm(48); + addFrameReference(BuildMI(MBB, MBBI, DL, get(SystemZ::STH)) + .addReg(GR64Reg, RegState::Kill, SystemZ::subreg_l32), + FrameIdx); + return; + } + // Callers may expect a single instruction, so keep 128-bit moves // together for now and lower them after register allocation. unsigned LoadOpcode, StoreOpcode; @@ -1037,8 +1062,31 @@ void SystemZInstrInfo::loadRegFromStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags) const { + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + // Without vector support, there are no fp16 load/store instructions, so + // need to save/restore via GPR. + if (RC == &SystemZ::FP16BitRegClass && !STI.hasVector()) { + assert(!MRI.isSSA() && MRI.getNumVirtRegs() && + "Expected non-SSA form with virtual registers."); + Register GR64Reg = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass); + Register FP64Reg = MRI.createVirtualRegister(&SystemZ::FP64BitRegClass); + addFrameReference(BuildMI(MBB, MBBI, DL, get(SystemZ::LH)) + .addReg(GR64Reg, RegState::DefineNoRead, + SystemZ::subreg_l32), + FrameIdx); + BuildMI(MBB, MBBI, DL, get(SystemZ::SLLG), GR64Reg) + .addReg(GR64Reg) + .addReg(0) + .addImm(48); + BuildMI(MBB, MBBI, DL, get(SystemZ::LDGR), FP64Reg) + .addReg(GR64Reg, RegState::Kill); + BuildMI(MBB, MBBI, DL, get(SystemZ::COPY), DestReg) + .addReg(FP64Reg, RegState::Kill, SystemZ::subreg_h16); + return; + } + // Callers may expect a single instruction, so keep 128-bit moves // together for now and lower them after register allocation. 
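// (For f16 without vector support, the reload above is the exact inverse of the spill sequence: LH into a GPR, SLLG by 48, LDGR into an FPR, then a copy out of subreg_h16.)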
unsigned LoadOpcode, StoreOpcode; @@ -1909,6 +1957,10 @@ void SystemZInstrInfo::getLoadStoreOpcodes(const TargetRegisterClass *RC, } else if (RC == &SystemZ::FP128BitRegClass) { LoadOpcode = SystemZ::LX; StoreOpcode = SystemZ::STX; + } else if (RC == &SystemZ::FP16BitRegClass || + RC == &SystemZ::VR16BitRegClass) { + LoadOpcode = SystemZ::VL16; + StoreOpcode = SystemZ::VST16; } else if (RC == &SystemZ::VR32BitRegClass) { LoadOpcode = SystemZ::VL32; StoreOpcode = SystemZ::VST32; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td index d8c48239ac633..7a240e7269516 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td @@ -143,6 +143,7 @@ let Predicates = [FeatureVector] in { // to use those instructions rather than force a 20-bit displacement // into a GPR temporary. let mayLoad = 1, canFoldAsLoad = 1 in { + def VL16 : UnaryAliasVRX<load, v16hb, bdxaddr12pair>; def VL32 : UnaryAliasVRX<load, v32f, bdxaddr12pair>; def VL64 : UnaryAliasVRX<load, v64g, bdxaddr12pair>; } @@ -240,6 +241,7 @@ let Predicates = [FeatureVector] in { // to use those instructions rather than force a 20-bit displacement // into a GPR temporary. let mayStore = 1 in { + def VST16 : StoreAliasVRX<store, v16hb, bdxaddr12pair>; def VST32 : StoreAliasVRX<store, v32f, bdxaddr12pair>; def VST64 : StoreAliasVRX<store, v64g, bdxaddr12pair>; } diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td index 8f9bb56f2eb3b..1dfe264b501b1 100644 --- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td +++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -20,6 +20,7 @@ class SystemZRegWithSubregs<string n, list<Register> subregs> } let Namespace = "SystemZ" in { +def subreg_h16 : SubRegIndex<16, 16>; def subreg_l32 : SubRegIndex<32, 0>; // Also acts as subreg_hl32. def subreg_h32 : SubRegIndex<32, 32>; // Also acts as subreg_hh32. def subreg_l64 : SubRegIndex<64, 0>; @@ -201,9 +202,16 @@ def F27Dwarf : DwarfMapping<81>; def F29Dwarf : DwarfMapping<82>; def F31Dwarf : DwarfMapping<83>; +// Upper 16 bits of one of the floating-point registers +class FPR16<bits<16> num, string n> : SystemZReg<n> { + let HWEncoding = num; +} + // Upper 32 bits of one of the floating-point registers -class FPR32<bits<16> num, string n> : SystemZReg<n> { +class FPR32<bits<16> num, string n, FPR16 high> + : SystemZRegWithSubregs<n, [high]> { let HWEncoding = num; + let SubRegIndices = [subreg_h16]; } // One of the floating-point registers. @@ -223,12 +231,14 @@ class FPR128<bits<16> num, string n, FPR64 low, FPR64 high> // Floating-point registers. Registers 16-31 require the vector facility. foreach I = 0-15 in { - def F#I#S : FPR32<I, "f"#I>; + def F#I#H : FPR16<I, "f"#I>; + def F#I#S : FPR32<I, "f"#I, !cast<FPR16>("F"#I#"H")>; def F#I#D : FPR64<I, "f"#I, !cast<FPR32>("F"#I#"S")>, DwarfRegNum<[!cast<DwarfMapping>("F"#I#"Dwarf").Id]>; } foreach I = 16-31 in { - def F#I#S : FPR32<I, "f"#I>; + def F#I#H : FPR16<I, "f"#I>; + def F#I#S : FPR32<I, "f"#I, !cast<FPR16>("F"#I#"H")>; def F#I#D : FPR64<I, "f"#I, !cast<FPR32>("F"#I#"S")>, DwarfRegNum<[!cast<DwarfMapping>("F"#I#"Dwarf").Id]>; } @@ -240,6 +250,7 @@ foreach I = [0, 1, 4, 5, 8, 9, 12, 13] in { // There's no store-multiple instruction for FPRs, so we're not fussy // about the order in which call-saved registers are allocated. +defm FP16 : SystemZRegClass<"FP16", [f16], 16, (sequence "F%uH", 0, 15)>; defm FP32 : SystemZRegClass<"FP32", [f32], 32, (sequence "F%uS", 0, 15)>; defm FP64 : SystemZRegClass<"FP64", [f64], 64, (sequence "F%uD", 0, 15)>; defm FP128 : SystemZRegClass<"FP128", [f128], 128, @@ -262,6 +273,13 @@ foreach I = 0-31 in { DwarfRegNum<[!cast<DwarfMapping>("F"#I#"Dwarf").Id]>; } +// Class used to store 16-bit fp values in the first element of a vector +// register.
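// Note on the allocation order below: F8H-F15H come last; F8-F15 are the call-saved FPRs in the ELF ABI.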
+defm VR16 : SystemZRegClass<"VR16", [f16], 16, + (add (sequence "F%uH", 0, 7), + (sequence "F%uH", 16, 31), + (sequence "F%uH", 8, 15))>; + // Class used to store 32-bit values in the first element of a vector // register. f32 scalars are used for the WLEDB and WLDEB instructions. defm VR32 : SystemZRegClass<"VR32", [f32, v4i8, v2i16], 32, @@ -298,6 +316,7 @@ class TypedReg<ValueType vtin, RegisterOperand opin> { RegisterOperand op = opin; } +def v16hb : TypedReg<f16, VR16>; def v32f : TypedReg<i32, VR32>; def v32sb : TypedReg<f32, VR32>; def v64g : TypedReg<i64, VR64>; diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td index 5f15348654c75..c059f3ececef6 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td @@ -773,12 +773,12 @@ def : InstRW<[], (instregex "Insn.*")>; //===----------------------------------------------------------------------===// // Load zero -def : InstRW<[WLat1, FXb, NormalGr], (instregex "LZ(DR|ER)$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LZ(DR|ER|ER_16)$")>; def : InstRW<[WLat2, FXb2, Cracked], (instregex "LZXR$")>; // Load -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "LER$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "LD(R|R32|GR)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "LER(16)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LD(R|R16|R32|GR)$")>; def : InstRW<[WLat3, FXb, NormalGr], (instregex "LGDR$")>; def : InstRW<[WLat2, FXb2, GroupAlone], (instregex "LXR$")>; @@ -840,7 +840,7 @@ def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "CL(F|G)XBR$")>; // Load Complement / Negative / Positive def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "L(C|N|P)(E|D)BR$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "L(C|N|P)DFR(_32)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "L(C|N|P)DFR(_32|_16)?$")>; def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XBR$")>; // Square root @@ -1191,7 +1191,7 @@ def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLEI(B|F|G|H)$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(Align)?$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(L|BB)$")>; -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(32|64)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(16|32|64)$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEZ(B|F|G|H)?$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLREP(B|F|G|H)?$")>; def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr], @@ -1205,7 +1205,7 @@ def : InstRW<[WLat4LSU, WLat4LSU, LSU5, GroupAlone], // Vector: Stores //===----------------------------------------------------------------------===// -def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(Align|L|32|64)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(Align|L|16|32|64)?$")>; def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTE(F|G)$")>; def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTE(B|H)$")>; def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM(Align)?$")>; diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td index 336bbe2483340..bd3d6678fc6a8 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td @@ -793,12 +793,12 @@ def : InstRW<[], (instregex "Insn.*")>; //===----------------------------------------------------------------------===// // Load zero -def : InstRW<[WLat1, FXb,
NormalGr], (instregex "LZ(DR|ER)$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LZ(DR|ER|ER_16)$")>; def : InstRW<[WLat2, FXb2, Cracked], (instregex "LZXR$")>; // Load -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "LER$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "LD(R|R32|GR)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "LER(16)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LD(R|R16|R32|GR)$")>; def : InstRW<[WLat3, FXb, NormalGr], (instregex "LGDR$")>; def : InstRW<[WLat2, FXb2, GroupAlone], (instregex "LXR$")>; @@ -860,7 +860,7 @@ def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "CL(F|G)XBR$")>; // Load Complement / Negative / Positive def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "L(C|N|P)(E|D)BR$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "L(C|N|P)DFR(_32)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "L(C|N|P)DFR(_32|_16)?$")>; def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XBR$")>; // Square root @@ -1209,7 +1209,7 @@ def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLEI(B|F|G|H)$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(Align)?$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(L|BB)$")>; -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(32|64)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(16|32|64)$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEZ(B|F|G|H|LF)?$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLREP(B|F|G|H)?$")>; def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr], @@ -1224,7 +1224,7 @@ def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLRL(R)?$")>; // Vector: Stores //===----------------------------------------------------------------------===// -def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(Align|L|32|64)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(Align|L|16|32|64)?$")>; def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTE(F|G)$")>; def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTE(B|H)$")>; def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM(Align)?$")>; diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td index 5f2a04c298a25..4d8e1d9f78dc7 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td @@ -811,12 +811,12 @@ def : InstRW<[], (instregex "Insn.*")>; //===----------------------------------------------------------------------===// // Load zero -def : InstRW<[WLat1, FXb, NormalGr], (instregex "LZ(DR|ER)$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LZ(DR|ER|ER_16)$")>; def : InstRW<[WLat2, FXb2, Cracked], (instregex "LZXR$")>; // Load -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "LER$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "LD(R|R32|GR)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "LER(16)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LD(R|R16|R32|GR)$")>; def : InstRW<[WLat3, FXb, NormalGr], (instregex "LGDR$")>; def : InstRW<[WLat2, FXb2, GroupAlone], (instregex "LXR$")>; @@ -878,7 +878,7 @@ def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "CL(F|G)XBR$")>; // Load Complement / Negative / Positive def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "L(C|N|P)(E|D)BR$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "L(C|N|P)DFR(_32)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex 
"L(C|N|P)DFR(_32|_16)?$")>; def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XBR$")>; // Square root @@ -1231,7 +1231,7 @@ def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLEI(B|F|G|H)$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(Align)?$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(L|BB)$")>; -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(32|64)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(16|32|64)$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEZ(B|F|G|H|LF)?$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLREP(B|F|G|H)?$")>; def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr], @@ -1246,7 +1246,7 @@ def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLRL(R)?$")>; // Vector: Stores //===----------------------------------------------------------------------===// -def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(Align|L|32|64)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(Align|L|16|32|64)?$")>; def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTE(F|G)$")>; def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTE(B|H)$")>; def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM(Align)?$")>; diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td index 83e980940d758..7791472efbcfb 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td @@ -812,12 +812,12 @@ def : InstRW<[], (instregex "Insn.*")>; //===----------------------------------------------------------------------===// // Load zero -def : InstRW<[WLat1, FXb, NormalGr], (instregex "LZ(DR|ER)$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LZ(DR|ER|ER_16)$")>; def : InstRW<[WLat2, FXb2, Cracked], (instregex "LZXR$")>; // Load -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "LER$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "LD(R|R32|GR)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "LER(16)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LD(R|R16|R32|GR)$")>; def : InstRW<[WLat3, FXb, NormalGr], (instregex "LGDR$")>; def : InstRW<[WLat2, FXb2, GroupAlone], (instregex "LXR$")>; @@ -879,7 +879,7 @@ def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "CL(F|G)XBR$")>; // Load Complement / Negative / Positive def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "L(C|N|P)(E|D)BR$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "L(C|N|P)DFR(_32)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "L(C|N|P)DFR(_32|_16)?$")>; def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XBR$")>; // Square root @@ -1237,7 +1237,7 @@ def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLEI(B|F|G|H)$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(Align)?$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(L|BB)$")>; -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(32|64)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(16|32|64)$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEZ(B|F|G|H|LF)?$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLREP(B|F|G|H)?$")>; def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr], @@ -1252,7 +1252,7 @@ def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLRL(R)?$")>; // Vector: Stores //===----------------------------------------------------------------------===// -def : 
InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(Align|L|32|64)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(Align|L|16|32|64)?$")>; def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTE(F|G)$")>; def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTE(B|H)$")>; def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM(Align)?$")>; diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td index f8397921bf684..e67c207833f4d 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td @@ -705,12 +705,12 @@ def : InstRW<[], (instregex "Insn.*")>; //===----------------------------------------------------------------------===// // Load zero -def : InstRW<[WLat1, FXU, NormalGr], (instregex "LZ(DR|ER)$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LZ(DR|ER|ER_16)$")>; def : InstRW<[WLat2, FXU2, GroupAlone2], (instregex "LZXR$")>; // Load -def : InstRW<[WLat1, FXU, NormalGr], (instregex "LER$")>; -def : InstRW<[WLat1, FXU, NormalGr], (instregex "LD(R|R32|GR)$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LER(16)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LD(R|R16|R32|GR)$")>; def : InstRW<[WLat3, FXU, NormalGr], (instregex "LGDR$")>; def : InstRW<[WLat2, FXU2, GroupAlone2], (instregex "LXR$")>; @@ -771,7 +771,7 @@ def : InstRW<[WLat12, WLat12, FXU, FPU2, GroupAlone], (instregex "CL(F|G)XBR$")> // Load Complement / Negative / Positive def : InstRW<[WLat7, WLat7, FPU, NormalGr], (instregex "L(C|N|P)(E|D)BR$")>; -def : InstRW<[WLat1, FXU, NormalGr], (instregex "L(C|N|P)DFR(_32)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "L(C|N|P)DFR(_32|_16)?$")>; def : InstRW<[WLat10, WLat10, FPU4, GroupAlone], (instregex "L(C|N|P)XBR$")>; // Square root diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td b/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td index 039c8146618fe..465cf8ae392c6 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td @@ -743,12 +743,12 @@ def : InstRW<[], (instregex "Insn.*")>; //===----------------------------------------------------------------------===// // Load zero -def : InstRW<[WLat1, FXU, NormalGr], (instregex "LZ(DR|ER)$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LZ(DR|ER|ER_16)$")>; def : InstRW<[WLat2, FXU2, GroupAlone], (instregex "LZXR$")>; // Load -def : InstRW<[WLat1, FXU, NormalGr], (instregex "LER$")>; -def : InstRW<[WLat1, FXU, NormalGr], (instregex "LD(R|R32|GR)$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LER(16)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "LD(R|R16|R32|GR)$")>; def : InstRW<[WLat3, FXU, NormalGr], (instregex "LGDR$")>; def : InstRW<[WLat2, FXU2, GroupAlone], (instregex "LXR$")>; @@ -809,7 +809,7 @@ def : InstRW<[WLat12, WLat12, FXU, FPU2, GroupAlone], (instregex "CL(F|G)XBR$")> // Load Complement / Negative / Positive def : InstRW<[WLat7, WLat7, FPU, NormalGr], (instregex "L(C|N|P)(E|D)BR$")>; -def : InstRW<[WLat1, FXU, NormalGr], (instregex "L(C|N|P)DFR(_32)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "L(C|N|P)DFR(_32|_16)?$")>; def : InstRW<[WLat10, WLat10, FPU4, GroupAlone], (instregex "L(C|N|P)XBR$")>; // Square root diff --git a/llvm/test/CodeGen/SystemZ/atomic-load-10.ll b/llvm/test/CodeGen/SystemZ/atomic-load-10.ll new file mode 100644 index 0000000000000..e30f9791b51e0 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/atomic-load-10.ll @@ -0,0 +1,22 @@ 
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; Test fp16 atomic loads. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -verify-machineinstrs | FileCheck %s -check-prefix=CHECK +; RUN: llc < %s -mtriple=s390x-linux-gnu -verify-machineinstrs -mcpu=z16 | FileCheck %s -check-prefix=VECTOR + +define half @f1(ptr %src) { +; CHECK-LABEL: f1: +; CHECK: # %bb.0: +; CHECK-NEXT: lh %r0, 0(%r2) +; CHECK-NEXT: sllg %r0, %r0, 48 +; CHECK-NEXT: ldgr %f0, %r0 +; CHECK-NEXT: # kill: def $f0h killed $f0h killed $f0d +; CHECK-NEXT: br %r14 +; +; VECTOR-LABEL: f1: +; VECTOR: # %bb.0: +; VECTOR-NEXT: vlreph %v0, 0(%r2) +; VECTOR-NEXT: br %r14 + %val = load atomic half, ptr %src seq_cst, align 2 + ret half %val +} diff --git a/llvm/test/CodeGen/SystemZ/atomic-store-10.ll b/llvm/test/CodeGen/SystemZ/atomic-store-10.ll new file mode 100644 index 0000000000000..3f228d58dcd8c --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/atomic-store-10.ll @@ -0,0 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; Test half atomic stores. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -verify-machineinstrs | FileCheck %s -check-prefix=CHECK +; RUN: llc < %s -mtriple=s390x-linux-gnu -verify-machineinstrs -mcpu=z16 | FileCheck %s -check-prefix=VECTOR + +define void @f1(ptr %src, half %val) { +; CHECK-LABEL: f1: +; CHECK: # %bb.0: +; CHECK-NEXT: # kill: def $f0h killed $f0h def $f0d +; CHECK-NEXT: lgdr %r0, %f0 +; CHECK-NEXT: srlg %r0, %r0, 48 +; CHECK-NEXT: sth %r0, 0(%r2) +; CHECK-NEXT: bcr 15, %r0 +; CHECK-NEXT: br %r14 +; +; VECTOR-LABEL: f1: +; VECTOR: # %bb.0: +; VECTOR-NEXT: vsteh %v0, 0(%r2), 0 +; VECTOR-NEXT: bcr 14, %r0 +; VECTOR-NEXT: br %r14 + store atomic half %val, ptr %src seq_cst, align 2 + ret void +} diff --git a/llvm/test/CodeGen/SystemZ/fp-half-libcall.ll b/llvm/test/CodeGen/SystemZ/fp-half-libcall.ll new file mode 100644 index 0000000000000..6e813a4a5094d --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/fp-half-libcall.ll @@ -0,0 +1,312 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; Test that library calls are emitted for LLVM IR intrinsics +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define half @f1(half %x, i16 %y) { +; CHECK-LABEL: f1: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r13, %r15, 104(%r15) +; CHECK-NEXT: .cfi_offset %r13, -56 +; CHECK-NEXT: .cfi_offset %r14, -48 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -160 +; CHECK-NEXT: .cfi_def_cfa_offset 320 +; CHECK-NEXT: lhr %r13, %r2 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: llgfr %r2, %r13 +; CHECK-NEXT: brasl %r14, __powisf2@PLT +; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT +; CHECK-NEXT: lmg %r13, %r15, 264(%r15) +; CHECK-NEXT: br %r14 + %tmp = call half @llvm.powi.f16.i16(half %x, i16 %y) + ret half %tmp +} + +define half @f2(half %x, half %y) { +; CHECK-LABEL: f2: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r14, %r15, 112(%r15) +; CHECK-NEXT: .cfi_offset %r14, -48 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -176 +; CHECK-NEXT: .cfi_def_cfa_offset 336 +; CHECK-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill +; CHECK-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset %f8, -168 +; CHECK-NEXT: .cfi_offset %f9, -176 +; CHECK-NEXT: ler %f8, %f2 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: ler %f9, %f0 +; CHECK-NEXT: ler %f0, %f8 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: ler 
%f2, %f0 +; CHECK-NEXT: ler %f0, %f9 +; CHECK-NEXT: brasl %r14, powf@PLT +; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT +; CHECK-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload +; CHECK-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; CHECK-NEXT: lmg %r14, %r15, 288(%r15) +; CHECK-NEXT: br %r14 + %tmp = call half @llvm.pow.f16(half %x, half %y) + ret half %tmp +} + +define half @f3(half %x) { +; CHECK-LABEL: f3: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r14, %r15, 112(%r15) +; CHECK-NEXT: .cfi_offset %r14, -48 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -160 +; CHECK-NEXT: .cfi_def_cfa_offset 320 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: brasl %r14, sinf@PLT +; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT +; CHECK-NEXT: lmg %r14, %r15, 272(%r15) +; CHECK-NEXT: br %r14 + %tmp = call half @llvm.sin.f16(half %x) + ret half %tmp +} + +define half @f4(half %x) { +; CHECK-LABEL: f4: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r14, %r15, 112(%r15) +; CHECK-NEXT: .cfi_offset %r14, -48 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -160 +; CHECK-NEXT: .cfi_def_cfa_offset 320 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: brasl %r14, cosf@PLT +; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT +; CHECK-NEXT: lmg %r14, %r15, 272(%r15) +; CHECK-NEXT: br %r14 + %tmp = call half @llvm.cos.f16(half %x) + ret half %tmp +} + +define half @f5(half %x) { +; CHECK-LABEL: f5: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r14, %r15, 112(%r15) +; CHECK-NEXT: .cfi_offset %r14, -48 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -160 +; CHECK-NEXT: .cfi_def_cfa_offset 320 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: brasl %r14, expf@PLT +; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT +; CHECK-NEXT: lmg %r14, %r15, 272(%r15) +; CHECK-NEXT: br %r14 + %tmp = call half @llvm.exp.f16(half %x) + ret half %tmp +} + +define half @f6(half %x) { +; CHECK-LABEL: f6: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r14, %r15, 112(%r15) +; CHECK-NEXT: .cfi_offset %r14, -48 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -160 +; CHECK-NEXT: .cfi_def_cfa_offset 320 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: brasl %r14, exp2f@PLT +; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT +; CHECK-NEXT: lmg %r14, %r15, 272(%r15) +; CHECK-NEXT: br %r14 + %tmp = call half @llvm.exp2.f16(half %x) + ret half %tmp +} + +define half @f7(half %x) { +; CHECK-LABEL: f7: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r14, %r15, 112(%r15) +; CHECK-NEXT: .cfi_offset %r14, -48 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -160 +; CHECK-NEXT: .cfi_def_cfa_offset 320 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: brasl %r14, logf@PLT +; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT +; CHECK-NEXT: lmg %r14, %r15, 272(%r15) +; CHECK-NEXT: br %r14 + %tmp = call half @llvm.log.f16(half %x) + ret half %tmp +} + +define half @f8(half %x) { +; CHECK-LABEL: f8: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r14, %r15, 112(%r15) +; CHECK-NEXT: .cfi_offset %r14, -48 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -160 +; CHECK-NEXT: .cfi_def_cfa_offset 320 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: brasl %r14, log2f@PLT +; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT +; CHECK-NEXT: lmg %r14, %r15, 272(%r15) +; CHECK-NEXT: br %r14 + %tmp = call half @llvm.log2.f16(half %x) + ret half %tmp +} + +define half @f9(half %x) { +; CHECK-LABEL: f9: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r14, %r15, 112(%r15) +; CHECK-NEXT: .cfi_offset %r14, -48 +; 
CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -160 +; CHECK-NEXT: .cfi_def_cfa_offset 320 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: brasl %r14, log10f@PLT +; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT +; CHECK-NEXT: lmg %r14, %r15, 272(%r15) +; CHECK-NEXT: br %r14 + %tmp = call half @llvm.log10.f16(half %x) + ret half %tmp +} + +define half @f10(half %x, half %y) { +; CHECK-LABEL: f10: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r14, %r15, 112(%r15) +; CHECK-NEXT: .cfi_offset %r14, -48 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -176 +; CHECK-NEXT: .cfi_def_cfa_offset 336 +; CHECK-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill +; CHECK-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset %f8, -168 +; CHECK-NEXT: .cfi_offset %f9, -176 +; CHECK-NEXT: ler %f8, %f2 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: ler %f9, %f0 +; CHECK-NEXT: ler %f0, %f8 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: ler %f2, %f0 +; CHECK-NEXT: ler %f0, %f9 +; CHECK-NEXT: brasl %r14, fminf@PLT +; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT +; CHECK-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload +; CHECK-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; CHECK-NEXT: lmg %r14, %r15, 288(%r15) +; CHECK-NEXT: br %r14 + %tmp = call half @llvm.minnum.f16(half %x, half %y) + ret half %tmp +} + +define half @f11(half %x, half %y) { +; CHECK-LABEL: f11: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r14, %r15, 112(%r15) +; CHECK-NEXT: .cfi_offset %r14, -48 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -176 +; CHECK-NEXT: .cfi_def_cfa_offset 336 +; CHECK-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill +; CHECK-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset %f8, -168 +; CHECK-NEXT: .cfi_offset %f9, -176 +; CHECK-NEXT: ler %f8, %f2 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: ler %f9, %f0 +; CHECK-NEXT: ler %f0, %f8 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: ler %f2, %f0 +; CHECK-NEXT: ler %f0, %f9 +; CHECK-NEXT: brasl %r14, fmaxf@PLT +; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT +; CHECK-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload +; CHECK-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; CHECK-NEXT: lmg %r14, %r15, 288(%r15) +; CHECK-NEXT: br %r14 + %tmp = call half @llvm.maxnum.f16(half %x, half %y) + ret half %tmp +} + +; Verify that "nnan" minnum/maxnum calls are transformed to +; compare+select sequences instead of libcalls. 
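+; In f12 below, the nnan minnum becomes a cebr compare with a jl branch that
+; keeps the smaller operand; f13 handles maxnum the same way with jh, so no
+; fminf/fmaxf call is emitted (only the half<->float conversion calls remain).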
+define half @f12(half %x, half %y) { +; CHECK-LABEL: f12: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r14, %r15, 112(%r15) +; CHECK-NEXT: .cfi_offset %r14, -48 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -176 +; CHECK-NEXT: .cfi_def_cfa_offset 336 +; CHECK-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill +; CHECK-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset %f8, -168 +; CHECK-NEXT: .cfi_offset %f9, -176 +; CHECK-NEXT: ler %f9, %f0 +; CHECK-NEXT: ler %f0, %f2 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: ler %f8, %f0 +; CHECK-NEXT: ler %f0, %f9 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: cebr %f0, %f8 +; CHECK-NEXT: jl .LBB11_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ler %f0, %f8 +; CHECK-NEXT: .LBB11_2: +; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT +; CHECK-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload +; CHECK-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; CHECK-NEXT: lmg %r14, %r15, 288(%r15) +; CHECK-NEXT: br %r14 + %tmp = call nnan half @llvm.minnum.f16(half %x, half %y) + ret half %tmp +} + +define half @f13(half %x, half %y) { +; CHECK-LABEL: f13: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r14, %r15, 112(%r15) +; CHECK-NEXT: .cfi_offset %r14, -48 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -176 +; CHECK-NEXT: .cfi_def_cfa_offset 336 +; CHECK-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill +; CHECK-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset %f8, -168 +; CHECK-NEXT: .cfi_offset %f9, -176 +; CHECK-NEXT: ler %f9, %f0 +; CHECK-NEXT: ler %f0, %f2 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: ler %f8, %f0 +; CHECK-NEXT: ler %f0, %f9 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: cebr %f0, %f8 +; CHECK-NEXT: jh .LBB12_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ler %f0, %f8 +; CHECK-NEXT: .LBB12_2: +; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT +; CHECK-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload +; CHECK-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; CHECK-NEXT: lmg %r14, %r15, 288(%r15) +; CHECK-NEXT: br %r14 + %tmp = call nnan half @llvm.maxnum.f16(half %x, half %y) + ret half %tmp +} + +declare half @llvm.powi.f16.i16(half, i16) +declare half @llvm.pow.f16(half, half) + +declare half @llvm.sin.f16(half) +declare half @llvm.cos.f16(half) + +declare half @llvm.exp.f16(half) +declare half @llvm.exp2.f16(half) + +declare half @llvm.log.f16(half) +declare half @llvm.log2.f16(half) +declare half @llvm.log10.f16(half) + +declare half @llvm.minnum.f16(half, half) +declare half @llvm.maxnum.f16(half, half) diff --git a/llvm/test/CodeGen/SystemZ/fp-half-strict.ll b/llvm/test/CodeGen/SystemZ/fp-half-strict.ll new file mode 100644 index 0000000000000..42663b109d7a9 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/fp-half-strict.ll @@ -0,0 +1,209 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=zEC12 -verify-machineinstrs \ +; RUN: | FileCheck %s --check-prefix=NOVEC +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z16 -verify-machineinstrs \ +; RUN: | FileCheck %s --check-prefix=VECTOR +; +; Tests for strict 16-bit floating point (half). + +declare half @llvm.experimental.constrained.fadd.f16(half, half, metadata, metadata) +declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata) +declare half @llvm.experimental.constrained.fmul.f16(half, half, metadata, metadata) + +; Test register addition. 
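+; The strict fadd is promoted: both operands are extended via __extendhfsf2,
+; added with aebr, and the result is truncated back via __truncsfhf2.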
+define half @fun0(half %f1, half %f2) #0 { +; NOVEC-LABEL: fun0: +; NOVEC: # %bb.0: +; NOVEC-NEXT: stmg %r14, %r15, 112(%r15) +; NOVEC-NEXT: .cfi_offset %r14, -48 +; NOVEC-NEXT: .cfi_offset %r15, -40 +; NOVEC-NEXT: aghi %r15, -176 +; NOVEC-NEXT: .cfi_def_cfa_offset 336 +; NOVEC-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: .cfi_offset %f8, -168 +; NOVEC-NEXT: .cfi_offset %f9, -176 +; NOVEC-NEXT: ler %f8, %f0 +; NOVEC-NEXT: ler %f0, %f2 +; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT +; NOVEC-NEXT: ler %f9, %f0 +; NOVEC-NEXT: ler %f0, %f8 +; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT +; NOVEC-NEXT: aebr %f0, %f9 +; NOVEC-NEXT: brasl %r14, __truncsfhf2@PLT +; NOVEC-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: lmg %r14, %r15, 288(%r15) +; NOVEC-NEXT: br %r14 +; +; VECTOR-LABEL: fun0: +; VECTOR: # %bb.0: +; VECTOR-NEXT: stmg %r14, %r15, 112(%r15) +; VECTOR-NEXT: .cfi_offset %r14, -48 +; VECTOR-NEXT: .cfi_offset %r15, -40 +; VECTOR-NEXT: aghi %r15, -176 +; VECTOR-NEXT: .cfi_def_cfa_offset 336 +; VECTOR-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: .cfi_offset %f8, -168 +; VECTOR-NEXT: .cfi_offset %f9, -176 +; VECTOR-NEXT: ldr %f8, %f0 +; VECTOR-NEXT: ldr %f0, %f2 +; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT +; VECTOR-NEXT: ldr %f9, %f0 +; VECTOR-NEXT: ldr %f0, %f8 +; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT +; VECTOR-NEXT: aebr %f0, %f9 +; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT +; VECTOR-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: lmg %r14, %r15, 288(%r15) +; VECTOR-NEXT: br %r14 + %res = call half @llvm.experimental.constrained.fadd.f16( + half %f1, half %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret half %res +} + +; Test atomic memory accesses and extension/truncation inside a strictfp +; function. 
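+; Without the vector facility the half values pass through a GPR (lh plus
+; shifts into an FPR via ldgr, and back out via lgdr/sth); with it, vlreph
+; and vsteh access the halfword in memory directly.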
+define void @fun1(ptr %Src, ptr %Dst) #0 { +; NOVEC-LABEL: fun1: +; NOVEC: # %bb.0: # %entry +; NOVEC-NEXT: stmg %r13, %r15, 104(%r15) +; NOVEC-NEXT: .cfi_offset %r13, -56 +; NOVEC-NEXT: .cfi_offset %r14, -48 +; NOVEC-NEXT: .cfi_offset %r15, -40 +; NOVEC-NEXT: aghi %r15, -160 +; NOVEC-NEXT: .cfi_def_cfa_offset 320 +; NOVEC-NEXT: lh %r0, 0(%r2) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgr %r13, %r3 +; NOVEC-NEXT: ldgr %f0, %r0 +; NOVEC-NEXT: # kill: def $f0h killed $f0h killed $f0d +; NOVEC-NEXT: brasl %r14, __extendhfdf2@PLT +; NOVEC-NEXT: adbr %f0, %f0 +; NOVEC-NEXT: brasl %r14, __truncdfhf2@PLT +; NOVEC-NEXT: # kill: def $f0h killed $f0h def $f0d +; NOVEC-NEXT: lgdr %r0, %f0 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 0(%r13) +; NOVEC-NEXT: bcr 14, %r0 +; NOVEC-NEXT: lmg %r13, %r15, 264(%r15) +; NOVEC-NEXT: br %r14 +; +; VECTOR-LABEL: fun1: +; VECTOR: # %bb.0: # %entry +; VECTOR-NEXT: stmg %r13, %r15, 104(%r15) +; VECTOR-NEXT: .cfi_offset %r13, -56 +; VECTOR-NEXT: .cfi_offset %r14, -48 +; VECTOR-NEXT: .cfi_offset %r15, -40 +; VECTOR-NEXT: aghi %r15, -160 +; VECTOR-NEXT: .cfi_def_cfa_offset 320 +; VECTOR-NEXT: vlreph %v0, 0(%r2) +; VECTOR-NEXT: lgr %r13, %r3 +; VECTOR-NEXT: brasl %r14, __extendhfdf2@PLT +; VECTOR-NEXT: adbr %f0, %f0 +; VECTOR-NEXT: brasl %r14, __truncdfhf2@PLT +; VECTOR-NEXT: vsteh %v0, 0(%r13), 0 +; VECTOR-NEXT: bcr 14, %r0 +; VECTOR-NEXT: lmg %r13, %r15, 264(%r15) +; VECTOR-NEXT: br %r14 +entry: + %Op0 = load atomic half, ptr %Src seq_cst, align 2 + %E0 = fpext half %Op0 to double + %Add = call double @llvm.experimental.constrained.fadd.f64( + double %E0, double %E0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + %Res = fptrunc double %Add to half + store atomic half %Res, ptr %Dst seq_cst, align 2 + ret void +} + +; Test a chain of half operations which should have each operation surrounded +; by conversions to/from fp32 for proper emulation. 
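+; The __truncsfhf2/__extendhfsf2 pair between the two multiplies must be
+; kept, since the intermediate rounding to half is observable under strictfp.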
+define half @fun2(half %Op0, half %Op1, half %Op2) #0 { +; NOVEC-LABEL: fun2: +; NOVEC: # %bb.0: # %entry +; NOVEC-NEXT: stmg %r14, %r15, 112(%r15) +; NOVEC-NEXT: .cfi_offset %r14, -48 +; NOVEC-NEXT: .cfi_offset %r15, -40 +; NOVEC-NEXT: aghi %r15, -184 +; NOVEC-NEXT: .cfi_def_cfa_offset 344 +; NOVEC-NEXT: std %f8, 176(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f9, 168(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f10, 160(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: .cfi_offset %f8, -168 +; NOVEC-NEXT: .cfi_offset %f9, -176 +; NOVEC-NEXT: .cfi_offset %f10, -184 +; NOVEC-NEXT: ler %f9, %f0 +; NOVEC-NEXT: ler %f0, %f2 +; NOVEC-NEXT: ler %f8, %f4 +; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT +; NOVEC-NEXT: ler %f10, %f0 +; NOVEC-NEXT: ler %f0, %f9 +; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT +; NOVEC-NEXT: meebr %f0, %f10 +; NOVEC-NEXT: brasl %r14, __truncsfhf2@PLT +; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT +; NOVEC-NEXT: ler %f9, %f0 +; NOVEC-NEXT: ler %f0, %f8 +; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT +; NOVEC-NEXT: meebr %f0, %f9 +; NOVEC-NEXT: brasl %r14, __truncsfhf2@PLT +; NOVEC-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: lmg %r14, %r15, 296(%r15) +; NOVEC-NEXT: br %r14 +; +; VECTOR-LABEL: fun2: +; VECTOR: # %bb.0: # %entry +; VECTOR-NEXT: stmg %r14, %r15, 112(%r15) +; VECTOR-NEXT: .cfi_offset %r14, -48 +; VECTOR-NEXT: .cfi_offset %r15, -40 +; VECTOR-NEXT: aghi %r15, -184 +; VECTOR-NEXT: .cfi_def_cfa_offset 344 +; VECTOR-NEXT: std %f8, 176(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: std %f9, 168(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: std %f10, 160(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: .cfi_offset %f8, -168 +; VECTOR-NEXT: .cfi_offset %f9, -176 +; VECTOR-NEXT: .cfi_offset %f10, -184 +; VECTOR-NEXT: ldr %f9, %f0 +; VECTOR-NEXT: ldr %f0, %f2 +; VECTOR-NEXT: ldr %f8, %f4 +; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT +; VECTOR-NEXT: ldr %f10, %f0 +; VECTOR-NEXT: ldr %f0, %f9 +; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT +; VECTOR-NEXT: meebr %f0, %f10 +; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT +; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT +; VECTOR-NEXT: ldr %f9, %f0 +; VECTOR-NEXT: ldr %f0, %f8 +; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT +; VECTOR-NEXT: wfmsb %f0, %f9, %f0 +; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT +; VECTOR-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: lmg %r14, %r15, 296(%r15) +; VECTOR-NEXT: br %r14 +entry: + %A0 = call half @llvm.experimental.constrained.fmul.f16( + half %Op0, half %Op1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + %Res = call half @llvm.experimental.constrained.fmul.f16( + half %A0, half %Op2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret half %Res +} + +attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/SystemZ/fp-half-vector.ll b/llvm/test/CodeGen/SystemZ/fp-half-vector.ll new file mode 100644 index 0000000000000..cc3f61f998649 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/fp-half-vector.ll @@ -0,0 +1,797 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=zEC12 -verify-machineinstrs \ +; RUN: | FileCheck %s --check-prefix=NOVEC +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z16 -verify-machineinstrs \ +; RUN: | 
FileCheck %s --check-prefix=VECTOR + +; Add the <8 x half> argument with itself and return it. +define <8 x half> @fun0(<8 x half> %Op) { +; NOVEC-LABEL: fun0: +; NOVEC: # %bb.0: # %entry +; NOVEC-NEXT: stmg %r13, %r15, 104(%r15) +; NOVEC-NEXT: .cfi_offset %r13, -56 +; NOVEC-NEXT: .cfi_offset %r14, -48 +; NOVEC-NEXT: .cfi_offset %r15, -40 +; NOVEC-NEXT: aghi %r15, -224 +; NOVEC-NEXT: .cfi_def_cfa_offset 384 +; NOVEC-NEXT: std %f8, 216(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f9, 208(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f10, 200(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f11, 192(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f12, 184(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f13, 176(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f14, 168(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f15, 160(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: .cfi_offset %f8, -168 +; NOVEC-NEXT: .cfi_offset %f9, -176 +; NOVEC-NEXT: .cfi_offset %f10, -184 +; NOVEC-NEXT: .cfi_offset %f11, -192 +; NOVEC-NEXT: .cfi_offset %f12, -200 +; NOVEC-NEXT: .cfi_offset %f13, -208 +; NOVEC-NEXT: .cfi_offset %f14, -216 +; NOVEC-NEXT: .cfi_offset %f15, -224 +; NOVEC-NEXT: lh %r0, 414(%r15) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f15, %r0 +; NOVEC-NEXT: lh %r0, 406(%r15) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f12, %r0 +; NOVEC-NEXT: lh %r0, 398(%r15) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f9, %r0 +; NOVEC-NEXT: lh %r0, 390(%r15) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ler %f10, %f6 +; NOVEC-NEXT: ler %f11, %f4 +; NOVEC-NEXT: ler %f13, %f2 +; NOVEC-NEXT: ler %f14, %f0 +; NOVEC-NEXT: lgr %r13, %r2 +; NOVEC-NEXT: ldgr %f0, %r0 +; NOVEC-NEXT: # kill: def $f0h killed $f0h killed $f0d +; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT +; NOVEC-NEXT: aebr %f0, %f0 +; NOVEC-NEXT: brasl %r14, __truncsfhf2@PLT +; NOVEC-NEXT: ler %f8, %f0 +; NOVEC-NEXT: ler %f0, %f9 +; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT +; NOVEC-NEXT: aebr %f0, %f0 +; NOVEC-NEXT: brasl %r14, __truncsfhf2@PLT +; NOVEC-NEXT: ler %f9, %f0 +; NOVEC-NEXT: ler %f0, %f12 +; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT +; NOVEC-NEXT: aebr %f0, %f0 +; NOVEC-NEXT: brasl %r14, __truncsfhf2@PLT +; NOVEC-NEXT: ler %f12, %f0 +; NOVEC-NEXT: ler %f0, %f15 +; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT +; NOVEC-NEXT: aebr %f0, %f0 +; NOVEC-NEXT: brasl %r14, __truncsfhf2@PLT +; NOVEC-NEXT: ler %f15, %f0 +; NOVEC-NEXT: ler %f0, %f14 +; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT +; NOVEC-NEXT: aebr %f0, %f0 +; NOVEC-NEXT: brasl %r14, __truncsfhf2@PLT +; NOVEC-NEXT: ler %f14, %f0 +; NOVEC-NEXT: ler %f0, %f13 +; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT +; NOVEC-NEXT: aebr %f0, %f0 +; NOVEC-NEXT: brasl %r14, __truncsfhf2@PLT +; NOVEC-NEXT: ler %f13, %f0 +; NOVEC-NEXT: ler %f0, %f11 +; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT +; NOVEC-NEXT: aebr %f0, %f0 +; NOVEC-NEXT: brasl %r14, __truncsfhf2@PLT +; NOVEC-NEXT: ler %f11, %f0 +; NOVEC-NEXT: ler %f0, %f10 +; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT +; NOVEC-NEXT: aebr %f0, %f0 +; NOVEC-NEXT: brasl %r14, __truncsfhf2@PLT +; NOVEC-NEXT: # kill: def $f0h killed $f0h def $f0d +; NOVEC-NEXT: lgdr %r0, %f0 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 6(%r13) +; NOVEC-NEXT: lgdr %r0, %f11 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 
4(%r13) +; NOVEC-NEXT: lgdr %r0, %f13 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 2(%r13) +; NOVEC-NEXT: lgdr %r0, %f14 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 0(%r13) +; NOVEC-NEXT: lgdr %r0, %f15 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 14(%r13) +; NOVEC-NEXT: lgdr %r0, %f12 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 12(%r13) +; NOVEC-NEXT: lgdr %r0, %f9 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 10(%r13) +; NOVEC-NEXT: lgdr %r0, %f8 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 8(%r13) +; NOVEC-NEXT: ld %f8, 216(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f9, 208(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f10, 200(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f11, 192(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f12, 184(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f13, 176(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f14, 168(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f15, 160(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: lmg %r13, %r15, 328(%r15) +; NOVEC-NEXT: br %r14 +; +; VECTOR-LABEL: fun0: +; VECTOR: # %bb.0: # %entry +; VECTOR-NEXT: stmg %r13, %r15, 104(%r15) +; VECTOR-NEXT: .cfi_offset %r13, -56 +; VECTOR-NEXT: .cfi_offset %r14, -48 +; VECTOR-NEXT: .cfi_offset %r15, -40 +; VECTOR-NEXT: aghi %r15, -224 +; VECTOR-NEXT: .cfi_def_cfa_offset 384 +; VECTOR-NEXT: std %f8, 216(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: std %f9, 208(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: std %f10, 200(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: std %f11, 192(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: std %f12, 184(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: std %f13, 176(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: std %f14, 168(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: std %f15, 160(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: .cfi_offset %f8, -168 +; VECTOR-NEXT: .cfi_offset %f9, -176 +; VECTOR-NEXT: .cfi_offset %f10, -184 +; VECTOR-NEXT: .cfi_offset %f11, -192 +; VECTOR-NEXT: .cfi_offset %f12, -200 +; VECTOR-NEXT: .cfi_offset %f13, -208 +; VECTOR-NEXT: .cfi_offset %f14, -216 +; VECTOR-NEXT: .cfi_offset %f15, -224 +; VECTOR-NEXT: vlreph %v11, 414(%r15) +; VECTOR-NEXT: vlreph %v12, 406(%r15) +; VECTOR-NEXT: vlreph %v13, 398(%r15) +; VECTOR-NEXT: vlreph %v14, 390(%r15) +; VECTOR-NEXT: ldr %f8, %f6 +; VECTOR-NEXT: ldr %f9, %f4 +; VECTOR-NEXT: ldr %f10, %f2 +; VECTOR-NEXT: lgr %r13, %r2 +; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT +; VECTOR-NEXT: aebr %f0, %f0 +; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT +; VECTOR-NEXT: ldr %f15, %f0 +; VECTOR-NEXT: ldr %f0, %f10 +; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT +; VECTOR-NEXT: aebr %f0, %f0 +; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT +; VECTOR-NEXT: ldr %f10, %f0 +; VECTOR-NEXT: ldr %f0, %f9 +; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT +; VECTOR-NEXT: aebr %f0, %f0 +; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT +; VECTOR-NEXT: ldr %f9, %f0 +; VECTOR-NEXT: ldr %f0, %f8 +; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT +; VECTOR-NEXT: aebr %f0, %f0 +; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT +; VECTOR-NEXT: ldr %f8, %f0 +; VECTOR-NEXT: ldr %f0, %f14 +; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT +; VECTOR-NEXT: aebr %f0, %f0 +; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT +; VECTOR-NEXT: ldr %f14, %f0 +; VECTOR-NEXT: ldr %f0, %f13 +; 
VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT +; VECTOR-NEXT: aebr %f0, %f0 +; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT +; VECTOR-NEXT: ldr %f13, %f0 +; VECTOR-NEXT: ldr %f0, %f12 +; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT +; VECTOR-NEXT: aebr %f0, %f0 +; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT +; VECTOR-NEXT: ldr %f12, %f0 +; VECTOR-NEXT: ldr %f0, %f11 +; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT +; VECTOR-NEXT: aebr %f0, %f0 +; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT +; VECTOR-NEXT: vsteh %v0, 14(%r13), 0 +; VECTOR-NEXT: vsteh %v12, 12(%r13), 0 +; VECTOR-NEXT: vsteh %v13, 10(%r13), 0 +; VECTOR-NEXT: vsteh %v14, 8(%r13), 0 +; VECTOR-NEXT: vsteh %v8, 6(%r13), 0 +; VECTOR-NEXT: vsteh %v9, 4(%r13), 0 +; VECTOR-NEXT: vsteh %v10, 2(%r13), 0 +; VECTOR-NEXT: vsteh %v15, 0(%r13), 0 +; VECTOR-NEXT: ld %f8, 216(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: ld %f9, 208(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: ld %f10, 200(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: ld %f11, 192(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: ld %f12, 184(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: ld %f13, 176(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: ld %f14, 168(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: ld %f15, 160(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: lmg %r13, %r15, 328(%r15) +; VECTOR-NEXT: br %r14 +entry: + %Res = fadd <8 x half> %Op, %Op + ret <8 x half> %Res +} + +; Same, but with partial vector values. +define <4 x half> @fun1(<4 x half> %Op) { +; NOVEC-LABEL: fun1: +; NOVEC: # %bb.0: # %entry +; NOVEC-NEXT: stmg %r14, %r15, 112(%r15) +; NOVEC-NEXT: .cfi_offset %r14, -48 +; NOVEC-NEXT: .cfi_offset %r15, -40 +; NOVEC-NEXT: aghi %r15, -192 +; NOVEC-NEXT: .cfi_def_cfa_offset 352 +; NOVEC-NEXT: std %f8, 184(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f9, 176(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f10, 168(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f11, 160(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: .cfi_offset %f8, -168 +; NOVEC-NEXT: .cfi_offset %f9, -176 +; NOVEC-NEXT: .cfi_offset %f10, -184 +; NOVEC-NEXT: .cfi_offset %f11, -192 +; NOVEC-NEXT: ler %f8, %f6 +; NOVEC-NEXT: ler %f9, %f4 +; NOVEC-NEXT: ler %f10, %f2 +; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT +; NOVEC-NEXT: aebr %f0, %f0 +; NOVEC-NEXT: brasl %r14, __truncsfhf2@PLT +; NOVEC-NEXT: ler %f11, %f0 +; NOVEC-NEXT: ler %f0, %f10 +; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT +; NOVEC-NEXT: aebr %f0, %f0 +; NOVEC-NEXT: brasl %r14, __truncsfhf2@PLT +; NOVEC-NEXT: ler %f10, %f0 +; NOVEC-NEXT: ler %f0, %f9 +; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT +; NOVEC-NEXT: aebr %f0, %f0 +; NOVEC-NEXT: brasl %r14, __truncsfhf2@PLT +; NOVEC-NEXT: ler %f9, %f0 +; NOVEC-NEXT: ler %f0, %f8 +; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT +; NOVEC-NEXT: aebr %f0, %f0 +; NOVEC-NEXT: brasl %r14, __truncsfhf2@PLT +; NOVEC-NEXT: ler %f6, %f0 +; NOVEC-NEXT: ler %f0, %f11 +; NOVEC-NEXT: ler %f2, %f10 +; NOVEC-NEXT: ler %f4, %f9 +; NOVEC-NEXT: ld %f8, 184(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f9, 176(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f10, 168(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f11, 160(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: lmg %r14, %r15, 304(%r15) +; NOVEC-NEXT: br %r14 +; +; VECTOR-LABEL: fun1: +; VECTOR: # %bb.0: # %entry +; VECTOR-NEXT: stmg %r14, %r15, 112(%r15) +; VECTOR-NEXT: .cfi_offset %r14, -48 +; VECTOR-NEXT: .cfi_offset %r15, -40 +; VECTOR-NEXT: aghi %r15, -192 +; VECTOR-NEXT: .cfi_def_cfa_offset 352 +; VECTOR-NEXT: std %f8, 184(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: std %f9, 176(%r15) 
# 8-byte Folded Spill +; VECTOR-NEXT: std %f10, 168(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: std %f11, 160(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: .cfi_offset %f8, -168 +; VECTOR-NEXT: .cfi_offset %f9, -176 +; VECTOR-NEXT: .cfi_offset %f10, -184 +; VECTOR-NEXT: .cfi_offset %f11, -192 +; VECTOR-NEXT: ldr %f8, %f6 +; VECTOR-NEXT: ldr %f9, %f4 +; VECTOR-NEXT: ldr %f10, %f2 +; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT +; VECTOR-NEXT: aebr %f0, %f0 +; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT +; VECTOR-NEXT: ldr %f11, %f0 +; VECTOR-NEXT: ldr %f0, %f10 +; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT +; VECTOR-NEXT: aebr %f0, %f0 +; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT +; VECTOR-NEXT: ldr %f10, %f0 +; VECTOR-NEXT: ldr %f0, %f9 +; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT +; VECTOR-NEXT: aebr %f0, %f0 +; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT +; VECTOR-NEXT: ldr %f9, %f0 +; VECTOR-NEXT: ldr %f0, %f8 +; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT +; VECTOR-NEXT: aebr %f0, %f0 +; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT +; VECTOR-NEXT: ldr %f6, %f0 +; VECTOR-NEXT: ldr %f0, %f11 +; VECTOR-NEXT: ldr %f2, %f10 +; VECTOR-NEXT: ldr %f4, %f9 +; VECTOR-NEXT: ld %f8, 184(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: ld %f9, 176(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: ld %f10, 168(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: ld %f11, 160(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: lmg %r14, %r15, 304(%r15) +; VECTOR-NEXT: br %r14 +entry: + %Res = fadd <4 x half> %Op, %Op + ret <4 x half> %Res +} + +; Test a vector extension. +define <2 x half> @fun2(<2 x half> %Op) { +; NOVEC-LABEL: fun2: +; NOVEC: # %bb.0: # %entry +; NOVEC-NEXT: stmg %r14, %r15, 112(%r15) +; NOVEC-NEXT: .cfi_offset %r14, -48 +; NOVEC-NEXT: .cfi_offset %r15, -40 +; NOVEC-NEXT: aghi %r15, -176 +; NOVEC-NEXT: .cfi_def_cfa_offset 336 +; NOVEC-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: .cfi_offset %f8, -168 +; NOVEC-NEXT: .cfi_offset %f9, -176 +; NOVEC-NEXT: ler %f8, %f2 +; NOVEC-NEXT: brasl %r14, __extendhfdf2@PLT +; NOVEC-NEXT: ldr %f9, %f0 +; NOVEC-NEXT: ler %f0, %f8 +; NOVEC-NEXT: brasl %r14, __extendhfdf2@PLT +; NOVEC-NEXT: adbr %f9, %f9 +; NOVEC-NEXT: ldr %f8, %f0 +; NOVEC-NEXT: adbr %f8, %f0 +; NOVEC-NEXT: ldr %f0, %f9 +; NOVEC-NEXT: brasl %r14, __truncdfhf2@PLT +; NOVEC-NEXT: ler %f9, %f0 +; NOVEC-NEXT: ldr %f0, %f8 +; NOVEC-NEXT: brasl %r14, __truncdfhf2@PLT +; NOVEC-NEXT: ler %f2, %f0 +; NOVEC-NEXT: ler %f0, %f9 +; NOVEC-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: lmg %r14, %r15, 288(%r15) +; NOVEC-NEXT: br %r14 +; +; VECTOR-LABEL: fun2: +; VECTOR: # %bb.0: # %entry +; VECTOR-NEXT: stmg %r14, %r15, 112(%r15) +; VECTOR-NEXT: .cfi_offset %r14, -48 +; VECTOR-NEXT: .cfi_offset %r15, -40 +; VECTOR-NEXT: aghi %r15, -184 +; VECTOR-NEXT: .cfi_def_cfa_offset 344 +; VECTOR-NEXT: std %f8, 176(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: .cfi_offset %f8, -168 +; VECTOR-NEXT: ldr %f8, %f0 +; VECTOR-NEXT: ldr %f0, %f2 +; VECTOR-NEXT: brasl %r14, __extendhfdf2@PLT +; VECTOR-NEXT: # kill: def $f0d killed $f0d def $v0 +; VECTOR-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill +; VECTOR-NEXT: ldr %f0, %f8 +; VECTOR-NEXT: brasl %r14, __extendhfdf2@PLT +; VECTOR-NEXT: vl %v1, 160(%r15), 3 # 16-byte Folded Reload +; VECTOR-NEXT: # kill: def $f0d killed $f0d def $v0 +; VECTOR-NEXT: vmrhg %v0, %v0, %v1 +; VECTOR-NEXT: vfadb %v0, %v0, %v0 +; VECTOR-NEXT: vst %v0, 160(%r15), 3 # 16-byte 
Folded Spill +; VECTOR-NEXT: # kill: def $f0d killed $f0d killed $v0 +; VECTOR-NEXT: brasl %r14, __truncdfhf2@PLT +; VECTOR-NEXT: ldr %f8, %f0 +; VECTOR-NEXT: vl %v0, 160(%r15), 3 # 16-byte Folded Reload +; VECTOR-NEXT: vrepg %v0, %v0, 1 +; VECTOR-NEXT: # kill: def $f0d killed $f0d killed $v0 +; VECTOR-NEXT: brasl %r14, __truncdfhf2@PLT +; VECTOR-NEXT: ldr %f2, %f0 +; VECTOR-NEXT: ldr %f0, %f8 +; VECTOR-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: lmg %r14, %r15, 296(%r15) +; VECTOR-NEXT: br %r14 +entry: + %E = fpext <2 x half> %Op to <2 x double> + %Add = fadd <2 x double> %E, %E + %Res = fptrunc <2 x double> %Add to <2 x half> + ret <2 x half> %Res +} + +; Load and store an <8 x half> vector. +define void @fun3(ptr %Src, ptr %Dst) { +; NOVEC-LABEL: fun3: +; NOVEC: # %bb.0: # %entry +; NOVEC-NEXT: lh %r0, 0(%r2) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f0, %r0 +; NOVEC-NEXT: lh %r0, 2(%r2) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f1, %r0 +; NOVEC-NEXT: lh %r0, 4(%r2) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f2, %r0 +; NOVEC-NEXT: lh %r0, 6(%r2) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f3, %r0 +; NOVEC-NEXT: lh %r0, 8(%r2) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f4, %r0 +; NOVEC-NEXT: lh %r0, 10(%r2) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f5, %r0 +; NOVEC-NEXT: lh %r0, 12(%r2) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f6, %r0 +; NOVEC-NEXT: lh %r0, 14(%r2) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f7, %r0 +; NOVEC-NEXT: lgdr %r0, %f7 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 14(%r3) +; NOVEC-NEXT: lgdr %r0, %f6 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 12(%r3) +; NOVEC-NEXT: lgdr %r0, %f5 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 10(%r3) +; NOVEC-NEXT: lgdr %r0, %f4 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 8(%r3) +; NOVEC-NEXT: lgdr %r0, %f3 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 6(%r3) +; NOVEC-NEXT: lgdr %r0, %f2 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 4(%r3) +; NOVEC-NEXT: lgdr %r0, %f1 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 2(%r3) +; NOVEC-NEXT: lgdr %r0, %f0 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 0(%r3) +; NOVEC-NEXT: br %r14 +; +; VECTOR-LABEL: fun3: +; VECTOR: # %bb.0: # %entry +; VECTOR-NEXT: vlreph %v0, 0(%r2) +; VECTOR-NEXT: vlreph %v1, 2(%r2) +; VECTOR-NEXT: vlreph %v2, 4(%r2) +; VECTOR-NEXT: vlreph %v3, 6(%r2) +; VECTOR-NEXT: vlreph %v4, 8(%r2) +; VECTOR-NEXT: vlreph %v5, 10(%r2) +; VECTOR-NEXT: vlreph %v6, 12(%r2) +; VECTOR-NEXT: vlreph %v7, 14(%r2) +; VECTOR-NEXT: vsteh %v7, 14(%r3), 0 +; VECTOR-NEXT: vsteh %v6, 12(%r3), 0 +; VECTOR-NEXT: vsteh %v5, 10(%r3), 0 +; VECTOR-NEXT: vsteh %v4, 8(%r3), 0 +; VECTOR-NEXT: vsteh %v3, 6(%r3), 0 +; VECTOR-NEXT: vsteh %v2, 4(%r3), 0 +; VECTOR-NEXT: vsteh %v1, 2(%r3), 0 +; VECTOR-NEXT: vsteh %v0, 0(%r3), 0 +; 
VECTOR-NEXT: br %r14 +entry: + %L = load <8 x half>, ptr %Src + store <8 x half> %L, ptr %Dst + ret void +} + +; Call a function with <8 x half> argument and return values. +declare <8 x half> @foo(<8 x half>) +define void @fun4(ptr %Src, ptr %Dst) { +; NOVEC-LABEL: fun4: +; NOVEC: # %bb.0: # %entry +; NOVEC-NEXT: stmg %r13, %r15, 104(%r15) +; NOVEC-NEXT: .cfi_offset %r13, -56 +; NOVEC-NEXT: .cfi_offset %r14, -48 +; NOVEC-NEXT: .cfi_offset %r15, -40 +; NOVEC-NEXT: aghi %r15, -208 +; NOVEC-NEXT: .cfi_def_cfa_offset 368 +; NOVEC-NEXT: lh %r0, 0(%r2) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f0, %r0 +; NOVEC-NEXT: lh %r0, 2(%r2) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: # kill: def $f0h killed $f0h killed $f0d +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f2, %r0 +; NOVEC-NEXT: # kill: def $f2h killed $f2h killed $f2d +; NOVEC-NEXT: lh %r0, 4(%r2) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f4, %r0 +; NOVEC-NEXT: # kill: def $f4h killed $f4h killed $f4d +; NOVEC-NEXT: lh %r0, 6(%r2) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f6, %r0 +; NOVEC-NEXT: # kill: def $f6h killed $f6h killed $f6d +; NOVEC-NEXT: lh %r0, 8(%r2) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f1, %r0 +; NOVEC-NEXT: lh %r0, 10(%r2) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f3, %r0 +; NOVEC-NEXT: lh %r0, 12(%r2) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f5, %r0 +; NOVEC-NEXT: lh %r0, 14(%r2) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f7, %r0 +; NOVEC-NEXT: lgdr %r0, %f7 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 190(%r15) +; NOVEC-NEXT: lgdr %r0, %f5 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 182(%r15) +; NOVEC-NEXT: lgdr %r0, %f3 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 174(%r15) +; NOVEC-NEXT: lgdr %r0, %f1 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: la %r2, 192(%r15) +; NOVEC-NEXT: lgr %r13, %r3 +; NOVEC-NEXT: sth %r0, 166(%r15) +; NOVEC-NEXT: brasl %r14, foo@PLT +; NOVEC-NEXT: lh %r0, 192(%r15) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f0, %r0 +; NOVEC-NEXT: lh %r0, 194(%r15) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f1, %r0 +; NOVEC-NEXT: lh %r0, 196(%r15) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f2, %r0 +; NOVEC-NEXT: lh %r0, 198(%r15) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f3, %r0 +; NOVEC-NEXT: lh %r0, 200(%r15) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f4, %r0 +; NOVEC-NEXT: lh %r0, 202(%r15) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f5, %r0 +; NOVEC-NEXT: lh %r0, 204(%r15) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f6, %r0 +; NOVEC-NEXT: lh %r0, 206(%r15) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f7, %r0 +; NOVEC-NEXT: lgdr %r0, %f7 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: 
srl %r0, 16 +; NOVEC-NEXT: sth %r0, 14(%r13) +; NOVEC-NEXT: lgdr %r0, %f6 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 12(%r13) +; NOVEC-NEXT: lgdr %r0, %f5 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 10(%r13) +; NOVEC-NEXT: lgdr %r0, %f4 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 8(%r13) +; NOVEC-NEXT: lgdr %r0, %f3 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 6(%r13) +; NOVEC-NEXT: lgdr %r0, %f2 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 4(%r13) +; NOVEC-NEXT: lgdr %r0, %f1 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 2(%r13) +; NOVEC-NEXT: lgdr %r0, %f0 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 0(%r13) +; NOVEC-NEXT: lmg %r13, %r15, 312(%r15) +; NOVEC-NEXT: br %r14 +; +; VECTOR-LABEL: fun4: +; VECTOR: # %bb.0: # %entry +; VECTOR-NEXT: stmg %r13, %r15, 104(%r15) +; VECTOR-NEXT: .cfi_offset %r13, -56 +; VECTOR-NEXT: .cfi_offset %r14, -48 +; VECTOR-NEXT: .cfi_offset %r15, -40 +; VECTOR-NEXT: aghi %r15, -208 +; VECTOR-NEXT: .cfi_def_cfa_offset 368 +; VECTOR-NEXT: vlreph %v6, 6(%r2) +; VECTOR-NEXT: vlreph %v4, 4(%r2) +; VECTOR-NEXT: vlreph %v2, 2(%r2) +; VECTOR-NEXT: vlreph %v0, 0(%r2) +; VECTOR-NEXT: vlreph %v1, 8(%r2) +; VECTOR-NEXT: vlreph %v3, 10(%r2) +; VECTOR-NEXT: vlreph %v5, 12(%r2) +; VECTOR-NEXT: vlreph %v7, 14(%r2) +; VECTOR-NEXT: la %r2, 192(%r15) +; VECTOR-NEXT: lgr %r13, %r3 +; VECTOR-NEXT: vsteh %v7, 190(%r15), 0 +; VECTOR-NEXT: vsteh %v5, 182(%r15), 0 +; VECTOR-NEXT: vsteh %v3, 174(%r15), 0 +; VECTOR-NEXT: vsteh %v1, 166(%r15), 0 +; VECTOR-NEXT: brasl %r14, foo@PLT +; VECTOR-NEXT: vlreph %v0, 192(%r15) +; VECTOR-NEXT: vlreph %v1, 194(%r15) +; VECTOR-NEXT: vlreph %v2, 196(%r15) +; VECTOR-NEXT: vlreph %v3, 198(%r15) +; VECTOR-NEXT: vlreph %v4, 200(%r15) +; VECTOR-NEXT: vlreph %v5, 202(%r15) +; VECTOR-NEXT: vlreph %v6, 204(%r15) +; VECTOR-NEXT: vlreph %v7, 206(%r15) +; VECTOR-NEXT: vsteh %v7, 14(%r13), 0 +; VECTOR-NEXT: vsteh %v6, 12(%r13), 0 +; VECTOR-NEXT: vsteh %v5, 10(%r13), 0 +; VECTOR-NEXT: vsteh %v4, 8(%r13), 0 +; VECTOR-NEXT: vsteh %v3, 6(%r13), 0 +; VECTOR-NEXT: vsteh %v2, 4(%r13), 0 +; VECTOR-NEXT: vsteh %v1, 2(%r13), 0 +; VECTOR-NEXT: vsteh %v0, 0(%r13), 0 +; VECTOR-NEXT: lmg %r13, %r15, 312(%r15) +; VECTOR-NEXT: br %r14 +entry: + %arg = load <8 x half>, ptr %Src + %Res = call <8 x half> @foo(<8 x half> %arg) + store <8 x half> %Res, ptr %Dst + ret void +} + +; Receive and pass argument fully on stack. 
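+; The <4 x half> %dummy occupies the FP argument registers, so all eight
+; elements of %Arg5 arrive on the stack and are copied to the outgoing
+; argument area before calling foo2.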
+declare void @foo2(<4 x half> %dummy, <8 x half> %Arg5) +define void @fun5(<4 x half> %dummy, <8 x half> %Arg5) { +; NOVEC-LABEL: fun5: +; NOVEC: # %bb.0: +; NOVEC-NEXT: stmg %r14, %r15, 112(%r15) +; NOVEC-NEXT: .cfi_offset %r14, -48 +; NOVEC-NEXT: .cfi_offset %r15, -40 +; NOVEC-NEXT: aghi %r15, -256 +; NOVEC-NEXT: .cfi_def_cfa_offset 416 +; NOVEC-NEXT: std %f8, 248(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f9, 240(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f10, 232(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f11, 224(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: .cfi_offset %f8, -168 +; NOVEC-NEXT: .cfi_offset %f9, -176 +; NOVEC-NEXT: .cfi_offset %f10, -184 +; NOVEC-NEXT: .cfi_offset %f11, -192 +; NOVEC-NEXT: lh %r0, 422(%r15) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f1, %r0 +; NOVEC-NEXT: lh %r0, 430(%r15) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f3, %r0 +; NOVEC-NEXT: lh %r0, 438(%r15) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f5, %r0 +; NOVEC-NEXT: lh %r0, 446(%r15) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f7, %r0 +; NOVEC-NEXT: lh %r0, 454(%r15) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f8, %r0 +; NOVEC-NEXT: lh %r0, 462(%r15) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f9, %r0 +; NOVEC-NEXT: lh %r0, 470(%r15) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f10, %r0 +; NOVEC-NEXT: lh %r0, 478(%r15) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f11, %r0 +; NOVEC-NEXT: lgdr %r0, %f11 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 222(%r15) +; NOVEC-NEXT: lgdr %r0, %f10 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 214(%r15) +; NOVEC-NEXT: lgdr %r0, %f9 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 206(%r15) +; NOVEC-NEXT: lgdr %r0, %f8 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 198(%r15) +; NOVEC-NEXT: lgdr %r0, %f7 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 190(%r15) +; NOVEC-NEXT: lgdr %r0, %f5 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 182(%r15) +; NOVEC-NEXT: lgdr %r0, %f3 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 174(%r15) +; NOVEC-NEXT: lgdr %r0, %f1 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 166(%r15) +; NOVEC-NEXT: brasl %r14, foo2@PLT +; NOVEC-NEXT: ld %f8, 248(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f9, 240(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f10, 232(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f11, 224(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: lmg %r14, %r15, 368(%r15) +; NOVEC-NEXT: br %r14 +; +; VECTOR-LABEL: fun5: +; VECTOR: # %bb.0: +; VECTOR-NEXT: stmg %r14, %r15, 112(%r15) +; VECTOR-NEXT: .cfi_offset %r14, -48 +; VECTOR-NEXT: .cfi_offset %r15, -40 +; VECTOR-NEXT: aghi %r15, -224 +; VECTOR-NEXT: .cfi_def_cfa_offset 384 +; VECTOR-NEXT: vlreph %v1, 390(%r15) +; VECTOR-NEXT: vlreph %v3, 398(%r15) +; VECTOR-NEXT: vlreph %v5, 406(%r15) +; VECTOR-NEXT: vlreph %v7, 
414(%r15) +; VECTOR-NEXT: vlreph %v16, 422(%r15) +; VECTOR-NEXT: vlreph %v17, 430(%r15) +; VECTOR-NEXT: vlreph %v18, 438(%r15) +; VECTOR-NEXT: vlreph %v19, 446(%r15) +; VECTOR-NEXT: vsteh %v19, 222(%r15), 0 +; VECTOR-NEXT: vsteh %v18, 214(%r15), 0 +; VECTOR-NEXT: vsteh %v17, 206(%r15), 0 +; VECTOR-NEXT: vsteh %v16, 198(%r15), 0 +; VECTOR-NEXT: vsteh %v7, 190(%r15), 0 +; VECTOR-NEXT: vsteh %v5, 182(%r15), 0 +; VECTOR-NEXT: vsteh %v3, 174(%r15), 0 +; VECTOR-NEXT: vsteh %v1, 166(%r15), 0 +; VECTOR-NEXT: brasl %r14, foo2@PLT +; VECTOR-NEXT: lmg %r14, %r15, 336(%r15) +; VECTOR-NEXT: br %r14 + call void @foo2(<4 x half> %dummy, <8 x half> %Arg5) + ret void +} diff --git a/llvm/test/CodeGen/SystemZ/fp-half.ll b/llvm/test/CodeGen/SystemZ/fp-half.ll new file mode 100644 index 0000000000000..cd4aa12c2b4ef --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/fp-half.ll @@ -0,0 +1,627 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=zEC12 -verify-machineinstrs \ +; RUN: | FileCheck %s --check-prefix=NOVEC +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z16 -verify-machineinstrs \ +; RUN: | FileCheck %s --check-prefix=VECTOR +; +; Tests for 16-bit floating point (half). + +; Incoming half arguments added together and returned. +define half @fun0(half %Op0, half %Op1) { +; NOVEC-LABEL: fun0: +; NOVEC: # %bb.0: # %entry +; NOVEC-NEXT: stmg %r14, %r15, 112(%r15) +; NOVEC-NEXT: .cfi_offset %r14, -48 +; NOVEC-NEXT: .cfi_offset %r15, -40 +; NOVEC-NEXT: aghi %r15, -176 +; NOVEC-NEXT: .cfi_def_cfa_offset 336 +; NOVEC-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: .cfi_offset %f8, -168 +; NOVEC-NEXT: .cfi_offset %f9, -176 +; NOVEC-NEXT: ler %f8, %f0 +; NOVEC-NEXT: ler %f0, %f2 +; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT +; NOVEC-NEXT: ler %f9, %f0 +; NOVEC-NEXT: ler %f0, %f8 +; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT +; NOVEC-NEXT: aebr %f0, %f9 +; NOVEC-NEXT: brasl %r14, __truncsfhf2@PLT +; NOVEC-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: lmg %r14, %r15, 288(%r15) +; NOVEC-NEXT: br %r14 +; +; VECTOR-LABEL: fun0: +; VECTOR: # %bb.0: # %entry +; VECTOR-NEXT: stmg %r14, %r15, 112(%r15) +; VECTOR-NEXT: .cfi_offset %r14, -48 +; VECTOR-NEXT: .cfi_offset %r15, -40 +; VECTOR-NEXT: aghi %r15, -176 +; VECTOR-NEXT: .cfi_def_cfa_offset 336 +; VECTOR-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: .cfi_offset %f8, -168 +; VECTOR-NEXT: .cfi_offset %f9, -176 +; VECTOR-NEXT: ldr %f8, %f0 +; VECTOR-NEXT: ldr %f0, %f2 +; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT +; VECTOR-NEXT: ldr %f9, %f0 +; VECTOR-NEXT: ldr %f0, %f8 +; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT +; VECTOR-NEXT: aebr %f0, %f9 +; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT +; VECTOR-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: lmg %r14, %r15, 288(%r15) +; VECTOR-NEXT: br %r14 +entry: + %Res = fadd half %Op0, %Op1 + ret half %Res +} + +define half @fun1(half %Op0, half %Op1) { +; NOVEC-LABEL: fun1: +; NOVEC: # %bb.0: # %entry +; NOVEC-NEXT: stmg %r14, %r15, 112(%r15) +; NOVEC-NEXT: .cfi_offset %r14, -48 +; NOVEC-NEXT: .cfi_offset %r15, -40 +; NOVEC-NEXT: aghi %r15, -176 +; NOVEC-NEXT: .cfi_def_cfa_offset 336 +; NOVEC-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: 
std %f9, 160(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: .cfi_offset %f8, -168 +; NOVEC-NEXT: .cfi_offset %f9, -176 +; NOVEC-NEXT: ler %f8, %f2 +; NOVEC-NEXT: brasl %r14, __extendhfdf2@PLT +; NOVEC-NEXT: ldr %f9, %f0 +; NOVEC-NEXT: ler %f0, %f8 +; NOVEC-NEXT: brasl %r14, __extendhfdf2@PLT +; NOVEC-NEXT: adbr %f0, %f9 +; NOVEC-NEXT: brasl %r14, __truncdfhf2@PLT +; NOVEC-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: lmg %r14, %r15, 288(%r15) +; NOVEC-NEXT: br %r14 +; +; VECTOR-LABEL: fun1: +; VECTOR: # %bb.0: # %entry +; VECTOR-NEXT: stmg %r14, %r15, 112(%r15) +; VECTOR-NEXT: .cfi_offset %r14, -48 +; VECTOR-NEXT: .cfi_offset %r15, -40 +; VECTOR-NEXT: aghi %r15, -176 +; VECTOR-NEXT: .cfi_def_cfa_offset 336 +; VECTOR-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: .cfi_offset %f8, -168 +; VECTOR-NEXT: .cfi_offset %f9, -176 +; VECTOR-NEXT: ldr %f8, %f2 +; VECTOR-NEXT: brasl %r14, __extendhfdf2@PLT +; VECTOR-NEXT: ldr %f9, %f0 +; VECTOR-NEXT: ldr %f0, %f8 +; VECTOR-NEXT: brasl %r14, __extendhfdf2@PLT +; VECTOR-NEXT: wfadb %f0, %f9, %f0 +; VECTOR-NEXT: brasl %r14, __truncdfhf2@PLT +; VECTOR-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: lmg %r14, %r15, 288(%r15) +; VECTOR-NEXT: br %r14 +entry: + %E0 = fpext half %Op0 to double + %E1 = fpext half %Op1 to double + %Add = fadd double %E0, %E1 + %Res = fptrunc double %Add to half + ret half %Res +} + +define half @fun2(half %Op0, half %Op1) { +; NOVEC-LABEL: fun2: +; NOVEC: # %bb.0: # %entry +; NOVEC-NEXT: stmg %r14, %r15, 112(%r15) +; NOVEC-NEXT: .cfi_offset %r14, -48 +; NOVEC-NEXT: .cfi_offset %r15, -40 +; NOVEC-NEXT: aghi %r15, -232 +; NOVEC-NEXT: .cfi_def_cfa_offset 392 +; NOVEC-NEXT: std %f8, 224(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f9, 216(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f11, 208(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: .cfi_offset %f8, -168 +; NOVEC-NEXT: .cfi_offset %f9, -176 +; NOVEC-NEXT: .cfi_offset %f11, -184 +; NOVEC-NEXT: la %r2, 160(%r15) +; NOVEC-NEXT: ler %f8, %f2 +; NOVEC-NEXT: brasl %r14, __extendhftf2@PLT +; NOVEC-NEXT: ld %f9, 160(%r15) +; NOVEC-NEXT: ld %f11, 168(%r15) +; NOVEC-NEXT: la %r2, 176(%r15) +; NOVEC-NEXT: ler %f0, %f8 +; NOVEC-NEXT: brasl %r14, __extendhftf2@PLT +; NOVEC-NEXT: ld %f0, 176(%r15) +; NOVEC-NEXT: ld %f2, 184(%r15) +; NOVEC-NEXT: la %r2, 192(%r15) +; NOVEC-NEXT: axbr %f0, %f9 +; NOVEC-NEXT: std %f0, 192(%r15) +; NOVEC-NEXT: std %f2, 200(%r15) +; NOVEC-NEXT: brasl %r14, __trunctfhf2@PLT +; NOVEC-NEXT: ld %f8, 224(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f9, 216(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f11, 208(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: lmg %r14, %r15, 344(%r15) +; NOVEC-NEXT: br %r14 +; +; VECTOR-LABEL: fun2: +; VECTOR: # %bb.0: # %entry +; VECTOR-NEXT: stmg %r14, %r15, 112(%r15) +; VECTOR-NEXT: .cfi_offset %r14, -48 +; VECTOR-NEXT: .cfi_offset %r15, -40 +; VECTOR-NEXT: aghi %r15, -232 +; VECTOR-NEXT: .cfi_def_cfa_offset 392 +; VECTOR-NEXT: std %f8, 224(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: .cfi_offset %f8, -168 +; VECTOR-NEXT: la %r2, 176(%r15) +; VECTOR-NEXT: ldr %f8, %f2 +; VECTOR-NEXT: brasl %r14, __extendhftf2@PLT +; VECTOR-NEXT: vl %v0, 176(%r15), 3 +; VECTOR-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill +; VECTOR-NEXT: la %r2, 192(%r15) +; VECTOR-NEXT: ldr %f0, %f8 +; VECTOR-NEXT: brasl %r14, __extendhftf2@PLT +; 
VECTOR-NEXT: vl %v0, 192(%r15), 3 +; VECTOR-NEXT: vl %v1, 160(%r15), 3 # 16-byte Folded Reload +; VECTOR-NEXT: wfaxb %v0, %v1, %v0 +; VECTOR-NEXT: la %r2, 208(%r15) +; VECTOR-NEXT: vst %v0, 208(%r15), 3 +; VECTOR-NEXT: brasl %r14, __trunctfhf2@PLT +; VECTOR-NEXT: ld %f8, 224(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: lmg %r14, %r15, 344(%r15) +; VECTOR-NEXT: br %r14 +entry: + %E0 = fpext half %Op0 to fp128 + %E1 = fpext half %Op1 to fp128 + %Add = fadd fp128 %E0, %E1 + %Res = fptrunc fp128 %Add to half + ret half %Res +} + +; Test loading and storing a half value. +define void @fun3(ptr %Src, ptr %Dst) { +; NOVEC-LABEL: fun3: +; NOVEC: # %bb.0: # %entry +; NOVEC-NEXT: lh %r0, 0(%r2) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f0, %r0 +; NOVEC-NEXT: lgdr %r0, %f0 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 0(%r3) +; NOVEC-NEXT: br %r14 +; +; VECTOR-LABEL: fun3: +; VECTOR: # %bb.0: # %entry +; VECTOR-NEXT: vlreph %v0, 0(%r2) +; VECTOR-NEXT: vsteh %v0, 0(%r3), 0 +; VECTOR-NEXT: br %r14 +entry: + %L = load half, ptr %Src, align 2 + store half %L, ptr %Dst, align 2 + ret void +} + +define void @fun4(ptr %Src, ptr %Dst) { +; NOVEC-LABEL: fun4: +; NOVEC: # %bb.0: # %entry +; NOVEC-NEXT: stmg %r13, %r15, 104(%r15) +; NOVEC-NEXT: .cfi_offset %r13, -56 +; NOVEC-NEXT: .cfi_offset %r14, -48 +; NOVEC-NEXT: .cfi_offset %r15, -40 +; NOVEC-NEXT: aghi %r15, -160 +; NOVEC-NEXT: .cfi_def_cfa_offset 320 +; NOVEC-NEXT: lh %r0, 0(%r2) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgr %r13, %r3 +; NOVEC-NEXT: ldgr %f0, %r0 +; NOVEC-NEXT: # kill: def $f0h killed $f0h killed $f0d +; NOVEC-NEXT: brasl %r14, __extendhfdf2@PLT +; NOVEC-NEXT: adbr %f0, %f0 +; NOVEC-NEXT: brasl %r14, __truncdfhf2@PLT +; NOVEC-NEXT: # kill: def $f0h killed $f0h def $f0d +; NOVEC-NEXT: lgdr %r0, %f0 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 0(%r13) +; NOVEC-NEXT: lmg %r13, %r15, 264(%r15) +; NOVEC-NEXT: br %r14 +; +; VECTOR-LABEL: fun4: +; VECTOR: # %bb.0: # %entry +; VECTOR-NEXT: stmg %r13, %r15, 104(%r15) +; VECTOR-NEXT: .cfi_offset %r13, -56 +; VECTOR-NEXT: .cfi_offset %r14, -48 +; VECTOR-NEXT: .cfi_offset %r15, -40 +; VECTOR-NEXT: aghi %r15, -160 +; VECTOR-NEXT: .cfi_def_cfa_offset 320 +; VECTOR-NEXT: vlreph %v0, 0(%r2) +; VECTOR-NEXT: lgr %r13, %r3 +; VECTOR-NEXT: brasl %r14, __extendhfdf2@PLT +; VECTOR-NEXT: adbr %f0, %f0 +; VECTOR-NEXT: brasl %r14, __truncdfhf2@PLT +; VECTOR-NEXT: vsteh %v0, 0(%r13), 0 +; VECTOR-NEXT: lmg %r13, %r15, 264(%r15) +; VECTOR-NEXT: br %r14 +entry: + %Op0 = load half, ptr %Src, align 2 + %E0 = fpext half %Op0 to double + %Add = fadd double %E0, %E0 + %Res = fptrunc double %Add to half + store half %Res, ptr %Dst, align 2 + ret void +} + +define void @fun5(ptr %Src, ptr %Dst) { +; NOVEC-LABEL: fun5: +; NOVEC: # %bb.0: # %entry +; NOVEC-NEXT: stmg %r13, %r15, 104(%r15) +; NOVEC-NEXT: .cfi_offset %r13, -56 +; NOVEC-NEXT: .cfi_offset %r14, -48 +; NOVEC-NEXT: .cfi_offset %r15, -40 +; NOVEC-NEXT: aghi %r15, -192 +; NOVEC-NEXT: .cfi_def_cfa_offset 352 +; NOVEC-NEXT: lh %r0, 0(%r2) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: la %r2, 160(%r15) +; NOVEC-NEXT: lgr %r13, %r3 +; NOVEC-NEXT: ldgr %f0, %r0 +; NOVEC-NEXT: # kill: def $f0h killed $f0h killed $f0d +; NOVEC-NEXT: brasl %r14, __extendhftf2@PLT +; NOVEC-NEXT: ld %f0, 160(%r15) +; NOVEC-NEXT: ld %f2, 168(%r15) +; NOVEC-NEXT: la %r2, 
176(%r15) +; NOVEC-NEXT: axbr %f0, %f0 +; NOVEC-NEXT: std %f0, 176(%r15) +; NOVEC-NEXT: std %f2, 184(%r15) +; NOVEC-NEXT: brasl %r14, __trunctfhf2@PLT +; NOVEC-NEXT: # kill: def $f0h killed $f0h def $f0d +; NOVEC-NEXT: lgdr %r0, %f0 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 0(%r13) +; NOVEC-NEXT: lmg %r13, %r15, 296(%r15) +; NOVEC-NEXT: br %r14 +; +; VECTOR-LABEL: fun5: +; VECTOR: # %bb.0: # %entry +; VECTOR-NEXT: stmg %r13, %r15, 104(%r15) +; VECTOR-NEXT: .cfi_offset %r13, -56 +; VECTOR-NEXT: .cfi_offset %r14, -48 +; VECTOR-NEXT: .cfi_offset %r15, -40 +; VECTOR-NEXT: aghi %r15, -192 +; VECTOR-NEXT: .cfi_def_cfa_offset 352 +; VECTOR-NEXT: vlreph %v0, 0(%r2) +; VECTOR-NEXT: la %r2, 160(%r15) +; VECTOR-NEXT: lgr %r13, %r3 +; VECTOR-NEXT: brasl %r14, __extendhftf2@PLT +; VECTOR-NEXT: vl %v0, 160(%r15), 3 +; VECTOR-NEXT: wfaxb %v0, %v0, %v0 +; VECTOR-NEXT: la %r2, 176(%r15) +; VECTOR-NEXT: vst %v0, 176(%r15), 3 +; VECTOR-NEXT: brasl %r14, __trunctfhf2@PLT +; VECTOR-NEXT: vsteh %v0, 0(%r13), 0 +; VECTOR-NEXT: lmg %r13, %r15, 296(%r15) +; VECTOR-NEXT: br %r14 +entry: + %Op0 = load half, ptr %Src, align 2 + %E0 = fpext half %Op0 to fp128 + %Add = fadd fp128 %E0, %E0 + %Res = fptrunc fp128 %Add to half + store half %Res, ptr %Dst, align 2 + ret void +} + +; Test a chain of half operations which should have each operation surrounded +; by conversions to/from fp32 for proper emulation. +define half @fun6(half %Op0, half %Op1, half %Op2) { +; NOVEC-LABEL: fun6: +; NOVEC: # %bb.0: # %entry +; NOVEC-NEXT: stmg %r14, %r15, 112(%r15) +; NOVEC-NEXT: .cfi_offset %r14, -48 +; NOVEC-NEXT: .cfi_offset %r15, -40 +; NOVEC-NEXT: aghi %r15, -184 +; NOVEC-NEXT: .cfi_def_cfa_offset 344 +; NOVEC-NEXT: std %f8, 176(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f9, 168(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f10, 160(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: .cfi_offset %f8, -168 +; NOVEC-NEXT: .cfi_offset %f9, -176 +; NOVEC-NEXT: .cfi_offset %f10, -184 +; NOVEC-NEXT: ler %f9, %f0 +; NOVEC-NEXT: ler %f0, %f2 +; NOVEC-NEXT: ler %f8, %f4 +; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT +; NOVEC-NEXT: ler %f10, %f0 +; NOVEC-NEXT: ler %f0, %f9 +; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT +; NOVEC-NEXT: aebr %f0, %f10 +; NOVEC-NEXT: brasl %r14, __truncsfhf2@PLT +; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT +; NOVEC-NEXT: ler %f9, %f0 +; NOVEC-NEXT: ler %f0, %f8 +; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT +; NOVEC-NEXT: aebr %f0, %f9 +; NOVEC-NEXT: brasl %r14, __truncsfhf2@PLT +; NOVEC-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: lmg %r14, %r15, 296(%r15) +; NOVEC-NEXT: br %r14 +; +; VECTOR-LABEL: fun6: +; VECTOR: # %bb.0: # %entry +; VECTOR-NEXT: stmg %r14, %r15, 112(%r15) +; VECTOR-NEXT: .cfi_offset %r14, -48 +; VECTOR-NEXT: .cfi_offset %r15, -40 +; VECTOR-NEXT: aghi %r15, -184 +; VECTOR-NEXT: .cfi_def_cfa_offset 344 +; VECTOR-NEXT: std %f8, 176(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: std %f9, 168(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: std %f10, 160(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: .cfi_offset %f8, -168 +; VECTOR-NEXT: .cfi_offset %f9, -176 +; VECTOR-NEXT: .cfi_offset %f10, -184 +; VECTOR-NEXT: ldr %f9, %f0 +; VECTOR-NEXT: ldr %f0, %f2 +; VECTOR-NEXT: ldr %f8, %f4 +; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT +; VECTOR-NEXT: ldr %f10, %f0 +; VECTOR-NEXT: ldr %f0, %f9 +; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT +; 
VECTOR-NEXT: aebr %f0, %f10 +; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT +; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT +; VECTOR-NEXT: ldr %f9, %f0 +; VECTOR-NEXT: ldr %f0, %f8 +; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT +; VECTOR-NEXT: wfasb %f0, %f9, %f0 +; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT +; VECTOR-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: lmg %r14, %r15, 296(%r15) +; VECTOR-NEXT: br %r14 +entry: + %A0 = fadd half %Op0, %Op1 + %Res = fadd half %A0, %Op2 + ret half %Res +} + +; Store an incoming half argument and return a loaded one. +define half @fun7(half %Op0, ptr %Dst, ptr %Src) { +; NOVEC-LABEL: fun7: +; NOVEC: # %bb.0: # %entry +; NOVEC-NEXT: # kill: def $f0h killed $f0h def $f0d +; NOVEC-NEXT: lgdr %r0, %f0 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 0(%r2) +; NOVEC-NEXT: lh %r0, 0(%r3) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f0, %r0 +; NOVEC-NEXT: # kill: def $f0h killed $f0h killed $f0d +; NOVEC-NEXT: br %r14 +; +; VECTOR-LABEL: fun7: +; VECTOR: # %bb.0: # %entry +; VECTOR-NEXT: vsteh %v0, 0(%r2), 0 +; VECTOR-NEXT: vlreph %v0, 0(%r3) +; VECTOR-NEXT: br %r14 +entry: + store half %Op0, ptr %Dst + %Res = load half, ptr %Src + ret half %Res +} + +; Call a function with half argument and return values. +declare half @foo(half) +define void @fun8(ptr %Src, ptr %Dst) { +; NOVEC-LABEL: fun8: +; NOVEC: # %bb.0: # %entry +; NOVEC-NEXT: stmg %r13, %r15, 104(%r15) +; NOVEC-NEXT: .cfi_offset %r13, -56 +; NOVEC-NEXT: .cfi_offset %r14, -48 +; NOVEC-NEXT: .cfi_offset %r15, -40 +; NOVEC-NEXT: aghi %r15, -160 +; NOVEC-NEXT: .cfi_def_cfa_offset 320 +; NOVEC-NEXT: lh %r0, 0(%r2) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgr %r13, %r3 +; NOVEC-NEXT: ldgr %f0, %r0 +; NOVEC-NEXT: # kill: def $f0h killed $f0h killed $f0d +; NOVEC-NEXT: brasl %r14, foo@PLT +; NOVEC-NEXT: # kill: def $f0h killed $f0h def $f0d +; NOVEC-NEXT: lgdr %r0, %f0 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: sth %r0, 0(%r13) +; NOVEC-NEXT: lmg %r13, %r15, 264(%r15) +; NOVEC-NEXT: br %r14 +; +; VECTOR-LABEL: fun8: +; VECTOR: # %bb.0: # %entry +; VECTOR-NEXT: stmg %r13, %r15, 104(%r15) +; VECTOR-NEXT: .cfi_offset %r13, -56 +; VECTOR-NEXT: .cfi_offset %r14, -48 +; VECTOR-NEXT: .cfi_offset %r15, -40 +; VECTOR-NEXT: aghi %r15, -160 +; VECTOR-NEXT: .cfi_def_cfa_offset 320 +; VECTOR-NEXT: vlreph %v0, 0(%r2) +; VECTOR-NEXT: lgr %r13, %r3 +; VECTOR-NEXT: brasl %r14, foo@PLT +; VECTOR-NEXT: vsteh %v0, 0(%r13), 0 +; VECTOR-NEXT: lmg %r13, %r15, 264(%r15) +; VECTOR-NEXT: br %r14 +entry: + %arg = load half, ptr %Src + %Res = call half @foo(half %arg) + store half %Res, ptr %Dst + ret void +} + +; Receive stack argument. 
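+; The SystemZ ABI passes the first four fp arguments in %f0, %f2, %f4 and
+; %f6, so %Arg4 below arrives on the caller's stack: it is loaded from
+; 342(%r15) (lh on zEC12, vlreph on z16) before being added to %Arg3.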
+define half @fun9(half %Arg0, half %Arg1, half %Arg2, half %Arg3, half %Arg4) { +; NOVEC-LABEL: fun9: +; NOVEC: # %bb.0: +; NOVEC-NEXT: stmg %r14, %r15, 112(%r15) +; NOVEC-NEXT: .cfi_offset %r14, -48 +; NOVEC-NEXT: .cfi_offset %r15, -40 +; NOVEC-NEXT: aghi %r15, -176 +; NOVEC-NEXT: .cfi_def_cfa_offset 336 +; NOVEC-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: .cfi_offset %f8, -168 +; NOVEC-NEXT: .cfi_offset %f9, -176 +; NOVEC-NEXT: lh %r0, 342(%r15) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ler %f8, %f6 +; NOVEC-NEXT: ldgr %f0, %r0 +; NOVEC-NEXT: # kill: def $f0h killed $f0h killed $f0d +; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT +; NOVEC-NEXT: ler %f9, %f0 +; NOVEC-NEXT: ler %f0, %f8 +; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT +; NOVEC-NEXT: aebr %f0, %f9 +; NOVEC-NEXT: brasl %r14, __truncsfhf2@PLT +; NOVEC-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: lmg %r14, %r15, 288(%r15) +; NOVEC-NEXT: br %r14 +; +; VECTOR-LABEL: fun9: +; VECTOR: # %bb.0: +; VECTOR-NEXT: stmg %r14, %r15, 112(%r15) +; VECTOR-NEXT: .cfi_offset %r14, -48 +; VECTOR-NEXT: .cfi_offset %r15, -40 +; VECTOR-NEXT: aghi %r15, -176 +; VECTOR-NEXT: .cfi_def_cfa_offset 336 +; VECTOR-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: .cfi_offset %f8, -168 +; VECTOR-NEXT: .cfi_offset %f9, -176 +; VECTOR-NEXT: vlreph %v0, 342(%r15) +; VECTOR-NEXT: ldr %f8, %f6 +; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT +; VECTOR-NEXT: ldr %f9, %f0 +; VECTOR-NEXT: ldr %f0, %f8 +; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT +; VECTOR-NEXT: aebr %f0, %f9 +; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT +; VECTOR-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: lmg %r14, %r15, 288(%r15) +; VECTOR-NEXT: br %r14 + %A0 = fadd half %Arg3, %Arg4 + ret half %A0 +} + +; Pass stack argument. +define void @fun10(half %Arg0) { +; NOVEC-LABEL: fun10: +; NOVEC: # %bb.0: +; NOVEC-NEXT: stmg %r14, %r15, 112(%r15) +; NOVEC-NEXT: .cfi_offset %r14, -48 +; NOVEC-NEXT: .cfi_offset %r15, -40 +; NOVEC-NEXT: aghi %r15, -168 +; NOVEC-NEXT: .cfi_def_cfa_offset 328 +; NOVEC-NEXT: # kill: def $f0h killed $f0h def $f0d +; NOVEC-NEXT: lgdr %r0, %f0 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: ler %f2, %f0 +; NOVEC-NEXT: ler %f4, %f0 +; NOVEC-NEXT: ler %f6, %f0 +; NOVEC-NEXT: sth %r0, 166(%r15) +; NOVEC-NEXT: brasl %r14, fun9@PLT +; NOVEC-NEXT: lmg %r14, %r15, 280(%r15) +; NOVEC-NEXT: br %r14 +; +; VECTOR-LABEL: fun10: +; VECTOR: # %bb.0: +; VECTOR-NEXT: stmg %r14, %r15, 112(%r15) +; VECTOR-NEXT: .cfi_offset %r14, -48 +; VECTOR-NEXT: .cfi_offset %r15, -40 +; VECTOR-NEXT: aghi %r15, -168 +; VECTOR-NEXT: .cfi_def_cfa_offset 328 +; VECTOR-NEXT: ldr %f2, %f0 +; VECTOR-NEXT: ldr %f4, %f0 +; VECTOR-NEXT: ldr %f6, %f0 +; VECTOR-NEXT: vsteh %v0, 166(%r15), 0 +; VECTOR-NEXT: brasl %r14, fun9@PLT +; VECTOR-NEXT: lmg %r14, %r15, 280(%r15) +; VECTOR-NEXT: br %r14 + call void @fun9(half %Arg0, half %Arg0, half %Arg0, half %Arg0, half %Arg0) + ret void +} + +; Test loading some immediates from the Constant Pool. 
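+; With the vector facility, small f16 immediates are materialized directly
+; with vrepih: 13824 is 0x3600 (half 0.375) and 15360 is 0x3C00 (half 1.0).
+; Without it (zEC12) the constants are instead loaded via lhrl from the
+; constant pool. The zeros need no load at all (lzer/lcdfr).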
+declare void @foo2(half, half, half, half) +define void @fun11() { +; NOVEC-LABEL: fun11: +; NOVEC: # %bb.0: # %entry +; NOVEC-NEXT: stmg %r14, %r15, 112(%r15) +; NOVEC-NEXT: .cfi_offset %r14, -48 +; NOVEC-NEXT: .cfi_offset %r15, -40 +; NOVEC-NEXT: aghi %r15, -160 +; NOVEC-NEXT: .cfi_def_cfa_offset 320 +; NOVEC-NEXT: lhrl %r0, .LCPI11_0 +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f4, %r0 +; NOVEC-NEXT: lhrl %r0, .LCPI11_1 +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: # kill: def $f4h killed $f4h killed $f4d +; NOVEC-NEXT: lzer %f2 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lcdfr %f0, %f2 +; NOVEC-NEXT: ldgr %f6, %r0 +; NOVEC-NEXT: # kill: def $f6h killed $f6h killed $f6d +; NOVEC-NEXT: brasl %r14, foo2@PLT +; NOVEC-NEXT: lmg %r14, %r15, 272(%r15) +; NOVEC-NEXT: br %r14 +; +; VECTOR-LABEL: fun11: +; VECTOR: # %bb.0: # %entry +; VECTOR-NEXT: stmg %r14, %r15, 112(%r15) +; VECTOR-NEXT: .cfi_offset %r14, -48 +; VECTOR-NEXT: .cfi_offset %r15, -40 +; VECTOR-NEXT: aghi %r15, -160 +; VECTOR-NEXT: .cfi_def_cfa_offset 320 +; VECTOR-NEXT: lzer %f2 +; VECTOR-NEXT: vrepih %v4, 13824 +; VECTOR-NEXT: vrepih %v6, 15360 +; VECTOR-NEXT: lcdfr %f0, %f2 +; VECTOR-NEXT: brasl %r14, foo2@PLT +; VECTOR-NEXT: lmg %r14, %r15, 272(%r15) +; VECTOR-NEXT: br %r14 +entry: + call void @foo2(half -0.0, half 0.0, half 0.375, half 1.0) + ret void +} + +; Test a tail call. +declare void @foo3(half) +define void @fun12(half %Arg0) { +; NOVEC-LABEL: fun12: +; NOVEC: # %bb.0: # %entry +; NOVEC-NEXT: jg foo3@PLT +; +; VECTOR-LABEL: fun12: +; VECTOR: # %bb.0: # %entry +; VECTOR-NEXT: jg foo3@PLT +entry: + tail call void @foo3(half %Arg0) + ret void +} diff --git a/llvm/test/CodeGen/SystemZ/fp-round-03.ll b/llvm/test/CodeGen/SystemZ/fp-round-03.ll index 3cae74749efbe..e0c059661137c 100644 --- a/llvm/test/CodeGen/SystemZ/fp-round-03.ll +++ b/llvm/test/CodeGen/SystemZ/fp-round-03.ll @@ -1,6 +1,19 @@ ; Test rounding functions for z14 and above. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -verify-machineinstrs \ +; RUN: | FileCheck %s + +; Test that an f16 intrinsic can be lowered with promotion to float. +declare half @llvm.rint.f16(half %f) +define half @f0(half %f) { +; CHECK-LABEL: f0: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: fiebra %f0, 0, %f0, 0 +; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %res = call half @llvm.rint.f16(half %f) + ret half %res +} ; Test rint for f32. declare float @llvm.rint.f32(float %f) diff --git a/llvm/test/CodeGen/SystemZ/spill-half-01.mir b/llvm/test/CodeGen/SystemZ/spill-half-01.mir new file mode 100644 index 0000000000000..56f4ecbffd2c6 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/spill-half-01.mir @@ -0,0 +1,47 @@ +# RUN: llc -o - %s -mtriple=s390x-linux-gnu -mcpu=zEC12 -verify-machineinstrs \ +# RUN: -start-before=greedy | FileCheck %s -check-prefix=CHECK +# RUN: llc -o - %s -mtriple=s390x-linux-gnu -mcpu=z16 -verify-machineinstrs \ +# RUN: -start-before=greedy | FileCheck %s -check-prefix=VECTOR + +# Test spilling / reloading of an fp16bit virtual register. 
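+# Without vector support there is no 2-byte FPR memory access, so the spill
+# goes through a GPR: lgdr + srlg by 48 extracts the left-justified half
+# bits for the sth, and the reload reverses this with lh + sllg + ldgr.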
+ +--- +name: fun0 +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 0, class: fp16bit } +liveins: + - { reg: '$f0h', virtual-reg: '%0' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0: + liveins: $f0h + + ; CHECK-LABEL: fun0: + ; CHECK-NOT: $f0 + ; CHECK: # kill: def $f0h killed $f0h killed $f0d def $f0d + ; CHECK-NEXT: lgdr %r0, %f0 + ; CHECK-NEXT: srlg %r0, %r0, 48 + ; CHECK-NEXT: sth %r0, 166(%r15) # 2-byte Folded Spill + ; CHECK-NEXT: #APP + ; CHECK-NEXT: #NO_APP + ; CHECK: lh %r0, 166(%r15) # 2-byte Folded Reload + ; CHECK-NEXT: sllg %r0, %r0, 48 + ; CHECK-NEXT: ldgr %f0, %r0 + ; CHECK: # kill: def $f0h killed $f0h killed $f0d + ; CHECK-NOT: $f0 + + ; VECTOR-LABEL: fun0: + ; VECTOR: vsteh %v0, 166(%r15), 0 # 2-byte Folded Spill + ; VECTOR-NEXT: #APP + ; VECTOR-NEXT: #NO_APP + ; VECTOR-NEXT: vlreph %v0, 166(%r15) # 2-byte Folded Reload + + %0:fp16bit = COPY $f0h + INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f0d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d + $f0h = COPY %0 + Return implicit $f0h +... diff --git a/llvm/test/CodeGen/SystemZ/spill-half-02.mir b/llvm/test/CodeGen/SystemZ/spill-half-02.mir new file mode 100644 index 0000000000000..4934d0b728115 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/spill-half-02.mir @@ -0,0 +1,40 @@ +# RUN: llc -o - %s -mtriple=s390x-linux-gnu -mcpu=z16 -verify-machineinstrs \ +# RUN: -start-before=greedy | FileCheck %s + +# Test spilling / reloading of an vr16bit virtual register. 
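+# With the vector facility the value is spilled and reloaded directly with
+# vsteh/vlreph, so only a 2-byte stack slot and no GPR round trip is needed.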
+ +--- +name: fun0 +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 0, class: addr64bit } + - { id: 1, class: addr64bit } + - { id: 2, class: vr16bit } +liveins: + - { reg: '$r2d', virtual-reg: '%0' } + - { reg: '$r3d', virtual-reg: '%1' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0: + liveins: $r2d, $r3d + + ; CHECK-LABEL: fun0: + ; CHECK: stg %r3, 168(%r15) # 8-byte Folded Spill + ; CHECK-NEXT: vlreph %v0, 0(%r2) + ; CHECK-NEXT: vsteh %v0, 166(%r15), 0 # 2-byte Folded Spill + ; CHECK-NEXT: #APP + ; CHECK-NEXT: #NO_APP + ; CHECK-NEXT: lg %r1, 168(%r15) # 8-byte Folded Reload + ; CHECK-NEXT: vlreph %v0, 166(%r15) # 2-byte Folded Reload + ; CHECK-NEXT: vsteh %v0, 0(%r1), 0 + + %1:addr64bit = COPY $r3d + %0:addr64bit = COPY $r2d + %2:vr16bit = VL16 %0, 0, $noreg + INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f0d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d, 12, implicit-def dead early-clobber $f16d, 12, implicit-def dead early-clobber $f17d, 12, implicit-def dead early-clobber $f18d, 12, implicit-def dead early-clobber $f19d, 12, implicit-def dead early-clobber $f20d, 12, implicit-def dead early-clobber $f21d, 12, implicit-def dead early-clobber $f22d, 12, implicit-def dead early-clobber $f23d, 12, implicit-def dead early-clobber $f24d, 12, implicit-def dead early-clobber $f25d, 12, implicit-def dead early-clobber $f26d, 12, implicit-def dead early-clobber $f27d, 12, implicit-def dead early-clobber $f28d, 12, implicit-def dead early-clobber $f29d, 12, implicit-def dead early-clobber $f30d, 12, implicit-def dead early-clobber $f31d + VST16 %2, %1, 0, $noreg + Return +... 
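A minimal C++ sketch of the bit movement the two spill tests above verify, assuming the f16 payload sits left-justified in the 64-bit FPR image (the names spillHalf/reloadHalf are illustrative, not from the patch):

#include <cstdint>

// spill-half-01.mir, no-vector path (lgdr; srlg %r0, %r0, 48; sth):
// keep only the high 16 bits of the FPR image for the 2-byte slot.
uint16_t spillHalf(uint64_t FprImage) {
  return static_cast<uint16_t>(FprImage >> 48);
}

// Reload path (lh; sllg %r0, %r0, 48; ldgr): move the 16 spilled bits
// back into the high bits of a fresh FPR image.
uint64_t reloadHalf(uint16_t Slot) {
  return static_cast<uint64_t>(Slot) << 48;
}

On z16 (spill-half-02.mir) none of this is necessary: vsteh and vlreph transfer the two bytes directly between the register and the stack slot.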
diff --git a/llvm/test/CodeGen/SystemZ/twoaddr-kill.mir b/llvm/test/CodeGen/SystemZ/twoaddr-kill.mir index 7fc7bd3e347bb..95ba0b4bf3466 100644 --- a/llvm/test/CodeGen/SystemZ/twoaddr-kill.mir +++ b/llvm/test/CodeGen/SystemZ/twoaddr-kill.mir @@ -18,19 +18,19 @@ body: | ; CHECK-NEXT: $r2l = COPY [[COPY]] ; CHECK-NEXT: $r3l = COPY killed [[COPY]] ; CHECK-NEXT: [[COPY2:%[0-9]+]]:grh32bit = COPY killed [[COPY1]] - ; CHECK-NEXT: INLINEASM &"stepa $1, $2, $3", 0 /* attdialect */, 393226 /* regdef:GRH32Bit */, def [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY2]](tied-def 3), 9 /* reguse */, killed $r2l, 9 /* reguse */, killed $r3l + ; CHECK-NEXT: INLINEASM &"stepa $1, $2, $3", 0 /* attdialect */, 524298 /* regdef:GRH32Bit */, def [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY2]](tied-def 3), 9 /* reguse */, killed $r2l, 9 /* reguse */, killed $r3l ; CHECK-NEXT: [[COPY3:%[0-9]+]]:grh32bit = COPY killed [[COPY2]] ; CHECK-NEXT: [[COPY4:%[0-9]+]]:grh32bit = COPY [[COPY3]] - ; CHECK-NEXT: INLINEASM &"stepb $1, $2", 0 /* attdialect */, 393227 /* regdef-ec:GRH32Bit */, def early-clobber [[COPY4]], 2147483657 /* reguse tiedto:$0 */, [[COPY4]](tied-def 3), 393225 /* reguse:GRH32Bit */, [[COPY3]] + ; CHECK-NEXT: INLINEASM &"stepb $1, $2", 0 /* attdialect */, 524299 /* regdef-ec:GRH32Bit */, def early-clobber [[COPY4]], 2147483657 /* reguse tiedto:$0 */, [[COPY4]](tied-def 3), 524297 /* reguse:GRH32Bit */, [[COPY3]] ; CHECK-NEXT: $r2l = COPY killed [[COPY4]] ; CHECK-NEXT: Return implicit killed $r2l %0:gr32bit = COPY killed $r2l %2:grh32bit = COPY %0 $r2l = COPY %0 $r3l = COPY killed %0 - INLINEASM &"stepa $1, $2, $3", 0 /* attdialect */, 393226 /* regdef:GRH32Bit */, def %1:grh32bit, 2147483657 /* reguse tiedto:$0 */, killed %2(tied-def 3), 9 /* reguse */, killed $r2l, 9 /* reguse */, killed $r3l + INLINEASM &"stepa $1, $2, $3", 0 /* attdialect */, 524298 /* regdef:GRH32Bit */, def %1:grh32bit, 2147483657 /* reguse tiedto:$0 */, killed %2(tied-def 3), 9 /* reguse */, killed $r2l, 9 /* reguse */, killed $r3l %4:grh32bit = COPY killed %1 - INLINEASM &"stepb $1, $2", 0 /* attdialect */, 393227 /* regdef-ec:GRH32Bit */, def early-clobber %3:grh32bit, 2147483657 /* reguse tiedto:$0 */, killed %4(tied-def 3), 393225 /* reguse:GRH32Bit */, %4 + INLINEASM &"stepb $1, $2", 0 /* attdialect */, 524299 /* regdef-ec:GRH32Bit */, def early-clobber %3:grh32bit, 2147483657 /* reguse tiedto:$0 */, killed %4(tied-def 3), 524297 /* reguse:GRH32Bit */, %4 $r2l = COPY killed %3 Return implicit killed $r2l ... From aefc5cc5c88e3ddf3238403a1fed4c365aa31db1 Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Wed, 27 Nov 2024 15:13:31 -0600 Subject: [PATCH 02/12] Review updates Use 4-byte spill size in case of no vector support. Build the generic compiler-rt sources for s390x. Don't set libcall names. @llvm.s390.tdc, fcopysign, strict_fminimum/fmaximum. More tests for f16, but not complete. libfuncs built also to double and long double. 
--- clang/include/clang/Basic/TargetInfo.h | 2 +- clang/lib/Basic/Targets/SystemZ.h | 4 +- clang/lib/CodeGen/Targets/SystemZ.cpp | 37 ++- .../test/CodeGen/SystemZ/strictfp_builtins.c | 14 +- clang/test/CodeGen/SystemZ/systemz-abi.c | 9 + .../test/CodeGen/SystemZ/systemz-inline-asm.c | 8 + compiler-rt/cmake/builtin-config-ix.cmake | 3 +- compiler-rt/lib/builtins/CMakeLists.txt | 6 + compiler-rt/lib/builtins/clear_cache.c | 2 + compiler-rt/lib/builtins/extendhfdf2.c | 27 ++ compiler-rt/lib/builtins/extendhftf2.c | 2 +- compiler-rt/lib/builtins/trunctfhf2.c | 2 +- llvm/docs/LangRef.rst | 2 +- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 19 ++ llvm/lib/IR/RuntimeLibcalls.cpp | 5 - .../SystemZ/AsmParser/SystemZAsmParser.cpp | 2 + .../MCTargetDesc/SystemZMCTargetDesc.cpp | 25 +- llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp | 38 ++- llvm/lib/Target/SystemZ/SystemZFeatures.td | 2 + .../Target/SystemZ/SystemZISelLowering.cpp | 291 +++++++++++++----- llvm/lib/Target/SystemZ/SystemZISelLowering.h | 5 + llvm/lib/Target/SystemZ/SystemZInstrFP.td | 14 +- llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp | 59 +--- llvm/lib/Target/SystemZ/SystemZInstrVector.td | 2 + .../Target/SystemZ/SystemZRegisterInfo.cpp | 5 +- llvm/lib/Target/SystemZ/SystemZRegisterInfo.h | 2 +- .../lib/Target/SystemZ/SystemZRegisterInfo.td | 11 +- llvm/lib/Target/SystemZ/SystemZScheduleZ13.td | 8 +- llvm/lib/Target/SystemZ/SystemZScheduleZ14.td | 8 +- llvm/lib/Target/SystemZ/SystemZScheduleZ15.td | 8 +- llvm/lib/Target/SystemZ/SystemZScheduleZ16.td | 8 +- .../lib/Target/SystemZ/SystemZScheduleZ196.td | 4 +- .../Target/SystemZ/SystemZScheduleZEC12.td | 4 +- llvm/test/CodeGen/SystemZ/asm-10.ll | 9 + llvm/test/CodeGen/SystemZ/asm-17.ll | 11 + llvm/test/CodeGen/SystemZ/asm-19.ll | 19 ++ .../CodeGen/SystemZ/fmuladd-soft-float.ll | 37 +++ llvm/test/CodeGen/SystemZ/fp-abs-01.ll | 11 + llvm/test/CodeGen/SystemZ/fp-abs-03.ll | 12 + llvm/test/CodeGen/SystemZ/fp-abs-04.ll | 16 + llvm/test/CodeGen/SystemZ/fp-add-01.ll | 12 + llvm/test/CodeGen/SystemZ/fp-cmp-04.ll | 68 +++- llvm/test/CodeGen/SystemZ/fp-conv-05.ll | 10 + llvm/test/CodeGen/SystemZ/fp-conv-06.ll | 12 + llvm/test/CodeGen/SystemZ/fp-conv-07.ll | 10 + llvm/test/CodeGen/SystemZ/fp-conv-08.ll | 9 + llvm/test/CodeGen/SystemZ/fp-conv-09.ll | 10 + llvm/test/CodeGen/SystemZ/fp-conv-10.ll | 36 ++- llvm/test/CodeGen/SystemZ/fp-conv-11.ll | 10 + llvm/test/CodeGen/SystemZ/fp-conv-12.ll | 36 ++- llvm/test/CodeGen/SystemZ/fp-conv-13.ll | 32 +- llvm/test/CodeGen/SystemZ/fp-conv-14.ll | 32 +- llvm/test/CodeGen/SystemZ/fp-conv-20.ll | 74 +++-- llvm/test/CodeGen/SystemZ/fp-copysign-01.ll | 32 ++ llvm/test/CodeGen/SystemZ/fp-copysign-02.ll | 29 ++ llvm/test/CodeGen/SystemZ/fp-div-01.ll | 12 + llvm/test/CodeGen/SystemZ/fp-half-cmp.ll | 161 ++++++++++ llvm/test/CodeGen/SystemZ/fp-half-move.ll | 85 +++++ llvm/test/CodeGen/SystemZ/fp-libcall.ll | 10 + llvm/test/CodeGen/SystemZ/fp-mul-01.ll | 12 + llvm/test/CodeGen/SystemZ/fp-mul-06.ll | 14 + llvm/test/CodeGen/SystemZ/fp-mul-08.ll | 18 ++ llvm/test/CodeGen/SystemZ/fp-mul-10.ll | 38 ++- llvm/test/CodeGen/SystemZ/fp-mul-15.ll | 20 ++ llvm/test/CodeGen/SystemZ/fp-neg-01.ll | 11 + llvm/test/CodeGen/SystemZ/fp-neg-02.ll | 11 + llvm/test/CodeGen/SystemZ/fp-round-01.ll | 48 +++ llvm/test/CodeGen/SystemZ/fp-round-02.ll | 36 +++ llvm/test/CodeGen/SystemZ/fp-sqrt-01.ll | 12 + llvm/test/CodeGen/SystemZ/fp-strict-cmp-04.ll | 114 ++++++- .../test/CodeGen/SystemZ/fp-strict-cmps-01.ll | 20 ++ .../test/CodeGen/SystemZ/fp-strict-cmps-04.ll | 37 +++ 
.../test/CodeGen/SystemZ/fp-strict-conv-01.ll | 29 ++ .../test/CodeGen/SystemZ/fp-strict-conv-02.ll | 11 + .../test/CodeGen/SystemZ/fp-strict-conv-05.ll | 13 + .../test/CodeGen/SystemZ/fp-strict-conv-06.ll | 15 + .../test/CodeGen/SystemZ/fp-strict-conv-07.ll | 13 + .../test/CodeGen/SystemZ/fp-strict-conv-08.ll | 12 + .../test/CodeGen/SystemZ/fp-strict-conv-09.ll | 12 + .../test/CodeGen/SystemZ/fp-strict-conv-10.ll | 50 ++- .../test/CodeGen/SystemZ/fp-strict-conv-11.ll | 12 + .../test/CodeGen/SystemZ/fp-strict-conv-12.ll | 50 ++- .../test/CodeGen/SystemZ/fp-strict-conv-13.ll | 38 ++- .../test/CodeGen/SystemZ/fp-strict-conv-14.ll | 36 ++- .../test/CodeGen/SystemZ/fp-strict-conv-15.ll | 28 ++ .../test/CodeGen/SystemZ/fp-strict-conv-17.ll | 84 +++-- llvm/test/CodeGen/SystemZ/fp-strict-div-01.ll | 16 + llvm/test/CodeGen/SystemZ/fp-strict-mul-06.ll | 18 ++ .../CodeGen/SystemZ/fp-strict-round-01.ll | 71 +++++ .../CodeGen/SystemZ/fp-strict-round-02.ll | 42 +++ .../CodeGen/SystemZ/fp-strict-round-03.ll | 15 + .../test/CodeGen/SystemZ/fp-strict-sqrt-01.ll | 16 + llvm/test/CodeGen/SystemZ/fp-strict-sub-01.ll | 16 + llvm/test/CodeGen/SystemZ/fp-sub-01.ll | 12 + ...-asm-fp-int-casting-explicit-regs-zEC12.ll | 39 +++ ...inline-asm-fp-int-casting-explicit-regs.ll | 40 +++ .../inline-asm-fp-int-casting-zEC12.ll | 37 +++ .../SystemZ/inline-asm-fp-int-casting.ll | 52 ++++ llvm/test/CodeGen/SystemZ/is_fpclass.ll | 19 ++ llvm/test/CodeGen/SystemZ/spill-half-01.mir | 68 ++-- llvm/test/CodeGen/SystemZ/spill-half-02.mir | 19 +- llvm/test/CodeGen/SystemZ/stackmap.ll | 30 +- llvm/test/CodeGen/SystemZ/tdc-01.ll | 12 + llvm/test/CodeGen/SystemZ/tdc-02.ll | 17 + llvm/test/CodeGen/SystemZ/tdc-03.ll | 26 +- llvm/test/CodeGen/SystemZ/tdc-04.ll | 18 +- llvm/test/CodeGen/SystemZ/tdc-05.ll | 24 ++ llvm/test/CodeGen/SystemZ/tdc-06.ll | 2 - llvm/test/CodeGen/SystemZ/vec-max-05.ll | 24 ++ llvm/test/CodeGen/SystemZ/vec-min-05.ll | 24 ++ .../test/CodeGen/SystemZ/vec-strict-max-01.ll | 31 ++ .../test/CodeGen/SystemZ/vec-strict-min-01.ll | 31 ++ 112 files changed, 2574 insertions(+), 372 deletions(-) create mode 100644 compiler-rt/lib/builtins/extendhfdf2.c create mode 100644 llvm/test/CodeGen/SystemZ/fp-half-cmp.ll create mode 100644 llvm/test/CodeGen/SystemZ/fp-half-move.ll create mode 100644 llvm/test/CodeGen/SystemZ/fp-mul-15.ll diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index 93cffe84e2f42..8c3dcda25bc8d 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -235,7 +235,7 @@ class TargetInfo : public TransferrableTargetInfo, bool NoAsmVariants; // True if {|} are normal characters. bool HasLegalHalfType; // True if the backend supports operations on the half // LLVM IR type. - bool HalfArgsAndReturns; + bool HalfArgsAndReturns; // OpenCL 6.1.1.1, NEON (IEEE 754-2008 half) type. 
 bool HasFloat128;
 bool HasFloat16;
 bool HasBFloat16;
diff --git a/clang/lib/Basic/Targets/SystemZ.h b/clang/lib/Basic/Targets/SystemZ.h
index 1427c8e5e4e07..cb71c5d7e75d8 100644
--- a/clang/lib/Basic/Targets/SystemZ.h
+++ b/clang/lib/Basic/Targets/SystemZ.h
@@ -109,9 +109,7 @@ class LLVM_LIBRARY_VISIBILITY SystemZTargetInfo : public TargetInfo {
   unsigned getMinGlobalAlign(uint64_t Size, bool HasNonWeakDef) const override;
 
-  bool useFP16ConversionIntrinsics() const override {
-    return false;
-  }
+  bool useFP16ConversionIntrinsics() const override { return false; }
 
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
diff --git a/clang/lib/CodeGen/Targets/SystemZ.cpp b/clang/lib/CodeGen/Targets/SystemZ.cpp
index 10f955c08188e..6ea6c7a546436 100644
--- a/clang/lib/CodeGen/Targets/SystemZ.cpp
+++ b/clang/lib/CodeGen/Targets/SystemZ.cpp
@@ -31,7 +31,7 @@ class SystemZABIInfo : public ABIInfo {
   bool isPromotableIntegerTypeForABI(QualType Ty) const;
   bool isCompoundType(QualType Ty) const;
   bool isVectorArgumentType(QualType Ty) const;
-  bool isFPArgumentType(QualType Ty) const;
+  llvm::Type *getFPArgumentType(QualType Ty, uint64_t Size) const;
   QualType GetSingleElementType(QualType Ty) const;
 
   ABIArgInfo classifyReturnType(QualType RetTy) const;
@@ -107,7 +108,8 @@ class SystemZTargetCodeGenInfo : public TargetCodeGenInfo {
       return nullptr;
 
     llvm::Type *Ty = V->getType();
-    if (Ty->isFloatTy() || Ty->isDoubleTy() || Ty->isFP128Ty()) {
+    if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy() ||
+        Ty->isFP128Ty()) {
       llvm::Module &M = CGM.getModule();
       auto &Ctx = M.getContext();
       llvm::Function *TDCFunc = llvm::Intrinsic::getOrInsertDeclaration(
@@ -179,21 +180,31 @@ bool SystemZABIInfo::isVectorArgumentType(QualType Ty) const {
           getContext().getTypeSize(Ty) <= 128);
 }
 
-bool SystemZABIInfo::isFPArgumentType(QualType Ty) const {
+// The Size argument will, in case of an overaligned single-element struct,
+// reflect the overalignment value. In such a case the argument will be
+// passed using the type matching Size.
+llvm::Type *SystemZABIInfo::getFPArgumentType(QualType Ty,
+                                              uint64_t Size) const {
   if (IsSoftFloatABI)
-    return false;
+    return nullptr;
 
   if (const BuiltinType *BT = Ty->getAs<BuiltinType>())
     switch (BT->getKind()) {
-    case BuiltinType::Float16: // _Float16
+    case BuiltinType::Float16:
+      if (Size == 16)
+        return llvm::Type::getHalfTy(getVMContext());
+      LLVM_FALLTHROUGH;
     case BuiltinType::Float:
+      if (Size == 32)
+        return llvm::Type::getFloatTy(getVMContext());
+      LLVM_FALLTHROUGH;
     case BuiltinType::Double:
-      return true;
+      return llvm::Type::getDoubleTy(getVMContext());
    default:
-      return false;
+      return nullptr;
    }
 
-  return false;
+  return nullptr;
 }
 
 QualType SystemZABIInfo::GetSingleElementType(QualType Ty) const {
@@ -449,13 +460,11 @@ ABIArgInfo SystemZABIInfo::classifyArgumentType(QualType Ty) const {
       return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace(),
                                      /*ByVal=*/false);
 
-    // The structure is passed as an unextended integer, a float, or a double.
-    if (isFPArgumentType(SingleElementTy)) {
+    // The structure is passed as an unextended integer, a half, a float,
+    // or a double.
+    if (llvm::Type *FPArgTy = getFPArgumentType(SingleElementTy, Size)) {
      assert(Size == 16 || Size == 32 || Size == 64);
-      return ABIArgInfo::getDirect(
-          Size == 16 ? llvm::Type::getHalfTy(getVMContext())
-          : Size == 32 ?
llvm::Type::getFloatTy(getVMContext()) - : llvm::Type::getDoubleTy(getVMContext())); + return ABIArgInfo::getDirect(FPArgTy); } else { llvm::IntegerType *PassTy = llvm::IntegerType::get(getVMContext(), Size); return Size <= 32 ? ABIArgInfo::getNoExtend(PassTy) diff --git a/clang/test/CodeGen/SystemZ/strictfp_builtins.c b/clang/test/CodeGen/SystemZ/strictfp_builtins.c index 8c8f1f4cabd74..f871debde067e 100644 --- a/clang/test/CodeGen/SystemZ/strictfp_builtins.c +++ b/clang/test/CodeGen/SystemZ/strictfp_builtins.c @@ -4,12 +4,24 @@ #pragma float_control(except, on) +// CHECK-LABEL: @test_isnan__Float16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[F_ADDR:%.*]] = alloca half, align 2 +// CHECK-NEXT: store half [[F:%.*]], ptr [[F_ADDR]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[F_ADDR]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.s390.tdc.f16(half [[TMP0]], i64 15) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: ret i32 [[TMP1]] +// +int test_isnan__Float16(_Float16 f) { + return __builtin_isnan(f); +} + // CHECK-LABEL: @test_isnan_float( // CHECK-NEXT: entry: // CHECK-NEXT: [[F_ADDR:%.*]] = alloca float, align 4 // CHECK-NEXT: store float [[F:%.*]], ptr [[F_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.s390.tdc.f32(float [[TMP0]], i64 15) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.s390.tdc.f32(float [[TMP0]], i64 15) #[[ATTR2]] // CHECK-NEXT: ret i32 [[TMP1]] // int test_isnan_float(float f) { diff --git a/clang/test/CodeGen/SystemZ/systemz-abi.c b/clang/test/CodeGen/SystemZ/systemz-abi.c index 5e61c03672174..f26084ab44eae 100644 --- a/clang/test/CodeGen/SystemZ/systemz-abi.c +++ b/clang/test/CodeGen/SystemZ/systemz-abi.c @@ -155,11 +155,20 @@ struct agg_longdouble { long double a; }; struct agg_longdouble pass_agg_longdouble(struct agg_longdouble arg) { return arg; } // CHECK-LABEL: define{{.*}} void @pass_agg_longdouble(ptr dead_on_unwind noalias writable sret(%struct.agg_longdouble) align 8 %{{.*}}, ptr %{{.*}}) +struct agg__Float16_a4 { _Float16 a __attribute__((aligned (4))); }; +struct agg__Float16_a4 pass_agg__Float16_a4(struct agg__Float16_a4 arg) { return arg; } +// HARD-FLOAT-LABEL: define{{.*}} void @pass_agg__Float16_a4(ptr dead_on_unwind noalias writable sret(%struct.agg__Float16_a4) align 4 %{{.*}}, float %{{.*}}) +// SOFT-FLOAT-LABEL: define{{.*}} void @pass_agg__Float16_a4(ptr dead_on_unwind noalias writable sret(%struct.agg__Float16_a4) align 4 %{{.*}}, i32 noext %{{.*}}) + struct agg__Float16_a8 { _Float16 a __attribute__((aligned (8))); }; struct agg__Float16_a8 pass_agg__Float16_a8(struct agg__Float16_a8 arg) { return arg; } // HARD-FLOAT-LABEL: define{{.*}} void @pass_agg__Float16_a8(ptr dead_on_unwind noalias writable sret(%struct.agg__Float16_a8) align 8 %{{.*}}, double %{{.*}}) // SOFT-FLOAT-LABEL: define{{.*}} void @pass_agg__Float16_a8(ptr dead_on_unwind noalias writable sret(%struct.agg__Float16_a8) align 8 %{{.*}}, i64 %{{.*}}) +struct agg__Float16_a16 { _Float16 a __attribute__((aligned (16))); }; +struct agg__Float16_a16 pass_agg__Float16_a16(struct agg__Float16_a16 arg) { return arg; } +// CHECK-LABEL: define{{.*}} void @pass_agg__Float16_a16(ptr dead_on_unwind noalias writable sret(%struct.agg__Float16_a16) align 16 %{{.*}}, ptr %{{.*}}) + struct agg_float_a8 { float a __attribute__((aligned (8))); }; struct agg_float_a8 pass_agg_float_a8(struct agg_float_a8 arg) { return arg; } // HARD-FLOAT-LABEL: define{{.*}} void @pass_agg_float_a8(ptr 
dead_on_unwind noalias writable sret(%struct.agg_float_a8) align 8 %{{.*}}, double %{{.*}})
diff --git a/clang/test/CodeGen/SystemZ/systemz-inline-asm.c b/clang/test/CodeGen/SystemZ/systemz-inline-asm.c
index 9e62b8e107900..434937a66389c 100644
--- a/clang/test/CodeGen/SystemZ/systemz-inline-asm.c
+++ b/clang/test/CodeGen/SystemZ/systemz-inline-asm.c
@@ -106,6 +106,14 @@ void test_M(void) {
 // CHECK: call void asm sideeffect "#FOO $0", "M"(i32 2147483647)
 }
 
+_Float16 test_f16(_Float16 a) {
+  _Float16 f;
+  asm("ler %0, %1" : "=f" (f) : "f" (a));
+  return f;
+// CHECK-LABEL: define{{.*}} half @test_f16(half noundef %a)
+// CHECK: call half asm "ler $0, $1", "=f,f"(half %a)
+}
+
 float test_f32(float f, float g) {
   asm("aebr %0, %2" : "=f" (f) : "0" (f), "f" (g));
   return f;
diff --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake
index 7bd3269bd999d..cbb43a5958d2f 100644
--- a/compiler-rt/cmake/builtin-config-ix.cmake
+++ b/compiler-rt/cmake/builtin-config-ix.cmake
@@ -73,6 +73,7 @@ set(PPC32 powerpc powerpcspe)
 set(PPC64 powerpc64 powerpc64le)
 set(RISCV32 riscv32)
 set(RISCV64 riscv64)
+set(S390X s390x)
 set(SPARC sparc)
 set(SPARCV9 sparcv9)
 set(WASM32 wasm32)
@@ -88,7 +89,7 @@ endif()
 set(ALL_BUILTIN_SUPPORTED_ARCH
   ${X86} ${X86_64} ${AMDGPU} ${ARM32} ${ARM64} ${AVR}
   ${HEXAGON} ${MIPS32} ${MIPS64} ${NVPTX} ${PPC32} ${PPC64}
-  ${RISCV32} ${RISCV64} ${SPARC} ${SPARCV9}
+  ${RISCV32} ${RISCV64} ${S390X} ${SPARC} ${SPARCV9}
   ${WASM32} ${WASM64} ${VE} ${LOONGARCH64})
 
 include(CompilerRTUtils)
diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index 3cdbf21ed403d..74d9627b9f102 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -104,6 +104,7 @@ set(GENERIC_SOURCES
   divti3.c
   extendsfdf2.c
   extendhfsf2.c
+  extendhfdf2.c
   ffsdi2.c
   ffssi2.c
   ffsti2.c
@@ -768,6 +769,11 @@ set(riscv64_SOURCES
 set(sparc_SOURCES ${GENERIC_SOURCES} ${GENERIC_TF_SOURCES})
 set(sparcv9_SOURCES ${GENERIC_SOURCES} ${GENERIC_TF_SOURCES})
 
+set(s390x_SOURCES
+  ${GENERIC_SOURCES}
+  ${GENERIC_TF_SOURCES}
+)
+
 set(wasm32_SOURCES
   ${GENERIC_TF_SOURCES}
   ${GENERIC_SOURCES}
diff --git a/compiler-rt/lib/builtins/clear_cache.c b/compiler-rt/lib/builtins/clear_cache.c
index 2ac99b25c243f..441eabd1fe922 100644
--- a/compiler-rt/lib/builtins/clear_cache.c
+++ b/compiler-rt/lib/builtins/clear_cache.c
@@ -62,6 +62,8 @@ void __clear_cache(void *start, void *end) {
 #if __i386__ || __x86_64__ || defined(_M_IX86) || defined(_M_X64)
 // Intel processors have a unified instruction and data cache
 // so there is nothing to do
+#elif defined(__s390__)
+// no-op
 #elif defined(_WIN32) && (defined(__arm__) || defined(__aarch64__))
   FlushInstructionCache(GetCurrentProcess(), start, end - start);
 #elif defined(__arm__) && !defined(__APPLE__)
diff --git a/compiler-rt/lib/builtins/extendhfdf2.c b/compiler-rt/lib/builtins/extendhfdf2.c
new file mode 100644
index 0000000000000..33fa92d2cd341
--- /dev/null
+++ b/compiler-rt/lib/builtins/extendhfdf2.c
@@ -0,0 +1,27 @@
+//===-- lib/extendhfdf2.c - half -> double conversion ------------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define SRC_HALF
+#define DST_DOUBLE
+#include "fp_extend_impl.inc"
+
+// Use a forwarding definition and noinline to implement a poor man's alias,
+// as there isn't a good cross-platform way of defining one.
+COMPILER_RT_ABI NOINLINE double __extendhfdf2(src_t a) {
+  return __extendXfYf2__(a);
+}
+
+COMPILER_RT_ABI double __gnu_h2d_ieee(src_t a) { return __extendhfdf2(a); }
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI double __aeabi_h2d(src_t a) { return __extendhfdf2(a); }
+#else
+COMPILER_RT_ALIAS(__extendhfdf2, __aeabi_h2d)
+#endif
+#endif
diff --git a/compiler-rt/lib/builtins/extendhftf2.c b/compiler-rt/lib/builtins/extendhftf2.c
index 7609db6f06e4a..67eddc6b34761 100644
--- a/compiler-rt/lib/builtins/extendhftf2.c
+++ b/compiler-rt/lib/builtins/extendhftf2.c
@@ -10,7 +10,7 @@
 #define QUAD_PRECISION
 #include "fp_lib.h"
 
-#if defined(CRT_HAS_TF_MODE) && defined(COMPILER_RT_HAS_FLOAT16)
+#if defined(CRT_HAS_TF_MODE)
 #define SRC_HALF
 #define DST_QUAD
 #include "fp_extend_impl.inc"
diff --git a/compiler-rt/lib/builtins/trunctfhf2.c b/compiler-rt/lib/builtins/trunctfhf2.c
index 3f031e0f84451..0db4c4d0d8b31 100644
--- a/compiler-rt/lib/builtins/trunctfhf2.c
+++ b/compiler-rt/lib/builtins/trunctfhf2.c
@@ -10,7 +10,7 @@
 #define QUAD_PRECISION
 #include "fp_lib.h"
 
-#if defined(CRT_HAS_TF_MODE) && defined(COMPILER_RT_HAS_FLOAT16)
+#if defined(CRT_HAS_TF_MODE)
 #define SRC_QUAD
 #define DST_HALF
 #include "fp_trunc_impl.inc"
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 769003a90f959..110c30e19220f 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -5710,7 +5710,7 @@ SystemZ:
   address context evaluates as zero).
 - ``h``: A 32-bit value in the high part of a 64bit data register
   (LLVM-specific)
-- ``f``: A 32, 64, or 128-bit floating-point register.
+- ``f``: A 16, 32, 64, or 128-bit floating-point register.
 X86:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index b8af281e1c24b..fddb99d2f0b22 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -5466,6 +5466,25 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
                       DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp3,
                                   DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)));
     break;
+
+  case ISD::STRICT_FMINIMUM:
+  case ISD::STRICT_FMAXIMUM: {
+    SDValue InChain = Node->getOperand(0);
+    SDVTList VTs = DAG.getVTList(NVT, MVT::Other);
+    Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, VTs, InChain,
+                       Node->getOperand(1));
+    Tmp2 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, VTs, InChain,
+                       Node->getOperand(2));
+    SmallVector<SDValue, 3> Ops = {InChain, Tmp1, Tmp2};
+    Tmp3 = DAG.getNode(Node->getOpcode(), dl, VTs, Ops, Node->getFlags());
+    Tmp4 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, DAG.getVTList(OVT, MVT::Other),
+                       InChain, Tmp3,
+                       DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
+    Results.push_back(Tmp4);
+    Results.push_back(Tmp4.getValue(1));
+    break;
+  }
+
   case ISD::STRICT_FADD:
   case ISD::STRICT_FSUB:
   case ISD::STRICT_FMUL:
diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp
index 5ba1bd87e9518..90c3bf0db0236 100644
--- a/llvm/lib/IR/RuntimeLibcalls.cpp
+++ b/llvm/lib/IR/RuntimeLibcalls.cpp
@@ -253,9 +253,4 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT) {
     }
     setLibcallName(RTLIB::MULO_I128, nullptr);
   }
-
-  if (TT.isSystemZ()) {
-    setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
-    setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
-  }
 }
diff --git a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index 265fea11e15dd..6d9a7a73f72db 100644
--- a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -901,6 +901,7 @@ ParseStatus SystemZAsmParser::parseRegister(OperandVector &Operands,
     return ParseStatus::NoMatch;
 
   // Determine the LLVM register number according to Kind.
+ // clang-format off const unsigned *Regs; switch (Kind) { case GR32Reg: Regs = SystemZMC::GR32Regs; break; @@ -918,6 +919,7 @@ ParseStatus SystemZAsmParser::parseRegister(OperandVector &Operands, case AR32Reg: Regs = SystemZMC::AR32Regs; break; case CR64Reg: Regs = SystemZMC::CR64Regs; break; } + // clang-format on if (Regs[Reg.Num] == 0) return Error(Reg.StartLoc, "invalid register pair"); diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp index 493d6ea3b8cd4..d2ed5cac5c576 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -65,11 +65,10 @@ const unsigned SystemZMC::GR128Regs[16] = { SystemZ::R8Q, 0, SystemZ::R10Q, 0, SystemZ::R12Q, 0, SystemZ::R14Q, 0}; const unsigned SystemZMC::FP16Regs[16] = { - SystemZ::F0H, SystemZ::F1H, SystemZ::F2H, SystemZ::F3H, - SystemZ::F4H, SystemZ::F5H, SystemZ::F6H, SystemZ::F7H, - SystemZ::F8H, SystemZ::F9H, SystemZ::F10H, SystemZ::F11H, - SystemZ::F12H, SystemZ::F13H, SystemZ::F14H, SystemZ::F15H -}; + SystemZ::F0H, SystemZ::F1H, SystemZ::F2H, SystemZ::F3H, + SystemZ::F4H, SystemZ::F5H, SystemZ::F6H, SystemZ::F7H, + SystemZ::F8H, SystemZ::F9H, SystemZ::F10H, SystemZ::F11H, + SystemZ::F12H, SystemZ::F13H, SystemZ::F14H, SystemZ::F15H}; const unsigned SystemZMC::FP32Regs[16] = { SystemZ::F0S, SystemZ::F1S, SystemZ::F2S, SystemZ::F3S, @@ -88,15 +87,13 @@ const unsigned SystemZMC::FP128Regs[16] = { SystemZ::F8Q, SystemZ::F9Q, 0, 0, SystemZ::F12Q, SystemZ::F13Q, 0, 0}; const unsigned SystemZMC::VR16Regs[32] = { - SystemZ::F0H, SystemZ::F1H, SystemZ::F2H, SystemZ::F3H, - SystemZ::F4H, SystemZ::F5H, SystemZ::F6H, SystemZ::F7H, - SystemZ::F8H, SystemZ::F9H, SystemZ::F10H, SystemZ::F11H, - SystemZ::F12H, SystemZ::F13H, SystemZ::F14H, SystemZ::F15H, - SystemZ::F16H, SystemZ::F17H, SystemZ::F18H, SystemZ::F19H, - SystemZ::F20H, SystemZ::F21H, SystemZ::F22H, SystemZ::F23H, - SystemZ::F24H, SystemZ::F25H, SystemZ::F26H, SystemZ::F27H, - SystemZ::F28H, SystemZ::F29H, SystemZ::F30H, SystemZ::F31H -}; + SystemZ::F0H, SystemZ::F1H, SystemZ::F2H, SystemZ::F3H, SystemZ::F4H, + SystemZ::F5H, SystemZ::F6H, SystemZ::F7H, SystemZ::F8H, SystemZ::F9H, + SystemZ::F10H, SystemZ::F11H, SystemZ::F12H, SystemZ::F13H, SystemZ::F14H, + SystemZ::F15H, SystemZ::F16H, SystemZ::F17H, SystemZ::F18H, SystemZ::F19H, + SystemZ::F20H, SystemZ::F21H, SystemZ::F22H, SystemZ::F23H, SystemZ::F24H, + SystemZ::F25H, SystemZ::F26H, SystemZ::F27H, SystemZ::F28H, SystemZ::F29H, + SystemZ::F30H, SystemZ::F31H}; const unsigned SystemZMC::VR32Regs[32] = { SystemZ::F0S, SystemZ::F1S, SystemZ::F2S, SystemZ::F3S, SystemZ::F4S, diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp index f679cc05f3c04..c1ffc287235e5 100644 --- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -136,6 +136,25 @@ static MCInst lowerSubvectorStore(const MachineInstr *MI, unsigned Opcode) { .addImm(0); } +// MI extracts the first element of the source vector. +static MCInst lowerVecEltExtraction(const MachineInstr *MI, unsigned Opcode) { + return MCInstBuilder(Opcode) + .addReg(SystemZMC::getRegAsGR64(MI->getOperand(0).getReg())) + .addReg(SystemZMC::getRegAsVR128(MI->getOperand(1).getReg())) + .addReg(0) + .addImm(0); +} + +// MI inserts value into the first element of the destination vector. 
+static MCInst lowerVecEltInsertion(const MachineInstr *MI, unsigned Opcode) { + return MCInstBuilder(Opcode) + .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg())) + .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg())) + .addReg(MI->getOperand(1).getReg()) + .addReg(0) + .addImm(0); +} + // The XPLINK ABI requires that a no-op encoding the call type is emitted after // each call to a subroutine. This information can be used by the called // function to determine its entry point, e.g. for generating a backtrace. The @@ -574,18 +593,19 @@ void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) { break; case SystemZ::LFER: - LoweredMI = MCInstBuilder(SystemZ::VLGVF) - .addReg(SystemZMC::getRegAsGR64(MI->getOperand(0).getReg())) - .addReg(SystemZMC::getRegAsVR128(MI->getOperand(1).getReg())) - .addReg(0).addImm(0); + LoweredMI = lowerVecEltExtraction(MI, SystemZ::VLGVF); + break; + + case SystemZ::LFER_16: + LoweredMI = lowerVecEltExtraction(MI, SystemZ::VLGVH); break; case SystemZ::LEFR: - LoweredMI = MCInstBuilder(SystemZ::VLVGF) - .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg())) - .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg())) - .addReg(MI->getOperand(1).getReg()) - .addReg(0).addImm(0); + LoweredMI = lowerVecEltInsertion(MI, SystemZ::VLVGF); + break; + + case SystemZ::LEFR_16: + LoweredMI = lowerVecEltInsertion(MI, SystemZ::VLVGH); break; #define LOWER_LOW(NAME) \ diff --git a/llvm/lib/Target/SystemZ/SystemZFeatures.td b/llvm/lib/Target/SystemZ/SystemZFeatures.td index ec1a7beeab213..2c48da8320fb9 100644 --- a/llvm/lib/Target/SystemZ/SystemZFeatures.td +++ b/llvm/lib/Target/SystemZ/SystemZFeatures.td @@ -196,6 +196,8 @@ def FeatureVector : SystemZFeature< >; def FeatureNoVector : SystemZMissingFeature<"Vector">; +def NoVecHwMode : HwMode<"-vector", [FeatureNoVector]>; + def Arch11NewFeatures : SystemZFeatureList<[ FeatureLoadAndZeroRightmostByte, FeatureLoadStoreOnCond2, diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 7b2df9c64aaf0..508ca594f78c8 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -226,23 +226,25 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SMUL_LOHI, VT, Custom); setOperationAction(ISD::UMUL_LOHI, VT, Custom); - // Only z196 and above have native support for conversions to unsigned. - // On z10, promoting to i64 doesn't generate an inexact condition for - // values that are outside the i32 range but in the i64 range, so use - // the default expansion. - if (!Subtarget.hasFPExtension()) - setOperationAction(ISD::FP_TO_UINT, VT, Expand); - - // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all - // default to Expand, so need to be modified to Legal where appropriate. - setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal); - if (Subtarget.hasFPExtension()) - setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal); - - // And similarly for STRICT_[SU]INT_TO_FP. - setOperationAction(ISD::STRICT_SINT_TO_FP, VT, Legal); - if (Subtarget.hasFPExtension()) - setOperationAction(ISD::STRICT_UINT_TO_FP, VT, Legal); + // The fp<=>int conversions are all Legal except for f16 and unsigned + // on z10 - only z196 and above have native support for conversions to + // unsigned. The Custom handlings for all these nodes only modify f16 + // cases. 
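+      // (For example, an f16 FP_TO_SINT is lowered by extending the input
+      // to f32 first and converting from there; see lower_FP_TO_INT below.)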
+ for (auto Op : {ISD::FP_TO_SINT, ISD::SINT_TO_FP, ISD::STRICT_FP_TO_SINT, + ISD::STRICT_SINT_TO_FP}) + setOperationAction(Op, VT, Custom); + // On z10, promoting the result to i64 doesn't generate an inexact + // condition for values that are outside the i32 range but in the i64 + // range, so use the default expansion. + for (auto Op : {ISD::FP_TO_UINT, ISD::STRICT_FP_TO_UINT}) + setOperationAction(Op, VT, + Subtarget.hasFPExtension() ? Custom : Expand); + for (auto Op : {ISD::UINT_TO_FP, ISD::STRICT_UINT_TO_FP}) { + // Handle unsigned 32-bit input types as signed 64-bit types on z10. + auto ActionZ10 = VT == MVT::i32 ? Promote : Expand; + setOperationAction(Op, VT, + Subtarget.hasFPExtension() ? Custom : ActionZ10); + } } } @@ -345,15 +347,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, // Traps are legal, as we will convert them to "j .+2". setOperationAction(ISD::TRAP, MVT::Other, Legal); - // z10 has instructions for signed but not unsigned FP conversion. - // Handle unsigned 32-bit types as signed 64-bit types. - if (!Subtarget.hasFPExtension()) { - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); - setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); - setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Promote); - setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand); - } - // We have native support for a 64-bit CTLZ, via FLOGR. setOperationAction(ISD::CTLZ, MVT::i32, Promote); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote); @@ -550,18 +543,22 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, } // Handle floating-point types. - // Promote all f16 operations to float, with some exceptions below. - for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) - setOperationAction(Opc, MVT::f16, Promote); - setOperationAction(ISD::ConstantFP, MVT::f16, Expand); - for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) { - setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand); - setTruncStoreAction(VT, MVT::f16, Expand); - } - for (auto Op : {ISD::LOAD, ISD::ATOMIC_LOAD, ISD::STORE, ISD::ATOMIC_STORE}) - setOperationAction(Op, MVT::f16, Subtarget.hasVector() ? Legal : Custom); - setOperationAction(ISD::FP_ROUND, MVT::f16, LibCall); - setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, LibCall); + if (!useSoftFloat()) { + // Promote all f16 operations to float, with some exceptions below. + for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) + setOperationAction(Opc, MVT::f16, Promote); + setOperationAction(ISD::ConstantFP, MVT::f16, Expand); + for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) { + setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand); + setTruncStoreAction(VT, MVT::f16, Expand); + } + for (auto Op : {ISD::LOAD, ISD::ATOMIC_LOAD, ISD::STORE, ISD::ATOMIC_STORE}) + setOperationAction(Op, MVT::f16, Subtarget.hasVector() ? Legal : Custom); + setOperationAction(ISD::FP_ROUND, MVT::f16, LibCall); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, LibCall); + setOperationAction(ISD::BITCAST, MVT::i16, Custom); + setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom); + } for (unsigned I = MVT::FIRST_FP_VALUETYPE; I <= MVT::LAST_FP_VALUETYPE; @@ -590,6 +587,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, // Special treatment. setOperationAction(ISD::IS_FPCLASS, VT, Custom); + setOperationAction(ISD::FCOPYSIGN, VT, Custom); // Handle constrained floating-point operations. 
       setOperationAction(ISD::STRICT_FADD, VT, Legal);
@@ -834,9 +832,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
   // Default to having -disable-strictnode-mutation on
   IsStrictFPEnabled = true;
 
-  setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
-  setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
-
   if (Subtarget.isTargetzOS()) {
     struct RTLibCallMapping {
       RTLIB::Libcall Code;
@@ -1628,7 +1623,9 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
     case 'f': // Floating-point register
       if (!useSoftFloat()) {
-        if (VT.getSizeInBits() == 64)
+        if (VT.getSizeInBits() == 16)
+          return std::make_pair(0U, &SystemZ::FP16BitRegClass);
+        else if (VT.getSizeInBits() == 64)
           return std::make_pair(0U, &SystemZ::FP64BitRegClass);
         else if (VT.getSizeInBits() == 128)
           return std::make_pair(0U, &SystemZ::FP128BitRegClass);
@@ -1638,6 +1635,8 @@
     case 'v': // Vector register
       if (Subtarget.hasVector()) {
+        if (VT.getSizeInBits() == 16)
+          return std::make_pair(0U, &SystemZ::VR16BitRegClass);
         if (VT.getSizeInBits() == 32)
           return std::make_pair(0U, &SystemZ::VR32BitRegClass);
         if (VT.getSizeInBits() == 64)
@@ -1673,6 +1672,9 @@
       if (useSoftFloat())
         return std::make_pair(
             0u, static_cast<const TargetRegisterClass *>(nullptr));
+      if (getVTSizeInBits() == 16)
+        return parseRegisterNumber(Constraint, &SystemZ::FP16BitRegClass,
+                                   SystemZMC::FP16Regs, 16);
       if (getVTSizeInBits() == 32)
         return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
                                    SystemZMC::FP32Regs, 16);
@@ -1686,6 +1688,9 @@
       if (!Subtarget.hasVector())
         return std::make_pair(
            0u, static_cast<const TargetRegisterClass *>(nullptr));
+      if (getVTSizeInBits() == 16)
+        return parseRegisterNumber(Constraint, &SystemZ::VR16BitRegClass,
+                                   SystemZMC::VR16Regs, 32);
       if (getVTSizeInBits() == 32)
        return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
                                   SystemZMC::VR32Regs, 32);
@@ -2766,13 +2771,21 @@ static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
 static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
                                    unsigned Opcode) {
   // Copy all operands except the intrinsic ID.
+  SDLoc DL(Op);
   unsigned NumOps = Op.getNumOperands();
   SmallVector<SDValue, 6> Ops;
   Ops.reserve(NumOps - 1);
-  for (unsigned I = 1; I < NumOps; ++I)
-    Ops.push_back(Op.getOperand(I));
+  for (unsigned I = 1; I < NumOps; ++I) {
+    SDValue CurrOper = Op.getOperand(I);
+    if (CurrOper.getValueType() == MVT::f16) {
+      assert((Op.getConstantOperandVal(0) == Intrinsic::s390_tdc && I == 1) &&
+             "Unhandled intrinsic with f16 operand.");
+      CurrOper = DAG.getFPExtendOrRound(CurrOper, DL, MVT::f32);
+    }
+    Ops.push_back(CurrOper);
+  }
 
-  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
+  SDValue Intr = DAG.getNode(Opcode, DL, Op->getVTList(), Ops);
   return Intr.getNode();
 }
 
@@ -3914,6 +3927,14 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
   SDLoc DL(Op);
 
+  // SELECT_CC involving f16 will not have the cmp-ops promoted by the
+  // legalizer, as it will be handled according to the type of the resulting
+  // value. Extend them here if needed.
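+  // The comparison is then done in f32, consistent with the general
+  // promotion of f16 operations to float in this backend.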
+  if (CmpOp0.getSimpleValueType() == MVT::f16) {
+    CmpOp0 = DAG.getFPExtendOrRound(CmpOp0, SDLoc(CmpOp0), MVT::f32);
+    CmpOp1 = DAG.getFPExtendOrRound(CmpOp1, SDLoc(CmpOp1), MVT::f32);
+  }
+
   Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
 
   // Check for absolute and negative-absolute selections, including those
@@ -6786,15 +6807,97 @@ SDValue SystemZTargetLowering::lowerFP_EXTEND(SDValue Op,
                                               SelectionDAG &DAG) const {
   SDValue In = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
   if (In.getSimpleValueType() != MVT::f16)
-    return Op;  // Legal
+    return Op; // Legal
   return SDValue(); // Let legalizer emit the libcall.
 }
 
+SDValue SystemZTargetLowering::lower_FP_TO_INT(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  SDValue In = Op.getOperand(0);
+  if (In.getSimpleValueType() != MVT::f16)
+    return Op; // Legal
+
+  // f16: Extend to f32 before the operation.
+  SDLoc DL(Op);
+  SDValue InF32 = DAG.getFPExtendOrRound(In, SDLoc(In), MVT::f32);
+  return DAG.getNode(Op->getOpcode(), DL, Op.getSimpleValueType(), InF32);
+}
+
+SDValue SystemZTargetLowering::lowerSTRICT_FP_TO_INT(SDValue Op,
+                                                     SelectionDAG &DAG) const {
+  SDValue Chain = Op.getOperand(0);
+  SDValue In = Op.getOperand(1);
+
+  if (In.getSimpleValueType() != MVT::f16)
+    return Op; // Legal
+
+  // f16: Extend to f32 before the operation.
+  SDLoc DL(Op);
+  SDValue InF32 = DAG.getFPExtendOrRound(In, SDLoc(In), MVT::f32);
+  return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), {Chain, InF32});
+}
+
+SDValue SystemZTargetLowering::lower_INT_TO_FP(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  if (Op.getSimpleValueType() != MVT::f16)
+    return Op; // Legal
+
+  // f16: first perform the operation in f32 and then round to f16.
+  SDLoc DL(Op);
+  SDValue F32Conv =
+      DAG.getNode(Op->getOpcode(), DL, MVT::f32, Op->getOperand(0));
+  return DAG.getFPExtendOrRound(F32Conv, DL, MVT::f16);
+}
+
+SDValue SystemZTargetLowering::lowerSTRICT_INT_TO_FP(SDValue Op,
+                                                     SelectionDAG &DAG) const {
+  if (Op.getSimpleValueType() != MVT::f16)
+    return Op; // Legal
+
+  // f16: first perform the operation in f32 and then round to f16.
+  SDLoc DL(Op);
+  SDValue F32Conv =
+      DAG.getNode(Op->getOpcode(), DL, DAG.getVTList(MVT::f32, MVT::Other),
+                  {Op->getOperand(0), Op->getOperand(1)});
+  SDValue F16Res = DAG.getFPExtendOrRound(F32Conv, DL, MVT::f16);
+  return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), F16Res,
+                     F32Conv.getValue(1));
+}
+
+// Shift the lower 2 bytes of Op to the left in order to insert into the
+// upper 2 bytes of the FP register.
+static SDValue convertToF16(SDValue Op, SelectionDAG &DAG) {
+  assert(Op.getSimpleValueType() == MVT::i32 &&
+         "Expected to convert i32 to f16.");
+  SDLoc DL(Op);
+  SDValue Shft = DAG.getNode(ISD::SHL, DL, MVT::i32, Op,
+                             DAG.getConstant(16, DL, MVT::i32));
+  SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Shft);
+  SDValue F16Val =
+      DAG.getTargetExtractSubreg(SystemZ::subreg_h16, DL, MVT::f16, BCast);
+  return F16Val;
+}
+
+// Extract Op into a GPR and shift the 2 f16 bytes to the right.
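+// For example, half 1.0 (0x3C00) occupies the high halfword of the FPR,
+// i.e. the 32-bit word 0x3C00xxxx (low halfword undefined); the shift right
+// by 16 yields 0x00003C00 in the GPR, the exact inverse of convertToF16
+// above.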
+static SDValue convertFromF16(SDValue Op, SDLoc DL, SelectionDAG &DAG) {
+  assert(Op.getSimpleValueType() == MVT::f16 &&
+         "Expected to convert f16 to i32.");
+  SDNode *U32 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f32);
+  SDValue In32 = DAG.getTargetInsertSubreg(SystemZ::subreg_h16, DL, MVT::f32,
+                                           SDValue(U32, 0), Op);
+  SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::i32, In32);
+  SDValue Shft = DAG.getNode(ISD::SRL, DL, MVT::i32, BCast,
+                             DAG.getConstant(16, DL, MVT::i32));
+  return Shft;
+}
+
+// Lower an f16 LOAD in case of no vector support.
 SDValue SystemZTargetLowering::lowerLoadF16(SDValue Op,
                                             SelectionDAG &DAG) const {
   MVT RegVT = Op.getSimpleValueType();
   assert(RegVT == MVT::f16 && "Expected to lower an f16 load.");
 
+  // Load as integer.
   SDLoc DL(Op);
   SDValue NewLd;
   if (auto *AtomicLd = dyn_cast<AtomicSDNode>(Op.getNode())) {
@@ -6806,43 +6909,29 @@ SDValue SystemZTargetLowering::lowerLoadF16(SDValue Op,
   } else {
     LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
     assert(EVT(RegVT) == Ld->getMemoryVT() && "Unhandled f16 load");
-    NewLd = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Ld->getChain(),
-                           Ld->getBasePtr(), Ld->getPointerInfo(),
-                           MVT::i16, Ld->getOriginalAlign(),
-                           Ld->getMemOperand()->getFlags());
-  }
-  // Load as integer, shift and then insert into upper 2 bytes of the FP
-  // register.
-  SDValue Shft = DAG.getNode(ISD::SHL, DL, MVT::i32, NewLd,
-                             DAG.getConstant(16, DL, MVT::i32));
-  SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Shft);
-  SDValue F16Val = DAG.getTargetExtractSubreg(SystemZ::subreg_h16,
-                                              DL, MVT::f16, BCast);
+    NewLd =
+        DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Ld->getChain(),
+                       Ld->getBasePtr(), Ld->getPointerInfo(), MVT::i16,
+                       Ld->getOriginalAlign(), Ld->getMemOperand()->getFlags());
+  }
+  SDValue F16Val = convertToF16(NewLd, DAG);
   return DAG.getMergeValues({F16Val, NewLd.getValue(1)}, DL);
 }
 
+// Lower an f16 STORE in case of no vector support.
 SDValue SystemZTargetLowering::lowerStoreF16(SDValue Op,
                                              SelectionDAG &DAG) const {
-  SDValue StoredVal = Op->getOperand(1);
-  MVT StoreVT = StoredVal.getSimpleValueType();
-  assert(StoreVT == MVT::f16 && "Expected to lower an f16 store.");
-
-  // Move into a GPR, shift and store the 2 bytes.
   SDLoc DL(Op);
-  SDNode *U32 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f32);
-  SDValue In32 = DAG.getTargetInsertSubreg(SystemZ::subreg_h16, DL,
-                                           MVT::f32, SDValue(U32, 0), StoredVal);
-  SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::i32, In32);
-  SDValue Shft = DAG.getNode(ISD::SRL, DL, MVT::i32, BCast,
-                             DAG.getConstant(16, DL, MVT::i32));
+  SDValue Shft = convertFromF16(Op->getOperand(1), DL, DAG);
 
   if (auto *AtomicSt = dyn_cast<AtomicSDNode>(Op.getNode()))
     return DAG.getAtomic(ISD::ATOMIC_STORE, DL, MVT::i16, AtomicSt->getChain(),
-                         Shft, AtomicSt->getBasePtr(), AtomicSt->getMemOperand());
+                         Shft, AtomicSt->getBasePtr(),
+                         AtomicSt->getMemOperand());
 
   StoreSDNode *St = cast<StoreSDNode>(Op.getNode());
-  return DAG.getTruncStore(St->getChain(), DL, Shft, St->getBasePtr(),
-                           MVT::i16, St->getMemOperand());
+  return DAG.getTruncStore(St->getChain(), DL, Shft, St->getBasePtr(), MVT::i16,
+                           St->getMemOperand());
 }
 
 SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
@@ -6875,10 +6964,27 @@ SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
     TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
   SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64);
 
+  if (Arg.getSimpleValueType() == MVT::f16)
+    Arg = DAG.getFPExtendOrRound(Arg, SDLoc(Arg), MVT::f32);
   SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV);
   return getCCResult(DAG, Intr);
 }
 
+SDValue SystemZTargetLowering::lowerFCOPYSIGN(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  SDValue SignArg = Op.getOperand(1);
+  if (SignArg.getSimpleValueType() != MVT::f16)
+    return Op; // Legal
+
+  // f16: Extend SignArg to f32. The DAGCombiner would otherwise remove the
+  // fpext, but it is needed as there is no target instruction handling f16.
+  SDValue SignArgF32 =
+      DAG.getFPExtendOrRound(SignArg, SDLoc(SignArg), MVT::f32);
+  return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(),
+                     {Op.getOperand(0), SignArgF32});
+}
+
 SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op,
                                                      SelectionDAG &DAG) const {
   SDLoc DL(Op);
@@ -7034,12 +7140,26 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
   case ISD::FP_EXTEND:
   case ISD::STRICT_FP_EXTEND:
     return lowerFP_EXTEND(Op, DAG);
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+    return lower_FP_TO_INT(Op, DAG);
+  case ISD::STRICT_FP_TO_SINT:
+  case ISD::STRICT_FP_TO_UINT:
+    return lowerSTRICT_FP_TO_INT(Op, DAG);
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+    return lower_INT_TO_FP(Op, DAG);
+  case ISD::STRICT_SINT_TO_FP:
+  case ISD::STRICT_UINT_TO_FP:
+    return lowerSTRICT_INT_TO_FP(Op, DAG);
   case ISD::LOAD:
     return lowerLoadF16(Op, DAG);
   case ISD::STORE:
     return lowerStoreF16(Op, DAG);
   case ISD::IS_FPCLASS:
     return lowerIS_FPCLASS(Op, DAG);
+  case ISD::FCOPYSIGN:
+    return lowerFCOPYSIGN(Op, DAG);
   case ISD::GET_ROUNDING:
     return lowerGET_ROUNDING(Op, DAG);
   case ISD::READCYCLECOUNTER:
@@ -7101,8 +7221,7 @@ static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src,
   return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i128, Lo, Hi);
 }
 
-// Lower operations with invalid operand or result types (currently used
-// only for 128-bit integer types).
+// Lower operations with invalid operand or result types.
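+// Originally this was needed only for 128-bit integer types; the f16
+// additions route the i16 <-> f16 bitcasts here as well, since i16 is not a
+// legal result type.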
 void SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
                                              SmallVectorImpl<SDValue> &Results,
@@ -7162,11 +7281,29 @@ SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
     break;
   }
   case ISD::BITCAST: {
+    if (useSoftFloat())
+      return;
+    SDLoc DL(N);
     SDValue Src = N->getOperand(0);
-    if (N->getValueType(0) == MVT::i128 && Src.getValueType() == MVT::f128 &&
-        !useSoftFloat()) {
-      SDLoc DL(N);
+    EVT SrcVT = Src.getValueType();
+    EVT ResVT = N->getValueType(0);
+    if (ResVT == MVT::i128 && SrcVT == MVT::f128)
       Results.push_back(expandBitCastF128ToI128(DAG, Src, DL));
+    else if (SrcVT == MVT::i16 && ResVT == MVT::f16) {
+      SDValue In32 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Src);
+      SDValue Res =
+          Subtarget.hasVector()
+              ? SDValue(
+                    DAG.getMachineNode(SystemZ::LEFR_16, DL, MVT::f16, In32), 0)
+              : convertToF16(In32, DAG);
+      Results.push_back(Res);
+    } else if (SrcVT == MVT::f16 && ResVT == MVT::i16) {
+      SDValue ExtractedI32 =
+          Subtarget.hasVector()
+              ? SDValue(DAG.getMachineNode(SystemZ::LFER_16, DL, MVT::i32, Src),
+                        0)
+              : convertFromF16(Src, DL, DAG);
+      Results.push_back(DAG.getZExtOrTrunc(ExtractedI32, DL, ResVT));
     }
     break;
   }
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 241acdea77c5c..f7240dae8c4ab 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -746,10 +746,15 @@ class SystemZTargetLowering : public TargetLowering {
   SDValue lowerFSHL(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFSHR(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lower_FP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerSTRICT_FP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lower_INT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerSTRICT_INT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerLoadF16(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerStoreF16(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
index b258ab49cc1e2..712d0a73272b1 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -50,9 +50,9 @@ let isMoveReg = 1 in {
   def LXR : UnaryRRE<"lxr", 0xB365, null_frag, FP128, FP128>;
   // For z13 we prefer LDR over LER to avoid partial register dependencies.
   let isCodeGenOnly = 1 in {
-    def LER16 : UnaryRR <"ler", 0x38, null_frag, FP16, FP16>;
-    def LDR16 : UnaryRR<"ldr", 0x28, null_frag, FP16, FP16>;
-    def LDR32 : UnaryRR<"ldr", 0x28, null_frag, FP32, FP32>;
+    def LER16 : UnaryRR<"ler", 0x38, null_frag, FP16, FP16>;
+    def LDR16 : UnaryRR<"ldr", 0x28, null_frag, FP16, FP16>;
+    def LDR32 : UnaryRR<"ldr", 0x28, null_frag, FP32, FP32>;
   }
 }
 
@@ -136,6 +136,9 @@ defm LoadStoreF128 : MVCLoadStore;
 //===----------------------------------------------------------------------===//
 
 let canFoldAsLoad = 1, SimpleBDXLoad = 1, mayLoad = 1 in {
+  let isCodeGenOnly = 1 in
+  // Reload f16 from 4-byte spill slot.
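+  // LE16/STE16 are plain LE/STE restricted to the FP16 register class: the
+  // halfword of interest sits in the high 16 bits of the FPR word, so a
+  // full 4-byte load/store round-trips it (the low bits are don't-care).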
+ defm LE16 : UnaryRXPair<"le", 0x78, 0xED64, z_load, FP16, 4>; defm LE : UnaryRXPair<"le", 0x78, 0xED64, z_load, FP32, 4>; defm LD : UnaryRXPair<"ld", 0x68, 0xED65, z_load, FP64, 8>; @@ -156,6 +159,9 @@ let canFoldAsLoad = 1, SimpleBDXLoad = 1, mayLoad = 1 in { //===----------------------------------------------------------------------===// let SimpleBDXStore = 1, mayStore = 1 in { + let isCodeGenOnly = 1 in + // Spill f16 to 4-byte spill slot. + defm STE16 : StoreRXPair<"ste", 0x70, 0xED66, store, FP16, 4>; defm STE : StoreRXPair<"ste", 0x70, 0xED66, store, FP32, 4>; defm STD : StoreRXPair<"std", 0x60, 0xED67, store, FP64, 8>; @@ -241,7 +247,7 @@ let Uses = [FPC], mayRaiseFPException = 1, Predicates = [FeatureFPExtension] in def CXGBRA : TernaryRRFe<"cxgbra", 0xB3A6, FP128, GR64>; } -// Convert am unsigned integer register value to a floating-point one. +// Convert an unsigned integer register value to a floating-point one. let Predicates = [FeatureFPExtension] in { let Uses = [FPC], mayRaiseFPException = 1 in { def CELFBR : TernaryRRFe<"celfbr", 0xB390, FP32, GR32>; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index faeec2ada9bf1..1ae3994eb0e01 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -61,7 +61,8 @@ void SystemZInstrInfo::anchor() {} SystemZInstrInfo::SystemZInstrInfo(SystemZSubtarget &sti) : SystemZGenInstrInfo(-1, -1), - RI(sti.getSpecialRegisters()->getReturnFunctionAddressRegister()), + RI(sti.getSpecialRegisters()->getReturnFunctionAddressRegister(), + sti.getHwMode()), STI(sti) {} // MI is a 128-bit load or store. Split it into two 64-bit loads or stores, @@ -1024,31 +1025,8 @@ void SystemZInstrInfo::storeRegToStackSlot( bool isKill, int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags) const { - MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); - // Without vector support, there are no fp16 load/store instructions, so - // need to save/restore via GPR. - if (RC == &SystemZ::FP16BitRegClass && !STI.hasVector()) { - assert(!MRI.isSSA() && MRI.getNumVirtRegs() && - "Expected non-SSA form with virtual registers."); - Register GR64Reg = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass); - Register FP64Reg = MRI.createVirtualRegister(&SystemZ::FP64BitRegClass); - BuildMI(MBB, MBBI, DL, get(SystemZ::COPY)) - .addReg(FP64Reg, RegState::DefineNoRead, SystemZ::subreg_h16) - .addReg(SrcReg, getKillRegState(isKill)); - BuildMI(MBB, MBBI, DL, get(SystemZ::LGDR), GR64Reg) - .addReg(FP64Reg, RegState::Kill); - BuildMI(MBB, MBBI, DL, get(SystemZ::SRLG), GR64Reg) - .addReg(GR64Reg) - .addReg(0) - .addImm(48); - addFrameReference(BuildMI(MBB, MBBI, DL, get(SystemZ::STH)) - .addReg(GR64Reg, RegState::Kill, SystemZ::subreg_l32), - FrameIdx); - return; - } - // Callers may expect a single instruction, so keep 128-bit moves // together for now and lower them after register allocation. unsigned LoadOpcode, StoreOpcode; @@ -1062,31 +1040,8 @@ void SystemZInstrInfo::loadRegFromStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags) const { - MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); DebugLoc DL = MBBI != MBB.end() ? 
MBBI->getDebugLoc() : DebugLoc(); - // Without vector support, there are no fp16 load/store instructions, so - // need to save/restore via GPR. - if (RC == &SystemZ::FP16BitRegClass && !STI.hasVector()) { - assert(!MRI.isSSA() && MRI.getNumVirtRegs() && - "Expected non-SSA form with virtual registers."); - Register GR64Reg = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass); - Register FP64Reg = MRI.createVirtualRegister(&SystemZ::FP64BitRegClass); - addFrameReference(BuildMI(MBB, MBBI, DL, get(SystemZ::LH)) - .addReg(GR64Reg, RegState::DefineNoRead, - SystemZ::subreg_l32), - FrameIdx); - BuildMI(MBB, MBBI, DL, get(SystemZ::SLLG), GR64Reg) - .addReg(GR64Reg) - .addReg(0) - .addImm(48); - BuildMI(MBB, MBBI, DL, get(SystemZ::LDGR), FP64Reg) - .addReg(GR64Reg, RegState::Kill); - BuildMI(MBB, MBBI, DL, get(SystemZ::COPY), DestReg) - .addReg(FP64Reg, RegState::Kill, SystemZ::subreg_h16); - return; - } - // Callers may expect a single instruction, so keep 128-bit moves // together for now and lower them after register allocation. unsigned LoadOpcode, StoreOpcode; @@ -1307,9 +1262,10 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( return nullptr; unsigned OpNum = Ops[0]; - assert(Size * 8 == - TRI->getRegSizeInBits(*MF.getRegInfo() - .getRegClass(MI.getOperand(OpNum).getReg())) && + const TargetRegisterClass *RC = + MF.getRegInfo().getRegClass(MI.getOperand(OpNum).getReg()); + assert((Size * 8 == TRI->getRegSizeInBits(*RC) || + (RC == &SystemZ::FP16BitRegClass && Size == 4 && !STI.hasVector())) && "Invalid size combination"); if ((Opcode == SystemZ::AHI || Opcode == SystemZ::AGHI) && OpNum == 0 && @@ -1948,6 +1904,9 @@ void SystemZInstrInfo::getLoadStoreOpcodes(const TargetRegisterClass *RC, RC == &SystemZ::ADDR128BitRegClass) { LoadOpcode = SystemZ::L128; StoreOpcode = SystemZ::ST128; + } else if (RC == &SystemZ::FP16BitRegClass && !STI.hasVector()) { + LoadOpcode = SystemZ::LE16; + StoreOpcode = SystemZ::STE16; } else if (RC == &SystemZ::FP32BitRegClass) { LoadOpcode = SystemZ::LE; StoreOpcode = SystemZ::STE; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td index 7a240e7269516..9271d129f8504 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td @@ -2210,6 +2210,8 @@ let Predicates = [FeatureVector] in { def : Pat<(f32 (bitconvert (i32 GR32:$src))), (LEFR GR32:$src)>; def : Pat<(i32 (bitconvert (f32 VR32:$src))), (EXTRACT_SUBREG (LFER VR32:$src), subreg_l32)>; + def LEFR_16 : UnaryAliasVRS; + def LFER_16 : UnaryAliasVRS; } // Floating-point values are stored in element 0 of the corresponding diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 1e0c043682157..177f2c50e4f01 100644 --- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -232,8 +232,9 @@ SystemZELFRegisters::getCallPreservedMask(const MachineFunction &MF, return CSR_SystemZ_ELF_RegMask; } -SystemZRegisterInfo::SystemZRegisterInfo(unsigned int RA) - : SystemZGenRegisterInfo(RA) {} +SystemZRegisterInfo::SystemZRegisterInfo(unsigned int RA, unsigned int HwMode) + : SystemZGenRegisterInfo(RA, /*DwarfFlavour=*/0, /*EHFlavour=*/0, /*PC=*/0, + HwMode) {} const MCPhysReg * SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h index 4f497f8d23d29..460be432811a4 100644 --- 
a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -129,7 +129,7 @@ class SystemZELFRegisters : public SystemZCallingConventionRegisters {
 
 struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
 public:
-  SystemZRegisterInfo(unsigned int RA);
+  SystemZRegisterInfo(unsigned int RA, unsigned int HwMode);
 
   /// getPointerRegClass - Return the register class to use to hold pointers.
   /// This is currently only used by LOAD_STACK_GUARD, which requires a non-%r0
diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
index 1dfe264b501b1..e79f12b449a88 100644
--- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -35,7 +35,9 @@ def subreg_ll32 : ComposedSubRegIndex<subreg_l64, subreg_l32>;
 // If the user provides an alternate order list of regs, it will be used for
 // XPLINK. Otherwise, by default, XPLINK will use the regList ordering as well
 multiclass SystemZRegClass<string name, list<ValueType> types, int size,
-                           dag regList, list<dag> altRegList = [regList], bit allocatable = 1> {
+                           dag regList, list<dag> altRegList = [regList],
+                           bit allocatable = 1,
+                           RegInfoByHwMode RI = RegInfoByHwMode<[], []>> {
   def AsmOperand : AsmOperandClass {
     let Name = name;
     let ParserMethod = "parse"#name;
@@ -49,6 +51,7 @@ multiclass SystemZRegClass<string name, list<ValueType> types, int size,
       const SystemZSubtarget &S = MF.getSubtarget<SystemZSubtarget>();
       return S.isTargetXPLINK64();
     }];
+    let RegInfos = RI;
   }
   def "" : RegisterOperand<!cast<RegisterClass>(name#"Bit")> {
     let ParserMatchClass = !cast<AsmOperandClass>(name#"AsmOperand");
@@ -250,7 +253,11 @@ foreach I = [0, 1, 4, 5, 8, 9, 12, 13] in {
 
 // There's no store-multiple instruction for FPRs, so we're not fussy
 // about the order in which call-saved registers are allocated.
-defm FP16 : SystemZRegClass<"FP16", [f16], 16, (sequence "F%uH", 0, 15)>;
+// Adjust the spill size of f16 to 32 bits in case of no vector support.
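+// Without the vector facility there is no 2-byte FPR load/store, so spills
+// use the 4-byte LE16/STE16 instructions; RegInfoByHwMode gives the class a
+// 16-bit spill slot with vector support and a 32-bit one without.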
+def FP16RI : RegInfoByHwMode<[DefaultMode, NoVecHwMode], + [RegInfo<16,16,16>, RegInfo<16,32,32>]>; +defm FP16 : SystemZRegClass<"FP16", [f16], 16, (sequence "F%uH", 0, 15), + [(sequence "F%uH", 0, 15)], 1, FP16RI>; defm FP32 : SystemZRegClass<"FP32", [f32], 32, (sequence "F%uS", 0, 15)>; defm FP64 : SystemZRegClass<"FP64", [f64], 64, (sequence "F%uD", 0, 15)>; defm FP128 : SystemZRegClass<"FP128", [f128], 128, diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td index c059f3ececef6..7213c31c4d522 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td @@ -793,7 +793,7 @@ def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>; // FP: Load instructions //===----------------------------------------------------------------------===// -def : InstRW<[WLat2LSU, VecXsPm, LSU, NormalGr], (instregex "LE(Y)?$")>; +def : InstRW<[WLat2LSU, VecXsPm, LSU, NormalGr], (instregex "L(E16|E)(Y)?$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LD(Y|E32)?$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LX$")>; @@ -801,7 +801,7 @@ def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LX$")>; // FP: Store instructions //===----------------------------------------------------------------------===// -def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(E|D)(Y)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(E16|E|D)(Y)?$")>; def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STX$")>; //===----------------------------------------------------------------------===// @@ -1376,8 +1376,8 @@ def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)DB$")>; // Vector: Floating-point insertion and extraction //===----------------------------------------------------------------------===// -def : InstRW<[WLat1, FXb, NormalGr], (instregex "LEFR$")>; -def : InstRW<[WLat4, FXb, NormalGr], (instregex "LFER$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LEFR(_16)?$")>; +def : InstRW<[WLat4, FXb, NormalGr], (instregex "LFER(_16)?$")>; //===----------------------------------------------------------------------===// // Vector: String instructions diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td index bd3d6678fc6a8..e3d2c2e9373d6 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td @@ -813,7 +813,7 @@ def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>; // FP: Load instructions //===----------------------------------------------------------------------===// -def : InstRW<[WLat2LSU, VecXsPm, LSU, NormalGr], (instregex "LE(Y)?$")>; +def : InstRW<[WLat2LSU, VecXsPm, LSU, NormalGr], (instregex "L(E16|E)(Y)?$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LD(Y|E32)?$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LX$")>; @@ -821,7 +821,7 @@ def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LX$")>; // FP: Store instructions //===----------------------------------------------------------------------===// -def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(E|D)(Y)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(E16|E|D)(Y)?$")>; def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STX$")>; //===----------------------------------------------------------------------===// @@ -1448,8 +1448,8 @@ def : InstRW<[WLat3, VecDFX, NormalGr], (instregex "WF(C|K)XB$")>; // Vector: 
Floating-point insertion and extraction //===----------------------------------------------------------------------===// -def : InstRW<[WLat1, FXb, NormalGr], (instregex "LEFR$")>; -def : InstRW<[WLat3, FXb, NormalGr], (instregex "LFER$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LEFR(_16)?$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "LFER(_16)?$")>; //===----------------------------------------------------------------------===// // Vector: String instructions diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td index 4d8e1d9f78dc7..f13988133ac24 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td @@ -831,7 +831,7 @@ def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>; // FP: Load instructions //===----------------------------------------------------------------------===// -def : InstRW<[WLat2LSU, VecXsPm, LSU, NormalGr], (instregex "LE(Y)?$")>; +def : InstRW<[WLat2LSU, VecXsPm, LSU, NormalGr], (instregex "L(E16|E)(Y)?$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LD(Y|E32)?$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LX$")>; @@ -839,7 +839,7 @@ def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LX$")>; // FP: Store instructions //===----------------------------------------------------------------------===// -def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(E|D)(Y)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(E16|E|D)(Y)?$")>; def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STX$")>; //===----------------------------------------------------------------------===// @@ -1491,8 +1491,8 @@ def : InstRW<[WLat3, VecDFX, NormalGr], (instregex "WF(C|K)XB$")>; // Vector: Floating-point insertion and extraction //===----------------------------------------------------------------------===// -def : InstRW<[WLat1, FXb, NormalGr], (instregex "LEFR$")>; -def : InstRW<[WLat3, FXb, NormalGr], (instregex "LFER$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LEFR(_16)?$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "LFER(_16)?$")>; //===----------------------------------------------------------------------===// // Vector: String instructions diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td index 7791472efbcfb..739eaf340ef69 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td @@ -832,7 +832,7 @@ def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>; // FP: Load instructions //===----------------------------------------------------------------------===// -def : InstRW<[WLat2LSU, VecXsPm, LSU, NormalGr], (instregex "LE(Y)?$")>; +def : InstRW<[WLat2LSU, VecXsPm, LSU, NormalGr], (instregex "L(E16|E)(Y)?$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LD(Y|E32)?$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LX$")>; @@ -840,7 +840,7 @@ def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LX$")>; // FP: Store instructions //===----------------------------------------------------------------------===// -def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(E|D)(Y)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(E16|E|D)(Y)?$")>; def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STX$")>; //===----------------------------------------------------------------------===// @@ -1499,8 +1499,8 @@ def : InstRW<[WLat3, 
VecDFX, NormalGr], (instregex "WF(C|K)XB$")>; // Vector: Floating-point insertion and extraction //===----------------------------------------------------------------------===// -def : InstRW<[WLat1, FXb, NormalGr], (instregex "LEFR$")>; -def : InstRW<[WLat3, FXb, NormalGr], (instregex "LFER$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LEFR(_16)?$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "LFER(_16)?$")>; //===----------------------------------------------------------------------===// // Vector: String instructions diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td index e67c207833f4d..a898151217aa5 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td @@ -725,14 +725,14 @@ def : InstRW<[WLat5, FXU2, GroupAlone], (instregex "CPSDR(d|s)(d|s)$")>; // FP: Load instructions //===----------------------------------------------------------------------===// -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L(E|D)(Y|E32)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L(E16|E|D)(Y|E32)?$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LX$")>; //===----------------------------------------------------------------------===// // FP: Store instructions //===----------------------------------------------------------------------===// -def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "ST(E|D)(Y)?$")>; +def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "ST(E16|E|D)(Y)?$")>; def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "STX$")>; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td b/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td index 465cf8ae392c6..00237c2407be8 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td @@ -763,14 +763,14 @@ def : InstRW<[WLat5, FXU2, GroupAlone], (instregex "CPSDR(d|s)(d|s)$")>; // FP: Load instructions //===----------------------------------------------------------------------===// -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L(E|D)(Y|E32)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L(E16|E|D)(Y|E32)?$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LX$")>; //===----------------------------------------------------------------------===// // FP: Store instructions //===----------------------------------------------------------------------===// -def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "ST(E|D)(Y)?$")>; +def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "ST(E16|E|D)(Y)?$")>; def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "STX$")>; //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/SystemZ/asm-10.ll b/llvm/test/CodeGen/SystemZ/asm-10.ll index b71db8350781d..8226b8a1a2d25 100644 --- a/llvm/test/CodeGen/SystemZ/asm-10.ll +++ b/llvm/test/CodeGen/SystemZ/asm-10.ll @@ -2,6 +2,15 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -no-integrated-as | FileCheck %s +define half @f0() { +; CHECK-LABEL: f0: +; CHECK: lzer %f1 +; CHECK: blah %f0 %f1 +; CHECK: br %r14 + %val = call half asm "blah $0 $1", "=&f,f" (half 0.0) + ret half %val +} + define float @f1() { ; CHECK-LABEL: f1: ; CHECK: lzer %f1 diff --git a/llvm/test/CodeGen/SystemZ/asm-17.ll b/llvm/test/CodeGen/SystemZ/asm-17.ll index c9c4d73c66ebb..dad75d4d012d1 100644 --- 
a/llvm/test/CodeGen/SystemZ/asm-17.ll +++ b/llvm/test/CodeGen/SystemZ/asm-17.ll @@ -25,6 +25,17 @@ define i64 @f2() { ret i64 %ret } +; Test 16-bit FPRs. +define half @f3_half() { +; CHECK-LABEL: f3_half: +; CHECK: lzer %f4 +; CHECK: blah %f4 +; CHECK: ler %f0, %f4 +; CHECK: br %r14 + %ret = call half asm "blah $0", "={f4},0" (half 0.0) + ret half %ret +} + ; Test i32 FPRs. define float @f3() { ; CHECK-LABEL: f3: diff --git a/llvm/test/CodeGen/SystemZ/asm-19.ll b/llvm/test/CodeGen/SystemZ/asm-19.ll index e16fdfa13fce6..6c77fb55071ca 100644 --- a/llvm/test/CodeGen/SystemZ/asm-19.ll +++ b/llvm/test/CodeGen/SystemZ/asm-19.ll @@ -3,6 +3,15 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -no-integrated-as | FileCheck %s ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -no-integrated-as | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-Z14 +define half @f0() { +; CHECK-LABEL: f0: +; CHECK: lzer %f1 +; CHECK: blah %f0 %f1 +; CHECK: br %r14 + %val = call half asm "blah $0 $1", "=&v,v" (half 0.0) + ret half %val +} + define float @f1() { ; CHECK-LABEL: f1: ; CHECK: lzer %f1 @@ -86,6 +95,16 @@ define <4 x float> @f9() { ret <4 x float> %val } +define half @f10_half() { +; CHECK-LABEL: f10_half: +; CHECK: lzer %f4 +; CHECK: blah %f4 +; CHECK: ldr %f0, %f4 +; CHECK: br %r14 + %ret = call half asm "blah $0", "={v4},0" (half 0.0) + ret half %ret +} + define float @f10() { ; CHECK-LABEL: f10: ; CHECK: lzer %f4 diff --git a/llvm/test/CodeGen/SystemZ/fmuladd-soft-float.ll b/llvm/test/CodeGen/SystemZ/fmuladd-soft-float.ll index 1447c576f48ae..a982f9af52358 100644 --- a/llvm/test/CodeGen/SystemZ/fmuladd-soft-float.ll +++ b/llvm/test/CodeGen/SystemZ/fmuladd-soft-float.ll @@ -1,6 +1,43 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=s390x-linux < %s | FileCheck %s -check-prefix=SOFT-FLOAT +define half @fmuladd_intrinsic_f16(half %a, half %b, half %c) #0 { +; SOFT-FLOAT-LABEL: fmuladd_intrinsic_f16: +; SOFT-FLOAT: # %bb.0: +; SOFT-FLOAT-NEXT: stmg %r12, %r15, 96(%r15) +; SOFT-FLOAT-NEXT: .cfi_offset %r12, -64 +; SOFT-FLOAT-NEXT: .cfi_offset %r13, -56 +; SOFT-FLOAT-NEXT: .cfi_offset %r14, -48 +; SOFT-FLOAT-NEXT: .cfi_offset %r15, -40 +; SOFT-FLOAT-NEXT: aghi %r15, -160 +; SOFT-FLOAT-NEXT: .cfi_def_cfa_offset 320 +; SOFT-FLOAT-NEXT: # kill: def $r4l killed $r4l def $r4d +; SOFT-FLOAT-NEXT: llghr %r0, %r4 +; SOFT-FLOAT-NEXT: lr %r13, %r3 +; SOFT-FLOAT-NEXT: lr %r12, %r2 +; SOFT-FLOAT-NEXT: lgr %r2, %r0 +; SOFT-FLOAT-NEXT: brasl %r14, __extendhfsf2@PLT +; SOFT-FLOAT-NEXT: llghr %r0, %r12 +; SOFT-FLOAT-NEXT: lgr %r12, %r2 +; SOFT-FLOAT-NEXT: lgr %r2, %r0 +; SOFT-FLOAT-NEXT: brasl %r14, __extendhfsf2@PLT +; SOFT-FLOAT-NEXT: llghr %r0, %r13 +; SOFT-FLOAT-NEXT: lgr %r13, %r2 +; SOFT-FLOAT-NEXT: lgr %r2, %r0 +; SOFT-FLOAT-NEXT: brasl %r14, __extendhfsf2@PLT +; SOFT-FLOAT-NEXT: lgr %r3, %r2 +; SOFT-FLOAT-NEXT: lgr %r2, %r13 +; SOFT-FLOAT-NEXT: brasl %r14, __mulsf3@PLT +; SOFT-FLOAT-NEXT: lgr %r3, %r12 +; SOFT-FLOAT-NEXT: brasl %r14, __addsf3@PLT +; SOFT-FLOAT-NEXT: brasl %r14, __truncsfhf2@PLT +; SOFT-FLOAT-NEXT: # kill: def $r2l killed $r2l killed $r2d +; SOFT-FLOAT-NEXT: lmg %r12, %r15, 256(%r15) +; SOFT-FLOAT-NEXT: br %r14 + %result = call half @llvm.fmuladd.f16(half %a, half %b, half %c) + ret half %result +} + define float @fmuladd_intrinsic_f32(float %a, float %b, float %c) #0 { ; SOFT-FLOAT-LABEL: fmuladd_intrinsic_f32: ; SOFT-FLOAT: # %bb.0: diff --git a/llvm/test/CodeGen/SystemZ/fp-abs-01.ll 
b/llvm/test/CodeGen/SystemZ/fp-abs-01.ll index bf0870a86702c..2c8aebc5315b2 100644 --- a/llvm/test/CodeGen/SystemZ/fp-abs-01.ll +++ b/llvm/test/CodeGen/SystemZ/fp-abs-01.ll @@ -3,6 +3,17 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s +; Test f16. +declare half @llvm.fabs.f16(half %f) +define half @f0(half %f) { +; CHECK-LABEL: f0: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: lpdfr %f0, %f0 +; CHECK: br %r14 + %res = call half @llvm.fabs.f16(half %f) + ret half %res +} + ; Test f32. declare float @llvm.fabs.f32(float %f) define float @f1(float %f) { diff --git a/llvm/test/CodeGen/SystemZ/fp-abs-03.ll b/llvm/test/CodeGen/SystemZ/fp-abs-03.ll index 72786ea203df4..dc55374294896 100644 --- a/llvm/test/CodeGen/SystemZ/fp-abs-03.ll +++ b/llvm/test/CodeGen/SystemZ/fp-abs-03.ll @@ -2,6 +2,18 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s +; Test f16. +declare half @llvm.fabs.f16(half %f) +define half @f0(half %f) { +; CHECK-LABEL: f0: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: lpdfr %f0, %f0 +; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %res = call half @llvm.fabs.f16(half %f) + ret half %res +} + ; Test f32. declare float @llvm.fabs.f32(float %f) define float @f1(float %f) { diff --git a/llvm/test/CodeGen/SystemZ/fp-abs-04.ll b/llvm/test/CodeGen/SystemZ/fp-abs-04.ll index b02abc8443491..afaf3f6d22ac8 100644 --- a/llvm/test/CodeGen/SystemZ/fp-abs-04.ll +++ b/llvm/test/CodeGen/SystemZ/fp-abs-04.ll @@ -2,6 +2,22 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s +; Test f16. +declare half @llvm.fabs.f16(half %f) +define half @f0(half %f) { +; CHECK-LABEL: f0: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: lpdfr %f0, %f0 +; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: lcdfr %f0, %f0 +; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %abs = call half @llvm.fabs.f16(half %f) + %res = fneg half %abs + ret half %res +} + ; Test f32. declare float @llvm.fabs.f32(float %f) define float @f1(float %f) { diff --git a/llvm/test/CodeGen/SystemZ/fp-add-01.ll b/llvm/test/CodeGen/SystemZ/fp-add-01.ll index eb845bae9b804..b65744c4aac0c 100644 --- a/llvm/test/CodeGen/SystemZ/fp-add-01.ll +++ b/llvm/test/CodeGen/SystemZ/fp-add-01.ll @@ -6,6 +6,18 @@ declare float @foo() +; Check register addition. +define half @f0(half %f1, half %f2) { +; CHECK-LABEL: f0: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: aebr %f0, %f9 +; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %res = fadd half %f1, %f2 + ret half %res +} + ; Check register addition. 
define float @f1(float %f1, float %f2) { ; CHECK-LABEL: f1: diff --git a/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll b/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll index c1773abe92305..d3d641357ae58 100644 --- a/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll +++ b/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll @@ -227,6 +227,38 @@ exit: ret float %add } +define half @f12_half(half %dummy, half %val, ptr %dest) { +; CHECK-LABEL: f12_half: +; CHECK: ler %f8, %f2 +; CHECK-NEXT: ler %f0, %f2 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %f0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: ltebr %f0, %f0 +; CHECK-NEXT: jl .LBB11_2 +; CHECK-NEXT:# %bb.1: +; CHECK-NEXT: lgdr %r0, %f8 +; CHECK-NEXT: srlg %r0, %r0, 48 +; CHECK-NEXT: sth %r0, 0(%r13) +; CHECK-NEXT:.LBB11_2: +; CHECK-NEXT: ler %f0, %f8 +; CHECK-NEXT: ld %f8, 160(%r15) +; CHECK-NEXT: lmg %r13, %r15, 272(%r15) +; CHECK-NEXT: br %r14 +entry: + call void asm sideeffect "blah $0", "{f0}"(half %val) + %cmp = fcmp olt half %val, 0.0 + br i1 %cmp, label %exit, label %store + +store: + store half %val, ptr %dest + br label %exit + +exit: + ret half %val +} + ; %val in %f2 must be preserved during comparison and also copied to %f0. define float @f12(float %dummy, float %val, ptr %dest) { ; CHECK-LABEL: f12: @@ -304,6 +336,38 @@ exit: ret void } +define half @f15_half(half %val, half %dummy, ptr %dest) { +; CHECK-LABEL: f15_half: +; CHECK: ler %f8, %f0 +; CHECK-NEXT: ler %f2, %f0 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %f2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: ltebr %f0, %f0 +; CHECK-NEXT: jl .LBB15_2 +; CHECK-NEXT:# %bb.1: +; CHECK-NEXT: lgdr %r0, %f8 +; CHECK-NEXT: srlg %r0, %r0, 48 +; CHECK-NEXT: sth %r0, 0(%r13) +; CHECK-NEXT:.LBB15_2: +; CHECK-NEXT: ler %f0, %f8 +; CHECK-NEXT: ld %f8, 160(%r15) +; CHECK-NEXT: lmg %r13, %r15, 272(%r15) +; CHECK-NEXT: br %r14 +entry: + call void asm sideeffect "blah $0", "{f2}"(half %val) + %cmp = fcmp olt half %val, 0.0 + br i1 %cmp, label %exit, label %store + +store: + store half %val, ptr %dest + br label %exit + +exit: + ret half %val +} + define float @f15(float %val, float %dummy, ptr %dest) { ; CHECK-LABEL: f15: ; CHECK: ltebr %f1, %f0 @@ -374,7 +438,7 @@ define float @f18(float %dummy, float %a, ptr %dest) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lnebr %f0, %f2 ; CHECK-NEXT: blr %r14 -; CHECK-NEXT: .LBB17_1: # %store +; CHECK-NEXT: .LBB19_1: # %store ; CHECK-NEXT: ste %f0, 0(%r2) ; CHECK-NEXT: br %r14 entry: @@ -397,7 +461,7 @@ define float @f19(float %dummy, float %a, ptr %dest) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lcebr %f0, %f2 ; CHECK-NEXT: bler %r14 -; CHECK-NEXT: .LBB18_1: # %store +; CHECK-NEXT: .LBB20_1: # %store ; CHECK-NEXT: ste %f0, 0(%r2) ; CHECK-NEXT: br %r14 entry: diff --git a/llvm/test/CodeGen/SystemZ/fp-conv-05.ll b/llvm/test/CodeGen/SystemZ/fp-conv-05.ll index 4596649d5659c..fef2b842c54aa 100644 --- a/llvm/test/CodeGen/SystemZ/fp-conv-05.ll +++ b/llvm/test/CodeGen/SystemZ/fp-conv-05.ll @@ -2,6 +2,16 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; Check i32->f16. +define half @f0(i32 %i) { +; CHECK-LABEL: f0: +; CHECK: cefbr %f0, %r2 +; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %conv = sitofp i32 %i to half + ret half %conv +} + ; Check i32->f32. 
 define float @f1(i32 %i) {
 ; CHECK-LABEL: f1:
diff --git a/llvm/test/CodeGen/SystemZ/fp-conv-06.ll b/llvm/test/CodeGen/SystemZ/fp-conv-06.ll
index e754a7e161f8f..deb22ee4d19b4 100644
--- a/llvm/test/CodeGen/SystemZ/fp-conv-06.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-conv-06.ll
@@ -2,6 +2,18 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
 
+; Check i32->f16. There is no native instruction, so we must promote
+; to i64 first.
+define half @f0(i32 %i) {
+; CHECK-LABEL: f0:
+; CHECK: llgfr [[REGISTER:%r[0-5]]], %r2
+; CHECK: cegbr %f0, [[REGISTER]]
+; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT
+; CHECK: br %r14
+  %conv = uitofp i32 %i to half
+  ret half %conv
+}
+
 ; Check i32->f32. There is no native instruction, so we must promote
 ; to i64 first.
 define float @f1(i32 %i) {
diff --git a/llvm/test/CodeGen/SystemZ/fp-conv-07.ll b/llvm/test/CodeGen/SystemZ/fp-conv-07.ll
index 2941e77441461..02f47e481cc6a 100644
--- a/llvm/test/CodeGen/SystemZ/fp-conv-07.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-conv-07.ll
@@ -2,6 +2,16 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+; Test i64->f16.
+define half @f0(i64 %i) {
+; CHECK-LABEL: f0:
+; CHECK: cegbr %f0, %r2
+; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT
+; CHECK: br %r14
+  %conv = sitofp i64 %i to half
+  ret half %conv
+}
+
 ; Test i64->f32.
 define float @f1(i64 %i) {
 ; CHECK-LABEL: f1:
diff --git a/llvm/test/CodeGen/SystemZ/fp-conv-08.ll b/llvm/test/CodeGen/SystemZ/fp-conv-08.ll
index e2a5f74185216..b91da08c835d6 100644
--- a/llvm/test/CodeGen/SystemZ/fp-conv-08.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-conv-08.ll
@@ -2,6 +2,15 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
 
+; Test i64->f16. For z10, this results in just a single libcall.
+define half @f0(i64 %i) {
+; CHECK-LABEL: f0:
+; CHECK: brasl %r14, __floatundihf@PLT
+; CHECK: br %r14
+  %conv = uitofp i64 %i to half
+  ret half %conv
+}
+
 ; Test i64->f32. There's no native support for unsigned i64-to-fp conversions,
 ; but we should be able to implement them using signed i64-to-fp conversions.
 define float @f1(i64 %i) {
diff --git a/llvm/test/CodeGen/SystemZ/fp-conv-09.ll b/llvm/test/CodeGen/SystemZ/fp-conv-09.ll
index 0e730c3705030..423bbf285e9e1 100644
--- a/llvm/test/CodeGen/SystemZ/fp-conv-09.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-conv-09.ll
@@ -2,6 +2,16 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+; Test f16->i32.
+define i32 @f0(half %f) {
+; CHECK-LABEL: f0:
+; CHECK: brasl %r14, __extendhfsf2@PLT
+; CHECK-NEXT: cfebr %r2, 5, %f0
+; CHECK: br %r14
+  %conv = fptosi half %f to i32
+  ret i32 %conv
+}
+
 ; Test f32->i32.
 define i32 @f1(float %f) {
 ; CHECK-LABEL: f1:
diff --git a/llvm/test/CodeGen/SystemZ/fp-conv-10.ll b/llvm/test/CodeGen/SystemZ/fp-conv-10.ll
index 82913265853a5..a0455a2cea2d6 100644
--- a/llvm/test/CodeGen/SystemZ/fp-conv-10.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-conv-10.ll
@@ -8,18 +8,36 @@
 ; Promoting to i64 doesn't generate an inexact condition for values that are
 ; outside the i32 range but in the i64 range, so use the default expansion.
 
+; Test f16->i32. Converted to signed as the max half value is smaller than
+; the signed integer range.
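+; (The largest finite half value is 65504, well below 2^31 - 1, so the
+; signed convert instruction can never overflow here.)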
+define i32 @f0(half %f) { +; CHECK-LABEL: f0: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r14, %r15, 112(%r15) +; CHECK-NEXT: .cfi_offset %r14, -48 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -160 +; CHECK-NEXT: .cfi_def_cfa_offset 320 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: cfebr %r2, 5, %f0 +; CHECK-NEXT: lmg %r14, %r15, 272(%r15) +; CHECK-NEXT: br %r14 + %conv = fptoui half %f to i32 + ret i32 %conv +} + ; Test f32->i32. define i32 @f1(float %f) { ; CHECK-LABEL: f1: ; CHECK: # %bb.0: -; CHECK-NEXT: larl %r1, .LCPI0_0 +; CHECK-NEXT: larl %r1, .LCPI1_0 ; CHECK-NEXT: le %f1, 0(%r1) ; CHECK-NEXT: cebr %f0, %f1 -; CHECK-NEXT: jnl .LBB0_2 +; CHECK-NEXT: jnl .LBB1_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: cfebr %r2, 5, %f0 ; CHECK-NEXT: br %r14 -; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: .LBB1_2: ; CHECK-NEXT: sebr %f0, %f1 ; CHECK-NEXT: cfebr %r2, 5, %f0 ; CHECK-NEXT: xilf %r2, 2147483648 @@ -32,14 +50,14 @@ define i32 @f1(float %f) { define i32 @f2(double %f) { ; CHECK-LABEL: f2: ; CHECK: # %bb.0: -; CHECK-NEXT: larl %r1, .LCPI1_0 +; CHECK-NEXT: larl %r1, .LCPI2_0 ; CHECK-NEXT: ld %f1, 0(%r1) ; CHECK-NEXT: cdbr %f0, %f1 -; CHECK-NEXT: jnl .LBB1_2 +; CHECK-NEXT: jnl .LBB2_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: cfdbr %r2, 5, %f0 ; CHECK-NEXT: br %r14 -; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: .LBB2_2: ; CHECK-NEXT: sdbr %f0, %f1 ; CHECK-NEXT: cfdbr %r2, 5, %f0 ; CHECK-NEXT: xilf %r2, 2147483648 @@ -54,14 +72,14 @@ define i32 @f3(ptr %src) { ; CHECK: # %bb.0: ; CHECK-NEXT: ld %f0, 0(%r2) ; CHECK-NEXT: ld %f2, 8(%r2) -; CHECK-NEXT: larl %r1, .LCPI2_0 +; CHECK-NEXT: larl %r1, .LCPI3_0 ; CHECK-NEXT: lxeb %f1, 0(%r1) ; CHECK-NEXT: cxbr %f0, %f1 -; CHECK-NEXT: jnl .LBB2_2 +; CHECK-NEXT: jnl .LBB3_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: cfxbr %r2, 5, %f0 ; CHECK-NEXT: br %r14 -; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: .LBB3_2: ; CHECK-NEXT: sxbr %f0, %f1 ; CHECK-NEXT: cfxbr %r2, 5, %f0 ; CHECK-NEXT: xilf %r2, 2147483648 diff --git a/llvm/test/CodeGen/SystemZ/fp-conv-11.ll b/llvm/test/CodeGen/SystemZ/fp-conv-11.ll index 2dd543b5810bf..55a2f8a51a526 100644 --- a/llvm/test/CodeGen/SystemZ/fp-conv-11.ll +++ b/llvm/test/CodeGen/SystemZ/fp-conv-11.ll @@ -2,6 +2,16 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; Test f16->i64. +define i64 @f0(half %f) { +; CHECK-LABEL: f0: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: cgebr %r2, 5, %f0 +; CHECK: br %r14 + %conv = fptosi half %f to i64 + ret i64 %conv +} + ; Test f32->i64. define i64 @f1(float %f) { ; CHECK-LABEL: f1: diff --git a/llvm/test/CodeGen/SystemZ/fp-conv-12.ll b/llvm/test/CodeGen/SystemZ/fp-conv-12.ll index 27afbf4d398a0..bb83a677210ac 100644 --- a/llvm/test/CodeGen/SystemZ/fp-conv-12.ll +++ b/llvm/test/CodeGen/SystemZ/fp-conv-12.ll @@ -7,18 +7,36 @@ ; they were added in z196 as the Convert to Logical family of instructions. ; Convert via signed i64s instead. +; Test f16->i64. Converted to signed as the max float value is smaller than +; the signed integer range. +define i64 @f0(half %f) { +; CHECK-LABEL: f0: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r14, %r15, 112(%r15) +; CHECK-NEXT: .cfi_offset %r14, -48 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -160 +; CHECK-NEXT: .cfi_def_cfa_offset 320 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: cgebr %r2, 5, %f0 +; CHECK-NEXT: lmg %r14, %r15, 272(%r15) +; CHECK-NEXT: br %r14 + %conv = fptoui half %f to i64 + ret i64 %conv +} + ; Test f32->i64. 
define i64 @f1(float %f) { ; CHECK-LABEL: f1: ; CHECK: # %bb.0: -; CHECK-NEXT: larl %r1, .LCPI0_0 +; CHECK-NEXT: larl %r1, .LCPI1_0 ; CHECK-NEXT: le %f1, 0(%r1) ; CHECK-NEXT: cebr %f0, %f1 -; CHECK-NEXT: jnl .LBB0_2 +; CHECK-NEXT: jnl .LBB1_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: cgebr %r2, 5, %f0 ; CHECK-NEXT: br %r14 -; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: .LBB1_2: ; CHECK-NEXT: sebr %f0, %f1 ; CHECK-NEXT: cgebr %r2, 5, %f0 ; CHECK-NEXT: xihf %r2, 2147483648 @@ -31,14 +49,14 @@ define i64 @f1(float %f) { define i64 @f2(double %f) { ; CHECK-LABEL: f2: ; CHECK: # %bb.0: -; CHECK-NEXT: larl %r1, .LCPI1_0 +; CHECK-NEXT: larl %r1, .LCPI2_0 ; CHECK-NEXT: ld %f1, 0(%r1) ; CHECK-NEXT: cdbr %f0, %f1 -; CHECK-NEXT: jnl .LBB1_2 +; CHECK-NEXT: jnl .LBB2_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: cgdbr %r2, 5, %f0 ; CHECK-NEXT: br %r14 -; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: .LBB2_2: ; CHECK-NEXT: sdbr %f0, %f1 ; CHECK-NEXT: cgdbr %r2, 5, %f0 ; CHECK-NEXT: xihf %r2, 2147483648 @@ -53,14 +71,14 @@ define i64 @f3(ptr %src) { ; CHECK: # %bb.0: ; CHECK-NEXT: ld %f0, 0(%r2) ; CHECK-NEXT: ld %f2, 8(%r2) -; CHECK-NEXT: larl %r1, .LCPI2_0 +; CHECK-NEXT: larl %r1, .LCPI3_0 ; CHECK-NEXT: lxeb %f1, 0(%r1) ; CHECK-NEXT: cxbr %f0, %f1 -; CHECK-NEXT: jnl .LBB2_2 +; CHECK-NEXT: jnl .LBB3_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: cgxbr %r2, 5, %f0 ; CHECK-NEXT: br %r14 -; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: .LBB3_2: ; CHECK-NEXT: sxbr %f0, %f1 ; CHECK-NEXT: cgxbr %r2, 5, %f0 ; CHECK-NEXT: xihf %r2, 2147483648 diff --git a/llvm/test/CodeGen/SystemZ/fp-conv-13.ll b/llvm/test/CodeGen/SystemZ/fp-conv-13.ll index 6e6c96bea2b35..4869d070b6beb 100644 --- a/llvm/test/CodeGen/SystemZ/fp-conv-13.ll +++ b/llvm/test/CodeGen/SystemZ/fp-conv-13.ll @@ -3,6 +3,16 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s +; Check i32->f16. +define half @f0(i32 %i) { +; CHECK-LABEL: f0: +; CHECK: celfbr %f0, 0, %r2, 0 +; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %conv = uitofp i32 %i to half + ret half %conv +} + ; Check i32->f32. define float @f1(i32 %i) { ; CHECK-LABEL: f1: @@ -33,18 +43,28 @@ define void @f3(i32 %i, ptr %dst) { ret void } -; Check i64->f32. -define float @f4(i64 %i) { +; Check i64->f16. +define half @f4(i64 %i) { ; CHECK-LABEL: f4: ; CHECK: celgbr %f0, 0, %r2, 0 +; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %conv = uitofp i64 %i to half + ret half %conv +} + +; Check i64->f32. +define float @f5(i64 %i) { +; CHECK-LABEL: f5: +; CHECK: celgbr %f0, 0, %r2, 0 ; CHECK: br %r14 %conv = uitofp i64 %i to float ret float %conv } ; Check i64->f64. -define double @f5(i64 %i) { -; CHECK-LABEL: f5: +define double @f6(i64 %i) { +; CHECK-LABEL: f6: ; CHECK: cdlgbr %f0, 0, %r2, 0 ; CHECK: br %r14 %conv = uitofp i64 %i to double @@ -52,8 +72,8 @@ define double @f5(i64 %i) { } ; Check i64->f128. -define void @f6(i64 %i, ptr %dst) { -; CHECK-LABEL: f6: +define void @f7(i64 %i, ptr %dst) { +; CHECK-LABEL: f7: ; CHECK: cxlgbr %f0, 0, %r2, 0 ; CHECK-DAG: std %f0, 0(%r3) ; CHECK-DAG: std %f2, 8(%r3) diff --git a/llvm/test/CodeGen/SystemZ/fp-conv-14.ll b/llvm/test/CodeGen/SystemZ/fp-conv-14.ll index 0d1f951994d27..c9448eac91fb1 100644 --- a/llvm/test/CodeGen/SystemZ/fp-conv-14.ll +++ b/llvm/test/CodeGen/SystemZ/fp-conv-14.ll @@ -2,6 +2,16 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s +; Test f16->i32. 
+define i32 @f0(half %f) { +; CHECK-LABEL: f0: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: clfebr %r2, 5, %f0, 0 +; CHECK: br %r14 + %conv = fptoui half %f to i32 + ret i32 %conv +} + ; Test f32->i32. define i32 @f1(float %f) { ; CHECK-LABEL: f1: @@ -32,9 +42,19 @@ define i32 @f3(ptr %src) { ret i32 %conv } -; Test f32->i64. -define i64 @f4(float %f) { +; Test f16->i64. +define i64 @f4(half %f) { ; CHECK-LABEL: f4: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: clgebr %r2, 5, %f0, 0 +; CHECK: br %r14 + %conv = fptoui half %f to i64 + ret i64 %conv +} + +; Test f32->i64. +define i64 @f5(float %f) { +; CHECK-LABEL: f5: ; CHECK: clgebr %r2, 5, %f0, 0 ; CHECK: br %r14 %conv = fptoui float %f to i64 @@ -42,8 +62,8 @@ define i64 @f4(float %f) { } ; Test f64->i64. -define i64 @f5(double %f) { -; CHECK-LABEL: f5: +define i64 @f6(double %f) { +; CHECK-LABEL: f6: ; CHECK: clgdbr %r2, 5, %f0, 0 ; CHECK: br %r14 %conv = fptoui double %f to i64 @@ -51,8 +71,8 @@ define i64 @f5(double %f) { } ; Test f128->i64. -define i64 @f6(ptr %src) { -; CHECK-LABEL: f6: +define i64 @f7(ptr %src) { +; CHECK-LABEL: f7: ; CHECK-DAG: ld %f0, 0(%r2) ; CHECK-DAG: ld %f2, 8(%r2) ; CHECK: clgxbr %r2, 5, %f0, 0 diff --git a/llvm/test/CodeGen/SystemZ/fp-conv-20.ll b/llvm/test/CodeGen/SystemZ/fp-conv-20.ll index 8006a8beb0789..abf45e3d7a597 100644 --- a/llvm/test/CodeGen/SystemZ/fp-conv-20.ll +++ b/llvm/test/CodeGen/SystemZ/fp-conv-20.ll @@ -30,9 +30,18 @@ define float @f3(i128 %i) { ret float %conv } -; Test unsigned i128->f128. -define fp128 @f4(i128 %i) { +; Test signed i128->f16. +define half @f4(i128 %i) { ; CHECK-LABEL: f4: +; CHECK: brasl %r14, __floattihf@PLT +; CHECK: br %r14 + %conv = sitofp i128 %i to half + ret half %conv +} + +; Test unsigned i128->f128. +define fp128 @f5(i128 %i) { +; CHECK-LABEL: f5: ; CHECK: brasl %r14, __floatuntitf@PLT ; CHECK: br %r14 %conv = uitofp i128 %i to fp128 @@ -40,8 +49,8 @@ define fp128 @f4(i128 %i) { } ; Test unsigned i128->f64. -define double @f5(i128 %i) { -; CHECK-LABEL: f5: +define double @f6(i128 %i) { +; CHECK-LABEL: f6: ; CHECK: brasl %r14, __floatuntidf@PLT ; CHECK: br %r14 %conv = uitofp i128 %i to double @@ -49,17 +58,26 @@ define double @f5(i128 %i) { } ; Test unsigned i128->f32. -define float @f6(i128 %i) { -; CHECK-LABEL: f6: +define float @f7(i128 %i) { +; CHECK-LABEL: f7: ; CHECK: brasl %r14, __floatuntisf@PLT ; CHECK: br %r14 %conv = uitofp i128 %i to float ret float %conv } +; Test unsigned i128->f16. +define half @f8(i128 %i) { +; CHECK-LABEL: f8: +; CHECK: brasl %r14, __floatuntihf@PLT +; CHECK: br %r14 + %conv = uitofp i128 %i to half + ret half %conv +} + ; Test signed f128->i128. -define i128 @f7(fp128 %f) { -; CHECK-LABEL: f7: +define i128 @f9(fp128 %f) { +; CHECK-LABEL: f9: ; CHECK: brasl %r14, __fixtfti@PLT ; CHECK: br %r14 %conv = fptosi fp128 %f to i128 @@ -67,26 +85,35 @@ define i128 @f7(fp128 %f) { } ; Test signed f64->i128. -define i128 @f8(double %f) { -; CHECK-LABEL: f8: +define i128 @f10(double %f) { +; CHECK-LABEL: f10: ; CHECK: brasl %r14, __fixdfti@PLT ; CHECK: br %r14 %conv = fptosi double %f to i128 ret i128 %conv } -; Test signed f9->i128. -define i128 @f9(float %f) { -; CHECK-LABEL: f9: +; Test signed f32->i128. +define i128 @f11(float %f) { +; CHECK-LABEL: f11: ; CHECK: brasl %r14, __fixsfti@PLT ; CHECK: br %r14 %conv = fptosi float %f to i128 ret i128 %conv } +; Test signed f16->i128. 
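+; (i128 conversions are always libcalls, and compiler-rt provides half
+; variants such as __fixhfti directly, so no separate extension is needed.)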
+define i128 @f12(half %f) { +; CHECK-LABEL: f12: +; CHECK: brasl %r14, __fixhfti@PLT +; CHECK: br %r14 + %conv = fptosi half %f to i128 + ret i128 %conv +} + ; Test unsigned f128->i128. -define i128 @f10(fp128 %f) { -; CHECK-LABEL: f10: +define i128 @f13(fp128 %f) { +; CHECK-LABEL: f13: ; CHECK: brasl %r14, __fixunstfti@PLT ; CHECK: br %r14 %conv = fptoui fp128 %f to i128 @@ -94,8 +121,8 @@ define i128 @f10(fp128 %f) { } ; Test unsigned f64->i128. -define i128 @f11(double %f) { -; CHECK-LABEL: f11: +define i128 @f14(double %f) { +; CHECK-LABEL: f14: ; CHECK: brasl %r14, __fixunsdfti@PLT ; CHECK: br %r14 %conv = fptoui double %f to i128 @@ -103,10 +130,19 @@ define i128 @f11(double %f) { } ; Test unsigned f32->i128. -define i128 @f12(float %f) { -; CHECK-LABEL: f12: +define i128 @f15(float %f) { +; CHECK-LABEL: f15: ; CHECK: brasl %r14, __fixunssfti@PLT ; CHECK: br %r14 %conv = fptoui float %f to i128 ret i128 %conv } + +; Test unsigned f16->i128. +define i128 @f16(half %f) { +; CHECK-LABEL: f16: +; CHECK: brasl %r14, __fixunshfti@PLT +; CHECK: br %r14 + %conv = fptoui half %f to i128 + ret i128 %conv +} diff --git a/llvm/test/CodeGen/SystemZ/fp-copysign-01.ll b/llvm/test/CodeGen/SystemZ/fp-copysign-01.ll index d2b6488008e6b..3026191601081 100644 --- a/llvm/test/CodeGen/SystemZ/fp-copysign-01.ll +++ b/llvm/test/CodeGen/SystemZ/fp-copysign-01.ll @@ -2,11 +2,23 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare half @copysignh(half, half) readnone declare float @copysignf(float, float) readnone declare double @copysign(double, double) readnone ; FIXME: not really the correct prototype for SystemZ. declare fp128 @copysignl(fp128, fp128) readnone +; Test f32 copies in which the sign comes from an f16. +define float @f0(float %a, half %bh) { +; CHECK-LABEL: f0: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: cpsdr %f0, %f0, %f8 +; CHECK: br %r14 + %b = fpext half %bh to float + %res = call float @copysignf(float %a, float %b) readnone + ret float %res +} + ; Test f32 copies in which the sign comes from an f32. define float @f1(float %a, float %b) { ; CHECK-LABEL: f1: @@ -126,3 +138,23 @@ define void @f9(ptr %cptr, ptr %aptr, ptr %bptr) { store fp128 %c, ptr %cptr ret void } + +; Test f16 copies in which the sign comes from an f16. +define half @f10(half %a, half %b) { +; CHECK-LABEL: f10: +; CHECK: brasl %r14, copysignh@PLT +; CHECK: br %r14 + %res = call half @copysignh(half %a, half %b) readnone + ret half %res +} + +; Test f16 copies in which the sign comes from an f32. +define half @f11(half %a, float %bf) { +; CHECK-LABEL: f11: +; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: brasl %r14, copysignh@PLT +; CHECK: br %r14 + %b = fptrunc float %bf to half + %res = call half @copysignh(half %a, half %b) readnone + ret half %res +} diff --git a/llvm/test/CodeGen/SystemZ/fp-copysign-02.ll b/llvm/test/CodeGen/SystemZ/fp-copysign-02.ll index 178568ebb3bf9..320eee19afe05 100644 --- a/llvm/test/CodeGen/SystemZ/fp-copysign-02.ll +++ b/llvm/test/CodeGen/SystemZ/fp-copysign-02.ll @@ -2,11 +2,25 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s +declare half @copysignh(half, half) readnone declare float @copysignf(float, float) readnone declare double @copysign(double, double) readnone ; FIXME: not really the correct prototype for SystemZ. declare fp128 @copysignl(fp128, fp128) readnone +; Test f16 copies in which the sign comes from an f128. 
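+; The f128 sign source is first truncated to half with __trunctfhf2, after
+; which the plain copysignh libcall declared above is used.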
+define half @f0(half %a, ptr %bptr) {
+; CHECK-LABEL: f0:
+; CHECK: vl %v[[REG:[0-9]+]], 0(%r2)
+; CHECK: brasl %r14, __trunctfhf2@PLT
+; CHECK: brasl %r14, copysignh@PLT
+; CHECK: br %r14
+  %bl = load volatile fp128, ptr %bptr
+  %b = fptrunc fp128 %bl to half
+  %res = call half @copysignh(half %a, half %b) readnone
+  ret half %res
+}
+
 ; Test f32 copies in which the sign comes from an f128.
 define float @f1(float %a, ptr %bptr) {
 ; CHECK-LABEL: f1:
@@ -31,6 +45,21 @@ define double @f2(double %a, ptr %bptr) {
   ret double %res
 }
 
+; Test f128 copies in which the sign comes from an f16.
+define void @f7_half(ptr %cptr, ptr %aptr, half %bh) {
+; CHECK-LABEL: f7_half:
+; CHECK: vl [[REG1:%v[0-7]+]], 0(%r3)
+; CHECK: vsteh %v0, 164(%r15), 0
+; CHECK: tm 164(%r15), 128
+; CHECK: wflnxb [[REG2:%v[0-9]+]], [[REG1]]
+; CHECK: wflpxb [[REG2]], [[REG1]]
+  %a = load volatile fp128, ptr %aptr
+  %b = fpext half %bh to fp128
+  %c = call fp128 @copysignl(fp128 %a, fp128 %b) readnone
+  store fp128 %c, ptr %cptr
+  ret void
+}
+
 ; Test f128 copies in which the sign comes from an f32.
 define void @f7(ptr %cptr, ptr %aptr, float %bf) {
 ; CHECK-LABEL: f7:
diff --git a/llvm/test/CodeGen/SystemZ/fp-div-01.ll b/llvm/test/CodeGen/SystemZ/fp-div-01.ll
index d33e61bbd1eda..78df879613cb2 100644
--- a/llvm/test/CodeGen/SystemZ/fp-div-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-div-01.ll
@@ -6,6 +6,18 @@
 
 declare float @foo()
 
+; Check register division.
+define half @f0(half %f1, half %f2) {
+; CHECK-LABEL: f0:
+; CHECK: brasl %r14, __extendhfsf2@PLT
+; CHECK: brasl %r14, __extendhfsf2@PLT
+; CHECK: debr %f0, %f9
+; CHECK: brasl %r14, __truncsfhf2@PLT
+; CHECK: br %r14
+  %res = fdiv half %f1, %f2
+  ret half %res
+}
+
 ; Check register division.
 define float @f1(float %f1, float %f2) {
 ; CHECK-LABEL: f1:
diff --git a/llvm/test/CodeGen/SystemZ/fp-half-cmp.ll b/llvm/test/CodeGen/SystemZ/fp-half-cmp.ll
new file mode 100644
index 0000000000000..3d9ec6a43e374
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/fp-half-cmp.ll
@@ -0,0 +1,161 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z16 -verify-machineinstrs \
+; RUN: | FileCheck %s
+;
+; Various tests of comparisons and uses involving 16-bit floating point (half).
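+;
+; In these tests, half operands are generally extended to float with
+; __extendhfsf2, compared or operated on there, and any half results are
+; truncated back with __truncsfhf2 (see the CHECK lines below).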
+ +; fcmp half; select half +define half @fun0(half %Arg0, half %Arg1) { +; CHECK-LABEL: fun0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stmg %r14, %r15, 112(%r15) +; CHECK-NEXT: .cfi_offset %r14, -48 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -176 +; CHECK-NEXT: .cfi_def_cfa_offset 336 +; CHECK-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill +; CHECK-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset %f8, -168 +; CHECK-NEXT: .cfi_offset %f9, -176 +; CHECK-NEXT: ldr %f8, %f2 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: ldr %f9, %f0 +; CHECK-NEXT: ldr %f0, %f8 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: ltebr %f1, %f9 +; CHECK-NEXT: # kill: def $f0s killed $f0s def $v0 +; CHECK-NEXT: jl .LBB0_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: vgmf %v0, 2, 8 +; CHECK-NEXT: .LBB0_2: # %entry +; CHECK-NEXT: # kill: def $f0s killed $f0s killed $v0 +; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT +; CHECK-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload +; CHECK-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; CHECK-NEXT: lmg %r14, %r15, 288(%r15) +; CHECK-NEXT: br %r14 +entry: + %cmp = fcmp olt half %Arg0, 0xH0000 + %cond = select i1 %cmp, half %Arg1, half 1.0 + ret half %cond +} + +; fcmp half; select i32 +define i32 @fun1(half %Arg0, i32 %Arg1) { +; CHECK-LABEL: fun1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stmg %r13, %r15, 104(%r15) +; CHECK-NEXT: .cfi_offset %r13, -56 +; CHECK-NEXT: .cfi_offset %r14, -48 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -160 +; CHECK-NEXT: .cfi_def_cfa_offset 320 +; CHECK-NEXT: lr %r13, %r2 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: ltebr %f0, %f0 +; CHECK-NEXT: lochinl %r13, 0 +; CHECK-NEXT: lr %r2, %r13 +; CHECK-NEXT: lmg %r13, %r15, 264(%r15) +; CHECK-NEXT: br %r14 +entry: + %cmp = fcmp olt half %Arg0, 0xH0000 + %cond = select i1 %cmp, i32 %Arg1, i32 0 + ret i32 %cond +} + +; icmp i32; select half +define half @fun2(i32 %Arg0, half %Arg1) { +; CHECK-LABEL: fun2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stmg %r13, %r15, 104(%r15) +; CHECK-NEXT: .cfi_offset %r13, -56 +; CHECK-NEXT: .cfi_offset %r14, -48 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -160 +; CHECK-NEXT: .cfi_def_cfa_offset 320 +; CHECK-NEXT: lr %r13, %r2 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: # kill: def $f0s killed $f0s def $v0 +; CHECK-NEXT: cije %r13, 0, .LBB2_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: vgmf %v0, 2, 8 +; CHECK-NEXT: .LBB2_2: # %entry +; CHECK-NEXT: # kill: def $f0s killed $f0s killed $v0 +; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT +; CHECK-NEXT: lmg %r13, %r15, 264(%r15) +; CHECK-NEXT: br %r14 +entry: + %cmp = icmp eq i32 %Arg0, 0 + %cond = select i1 %cmp, half %Arg1, half 1.0 + ret half %cond +} + +define i64 @fun3(i64 %a, i64 %b, half %f1, half %f2) #0 { +; CHECK-LABEL: fun3: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r12, %r15, 96(%r15) +; CHECK-NEXT: .cfi_offset %r12, -64 +; CHECK-NEXT: .cfi_offset %r13, -56 +; CHECK-NEXT: .cfi_offset %r14, -48 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -176 +; CHECK-NEXT: .cfi_def_cfa_offset 336 +; CHECK-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill +; CHECK-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset %f8, -168 +; CHECK-NEXT: .cfi_offset %f9, -176 +; CHECK-NEXT: ldr %f8, %f0 +; CHECK-NEXT: ldr %f0, %f2 +; CHECK-NEXT: lgr %r13, %r3 +; CHECK-NEXT: lgr %r12, %r2 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: ldr %f9, %f0 +; 
CHECK-NEXT: ldr %f0, %f8
+; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
+; CHECK-NEXT: cebr %f0, %f9
+; CHECK-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload
+; CHECK-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload
+; CHECK-NEXT: selgre %r2, %r12, %r13
+; CHECK-NEXT: lmg %r12, %r15, 272(%r15)
+; CHECK-NEXT: br %r14
+  %cond = call i1 @llvm.experimental.constrained.fcmp.f16(
+      half %f1, half %f2,
+      metadata !"oeq",
+      metadata !"fpexcept.strict") #0
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+define half @fun4(half %Arg0, ptr %Dst) {
+; CHECK-LABEL: fun4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT: .cfi_offset %r13, -56
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -160
+; CHECK-NEXT: .cfi_def_cfa_offset 320
+; CHECK-NEXT: lgr %r13, %r2
+; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
+; CHECK-NEXT: ltebr %f0, %f0
+; CHECK-NEXT: je .LBB4_2
+; CHECK-NEXT: # %bb.1: # %store
+; CHECK-NEXT: lzer %f0
+; CHECK-NEXT: vsteh %v0, 0(%r13), 0
+; CHECK-NEXT: .LBB4_2: # %exit
+; CHECK-NEXT: lzer %f0
+; CHECK-NEXT: lmg %r13, %r15, 264(%r15)
+; CHECK-NEXT: br %r14
+entry:
+  %cmp = fcmp oeq half %Arg0, 0.0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store half 0.0, ptr %Dst
+  br label %exit
+
+exit:
+  ret half 0.0
+}
+
+declare i1 @llvm.experimental.constrained.fcmp.f16(half, half, metadata, metadata)
diff --git a/llvm/test/CodeGen/SystemZ/fp-half-move.ll b/llvm/test/CodeGen/SystemZ/fp-half-move.ll
new file mode 100644
index 0000000000000..1c8d5daf05f78
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/fp-half-move.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=zEC12 -verify-machineinstrs \
+; RUN: | FileCheck %s --check-prefix=NOVEC
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z16 -verify-machineinstrs \
+; RUN: | FileCheck %s --check-prefix=VECTOR
+;
+; Test moves (bitcasts) between i16 and half.
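+;
+; Without vector support the value goes through a GPR, using shifts plus
+; LDGR/LGDR to reach the high word of an FPR; with vector support, VLVGH and
+; VLGVH move the halfword directly to and from element 0 of a vector register
+; (see the CHECK lines below).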
+ +define half @f1(ptr %ptr) { +; NOVEC-LABEL: f1: +; NOVEC: # %bb.0: +; NOVEC-NEXT: lh %r0, 0(%r2) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: oilh %r0, 255 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f0, %r0 +; NOVEC-NEXT: # kill: def $f0h killed $f0h killed $f0d +; NOVEC-NEXT: br %r14 +; +; VECTOR-LABEL: f1: +; VECTOR: # %bb.0: +; VECTOR-NEXT: lh %r0, 0(%r2) +; VECTOR-NEXT: oill %r0, 255 +; VECTOR-NEXT: vlvgh %v0, %r0, 0 +; VECTOR-NEXT: br %r14 + %L = load i16, ptr %ptr + %O = or i16 %L, 255 + %res = bitcast i16 %O to half + ret half %res +} + +define half @f2(i16 %Arg) { +; NOVEC-LABEL: f2: +; NOVEC: # %bb.0: +; NOVEC-NEXT: sll %r2, 16 +; NOVEC-NEXT: risbhg %r0, %r2, 0, 159, 32 +; NOVEC-NEXT: ldgr %f0, %r0 +; NOVEC-NEXT: # kill: def $f0h killed $f0h killed $f0d +; NOVEC-NEXT: br %r14 +; +; VECTOR-LABEL: f2: +; VECTOR: # %bb.0: +; VECTOR-NEXT: vlvgh %v0, %r2, 0 +; VECTOR-NEXT: br %r14 + %res = bitcast i16 %Arg to half + ret half %res +} + +define void @f3(half %val, ptr %ptr) { +; NOVEC-LABEL: f3: +; NOVEC: # %bb.0: +; NOVEC-NEXT: # kill: def $f0h killed $f0h def $f0d +; NOVEC-NEXT: lgdr %r0, %f0 +; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: stc %r0, 0(%r2) +; NOVEC-NEXT: br %r14 +; +; VECTOR-LABEL: f3: +; VECTOR: # %bb.0: +; VECTOR-NEXT: vlgvh %r0, %v0, 0 +; VECTOR-NEXT: stc %r0, 0(%r2) +; VECTOR-NEXT: br %r14 + %res = bitcast half %val to i16 + %trunc = trunc i16 %res to i8 + store i8 %trunc, ptr %ptr + ret void +} + +define i16 @f4(half %Arg) { +; NOVEC-LABEL: f4: +; NOVEC: # %bb.0: +; NOVEC-NEXT: # kill: def $f0h killed $f0h def $f0d +; NOVEC-NEXT: lgdr %r0, %f0 +; NOVEC-NEXT: risblg %r2, %r0, 0, 159, 32 +; NOVEC-NEXT: srl %r2, 16 +; NOVEC-NEXT: br %r14 +; +; VECTOR-LABEL: f4: +; VECTOR: # %bb.0: +; VECTOR-NEXT: vlgvh %r2, %v0, 0 +; VECTOR-NEXT: br %r14 + %res = bitcast half %Arg to i16 + ret i16 %res +} diff --git a/llvm/test/CodeGen/SystemZ/fp-libcall.ll b/llvm/test/CodeGen/SystemZ/fp-libcall.ll index 60b698e34fcfe..5069b9b257b80 100644 --- a/llvm/test/CodeGen/SystemZ/fp-libcall.ll +++ b/llvm/test/CodeGen/SystemZ/fp-libcall.ll @@ -212,6 +212,16 @@ define fp128 @f30(fp128 %x, fp128 %y) { ret fp128 %tmp } +define half @f31_half(half %x, half %y) { +; CHECK-LABEL: f31_half: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: brasl %r14, fmaxf@PLT +; CHECK: brasl %r14, __truncsfhf2@PLT + %tmp = call half @llvm.maxnum.f16(half %x, half %y) + ret half %tmp +} + define float @f31(float %x, float %y) { ; CHECK-LABEL: f31: ; CHECK: brasl %r14, fmaxf@PLT diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-01.ll b/llvm/test/CodeGen/SystemZ/fp-mul-01.ll index c5e66ff72c2a4..323907f03b743 100644 --- a/llvm/test/CodeGen/SystemZ/fp-mul-01.ll +++ b/llvm/test/CodeGen/SystemZ/fp-mul-01.ll @@ -6,6 +6,18 @@ declare float @foo() +; Check register multiplication. +define half @f0(half %f1, half %f2) { +; CHECK-LABEL: f0: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: meebr %f0, %f9 +; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %res = fmul half %f1, %f2 + ret half %res +} + ; Check register multiplication. 
define float @f1(float %f1, float %f2) { ; CHECK-LABEL: f1: diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-06.ll b/llvm/test/CodeGen/SystemZ/fp-mul-06.ll index 8fd363bc397d0..6b285a49057dc 100644 --- a/llvm/test/CodeGen/SystemZ/fp-mul-06.ll +++ b/llvm/test/CodeGen/SystemZ/fp-mul-06.ll @@ -3,8 +3,22 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 \ ; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s +declare half @llvm.fma.f16(half %f1, half %f2, half %f3) declare float @llvm.fma.f32(float %f1, float %f2, float %f3) +define half @f0(half %f1, half %f2, half %acc) { +; CHECK-LABEL: f0: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK-SCALAR: maebr %f0, %f9, %f10 +; CHECK-VECTOR: wfmasb %f0, %f0, %f8, %f10 +; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %res = call half @llvm.fma.f16 (half %f1, half %f2, half %acc) + ret half %res +} + define float @f1(float %f1, float %f2, float %acc) { ; CHECK-LABEL: f1: ; CHECK-SCALAR: maebr %f4, %f0, %f2 diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-08.ll b/llvm/test/CodeGen/SystemZ/fp-mul-08.ll index 543ab95551690..2b18abec8d555 100644 --- a/llvm/test/CodeGen/SystemZ/fp-mul-08.ll +++ b/llvm/test/CodeGen/SystemZ/fp-mul-08.ll @@ -3,8 +3,26 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 \ ; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s +declare half @llvm.fma.f16(half %f1, half %f2, half %f3) declare float @llvm.fma.f32(float %f1, float %f2, float %f3) +define half @f0(half %f1, half %f2, half %acc) { +; CHECK-LABEL: f0: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: lcdfr %f0, %f0 +; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK-SCALAR: maebr %f0, %f9, %f8 +; CHECK-VECTOR: wfmasb %f0, %f0, %f8, %f10 +; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %negacc = fneg half %acc + %res = call half @llvm.fma.f16 (half %f1, half %f2, half %negacc) + ret half %res +} + define float @f1(float %f1, float %f2, float %acc) { ; CHECK-LABEL: f1: ; CHECK-SCALAR: msebr %f4, %f0, %f2 diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-10.ll b/llvm/test/CodeGen/SystemZ/fp-mul-10.ll index 669ccbacf7898..1ecf52fbde354 100644 --- a/llvm/test/CodeGen/SystemZ/fp-mul-10.ll +++ b/llvm/test/CodeGen/SystemZ/fp-mul-10.ll @@ -2,6 +2,7 @@ declare double @llvm.fma.f64(double %f1, double %f2, double %f3) declare float @llvm.fma.f32(float %f1, float %f2, float %f3) +declare half @llvm.fma.f16(half %f1, half %f2, half %f3) define double @f1(double %f1, double %f2, double %acc) { ; CHECK-LABEL: f1: @@ -22,6 +23,22 @@ define double @f2(double %f1, double %f2, double %acc) { ret double %negres } +define half @f3_half(half %f1, half %f2, half %acc) { +; CHECK-LABEL: f3_half: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: wfmasb %f0, %f0, %f8, %f10 +; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: lcdfr %f0, %f0 +; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %res = call half @llvm.fma.f16 (half %f1, half %f2, half %acc) + %negres = fneg half %res + ret half %negres +} + define float @f3(float %f1, float %f2, float %acc) { ; CHECK-LABEL: f3: ; CHECK: wfnmasb %f0, %f0, %f2, %f4 @@ -31,6 +48,26 @@ define float @f3(float %f1, float %f2, float %acc) { ret float %negres } +define half @f4_half(half %f1, half 
%f2, half %acc) {
+; CHECK-LABEL: f4_half:
+; CHECK: brasl %r14, __extendhfsf2@PLT
+; CHECK: lcdfr %f0, %f0
+; CHECK: brasl %r14, __truncsfhf2@PLT
+; CHECK: brasl %r14, __extendhfsf2@PLT
+; CHECK: brasl %r14, __extendhfsf2@PLT
+; CHECK: brasl %r14, __extendhfsf2@PLT
+; CHECK: wfmasb %f0, %f0, %f8, %f10
+; CHECK: brasl %r14, __truncsfhf2@PLT
+; CHECK: brasl %r14, __extendhfsf2@PLT
+; CHECK: lcdfr %f0, %f0
+; CHECK: brasl %r14, __truncsfhf2@PLT
+; CHECK: br %r14
+  %negacc = fneg half %acc
+  %res = call half @llvm.fma.f16 (half %f1, half %f2, half %negacc)
+  %negres = fneg half %res
+  ret half %negres
+}
+
 define float @f4(float %f1, float %f2, float %acc) {
 ; CHECK-LABEL: f4:
 ; CHECK: wfnmssb %f0, %f0, %f2, %f4
@@ -40,4 +77,3 @@ define float @f4(float %f1, float %f2, float %acc) {
   %negres = fneg float %res
   ret float %negres
 }
-
diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-15.ll b/llvm/test/CodeGen/SystemZ/fp-mul-15.ll
new file mode 100644
index 0000000000000..c897d05ab86df
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/fp-mul-15.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z16 | FileCheck %s
+;
+; Check that a multiply-and-add does *not* result for half.
+
+define half @f1(half %arg, half %A2, half %A3) {
+; CHECK-LABEL: f1:
+; CHECK: brasl %r14, __extendhfsf2@PLT
+; CHECK: brasl %r14, __extendhfsf2@PLT
+; CHECK: meebr %f0, %f10
+; CHECK: brasl %r14, __truncsfhf2@PLT
+; CHECK: brasl %r14, __extendhfsf2@PLT
+; CHECK: brasl %r14, __extendhfsf2@PLT
+; CHECK: wfasb %f0, %f9, %f0
+; CHECK: brasl %r14, __truncsfhf2@PLT
+
+bb:
+  %i = fmul contract half %arg, %A2
+  %i4 = fadd contract half %i, %A3
+  ret half %i4
+}
diff --git a/llvm/test/CodeGen/SystemZ/fp-neg-01.ll b/llvm/test/CodeGen/SystemZ/fp-neg-01.ll
index 875905de4948d..a8fe8d5da7c8a 100644
--- a/llvm/test/CodeGen/SystemZ/fp-neg-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-neg-01.ll
@@ -3,6 +3,17 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
+; Test f16.
+define half @f0(half %f) {
+; CHECK-LABEL: f0:
+; CHECK: brasl %r14, __extendhfsf2@PLT
+; CHECK: lcdfr %f0, %f0
+; CHECK: brasl %r14, __truncsfhf2@PLT
+; CHECK: br %r14
+  %res = fneg half %f
+  ret half %res
+}
+
 ; Test f32.
 define float @f1(float %f) {
 ; CHECK-LABEL: f1:
diff --git a/llvm/test/CodeGen/SystemZ/fp-neg-02.ll b/llvm/test/CodeGen/SystemZ/fp-neg-02.ll
index 7cd66948e2fc7..848c4740d8540 100644
--- a/llvm/test/CodeGen/SystemZ/fp-neg-02.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-neg-02.ll
@@ -2,6 +2,17 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
 
+; Test f16.
+define half @f0(half %f) {
+; CHECK-LABEL: f0:
+; CHECK: brasl %r14, __extendhfsf2@PLT
+; CHECK: lcdfr %f0, %f0
+; CHECK: brasl %r14, __truncsfhf2@PLT
+; CHECK: br %r14
+  %res = fneg half %f
+  ret half %res
+}
+
 ; Test f32.
 define float @f1(float %f) {
 ; CHECK-LABEL: f1:
diff --git a/llvm/test/CodeGen/SystemZ/fp-round-01.ll b/llvm/test/CodeGen/SystemZ/fp-round-01.ll
index b1db2f547a832..21b354c7a83c4 100644
--- a/llvm/test/CodeGen/SystemZ/fp-round-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-round-01.ll
@@ -34,6 +34,18 @@ define void @f3(ptr %ptr) {
   ret void
 }
 
+; Test nearbyint for f16.
+declare half @llvm.nearbyint.f16(half %f)
+define half @f4_half(half %f) {
+; CHECK-LABEL: f4_half:
+; CHECK: brasl %r14, __extendhfsf2@PLT
+; CHECK: brasl %r14, nearbyintf@PLT
+; CHECK: brasl %r14, __truncsfhf2@PLT
+; CHECK: br %r14
+  %res = call half @llvm.nearbyint.f16(half %f)
+  ret half %res
+}
+
 ; Test nearbyint for f32.
declare float @llvm.nearbyint.f32(float %f) define float @f4(float %f) { @@ -66,6 +78,18 @@ define void @f6(ptr %ptr) { ret void } +; Test floor for f16. +declare half @llvm.floor.f16(half %f) +define half @f7_half(half %f) { +; CHECK-LABEL: f7_half: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: brasl %r14, floorf@PLT +; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %res = call half @llvm.floor.f16(half %f) + ret half %res +} + ; Test floor for f32. declare float @llvm.floor.f32(float %f) define float @f7(float %f) { @@ -98,6 +122,18 @@ define void @f9(ptr %ptr) { ret void } +; Test ceil for f16. +declare half @llvm.ceil.f16(half %f) +define half @f10_half(half %f) { +; CHECK-LABEL: f10_half: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: brasl %r14, ceilf@PLT +; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %res = call half @llvm.ceil.f16(half %f) + ret half %res +} + ; Test ceil for f32. declare float @llvm.ceil.f32(float %f) define float @f10(float %f) { @@ -162,6 +198,18 @@ define void @f15(ptr %ptr) { ret void } +; Test round for f16. +declare half @llvm.round.f16(half %f) +define half @f16_half(half %f) { +; CHECK-LABEL: f16_half: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: brasl %r14, roundf@PLT +; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %res = call half @llvm.round.f16(half %f) + ret half %res +} + ; Test round for f32. declare float @llvm.round.f32(float %f) define float @f16(float %f) { diff --git a/llvm/test/CodeGen/SystemZ/fp-round-02.ll b/llvm/test/CodeGen/SystemZ/fp-round-02.ll index 2cf009ad5b856..f1a0a2847a303 100644 --- a/llvm/test/CodeGen/SystemZ/fp-round-02.ll +++ b/llvm/test/CodeGen/SystemZ/fp-round-02.ll @@ -134,6 +134,18 @@ define void @f12(ptr %ptr) { ret void } +; Test trunc for f16. +declare half @llvm.trunc.f16(half %f) +define half @f13_half(half %f) { +; CHECK-LABEL: f13_half: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: fiebra %f0, 5, %f0, 4 +; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %res = call half @llvm.trunc.f16(half %f) + ret half %res +} + ; Test trunc for f32. declare float @llvm.trunc.f32(float %f) define float @f13(float %f) { @@ -166,6 +178,18 @@ define void @f15(ptr %ptr) { ret void } +; Test round for f16. +declare half @llvm.round.f16(half %f) +define half @f16_half(half %f) { +; CHECK-LABEL: f16_half: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: fiebra %f0, 1, %f0, 4 +; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %res = call half @llvm.round.f16(half %f) + ret half %res +} + ; Test round for f32. declare float @llvm.round.f32(float %f) define float @f16(float %f) { @@ -198,6 +222,18 @@ define void @f18(ptr %ptr) { ret void } +; Test roundeven for f16. +declare half @llvm.roundeven.f16(half %f) +define half @f19_half(half %f) { +; CHECK-LABEL: f19_half: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: fiebra %f0, 4, %f0, 4 +; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %res = call half @llvm.roundeven.f16(half %f) + ret half %res +} + ; Test roundeven for f32. 
declare float @llvm.roundeven.f32(float %f) define float @f19(float %f) { diff --git a/llvm/test/CodeGen/SystemZ/fp-sqrt-01.ll b/llvm/test/CodeGen/SystemZ/fp-sqrt-01.ll index 996bdc458b9de..2f7d38339eacd 100644 --- a/llvm/test/CodeGen/SystemZ/fp-sqrt-01.ll +++ b/llvm/test/CodeGen/SystemZ/fp-sqrt-01.ll @@ -4,9 +4,21 @@ ; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s +declare half @llvm.sqrt.f16(half) declare float @llvm.sqrt.f32(float) declare float @sqrtf(float) +; Check register square root. +define half @f0(half %val) { +; CHECK-LABEL: f0: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: sqebr %f0, %f0 +; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %res = call half @llvm.sqrt.f16(half %val) + ret half %res +} + ; Check register square root. define float @f1(float %val) { ; CHECK-LABEL: f1: diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-cmp-04.ll b/llvm/test/CodeGen/SystemZ/fp-strict-cmp-04.ll index bf9ccbcd70550..dfefc43c02bed 100644 --- a/llvm/test/CodeGen/SystemZ/fp-strict-cmp-04.ll +++ b/llvm/test/CodeGen/SystemZ/fp-strict-cmp-04.ll @@ -298,6 +298,43 @@ exit: ret float %add } +define half @f12_half(half %dummy, half %val) #0 { +; CHECK-LABEL: f12_half: +; CHECK: ler %f9, %f2 +; CHECK-NEXT: ler %f0, %f2 +; CHECK-NEXT: #APP +; CHECK-NEXT: ler %f8, %f0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lzer %f0 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: ler %f10, %f0 +; CHECK-NEXT: ler %f0, %f9 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: cebr %f0, %f10 +; CHECK-NEXT: jl .LBB11_2 +; CHECK-NEXT:# %bb.1: # %store +; CHECK-NEXT: #APP +; CHECK-NEXT: blah +; CHECK-NEXT: #NO_APP +; CHECK-NEXT:.LBB11_2: # %exit +; CHECK-NEXT: ler %f0, %f8 +; CHECK: br %r14 +entry: + %ret = call half asm "ler $0, $1", "=f,{f0}"(half %val) #0 + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16( + half %val, half 0.0, + metadata !"olt", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + call void asm sideeffect "blah", ""() #0 + br label %exit + +exit: + ret half %ret +} + ; Test that LER does not get converted to LTEBR as %f0 is live after it. 
define float @f12(float %dummy, float %val) #0 { ; CHECK-LABEL: f12: @@ -309,7 +346,7 @@ define float @f12(float %dummy, float %val) #0 { ; CHECK-NEXT: blr %r14 ; CHECK: br %r14 entry: - %ret = call float asm "blah $1", "=f,{f0}"(float %val) #0 + %ret = call float asm "$0 = blah $1", "=f,{f0}"(float %val) #0 %cmp = call i1 @llvm.experimental.constrained.fcmp.f32( float %val, float 0.0, metadata !"olt", @@ -384,6 +421,43 @@ exit: ret void } +define half @f15_half(half %val, half %dummy) #0 { +; CHECK-LABEL: f15_half: +; CHECK: ler %f9, %f0 +; CHECK-NEXT: ler %f2, %f0 +; CHECK-NEXT: #APP +; CHECK-NEXT: ler %f8, %f2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lzer %f0 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: ler %f10, %f0 +; CHECK-NEXT: ler %f0, %f9 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: cebr %f0, %f10 +; CHECK-NEXT: jl .LBB15_2 +; CHECK-NEXT:# %bb.1: # %store +; CHECK-NEXT: #APP +; CHECK-NEXT: blah +; CHECK-NEXT: #NO_APP +; CHECK-NEXT:.LBB15_2: # %exit +; CHECK-NEXT: ler %f0, %f8 +; CHECK: br %r14 +entry: + %ret = call half asm "ler $0, $1", "=f,{f2}"(half %val) #0 + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16( + half %val, half 0.0, + metadata !"olt", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + call void asm sideeffect "blah", ""() #0 + br label %exit + +exit: + ret half %ret +} + ; Test a case where it is the source rather than destination of LER that ; we need, but cannot convert the LER. define float @f15(float %val, float %dummy) #0 { @@ -491,6 +565,43 @@ exit: ret float %res } +define half @f19_half(half %dummy, half %val) #0 { +; CHECK-LABEL: f19_half: +; CHECK: ler %f9, %f2 +; CHECK-NEXT: ler %f0, %f2 +; CHECK-NEXT: #APP +; CHECK-NEXT: ler %f8, %f0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lzer %f0 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: ler %f10, %f0 +; CHECK-NEXT: ler %f0, %f9 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: cebr %f0, %f10 +; CHECK-NEXT: jl .LBB20_2 +; CHECK-NEXT:# %bb.1: # %store +; CHECK-NEXT: #APP +; CHECK-NEXT: blah +; CHECK-NEXT: #NO_APP +; CHECK-NEXT:.LBB20_2: # %exit +; CHECK-NEXT: ler %f0, %f8 +; CHECK: br %r14 +entry: + %ret = call half asm sideeffect "ler $0, $1", "=f,{f0}"(half %val) #0 + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16( + half %val, half 0.0, + metadata !"olt", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + call void asm sideeffect "blah", ""() #0 + br label %exit + +exit: + ret half %ret +} + ; Verify that we cannot convert LER to LTEBR and omit the compare if ; there may be an intervening change to the exception flags. 
define float @f19(float %dummy, float %val) #0 { @@ -524,6 +635,7 @@ declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, me declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata) declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata) declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmp.f16(half, half, metadata, metadata) declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata) declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata) declare i1 @llvm.experimental.constrained.fcmp.f128(fp128, fp128, metadata, metadata) diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-cmps-01.ll b/llvm/test/CodeGen/SystemZ/fp-strict-cmps-01.ll index ac8894417921c..20efbf60fdbdc 100644 --- a/llvm/test/CodeGen/SystemZ/fp-strict-cmps-01.ll +++ b/llvm/test/CodeGen/SystemZ/fp-strict-cmps-01.ll @@ -8,6 +8,26 @@ declare float @foo() +; Check comparison with registers. +define i64 @f0(i64 %a, i64 %b, half %f1, half %f2) #0 { +; CHECK-LABEL: f0: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: kebr %f0, %f9 +; CHECK-SCALAR-NEXT: je +; CHECK-SCALAR: lgr %r13, %r12 +; CHECK-SCALAR: lgr %r2, %r13 +; CHECK-VECTOR: locgrne %r12, %r13 +; CHECK-VECTOR: lgr %r2, %r12 +; CHECK: br %r14 + %cond = call i1 @llvm.experimental.constrained.fcmps.f16( + half %f1, half %f2, + metadata !"oeq", + metadata !"fpexcept.strict") #0 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + ; Check comparison with registers. define i64 @f1(i64 %a, i64 %b, float %f1, float %f2) #0 { ; CHECK-LABEL: f1: diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-cmps-04.ll b/llvm/test/CodeGen/SystemZ/fp-strict-cmps-04.ll index e178769f263e6..ad86df1753192 100644 --- a/llvm/test/CodeGen/SystemZ/fp-strict-cmps-04.ll +++ b/llvm/test/CodeGen/SystemZ/fp-strict-cmps-04.ll @@ -110,6 +110,43 @@ exit: ret float %res } +define half @f12_half(half %dummy, half %val) #0 { +; CHECK-LABEL: f12_half: +; CHECK: ler %f9, %f2 +; CHECK-NEXT: ler %f0, %f2 +; CHECK-NEXT: #APP +; CHECK-NEXT: ler %f8, %f0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lzer %f0 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: ler %f10, %f0 +; CHECK-NEXT: ler %f0, %f9 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: kebr %f0, %f10 +; CHECK-NEXT: jl .LBB4_2 +; CHECK-NEXT:# %bb.1: # %store +; CHECK-NEXT: #APP +; CHECK-NEXT: blah +; CHECK-NEXT: #NO_APP +; CHECK-NEXT:.LBB4_2: # %exit +; CHECK-NEXT: ler %f0, %f8 +; CHECK: br %r14 +entry: + %ret = call half asm "ler $0, $1", "=f,{f0}"(half %val) #0 + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16( + half %val, half 0.0, + metadata !"olt", + metadata !"fpexcept.strict") #0 + br i1 %cmp, label %exit, label %store + +store: + call void asm sideeffect "blah", ""() #0 + br label %exit + +exit: + ret half %ret +} + ; Test that LER does not get converted to LTEBR. 
define float @f12(float %dummy, float %val) #0 { ; CHECK-LABEL: f12: diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-conv-01.ll b/llvm/test/CodeGen/SystemZ/fp-strict-conv-01.ll index 6b9db1569cf8c..8df7ef5f3d7c8 100644 --- a/llvm/test/CodeGen/SystemZ/fp-strict-conv-01.ll +++ b/llvm/test/CodeGen/SystemZ/fp-strict-conv-01.ll @@ -5,13 +5,29 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \ ; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s + +declare half @llvm.experimental.constrained.fptrunc.f16.f64(double, metadata, metadata) + declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata) +declare half @llvm.experimental.constrained.fptrunc.f16.f128(fp128, metadata, metadata) declare float @llvm.experimental.constrained.fptrunc.f32.f128(fp128, metadata, metadata) declare double @llvm.experimental.constrained.fptrunc.f64.f128(fp128, metadata, metadata) declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata) +; Test f64->f16. +define half @f0(double %d1, double %d2) #0 { +; CHECK-LABEL: f0: +; CHECK: brasl %r14, __truncdfhf2@PLT +; CHECK: br %r14 + %res = call half @llvm.experimental.constrained.fptrunc.f16.f64( + double %d2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret half %res +} + ; Test f64->f32. define float @f1(double %d1, double %d2) #0 { ; CHECK-LABEL: f1: @@ -25,6 +41,19 @@ define float @f1(double %d1, double %d2) #0 { ret float %res } +; Test f128->f16. +define half @f2_half(ptr %ptr) #0 { +; CHECK-LABEL: f2_half: +; CHECK: brasl %r14, __trunctfhf2@PLT +; CHECK: br %r14 + %val = load fp128, ptr %ptr + %res = call half @llvm.experimental.constrained.fptrunc.f16.f128( + fp128 %val, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret half %res +} + ; Test f128->f32. define float @f2(ptr %ptr) #0 { ; CHECK-LABEL: f2: diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-conv-02.ll b/llvm/test/CodeGen/SystemZ/fp-strict-conv-02.ll index c79f51dd1ae9e..725d53cabb937 100644 --- a/llvm/test/CodeGen/SystemZ/fp-strict-conv-02.ll +++ b/llvm/test/CodeGen/SystemZ/fp-strict-conv-02.ll @@ -2,8 +2,19 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare double @llvm.experimental.constrained.fpext.f64.f16(half, metadata) declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata) +; Check register extension. +define double @f0(half %val) #0 { +; CHECK-LABEL: f0: +; CHECK: brasl %r14, __extendhfdf2@PLT +; CHECK: br %r14 + %res = call double @llvm.experimental.constrained.fpext.f64.f16(half %val, + metadata !"fpexcept.strict") #0 + ret double %res +} + ; Check register extension. define double @f1(float %val) #0 { ; CHECK-LABEL: f1: diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-conv-05.ll b/llvm/test/CodeGen/SystemZ/fp-strict-conv-05.ll index f2a66098d32e2..96f764fdab47a 100644 --- a/llvm/test/CodeGen/SystemZ/fp-strict-conv-05.ll +++ b/llvm/test/CodeGen/SystemZ/fp-strict-conv-05.ll @@ -2,10 +2,23 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare half @llvm.experimental.constrained.sitofp.f16.i32(i32, metadata, metadata) declare float @llvm.experimental.constrained.sitofp.f32.i32(i32, metadata, metadata) declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata) declare fp128 @llvm.experimental.constrained.sitofp.f128.i32(i32, metadata, metadata) +; Check i32->f16. 
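+; The i32 is first converted with CEFBR, and the f32 result is then
+; truncated to half via the __truncsfhf2 libcall.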
+define half @f0(i32 %i) #0 {
+; CHECK-LABEL: f0:
+; CHECK: cefbr %f0, %r2
+; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT
+; CHECK: br %r14
+  %conv = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %i,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret half %conv
+}
+
 ; Check i32->f32.
 define float @f1(i32 %i) #0 {
 ; CHECK-LABEL: f1:
diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-conv-06.ll b/llvm/test/CodeGen/SystemZ/fp-strict-conv-06.ll
index e23eaf3ab359a..2bd8556edd664 100644
--- a/llvm/test/CodeGen/SystemZ/fp-strict-conv-06.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-conv-06.ll
@@ -2,10 +2,25 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
 
+declare half @llvm.experimental.constrained.uitofp.f16.i32(i32, metadata, metadata)
 declare float @llvm.experimental.constrained.uitofp.f32.i32(i32, metadata, metadata)
 declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata)
 declare fp128 @llvm.experimental.constrained.uitofp.f128.i32(i32, metadata, metadata)
 
+; Check i32->f16. There is no native instruction, so we must promote
+; to i64 first.
+define half @f0(i32 %i) #0 {
+; CHECK-LABEL: f0:
+; CHECK: llgfr [[REGISTER:%r[0-5]]], %r2
+; CHECK: cegbr %f0, [[REGISTER]]
+; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT
+; CHECK: br %r14
+  %conv = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %i,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret half %conv
+}
+
 ; Check i32->f32. There is no native instruction, so we must promote
 ; to i64 first.
 define float @f1(i32 %i) #0 {
diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-conv-07.ll b/llvm/test/CodeGen/SystemZ/fp-strict-conv-07.ll
index d18aa38966009..d2a568ed19a4e 100644
--- a/llvm/test/CodeGen/SystemZ/fp-strict-conv-07.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-conv-07.ll
@@ -2,10 +2,23 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+declare half @llvm.experimental.constrained.sitofp.f16.i64(i64, metadata, metadata)
 declare float @llvm.experimental.constrained.sitofp.f32.i64(i64, metadata, metadata)
 declare double @llvm.experimental.constrained.sitofp.f64.i64(i64, metadata, metadata)
 declare fp128 @llvm.experimental.constrained.sitofp.f128.i64(i64, metadata, metadata)
 
+; Test i64->f16.
+define half @f0(i64 %i) #0 {
+; CHECK-LABEL: f0:
+; CHECK: cegbr %f0, %r2
+; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT
+; CHECK: br %r14
+  %conv = call half @llvm.experimental.constrained.sitofp.f16.i64(i64 %i,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret half %conv
+}
+
 ; Test i64->f32.
 define float @f1(i64 %i) #0 {
 ; CHECK-LABEL: f1:
diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-conv-08.ll b/llvm/test/CodeGen/SystemZ/fp-strict-conv-08.ll
index 2cbcf2a2ef0a3..e0821bf4f529e 100644
--- a/llvm/test/CodeGen/SystemZ/fp-strict-conv-08.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-conv-08.ll
@@ -2,10 +2,22 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
 
+declare half @llvm.experimental.constrained.uitofp.f16.i64(i64, metadata, metadata)
 declare float @llvm.experimental.constrained.uitofp.f32.i64(i64, metadata, metadata)
 declare double @llvm.experimental.constrained.uitofp.f64.i64(i64, metadata, metadata)
 declare fp128 @llvm.experimental.constrained.uitofp.f128.i64(i64, metadata, metadata)
 
+; Test i64->f16. For z10, this results in just a single libcall.
+define half @f0(i64 %i) #0 {
+; CHECK-LABEL: f0:
+; CHECK: brasl %r14, __floatundihf@PLT
+; CHECK: br %r14
+  %conv = call half @llvm.experimental.constrained.uitofp.f16.i64(i64 %i,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret half %conv
+}
+
 ; Test i64->f32. There's no native support for unsigned i64-to-fp conversions,
 ; but we should be able to implement them using signed i64-to-fp conversions.
 define float @f1(i64 %i) #0 {
diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-conv-09.ll b/llvm/test/CodeGen/SystemZ/fp-strict-conv-09.ll
index a54055120f727..40da726b7a46a 100644
--- a/llvm/test/CodeGen/SystemZ/fp-strict-conv-09.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-conv-09.ll
@@ -2,10 +2,22 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+declare i32 @llvm.experimental.constrained.fptosi.i32.f16(half, metadata)
 declare i32 @llvm.experimental.constrained.fptosi.i32.f32(float, metadata)
 declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata)
 declare i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128, metadata)
 
+; Test f16->i32.
+define i32 @f0(half %f) #0 {
+; CHECK-LABEL: f0:
+; CHECK: brasl %r14, __extendhfsf2@PLT
+; CHECK-NEXT: cfebr %r2, 5, %f0
+; CHECK: br %r14
+  %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %f,
+                                               metadata !"fpexcept.strict") #0
+  ret i32 %conv
+}
+
 ; Test f32->i32.
 define i32 @f1(float %f) #0 {
 ; CHECK-LABEL: f1:
diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-conv-10.ll b/llvm/test/CodeGen/SystemZ/fp-strict-conv-10.ll
index 7cbcfeea8cf60..d2206a40169e5 100644
--- a/llvm/test/CodeGen/SystemZ/fp-strict-conv-10.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-conv-10.ll
@@ -9,25 +9,45 @@
 ; outside the i32 range but in the i64 range, so use the default expansion.
 ; Note that the strict expansion sequence must be used.
 
+declare i32 @llvm.experimental.constrained.fptoui.i32.f16(half, metadata)
 declare i32 @llvm.experimental.constrained.fptoui.i32.f32(float, metadata)
 declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata)
 declare i32 @llvm.experimental.constrained.fptoui.i32.f128(fp128, metadata)
 
+; Test f16->i32. Converted to signed as the max half value is smaller than
+; the signed integer range.
+define i32 @f0(half %f) #0 {
+; CHECK-LABEL: f0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -160
+; CHECK-NEXT: .cfi_def_cfa_offset 320
+; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
+; CHECK-NEXT: cfebr %r2, 5, %f0
+; CHECK-NEXT: lmg %r14, %r15, 272(%r15)
+; CHECK-NEXT: br %r14
+  %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %f,
+                                               metadata !"fpexcept.strict") #0
+  ret i32 %conv
+}
+
 ; Test f32->i32.
define i32 @f1(float %f) #0 { ; CHECK-LABEL: f1: ; CHECK: # %bb.0: -; CHECK-NEXT: larl %r1, .LCPI0_0 +; CHECK-NEXT: larl %r1, .LCPI1_0 ; CHECK-NEXT: le %f1, 0(%r1) ; CHECK-NEXT: kebr %f0, %f1 -; CHECK-NEXT: jnl .LBB0_2 +; CHECK-NEXT: jnl .LBB1_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lhi %r0, 0 ; CHECK-NEXT: lzer %f1 -; CHECK-NEXT: j .LBB0_3 -; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: j .LBB1_3 +; CHECK-NEXT: .LBB1_2: ; CHECK-NEXT: llilh %r0, 32768 -; CHECK-NEXT: .LBB0_3: +; CHECK-NEXT: .LBB1_3: ; CHECK-NEXT: sebr %f0, %f1 ; CHECK-NEXT: cfebr %r2, 5, %f0 ; CHECK-NEXT: xr %r2, %r0 @@ -41,17 +61,17 @@ define i32 @f1(float %f) #0 { define i32 @f2(double %f) #0 { ; CHECK-LABEL: f2: ; CHECK: # %bb.0: -; CHECK-NEXT: larl %r1, .LCPI1_0 +; CHECK-NEXT: larl %r1, .LCPI2_0 ; CHECK-NEXT: ld %f1, 0(%r1) ; CHECK-NEXT: kdbr %f0, %f1 -; CHECK-NEXT: jnl .LBB1_2 +; CHECK-NEXT: jnl .LBB2_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lhi %r0, 0 ; CHECK-NEXT: lzdr %f1 -; CHECK-NEXT: j .LBB1_3 -; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: j .LBB2_3 +; CHECK-NEXT: .LBB2_2: ; CHECK-NEXT: llilh %r0, 32768 -; CHECK-NEXT: .LBB1_3: +; CHECK-NEXT: .LBB2_3: ; CHECK-NEXT: sdbr %f0, %f1 ; CHECK-NEXT: cfdbr %r2, 5, %f0 ; CHECK-NEXT: xr %r2, %r0 @@ -67,17 +87,17 @@ define i32 @f3(ptr %src) #0 { ; CHECK: # %bb.0: ; CHECK-NEXT: ld %f0, 0(%r2) ; CHECK-NEXT: ld %f2, 8(%r2) -; CHECK-NEXT: larl %r1, .LCPI2_0 +; CHECK-NEXT: larl %r1, .LCPI3_0 ; CHECK-NEXT: lxeb %f1, 0(%r1) ; CHECK-NEXT: kxbr %f0, %f1 -; CHECK-NEXT: jnl .LBB2_2 +; CHECK-NEXT: jnl .LBB3_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lhi %r0, 0 ; CHECK-NEXT: lzxr %f1 -; CHECK-NEXT: j .LBB2_3 -; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: j .LBB3_3 +; CHECK-NEXT: .LBB3_2: ; CHECK-NEXT: llilh %r0, 32768 -; CHECK-NEXT: .LBB2_3: +; CHECK-NEXT: .LBB3_3: ; CHECK-NEXT: sxbr %f0, %f1 ; CHECK-NEXT: cfxbr %r2, 5, %f0 ; CHECK-NEXT: xr %r2, %r0 diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-conv-11.ll b/llvm/test/CodeGen/SystemZ/fp-strict-conv-11.ll index 27af314cff01b..dd8a708599629 100644 --- a/llvm/test/CodeGen/SystemZ/fp-strict-conv-11.ll +++ b/llvm/test/CodeGen/SystemZ/fp-strict-conv-11.ll @@ -2,10 +2,22 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare i64 @llvm.experimental.constrained.fptosi.i64.f16(half, metadata) declare i64 @llvm.experimental.constrained.fptosi.i64.f32(float, metadata) declare i64 @llvm.experimental.constrained.fptosi.i64.f64(double, metadata) declare i64 @llvm.experimental.constrained.fptosi.i64.f128(fp128, metadata) +; Test f16->i64. +define i64 @f0(half %f) #0 { +; CHECK-LABEL: f0: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: cgebr %r2, 5, %f0 +; CHECK: br %r14 + %conv = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %f, + metadata !"fpexcept.strict") #0 + ret i64 %conv +} + ; Test f32->i64. define i64 @f1(float %f) #0 { ; CHECK-LABEL: f1: diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-conv-12.ll b/llvm/test/CodeGen/SystemZ/fp-strict-conv-12.ll index 69bbd82e29898..76c7188641724 100644 --- a/llvm/test/CodeGen/SystemZ/fp-strict-conv-12.ll +++ b/llvm/test/CodeGen/SystemZ/fp-strict-conv-12.ll @@ -8,25 +8,45 @@ ; Convert via signed i64s instead. ; Note that the strict expansion sequence must be used. +declare i64 @llvm.experimental.constrained.fptoui.i64.f16(half, metadata) declare i64 @llvm.experimental.constrained.fptoui.i64.f32(float, metadata) declare i64 @llvm.experimental.constrained.fptoui.i64.f64(double, metadata) declare i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128, metadata) +; Test f16->i64. 
Converted to signed as the max half value is smaller than
+; the signed integer range.
+define i64 @f0(half %f) #0 {
+; CHECK-LABEL: f0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -160
+; CHECK-NEXT: .cfi_def_cfa_offset 320
+; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
+; CHECK-NEXT: cgebr %r2, 5, %f0
+; CHECK-NEXT: lmg %r14, %r15, 272(%r15)
+; CHECK-NEXT: br %r14
+  %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %f,
+                                               metadata !"fpexcept.strict") #0
+  ret i64 %conv
+}
+
 ; Test f32->i64.
 define i64 @f1(float %f) #0 {
 ; CHECK-LABEL: f1:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: larl %r1, .LCPI0_0
+; CHECK-NEXT: larl %r1, .LCPI1_0
 ; CHECK-NEXT: le %f1, 0(%r1)
 ; CHECK-NEXT: kebr %f0, %f1
-; CHECK-NEXT: jnl .LBB0_2
+; CHECK-NEXT: jnl .LBB1_2
 ; CHECK-NEXT: # %bb.1:
 ; CHECK-NEXT: lghi %r0, 0
 ; CHECK-NEXT: lzer %f1
-; CHECK-NEXT: j .LBB0_3
-; CHECK-NEXT: .LBB0_2:
+; CHECK-NEXT: j .LBB1_3
+; CHECK-NEXT: .LBB1_2:
 ; CHECK-NEXT: llihh %r0, 32768
-; CHECK-NEXT: .LBB0_3:
+; CHECK-NEXT: .LBB1_3:
 ; CHECK-NEXT: sebr %f0, %f1
 ; CHECK-NEXT: cgebr %r2, 5, %f0
 ; CHECK-NEXT: xgr %r2, %r0
@@ -40,17 +60,17 @@ define i64 @f1(float %f) #0 {
 define i64 @f2(double %f) #0 {
 ; CHECK-LABEL: f2:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: larl %r1, .LCPI1_0
+; CHECK-NEXT: larl %r1, .LCPI2_0
 ; CHECK-NEXT: ld %f1, 0(%r1)
 ; CHECK-NEXT: kdbr %f0, %f1
-; CHECK-NEXT: jnl .LBB1_2
+; CHECK-NEXT: jnl .LBB2_2
 ; CHECK-NEXT: # %bb.1:
 ; CHECK-NEXT: lghi %r0, 0
 ; CHECK-NEXT: lzdr %f1
-; CHECK-NEXT: j .LBB1_3
-; CHECK-NEXT: .LBB1_2:
+; CHECK-NEXT: j .LBB2_3
+; CHECK-NEXT: .LBB2_2:
 ; CHECK-NEXT: llihh %r0, 32768
-; CHECK-NEXT: .LBB1_3:
+; CHECK-NEXT: .LBB2_3:
 ; CHECK-NEXT: sdbr %f0, %f1
 ; CHECK-NEXT: cgdbr %r2, 5, %f0
 ; CHECK-NEXT: xgr %r2, %r0
@@ -66,17 +86,17 @@ define i64 @f3(ptr %src) #0 {
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: ld %f0, 0(%r2)
 ; CHECK-NEXT: ld %f2, 8(%r2)
-; CHECK-NEXT: larl %r1, .LCPI2_0
+; CHECK-NEXT: larl %r1, .LCPI3_0
 ; CHECK-NEXT: lxeb %f1, 0(%r1)
 ; CHECK-NEXT: kxbr %f0, %f1
-; CHECK-NEXT: jnl .LBB2_2
+; CHECK-NEXT: jnl .LBB3_2
 ; CHECK-NEXT: # %bb.1:
 ; CHECK-NEXT: lghi %r0, 0
 ; CHECK-NEXT: lzxr %f1
-; CHECK-NEXT: j .LBB2_3
-; CHECK-NEXT: .LBB2_2:
+; CHECK-NEXT: j .LBB3_3
+; CHECK-NEXT: .LBB3_2:
 ; CHECK-NEXT: llihh %r0, 32768
-; CHECK-NEXT: .LBB2_3:
+; CHECK-NEXT: .LBB3_3:
 ; CHECK-NEXT: sxbr %f0, %f1
 ; CHECK-NEXT: cgxbr %r2, 5, %f0
 ; CHECK-NEXT: xgr %r2, %r0
diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-conv-13.ll b/llvm/test/CodeGen/SystemZ/fp-strict-conv-13.ll
index 41913106f5340..2b1c47d0d91e4 100644
--- a/llvm/test/CodeGen/SystemZ/fp-strict-conv-13.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-conv-13.ll
@@ -3,14 +3,28 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
 
+declare half @llvm.experimental.constrained.uitofp.f16.i32(i32, metadata, metadata)
 declare float @llvm.experimental.constrained.uitofp.f32.i32(i32, metadata, metadata)
 declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata)
 declare fp128 @llvm.experimental.constrained.uitofp.f128.i32(i32, metadata, metadata)
 
+declare half @llvm.experimental.constrained.uitofp.f16.i64(i64, metadata, metadata)
 declare float @llvm.experimental.constrained.uitofp.f32.i64(i64, metadata, metadata)
 declare double @llvm.experimental.constrained.uitofp.f64.i64(i64, metadata, metadata)
 declare fp128 @llvm.experimental.constrained.uitofp.f128.i64(i64, metadata, metadata)
 
+; Check i32->f16.
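+; With z196, the unsigned i32 is converted directly with CELFBR before the
+; f32 result is truncated to half via the __truncsfhf2 libcall.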
+define half @f0(i32 %i) #0 { +; CHECK-LABEL: f0: +; CHECK: celfbr %f0, 0, %r2, 0 +; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %conv = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %i, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret half %conv +} + ; Check i32->f32. define float @f1(i32 %i) #0 { ; CHECK-LABEL: f1: @@ -47,10 +61,22 @@ define void @f3(i32 %i, ptr %dst) #0 { ret void } -; Check i64->f32. -define float @f4(i64 %i) #0 { +; Check i64->f16. +define half @f4(i64 %i) #0 { ; CHECK-LABEL: f4: ; CHECK: celgbr %f0, 0, %r2, 0 +; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %conv = call half @llvm.experimental.constrained.uitofp.f16.i64(i64 %i, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret half %conv +} + +; Check i64->f32. +define float @f5(i64 %i) #0 { +; CHECK-LABEL: f5: +; CHECK: celgbr %f0, 0, %r2, 0 ; CHECK: br %r14 %conv = call float @llvm.experimental.constrained.uitofp.f32.i64(i64 %i, metadata !"round.dynamic", @@ -59,8 +85,8 @@ define float @f4(i64 %i) #0 { } ; Check i64->f64. -define double @f5(i64 %i) #0 { -; CHECK-LABEL: f5: +define double @f6(i64 %i) #0 { +; CHECK-LABEL: f6: ; CHECK: cdlgbr %f0, 0, %r2, 0 ; CHECK: br %r14 %conv = call double @llvm.experimental.constrained.uitofp.f64.i64(i64 %i, @@ -70,8 +96,8 @@ define double @f5(i64 %i) #0 { } ; Check i64->f128. -define void @f6(i64 %i, ptr %dst) #0 { -; CHECK-LABEL: f6: +define void @f7(i64 %i, ptr %dst) #0 { +; CHECK-LABEL: f7: ; CHECK: cxlgbr %f0, 0, %r2, 0 ; CHECK-DAG: std %f0, 0(%r3) ; CHECK-DAG: std %f2, 8(%r3) diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-conv-14.ll b/llvm/test/CodeGen/SystemZ/fp-strict-conv-14.ll index aa82a1d91c4a8..1d3387f610d72 100644 --- a/llvm/test/CodeGen/SystemZ/fp-strict-conv-14.ll +++ b/llvm/test/CodeGen/SystemZ/fp-strict-conv-14.ll @@ -2,14 +2,27 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s +declare i32 @llvm.experimental.constrained.fptoui.i32.f16(half, metadata) declare i32 @llvm.experimental.constrained.fptoui.i32.f32(float, metadata) declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata) declare i32 @llvm.experimental.constrained.fptoui.i32.f128(fp128, metadata) +declare i64 @llvm.experimental.constrained.fptoui.i64.f16(half, metadata) declare i64 @llvm.experimental.constrained.fptoui.i64.f32(float, metadata) declare i64 @llvm.experimental.constrained.fptoui.i64.f64(double, metadata) declare i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128, metadata) +; Test f16->i32. +define i32 @f0(half %f) #0 { +; CHECK-LABEL: f0: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: clfebr %r2, 5, %f0, 0 +; CHECK: br %r14 + %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %f, + metadata !"fpexcept.strict") #0 + ret i32 %conv +} + ; Test f32->i32. define i32 @f1(float %f) #0 { ; CHECK-LABEL: f1: @@ -43,9 +56,20 @@ define i32 @f3(ptr %src) #0 { ret i32 %conv } -; Test f32->i64. -define i64 @f4(float %f) #0 { +; Test f16->i64. +define i64 @f4(half %f) #0 { ; CHECK-LABEL: f4: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: clgebr %r2, 5, %f0, 0 +; CHECK: br %r14 + %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %f, + metadata !"fpexcept.strict") #0 + ret i64 %conv +} + +; Test f32->i64. 
+define i64 @f5(float %f) #0 { +; CHECK-LABEL: f5: ; CHECK: clgebr %r2, 5, %f0, 0 ; CHECK: br %r14 %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %f, @@ -54,8 +78,8 @@ define i64 @f4(float %f) #0 { } ; Test f64->i64. -define i64 @f5(double %f) #0 { -; CHECK-LABEL: f5: +define i64 @f6(double %f) #0 { +; CHECK-LABEL: f6: ; CHECK: clgdbr %r2, 5, %f0, 0 ; CHECK: br %r14 %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %f, @@ -64,8 +88,8 @@ define i64 @f5(double %f) #0 { } ; Test f128->i64. -define i64 @f6(ptr %src) #0 { -; CHECK-LABEL: f6: +define i64 @f7(ptr %src) #0 { +; CHECK-LABEL: f7: ; CHECK-DAG: ld %f0, 0(%r2) ; CHECK-DAG: ld %f2, 8(%r2) ; CHECK: clgxbr %r2, 5, %f0, 0 diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-conv-15.ll b/llvm/test/CodeGen/SystemZ/fp-strict-conv-15.ll index de93192b5f305..a53a3537a7390 100644 --- a/llvm/test/CodeGen/SystemZ/fp-strict-conv-15.ll +++ b/llvm/test/CodeGen/SystemZ/fp-strict-conv-15.ll @@ -2,9 +2,11 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s +declare half @llvm.experimental.constrained.fptrunc.f16.f128(fp128, metadata, metadata) declare float @llvm.experimental.constrained.fptrunc.f32.f128(fp128, metadata, metadata) declare double @llvm.experimental.constrained.fptrunc.f64.f128(fp128, metadata, metadata) +declare fp128 @llvm.experimental.constrained.fpext.f128.f16(half, metadata) declare fp128 @llvm.experimental.constrained.fpext.f128.f32(float, metadata) declare fp128 @llvm.experimental.constrained.fpext.f128.f64(double, metadata) @@ -22,6 +24,21 @@ define double @f1(ptr %ptr) #0 { ret double %res } +; Test f128->f16. +define half @f2_half(ptr %ptr) #0 { +; CHECK-LABEL: f2_half: +; CHECK: vl [[REG:%v[0-9]+]], 0(%r2) +; CHECK: vst %v0, 160(%r15), 3 +; CHECK: brasl %r14, __trunctfhf2@PLT +; CHECK: br %r14 + %val = load fp128, ptr %ptr + %res = call half @llvm.experimental.constrained.fptrunc.f16.f128( + fp128 %val, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret half %res +} + ; Test f128->f32. define float @f2(ptr %ptr) #0 { ; CHECK-LABEL: f2: @@ -62,4 +79,15 @@ define void @f4(ptr %dst, float %val) #0 { ret void } +; Test f16->f128. 
+define void @f5(ptr %dst, half %val) #0 { +; CHECK-LABEL: f5: +; CHECK: brasl %r14, __extendhftf2@PLT +; CHECK: br %r14 + %res = call fp128 @llvm.experimental.constrained.fpext.f128.f16(half %val, + metadata !"fpexcept.strict") #0 + store fp128 %res, ptr %dst + ret void +} + attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-conv-17.ll b/llvm/test/CodeGen/SystemZ/fp-strict-conv-17.ll index 3ff63242a6d82..c9863af760688 100644 --- a/llvm/test/CodeGen/SystemZ/fp-strict-conv-17.ll +++ b/llvm/test/CodeGen/SystemZ/fp-strict-conv-17.ll @@ -6,18 +6,22 @@ declare fp128 @llvm.experimental.constrained.sitofp.f128.i128(i128, metadata, metadata) declare double @llvm.experimental.constrained.sitofp.f64.i128(i128, metadata, metadata) declare float @llvm.experimental.constrained.sitofp.f32.i128(i128, metadata, metadata) +declare half @llvm.experimental.constrained.sitofp.f16.i128(i128, metadata, metadata) declare fp128 @llvm.experimental.constrained.uitofp.f128.i128(i128, metadata, metadata) declare double @llvm.experimental.constrained.uitofp.f64.i128(i128, metadata, metadata) declare float @llvm.experimental.constrained.uitofp.f32.i128(i128, metadata, metadata) +declare half @llvm.experimental.constrained.uitofp.f16.i128(i128, metadata, metadata) declare i128 @llvm.experimental.constrained.fptosi.i128.f128(fp128, metadata) declare i128 @llvm.experimental.constrained.fptosi.i128.f64(double, metadata) declare i128 @llvm.experimental.constrained.fptosi.i128.f32(float, metadata) +declare i128 @llvm.experimental.constrained.fptosi.i128.f16(half, metadata) declare i128 @llvm.experimental.constrained.fptoui.i128.f128(fp128, metadata) declare i128 @llvm.experimental.constrained.fptoui.i128.f64(double, metadata) declare i128 @llvm.experimental.constrained.fptoui.i128.f32(float, metadata) +declare i128 @llvm.experimental.constrained.fptoui.i128.f16(half, metadata) ; Test signed i128->f128. define fp128 @f1(i128 %i) #0 { @@ -52,9 +56,20 @@ define float @f3(i128 %i) #0 { ret float %conv } -; Test unsigned i128->f128. -define fp128 @f4(i128 %i) #0 { +; Test signed i128->f16. +define half @f4(i128 %i) #0 { ; CHECK-LABEL: f4: +; CHECK: brasl %r14, __floattihf@PLT +; CHECK: br %r14 + %conv = call half @llvm.experimental.constrained.sitofp.f16.i128(i128 %i, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret half %conv +} + +; Test unsigned i128->f128. +define fp128 @f5(i128 %i) #0 { +; CHECK-LABEL: f5: ; CHECK: brasl %r14, __floatuntitf@PLT ; CHECK: br %r14 %conv = call fp128 @llvm.experimental.constrained.uitofp.f128.i128(i128 %i, @@ -64,8 +79,8 @@ define fp128 @f4(i128 %i) #0 { } ; Test unsigned i128->f64. -define double @f5(i128 %i) #0 { -; CHECK-LABEL: f5: +define double @f6(i128 %i) #0 { +; CHECK-LABEL: f6: ; CHECK: brasl %r14, __floatuntidf@PLT ; CHECK: br %r14 %conv = call double @llvm.experimental.constrained.uitofp.f64.i128(i128 %i, @@ -75,8 +90,8 @@ define double @f5(i128 %i) #0 { } ; Test unsigned i128->f32. -define float @f6(i128 %i) #0 { -; CHECK-LABEL: f6: +define float @f7(i128 %i) #0 { +; CHECK-LABEL: f7: ; CHECK: brasl %r14, __floatuntisf@PLT ; CHECK: br %r14 %conv = call float @llvm.experimental.constrained.uitofp.f32.i128(i128 %i, @@ -85,9 +100,20 @@ define float @f6(i128 %i) #0 { ret float %conv } +; Test unsigned i128->f16. 
+define half @f8(i128 %i) #0 { +; CHECK-LABEL: f8: +; CHECK: brasl %r14, __floatuntihf@PLT +; CHECK: br %r14 + %conv = call half @llvm.experimental.constrained.uitofp.f16.i128(i128 %i, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret half %conv +} + ; Test signed f128->i128. -define i128 @f7(fp128 %f) #0 { -; CHECK-LABEL: f7: +define i128 @f9(fp128 %f) #0 { +; CHECK-LABEL: f9: ; CHECK: brasl %r14, __fixtfti@PLT ; CHECK: br %r14 %conv = call i128 @llvm.experimental.constrained.fptosi.i128.f128(fp128 %f, @@ -96,8 +122,8 @@ define i128 @f7(fp128 %f) #0 { } ; Test signed f64->i128. -define i128 @f8(double %f) #0 { -; CHECK-LABEL: f8: +define i128 @f10(double %f) #0 { +; CHECK-LABEL: f10: ; CHECK: brasl %r14, __fixdfti@PLT ; CHECK: br %r14 %conv = call i128 @llvm.experimental.constrained.fptosi.i128.f64(double %f, @@ -105,9 +131,9 @@ define i128 @f8(double %f) #0 { ret i128 %conv } -; Test signed f9->i128. -define i128 @f9(float %f) #0 { -; CHECK-LABEL: f9: +; Test signed f32->i128. +define i128 @f11(float %f) #0 { +; CHECK-LABEL: f11: ; CHECK: brasl %r14, __fixsfti@PLT ; CHECK: br %r14 %conv = call i128 @llvm.experimental.constrained.fptosi.i128.f32(float %f, @@ -115,9 +141,19 @@ define i128 @f9(float %f) #0 { ret i128 %conv } +; Test signed f16->i128. +define i128 @f12(half %f) #0 { +; CHECK-LABEL: f12: +; CHECK: brasl %r14, __fixhfti@PLT +; CHECK: br %r14 + %conv = call i128 @llvm.experimental.constrained.fptosi.i128.f16(half %f, + metadata !"fpexcept.strict") #0 + ret i128 %conv +} + ; Test unsigned f128->i128. -define i128 @f10(fp128 %f) #0 { -; CHECK-LABEL: f10: +define i128 @f13(fp128 %f) #0 { +; CHECK-LABEL: f13: ; CHECK: brasl %r14, __fixunstfti@PLT ; CHECK: br %r14 %conv = call i128 @llvm.experimental.constrained.fptoui.i128.f128(fp128 %f, @@ -126,8 +162,8 @@ define i128 @f10(fp128 %f) #0 { } ; Test unsigned f64->i128. -define i128 @f11(double %f) #0 { -; CHECK-LABEL: f11: +define i128 @f14(double %f) #0 { +; CHECK-LABEL: f14: ; CHECK: brasl %r14, __fixunsdfti@PLT ; CHECK: br %r14 %conv = call i128 @llvm.experimental.constrained.fptoui.i128.f64(double %f, @@ -136,8 +172,8 @@ define i128 @f11(double %f) #0 { } ; Test unsigned f32->i128. -define i128 @f12(float %f) #0 { -; CHECK-LABEL: f12: +define i128 @f15(float %f) #0 { +; CHECK-LABEL: f15: ; CHECK: brasl %r14, __fixunssfti@PLT ; CHECK: br %r14 %conv = call i128 @llvm.experimental.constrained.fptoui.i128.f32(float %f, @@ -145,4 +181,14 @@ define i128 @f12(float %f) #0 { ret i128 %conv } +; Test unsigned f16->i128. +define i128 @f16(half %f) #0 { +; CHECK-LABEL: f16: +; CHECK: brasl %r14, __fixunshfti@PLT +; CHECK: br %r14 + %conv = call i128 @llvm.experimental.constrained.fptoui.i128.f16(half %f, + metadata !"fpexcept.strict") #0 + ret i128 %conv +} + attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-div-01.ll b/llvm/test/CodeGen/SystemZ/fp-strict-div-01.ll index 980df79481936..8b9dbbe9c9e6e 100644 --- a/llvm/test/CodeGen/SystemZ/fp-strict-div-01.ll +++ b/llvm/test/CodeGen/SystemZ/fp-strict-div-01.ll @@ -5,8 +5,24 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s declare float @foo() +declare half @llvm.experimental.constrained.fdiv.f16(half, half, metadata, metadata) declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata) +; Check register division. 
+define half @f0(half %f1, half %f2) #0 { +; CHECK-LABEL: f0: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: debr %f0, %f9 +; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %res = call half @llvm.experimental.constrained.fdiv.f16( + half %f1, half %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret half %res +} + ; Check register division. define float @f1(float %f1, float %f2) #0 { ; CHECK-LABEL: f1: diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-mul-06.ll b/llvm/test/CodeGen/SystemZ/fp-strict-mul-06.ll index 4971375789407..c951c79aeb7c6 100644 --- a/llvm/test/CodeGen/SystemZ/fp-strict-mul-06.ll +++ b/llvm/test/CodeGen/SystemZ/fp-strict-mul-06.ll @@ -3,8 +3,26 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 \ ; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s +declare half @llvm.experimental.constrained.fma.f16(half, half, half, metadata, metadata) declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata) +define half @f0(half %f1, half %f2, half %acc) #0 { +; CHECK-LABEL: f0: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK-SCALAR: maebr %f10, %f0, %f8 +; CHECK-SCALAR: ler %f0, %f10 +; CHECK-VECTOR: wfmasb %f0, %f0, %f8, %f10 +; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %res = call half @llvm.experimental.constrained.fma.f16 ( + half %f1, half %f2, half %acc, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret half %res +} + define float @f1(float %f1, float %f2, float %acc) #0 { ; CHECK-LABEL: f1: ; CHECK-SCALAR: maebr %f4, %f0, %f2 diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-round-01.ll b/llvm/test/CodeGen/SystemZ/fp-strict-round-01.ll index 964f16d605db6..95a5fa1af832b 100644 --- a/llvm/test/CodeGen/SystemZ/fp-strict-round-01.ll +++ b/llvm/test/CodeGen/SystemZ/fp-strict-round-01.ll @@ -43,6 +43,21 @@ define void @f3(ptr %ptr) #0 { ret void } +; Test nearbyint for f16. +declare half @llvm.experimental.constrained.nearbyint.f16(half, metadata, metadata) +define half @f4_half(half %f) #0 { +; CHECK-LABEL: f4_half: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: brasl %r14, nearbyintf@PLT +; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %res = call half @llvm.experimental.constrained.nearbyint.f16( + half %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret half %res +} + ; Test nearbyint for f32. declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, metadata) define float @f4(float %f) #0 { @@ -84,6 +99,20 @@ define void @f6(ptr %ptr) #0 { ret void } +; Test floor for f16. +declare half @llvm.experimental.constrained.floor.f16(half, metadata) +define half @f7_half(half %f) #0 { +; CHECK-LABEL: f7_half: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: brasl %r14, floorf@PLT +; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %res = call half @llvm.experimental.constrained.floor.f16( + half %f, + metadata !"fpexcept.strict") #0 + ret half %res +} + ; Test floor for f32. declare float @llvm.experimental.constrained.floor.f32(float, metadata) define float @f7(float %f) #0 { @@ -122,6 +151,20 @@ define void @f9(ptr %ptr) #0 { ret void } +; Test ceil for f16. 
+declare half @llvm.experimental.constrained.ceil.f16(half, metadata) +define half @f10_half(half %f) #0 { +; CHECK-LABEL: f10_half: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: brasl %r14, ceilf@PLT +; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %res = call half @llvm.experimental.constrained.ceil.f16( + half %f, + metadata !"fpexcept.strict") #0 + ret half %res +} + ; Test ceil for f32. declare float @llvm.experimental.constrained.ceil.f32(float, metadata) define float @f10(float %f) #0 { @@ -160,6 +203,20 @@ define void @f12(ptr %ptr) #0 { ret void } +; Test trunc for f16. +declare half @llvm.experimental.constrained.trunc.f16(half, metadata) +define half @f13_half(half %f) #0 { +; CHECK-LABEL: f13_half: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: brasl %r14, truncf@PLT +; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %res = call half @llvm.experimental.constrained.trunc.f16( + half %f, + metadata !"fpexcept.strict") #0 + ret half %res +} + ; Test trunc for f32. declare float @llvm.experimental.constrained.trunc.f32(float, metadata) define float @f13(float %f) #0 { @@ -198,6 +255,20 @@ define void @f15(ptr %ptr) #0 { ret void } +; Test round for f16. +declare half @llvm.experimental.constrained.round.f16(half, metadata) +define half @f16_half(half %f) #0 { +; CHECK-LABEL: f16_half: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: brasl %r14, roundf@PLT +; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %res = call half @llvm.experimental.constrained.round.f16( + half %f, + metadata !"fpexcept.strict") #0 + ret half %res +} + ; Test round for f32. declare float @llvm.experimental.constrained.round.f32(float, metadata) define float @f16(float %f) #0 { diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-round-02.ll b/llvm/test/CodeGen/SystemZ/fp-strict-round-02.ll index c7b721e3770e5..bdfd9adf2b400 100644 --- a/llvm/test/CodeGen/SystemZ/fp-strict-round-02.ll +++ b/llvm/test/CodeGen/SystemZ/fp-strict-round-02.ll @@ -164,6 +164,20 @@ define void @f12(ptr %ptr) #0 { ret void } +; Test trunc for f16. +declare half @llvm.experimental.constrained.trunc.f16(half, metadata) +define half @f13_half(half %f) #0 { +; CHECK-LABEL: f13_half: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: fiebra %f0, 5, %f0, 4 +; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %res = call half @llvm.experimental.constrained.trunc.f16( + half %f, + metadata !"fpexcept.strict") #0 + ret half %res +} + ; Test trunc for f32. declare float @llvm.experimental.constrained.trunc.f32(float, metadata) define float @f13(float %f) #0 { @@ -202,6 +216,20 @@ define void @f15(ptr %ptr) #0 { ret void } +; Test round for f16. +declare half @llvm.experimental.constrained.round.f16(half, metadata) +define half @f16_half(half %f) #0 { +; CHECK-LABEL: f16_half: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: fiebra %f0, 1, %f0, 4 +; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %res = call half @llvm.experimental.constrained.round.f16( + half %f, + metadata !"fpexcept.strict") #0 + ret half %res +} + ; Test round for f32. declare float @llvm.experimental.constrained.round.f32(float, metadata) define float @f16(float %f) #0 { @@ -240,6 +268,20 @@ define void @f18(ptr %ptr) #0 { ret void } +; Test roundeven for f16. 
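+; FIEBRA rounding mode 4 is round-to-nearest with ties to even; the final
+; operand 4 sets the inexact-suppression (XxC) bit, since roundeven must
+; not raise an inexact exception even under fpexcept.strict.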
+declare half @llvm.experimental.constrained.roundeven.f16(half, metadata) +define half @f19_half(half %f) #0 { +; CHECK-LABEL: f19_half: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: fiebra %f0, 4, %f0, 4 +; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %res = call half @llvm.experimental.constrained.roundeven.f16( + half %f, + metadata !"fpexcept.strict") #0 + ret half %res +} + ; Test roundeven for f32. declare float @llvm.experimental.constrained.roundeven.f32(float, metadata) define float @f19(float %f) #0 { diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-round-03.ll b/llvm/test/CodeGen/SystemZ/fp-strict-round-03.ll index e99d8b0f01650..5d9ee28ae8ea2 100644 --- a/llvm/test/CodeGen/SystemZ/fp-strict-round-03.ll +++ b/llvm/test/CodeGen/SystemZ/fp-strict-round-03.ll @@ -2,6 +2,21 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s +; Test rint for f16. +declare half @llvm.experimental.constrained.rint.f16(half, metadata, metadata) +define half @f0(half %f) #0 { +; CHECK-LABEL: f0: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: fiebra %f0, 0, %f0, 0 +; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %res = call half @llvm.experimental.constrained.rint.f16( + half %f, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret half %res +} + ; Test rint for f32. declare float @llvm.experimental.constrained.rint.f32(float, metadata, metadata) define float @f1(float %f) #0 { diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-sqrt-01.ll b/llvm/test/CodeGen/SystemZ/fp-strict-sqrt-01.ll index 88cdb71ff7d12..2db86d2de7f66 100644 --- a/llvm/test/CodeGen/SystemZ/fp-strict-sqrt-01.ll +++ b/llvm/test/CodeGen/SystemZ/fp-strict-sqrt-01.ll @@ -6,8 +6,24 @@ ; Test strict 32-bit square root. ; +declare half @llvm.experimental.constrained.sqrt.f16(half, metadata, metadata) declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata) +; Check register square root. +define half @f0(half %val) #0 { +; CHECK-LABEL: f0: +; CHECK: # %bb.0: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: sqebr %f0, %f0 +; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %res = call half @llvm.experimental.constrained.sqrt.f16( + half %val, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret half %res +} + ; Check register square root. define float @f1(float %val) #0 { ; CHECK-LABEL: f1: diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-sub-01.ll b/llvm/test/CodeGen/SystemZ/fp-strict-sub-01.ll index a677d471397f7..da91b6e69fd5f 100644 --- a/llvm/test/CodeGen/SystemZ/fp-strict-sub-01.ll +++ b/llvm/test/CodeGen/SystemZ/fp-strict-sub-01.ll @@ -5,8 +5,24 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s declare float @foo() +declare half @llvm.experimental.constrained.fsub.f16(half, half, metadata, metadata) declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata) +; Check register subtraction. +define half @f0(half %f1, half %f2) #0 { +; CHECK-LABEL: f0: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: sebr %f0, %f9 +; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %res = call half @llvm.experimental.constrained.fsub.f16( + half %f1, half %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret half %res +} + ; Check register subtraction. 
define float @f1(float %f1, float %f2) #0 { ; CHECK-LABEL: f1: diff --git a/llvm/test/CodeGen/SystemZ/fp-sub-01.ll b/llvm/test/CodeGen/SystemZ/fp-sub-01.ll index e875fa3be735b..7359f10f92852 100644 --- a/llvm/test/CodeGen/SystemZ/fp-sub-01.ll +++ b/llvm/test/CodeGen/SystemZ/fp-sub-01.ll @@ -6,6 +6,18 @@ declare float @foo() +; Check register subtraction. +define half @f0(half %f1, half %f2) { +; CHECK-LABEL: f0: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: sebr %f0, %f9 +; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %res = fsub half %f1, %f2 + ret half %res +} + ; Check register subtraction. define float @f1(float %f1, float %f2) { ; CHECK-LABEL: f1: diff --git a/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting-explicit-regs-zEC12.ll b/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting-explicit-regs-zEC12.ll index 44175f924f7fc..f3ae0c3029c73 100644 --- a/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting-explicit-regs-zEC12.ll +++ b/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting-explicit-regs-zEC12.ll @@ -3,6 +3,26 @@ ; ; Test inline assembly where the operand is bitcasted. +define signext i16 @short_and_f(i16 signext %cc_dep1) { +; CHECK-LABEL: short_and_f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sll %r2, 16 +; CHECK-NEXT: risbhg %r0, %r2, 0, 159, 32 +; CHECK-NEXT: ldgr %f1, %r0 +; CHECK-NEXT: # kill: def $f1h killed $f1h killed $f1d +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: # kill: def $f1h killed $f1h def $f1d +; CHECK-NEXT: lgdr %r0, %f1 +; CHECK-NEXT: risblg %r0, %r0, 0, 159, 32 +; CHECK-NEXT: srl %r0, 16 +; CHECK-NEXT: lghr %r2, %r0 +; CHECK-NEXT: br %r14 +entry: + %0 = tail call i16 asm sideeffect "", "={f1},0"(i16 %cc_dep1) + ret i16 %0 +} + define signext i32 @int_and_f(i32 signext %cc_dep1) { ; CHECK-LABEL: int_and_f: ; CHECK: # %bb.0: # %entry @@ -51,6 +71,25 @@ entry: ret void } +define half @half_and_r(half %cc_dep1) { +; CHECK-LABEL: half_and_r: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $f0h killed $f0h def $f0d +; CHECK-NEXT: lgdr %r0, %f0 +; CHECK-NEXT: risblg %r2, %r0, 0, 159, 32 +; CHECK-NEXT: srl %r2, 16 +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: sll %r2, 16 +; CHECK-NEXT: risbhg %r0, %r2, 0, 159, 32 +; CHECK-NEXT: ldgr %f0, %r0 +; CHECK-NEXT: # kill: def $f0h killed $f0h killed $f0d +; CHECK-NEXT: br %r14 +entry: + %0 = tail call half asm sideeffect "", "={r2},0"(half %cc_dep1) + ret half %0 +} + define float @float_and_r(float %cc_dep1) { ; CHECK-LABEL: float_and_r: ; CHECK: # %bb.0: # %entry diff --git a/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting-explicit-regs.ll b/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting-explicit-regs.ll index 3cbf3d21dec5a..cf4dbbff8bec0 100644 --- a/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting-explicit-regs.ll +++ b/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting-explicit-regs.ll @@ -4,6 +4,20 @@ ; ; Test inline assembly where the operand is bitcasted. 
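+; With the vector facility, an i16 (or half) is moved between a GPR and
+; element 0 of a vector register directly with VLVGH/VLGVH; compare the
+; zEC12 variant of this test, which has to shift the value through a
+; 64-bit GPR and LDGR/LGDR instead.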
+define signext i16 @short_and_f(i16 signext %cc_dep1) { +; CHECK-LABEL: short_and_f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vlvgh %v0, %r2, 0 +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: vlgvh %r0, %v0, 0 +; CHECK-NEXT: lghr %r2, %r0 +; CHECK-NEXT: br %r14 +entry: + %0 = tail call i16 asm sideeffect "", "={f0},0"(i16 %cc_dep1) + ret i16 %0 +} + define signext i32 @int_and_f(i32 signext %cc_dep1) { ; CHECK-LABEL: int_and_f: ; CHECK: # %bb.0: # %entry @@ -101,6 +115,19 @@ entry: ret void } +define half @half_and_r(half %cc_dep1) { +; CHECK-LABEL: half_and_r: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vlgvh %r0, %v0, 0 +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: vlvgh %v0, %r0, 0 +; CHECK-NEXT: br %r14 +entry: + %0 = tail call half asm sideeffect "", "={r0},0"(half %cc_dep1) + ret half %0 +} + define float @float_and_r(float %cc_dep1) { ; CHECK-LABEL: float_and_r: ; CHECK: # %bb.0: # %entry @@ -145,6 +172,19 @@ entry: ret void } +define half @half_and_v(half %cc_dep1) { +; CHECK-LABEL: half_and_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ldr %f3, %f0 +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ldr %f0, %f3 +; CHECK-NEXT: br %r14 +entry: + %0 = tail call half asm sideeffect "", "={v3},0"(half %cc_dep1) + ret half %0 +} + define float @float_and_v(float %cc_dep1) { ; CHECK-LABEL: float_and_v: ; CHECK: # %bb.0: # %entry diff --git a/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting-zEC12.ll b/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting-zEC12.ll index 1ef6eece80acb..36140073a41b9 100644 --- a/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting-zEC12.ll +++ b/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting-zEC12.ll @@ -3,6 +3,24 @@ ; ; Test inline assembly where the operand is bitcasted. +define signext i16 @short_and_f(i16 signext %cc_dep1) { +; CHECK-LABEL: short_and_f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sll %r2, 16 +; CHECK-NEXT: risbhg %r0, %r2, 0, 159, 32 +; CHECK-NEXT: ldgr %f0, %r0 +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lgdr %r0, %f0 +; CHECK-NEXT: risblg %r0, %r0, 0, 159, 32 +; CHECK-NEXT: srl %r0, 16 +; CHECK-NEXT: lghr %r2, %r0 +; CHECK-NEXT: br %r14 +entry: + %0 = tail call i16 asm sideeffect "", "=f,0"(i16 %cc_dep1) + ret i16 %0 +} + define signext i32 @int_and_f(i32 signext %cc_dep1) { ; CHECK-LABEL: int_and_f: ; CHECK: # %bb.0: # %entry @@ -49,6 +67,25 @@ entry: ret void } +define half @half_and_r(half %cc_dep1) { +; CHECK-LABEL: half_and_r: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $f0h killed $f0h def $f0d +; CHECK-NEXT: lgdr %r0, %f0 +; CHECK-NEXT: risblg %r0, %r0, 0, 159, 32 +; CHECK-NEXT: srl %r0, 16 +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: sll %r0, 16 +; CHECK-NEXT: risbhg %r0, %r0, 0, 159, 32 +; CHECK-NEXT: ldgr %f0, %r0 +; CHECK-NEXT: # kill: def $f0h killed $f0h killed $f0d +; CHECK-NEXT: br %r14 +entry: + %0 = tail call half asm sideeffect "", "=r,0"(half %cc_dep1) + ret half %0 +} + define float @float_and_r(float %cc_dep1) { ; CHECK-LABEL: float_and_r: ; CHECK: # %bb.0: # %entry diff --git a/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting.ll b/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting.ll index 23d78a9315b40..b23b40e0f0e90 100644 --- a/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting.ll +++ b/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting.ll @@ -4,6 +4,20 @@ ; ; Test inline assembly where the operand is bitcasted. 
+define signext i16 @short_and_f(i16 signext %cc_dep1) { +; CHECK-LABEL: short_and_f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vlvgh %v0, %r2, 0 +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: vlgvh %r0, %v0, 0 +; CHECK-NEXT: lghr %r2, %r0 +; CHECK-NEXT: br %r14 +entry: + %0 = tail call i16 asm sideeffect "", "=f,0"(i16 %cc_dep1) + ret i16 %0 +} + define signext i32 @int_and_f(i32 signext %cc_dep1) { ; CHECK-LABEL: int_and_f: ; CHECK: # %bb.0: # %entry @@ -58,6 +72,20 @@ entry: ret void } +define signext i16 @short_and_v(i16 signext %cc_dep1) { +; CHECK-LABEL: short_and_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vlvgh %v0, %r2, 0 +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: vlgvh %r0, %v0, 0 +; CHECK-NEXT: lghr %r2, %r0 +; CHECK-NEXT: br %r14 +entry: + %0 = tail call i16 asm sideeffect "", "=v,0"(i16 %cc_dep1) + ret i16 %0 +} + define signext i32 @int_and_v(i32 signext %cc_dep1) { ; CHECK-LABEL: int_and_v: ; CHECK: # %bb.0: # %entry @@ -100,6 +128,19 @@ entry: ret void } +define half @half_and_r(half %cc_dep1) { +; CHECK-LABEL: half_and_r: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vlgvh %r0, %v0, 0 +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: vlvgh %v0, %r0, 0 +; CHECK-NEXT: br %r14 +entry: + %0 = tail call half asm sideeffect "", "=r,0"(half %cc_dep1) + ret half %0 +} + define float @float_and_r(float %cc_dep1) { ; CHECK-LABEL: float_and_r: ; CHECK: # %bb.0: # %entry @@ -143,6 +184,17 @@ entry: ret void } +define half @half_and_v(half %cc_dep1) { +; CHECK-LABEL: half_and_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: br %r14 +entry: + %0 = tail call half asm sideeffect "", "=v,0"(half %cc_dep1) + ret half %0 +} + define float @float_and_v(float %cc_dep1) { ; CHECK-LABEL: float_and_v: ; CHECK: # %bb.0: # %entry diff --git a/llvm/test/CodeGen/SystemZ/is_fpclass.ll b/llvm/test/CodeGen/SystemZ/is_fpclass.ll index 7a02730047d20..98b856c5737ed 100644 --- a/llvm/test/CodeGen/SystemZ/is_fpclass.ll +++ b/llvm/test/CodeGen/SystemZ/is_fpclass.ll @@ -3,11 +3,30 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare i1 @llvm.is.fpclass.f16(half, i32) declare i1 @llvm.is.fpclass.f32(float, i32) declare i1 @llvm.is.fpclass.f64(double, i32) declare i1 @llvm.is.fpclass.f128(fp128, i32) +define i1 @isnan_h(half %x) { +; CHECK-LABEL: isnan_h: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r14, %r15, 112(%r15) +; CHECK-NEXT: .cfi_offset %r14, -48 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -160 +; CHECK-NEXT: .cfi_def_cfa_offset 320 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: tceb %f0, 15 +; CHECK-NEXT: ipm %r2 +; CHECK-NEXT: srl %r2, 28 +; CHECK-NEXT: lmg %r14, %r15, 272(%r15) +; CHECK-NEXT: br %r14 + %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 3) ; nan + ret i1 %1 +} + define i1 @isnan_f(float %x) { ; CHECK-LABEL: isnan_f: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/SystemZ/spill-half-01.mir b/llvm/test/CodeGen/SystemZ/spill-half-01.mir index 56f4ecbffd2c6..bc6947fdec8a6 100644 --- a/llvm/test/CodeGen/SystemZ/spill-half-01.mir +++ b/llvm/test/CodeGen/SystemZ/spill-half-01.mir @@ -3,45 +3,61 @@ # RUN: llc -o - %s -mtriple=s390x-linux-gnu -mcpu=z16 -verify-machineinstrs \ # RUN: -start-before=greedy | FileCheck %s -check-prefix=VECTOR -# Test spilling / reloading of an fp16bit virtual register. +# Test spilling / reloading fp16bit virtual registers. 
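+# Without vector support the f16 value is spilled and reloaded through the
+# wider STE/LE accesses of a 4-byte slot, while with the vector facility
+# VSTEH/VLREPH touch exactly the two bytes of the value (compare the CHECK
+# and VECTOR lines below).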
---
name: fun0
-alignment: 16
 tracksRegLiveness: true
-registers:
-  - { id: 0, class: fp16bit }
-liveins:
-  - { reg: '$f0h', virtual-reg: '%0' }
-frameInfo:
-  maxAlignment: 1
-machineFunctionInfo: {}
 body: |
   bb.0:
-    liveins: $f0h
+    liveins: $f0h, $f2h, $f4h

     ; CHECK-LABEL: fun0:
-    ; CHECK-NOT: $f0
-    ; CHECK: # kill: def $f0h killed $f0h killed $f0d def $f0d
-    ; CHECK-NEXT: lgdr %r0, %f0
-    ; CHECK-NEXT: srlg %r0, %r0, 48
-    ; CHECK-NEXT: sth %r0, 166(%r15) # 2-byte Folded Spill
+    ; CHECK: aghi %r15, -240
+    ; CHECK: ste %f4, 172(%r15) # 4-byte Folded Spill
+    ; CHECK-NEXT: ste %f2, 164(%r15) # 4-byte Folded Spill
+    ; CHECK-NEXT: ste %f0, 168(%r15) # 4-byte Folded Spill
     ; CHECK-NEXT: #APP
     ; CHECK-NEXT: #NO_APP
-    ; CHECK: lh %r0, 166(%r15) # 2-byte Folded Reload
-    ; CHECK-NEXT: sllg %r0, %r0, 48
-    ; CHECK-NEXT: ldgr %f0, %r0
-    ; CHECK: # kill: def $f0h killed $f0h killed $f0d
-    ; CHECK-NOT: $f0
+    ; CHECK-NEXT: le %f0, 164(%r15) # 4-byte Folded Reload
+    ; CHECK: le %f0, 168(%r15) # 4-byte Folded Reload
+    ; CHECK: le %f0, 172(%r15) # 4-byte Folded Reload

     ; VECTOR-LABEL: fun0:
-    ; VECTOR: vsteh %v0, 166(%r15), 0 # 2-byte Folded Spill
-    ; VECTOR-NEXT: #APP
-    ; VECTOR-NEXT: #NO_APP
-    ; VECTOR-NEXT: vlreph %v0, 166(%r15) # 2-byte Folded Reload
-
+    ; VECTOR: aghi %r15, -232
+    ; VECTOR: vsteh %v4, 166(%r15), 0 # 2-byte Folded Spill
+    ; VECTOR-NEXT: vsteh %v2, 162(%r15), 0 # 2-byte Folded Spill
+    ; VECTOR-NEXT: vsteh %v0, 164(%r15), 0 # 2-byte Folded Spill
+    ; VECTOR-NEXT: #APP
+    ; VECTOR-NEXT: #NO_APP
+    ; VECTOR-NEXT: vlreph %v0, 162(%r15) # 2-byte Folded Reload
+    ; VECTOR: vlreph %v0, 164(%r15) # 2-byte Folded Reload
+    ; VECTOR: vlreph %v0, 166(%r15) # 2-byte Folded Reload
+
+    %2:fp16bit = COPY $f4h
+    %1:fp16bit = COPY $f2h
     %0:fp16bit = COPY $f0h
-    INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f0d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d
+    INLINEASM &"", 1, 12, implicit-def dead early-clobber $f0d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead
early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d + $f0h = COPY %1 + CallBRASL &__extendhfsf2, $f0h, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $f0s + %3:fp32bit = COPY $f0s $f0h = COPY %0 + CallBRASL &__extendhfsf2, $f0h, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $f0s + %5:fp32bit = COPY $f0s + %5:fp32bit = nofpexcept AEBR %5, %3, implicit-def dead $cc, implicit $fpc + $f0s = COPY %5 + CallBRASL &__truncsfhf2, $f0s, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $f0h + %6:fp16bit = COPY $f0h + $f0h = COPY %6 + CallBRASL &__extendhfsf2, $f0h, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $f0s + %7:fp32bit = COPY $f0s + $f0h = COPY %2 + CallBRASL &__extendhfsf2, $f0h, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $f0s + %9:fp32bit = COPY $f0s + %9:fp32bit = nofpexcept AEBR %9, %7, implicit-def dead $cc, implicit $fpc + $f0s = COPY %9 + CallBRASL &__truncsfhf2, $f0s, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $f0h + %10:fp16bit = COPY $f0h + $f0h = COPY %10 Return implicit $f0h ... diff --git a/llvm/test/CodeGen/SystemZ/spill-half-02.mir b/llvm/test/CodeGen/SystemZ/spill-half-02.mir index 4934d0b728115..9ee2228612f50 100644 --- a/llvm/test/CodeGen/SystemZ/spill-half-02.mir +++ b/llvm/test/CodeGen/SystemZ/spill-half-02.mir @@ -5,36 +5,23 @@ --- name: fun0 -alignment: 16 tracksRegLiveness: true -registers: - - { id: 0, class: addr64bit } - - { id: 1, class: addr64bit } - - { id: 2, class: vr16bit } -liveins: - - { reg: '$r2d', virtual-reg: '%0' } - - { reg: '$r3d', virtual-reg: '%1' } -frameInfo: - maxAlignment: 1 -machineFunctionInfo: {} body: | bb.0: liveins: $r2d, $r3d ; CHECK-LABEL: fun0: - ; CHECK: stg %r3, 168(%r15) # 8-byte Folded Spill - ; CHECK-NEXT: vlreph %v0, 0(%r2) + ; CHECK: vlreph %v0, 0(%r2) ; CHECK-NEXT: vsteh %v0, 166(%r15), 0 # 2-byte Folded Spill ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP - ; CHECK-NEXT: lg %r1, 168(%r15) # 8-byte Folded Reload ; CHECK-NEXT: vlreph %v0, 166(%r15) # 2-byte Folded Reload - ; CHECK-NEXT: vsteh %v0, 0(%r1), 0 + ; CHECK-NEXT: vsteh %v0, 0(%r3), 0 %1:addr64bit = COPY $r3d %0:addr64bit = COPY $r2d %2:vr16bit = VL16 %0, 0, $noreg - INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f0d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead 
early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d, 12, implicit-def dead early-clobber $f16d, 12, implicit-def dead early-clobber $f17d, 12, implicit-def dead early-clobber $f18d, 12, implicit-def dead early-clobber $f19d, 12, implicit-def dead early-clobber $f20d, 12, implicit-def dead early-clobber $f21d, 12, implicit-def dead early-clobber $f22d, 12, implicit-def dead early-clobber $f23d, 12, implicit-def dead early-clobber $f24d, 12, implicit-def dead early-clobber $f25d, 12, implicit-def dead early-clobber $f26d, 12, implicit-def dead early-clobber $f27d, 12, implicit-def dead early-clobber $f28d, 12, implicit-def dead early-clobber $f29d, 12, implicit-def dead early-clobber $f30d, 12, implicit-def dead early-clobber $f31d + INLINEASM &"", 1, 12, implicit-def dead early-clobber $f0d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d, 12, implicit-def dead early-clobber $f16d, 12, implicit-def dead early-clobber $f17d, 12, implicit-def dead early-clobber $f18d, 12, implicit-def dead early-clobber $f19d, 12, implicit-def dead early-clobber $f20d, 12, implicit-def dead early-clobber $f21d, 12, implicit-def dead early-clobber $f22d, 12, implicit-def dead early-clobber $f23d, 12, implicit-def dead early-clobber $f24d, 12, implicit-def dead early-clobber $f25d, 12, implicit-def dead early-clobber $f26d, 12, implicit-def dead early-clobber $f27d, 12, implicit-def dead early-clobber $f28d, 12, implicit-def dead early-clobber $f29d, 12, implicit-def dead early-clobber $f30d, 12, implicit-def dead early-clobber $f31d VST16 %2, %1, 0, $noreg Return ... diff --git a/llvm/test/CodeGen/SystemZ/stackmap.ll b/llvm/test/CodeGen/SystemZ/stackmap.ll index 6156b7f2fc5a1..05b8de756c032 100644 --- a/llvm/test/CodeGen/SystemZ/stackmap.ll +++ b/llvm/test/CodeGen/SystemZ/stackmap.ll @@ -553,7 +553,14 @@ declare void @escape_values(...) ; CHECK-LABEL: .long .L{{.*}}-floats ; CHECK-NEXT: .short 0 ; Num Locations -; CHECK-NEXT: .short 6 +; CHECK-NEXT: .short 9 +; Loc 0: constant half stored to FP register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 4 +; CHECK-NEXT: .short {{.*}} +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long 16 ; Loc 0: constant float stored to FP register ; CHECK-NEXT: .byte 1 ; CHECK-NEXT: .byte 0 @@ -568,6 +575,13 @@ declare void @escape_values(...) 
; CHECK-NEXT: .short {{.*}} ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 +; Loc 1: half value in FP register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 4 +; CHECK-NEXT: .short {{.*}} +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long 16 ; Loc 1: float value in FP register ; CHECK-NEXT: .byte 1 ; CHECK-NEXT: .byte 0 @@ -582,6 +596,13 @@ declare void @escape_values(...) ; CHECK-NEXT: .short {{.*}} ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 +; Loc 3: half on stack +; CHECK-NEXT: .byte 2 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 +; CHECK-NEXT: .short {{.*}} +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long {{.*}} ; Loc 3: float on stack ; CHECK-NEXT: .byte 2 ; CHECK-NEXT: .byte 0 @@ -596,11 +617,12 @@ declare void @escape_values(...) ; CHECK-NEXT: .short {{.*}} ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long {{.*}} -define void @floats(float %f, double %g) { +define void @floats(half %e, float %f, double %g) { + %hh = alloca half %ff = alloca float %gg = alloca double - call void (i64, i32, ...) @llvm.experimental.stackmap(i64 888, i32 0, float 1.25, - double 1.5, float %f, double %g, ptr %ff, ptr %gg) + call void (i64, i32, ...) @llvm.experimental.stackmap(i64 888, i32 0, half 1.125, + float 1.25, double 1.5, half %e, float %f, double %g, ptr %hh, ptr %ff, ptr %gg) ret void } diff --git a/llvm/test/CodeGen/SystemZ/tdc-01.ll b/llvm/test/CodeGen/SystemZ/tdc-01.ll index 052d895b798f6..a0c090f463a2c 100644 --- a/llvm/test/CodeGen/SystemZ/tdc-01.ll +++ b/llvm/test/CodeGen/SystemZ/tdc-01.ll @@ -2,10 +2,22 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare i32 @llvm.s390.tdc.f16(half, i64) declare i32 @llvm.s390.tdc.f32(float, i64) declare i32 @llvm.s390.tdc.f64(double, i64) declare i32 @llvm.s390.tdc.f128(fp128, i64) +; Check using as i32 - f16 +define i32 @f0(half %x) { +; CHECK-LABEL: f0 +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: tceb %f0, 123 +; CHECK: ipm %r2 +; CHECK: srl %r2, 28 + %res = call i32 @llvm.s390.tdc.f16(half %x, i64 123) + ret i32 %res +} + ; Check using as i32 - f32 define i32 @f1(float %x) { ; CHECK-LABEL: f1 diff --git a/llvm/test/CodeGen/SystemZ/tdc-02.ll b/llvm/test/CodeGen/SystemZ/tdc-02.ll index c0c4ac84349e3..ceb397c6cb9cb 100644 --- a/llvm/test/CodeGen/SystemZ/tdc-02.ll +++ b/llvm/test/CodeGen/SystemZ/tdc-02.ll @@ -2,10 +2,27 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare i32 @llvm.s390.tdc.f16(half, i64) declare i32 @llvm.s390.tdc.f32(float, i64) declare i32 @llvm.s390.tdc.f64(double, i64) declare i32 @llvm.s390.tdc.f128(fp128, i64) +; Check using or i1 +define i32 @f0(half %x) { +; CHECK-LABEL: f0 +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: tceb %f0, 7 +; CHECK-NEXT: ipm [[REG1:%r[0-9]+]] +; CHECK-NEXT: risbg %r2, [[REG1]], 63, 191, 36 + %a = call i32 @llvm.s390.tdc.f16(half %x, i64 3) + %b = call i32 @llvm.s390.tdc.f16(half %x, i64 6) + %a1 = icmp ne i32 %a, 0 + %b1 = icmp ne i32 %b, 0 + %res = or i1 %a1, %b1 + %xres = zext i1 %res to i32 + ret i32 %xres +} + ; Check using or i1 define i32 @f1(float %x) { ; CHECK-LABEL: f1 diff --git a/llvm/test/CodeGen/SystemZ/tdc-03.ll b/llvm/test/CodeGen/SystemZ/tdc-03.ll index 95708f1effc6b..b6c12caef72fd 100644 --- a/llvm/test/CodeGen/SystemZ/tdc-03.ll +++ b/llvm/test/CodeGen/SystemZ/tdc-03.ll @@ -3,10 +3,23 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +declare half @llvm.fabs.f16(half) declare float @llvm.fabs.f32(float) declare double @llvm.fabs.f64(double) declare fp128 @llvm.fabs.f128(fp128) +; Compare with 0 (unworthy) +define i32 @f0(half 
%x) { +; CHECK-LABEL: f0 +; CHECK-NOT: tceb +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: ltebr {{%f[0-9]+}}, %f0 +; CHECK-NOT: tceb + %res = fcmp ugt half %x, 0.0 + %xres = zext i1 %res to i32 + ret i32 %xres +} + ; Compare with 0 (unworthy) define i32 @f1(float %x) { ; CHECK-LABEL: f1 @@ -41,9 +54,20 @@ define i32 @f3(float %x) { ret i32 %xres } +; Compare fabs with inf +define i32 @f4_half(half %x) { +; CHECK-LABEL: f4_half: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: tceb %f0, 4047 + %y = call half @llvm.fabs.f16(half %x) + %res = fcmp ult half %y, 0x7ff0000000000000 + %xres = zext i1 %res to i32 + ret i32 %xres +} + ; Compare fabs with inf define i32 @f4(float %x) { -; CHECK-LABEL: f4 +; CHECK-LABEL: f4: ; CHECK: tceb %f0, 4047 %y = call float @llvm.fabs.f32(float %x) %res = fcmp ult float %y, 0x7ff0000000000000 diff --git a/llvm/test/CodeGen/SystemZ/tdc-04.ll b/llvm/test/CodeGen/SystemZ/tdc-04.ll index 8cc78f3de7522..bc719640e630d 100644 --- a/llvm/test/CodeGen/SystemZ/tdc-04.ll +++ b/llvm/test/CodeGen/SystemZ/tdc-04.ll @@ -1,10 +1,24 @@ ; Test the Test Data Class instruction logic operation conversion from ; signbit extraction. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s --check-prefixes=CHECK,Z10 +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s \ +; RUN: --check-prefixes=CHECK,Z13 ; +; Extract sign bit. +define i32 @f0(half %x) { +; CHECK-LABEL: f0 +; Z10: lgdr %r0, %f0 +; Z10: srlg %r2, %r0, 63 +; Z13: vlgvh %r0, %v0, 0 +; Z13: risblg %r2, %r0, 31, 159, 49 + %cast = bitcast half %x to i16 + %res = icmp slt i16 %cast, 0 + %xres = zext i1 %res to i32 + ret i32 %xres +} + ; Extract sign bit. 
define i32 @f1(float %x) { ; CHECK-LABEL: f1 diff --git a/llvm/test/CodeGen/SystemZ/tdc-05.ll b/llvm/test/CodeGen/SystemZ/tdc-05.ll index c639a9b7b4757..30f875c404258 100644 --- a/llvm/test/CodeGen/SystemZ/tdc-05.ll +++ b/llvm/test/CodeGen/SystemZ/tdc-05.ll @@ -8,6 +8,30 @@ declare float @llvm.fabs.f32(float) declare double @llvm.fabs.f64(double) declare fp128 @llvm.fabs.f128(fp128) +; Compare with 0, extract sign bit +define i32 @f0(half %x) { +; CHECK-LABEL: f0 +; CHECK: lgdr %r0, %f0 +; CHECK-NEXT: srag %r0, %r0, 48 +; CHECK-NEXT: chi %r0, 0 +; CHECK-NEXT: ipm %r0 +; CHECK-NEXT: risbg %r13, %r0, 63, 191, 36 +; CHECK-NEXT: # kill: def $f0h killed $f0h killed $f0d +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: ltebr %f0, %f0 +; CHECK-NEXT: ipm %r0 +; CHECK-NEXT: rosbg %r13, %r0, 63, 63, 35 +; CHECK-NEXT: lr %r2, %r13 +; CHECK-NEXT: lmg %r13, %r15, 264(%r15) +; CHECK-NEXT: br %r14 + %cast = bitcast half %x to i16 + %sign = icmp slt i16 %cast, 0 + %fcmp = fcmp ugt half %x, 0.0 + %res = or i1 %sign, %fcmp + %xres = zext i1 %res to i32 + ret i32 %xres +} + ; Compare with 0, extract sign bit define i32 @f1(float %x) { ; CHECK-LABEL: f1 diff --git a/llvm/test/CodeGen/SystemZ/tdc-06.ll b/llvm/test/CodeGen/SystemZ/tdc-06.ll index 4ebf020c973da..19536b26eb5ae 100644 --- a/llvm/test/CodeGen/SystemZ/tdc-06.ll +++ b/llvm/test/CodeGen/SystemZ/tdc-06.ll @@ -3,9 +3,7 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s ; -declare float @llvm.fabs.f32(float) declare double @llvm.fabs.f64(double) -declare fp128 @llvm.fabs.f128(fp128) define i32 @fpc(double %x) { entry: diff --git a/llvm/test/CodeGen/SystemZ/vec-max-05.ll b/llvm/test/CodeGen/SystemZ/vec-max-05.ll index 7bdf4e06029d2..6815bad060e39 100644 --- a/llvm/test/CodeGen/SystemZ/vec-max-05.ll +++ b/llvm/test/CodeGen/SystemZ/vec-max-05.ll @@ -14,6 +14,9 @@ declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) declare float @llvm.maximum.f32(float, float) declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>) +declare half @fmaxh(half, half) +declare half @llvm.maxnum.f16(half, half) + declare fp128 @fmaxl(fp128, fp128) declare fp128 @llvm.maxnum.f128(fp128, fp128) declare fp128 @llvm.maximum.f128(fp128, fp128) @@ -87,6 +90,15 @@ define <2 x double> @f7(<2 x double> %dummy, <2 x double> %val1, ret <2 x double> %ret } +; Test the fmaxh library function. +define half @f11_half(half %dummy, half %val1, half %val2) { +; CHECK-LABEL: f11_half: +; CHECK: brasl %r14, fmaxh@PLT +; CHECK: br %r14 + %ret = call half @fmaxh(half %val1, half %val2) readnone + ret half %ret +} + ; Test the fmaxf library function. define float @f11(float %dummy, float %val1, float %val2) { ; CHECK-LABEL: f11: @@ -96,6 +108,18 @@ define float @f11(float %dummy, float %val1, float %val2) { ret float %ret } +; Test the f16 maxnum intrinsic. +define half @f12_half(half %dummy, half %val1, half %val2) { +; CHECK-LABEL: f12_half: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: wfmaxsb %f0, %f0, %f9, 4 +; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %ret = call half @llvm.maxnum.f16(half %val1, half %val2) + ret half %ret +} + ; Test the f32 maxnum intrinsic. 
define float @f12(float %dummy, float %val1, float %val2) {
; CHECK-LABEL: f12:
diff --git a/llvm/test/CodeGen/SystemZ/vec-min-05.ll b/llvm/test/CodeGen/SystemZ/vec-min-05.ll
index bf27eb3e56036..78ae80d89e30f 100644
--- a/llvm/test/CodeGen/SystemZ/vec-min-05.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-min-05.ll
@@ -14,6 +14,9 @@ declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>)
 declare float @llvm.minimum.f32(float, float)
 declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>)

+declare half @fminh(half, half)
+declare half @llvm.minnum.f16(half, half)
+
 declare fp128 @fminl(fp128, fp128)
 declare fp128 @llvm.minnum.f128(fp128, fp128)
 declare fp128 @llvm.minimum.f128(fp128, fp128)
@@ -87,6 +90,15 @@ define <2 x double> @f7(<2 x double> %dummy, <2 x double> %val1,
   ret <2 x double> %ret
 }

+; Test the fminh library function.
+define half @f11_half(half %dummy, half %val1, half %val2) {
+; CHECK-LABEL: f11_half:
+; CHECK: brasl %r14, fminh@PLT
+; CHECK: br %r14
+  %ret = call half @fminh(half %val1, half %val2) readnone
+  ret half %ret
+}
+
 ; Test the fminf library function.
 define float @f11(float %dummy, float %val1, float %val2) {
 ; CHECK-LABEL: f11:
@@ -96,6 +108,18 @@ define float @f11(float %dummy, float %val1, float %val2) {
   ret float %ret
 }

+; Test the f16 minnum intrinsic.
+define half @f12_half(half %dummy, half %val1, half %val2) {
+; CHECK-LABEL: f12_half:
+; CHECK: brasl %r14, __extendhfsf2@PLT
+; CHECK: brasl %r14, __extendhfsf2@PLT
+; CHECK: wfminsb %f0, %f0, %f9, 4
+; CHECK: brasl %r14, __truncsfhf2@PLT
+; CHECK: br %r14
+  %ret = call half @llvm.minnum.f16(half %val1, half %val2)
+  ret half %ret
+}
+
 ; Test the f32 minnum intrinsic.
 define float @f12(float %dummy, float %val1, float %val2) {
 ; CHECK-LABEL: f12:
diff --git a/llvm/test/CodeGen/SystemZ/vec-strict-max-01.ll b/llvm/test/CodeGen/SystemZ/vec-strict-max-01.ll
index 66870a797a7a5..ff1875a731fbf 100644
--- a/llvm/test/CodeGen/SystemZ/vec-strict-max-01.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-strict-max-01.ll
@@ -12,6 +12,9 @@ declare <4 x float> @llvm.experimental.constrained.maxnum.v4f32(<4 x float>, <4
 declare float @llvm.experimental.constrained.maximum.f32(float, float, metadata)
 declare <4 x float> @llvm.experimental.constrained.maximum.v4f32(<4 x float>, <4 x float>, metadata)

+declare half @llvm.experimental.constrained.maxnum.f16(half, half, metadata)
+declare half @llvm.experimental.constrained.maximum.f16(half, half, metadata)
+
 declare fp128 @llvm.experimental.constrained.maxnum.f128(fp128, fp128, metadata)
 declare fp128 @llvm.experimental.constrained.maximum.f128(fp128, fp128, metadata)

@@ -38,6 +41,20 @@ define <2 x double> @f2(<2 x double> %dummy, <2 x double> %val1,
   ret <2 x double> %ret
 }

+; Test the f16 maxnum intrinsic.
+define half @f3_half(half %dummy, half %val1, half %val2) #0 {
+; CHECK-LABEL: f3_half:
+; CHECK: brasl %r14, __extendhfsf2@PLT
+; CHECK: brasl %r14, __extendhfsf2@PLT
+; CHECK: wfmaxsb %f0, %f0, %f9, 4
+; CHECK: brasl %r14, __truncsfhf2@PLT
+; CHECK: br %r14
+  %ret = call half @llvm.experimental.constrained.maxnum.f16(
+                        half %val1, half %val2,
+                        metadata !"fpexcept.strict") #0
+  ret half %ret
+}
+
 ; Test the f32 maxnum intrinsic.
 define float @f3(float %dummy, float %val1, float %val2) #0 {
 ; CHECK-LABEL: f3:
@@ -101,6 +118,20 @@ define <2 x double> @f12(<2 x double> %dummy, <2 x double> %val1,
   ret <2 x double> %ret
 }

+; Test the f16 maximum intrinsic.
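+; Unlike maxnum above, llvm.maximum propagates NaNs and treats -0.0 as less
+; than +0.0, which is why a different WFMAXSB mode (1 instead of 4) is
+; selected here.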
+define half @f13_half(half %dummy, half %val1, half %val2) #0 {
+; CHECK-LABEL: f13_half:
+; CHECK: brasl %r14, __extendhfsf2@PLT
+; CHECK: brasl %r14, __extendhfsf2@PLT
+; CHECK: wfmaxsb %f0, %f0, %f9, 1
+; CHECK: brasl %r14, __truncsfhf2@PLT
+; CHECK: br %r14
+  %ret = call half @llvm.experimental.constrained.maximum.f16(
+                        half %val1, half %val2,
+                        metadata !"fpexcept.strict") #0
+  ret half %ret
+}
+
 ; Test the f32 maximum intrinsic.
 define float @f13(float %dummy, float %val1, float %val2) #0 {
 ; CHECK-LABEL: f13:
diff --git a/llvm/test/CodeGen/SystemZ/vec-strict-min-01.ll b/llvm/test/CodeGen/SystemZ/vec-strict-min-01.ll
index cf5332ff4f1d1..ddbffd735f699 100644
--- a/llvm/test/CodeGen/SystemZ/vec-strict-min-01.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-strict-min-01.ll
@@ -12,6 +12,9 @@ declare <4 x float> @llvm.experimental.constrained.minnum.v4f32(<4 x float>, <4
 declare float @llvm.experimental.constrained.minimum.f32(float, float, metadata)
 declare <4 x float> @llvm.experimental.constrained.minimum.v4f32(<4 x float>, <4 x float>, metadata)

+declare half @llvm.experimental.constrained.minnum.f16(half, half, metadata)
+declare half @llvm.experimental.constrained.minimum.f16(half, half, metadata)
+
 declare fp128 @llvm.experimental.constrained.minnum.f128(fp128, fp128, metadata)
 declare fp128 @llvm.experimental.constrained.minimum.f128(fp128, fp128, metadata)

@@ -38,6 +41,20 @@ define <2 x double> @f2(<2 x double> %dummy, <2 x double> %val1,
   ret <2 x double> %ret
 }

+; Test the f16 minnum intrinsic.
+define half @f3_half(half %dummy, half %val1, half %val2) #0 {
+; CHECK-LABEL: f3_half:
+; CHECK: brasl %r14, __extendhfsf2@PLT
+; CHECK: brasl %r14, __extendhfsf2@PLT
+; CHECK: wfminsb %f0, %f0, %f9, 4
+; CHECK: brasl %r14, __truncsfhf2@PLT
+; CHECK: br %r14
+  %ret = call half @llvm.experimental.constrained.minnum.f16(
+                        half %val1, half %val2,
+                        metadata !"fpexcept.strict") #0
+  ret half %ret
+}
+
 ; Test the f32 minnum intrinsic.
 define float @f3(float %dummy, float %val1, float %val2) #0 {
 ; CHECK-LABEL: f3:
@@ -101,6 +118,20 @@ define <2 x double> @f12(<2 x double> %dummy, <2 x double> %val1,
   ret <2 x double> %ret
 }

+; Test the f16 minimum intrinsic.
+define half @f13_half(half %dummy, half %val1, half %val2) #0 {
+; CHECK-LABEL: f13_half:
+; CHECK: brasl %r14, __extendhfsf2@PLT
+; CHECK: brasl %r14, __extendhfsf2@PLT
+; CHECK: wfminsb %f0, %f0, %f9, 1
+; CHECK: brasl %r14, __truncsfhf2@PLT
+; CHECK: br %r14
+  %ret = call half @llvm.experimental.constrained.minimum.f16(
+                        half %val1, half %val2,
+                        metadata !"fpexcept.strict") #0
+  ret half %ret
+}
+
 ; Test the f32 minimum intrinsic.
 define float @f13(float %dummy, float %val1, float %val2) #0 {
 ; CHECK-LABEL: f13:

From cde94f89607609a6e14d2c312a94e93486fced44 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson
Date: Sat, 8 Mar 2025 10:27:13 -0600
Subject: [PATCH 03/12] Updated per review + fix.

---
 compiler-rt/lib/builtins/extendhfdf2.c | 12 +-----------
 compiler-rt/lib/builtins/extendhftf2.c |  2 +-
 2 files changed, 2 insertions(+), 12 deletions(-)

diff --git a/compiler-rt/lib/builtins/extendhfdf2.c b/compiler-rt/lib/builtins/extendhfdf2.c
index 33fa92d2cd341..5055b5adad4bf 100644
--- a/compiler-rt/lib/builtins/extendhfdf2.c
+++ b/compiler-rt/lib/builtins/extendhfdf2.c
@@ -12,16 +12,6 @@

 // Use a forwarding definition and noinline to implement a poor man's alias,
 // as there isn't a good cross-platform way of defining one.
-COMPILER_RT_ABI NOINLINE float __extendhfdf2(src_t a) { +COMPILER_RT_ABI NOINLINE dst_t __extendhfdf2(src_t a) { return __extendXfYf2__(a); } - -COMPILER_RT_ABI float __gnu_h2d_ieee(src_t a) { return __extendhfdf2(a); } - -#if defined(__ARM_EABI__) -#if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI float __aeabi_h2d(src_t a) { return __extendhfdf2(a); } -#else -COMPILER_RT_ALIAS(__extendhfdf2, __aeabi_h2d) -#endif -#endif diff --git a/compiler-rt/lib/builtins/extendhftf2.c b/compiler-rt/lib/builtins/extendhftf2.c index 67eddc6b34761..7609db6f06e4a 100644 --- a/compiler-rt/lib/builtins/extendhftf2.c +++ b/compiler-rt/lib/builtins/extendhftf2.c @@ -10,7 +10,7 @@ #define QUAD_PRECISION #include "fp_lib.h" -#if defined(CRT_HAS_TF_MODE) +#if defined(CRT_HAS_TF_MODE) && defined(COMPILER_RT_HAS_FLOAT16) #define SRC_HALF #define DST_QUAD #include "fp_extend_impl.inc" From 25576d771ed7a539f0588fdba063bfd8e43327f4 Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Thu, 13 Mar 2025 09:09:09 -0600 Subject: [PATCH 04/12] Updates --- compiler-rt/lib/builtins/trunctfhf2.c | 2 +- .../test/CodeGen/SystemZ/atomicrmw-fadd-04.ll | 76 +++++++++++++++++++ llvm/test/CodeGen/SystemZ/fp-half-cmp.ll | 2 +- llvm/test/CodeGen/SystemZ/fp-half-mem.ll | 65 ++++++++++++++++ 4 files changed, 143 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/SystemZ/atomicrmw-fadd-04.ll create mode 100644 llvm/test/CodeGen/SystemZ/fp-half-mem.ll diff --git a/compiler-rt/lib/builtins/trunctfhf2.c b/compiler-rt/lib/builtins/trunctfhf2.c index 0db4c4d0d8b31..3f031e0f84451 100644 --- a/compiler-rt/lib/builtins/trunctfhf2.c +++ b/compiler-rt/lib/builtins/trunctfhf2.c @@ -10,7 +10,7 @@ #define QUAD_PRECISION #include "fp_lib.h" -#if defined(CRT_HAS_TF_MODE) +#if defined(CRT_HAS_TF_MODE) && defined(COMPILER_RT_HAS_FLOAT16) #define SRC_QUAD #define DST_HALF #include "fp_trunc_impl.inc" diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-fadd-04.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-fadd-04.ll new file mode 100644 index 0000000000000..918790aa404d6 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/atomicrmw-fadd-04.ll @@ -0,0 +1,76 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; Test atomic half addition. Expect a compare-and-swap loop. 
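+; The addition itself is done in f32 via __extendhfsf2/__truncsfhf2; the
+; 16-bit result is then installed with a word-sized compare-and-swap loop
+; over the containing aligned word, as for other subword atomics.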
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define half @f1(ptr %src, half %b) { +; CHECK-LABEL: f1: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r11, %r15, 88(%r15) +; CHECK-NEXT: .cfi_offset %r11, -72 +; CHECK-NEXT: .cfi_offset %r12, -64 +; CHECK-NEXT: .cfi_offset %r13, -56 +; CHECK-NEXT: .cfi_offset %r14, -48 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -184 +; CHECK-NEXT: .cfi_def_cfa_offset 344 +; CHECK-NEXT: std %f8, 176(%r15) # 8-byte Folded Spill +; CHECK-NEXT: std %f9, 168(%r15) # 8-byte Folded Spill +; CHECK-NEXT: std %f10, 160(%r15) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset %f8, -168 +; CHECK-NEXT: .cfi_offset %f9, -176 +; CHECK-NEXT: .cfi_offset %f10, -184 +; CHECK-NEXT: lgr %r13, %r2 +; CHECK-NEXT: lgh %r0, 0(%r2) +; CHECK-NEXT: ler %f8, %f0 +; CHECK-NEXT: sllg %r0, %r0, 48 +; CHECK-NEXT: ldgr %f9, %r0 +; CHECK-NEXT: risbg %r12, %r2, 0, 189, 0 +; CHECK-NEXT: sll %r13, 3 +; CHECK-NEXT: lcr %r11, %r13 +; CHECK-NEXT: j .LBB0_2 +; CHECK-NEXT: .LBB0_1: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: sllg %r0, %r3, 48 +; CHECK-NEXT: ldgr %f9, %r0 +; CHECK-NEXT: je .LBB0_5 +; CHECK-NEXT: .LBB0_2: # %atomicrmw.start +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB0_3 Depth 2 +; CHECK-NEXT: ler %f0, %f8 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: ler %f10, %f0 +; CHECK-NEXT: ler %f0, %f9 +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: aebr %f0, %f10 +; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT +; CHECK-NEXT: # kill: def $f0h killed $f0h def $f0d +; CHECK-NEXT: lgdr %r1, %f0 +; CHECK-NEXT: l %r0, 0(%r12) +; CHECK-NEXT: srlg %r1, %r1, 48 +; CHECK-NEXT: lgdr %r2, %f9 +; CHECK-NEXT: srlg %r2, %r2, 48 +; CHECK-NEXT: .LBB0_3: # %atomicrmw.start +; CHECK-NEXT: # Parent Loop BB0_2 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: rll %r3, %r0, 16(%r13) +; CHECK-NEXT: risbg %r1, %r3, 32, 47, 0 +; CHECK-NEXT: llhr %r3, %r3 +; CHECK-NEXT: cr %r3, %r2 +; CHECK-NEXT: jlh .LBB0_1 +; CHECK-NEXT: # %bb.4: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=2 +; CHECK-NEXT: rll %r4, %r1, -16(%r11) +; CHECK-NEXT: cs %r0, %r4, 0(%r12) +; CHECK-NEXT: jl .LBB0_3 +; CHECK-NEXT: j .LBB0_1 +; CHECK-NEXT: .LBB0_5: # %atomicrmw.end +; CHECK-NEXT: ler %f0, %f9 +; CHECK-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload +; CHECK-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload +; CHECK-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload +; CHECK-NEXT: lmg %r11, %r15, 272(%r15) +; CHECK-NEXT: br %r14 + %res = atomicrmw fadd ptr %src, half %b seq_cst + ret half %res +} diff --git a/llvm/test/CodeGen/SystemZ/fp-half-cmp.ll b/llvm/test/CodeGen/SystemZ/fp-half-cmp.ll index 3d9ec6a43e374..a6d454b6dbc29 100644 --- a/llvm/test/CodeGen/SystemZ/fp-half-cmp.ll +++ b/llvm/test/CodeGen/SystemZ/fp-half-cmp.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z16 -verify-machineinstrs \ ; RUN: | FileCheck %s ; -; Various tests comparaisons and uses involving 16-bit floating point (half). +; Some tests with comparisons and their uses involving 16-bit floating point. 
; fcmp half; select half define half @fun0(half %Arg0, half %Arg1) { diff --git a/llvm/test/CodeGen/SystemZ/fp-half-mem.ll b/llvm/test/CodeGen/SystemZ/fp-half-mem.ll new file mode 100644 index 0000000000000..a3dd646d3b51f --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/fp-half-mem.ll @@ -0,0 +1,65 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=zEC12 -verify-machineinstrs \ +; RUN: | FileCheck %s --check-prefix=NOVEC +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z16 -verify-machineinstrs \ +; RUN: | FileCheck %s --check-prefix=VECTOR + +declare void @foo(ptr) + +; Test an alloca. +define half @f1() { +; NOVEC-LABEL: f1: +; NOVEC: # %bb.0: +; NOVEC-NEXT: stmg %r14, %r15, 112(%r15) +; NOVEC-NEXT: .cfi_offset %r14, -48 +; NOVEC-NEXT: .cfi_offset %r15, -40 +; NOVEC-NEXT: aghi %r15, -168 +; NOVEC-NEXT: .cfi_def_cfa_offset 328 +; NOVEC-NEXT: la %r2, 166(%r15) +; NOVEC-NEXT: brasl %r14, foo@PLT +; NOVEC-NEXT: lh %r0, 166(%r15) +; NOVEC-NEXT: sll %r0, 16 +; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: ldgr %f0, %r0 +; NOVEC-NEXT: # kill: def $f0h killed $f0h killed $f0d +; NOVEC-NEXT: lmg %r14, %r15, 280(%r15) +; NOVEC-NEXT: br %r14 +; +; VECTOR-LABEL: f1: +; VECTOR: # %bb.0: +; VECTOR-NEXT: stmg %r14, %r15, 112(%r15) +; VECTOR-NEXT: .cfi_offset %r14, -48 +; VECTOR-NEXT: .cfi_offset %r15, -40 +; VECTOR-NEXT: aghi %r15, -168 +; VECTOR-NEXT: .cfi_def_cfa_offset 328 +; VECTOR-NEXT: la %r2, 166(%r15) +; VECTOR-NEXT: brasl %r14, foo@PLT +; VECTOR-NEXT: vlreph %v0, 166(%r15) +; VECTOR-NEXT: lmg %r14, %r15, 280(%r15) +; VECTOR-NEXT: br %r14 + %ptr = alloca half + call void @foo(ptr %ptr) + %orig = load half, ptr %ptr + ret half %orig +} + +; Test accessing a half element of an aggregate type. 
+%s.half = type { half, half, half, half, half }
+define half @f2(ptr %P) {
+; NOVEC-LABEL: f2:
+; NOVEC:       # %bb.0:
+; NOVEC-NEXT:    lh %r0, 6(%r2)
+; NOVEC-NEXT:    sll %r0, 16
+; NOVEC-NEXT:    risbhg %r0, %r0, 0, 159, 32
+; NOVEC-NEXT:    ldgr %f0, %r0
+; NOVEC-NEXT:    # kill: def $f0h killed $f0h killed $f0d
+; NOVEC-NEXT:    br %r14
+;
+; VECTOR-LABEL: f2:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    vlreph %v0, 6(%r2)
+; VECTOR-NEXT:    br %r14
+  %gep = getelementptr inbounds %s.half, ptr %P, i64 0, i32 3
+  %res = load half, ptr %gep
+  ret half %res
+}

From 1aff9687ba2790f7532c38961d753d9571d0f911 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson
Date: Wed, 19 Mar 2025 14:58:23 -0600
Subject: [PATCH 05/12] Add extendhfdf2_test.c

---
 .../test/builtins/Unit/extendhfdf2_test.c     | 80 ++++++++++++++++++
 1 file changed, 80 insertions(+)
 create mode 100644 compiler-rt/test/builtins/Unit/extendhfdf2_test.c

diff --git a/compiler-rt/test/builtins/Unit/extendhfdf2_test.c b/compiler-rt/test/builtins/Unit/extendhfdf2_test.c
new file mode 100644
index 0000000000000..422e272c11f77
--- /dev/null
+++ b/compiler-rt/test/builtins/Unit/extendhfdf2_test.c
@@ -0,0 +1,80 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+// REQUIRES: librt_has_extendhfdf2
+
+#include <limits.h>
+
+#include "fp_test.h"
+
+double __extendhfdf2(TYPE_FP16 a);
+
+int test__extendhfdf2(TYPE_FP16 a, uint64_t expected)
+{
+  double x = __extendhfdf2(a);
+  int ret = compareResultD(x, expected);
+
+  if (ret){
+    printf("error in test__extendhfdf2(%#.4x) = %f, "
+           "expected %f\n", toRep16(a), x, fromRep64(expected));
+  }
+  return ret;
+}
+
+char assumption_1[sizeof(TYPE_FP16) * CHAR_BIT == 16] = {0};
+
+int main()
+{
+  // qNaN
+  if (test__extendhfdf2(makeQNaN16(),
+                        UINT64_C(0x7ff8000000000000)))
+    return 1;
+  // NaN
+  if (test__extendhfdf2(fromRep16(0x7f80),
+                        UINT64_C(0x7ffe000000000000)))
+    return 1;
+  // inf
+  if (test__extendhfdf2(makeInf16(),
+                        UINT64_C(0x7ff0000000000000)))
+    return 1;
+  // -inf
+  if (test__extendhfdf2(makeNegativeInf16(),
+                        UINT64_C(0xfff0000000000000)))
+    return 1;
+  // zero
+  if (test__extendhfdf2(fromRep16(0x0),
+                        UINT64_C(0x0)))
+    return 1;
+  // -zero
+  if (test__extendhfdf2(fromRep16(0x8000),
+                        UINT64_C(0x8000000000000000)))
+    return 1;
+  if (test__extendhfdf2(fromRep16(0x4248),
+                        UINT64_C(0x4009200000000000)))
+    return 1;
+  if (test__extendhfdf2(fromRep16(0xc248),
+                        UINT64_C(0xc009200000000000)))
+    return 1;
+  if (test__extendhfdf2(fromRep16(0x6e62),
+                        UINT64_C(0x40b9880000000000)))
+    return 1;
+  if (test__extendhfdf2(fromRep16(0x3c00),
+                        UINT64_C(0x3ff0000000000000)))
+    return 1;
+  if (test__extendhfdf2(fromRep16(0x0400),
+                        UINT64_C(0x3f10000000000000)))
+    return 1;
+  // denormal
+  if (test__extendhfdf2(fromRep16(0x0010),
+                        UINT64_C(0x3eb0000000000000)))
+    return 1;
+  if (test__extendhfdf2(fromRep16(0x0001),
+                        UINT64_C(0x3e70000000000000)))
+    return 1;
+  if (test__extendhfdf2(fromRep16(0x8001),
+                        UINT64_C(0xbe70000000000000)))
+    return 1;
+  // max (exact in double)
+  if (test__extendhfdf2(fromRep16(0x7bff),
+                        UINT64_C(0x40effc0000000000)))
+    return 1;
+  return 0;
+}

From 9f97d498f1fa720468324e3e6d9c1fd21fd10a11 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson
Date: Mon, 31 Mar 2025 11:14:29 -0600
Subject: [PATCH 06/12] Rebase

---
 .../test/CodeGen/SystemZ/atomicrmw-fadd-04.ll | 12 +-
 llvm/test/CodeGen/SystemZ/fp-half-cmp.ll      | 16 +--
llvm/test/CodeGen/SystemZ/fp-half-libcall.ll | 40 +++--- llvm/test/CodeGen/SystemZ/fp-half-strict.ll | 40 +++--- llvm/test/CodeGen/SystemZ/fp-half-vector.ll | 124 +++++++++--------- llvm/test/CodeGen/SystemZ/fp-half.ll | 88 ++++++------- llvm/test/CodeGen/SystemZ/spill-half-01.mir | 18 +-- 7 files changed, 169 insertions(+), 169 deletions(-) diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-fadd-04.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-fadd-04.ll index 918790aa404d6..a0869e13a013d 100644 --- a/llvm/test/CodeGen/SystemZ/atomicrmw-fadd-04.ll +++ b/llvm/test/CodeGen/SystemZ/atomicrmw-fadd-04.ll @@ -14,9 +14,9 @@ define half @f1(ptr %src, half %b) { ; CHECK-NEXT: .cfi_offset %r15, -40 ; CHECK-NEXT: aghi %r15, -184 ; CHECK-NEXT: .cfi_def_cfa_offset 344 -; CHECK-NEXT: std %f8, 176(%r15) # 8-byte Folded Spill -; CHECK-NEXT: std %f9, 168(%r15) # 8-byte Folded Spill -; CHECK-NEXT: std %f10, 160(%r15) # 8-byte Folded Spill +; CHECK-NEXT: std %f8, 176(%r15) # 8-byte Spill +; CHECK-NEXT: std %f9, 168(%r15) # 8-byte Spill +; CHECK-NEXT: std %f10, 160(%r15) # 8-byte Spill ; CHECK-NEXT: .cfi_offset %f8, -168 ; CHECK-NEXT: .cfi_offset %f9, -176 ; CHECK-NEXT: .cfi_offset %f10, -184 @@ -66,9 +66,9 @@ define half @f1(ptr %src, half %b) { ; CHECK-NEXT: j .LBB0_1 ; CHECK-NEXT: .LBB0_5: # %atomicrmw.end ; CHECK-NEXT: ler %f0, %f9 -; CHECK-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload -; CHECK-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload -; CHECK-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload +; CHECK-NEXT: ld %f8, 176(%r15) # 8-byte Reload +; CHECK-NEXT: ld %f9, 168(%r15) # 8-byte Reload +; CHECK-NEXT: ld %f10, 160(%r15) # 8-byte Reload ; CHECK-NEXT: lmg %r11, %r15, 272(%r15) ; CHECK-NEXT: br %r14 %res = atomicrmw fadd ptr %src, half %b seq_cst diff --git a/llvm/test/CodeGen/SystemZ/fp-half-cmp.ll b/llvm/test/CodeGen/SystemZ/fp-half-cmp.ll index a6d454b6dbc29..2714d6ad9a92c 100644 --- a/llvm/test/CodeGen/SystemZ/fp-half-cmp.ll +++ b/llvm/test/CodeGen/SystemZ/fp-half-cmp.ll @@ -13,8 +13,8 @@ define half @fun0(half %Arg0, half %Arg1) { ; CHECK-NEXT: .cfi_offset %r15, -40 ; CHECK-NEXT: aghi %r15, -176 ; CHECK-NEXT: .cfi_def_cfa_offset 336 -; CHECK-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill -; CHECK-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; CHECK-NEXT: std %f8, 168(%r15) # 8-byte Spill +; CHECK-NEXT: std %f9, 160(%r15) # 8-byte Spill ; CHECK-NEXT: .cfi_offset %f8, -168 ; CHECK-NEXT: .cfi_offset %f9, -176 ; CHECK-NEXT: ldr %f8, %f2 @@ -30,8 +30,8 @@ define half @fun0(half %Arg0, half %Arg1) { ; CHECK-NEXT: .LBB0_2: # %entry ; CHECK-NEXT: # kill: def $f0s killed $f0s killed $v0 ; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT -; CHECK-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload -; CHECK-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; CHECK-NEXT: ld %f8, 168(%r15) # 8-byte Reload +; CHECK-NEXT: ld %f9, 160(%r15) # 8-byte Reload ; CHECK-NEXT: lmg %r14, %r15, 288(%r15) ; CHECK-NEXT: br %r14 entry: @@ -100,8 +100,8 @@ define i64 @fun3(i64 %a, i64 %b, half %f1, half %f2) #0 { ; CHECK-NEXT: .cfi_offset %r15, -40 ; CHECK-NEXT: aghi %r15, -176 ; CHECK-NEXT: .cfi_def_cfa_offset 336 -; CHECK-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill -; CHECK-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; CHECK-NEXT: std %f8, 168(%r15) # 8-byte Spill +; CHECK-NEXT: std %f9, 160(%r15) # 8-byte Spill ; CHECK-NEXT: .cfi_offset %f8, -168 ; CHECK-NEXT: .cfi_offset %f9, -176 ; CHECK-NEXT: ldr %f8, %f0 @@ -113,8 +113,8 @@ define i64 @fun3(i64 %a, i64 %b, half %f1, half %f2) #0 { ; CHECK-NEXT: ldr %f0, %f8 ; CHECK-NEXT: brasl 
%r14, __extendhfsf2@PLT ; CHECK-NEXT: cebr %f0, %f9 -; CHECK-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload -; CHECK-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; CHECK-NEXT: ld %f8, 168(%r15) # 8-byte Reload +; CHECK-NEXT: ld %f9, 160(%r15) # 8-byte Reload ; CHECK-NEXT: selgre %r2, %r12, %r13 ; CHECK-NEXT: lmg %r12, %r15, 272(%r15) ; CHECK-NEXT: br %r14 diff --git a/llvm/test/CodeGen/SystemZ/fp-half-libcall.ll b/llvm/test/CodeGen/SystemZ/fp-half-libcall.ll index 6e813a4a5094d..d8db549388c46 100644 --- a/llvm/test/CodeGen/SystemZ/fp-half-libcall.ll +++ b/llvm/test/CodeGen/SystemZ/fp-half-libcall.ll @@ -31,8 +31,8 @@ define half @f2(half %x, half %y) { ; CHECK-NEXT: .cfi_offset %r15, -40 ; CHECK-NEXT: aghi %r15, -176 ; CHECK-NEXT: .cfi_def_cfa_offset 336 -; CHECK-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill -; CHECK-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; CHECK-NEXT: std %f8, 168(%r15) # 8-byte Spill +; CHECK-NEXT: std %f9, 160(%r15) # 8-byte Spill ; CHECK-NEXT: .cfi_offset %f8, -168 ; CHECK-NEXT: .cfi_offset %f9, -176 ; CHECK-NEXT: ler %f8, %f2 @@ -44,8 +44,8 @@ define half @f2(half %x, half %y) { ; CHECK-NEXT: ler %f0, %f9 ; CHECK-NEXT: brasl %r14, powf@PLT ; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT -; CHECK-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload -; CHECK-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; CHECK-NEXT: ld %f8, 168(%r15) # 8-byte Reload +; CHECK-NEXT: ld %f9, 160(%r15) # 8-byte Reload ; CHECK-NEXT: lmg %r14, %r15, 288(%r15) ; CHECK-NEXT: br %r14 %tmp = call half @llvm.pow.f16(half %x, half %y) @@ -179,8 +179,8 @@ define half @f10(half %x, half %y) { ; CHECK-NEXT: .cfi_offset %r15, -40 ; CHECK-NEXT: aghi %r15, -176 ; CHECK-NEXT: .cfi_def_cfa_offset 336 -; CHECK-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill -; CHECK-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; CHECK-NEXT: std %f8, 168(%r15) # 8-byte Spill +; CHECK-NEXT: std %f9, 160(%r15) # 8-byte Spill ; CHECK-NEXT: .cfi_offset %f8, -168 ; CHECK-NEXT: .cfi_offset %f9, -176 ; CHECK-NEXT: ler %f8, %f2 @@ -192,8 +192,8 @@ define half @f10(half %x, half %y) { ; CHECK-NEXT: ler %f0, %f9 ; CHECK-NEXT: brasl %r14, fminf@PLT ; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT -; CHECK-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload -; CHECK-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; CHECK-NEXT: ld %f8, 168(%r15) # 8-byte Reload +; CHECK-NEXT: ld %f9, 160(%r15) # 8-byte Reload ; CHECK-NEXT: lmg %r14, %r15, 288(%r15) ; CHECK-NEXT: br %r14 %tmp = call half @llvm.minnum.f16(half %x, half %y) @@ -208,8 +208,8 @@ define half @f11(half %x, half %y) { ; CHECK-NEXT: .cfi_offset %r15, -40 ; CHECK-NEXT: aghi %r15, -176 ; CHECK-NEXT: .cfi_def_cfa_offset 336 -; CHECK-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill -; CHECK-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; CHECK-NEXT: std %f8, 168(%r15) # 8-byte Spill +; CHECK-NEXT: std %f9, 160(%r15) # 8-byte Spill ; CHECK-NEXT: .cfi_offset %f8, -168 ; CHECK-NEXT: .cfi_offset %f9, -176 ; CHECK-NEXT: ler %f8, %f2 @@ -221,8 +221,8 @@ define half @f11(half %x, half %y) { ; CHECK-NEXT: ler %f0, %f9 ; CHECK-NEXT: brasl %r14, fmaxf@PLT ; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT -; CHECK-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload -; CHECK-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; CHECK-NEXT: ld %f8, 168(%r15) # 8-byte Reload +; CHECK-NEXT: ld %f9, 160(%r15) # 8-byte Reload ; CHECK-NEXT: lmg %r14, %r15, 288(%r15) ; CHECK-NEXT: br %r14 %tmp = call half @llvm.maxnum.f16(half %x, half %y) @@ -239,8 +239,8 @@ define half @f12(half %x, half %y) { ; CHECK-NEXT: .cfi_offset 
%r15, -40 ; CHECK-NEXT: aghi %r15, -176 ; CHECK-NEXT: .cfi_def_cfa_offset 336 -; CHECK-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill -; CHECK-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; CHECK-NEXT: std %f8, 168(%r15) # 8-byte Spill +; CHECK-NEXT: std %f9, 160(%r15) # 8-byte Spill ; CHECK-NEXT: .cfi_offset %f8, -168 ; CHECK-NEXT: .cfi_offset %f9, -176 ; CHECK-NEXT: ler %f9, %f0 @@ -255,8 +255,8 @@ define half @f12(half %x, half %y) { ; CHECK-NEXT: ler %f0, %f8 ; CHECK-NEXT: .LBB11_2: ; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT -; CHECK-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload -; CHECK-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; CHECK-NEXT: ld %f8, 168(%r15) # 8-byte Reload +; CHECK-NEXT: ld %f9, 160(%r15) # 8-byte Reload ; CHECK-NEXT: lmg %r14, %r15, 288(%r15) ; CHECK-NEXT: br %r14 %tmp = call nnan half @llvm.minnum.f16(half %x, half %y) @@ -271,8 +271,8 @@ define half @f13(half %x, half %y) { ; CHECK-NEXT: .cfi_offset %r15, -40 ; CHECK-NEXT: aghi %r15, -176 ; CHECK-NEXT: .cfi_def_cfa_offset 336 -; CHECK-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill -; CHECK-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; CHECK-NEXT: std %f8, 168(%r15) # 8-byte Spill +; CHECK-NEXT: std %f9, 160(%r15) # 8-byte Spill ; CHECK-NEXT: .cfi_offset %f8, -168 ; CHECK-NEXT: .cfi_offset %f9, -176 ; CHECK-NEXT: ler %f9, %f0 @@ -287,8 +287,8 @@ define half @f13(half %x, half %y) { ; CHECK-NEXT: ler %f0, %f8 ; CHECK-NEXT: .LBB12_2: ; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT -; CHECK-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload -; CHECK-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; CHECK-NEXT: ld %f8, 168(%r15) # 8-byte Reload +; CHECK-NEXT: ld %f9, 160(%r15) # 8-byte Reload ; CHECK-NEXT: lmg %r14, %r15, 288(%r15) ; CHECK-NEXT: br %r14 %tmp = call nnan half @llvm.maxnum.f16(half %x, half %y) diff --git a/llvm/test/CodeGen/SystemZ/fp-half-strict.ll b/llvm/test/CodeGen/SystemZ/fp-half-strict.ll index 42663b109d7a9..43c2d7b2ab8c8 100644 --- a/llvm/test/CodeGen/SystemZ/fp-half-strict.ll +++ b/llvm/test/CodeGen/SystemZ/fp-half-strict.ll @@ -19,8 +19,8 @@ define half @fun0(half %f1, half %f2) #0 { ; NOVEC-NEXT: .cfi_offset %r15, -40 ; NOVEC-NEXT: aghi %r15, -176 ; NOVEC-NEXT: .cfi_def_cfa_offset 336 -; NOVEC-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill -; NOVEC-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f8, 168(%r15) # 8-byte Spill +; NOVEC-NEXT: std %f9, 160(%r15) # 8-byte Spill ; NOVEC-NEXT: .cfi_offset %f8, -168 ; NOVEC-NEXT: .cfi_offset %f9, -176 ; NOVEC-NEXT: ler %f8, %f0 @@ -31,8 +31,8 @@ define half @fun0(half %f1, half %f2) #0 { ; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT ; NOVEC-NEXT: aebr %f0, %f9 ; NOVEC-NEXT: brasl %r14, __truncsfhf2@PLT -; NOVEC-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload -; NOVEC-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f8, 168(%r15) # 8-byte Reload +; NOVEC-NEXT: ld %f9, 160(%r15) # 8-byte Reload ; NOVEC-NEXT: lmg %r14, %r15, 288(%r15) ; NOVEC-NEXT: br %r14 ; @@ -43,8 +43,8 @@ define half @fun0(half %f1, half %f2) #0 { ; VECTOR-NEXT: .cfi_offset %r15, -40 ; VECTOR-NEXT: aghi %r15, -176 ; VECTOR-NEXT: .cfi_def_cfa_offset 336 -; VECTOR-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill -; VECTOR-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: std %f8, 168(%r15) # 8-byte Spill +; VECTOR-NEXT: std %f9, 160(%r15) # 8-byte Spill ; VECTOR-NEXT: .cfi_offset %f8, -168 ; VECTOR-NEXT: .cfi_offset %f9, -176 ; VECTOR-NEXT: ldr %f8, %f0 @@ -55,8 +55,8 @@ define half @fun0(half %f1, half %f2) #0 { ; VECTOR-NEXT: brasl 
%r14, __extendhfsf2@PLT ; VECTOR-NEXT: aebr %f0, %f9 ; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT -; VECTOR-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload -; VECTOR-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: ld %f8, 168(%r15) # 8-byte Reload +; VECTOR-NEXT: ld %f9, 160(%r15) # 8-byte Reload ; VECTOR-NEXT: lmg %r14, %r15, 288(%r15) ; VECTOR-NEXT: br %r14 %res = call half @llvm.experimental.constrained.fadd.f16( @@ -134,9 +134,9 @@ define half @fun2(half %Op0, half %Op1, half %Op2) #0 { ; NOVEC-NEXT: .cfi_offset %r15, -40 ; NOVEC-NEXT: aghi %r15, -184 ; NOVEC-NEXT: .cfi_def_cfa_offset 344 -; NOVEC-NEXT: std %f8, 176(%r15) # 8-byte Folded Spill -; NOVEC-NEXT: std %f9, 168(%r15) # 8-byte Folded Spill -; NOVEC-NEXT: std %f10, 160(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f8, 176(%r15) # 8-byte Spill +; NOVEC-NEXT: std %f9, 168(%r15) # 8-byte Spill +; NOVEC-NEXT: std %f10, 160(%r15) # 8-byte Spill ; NOVEC-NEXT: .cfi_offset %f8, -168 ; NOVEC-NEXT: .cfi_offset %f9, -176 ; NOVEC-NEXT: .cfi_offset %f10, -184 @@ -155,9 +155,9 @@ define half @fun2(half %Op0, half %Op1, half %Op2) #0 { ; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT ; NOVEC-NEXT: meebr %f0, %f9 ; NOVEC-NEXT: brasl %r14, __truncsfhf2@PLT -; NOVEC-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload -; NOVEC-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload -; NOVEC-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f8, 176(%r15) # 8-byte Reload +; NOVEC-NEXT: ld %f9, 168(%r15) # 8-byte Reload +; NOVEC-NEXT: ld %f10, 160(%r15) # 8-byte Reload ; NOVEC-NEXT: lmg %r14, %r15, 296(%r15) ; NOVEC-NEXT: br %r14 ; @@ -168,9 +168,9 @@ define half @fun2(half %Op0, half %Op1, half %Op2) #0 { ; VECTOR-NEXT: .cfi_offset %r15, -40 ; VECTOR-NEXT: aghi %r15, -184 ; VECTOR-NEXT: .cfi_def_cfa_offset 344 -; VECTOR-NEXT: std %f8, 176(%r15) # 8-byte Folded Spill -; VECTOR-NEXT: std %f9, 168(%r15) # 8-byte Folded Spill -; VECTOR-NEXT: std %f10, 160(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: std %f8, 176(%r15) # 8-byte Spill +; VECTOR-NEXT: std %f9, 168(%r15) # 8-byte Spill +; VECTOR-NEXT: std %f10, 160(%r15) # 8-byte Spill ; VECTOR-NEXT: .cfi_offset %f8, -168 ; VECTOR-NEXT: .cfi_offset %f9, -176 ; VECTOR-NEXT: .cfi_offset %f10, -184 @@ -189,9 +189,9 @@ define half @fun2(half %Op0, half %Op1, half %Op2) #0 { ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT ; VECTOR-NEXT: wfmsb %f0, %f9, %f0 ; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT -; VECTOR-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload -; VECTOR-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload -; VECTOR-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: ld %f8, 176(%r15) # 8-byte Reload +; VECTOR-NEXT: ld %f9, 168(%r15) # 8-byte Reload +; VECTOR-NEXT: ld %f10, 160(%r15) # 8-byte Reload ; VECTOR-NEXT: lmg %r14, %r15, 296(%r15) ; VECTOR-NEXT: br %r14 entry: diff --git a/llvm/test/CodeGen/SystemZ/fp-half-vector.ll b/llvm/test/CodeGen/SystemZ/fp-half-vector.ll index cc3f61f998649..71906bb8a66d7 100644 --- a/llvm/test/CodeGen/SystemZ/fp-half-vector.ll +++ b/llvm/test/CodeGen/SystemZ/fp-half-vector.ll @@ -14,14 +14,14 @@ define <8 x half> @fun0(<8 x half> %Op) { ; NOVEC-NEXT: .cfi_offset %r15, -40 ; NOVEC-NEXT: aghi %r15, -224 ; NOVEC-NEXT: .cfi_def_cfa_offset 384 -; NOVEC-NEXT: std %f8, 216(%r15) # 8-byte Folded Spill -; NOVEC-NEXT: std %f9, 208(%r15) # 8-byte Folded Spill -; NOVEC-NEXT: std %f10, 200(%r15) # 8-byte Folded Spill -; NOVEC-NEXT: std %f11, 192(%r15) # 8-byte Folded Spill -; NOVEC-NEXT: std %f12, 184(%r15) # 8-byte Folded Spill -; NOVEC-NEXT: std %f13, 176(%r15) # 
8-byte Folded Spill -; NOVEC-NEXT: std %f14, 168(%r15) # 8-byte Folded Spill -; NOVEC-NEXT: std %f15, 160(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f8, 216(%r15) # 8-byte Spill +; NOVEC-NEXT: std %f9, 208(%r15) # 8-byte Spill +; NOVEC-NEXT: std %f10, 200(%r15) # 8-byte Spill +; NOVEC-NEXT: std %f11, 192(%r15) # 8-byte Spill +; NOVEC-NEXT: std %f12, 184(%r15) # 8-byte Spill +; NOVEC-NEXT: std %f13, 176(%r15) # 8-byte Spill +; NOVEC-NEXT: std %f14, 168(%r15) # 8-byte Spill +; NOVEC-NEXT: std %f15, 160(%r15) # 8-byte Spill ; NOVEC-NEXT: .cfi_offset %f8, -168 ; NOVEC-NEXT: .cfi_offset %f9, -176 ; NOVEC-NEXT: .cfi_offset %f10, -184 @@ -123,14 +123,14 @@ define <8 x half> @fun0(<8 x half> %Op) { ; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 ; NOVEC-NEXT: srl %r0, 16 ; NOVEC-NEXT: sth %r0, 8(%r13) -; NOVEC-NEXT: ld %f8, 216(%r15) # 8-byte Folded Reload -; NOVEC-NEXT: ld %f9, 208(%r15) # 8-byte Folded Reload -; NOVEC-NEXT: ld %f10, 200(%r15) # 8-byte Folded Reload -; NOVEC-NEXT: ld %f11, 192(%r15) # 8-byte Folded Reload -; NOVEC-NEXT: ld %f12, 184(%r15) # 8-byte Folded Reload -; NOVEC-NEXT: ld %f13, 176(%r15) # 8-byte Folded Reload -; NOVEC-NEXT: ld %f14, 168(%r15) # 8-byte Folded Reload -; NOVEC-NEXT: ld %f15, 160(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f8, 216(%r15) # 8-byte Reload +; NOVEC-NEXT: ld %f9, 208(%r15) # 8-byte Reload +; NOVEC-NEXT: ld %f10, 200(%r15) # 8-byte Reload +; NOVEC-NEXT: ld %f11, 192(%r15) # 8-byte Reload +; NOVEC-NEXT: ld %f12, 184(%r15) # 8-byte Reload +; NOVEC-NEXT: ld %f13, 176(%r15) # 8-byte Reload +; NOVEC-NEXT: ld %f14, 168(%r15) # 8-byte Reload +; NOVEC-NEXT: ld %f15, 160(%r15) # 8-byte Reload ; NOVEC-NEXT: lmg %r13, %r15, 328(%r15) ; NOVEC-NEXT: br %r14 ; @@ -142,14 +142,14 @@ define <8 x half> @fun0(<8 x half> %Op) { ; VECTOR-NEXT: .cfi_offset %r15, -40 ; VECTOR-NEXT: aghi %r15, -224 ; VECTOR-NEXT: .cfi_def_cfa_offset 384 -; VECTOR-NEXT: std %f8, 216(%r15) # 8-byte Folded Spill -; VECTOR-NEXT: std %f9, 208(%r15) # 8-byte Folded Spill -; VECTOR-NEXT: std %f10, 200(%r15) # 8-byte Folded Spill -; VECTOR-NEXT: std %f11, 192(%r15) # 8-byte Folded Spill -; VECTOR-NEXT: std %f12, 184(%r15) # 8-byte Folded Spill -; VECTOR-NEXT: std %f13, 176(%r15) # 8-byte Folded Spill -; VECTOR-NEXT: std %f14, 168(%r15) # 8-byte Folded Spill -; VECTOR-NEXT: std %f15, 160(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: std %f8, 216(%r15) # 8-byte Spill +; VECTOR-NEXT: std %f9, 208(%r15) # 8-byte Spill +; VECTOR-NEXT: std %f10, 200(%r15) # 8-byte Spill +; VECTOR-NEXT: std %f11, 192(%r15) # 8-byte Spill +; VECTOR-NEXT: std %f12, 184(%r15) # 8-byte Spill +; VECTOR-NEXT: std %f13, 176(%r15) # 8-byte Spill +; VECTOR-NEXT: std %f14, 168(%r15) # 8-byte Spill +; VECTOR-NEXT: std %f15, 160(%r15) # 8-byte Spill ; VECTOR-NEXT: .cfi_offset %f8, -168 ; VECTOR-NEXT: .cfi_offset %f9, -176 ; VECTOR-NEXT: .cfi_offset %f10, -184 @@ -212,14 +212,14 @@ define <8 x half> @fun0(<8 x half> %Op) { ; VECTOR-NEXT: vsteh %v9, 4(%r13), 0 ; VECTOR-NEXT: vsteh %v10, 2(%r13), 0 ; VECTOR-NEXT: vsteh %v15, 0(%r13), 0 -; VECTOR-NEXT: ld %f8, 216(%r15) # 8-byte Folded Reload -; VECTOR-NEXT: ld %f9, 208(%r15) # 8-byte Folded Reload -; VECTOR-NEXT: ld %f10, 200(%r15) # 8-byte Folded Reload -; VECTOR-NEXT: ld %f11, 192(%r15) # 8-byte Folded Reload -; VECTOR-NEXT: ld %f12, 184(%r15) # 8-byte Folded Reload -; VECTOR-NEXT: ld %f13, 176(%r15) # 8-byte Folded Reload -; VECTOR-NEXT: ld %f14, 168(%r15) # 8-byte Folded Reload -; VECTOR-NEXT: ld %f15, 160(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: ld %f8, 216(%r15) # 8-byte 
Reload +; VECTOR-NEXT: ld %f9, 208(%r15) # 8-byte Reload +; VECTOR-NEXT: ld %f10, 200(%r15) # 8-byte Reload +; VECTOR-NEXT: ld %f11, 192(%r15) # 8-byte Reload +; VECTOR-NEXT: ld %f12, 184(%r15) # 8-byte Reload +; VECTOR-NEXT: ld %f13, 176(%r15) # 8-byte Reload +; VECTOR-NEXT: ld %f14, 168(%r15) # 8-byte Reload +; VECTOR-NEXT: ld %f15, 160(%r15) # 8-byte Reload ; VECTOR-NEXT: lmg %r13, %r15, 328(%r15) ; VECTOR-NEXT: br %r14 entry: @@ -236,10 +236,10 @@ define <4 x half> @fun1(<4 x half> %Op) { ; NOVEC-NEXT: .cfi_offset %r15, -40 ; NOVEC-NEXT: aghi %r15, -192 ; NOVEC-NEXT: .cfi_def_cfa_offset 352 -; NOVEC-NEXT: std %f8, 184(%r15) # 8-byte Folded Spill -; NOVEC-NEXT: std %f9, 176(%r15) # 8-byte Folded Spill -; NOVEC-NEXT: std %f10, 168(%r15) # 8-byte Folded Spill -; NOVEC-NEXT: std %f11, 160(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f8, 184(%r15) # 8-byte Spill +; NOVEC-NEXT: std %f9, 176(%r15) # 8-byte Spill +; NOVEC-NEXT: std %f10, 168(%r15) # 8-byte Spill +; NOVEC-NEXT: std %f11, 160(%r15) # 8-byte Spill ; NOVEC-NEXT: .cfi_offset %f8, -168 ; NOVEC-NEXT: .cfi_offset %f9, -176 ; NOVEC-NEXT: .cfi_offset %f10, -184 @@ -269,10 +269,10 @@ define <4 x half> @fun1(<4 x half> %Op) { ; NOVEC-NEXT: ler %f0, %f11 ; NOVEC-NEXT: ler %f2, %f10 ; NOVEC-NEXT: ler %f4, %f9 -; NOVEC-NEXT: ld %f8, 184(%r15) # 8-byte Folded Reload -; NOVEC-NEXT: ld %f9, 176(%r15) # 8-byte Folded Reload -; NOVEC-NEXT: ld %f10, 168(%r15) # 8-byte Folded Reload -; NOVEC-NEXT: ld %f11, 160(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f8, 184(%r15) # 8-byte Reload +; NOVEC-NEXT: ld %f9, 176(%r15) # 8-byte Reload +; NOVEC-NEXT: ld %f10, 168(%r15) # 8-byte Reload +; NOVEC-NEXT: ld %f11, 160(%r15) # 8-byte Reload ; NOVEC-NEXT: lmg %r14, %r15, 304(%r15) ; NOVEC-NEXT: br %r14 ; @@ -283,10 +283,10 @@ define <4 x half> @fun1(<4 x half> %Op) { ; VECTOR-NEXT: .cfi_offset %r15, -40 ; VECTOR-NEXT: aghi %r15, -192 ; VECTOR-NEXT: .cfi_def_cfa_offset 352 -; VECTOR-NEXT: std %f8, 184(%r15) # 8-byte Folded Spill -; VECTOR-NEXT: std %f9, 176(%r15) # 8-byte Folded Spill -; VECTOR-NEXT: std %f10, 168(%r15) # 8-byte Folded Spill -; VECTOR-NEXT: std %f11, 160(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: std %f8, 184(%r15) # 8-byte Spill +; VECTOR-NEXT: std %f9, 176(%r15) # 8-byte Spill +; VECTOR-NEXT: std %f10, 168(%r15) # 8-byte Spill +; VECTOR-NEXT: std %f11, 160(%r15) # 8-byte Spill ; VECTOR-NEXT: .cfi_offset %f8, -168 ; VECTOR-NEXT: .cfi_offset %f9, -176 ; VECTOR-NEXT: .cfi_offset %f10, -184 @@ -316,10 +316,10 @@ define <4 x half> @fun1(<4 x half> %Op) { ; VECTOR-NEXT: ldr %f0, %f11 ; VECTOR-NEXT: ldr %f2, %f10 ; VECTOR-NEXT: ldr %f4, %f9 -; VECTOR-NEXT: ld %f8, 184(%r15) # 8-byte Folded Reload -; VECTOR-NEXT: ld %f9, 176(%r15) # 8-byte Folded Reload -; VECTOR-NEXT: ld %f10, 168(%r15) # 8-byte Folded Reload -; VECTOR-NEXT: ld %f11, 160(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: ld %f8, 184(%r15) # 8-byte Reload +; VECTOR-NEXT: ld %f9, 176(%r15) # 8-byte Reload +; VECTOR-NEXT: ld %f10, 168(%r15) # 8-byte Reload +; VECTOR-NEXT: ld %f11, 160(%r15) # 8-byte Reload ; VECTOR-NEXT: lmg %r14, %r15, 304(%r15) ; VECTOR-NEXT: br %r14 entry: @@ -336,8 +336,8 @@ define <2 x half> @fun2(<2 x half> %Op) { ; NOVEC-NEXT: .cfi_offset %r15, -40 ; NOVEC-NEXT: aghi %r15, -176 ; NOVEC-NEXT: .cfi_def_cfa_offset 336 -; NOVEC-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill -; NOVEC-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f8, 168(%r15) # 8-byte Spill +; NOVEC-NEXT: std %f9, 160(%r15) # 8-byte Spill ; NOVEC-NEXT: .cfi_offset %f8, -168 ; 
NOVEC-NEXT: .cfi_offset %f9, -176 ; NOVEC-NEXT: ler %f8, %f2 @@ -355,8 +355,8 @@ define <2 x half> @fun2(<2 x half> %Op) { ; NOVEC-NEXT: brasl %r14, __truncdfhf2@PLT ; NOVEC-NEXT: ler %f2, %f0 ; NOVEC-NEXT: ler %f0, %f9 -; NOVEC-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload -; NOVEC-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f8, 168(%r15) # 8-byte Reload +; NOVEC-NEXT: ld %f9, 160(%r15) # 8-byte Reload ; NOVEC-NEXT: lmg %r14, %r15, 288(%r15) ; NOVEC-NEXT: br %r14 ; @@ -367,7 +367,7 @@ define <2 x half> @fun2(<2 x half> %Op) { ; VECTOR-NEXT: .cfi_offset %r15, -40 ; VECTOR-NEXT: aghi %r15, -184 ; VECTOR-NEXT: .cfi_def_cfa_offset 344 -; VECTOR-NEXT: std %f8, 176(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: std %f8, 176(%r15) # 8-byte Spill ; VECTOR-NEXT: .cfi_offset %f8, -168 ; VECTOR-NEXT: ldr %f8, %f0 ; VECTOR-NEXT: ldr %f0, %f2 @@ -390,7 +390,7 @@ define <2 x half> @fun2(<2 x half> %Op) { ; VECTOR-NEXT: brasl %r14, __truncdfhf2@PLT ; VECTOR-NEXT: ldr %f2, %f0 ; VECTOR-NEXT: ldr %f0, %f8 -; VECTOR-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: ld %f8, 176(%r15) # 8-byte Reload ; VECTOR-NEXT: lmg %r14, %r15, 296(%r15) ; VECTOR-NEXT: br %r14 entry: @@ -686,10 +686,10 @@ define void @fun5(<4 x half> %dummy, <8 x half> %Arg5) { ; NOVEC-NEXT: .cfi_offset %r15, -40 ; NOVEC-NEXT: aghi %r15, -256 ; NOVEC-NEXT: .cfi_def_cfa_offset 416 -; NOVEC-NEXT: std %f8, 248(%r15) # 8-byte Folded Spill -; NOVEC-NEXT: std %f9, 240(%r15) # 8-byte Folded Spill -; NOVEC-NEXT: std %f10, 232(%r15) # 8-byte Folded Spill -; NOVEC-NEXT: std %f11, 224(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f8, 248(%r15) # 8-byte Spill +; NOVEC-NEXT: std %f9, 240(%r15) # 8-byte Spill +; NOVEC-NEXT: std %f10, 232(%r15) # 8-byte Spill +; NOVEC-NEXT: std %f11, 224(%r15) # 8-byte Spill ; NOVEC-NEXT: .cfi_offset %f8, -168 ; NOVEC-NEXT: .cfi_offset %f9, -176 ; NOVEC-NEXT: .cfi_offset %f10, -184 @@ -759,10 +759,10 @@ define void @fun5(<4 x half> %dummy, <8 x half> %Arg5) { ; NOVEC-NEXT: srl %r0, 16 ; NOVEC-NEXT: sth %r0, 166(%r15) ; NOVEC-NEXT: brasl %r14, foo2@PLT -; NOVEC-NEXT: ld %f8, 248(%r15) # 8-byte Folded Reload -; NOVEC-NEXT: ld %f9, 240(%r15) # 8-byte Folded Reload -; NOVEC-NEXT: ld %f10, 232(%r15) # 8-byte Folded Reload -; NOVEC-NEXT: ld %f11, 224(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f8, 248(%r15) # 8-byte Reload +; NOVEC-NEXT: ld %f9, 240(%r15) # 8-byte Reload +; NOVEC-NEXT: ld %f10, 232(%r15) # 8-byte Reload +; NOVEC-NEXT: ld %f11, 224(%r15) # 8-byte Reload ; NOVEC-NEXT: lmg %r14, %r15, 368(%r15) ; NOVEC-NEXT: br %r14 ; diff --git a/llvm/test/CodeGen/SystemZ/fp-half.ll b/llvm/test/CodeGen/SystemZ/fp-half.ll index cd4aa12c2b4ef..3585772cacf29 100644 --- a/llvm/test/CodeGen/SystemZ/fp-half.ll +++ b/llvm/test/CodeGen/SystemZ/fp-half.ll @@ -15,8 +15,8 @@ define half @fun0(half %Op0, half %Op1) { ; NOVEC-NEXT: .cfi_offset %r15, -40 ; NOVEC-NEXT: aghi %r15, -176 ; NOVEC-NEXT: .cfi_def_cfa_offset 336 -; NOVEC-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill -; NOVEC-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f8, 168(%r15) # 8-byte Spill +; NOVEC-NEXT: std %f9, 160(%r15) # 8-byte Spill ; NOVEC-NEXT: .cfi_offset %f8, -168 ; NOVEC-NEXT: .cfi_offset %f9, -176 ; NOVEC-NEXT: ler %f8, %f0 @@ -27,8 +27,8 @@ define half @fun0(half %Op0, half %Op1) { ; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT ; NOVEC-NEXT: aebr %f0, %f9 ; NOVEC-NEXT: brasl %r14, __truncsfhf2@PLT -; NOVEC-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload -; NOVEC-NEXT: ld %f9, 160(%r15) # 8-byte Folded 
Reload +; NOVEC-NEXT: ld %f8, 168(%r15) # 8-byte Reload +; NOVEC-NEXT: ld %f9, 160(%r15) # 8-byte Reload ; NOVEC-NEXT: lmg %r14, %r15, 288(%r15) ; NOVEC-NEXT: br %r14 ; @@ -39,8 +39,8 @@ define half @fun0(half %Op0, half %Op1) { ; VECTOR-NEXT: .cfi_offset %r15, -40 ; VECTOR-NEXT: aghi %r15, -176 ; VECTOR-NEXT: .cfi_def_cfa_offset 336 -; VECTOR-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill -; VECTOR-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: std %f8, 168(%r15) # 8-byte Spill +; VECTOR-NEXT: std %f9, 160(%r15) # 8-byte Spill ; VECTOR-NEXT: .cfi_offset %f8, -168 ; VECTOR-NEXT: .cfi_offset %f9, -176 ; VECTOR-NEXT: ldr %f8, %f0 @@ -51,8 +51,8 @@ define half @fun0(half %Op0, half %Op1) { ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT ; VECTOR-NEXT: aebr %f0, %f9 ; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT -; VECTOR-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload -; VECTOR-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: ld %f8, 168(%r15) # 8-byte Reload +; VECTOR-NEXT: ld %f9, 160(%r15) # 8-byte Reload ; VECTOR-NEXT: lmg %r14, %r15, 288(%r15) ; VECTOR-NEXT: br %r14 entry: @@ -68,8 +68,8 @@ define half @fun1(half %Op0, half %Op1) { ; NOVEC-NEXT: .cfi_offset %r15, -40 ; NOVEC-NEXT: aghi %r15, -176 ; NOVEC-NEXT: .cfi_def_cfa_offset 336 -; NOVEC-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill -; NOVEC-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f8, 168(%r15) # 8-byte Spill +; NOVEC-NEXT: std %f9, 160(%r15) # 8-byte Spill ; NOVEC-NEXT: .cfi_offset %f8, -168 ; NOVEC-NEXT: .cfi_offset %f9, -176 ; NOVEC-NEXT: ler %f8, %f2 @@ -79,8 +79,8 @@ define half @fun1(half %Op0, half %Op1) { ; NOVEC-NEXT: brasl %r14, __extendhfdf2@PLT ; NOVEC-NEXT: adbr %f0, %f9 ; NOVEC-NEXT: brasl %r14, __truncdfhf2@PLT -; NOVEC-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload -; NOVEC-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f8, 168(%r15) # 8-byte Reload +; NOVEC-NEXT: ld %f9, 160(%r15) # 8-byte Reload ; NOVEC-NEXT: lmg %r14, %r15, 288(%r15) ; NOVEC-NEXT: br %r14 ; @@ -91,8 +91,8 @@ define half @fun1(half %Op0, half %Op1) { ; VECTOR-NEXT: .cfi_offset %r15, -40 ; VECTOR-NEXT: aghi %r15, -176 ; VECTOR-NEXT: .cfi_def_cfa_offset 336 -; VECTOR-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill -; VECTOR-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: std %f8, 168(%r15) # 8-byte Spill +; VECTOR-NEXT: std %f9, 160(%r15) # 8-byte Spill ; VECTOR-NEXT: .cfi_offset %f8, -168 ; VECTOR-NEXT: .cfi_offset %f9, -176 ; VECTOR-NEXT: ldr %f8, %f2 @@ -102,8 +102,8 @@ define half @fun1(half %Op0, half %Op1) { ; VECTOR-NEXT: brasl %r14, __extendhfdf2@PLT ; VECTOR-NEXT: wfadb %f0, %f9, %f0 ; VECTOR-NEXT: brasl %r14, __truncdfhf2@PLT -; VECTOR-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload -; VECTOR-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: ld %f8, 168(%r15) # 8-byte Reload +; VECTOR-NEXT: ld %f9, 160(%r15) # 8-byte Reload ; VECTOR-NEXT: lmg %r14, %r15, 288(%r15) ; VECTOR-NEXT: br %r14 entry: @@ -122,9 +122,9 @@ define half @fun2(half %Op0, half %Op1) { ; NOVEC-NEXT: .cfi_offset %r15, -40 ; NOVEC-NEXT: aghi %r15, -232 ; NOVEC-NEXT: .cfi_def_cfa_offset 392 -; NOVEC-NEXT: std %f8, 224(%r15) # 8-byte Folded Spill -; NOVEC-NEXT: std %f9, 216(%r15) # 8-byte Folded Spill -; NOVEC-NEXT: std %f11, 208(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f8, 224(%r15) # 8-byte Spill +; NOVEC-NEXT: std %f9, 216(%r15) # 8-byte Spill +; NOVEC-NEXT: std %f11, 208(%r15) # 8-byte Spill ; NOVEC-NEXT: .cfi_offset %f8, -168 ; NOVEC-NEXT: .cfi_offset %f9, -176 ; 
NOVEC-NEXT: .cfi_offset %f11, -184 @@ -143,9 +143,9 @@ define half @fun2(half %Op0, half %Op1) { ; NOVEC-NEXT: std %f0, 192(%r15) ; NOVEC-NEXT: std %f2, 200(%r15) ; NOVEC-NEXT: brasl %r14, __trunctfhf2@PLT -; NOVEC-NEXT: ld %f8, 224(%r15) # 8-byte Folded Reload -; NOVEC-NEXT: ld %f9, 216(%r15) # 8-byte Folded Reload -; NOVEC-NEXT: ld %f11, 208(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f8, 224(%r15) # 8-byte Reload +; NOVEC-NEXT: ld %f9, 216(%r15) # 8-byte Reload +; NOVEC-NEXT: ld %f11, 208(%r15) # 8-byte Reload ; NOVEC-NEXT: lmg %r14, %r15, 344(%r15) ; NOVEC-NEXT: br %r14 ; @@ -156,7 +156,7 @@ define half @fun2(half %Op0, half %Op1) { ; VECTOR-NEXT: .cfi_offset %r15, -40 ; VECTOR-NEXT: aghi %r15, -232 ; VECTOR-NEXT: .cfi_def_cfa_offset 392 -; VECTOR-NEXT: std %f8, 224(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: std %f8, 224(%r15) # 8-byte Spill ; VECTOR-NEXT: .cfi_offset %f8, -168 ; VECTOR-NEXT: la %r2, 176(%r15) ; VECTOR-NEXT: ldr %f8, %f2 @@ -172,7 +172,7 @@ define half @fun2(half %Op0, half %Op1) { ; VECTOR-NEXT: la %r2, 208(%r15) ; VECTOR-NEXT: vst %v0, 208(%r15), 3 ; VECTOR-NEXT: brasl %r14, __trunctfhf2@PLT -; VECTOR-NEXT: ld %f8, 224(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: ld %f8, 224(%r15) # 8-byte Reload ; VECTOR-NEXT: lmg %r14, %r15, 344(%r15) ; VECTOR-NEXT: br %r14 entry: @@ -330,9 +330,9 @@ define half @fun6(half %Op0, half %Op1, half %Op2) { ; NOVEC-NEXT: .cfi_offset %r15, -40 ; NOVEC-NEXT: aghi %r15, -184 ; NOVEC-NEXT: .cfi_def_cfa_offset 344 -; NOVEC-NEXT: std %f8, 176(%r15) # 8-byte Folded Spill -; NOVEC-NEXT: std %f9, 168(%r15) # 8-byte Folded Spill -; NOVEC-NEXT: std %f10, 160(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f8, 176(%r15) # 8-byte Spill +; NOVEC-NEXT: std %f9, 168(%r15) # 8-byte Spill +; NOVEC-NEXT: std %f10, 160(%r15) # 8-byte Spill ; NOVEC-NEXT: .cfi_offset %f8, -168 ; NOVEC-NEXT: .cfi_offset %f9, -176 ; NOVEC-NEXT: .cfi_offset %f10, -184 @@ -351,9 +351,9 @@ define half @fun6(half %Op0, half %Op1, half %Op2) { ; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT ; NOVEC-NEXT: aebr %f0, %f9 ; NOVEC-NEXT: brasl %r14, __truncsfhf2@PLT -; NOVEC-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload -; NOVEC-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload -; NOVEC-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f8, 176(%r15) # 8-byte Reload +; NOVEC-NEXT: ld %f9, 168(%r15) # 8-byte Reload +; NOVEC-NEXT: ld %f10, 160(%r15) # 8-byte Reload ; NOVEC-NEXT: lmg %r14, %r15, 296(%r15) ; NOVEC-NEXT: br %r14 ; @@ -364,9 +364,9 @@ define half @fun6(half %Op0, half %Op1, half %Op2) { ; VECTOR-NEXT: .cfi_offset %r15, -40 ; VECTOR-NEXT: aghi %r15, -184 ; VECTOR-NEXT: .cfi_def_cfa_offset 344 -; VECTOR-NEXT: std %f8, 176(%r15) # 8-byte Folded Spill -; VECTOR-NEXT: std %f9, 168(%r15) # 8-byte Folded Spill -; VECTOR-NEXT: std %f10, 160(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: std %f8, 176(%r15) # 8-byte Spill +; VECTOR-NEXT: std %f9, 168(%r15) # 8-byte Spill +; VECTOR-NEXT: std %f10, 160(%r15) # 8-byte Spill ; VECTOR-NEXT: .cfi_offset %f8, -168 ; VECTOR-NEXT: .cfi_offset %f9, -176 ; VECTOR-NEXT: .cfi_offset %f10, -184 @@ -385,9 +385,9 @@ define half @fun6(half %Op0, half %Op1, half %Op2) { ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT ; VECTOR-NEXT: wfasb %f0, %f9, %f0 ; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT -; VECTOR-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload -; VECTOR-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload -; VECTOR-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: ld %f8, 176(%r15) # 8-byte Reload +; VECTOR-NEXT: ld %f9, 168(%r15) 
# 8-byte Reload +; VECTOR-NEXT: ld %f10, 160(%r15) # 8-byte Reload ; VECTOR-NEXT: lmg %r14, %r15, 296(%r15) ; VECTOR-NEXT: br %r14 entry: @@ -479,8 +479,8 @@ define half @fun9(half %Arg0, half %Arg1, half %Arg2, half %Arg3, half %Arg4) { ; NOVEC-NEXT: .cfi_offset %r15, -40 ; NOVEC-NEXT: aghi %r15, -176 ; NOVEC-NEXT: .cfi_def_cfa_offset 336 -; NOVEC-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill -; NOVEC-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; NOVEC-NEXT: std %f8, 168(%r15) # 8-byte Spill +; NOVEC-NEXT: std %f9, 160(%r15) # 8-byte Spill ; NOVEC-NEXT: .cfi_offset %f8, -168 ; NOVEC-NEXT: .cfi_offset %f9, -176 ; NOVEC-NEXT: lh %r0, 342(%r15) @@ -495,8 +495,8 @@ define half @fun9(half %Arg0, half %Arg1, half %Arg2, half %Arg3, half %Arg4) { ; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT ; NOVEC-NEXT: aebr %f0, %f9 ; NOVEC-NEXT: brasl %r14, __truncsfhf2@PLT -; NOVEC-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload -; NOVEC-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; NOVEC-NEXT: ld %f8, 168(%r15) # 8-byte Reload +; NOVEC-NEXT: ld %f9, 160(%r15) # 8-byte Reload ; NOVEC-NEXT: lmg %r14, %r15, 288(%r15) ; NOVEC-NEXT: br %r14 ; @@ -507,8 +507,8 @@ define half @fun9(half %Arg0, half %Arg1, half %Arg2, half %Arg3, half %Arg4) { ; VECTOR-NEXT: .cfi_offset %r15, -40 ; VECTOR-NEXT: aghi %r15, -176 ; VECTOR-NEXT: .cfi_def_cfa_offset 336 -; VECTOR-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill -; VECTOR-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; VECTOR-NEXT: std %f8, 168(%r15) # 8-byte Spill +; VECTOR-NEXT: std %f9, 160(%r15) # 8-byte Spill ; VECTOR-NEXT: .cfi_offset %f8, -168 ; VECTOR-NEXT: .cfi_offset %f9, -176 ; VECTOR-NEXT: vlreph %v0, 342(%r15) @@ -519,8 +519,8 @@ define half @fun9(half %Arg0, half %Arg1, half %Arg2, half %Arg3, half %Arg4) { ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT ; VECTOR-NEXT: aebr %f0, %f9 ; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT -; VECTOR-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload -; VECTOR-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; VECTOR-NEXT: ld %f8, 168(%r15) # 8-byte Reload +; VECTOR-NEXT: ld %f9, 160(%r15) # 8-byte Reload ; VECTOR-NEXT: lmg %r14, %r15, 288(%r15) ; VECTOR-NEXT: br %r14 %A0 = fadd half %Arg3, %Arg4 diff --git a/llvm/test/CodeGen/SystemZ/spill-half-01.mir b/llvm/test/CodeGen/SystemZ/spill-half-01.mir index bc6947fdec8a6..9731f11445e50 100644 --- a/llvm/test/CodeGen/SystemZ/spill-half-01.mir +++ b/llvm/test/CodeGen/SystemZ/spill-half-01.mir @@ -14,20 +14,20 @@ body: | ; CHECK-LABEL: fun0: ; CHECK: aghi %r15, -240 - ; CHECK: ste %f4, 172(%r15) # 4-byte Folded Spill - ; CHECK-NEXT: ste %f2, 164(%r15) # 4-byte Folded Spill - ; CHECK-NEXT: ste %f0, 168(%r15) # 4-byte Folded Spill + ; CHECK: ste %f4, 172(%r15) # 4-byte Spill + ; CHECK-NEXT: ste %f2, 164(%r15) # 4-byte Spill + ; CHECK-NEXT: ste %f0, 168(%r15) # 4-byte Spill ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP - ; CHECK-NEXT: le %f0, 164(%r15) # 4-byte Folded Reload - ; CHECK: le %f0, 168(%r15) # 4-byte Folded Reload - ; CHECK: le %f0, 172(%r15) # 4-byte Folded Reload + ; CHECK-NEXT: le %f0, 164(%r15) # 4-byte Reload + ; CHECK: le %f0, 168(%r15) # 4-byte Reload + ; CHECK: le %f0, 172(%r15) # 4-byte Reload ; VECTOR-LABEL: fun0: ; VECTOR: aghi %r15, -232 - ; VECTOR: vsteh %v4, 166(%r15), 0 # 2-byte Folded Spil - ; VECTOR-NEXT: vsteh %v2, 162(%r15), 0 # 2-byte Folded Spil - ; VECTOR-NEXT: vsteh %v0, 164(%r15), 0 # 2-byte Folded Spil + ; VECTOR: vsteh %v4, 166(%r15), 0 # 2-byte Folded Spill + ; VECTOR-NEXT: vsteh %v2, 162(%r15), 0 # 2-byte Folded Spill + ; VECTOR-NEXT: vsteh %v0, 
164(%r15), 0 # 2-byte Folded Spill ; VECTOR-NEXT: #APP ; VECTOR-NEXT: #NO_APP ; VECTOR-NEXT: vlreph %v0, 162(%r15) # 2-byte Folded Reload From 9ed85d3ca5f7552e52c76dd70df4c34c5af4506f Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Tue, 1 Apr 2025 09:54:06 -0600 Subject: [PATCH 07/12] Updates per review. --- compiler-rt/lib/builtins/extendhfdf2.c | 2 - .../Target/SystemZ/SystemZISelLowering.cpp | 336 ++++++++++++------ llvm/lib/Target/SystemZ/SystemZISelLowering.h | 5 +- llvm/lib/Target/SystemZ/SystemZInstrVector.td | 4 +- llvm/test/CodeGen/SystemZ/atomic-load-10.ll | 2 +- llvm/test/CodeGen/SystemZ/fp-abs-01.ll | 5 +- llvm/test/CodeGen/SystemZ/fp-abs-03.ll | 6 +- llvm/test/CodeGen/SystemZ/fp-conv-08.ll | 4 +- llvm/test/CodeGen/SystemZ/fp-conv-20.ll | 12 +- llvm/test/CodeGen/SystemZ/fp-copysign-01.ll | 32 -- llvm/test/CodeGen/SystemZ/fp-copysign-02.ll | 29 -- llvm/test/CodeGen/SystemZ/fp-copysign-03.ll | 65 ++++ llvm/test/CodeGen/SystemZ/fp-half-mem.ll | 10 +- llvm/test/CodeGen/SystemZ/fp-half-move.ll | 16 +- llvm/test/CodeGen/SystemZ/fp-half-strict.ll | 8 +- llvm/test/CodeGen/SystemZ/fp-half-vector.ll | 306 ++++++---------- llvm/test/CodeGen/SystemZ/fp-half.ll | 60 ++-- .../test/CodeGen/SystemZ/fp-strict-conv-08.ll | 4 +- .../test/CodeGen/SystemZ/fp-strict-conv-17.ll | 12 +- ...-asm-fp-int-casting-explicit-regs-zEC12.ll | 13 +- .../inline-asm-fp-int-casting-zEC12.ll | 13 +- llvm/test/CodeGen/SystemZ/spill-half-01.mir | 12 +- llvm/test/CodeGen/SystemZ/spill-half-02.mir | 4 +- llvm/test/CodeGen/SystemZ/tdc-05.ll | 3 +- llvm/test/CodeGen/SystemZ/vec-max-05.ll | 10 - llvm/test/CodeGen/SystemZ/vec-min-05.ll | 10 - 26 files changed, 499 insertions(+), 484 deletions(-) create mode 100644 llvm/test/CodeGen/SystemZ/fp-copysign-03.ll diff --git a/compiler-rt/lib/builtins/extendhfdf2.c b/compiler-rt/lib/builtins/extendhfdf2.c index 5055b5adad4bf..1cfbdb82730ad 100644 --- a/compiler-rt/lib/builtins/extendhfdf2.c +++ b/compiler-rt/lib/builtins/extendhfdf2.c @@ -10,8 +10,6 @@ #define DST_DOUBLE #include "fp_extend_impl.inc" -// Use a forwarding definition and noinline to implement a poor man's alias, -// as there isn't a good cross-platform way of defining one. COMPILER_RT_ABI NOINLINE dst_t __extendhfdf2(src_t a) { return __extendXfYf2__(a); } diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 508ca594f78c8..a78427419f853 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -226,25 +226,15 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SMUL_LOHI, VT, Custom); setOperationAction(ISD::UMUL_LOHI, VT, Custom); - // The fp<=>int conversions are all Legal except for f16 and unsigned - // on z10 - only z196 and above have native support for conversions to - // unsigned. The Custom handlings for all these nodes only modify f16 - // cases. - for (auto Op : {ISD::FP_TO_SINT, ISD::SINT_TO_FP, ISD::STRICT_FP_TO_SINT, - ISD::STRICT_SINT_TO_FP}) + // The fp<=>i32/i64 conversions are all Legal except for f16 and for + // unsigned on z10 (only z196 and above have native support for + // unsigned conversions). 
+ for (auto Op : {ISD::FP_TO_SINT, ISD::STRICT_FP_TO_SINT, + ISD::SINT_TO_FP, ISD::STRICT_SINT_TO_FP}) + setOperationAction(Op, VT, Custom); + for (auto Op : {ISD::FP_TO_UINT, ISD::STRICT_FP_TO_UINT, + ISD::UINT_TO_FP, ISD::STRICT_UINT_TO_FP}) + setOperationAction(Op, VT, Custom); - // On z10, promoting the result to i64 doesn't generate an inexact - // condition for values that are outside the i32 range but in the i64 - // range, so use the default expansion. - for (auto Op : {ISD::FP_TO_UINT, ISD::STRICT_FP_TO_UINT}) - setOperationAction(Op, VT, - Subtarget.hasFPExtension() ? Custom : Expand); - for (auto Op : {ISD::UINT_TO_FP, ISD::STRICT_UINT_TO_FP}) { - // Handle unsigned 32-bit input types as signed 64-bit types on z10. - auto ActionZ10 = VT == MVT::i32 ? Promote : Expand; - setOperationAction(Op, VT, - Subtarget.hasFPExtension() ? Custom : ActionZ10); - } } } @@ -293,18 +283,18 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, if (Subtarget.hasVectorEnhancements3()) { setOperationAction(ISD::ABS, MVT::i128, Legal); } - - // We have to use libcalls for these. - setOperationAction(ISD::FP_TO_UINT, MVT::i128, LibCall); - setOperationAction(ISD::FP_TO_SINT, MVT::i128, LibCall); - setOperationAction(ISD::UINT_TO_FP, MVT::i128, LibCall); - setOperationAction(ISD::SINT_TO_FP, MVT::i128, LibCall); - setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, LibCall); - setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, LibCall); - setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, LibCall); - setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, LibCall); } + // These need custom lowering in order to handle the f16 conversions. + setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom); + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom); + // Type legalization will convert 8- and 16-bit atomic operations into // forms that operate on i32s (but still keeping the original memory VT). // Lower them into full i32 operations. @@ -558,6 +548,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, LibCall); setOperationAction(ISD::BITCAST, MVT::i16, Custom); setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom); + setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom); } for (unsigned I = MVT::FIRST_FP_VALUETYPE; @@ -5026,7 +5017,7 @@ SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op, SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const { - MVT RegVT = Op.getSimpleValueType(); + EVT RegVT = Op.getValueType(); if (RegVT.getSizeInBits() == 128) return lowerATOMIC_LDST_I128(Op, DAG); return lowerLoadF16(Op, DAG); @@ -6811,68 +6802,124 @@ SDValue SystemZTargetLowering::lowerFP_EXTEND(SDValue Op, return SDValue(); // Let legalizer emit the libcall.
} +SDValue SystemZTargetLowering::useLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, + MVT VT, SDValue Arg, SDLoc DL, + SDValue Chain, bool IsStrict) const { + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected request for libcall!"); + MakeLibCallOptions CallOptions; + SDValue Result; + std::tie(Result, Chain) = + makeLibCall(DAG, LC, VT, Arg, CallOptions, DL, Chain); + return IsStrict ? DAG.getMergeValues({Result, Chain}, DL) : Result; +} + SDValue SystemZTargetLowering::lower_FP_TO_INT(SDValue Op, SelectionDAG &DAG) const { - SDValue In = Op.getOperand(0); - if (In.getSimpleValueType() != MVT::f16) - return Op; // Legal - - // f16: Extend to f32 before the operation. + bool IsSigned = (Op->getOpcode() == ISD::FP_TO_SINT || + Op->getOpcode() == ISD::STRICT_FP_TO_SINT); + bool IsStrict = Op->isStrictFPOpcode(); SDLoc DL(Op); - SDValue InF32 = DAG.getFPExtendOrRound(In, SDLoc(In), MVT::f32); - return DAG.getNode(Op->getOpcode(), DL, Op.getSimpleValueType(), InF32); -} - -SDValue SystemZTargetLowering::lowerSTRICT_FP_TO_INT(SDValue Op, - SelectionDAG &DAG) const { - SDValue Chain = Op.getOperand(0); - SDValue In = Op.getOperand(1); + MVT VT = Op.getSimpleValueType(); + SDValue InOp = Op.getOperand(IsStrict ? 1 : 0); + SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode(); + EVT InVT = InOp.getValueType(); + + // FP to unsigned is not directly supported on z10. Promoting an i32 + // result to i64 doesn't generate an inexact condition for values that are + // outside the i32 range but in the i64 range, so use the default + // expansion. + if (!Subtarget.hasFPExtension() && !IsSigned) + return SDValue(); // Expand (i32 / i64). + + if (InOp.getSimpleValueType() == MVT::f16) { + // f16: Extend to f32 before the conversion. + if (!IsStrict) { + SDValue InF32 = DAG.getFPExtendOrRound(InOp, SDLoc(InOp), MVT::f32); + return DAG.getNode(Op->getOpcode(), DL, Op.getSimpleValueType(), InF32); + } + SDValue InF32; + std::tie(InF32, Chain) = + DAG.getStrictFPExtendOrRound(InOp, Chain, DL, MVT::f32); + return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), {Chain, InF32}); + } - if (In.getSimpleValueType() != MVT::f16) - return Op; // Legal + if (VT == MVT::i128) { + RTLIB::Libcall LC = + IsSigned ? RTLIB::getFPTOSINT(InVT, VT) : RTLIB::getFPTOUINT(InVT, VT); + return useLibCall(DAG, LC, VT, InOp, DL, Chain, IsStrict); + } - // f16: Extend to f32 before the operation. - SDLoc DL(Op); - SDValue InF32 = DAG.getFPExtendOrRound(In, SDLoc(In), MVT::f32); - return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), {Chain, InF32}); + return Op; // Legal } SDValue SystemZTargetLowering::lower_INT_TO_FP(SDValue Op, SelectionDAG &DAG) const { - if (Op.getSimpleValueType() != MVT::f16) - return Op; // Legal - - // f16: first do the operation to f32 and then round to f16. + bool IsSigned = (Op->getOpcode() == ISD::SINT_TO_FP || + Op->getOpcode() == ISD::STRICT_SINT_TO_FP); + bool IsStrict = Op->isStrictFPOpcode(); SDLoc DL(Op); - SDValue F32Conv = - DAG.getNode(Op->getOpcode(), DL, MVT::f32, Op->getOperand(0)); - return DAG.getFPExtendOrRound(F32Conv, DL, MVT::f16); -} + MVT VT = Op.getSimpleValueType(); + SDValue InOp = Op.getOperand(IsStrict ? 1 : 0); + SDValue Chain = IsStrict ? 
Op.getOperand(0) : DAG.getEntryNode(); + EVT InVT = InOp.getValueType(); + + auto roundToF16 = [&DAG, &IsStrict, &DL, &Chain](SDValue V) -> SDValue { + if (!IsStrict) + return DAG.getFPExtendOrRound(V, DL, MVT::f16); + SDValue F16Res; + std::tie(F16Res, Chain) = + DAG.getStrictFPExtendOrRound(V, V.getValue(1), DL, MVT::f16); + return DAG.getMergeValues({F16Res, Chain}, DL); + }; -SDValue SystemZTargetLowering::lowerSTRICT_INT_TO_FP(SDValue Op, - SelectionDAG &DAG) const { - if (Op.getSimpleValueType() != MVT::f16) - return Op; // Legal + // Unsigned to fp is not directly supported on z10. + if (!Subtarget.hasFPExtension() && !IsSigned) { + if (InVT == MVT::i32) { // Conversion from i32 is promoted to i64 (signed). + SDValue I64In = DAG.getZExtOrTrunc(InOp, DL, MVT::i64); + SDValue FPRes; + MVT ResVT = VT == MVT::f16 ? MVT::f32 : VT; + if (!IsStrict) + FPRes = DAG.getNode(ISD::SINT_TO_FP, DL, ResVT, I64In); + else + FPRes = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, + DAG.getVTList(ResVT, MVT::Other), {Chain, I64In}); + return VT == MVT::f16 ? roundToF16(FPRes) : FPRes; + } + assert(InVT == MVT::i64 && "i32 and i64 are the only legal int types."); + if (VT != MVT::f16) + return SDValue(); // Expand + } - // f16: first do the operation to f32 and then round to f16. - SDLoc DL(Op); - SDValue F32Conv = - DAG.getNode(Op->getOpcode(), DL, DAG.getVTList(MVT::f32, MVT::Other), - {Op->getOperand(0), Op->getOperand(1)}); - SDValue F16Res = DAG.getFPExtendOrRound(F32Conv, DL, MVT::f16); - return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), F16Res, - F32Conv.getValue(1)); + // Conversion to f16 is done via f32. + if (VT == MVT::f16) { + SDValue PromotedOp; + if (!IsStrict) + PromotedOp = DAG.getNode(Op->getOpcode(), DL, MVT::f32, InOp); + else + PromotedOp = + DAG.getNode(Op->getOpcode(), DL, DAG.getVTList(MVT::f32, MVT::Other), + {Chain, InOp}); + return roundToF16(PromotedOp); + } + + if (InVT == MVT::i128) { + RTLIB::Libcall LC = + IsSigned ? RTLIB::getSINTTOFP(InVT, VT) : RTLIB::getUINTTOFP(InVT, VT); + return useLibCall(DAG, LC, VT, InOp, DL, Chain, IsStrict); + } + + return Op; // Legal } // Shift the lower 2 bytes of Op to the left in order to insert into the // upper 2 bytes of the FP register. static SDValue convertToF16(SDValue Op, SelectionDAG &DAG) { - assert(Op.getSimpleValueType() == MVT::i32 && - "Expexted to convert i32 to f16."); + assert(Op.getSimpleValueType() == MVT::i64 && + "Expected to convert i64 to f16."); SDLoc DL(Op); - SDValue Shft = DAG.getNode(ISD::SHL, DL, MVT::i32, Op, - DAG.getConstant(16, DL, MVT::i32)); - SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Shft); + SDValue Shft = DAG.getNode(ISD::SHL, DL, MVT::i64, Op, + DAG.getConstant(48, DL, MVT::i64)); + SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Shft); SDValue F16Val = DAG.getTargetExtractSubreg(SystemZ::subreg_h16, DL, MVT::f16, BCast); return F16Val; @@ -6881,20 +6928,20 @@ static SDValue convertToF16(SDValue Op, SelectionDAG &DAG) { // Extract Op into GPR and shift the 2 f16 bytes to the right.
static SDValue convertFromF16(SDValue Op, SDLoc DL, SelectionDAG &DAG) { assert(Op.getSimpleValueType() == MVT::f16 && - "Expected to convert f16 to i32."); - SDNode *U32 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f32); - SDValue In32 = DAG.getTargetInsertSubreg(SystemZ::subreg_h16, DL, MVT::f32, + "Expected to convert f16 to i64."); + SDNode *U32 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64); + SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h16, DL, MVT::f64, SDValue(U32, 0), Op); - SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::i32, In32); - SDValue Shft = DAG.getNode(ISD::SRL, DL, MVT::i32, BCast, - DAG.getConstant(16, DL, MVT::i32)); + SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64); + SDValue Shft = DAG.getNode(ISD::SRL, DL, MVT::i64, BCast, + DAG.getConstant(48, DL, MVT::i32)); return Shft; } // Lower an f16 LOAD in case of no vector support. SDValue SystemZTargetLowering::lowerLoadF16(SDValue Op, SelectionDAG &DAG) const { - MVT RegVT = Op.getSimpleValueType(); + EVT RegVT = Op.getValueType(); assert(RegVT == MVT::f16 && "Expected to lower an f16 load."); // Load as integer. @@ -6902,7 +6949,7 @@ SDValue SystemZTargetLowering::lowerLoadF16(SDValue Op, SDValue NewLd; if (auto *AtomicLd = dyn_cast<AtomicSDNode>(Op.getNode())) { assert(EVT(RegVT) == AtomicLd->getMemoryVT() && "Unhandled f16 load"); - NewLd = DAG.getAtomic(ISD::ATOMIC_LOAD, DL, MVT::i16, MVT::i32, + NewLd = DAG.getAtomic(ISD::ATOMIC_LOAD, DL, MVT::i16, MVT::i64, AtomicLd->getChain(), AtomicLd->getBasePtr(), AtomicLd->getMemOperand()); cast<AtomicSDNode>(NewLd)->setExtensionType(ISD::EXTLOAD); @@ -6910,7 +6957,7 @@ SDValue SystemZTargetLowering::lowerLoadF16(SDValue Op, LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode()); assert(EVT(RegVT) == Ld->getMemoryVT() && "Unhandled f16 load"); NewLd = - DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Ld->getChain(), + DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i64, Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), MVT::i16, Ld->getOriginalAlign(), Ld->getMemOperand()->getFlags()); } @@ -6972,17 +7019,23 @@ SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op, SDValue SystemZTargetLowering::lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { - SDLoc DL(Op); - SDValue SignArg = Op.getOperand(1); - if (SignArg.getSimpleValueType() != MVT::f16) + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + MVT Op0VT = Op0.getSimpleValueType(); + MVT Op1VT = Op1.getSimpleValueType(); + if (Op0VT != MVT::f16 && Op1VT != MVT::f16) return Op; // Legal - // f16: Extend SignArg f32. The DAGCombiner removes the fpext without - // asking, but it is needed as there is no target instruction handling f16. - SDValue SignArgF32 = - DAG.getFPExtendOrRound(SignArg, SDLoc(SignArg), MVT::f32); - return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), - {Op.getOperand(0), SignArgF32}); + // Perform the copy in the largest type present, or in f32 if that type is f16. + MVT VT = (Op0VT.getSizeInBits() > Op1VT.getSizeInBits()) ?
Op0VT : Op1VT; + if (VT == MVT::f16) + VT = MVT::f32; + + SDLoc DL(Op); + SDValue Op0Conv = DAG.getFPExtendOrRound(Op0, DL, VT); + SDValue Op1Conv = DAG.getFPExtendOrRound(Op1, DL, VT); + SDValue ResConv = DAG.getNode(ISD::FCOPYSIGN, DL, VT, {Op0Conv, Op1Conv}); + return DAG.getFPExtendOrRound(ResConv, DL, Op0VT); } SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op, @@ -7140,18 +7193,16 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, case ISD::FP_EXTEND: case ISD::STRICT_FP_EXTEND: return lowerFP_EXTEND(Op, DAG); - case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: - return lower_FP_TO_INT(Op, DAG); - case ISD::STRICT_FP_TO_SINT: + case ISD::FP_TO_SINT: case ISD::STRICT_FP_TO_UINT: - return lowerSTRICT_FP_TO_INT(Op, DAG); - case ISD::SINT_TO_FP: + case ISD::STRICT_FP_TO_SINT: + return lower_FP_TO_INT(Op, DAG); case ISD::UINT_TO_FP: - return lower_INT_TO_FP(Op, DAG); - case ISD::STRICT_SINT_TO_FP: + case ISD::SINT_TO_FP: case ISD::STRICT_UINT_TO_FP: - return lowerSTRICT_INT_TO_FP(Op, DAG); + case ISD::STRICT_SINT_TO_FP: + return lower_INT_TO_FP(Op, DAG); case ISD::LOAD: return lowerLoadF16(Op, DAG); case ISD::STORE: @@ -7290,20 +7341,89 @@ SystemZTargetLowering::LowerOperationWrapper(SDNode *N, if (ResVT == MVT::i128 && SrcVT == MVT::f128) Results.push_back(expandBitCastF128ToI128(DAG, Src, DL)); else if (SrcVT == MVT::i16 && ResVT == MVT::f16) { - SDValue In32 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Src); - SDValue Res = - Subtarget.hasVector() - ? SDValue( - DAG.getMachineNode(SystemZ::LEFR_16, DL, MVT::f16, In32), 0) - : convertToF16(In32, DAG); - Results.push_back(Res); + if (Subtarget.hasVector()) { + SDValue In32 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Src); + Results.push_back(SDValue( + DAG.getMachineNode(SystemZ::LEFR_16, DL, MVT::f16, In32), 0)); + } else { + SDValue In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Src); + Results.push_back(convertToF16(In64, DAG)); + } } else if (SrcVT == MVT::f16 && ResVT == MVT::i16) { - SDValue ExtractedI32 = + SDValue ExtractedVal = Subtarget.hasVector() ? 
SDValue(DAG.getMachineNode(SystemZ::LFER_16, DL, MVT::i32, Src), 0) : convertFromF16(Src, DL, DAG); - Results.push_back(DAG.getZExtOrTrunc(ExtractedI32, DL, ResVT)); + Results.push_back(DAG.getZExtOrTrunc(ExtractedVal, DL, ResVT)); + } + break; + } + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: { + if (useSoftFloat()) + return; + SDLoc DL(N); + SDValue Src = N->getOperand(0); + EVT ResVT = N->getValueType(0); + if (ResVT == MVT::f16) { + SDValue F32Res = DAG.getNode(N->getOpcode(), DL, MVT::f32, Src); + Results.push_back(DAG.getFPExtendOrRound(F32Res, DL, MVT::f16)); + } + break; + } + case ISD::STRICT_SINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: { + if (useSoftFloat()) + return; + SDLoc DL(N); + SDValue Chain = N->getOperand(0); + SDValue Src = N->getOperand(1); + EVT ResVT = N->getValueType(0); + if (ResVT == MVT::f16) { + SDValue F32Res = + DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::f32, MVT::Other), + {Chain, Src}); + SDValue F16Res; + std::tie(F16Res, Chain) = DAG.getStrictFPExtendOrRound( + F32Res, F32Res.getValue(1), DL, MVT::f16); + Results.push_back(F16Res); + Results.push_back(Chain); + } + break; + } + case ISD::FP_TO_UINT: + case ISD::FP_TO_SINT: { + if (useSoftFloat()) + return; + SDLoc DL(N); + SDValue Src = N->getOperand(0); + EVT SrcVT = Src->getValueType(0); + if (SrcVT == MVT::f16) { + SDValue SrcF32 = DAG.getFPExtendOrRound(Src, DL, MVT::f32); + SDValue OpF32 = + DAG.getNode(N->getOpcode(), DL, N->getValueType(0), SrcF32); + Results.push_back(OpF32); + } + break; + } + case ISD::STRICT_FP_TO_UINT: + case ISD::STRICT_FP_TO_SINT: { + if (useSoftFloat()) + return; + SDLoc DL(N); + EVT ResVT = N->getValueType(0); + SDValue Chain = N->getOperand(0); + SDValue Src = N->getOperand(1); + EVT SrcVT = Src->getValueType(0); + if (SrcVT == MVT::f16) { + SDValue InF32; + std::tie(InF32, Chain) = + DAG.getStrictFPExtendOrRound(Src, Chain, DL, MVT::f32); + SDValue OpF32 = DAG.getNode( + N->getOpcode(), DL, DAG.getVTList(ResVT, MVT::Other), {Chain, InF32}); + Results.push_back(OpF32); + Results.push_back(OpF32.getValue(1)); } break; } diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index f7240dae8c4ab..e6a2a74221b97 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -627,6 +627,9 @@ class SystemZTargetLowering : public TargetLowering { bool IsSigned, SDLoc DL, bool DoesNotReturn, bool IsReturnValueUsed) const; + SDValue useLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, MVT VT, SDValue Arg, + SDLoc DL, SDValue Chain, bool IsStrict) const; + bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, @@ -747,9 +750,7 @@ class SystemZTargetLowering : public TargetLowering { SDValue lowerFSHR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue lower_FP_TO_INT(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerSTRICT_FP_TO_INT(SDValue Op, SelectionDAG &DAG) const; SDValue lower_INT_TO_FP(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerSTRICT_INT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue lowerLoadF16(SDValue Op, SelectionDAG &DAG) const; SDValue lowerStoreF16(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -142,7 +142,7 @@ let Predicates = [FeatureVector] in { // LEY and LDY offer full 20-bit displacement fields. It's often better // to use those instructions rather than force a 20-bit displacement // into a GPR temporary. - let mayLoad = 1, canFoldAsLoad = 1 in { + let mayLoad = 1, SimpleBDXLoad = 1, canFoldAsLoad = 1 in { def VL16 : UnaryAliasVRX; def VL32 : UnaryAliasVRX; def VL64 : UnaryAliasVRX; @@ -240,7 +240,7 @@ let Predicates = [FeatureVector] in { // STEY and STDY offer full 20-bit displacement fields. It's often better // to use those instructions rather than force a 20-bit displacement // into a GPR temporary. - let mayStore = 1 in { + let mayStore = 1, SimpleBDXStore = 1 in { def VST16 : StoreAliasVRX; def VST32 : StoreAliasVRX; def VST64 : StoreAliasVRX; diff --git a/llvm/test/CodeGen/SystemZ/atomic-load-10.ll b/llvm/test/CodeGen/SystemZ/atomic-load-10.ll index e30f9791b51e0..4135a55bb6fbc 100644 --- a/llvm/test/CodeGen/SystemZ/atomic-load-10.ll +++ b/llvm/test/CodeGen/SystemZ/atomic-load-10.ll @@ -7,7 +7,7 @@ define half @f1(ptr %src) { ; CHECK-LABEL: f1: ; CHECK: # %bb.0: -; CHECK-NEXT: lh %r0, 0(%r2) +; CHECK-NEXT: lgh %r0, 0(%r2) ; CHECK-NEXT: sllg %r0, %r0, 48 ; CHECK-NEXT: ldgr %f0, %r0 ; CHECK-NEXT: # kill: def $f0h killed $f0h killed $f0d diff --git a/llvm/test/CodeGen/SystemZ/fp-abs-01.ll b/llvm/test/CodeGen/SystemZ/fp-abs-01.ll index 2c8aebc5315b2..0cfdefe3bd61b 100644 --- a/llvm/test/CodeGen/SystemZ/fp-abs-01.ll +++ b/llvm/test/CodeGen/SystemZ/fp-abs-01.ll @@ -7,8 +7,9 @@ declare half @llvm.fabs.f16(half %f) define half @f0(half %f) { ; CHECK-LABEL: f0: -; CHECK: brasl %r14, __extendhfsf2@PLT -; CHECK: lpdfr %f0, %f0 +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: lpdfr %f0, %f0 +; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT ; CHECK: br %r14 %res = call half @llvm.fabs.f16(half %f) ret half %res diff --git a/llvm/test/CodeGen/SystemZ/fp-abs-03.ll b/llvm/test/CodeGen/SystemZ/fp-abs-03.ll index dc55374294896..29f2d06e75ff9 100644 --- a/llvm/test/CodeGen/SystemZ/fp-abs-03.ll +++ b/llvm/test/CodeGen/SystemZ/fp-abs-03.ll @@ -6,9 +6,9 @@ declare half @llvm.fabs.f16(half %f) define half @f0(half %f) { ; CHECK-LABEL: f0: -; CHECK: brasl %r14, __extendhfsf2@PLT -; CHECK: lpdfr %f0, %f0 -; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: lpdfr %f0, %f0 +; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT ; CHECK: br %r14 %res = call half @llvm.fabs.f16(half %f) ret half %res diff --git a/llvm/test/CodeGen/SystemZ/fp-conv-08.ll b/llvm/test/CodeGen/SystemZ/fp-conv-08.ll index b91da08c835d6..f2590b6566a62 100644 --- a/llvm/test/CodeGen/SystemZ/fp-conv-08.ll +++ b/llvm/test/CodeGen/SystemZ/fp-conv-08.ll @@ -5,7 +5,9 @@ ; Test i64->f16. For z10, this results in just a single a libcall. define half @f0(i64 %i) { ; CHECK-LABEL: f0: -; CHECK: brasl %r14, __floatundihf@PLT +; CHECK: cegbr +; CHECK: aebr +; CHECK: brasl %r14, __truncsfhf2@PLT ; CHECK: br %r14 %conv = uitofp i64 %i to half ret half %conv diff --git a/llvm/test/CodeGen/SystemZ/fp-conv-20.ll b/llvm/test/CodeGen/SystemZ/fp-conv-20.ll index abf45e3d7a597..58db2e10da8b2 100644 --- a/llvm/test/CodeGen/SystemZ/fp-conv-20.ll +++ b/llvm/test/CodeGen/SystemZ/fp-conv-20.ll @@ -33,7 +33,8 @@ define float @f3(i128 %i) { ; Test signed i128->f16. 
define half @f4(i128 %i) { ; CHECK-LABEL: f4: -; CHECK: brasl %r14, __floattihf@PLT +; CHECK: brasl %r14, __floattisf@PLT +; CHECK: brasl %r14, __truncsfhf2@PLT ; CHECK: br %r14 %conv = sitofp i128 %i to half ret half %conv @@ -69,7 +70,8 @@ define float @f7(i128 %i) { ; Test unsigned i128->f16. define half @f8(i128 %i) { ; CHECK-LABEL: f8: -; CHECK: brasl %r14, __floatuntihf@PLT +; CHECK: brasl %r14, __floatuntisf@PLT +; CHECK: brasl %r14, __truncsfhf2@PLT ; CHECK: br %r14 %conv = uitofp i128 %i to half ret half %conv @@ -105,7 +107,8 @@ define i128 @f11(float %f) { ; Test signed f16->i128. define i128 @f12(half %f) { ; CHECK-LABEL: f12: -; CHECK: brasl %r14, __fixhfti@PLT +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: brasl %r14, __fixsfti@PLT ; CHECK: br %r14 %conv = fptosi half %f to i128 ret i128 %conv @@ -141,7 +144,8 @@ define i128 @f15(float %f) { ; Test unsigned f16->i128. define i128 @f16(half %f) { ; CHECK-LABEL: f16: -; CHECK: brasl %r14, __fixunshfti@PLT +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: brasl %r14, __fixunssfti@PLT ; CHECK: br %r14 %conv = fptoui half %f to i128 ret i128 %conv diff --git a/llvm/test/CodeGen/SystemZ/fp-copysign-01.ll b/llvm/test/CodeGen/SystemZ/fp-copysign-01.ll index 3026191601081..d2b6488008e6b 100644 --- a/llvm/test/CodeGen/SystemZ/fp-copysign-01.ll +++ b/llvm/test/CodeGen/SystemZ/fp-copysign-01.ll @@ -2,23 +2,11 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -declare half @copysignh(half, half) readnone declare float @copysignf(float, float) readnone declare double @copysign(double, double) readnone ; FIXME: not really the correct prototype for SystemZ. declare fp128 @copysignl(fp128, fp128) readnone -; Test f32 copies in which the sign comes from an f16. -define float @f0(float %a, half %bh) { -; CHECK-LABEL: f0: -; CHECK: brasl %r14, __extendhfsf2@PLT -; CHECK: cpsdr %f0, %f0, %f8 -; CHECK: br %r14 - %b = fpext half %bh to float - %res = call float @copysignf(float %a, float %b) readnone - ret float %res -} - ; Test f32 copies in which the sign comes from an f32. define float @f1(float %a, float %b) { ; CHECK-LABEL: f1: @@ -138,23 +126,3 @@ define void @f9(ptr %cptr, ptr %aptr, ptr %bptr) { store fp128 %c, ptr %cptr ret void } - -; Test f16 copies in which the sign comes from an f16. -define half @f10(half %a, half %b) { -; CHECK-LABEL: f10: -; CHECK: brasl %r14, copysignh@PLT -; CHECK: br %r14 - %res = call half @copysignh(half %a, half %b) readnone - ret half %res -} - -; Test f16 copies in which the sign comes from an f32. -define half @f11(half %a, float %bf) { -; CHECK-LABEL: f11: -; CHECK: brasl %r14, __truncsfhf2@PLT -; CHECK: brasl %r14, copysignh@PLT -; CHECK: br %r14 - %b = fptrunc float %bf to half - %res = call half @copysignh(half %a, half %b) readnone - ret half %res -} diff --git a/llvm/test/CodeGen/SystemZ/fp-copysign-02.ll b/llvm/test/CodeGen/SystemZ/fp-copysign-02.ll index 320eee19afe05..178568ebb3bf9 100644 --- a/llvm/test/CodeGen/SystemZ/fp-copysign-02.ll +++ b/llvm/test/CodeGen/SystemZ/fp-copysign-02.ll @@ -2,25 +2,11 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s -declare half @copysignh(half, half) readnone declare float @copysignf(float, float) readnone declare double @copysign(double, double) readnone ; FIXME: not really the correct prototype for SystemZ. declare fp128 @copysignl(fp128, fp128) readnone -; Test f16 copies in which the sign comes from an f128. 
-define half @f0(half %a, ptr %bptr) { -; CHECK-LABEL: f0: -; CHECK: vl %v[[REG:[0-9]+]], 0(%r2) -; CHECK: brasl %r14, __trunctfhf2@PLT -; CHECK: brasl %r14, copysignh@PLT -; CHECK: br %r14 - %bl = load volatile fp128, ptr %bptr - %b = fptrunc fp128 %bl to half - %res = call half @copysignh(half %a, half %b) readnone - ret half %res -} - ; Test f32 copies in which the sign comes from an f128. define float @f1(float %a, ptr %bptr) { ; CHECK-LABEL: f1: @@ -45,21 +31,6 @@ define double @f2(double %a, ptr %bptr) { ret double %res } -; Test f128 copies in which the sign comes from an f16. -define void @f7_half(ptr %cptr, ptr %aptr, half %bh) { -; CHECK-LABEL: f7_half: -; CHECK: vl [[REG1:%v[0-7]+]], 0(%r3) -; CHECK: vsteh %v0, 164(%r15), 0 -; CHECK: tm 164(%r15), 128 -; CHECK: wflnxb [[REG2:%v[0-9]+]], [[REG1]] -; CHECK: wflpxb [[REG2]], [[REG1]] - %a = load volatile fp128, ptr %aptr - %b = fpext half %bh to fp128 - %c = call fp128 @copysignl(fp128 %a, fp128 %b) readnone - store fp128 %c, ptr %cptr - ret void -} - ; Test f128 copies in which the sign comes from an f32. define void @f7(ptr %cptr, ptr %aptr, float %bf) { ; CHECK-LABEL: f7: diff --git a/llvm/test/CodeGen/SystemZ/fp-copysign-03.ll b/llvm/test/CodeGen/SystemZ/fp-copysign-03.ll new file mode 100644 index 0000000000000..015fdf195dd8e --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/fp-copysign-03.ll @@ -0,0 +1,65 @@ +; Test copysign intrinsics involving half. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare half @llvm.copysign.f16(half, half) +declare float @llvm.copysign.f32(float, float) +declare double @llvm.copysign.f64(double, double) + +; Test f16 copies. +define half @f0(half %a, half %b) { +; CHECK-LABEL: f0: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: cpsdr %f0, %f9, %f0 +; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %res = call half @llvm.copysign.f16(half %a, half %b) + ret half %res +} + +; Test f16 copies where the sign comes from an f32. +define half @f1(half %a, float %b) { +; CHECK-LABEL: f1: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: cpsdr %f0, %f8, %f0 +; CHECK: brasl %r14, __truncsfhf2@PLT +; CHECK: br %r14 + %bh = fptrunc float %b to half + %res = call half @llvm.copysign.f16(half %a, half %bh) + ret half %res +} + +; Test f16 copies where the sign comes from an f64. +define half @f2(half %a, double %b) { +; CHECK-LABEL: f2: +; CHECK: brasl %r14, __extendhfdf2@PLT +; CHECK: cpsdr %f0, %f8, %f0 +; CHECK: brasl %r14, __truncdfhf2@PLT +; CHECK: br %r14 + %bh = fptrunc double %b to half + %res = call half @llvm.copysign.f16(half %a, half %bh) + ret half %res +} + +; Test f32 copies in which the sign comes from an f16. +define float @f3(float %a, half %b) { +; CHECK-LABEL: f3: +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: cpsdr %f0, %f0, %f8 +; CHECK: br %r14 + %bf = fpext half %b to float + %res = call float @llvm.copysign.f32(float %a, float %bf) + ret float %res +} + +; Test f64 copies in which the sign comes from an f16. 
+define double @f4(double %a, half %b) { +; CHECK-LABEL: f4: +; CHECK: brasl %r14, __extendhfdf2@PLT +; CHECK: cpsdr %f0, %f0, %f8 +; CHECK: br %r14 + %bd = fpext half %b to double + %res = call double @llvm.copysign.f64(double %a, double %bd) + ret double %res +} diff --git a/llvm/test/CodeGen/SystemZ/fp-half-mem.ll b/llvm/test/CodeGen/SystemZ/fp-half-mem.ll index a3dd646d3b51f..5988a379b3d9a 100644 --- a/llvm/test/CodeGen/SystemZ/fp-half-mem.ll +++ b/llvm/test/CodeGen/SystemZ/fp-half-mem.ll @@ -17,9 +17,8 @@ define half @f1() { ; NOVEC-NEXT: .cfi_def_cfa_offset 328 ; NOVEC-NEXT: la %r2, 166(%r15) ; NOVEC-NEXT: brasl %r14, foo@PLT -; NOVEC-NEXT: lh %r0, 166(%r15) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 166(%r15) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f0, %r0 ; NOVEC-NEXT: # kill: def $f0h killed $f0h killed $f0d ; NOVEC-NEXT: lmg %r14, %r15, 280(%r15) @@ -48,9 +47,8 @@ define half @f1() { define half @f2(ptr %P) { ; NOVEC-LABEL: f2: ; NOVEC: # %bb.0: -; NOVEC-NEXT: lh %r0, 6(%r2) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 6(%r2) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f0, %r0 ; NOVEC-NEXT: # kill: def $f0h killed $f0h killed $f0d ; NOVEC-NEXT: br %r14 diff --git a/llvm/test/CodeGen/SystemZ/fp-half-move.ll b/llvm/test/CodeGen/SystemZ/fp-half-move.ll index 1c8d5daf05f78..1e761d4c70b22 100644 --- a/llvm/test/CodeGen/SystemZ/fp-half-move.ll +++ b/llvm/test/CodeGen/SystemZ/fp-half-move.ll @@ -10,9 +10,8 @@ define half @f1(ptr %ptr) { ; NOVEC-LABEL: f1: ; NOVEC: # %bb.0: ; NOVEC-NEXT: lh %r0, 0(%r2) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: oilh %r0, 255 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: oill %r0, 255 +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f0, %r0 ; NOVEC-NEXT: # kill: def $f0h killed $f0h killed $f0d ; NOVEC-NEXT: br %r14 @@ -32,8 +31,8 @@ define half @f1(ptr %ptr) { define half @f2(i16 %Arg) { ; NOVEC-LABEL: f2: ; NOVEC: # %bb.0: -; NOVEC-NEXT: sll %r2, 16 -; NOVEC-NEXT: risbhg %r0, %r2, 0, 159, 32 +; NOVEC-NEXT: # kill: def $r2l killed $r2l def $r2d +; NOVEC-NEXT: sllg %r0, %r2, 48 ; NOVEC-NEXT: ldgr %f0, %r0 ; NOVEC-NEXT: # kill: def $f0h killed $f0h killed $f0d ; NOVEC-NEXT: br %r14 @@ -51,8 +50,7 @@ define void @f3(half %val, ptr %ptr) { ; NOVEC: # %bb.0: ; NOVEC-NEXT: # kill: def $f0h killed $f0h def $f0d ; NOVEC-NEXT: lgdr %r0, %f0 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: stc %r0, 0(%r2) ; NOVEC-NEXT: br %r14 ; @@ -72,8 +70,8 @@ define i16 @f4(half %Arg) { ; NOVEC: # %bb.0: ; NOVEC-NEXT: # kill: def $f0h killed $f0h def $f0d ; NOVEC-NEXT: lgdr %r0, %f0 -; NOVEC-NEXT: risblg %r2, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r2, 16 +; NOVEC-NEXT: srlg %r2, %r0, 48 +; NOVEC-NEXT: # kill: def $r2l killed $r2l killed $r2d ; NOVEC-NEXT: br %r14 ; ; VECTOR-LABEL: f4: diff --git a/llvm/test/CodeGen/SystemZ/fp-half-strict.ll b/llvm/test/CodeGen/SystemZ/fp-half-strict.ll index 43c2d7b2ab8c8..4f58eb4c6cb20 100644 --- a/llvm/test/CodeGen/SystemZ/fp-half-strict.ll +++ b/llvm/test/CodeGen/SystemZ/fp-half-strict.ll @@ -77,9 +77,8 @@ define void @fun1(ptr %Src, ptr %Dst) #0 { ; NOVEC-NEXT: .cfi_offset %r15, -40 ; NOVEC-NEXT: aghi %r15, -160 ; NOVEC-NEXT: .cfi_def_cfa_offset 320 -; NOVEC-NEXT: lh %r0, 0(%r2) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 0(%r2) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: lgr %r13, %r3 ; 
NOVEC-NEXT: ldgr %f0, %r0 ; NOVEC-NEXT: # kill: def $f0h killed $f0h killed $f0d @@ -88,8 +87,7 @@ define void @fun1(ptr %Src, ptr %Dst) #0 { ; NOVEC-NEXT: brasl %r14, __truncdfhf2@PLT ; NOVEC-NEXT: # kill: def $f0h killed $f0h def $f0d ; NOVEC-NEXT: lgdr %r0, %f0 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 0(%r13) ; NOVEC-NEXT: bcr 14, %r0 ; NOVEC-NEXT: lmg %r13, %r15, 264(%r15) diff --git a/llvm/test/CodeGen/SystemZ/fp-half-vector.ll b/llvm/test/CodeGen/SystemZ/fp-half-vector.ll index 71906bb8a66d7..ba18ac90e6228 100644 --- a/llvm/test/CodeGen/SystemZ/fp-half-vector.ll +++ b/llvm/test/CodeGen/SystemZ/fp-half-vector.ll @@ -30,25 +30,21 @@ define <8 x half> @fun0(<8 x half> %Op) { ; NOVEC-NEXT: .cfi_offset %f13, -208 ; NOVEC-NEXT: .cfi_offset %f14, -216 ; NOVEC-NEXT: .cfi_offset %f15, -224 -; NOVEC-NEXT: lh %r0, 414(%r15) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: ldgr %f15, %r0 -; NOVEC-NEXT: lh %r0, 406(%r15) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 414(%r15) +; NOVEC-NEXT: sllg %r0, %r0, 48 +; NOVEC-NEXT: ldgr %f14, %r0 +; NOVEC-NEXT: lgh %r0, 406(%r15) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f12, %r0 -; NOVEC-NEXT: lh %r0, 398(%r15) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 398(%r15) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f9, %r0 -; NOVEC-NEXT: lh %r0, 390(%r15) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 390(%r15) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ler %f10, %f6 ; NOVEC-NEXT: ler %f11, %f4 ; NOVEC-NEXT: ler %f13, %f2 -; NOVEC-NEXT: ler %f14, %f0 +; NOVEC-NEXT: ler %f15, %f0 ; NOVEC-NEXT: lgr %r13, %r2 ; NOVEC-NEXT: ldgr %f0, %r0 ; NOVEC-NEXT: # kill: def $f0h killed $f0h killed $f0d @@ -66,16 +62,16 @@ define <8 x half> @fun0(<8 x half> %Op) { ; NOVEC-NEXT: aebr %f0, %f0 ; NOVEC-NEXT: brasl %r14, __truncsfhf2@PLT ; NOVEC-NEXT: ler %f12, %f0 -; NOVEC-NEXT: ler %f0, %f15 +; NOVEC-NEXT: ler %f0, %f14 ; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT ; NOVEC-NEXT: aebr %f0, %f0 ; NOVEC-NEXT: brasl %r14, __truncsfhf2@PLT -; NOVEC-NEXT: ler %f15, %f0 -; NOVEC-NEXT: ler %f0, %f14 +; NOVEC-NEXT: ler %f14, %f0 +; NOVEC-NEXT: ler %f0, %f15 ; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT ; NOVEC-NEXT: aebr %f0, %f0 ; NOVEC-NEXT: brasl %r14, __truncsfhf2@PLT -; NOVEC-NEXT: ler %f14, %f0 +; NOVEC-NEXT: ler %f15, %f0 ; NOVEC-NEXT: ler %f0, %f13 ; NOVEC-NEXT: brasl %r14, __extendhfsf2@PLT ; NOVEC-NEXT: aebr %f0, %f0 @@ -92,36 +88,28 @@ define <8 x half> @fun0(<8 x half> %Op) { ; NOVEC-NEXT: brasl %r14, __truncsfhf2@PLT ; NOVEC-NEXT: # kill: def $f0h killed $f0h def $f0d ; NOVEC-NEXT: lgdr %r0, %f0 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 6(%r13) ; NOVEC-NEXT: lgdr %r0, %f11 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 4(%r13) ; NOVEC-NEXT: lgdr %r0, %f13 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 2(%r13) -; NOVEC-NEXT: lgdr %r0, %f14 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 -; NOVEC-NEXT: sth %r0, 0(%r13) ; NOVEC-NEXT: lgdr %r0, %f15 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg 
%r0, %r0, 48 +; NOVEC-NEXT: sth %r0, 0(%r13) +; NOVEC-NEXT: lgdr %r0, %f14 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 14(%r13) ; NOVEC-NEXT: lgdr %r0, %f12 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 12(%r13) ; NOVEC-NEXT: lgdr %r0, %f9 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 10(%r13) ; NOVEC-NEXT: lgdr %r0, %f8 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 8(%r13) ; NOVEC-NEXT: ld %f8, 216(%r15) # 8-byte Reload ; NOVEC-NEXT: ld %f9, 208(%r15) # 8-byte Reload @@ -404,69 +392,53 @@ entry: define void @fun3(ptr %Src, ptr %Dst) { ; NOVEC-LABEL: fun3: ; NOVEC: # %bb.0: # %entry -; NOVEC-NEXT: lh %r0, 0(%r2) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 0(%r2) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f0, %r0 -; NOVEC-NEXT: lh %r0, 2(%r2) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 2(%r2) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f1, %r0 -; NOVEC-NEXT: lh %r0, 4(%r2) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 4(%r2) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f2, %r0 -; NOVEC-NEXT: lh %r0, 6(%r2) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 6(%r2) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f3, %r0 -; NOVEC-NEXT: lh %r0, 8(%r2) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 8(%r2) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f4, %r0 -; NOVEC-NEXT: lh %r0, 10(%r2) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 10(%r2) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f5, %r0 -; NOVEC-NEXT: lh %r0, 12(%r2) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 12(%r2) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f6, %r0 -; NOVEC-NEXT: lh %r0, 14(%r2) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 14(%r2) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f7, %r0 ; NOVEC-NEXT: lgdr %r0, %f7 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 14(%r3) ; NOVEC-NEXT: lgdr %r0, %f6 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 12(%r3) ; NOVEC-NEXT: lgdr %r0, %f5 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 10(%r3) ; NOVEC-NEXT: lgdr %r0, %f4 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 8(%r3) ; NOVEC-NEXT: lgdr %r0, %f3 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 6(%r3) ; NOVEC-NEXT: lgdr %r0, %f2 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 4(%r3) ; NOVEC-NEXT: lgdr %r0, %f1 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 2(%r3) ; NOVEC-NEXT: lgdr %r0, %f0 -; 
NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 0(%r3) ; NOVEC-NEXT: br %r14 ; @@ -506,124 +478,96 @@ define void @fun4(ptr %Src, ptr %Dst) { ; NOVEC-NEXT: .cfi_offset %r15, -40 ; NOVEC-NEXT: aghi %r15, -208 ; NOVEC-NEXT: .cfi_def_cfa_offset 368 -; NOVEC-NEXT: lh %r0, 0(%r2) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 0(%r2) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f0, %r0 -; NOVEC-NEXT: lh %r0, 2(%r2) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: # kill: def $f0h killed $f0h killed $f0d -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 2(%r2) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f2, %r0 +; NOVEC-NEXT: # kill: def $f0h killed $f0h killed $f0d ; NOVEC-NEXT: # kill: def $f2h killed $f2h killed $f2d -; NOVEC-NEXT: lh %r0, 4(%r2) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 4(%r2) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f4, %r0 ; NOVEC-NEXT: # kill: def $f4h killed $f4h killed $f4d -; NOVEC-NEXT: lh %r0, 6(%r2) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 6(%r2) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f6, %r0 ; NOVEC-NEXT: # kill: def $f6h killed $f6h killed $f6d -; NOVEC-NEXT: lh %r0, 8(%r2) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 8(%r2) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f1, %r0 -; NOVEC-NEXT: lh %r0, 10(%r2) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 10(%r2) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f3, %r0 -; NOVEC-NEXT: lh %r0, 12(%r2) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 12(%r2) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f5, %r0 -; NOVEC-NEXT: lh %r0, 14(%r2) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 14(%r2) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f7, %r0 ; NOVEC-NEXT: lgdr %r0, %f7 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 190(%r15) ; NOVEC-NEXT: lgdr %r0, %f5 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 182(%r15) ; NOVEC-NEXT: lgdr %r0, %f3 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 174(%r15) ; NOVEC-NEXT: lgdr %r0, %f1 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: la %r2, 192(%r15) ; NOVEC-NEXT: lgr %r13, %r3 ; NOVEC-NEXT: sth %r0, 166(%r15) ; NOVEC-NEXT: brasl %r14, foo@PLT -; NOVEC-NEXT: lh %r0, 192(%r15) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 192(%r15) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f0, %r0 -; NOVEC-NEXT: lh %r0, 194(%r15) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 194(%r15) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f1, %r0 -; NOVEC-NEXT: lh %r0, 196(%r15) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 196(%r15) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f2, %r0 -; NOVEC-NEXT: lh %r0, 198(%r15) -; NOVEC-NEXT: sll %r0, 16 -; 
NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 198(%r15) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f3, %r0 -; NOVEC-NEXT: lh %r0, 200(%r15) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 200(%r15) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f4, %r0 -; NOVEC-NEXT: lh %r0, 202(%r15) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 202(%r15) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f5, %r0 -; NOVEC-NEXT: lh %r0, 204(%r15) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 204(%r15) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f6, %r0 -; NOVEC-NEXT: lh %r0, 206(%r15) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 206(%r15) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f7, %r0 ; NOVEC-NEXT: lgdr %r0, %f7 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 14(%r13) ; NOVEC-NEXT: lgdr %r0, %f6 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 12(%r13) ; NOVEC-NEXT: lgdr %r0, %f5 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 10(%r13) ; NOVEC-NEXT: lgdr %r0, %f4 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 8(%r13) ; NOVEC-NEXT: lgdr %r0, %f3 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 6(%r13) ; NOVEC-NEXT: lgdr %r0, %f2 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 4(%r13) ; NOVEC-NEXT: lgdr %r0, %f1 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 2(%r13) ; NOVEC-NEXT: lgdr %r0, %f0 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 0(%r13) ; NOVEC-NEXT: lmg %r13, %r15, 312(%r15) ; NOVEC-NEXT: br %r14 @@ -694,69 +638,53 @@ define void @fun5(<4 x half> %dummy, <8 x half> %Arg5) { ; NOVEC-NEXT: .cfi_offset %f9, -176 ; NOVEC-NEXT: .cfi_offset %f10, -184 ; NOVEC-NEXT: .cfi_offset %f11, -192 -; NOVEC-NEXT: lh %r0, 422(%r15) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 422(%r15) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f1, %r0 -; NOVEC-NEXT: lh %r0, 430(%r15) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 430(%r15) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f3, %r0 -; NOVEC-NEXT: lh %r0, 438(%r15) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 438(%r15) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f5, %r0 -; NOVEC-NEXT: lh %r0, 446(%r15) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 446(%r15) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f7, %r0 -; NOVEC-NEXT: lh %r0, 454(%r15) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 454(%r15) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f8, %r0 -; NOVEC-NEXT: lh %r0, 462(%r15) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, 
%r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 462(%r15) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f9, %r0 -; NOVEC-NEXT: lh %r0, 470(%r15) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 470(%r15) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f10, %r0 -; NOVEC-NEXT: lh %r0, 478(%r15) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 478(%r15) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f11, %r0 ; NOVEC-NEXT: lgdr %r0, %f11 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 222(%r15) ; NOVEC-NEXT: lgdr %r0, %f10 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 214(%r15) ; NOVEC-NEXT: lgdr %r0, %f9 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 206(%r15) ; NOVEC-NEXT: lgdr %r0, %f8 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 198(%r15) ; NOVEC-NEXT: lgdr %r0, %f7 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 190(%r15) ; NOVEC-NEXT: lgdr %r0, %f5 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 182(%r15) ; NOVEC-NEXT: lgdr %r0, %f3 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 174(%r15) ; NOVEC-NEXT: lgdr %r0, %f1 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 166(%r15) ; NOVEC-NEXT: brasl %r14, foo2@PLT ; NOVEC-NEXT: ld %f8, 248(%r15) # 8-byte Reload diff --git a/llvm/test/CodeGen/SystemZ/fp-half.ll b/llvm/test/CodeGen/SystemZ/fp-half.ll index 3585772cacf29..83b66052aa123 100644 --- a/llvm/test/CodeGen/SystemZ/fp-half.ll +++ b/llvm/test/CodeGen/SystemZ/fp-half.ll @@ -187,13 +187,11 @@ entry: define void @fun3(ptr %Src, ptr %Dst) { ; NOVEC-LABEL: fun3: ; NOVEC: # %bb.0: # %entry -; NOVEC-NEXT: lh %r0, 0(%r2) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 0(%r2) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f0, %r0 ; NOVEC-NEXT: lgdr %r0, %f0 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 0(%r3) ; NOVEC-NEXT: br %r14 ; @@ -217,9 +215,8 @@ define void @fun4(ptr %Src, ptr %Dst) { ; NOVEC-NEXT: .cfi_offset %r15, -40 ; NOVEC-NEXT: aghi %r15, -160 ; NOVEC-NEXT: .cfi_def_cfa_offset 320 -; NOVEC-NEXT: lh %r0, 0(%r2) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 0(%r2) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: lgr %r13, %r3 ; NOVEC-NEXT: ldgr %f0, %r0 ; NOVEC-NEXT: # kill: def $f0h killed $f0h killed $f0d @@ -228,8 +225,7 @@ define void @fun4(ptr %Src, ptr %Dst) { ; NOVEC-NEXT: brasl %r14, __truncdfhf2@PLT ; NOVEC-NEXT: # kill: def $f0h killed $f0h def $f0d ; NOVEC-NEXT: lgdr %r0, %f0 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 0(%r13) ; NOVEC-NEXT: lmg %r13, %r15, 264(%r15) ; NOVEC-NEXT: br %r14 @@ -268,9 +264,8 @@ define void @fun5(ptr %Src, ptr %Dst) { ; NOVEC-NEXT: .cfi_offset %r15, -40 ; NOVEC-NEXT: aghi 
%r15, -192 ; NOVEC-NEXT: .cfi_def_cfa_offset 352 -; NOVEC-NEXT: lh %r0, 0(%r2) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 0(%r2) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: la %r2, 160(%r15) ; NOVEC-NEXT: lgr %r13, %r3 ; NOVEC-NEXT: ldgr %f0, %r0 @@ -285,8 +280,7 @@ define void @fun5(ptr %Src, ptr %Dst) { ; NOVEC-NEXT: brasl %r14, __trunctfhf2@PLT ; NOVEC-NEXT: # kill: def $f0h killed $f0h def $f0d ; NOVEC-NEXT: lgdr %r0, %f0 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 0(%r13) ; NOVEC-NEXT: lmg %r13, %r15, 296(%r15) ; NOVEC-NEXT: br %r14 @@ -402,12 +396,10 @@ define half @fun7(half %Op0, ptr %Dst, ptr %Src) { ; NOVEC: # %bb.0: # %entry ; NOVEC-NEXT: # kill: def $f0h killed $f0h def $f0d ; NOVEC-NEXT: lgdr %r0, %f0 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 0(%r2) -; NOVEC-NEXT: lh %r0, 0(%r3) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 0(%r3) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f0, %r0 ; NOVEC-NEXT: # kill: def $f0h killed $f0h killed $f0d ; NOVEC-NEXT: br %r14 @@ -434,17 +426,15 @@ define void @fun8(ptr %Src, ptr %Dst) { ; NOVEC-NEXT: .cfi_offset %r15, -40 ; NOVEC-NEXT: aghi %r15, -160 ; NOVEC-NEXT: .cfi_def_cfa_offset 320 -; NOVEC-NEXT: lh %r0, 0(%r2) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 0(%r2) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: lgr %r13, %r3 ; NOVEC-NEXT: ldgr %f0, %r0 ; NOVEC-NEXT: # kill: def $f0h killed $f0h killed $f0d ; NOVEC-NEXT: brasl %r14, foo@PLT ; NOVEC-NEXT: # kill: def $f0h killed $f0h def $f0d ; NOVEC-NEXT: lgdr %r0, %f0 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: sth %r0, 0(%r13) ; NOVEC-NEXT: lmg %r13, %r15, 264(%r15) ; NOVEC-NEXT: br %r14 @@ -483,9 +473,8 @@ define half @fun9(half %Arg0, half %Arg1, half %Arg2, half %Arg3, half %Arg4) { ; NOVEC-NEXT: std %f9, 160(%r15) # 8-byte Spill ; NOVEC-NEXT: .cfi_offset %f8, -168 ; NOVEC-NEXT: .cfi_offset %f9, -176 -; NOVEC-NEXT: lh %r0, 342(%r15) -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lgh %r0, 342(%r15) +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ler %f8, %f6 ; NOVEC-NEXT: ldgr %f0, %r0 ; NOVEC-NEXT: # kill: def $f0h killed $f0h killed $f0d @@ -538,8 +527,7 @@ define void @fun10(half %Arg0) { ; NOVEC-NEXT: .cfi_def_cfa_offset 328 ; NOVEC-NEXT: # kill: def $f0h killed $f0h def $f0d ; NOVEC-NEXT: lgdr %r0, %f0 -; NOVEC-NEXT: risblg %r0, %r0, 0, 159, 32 -; NOVEC-NEXT: srl %r0, 16 +; NOVEC-NEXT: srlg %r0, %r0, 48 ; NOVEC-NEXT: ler %f2, %f0 ; NOVEC-NEXT: ler %f4, %f0 ; NOVEC-NEXT: ler %f6, %f0 @@ -576,16 +564,14 @@ define void @fun11() { ; NOVEC-NEXT: .cfi_offset %r15, -40 ; NOVEC-NEXT: aghi %r15, -160 ; NOVEC-NEXT: .cfi_def_cfa_offset 320 -; NOVEC-NEXT: lhrl %r0, .LCPI11_0 -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 +; NOVEC-NEXT: lghrl %r0, .LCPI11_0 +; NOVEC-NEXT: sllg %r0, %r0, 48 ; NOVEC-NEXT: ldgr %f4, %r0 -; NOVEC-NEXT: lhrl %r0, .LCPI11_1 -; NOVEC-NEXT: sll %r0, 16 -; NOVEC-NEXT: # kill: def $f4h killed $f4h killed $f4d +; NOVEC-NEXT: lghrl %r0, .LCPI11_1 ; NOVEC-NEXT: lzer %f2 -; NOVEC-NEXT: risbhg %r0, %r0, 0, 159, 32 ; NOVEC-NEXT: lcdfr %f0, %f2 +; NOVEC-NEXT: # kill: def $f4h killed $f4h killed $f4d +; NOVEC-NEXT: sllg %r0, %r0, 
48
 ; NOVEC-NEXT: ldgr %f6, %r0
 ; NOVEC-NEXT: # kill: def $f6h killed $f6h killed $f6d
 ; NOVEC-NEXT: brasl %r14, foo2@PLT
diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-conv-08.ll b/llvm/test/CodeGen/SystemZ/fp-strict-conv-08.ll
index e0821bf4f529e..c79f884ac4aeb 100644
--- a/llvm/test/CodeGen/SystemZ/fp-strict-conv-08.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-conv-08.ll
@@ -10,7 +10,9 @@ declare fp128 @llvm.experimental.constrained.uitofp.f128.i64(i64, metadata, meta
 ; Test i64->f16. For z10, this results in just a single libcall.
 define half @f0(i64 %i) #0 {
 ; CHECK-LABEL: f0:
-; CHECK: brasl %r14, __floatundihf@PLT
+; CHECK: cegbr
+; CHECK: aebr
+; CHECK: brasl %r14, __truncsfhf2@PLT
 ; CHECK: br %r14
   %conv = call half @llvm.experimental.constrained.uitofp.f16.i64(i64 %i,
                                                metadata !"round.dynamic",
diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-conv-17.ll b/llvm/test/CodeGen/SystemZ/fp-strict-conv-17.ll
index c9863af760688..2becd18277e2a 100644
--- a/llvm/test/CodeGen/SystemZ/fp-strict-conv-17.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-conv-17.ll
@@ -59,7 +59,8 @@ define float @f3(i128 %i) #0 {
 ; Test signed i128->f16.
 define half @f4(i128 %i) #0 {
 ; CHECK-LABEL: f4:
-; CHECK: brasl %r14, __floattihf@PLT
+; CHECK: brasl %r14, __floattisf@PLT
+; CHECK: brasl %r14, __truncsfhf2@PLT
 ; CHECK: br %r14
   %conv = call half @llvm.experimental.constrained.sitofp.f16.i128(i128 %i,
                                                metadata !"round.dynamic",
@@ -103,7 +104,8 @@ define float @f7(i128 %i) #0 {
 ; Test unsigned i128->f16.
 define half @f8(i128 %i) #0 {
 ; CHECK-LABEL: f8:
-; CHECK: brasl %r14, __floatuntihf@PLT
+; CHECK: brasl %r14, __floatuntisf@PLT
+; CHECK: brasl %r14, __truncsfhf2@PLT
 ; CHECK: br %r14
   %conv = call half @llvm.experimental.constrained.uitofp.f16.i128(i128 %i,
                                                metadata !"round.dynamic",
@@ -144,7 +146,8 @@ define i128 @f11(float %f) #0 {
 ; Test signed f16->i128.
 define i128 @f12(half %f) #0 {
 ; CHECK-LABEL: f12:
-; CHECK: brasl %r14, __fixhfti@PLT
+; CHECK: brasl %r14, __extendhfsf2@PLT
+; CHECK: brasl %r14, __fixsfti@PLT
 ; CHECK: br %r14
   %conv = call i128 @llvm.experimental.constrained.fptosi.i128.f16(half %f,
                                                metadata !"fpexcept.strict") #0
@@ -184,7 +187,8 @@ define i128 @f15(float %f) #0 {
 ; Test unsigned f16->i128.
define i128 @f16(half %f) #0 { ; CHECK-LABEL: f16: -; CHECK: brasl %r14, __fixunshfti@PLT +; CHECK: brasl %r14, __extendhfsf2@PLT +; CHECK: brasl %r14, __fixunssfti@PLT ; CHECK: br %r14 %conv = call i128 @llvm.experimental.constrained.fptoui.i128.f16(half %f, metadata !"fpexcept.strict") #0 diff --git a/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting-explicit-regs-zEC12.ll b/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting-explicit-regs-zEC12.ll index f3ae0c3029c73..6228ffaa35fa2 100644 --- a/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting-explicit-regs-zEC12.ll +++ b/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting-explicit-regs-zEC12.ll @@ -6,17 +6,14 @@ define signext i16 @short_and_f(i16 signext %cc_dep1) { ; CHECK-LABEL: short_and_f: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sll %r2, 16 -; CHECK-NEXT: risbhg %r0, %r2, 0, 159, 32 +; CHECK-NEXT: sllg %r0, %r2, 48 ; CHECK-NEXT: ldgr %f1, %r0 ; CHECK-NEXT: # kill: def $f1h killed $f1h killed $f1d ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: # kill: def $f1h killed $f1h def $f1d ; CHECK-NEXT: lgdr %r0, %f1 -; CHECK-NEXT: risblg %r0, %r0, 0, 159, 32 -; CHECK-NEXT: srl %r0, 16 -; CHECK-NEXT: lghr %r2, %r0 +; CHECK-NEXT: srag %r2, %r0, 48 ; CHECK-NEXT: br %r14 entry: %0 = tail call i16 asm sideeffect "", "={f1},0"(i16 %cc_dep1) @@ -76,12 +73,10 @@ define half @half_and_r(half %cc_dep1) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: # kill: def $f0h killed $f0h def $f0d ; CHECK-NEXT: lgdr %r0, %f0 -; CHECK-NEXT: risblg %r2, %r0, 0, 159, 32 -; CHECK-NEXT: srl %r2, 16 +; CHECK-NEXT: srlg %r2, %r0, 48 ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: sll %r2, 16 -; CHECK-NEXT: risbhg %r0, %r2, 0, 159, 32 +; CHECK-NEXT: sllg %r0, %r2, 48 ; CHECK-NEXT: ldgr %f0, %r0 ; CHECK-NEXT: # kill: def $f0h killed $f0h killed $f0d ; CHECK-NEXT: br %r14 diff --git a/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting-zEC12.ll b/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting-zEC12.ll index 36140073a41b9..19969ccf4e297 100644 --- a/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting-zEC12.ll +++ b/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting-zEC12.ll @@ -6,15 +6,12 @@ define signext i16 @short_and_f(i16 signext %cc_dep1) { ; CHECK-LABEL: short_and_f: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sll %r2, 16 -; CHECK-NEXT: risbhg %r0, %r2, 0, 159, 32 +; CHECK-NEXT: sllg %r0, %r2, 48 ; CHECK-NEXT: ldgr %f0, %r0 ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: lgdr %r0, %f0 -; CHECK-NEXT: risblg %r0, %r0, 0, 159, 32 -; CHECK-NEXT: srl %r0, 16 -; CHECK-NEXT: lghr %r2, %r0 +; CHECK-NEXT: srag %r2, %r0, 48 ; CHECK-NEXT: br %r14 entry: %0 = tail call i16 asm sideeffect "", "=f,0"(i16 %cc_dep1) @@ -72,12 +69,10 @@ define half @half_and_r(half %cc_dep1) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: # kill: def $f0h killed $f0h def $f0d ; CHECK-NEXT: lgdr %r0, %f0 -; CHECK-NEXT: risblg %r0, %r0, 0, 159, 32 -; CHECK-NEXT: srl %r0, 16 +; CHECK-NEXT: srlg %r0, %r0, 48 ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: sll %r0, 16 -; CHECK-NEXT: risbhg %r0, %r0, 0, 159, 32 +; CHECK-NEXT: sllg %r0, %r0, 48 ; CHECK-NEXT: ldgr %f0, %r0 ; CHECK-NEXT: # kill: def $f0h killed $f0h killed $f0d ; CHECK-NEXT: br %r14 diff --git a/llvm/test/CodeGen/SystemZ/spill-half-01.mir b/llvm/test/CodeGen/SystemZ/spill-half-01.mir index 9731f11445e50..2680d0225459b 100644 --- a/llvm/test/CodeGen/SystemZ/spill-half-01.mir +++ b/llvm/test/CodeGen/SystemZ/spill-half-01.mir @@ -25,14 +25,14 @@ body: | ; VECTOR-LABEL: fun0: ; VECTOR: aghi %r15, -232 - ; VECTOR: 
vsteh %v4, 166(%r15), 0 # 2-byte Folded Spill - ; VECTOR-NEXT: vsteh %v2, 162(%r15), 0 # 2-byte Folded Spill - ; VECTOR-NEXT: vsteh %v0, 164(%r15), 0 # 2-byte Folded Spill + ; VECTOR: vsteh %v4, 166(%r15), 0 # 2-byte Spill + ; VECTOR-NEXT: vsteh %v2, 162(%r15), 0 # 2-byte Spill + ; VECTOR-NEXT: vsteh %v0, 164(%r15), 0 # 2-byte Spill ; VECTOR-NEXT: #APP ; VECTOR-NEXT: #NO_APP - ; VECTOR-NEXT: vlreph %v0, 162(%r15) # 2-byte Folded Reload - ; VECTOR: vlreph %v0, 164(%r15) # 2-byte Folded Reload - ; VECTOR: vlreph %v0, 166(%r15) # 2-byte Folded Reload + ; VECTOR-NEXT: vlreph %v0, 162(%r15) # 2-byte Reload + ; VECTOR: vlreph %v0, 164(%r15) # 2-byte Reload + ; VECTOR: vlreph %v0, 166(%r15) # 2-byte Reload %2:fp16bit = COPY $f4h %1:fp16bit = COPY $f2h diff --git a/llvm/test/CodeGen/SystemZ/spill-half-02.mir b/llvm/test/CodeGen/SystemZ/spill-half-02.mir index 9ee2228612f50..724b5d352b298 100644 --- a/llvm/test/CodeGen/SystemZ/spill-half-02.mir +++ b/llvm/test/CodeGen/SystemZ/spill-half-02.mir @@ -12,10 +12,10 @@ body: | ; CHECK-LABEL: fun0: ; CHECK: vlreph %v0, 0(%r2) - ; CHECK-NEXT: vsteh %v0, 166(%r15), 0 # 2-byte Folded Spill + ; CHECK-NEXT: vsteh %v0, 166(%r15), 0 # 2-byte Spill ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP - ; CHECK-NEXT: vlreph %v0, 166(%r15) # 2-byte Folded Reload + ; CHECK-NEXT: vlreph %v0, 166(%r15) # 2-byte Reload ; CHECK-NEXT: vsteh %v0, 0(%r3), 0 %1:addr64bit = COPY $r3d diff --git a/llvm/test/CodeGen/SystemZ/tdc-05.ll b/llvm/test/CodeGen/SystemZ/tdc-05.ll index 30f875c404258..becf293c21f1f 100644 --- a/llvm/test/CodeGen/SystemZ/tdc-05.ll +++ b/llvm/test/CodeGen/SystemZ/tdc-05.ll @@ -12,7 +12,8 @@ declare fp128 @llvm.fabs.f128(fp128) define i32 @f0(half %x) { ; CHECK-LABEL: f0 ; CHECK: lgdr %r0, %f0 -; CHECK-NEXT: srag %r0, %r0, 48 +; CHECK-NEXT: srlg %r0, %r0, 48 +; CHECK-NEXT: lhr %r0, %r0 ; CHECK-NEXT: chi %r0, 0 ; CHECK-NEXT: ipm %r0 ; CHECK-NEXT: risbg %r13, %r0, 63, 191, 36 diff --git a/llvm/test/CodeGen/SystemZ/vec-max-05.ll b/llvm/test/CodeGen/SystemZ/vec-max-05.ll index 6815bad060e39..09d40c77a1fb9 100644 --- a/llvm/test/CodeGen/SystemZ/vec-max-05.ll +++ b/llvm/test/CodeGen/SystemZ/vec-max-05.ll @@ -14,7 +14,6 @@ declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) declare float @llvm.maximum.f32(float, float) declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>) -declare half @fmaxh(half, half) declare half @llvm.maxnum.f16(half, half) declare fp128 @fmaxl(fp128, fp128) @@ -90,15 +89,6 @@ define <2 x double> @f7(<2 x double> %dummy, <2 x double> %val1, ret <2 x double> %ret } -; Test the fmaxh library function. -define half @f11_half(half %dummy, half %val1, half %val2) { -; CHECK-LABEL: f11_half: -; CHECK: brasl %r14, fmaxh@PLT -; CHECK: br %r14 - %ret = call half @fmaxh(half %val1, half %val2) readnone - ret half %ret -} - ; Test the fmaxf library function. 
define float @f11(float %dummy, float %val1, float %val2) { ; CHECK-LABEL: f11: diff --git a/llvm/test/CodeGen/SystemZ/vec-min-05.ll b/llvm/test/CodeGen/SystemZ/vec-min-05.ll index 78ae80d89e30f..b7b288f531041 100644 --- a/llvm/test/CodeGen/SystemZ/vec-min-05.ll +++ b/llvm/test/CodeGen/SystemZ/vec-min-05.ll @@ -14,7 +14,6 @@ declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) declare float @llvm.minimum.f32(float, float) declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>) -declare half @fminh(half, half) declare half @llvm.minnum.f16(half, half) declare fp128 @fminl(fp128, fp128) @@ -90,15 +89,6 @@ define <2 x double> @f7(<2 x double> %dummy, <2 x double> %val1, ret <2 x double> %ret } -; Test the fminh library function. -define half @f11_half(half %dummy, half %val1, half %val2) { -; CHECK-LABEL: f11_half: -; CHECK: %r14, fminh@PLT -; CHECK: br %r14 - %ret = call half @fminh(half %val1, half %val2) readnone - ret half %ret -} - ; Test the fminf library function. define float @f11(float %dummy, float %val1, float %val2) { ; CHECK-LABEL: f11: From ddef5ab3ab6a6fe2b6a35d7b3aa86413a2290abe Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Fri, 4 Apr 2025 17:20:57 -0600 Subject: [PATCH 08/12] Review updates. --- .../Target/SystemZ/SystemZISelLowering.cpp | 185 +++++++----------- llvm/lib/Target/SystemZ/SystemZInstrFP.td | 23 ++- llvm/lib/Target/SystemZ/SystemZScheduleZ13.td | 2 +- llvm/lib/Target/SystemZ/SystemZScheduleZ14.td | 2 +- llvm/lib/Target/SystemZ/SystemZScheduleZ15.td | 2 +- llvm/lib/Target/SystemZ/SystemZScheduleZ16.td | 2 +- .../lib/Target/SystemZ/SystemZScheduleZ196.td | 2 +- .../Target/SystemZ/SystemZScheduleZEC12.td | 2 +- llvm/test/CodeGen/SystemZ/fp-copysign-03.ll | 112 ++++++++--- 9 files changed, 180 insertions(+), 152 deletions(-) diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index a78427419f853..2a59f76ac2353 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -232,9 +232,14 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, for (auto Op : {ISD::FP_TO_SINT, ISD::STRICT_FP_TO_SINT, ISD::SINT_TO_FP, ISD::STRICT_SINT_TO_FP}) setOperationAction(Op, VT, Custom); - for (auto Op : {ISD::FP_TO_UINT, ISD::STRICT_FP_TO_UINT, - ISD::UINT_TO_FP, ISD::STRICT_UINT_TO_FP}) + for (auto Op : {ISD::FP_TO_UINT, ISD::STRICT_FP_TO_UINT}) setOperationAction(Op, VT, Custom); + for (auto Op : {ISD::UINT_TO_FP, ISD::STRICT_UINT_TO_FP}) { + // Handle unsigned 32-bit input types as signed 64-bit types on z10. + auto OpAction = + (!Subtarget.hasFPExtension() && VT == MVT::i32) ? Promote : Custom; + setOperationAction(Op, VT, OpAction); + } } } @@ -578,7 +583,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, // Special treatment. setOperationAction(ISD::IS_FPCLASS, VT, Custom); - setOperationAction(ISD::FCOPYSIGN, VT, Custom); // Handle constrained floating-point operations. setOperationAction(ISD::STRICT_FADD, VT, Legal); @@ -6825,22 +6829,18 @@ SDValue SystemZTargetLowering::lower_FP_TO_INT(SDValue Op, EVT InVT = InOp.getValueType(); // FP to unsigned is not directly supported on z10. Promoting an i32 - // result to i64 doesn't generate an inexact condition for values that are - // outside the i32 range but in the i64 range, so use the default - // expansion. 
+  // result to (signed) i64 doesn't generate an inexact condition (fp
+  // exception) for values that are outside the i32 range but in the i64
+  // range, so use the default expansion.
   if (!Subtarget.hasFPExtension() && !IsSigned)
-    return SDValue(); // Expand (i32 / i64).
+    // Expand i32/i64. F16 values will be recognized to fit and extended.
+    return SDValue();
 
+  // Conversion from f16 is done via f32.
   if (InOp.getSimpleValueType() == MVT::f16) {
-    // f16: Extend to f32 before the conversion.
-    if (!IsStrict) {
-      SDValue InF32 = DAG.getFPExtendOrRound(InOp, SDLoc(InOp), MVT::f32);
-      return DAG.getNode(Op->getOpcode(), DL, Op.getSimpleValueType(), InF32);
-    }
-    SDValue InF32;
-    std::tie(InF32, Chain) =
-        DAG.getStrictFPExtendOrRound(InOp, Chain, DL, MVT::f32);
-    return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), {Chain, InF32});
+    SmallVector<SDValue> Results;
+    LowerOperationWrapper(Op.getNode(), Results, DAG);
+    return DAG.getMergeValues(Results, DL);
   }
 
   if (VT == MVT::i128) {
@@ -6863,45 +6863,17 @@ SDValue SystemZTargetLowering::lower_INT_TO_FP(SDValue Op,
   SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
   EVT InVT = InOp.getValueType();
 
-  auto roundToF16 = [&DAG, &IsStrict, &DL, &Chain](SDValue V) -> SDValue {
-    if (!IsStrict)
-      return DAG.getFPExtendOrRound(V, DL, MVT::f16);
-    SDValue F16Res;
-    std::tie(F16Res, Chain) =
-        DAG.getStrictFPExtendOrRound(V, V.getValue(1), DL, MVT::f16);
-    return DAG.getMergeValues({F16Res, Chain}, DL);
-  };
-
-  // Unsigned to fp is not directly supported on z10.
-  if (!Subtarget.hasFPExtension() && !IsSigned) {
-    if (InVT == MVT::i32) { // Conversion from i32 is promoted to i64 (signed).
-      SDValue I64In = DAG.getZExtOrTrunc(InOp, DL, MVT::i64);
-      SDValue FPRes;
-      MVT ResVT = VT == MVT::f16 ? MVT::f32 : VT;
-      if (!IsStrict)
-        FPRes = DAG.getNode(ISD::SINT_TO_FP, DL, ResVT, I64In);
-      else
-        FPRes = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL,
-                            DAG.getVTList(ResVT, MVT::Other), {Chain, I64In});
-      return VT == MVT::f16 ? roundToF16(FPRes) : FPRes;
-    }
-    assert(InVT == MVT::i64 && "i32 and i64 are the only legal int types.");
-    if (VT != MVT::f16)
-      return SDValue(); // Expand
-  }
-
   // Conversion to f16 is done via f32.
   if (VT == MVT::f16) {
-    SDValue PromotedOp;
-    if (!IsStrict)
-      PromotedOp = DAG.getNode(Op->getOpcode(), DL, MVT::f32, InOp);
-    else
-      PromotedOp =
-          DAG.getNode(Op->getOpcode(), DL, DAG.getVTList(MVT::f32, MVT::Other),
-                      {Chain, InOp});
-    return roundToF16(PromotedOp);
+    SmallVector<SDValue> Results;
+    LowerOperationWrapper(Op.getNode(), Results, DAG);
+    return DAG.getMergeValues(Results, DL);
   }
 
+  // Unsigned to fp is not directly supported on z10.
+  if (!Subtarget.hasFPExtension() && !IsSigned)
+    return SDValue(); // Expand i64.
+
   if (InVT == MVT::i128) {
     RTLIB::Libcall LC = IsSigned ? RTLIB::getSINTTOFP(InVT, VT)
                                  : RTLIB::getUINTTOFP(InVT, VT);
@@ -7019,23 +6991,17 @@ SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
 
 SDValue SystemZTargetLowering::lowerFCOPYSIGN(SDValue Op,
                                               SelectionDAG &DAG) const {
-  SDValue Op0 = Op.getOperand(0);
-  SDValue Op1 = Op.getOperand(1);
-  MVT Op0VT = Op0.getSimpleValueType();
-  MVT Op1VT = Op1.getSimpleValueType();
-  if (Op0VT != MVT::f16 && Op1VT != MVT::f16)
-    return Op; // Legal
+  MVT VT = Op.getSimpleValueType();
+  SDValue ValOp = Op.getOperand(0);
+  SDValue SignOp = Op.getOperand(1);
 
-  // Perform the copy on to the largest type present, or f32 if it was f16.
-  MVT VT = (Op0VT.getSizeInBits() > Op1VT.getSizeInBits()) ? 
Op0VT : Op1VT; - if (VT == MVT::f16) - VT = MVT::f32; + // Remove the rounding which would result in a libcall for half. + if (VT == MVT::f16 && SignOp.getOpcode() == ISD::FP_ROUND) { + SDValue WideOp = SignOp.getOperand(0); + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(Op), VT, ValOp, WideOp); + } - SDLoc DL(Op); - SDValue Op0Conv = DAG.getFPExtendOrRound(Op0, DL, VT); - SDValue Op1Conv = DAG.getFPExtendOrRound(Op1, DL, VT); - SDValue ResConv = DAG.getNode(ISD::FCOPYSIGN, DL, VT, {Op0Conv, Op1Conv}); - return DAG.getFPExtendOrRound(ResConv, DL, Op0VT); + return Op; // Legal } SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op, @@ -7359,71 +7325,60 @@ SystemZTargetLowering::LowerOperationWrapper(SDNode *N, } break; } + case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: - case ISD::UINT_TO_FP: { - if (useSoftFloat()) - return; - SDLoc DL(N); - SDValue Src = N->getOperand(0); - EVT ResVT = N->getValueType(0); - if (ResVT == MVT::f16) { - SDValue F32Res = DAG.getNode(N->getOpcode(), DL, MVT::f32, Src); - Results.push_back(DAG.getFPExtendOrRound(F32Res, DL, MVT::f16)); - } - break; - } - case ISD::STRICT_SINT_TO_FP: - case ISD::STRICT_UINT_TO_FP: { + case ISD::STRICT_UINT_TO_FP: + case ISD::STRICT_SINT_TO_FP: { if (useSoftFloat()) return; + bool IsStrict = N->isStrictFPOpcode(); SDLoc DL(N); - SDValue Chain = N->getOperand(0); - SDValue Src = N->getOperand(1); + SDValue InOp = N->getOperand(IsStrict ? 1 : 0); EVT ResVT = N->getValueType(0); + SDValue Chain = IsStrict ? N->getOperand(0) : DAG.getEntryNode(); if (ResVT == MVT::f16) { - SDValue F32Res = - DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::f32, MVT::Other), - {Chain, Src}); - SDValue F16Res; - std::tie(F16Res, Chain) = DAG.getStrictFPExtendOrRound( - F32Res, F32Res.getValue(1), DL, MVT::f16); - Results.push_back(F16Res); - Results.push_back(Chain); + if (!IsStrict) { + SDValue OpF32 = DAG.getNode(N->getOpcode(), DL, MVT::f32, InOp); + Results.push_back(DAG.getFPExtendOrRound(OpF32, DL, MVT::f16)); + } else { + SDValue OpF32 = + DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::f32, MVT::Other), + {Chain, InOp}); + SDValue F16Res; + std::tie(F16Res, Chain) = DAG.getStrictFPExtendOrRound( + OpF32, OpF32.getValue(1), DL, MVT::f16); + Results.push_back(F16Res); + Results.push_back(Chain); + } } break; } case ISD::FP_TO_UINT: - case ISD::FP_TO_SINT: { - if (useSoftFloat()) - return; - SDLoc DL(N); - SDValue Src = N->getOperand(0); - EVT SrcVT = Src->getValueType(0); - if (SrcVT == MVT::f16) { - SDValue SrcF32 = DAG.getFPExtendOrRound(Src, DL, MVT::f32); - SDValue OpF32 = - DAG.getNode(N->getOpcode(), DL, N->getValueType(0), SrcF32); - Results.push_back(OpF32); - } - break; - } + case ISD::FP_TO_SINT: case ISD::STRICT_FP_TO_UINT: case ISD::STRICT_FP_TO_SINT: { if (useSoftFloat()) return; + bool IsStrict = N->isStrictFPOpcode(); SDLoc DL(N); EVT ResVT = N->getValueType(0); - SDValue Chain = N->getOperand(0); - SDValue Src = N->getOperand(1); - EVT SrcVT = Src->getValueType(0); - if (SrcVT == MVT::f16) { - SDValue InF32; - std::tie(InF32, Chain) = - DAG.getStrictFPExtendOrRound(Src, Chain, DL, MVT::f32); - SDValue OpF32 = DAG.getNode( - N->getOpcode(), DL, DAG.getVTList(ResVT, MVT::Other), {Chain, InF32}); - Results.push_back(OpF32); - Results.push_back(OpF32.getValue(1)); + SDValue InOp = N->getOperand(IsStrict ? 1 : 0); + EVT InVT = InOp->getValueType(0); + SDValue Chain = IsStrict ? 
N->getOperand(0) : DAG.getEntryNode(); + if (InVT == MVT::f16) { + if (!IsStrict) { + SDValue InF32 = DAG.getFPExtendOrRound(InOp, DL, MVT::f32); + Results.push_back(DAG.getNode(N->getOpcode(), DL, ResVT, InF32)); + } else { + SDValue InF32; + std::tie(InF32, Chain) = + DAG.getStrictFPExtendOrRound(InOp, Chain, DL, MVT::f32); + SDValue OpF32 = + DAG.getNode(N->getOpcode(), DL, DAG.getVTList(ResVT, MVT::Other), + {Chain, InF32}); + Results.push_back(OpF32); + Results.push_back(OpF32.getValue(1)); + } } break; } diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td index 712d0a73272b1..7775f456bbdc1 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrFP.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrFP.td @@ -84,8 +84,25 @@ let Predicates = [FeatureNoVectorEnhancements1] in def LGDR : UnaryRRE<"lgdr", 0xB3CD, bitconvert, GR64, FP64>; def LDGR : UnaryRRE<"ldgr", 0xB3C1, bitconvert, FP64, GR64>; +// fcopysign with an FP16 result. +let isCodeGenOnly = 1 in { + def CPSDRhh : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP16, FP16, FP16>; + def CPSDRhs : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP16, FP16, FP32>; + def CPSDRhd : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP16, FP16, FP64>; +} + +// The sign of an FP128 is in the high register. +let Predicates = [FeatureNoVectorEnhancements1] in + def : Pat<(fcopysign FP16:$src1, (f128 FP128:$src2)), + (CPSDRhd FP16:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>; +let Predicates = [FeatureVectorEnhancements1] in + def : Pat<(fcopysign FP16:$src1, (f128 VR128:$src2)), + (CPSDRhd FP16:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_h64))>; + + // fcopysign with an FP32 result. let isCodeGenOnly = 1 in { + def CPSDRsh : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP32, FP32, FP16>; def CPSDRss : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP32, FP32, FP32>; def CPSDRsd : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP32, FP32, FP64>; } @@ -99,8 +116,10 @@ let Predicates = [FeatureVectorEnhancements1] in (CPSDRsd FP32:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_h64))>; // fcopysign with an FP64 result. -let isCodeGenOnly = 1 in +let isCodeGenOnly = 1 in { + def CPSDRdh : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP64, FP64, FP16>; def CPSDRds : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP64, FP64, FP32>; +} def CPSDRdd : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP64, FP64, FP64>; // The sign of an FP128 is in the high register. 
@@ -118,6 +137,8 @@ class CopySign128<RegisterOperand cls, dag upper>
         (INSERT_SUBREG FP128:$src1, upper, subreg_h64)>;
 
 let Predicates = [FeatureNoVectorEnhancements1] in {
+  def : CopySign128<FP16, (CPSDRdh (EXTRACT_SUBREG FP128:$src1, subreg_h64),
+                                   FP16:$src2)>;
   def : CopySign128<FP32, (CPSDRds (EXTRACT_SUBREG FP128:$src1, subreg_h64),
                                    FP32:$src2)>;
   def : CopySign128<FP64, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_h64),
diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
 def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXBR$")>;
 
 // Copy sign
-def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s|h)(d|s|h)$")>;
 
 //===----------------------------------------------------------------------===//
 // FP: Load instructions
diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
index e3d2c2e9373d6..b48ed08c8a189 100644
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
@@ -807,7 +807,7 @@ def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>;
 def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXBR$")>;
 
 // Copy sign
-def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s|h)(d|s|h)$")>;
 
 //===----------------------------------------------------------------------===//
 // FP: Load instructions
diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td
index f13988133ac24..e3ec7a6994221 100644
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td
@@ -825,7 +825,7 @@ def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>;
 def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXBR$")>;
 
 // Copy sign
-def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s|h)(d|s|h)$")>;
 
 //===----------------------------------------------------------------------===//
 // FP: Load instructions
diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td
index 739eaf340ef69..4f904daec5052 100644
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td
@@ -826,7 +826,7 @@ def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>;
 def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXBR$")>;
 
 // Copy sign
-def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s|h)(d|s|h)$")>;
 
 //===----------------------------------------------------------------------===//
 // FP: Load instructions
diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
index a898151217aa5..26433b97484da 100644
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
@@ -719,7 +719,7 @@ def : InstRW<[WLat9, WLat9, FPU, NormalGr], (instregex "LT(E|D)BR$")>;
 def : InstRW<[WLat10, WLat10, FPU4, GroupAlone], (instregex "LTXBR$")>;
 
 // Copy sign
-def : InstRW<[WLat5, FXU2, GroupAlone], (instregex "CPSDR(d|s)(d|s)$")>;
+def : InstRW<[WLat5, FXU2, GroupAlone], (instregex "CPSDR(d|s|h)(d|s|h)$")>;
 
 //===----------------------------------------------------------------------===//
 // FP: Load instructions
diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td b/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
index 00237c2407be8..193a793f17367 100644
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
+++ 
b/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td @@ -757,7 +757,7 @@ def : InstRW<[WLat9, WLat9, FPU, NormalGr], (instregex "LT(E|D)BR$")>; def : InstRW<[WLat10, WLat10, FPU4, GroupAlone], (instregex "LTXBR$")>; // Copy sign -def : InstRW<[WLat5, FXU2, GroupAlone], (instregex "CPSDR(d|s)(d|s)$")>; +def : InstRW<[WLat5, FXU2, GroupAlone], (instregex "CPSDR(d|s|h)(d|s|h)$")>; //===----------------------------------------------------------------------===// // FP: Load instructions diff --git a/llvm/test/CodeGen/SystemZ/fp-copysign-03.ll b/llvm/test/CodeGen/SystemZ/fp-copysign-03.ll index 015fdf195dd8e..9625c8a7353b2 100644 --- a/llvm/test/CodeGen/SystemZ/fp-copysign-03.ll +++ b/llvm/test/CodeGen/SystemZ/fp-copysign-03.ll @@ -1,65 +1,117 @@ -; Test copysign intrinsics involving half. +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s --check-prefixes=CHECK,Z10 +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z16 \ +; RUN: | FileCheck %s --check-prefixes=CHECK,Z16 ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; Test copysign intrinsics with half. declare half @llvm.copysign.f16(half, half) declare float @llvm.copysign.f32(float, float) declare double @llvm.copysign.f64(double, double) +declare fp128 @llvm.copysign.f128(fp128, fp128) -; Test f16 copies. +; Test copysign with an f16 result and f16 sign argument. define half @f0(half %a, half %b) { ; CHECK-LABEL: f0: -; CHECK: brasl %r14, __extendhfsf2@PLT -; CHECK: brasl %r14, __extendhfsf2@PLT -; CHECK: cpsdr %f0, %f9, %f0 -; CHECK: brasl %r14, __truncsfhf2@PLT -; CHECK: br %r14 +; CHECK: # %bb.0: +; CHECK-NEXT: cpsdr %f0, %f2, %f0 +; CHECK-NEXT: br %r14 %res = call half @llvm.copysign.f16(half %a, half %b) ret half %res } -; Test f16 copies where the sign comes from an f32. +; Test copysign with an f16 result and f32 sign argument. define half @f1(half %a, float %b) { ; CHECK-LABEL: f1: -; CHECK: brasl %r14, __extendhfsf2@PLT -; CHECK: cpsdr %f0, %f8, %f0 -; CHECK: brasl %r14, __truncsfhf2@PLT -; CHECK: br %r14 +; CHECK: # %bb.0: +; CHECK-NEXT: cpsdr %f0, %f2, %f0 +; CHECK-NEXT: br %r14 %bh = fptrunc float %b to half %res = call half @llvm.copysign.f16(half %a, half %bh) ret half %res } -; Test f16 copies where the sign comes from an f64. +; Test copysign with an f16 result and f64 sign argument. define half @f2(half %a, double %b) { ; CHECK-LABEL: f2: -; CHECK: brasl %r14, __extendhfdf2@PLT -; CHECK: cpsdr %f0, %f8, %f0 -; CHECK: brasl %r14, __truncdfhf2@PLT -; CHECK: br %r14 +; CHECK: # %bb.0: +; CHECK-NEXT: cpsdr %f0, %f2, %f0 +; CHECK-NEXT: br %r14 %bh = fptrunc double %b to half %res = call half @llvm.copysign.f16(half %a, half %bh) ret half %res } -; Test f32 copies in which the sign comes from an f16. -define float @f3(float %a, half %b) { -; CHECK-LABEL: f3: -; CHECK: brasl %r14, __extendhfsf2@PLT -; CHECK: cpsdr %f0, %f0, %f8 -; CHECK: br %r14 +; Test copysign with an f16 result and f128 sign argument. +define half @f3(half %a, fp128 %b) { +; Z10-LABEL: f3: +; Z10: # %bb.0: +; Z10-NEXT: ld %f1, 0(%r2) +; Z10-NEXT: ld %f3, 8(%r2) +; Z10-NEXT: cpsdr %f0, %f1, %f0 +; Z10-NEXT: br %r14 +; +; Z16-LABEL: f3: +; Z16: # %bb.0: +; Z16-NEXT: vl %v1, 0(%r2), 3 +; Z16-NEXT: cpsdr %f0, %f1, %f0 +; Z16-NEXT: br %r14 + %bh = fptrunc fp128 %b to half + %res = call half @llvm.copysign.f16(half %a, half %bh) + ret half %res +} + +; Test copysign with an f32 result and half sign argument. 
+define float @f4(float %a, half %b) { +; CHECK-LABEL: f4: +; CHECK: # %bb.0: +; CHECK-NEXT: cpsdr %f0, %f2, %f0 +; CHECK-NEXT: br %r14 %bf = fpext half %b to float %res = call float @llvm.copysign.f32(float %a, float %bf) ret float %res } -; Test f64 copies in which the sign comes from an f16. -define double @f4(double %a, half %b) { -; CHECK-LABEL: f4: -; CHECK: brasl %r14, __extendhfdf2@PLT -; CHECK: cpsdr %f0, %f0, %f8 -; CHECK: br %r14 +; Test copysign with an f64 result and half sign argument. +define double @f5(double %a, half %b) { +; CHECK-LABEL: f5: +; CHECK: # %bb.0: +; CHECK-NEXT: cpsdr %f0, %f2, %f0 +; CHECK-NEXT: br %r14 %bd = fpext half %b to double %res = call double @llvm.copysign.f64(double %a, double %bd) ret double %res } + +; Test copysign with an f128 result and half sign argument. +define fp128 @f6(fp128 %a, half %b) { +; Z10-LABEL: f6: +; Z10: # %bb.0: +; Z10-NEXT: ld %f1, 0(%r3) +; Z10-NEXT: ld %f3, 8(%r3) +; Z10-NEXT: cpsdr %f1, %f0, %f1 +; Z10-NEXT: std %f1, 0(%r2) +; Z10-NEXT: std %f3, 8(%r2) +; Z10-NEXT: br %r14 +; +; Z16-LABEL: f6: +; Z16: # %bb.0: +; Z16-NEXT: aghi %r15, -168 +; Z16-NEXT: .cfi_def_cfa_offset 328 +; Z16-NEXT: vl %v1, 0(%r3), 3 +; Z16-NEXT: vsteh %v0, 164(%r15), 0 +; Z16-NEXT: tm 164(%r15), 128 +; Z16-NEXT: je .LBB6_2 +; Z16-NEXT: # %bb.1: +; Z16-NEXT: wflnxb %v0, %v1 +; Z16-NEXT: j .LBB6_3 +; Z16-NEXT: .LBB6_2: +; Z16-NEXT: wflpxb %v0, %v1 +; Z16-NEXT: .LBB6_3: +; Z16-NEXT: vst %v0, 0(%r2), 3 +; Z16-NEXT: aghi %r15, 168 +; Z16-NEXT: br %r14 + %bd = fpext half %b to fp128 + %res = call fp128 @llvm.copysign.f128(fp128 %a, fp128 %bd) + ret fp128 %res +} From ae1bea6639d65fd469fac1e7eb9edeeab11bebe3 Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Mon, 7 Apr 2025 10:21:49 -0600 Subject: [PATCH 09/12] Wait with FCOPYSIGN optimization. --- .../Target/SystemZ/SystemZISelLowering.cpp | 19 +---------- llvm/lib/Target/SystemZ/SystemZISelLowering.h | 1 - llvm/test/CodeGen/SystemZ/fp-copysign-03.ll | 34 +++++++++++++++++-- 3 files changed, 32 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 2a59f76ac2353..fdbfc196e8fbc 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -553,7 +553,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, LibCall); setOperationAction(ISD::BITCAST, MVT::i16, Custom); setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom); - setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom); + setOperationAction(ISD::FCOPYSIGN, MVT::f16, Legal); } for (unsigned I = MVT::FIRST_FP_VALUETYPE; @@ -6989,21 +6989,6 @@ SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op, return getCCResult(DAG, Intr); } -SDValue SystemZTargetLowering::lowerFCOPYSIGN(SDValue Op, - SelectionDAG &DAG) const { - MVT VT = Op.getSimpleValueType(); - SDValue ValOp = Op.getOperand(0); - SDValue SignOp = Op.getOperand(1); - - // Remove the rounding which would result in a libcall for half. 
- if (VT == MVT::f16 && SignOp.getOpcode() == ISD::FP_ROUND) { - SDValue WideOp = SignOp.getOperand(0); - return DAG.getNode(ISD::FCOPYSIGN, SDLoc(Op), VT, ValOp, WideOp); - } - - return Op; // Legal -} - SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); @@ -7175,8 +7160,6 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerStoreF16(Op, DAG); case ISD::IS_FPCLASS: return lowerIS_FPCLASS(Op, DAG); - case ISD::FCOPYSIGN: - return lowerFCOPYSIGN(Op, DAG); case ISD::GET_ROUNDING: return lowerGET_ROUNDING(Op, DAG); case ISD::READCYCLECOUNTER: diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index e6a2a74221b97..f438332c2dc4f 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -755,7 +755,6 @@ class SystemZTargetLowering : public TargetLowering { SDValue lowerStoreF16(SDValue Op, SelectionDAG &DAG) const; SDValue lowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; SDValue lowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/SystemZ/fp-copysign-03.ll b/llvm/test/CodeGen/SystemZ/fp-copysign-03.ll index 9625c8a7353b2..909519e8ace55 100644 --- a/llvm/test/CodeGen/SystemZ/fp-copysign-03.ll +++ b/llvm/test/CodeGen/SystemZ/fp-copysign-03.ll @@ -43,18 +43,46 @@ define half @f2(half %a, double %b) { } ; Test copysign with an f16 result and f128 sign argument. +; TODO: Let the DAGCombiner remove the fp_round. define half @f3(half %a, fp128 %b) { ; Z10-LABEL: f3: ; Z10: # %bb.0: +; Z10-NEXT: stmg %r14, %r15, 112(%r15) +; Z10-NEXT: .cfi_offset %r14, -48 +; Z10-NEXT: .cfi_offset %r15, -40 +; Z10-NEXT: aghi %r15, -184 +; Z10-NEXT: .cfi_def_cfa_offset 344 +; Z10-NEXT: std %f8, 176(%r15) # 8-byte Spill +; Z10-NEXT: .cfi_offset %f8, -168 ; Z10-NEXT: ld %f1, 0(%r2) ; Z10-NEXT: ld %f3, 8(%r2) -; Z10-NEXT: cpsdr %f0, %f1, %f0 +; Z10-NEXT: ler %f8, %f0 +; Z10-NEXT: la %r2, 160(%r15) +; Z10-NEXT: std %f1, 160(%r15) +; Z10-NEXT: std %f3, 168(%r15) +; Z10-NEXT: brasl %r14, __trunctfhf2@PLT +; Z10-NEXT: cpsdr %f0, %f0, %f8 +; Z10-NEXT: ld %f8, 176(%r15) # 8-byte Reload +; Z10-NEXT: lmg %r14, %r15, 296(%r15) ; Z10-NEXT: br %r14 ; ; Z16-LABEL: f3: ; Z16: # %bb.0: -; Z16-NEXT: vl %v1, 0(%r2), 3 -; Z16-NEXT: cpsdr %f0, %f1, %f0 +; Z16-NEXT: stmg %r14, %r15, 112(%r15) +; Z16-NEXT: .cfi_offset %r14, -48 +; Z16-NEXT: .cfi_offset %r15, -40 +; Z16-NEXT: aghi %r15, -184 +; Z16-NEXT: .cfi_def_cfa_offset 344 +; Z16-NEXT: std %f8, 176(%r15) # 8-byte Spill +; Z16-NEXT: .cfi_offset %f8, -168 +; Z16-NEXT: ldr %f8, %f0 +; Z16-NEXT: vl %v0, 0(%r2), 3 +; Z16-NEXT: la %r2, 160(%r15) +; Z16-NEXT: vst %v0, 160(%r15), 3 +; Z16-NEXT: brasl %r14, __trunctfhf2@PLT +; Z16-NEXT: cpsdr %f0, %f0, %f8 +; Z16-NEXT: ld %f8, 176(%r15) # 8-byte Reload +; Z16-NEXT: lmg %r14, %r15, 296(%r15) ; Z16-NEXT: br %r14 %bh = fptrunc fp128 %b to half %res = call half @llvm.copysign.f16(half %a, half %bh) From fd0f04c11da367b0f4e90b5b34ac766b35c4a164 Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Wed, 16 Apr 2025 12:04:46 +0200 Subject: [PATCH 10/12] Fix typo --- llvm/test/CodeGen/SystemZ/fp-conv-09.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/SystemZ/fp-conv-09.ll b/llvm/test/CodeGen/SystemZ/fp-conv-09.ll index 423bbf285e9e1..c29a202807c69 100644 --- 
a/llvm/test/CodeGen/SystemZ/fp-conv-09.ll +++ b/llvm/test/CodeGen/SystemZ/fp-conv-09.ll @@ -2,7 +2,7 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -; Test f316->i32. +; Test f16->i32. define i32 @f0(half %f) { ; CHECK-LABEL: f0: ; CHECK: brasl %r14, __extendhfsf2@PLT From 6bad83ab90fdd6e4b3250b4dc6d1d5232c58e8f3 Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Wed, 16 Apr 2025 12:46:18 +0200 Subject: [PATCH 11/12] Update SystemZScheduleZ17.td --- llvm/lib/Target/SystemZ/SystemZScheduleZ17.td | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ17.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ17.td index bd52627f636a7..3b5ce6c9b5a0e 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ17.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ17.td @@ -827,12 +827,12 @@ def : InstRW<[], (instregex "Insn.*")>; //===----------------------------------------------------------------------===// // Load zero -def : InstRW<[WLat1, FXb, NormalGr], (instregex "LZ(DR|ER)$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LZ(DR|ER|ER_16)$")>; def : InstRW<[WLat2, FXb2, Cracked], (instregex "LZXR$")>; // Load -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "LER$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "LD(R|R32|GR)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "LER(16)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LD(R|R16|R32|GR)$")>; def : InstRW<[WLat3, FXb, NormalGr], (instregex "LGDR$")>; def : InstRW<[WLat2, FXb2, GroupAlone], (instregex "LXR$")>; @@ -841,13 +841,13 @@ def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>; def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXBR$")>; // Copy sign -def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s|h)(d|s|h)$")>; //===----------------------------------------------------------------------===// // FP: Load instructions //===----------------------------------------------------------------------===// -def : InstRW<[WLat2LSU, VecXsPm, LSU, NormalGr], (instregex "LE(Y)?$")>; +def : InstRW<[WLat2LSU, VecXsPm, LSU, NormalGr], (instregex "L(E16|E)(Y)?$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LD(Y|E32)?$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LX$")>; @@ -855,7 +855,7 @@ def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LX$")>; // FP: Store instructions //===----------------------------------------------------------------------===// -def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(E|D)(Y)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(E16|E|D)(Y)?$")>; def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STX$")>; //===----------------------------------------------------------------------===// @@ -894,7 +894,7 @@ def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "CL(F|G)XBR$")>; // Load Complement / Negative / Positive def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "L(C|N|P)(E|D)BR$")>; -def : InstRW<[WLat1, FXb, NormalGr], (instregex "L(C|N|P)DFR(_32)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "L(C|N|P)DFR(_32|_16)?$")>; def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XBR$")>; // Square root @@ -1252,7 +1252,7 @@ def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLEI(B|F|G|H)$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(Align)?$")>; def : InstRW<[LSULatency, LSU, 
NormalGr], (instregex "VL(L|BB)$")>; -def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(32|64)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(16|32|64)$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEZ(B|F|G|H|LF)?$")>; def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLREP(B|F|G|H)?$")>; def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr], @@ -1267,7 +1267,7 @@ def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLRL(R)?$")>; // Vector: Stores //===----------------------------------------------------------------------===// -def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(Align|L|32|64)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(Align|L|16|32|64)?$")>; def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTE(F|G)$")>; def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTE(B|H)$")>; def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM(Align)?$")>; @@ -1520,8 +1520,8 @@ def : InstRW<[WLat3, VecDFX, NormalGr], (instregex "WF(C|K)XB$")>; // Vector: Floating-point insertion and extraction //===----------------------------------------------------------------------===// -def : InstRW<[WLat1, FXb, NormalGr], (instregex "LEFR$")>; -def : InstRW<[WLat3, FXb, NormalGr], (instregex "LFER$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LEFR(_16)?$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "LFER(_16)?$")>; //===----------------------------------------------------------------------===// // Vector: String instructions From 0c2b0c832be3f6c1186e0b6c2d8a2813e2f5b898 Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Wed, 16 Apr 2025 18:41:03 +0200 Subject: [PATCH 12/12] Minor updates of two CodeGen/SystemZ tests --- llvm/test/CodeGen/SystemZ/fp-half-vector.ll | 8 ++++---- llvm/test/CodeGen/SystemZ/fp-half.ll | 5 ++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/llvm/test/CodeGen/SystemZ/fp-half-vector.ll b/llvm/test/CodeGen/SystemZ/fp-half-vector.ll index ba18ac90e6228..4997c5b0c617d 100644 --- a/llvm/test/CodeGen/SystemZ/fp-half-vector.ll +++ b/llvm/test/CodeGen/SystemZ/fp-half-vector.ll @@ -361,18 +361,18 @@ define <2 x half> @fun2(<2 x half> %Op) { ; VECTOR-NEXT: ldr %f0, %f2 ; VECTOR-NEXT: brasl %r14, __extendhfdf2@PLT ; VECTOR-NEXT: # kill: def $f0d killed $f0d def $v0 -; VECTOR-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill +; VECTOR-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill ; VECTOR-NEXT: ldr %f0, %f8 ; VECTOR-NEXT: brasl %r14, __extendhfdf2@PLT -; VECTOR-NEXT: vl %v1, 160(%r15), 3 # 16-byte Folded Reload +; VECTOR-NEXT: vl %v1, 160(%r15), 3 # 16-byte Reload ; VECTOR-NEXT: # kill: def $f0d killed $f0d def $v0 ; VECTOR-NEXT: vmrhg %v0, %v0, %v1 ; VECTOR-NEXT: vfadb %v0, %v0, %v0 -; VECTOR-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill +; VECTOR-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill ; VECTOR-NEXT: # kill: def $f0d killed $f0d killed $v0 ; VECTOR-NEXT: brasl %r14, __truncdfhf2@PLT ; VECTOR-NEXT: ldr %f8, %f0 -; VECTOR-NEXT: vl %v0, 160(%r15), 3 # 16-byte Folded Reload +; VECTOR-NEXT: vl %v0, 160(%r15), 3 # 16-byte Reload ; VECTOR-NEXT: vrepg %v0, %v0, 1 ; VECTOR-NEXT: # kill: def $f0d killed $f0d killed $v0 ; VECTOR-NEXT: brasl %r14, __truncdfhf2@PLT diff --git a/llvm/test/CodeGen/SystemZ/fp-half.ll b/llvm/test/CodeGen/SystemZ/fp-half.ll index 83b66052aa123..f479e405b04e9 100644 --- a/llvm/test/CodeGen/SystemZ/fp-half.ll +++ b/llvm/test/CodeGen/SystemZ/fp-half.ll @@ -161,13 +161,12 @@ define half @fun2(half %Op0, half %Op1) { ; VECTOR-NEXT: la 
%r2, 176(%r15) ; VECTOR-NEXT: ldr %f8, %f2 ; VECTOR-NEXT: brasl %r14, __extendhftf2@PLT -; VECTOR-NEXT: vl %v0, 176(%r15), 3 -; VECTOR-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill +; VECTOR-NEXT: mvc 160(16,%r15), 176(%r15) ; VECTOR-NEXT: la %r2, 192(%r15) ; VECTOR-NEXT: ldr %f0, %f8 ; VECTOR-NEXT: brasl %r14, __extendhftf2@PLT ; VECTOR-NEXT: vl %v0, 192(%r15), 3 -; VECTOR-NEXT: vl %v1, 160(%r15), 3 # 16-byte Folded Reload +; VECTOR-NEXT: vl %v1, 160(%r15), 3 # 16-byte Reload ; VECTOR-NEXT: wfaxb %v0, %v1, %v0 ; VECTOR-NEXT: la %r2, 208(%r15) ; VECTOR-NEXT: vst %v0, 208(%r15), 3
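
A closing note on the FCOPYSIGN change (PATCH 09) and the TODO left in
fp-copysign-03.ll: with the custom f16 lowering removed, f3 currently emits a
__trunctfhf2 libcall even though only the sign bit of the fp128 value is
needed. The plan is to instead let the generic DAGCombiner fold
fcopysign(X, fp_round(Y)) -> fcopysign(X, Y) fire for f16. Below is a minimal
sketch of that fold, mirroring the lowerFCOPYSIGN body deleted above; the
helper name is hypothetical, and the in-tree combine (in
DAGCombiner::visitFCOPYSIGN) carries extra legality checks not shown here.

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Sketch: fcopysign(X, fp_round(Y)) -> fcopysign(X, Y).
// FCOPYSIGN reads only the sign bit of its second operand, and that operand
// is allowed to be wider than the result, so the rounding (a libcall for f16
// on SystemZ) can be dropped.
static SDValue combineFCOPYSIGNSketch(SDNode *N, SelectionDAG &DAG) {
  SDValue Val = N->getOperand(0);
  SDValue Sign = N->getOperand(1);
  if (Sign.getOpcode() == ISD::FP_ROUND)
    // Reuse the wide value directly as the sign source.
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), N->getValueType(0), Val,
                       Sign.getOperand(0));
  return SDValue(); // No simplification.
}

The mixed-width form is safe on SystemZ because CPSDR copies only the sign
bit from the source register, whatever the width of the value it holds.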