Fix for a bunch of fixed point integer SPIR-V instructions (#1213)

pauzinl · web-flow · commit a75a62e617c0 · 2021-09-20T17:13:13.000+03:00
If the return type of an instruction is wider than 64 bits, the
instruction returns its result via an 'sret' argument added to the
argument list. Here we reverse this by removing the 'sret' argument
and restoring the original return type.

TODO: the 'sret' parameter attribute is currently lost during reverse
translation in the case described above; this still needs to be fixed.
diff --git a/lib/SPIRV/SPIRVWriter.cpp b/lib/SPIRV/SPIRVWriter.cpp
@@ -4127,19 +4127,27 @@ LLVMToSPIRVBase::transBuiltinToInstWithoutDecoration(Op OC, CallInst *CI,
     // Literal S Literal I Literal rI Literal Q Literal O
 
     Type *ResTy = CI->getType();
+
+    auto OpItr = CI->value_op_begin();
+    auto OpEnd = OpItr + CI->getNumArgOperands();
+
+    // If the return type of an instruction is wider than 64-bit, then this
+    // instruction will return via 'sret' argument added into the arguments
+    // list. Here we reverse this, removing 'sret' argument and restoring
+    // the original return type.
+    if (CI->hasStructRetAttr()) {
+      assert(ResTy->isVoidTy() && "Return type is not void");
+      ResTy = cast<PointerType>(OpItr->getType())->getElementType();
+      OpItr++;
+    }
+
     SPIRVValue *Input =
-        transValue(CI->getOperand(0) /* A - integer input of any width */, BB);
-
-    std::vector<Value *> Operands = {
-        CI->getOperand(1) /* S - bool value, indicator of signedness */,
-        CI->getOperand(2) /* I - location of the fixed-point of the input */,
-        CI->getOperand(3) /* rI - location of the fixed-point of the result*/,
-        CI->getOperand(4) /* Quantization mode */,
-        CI->getOperand(5) /* Overflow mode */};
+        transValue(*OpItr++ /* A - integer input of any width */, BB);
+
     std::vector<SPIRVWord> Literals;
-    for (auto *O : Operands) {
-      Literals.push_back(cast<llvm::ConstantInt>(O)->getZExtValue());
-    }
+    std::transform(OpItr, OpEnd, std::back_inserter(Literals), [](auto *O) {
+      return cast<llvm::ConstantInt>(O)->getZExtValue();
+    });
 
     return BM->addFixedPointIntelInst(OC, transType(ResTy), Input, Literals,
                                       BB);
@@ -4207,6 +4215,16 @@ LLVMToSPIRVBase::transBuiltinToInstWithoutDecoration(Op OC, CallInst *CI,
     auto OpItr = CI->value_op_begin();
     auto OpEnd = OpItr + CI->getNumArgOperands();
 
+    // If the return type of an instruction is wider than 64-bit, then this
+    // instruction will return via 'sret' argument added into the arguments
+    // list. Here we reverse this, removing 'sret' argument and restoring
+    // the original return type.
+    if (CI->hasStructRetAttr()) {
+      assert(ResTy->isVoidTy() && "Return type is not void");
+      ResTy = cast<PointerType>(OpItr->getType())->getElementType();
+      OpItr++;
+    }
+
     SPIRVValue *InA = transValue(*OpItr++ /* A - input */, BB);
 
     std::vector<SPIRVWord> Literals;
@@ -4265,6 +4283,16 @@ LLVMToSPIRVBase::transBuiltinToInstWithoutDecoration(Op OC, CallInst *CI,
     auto OpItr = CI->value_op_begin();
     auto OpEnd = OpItr + CI->getNumArgOperands();
 
+    // If the return type of an instruction is wider than 64-bit, then this
+    // instruction will return via 'sret' argument added into the arguments
+    // list. Here we reverse this, removing 'sret' argument and restoring
+    // the original return type.
+    if (CI->hasStructRetAttr()) {
+      assert(ResTy->isVoidTy() && "Return type is not void");
+      ResTy = cast<PointerType>(OpItr->getType())->getElementType();
+      OpItr++;
+    }
+
     SPIRVValue *InA = transValue(*OpItr++ /* A - input */, BB);
 
     std::vector<SPIRVWord> Literals;
diff --git a/test/transcoding/capability-arbitrary-precision-fixed-point-numbers.ll b/test/transcoding/capability-arbitrary-precision-fixed-point-numbers.ll
@@ -124,6 +124,7 @@
 ; CHECK-SPIRV: 4 TypeInt [[Ty_64:[0-9]+]] 64 0
 ; CHECK-SPIRV: 4 TypeInt [[Ty_44:[0-9]+]] 44 0
 ; CHECK-SPIRV: 4 TypeInt [[Ty_34:[0-9]+]] 34 0
+; CHECK-SPIRV: 4 TypeInt [[Ty_66:[0-9]+]] 66 0
 
 ; CHECK-SPIRV: 6 Load [[Ty_13]] [[Sqrt_InId:[0-9]+]]
 ; CHECK-SPIRV-NEXT: 9 FixedSqrtINTEL [[Ty_5]] [[#]] [[Sqrt_InId]] 0 2 2 0 0
@@ -162,6 +163,9 @@
 ; CHECK-SPIRV: 6 Load [[Ty_44]] [[Exp_InId:[0-9]+]]
 ; CHECK-SPIRV-NEXT: 9 FixedExpINTEL [[Ty_34]] [[#]] [[Exp_InId]] 0 20 20 0 0
 
+; CHECK-SPIRV: 6 Load [[Ty_34]] [[SinCos_InId:[0-9]+]]
+; CHECK-SPIRV-NEXT: 9 FixedSinCosINTEL [[Ty_66]] [[#]] [[SinCos_InId]] 1 3 2 0 0
+
 ; CHECK-LLVM: call i5 @intel_arbitrary_fixed_sqrt.i5.i13(i13 %[[#]], i1 false, i32 2, i32 2, i32 0, i32 0)
 ; CHECK-LLVM: call i13 @intel_arbitrary_fixed_sqrt.i13.i5(i5 %[[#]], i1 false, i32 2, i32 2, i32 0, i32 0)
 ; CHECK-LLVM: call i13 @intel_arbitrary_fixed_sqrt.i13.i5(i5 %[[#]], i1 false, i32 2, i32 2, i32 0, i32 0)
@@ -175,6 +179,7 @@
 ; CHECK-LLVM: call i10 @intel_arbitrary_fixed_sincospi.i10.i13(i13 %[[#]], i1 false, i32 2, i32 2, i32 0, i32 0)
 ; CHECK-LLVM: call i44 @intel_arbitrary_fixed_log.i44.i64(i64 %[[#]], i1 true, i32 24, i32 22, i32 0, i32 0)
 ; CHECK-LLVM: call i34 @intel_arbitrary_fixed_exp.i34.i44(i44 %[[#]], i1 false, i32 20, i32 20, i32 0, i32 0)
+; CHECK-LLVM: call i66 @intel_arbitrary_fixed_sincos.i66.i34(i34 %[[#]], i1 true, i32 3, i32 2, i32 0, i32 0)
 
 ; ModuleID = 'ap_fixed.cpp'
 source_filename = "ap_fixed.cpp"
@@ -205,6 +210,8 @@ $_Z3logILi64ELi44ELb1ELi24ELi22EEvv = comdat any
 
 $_Z3expILi44ELi34ELb0ELi20ELi20EEvv = comdat any
 
+$_Z7sin_cosILi31ELi20ELb1ELi10ELi12EEvv_ = comdat any
+
 ; Function Attrs: norecurse
 define dso_local spir_kernel void @_ZTSZ4mainE15kernel_function() #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !4 !kernel_arg_type !4 !kernel_arg_base_type !4 !kernel_arg_type_qual !4 {
 entry:
@@ -237,6 +244,7 @@ entry:
   call spir_func void @_Z10sin_cos_piILi13ELi5ELb0ELi2ELi2EEvv()
   call spir_func void @_Z3logILi64ELi44ELb1ELi24ELi22EEvv()
   call spir_func void @_Z3expILi44ELi34ELb0ELi20ELi20EEvv()
+  call spir_func void @_Z7sin_cosILi31ELi20ELb1ELi10ELi12EEvv_()
   ret void
 }
 
@@ -476,6 +484,28 @@ entry:
   ret void
 }
 
+; Function Attrs: norecurse nounwind
+define linkonce_odr dso_local spir_func void @_Z7sin_cosILi31ELi20ELb1ELi10ELi12EEvv_() #3 comdat {
+entry:
+  %0 = alloca i34, align 8
+  %1 = addrspacecast i34* %0 to i34 addrspace(4)*
+  %2 = alloca i66, align 8
+  %3 = addrspacecast i66* %2 to i66 addrspace(4)*
+  %4 = bitcast i34* %0 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* %4)
+  %5 = bitcast i66* %2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* %5)
+  %6 = load i34, i34 addrspace(4)* %1, align 8
+  call spir_func void @_Z24__spirv_FixedSinCosINTELILi34ELi66EEU7_ExtIntIXmlLi2ET0_EEiU7_ExtIntIXT_EEibiiii(i66 addrspace(4)* sret(i66) align 8 %3, i34 %6, i1 zeroext true, i32 3, i32 2, i32 0, i32 0) #5
+  %7 = load i66, i66 addrspace(4)* %3, align 8
+  store i66 %7, i66 addrspace(4)* %3, align 8
+  %8 = bitcast i66* %2 to i8*
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* %8)
+  %9 = bitcast i34* %0 to i8*
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* %9)
+  ret void
+}
+
 ; Function Attrs: nounwind
 declare dso_local spir_func signext i5 @_Z22__spirv_FixedSqrtINTELILi13ELi5EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i13 signext, i1 zeroext, i32, i32, i32, i32) #4
 
@@ -512,6 +542,9 @@ declare dso_local spir_func i44 @_Z21__spirv_FixedLogINTELILi64ELi44EEU7_ExtIntI
 ; Function Attrs: nounwind
 declare dso_local spir_func i34 @_Z21__spirv_FixedExpINTELILi44ELi34EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i44, i1 zeroext, i32, i32, i32, i32) #4
 
+; Function Attrs: nounwind
+declare dso_local spir_func void @_Z24__spirv_FixedSinCosINTELILi34ELi66EEU7_ExtIntIXmlLi2ET0_EEiU7_ExtIntIXT_EEibiiii(i66 addrspace(4)* sret(i66) align 8, i34, i1 zeroext, i32, i32, i32, i32) #4
+
 attributes #0 = { norecurse "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="ap_fixed.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { argmemonly nounwind willreturn }
 attributes #2 = { inlinehint norecurse "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/transcoding/capability-arbitrary-precision-floating-point.ll b/test/transcoding/capability-arbitrary-precision-floating-point.ll
@@ -466,6 +466,7 @@
 ; CHECK-SPIRV: 4 TypeInt [[Ty_54:[0-9]+]] 54 0
 ; CHECK-SPIRV: 4 TypeInt [[Ty_56:[0-9]+]] 56 0
 ; CHECK-SPIRV: 4 TypeInt [[Ty_12:[0-9]+]] 12 0
+; CHECK-SPIRV: 4 TypeInt [[Ty_66:[0-9]+]] 66 0
 ; CHECK-SPIRV: 2 TypeBool [[Ty_Bool:[0-9]+]]
 
 target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
@@ -534,6 +535,8 @@ define internal spir_func void @"_ZZ4mainENK3$_0clEv"(%"class._ZTSZ4mainE3$_0.an
   call spir_func void @_Z12ap_float_powILi8ELi8ELi9ELi9ELi10ELi10EEvv()
   call spir_func void @_Z13ap_float_powrILi18ELi35ELi19ELi35ELi20ELi35EEvv()
   call spir_func void @_Z13ap_float_pownILi4ELi7ELi10ELi5ELi9EEvv()
+  call spir_func void @_Z15ap_float_sincosILi8ELi18ELi10ELi20EEvv_()
+  call spir_func void @_Z14ap_float_atan2ILi7ELi16ELi7ELi17ELi8ELi18EEvv_()
   ret void
 }
 
@@ -1562,6 +1565,60 @@ define linkonce_odr dso_local spir_func void @_Z13ap_float_pownILi4ELi7ELi10ELi5
   ret void
 }
 
+; Function Attrs: norecurse nounwind
+define linkonce_odr dso_local spir_func void @_Z15ap_float_sincosILi8ELi18ELi10ELi20EEvv_() #3 {
+  %1 = alloca i34, align 8
+  %2 = addrspacecast i34* %1 to i34 addrspace(4)*
+  %3 = alloca i66, align 8
+  %4 = addrspacecast i66* %3 to i66 addrspace(4)*
+  %5 = bitcast i34* %1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* %5)
+  %6 = bitcast i66* %3 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* %6)
+  %7 = load i34, i34 addrspace(4)* %2, align 8
+  call spir_func void @_Z33__spirv_ArbitraryFloatSinCosINTELILi34ELi66EEU7_ExtIntIXmlLi2ET0_EEiU7_ExtIntIXT_EEiiiiii(i66 addrspace(4)* sret(i66) align 8 %4, i34 %7, i32 18, i32 20, i32 0, i32 2, i32 1) #5
+; CHECK-SPIRV: 6 Load [[Ty_34]] [[SinCos_AId:[0-9]+]]
+; CHECK-SPIRV-NEXT: 9 ArbitraryFloatSinCosINTEL [[Ty_66]] [[#]] [[SinCos_AId]] 18 20 0 2 1
+; CHECK-LLVM: call i66 @intel_arbitrary_float_sincos.i66.i34(i34 %[[#]], i32 18, i32 20, i32 0, i32 2, i32 1)
+  %8 = load i66, i66 addrspace(4)* %4, align 8
+  store i66 %8, i66 addrspace(4)* %4, align 8
+  %9 = bitcast i34* %1 to i8*
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* %9)
+  %10 = bitcast i66* %3 to i8*
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* %10)
+  ret void
+}
+
+; Function Attrs: norecurse nounwind
+define linkonce_odr dso_local spir_func void @_Z14ap_float_atan2ILi7ELi16ELi7ELi17ELi8ELi18EEvv_() #3 {
+  %1 = alloca i24, align 4
+  %2 = alloca i25, align 4
+  %3 = alloca i66, align 8
+  %4 = addrspacecast i66* %3 to i66 addrspace(4)*
+  %5 = bitcast i24* %1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* %5) #5
+  %6 = bitcast i25* %2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* %6) #5
+  %7 = bitcast i66* %3 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* %7) #5
+  %8 = load i24, i24* %1, align 4, !tbaa !91
+  %9 = load i25, i25* %2, align 4, !tbaa !13
+  call spir_func void @_Z32__spirv_ArbitraryFloatATan2INTELILi24ELi25ELi66EEU7_ExtIntIXT1_EEiU7_ExtIntIXT_EEiiU7_ExtIntIXT0_EEiiiiii(i66 addrspace(4)* sret(i66) align 8 %4, i24 signext %8, i32 16, i25 signext %9, i32 17, i32 18, i32 0, i32 2, i32 1) #5
+; CHECK-SPIRV: 6 Load [[Ty_24]] [[ATan2_AId:[0-9]+]]
+; CHECK-SPIRV-NEXT: 6 Load [[Ty_25]] [[ATan2_BId:[0-9]+]]
+; CHECK-SPIRV-NEXT: 11 ArbitraryFloatATan2INTEL [[Ty_66]] [[#]] [[ATan2_AId]] 16 [[ATan2_BId]] 17 18 0 2 1
+; CHECK-LLVM: call i66 @intel_arbitrary_float_atan2.i66.i24.i25(i24 %[[#]], i32 16, i25 %[[#]], i32 17, i32 18, i32 0, i32 2, i32 1)
+  %10 = load i66, i66 addrspace(4)* %4, align 8
+  store i66 %10, i66 addrspace(4)* %4, align 8
+  %11 = bitcast i66* %3 to i8*
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* %11) #5
+  %12 = bitcast i25* %2 to i8*
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* %12) #5
+  %13 = bitcast i24* %1 to i8*
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* %13) #5
+  ret void
+}
+
 ; Function Attrs: nounwind
 declare dso_local spir_func i40 @_Z31__spirv_ArbitraryFloatCastINTELILi40ELi40EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i40, i32, i32, i32, i32, i32) #4
 
@@ -1688,6 +1745,12 @@ declare dso_local spir_func i56 @_Z31__spirv_ArbitraryFloatPowRINTELILi54ELi55EL
 ; Function Attrs: nounwind
 declare dso_local spir_func signext i15 @_Z31__spirv_ArbitraryFloatPowNINTELILi12ELi10ELi15EEU7_ExtIntIXT1_EEiU7_ExtIntIXT_EEiiU7_ExtIntIXT0_EEiiiii(i12 signext, i32, i10 signext, i32, i32, i32, i32) #4
 
+; Function Attrs: nounwind
+declare dso_local spir_func void @_Z33__spirv_ArbitraryFloatSinCosINTELILi34ELi66EEU7_ExtIntIXmlLi2ET0_EEiU7_ExtIntIXT_EEiiiiii(i66 addrspace(4)* sret(i66) align 8, i34, i32, i32, i32, i32, i32) #4
+
+; Function Attrs: nounwind
+declare dso_local spir_func void @_Z32__spirv_ArbitraryFloatATan2INTELILi24ELi25ELi66EEU7_ExtIntIXT1_EEiU7_ExtIntIXT_EEiiU7_ExtIntIXT0_EEiiiiii(i66 addrspace(4)* sret(i66) align 8, i24 signext, i32, i25 signext, i32, i32, i32, i32, i32) #4
+
 attributes #0 = { norecurse "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { argmemonly nounwind willreturn }
 attributes #2 = { inlinehint norecurse "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }