JuliaLLVM · maleadt · Feb 1, 2024 · Jan 31, 2024 · Jan 31, 2024
diff --git a/deps/LLVMExtra/include/LLVMExtra.h b/deps/LLVMExtra/include/LLVMExtra.h
@@ -191,5 +191,28 @@ LLVMBool LLVMPostDominatorTreeInstructionDominates(LLVMPostDominatorTreeRef Tree
                                                    LLVMValueRef InstA, LLVMValueRef InstB);
 
 
+// fastmath (backport of llvm/llvm-project#75123)
+// Only declared when building against LLVM < 18; as a backport, newer LLVM
+// releases presumably provide these declarations themselves (TODO confirm).
+#if LLVM_VERSION_MAJOR < 18
+// Fast-math flags expressed as a bitmask: every flag owns one bit, so values
+// can be OR-ed together and carried in an LLVMFastMathFlags.
+enum {
+  LLVMFastMathAllowReassoc = (1 << 0),  // allow reassociation of operations
+  LLVMFastMathNoNaNs = (1 << 1),  // assume arguments and results are not NaN
+  LLVMFastMathNoInfs = (1 << 2),  // assume arguments and results are not Inf
+  LLVMFastMathNoSignedZeros = (1 << 3),  // sign of zero is insignificant
+  LLVMFastMathAllowReciprocal = (1 << 4),  // allow reciprocal instead of division
+  LLVMFastMathAllowContract = (1 << 5),  // allow contraction of operations
+  LLVMFastMathApproxFunc = (1 << 6),  // allow approximate function substitutes
+  LLVMFastMathNone = 0,  // no flag set
+  // union of the seven individual flags above
+  LLVMFastMathAll = LLVMFastMathAllowReassoc | LLVMFastMathNoNaNs | LLVMFastMathNoInfs |
+                    LLVMFastMathNoSignedZeros | LLVMFastMathAllowReciprocal |
+                    LLVMFastMathAllowContract | LLVMFastMathApproxFunc,
+};
+// Plain unsigned so that arbitrary combinations of the enumerators fit.
+typedef unsigned LLVMFastMathFlags;
+
+// Read the fast-math flags of FPMathInst as a bitmask of the enumerators above.
+// The implementation casts the value to an Instruction, so pass instructions only.
+LLVMFastMathFlags LLVMGetFastMathFlags(LLVMValueRef FPMathInst);
+// Apply the flags in FMF (a bitmask of the enumerators above) to FPMathInst.
+// The implementation casts the value to an Instruction, so pass instructions only.
+void LLVMSetFastMathFlags(LLVMValueRef FPMathInst, LLVMFastMathFlags FMF);
+// Return non-zero when Inst is an FPMathOperator, i.e. may carry fast-math flags.
+LLVMBool LLVMCanValueUseFastMathFlags(LLVMValueRef Inst);
+#endif
+
+
 LLVM_C_EXTERN_C_END
 #endif
diff --git a/deps/LLVMExtra/lib/Core.cpp b/deps/LLVMExtra/lib/Core.cpp
@@ -578,3 +578,59 @@ LLVMBool LLVMPostDominatorTreeInstructionDominates(LLVMPostDominatorTreeRef Tree
                                                    LLVMValueRef InstA, LLVMValueRef InstB) {
   return unwrap(Tree)->dominates(unwrap<Instruction>(InstA), unwrap<Instruction>(InstB));
 }
+
+
+// fastmath (backport of llvm/llvm-project#75123)
+
+#if LLVM_VERSION_MAJOR < 18
+
+// Convert a C API fast-math bitmask into LLVM's C++ FastMathFlags object,
+// enabling each C++ flag exactly when its bit is present in FMF.
+static FastMathFlags mapFromLLVMFastMathFlags(LLVMFastMathFlags FMF) {
+  auto IsSet = [FMF](LLVMFastMathFlags Bit) { return (FMF & Bit) != 0; };
+
+  FastMathFlags Result;
+  Result.setAllowReassoc(IsSet(LLVMFastMathAllowReassoc));
+  Result.setNoNaNs(IsSet(LLVMFastMathNoNaNs));
+  Result.setNoInfs(IsSet(LLVMFastMathNoInfs));
+  Result.setNoSignedZeros(IsSet(LLVMFastMathNoSignedZeros));
+  Result.setAllowReciprocal(IsSet(LLVMFastMathAllowReciprocal));
+  Result.setAllowContract(IsSet(LLVMFastMathAllowContract));
+  Result.setApproxFunc(IsSet(LLVMFastMathApproxFunc));
+  return Result;
+}
+
+// Convert LLVM's C++ FastMathFlags object into the C API bitmask: each enabled
+// C++ flag contributes its bit, disabled flags contribute nothing.
+static LLVMFastMathFlags mapToLLVMFastMathFlags(FastMathFlags FMF) {
+  auto BitIf = [](bool Enabled, LLVMFastMathFlags Bit) -> LLVMFastMathFlags {
+    return Enabled ? Bit : LLVMFastMathNone;
+  };
+
+  return BitIf(FMF.allowReassoc(), LLVMFastMathAllowReassoc) |
+         BitIf(FMF.noNaNs(), LLVMFastMathNoNaNs) |
+         BitIf(FMF.noInfs(), LLVMFastMathNoInfs) |
+         BitIf(FMF.noSignedZeros(), LLVMFastMathNoSignedZeros) |
+         BitIf(FMF.allowReciprocal(), LLVMFastMathAllowReciprocal) |
+         BitIf(FMF.allowContract(), LLVMFastMathAllowContract) |
+         BitIf(FMF.approxFunc(), LLVMFastMathApproxFunc);
+}
+
+// Return the fast-math flags of FPMathInst as a C API bitmask.
+// The value must be an Instruction (the cast below enforces this).
+LLVMFastMathFlags LLVMGetFastMathFlags(LLVMValueRef FPMathInst) {
+  Instruction *Inst = unwrap<Instruction>(FPMathInst);
+  return mapToLLVMFastMathFlags(Inst->getFastMathFlags());
+}
+
+// Apply the C API bitmask FMF to FPMathInst via Instruction::setFastMathFlags.
+// The value must be an Instruction (the cast below enforces this).
+// NOTE(review): whether flags already present on the instruction are cleared
+// depends on the semantics of setFastMathFlags — confirm against LLVM.
+void LLVMSetFastMathFlags(LLVMValueRef FPMathInst, LLVMFastMathFlags FMF) {
+  Instruction *Inst = unwrap<Instruction>(FPMathInst);
+  Inst->setFastMathFlags(mapFromLLVMFastMathFlags(FMF));
+}
+
+// Report whether V is an FPMathOperator, i.e. a value that can carry
+// fast-math flags.
+LLVMBool LLVMCanValueUseFastMathFlags(LLVMValueRef V) {
+  return isa<FPMathOperator>(unwrap<Value>(V));
+}
+
+#endif
diff --git a/lib/13/libLLVM_extra.jl b/lib/13/libLLVM_extra.jl
@@ -337,3 +337,29 @@ function LLVMPostDominatorTreeInstructionDominates(Tree, InstA, InstB)
     ccall((:LLVMPostDominatorTreeInstructionDominates, libLLVMExtra), LLVMBool, (LLVMPostDominatorTreeRef, LLVMValueRef, LLVMValueRef), Tree, InstA, InstB)
 end
 
+# Fast-math flag bits, mirroring the anonymous enum in LLVMExtra.h. Each flag is a
+# distinct power of two; `LLVMFastMathAll` (127) is the OR of the seven flags.
+# The `__JL_Ctag_52` name is presumably assigned by the binding generator — do not
+# rely on it.
+@cenum __JL_Ctag_52::UInt32 begin
+    LLVMFastMathAllowReassoc = 1
+    LLVMFastMathNoNaNs = 2
+    LLVMFastMathNoInfs = 4
+    LLVMFastMathNoSignedZeros = 8
+    LLVMFastMathAllowReciprocal = 16
+    LLVMFastMathAllowContract = 32
+    LLVMFastMathApproxFunc = 64
+    LLVMFastMathNone = 0
+    LLVMFastMathAll = 127
+end
+
+# Matches `typedef unsigned LLVMFastMathFlags;` in LLVMExtra.h: a bitmask of the flags.
+const LLVMFastMathFlags = Cuint
+
+# Raw binding: fetch the fast-math flag bitmask of `FPMathInst` from libLLVMExtra.
+function LLVMGetFastMathFlags(FPMathInst)
+    return ccall(
+        (:LLVMGetFastMathFlags, libLLVMExtra),
+        LLVMFastMathFlags,
+        (LLVMValueRef,),
+        FPMathInst
+    )
+end
+
+# Raw binding: pass the fast-math flag bitmask `FMF` for `FPMathInst` to libLLVMExtra.
+function LLVMSetFastMathFlags(FPMathInst, FMF)
+    return ccall(
+        (:LLVMSetFastMathFlags, libLLVMExtra),
+        Cvoid,
+        (LLVMValueRef, LLVMFastMathFlags),
+        FPMathInst, FMF
+    )
+end
+
+# Raw binding: ask libLLVMExtra whether `Inst` can carry fast-math flags (as LLVMBool).
+function LLVMCanValueUseFastMathFlags(Inst)
+    return ccall(
+        (:LLVMCanValueUseFastMathFlags, libLLVMExtra),
+        LLVMBool,
+        (LLVMValueRef,),
+        Inst
+    )
+end
+
diff --git a/lib/14/libLLVM_extra.jl b/lib/14/libLLVM_extra.jl
@@ -337,3 +337,29 @@ function LLVMPostDominatorTreeInstructionDominates(Tree, InstA, InstB)
     ccall((:LLVMPostDominatorTreeInstructionDominates, libLLVMExtra), LLVMBool, (LLVMPostDominatorTreeRef, LLVMValueRef, LLVMValueRef), Tree, InstA, InstB)
 end
 
+# Fast-math flag bits, mirroring the anonymous enum in LLVMExtra.h. Each flag is a
+# distinct power of two; `LLVMFastMathAll` (127) is the OR of the seven flags.
+# The `__JL_Ctag_52` name is presumably assigned by the binding generator — do not
+# rely on it.
+@cenum __JL_Ctag_52::UInt32 begin
+    LLVMFastMathAllowReassoc = 1
+    LLVMFastMathNoNaNs = 2
+    LLVMFastMathNoInfs = 4
+    LLVMFastMathNoSignedZeros = 8
+    LLVMFastMathAllowReciprocal = 16
+    LLVMFastMathAllowContract = 32
+    LLVMFastMathApproxFunc = 64
+    LLVMFastMathNone = 0
+    LLVMFastMathAll = 127
+end
+
+# Matches `typedef unsigned LLVMFastMathFlags;` in LLVMExtra.h: a bitmask of the flags.
+const LLVMFastMathFlags = Cuint
+
+# Raw binding: fetch the fast-math flag bitmask of `FPMathInst` from libLLVMExtra.
+function LLVMGetFastMathFlags(FPMathInst)
+    return ccall(
+        (:LLVMGetFastMathFlags, libLLVMExtra),
+        LLVMFastMathFlags,
+        (LLVMValueRef,),
+        FPMathInst
+    )
+end
+
+# Raw binding: pass the fast-math flag bitmask `FMF` for `FPMathInst` to libLLVMExtra.
+function LLVMSetFastMathFlags(FPMathInst, FMF)
+    return ccall(
+        (:LLVMSetFastMathFlags, libLLVMExtra),
+        Cvoid,
+        (LLVMValueRef, LLVMFastMathFlags),
+        FPMathInst, FMF
+    )
+end
+
+# Raw binding: ask libLLVMExtra whether `Inst` can carry fast-math flags (as LLVMBool).
+function LLVMCanValueUseFastMathFlags(Inst)
+    return ccall(
+        (:LLVMCanValueUseFastMathFlags, libLLVMExtra),
+        LLVMBool,
+        (LLVMValueRef,),
+        Inst
+    )
+end
+
diff --git a/lib/15/libLLVM_extra.jl b/lib/15/libLLVM_extra.jl
@@ -329,6 +329,32 @@ function LLVMPostDominatorTreeInstructionDominates(Tree, InstA, InstB)
     ccall((:LLVMPostDominatorTreeInstructionDominates, libLLVMExtra), LLVMBool, (LLVMPostDominatorTreeRef, LLVMValueRef, LLVMValueRef), Tree, InstA, InstB)
 end
 
+# Fast-math flag bits, mirroring the anonymous enum in LLVMExtra.h. Each flag is a
+# distinct power of two; `LLVMFastMathAll` (127) is the OR of the seven flags.
+# The `__JL_Ctag_53` name is presumably assigned by the binding generator — do not
+# rely on it.
+@cenum __JL_Ctag_53::UInt32 begin
+    LLVMFastMathAllowReassoc = 1
+    LLVMFastMathNoNaNs = 2
+    LLVMFastMathNoInfs = 4
+    LLVMFastMathNoSignedZeros = 8
+    LLVMFastMathAllowReciprocal = 16
+    LLVMFastMathAllowContract = 32
+    LLVMFastMathApproxFunc = 64
+    LLVMFastMathNone = 0
+    LLVMFastMathAll = 127
+end
+
+# Matches `typedef unsigned LLVMFastMathFlags;` in LLVMExtra.h: a bitmask of the flags.
+const LLVMFastMathFlags = Cuint
+
+# Raw binding: fetch the fast-math flag bitmask of `FPMathInst` from libLLVMExtra.
+function LLVMGetFastMathFlags(FPMathInst)
+    return ccall(
+        (:LLVMGetFastMathFlags, libLLVMExtra),
+        LLVMFastMathFlags,
+        (LLVMValueRef,),
+        FPMathInst
+    )
+end
+
+# Raw binding: pass the fast-math flag bitmask `FMF` for `FPMathInst` to libLLVMExtra.
+function LLVMSetFastMathFlags(FPMathInst, FMF)
+    return ccall(
+        (:LLVMSetFastMathFlags, libLLVMExtra),
+        Cvoid,
+        (LLVMValueRef, LLVMFastMathFlags),
+        FPMathInst, FMF
+    )
+end
+
+# Raw binding: ask libLLVMExtra whether `Inst` can carry fast-math flags (as LLVMBool).
+function LLVMCanValueUseFastMathFlags(Inst)
+    return ccall(
+        (:LLVMCanValueUseFastMathFlags, libLLVMExtra),
+        LLVMBool,
+        (LLVMValueRef,),
+        Inst
+    )
+end
+
 mutable struct LLVMOpaquePreservedAnalyses end
 
 const LLVMPreservedAnalysesRef = Ptr{LLVMOpaquePreservedAnalyses}

diff --git a/lib/16/libLLVM_extra.jl b/lib/16/libLLVM_extra.jl
@@ -329,6 +329,32 @@ function LLVMPostDominatorTreeInstructionDominates(Tree, InstA, InstB)
     ccall((:LLVMPostDominatorTreeInstructionDominates, libLLVMExtra), LLVMBool, (LLVMPostDominatorTreeRef, LLVMValueRef, LLVMValueRef), Tree, InstA, InstB)
 end
 
+# Fast-math flag bits, mirroring the anonymous enum in LLVMExtra.h. Each flag is a
+# distinct power of two; `LLVMFastMathAll` (127) is the OR of the seven flags.
+# The `__JL_Ctag_53` name is presumably assigned by the binding generator — do not
+# rely on it.
+@cenum __JL_Ctag_53::UInt32 begin
+    LLVMFastMathAllowReassoc = 1
+    LLVMFastMathNoNaNs = 2
+    LLVMFastMathNoInfs = 4
+    LLVMFastMathNoSignedZeros = 8
+    LLVMFastMathAllowReciprocal = 16
+    LLVMFastMathAllowContract = 32
+    LLVMFastMathApproxFunc = 64
+    LLVMFastMathNone = 0
+    LLVMFastMathAll = 127
+end
+
+# Matches `typedef unsigned LLVMFastMathFlags;` in LLVMExtra.h: a bitmask of the flags.
+const LLVMFastMathFlags = Cuint
+
+# Raw binding: fetch the fast-math flag bitmask of `FPMathInst` from libLLVMExtra.
+function LLVMGetFastMathFlags(FPMathInst)
+    return ccall(
+        (:LLVMGetFastMathFlags, libLLVMExtra),
+        LLVMFastMathFlags,
+        (LLVMValueRef,),
+        FPMathInst
+    )
+end
+
+# Raw binding: pass the fast-math flag bitmask `FMF` for `FPMathInst` to libLLVMExtra.
+function LLVMSetFastMathFlags(FPMathInst, FMF)
+    return ccall(
+        (:LLVMSetFastMathFlags, libLLVMExtra),
+        Cvoid,
+        (LLVMValueRef, LLVMFastMathFlags),
+        FPMathInst, FMF
+    )
+end
+
+# Raw binding: ask libLLVMExtra whether `Inst` can carry fast-math flags (as LLVMBool).
+function LLVMCanValueUseFastMathFlags(Inst)
+    return ccall(
+        (:LLVMCanValueUseFastMathFlags, libLLVMExtra),
+        LLVMBool,
+        (LLVMValueRef,),
+        Inst
+    )
+end
+
 mutable struct LLVMOpaquePreservedAnalyses end
 
 const LLVMPreservedAnalysesRef = Ptr{LLVMOpaquePreservedAnalyses}

diff --git a/src/core/instructions.jl b/src/core/instructions.jl
@@ -335,3 +335,63 @@ function Base.append!(iter::PhiIncomingSet, args::Vector{Tuple{V, BasicBlock}} w
 end
 
 Base.push!(iter::PhiIncomingSet, args::Tuple{<:Value, BasicBlock}) = append!(iter, [args])
+
+
+## floating point operations
+
+export fast_math, fast_math!
+
+"""
+    fast_math(inst::Instruction)
+
+Return the fast math flags of `inst` as a named tuple of `Bool`s with the fields `nnan`,
+`ninf`, `nsz`, `arcp`, `contract`, `afn` and `reassoc`.
+
+Throws an `ArgumentError` when `inst` cannot carry fast math flags.
+"""
+function fast_math(inst::Instruction)
+    Bool(API.LLVMCanValueUseFastMathFlags(inst)) ||
+        throw(ArgumentError("Instruction cannot use fast math flags"))
+
+    bits = API.LLVMGetFastMathFlags(inst)
+    # true when the given flag bit is present in the instruction's bitmask
+    isset(flag) = bits & flag != 0
+
+    return (;
+        nnan = isset(API.LLVMFastMathNoNaNs),
+        ninf = isset(API.LLVMFastMathNoInfs),
+        nsz = isset(API.LLVMFastMathNoSignedZeros),
+        arcp = isset(API.LLVMFastMathAllowReciprocal),
+        contract = isset(API.LLVMFastMathAllowContract),
+        afn = isset(API.LLVMFastMathApproxFunc),
+        reassoc = isset(API.LLVMFastMathAllowReassoc),
+    )
+end
+
+"""
+    fast_math!(inst::Instruction; [flag=...], [all=...])
+
+Set fast math flags on `inst`. Each flag is a `Bool` keyword argument defaulting to
+`false`; passing `all=true` sets every flag and ignores the individual keywords.
+
+Supported flags:
+ - `nnan`: assume arguments and results are not NaN
+ - `ninf`: assume arguments and results are not Inf
+ - `nsz`: treat the sign of zero arguments and results as insignificant
+ - `arcp`: allow use of reciprocal rather than perform division
+ - `contract`: allow contraction of operations
+ - `afn`: allow substitution of approximate calculations for functions
+ - `reassoc`: allow reassociation of operations
+
+Throws an `ArgumentError` when `inst` cannot carry fast math flags.
+"""
+function fast_math!(inst::Instruction; nnan=false, ninf=false, nsz=false, arcp=false,
+                          contract=false, afn=false, reassoc=false, all=false)
+    Bool(API.LLVMCanValueUseFastMathFlags(inst)) ||
+        throw(ArgumentError("Instruction cannot use fast math flags"))
+
+    # `all` short-circuits: the individual keywords are not consulted
+    all && return API.LLVMSetFastMathFlags(inst, API.LLVMFastMathAll)
+
+    # NOTE(review): whether flags already present on the instruction are cleared
+    # depends on the underlying LLVM call — confirm before relying on it.
+    mask = 0
+    if nnan
+        mask |= API.LLVMFastMathNoNaNs
+    end
+    if ninf
+        mask |= API.LLVMFastMathNoInfs
+    end
+    if nsz
+        mask |= API.LLVMFastMathNoSignedZeros
+    end
+    if arcp
+        mask |= API.LLVMFastMathAllowReciprocal
+    end
+    if contract
+        mask |= API.LLVMFastMathAllowContract
+    end
+    if afn
+        mask |= API.LLVMFastMathApproxFunc
+    end
+    if reassoc
+        mask |= API.LLVMFastMathAllowReassoc
+    end
+    return API.LLVMSetFastMathFlags(inst, mask)
+end
diff --git a/test/instructions_tests.jl b/test/instructions_tests.jl
@@ -504,4 +504,77 @@ end
     end
 end
 
+
+# Checks that `fast_math`/`fast_math!` read and write instruction flags, and that
+# setting the flags actually changes what the optimizer is allowed to do.
+@testset "fast math" begin
+@dispose ctx=Context() mod=LLVM.Module("my_module") begin
+    # emit some IR
+    param_types = [LLVM.FloatType()]
+    ret_type = LLVM.FloatType()
+    fun_type = LLVM.FunctionType(ret_type, param_types)
+    fun = LLVM.Function(mod, "add_sub", fun_type)
+    @dispose builder=IRBuilder() begin
+        entry = BasicBlock(fun, "entry")
+        position!(builder, entry)
+        # add and subtract 42. Fast-math flags are deliberately NOT set here: they
+        # are applied after the first optimization run, once we have confirmed that
+        # both operations survive without them.
+        a = fadd!(builder, parameters(fun)[1], LLVM.ConstantFP(Float32(42.)), "a")
+        b = fsub!(builder, a, LLVM.ConstantFP(Float32(42.)), "b")
+        ret!(builder, b)
+    end
+    verify(mod)
+
+    # run an O3 pipeline over `mod` with whichever pass manager is available
+    function optimize(mod)
+        if LLVM.has_newpm()
+            host_triple = triple()
+            host_t = Target(triple=host_triple)
+            @dispose tm=TargetMachine(host_t, host_triple) pb=PassBuilder(tm) begin
+                NewPMModulePassManager(pb) do mpm
+                    parse!(pb, mpm, "default<O3>")
+                    run!(mpm, mod, tm)
+                end
+            end
+        else
+            # dispose the builder together with the pass manager so neither leaks
+            @dispose pmb=PassManagerBuilder() mpm=ModulePassManager() begin
+                optlevel!(pmb, 3)
+                populate!(mpm, pmb)
+                run!(mpm, mod)
+            end
+        end
+    end
+    optimize(mod)
+    verify(mod)
+
+    # ensure we still have our two operations (the subtraction is expected to show
+    # up as an `fadd` too, presumably rewritten to add the negated constant)
+    @test length(blocks(fun)) == 1
+    bb = blocks(fun)[1]
+    instns = collect(instructions(bb))
+    @test length(instns) == 3
+    @test instns[1] isa LLVM.FAddInst
+    @test instns[2] isa LLVM.FAddInst
+    @test instns[3] isa LLVM.RetInst
+
+    # make them fast math
+    @test !fast_math(instns[1]).contract
+    fast_math!(instns[1]; all=true)
+    @test fast_math(instns[1]).contract
+    fast_math!(instns[2]; all=true)
+    # a `ret` cannot carry fast math flags, so both accessors must reject it
+    @test_throws ArgumentError fast_math(instns[3])
+    @test_throws ArgumentError fast_math!(instns[3]; all=true)
+
+    # optimize again
+    optimize(mod)
+    verify(mod)
+
+    # observe there's only a single return now
+    @test length(blocks(fun)) == 1
+    bb = blocks(fun)[1]
+    instns = collect(instructions(bb))
+    @test length(instns) == 1
+    @test instns[1] isa LLVM.RetInst
+end
+end
+
 end