TuringLang · penelopeysm · Mar 21, 2025 · Mar 22, 2025
diff --git a/Project.toml b/Project.toml
@@ -1,6 +1,6 @@
 name = "Bijectors"
 uuid = "76274a88-744f-5084-9051-94815aaf08c4"
-version = "0.15.6"
+version = "0.15.7"
 
 [deps]
 ArgCheck = "dce04be8-c92d-5529-be00-80e4d2c0e197"

diff --git a/src/bijectors/corr.jl b/src/bijectors/corr.jl
@@ -293,15 +293,15 @@ which is the above implementation.
 function _link_chol_lkj(W::AbstractMatrix)
     K = LinearAlgebra.checksquare(W)
 
-    y = similar(W) # z is also UpperTriangular. 
+    y = similar(W) # W is upper triangular.
     # Some zero filling can be avoided. Though diagnoal is still needed to be filled with zero.
 
     @inbounds for j in 1:K
-        remainder_sq = one(eltype(W))
-        for i in 1:(j - 1)
+        remainder_sq = W[j, j]^2
+        for i in (j - 1):-1:1
             z = W[i, j] / sqrt(remainder_sq)
-            y[i, j] = atanh(z)
-            remainder_sq -= W[i, j]^2
+            y[i, j] = asinh(z)
+            remainder_sq += W[i, j]^2
         end
         for i in j:K
             y[i, j] = 0
@@ -317,17 +317,18 @@ function _link_chol_lkj_from_upper(W::AbstractMatrix)
 
     y = similar(W, N)
 
-    idx = 1
+    starting_idx = 1
     @inbounds for j in 2:K
-        y[idx] = atanh(W[1, j])
-        idx += 1
-        remainder_sq = 1 - W[1, j]^2
-        for i in 2:(j - 1)
+        y[starting_idx] = atanh(W[1, j])
+        starting_idx += 1
+        remainder_sq = W[j, j]^2
+        for i in (j - 1):-1:2
+            idx = starting_idx + i - 2
             z = W[i, j] / sqrt(remainder_sq)
-            y[idx] = atanh(z)
-            remainder_sq -= W[i, j]^2
-            idx += 1
+            y[idx] = asinh(z)
+            remainder_sq += W[i, j]^2
         end
+        starting_idx += length((j - 1):-1:2)
     end
 
     return y

diff --git a/src/chainrules.jl b/src/chainrules.jl
@@ -161,21 +161,23 @@ function ChainRulesCore.rrule(::typeof(_link_chol_lkj_from_upper), W::AbstractMa
     N = ((K - 1) * K) ÷ 2
 
     z = zeros(eltype(W), N)
-    tmp_vec = similar(z)
+    remainders = similar(z)
 
-    idx = 1
+    starting_idx = 1
     @inbounds for j in 2:K
-        z[idx] = atanh(W[1, j])
-        tmp = sqrt(1 - W[1, j]^2)
-        tmp_vec[idx] = tmp
-        idx += 1
-        for i in 2:(j - 1)
-            p = W[i, j] / tmp
-            tmp *= sqrt(1 - p^2)
-            tmp_vec[idx] = tmp
-            z[idx] = atanh(p)
-            idx += 1
+        z[starting_idx] = atanh(W[1, j])
+        remainder_sq = W[j, j]^2
+        starting_idx += 1
+        for i in (j - 1):-1:2
+            idx = starting_idx + i - 2
+            remainder = sqrt(remainder_sq)
+            remainders[idx] = remainder
+            zt = W[i, j] / remainder
+            z[idx] = asinh(zt)
+            remainder_sq += W[i, j]^2
         end
+        remainders[starting_idx - 1] = sqrt(remainder_sq)
+        starting_idx += length((j - 1):-1:2)
     end
 
     function pullback_link_chol_lkj_from_upper(Δz_thunked)
@@ -190,7 +192,7 @@ function ChainRulesCore.rrule(::typeof(_link_chol_lkj_from_upper), W::AbstractMa
             ΔW[j, j] = 0
             Δtmp = zero(eltype(Δz))
             for i in (j - 1):-1:2
-                tmp = tmp_vec[idx_up_to_prev_column + i - 1]
+                tmp = remainders[idx_up_to_prev_column + i - 1]
                 p = W[i, j] / tmp
                 ftmp = sqrt(1 - p^2)
                 d_ftmp_p = -p / ftmp
@@ -216,21 +218,23 @@ function ChainRulesCore.rrule(::typeof(_link_chol_lkj_from_lower), W::AbstractMa
     N = ((K - 1) * K) ÷ 2
 
     z = zeros(eltype(W), N)
-    tmp_vec = similar(z)
+    remainders = similar(z)
 
-    idx = 1
+    starting_idx = 1
     @inbounds for i in 2:K
-        z[idx] = atanh(W[i, 1])
-        tmp = sqrt(1 - W[i, 1]^2)
-        tmp_vec[idx] = tmp
-        idx += 1
-        for j in 2:(i - 1)
-            p = W[i, j] / tmp
-            tmp *= sqrt(1 - p^2)
-            tmp_vec[idx] = tmp
-            z[idx] = atanh(p)
-            idx += 1
+        z[starting_idx] = atanh(W[i, 1])
+        remainder_sq = W[i, i]^2
+        starting_idx += 1
+        for j in (i - 1):-1:2
+            idx = starting_idx + j - 2
+            remainder = sqrt(remainder_sq)
+            remainders[idx] = remainder
+            zt = W[i, j] / remainder
+            z[idx] = asinh(zt)
+            remainder_sq += W[i, j]^2
         end
+        remainders[starting_idx - 1] = sqrt(remainder_sq)
+        starting_idx += length((i - 1):-1:2)
     end
 
     function pullback_link_chol_lkj_from_lower(Δz_thunked)
@@ -245,7 +249,7 @@ function ChainRulesCore.rrule(::typeof(_link_chol_lkj_from_lower), W::AbstractMa
             ΔW[i, i] = 0
             Δtmp = zero(eltype(Δz))
             for j in (i - 1):-1:2
-                tmp = tmp_vec[idx_up_to_prev_row + j - 1]
+                tmp = remainders[idx_up_to_prev_row + j - 1]
                 p = W[i, j] / tmp
                 ftmp = sqrt(1 - p^2)
                 d_ftmp_p = -p / ftmp

diff --git a/test/transform.jl b/test/transform.jl
@@ -237,18 +237,49 @@ end
 end
 
 @testset "LKJCholesky" begin
+    # Return the free parameters of a 3x3 Cholesky factor, i.e. its off-diagonal
+    # elements, ordered as U[1,2], U[1,3], U[2,3] (equal to L[2,1], L[3,1], L[3,2]).
+    function chol_3by3_to_free_params(x::Cholesky)
+        # The `uplo` field is a Char, so compare with 'U' (`== :U` is always false)
+        if x.uplo == 'U'
+            return [x.U[1, 2], x.U[1, 3], x.U[2, 3]]
+        else
+            return [x.L[2, 1], x.L[3, 1], x.L[3, 2]]
+        end
+        # TODO: Generalise to arbitrary dimension, e.g. by selecting
+        #   LinearIndices(size(x))[I] for I in CartesianIndices(size(x))
+        #   if (uplo === :L && I[2] < I[1]) || (uplo === :U && I[2] > I[1])
+    end
+
+    # Rebuild a 3x3 Cholesky factor from its three free (off-diagonal) parameters.
+    # Each diagonal entry is the positive square root that gives the corresponding
+    # column of the upper factor unit norm, so there is no sign ambiguity.
+    function free_params_to_chol_3by3(free_params::AbstractVector, uplo::Symbol)
+        upper = UpperTriangular(zeros(eltype(free_params), 3, 3))
+        upper[1, 1] = 1
+        upper[1, 2] = a = free_params[1]
+        upper[1, 3] = b = free_params[2]
+        upper[2, 2] = sqrt(1 - a^2)
+        upper[2, 3] = c = free_params[3]
+        upper[3, 3] = sqrt(1 - b^2 - c^2)
+        # Any `uplo` other than :U yields the lower factor, which is simply the
+        # transpose of the upper factor built above.
+        factor = uplo == :U ? upper : transpose(upper)
+        return Cholesky(factor)
+    end
+
     @testset "uplo: $uplo" for uplo in [:L, :U]
         dist = LKJCholesky(3, 1, uplo)
         single_sample_tests(dist)
 
         x = rand(dist)
 
-        inds = [
-            LinearIndices(size(x))[I] for I in CartesianIndices(size(x)) if
-            (uplo === :L && I[2] < I[1]) || (uplo === :U && I[2] > I[1])
-        ]
-        J = ForwardDiff.jacobian(z -> link(dist, Cholesky(z, x.uplo, x.info)), x.UL)
-        J = J[:, inds]
+        # Here, we need to pass ForwardDiff only the free parameters of the
+        # Cholesky factor so that we get a square Jacobian matrix
+        free_params = chol_3by3_to_free_params(x)
+        J = ForwardDiff.jacobian(
+            z -> link(dist, free_params_to_chol_3by3(z, uplo)), free_params
+        )
         logpdf_turing = logpdf_with_trans(dist, x, true)
         @test logpdf(dist, x) - _logabsdet(J) ≈ logpdf_turing
     end