pymc-devs · ricardoV94 · Mar 27, 2025 · Mar 20, 2025 · Mar 21, 2025
diff --git a/pytensor/link/numba/dispatch/basic.py b/pytensor/link/numba/dispatch/basic.py
@@ -75,7 +75,7 @@ def numba_njit(*args, fastmath=None, **kwargs):
         message=(
             "(\x1b\\[1m)*"  # ansi escape code for bold text
             "Cannot cache compiled function "
-            '"(numba_funcified_fgraph|store_core_outputs)" '
+            '"(numba_funcified_fgraph|store_core_outputs|cholesky|solve|solve_triangular|cho_solve)" '
             "as it uses dynamic globals"
         ),
         category=NumbaWarning,

diff --git a/pytensor/link/numba/dispatch/_LAPACK.py b/pytensor/link/numba/dispatch/linalg/_LAPACK.py
@@ -390,3 +390,70 @@ def numba_xposv(cls, dtype):
             _ptr_int,  # INFO
         )
         return functype(lapack_ptr)
+
+    @classmethod
+    def numba_xgttrf(cls, dtype):
+        """
+        Compute the LU factorization of a tridiagonal matrix A using row interchanges.
+
+        Looks up the LAPACK ``gttrf`` routine for ``dtype`` and wraps it in a
+        ctypes prototype with no return value; results and the status code are
+        written through the pointer arguments listed below.
+
+        NOTE(review): this docstring previously read "Called by
+        scipy.linalg.lu_factor". SciPy documents ``lu_factor`` as a wrapper
+        around ``*GETRF`` (dense LU), not ``gttrf`` -- confirm the intended caller.
+        """
+        lapack_ptr, float_pointer = _get_lapack_ptr_and_ptr_type(dtype, "gttrf")
+        # Argument order must match the Fortran signature of ?gttrf exactly.
+        functype = ctypes.CFUNCTYPE(
+            None,
+            _ptr_int,  # N
+            float_pointer,  # DL
+            float_pointer,  # D
+            float_pointer,  # DU
+            float_pointer,  # DU2
+            _ptr_int,  # IPIV
+            _ptr_int,  # INFO
+        )
+        return functype(lapack_ptr)
+
+    @classmethod
+    def numba_xgttrs(cls, dtype):
+        """
+        Solve a system of linear equations A @ X = B with a tridiagonal matrix A using the LU factorization computed by numba_xgttrf.
+
+        Looks up the LAPACK ``gttrs`` routine for ``dtype`` and wraps it in a
+        ctypes prototype with no return value; results and the status code are
+        written through the pointer arguments listed below.
+
+        NOTE(review): this docstring previously read "Called by
+        scipy.linalg.lu_solve". SciPy documents ``lu_solve`` as operating on the
+        dense factorization from ``lu_factor`` -- confirm the intended caller.
+        """
+        lapack_ptr, float_pointer = _get_lapack_ptr_and_ptr_type(dtype, "gttrs")
+        # Argument order must match the Fortran signature of ?gttrs exactly.
+        functype = ctypes.CFUNCTYPE(
+            None,
+            _ptr_int,  # TRANS
+            _ptr_int,  # N
+            _ptr_int,  # NRHS
+            float_pointer,  # DL
+            float_pointer,  # D
+            float_pointer,  # DU
+            float_pointer,  # DU2
+            _ptr_int,  # IPIV
+            float_pointer,  # B
+            _ptr_int,  # LDB
+            _ptr_int,  # INFO
+        )
+        return functype(lapack_ptr)
+
+    @classmethod
+    def numba_xgtcon(cls, dtype):
+        """
+        Estimate the reciprocal of the condition number of a tridiagonal matrix A using the LU factorization computed by numba_xgttrf.
+
+        Looks up the LAPACK ``gtcon`` routine for ``dtype`` and wraps it in a
+        ctypes prototype with no return value; results and the status code are
+        written through the pointer arguments listed below.
+
+        NOTE(review): the 12-argument list below (with IWORK) matches the real
+        ``sgtcon``/``dgtcon`` routines. The LAPACK reference lists the complex
+        ``cgtcon``/``zgtcon`` routines without an IWORK argument -- confirm
+        before calling this prototype with a complex dtype.
+        """
+        lapack_ptr, float_pointer = _get_lapack_ptr_and_ptr_type(dtype, "gtcon")
+        # Argument order must match the Fortran signature of ?gtcon exactly.
+        functype = ctypes.CFUNCTYPE(
+            None,
+            _ptr_int,  # NORM
+            _ptr_int,  # N
+            float_pointer,  # DL
+            float_pointer,  # D
+            float_pointer,  # DU
+            float_pointer,  # DU2
+            _ptr_int,  # IPIV
+            float_pointer,  # ANORM
+            float_pointer,  # RCOND
+            float_pointer,  # WORK
+            _ptr_int,  # IWORK
+            _ptr_int,  # INFO
+        )
+        return functype(lapack_ptr)
diff --git a/pytensor/link/numba/dispatch/linalg/__init__.py b/pytensor/link/numba/dispatch/linalg/__init__.py
diff --git a/pytensor/link/numba/dispatch/linalg/decomposition/__init__.py b/pytensor/link/numba/dispatch/linalg/decomposition/__init__.py
diff --git a/pytensor/link/numba/dispatch/linalg/decomposition/cholesky.py b/pytensor/link/numba/dispatch/linalg/decomposition/cholesky.py
@@ -0,0 +1,66 @@
+import numpy as np
+from numba.core.extending import overload
+from numba.np.linalg import _copy_to_fortran_order, ensure_lapack
+from scipy import linalg
+
+from pytensor.link.numba.dispatch.linalg._LAPACK import (
+    _LAPACK,
+    _get_underlying_float,
+    int_ptr_to_val,
+    val_to_int_ptr,
+)
+from pytensor.link.numba.dispatch.linalg.utils import _check_scipy_linalg_matrix
+
+
+def _cholesky(a, lower=False, overwrite_a=False, check_finite=True):
+    """
+    Pure-Python Cholesky factorization used as the target of the numba overload.
+
+    Forwards all arguments to ``scipy.linalg.cholesky`` and returns a 2-tuple
+    ``(factor, 0)``. The constant ``0`` fills the same slot in which the numba
+    overload of this function returns the LAPACK ``INFO`` value, so both
+    code paths return a tuple of the same length.
+    """
+    return (
+        linalg.cholesky(
+            a, lower=lower, overwrite_a=overwrite_a, check_finite=check_finite
+        ),
+        0,
+    )
+
+
+@overload(_cholesky)
+def cholesky_impl(A, lower=0, overwrite_a=False, check_finite=True):
+    """
+    Numba overload of ``_cholesky`` that calls the LAPACK ``potrf`` routine directly.
+
+    The returned ``impl`` yields ``(factor, info)`` where ``info`` is the value
+    LAPACK wrote to ``INFO``; it is returned to the caller rather than checked
+    here. ``check_finite`` is accepted for signature compatibility but is not
+    read by ``impl``.
+    """
+    # Everything up to ``def impl`` runs once at compile time, on numba types.
+    ensure_lapack()
+    _check_scipy_linalg_matrix(A, "cholesky")
+    dtype = A.dtype
+    w_type = _get_underlying_float(dtype)
+    numba_potrf = _LAPACK().numba_xpotrf(dtype)
+
+    def impl(A, lower=0, overwrite_a=False, check_finite=True):
+        _N = np.int32(A.shape[-1])
+        if A.shape[-2] != _N:
+            raise linalg.LinAlgError("Last 2 dimensions of A must be square")
+
+        # LAPACK takes every scalar argument by pointer.
+        UPLO = val_to_int_ptr(ord("L") if lower else ord("U"))
+        N = val_to_int_ptr(_N)
+        LDA = val_to_int_ptr(_N)
+        INFO = val_to_int_ptr(0)
+
+        # Work in place only when the caller allows it AND the input is already
+        # Fortran-ordered; otherwise factorize a Fortran-ordered copy.
+        if overwrite_a and A.flags.f_contiguous:
+            A_copy = A
+        else:
+            A_copy = _copy_to_fortran_order(A)
+
+        numba_potrf(
+            UPLO,
+            N,
+            A_copy.view(w_type).ctypes,
+            LDA,
+            INFO,
+        )
+
+        # Zero the triangle opposite to the requested factor (strictly upper
+        # entries when ``lower``, strictly lower entries otherwise).
+        # Presumably potrf leaves that triangle untouched -- see LAPACK docs.
+        if lower:
+            for j in range(1, _N):
+                for i in range(j):
+                    A_copy[i, j] = 0.0
+        else:
+            for j in range(_N):
+                for i in range(j + 1, _N):
+                    A_copy[i, j] = 0.0
+
+        return A_copy, int_ptr_to_val(INFO)
+
+    return impl
diff --git a/pytensor/link/numba/dispatch/linalg/solve/__init__.py b/pytensor/link/numba/dispatch/linalg/solve/__init__.py
diff --git a/pytensor/link/numba/dispatch/linalg/solve/cholesky.py b/pytensor/link/numba/dispatch/linalg/solve/cholesky.py
@@ -0,0 +1,87 @@
+import numpy as np
+from numba.core.extending import overload
+from numba.np.linalg import ensure_lapack
+from scipy import linalg
+
+from pytensor.link.numba.dispatch.linalg._LAPACK import (
+    _LAPACK,
+    _get_underlying_float,
+    int_ptr_to_val,
+    val_to_int_ptr,
+)
+from pytensor.link.numba.dispatch.linalg.solve.utils import _solve_check_input_shapes
+from pytensor.link.numba.dispatch.linalg.utils import (
+    _check_scipy_linalg_matrix,
+    _copy_to_fortran_order_even_if_1d,
+    _solve_check,
+)
+
+
+def _cho_solve(
+    C: np.ndarray, B: np.ndarray, lower: bool, overwrite_b: bool, check_finite: bool
+):
+    """
+    Solve a positive-definite linear system using the Cholesky decomposition.
+
+    Pure-Python version used as the target of the numba overload. It packs
+    ``C`` and ``lower`` into the ``(c, lower)`` tuple that
+    ``scipy.linalg.cho_solve`` expects and forwards the remaining arguments.
+
+    Parameters
+    ----------
+    C
+        Cholesky factor, passed as the first element of the ``(c, lower)`` tuple.
+    B
+        Right-hand side, passed as ``b``.
+    lower
+        Passed as the second element of the ``(c, lower)`` tuple.
+    overwrite_b
+        Forwarded to ``scipy.linalg.cho_solve``.
+    check_finite
+        Forwarded to ``scipy.linalg.cho_solve``.
+
+    Returns
+    -------
+    The value returned by ``scipy.linalg.cho_solve``.
+    """
+    return linalg.cho_solve(
+        (C, lower), b=B, overwrite_b=overwrite_b, check_finite=check_finite
+    )
+
+
+@overload(_cho_solve)
+def cho_solve_impl(C, B, lower=False, overwrite_b=False, check_finite=True):
+    """
+    Numba overload of ``_cho_solve`` that calls the LAPACK ``potrs`` routine directly.
+
+    The returned ``impl`` solves the system for right-hand side ``B`` given the
+    Cholesky factor ``C`` and returns the solution with the same number of
+    dimensions as ``B`` (a 1d ``B`` is temporarily expanded to a column).
+    ``check_finite`` is accepted for signature compatibility but is not read
+    by ``impl``.
+
+    A C-contiguous factor is only reinterpreted as a Fortran-ordered one (with
+    ``lower`` flipped) for non-complex dtypes. That reinterpretation is a plain
+    transpose, whereas ``potrs`` uses the conjugate transpose of the factor, so
+    for complex dtypes it would solve with ``conj(A)`` instead of ``A``.
+    """
+    # Function-scope import: only needed to detect complex dtypes at compile time.
+    from numba.core import types
+
+    # Everything up to ``def impl`` runs once at compile time, on numba types.
+    ensure_lapack()
+    _check_scipy_linalg_matrix(C, "cho_solve")
+    _check_scipy_linalg_matrix(B, "cho_solve")
+    dtype = C.dtype
+    w_type = _get_underlying_float(dtype)
+    numba_potrs = _LAPACK().numba_xpotrs(dtype)
+    is_complex = isinstance(dtype, types.Complex)
+
+    def impl(C, B, lower=False, overwrite_b=False, check_finite=True):
+        _solve_check_input_shapes(C, B)
+
+        _N = np.int32(C.shape[-1])
+        if C.flags.f_contiguous:
+            C_f = C
+        elif C.flags.c_contiguous and not is_complex:
+            # An upper/lower triangular c_contiguous is the same as a lower/upper triangular f_contiguous.
+            # Only valid for real dtypes: the reinterpretation transposes without conjugating.
+            C_f = C
+            lower = not lower
+        else:
+            C_f = np.asfortranarray(C)
+
+        # Solve in place only when the caller allows it AND B is already
+        # Fortran-ordered; otherwise work on a Fortran-ordered copy.
+        if overwrite_b and B.flags.f_contiguous:
+            B_copy = B
+        else:
+            B_copy = _copy_to_fortran_order_even_if_1d(B)
+
+        # potrs expects a 2d right-hand side; treat a vector as a single column.
+        B_is_1d = B.ndim == 1
+        if B_is_1d:
+            B_copy = np.expand_dims(B_copy, -1)
+
+        NRHS = 1 if B_is_1d else int(B.shape[-1])
+
+        # LAPACK takes every scalar argument by pointer.
+        UPLO = val_to_int_ptr(ord("L") if lower else ord("U"))
+        N = val_to_int_ptr(_N)
+        NRHS = val_to_int_ptr(NRHS)
+        LDA = val_to_int_ptr(_N)
+        LDB = val_to_int_ptr(_N)
+        INFO = val_to_int_ptr(0)
+
+        numba_potrs(
+            UPLO,
+            N,
+            NRHS,
+            C_f.view(w_type).ctypes,
+            LDA,
+            B_copy.view(w_type).ctypes,
+            LDB,
+            INFO,
+        )
+
+        _solve_check(_N, int_ptr_to_val(INFO))
+
+        # Undo the temporary column expansion for 1d inputs.
+        if B_is_1d:
+            return B_copy[..., 0]
+        return B_copy
+
+    return impl