
Commit 64638f5

Toggle numba caching by environment variable
1 parent c13acb1 commit 64638f5

8 files changed: +51 -32 lines changed

sgkit/caching.py (+10)

@@ -0,0 +1,10 @@
+import os
+
+_DISABLE_CACHE = os.environ.get("SGKIT_DISABLE_NUMBA_CACHE", "0")
+
+try:
+    CACHE_NUMBA = {"0": True, "1": False}[_DISABLE_CACHE]
+except KeyError as e:  # pragma: no cover
+    raise KeyError(
+        "Environment variable 'SGKIT_DISABLE_NUMBA_CACHE' must be '0' or '1'"
+    ) from e
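
The new module reads SGKIT_DISABLE_NUMBA_CACHE once, at import time, mapping "0" to cache=True (the default) and "1" to cache=False for every jitted function changed below. A minimal usage sketch, assuming the variable is set before sgkit is first imported (exporting it in the shell before launching Python behaves the same way):

    import os

    # The toggle is read at module import time (see sgkit/caching.py above),
    # so it must be set before sgkit is imported for the first time.
    os.environ["SGKIT_DISABLE_NUMBA_CACHE"] = "1"  # "1" disables numba's on-disk cache

    import sgkit  # noqa: E402  # jitted functions now compile with cache=False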

sgkit/stats/aggregation.py (+2 -1)

@@ -8,6 +8,7 @@
 from xarray import Dataset

 from sgkit import variables
+from sgkit.caching import CACHE_NUMBA
 from sgkit.stats.utils import cohort_sum
 from sgkit.typing import ArrayLike
 from sgkit.utils import (
@@ -28,7 +29,7 @@
     ],
     "(k),(n)->(n)",
     nopython=True,
-    cache=True,
+    cache=CACHE_NUMBA,
 )
 def count_alleles(
     g: ArrayLike, _: ArrayLike, out: ArrayLike

sgkit/stats/conversion.py (+2 -1)

@@ -4,6 +4,7 @@
 from xarray import Dataset

 from sgkit import variables
+from sgkit.caching import CACHE_NUMBA
 from sgkit.typing import ArrayLike
 from sgkit.utils import conditional_merge_datasets, create_dataset

@@ -15,7 +16,7 @@
     ],
     "(p),(k),()->(k)",
     nopython=True,
-    cache=True,
+    cache=CACHE_NUMBA,
 )
 def _convert_probability_to_call(
     gp: ArrayLike, _: ArrayLike, threshold: float, out: ArrayLike

sgkit/stats/ld.py (+5 -4)

@@ -11,11 +11,12 @@
 from xarray import Dataset

 from sgkit import variables
+from sgkit.caching import CACHE_NUMBA
 from sgkit.typing import ArrayLike, DType
 from sgkit.window import _get_chunked_windows, _sizes_to_start_offsets, has_windows


-@njit(nogil=True, fastmath=False, cache=True)  # type: ignore
+@njit(nogil=True, fastmath=False, cache=CACHE_NUMBA)  # type: ignore
 def rogers_huff_r_between(gn0: ArrayLike, gn1: ArrayLike) -> float:  # pragma: no cover
     """Rogers Huff *r*.

@@ -67,7 +68,7 @@ def rogers_huff_r_between(gn0: ArrayLike, gn1: ArrayLike) -> float:  # pragma: n
     return r


-@njit(nogil=True, fastmath=True, cache=True)  # type: ignore
+@njit(nogil=True, fastmath=True, cache=CACHE_NUMBA)  # type: ignore
 def rogers_huff_r2_between(gn0: ArrayLike, gn1: ArrayLike) -> float:  # pragma: no cover
     return rogers_huff_r_between(gn0, gn1) ** 2  # type: ignore

@@ -202,7 +203,7 @@ def to_ld_df(x: ArrayLike, chunk_index: int) -> DataFrame:
     )


-@njit(nogil=True, cache=True)  # type: ignore
+@njit(nogil=True, cache=CACHE_NUMBA)  # type: ignore
 def _ld_matrix_jit(
     x: ArrayLike,
     chunk_window_starts: ArrayLike,
@@ -302,7 +303,7 @@ def _ld_matrix(
     return df


-@njit(nogil=True, cache=True)  # type: ignore
+@njit(nogil=True, cache=CACHE_NUMBA)  # type: ignore
 def _maximal_independent_set_jit(
     idi: ArrayLike, idj: ArrayLike, cmp: ArrayLike
 ) -> List[int]:  # pragma: no cover

sgkit/stats/pedigree.py (+17 -16)

@@ -7,6 +7,7 @@
 from xarray import Dataset

 from sgkit import variables
+from sgkit.caching import CACHE_NUMBA
 from sgkit.typing import ArrayLike
 from sgkit.utils import (
     conditional_merge_datasets,
@@ -108,7 +109,7 @@ def parent_indices(
     return conditional_merge_datasets(ds, new_ds, merge)


-@njit(cache=True)
+@njit(cache=CACHE_NUMBA)
 def topological_argsort(parent: ArrayLike) -> ArrayLike:  # pragma: no cover
     """Find a topological ordering of samples within a pedigree such
     that no individual occurs before its parents.
@@ -172,7 +173,7 @@ def topological_argsort(parent: ArrayLike) -> ArrayLike:  # pragma: no cover
     return order[::-1]


-@njit(cache=True)
+@njit(cache=CACHE_NUMBA)
 def _is_pedigree_sorted(parent: ArrayLike) -> bool:  # pragma: no cover
     n_samples, n_parents = parent.shape
     for i in range(n_samples):
@@ -183,7 +184,7 @@ def _is_pedigree_sorted(parent: ArrayLike) -> bool:  # pragma: no cover
     return True


-@njit(cache=True)
+@njit(cache=CACHE_NUMBA)
 def _raise_on_half_founder(
     parent: ArrayLike, tau: ArrayLike = None
 ) -> None:  # pragma: no cover
@@ -202,7 +203,7 @@ def _raise_on_half_founder(
            raise ValueError("Pedigree contains half-founders")


-@njit(cache=True)
+@njit(cache=CACHE_NUMBA)
 def _diploid_self_kinship(
     kinship: ArrayLike, parent: ArrayLike, i: int
 ) -> None:  # pragma: no cover
@@ -214,7 +215,7 @@ def _diploid_self_kinship(
        kinship[i, i] = (1 + kinship[p, q]) / 2


-@njit(cache=True)
+@njit(cache=CACHE_NUMBA)
 def _diploid_pair_kinship(
     kinship: ArrayLike, parent: ArrayLike, i: int, j: int
 ) -> None:  # pragma: no cover
@@ -227,7 +228,7 @@ def _diploid_pair_kinship(
     kinship[j, i] = kinship_ij


-@njit(cache=True)
+@njit(cache=CACHE_NUMBA)
 def kinship_diploid(
     parent: ArrayLike, allow_half_founders: bool = False, dtype: type = np.float64
 ) -> ArrayLike:  # pragma: no cover
@@ -290,15 +291,15 @@ def kinship_diploid(
     return kinship


-@njit(cache=True)
+@njit(cache=CACHE_NUMBA)
 def _inbreeding_as_self_kinship(
     inbreeding: float, ploidy: int
 ) -> float:  # pragma: no cover
     """Calculate self-kinship of an individual."""
     return (1 + (ploidy - 1) * inbreeding) / ploidy


-@njit(cache=True)
+@njit(cache=CACHE_NUMBA)
 def _hamilton_kerr_inbreeding_founder(
     lambda_p: float, lambda_q: float, ploidy_i: int
 ) -> float:  # pragma: no cover
@@ -310,7 +311,7 @@ def _hamilton_kerr_inbreeding_founder(
     return num / denom


-@njit(cache=True)
+@njit(cache=CACHE_NUMBA)
 def _hamilton_kerr_inbreeding_non_founder(
     tau_p: int,
     lambda_p: float,
@@ -340,7 +341,7 @@ def _hamilton_kerr_inbreeding_non_founder(
     return num / denom


-@njit(cache=True)
+@njit(cache=CACHE_NUMBA)
 def _hamilton_kerr_inbreeding_half_founder(
     tau_p: int,
     lambda_p: float,
@@ -374,7 +375,7 @@ def _hamilton_kerr_inbreeding_half_founder(
     )


-@njit(cache=True)
+@njit(cache=CACHE_NUMBA)
 def _hamilton_kerr_self_kinship(
     kinship: ArrayLike, parent: ArrayLike, tau: ArrayLike, lambda_: ArrayLike, i: int
 ) -> None:  # pragma: no cover
@@ -421,7 +422,7 @@ def _hamilton_kerr_self_kinship(
     kinship[i, i] = _inbreeding_as_self_kinship(inbreeding_i, ploidy_i)


-@njit(cache=True)
+@njit(cache=CACHE_NUMBA)
 def _hamilton_kerr_pair_kinship(
     kinship: ArrayLike, parent: ArrayLike, tau: ArrayLike, i: int, j: int
 ) -> None:  # pragma: no cover
@@ -435,7 +436,7 @@ def _hamilton_kerr_pair_kinship(
     kinship[j, i] = kinship_ij


-@njit(cache=True)
+@njit(cache=CACHE_NUMBA)
 def kinship_Hamilton_Kerr(
     parent: ArrayLike,
     tau: ArrayLike,
@@ -646,7 +647,7 @@ def pedigree_kinship(
     return conditional_merge_datasets(ds, new_ds, merge)


-@vectorize(nopython=True, cache=True)
+@vectorize(nopython=True, cache=CACHE_NUMBA)
 def kinship_as_additive_relationship(
     kinship: float, ploidy_x: int, ploidy_y: int
 ) -> float:  # pragma: no cover
@@ -783,7 +784,7 @@ def additive_relationships(
     return conditional_merge_datasets(ds, new_ds, merge)


-@njit(cache=True)
+@njit(cache=CACHE_NUMBA)
 def _update_inverse_additive_relationships(
     mtx: ArrayLike,
     kinship: ArrayLike,
@@ -838,7 +839,7 @@ def _update_inverse_additive_relationships(
     mtx[i, i] += scalar / ploidy_i


-@njit(cache=True)
+@njit(cache=CACHE_NUMBA)
 def pedigree_kinships_as_inverse_additive_relationships(
     kinship: ArrayLike, parent: ArrayLike, tau: Union[ArrayLike, None] = None
 ) -> ArrayLike:  # pragma: no cover

sgkit/stats/popgen.py (+6 -5)

@@ -7,6 +7,7 @@
 from numba import guvectorize
 from xarray import Dataset

+from sgkit.caching import CACHE_NUMBA
 from sgkit.cohorts import _cohorts_to_array
 from sgkit.stats.utils import assert_array_shape
 from sgkit.typing import ArrayLike
@@ -137,7 +138,7 @@ def diversity(
     ["void(int64[:, :], float64[:,:])", "void(uint64[:, :], float64[:,:])"],
     "(c, k)->(c,c)",
     nopython=True,
-    cache=True,
+    cache=CACHE_NUMBA,
 )
 def _divergence(ac: ArrayLike, out: ArrayLike) -> None:  # pragma: no cover
     """Generalized U-function for computing divergence.
@@ -310,7 +311,7 @@ def divergence(
     ],
     "(c,c)->(c,c)",
     nopython=True,
-    cache=True,
+    cache=CACHE_NUMBA,
 )
 def _Fst_Hudson(d: ArrayLike, out: ArrayLike) -> None:  # pragma: no cover
     """Generalized U-function for computing Fst using Hudson's estimator.
@@ -342,7 +343,7 @@ def _Fst_Hudson(d: ArrayLike, out: ArrayLike) -> None:  # pragma: no cover
     ],
     "(c,c)->(c,c)",
     nopython=True,
-    cache=True,
+    cache=CACHE_NUMBA,
 )
 def _Fst_Nei(d: ArrayLike, out: ArrayLike) -> None:  # pragma: no cover
     """Generalized U-function for computing Fst using Nei's estimator.
@@ -623,7 +624,7 @@ def Tajimas_D(
     ["void(float32[:, :], float32[:,:,:])", "void(float64[:, :], float64[:,:,:])"],
     "(c,c)->(c,c,c)",
     nopython=True,
-    cache=True,
+    cache=CACHE_NUMBA,
 )
 def _pbs(t: ArrayLike, out: ArrayLike) -> None:  # pragma: no cover
     """Generalized U-function for computing PBS."""
@@ -647,7 +648,7 @@ def _pbs(t: ArrayLike, out: ArrayLike) -> None:  # pragma: no cover
     ],
     "(c,c),(ct,i)->(c,c,c)",
     nopython=True,
-    cache=True,
+    cache=CACHE_NUMBA,
 )
 def _pbs_cohorts(
     t: ArrayLike, ct: ArrayLike, out: ArrayLike

sgkit/stats/utils.py (+6 -4)

@@ -8,6 +8,8 @@
 from numba import guvectorize
 from xarray import DataArray, Dataset

+from sgkit.caching import CACHE_NUMBA
+
 from ..typing import ArrayLike


@@ -176,7 +178,7 @@ def func(x: ArrayLike, cohort: ArrayLike, n: int, axis: int = -1) -> ArrayLike:
     ],
     "(n),(n),(c)->(c)",
     nopython=True,
-    cache=True,
+    cache=CACHE_NUMBA,
 )
 def cohort_sum(
     x: ArrayLike, cohort: ArrayLike, _: ArrayLike, out: ArrayLike
@@ -222,7 +224,7 @@ def cohort_sum(
     ],
     "(n),(n),(c)->(c)",
     nopython=True,
-    cache=True,
+    cache=CACHE_NUMBA,
 )
 def cohort_nansum(
     x: ArrayLike, cohort: ArrayLike, _: ArrayLike, out: ArrayLike
@@ -269,7 +271,7 @@ def cohort_nansum(
     ],
     "(n),(n),(c)->(c)",
     nopython=True,
-    cache=True,
+    cache=CACHE_NUMBA,
 )
 def cohort_mean(
     x: ArrayLike, cohort: ArrayLike, _: ArrayLike, out: ArrayLike
@@ -320,7 +322,7 @@ def cohort_mean(
     ],
     "(n),(n),(c)->(c)",
     nopython=True,
-    cache=True,
+    cache=CACHE_NUMBA,
 )
 def cohort_nanmean(
     x: ArrayLike, cohort: ArrayLike, _: ArrayLike, out: ArrayLike

sgkit/utils.py (+3 -1)

@@ -5,6 +5,8 @@
 from numba import guvectorize
 from xarray import Dataset

+from sgkit.caching import CACHE_NUMBA
+
 from . import variables
 from .typing import ArrayLike, DType

@@ -317,7 +319,7 @@ def max_str_len(a: ArrayLike) -> ArrayLike:
     ],
     "(n)->()",
     nopython=True,
-    cache=True,
+    cache=CACHE_NUMBA,
 )
 def hash_array(x: ArrayLike, out: ArrayLike) -> None:  # pragma: no cover
     """Hash entries of ``x`` using the DJBX33A hash function.
