Skip to content

Commit 3b13a7b

Browse files
tomwhitemergify[bot]
authored andcommitted
Divergence should handle case when all calls for a cohort are missing #402
1 parent 4e7cc1f commit 3b13a7b

File tree

2 files changed

+21
-7
lines changed

2 files changed

+21
-7
lines changed

sgkit/stats/popgen.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -147,13 +147,14 @@ def _divergence(ac: ArrayLike, out: ArrayLike) -> None: # pragma: no cover
147147
for i in range(n_cohorts):
148148
for j in range(i + 1, n_cohorts):
149149
n_pairs = an[i] * an[j]
150-
n_same = 0
151-
for k in range(n_alleles):
152-
n_same += ac[i, k] * ac[j, k]
153-
n_diff = n_pairs - n_same
154-
div = n_diff / n_pairs
155-
out[i, j] = div
156-
out[j, i] = div
150+
if n_pairs != 0.0:
151+
n_same = 0
152+
for k in range(n_alleles):
153+
n_same += ac[i, k] * ac[j, k]
154+
n_diff = n_pairs - n_same
155+
div = n_diff / n_pairs
156+
out[i, j] = div
157+
out[j, i] = div
157158

158159
# calculate the diversity for each cohort
159160
for i in range(n_cohorts):

sgkit/tests/test_popgen.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
)
2323
from sgkit.window import window
2424

25+
from .test_aggregation import get_dataset
26+
2527

2628
def ts_to_dataset(ts, chunks=None, samples=None):
2729
"""
@@ -212,6 +214,17 @@ def test_divergence__windowed_scikit_allel_comparison(sample_size, n_cohorts, ch
212214
) # scikit-allel has final window missing
213215

214216

217+
def test_divergence__missing_calls():
218+
ds = get_dataset(
219+
[
220+
[[0, 0], [-1, -1], [-1, -1]], # all of cohort 1 calls are missing
221+
]
222+
)
223+
ds["sample_cohort"] = xr.DataArray(np.array([0, 1, 1]), dims="samples")
224+
ds = divergence(ds)
225+
np.testing.assert_equal(ds["stat_divergence"].values[0, 1], np.nan)
226+
227+
215228
@pytest.mark.parametrize("sample_size", [2, 3, 10, 100])
216229
def test_Fst__Hudson(sample_size):
217230
# scikit-allel can only calculate Fst for pairs of cohorts (populations)

0 commit comments

Comments
 (0)