Skip to content

Commit e286e75

Browse files
timothymillarmergify[bot]
authored andcommitted
Handle samples with no cohort in observed_heterozygosity
1 parent c0bc06a commit e286e75

File tree

2 files changed

+14
-10
lines changed

2 files changed

+14
-10
lines changed

sgkit/stats/popgen.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -952,8 +952,9 @@ def _cohort_observed_heterozygosity(
952952
h = hi[i]
953953
if not np.isnan(h):
954954
c = cohorts[i]
955-
out[c] += h
956-
_[c] += 1
955+
if c >= 0:
956+
out[c] += h
957+
_[c] += 1
957958
for j in range(n_cohorts):
958959
n = _[j]
959960
if n != 0:

sgkit/tests/test_popgen.py

+11-8
Original file line numberDiff line numberDiff line change
@@ -577,7 +577,15 @@ def test_observed_heterozygosity(chunks):
577577

578578

579579
@pytest.mark.parametrize("chunks", [((4,), (6,), (4,)), ((2, 2), (3, 3), (2, 2))])
580-
def test_observed_heterozygosity__windowed(chunks):
580+
@pytest.mark.parametrize(
581+
"cohorts,expectation",
582+
[
583+
([0, 0, 1, 1, 2, 2], [[1 / 4, 2 / 3, 2 / 3], [1 / 2, 5 / 3, np.nan]]),
584+
([2, 2, 1, 1, 0, 0], [[2 / 3, 2 / 3, 1 / 4], [np.nan, 5 / 3, 1 / 2]]),
585+
([-1, -1, 1, 1, 0, 0], [[2 / 3, 2 / 3], [np.nan, 5 / 3]]),
586+
],
587+
)
588+
def test_observed_heterozygosity__windowed(chunks, cohorts, expectation):
581589
ds = simulate_genotype_call_dataset(
582590
n_variant=4,
583591
n_sample=6,
@@ -625,18 +633,13 @@ def test_observed_heterozygosity__windowed(chunks):
625633
ds.call_genotype_mask.values = ds.call_genotype < 0
626634
ds["sample_cohort"] = (
627635
["samples"],
628-
da.asarray([0, 0, 1, 1, 2, 2]).rechunk(chunks[1]),
636+
da.asarray(cohorts).rechunk(chunks[1]),
629637
)
630638
ds = window(ds, size=2)
631639
ho = observed_heterozygosity(ds)["stat_observed_heterozygosity"]
632640
np.testing.assert_almost_equal(
633641
ho,
634-
np.array(
635-
[
636-
[1 / 4, 2 / 3, 2 / 3],
637-
[1 / 2, 5 / 3, np.nan],
638-
]
639-
),
642+
np.array(expectation),
640643
)
641644

642645

0 commit comments

Comments
 (0)