Skip to content

Commit 660c694

Browse files
daletovarjeromekelleher
authored andcommitted
add functions to rst and avoid modifying input ds
1 parent 96459db commit 660c694

File tree

2 files changed

+23
-11
lines changed

2 files changed

+23
-11
lines changed

docs/api.rst

+4
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,14 @@ Methods
2424

2525
count_call_alleles
2626
count_variant_alleles
27+
divergence
28+
diversity
29+
Fst
2730
gwas_linear_regression
2831
hardy_weinberg_test
2932
regenie
3033
variant_stats
34+
Tajimas_D
3135

3236
Utilities
3337
=========

sgkit/stats/popgen.py

+19-11
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,10 @@ def diversity(
3737
if len(ds.samples) < 2:
3838
return xr.DataArray(np.nan)
3939
if allele_counts not in ds:
40-
ds = count_variant_alleles(ds)
41-
ac = ds[allele_counts]
40+
ds_new = count_variant_alleles(ds)
41+
else:
42+
ds_new = ds
43+
ac = ds_new[allele_counts]
4244
an = ac.sum(axis=1)
4345
n_pairs = an * (an - 1) / 2
4446
n_same = (ac * (ac - 1) / 2).sum(axis=1)
@@ -67,13 +69,17 @@ def divergence(
6769
divergence value between the two datasets.
6870
"""
6971
if allele_counts not in ds1:
70-
ds1 = count_variant_alleles(ds1)
71-
ac1 = ds1[allele_counts]
72+
ds1_new = count_variant_alleles(ds1)
73+
else:
74+
ds1_new = ds1
75+
ac1 = ds1_new[allele_counts]
7276
if allele_counts not in ds2:
73-
ds2 = count_variant_alleles(ds2)
74-
ac2 = ds2[allele_counts]
75-
an1 = ds1[allele_counts].sum(axis=1)
76-
an2 = ds2[allele_counts].sum(axis=1)
77+
ds2_new = count_variant_alleles(ds2)
78+
else:
79+
ds2_new = ds2
80+
ac2 = ds2_new[allele_counts]
81+
an1 = ds1_new[allele_counts].sum(axis=1)
82+
an2 = ds2_new[allele_counts].sum(axis=1)
7783

7884
n_pairs = an1 * an2
7985
n_same = (ac1 * ac2).sum(axis=1)
@@ -126,8 +132,10 @@ def Tajimas_D(
126132
Tajimas' D value.
127133
"""
128134
if allele_counts not in ds:
129-
ds = count_variant_alleles(ds)
130-
ac = ds[allele_counts]
135+
ds_new = count_variant_alleles(ds)
136+
else:
137+
ds_new = ds
138+
ac = ds_new[allele_counts]
131139

132140
# count segregating
133141
S = ((ac > 0).sum(axis=1) > 1).sum()
@@ -142,7 +150,7 @@ def Tajimas_D(
142150
theta = S / a1
143151

144152
# calculate diversity
145-
div = diversity(ds)
153+
div = diversity(ds_new)
146154

147155
# N.B., both theta estimates are usually divided by the number of
148156
# (accessible) bases but here we want the absolute difference

0 commit comments

Comments
 (0)