8
8
from xarray import Dataset
9
9
10
10
from sgkit import variables
11
+ from sgkit .stats .aggregation import genotype_count
11
12
from sgkit .utils import conditional_merge_datasets
12
13
13
14
@@ -129,7 +130,9 @@ def hardy_weinberg_test(
129
130
genotype_counts : Optional [Hashable ] = None ,
130
131
call_genotype : Hashable = variables .call_genotype ,
131
132
call_genotype_mask : Hashable = variables .call_genotype_mask ,
132
- merge : bool = True ,
133
+ ploidy : Optional [int ] = None ,
134
+ alleles : Optional [int ] = None ,
135
+ merge : bool = True
133
136
) -> Dataset :
134
137
"""Exact test for HWE as described in Wigginton et al. 2005 [1].
135
138
@@ -150,6 +153,16 @@ def hardy_weinberg_test(
150
153
call_genotype_mask
151
154
Input variable name holding call_genotype_mask.
152
155
Defined by :data:`sgkit.variables.call_genotype_mask_spec`
156
+ ploidy
157
+ Genotype ploidy, defaults to ``ploidy`` dimension of provided dataset.
158
+ If the ``ploidy`` dimension is not present, then this value must be set explicitly.
159
+ Currently HWE calculations are only supported for diploid datasets,
160
+ i.e. ``ploidy`` must equal 2.
161
+ alleles
162
+ Genotype allele count, defaults to ``alleles`` dimension of provided dataset.
163
+ If the ``alleles`` dimension is not present, then this value must be set explicitly.
164
+ Currently HWE calculations are only supported for biallelic datasets,
165
+ i.e. ``alleles`` must equal 2.
153
166
merge
154
167
If True (the default), merge the input dataset and the computed
155
168
output variables into a single dataset, otherwise return only
@@ -163,8 +176,9 @@ def hardy_weinberg_test(
163
176
Returns
164
177
-------
165
178
Dataset containing (N = num variants):
179
+
166
180
variant_hwe_p_value : [array-like, shape: (N,)]
167
- P values from HWE test for each variant as float in [0, 1].
181
+ P values from HWE test for each variant as float in [0, 1].
168
182
169
183
References
170
184
----------
@@ -179,10 +193,22 @@ def hardy_weinberg_test(
179
193
NotImplementedError
180
194
If maximum number of alleles in provided dataset != 2
181
195
"""
182
- if ds .dims ["ploidy" ] != 2 :
196
+ ploidy = ploidy or ds .dims .get ("ploidy" )
197
+ if not ploidy :
198
+ raise ValueError (
199
+ "`ploidy` parameter must be set when not present as dataset dimension."
200
+ )
201
+ if ploidy != 2 :
183
202
raise NotImplementedError ("HWE test only implemented for diploid genotypes" )
184
- if ds .dims ["alleles" ] != 2 :
203
+
204
+ alleles = alleles or ds .dims .get ("alleles" )
205
+ if not alleles :
206
+ raise ValueError (
207
+ "`alleles` parameter must be set when not present as dataset dimension."
208
+ )
209
+ if alleles != 2 :
185
210
raise NotImplementedError ("HWE test only implemented for biallelic genotypes" )
211
+
186
212
# Use precomputed genotype counts if provided
187
213
if genotype_counts is not None :
188
214
variables .validate (ds , {genotype_counts : variables .genotype_counts_spec })
@@ -196,12 +222,16 @@ def hardy_weinberg_test(
196
222
call_genotype : variables .call_genotype_spec ,
197
223
},
198
224
)
199
- # TODO: Use API genotype counting function instead, e.g.
200
- # https://github.com/pystatgen/sgkit/issues/29#issuecomment-656691069
201
- M = ds [call_genotype_mask ].any (dim = "ploidy" )
202
- AC = xr .where (M , - 1 , ds [call_genotype ].sum (dim = "ploidy" )) # type: ignore[no-untyped-call]
203
- cts = [1 , 0 , 2 ] # arg order: hets, hom1, hom2
204
- obs = [da .asarray ((AC == ct ).sum (dim = "samples" )) for ct in cts ]
225
+ ds_ct = genotype_count (
226
+ ds ,
227
+ dim = "samples" ,
228
+ call_genotype = call_genotype ,
229
+ call_genotype_mask = call_genotype_mask ,
230
+ )
231
+ obs = [
232
+ da .asarray (ds_ct [v ])
233
+ for v in ["variant_n_het" , "variant_n_hom_ref" , "variant_n_hom_alt" ]
234
+ ]
205
235
p = da .map_blocks (hardy_weinberg_p_value_vec_jit , * obs )
206
236
new_ds = xr .Dataset ({variables .variant_hwe_p_value : ("variants" , p )})
207
237
return conditional_merge_datasets (ds , variables .validate (new_ds ), merge )
0 commit comments