@@ -255,7 +255,7 @@ def test_Fst__unknown_estimator():
255
255
256
256
@pytest .mark .parametrize (
257
257
"sample_size, n_cohorts" ,
258
- [(10 , 2 )],
258
+ [(10 , 2 ), ( 10 , 3 ) ],
259
259
)
260
260
@pytest .mark .parametrize ("chunks" , [(- 1 , - 1 ), (50 , - 1 )])
261
261
def test_Fst__windowed (sample_size , n_cohorts , chunks ):
@@ -280,16 +280,18 @@ def test_Fst__windowed(sample_size, n_cohorts, chunks):
280
280
281
281
np .testing .assert_allclose (fst , ts_fst )
282
282
283
+ # scikit-allel
283
284
fst_ds = Fst (ds , estimator = "Hudson" )
284
- fst = fst_ds ["stat_Fst" ].sel (cohorts_0 = "co_0" , cohorts_1 = "co_1" ).values
285
+ for i , j in itertools .combinations (range (n_cohorts ), 2 ):
286
+ fst = fst_ds ["stat_Fst" ].sel (cohorts_0 = f"co_{ i } " , cohorts_1 = f"co_{ j } " ).values
285
287
286
- ac1 = fst_ds .cohort_allele_count .values [:, 0 , :]
287
- ac2 = fst_ds .cohort_allele_count .values [:, 1 , :]
288
- ska_fst = allel .moving_hudson_fst (ac1 , ac2 , size = 25 )
288
+ ac_i = fst_ds .cohort_allele_count .values [:, i , :]
289
+ ac_j = fst_ds .cohort_allele_count .values [:, j , :]
290
+ ska_fst = allel .moving_hudson_fst (ac_i , ac_j , size = 25 )
289
291
290
- np .testing .assert_allclose (
291
- fst [:- 1 ], ska_fst
292
- ) # scikit-allel has final window missing
292
+ np .testing .assert_allclose (
293
+ fst [:- 1 ], ska_fst
294
+ ) # scikit-allel has final window missing
293
295
294
296
295
297
@pytest .mark .parametrize ("sample_size" , [2 , 3 , 10 , 100 ])
@@ -307,56 +309,63 @@ def test_Tajimas_D(sample_size):
307
309
308
310
@pytest.mark.parametrize(
    "sample_size, n_cohorts",
    [(10, 3), (20, 4)],
)
def test_pbs(sample_size, n_cohorts):
    """Compare single-window ``pbs`` output with scikit-allel for each cohort triple.

    The dataset is windowed with one window spanning every variant, so each
    selected ``stat_pbs`` entry is checked against ``allel.pbs`` called with
    ``window_size=n_variants``.
    """
    ts = msprime.simulate(sample_size, length=100, mutation_rate=0.05, random_seed=42)
    ds = ts_to_dataset(ts)  # type: ignore[no-untyped-call]
    cohort_key_names = ["cohorts_0", "cohorts_1", "cohorts_2"]
    ds, _ = add_cohorts(ds, ts, n_cohorts, cohort_key_names=cohort_key_names)  # type: ignore[no-untyped-call]
    n_variants = ds.dims["variants"]
    ds = window(ds, size=n_variants)  # single window

    ds = pbs(ds)

    # Loop-invariant lookups: fetch once rather than once per cohort triple.
    stat_pbs_all = ds["stat_pbs"]
    allele_counts = ds.cohort_allele_count.values

    # scikit-allel
    for i, j, k in itertools.combinations(range(n_cohorts), 3):
        # Labels must be exactly "co_<index>" (no padding) to match the
        # cohort coordinates used for selection.
        stat_pbs = stat_pbs_all.sel(
            cohorts_0=f"co_{i}", cohorts_1=f"co_{j}", cohorts_2=f"co_{k}"
        ).values

        ac_i = allele_counts[:, i, :]
        ac_j = allele_counts[:, j, :]
        ac_k = allele_counts[:, k, :]

        ska_pbs_value = allel.pbs(ac_i, ac_j, ac_k, window_size=n_variants)

        np.testing.assert_allclose(
            stat_pbs, ska_pbs_value, err_msg=f"cohorts ({i}, {j}, {k})"
        )
332
338
333
339
334
340
@pytest.mark.parametrize(
    "sample_size, n_cohorts",
    [(10, 3), (20, 4)],
)
@pytest.mark.parametrize("chunks", [(-1, -1), (50, -1)])
def test_pbs__windowed(sample_size, n_cohorts, chunks):
    """Compare windowed ``pbs`` output with scikit-allel for each cohort triple.

    Windows of 25 variants are used on both sides. The last window of the
    computed statistic is dropped before comparing because scikit-allel has
    the final window missing.
    """
    ts = msprime.simulate(sample_size, length=200, mutation_rate=0.05, random_seed=42)
    ds = ts_to_dataset(ts, chunks)  # type: ignore[no-untyped-call]
    cohort_key_names = ["cohorts_0", "cohorts_1", "cohorts_2"]
    ds, _ = add_cohorts(ds, ts, n_cohorts, cohort_key_names=cohort_key_names)  # type: ignore[no-untyped-call]
    ds = window(ds, size=25)

    ds = pbs(ds)

    # Loop-invariant lookups: fetch once rather than once per cohort triple.
    stat_pbs_all = ds["stat_pbs"]
    allele_counts = ds.cohort_allele_count.values

    # scikit-allel
    for i, j, k in itertools.combinations(range(n_cohorts), 3):
        # Labels must be exactly "co_<index>" (no padding) to match the
        # cohort coordinates used for selection.
        stat_pbs = stat_pbs_all.sel(
            cohorts_0=f"co_{i}", cohorts_1=f"co_{j}", cohorts_2=f"co_{k}"
        ).values

        ac_i = allele_counts[:, i, :]
        ac_j = allele_counts[:, j, :]
        ac_k = allele_counts[:, k, :]

        ska_pbs_value = allel.pbs(ac_i, ac_j, ac_k, window_size=25, window_step=25)

        # scikit-allel has final window missing
        np.testing.assert_allclose(
            stat_pbs[:-1], ska_pbs_value, err_msg=f"cohorts ({i}, {j}, {k})"
        )
360
369
361
370
362
371
@pytest .mark .parametrize (
0 commit comments