@@ -362,18 +362,25 @@ def test_pbs__windowed(sample_size, n_cohorts, chunks):
362
362
ac_j = ds .cohort_allele_count .values [:, j , :]
363
363
ac_k = ds .cohort_allele_count .values [:, k , :]
364
364
365
- ska_pbs_value = allel .pbs (ac_i , ac_j , ac_k , window_size = 25 , window_step = 25 )
365
+ ska_pbs_value = allel .pbs (ac_i , ac_j , ac_k , window_size = 25 )
366
366
367
367
# scikit-allel has final window missing
368
368
np .testing .assert_allclose (stat_pbs [:- 1 ], ska_pbs_value )
369
369
370
370
371
371
@pytest .mark .parametrize (
372
- "n_variants, n_samples, n_contigs, n_cohorts" ,
373
- [(9 , 5 , 1 , 1 ), (9 , 5 , 1 , 2 )],
372
+ "n_variants, n_samples, n_contigs, n_cohorts, cohorts, cohort_indexes" ,
373
+ [
374
+ (9 , 5 , 1 , 1 , None , None ),
375
+ (9 , 5 , 1 , 2 , None , None ),
376
+ (9 , 5 , 1 , 2 , [1 ], [1 ]),
377
+ (9 , 5 , 1 , 2 , ["co_1" ], [1 ]),
378
+ ],
374
379
)
375
380
@pytest .mark .parametrize ("chunks" , [(- 1 , - 1 ), (5 , - 1 )])
376
- def test_Garud_h (n_variants , n_samples , n_contigs , n_cohorts , chunks ):
381
+ def test_Garud_h (
382
+ n_variants , n_samples , n_contigs , n_cohorts , cohorts , cohort_indexes , chunks
383
+ ):
377
384
ds = simulate_genotype_call_dataset (
378
385
n_variant = n_variants , n_sample = n_samples , n_contig = n_contigs
379
386
)
@@ -383,25 +390,37 @@ def test_Garud_h(n_variants, n_samples, n_contigs, n_cohorts, chunks):
383
390
[np .full_like (subset , i ) for i , subset in enumerate (subsets )]
384
391
)
385
392
ds ["sample_cohort" ] = xr .DataArray (sample_cohorts , dims = "samples" )
393
+ cohort_names = [f"co_{ i } " for i in range (n_cohorts )]
394
+ coords = {k : cohort_names for k in ["cohorts" ]}
395
+ ds = ds .assign_coords (coords ) # type: ignore[no-untyped-call]
386
396
ds = window (ds , size = 3 )
387
397
388
- gh = Garud_h (ds )
398
+ gh = Garud_h (ds , cohorts = cohorts )
389
399
h1 = gh .stat_Garud_h1 .values
390
400
h12 = gh .stat_Garud_h12 .values
391
401
h123 = gh .stat_Garud_h123 .values
392
402
h2_h1 = gh .stat_Garud_h2_h1 .values
393
403
394
404
# scikit-allel
395
405
for c in range (n_cohorts ):
396
- gt = ds .call_genotype .values [:, sample_cohorts == c , :]
397
- ska_gt = allel .GenotypeArray (gt )
398
- ska_ha = ska_gt .to_haplotypes ()
399
- ska_h = allel .moving_garud_h (ska_ha , size = 3 )
400
-
401
- np .testing .assert_allclose (h1 [:, c ], ska_h [0 ])
402
- np .testing .assert_allclose (h12 [:, c ], ska_h [1 ])
403
- np .testing .assert_allclose (h123 [:, c ], ska_h [2 ])
404
- np .testing .assert_allclose (h2_h1 [:, c ], ska_h [3 ])
406
+ if cohort_indexes is not None and c not in cohort_indexes :
407
+ # cohorts not selected via the `cohorts` argument are not computed, so their statistics should be all NaN
408
+ np .testing .assert_array_equal (h1 [:, c ], np .full_like (h1 [:, c ], np .nan ))
409
+ np .testing .assert_array_equal (h12 [:, c ], np .full_like (h12 [:, c ], np .nan ))
410
+ np .testing .assert_array_equal (h123 [:, c ], np .full_like (h123 [:, c ], np .nan ))
411
+ np .testing .assert_array_equal (
412
+ h2_h1 [:, c ], np .full_like (h2_h1 [:, c ], np .nan )
413
+ )
414
+ else :
415
+ gt = ds .call_genotype .values [:, sample_cohorts == c , :]
416
+ ska_gt = allel .GenotypeArray (gt )
417
+ ska_ha = ska_gt .to_haplotypes ()
418
+ ska_h = allel .moving_garud_h (ska_ha , size = 3 )
419
+
420
+ np .testing .assert_allclose (h1 [:, c ], ska_h [0 ])
421
+ np .testing .assert_allclose (h12 [:, c ], ska_h [1 ])
422
+ np .testing .assert_allclose (h123 [:, c ], ska_h [2 ])
423
+ np .testing .assert_allclose (h2_h1 [:, c ], ska_h [3 ])
405
424
406
425
407
426
def test_Garud_h__raise_on_non_diploid ():
0 commit comments