Skip to content

Commit 31dc606

Browse files
tomwhite authored and mergify[bot] committed
Rename 'call_genotype_non_allele' to 'call_genotype_fill'
1 parent 8f04f0b commit 31dc606

File tree

5 files changed

+28
-33
lines changed

5 files changed

+28
-33
lines changed

docs/api.rst

+1-1
Original file line number | Diff line number | Diff line change
@@ -133,7 +133,7 @@ By convention, variable names are singular in sgkit. For example, ``genotype_cou
133133
variables.call_genotype_complete_mask_spec
134134
variables.call_genotype_spec
135135
variables.call_genotype_mask_spec
136-
variables.call_genotype_non_allele_spec
136+
variables.call_genotype_fill_spec
137137
variables.call_genotype_phased_spec
138138
variables.call_genotype_probability_spec
139139
variables.call_genotype_probability_mask_spec

sgkit/io/vcf/vcf_reader.py

+4-4
Original file line number | Diff line number | Diff line change
@@ -340,10 +340,10 @@ def update_dataset(self, ds: xr.Dataset) -> None:
340340
{"comment": variables.call_genotype_mask_spec.__doc__.strip()},
341341
)
342342
if self.mixed_ploidy is True:
343-
ds["call_genotype_non_allele"] = (
343+
ds["call_genotype_fill"] = (
344344
[DIM_VARIANT, DIM_SAMPLE, DIM_PLOIDY],
345345
self.call_genotype < -1,
346-
{"comment": variables.call_genotype_non_allele_spec.__doc__.strip()},
346+
{"comment": variables.call_genotype_fill_spec.__doc__.strip()},
347347
)
348348
ds["call_genotype_phased"] = (
349349
[DIM_VARIANT, DIM_SAMPLE],
@@ -642,7 +642,7 @@ def vcf_to_zarrs(
642642
The (maximum) ploidy of genotypes in the VCF file.
643643
mixed_ploidy
644644
If True, genotype calls with fewer alleles than the specified ploidy will be padded
645-
with the non-allele sentinel value of -2. If false, calls with fewer alleles than
645+
with the fill (non-allele) sentinel value of -2. If false, calls with fewer alleles than
646646
the specified ploidy will be treated as incomplete and will be padded with the
647647
missing-allele sentinel value of -1.
648648
truncate_calls
@@ -845,7 +845,7 @@ def vcf_to_zarr(
845845
The (maximum) ploidy of genotypes in the VCF file.
846846
mixed_ploidy
847847
If True, genotype calls with fewer alleles than the specified ploidy will be padded
848-
with the non-allele sentinel value of -2. If false, calls with fewer alleles than
848+
with the fill (non-allele) sentinel value of -2. If false, calls with fewer alleles than
849849
the specified ploidy will be treated as incomplete and will be padded with the
850850
missing-allele sentinel value of -1.
851851
truncate_calls

sgkit/stats/aggregation.py

+17-19
Original file line number | Diff line number | Diff line change
@@ -617,7 +617,7 @@ def sample_stats(
617617
return conditional_merge_datasets(ds, variables.validate(new_ds), merge)
618618

619619

620-
def infer_non_alleles(
620+
def infer_call_genotype_fill(
621621
ds: Dataset,
622622
*,
623623
call_genotype: Hashable = variables.call_genotype,
@@ -626,24 +626,22 @@ def infer_non_alleles(
626626
variables.validate(ds, {call_genotype: variables.call_genotype_spec})
627627
mixed_ploidy = ds[variables.call_genotype].attrs.get("mixed_ploidy", False)
628628
if mixed_ploidy:
629-
call_genotype_non_allele = ds[call_genotype] < -1
629+
call_genotype_fill = ds[call_genotype] < -1
630630
else:
631-
call_genotype_non_allele = xr.full_like(ds[call_genotype], False, "b1")
632-
new_ds = create_dataset(
633-
{variables.call_genotype_non_allele: call_genotype_non_allele}
634-
)
631+
call_genotype_fill = xr.full_like(ds[call_genotype], False, "b1")
632+
new_ds = create_dataset({variables.call_genotype_fill: call_genotype_fill})
635633
return conditional_merge_datasets(ds, variables.validate(new_ds), merge)
636634

637635

638636
def infer_call_ploidy(
639637
ds: Dataset,
640638
*,
641639
call_genotype: Hashable = variables.call_genotype,
642-
call_genotype_non_allele: Hashable = variables.call_genotype_non_allele,
640+
call_genotype_fill: Hashable = variables.call_genotype_fill,
643641
merge: bool = True,
644642
) -> Dataset:
645643
"""Infer the ploidy of each call genotype based on the number of
646-
non-allele values in each call genotype.
644+
fill (non-allele) values in each call genotype.
647645
648646
Parameters
649647
----------
@@ -653,12 +651,12 @@ def infer_call_ploidy(
653651
Input variable name holding call_genotype as defined by
654652
:data:`sgkit.variables.call_genotype_spec`.
655653
Must be present in ``ds``.
656-
call_genotype_non_allele
657-
Input variable name holding call_genotype_non_allele as defined by
658-
:data:`sgkit.variables.call_genotype_non_allele_spec`.
654+
call_genotype_fill
655+
Input variable name holding call_genotype_fill as defined by
656+
:data:`sgkit.variables.call_genotype_fill_spec`.
659657
If the variable is not present in ``ds``, it will be computed
660-
assuming that allele values less than -1 are non-alleles in mixed ploidy
661-
datasets, or that no non-alleles are present in fixed ploidy datasets.
658+
assuming that allele values less than -1 are fill (non-allele) values in mixed ploidy
659+
datasets, or that no fill values are present in fixed ploidy datasets.
662660
merge
663661
If True (the default), merge the input dataset and the computed
664662
output variables into a single dataset, otherwise return only
@@ -671,13 +669,13 @@ def infer_call_ploidy(
671669
"""
672670
ds = define_variable_if_absent(
673671
ds,
674-
variables.call_genotype_non_allele,
675-
call_genotype_non_allele,
676-
infer_non_alleles,
672+
variables.call_genotype_fill,
673+
call_genotype_fill,
674+
infer_call_genotype_fill,
677675
)
678676
mixed_ploidy = ds[variables.call_genotype].attrs.get("mixed_ploidy", False)
679677
if mixed_ploidy:
680-
call_ploidy = (~ds[call_genotype_non_allele]).sum(axis=-1)
678+
call_ploidy = (~ds[call_genotype_fill]).sum(axis=-1)
681679
else:
682680
ploidy = ds[variables.call_genotype].shape[-1]
683681
call_ploidy = xr.full_like(ds[variables.call_genotype][..., 0], ploidy)
@@ -694,7 +692,7 @@ def infer_variant_ploidy(
694692
merge: bool = True,
695693
) -> Dataset:
696694
"""Infer the ploidy at each variant across all samples based on
697-
the number of non-allele values in call genotypes.
695+
the number of fill (non-allele) values in call genotypes.
698696
699697
Parameters
700698
----------
@@ -743,7 +741,7 @@ def infer_sample_ploidy(
743741
merge: bool = True,
744742
) -> Dataset:
745743
"""Infer the ploidy of each sample across all variants based on
746-
the number of non-allele values in call genotypes.
744+
the number of fill (non-allele) values in call genotypes.
747745
748746
Parameters
749747
----------

sgkit/tests/io/vcf/test_vcf_reader.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -634,7 +634,7 @@ def test_vcf_to_zarr__mixed_ploidy_vcf(
634634
assert_array_equal(ds["sample_id"], ["SAMPLE1", "SAMPLE2", "SAMPLE3"])
635635

636636
assert ds["call_genotype"].attrs["mixed_ploidy"] == mixed_ploidy
637-
pad = -2 if mixed_ploidy else -1 # -2 indicates a non-allele
637+
pad = -2 if mixed_ploidy else -1 # -2 indicates a fill (non-allele) value
638638
call_genotype = np.array(
639639
[
640640
[[0, 0, 1, 1, pad], [0, 0, pad, pad, pad], [0, 0, 0, 1, pad]],
@@ -648,7 +648,7 @@ def test_vcf_to_zarr__mixed_ploidy_vcf(
648648
assert_array_equal(ds["call_genotype"], call_genotype)
649649
assert_array_equal(ds["call_genotype_mask"], call_genotype < 0)
650650
if mixed_ploidy:
651-
assert_array_equal(ds["call_genotype_non_allele"], call_genotype < -1)
651+
assert_array_equal(ds["call_genotype_fill"], call_genotype < -1)
652652

653653

654654
@pytest.mark.parametrize(
@@ -1141,7 +1141,7 @@ def test_spec(shared_datadir, tmp_path):
11411141
"call_GQ",
11421142
"call_genotype",
11431143
"call_genotype_mask",
1144-
"call_genotype_non_allele",
1144+
"call_genotype_fill",
11451145
"call_genotype_phased",
11461146
"call_HQ",
11471147
"sample_id",

sgkit/variables.py

+3-6
Original file line number | Diff line number | Diff line change
@@ -245,17 +245,14 @@ def _check_field(
245245
)
246246
)
247247

248-
(
249-
call_genotype_non_allele,
250-
call_genotype_non_allele_spec,
251-
) = SgkitVariables.register_variable(
248+
(call_genotype_fill, call_genotype_fill_spec,) = SgkitVariables.register_variable(
252249
ArrayLikeSpec(
253-
"call_genotype_non_allele",
250+
"call_genotype_fill",
254251
kind="b",
255252
ndim=3,
256253
__doc__="""
257254
A flag for each allele position within mixed ploidy call genotypes
258-
indicating non-allele values of lower ploidy calls.
255+
indicating fill (non-allele) values of lower ploidy calls.
259256
""",
260257
)
261258
)

0 commit comments

Comments
 (0)