@@ -152,7 +152,7 @@ def for_field(
152
152
) -> "VcfFieldHandler" :
153
153
if field == "FORMAT/GT" :
154
154
return GenotypeFieldHandler (
155
- vcf , chunk_length , ploidy , mixed_ploidy , truncate_calls
155
+ vcf , chunk_length , ploidy , mixed_ploidy , truncate_calls , max_alt_alleles
156
156
)
157
157
category = field .split ("/" )[0 ]
158
158
vcf_field_defs = _get_vcf_field_defs (vcf , category )
@@ -277,13 +277,15 @@ def __init__(
277
277
ploidy : int ,
278
278
mixed_ploidy : bool ,
279
279
truncate_calls : bool ,
280
+ max_alt_alleles : int ,
280
281
) -> None :
281
282
n_sample = len (vcf .samples )
282
283
self .call_genotype = np .empty ((chunk_length , n_sample , ploidy ), dtype = "i1" )
283
284
self .call_genotype_phased = np .empty ((chunk_length , n_sample ), dtype = bool )
284
285
self .ploidy = ploidy
285
286
self .mixed_ploidy = mixed_ploidy
286
287
self .truncate_calls = truncate_calls
288
+ self .max_alt_alleles = max_alt_alleles
287
289
288
290
def add_variant (self , i : int , variant : Any ) -> None :
289
291
fill = - 2 if self .mixed_ploidy else - 1
@@ -296,6 +298,10 @@ def add_variant(self, i: int, variant: Any) -> None:
296
298
self .call_genotype [i , ..., 0 :n ] = gt [..., 0 :n ]
297
299
self .call_genotype [i , ..., n :] = fill
298
300
self .call_genotype_phased [i ] = gt [..., - 1 ]
301
+
302
+ # set any calls that exceed maximum number of alt alleles as missing
303
+ self .call_genotype [i ][self .call_genotype [i ] > self .max_alt_alleles ] = - 1
304
+
299
305
else :
300
306
self .call_genotype [i ] = fill
301
307
self .call_genotype_phased [i ] = 0
@@ -583,7 +589,8 @@ def vcf_to_zarrs(
583
589
max_alt_alleles
584
590
The (maximum) number of alternate alleles in the VCF file. Any records with more than
585
591
this number of alternate alleles will have the extra alleles dropped (the `variant_allele`
586
- variable will be truncated). Call genotype fields will however be unaffected.
592
+ variable will be truncated). Any call genotype values that refer to the dropped alleles will
593
+ be set to the missing-allele sentinel value of -1.
587
594
fields
588
595
Extra fields to extract data for. A list of strings, with ``INFO`` or ``FORMAT`` prefixes.
589
596
Wildcards are permitted too, for example: ``["INFO/*", "FORMAT/DP"]``.
@@ -739,7 +746,8 @@ def vcf_to_zarr(
739
746
max_alt_alleles
740
747
The (maximum) number of alternate alleles in the VCF file. Any records with more than
741
748
this number of alternate alleles will have the extra alleles dropped (the `variant_allele`
742
- variable will be truncated). Call genotype fields will however be unaffected.
749
+ variable will be truncated). Any call genotype values that refer to the dropped alleles will
750
+ be set to the missing-allele sentinel value of -1.
743
751
fields
744
752
Extra fields to extract data for. A list of strings, with ``INFO`` or ``FORMAT`` prefixes.
745
753
Wildcards are permitted too, for example: ``["INFO/*", "FORMAT/DP"]``.
0 commit comments