Skip to content

Commit 8f04f0b

Browse files
tomwhite and mergify[bot]
authored and committed
Handle case where VCF filter is not defined in the header
1 parent 34d640a commit 8f04f0b

File tree

3 files changed

+30
-3
lines changed

3 files changed

+30
-3
lines changed

sgkit/io/vcf/vcf_reader.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -456,9 +456,11 @@ def vcf_to_zarr_sequential(
456 456
variant_quality[i] = (
457 457
variant.QUAL if variant.QUAL is not None else FLOAT32_MISSING
458 458
)
459-
for f in variant.FILTERS:
460-
variant_filter[i][filters.index(f)] = True
461-
459+
try:
460+
for f in variant.FILTERS:
461+
variant_filter[i][filters.index(f)] = True
462+
except ValueError:
463+
raise ValueError(f"Filter '{f}' is not defined in the header.")
462 464
for field_handler in field_handlers:
463 465
field_handler.add_variant(i, variant)
464 466

no_filter_defined.vcf (new test data file)

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,14 @@
1+
##fileformat=VCFv4.2
2+
##FILTER=<ID=PASS,Description="All filters passed">
3+
##fileDate=20201009
4+
##source=.
5+
##reference=./simple.fasta
6+
##contig=<ID=CHR1,length=60>
7+
##contig=<ID=CHR2,length=60>
8+
##contig=<ID=CHR3,length=60>
9+
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data">
10+
##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes">
11+
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
12+
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1 SAMPLE2 SAMPLE3
13+
CHR1 2 . A T 60 PASS NS=3;AC=3 GT 0/0 0/0 0/0
14+
CHR1 7 . A C 60 FAIL NS=3;AC=4 GT 0/0 0/1 0/1

sgkit/tests/io/vcf/test_vcf_reader.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -727,6 +727,17 @@ def test_vcf_to_zarr__contig_not_defined_in_header(shared_datadir, tmp_path):
727 727
vcf_to_zarr(path, output)
728 728

729 729

730+
def test_vcf_to_zarr__filter_not_defined_in_header(shared_datadir, tmp_path):
731+
path = path_for_test(shared_datadir, "no_filter_defined.vcf")
732+
output = tmp_path.joinpath("vcf.zarr").as_posix()
733+
734+
with pytest.raises(
735+
ValueError,
736+
match=r"Filter 'FAIL' is not defined in the header.",
737+
):
738+
vcf_to_zarr(path, output)
739+
740+
730 741
def test_vcf_to_zarr__large_number_of_contigs(shared_datadir, tmp_path):
731 742
path = path_for_test(shared_datadir, "Homo_sapiens_assembly38.headerOnly.vcf.gz")
732 743
output = tmp_path.joinpath("vcf.zarr").as_posix()

0 commit comments

Comments
 (0)