File tree 1 file changed +32
-0
lines changed
1 file changed +32
-0
lines changed Original file line number Diff line number Diff line change 4
4
import time
5
5
6
6
from sgkit .io .vcf .vcf_reader import vcf_to_zarr
7
+ from sgkit .io .vcf .vcf_writer import zarr_to_vcf
7
8
from sgkit .tests .io .vcf .utils import path_for_test
8
9
9
10
@@ -39,6 +40,37 @@ def test_vcf_read_speed(shared_datadir, tmp_path):
39
40
print (f"speed: { speed :.1f} MB/s" )
40
41
41
42
43
def test_vcf_write_speed(shared_datadir, tmp_path):
    """Report how fast ``zarr_to_vcf`` writes a VCF file.

    The 1000 Genomes chr20 test VCF is first converted to a Zarr store with
    ``vcf_to_zarr``; that store is then written back out as VCF twice via
    ``time_func``. Only the second measurement is reported. The test prints
    the output size in bytes, the measured duration and the resulting
    throughput; it makes no assertions.

    Parameters
    ----------
    shared_datadir
        Directory holding the shared test data (passed to ``path_for_test``).
    tmp_path
        Temporary directory that receives the intermediate Zarr store and
        the VCF output.
    """
    input_vcf = path_for_test(
        shared_datadir,
        "1000G.phase3.broad.withGenotypes.chr20.10100000.vcf.gz",
    )
    zarr_path = (tmp_path / "1000G.zarr").as_posix()
    vcf_path = (tmp_path / "1000G.vcf").as_posix()

    # Build the Zarr store that serves as input to the timed write.
    vcf_to_zarr(
        input_vcf,
        zarr_path,
        fields=["INFO/*", "FORMAT/*"],
        field_defs={"FORMAT/AD": {"Number": "R"}},
        chunk_length=1_000,
    )

    # Warm-up pass: its timing is discarded because it includes numba JIT
    # compilation.
    time_func(zarr_to_vcf, zarr_path, vcf_path)
    # Measured pass.
    duration = time_func(zarr_to_vcf, zarr_path, vcf_path)

    bytes_written = os.path.getsize(vcf_path)
    speed = bytes_written / (1_000_000 * duration)

    print(f"bytes written: { bytes_written } ")
    print(f"duration: { duration :.2f} s")
    print(f"speed: { speed :.1f} MB/s")
72
+
73
+
42
74
def time_func (func , * args , ** kwargs ):
43
75
start = time .time ()
44
76
func (* args , ** kwargs )
You can’t perform that action at this time.
0 commit comments