Skip to content

Commit da4df51

Browse files
authored
Allow sample_id and variant_id to be 'O' dtype to support variable length strings (sgkit-dev#116)
* Allow sample_id and variant_id to be 'O' dtype to support variable length strings
1 parent 305ce19 commit da4df51

File tree

1 file changed

+8
-8
lines changed

1 file changed

+8
-8
lines changed

sgkit/api.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ def create_genotype_call_dataset(
3535
The reference position of the variant.
3636
variant_alleles : array_like, zero-terminated bytes, e.g. "S1", or object
3737
The possible alleles for the variant.
38-
sample_id : array_like, str
38+
sample_id : array_like, str or object
3939
The unique identifier of the sample.
4040
call_genotype : array_like, int
4141
Genotype, encoded as allele values (0 for the reference, 1 for
@@ -44,7 +44,7 @@ def create_genotype_call_dataset(
4444
call_genotype_phased : array_like, bool, optional
4545
A flag for each call indicating if it is phased or not. If
4646
omitted all calls are unphased.
47-
variant_id: array_like, str, optional
47+
variant_id: array_like, str or object, optional
4848
The unique identifier of the variant.
4949
5050
Returns
@@ -56,7 +56,7 @@ def create_genotype_call_dataset(
5656
check_array_like(variant_contig, kind="i", ndim=1)
5757
check_array_like(variant_position, kind="i", ndim=1)
5858
check_array_like(variant_alleles, kind={"S", "O"}, ndim=2)
59-
check_array_like(sample_id, kind="U", ndim=1)
59+
check_array_like(sample_id, kind={"U", "O"}, ndim=1)
6060
check_array_like(call_genotype, kind="i", ndim=3)
6161
data_vars: Dict[Hashable, Any] = {
6262
"variant_contig": ([DIM_VARIANT], variant_contig),
@@ -76,7 +76,7 @@ def create_genotype_call_dataset(
7676
call_genotype_phased,
7777
)
7878
if variant_id is not None:
79-
check_array_like(variant_id, kind="U", ndim=1)
79+
check_array_like(variant_id, kind={"U", "O"}, ndim=1)
8080
data_vars["variant_id"] = ([DIM_VARIANT], variant_id)
8181
attrs: Dict[Hashable, Any] = {"contigs": variant_contig_names}
8282
return xr.Dataset(data_vars=data_vars, attrs=attrs)
@@ -104,12 +104,12 @@ def create_genotype_dosage_dataset(
104104
The reference position of the variant.
105105
variant_alleles : array_like, zero-terminated bytes, e.g. "S1", or object
106106
The possible alleles for the variant.
107-
sample_id : array_like, str
107+
sample_id : array_like, str or object
108108
The unique identifier of the sample.
109109
call_dosage : array_like, float
110110
Dosages, encoded as floats, with NaN indicating a
111111
missing value.
112-
variant_id: array_like, str, optional
112+
variant_id: array_like, str or object, optional
113113
The unique identifier of the variant.
114114
115115
Returns
@@ -121,7 +121,7 @@ def create_genotype_dosage_dataset(
121121
check_array_like(variant_contig, kind="i", ndim=1)
122122
check_array_like(variant_position, kind="i", ndim=1)
123123
check_array_like(variant_alleles, kind={"S", "O"}, ndim=2)
124-
check_array_like(sample_id, kind="U", ndim=1)
124+
check_array_like(sample_id, kind={"U", "O"}, ndim=1)
125125
check_array_like(call_dosage, kind="f", ndim=2)
126126
data_vars: Dict[Hashable, Any] = {
127127
"variant_contig": ([DIM_VARIANT], variant_contig),
@@ -132,7 +132,7 @@ def create_genotype_dosage_dataset(
132132
"call_dosage_mask": ([DIM_VARIANT, DIM_SAMPLE], np.isnan(call_dosage),),
133133
}
134134
if variant_id is not None:
135-
check_array_like(variant_id, kind="U", ndim=1)
135+
check_array_like(variant_id, kind={"U", "O"}, ndim=1)
136136
data_vars["variant_id"] = ([DIM_VARIANT], variant_id)
137137
attrs: Dict[Hashable, Any] = {"contigs": variant_contig_names}
138138
return xr.Dataset(data_vars=data_vars, attrs=attrs)

0 commit comments

Comments
 (0)