Skip to content

Commit 3c8ef95

Browse files
committed
Merge branch 'master' of github.com:pystatgen/sgkit into regenie_wgr
2 parents 3ddbd78 + f61aa84 commit 3c8ef95

File tree

7 files changed, with 44 additions (+44) and 44 deletions (-44).

sgkit/api.py

+15 -15
Original file line number | Diff line number | Diff line change
@@ -59,25 +59,25 @@ def create_genotype_call_dataset(
5959
check_array_like(sample_id, kind="U", ndim=1)
6060
check_array_like(call_genotype, kind="i", ndim=3)
6161
data_vars: Dict[Hashable, Any] = {
62-
"variant/contig": ([DIM_VARIANT], variant_contig),
63-
"variant/position": ([DIM_VARIANT], variant_position),
64-
"variant/alleles": ([DIM_VARIANT, DIM_ALLELE], variant_alleles),
65-
"sample/id": ([DIM_SAMPLE], sample_id),
66-
"call/genotype": ([DIM_VARIANT, DIM_SAMPLE, DIM_PLOIDY], call_genotype),
67-
"call/genotype_mask": (
62+
"variant_contig": ([DIM_VARIANT], variant_contig),
63+
"variant_position": ([DIM_VARIANT], variant_position),
64+
"variant_allele": ([DIM_VARIANT, DIM_ALLELE], variant_alleles),
65+
"sample_id": ([DIM_SAMPLE], sample_id),
66+
"call_genotype": ([DIM_VARIANT, DIM_SAMPLE, DIM_PLOIDY], call_genotype),
67+
"call_genotype_mask": (
6868
[DIM_VARIANT, DIM_SAMPLE, DIM_PLOIDY],
6969
call_genotype < 0,
7070
),
7171
}
7272
if call_genotype_phased is not None:
7373
check_array_like(call_genotype_phased, kind="b", ndim=2)
74-
data_vars["call/genotype_phased"] = (
74+
data_vars["call_genotype_phased"] = (
7575
[DIM_VARIANT, DIM_SAMPLE],
7676
call_genotype_phased,
7777
)
7878
if variant_id is not None:
7979
check_array_like(variant_id, kind="U", ndim=1)
80-
data_vars["variant/id"] = ([DIM_VARIANT], variant_id)
80+
data_vars["variant_id"] = ([DIM_VARIANT], variant_id)
8181
attrs: Dict[Hashable, Any] = {"contigs": variant_contig_names}
8282
return xr.Dataset(data_vars=data_vars, attrs=attrs)
8383

@@ -124,15 +124,15 @@ def create_genotype_dosage_dataset(
124124
check_array_like(sample_id, kind="U", ndim=1)
125125
check_array_like(call_dosage, kind="f", ndim=2)
126126
data_vars: Dict[Hashable, Any] = {
127-
"variant/contig": ([DIM_VARIANT], variant_contig),
128-
"variant/position": ([DIM_VARIANT], variant_position),
129-
"variant/alleles": ([DIM_VARIANT, DIM_ALLELE], variant_alleles),
130-
"sample/id": ([DIM_SAMPLE], sample_id),
131-
"call/dosage": ([DIM_VARIANT, DIM_SAMPLE], call_dosage),
132-
"call/dosage_mask": ([DIM_VARIANT, DIM_SAMPLE], np.isnan(call_dosage),),
127+
"variant_contig": ([DIM_VARIANT], variant_contig),
128+
"variant_position": ([DIM_VARIANT], variant_position),
129+
"variant_allele": ([DIM_VARIANT, DIM_ALLELE], variant_alleles),
130+
"sample_id": ([DIM_SAMPLE], sample_id),
131+
"call_dosage": ([DIM_VARIANT, DIM_SAMPLE], call_dosage),
132+
"call_dosage_mask": ([DIM_VARIANT, DIM_SAMPLE], np.isnan(call_dosage),),
133133
}
134134
if variant_id is not None:
135135
check_array_like(variant_id, kind="U", ndim=1)
136-
data_vars["variant/id"] = ([DIM_VARIANT], variant_id)
136+
data_vars["variant_id"] = ([DIM_VARIANT], variant_id)
137137
attrs: Dict[Hashable, Any] = {"contigs": variant_contig_names}
138138
return xr.Dataset(data_vars=data_vars, attrs=attrs)

sgkit/stats/aggregation.py

+5 -5
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ def count_alleles(ds: Dataset) -> DataArray:
1515
1616
Returns
1717
-------
18-
variant/allele_count : DataArray
18+
variant_allele_count : DataArray
1919
Allele counts with shape (variants, alleles) and values
2020
corresponding to the number of non-missing occurrences
2121
of each allele.
@@ -26,7 +26,7 @@ def count_alleles(ds: Dataset) -> DataArray:
2626
>>> import sgkit as sg
2727
>>> from sgkit.testing import simulate_genotype_call_dataset
2828
>>> ds = simulate_genotype_call_dataset(n_variant=4, n_sample=2, seed=1)
29-
>>> ds['call/genotype'].to_series().unstack().astype(str).apply('/'.join, axis=1).unstack() # doctest: +NORMALIZE_WHITESPACE
29+
>>> ds['call_genotype'].to_series().unstack().astype(str).apply('/'.join, axis=1).unstack() # doctest: +NORMALIZE_WHITESPACE
3030
samples 0 1
3131
variants
3232
0 1/0 1/0
@@ -42,8 +42,8 @@ def count_alleles(ds: Dataset) -> DataArray:
4242
"""
4343
# Count each allele index individually as a 1D vector and
4444
# restack into new alleles dimension with same order
45-
G = ds["call/genotype"].stack(calls=("samples", "ploidy"))
46-
M = ds["call/genotype_mask"].stack(calls=("samples", "ploidy"))
45+
G = ds["call_genotype"].stack(calls=("samples", "ploidy"))
46+
M = ds["call_genotype_mask"].stack(calls=("samples", "ploidy"))
4747
n_variant, n_allele = G.shape[0], ds.dims["alleles"]
4848
max_allele = n_allele + 1
4949

@@ -68,4 +68,4 @@ def count_alleles(ds: Dataset) -> DataArray:
6868
AC = CTS.sum(axis=0)[:, :n_allele]
6969
assert AC.shape == (n_variant, n_allele)
7070

71-
return DataArray(data=AC, dims=("variants", "alleles"), name="variant/allele_count")
71+
return DataArray(data=AC, dims=("variants", "alleles"), name="variant_allele_count")

sgkit/stats/association.py

+4 -4
Original file line number | Diff line number | Diff line change
@@ -103,7 +103,7 @@ def _get_loop_covariates(ds: Dataset, dosage: Optional[str] = None) -> Array:
103103
if dosage is None:
104104
# TODO: This should be (probably gwas-specific) allele
105105
# count with sex chromosome considerations
106-
G = ds["call/genotype"].sum(dim="ploidy") # pragma: no cover
106+
G = ds["call_genotype"].sum(dim="ploidy") # pragma: no cover
107107
else:
108108
G = ds[dosage]
109109
return da.asarray(G.data)
@@ -217,8 +217,8 @@ def gwas_linear_regression(
217217
res = linear_regression(G.T, X, Y)
218218
return xr.Dataset(
219219
{
220-
"variant/beta": (("variants", "traits"), res.beta),
221-
"variant/t_value": (("variants", "traits"), res.t_value),
222-
"variant/p_value": (("variants", "traits"), res.p_value),
220+
"variant_beta": (("variants", "traits"), res.beta),
221+
"variant_t_value": (("variants", "traits"), res.t_value),
222+
"variant_p_value": (("variants", "traits"), res.p_value),
223223
}
224224
)

sgkit/testing.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ def simulate_genotype_call_dataset(
3737
Number of alleles to simulate
3838
n_contig : int, optional
3939
Number of contigs to partition variants with,
40-
controlling values in `variant/contig`. Values
40+
controlling values in `variant_contig`. Values
4141
will all be 0 by default with `n_contig` == 1.
4242
seed : int, optional
4343
Seed for random number generation

sgkit/tests/test_aggregation.py

+3 -3
Original file line number | Diff line number | Diff line change
@@ -14,9 +14,9 @@ def get_dataset(calls: ArrayLike, **kwargs: Any) -> Dataset:
1414
ds = simulate_genotype_call_dataset(
1515
n_variant=calls.shape[0], n_sample=calls.shape[1], **kwargs
1616
)
17-
dims = ds["call/genotype"].dims
18-
ds["call/genotype"] = xr.DataArray(calls, dims=dims)
19-
ds["call/genotype_mask"] = xr.DataArray(calls < 0, dims=dims)
17+
dims = ds["call_genotype"].dims
18+
ds["call_genotype"] = xr.DataArray(calls, dims=dims)
19+
ds["call_genotype_mask"] = xr.DataArray(calls < 0, dims=dims)
2020
return ds
2121

2222

sgkit/tests/test_api.py

+15 -15
Original file line number | Diff line number | Diff line change
@@ -41,14 +41,14 @@ def test_create_genotype_call_dataset():
4141
assert DIM_ALLELE in ds.dims
4242

4343
assert ds.attrs["contigs"] == variant_contig_names
44-
assert_array_equal(ds["variant/contig"], variant_contig)
45-
assert_array_equal(ds["variant/position"], variant_position)
46-
assert_array_equal(ds["variant/alleles"], variant_alleles)
47-
assert_array_equal(ds["variant/id"], variant_id)
48-
assert_array_equal(ds["sample/id"], sample_id)
49-
assert_array_equal(ds["call/genotype"], call_genotype)
50-
assert_array_equal(ds["call/genotype_mask"], call_genotype < 0)
51-
assert_array_equal(ds["call/genotype_phased"], call_genotype_phased)
44+
assert_array_equal(ds["variant_contig"], variant_contig)
45+
assert_array_equal(ds["variant_position"], variant_position)
46+
assert_array_equal(ds["variant_allele"], variant_alleles)
47+
assert_array_equal(ds["variant_id"], variant_id)
48+
assert_array_equal(ds["sample_id"], sample_id)
49+
assert_array_equal(ds["call_genotype"], call_genotype)
50+
assert_array_equal(ds["call_genotype_mask"], call_genotype < 0)
51+
assert_array_equal(ds["call_genotype_phased"], call_genotype_phased)
5252

5353

5454
def test_create_genotype_dosage_dataset():
@@ -73,10 +73,10 @@ def test_create_genotype_dosage_dataset():
7373
assert DIM_SAMPLE in ds.dims
7474

7575
assert ds.attrs["contigs"] == variant_contig_names
76-
assert_array_equal(ds["variant/contig"], variant_contig)
77-
assert_array_equal(ds["variant/position"], variant_position)
78-
assert_array_equal(ds["variant/alleles"], variant_alleles)
79-
assert_array_equal(ds["variant/id"], variant_id)
80-
assert_array_equal(ds["sample/id"], sample_id)
81-
assert_array_equal(ds["call/dosage"], call_dosage)
82-
assert_array_equal(ds["call/dosage_mask"], np.isnan(call_dosage))
76+
assert_array_equal(ds["variant_contig"], variant_contig)
77+
assert_array_equal(ds["variant_position"], variant_position)
78+
assert_array_equal(ds["variant_allele"], variant_alleles)
79+
assert_array_equal(ds["variant_id"], variant_id)
80+
assert_array_equal(ds["sample_id"], sample_id)
81+
assert_array_equal(ds["call_dosage"], call_dosage)
82+
assert_array_equal(ds["call_dosage_mask"], np.isnan(call_dosage))

sgkit/tests/test_association.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -129,7 +129,7 @@ def _get_statistics(
129129
res = _sm_statistics(ds, i, add_intercept)
130130
df_pred.append(
131131
dsr.to_dataframe() # type: ignore[no-untyped-call]
132-
.rename(columns=lambda c: c.replace("variant/", ""))
132+
.rename(columns=lambda c: c.replace("variant_", ""))
133133
.iloc[i]
134134
.to_dict()
135135
)

0 commit comments

Comments
 (0)