Skip to content

Commit 53352af

Browse files
committed
Updates for data var rename
1 parent 3c8ef95 commit 53352af

File tree

9 files changed

+18
-17
lines changed

9 files changed

+18
-17
lines changed

sgkit/stats/regenie.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -841,7 +841,7 @@ def regenie(
841841
G = ds[dosage]
842842
X = da.asarray(concat_2d(ds[list(covariates)], dims=("samples", "covariates")))
843843
Y = da.asarray(concat_2d(ds[list(traits)], dims=("samples", "traits")))
844-
contigs = ds["variant/contig"]
844+
contigs = ds["variant_contig"]
845845
return regenie_transform(
846846
G.T,
847847
X,

sgkit/tests/test_regenie.py

+9-11
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
from sgkit.testing import simulate_genotype_call_dataset
3030

3131
regenie_sim = functools.partial(
32-
regenie, dosage="call/dosage", covariates="sample/covariate", traits="sample/trait"
32+
regenie, dosage="call_dosage", covariates="sample_covariate", traits="sample_trait"
3333
)
3434

3535

@@ -46,16 +46,16 @@ def simulate_regression_dataset(
4646
ds = simulate_genotype_call_dataset(
4747
n_variant=n_variant, n_sample=n_sample, n_contig=n_contig
4848
)
49-
G = ds["call/genotype"].sum(dim="ploidy")
49+
G = ds["call_genotype"].sum(dim="ploidy")
5050
X = rs.normal(size=(n_sample, n_covariate))
5151
Y = (
5252
G.T.data @ rs.normal(size=(G.shape[0], n_trait))
5353
+ X @ rs.normal(size=(n_covariate, n_trait))
5454
+ rs.normal(size=(n_sample, 1), scale=noise_scale)
5555
)
56-
ds["call/dosage"] = G
57-
ds["sample/covariate"] = (("samples", "covariates"), X)
58-
ds["sample/trait"] = (("samples", "traits"), Y)
56+
ds["call_dosage"] = G
57+
ds["sample_covariate"] = (("samples", "covariates"), X)
58+
ds["sample_trait"] = (("samples", "traits"), Y)
5959
return ds
6060

6161

@@ -185,7 +185,7 @@ def prepare_stage_3_sgkit_results(
185185
for k, v in dataclasses.asdict(stats).items()
186186
}
187187
)
188-
dsr = dsr.merge(ds[["variant/id"]].rename({"variant/id": "variant_id"}))
188+
dsr = dsr.merge(ds[["variant_id"]])
189189
dsr = dsr.assign(outcome=xr.DataArray(df_trait.columns, dims=("outcomes")))
190190
df = dsr.to_dataframe().reset_index(drop=True) # type: ignore[no-untyped-call]
191191
return df
@@ -244,12 +244,10 @@ def check_simulation_result(
244244
# Load simulated data
245245
with zarr.ZipStore(str(dataset_dir / "genotypes.zarr.zip"), mode="r") as store:
246246
ds = xr.open_zarr(store) # type: ignore[no-untyped-call]
247-
# Temporary workaround for https://github.com/pystatgen/sgkit/issues/62
248-
ds = ds.rename_vars({v: v.replace("-", "/") for v in ds})
249247
df_covariate = load_covariates(dataset_dir)
250248
df_trait = load_traits(dataset_dir)
251-
contigs = ds["variant/contig"].values
252-
G = ds["call/genotype"].sum(dim="ploidy").values
249+
contigs = ds["variant_contig"].values
250+
G = ds["call_genotype"].sum(dim="ploidy").values
253251
X = df_covariate.values
254252
Y = df_trait.values
255253

@@ -308,7 +306,7 @@ def test_regenie__32bit_float(ds):
308306
ds = ds.assign(
309307
{
310308
v: ds[v].astype(np.float32)
311-
for v in ["call/dosage", "sample/covariate", "sample/trait"]
309+
for v in ["call_dosage", "sample_covariate", "sample_trait"]
312310
}
313311
)
314312
# Ensure that a uniform demotion in types for input arrays (aside from contigs)
Binary file not shown.
Binary file not shown.

validation/gwas/method/regenie/README.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -26,4 +26,5 @@ invoke export --runs sim_sm_02-wgr_02 --runs sim_sm_01-wgr_01
2626

2727
### Glow WGR Release
2828

29-
This validation was run for [glow.py==0.5.0](https://pypi.org/project/glow.py/0.5.0/). At this time, binary traits are not yet supported and the REGENIE implementation hasn't even been officially released. Support for [binary traits should come in the next release](https://github.com/projectglow/glow/issues/256) along with official support at which time this validation should be updated. From that point onward, there is little need to update this data unless either implementation (sgkit or Glow) has been shown to be incorrect.
29+
This validation was run for [glow.py==0.5.0](https://pypi.org/project/glow.py/0.5.0/). At this time, binary traits are not yet supported, and the REGENIE implementation hasn't even been officially released. Support for [binary traits should come in the next release](https://github.com/projectglow/glow/issues/256) along with official support, at which time this validation should be updated. From that point onward, there is little need to update this data unless either implementation (sgkit or Glow) has been shown to be incorrect.
30+

validation/gwas/method/regenie/glow_wgr.py

+1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import io
88
import logging
9+
import logging.config
910
import shutil
1011
from pathlib import Path
1112
from typing import Dict, List, Optional

validation/gwas/method/regenie/hail_sim.py

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# coding: utf-8
33
import io
44
import logging
5+
import logging.config
56
import shutil
67
from pathlib import Path
78

validation/gwas/method/regenie/sgkit_zarr.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# coding: utf-8
33

44
import logging
5+
import logging.config
56
from pathlib import Path
67

78
import fire
@@ -17,9 +18,7 @@ def run(dataset: str, dataset_dir="data/dataset"):
1718
dataset_dir = Path(dataset_dir)
1819
plink_path = dataset_dir / dataset / "genotypes"
1920
zarr_path = dataset_dir / dataset / "genotypes.zarr.zip"
20-
ds = read_plink(plink_path, bim_sep="\t", fam_sep="\t")
21-
# Temporary workaround for https://github.com/pystatgen/sgkit/issues/62
22-
ds = ds.rename_vars({v: v.replace("/", "-") for v in ds})
21+
ds = read_plink(path=plink_path, bim_sep="\t", fam_sep="\t")
2322
# Pre-compute string lengths until this is done:
2423
# https://github.com/pystatgen/sgkit-plink/issues/12
2524
ds = ds.compute()

validation/gwas/method/regenie/tasks.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import glob
22
import logging
3+
import logging.config
34
import os
45
import shutil
56
from pathlib import Path
@@ -73,7 +74,7 @@ def copy_files(src, dst, patterns):
7374
dst.mkdir(parents=True, exist_ok=True)
7475
files = [Path(f) for pattern in patterns for f in glob.glob(str(src / pattern))]
7576
for f in files:
76-
logger.info("\tCopying path: {f}")
77+
logger.info(f"\tCopying path: {f}")
7778
if f.is_dir():
7879
shutil.copytree(f, dst / f.name)
7980
else:

0 commit comments

Comments
 (0)