Skip to content

Commit d05b262

Browse files
committed
First version of metadata parsing in create_zarr_structure (ref #112)
1 parent f3b28e8 commit d05b262

File tree

2 files changed

+82
-24
lines changed

2 files changed

+82
-24
lines changed

fractal/tasks/create_zarr_structure.py

Lines changed: 57 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,12 @@
1515
import os
1616
from glob import glob
1717

18+
import pandas as pd
1819
import zarr
20+
from anndata.experimental import write_elem
1921

2022
from fractal.tasks.lib_parse_filename_metadata import parse_metadata
23+
from fractal.tasks.lib_regions_of_interest import prepare_ROIs_table
2124
from fractal.tasks.metadata_parsing import parse_yokogawa_metadata
2225

2326

@@ -28,6 +31,7 @@ def create_zarr_structure(
2831
path_dict_channels=None,
2932
num_levels=None,
3033
coarsening_xy=None,
34+
metadata_table="mlf_mrf",
3135
):
3236

3337
"""
@@ -64,6 +68,21 @@ def create_zarr_structure(
6468
"ERROR in create_zarr_structure_multifov: in_paths is None"
6569
)
6670

71+
# Preliminary checks on metadata_table
72+
if metadata_table != "mlf_mrf" and not isinstance(
73+
metadata_table, pd.core.frame.DataFrame
74+
):
75+
raise Exception(
76+
"ERROR: metadata_table must be a known string or a "
77+
"pandas DataFrame}"
78+
)
79+
if metadata_table != "mlf_mrf":
80+
raise NotImplementedError(
81+
"We currently only support "
82+
'metadata_table="mlf_mrf", '
83+
f"and not {metadata_table}"
84+
)
85+
6786
# Identify all plates and all channels, across all input folders
6887
plates = []
6988
channels = None
@@ -116,6 +135,9 @@ def create_zarr_structure(
116135
else:
117136
plates.append(plate)
118137

138+
# Update dict_plate_paths
139+
dict_plate_paths[plate] = in_path
140+
119141
# Check that channels are the same as in previous plates
120142
if channels is None:
121143
channels = tmp_channels[:]
@@ -125,9 +147,6 @@ def create_zarr_structure(
125147
f"ERROR\n{info}\nERROR: expected channels " "{channels}"
126148
)
127149

128-
# Update dict_plate_paths
129-
dict_plate_paths[plate] = in_path
130-
131150
# Check that all channels are in the allowed_channels
132151
if not set(channels).issubset(set(dict_channels.keys())):
133152
msg = "ERROR in create_zarr_structure\n"
@@ -143,36 +162,42 @@ def create_zarr_structure(
143162
print(f"actual_channels: {actual_channels}")
144163

145164
zarrurls = {"plate": [], "well": []}
146-
# zarrurls_in_paths = {}
147-
148-
# PARSE METADATA
149-
# FIXME: hard-coded paths
150-
root = (
151-
"/data/active/fractal/3D/PelkmansLab/"
152-
"CardiacMultiplexing/Cycle1_testSubset/"
153-
)
154-
mrf_path = root + "MeasurementDetail.mrf"
155-
mlf_path = root + "MeasurementData.mlf"
156-
157-
site_metadata, total_files = parse_yokogawa_metadata(
158-
mrf_path=mrf_path, mlf_path=mlf_path
159-
)
160-
161-
# PIXEL SIZES
162-
pixel_size_z = site_metadata["pixel_size_z"][0]
163-
pixel_size_y = site_metadata["pixel_size_y"][0]
164-
pixel_size_x = site_metadata["pixel_size_x"][0]
165165

166+
# Sanitize out_path
166167
if not out_path.endswith("/"):
167168
out_path += "/"
169+
170+
# Loop over plates
168171
for plate in plates:
169172

173+
# Retrieve path corresponding to this plate
174+
in_path = dict_plate_paths[plate]
175+
170176
# Define plate zarr
171177
zarrurl = f"{out_path}{plate}.zarr"
172178
print(f"Creating {zarrurl}")
173179
group_plate = zarr.group(zarrurl)
174180
zarrurls["plate"].append(zarrurl)
175-
# zarrurls_in_paths[zarrurl] = dict_plate_paths[plate]
181+
182+
# Obtain FOV-metadata dataframe
183+
if metadata_table == "mlf_mrf":
184+
mrf_path = f"{in_path}MeasurementDetail.mrf"
185+
mlf_path = f"{in_path}MeasurementData.mlf"
186+
site_metadata, total_files = parse_yokogawa_metadata(
187+
mrf_path=mrf_path, mlf_path=mlf_path
188+
)
189+
# FIXME: hardcoded
190+
image_size = {"x": 2560, "y": 2160}
191+
192+
# Extract pixel sizes
193+
pixel_size_z = site_metadata["pixel_size_z"][0]
194+
pixel_size_y = site_metadata["pixel_size_y"][0]
195+
pixel_size_x = site_metadata["pixel_size_x"][0]
196+
197+
# Extract bit_depth #FIXME
198+
# bit_depth = site_metadata["bit_depth"][0]
199+
# if bit_depth == 8:
200+
# dtype
176201

177202
# Identify all wells
178203
plate_prefix = dict_plate_prefixes[plate]
@@ -291,7 +316,7 @@ def create_zarr_structure(
291316
}
292317
for ind_level in range(num_levels)
293318
],
294-
# Global rescaling to physiacl units
319+
# Global rescaling to physical units
295320
"coordinateTransformations": [
296321
{
297322
"type": "scale",
@@ -328,6 +353,13 @@ def create_zarr_structure(
328353
],
329354
}
330355

356+
# Prepare and write anndata table of FOV ROIs
357+
FOV_ROIs_table = prepare_ROIs_table(
358+
site_metadata.loc[f"{row+column}"], image_size=image_size
359+
)
360+
group_tables = group_field.create_group("tables/") # noqa: F841
361+
write_elem(group_tables, "FOV_ROI_table", FOV_ROIs_table)
362+
331363
return zarrurls, actual_channels
332364

333365

@@ -375,4 +407,5 @@ def create_zarr_structure(
375407
num_levels=args.num_levels,
376408
coarsening_xy=args.coarsening_xy,
377409
path_dict_channels=args.path_dict_channels,
410+
# metadata_table=args.metadata_table, #FIXME
378411
)
fractal/tasks/lib_regions_of_interest.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import anndata as ad
2+
import numpy as np
3+
4+
5+
def prepare_ROIs_table(df, image_size=None):
    """
    Build an AnnData table describing field-of-view (FOV) regions of interest.

    The ``{x,y,z}_micrometer`` columns are shifted so that the minimum of each
    column becomes zero, ``len_{x,y,z}_micrometer`` columns are added, the
    ``bit_depth`` column is removed and everything is cast to float32.
    The input dataframe is never modified: all work happens on a copy.

    :param df: dataframe with one row per FOV; it must contain the columns
               ``x_micrometer``, ``y_micrometer``, ``z_micrometer``,
               ``pixel_size_x``, ``pixel_size_y``, ``pixel_size_z`` and
               ``bit_depth``
    :param image_size: mapping with keys ``"x"`` and ``"y"``; each value is
                       multiplied by the matching pixel size to obtain the
                       FOV extent in micrometers (i.e. presumably the image
                       size in pixels)
    :returns: AnnData object whose variables are the dataframe columns and
              whose observations are named ``FOV_1``, ``FOV_2``, ...
    :raises ValueError: if ``image_size`` is None
    :raises KeyError: if one of the required columns is missing
    """
    if image_size is None:
        raise ValueError("Missing image_size arg in prepare_ROIs_table")

    # Work on a private copy: callers typically pass a selection of a larger
    # metadata dataframe, and editing it in place would either leak into the
    # shared metadata or trigger pandas chained-assignment warnings.
    df = df.copy()

    # Reset the origin of each axis to the smallest coordinate in the table
    for axis in ("x", "y", "z"):
        column = f"{axis}_micrometer"
        df[column] -= df[column].min()

    # Physical extent of each FOV
    df["len_x_micrometer"] = image_size["x"] * df["pixel_size_x"]
    df["len_y_micrometer"] = image_size["y"] * df["pixel_size_y"]
    # NOTE(review): the z extent is set to a single pixel_size_z, i.e. the
    # number of Z planes is not taken into account -- confirm this is intended
    df["len_z_micrometer"] = df["pixel_size_z"]

    # bit_depth is not part of the ROI table; all other columns become float32
    df = df.drop(columns="bit_depth").astype(np.float32)

    adata = ad.AnnData(X=df, dtype=np.float32)
    adata.obs_names = [f"FOV_{i + 1:d}" for i in range(len(df.index))]
    adata.var_names = df.columns

    return adata

0 commit comments

Comments
 (0)