Skip to content

Commit a0cce76

Browse files
committed
(probably BROKEN) First attempt for multifov parsing within new channel scheme (ref #61 #74)
1 parent f626c44 commit a0cce76

File tree

4 files changed

+173
-123
lines changed

4 files changed

+173
-123
lines changed

fractal/fractal_cmd.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -507,10 +507,10 @@ def app_create_zarr_structure(**kwargs_):
507507
debug(zarrurls)
508508
debug(chl_list)
509509
elif task_names[0] == "create_zarr_structure_multifov":
510-
zarrurls, chl_list, sites_list = future.result()
510+
zarrurls, chl_list, well_to_sites = future.result()
511511
debug(zarrurls)
512512
debug(chl_list)
513-
debug(sites_list)
513+
debug(well_to_sites)
514514
task_names = task_names[1:] # FIXME
515515
else:
516516
print(
@@ -546,7 +546,7 @@ def app_create_zarr_structure(**kwargs_):
546546
ext=ext,
547547
delete_input=delete_input,
548548
chl_list=chl_list,
549-
sites_list=sites_list,
549+
sites_dict=well_to_sites,
550550
num_levels=num_levels,
551551
coarsening_xy=coarsening_xy,
552552
coarsening_z=coarsening_z,

fractal/tasks/create_zarr_structure.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -207,9 +207,7 @@ def create_zarr_structure(
207207
group_well = group_plate.create_group(f"{row}/{column}/")
208208

209209
group_well.attrs["well"] = {
210-
"images": [
211-
{"path": "0"} # multiscale level, until pyramids just 0
212-
],
210+
"images": [{"path": "0"}],
213211
"version": "0.3",
214212
}
215213

fractal/tasks/create_zarr_structure_multifov.py

Lines changed: 167 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -1,136 +1,175 @@
1+
import json
12
import os
2-
import re
33
from glob import glob
44

55
import zarr
66

7-
# from devtools import debug
8-
9-
10-
def metadata(filename):
11-
"""
12-
Extract metadata by parsing image filename, return a parameter dictionary.
13-
Three kinds of filenames are supported:
14-
1) Filenames from UZH:
15-
20200812-Cardio[...]Cycle1_B03_T0001F036L01A01Z18C01.png
16-
with plate name 20200812-Cardio[...]Cycle1
17-
2) Filenames from FMI, with successful barcode reading:
18-
210305NAR005AAN_210416_164828_B11_T0001F006L01A04Z14C01.tif
19-
with plate name 210305NAR005AAN
20-
3) Filenames from FMI, with failed barcode reading:
21-
yymmdd_hhmmss_210416_164828_B11_T0001F006L01A04Z14C01.tif
22-
with plate name RS{yymmddhhmmss}
23-
24-
:param filename: name of the image
25-
:type filename: str
26-
"""
27-
f = filename.rsplit(".", 1)[0]
28-
29-
well = re.findall(r"_(.*)_T", f)[0].split("_")[-1]
30-
tmp_plate = f.split(f"_{well}_")[0]
31-
32-
fields = tmp_plate.split("_")
33-
34-
if (
35-
len(fields) == 4
36-
and len(fields[0]) == 6
37-
and len(fields[1]) == 6
38-
and len(fields[2]) == 6
39-
):
40-
# FMI (failed barcode reading)
41-
scan_date, scan_time, img_date, img_time = fields[:]
42-
plate = f"RS{scan_date + scan_time}"
43-
elif len(fields) == 3:
44-
# FMI (correct barcode reading)
45-
barcode, img_date, img_time = fields[:]
46-
if len(img_date) != 6 or len(img_time) != 6:
47-
raise Exception(
48-
f"Failure in metadata parsing of {tmp_plate}, with"
49-
" img_date={img_date} and img_time={img_time}"
50-
)
51-
plate = barcode
52-
elif len(fields) == 1:
53-
# UZH
54-
plate = fields[0]
55-
56-
site = re.findall(r"F(.*)L", f)[0]
57-
chl = re.findall(r"[0-9]C(.*)", f)[0].split(".")[0].split("_")[0]
58-
t_ind = re.findall(r"T(.*)F", f)[0]
59-
z_ind = re.findall(r"Z(.*)C", f)[0]
60-
61-
result = dict(
62-
plate=plate, well=well, t_ind=t_ind, z_ind=z_ind, chl=chl, site=site
63-
)
64-
return result
7+
from fractal.tasks.lib_parse_filename_metadata import parse_metadata
658

669

6710
def create_zarr_structure_multifov(
68-
in_path=None,
11+
in_paths=[],
6912
out_path=None,
7013
ext=None,
14+
path_dict_channels=None,
7115
num_levels=None,
7216
):
7317

7418
"""
7519
Create (and store) the zarr folder, without reading or writing data.
7620
7721
78-
:param in_path: path of images
79-
:type in_path: str
22+
:param in_paths: list of image directories
23+
:type in_paths: list
8024
:param out_path: path for output zarr files
8125
:type out_path: str
8226
:param ext: extension of images (e.g. tiff, png, ..)
83-
:type ext: str
27+
:param path_dict_channels: path of the JSON file with the channel dictionary (FIXME: document its schema)
28+
:type path_dict_channels: str
8429
:param num_levels: number of coarsening levels in the pyramid
8530
:type num_levels: int
8631
"""
8732

88-
raise NotImplementedError(
89-
"create_zarr_structure_multifov not implemented "
90-
"with new channel scheme"
91-
)
33+
try:
34+
with open(path_dict_channels, "r") as json_file:
35+
dict_channels = json.load(json_file)
36+
except FileNotFoundError:
37+
raise Exception(
38+
"ERROR in create_zarr_structure: " f"{path_dict_channels} missing."
39+
)
40+
except TypeError:
41+
raise Exception(
42+
"ERROR in create_zarr_structure: "
43+
f"{path_dict_channels} has wrong type "
44+
"(probably a None instead of a string)."
45+
)
46+
47+
# Identify all plates and all channels, across all input folders
48+
plates = []
49+
channels = None
50+
dict_plate_paths = {}
51+
dict_plate_prefixes = {}
52+
for in_path in in_paths:
53+
tmp_channels = []
54+
tmp_plates = []
55+
if not in_path.endswith("/"):
56+
in_path += "/"
57+
for fn in glob(in_path + "*." + ext):
58+
try:
59+
metadata = parse_metadata(os.path.basename(fn))
60+
plate_prefix = metadata["plate_prefix"]
61+
plate = metadata["plate"]
62+
if plate not in dict_plate_prefixes.keys():
63+
dict_plate_prefixes[plate] = plate_prefix
64+
tmp_plates.append(plate)
65+
tmp_channels.append(f"A{metadata['A']}_C{metadata['C']}")
66+
except IndexError:
67+
print("IndexError for ", fn)
68+
pass
69+
tmp_plates = sorted(list(set(tmp_plates)))
70+
tmp_channels = sorted(list(set(tmp_channels)))
71+
72+
info = (
73+
f"Listing all plates/channels from {in_path}*.{ext}\n"
74+
f"Plates: {tmp_plates}\n"
75+
f"Channels: {tmp_channels}\n"
76+
)
77+
78+
# Check that only one plate is found
79+
if len(tmp_plates) > 1:
80+
raise Exception(f"{info}ERROR: {len(tmp_plates)} plates detected")
81+
plate = tmp_plates[0]
82+
83+
# If plate already exists in other folder, add suffix
84+
if plate in plates:
85+
ind = 1
86+
new_plate = f"{plate}_{ind}"
87+
while new_plate in plates:
88+
new_plate = f"{plate}_{ind}"
89+
ind += 1
90+
print(
91+
f"WARNING: {plate} already exists, renaming it as {new_plate}"
92+
)
93+
plates.append(new_plate)
94+
dict_plate_prefixes[new_plate] = dict_plate_prefixes[plate]
95+
plate = new_plate
96+
else:
97+
plates.append(plate)
98+
99+
# Check that channels are the same as in previous plates
100+
if channels is None:
101+
channels = tmp_channels[:]
102+
else:
103+
if channels != tmp_channels:
104+
raise Exception(
105+
f"ERROR\n{info}\nERROR: expected channels " "{channels}"
106+
)
92107

93-
# Find all plates
94-
plate = []
95-
if not in_path.endswith("/"):
96-
in_path += "/"
97-
for i in glob(in_path + "*." + ext):
98-
try:
99-
plate.append(metadata(os.path.basename(i))["plate"])
100-
except IndexError:
101-
print("IndexError for ", i)
102-
pass
103-
plate_unique = set(plate)
104-
print("Find all plates in", in_path + "*." + ext)
105-
print(f"Plates: {plate_unique}")
106-
107-
well = []
108-
109-
zarrurls = {"plate": [], "well": [], "site": []}
110-
# FIXME: plate_attributes is just a placeholder, at the moment
111-
plate_attributes = {}
112-
113-
# Loop over plates
114-
for plate in plate_unique:
115-
group_plate = zarr.group(out_path + f"{plate}.zarr")
116-
zarrurls["plate"].append(out_path + f"{plate}.zarr")
117-
plate_attributes[plate] = dict(chl_list=[], sites_list=[])
118-
well = [
119-
metadata(os.path.basename(fn))["well"]
120-
for fn in glob(in_path + f"{plate}_*." + ext)
108+
# Update dict_plate_paths
109+
dict_plate_paths[plate] = in_path
110+
111+
# Check that all channels are in the allowed_channels
112+
if not set(channels).issubset(set(dict_channels.keys())):
113+
msg = "ERROR in create_zarr_structure\n"
114+
msg += f"channels: {channels}\n"
115+
msg += f"allowed_channels: {dict_channels.keys()}\n"
116+
raise Exception(msg)
117+
118+
# Sort channels according to allowed_channels, and assign increasing index
119+
# actual_channels is a list of entries like "A01_C01"
120+
actual_channels = []
121+
for ind_ch, ch in enumerate(channels):
122+
actual_channels.append(ch)
123+
print(f"actual_channels: {actual_channels}")
124+
125+
zarrurls = {"plate": [], "well": []}
126+
well_to_sites = {}
127+
128+
if not out_path.endswith("/"):
129+
out_path += "/"
130+
for plate in plates:
131+
132+
# Define plate zarr
133+
zarrurl = f"{out_path}{plate}.zarr"
134+
print(f"Creating {zarrurl}")
135+
group_plate = zarr.group(zarrurl)
136+
zarrurls["plate"].append(zarrurl)
137+
# zarrurls_in_paths[zarrurl] = dict_plate_paths[plate]
138+
139+
# Identify all wells
140+
plate_prefix = dict_plate_prefixes[plate]
141+
wells = [
142+
parse_metadata(os.path.basename(fn))["well"]
143+
for fn in glob(f"{in_path}{plate_prefix}_*.{ext}")
121144
]
122-
well_unique = set(well)
145+
wells = sorted(list(set(wells)))
146+
147+
# Verify that all wells have all channels
148+
for well in wells:
149+
well_channels = []
150+
glob_string = f"{in_path}{plate_prefix}_{well}*.{ext}"
151+
for fn in glob(glob_string):
152+
try:
153+
metadata = parse_metadata(os.path.basename(fn))
154+
well_channels.append(f"A{metadata['A']}_C{metadata['C']}")
155+
except IndexError:
156+
print(f"Skipping {fn}")
157+
well_channels = sorted(list(set(well_channels)))
158+
if well_channels != actual_channels:
159+
raise Exception(
160+
f"ERROR: well {well} in plate {plate} (prefix: "
161+
f"{plate_prefix}) has missing channels.\n"
162+
f"Expected: {actual_channels}\n"
163+
f"Found: {well_channels}.\n"
164+
f"[glob_string: {glob_string}]"
165+
)
123166

124167
well_rows_columns = [
125-
ind for ind in sorted([(n[0], n[1:]) for n in well_unique])
168+
ind for ind in sorted([(n[0], n[1:]) for n in wells])
126169
]
127170

128171
group_plate.attrs["plate"] = {
129-
"acquisitions": [
130-
{"id": id_, "name": name}
131-
for id_, name in enumerate(plate_unique)
132-
],
133-
# takes unique cols from (row,col) tuples
172+
"acquisitions": [{"id": 1, "name": plate}],
134173
"columns": sorted(
135174
[
136175
{"name": u_col}
@@ -143,7 +182,6 @@ def create_zarr_structure_multifov(
143182
],
144183
key=lambda key: key["name"],
145184
),
146-
# takes unique rows from (row,col) tuples
147185
"rows": sorted(
148186
[
149187
{"name": u_row}
@@ -174,15 +212,6 @@ def create_zarr_structure_multifov(
174212
for fn in glob(in_path + f"{plate}*_{row+column}*." + ext)
175213
]
176214
sites_unique = sorted(list(set(sites)))
177-
plate_attributes[plate]["sites_list"] = sites_unique[:]
178-
179-
# Identify channels
180-
chl = [
181-
metadata(os.path.basename(fn))["chl"]
182-
for fn in glob(in_path + f"{plate}*_{row+column}*." + ext)
183-
]
184-
chl_unique = sorted(list(set(chl)))
185-
plate_attributes[plate]["chl_list"] = chl_unique[:]
186215

187216
# Write all sites in the attributes
188217
group_well.attrs["well"] = {
@@ -193,6 +222,7 @@ def create_zarr_structure_multifov(
193222
"version": "0.3",
194223
}
195224
zarrurls["well"].append(out_path + f"{plate}.zarr/{row}/{column}/")
225+
well_to_sites[zarrurls["well"][-1]] = sites_unique
196226

197227
# Create groups and paths for all sites
198228
for index_site, site in enumerate(sites_unique):
@@ -220,8 +250,32 @@ def create_zarr_structure_multifov(
220250
],
221251
}
222252
]
253+
group_field.attrs["omero"] = {
254+
"id": 1, # FIXME does this depend on the plate number?
255+
"name": "TBD",
256+
"version": "0.4",
257+
"channels": [
258+
{
259+
# FIXME
260+
# How to write true/false (lowercase) via python?
261+
# "active": true,
262+
"coefficient": 1,
263+
"color": dict_channels[channel]["colormap"],
264+
"family": "linear",
265+
# "inverted": false,
266+
"label": dict_channels[channel]["label"],
267+
"window": {
268+
"min": 0,
269+
"max": 65535,
270+
"start": dict_channels[channel]["start"],
271+
"end": dict_channels[channel]["end"],
272+
},
273+
}
274+
for channel in actual_channels
275+
],
276+
}
223277

224-
return zarrurls, chl_unique, sites_unique # , plate_attributes
278+
return zarrurls, actual_channels, well_to_sites
225279

226280

227281
if __name__ == "__main__":

fractal/tasks/yokogawa_to_zarr_multifov.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def yokogawa_to_zarr_multifov(
2828
in_path=None,
2929
ext=None,
3030
chl_list=None,
31-
sites_list=None,
31+
sites_dict={},
3232
num_levels=5,
3333
coarsening_xy=2,
3434
coarsening_z=1,
@@ -41,9 +41,7 @@ def yokogawa_to_zarr_multifov(
4141
4242
"""
4343

44-
raise NotImplementedError(
45-
"yokogawa_to_zarr_multifov not implemented " "with new channel scheme"
46-
)
44+
sites_list = sites_dict[zarrurl]
4745

4846
if not in_path.endswith("/"):
4947
in_path += "/"

0 commit comments

Comments
 (0)