1
+ import json
1
2
import os
2
- import re
3
3
from glob import glob
4
4
5
5
import zarr
6
6
7
- # from devtools import debug
8
-
9
-
10
def metadata(filename):
    """
    Extract metadata by parsing image filename, return a parameter dictionary.

    Three kinds of filenames are supported:
    1) Filenames from UZH:
       20200812-Cardio[...]Cycle1_B03_T0001F036L01A01Z18C01.png
       with plate name 20200812-Cardio[...]Cycle1
    2) Filenames from FMI, with successful barcode reading:
       210305NAR005AAN_210416_164828_B11_T0001F006L01A04Z14C01.tif
       with plate name 210305NAR005AAN
    3) Filenames from FMI, with failed barcode reading:
       yymmdd_hhmmss_210416_164828_B11_T0001F006L01A04Z14C01.tif
       with plate name RS{yymmddhhmmss}

    :param filename: name of the image
    :type filename: str
    :returns: dictionary with string values for the keys ``plate``,
              ``well``, ``t_ind``, ``z_ind``, ``chl`` and ``site``
    :rtype: dict
    :raises IndexError: if the filename does not contain a
                        ``_{well}_T..F..L..Z..C..`` block
    :raises ValueError: if the part preceding the well cannot be mapped to
                        one of the three supported plate-name layouts
    """
    stem = filename.rsplit(".", 1)[0]

    # Parse the acquisition block as a whole, anchored right after the well
    # token. Matching each field separately against the full stem would let
    # uppercase letters in the plate name (T, F, L, Z, C) leak into the
    # extracted indices.
    match = re.search(
        r"^(?P<prefix>.*)_(?P<well>[^_]+)_"
        r"T(?P<t_ind>\d+)F(?P<site>\d+)L\d+(?:A\d+)?"
        r"Z(?P<z_ind>\d+)C(?P<chl>\d+)",
        stem,
    )
    if match is None:
        # IndexError is kept on purpose: callers that loop over globbed files
        # skip unparseable names by catching exactly this exception type.
        raise IndexError(f"Cannot parse metadata from filename {filename!r}")

    well = match.group("well")
    tmp_plate = match.group("prefix")
    fields = tmp_plate.split("_")

    if len(fields) == 4 and all(len(item) == 6 for item in fields[:3]):
        # FMI (failed barcode reading): the scan timestamp replaces the
        # missing barcode.
        scan_date, scan_time = fields[0], fields[1]
        plate = f"RS{scan_date + scan_time}"
    elif len(fields) == 3:
        # FMI (correct barcode reading)
        barcode, img_date, img_time = fields
        if len(img_date) != 6 or len(img_time) != 6:
            raise ValueError(
                f"Failure in metadata parsing of {tmp_plate}, with"
                f" img_date={img_date} and img_time={img_time}"
            )
        plate = barcode
    elif len(fields) == 1:
        # UZH
        plate = fields[0]
    else:
        # Fail with an explicit message instead of an UnboundLocalError on
        # `plate` further down.
        raise ValueError(
            f"Failure in metadata parsing of {tmp_plate}: "
            f"unsupported plate-name layout with {len(fields)} fields"
        )

    return dict(
        plate=plate,
        well=well,
        t_ind=match.group("t_ind"),
        z_ind=match.group("z_ind"),
        chl=match.group("chl"),
        site=match.group("site"),
    )
7
+ from fractal .tasks .lib_parse_filename_metadata import parse_metadata
65
8
66
9
67
10
def create_zarr_structure_multifov (
68
- in_path = None ,
11
+ in_paths = [] ,
69
12
out_path = None ,
70
13
ext = None ,
14
+ path_dict_channels = None ,
71
15
num_levels = None ,
72
16
):
73
17
74
18
"""
75
19
Create (and store) the zarr folder, without reading or writing data.
76
20
77
21
78
- :param in_path: path of images
79
- :type in_path: str
22
+ :param in_paths: list of image directories
23
+ :type in_path: list
80
24
:param out_path: path for output zarr files
81
25
:type out_path: str
82
26
:param ext: extension of images (e.g. tiff, png, ..)
83
- :type ext: str
27
+ :param path_dict_channels: FIXME
28
+ :type path_dict_channels: str
84
29
:param num_levels: number of coarsening levels in the pyramid
85
30
:type num_levels: int
86
31
"""
87
32
88
- raise NotImplementedError (
89
- "create_zarr_structure_multifov not implemented "
90
- "with new channel scheme"
91
- )
33
+ try :
34
+ with open (path_dict_channels , "r" ) as json_file :
35
+ dict_channels = json .load (json_file )
36
+ except FileNotFoundError :
37
+ raise Exception (
38
+ "ERROR in create_zarr_structure: " f"{ path_dict_channels } missing."
39
+ )
40
+ except TypeError :
41
+ raise Exception (
42
+ "ERROR in create_zarr_structure: "
43
+ f"{ path_dict_channels } has wrong type "
44
+ "(probably a None instead of a string)."
45
+ )
46
+
47
+ # Identify all plates and all channels, across all input folders
48
+ plates = []
49
+ channels = None
50
+ dict_plate_paths = {}
51
+ dict_plate_prefixes = {}
52
+ for in_path in in_paths :
53
+ tmp_channels = []
54
+ tmp_plates = []
55
+ if not in_path .endswith ("/" ):
56
+ in_path += "/"
57
+ for fn in glob (in_path + "*." + ext ):
58
+ try :
59
+ metadata = parse_metadata (os .path .basename (fn ))
60
+ plate_prefix = metadata ["plate_prefix" ]
61
+ plate = metadata ["plate" ]
62
+ if plate not in dict_plate_prefixes .keys ():
63
+ dict_plate_prefixes [plate ] = plate_prefix
64
+ tmp_plates .append (plate )
65
+ tmp_channels .append (f"A{ metadata ['A' ]} _C{ metadata ['C' ]} " )
66
+ except IndexError :
67
+ print ("IndexError for " , fn )
68
+ pass
69
+ tmp_plates = sorted (list (set (tmp_plates )))
70
+ tmp_channels = sorted (list (set (tmp_channels )))
71
+
72
+ info = (
73
+ f"Listing all plates/channels from { in_path } *.{ ext } \n "
74
+ f"Plates: { tmp_plates } \n "
75
+ f"Channels: { tmp_channels } \n "
76
+ )
77
+
78
+ # Check that only one plate is found
79
+ if len (tmp_plates ) > 1 :
80
+ raise Exception (f"{ info } ERROR: { len (tmp_plates )} plates detected" )
81
+ plate = tmp_plates [0 ]
82
+
83
+ # If plate already exists in other folder, add suffix
84
+ if plate in plates :
85
+ ind = 1
86
+ new_plate = f"{ plate } _{ ind } "
87
+ while new_plate in plates :
88
+ new_plate = f"{ plate } _{ ind } "
89
+ ind += 1
90
+ print (
91
+ f"WARNING: { plate } already exists, renaming it as { new_plate } "
92
+ )
93
+ plates .append (new_plate )
94
+ dict_plate_prefixes [new_plate ] = dict_plate_prefixes [plate ]
95
+ plate = new_plate
96
+ else :
97
+ plates .append (plate )
98
+
99
+ # Check that channels are the same as in previous plates
100
+ if channels is None :
101
+ channels = tmp_channels [:]
102
+ else :
103
+ if channels != tmp_channels :
104
+ raise Exception (
105
+ f"ERROR\n { info } \n ERROR: expected channels " "{channels}"
106
+ )
92
107
93
- # Find all plates
94
- plate = []
95
- if not in_path .endswith ("/" ):
96
- in_path += "/"
97
- for i in glob (in_path + "*." + ext ):
98
- try :
99
- plate .append (metadata (os .path .basename (i ))["plate" ])
100
- except IndexError :
101
- print ("IndexError for " , i )
102
- pass
103
- plate_unique = set (plate )
104
- print ("Find all plates in" , in_path + "*." + ext )
105
- print (f"Plates: { plate_unique } " )
106
-
107
- well = []
108
-
109
- zarrurls = {"plate" : [], "well" : [], "site" : []}
110
- # FIXME: plate_attributes is just a placeholder, at the moment
111
- plate_attributes = {}
112
-
113
- # Loop over plates
114
- for plate in plate_unique :
115
- group_plate = zarr .group (out_path + f"{ plate } .zarr" )
116
- zarrurls ["plate" ].append (out_path + f"{ plate } .zarr" )
117
- plate_attributes [plate ] = dict (chl_list = [], sites_list = [])
118
- well = [
119
- metadata (os .path .basename (fn ))["well" ]
120
- for fn in glob (in_path + f"{ plate } _*." + ext )
108
+ # Update dict_plate_paths
109
+ dict_plate_paths [plate ] = in_path
110
+
111
+ # Check that all channels are in the allowed_channels
112
+ if not set (channels ).issubset (set (dict_channels .keys ())):
113
+ msg = "ERROR in create_zarr_structure\n "
114
+ msg += f"channels: { channels } \n "
115
+ msg += f"allowed_channels: { dict_channels .keys ()} \n "
116
+ raise Exception (msg )
117
+
118
+ # Sort channels according to allowed_channels, and assign increasing index
119
+ # actual_channels is a list of entries like A01_C01"
120
+ actual_channels = []
121
+ for ind_ch , ch in enumerate (channels ):
122
+ actual_channels .append (ch )
123
+ print (f"actual_channels: { actual_channels } " )
124
+
125
+ zarrurls = {"plate" : [], "well" : []}
126
+ well_to_sites = {}
127
+
128
+ if not out_path .endswith ("/" ):
129
+ out_path += "/"
130
+ for plate in plates :
131
+
132
+ # Define plate zarr
133
+ zarrurl = f"{ out_path } { plate } .zarr"
134
+ print (f"Creating { zarrurl } " )
135
+ group_plate = zarr .group (zarrurl )
136
+ zarrurls ["plate" ].append (zarrurl )
137
+ # zarrurls_in_paths[zarrurl] = dict_plate_paths[plate]
138
+
139
+ # Identify all wells
140
+ plate_prefix = dict_plate_prefixes [plate ]
141
+ wells = [
142
+ parse_metadata (os .path .basename (fn ))["well" ]
143
+ for fn in glob (f"{ in_path } { plate_prefix } _*.{ ext } " )
121
144
]
122
- well_unique = set (well )
145
+ wells = sorted (list (set (wells )))
146
+
147
+ # Verify that all wells have all channels
148
+ for well in wells :
149
+ well_channels = []
150
+ glob_string = f"{ in_path } { plate_prefix } _{ well } *.{ ext } "
151
+ for fn in glob (glob_string ):
152
+ try :
153
+ metadata = parse_metadata (os .path .basename (fn ))
154
+ well_channels .append (f"A{ metadata ['A' ]} _C{ metadata ['C' ]} " )
155
+ except IndexError :
156
+ print (f"Skipping { fn } " )
157
+ well_channels = sorted (list (set (well_channels )))
158
+ if well_channels != actual_channels :
159
+ raise Exception (
160
+ f"ERROR: well { well } in plate { plate } (prefix: "
161
+ f"{ plate_prefix } ) has missing channels.\n "
162
+ f"Expected: { actual_channels } \n "
163
+ f"Found: { well_channels } .\n "
164
+ f"[glob_string: { glob_string } ]"
165
+ )
123
166
124
167
well_rows_columns = [
125
- ind for ind in sorted ([(n [0 ], n [1 :]) for n in well_unique ])
168
+ ind for ind in sorted ([(n [0 ], n [1 :]) for n in wells ])
126
169
]
127
170
128
171
group_plate .attrs ["plate" ] = {
129
- "acquisitions" : [
130
- {"id" : id_ , "name" : name }
131
- for id_ , name in enumerate (plate_unique )
132
- ],
133
- # takes unique cols from (row,col) tuples
172
+ "acquisitions" : [{"id" : 1 , "name" : plate }],
134
173
"columns" : sorted (
135
174
[
136
175
{"name" : u_col }
@@ -143,7 +182,6 @@ def create_zarr_structure_multifov(
143
182
],
144
183
key = lambda key : key ["name" ],
145
184
),
146
- # takes unique rows from (row,col) tuples
147
185
"rows" : sorted (
148
186
[
149
187
{"name" : u_row }
@@ -174,15 +212,6 @@ def create_zarr_structure_multifov(
174
212
for fn in glob (in_path + f"{ plate } *_{ row + column } *." + ext )
175
213
]
176
214
sites_unique = sorted (list (set (sites )))
177
- plate_attributes [plate ]["sites_list" ] = sites_unique [:]
178
-
179
- # Identify channels
180
- chl = [
181
- metadata (os .path .basename (fn ))["chl" ]
182
- for fn in glob (in_path + f"{ plate } *_{ row + column } *." + ext )
183
- ]
184
- chl_unique = sorted (list (set (chl )))
185
- plate_attributes [plate ]["chl_list" ] = chl_unique [:]
186
215
187
216
# Write all sites in the attributes
188
217
group_well .attrs ["well" ] = {
@@ -193,6 +222,7 @@ def create_zarr_structure_multifov(
193
222
"version" : "0.3" ,
194
223
}
195
224
zarrurls ["well" ].append (out_path + f"{ plate } .zarr/{ row } /{ column } /" )
225
+ well_to_sites [zarrurls ["well" ][- 1 ]] = sites_unique
196
226
197
227
# Create groups and paths for all sites
198
228
for index_site , site in enumerate (sites_unique ):
@@ -220,8 +250,32 @@ def create_zarr_structure_multifov(
220
250
],
221
251
}
222
252
]
253
+ group_field .attrs ["omero" ] = {
254
+ "id" : 1 , # FIXME does this depend on the plate number?
255
+ "name" : "TBD" ,
256
+ "version" : "0.4" ,
257
+ "channels" : [
258
+ {
259
+ # FIXME
260
+ # How to write true/false (lowercase) via python?
261
+ # "active": true,
262
+ "coefficient" : 1 ,
263
+ "color" : dict_channels [channel ]["colormap" ],
264
+ "family" : "linear" ,
265
+ # "inverted": false,
266
+ "label" : dict_channels [channel ]["label" ],
267
+ "window" : {
268
+ "min" : 0 ,
269
+ "max" : 65535 ,
270
+ "start" : dict_channels [channel ]["start" ],
271
+ "end" : dict_channels [channel ]["end" ],
272
+ },
273
+ }
274
+ for channel in actual_channels
275
+ ],
276
+ }
223
277
224
- return zarrurls , chl_unique , sites_unique # , plate_attributes
278
+ return zarrurls , actual_channels , well_to_sites
225
279
226
280
227
281
if __name__ == "__main__" :
0 commit comments