Institute for Biomedical Research and Pelkmans Lab from the University of
Zurich.
"""
-
-from xml.etree import ElementTree
-import pandas as pd
import warnings
+
import numpy as np
+import pandas as pd
+from defusedxml import ElementTree
+

def parse_yokogawa_metadata(mrf_path, mlf_path):
    """
@@ -28,27 +29,41 @@ def parse_yokogawa_metadata(mrf_path, mlf_path):
    mrf_frame, mlf_frame, error_count = read_metadata_files(mrf_path, mlf_path)

    per_site_parameters = [
-        'x_micrometer',
-        'y_micrometer',
-        'pixel_size_x',
-        'pixel_size_y',
-        'bit_depth'
-    ]
-    grouping_params = ['well_id', 'field_id']
-    grouped_sites = mlf_frame.loc[:, grouping_params + per_site_parameters
-                                 ].groupby(by=grouping_params)
+        "x_micrometer",
+        "y_micrometer",
+        "pixel_size_x",
+        "pixel_size_y",
+        "x_pixel",
+        "y_pixel",
+        "bit_depth",
+    ]
+    grouping_params = ["well_id", "field_id"]
+    grouped_sites = mlf_frame.loc[
+        :, grouping_params + per_site_parameters
+    ].groupby(by=grouping_params)
    check_grouped_sites_consistency(grouped_sites, per_site_parameters)
    site_metadata = grouped_sites.mean()

+    # Cast image pixel sizes & bit depth to int
+    site_metadata = site_metadata.astype(
+        {
+            "x_pixel": "int",
+            "y_pixel": "int",
+            "bit_depth": "int",
+        }
+    )
+
    # Absolute Z positions are not saved by the Yokogawa,
    # only relative positions to the autofocus
-    site_metadata['z_micrometer'] = 0
+    site_metadata["z_micrometer"] = 0

    site_metadata = pd.concat([site_metadata, get_z_steps(mlf_frame)], axis=1)

    if error_count > 0:
-        print(f"Succesfully parsed {len(site_metadata)} sites, could not " \
-              f"parse {error_count} sites due to errors (see warnings).")
+        print(
+            f"Successfully parsed {len(site_metadata)} sites, could not "
+            f"parse {error_count} sites due to errors (see warnings)."
+        )
    total_files = len(mlf_frame)
    # TODO: Check whether the total_files correspond to the number of
    # relevant input images in the input folder. Returning it for now
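For orientation, a hedged usage sketch of the function in this hunk. The file names and the (site_metadata, total_files) return value are assumptions inferred from the code above, not something this diff states explicitly:

    # Illustrative only: paths and the return signature are assumed.
    site_metadata, total_files = parse_yokogawa_metadata(
        mrf_path="MeasurementDetail.mrf",
        mlf_path="MeasurementData.mlf",
    )
    # One averaged row per (well_id, field_id) site, now with integer
    # x_pixel / y_pixel / bit_depth columns plus the Z columns from get_z_steps.
    print(site_metadata.head())
    print(f"{total_files} image records found in the mlf file")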
@@ -62,9 +77,10 @@ def parse_yokogawa_metadata(mrf_path, mlf_path):


def read_metadata_files(mrf_path, mlf_path):
-    # parsing of mrf & mlf files are based on the yokogawa_image_collection_task
-    # v0.5 in drogon, written by Dario Vischi. Now modified for Fractal use
-    # https://github.com/fmi-basel/job-system-workflows/blob/00bbf34448972d27f258a2c28245dd96180e8229/src/gliberal_workflows/tasks/yokogawa_image_collection_task/versions/version_0_5.py
+    # parsing of mrf & mlf files is based on the
+    # yokogawa_image_collection_task v0.5 in drogon, written by Dario Vischi.
+    # https://github.com/fmi-basel/job-system-workflows/blob/00bbf34448972d27f258a2c28245dd96180e8229/src/gliberal_workflows/tasks/yokogawa_image_collection_task/versions/version_0_5.py # noqa
+    # Now modified for Fractal use

    mrf_frame = read_mrf_file(mrf_path)
    # TODO: filter_position & filter_wheel_position are parsed, but not
@@ -78,25 +94,27 @@ def read_metadata_files(mrf_path, mlf_path):
    return mrf_frame, mlf_frame, error_count


-
def read_mrf_file(mrf_path):

    # Prepare mrf dataframe
    mrf_columns = [
-        "channel_id", "horiz_pixel_dim", "vert_pixel_dim",
-        "camera_no", "bit_depth", "horiz_pixels", "vert_pixels",
-        "filter_wheel_position", "filter_position", "shading_corr_src"
+        "channel_id",
+        "horiz_pixel_dim",
+        "vert_pixel_dim",
+        "camera_no",
+        "bit_depth",
+        "horiz_pixels",
+        "vert_pixels",
+        "filter_wheel_position",
+        "filter_position",
+        "shading_corr_src",
    ]
-    mrf_frame = pd.DataFrame(
-        columns=mrf_columns
-    )
+    mrf_frame = pd.DataFrame(columns=mrf_columns)

    mrf_xml = ElementTree.parse(mrf_path).getroot()
    # Read mrf file
    ns = {"bts": "http://www.yokogawa.co.jp/BTS/BTSSchema/1.0"}
-    for channel in mrf_xml.findall(
-        "bts:MeasurementChannel", namespaces=ns
-    ):
+    for channel in mrf_xml.findall("bts:MeasurementChannel", namespaces=ns):
        mrf_frame.loc[channel.get("{%s}Ch" % ns["bts"])] = [
            channel.get("{%s}Ch" % ns["bts"]),
            float(channel.get("{%s}HorizontalPixelDimension" % ns["bts"])),
@@ -107,7 +125,7 @@ def read_mrf_file(mrf_path):
            int(channel.get("{%s}VerticalPixels" % ns["bts"])),
            int(channel.get("{%s}FilterWheelPosition" % ns["bts"])),
            int(channel.get("{%s}FilterPosition" % ns["bts"])),
-            channel.get("{%s}ShadingCorrectionSource" % ns["bts"])
+            channel.get("{%s}ShadingCorrectionSource" % ns["bts"]),
        ]

    return mrf_frame
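The "{%s}Attr" % ns["bts"] pattern used throughout this hunk builds the Clark-notation key ({namespace-uri}LocalName) under which ElementTree stores namespaced attributes. A self-contained sketch of that mechanic; the two-line XML snippet and its wrapper element name are invented for illustration:

    from defusedxml import ElementTree

    ns = {"bts": "http://www.yokogawa.co.jp/BTS/BTSSchema/1.0"}
    # Hypothetical minimal document; only the attribute-lookup mechanics matter.
    snippet = (
        '<bts:Example xmlns:bts="%s">'
        '<bts:MeasurementChannel bts:Ch="1" bts:HorizontalPixelDimension="0.65"/>'
        "</bts:Example>" % ns["bts"]
    )
    channel = ElementTree.fromstring(snippet).find(
        "bts:MeasurementChannel", namespaces=ns
    )
    print(channel.get("{%s}Ch" % ns["bts"]))  # -> "1"
    print(float(channel.get("{%s}HorizontalPixelDimension" % ns["bts"])))  # -> 0.65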
@@ -126,28 +144,40 @@ def blocks(fh, size=65536):
                break
            yield block

-    with open(mlf_path, "r", encoding="utf-8", errors='ignore') as fh:
-        line_counter = sum(block.count("\n") for block in blocks(fh))
-
    mlf_entries = mlf_xml.findall("bts:MeasurementRecord", namespaces=ns)
    nb_lines = len(mlf_entries)

    # Prepare mlf dataframe
    mlf_columns = [
-        "type", "well_id", "column", "row", "time_point", "field_id",
-        "z_index", "timeline_id", "action_id", "action",
-        "x_micrometer", "y_micrometer", "z_micrometer",
-        "pixel_size_x", "pixel_size_y", "bit_depth", "width", "height",
-        "channel_id", 'camera_no', "file_name"
+        "type",
+        "well_id",
+        "column",
+        "row",
+        "time_point",
+        "field_id",
+        "z_index",
+        "timeline_id",
+        "action_id",
+        "action",
+        "x_micrometer",
+        "y_micrometer",
+        "z_micrometer",
+        "x_pixel",
+        "y_pixel",
+        "pixel_size_x",
+        "pixel_size_y",
+        "bit_depth",
+        "width",
+        "height",
+        "channel_id",
+        "camera_no",
+        "file_name",
    ]
-    mlf_frame = pd.DataFrame(
-        columns=mlf_columns, index=range(0, nb_lines)
-    )
+    mlf_frame = pd.DataFrame(columns=mlf_columns, index=range(0, nb_lines))

    mrf_channel_indices = {
-        row.channel_id: idx for idx, (_, row) in enumerate(
-            mrf_frame.iterrows()
-        )
+        row.channel_id: idx
+        for idx, (_, row) in enumerate(mrf_frame.iterrows())
    }

    error_count = 0
@@ -180,17 +210,15 @@

        well_row_id = record.get("{%s}Row" % ns["bts"])
        well_col_id = record.get("{%s}Column" % ns["bts"])
-        well_id = chr(64 + int(well_row_id)) + str(well_col_id).zfill(2)
+        well_id = chr(64 + int(well_row_id)) + str(well_col_id).zfill(2)

-        x_pixel = np.nan
-        y_pixel = np.nan
        bit_depth = np.nan
        width = np.nan
        height = np.nan
        camera_no = np.nan
        pixel_size_x = np.nan
        pixel_size_y = np.nan
-        if rec_type == 'IMG':
+        if rec_type == "IMG":
            mrf_idx = mrf_channel_indices[channel_id]
            pixel_size_x = mrf_frame.iat[mrf_idx, 1]
            pixel_size_y = mrf_frame.iat[mrf_idx, 2]
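As a quick sanity check on the well-naming line above: the numeric row index is mapped to a letter via its ASCII offset, and the column index is zero-padded. A worked example with made-up record values:

    # Hypothetical values as they would come out of a MeasurementRecord
    well_row_id, well_col_id = "1", "3"
    well_id = chr(64 + int(well_row_id)) + str(well_col_id).zfill(2)
    print(well_id)  # -> "A03" (row 1 -> "A", column 3 -> "03")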
@@ -204,69 +232,93 @@ def blocks(fh, size=65536):
        mlf_frame.iat[idx, 1] = well_id
        mlf_frame.iat[idx, 2] = int(well_col_id)
        mlf_frame.iat[idx, 3] = int(well_row_id)
-        mlf_frame.iat[idx, 4] = int(
-            record.get("{%s}TimePoint" % ns["bts"])
-        )
-        mlf_frame.iat[idx, 5] = \
-            int(record.get("{%s}FieldIndex" % ns["bts"]))
+        mlf_frame.iat[idx, 4] = int(record.get("{%s}TimePoint" % ns["bts"]))
+        mlf_frame.iat[idx, 5] = int(record.get("{%s}FieldIndex" % ns["bts"]))
        mlf_frame.iat[idx, 6] = int(record.get("{%s}ZIndex" % ns["bts"]))
-        mlf_frame.iat[idx, 7] = \
-            int(record.get("{%s}TimelineIndex" % ns["bts"]))
-        mlf_frame.iat[idx, 8] = \
-            int(record.get("{%s}ActionIndex" % ns["bts"]))
+        mlf_frame.iat[idx, 7] = int(
+            record.get("{%s}TimelineIndex" % ns["bts"])
+        )
+        mlf_frame.iat[idx, 8] = int(record.get("{%s}ActionIndex" % ns["bts"]))
        mlf_frame.iat[idx, 9] = record.get("{%s}Action" % ns["bts"])
        mlf_frame.iat[idx, 10] = x_micrometer
        mlf_frame.iat[idx, 11] = y_micrometer
        mlf_frame.iat[idx, 12] = z_micrometer
-
-        mlf_frame.iat[idx, 13] = pixel_size_x
-        mlf_frame.iat[idx, 14] = pixel_size_y
-
-        mlf_frame.iat[idx, 15] = bit_depth
-        mlf_frame.iat[idx, 16] = width
-        mlf_frame.iat[idx, 17] = height
-        mlf_frame.iat[idx, 18] = channel_id
-        mlf_frame.iat[idx, 19] = camera_no
-        mlf_frame.iat[idx, 20] = record.text  # file_name
+        mlf_frame.iat[idx, 13] = width
+        mlf_frame.iat[idx, 14] = height
+        mlf_frame.iat[idx, 15] = pixel_size_x
+        mlf_frame.iat[idx, 16] = pixel_size_y
+
+        mlf_frame.iat[idx, 17] = bit_depth
+        mlf_frame.iat[idx, 18] = width
+        mlf_frame.iat[idx, 19] = height
+        mlf_frame.iat[idx, 20] = channel_id
+        mlf_frame.iat[idx, 21] = camera_no
+        mlf_frame.iat[idx, 22] = record.text  # file_name

    mlf_frame = mlf_frame.dropna(thresh=(len(mlf_frame.columns)))
    return mlf_frame, error_count


def calculate_steps(site_series: pd.Series):
-    # site_series is the z_micrometer series for a given site of a given channel
-    # This function calculates the step size in Z
+    # site_series is the z_micrometer series for a given site of a given
+    # channel. This function calculates the step size in Z
    steps = site_series.diff()[1:]
-    assert steps.std().sum() == 0.0, "" \
-            "When parsing the Yokogawa mlf file, some sites " \
-            "had varying step size in Z. " \
+    if not steps.std().sum() == 0.0:
+        raise Exception(
+            "When parsing the Yokogawa mlf file, some sites "
+            "had varying step size in Z. "
            "That is not supported for the OME-Zarr parsing"
+        )
    return steps.mean()


def get_z_steps(mlf_frame):
-    # Process mlf_frame to extract Z information
-    # run checks on consistencies & return site-based z step dataframe
+    # Process mlf_frame to extract Z information (pixel size & steps).
+    # Run checks on consistencies & return site-based z step dataframe
    # Group by well, field & channel
-    grouped_sites_z = mlf_frame.loc[:, [
-        'well_id',
-        'field_id',
-        'action_id',
-        'channel_id',
-        'z_micrometer']].groupby(by=[
-        'well_id',
-        'field_id',
-        'action_id',
-        'channel_id'])
+    grouped_sites_z = mlf_frame.loc[
+        :, ["well_id", "field_id", "action_id", "channel_id", "z_micrometer"]
+    ].groupby(by=["well_id", "field_id", "action_id", "channel_id"])
    # Group the whole site (combine channels), because Z steps need to be
    # consistent between channels for OME-Zarr.
-    z_data = grouped_sites_z.apply(calculate_steps).groupby(['well_id', 'field_id'])
-    assert z_data.std().sum().sum() == 0.0, "" \
-            "When parsing the Yokogawa mlf file, channels had " \
-            "varying step size in Z. " \
+    z_data = grouped_sites_z.apply(calculate_steps).groupby(
+        ["well_id", "field_id"]
+    )
+    if not z_data.std().sum().sum() == 0.0:
+        raise Exception(
+            "When parsing the Yokogawa mlf file, channels had "
+            "varying step size in Z. "
            "That is not supported for the OME-Zarr parsing"
-    z_frame = z_data.mean()
-    z_frame.columns = ['pixel_size_z']
+        )
+
+    # Ensure that channels have the same number of z planes and
+    # reduce it to one value.
+    # Only check if there is more than one channel available
+    if any(
+        grouped_sites_z.count().groupby(["well_id", "field_id"]).count() > 1
+    ):
+        if any(
+            grouped_sites_z.count()
+            .groupby(["well_id", "field_id"])
+            .std()
+            .sum()
+            != 0
+        ):
+            raise Exception(
+                "When parsing the Yokogawa mlf file, channels had "
+                "varying number of z planes. "
+                "That is not supported for the OME-Zarr parsing"
+            )
+    z_steps = (
+        grouped_sites_z.count()
+        .groupby(["well_id", "field_id"])
+        .mean()
+        .astype(int)
+    )
+
+    # Combine the two dataframes
+    z_frame = pd.concat([z_data.mean(), z_steps], axis=1)
+    z_frame.columns = ["pixel_size_z", "z_pixel"]
    return z_frame

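A small numeric illustration of the Z handling in this hunk (the values are made up): calculate_steps takes the relative z_micrometer values of one site and channel, requires uniform spacing, and returns the step size; get_z_steps additionally counts the planes per site to fill the new z_pixel column.

    import pandas as pd

    # Hypothetical relative Z positions of one site/channel, in micrometer.
    site_series = pd.Series([-1.5, -0.5, 0.5, 1.5])
    steps = site_series.diff()[1:]   # -> [1.0, 1.0, 1.0]
    assert steps.std() == 0.0        # uniform spacing, as required above
    pixel_size_z = steps.mean()      # -> 1.0 micrometer per Z plane
    z_pixel = site_series.count()    # -> 4 planes, cast to int in get_z_steps
    print(pixel_size_z, z_pixel)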
@@ -275,7 +327,10 @@ def check_grouped_sites_consistency(grouped_sites, per_site_parameters):
    # Same for pixel sizes
    # Only relevant when a site has multiple entries
    if grouped_sites.count().min().min() > 1:
-        assert grouped_sites.std().sum().sum() == 0.0, "" \
-                "When parsing the Yokogawa MeasurementData.mlf file, " \
-                f"some of the parameters {per_site_parameters} varied within " \
+        if not grouped_sites.std().sum().sum() == 0.0:
+            raise Exception(
+                ""
+                "When parsing the Yokogawa MeasurementData.mlf file, "
+                f"some of the parameters {per_site_parameters} varied within "
                "the site. That is not supported for the OME-Zarr parsing"
+            )
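To make the check concrete, here is a minimal, self-contained sketch of the idea behind check_grouped_sites_consistency: within each (well_id, field_id) group the per-site parameters must be constant, which is what a zero group-wise standard deviation asserts. The toy dataframe below is invented for illustration.

    import pandas as pd

    # Two sites (fields) in well A01, each recorded twice with identical metadata.
    df = pd.DataFrame(
        {
            "well_id": ["A01", "A01", "A01", "A01"],
            "field_id": [1, 1, 2, 2],
            "pixel_size_x": [0.65, 0.65, 0.65, 0.65],
            "bit_depth": [16, 16, 16, 16],
        }
    )
    grouped_sites = df.groupby(["well_id", "field_id"])
    # Zero std in every group -> the parameters are consistent within each site.
    assert grouped_sites.std().sum().sum() == 0.0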