Skip to content

Commit 1df67ca

Browse files
authored
Merge pull request #579 from fractal-analytics-platform/578-should-import-ome-zarr-add-wavelength_id-andor-label-information-into-omero-metadata
Include Omero-channels-metadata update in import-ome-zarr task
2 parents 7e4e5d0 + 683bcf8 commit 1df67ca

File tree

7 files changed

+379
-12
lines changed

7 files changed

+379
-12
lines changed

CHANGELOG.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
**Note**: Numbers like (\#123) point to closed Pull Requests on the fractal-tasks-core repository.
22

3-
# Unreleased
3+
# 0.13.0
44

55
* Tasks:
66
* New task and helper functions:
7-
* Introduce `import_ome_zarr` task (\#557).
7+
* Introduce `import_ome_zarr` task (\#557, \#579).
88
* Introduce `get_single_image_ROI` and `get_image_grid_ROIs` (\#557).
99
* Introduce `detect_ome_ngff_type` (\#557).
10-
* Make `maximum_intensity_projection` task not depend on ROI tables (\#557).
10+
* Introduce `update_omero_channels` (\#579).
11+
* Make `maximum_intensity_projection` independent from ROI tables (\#557).
1112
* Make Cellpose task work when `input_ROI_table` is empty (\#566).
1213
* Fix bug of missing attributes in ROI-table Zarr group (\#573).
1314
* Dependencies:

fractal_tasks_core/__FRACTAL_MANIFEST__.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1245,6 +1245,12 @@
12451245
"type": "integer",
12461246
"description": "X shape of the ROI grid in `grid_ROI_table`."
12471247
},
1248+
"update_omero_metadata": {
1249+
"title": "Update Omero Metadata",
1250+
"default": true,
1251+
"type": "boolean",
1252+
"description": "Whether to update Omero-channels metadata, to make them Fractal-compatible."
1253+
},
12481254
"overwrite": {
12491255
"title": "Overwrite",
12501256
"default": false,

fractal_tasks_core/lib_channels.py

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
Helper functions to address channels via OME-NGFF/OMERO metadata.
1313
"""
1414
import logging
15+
from copy import deepcopy
16+
from typing import Any
1517
from typing import Optional
1618
from typing import Union
1719

@@ -339,3 +341,135 @@ def define_omero_channels(
339341
]
340342

341343
return new_channels_dictionaries
344+
345+
346+
def _get_new_unique_value(
347+
value: str,
348+
existing_values: list[str],
349+
) -> str:
350+
"""
351+
Produce a string value that is not present in a given list
352+
353+
Append `_1`, `_2`, ... to a given string, if needed, until finding a value
354+
which is not already present in `existing_values`.
355+
356+
Args:
357+
value: The first guess for the new value
358+
existing_values: The list of existing values
359+
360+
Returns:
361+
A string value which is not present in `existing_values`
362+
"""
363+
counter = 1
364+
new_value = value
365+
while new_value in existing_values:
366+
new_value = f"{value}-{counter}"
367+
counter += 1
368+
return new_value
369+
370+
371+
def update_omero_channels(
    old_channels: list[dict[str, Any]]
) -> list[dict[str, Any]]:
    """
    Make an existing list of Omero channels Fractal-compatible

    The output channels all have keys `label`, `wavelength_id` and `color`.
    Newly generated `wavelength_id` values never collide with any
    `wavelength_id` already in use; pre-existing `wavelength_id` values are
    kept as they are (including possible duplicates among them).

    Channels are processed in three passes, by decreasing priority:

    1. Channels that already have a `wavelength_id`;
    2. Channels that have a `label` but no `wavelength_id` (the label is used
       as first guess for the new `wavelength_id`);
    3. Channels that have neither (the 1-based channel index is used as
       `label` and as first guess for the new `wavelength_id`).

    Missing colors are assigned in processing order, from a short list of
    defaults and then with a fixed fallback value.

    The input list and its dictionaries are not modified, and the output
    does not share any nested object with the input.

    See https://ngff.openmicroscopy.org/0.4/index.html#omero-md for the
    definition of NGFF Omero metadata.

    Args:
        old_channels: Existing list of Omero-channel dictionaries

    Returns:
        New list of Fractal-compatible Omero-channel dictionaries
    """
    # Deep copy: entries of `new_channels` are updated in place below, so
    # that nested values (e.g. a `window` dictionary) are never shared with
    # `old_channels`.
    new_channels = deepcopy(old_channels)
    existing_wavelength_ids: list[str] = []
    handled_channels: set[int] = set()

    default_colors = iter(("00FFFF", "FF00FF", "FFFF00"))

    def _get_next_color() -> str:
        # Fall back to gray once the default colors are exhausted
        return next(default_colors, "808080")

    def _set_color_if_missing(channel: dict[str, Any]) -> None:
        if "color" not in channel:
            channel["color"] = _get_next_color()

    # Channels that contain the key "wavelength_id"
    for ind, new_channel in enumerate(new_channels):
        if "wavelength_id" not in new_channel:
            continue
        handled_channels.add(ind)
        existing_wavelength_ids.append(new_channel["wavelength_id"])
        if "label" not in new_channel:
            new_channel["label"] = str(ind + 1)
        _set_color_if_missing(new_channel)

    # Channels that contain the key "label" but do not contain the key
    # "wavelength_id"
    for ind, new_channel in enumerate(new_channels):
        if ind in handled_channels or "label" not in new_channel:
            continue
        handled_channels.add(ind)
        wavelength_id = _get_new_unique_value(
            new_channel["label"],
            existing_wavelength_ids,
        )
        existing_wavelength_ids.append(wavelength_id)
        new_channel["wavelength_id"] = wavelength_id
        _set_color_if_missing(new_channel)

    # Channels that do not contain the key "label" nor the key "wavelength_id"
    # NOTE: these channels must be treated last, as they have lower priority
    # w.r.t. existing "wavelength_id" or "label" values
    for ind, new_channel in enumerate(new_channels):
        if ind in handled_channels:
            continue
        label = str(ind + 1)
        wavelength_id = _get_new_unique_value(
            label,
            existing_wavelength_ids,
        )
        existing_wavelength_ids.append(wavelength_id)
        new_channel["label"] = label
        new_channel["wavelength_id"] = wavelength_id
        _set_color_if_missing(new_channel)

    # Log old/new values of label, wavelength_id and color
    for old_channel, new_channel in zip(old_channels, new_channels):
        label = old_channel.get("label")
        color = old_channel.get("color")
        wavelength_id = old_channel.get("wavelength_id")
        old_attributes = (
            f"Old attributes: {label=}, {wavelength_id=}, {color=}"
        )
        label = new_channel["label"]
        wavelength_id = new_channel["wavelength_id"]
        color = new_channel["color"]
        new_attributes = (
            f"New attributes: {label=}, {wavelength_id=}, {color=}"
        )
        logging.info(
            "Omero channel update:\n"
            f"    {old_attributes}\n"
            f"    {new_attributes}"
        )

    return new_channels

fractal_tasks_core/tasks/import_ome_zarr.py

Lines changed: 63 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import zarr
2222
from pydantic.decorator import validate_arguments
2323

24+
from fractal_tasks_core.lib_channels import update_omero_channels
2425
from fractal_tasks_core.lib_ngff import detect_ome_ngff_type
2526
from fractal_tasks_core.lib_ngff import NgffImageMeta
2627
from fractal_tasks_core.lib_regions_of_interest import get_image_grid_ROIs
@@ -34,6 +35,8 @@ def _process_single_image(
3435
image_path: str,
3536
add_image_ROI_table: bool,
3637
add_grid_ROI_table: bool,
38+
update_omero_metadata: bool,
39+
*,
3740
grid_YX_shape: Optional[tuple[int, int]] = None,
3841
overwrite: bool = False,
3942
) -> None:
@@ -43,14 +46,17 @@ def _process_single_image(
4346
This task:
4447
4548
1. Validates OME-NGFF image metadata, via `NgffImageMeta`;
46-
2. Optionally generates and writes two ROI tables.
49+
2. Optionally generates and writes two ROI tables;
50+
3. Optionally updates OME-NGFF omero metadata.
4751
4852
Args:
4953
image_path: Absolute path to the image Zarr group.
5054
add_image_ROI_table: Whether to add a `image_ROI_table` table
5155
(argument propagated from `import_ome_zarr`).
5256
add_grid_ROI_table: Whether to add a `grid_ROI_table` table (argument
5357
propagated from `import_ome_zarr`).
58+
update_omero_metadata: Whether to update Omero-channels metadata
59+
(argument propagated from `import_ome_zarr`).
5460
grid_YX_shape: YX shape of the ROI grid (it must be not `None`, if
5561
`add_grid_ROI_table=True`).
5662
"""
@@ -100,6 +106,51 @@ def _process_single_image(
100106
logger=logger,
101107
)
102108

109+
# Update Omero-channels metadata
110+
if update_omero_metadata:
111+
# Extract number of channels from zarr array
112+
try:
113+
channel_axis_index = image_meta.axes_names.index("c")
114+
except ValueError:
115+
logger.error(f"Existing axes: {image_meta.axes_names}")
116+
msg = (
117+
"OME-Zarrs with no channel axis are not currently "
118+
"supported in fractal-tasks-core. Upcoming flexibility "
119+
"improvements are tracked in https://github.com/"
120+
"fractal-analytics-platform/fractal-tasks-core/issues/150."
121+
)
122+
logger.error(msg)
123+
raise NotImplementedError(msg)
124+
logger.info(f"Existing axes: {image_meta.axes_names}")
125+
logger.info(f"Channel-axis index: {channel_axis_index}")
126+
num_channels_zarr = array.shape[channel_axis_index]
127+
logger.info(
128+
f"{num_channels_zarr} channel(s) found in Zarr array "
129+
f"at {image_path}/{dataset_subpath}"
130+
)
131+
# Update or create omero channels metadata
132+
old_omero = image_group.attrs.get("omero", {})
133+
old_channels = old_omero.get("channels", [])
134+
if len(old_channels) > 0:
135+
logger.info(
136+
f"{len(old_channels)} channel(s) found in NGFF omero metadata"
137+
)
138+
if len(old_channels) != num_channels_zarr:
139+
error_msg = (
140+
"Channels-number mismatch: Number of channels in the "
141+
f"zarr array ({num_channels_zarr}) differs from number "
142+
"of channels listed in NGFF omero metadata "
143+
f"({len(old_channels)})."
144+
)
145+
logging.error(error_msg)
146+
raise ValueError(error_msg)
147+
else:
148+
old_channels = [{} for ind in range(num_channels_zarr)]
149+
new_channels = update_omero_channels(old_channels)
150+
new_omero = old_omero.copy()
151+
new_omero["channels"] = new_channels
152+
image_group.attrs.update(omero=new_omero)
153+
103154

104155
@validate_arguments
105156
def import_ome_zarr(
@@ -112,6 +163,7 @@ def import_ome_zarr(
112163
add_grid_ROI_table: bool = True,
113164
grid_y_shape: int = 2,
114165
grid_x_shape: int = 2,
166+
update_omero_metadata: bool = True,
115167
overwrite: bool = False,
116168
) -> dict[str, Any]:
117169
"""
@@ -141,6 +193,8 @@ def import_ome_zarr(
141193
image, with the image split into a rectangular grid of ROIs.
142194
grid_y_shape: Y shape of the ROI grid in `grid_ROI_table`.
143195
grid_x_shape: X shape of the ROI grid in `grid_ROI_table`.
196+
update_omero_metadata: Whether to update Omero-channels metadata, to
197+
make them Fractal-compatible.
144198
overwrite: Whether new ROI tables (added when `add_image_ROI_table`
145199
and/or `add_grid_ROI_table` are `True`) can overwrite existing ones.
146200
"""
@@ -174,14 +228,15 @@ def import_ome_zarr(
174228
f"{zarr_path}/{well_path}/{image_path}",
175229
add_image_ROI_table,
176230
add_grid_ROI_table,
177-
grid_YX_shape,
231+
update_omero_metadata,
232+
grid_YX_shape=grid_YX_shape,
178233
overwrite=overwrite,
179234
)
180235
elif ngff_type == "well":
181236
zarrurls["well"].append(zarr_name)
182237
logger.warning(
183238
"Only OME-Zarr for plates are fully supported in Fractal; "
184-
"e.g. the current one ({ngff_type=}) cannot be "
239+
f"e.g. the current one ({ngff_type=}) cannot be "
185240
"processed via the `maximum_intensity_projection` task."
186241
)
187242
for image in root_group.attrs["well"]["images"]:
@@ -191,21 +246,23 @@ def import_ome_zarr(
191246
f"{zarr_path}/{image_path}",
192247
add_image_ROI_table,
193248
add_grid_ROI_table,
194-
grid_YX_shape,
249+
update_omero_metadata,
250+
grid_YX_shape=grid_YX_shape,
195251
overwrite=overwrite,
196252
)
197253
elif ngff_type == "image":
198254
zarrurls["image"].append(zarr_name)
199255
logger.warning(
200256
"Only OME-Zarr for plates are fully supported in Fractal; "
201-
"e.g. the current one ({ngff_type=}) cannot be "
257+
f"e.g. the current one ({ngff_type=}) cannot be "
202258
"processed via the `maximum_intensity_projection` task."
203259
)
204260
_process_single_image(
205261
zarr_path,
206262
add_image_ROI_table,
207263
add_grid_ROI_table,
208-
grid_YX_shape,
264+
update_omero_metadata,
265+
grid_YX_shape=grid_YX_shape,
209266
overwrite=overwrite,
210267
)
211268

tests/_zenodo_ome_zarrs.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,13 @@
1212
Zurich.
1313
"""
1414
import json
15+
import logging
1516
import shutil
1617
from pathlib import Path
1718
from typing import Any
1819

1920
import dask.array as da
21+
import zarr
2022
from devtools import debug
2123

2224

@@ -25,6 +27,7 @@ def prepare_3D_zarr(
2527
zenodo_zarr: list[str],
2628
zenodo_zarr_metadata: list[dict[str, Any]],
2729
remove_tables: bool = False,
30+
remove_omero: bool = False,
2831
):
2932
zenodo_zarr_3D, zenodo_zarr_2D = zenodo_zarr[:]
3033
metadata_3D, metadata_2D = zenodo_zarr_metadata[:]
@@ -35,6 +38,16 @@ def prepare_3D_zarr(
3538
shutil.rmtree(
3639
str(Path(zarr_path) / Path(zenodo_zarr_3D).name / "B/03/0/tables")
3740
)
41+
logging.warning("Removing ROI tables attributes 3D Zenodo zarr")
42+
if remove_omero:
43+
image_group = zarr.open_group(
44+
str(Path(zarr_path) / Path(zenodo_zarr_3D).name / "B/03/0"),
45+
mode="r+",
46+
)
47+
image_attrs = image_group.attrs.asdict()
48+
image_attrs.pop("omero")
49+
image_group.attrs.put(image_attrs)
50+
logging.warning("Removing omero attributes from 3D Zenodo zarr")
3851
metadata = metadata_3D.copy()
3952
return metadata
4053

0 commit comments

Comments
 (0)