Skip to content

Commit 0b619c8

Browse files
authored
Streaming export for YOLO and COCO formats (#9084)
1 parent d386ffa commit 0b619c8

File tree

5 files changed

+67
-20
lines changed

5 files changed

+67
-20
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
### Changed
2+
3+
- Reduced memory usage when exporting tasks in the YOLO and COCO formats
4+
(<https://github.com/cvat-ai/cvat/pull/9084>)

cvat/apps/dataset_manager/bindings.py

Lines changed: 36 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1661,11 +1661,8 @@ def __init__(
16611661
self._user = self._load_user_info(instance_meta) if dimension == DimensionType.DIM_3D else {}
16621662
self._dimension = dimension
16631663
self._format_type = format_type
1664-
1665-
is_video = instance_meta['mode'] == 'interpolation'
1666-
ext = ''
1667-
if is_video:
1668-
ext = TaskFrameProvider.VIDEO_FRAME_EXT
1664+
self._instance_data = instance_data
1665+
self._include_images = include_images
16691666

16701667
if dimension == DimensionType.DIM_3D or include_images:
16711668
if isinstance(instance_data, TaskData):
@@ -1679,46 +1676,64 @@ def __init__(
16791676
{0: MediaSource(db_task)}
16801677
)
16811678

1682-
dm_items: list[dm.DatasetItem] = []
1683-
for frame_data in instance_data.group_by_frame(include_empty=True):
1679+
def __iter__(self):
1680+
instance_meta = self._instance_data.meta[self._instance_data.META_FIELD]
1681+
is_video = instance_meta['mode'] == 'interpolation'
1682+
ext = ''
1683+
if is_video:
1684+
ext = TaskFrameProvider.VIDEO_FRAME_EXT
1685+
1686+
for frame_data in self._instance_data.group_by_frame(include_empty=True):
16841687
dm_media_args = {
16851688
'path': frame_data.name + ext,
16861689
'ext': ext or frame_data.name.rsplit(osp.extsep, maxsplit=1)[1],
16871690
}
1688-
if dimension == DimensionType.DIM_3D:
1691+
if self._dimension == DimensionType.DIM_3D:
16891692
dm_media: dm.PointCloud = self._media_provider.get_media_for_frame(
16901693
0, frame_data.id, **dm_media_args
16911694
)
16921695

1693-
if not include_images:
1696+
if not self._include_images:
16941697
dm_media_args["extra_images"] = [
16951698
dm.Image.from_file(path=osp.basename(image.path))
16961699
for image in dm_media.extra_images
16971700
]
16981701
dm_media = dm.PointCloud.from_file(**dm_media_args)
16991702
else:
17001703
dm_media_args['size'] = (frame_data.height, frame_data.width)
1701-
if include_images:
1704+
if self._include_images:
17021705
dm_media: dm.Image = self._media_provider.get_media_for_frame(
17031706
0, frame_data.idx, **dm_media_args
17041707
)
17051708
else:
17061709
dm_media = dm.Image.from_file(**dm_media_args)
17071710

1711+
# Swap still-unparsed lazy point lists for fresh lazy copies, so that points parsed during this iteration are not kept afterwards
1712+
frame_data = frame_data._replace(
1713+
labeled_shapes=[
1714+
(
1715+
shape._replace(points=shape.points.lazy_copy())
1716+
if isinstance(shape.points, LazyList) and not shape.points.is_parsed
1717+
else shape
1718+
)
1719+
for shape in frame_data.labeled_shapes
1720+
]
1721+
)
1722+
17081723
dm_anno = self._read_cvat_anno(frame_data, instance_meta['labels'])
17091724

17101725
dm_attributes = {'frame': frame_data.frame}
17111726

1712-
if dimension == DimensionType.DIM_2D:
1727+
if self._dimension == DimensionType.DIM_2D:
17131728
dm_item = dm.DatasetItem(
17141729
id=osp.splitext(frame_data.name)[0],
17151730
subset=frame_data.subset,
17161731
annotations=dm_anno,
17171732
media=dm_media,
17181733
attributes=dm_attributes,
17191734
)
1720-
elif dimension == DimensionType.DIM_3D:
1721-
if format_type == "sly_pointcloud":
1735+
elif self._dimension == DimensionType.DIM_3D:
1736+
if self._format_type == "sly_pointcloud":
17221737
dm_attributes["name"] = self._user["name"]
17231738
dm_attributes["createdAt"] = self._user["createdAt"]
17241739
dm_attributes["updatedAt"] = self._user["updatedAt"]
@@ -1735,9 +1750,10 @@ def __init__(
17351750
attributes=dm_attributes,
17361751
)
17371752

1738-
dm_items.append(dm_item)
1753+
yield dm_item
17391754

1740-
self._items = dm_items
1755+
def __len__(self):
1756+
return len(self._instance_data)
17411757

17421758
def _read_cvat_anno(self, cvat_frame_anno: CommonData.Frame, labels: list):
17431759
categories = self.categories()
@@ -1751,6 +1767,11 @@ def map_label(name, parent=''): return label_cat.find(name, parent)[0]
17511767
return self.convert_annotations(cvat_frame_anno,
17521768
label_attrs, map_label, self._format_type, self._dimension)
17531769

1770+
@property
1771+
def is_stream(self) -> bool:
1772+
return True
1773+
1774+
17541775
class CVATProjectDataExtractor(dm.DatasetBase, CVATDataExtractorMixin):
17551776
def __init__(
17561777
self,

cvat/apps/dataset_manager/formats/coco.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,13 @@
66
import zipfile
77

88
from datumaro.components.annotation import AnnotationType
9-
from datumaro.components.dataset import Dataset
9+
from datumaro.components.dataset import Dataset, StreamDataset
1010
from datumaro.plugins.data_formats.coco.importer import CocoImporter
1111

1212
from cvat.apps.dataset_manager.bindings import (
1313
GetCVATDataExtractor,
1414
NoMediaInAnnotationFileError,
15+
ProjectData,
1516
detect_dataset,
1617
import_dm_annotations,
1718
)
@@ -23,7 +24,8 @@
2324
@exporter(name="COCO", ext="ZIP", version="1.0")
2425
def _export(dst_file, temp_dir, instance_data, save_images=False):
2526
with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor:
26-
dataset = Dataset.from_extractors(extractor, env=dm_env)
27+
dataset_cls = Dataset if isinstance(instance_data, ProjectData) else StreamDataset
28+
dataset = dataset_cls.from_extractors(extractor, env=dm_env)
2729
dataset.export(temp_dir, "coco_instances", save_media=save_images, merge_images=False)
2830

2931
make_zip_archive(temp_dir, dst_file)
@@ -50,7 +52,8 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs
5052
@exporter(name="COCO Keypoints", ext="ZIP", version="1.0")
5153
def _export(dst_file, temp_dir, instance_data, save_images=False):
5254
with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor:
53-
dataset = Dataset.from_extractors(extractor, env=dm_env)
55+
dataset_cls = Dataset if isinstance(instance_data, ProjectData) else StreamDataset
56+
dataset = dataset_cls.from_extractors(extractor, env=dm_env)
5457
dataset.export(
5558
temp_dir, "coco_person_keypoints", save_media=save_images, merge_images=False
5659
)

cvat/apps/dataset_manager/formats/yolo.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from typing import Callable, Optional
88

99
from datumaro.components.annotation import AnnotationType
10+
from datumaro.components.dataset import StreamDataset
1011
from datumaro.components.dataset_base import DatasetItem
1112
from datumaro.components.project import Dataset
1213
from pyunpack import Archive
@@ -36,7 +37,8 @@ def _export_common(
3637
**kwargs,
3738
):
3839
with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor:
39-
dataset = Dataset.from_extractors(extractor, env=dm_env)
40+
dataset_cls = Dataset if isinstance(instance_data, ProjectData) else StreamDataset
41+
dataset = dataset_cls.from_extractors(extractor, env=dm_env)
4042
dataset.export(temp_dir, format_name, save_media=save_images, **kwargs)
4143

4244
make_zip_archive(temp_dir, dst_file)
@@ -109,7 +111,8 @@ def _export_yolo_ultralytics_oriented_boxes(*args, **kwargs):
109111
@exporter(name="Ultralytics YOLO Segmentation", ext="ZIP", version="1.0")
110112
def _export_yolo_ultralytics_segmentation(dst_file, temp_dir, instance_data, *, save_images=False):
111113
with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor:
112-
dataset = Dataset.from_extractors(extractor, env=dm_env)
114+
dataset_cls = Dataset if isinstance(instance_data, ProjectData) else StreamDataset
115+
dataset = dataset_cls.from_extractors(extractor, env=dm_env)
113116
dataset = dataset.transform("masks_to_polygons")
114117
dataset.export(temp_dir, "yolo_ultralytics_segmentation", save_media=save_images)
115118

cvat/apps/engine/lazy_list.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,3 +268,19 @@ def __setstate__(self, state):
268268
self._parsed = state["parsed"]
269269
if self._parsed:
270270
self.extend(state["parsed_elements"])
271+
272+
def lazy_copy(self) -> list[T]:
273+
"""
274+
Makes a copy without parsing elements.
275+
Only works if elements have not been parsed yet.
276+
"""
277+
assert not self._parsed
278+
return LazyList(
279+
string=self._string,
280+
separator=self._separator,
281+
converter=self._converter,
282+
)
283+
284+
@property
285+
def is_parsed(self):
286+
return self._parsed

0 commit comments

Comments
 (0)