|
1 | 1 | import abc
|
2 | 2 | import json
|
3 | 3 | import os
|
| 4 | +import re |
4 | 5 | import warnings
|
5 | 6 | import zipfile
|
6 | 7 | from pathlib import Path
|
7 |
| -from typing import Any, Iterator, List, Optional |
| 8 | +from typing import Any, Generic, Iterator, List, Optional, TypeVar |
8 | 9 |
|
9 | 10 | from annofabapi.dataclass.annotation import FullAnnotation, SimpleAnnotation
|
10 | 11 | from annofabapi.exceptions import AnnotationOuterFileNotFoundError
|
@@ -292,6 +293,32 @@ def open_outer_file(self, data_uri: str):
|
292 | 293 | raise AnnotationOuterFileNotFoundError(str(outer_file_path))
|
293 | 294 |
|
294 | 295 |
|
S = TypeVar("S", bound=SimpleAnnotationParser)


class SimpleAnnotationParserGroupByTask(Generic[S]):
    """
    Bundle of Simple Annotation parsers that belong to one task.

    The instance only stores the two values handed to the constructor and
    exposes them through read-only properties.

    Args:
        task_id: Task ID.
        parser_list: List of parsers for the JSON files under the task.

    """

    def __init__(self, task_id: str, parser_list: List[S]):
        # Double-underscore names are mangled per class, so the values are
        # only reachable through the properties below.
        self.__task_id = task_id
        self.__parser_list = parser_list

    @property
    def parser_list(self) -> List[S]:
        """The list of parsers passed to the constructor (same object, not a copy)."""
        return self.__parser_list

    @property
    def task_id(self) -> str:
        """The task ID passed to the constructor."""
        return self.__task_id
| 320 | + |
| 321 | + |
295 | 322 | def __parse_annotation_dir(annotaion_dir_path: Path, clazz) -> Iterator[Any]:
|
296 | 323 | for task_dir in annotaion_dir_path.iterdir():
|
297 | 324 | if not task_dir.is_dir():
|
@@ -333,6 +360,68 @@ def lazy_parse_full_annotation_dir(annotaion_dir_path: Path) -> Iterator[SimpleA
|
333 | 360 | return __parse_annotation_dir(annotaion_dir_path, FullAnnotationDirParser)
|
334 | 361 |
|
335 | 362 |
|
def lazy_parse_simple_annotation_zip_by_task(
        zip_file_path: Path) -> Iterator[SimpleAnnotationParserGroupByTask[SimpleAnnotationZipParser]]:
    """
    Walk a Simple Annotation zip file and yield, task by task, the parsers for its annotation JSON files.

    The first path component of an entry is treated as the task ID. A task is
    recognised either from an explicit first-level directory entry or from a
    ``<task_id>/<name>.json`` file entry, so archives that were written
    without directory records are handled as well. Tasks are yielded in the
    order in which they first appear in the archive.

    The zip file is opened when iteration starts and closed when the generator
    is exhausted or closed; every yielded parser is constructed with that open
    ``zipfile.ZipFile`` object.

    Args:
        zip_file_path: Path to the simple annotation zip file downloaded from annofab.

    Yields:
        For each task, a ``SimpleAnnotationParserGroupByTask`` holding the
        task ID and the parsers for the JSON files directly under that task.
    """
    with zipfile.ZipFile(zip_file_path, mode="r") as file:
        # task_id -> names of the JSON entries directly below that task.
        # Built in one pass instead of re-scanning every entry for every task.
        json_names_by_task = {}

        for info in file.infolist():
            # Drop empty components produced by leading, trailing or doubled slashes.
            components = [p for p in info.filename.split("/") if p]

            if info.is_dir():
                # A first-level directory registers a task even if it holds no JSON file.
                if len(components) == 1:
                    json_names_by_task.setdefault(components[0], [])
            elif len(components) == 2 and components[1].endswith(".json"):
                # A file exactly one level below the task directory.
                json_names_by_task.setdefault(components[0], []).append(info.filename)

        for task_id, json_names in json_names_by_task.items():
            # Parsers are created only when their task is about to be yielded.
            parser_list = [SimpleAnnotationZipParser(file, name) for name in json_names]
            yield SimpleAnnotationParserGroupByTask(task_id, parser_list)
| 402 | + |
| 403 | + |
def lazy_parse_simple_annotation_dir_by_task(
        annotaion_dir_path: Path) -> Iterator[SimpleAnnotationParserGroupByTask[SimpleAnnotationDirParser]]:
    """
    Walk an extracted Simple Annotation directory and yield, task by task, the parsers for its annotation JSON files.

    Every sub-directory directly under ``annotaion_dir_path`` is treated as a
    task, and its name is used as the task ID. Non-directory entries at that
    level are ignored.

    Args:
        annotaion_dir_path: Directory obtained by extracting the simple annotation zip file downloaded from annofab.

    Yields:
        For each task directory, a ``SimpleAnnotationParserGroupByTask``
        holding the task ID and one parser per ``.json`` file directly inside
        that directory.
    """
    for entry in annotaion_dir_path.iterdir():
        if entry.is_dir():
            # Only regular files with a ".json" suffix become parsers.
            json_files = (child for child in entry.iterdir() if child.is_file() and child.suffix == ".json")
            parsers = [SimpleAnnotationDirParser(json_file) for json_file in json_files]
            yield SimpleAnnotationParserGroupByTask(entry.name, parsers)
| 423 | + |
| 424 | + |
336 | 425 | def __parse_annotation_zip(zip_file_path: Path, clazz) -> Iterator[Any]:
|
337 | 426 | def lazy_parser(zip_file: zipfile.ZipFile, info: zipfile.ZipInfo) -> Optional[Any]:
|
338 | 427 | paths = [p for p in info.filename.split("/") if len(p) != 0]
|
|
0 commit comments