Skip to content

Commit 2300709

Browse files
authored
[parser.py] SimpleAnnotationParserByTaskにメソッドを追加 (#274)
* [parser] タスクごとのparserを修正 * [parser] タスクごとのparserを修正 * [parser] タスクごとのparserを修正 * [parser] タスクごとのparserを修正 * version up
1 parent 6f15650 commit 2300709

File tree

5 files changed

+181
-28
lines changed

5 files changed

+181
-28
lines changed

Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ format:
1010
poetry run black annofabapi tests
1111

1212
lint:
13-
poetry run mypy annofabapi tests/create_test_project.py
13+
poetry run mypy annofabapi tests
1414
poetry run flake8 annofabapi tests/create_test_project.py
1515
poetry run pylint annofabapi tests/create_test_project.py
1616

annofabapi/__version__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.42.0"
1+
__version__ = "0.42.1"

annofabapi/parser.py

+117-11
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,7 @@ def json_file_path(self) -> str:
5151
@property
5252
def input_data_id(self) -> str:
5353
"""
54-
JSONファイルから決まる、input_data_id.
55-
Simple(v2)版用です。
54+
JSONファイルから決まるinput_data_id.
5655
"""
5756
return self.__input_data_id
5857

@@ -201,7 +200,7 @@ class SimpleAnnotationDirParser(SimpleAnnotationParser):
201200
Examples:
202201
JSONファイルをパースする::
203202
204-
p = SimpleAnnotationDirParser(Path("task_id/input_data_id.json"))
203+
p = SimpleAnnotationDirParser(Path("annotation/task_id/input_data_id.json"))
205204
annotation = p.parse()
206205
207206
"""
@@ -311,28 +310,134 @@ def __init__(self, task_id: str):
311310
def task_id(self) -> str:
312311
return self.__task_id
313312

313+
@property
314+
@abc.abstractmethod
315+
def json_file_path_list(self) -> List[str]:
316+
"""
317+
パースするJSONファイルパスのリスト
318+
"""
319+
320+
@abc.abstractmethod
321+
def get_parser(self, json_file_path: str) -> SimpleAnnotationParser:
322+
"""
323+
JSONファイルパスから、Simple Annotation parserを取得する。
324+
325+
Args:
326+
json_file_path: パースするJSONファイルのパス。``json_file_path_list`` に含まれる値を指定すること。
327+
328+
Returns:
329+
Simple Annotation parser
330+
331+
Raises:
332+
ValueError: ``json_file_path`` の値が ``json_file_path_list`` に含まれていないとき
333+
334+
"""
335+
314336
@abc.abstractmethod
315337
def lazy_parse(self) -> Iterator[SimpleAnnotationParser]:
316338
pass
317339

318340

319341
class SimpleAnnotationZipParserByTask(SimpleAnnotationParserByTask):
320-
def __init__(self, zip_file: zipfile.ZipFile, task_id: str, json_path_list: List[str]):
342+
"""
343+
Simple Annotation zipのparserをタスクごとにまとめたもの。
344+
345+
Args:
346+
zip_file: Simple Annotation zipのzipfileオブジェクト
347+
task_id: タスクID
348+
json_path_list: パースするJSONパスのリスト。
349+
Noneの場合は、``zipfile.ZipFile.infolist()`` 関数を呼び出して、JSONパスのリストを生成します。
350+
351+
Examples:
352+
JSONファイルをパースする::
353+
354+
with zipfile.ZipFile("simple-annotation.zip", "r") as zip_file:
355+
p = SimpleAnnotationZipParserByTask(zip_file, "task1")
356+
357+
"""
358+
359+
def __get_json_file_path_list(self, task_id: str) -> List[str]:
360+
"""
361+
zipファイルの中から、指定したtask_idのディレクトリ配下にある入力データJSONのパスのリストを取得する。
362+
"""
363+
364+
def _match_task_id_and_contain_input_data_json(zip_info: zipfile.ZipInfo) -> bool:
365+
"""
366+
task_idディレクトリ配下の入力データJSONかどうか
367+
"""
368+
paths = [p for p in zip_info.filename.split("/") if len(p) != 0]
369+
if len(paths) != 2:
370+
return False
371+
if paths[0] != task_id:
372+
return False
373+
if not paths[1].endswith(".json"):
374+
return False
375+
return True
376+
377+
return [
378+
zip_info.filename
379+
for zip_info in self.__zip_file.infolist()
380+
if _match_task_id_and_contain_input_data_json(zip_info)
381+
]
382+
383+
def __init__(self, zip_file: zipfile.ZipFile, task_id: str, json_path_list: Optional[List[str]] = None):
321384
self.__zip_file = zip_file
322-
self.__json_path_list = json_path_list
385+
if json_path_list is not None:
386+
self.__json_path_list = json_path_list
387+
else:
388+
self.__json_path_list = self.__get_json_file_path_list(task_id)
323389
super().__init__(task_id)
324390

325391
def lazy_parse(self) -> Iterator[SimpleAnnotationZipParser]:
326392
return (SimpleAnnotationZipParser(self.__zip_file, e) for e in self.__json_path_list)
327393

394+
@property
395+
def json_file_path_list(self) -> List[str]:
396+
return self.__json_path_list
397+
398+
def get_parser(self, json_file_path: str) -> SimpleAnnotationParser:
399+
if json_file_path in self.__json_path_list:
400+
return SimpleAnnotationZipParser(self.__zip_file, json_file_path)
401+
else:
402+
raise ValueError(f"json_file_path '{json_file_path}' は `json_file_path_list` に含まれていません。")
403+
328404

329405
class SimpleAnnotationDirParserByTask(SimpleAnnotationParserByTask):
330-
def __init__(self, task_id: str, json_path_list: List[Path]):
331-
self.__json_path_list = json_path_list
406+
"""
407+
Simple Annotation zipを展開したディレクトリのparserをタスクごとにまとめたもの。
408+
409+
Args:
410+
task_dir_path: Simple Annotation zipを展開したディレクトリ配下にある、タスクディレクトリのパス。
411+
ディレクトリ名がtask_idとして扱われます。
412+
ディレクトリ直下にある拡張子が ``.json`` のファイルがパース対象になります。
413+
414+
Examples:
415+
JSONファイルをパースする::
416+
417+
task_dir_path = Path("simple-annotation/task1")
418+
p = SimpleAnnotationDirParserByTask(task_dir_path)
419+
420+
"""
421+
422+
def __init__(self, task_dir_path: Path):
423+
self.__task_dir_path = task_dir_path
424+
task_id = task_dir_path.name
332425
super().__init__(task_id)
333426

334427
def lazy_parse(self) -> Iterator[SimpleAnnotationDirParser]:
335-
return (SimpleAnnotationDirParser(e) for e in self.__json_path_list)
428+
return (
429+
SimpleAnnotationDirParser(e) for e in self.__task_dir_path.iterdir() if e.is_file() and e.suffix == ".json"
430+
)
431+
432+
@property
433+
def json_file_path_list(self) -> List[str]:
434+
return [str(e) for e in self.__task_dir_path.iterdir() if e.is_file() and e.suffix == ".json"]
435+
436+
def get_parser(self, json_file_path: str) -> SimpleAnnotationParser:
437+
if json_file_path in self.json_file_path_list:
438+
return SimpleAnnotationDirParser(Path(json_file_path))
439+
else:
440+
raise ValueError(f"json_file_path '{json_file_path}' は `json_file_path_list` に含まれていません。")
336441

337442

338443
def __parse_annotation_dir(annotaion_dir_path: Path, clazz) -> Iterator[Any]:
@@ -405,6 +510,9 @@ def is_input_data_json(zip_info: zipfile.ZipInfo) -> bool:
405510
return True
406511

407512
def create_task_dict(arg_info_list: List[zipfile.ZipInfo]) -> Dict[str, List[str]]:
513+
"""
514+
task_idとJSONパスリストの辞書を取得する。
515+
"""
408516
task_dict: Dict[str, List[str]] = {}
409517
sorted_path_list = sorted([e.filename for e in arg_info_list if is_input_data_json(e)])
410518

@@ -447,9 +555,7 @@ def lazy_parse_simple_annotation_dir_by_task(annotaion_dir_path: Path) -> Iterat
447555
if not task_dir.is_dir():
448556
continue
449557

450-
task_id = task_dir.name
451-
json_path_list = [e for e in task_dir.iterdir() if e.is_file() and e.suffix == ".json"]
452-
yield SimpleAnnotationDirParserByTask(task_id=task_id, json_path_list=json_path_list)
558+
yield SimpleAnnotationDirParserByTask(task_dir)
453559

454560

455561
def __parse_annotation_zip(zip_file_path: Path, clazz) -> Iterator[Any]:

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "annofabapi"
3-
version = "0.42.0"
3+
version = "0.42.1"
44
description = "Python Client Library of AnnoFab WebAPI (https://annofab.com/docs/api/)"
55
authors = ["yuji38kwmt"]
66
license = "MIT"

tests/test_local_parser.py

+61-14
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@
1414
FullAnnotationDirParser,
1515
FullAnnotationZipParser,
1616
SimpleAnnotationDirParser,
17+
SimpleAnnotationDirParserByTask,
1718
SimpleAnnotationZipParser,
19+
SimpleAnnotationZipParserByTask,
1820
)
1921

2022
# プロジェクトトップに移動する
@@ -25,7 +27,61 @@
2527
test_dir = Path("./tests/data")
2628

2729

28-
class TestSimpleAnnotationV2:
30+
class TestSimpleAnnotationParser:
31+
def test_SimpleAnnotationZipParser(self):
32+
zip_path = Path(test_dir / "simple-annotation.zip")
33+
with zipfile.ZipFile(zip_path) as zip_file:
34+
parser = SimpleAnnotationZipParser(zip_file, "sample_1/c86205d1-bdd4-4110-ae46-194e661d622b.json")
35+
assert parser.task_id == "sample_1"
36+
assert parser.input_data_id == "c86205d1-bdd4-4110-ae46-194e661d622b"
37+
assert parser.json_file_path == "sample_1/c86205d1-bdd4-4110-ae46-194e661d622b.json"
38+
with pytest.raises(AnnotationOuterFileNotFoundError):
39+
parser.open_outer_file("foo")
40+
41+
def test_SimpleAnnotationDirParser(self):
42+
dir_path = Path(test_dir / "simple-annotation")
43+
44+
parser = SimpleAnnotationDirParser(dir_path / "sample_1/c86205d1-bdd4-4110-ae46-194e661d622b.json")
45+
assert parser.task_id == "sample_1"
46+
assert parser.input_data_id == "c86205d1-bdd4-4110-ae46-194e661d622b"
47+
assert parser.json_file_path == str(dir_path / "sample_1/c86205d1-bdd4-4110-ae46-194e661d622b.json")
48+
with pytest.raises(AnnotationOuterFileNotFoundError):
49+
parser.open_outer_file("foo")
50+
51+
52+
class TestSimpleAnnotationParserByTask:
53+
def test_SimpleAnnotationDirParserByTask(self):
54+
annotation_dir = test_dir / "simple-annotation"
55+
task_parser = SimpleAnnotationDirParserByTask(annotation_dir / "sample_1")
56+
assert task_parser.task_id == "sample_1"
57+
json_file_path_list = task_parser.json_file_path_list
58+
assert str(annotation_dir / "sample_1/c6e1c2ec-6c7c-41c6-9639-4244c2ed2839.json") in json_file_path_list
59+
assert str(annotation_dir / "sample_1/c86205d1-bdd4-4110-ae46-194e661d622b.json") in json_file_path_list
60+
61+
input_data_parser = task_parser.get_parser(
62+
str(annotation_dir / "sample_1/c6e1c2ec-6c7c-41c6-9639-4244c2ed2839.json")
63+
)
64+
assert input_data_parser.input_data_id == "c6e1c2ec-6c7c-41c6-9639-4244c2ed2839"
65+
assert input_data_parser.json_file_path == str(
66+
test_dir / "simple-annotation/sample_1/c6e1c2ec-6c7c-41c6-9639-4244c2ed2839.json"
67+
)
68+
69+
70+
def test_SimpleAnnotationZipParserByTask(self):
71+
with zipfile.ZipFile(test_dir / "simple-annotation.zip") as zip_file:
72+
task_parser = SimpleAnnotationZipParserByTask(zip_file, "sample_1")
73+
74+
assert task_parser.task_id == "sample_1"
75+
json_file_path_list = task_parser.json_file_path_list
76+
assert "sample_1/c6e1c2ec-6c7c-41c6-9639-4244c2ed2839.json" in json_file_path_list
77+
assert "sample_1/c86205d1-bdd4-4110-ae46-194e661d622b.json" in json_file_path_list
78+
79+
input_data_parser = task_parser.get_parser("sample_1/c6e1c2ec-6c7c-41c6-9639-4244c2ed2839.json")
80+
assert input_data_parser.input_data_id == "c6e1c2ec-6c7c-41c6-9639-4244c2ed2839"
81+
assert input_data_parser.json_file_path == str("sample_1/c6e1c2ec-6c7c-41c6-9639-4244c2ed2839.json")
82+
83+
84+
class TestSimpleAnnotation:
2985
def test_simple_annotation_zip(self):
3086
zip_path = Path(test_dir / "simple-annotation.zip")
3187
iter_parser = annofabapi.parser.lazy_parse_simple_annotation_zip(zip_path)
@@ -42,13 +98,6 @@ def test_simple_annotation_zip(self):
4298

4399
assert index == 4
44100

45-
with zipfile.ZipFile(zip_path) as zip_file:
46-
parser = SimpleAnnotationZipParser(zip_file, "sample_1/c86205d1-bdd4-4110-ae46-194e661d622b.json")
47-
assert parser.task_id == "sample_1"
48-
assert parser.input_data_id == "c86205d1-bdd4-4110-ae46-194e661d622b"
49-
with pytest.raises(AnnotationOuterFileNotFoundError):
50-
parser.open_outer_file("foo")
51-
52101
def test_simple_annotation_dir(self):
53102
dir_path = Path(test_dir / "simple-annotation")
54103
iter_parser = annofabapi.parser.lazy_parse_simple_annotation_dir(dir_path)
@@ -61,12 +110,6 @@ def test_simple_annotation_dir(self):
61110

62111
assert index == 4
63112

64-
parser = SimpleAnnotationDirParser(dir_path / "sample_1/c86205d1-bdd4-4110-ae46-194e661d622b.json")
65-
assert parser.task_id == "sample_1"
66-
assert parser.input_data_id == "c86205d1-bdd4-4110-ae46-194e661d622b"
67-
with pytest.raises(AnnotationOuterFileNotFoundError):
68-
parser.open_outer_file("foo")
69-
70113
def test_lazy_parse_simple_annotation_zip_by_task(self):
71114
zip_path = Path(test_dir / "simple-annotation.zip")
72115
task_parser_list = list(annofabapi.parser.lazy_parse_simple_annotation_zip_by_task(zip_path))
@@ -81,6 +124,8 @@ def test_lazy_parse_simple_annotation_zip_by_task(self):
81124
assert len([e for e in parser_list if e.input_data_id == "c6e1c2ec-6c7c-41c6-9639-4244c2ed2839"]) == 1
82125
assert len([e for e in parser_list if e.input_data_id == "c86205d1-bdd4-4110-ae46-194e661d622b"]) == 1
83126

127+
128+
84129
def test_lazy_parse_simple_annotation_dir_by_task(self):
85130
zip_path = Path(test_dir / "simple-annotation")
86131
task_parser_list = list(annofabapi.parser.lazy_parse_simple_annotation_dir_by_task(zip_path))
@@ -96,6 +141,8 @@ def test_lazy_parse_simple_annotation_dir_by_task(self):
96141
assert len([e for e in parser_list if e.input_data_id == "c86205d1-bdd4-4110-ae46-194e661d622b"]) == 1
97142

98143

144+
145+
99146
class TestFullAnnotation:
100147
def test_full_annotation_zip(self):
101148
zip_path = Path(test_dir / "full-annotation.zip")

0 commit comments

Comments
 (0)