From e35c6e27249bdb26e9bb4e5dcac5b0f75a5f039c Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 23 May 2022 15:20:42 +0200 Subject: [PATCH 01/12] try fix cub dataset --- torchvision/prototype/datasets/_builtin/cub200.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/torchvision/prototype/datasets/_builtin/cub200.py b/torchvision/prototype/datasets/_builtin/cub200.py index 0e5a80de825..4085c5bad32 100644 --- a/torchvision/prototype/datasets/_builtin/cub200.py +++ b/torchvision/prototype/datasets/_builtin/cub200.py @@ -107,10 +107,8 @@ def _2011_classify_archive(self, data: Tuple[str, Any]) -> Optional[int]: return 0 elif path.name == "train_test_split.txt": return 1 - elif path.name == "images.txt": - return 2 elif path.name == "bounding_boxes.txt": - return 3 + return 2 else: return None @@ -180,15 +178,17 @@ def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, prepare_ann_fn: Callable if self._year == "2011": archive_dp, segmentations_dp = resource_dps - images_dp, split_dp, image_files_dp, bounding_boxes_dp = Demultiplexer( - archive_dp, 4, self._2011_classify_archive, drop_none=True, buffer_size=INFINITE_BUFFER_SIZE - ) + image_files_dp = Filter(archive_dp, path_comparator("name", "images.txt")) image_files_dp = CSVParser(image_files_dp, dialect="cub200") image_files_map = dict( (image_id, rel_posix_path.rsplit("/", maxsplit=1)[1]) for image_id, rel_posix_path in image_files_dp ) + images_dp, split_dp, bounding_boxes_dp = Demultiplexer( + archive_dp, 3, self._2011_classify_archive, drop_none=True, buffer_size=INFINITE_BUFFER_SIZE + ) + split_dp = CSVParser(split_dp, dialect="cub200") split_dp = Filter(split_dp, self._2011_filter_split) split_dp = Mapper(split_dp, getitem(0)) From 31badb7590c71db878e3a05d562e1b39610b1ca0 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 23 May 2022 15:28:02 +0200 Subject: [PATCH 02/12] fix ImageNet --- .../prototype/datasets/_builtin/imagenet.py | 27 
++++--------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/torchvision/prototype/datasets/_builtin/imagenet.py b/torchvision/prototype/datasets/_builtin/imagenet.py index 1307757cef6..de2e4783ac6 100644 --- a/torchvision/prototype/datasets/_builtin/imagenet.py +++ b/torchvision/prototype/datasets/_builtin/imagenet.py @@ -1,4 +1,3 @@ -import enum import functools import pathlib import re @@ -10,7 +9,6 @@ IterKeyZipper, Mapper, Filter, - Demultiplexer, TarArchiveLoader, Enumerator, ) @@ -27,6 +25,7 @@ hint_shuffling, read_categories_file, path_accessor, + path_comparator, ) from torchvision.prototype.features import Label, EncodedImage @@ -46,11 +45,6 @@ def __init__(self, **kwargs: Any) -> None: super().__init__("Register on https://image-net.org/ and follow the instructions there.", **kwargs) -class ImageNetDemux(enum.IntEnum): - META = 0 - LABEL = 1 - - @register_dataset(NAME) class ImageNet(Dataset): """ @@ -108,12 +102,6 @@ def _prepare_train_data(self, data: Tuple[str, BinaryIO]) -> Tuple[Tuple[Label, def _prepare_test_data(self, data: Tuple[str, BinaryIO]) -> Tuple[None, Tuple[str, BinaryIO]]: return None, data - def _classifiy_devkit(self, data: Tuple[str, BinaryIO]) -> Optional[int]: - return { - "meta.mat": ImageNetDemux.META, - "ILSVRC2012_validation_ground_truth.txt": ImageNetDemux.LABEL, - }.get(pathlib.Path(data[0]).name) - # Although the WordNet IDs (wnids) are unique, the corresponding categories are not. 
For example, both n02012849 # and n03126707 are labeled 'crane' while the first means the bird and the latter means the construction equipment _WNID_MAP = { @@ -172,13 +160,11 @@ def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, else: # config.split == "val": images_dp, devkit_dp = resource_dps - meta_dp, label_dp = Demultiplexer( - devkit_dp, 2, self._classifiy_devkit, drop_none=True, buffer_size=INFINITE_BUFFER_SIZE - ) - + meta_dp = Filter(devkit_dp, path_comparator("name", "meta.mat")) meta_dp = Mapper(meta_dp, self._extract_categories_and_wnids) - _, wnids = zip(*next(iter(meta_dp))) + _, wnids = zip(*list(meta_dp)[0]) + label_dp = Filter(devkit_dp, path_comparator("name", "ILSVRC2012_validation_ground_truth.txt")) label_dp = LineReader(label_dp, decode=True, return_path=False) # We cannot use self._wnids here, since we use a different order than the dataset label_dp = Mapper(label_dp, functools.partial(self._imagenet_label_to_wnid, wnids=wnids)) @@ -204,15 +190,12 @@ def __len__(self) -> int: "test": 100_000, }[self._split] - def _filter_meta(self, data: Tuple[str, Any]) -> bool: - return self._classifiy_devkit(data) == ImageNetDemux.META - def _generate_categories(self) -> List[Tuple[str, ...]]: self._split = "val" resources = self._resources() devkit_dp = resources[1].load(self._root) - meta_dp = Filter(devkit_dp, self._filter_meta) + meta_dp = Filter(devkit_dp, path_comparator("name", "meta.mat")) meta_dp = Mapper(meta_dp, self._extract_categories_and_wnids) categories_and_wnids = cast(List[Tuple[str, ...]], next(iter(meta_dp))) From 3d96754a7c58f42fe82eaab7a3e83d6d3f82f9b6 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Tue, 24 May 2022 11:51:56 +0200 Subject: [PATCH 03/12] streamline imagenet --- .../prototype/datasets/_builtin/imagenet.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/torchvision/prototype/datasets/_builtin/imagenet.py 
b/torchvision/prototype/datasets/_builtin/imagenet.py index de2e4783ac6..e239969be4e 100644 --- a/torchvision/prototype/datasets/_builtin/imagenet.py +++ b/torchvision/prototype/datasets/_builtin/imagenet.py @@ -109,8 +109,12 @@ def _prepare_test_data(self, data: Tuple[str, BinaryIO]) -> Tuple[None, Tuple[st "n03710721": "tank suit", } - def _extract_categories_and_wnids(self, data: Tuple[str, BinaryIO]) -> List[Tuple[str, str]]: - synsets = read_mat(data[1], squeeze_me=True)["synsets"] + def _extract_categories_and_wnids(self, devkit_dp: IterDataPipe[Tuple[str, BinaryIO]]) -> List[Tuple[str, str]]: + meta_dp = Filter(devkit_dp, path_comparator("name", "meta.mat")) + + _, buffer = list(meta_dp)[0] + synsets = read_mat(buffer, squeeze_me=True)["synsets"] + return [ (self._WNID_MAP.get(wnid, category.split(",", 1)[0]), wnid) for _, wnid, category, _, num_children, *_ in synsets @@ -160,9 +164,7 @@ def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, else: # config.split == "val": images_dp, devkit_dp = resource_dps - meta_dp = Filter(devkit_dp, path_comparator("name", "meta.mat")) - meta_dp = Mapper(meta_dp, self._extract_categories_and_wnids) - _, wnids = zip(*list(meta_dp)[0]) + _, wnids = zip(*self._extract_categories_and_wnids(devkit_dp)) label_dp = Filter(devkit_dp, path_comparator("name", "ILSVRC2012_validation_ground_truth.txt")) label_dp = LineReader(label_dp, decode=True, return_path=False) @@ -195,9 +197,7 @@ def _generate_categories(self) -> List[Tuple[str, ...]]: resources = self._resources() devkit_dp = resources[1].load(self._root) - meta_dp = Filter(devkit_dp, path_comparator("name", "meta.mat")) - meta_dp = Mapper(meta_dp, self._extract_categories_and_wnids) - categories_and_wnids = cast(List[Tuple[str, ...]], next(iter(meta_dp))) + categories_and_wnids = self._extract_categories_and_wnids(devkit_dp) categories_and_wnids.sort(key=lambda category_and_wnid: category_and_wnid[1]) return categories_and_wnids From 
3d2ae08e4fe31be40d240ab32e1692e42a6e887d Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Thu, 26 May 2022 09:44:52 +0200 Subject: [PATCH 04/12] revert changes --- .../prototype/datasets/_builtin/cub200.py | 12 +++--- .../prototype/datasets/_builtin/imagenet.py | 37 ++++++++++++++----- 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/torchvision/prototype/datasets/_builtin/cub200.py b/torchvision/prototype/datasets/_builtin/cub200.py index 4085c5bad32..0e5a80de825 100644 --- a/torchvision/prototype/datasets/_builtin/cub200.py +++ b/torchvision/prototype/datasets/_builtin/cub200.py @@ -107,8 +107,10 @@ def _2011_classify_archive(self, data: Tuple[str, Any]) -> Optional[int]: return 0 elif path.name == "train_test_split.txt": return 1 - elif path.name == "bounding_boxes.txt": + elif path.name == "images.txt": return 2 + elif path.name == "bounding_boxes.txt": + return 3 else: return None @@ -178,17 +180,15 @@ def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, prepare_ann_fn: Callable if self._year == "2011": archive_dp, segmentations_dp = resource_dps + images_dp, split_dp, image_files_dp, bounding_boxes_dp = Demultiplexer( + archive_dp, 4, self._2011_classify_archive, drop_none=True, buffer_size=INFINITE_BUFFER_SIZE + ) - image_files_dp = Filter(archive_dp, path_comparator("name", "images.txt")) image_files_dp = CSVParser(image_files_dp, dialect="cub200") image_files_map = dict( (image_id, rel_posix_path.rsplit("/", maxsplit=1)[1]) for image_id, rel_posix_path in image_files_dp ) - images_dp, split_dp, bounding_boxes_dp = Demultiplexer( - archive_dp, 3, self._2011_classify_archive, drop_none=True, buffer_size=INFINITE_BUFFER_SIZE - ) - split_dp = CSVParser(split_dp, dialect="cub200") split_dp = Filter(split_dp, self._2011_filter_split) split_dp = Mapper(split_dp, getitem(0)) diff --git a/torchvision/prototype/datasets/_builtin/imagenet.py b/torchvision/prototype/datasets/_builtin/imagenet.py index e239969be4e..1307757cef6 
100644 --- a/torchvision/prototype/datasets/_builtin/imagenet.py +++ b/torchvision/prototype/datasets/_builtin/imagenet.py @@ -1,3 +1,4 @@ +import enum import functools import pathlib import re @@ -9,6 +10,7 @@ IterKeyZipper, Mapper, Filter, + Demultiplexer, TarArchiveLoader, Enumerator, ) @@ -25,7 +27,6 @@ hint_shuffling, read_categories_file, path_accessor, - path_comparator, ) from torchvision.prototype.features import Label, EncodedImage @@ -45,6 +46,11 @@ def __init__(self, **kwargs: Any) -> None: super().__init__("Register on https://image-net.org/ and follow the instructions there.", **kwargs) +class ImageNetDemux(enum.IntEnum): + META = 0 + LABEL = 1 + + @register_dataset(NAME) class ImageNet(Dataset): """ @@ -102,6 +108,12 @@ def _prepare_train_data(self, data: Tuple[str, BinaryIO]) -> Tuple[Tuple[Label, def _prepare_test_data(self, data: Tuple[str, BinaryIO]) -> Tuple[None, Tuple[str, BinaryIO]]: return None, data + def _classifiy_devkit(self, data: Tuple[str, BinaryIO]) -> Optional[int]: + return { + "meta.mat": ImageNetDemux.META, + "ILSVRC2012_validation_ground_truth.txt": ImageNetDemux.LABEL, + }.get(pathlib.Path(data[0]).name) + # Although the WordNet IDs (wnids) are unique, the corresponding categories are not. 
For example, both n02012849 # and n03126707 are labeled 'crane' while the first means the bird and the latter means the construction equipment _WNID_MAP = { @@ -109,12 +121,8 @@ def _prepare_test_data(self, data: Tuple[str, BinaryIO]) -> Tuple[None, Tuple[st "n03710721": "tank suit", } - def _extract_categories_and_wnids(self, devkit_dp: IterDataPipe[Tuple[str, BinaryIO]]) -> List[Tuple[str, str]]: - meta_dp = Filter(devkit_dp, path_comparator("name", "meta.mat")) - - _, buffer = list(meta_dp)[0] - synsets = read_mat(buffer, squeeze_me=True)["synsets"] - + def _extract_categories_and_wnids(self, data: Tuple[str, BinaryIO]) -> List[Tuple[str, str]]: + synsets = read_mat(data[1], squeeze_me=True)["synsets"] return [ (self._WNID_MAP.get(wnid, category.split(",", 1)[0]), wnid) for _, wnid, category, _, num_children, *_ in synsets @@ -164,9 +172,13 @@ def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, else: # config.split == "val": images_dp, devkit_dp = resource_dps - _, wnids = zip(*self._extract_categories_and_wnids(devkit_dp)) + meta_dp, label_dp = Demultiplexer( + devkit_dp, 2, self._classifiy_devkit, drop_none=True, buffer_size=INFINITE_BUFFER_SIZE + ) + + meta_dp = Mapper(meta_dp, self._extract_categories_and_wnids) + _, wnids = zip(*next(iter(meta_dp))) - label_dp = Filter(devkit_dp, path_comparator("name", "ILSVRC2012_validation_ground_truth.txt")) label_dp = LineReader(label_dp, decode=True, return_path=False) # We cannot use self._wnids here, since we use a different order than the dataset label_dp = Mapper(label_dp, functools.partial(self._imagenet_label_to_wnid, wnids=wnids)) @@ -192,12 +204,17 @@ def __len__(self) -> int: "test": 100_000, }[self._split] + def _filter_meta(self, data: Tuple[str, Any]) -> bool: + return self._classifiy_devkit(data) == ImageNetDemux.META + def _generate_categories(self) -> List[Tuple[str, ...]]: self._split = "val" resources = self._resources() devkit_dp = resources[1].load(self._root) + meta_dp = 
Filter(devkit_dp, self._filter_meta) + meta_dp = Mapper(meta_dp, self._extract_categories_and_wnids) - categories_and_wnids = self._extract_categories_and_wnids(devkit_dp) + categories_and_wnids = cast(List[Tuple[str, ...]], next(iter(meta_dp))) categories_and_wnids.sort(key=lambda category_and_wnid: category_and_wnid[1]) return categories_and_wnids From b77c5b87592d942bb62ab7d32324f4a89780120f Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Thu, 26 May 2022 09:45:04 +0200 Subject: [PATCH 05/12] use map datapipe instead --- .../prototype/datasets/_builtin/cub200.py | 13 +++-- .../prototype/datasets/_builtin/imagenet.py | 57 +++++++++++-------- 2 files changed, 41 insertions(+), 29 deletions(-) diff --git a/torchvision/prototype/datasets/_builtin/cub200.py b/torchvision/prototype/datasets/_builtin/cub200.py index 0e5a80de825..c38a51265a1 100644 --- a/torchvision/prototype/datasets/_builtin/cub200.py +++ b/torchvision/prototype/datasets/_builtin/cub200.py @@ -13,6 +13,7 @@ CSVParser, CSVDictParser, ) +from torchdata.datapipes.map import IterToMapConverter from torchvision.prototype.datasets.utils import Dataset, GDriveResource, OnlineResource from torchvision.prototype.datasets.utils._internal import ( INFINITE_BUFFER_SIZE, @@ -114,6 +115,9 @@ def _2011_classify_archive(self, data: Tuple[str, Any]) -> Optional[int]: else: return None + def _2011_extract_file_name(self, rel_posix_path: str) -> str: + return rel_posix_path.rsplit("/", maxsplit=1)[1] + def _2011_filter_split(self, row: List[str]) -> bool: _, split_id = row return { @@ -185,17 +189,16 @@ def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, ) image_files_dp = CSVParser(image_files_dp, dialect="cub200") - image_files_map = dict( - (image_id, rel_posix_path.rsplit("/", maxsplit=1)[1]) for image_id, rel_posix_path in image_files_dp - ) + image_files_dp = Mapper(image_files_dp, self._2011_extract_file_name, input_col=1) + image_files_map = IterToMapConverter(image_files_dp) 
split_dp = CSVParser(split_dp, dialect="cub200") split_dp = Filter(split_dp, self._2011_filter_split) split_dp = Mapper(split_dp, getitem(0)) - split_dp = Mapper(split_dp, image_files_map.get) + split_dp = Mapper(split_dp, image_files_map.__getitem__) bounding_boxes_dp = CSVParser(bounding_boxes_dp, dialect="cub200") - bounding_boxes_dp = Mapper(bounding_boxes_dp, image_files_map.get, input_col=0) + bounding_boxes_dp = Mapper(bounding_boxes_dp, image_files_map.__getitem__, input_col=0) anns_dp = IterKeyZipper( bounding_boxes_dp, diff --git a/torchvision/prototype/datasets/_builtin/imagenet.py b/torchvision/prototype/datasets/_builtin/imagenet.py index 1307757cef6..a8d9e6b9880 100644 --- a/torchvision/prototype/datasets/_builtin/imagenet.py +++ b/torchvision/prototype/datasets/_builtin/imagenet.py @@ -1,8 +1,7 @@ import enum -import functools import pathlib import re -from typing import Any, Dict, List, Optional, Tuple, BinaryIO, Match, cast, Union +from typing import Any, Dict, List, Optional, Tuple, BinaryIO, Match, cast, Union, Iterator from torchdata.datapipes.iter import ( IterDataPipe, @@ -14,6 +13,7 @@ TarArchiveLoader, Enumerator, ) +from torchdata.datapipes.map import IterToMapConverter from torchvision.prototype.datasets.utils import ( OnlineResource, ManualDownloadResource, @@ -51,6 +51,28 @@ class ImageNetDemux(enum.IntEnum): LABEL = 1 +class CategoryAndWordNetIDExtractor(IterDataPipe): + # Although the WordNet IDs (wnids) are unique, the corresponding categories are not. 
For example, both n02012849 + # and n03126707 are labeled 'crane' while the first means the bird and the latter means the construction equipment + _WNID_MAP = { + "n03126707": "construction crane", + "n03710721": "tank suit", + } + + def __init__(self, datapipe: IterDataPipe[Tuple[str, BinaryIO]]) -> None: + self.datapipe = datapipe + + def __iter__(self) -> Iterator[Tuple[str, str]]: + for _, stream in self.datapipe: + synsets = read_mat(stream, squeeze_me=True)["synsets"] + for _, wnid, category, _, num_children, *_ in synsets: + if num_children > 0: + # we are looking at a superclass that has no direct instance + continue + + yield self._WNID_MAP.get(wnid, category.split(",", 1)[0]), wnid + + @register_dataset(NAME) class ImageNet(Dataset): """ @@ -114,22 +136,6 @@ def _classifiy_devkit(self, data: Tuple[str, BinaryIO]) -> Optional[int]: "ILSVRC2012_validation_ground_truth.txt": ImageNetDemux.LABEL, }.get(pathlib.Path(data[0]).name) - # Although the WordNet IDs (wnids) are unique, the corresponding categories are not. 
For example, both n02012849 - # and n03126707 are labeled 'crane' while the first means the bird and the latter means the construction equipment - _WNID_MAP = { - "n03126707": "construction crane", - "n03710721": "tank suit", - } - - def _extract_categories_and_wnids(self, data: Tuple[str, BinaryIO]) -> List[Tuple[str, str]]: - synsets = read_mat(data[1], squeeze_me=True)["synsets"] - return [ - (self._WNID_MAP.get(wnid, category.split(",", 1)[0]), wnid) - for _, wnid, category, _, num_children, *_ in synsets - # if num_children > 0, we are looking at a superclass that has no direct instance - if num_children == 0 - ] - def _imagenet_label_to_wnid(self, imagenet_label: str, *, wnids: Tuple[str, ...]) -> str: return wnids[int(imagenet_label) - 1] @@ -176,12 +182,15 @@ def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, devkit_dp, 2, self._classifiy_devkit, drop_none=True, buffer_size=INFINITE_BUFFER_SIZE ) - meta_dp = Mapper(meta_dp, self._extract_categories_and_wnids) - _, wnids = zip(*next(iter(meta_dp))) + # We cannot use self._wnids here, since we use a different order than the dataset + meta_dp = CategoryAndWordNetIDExtractor(meta_dp) + wnid_dp = Mapper(meta_dp, getitem(1)) + wnid_dp = Enumerator(wnid_dp, 1) + wnid_dp = Mapper(wnid_dp, str, input_col=0) + wnid_map = IterToMapConverter(wnid_dp) label_dp = LineReader(label_dp, decode=True, return_path=False) - # We cannot use self._wnids here, since we use a different order than the dataset - label_dp = Mapper(label_dp, functools.partial(self._imagenet_label_to_wnid, wnids=wnids)) + label_dp = Mapper(label_dp, wnid_map.__getitem__) label_dp: IterDataPipe[Tuple[int, str]] = Enumerator(label_dp, 1) label_dp = hint_shuffling(label_dp) label_dp = hint_sharding(label_dp) @@ -213,8 +222,8 @@ def _generate_categories(self) -> List[Tuple[str, ...]]: devkit_dp = resources[1].load(self._root) meta_dp = Filter(devkit_dp, self._filter_meta) - meta_dp = Mapper(meta_dp, 
self._extract_categories_and_wnids) + meta_dp = CategoryAndWordNetIDExtractor(meta_dp) - categories_and_wnids = cast(List[Tuple[str, ...]], next(iter(meta_dp))) + categories_and_wnids = cast(List[Tuple[str, ...]], list(meta_dp)) categories_and_wnids.sort(key=lambda category_and_wnid: category_and_wnid[1]) return categories_and_wnids From d8f3d07ba434cb822f7710a788c977f1ba79307d Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 31 Aug 2022 08:36:13 +0200 Subject: [PATCH 06/12] [DEBUG] run tests on full CI matrix --- .github/workflows/prototype-tests.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/prototype-tests.yml b/.github/workflows/prototype-tests.yml index ff29168d9a7..bc9cb5402c4 100644 --- a/.github/workflows/prototype-tests.yml +++ b/.github/workflows/prototype-tests.yml @@ -7,6 +7,11 @@ jobs: prototype: strategy: matrix: + python-version: + - "3.7" + - "3.8" + - "3.9" + - "3.10" os: - ubuntu-latest - windows-latest @@ -19,7 +24,7 @@ jobs: - name: Set up python uses: actions/setup-python@v3 with: - python-version: 3.7 + python-version: ${{ matrix.python-version }} - name: Upgrade system packages run: python -m pip install --upgrade pip setuptools wheel From 797fa81dd483cfc6c8ecf9240a3b27697eec2284 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 31 Aug 2022 08:36:37 +0200 Subject: [PATCH 07/12] [SKIP CI] CircleCI From 456dcf04f47adfb473877689dda676cc79428e81 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 31 Aug 2022 09:26:44 +0200 Subject: [PATCH 08/12] [SKIP CI] add temp fix for unnecessary strict torchdata check --- torchvision/prototype/datasets/_builtin/imagenet.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/torchvision/prototype/datasets/_builtin/imagenet.py b/torchvision/prototype/datasets/_builtin/imagenet.py index 5b24a613a55..d3bf748defe 100644 --- a/torchvision/prototype/datasets/_builtin/imagenet.py +++ b/torchvision/prototype/datasets/_builtin/imagenet.py 
@@ -161,6 +161,10 @@ def _prepare_sample( image=EncodedImage.from_file(buffer), ) + def _to_str(self, obj: Any): + # FIXME: remove this wrapper as soon as https://github.com/pytorch/pytorch/pull/84279 is landed + return str(obj) + def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]: if self._split in {"train", "test"}: dp = resource_dps[0] @@ -183,7 +187,7 @@ def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, meta_dp = CategoryAndWordNetIDExtractor(meta_dp) wnid_dp = Mapper(meta_dp, getitem(1)) wnid_dp = Enumerator(wnid_dp, 1) - wnid_dp = Mapper(wnid_dp, str, input_col=0) + wnid_dp = Mapper(wnid_dp, self._to_str, input_col=0) wnid_map = IterToMapConverter(wnid_dp) label_dp = LineReader(label_dp, decode=True, return_path=False) From b961415cf65f5d7ef9d27750287a8abc892082b8 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 31 Aug 2022 08:36:13 +0200 Subject: [PATCH 09/12] Revert "[DEBUG] run tests on full CI matrix" This reverts commit d8f3d07ba434cb822f7710a788c977f1ba79307d. 
--- .github/workflows/prototype-tests.yml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/.github/workflows/prototype-tests.yml b/.github/workflows/prototype-tests.yml index bc9cb5402c4..ff29168d9a7 100644 --- a/.github/workflows/prototype-tests.yml +++ b/.github/workflows/prototype-tests.yml @@ -7,11 +7,6 @@ jobs: prototype: strategy: matrix: - python-version: - - "3.7" - - "3.8" - - "3.9" - - "3.10" os: - ubuntu-latest - windows-latest @@ -24,7 +19,7 @@ jobs: - name: Set up python uses: actions/setup-python@v3 with: - python-version: ${{ matrix.python-version }} + python-version: 3.7 - name: Upgrade system packages run: python -m pip install --upgrade pip setuptools wheel From e6cf29d9134e76676cc7f77c75e02070ab031f31 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 31 Aug 2022 09:26:44 +0200 Subject: [PATCH 10/12] Revert "[SKIP CI] add temp fix for unnecessary strict torchdata check" This reverts commit 456dcf04f47adfb473877689dda676cc79428e81. --- torchvision/prototype/datasets/_builtin/imagenet.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/torchvision/prototype/datasets/_builtin/imagenet.py b/torchvision/prototype/datasets/_builtin/imagenet.py index d3bf748defe..5b24a613a55 100644 --- a/torchvision/prototype/datasets/_builtin/imagenet.py +++ b/torchvision/prototype/datasets/_builtin/imagenet.py @@ -161,10 +161,6 @@ def _prepare_sample( image=EncodedImage.from_file(buffer), ) - def _to_str(self, obj: Any): - # FIXME: remove this wrapper as soon as https://github.com/pytorch/pytorch/pull/84279 is landed - return str(obj) - def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]: if self._split in {"train", "test"}: dp = resource_dps[0] @@ -187,7 +183,7 @@ def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, meta_dp = CategoryAndWordNetIDExtractor(meta_dp) wnid_dp = Mapper(meta_dp, getitem(1)) wnid_dp = Enumerator(wnid_dp, 1) - wnid_dp = 
Mapper(wnid_dp, self._to_str, input_col=0) + wnid_dp = Mapper(wnid_dp, str, input_col=0) wnid_map = IterToMapConverter(wnid_dp) label_dp = LineReader(label_dp, decode=True, return_path=False) From 5217e2df1873fc91ef510225a19ae540ad8724f7 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Tue, 13 Sep 2022 15:39:27 +0200 Subject: [PATCH 11/12] use int key --- torchvision/prototype/datasets/_builtin/imagenet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/prototype/datasets/_builtin/imagenet.py b/torchvision/prototype/datasets/_builtin/imagenet.py index 5b24a613a55..a64f6596be7 100644 --- a/torchvision/prototype/datasets/_builtin/imagenet.py +++ b/torchvision/prototype/datasets/_builtin/imagenet.py @@ -183,10 +183,10 @@ def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, meta_dp = CategoryAndWordNetIDExtractor(meta_dp) wnid_dp = Mapper(meta_dp, getitem(1)) wnid_dp = Enumerator(wnid_dp, 1) - wnid_dp = Mapper(wnid_dp, str, input_col=0) wnid_map = IterToMapConverter(wnid_dp) label_dp = LineReader(label_dp, decode=True, return_path=False) + label_dp = Mapper(label_dp, int) label_dp = Mapper(label_dp, wnid_map.__getitem__) label_dp: IterDataPipe[Tuple[int, str]] = Enumerator(label_dp, 1) label_dp = hint_shuffling(label_dp) From c057cf3e2a2946948f866826fb235d75dc574de0 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Tue, 13 Sep 2022 15:44:07 +0200 Subject: [PATCH 12/12] remove unused function --- torchvision/prototype/datasets/_builtin/imagenet.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/torchvision/prototype/datasets/_builtin/imagenet.py b/torchvision/prototype/datasets/_builtin/imagenet.py index a64f6596be7..3192f1f5503 100644 --- a/torchvision/prototype/datasets/_builtin/imagenet.py +++ b/torchvision/prototype/datasets/_builtin/imagenet.py @@ -133,9 +133,6 @@ def _classifiy_devkit(self, data: Tuple[str, BinaryIO]) -> Optional[int]: "ILSVRC2012_validation_ground_truth.txt": ImageNetDemux.LABEL, 
}.get(pathlib.Path(data[0]).name) - def _imagenet_label_to_wnid(self, imagenet_label: str, *, wnids: Tuple[str, ...]) -> str: - return wnids[int(imagenet_label) - 1] - _VAL_TEST_IMAGE_NAME_PATTERN = re.compile(r"ILSVRC2012_(val|test)_(?P<id>\d{8})[.]JPEG") def _val_test_image_key(self, path: pathlib.Path) -> int: