From e35c6e27249bdb26e9bb4e5dcac5b0f75a5f039c Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Mon, 23 May 2022 15:20:42 +0200
Subject: [PATCH 01/12] try to fix cub dataset

---
 torchvision/prototype/datasets/_builtin/cub200.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/torchvision/prototype/datasets/_builtin/cub200.py b/torchvision/prototype/datasets/_builtin/cub200.py
index 0e5a80de825..4085c5bad32 100644
--- a/torchvision/prototype/datasets/_builtin/cub200.py
+++ b/torchvision/prototype/datasets/_builtin/cub200.py
@@ -107,10 +107,8 @@ def _2011_classify_archive(self, data: Tuple[str, Any]) -> Optional[int]:
             return 0
         elif path.name == "train_test_split.txt":
             return 1
-        elif path.name == "images.txt":
-            return 2
         elif path.name == "bounding_boxes.txt":
-            return 3
+            return 2
         else:
             return None
 
@@ -180,15 +178,17 @@ def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str,
         prepare_ann_fn: Callable
         if self._year == "2011":
             archive_dp, segmentations_dp = resource_dps
-            images_dp, split_dp, image_files_dp, bounding_boxes_dp = Demultiplexer(
-                archive_dp, 4, self._2011_classify_archive, drop_none=True, buffer_size=INFINITE_BUFFER_SIZE
-            )
 
+            image_files_dp = Filter(archive_dp, path_comparator("name", "images.txt"))
             image_files_dp = CSVParser(image_files_dp, dialect="cub200")
             image_files_map = dict(
                 (image_id, rel_posix_path.rsplit("/", maxsplit=1)[1]) for image_id, rel_posix_path in image_files_dp
             )
 
+            images_dp, split_dp, bounding_boxes_dp = Demultiplexer(
+                archive_dp, 3, self._2011_classify_archive, drop_none=True, buffer_size=INFINITE_BUFFER_SIZE
+            )
+
             split_dp = CSVParser(split_dp, dialect="cub200")
             split_dp = Filter(split_dp, self._2011_filter_split)
             split_dp = Mapper(split_dp, getitem(0))

From 31badb7590c71db878e3a05d562e1b39610b1ca0 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Mon, 23 May 2022 15:28:02 +0200
Subject: [PATCH 02/12] fix ImageNet

---
 .../prototype/datasets/_builtin/imagenet.py   | 27 ++++---------------
 1 file changed, 5 insertions(+), 22 deletions(-)

diff --git a/torchvision/prototype/datasets/_builtin/imagenet.py b/torchvision/prototype/datasets/_builtin/imagenet.py
index 1307757cef6..de2e4783ac6 100644
--- a/torchvision/prototype/datasets/_builtin/imagenet.py
+++ b/torchvision/prototype/datasets/_builtin/imagenet.py
@@ -1,4 +1,3 @@
-import enum
 import functools
 import pathlib
 import re
@@ -10,7 +9,6 @@
     IterKeyZipper,
     Mapper,
     Filter,
-    Demultiplexer,
     TarArchiveLoader,
     Enumerator,
 )
@@ -27,6 +25,7 @@
     hint_shuffling,
     read_categories_file,
     path_accessor,
+    path_comparator,
 )
 from torchvision.prototype.features import Label, EncodedImage
 
@@ -46,11 +45,6 @@ def __init__(self, **kwargs: Any) -> None:
         super().__init__("Register on https://image-net.org/ and follow the instructions there.", **kwargs)
 
 
-class ImageNetDemux(enum.IntEnum):
-    META = 0
-    LABEL = 1
-
-
 @register_dataset(NAME)
 class ImageNet(Dataset):
     """
@@ -108,12 +102,6 @@ def _prepare_train_data(self, data: Tuple[str, BinaryIO]) -> Tuple[Tuple[Label,
     def _prepare_test_data(self, data: Tuple[str, BinaryIO]) -> Tuple[None, Tuple[str, BinaryIO]]:
         return None, data
 
-    def _classifiy_devkit(self, data: Tuple[str, BinaryIO]) -> Optional[int]:
-        return {
-            "meta.mat": ImageNetDemux.META,
-            "ILSVRC2012_validation_ground_truth.txt": ImageNetDemux.LABEL,
-        }.get(pathlib.Path(data[0]).name)
-
     # Although the WordNet IDs (wnids) are unique, the corresponding categories are not. For example, both n02012849
     # and n03126707 are labeled 'crane' while the first means the bird and the latter means the construction equipment
     _WNID_MAP = {
@@ -172,13 +160,11 @@ def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str,
         else:  # config.split == "val":
             images_dp, devkit_dp = resource_dps
 
-            meta_dp, label_dp = Demultiplexer(
-                devkit_dp, 2, self._classifiy_devkit, drop_none=True, buffer_size=INFINITE_BUFFER_SIZE
-            )
-
+            meta_dp = Filter(devkit_dp, path_comparator("name", "meta.mat"))
             meta_dp = Mapper(meta_dp, self._extract_categories_and_wnids)
-            _, wnids = zip(*next(iter(meta_dp)))
+            _, wnids = zip(*list(meta_dp)[0])
 
+            label_dp = Filter(devkit_dp, path_comparator("name", "ILSVRC2012_validation_ground_truth.txt"))
             label_dp = LineReader(label_dp, decode=True, return_path=False)
             # We cannot use self._wnids here, since we use a different order than the dataset
             label_dp = Mapper(label_dp, functools.partial(self._imagenet_label_to_wnid, wnids=wnids))
@@ -204,15 +190,12 @@ def __len__(self) -> int:
             "test": 100_000,
         }[self._split]
 
-    def _filter_meta(self, data: Tuple[str, Any]) -> bool:
-        return self._classifiy_devkit(data) == ImageNetDemux.META
-
     def _generate_categories(self) -> List[Tuple[str, ...]]:
         self._split = "val"
         resources = self._resources()
 
         devkit_dp = resources[1].load(self._root)
-        meta_dp = Filter(devkit_dp, self._filter_meta)
+        meta_dp = Filter(devkit_dp, path_comparator("name", "meta.mat"))
         meta_dp = Mapper(meta_dp, self._extract_categories_and_wnids)
 
         categories_and_wnids = cast(List[Tuple[str, ...]], next(iter(meta_dp)))

From 3d96754a7c58f42fe82eaab7a3e83d6d3f82f9b6 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Tue, 24 May 2022 11:51:56 +0200
Subject: [PATCH 03/12] streamline imagenet

---
 .../prototype/datasets/_builtin/imagenet.py      | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/torchvision/prototype/datasets/_builtin/imagenet.py b/torchvision/prototype/datasets/_builtin/imagenet.py
index de2e4783ac6..e239969be4e 100644
--- a/torchvision/prototype/datasets/_builtin/imagenet.py
+++ b/torchvision/prototype/datasets/_builtin/imagenet.py
@@ -109,8 +109,12 @@ def _prepare_test_data(self, data: Tuple[str, BinaryIO]) -> Tuple[None, Tuple[st
         "n03710721": "tank suit",
     }
 
-    def _extract_categories_and_wnids(self, data: Tuple[str, BinaryIO]) -> List[Tuple[str, str]]:
-        synsets = read_mat(data[1], squeeze_me=True)["synsets"]
+    def _extract_categories_and_wnids(self, devkit_dp: IterDataPipe[Tuple[str, BinaryIO]]) -> List[Tuple[str, str]]:
+        meta_dp = Filter(devkit_dp, path_comparator("name", "meta.mat"))
+
+        _, buffer = list(meta_dp)[0]
+        synsets = read_mat(buffer, squeeze_me=True)["synsets"]
+
         return [
             (self._WNID_MAP.get(wnid, category.split(",", 1)[0]), wnid)
             for _, wnid, category, _, num_children, *_ in synsets
@@ -160,9 +164,7 @@ def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str,
         else:  # config.split == "val":
             images_dp, devkit_dp = resource_dps
 
-            meta_dp = Filter(devkit_dp, path_comparator("name", "meta.mat"))
-            meta_dp = Mapper(meta_dp, self._extract_categories_and_wnids)
-            _, wnids = zip(*list(meta_dp)[0])
+            _, wnids = zip(*self._extract_categories_and_wnids(devkit_dp))
 
             label_dp = Filter(devkit_dp, path_comparator("name", "ILSVRC2012_validation_ground_truth.txt"))
             label_dp = LineReader(label_dp, decode=True, return_path=False)
@@ -195,9 +197,7 @@ def _generate_categories(self) -> List[Tuple[str, ...]]:
         resources = self._resources()
 
         devkit_dp = resources[1].load(self._root)
-        meta_dp = Filter(devkit_dp, path_comparator("name", "meta.mat"))
-        meta_dp = Mapper(meta_dp, self._extract_categories_and_wnids)
 
-        categories_and_wnids = cast(List[Tuple[str, ...]], next(iter(meta_dp)))
+        categories_and_wnids = self._extract_categories_and_wnids(devkit_dp)
         categories_and_wnids.sort(key=lambda category_and_wnid: category_and_wnid[1])
         return categories_and_wnids

From 3d2ae08e4fe31be40d240ab32e1692e42a6e887d Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Thu, 26 May 2022 09:44:52 +0200
Subject: [PATCH 04/12] revert changes

---
 .../prototype/datasets/_builtin/cub200.py     | 12 +++---
 .../prototype/datasets/_builtin/imagenet.py   | 37 ++++++++++++++-----
 2 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/torchvision/prototype/datasets/_builtin/cub200.py b/torchvision/prototype/datasets/_builtin/cub200.py
index 4085c5bad32..0e5a80de825 100644
--- a/torchvision/prototype/datasets/_builtin/cub200.py
+++ b/torchvision/prototype/datasets/_builtin/cub200.py
@@ -107,8 +107,10 @@ def _2011_classify_archive(self, data: Tuple[str, Any]) -> Optional[int]:
             return 0
         elif path.name == "train_test_split.txt":
             return 1
-        elif path.name == "bounding_boxes.txt":
+        elif path.name == "images.txt":
             return 2
+        elif path.name == "bounding_boxes.txt":
+            return 3
         else:
             return None
 
@@ -178,17 +180,15 @@ def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str,
         prepare_ann_fn: Callable
         if self._year == "2011":
             archive_dp, segmentations_dp = resource_dps
+            images_dp, split_dp, image_files_dp, bounding_boxes_dp = Demultiplexer(
+                archive_dp, 4, self._2011_classify_archive, drop_none=True, buffer_size=INFINITE_BUFFER_SIZE
+            )
 
-            image_files_dp = Filter(archive_dp, path_comparator("name", "images.txt"))
             image_files_dp = CSVParser(image_files_dp, dialect="cub200")
             image_files_map = dict(
                 (image_id, rel_posix_path.rsplit("/", maxsplit=1)[1]) for image_id, rel_posix_path in image_files_dp
             )
 
-            images_dp, split_dp, bounding_boxes_dp = Demultiplexer(
-                archive_dp, 3, self._2011_classify_archive, drop_none=True, buffer_size=INFINITE_BUFFER_SIZE
-            )
-
             split_dp = CSVParser(split_dp, dialect="cub200")
             split_dp = Filter(split_dp, self._2011_filter_split)
             split_dp = Mapper(split_dp, getitem(0))
diff --git a/torchvision/prototype/datasets/_builtin/imagenet.py b/torchvision/prototype/datasets/_builtin/imagenet.py
index e239969be4e..1307757cef6 100644
--- a/torchvision/prototype/datasets/_builtin/imagenet.py
+++ b/torchvision/prototype/datasets/_builtin/imagenet.py
@@ -1,3 +1,4 @@
+import enum
 import functools
 import pathlib
 import re
@@ -9,6 +10,7 @@
     IterKeyZipper,
     Mapper,
     Filter,
+    Demultiplexer,
     TarArchiveLoader,
     Enumerator,
 )
@@ -25,7 +27,6 @@
     hint_shuffling,
     read_categories_file,
     path_accessor,
-    path_comparator,
 )
 from torchvision.prototype.features import Label, EncodedImage
 
@@ -45,6 +46,11 @@ def __init__(self, **kwargs: Any) -> None:
         super().__init__("Register on https://image-net.org/ and follow the instructions there.", **kwargs)
 
 
+class ImageNetDemux(enum.IntEnum):
+    META = 0
+    LABEL = 1
+
+
 @register_dataset(NAME)
 class ImageNet(Dataset):
     """
@@ -102,6 +108,12 @@ def _prepare_train_data(self, data: Tuple[str, BinaryIO]) -> Tuple[Tuple[Label,
     def _prepare_test_data(self, data: Tuple[str, BinaryIO]) -> Tuple[None, Tuple[str, BinaryIO]]:
         return None, data
 
+    def _classifiy_devkit(self, data: Tuple[str, BinaryIO]) -> Optional[int]:
+        return {
+            "meta.mat": ImageNetDemux.META,
+            "ILSVRC2012_validation_ground_truth.txt": ImageNetDemux.LABEL,
+        }.get(pathlib.Path(data[0]).name)
+
     # Although the WordNet IDs (wnids) are unique, the corresponding categories are not. For example, both n02012849
     # and n03126707 are labeled 'crane' while the first means the bird and the latter means the construction equipment
     _WNID_MAP = {
@@ -109,12 +121,8 @@ def _prepare_test_data(self, data: Tuple[str, BinaryIO]) -> Tuple[None, Tuple[st
         "n03710721": "tank suit",
     }
 
-    def _extract_categories_and_wnids(self, devkit_dp: IterDataPipe[Tuple[str, BinaryIO]]) -> List[Tuple[str, str]]:
-        meta_dp = Filter(devkit_dp, path_comparator("name", "meta.mat"))
-
-        _, buffer = list(meta_dp)[0]
-        synsets = read_mat(buffer, squeeze_me=True)["synsets"]
-
+    def _extract_categories_and_wnids(self, data: Tuple[str, BinaryIO]) -> List[Tuple[str, str]]:
+        synsets = read_mat(data[1], squeeze_me=True)["synsets"]
         return [
             (self._WNID_MAP.get(wnid, category.split(",", 1)[0]), wnid)
             for _, wnid, category, _, num_children, *_ in synsets
@@ -164,9 +172,13 @@ def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str,
         else:  # config.split == "val":
             images_dp, devkit_dp = resource_dps
 
-            _, wnids = zip(*self._extract_categories_and_wnids(devkit_dp))
+            meta_dp, label_dp = Demultiplexer(
+                devkit_dp, 2, self._classifiy_devkit, drop_none=True, buffer_size=INFINITE_BUFFER_SIZE
+            )
+
+            meta_dp = Mapper(meta_dp, self._extract_categories_and_wnids)
+            _, wnids = zip(*next(iter(meta_dp)))
 
-            label_dp = Filter(devkit_dp, path_comparator("name", "ILSVRC2012_validation_ground_truth.txt"))
             label_dp = LineReader(label_dp, decode=True, return_path=False)
             # We cannot use self._wnids here, since we use a different order than the dataset
             label_dp = Mapper(label_dp, functools.partial(self._imagenet_label_to_wnid, wnids=wnids))
@@ -192,12 +204,17 @@ def __len__(self) -> int:
             "test": 100_000,
         }[self._split]
 
+    def _filter_meta(self, data: Tuple[str, Any]) -> bool:
+        return self._classifiy_devkit(data) == ImageNetDemux.META
+
     def _generate_categories(self) -> List[Tuple[str, ...]]:
         self._split = "val"
         resources = self._resources()
 
         devkit_dp = resources[1].load(self._root)
+        meta_dp = Filter(devkit_dp, self._filter_meta)
+        meta_dp = Mapper(meta_dp, self._extract_categories_and_wnids)
 
-        categories_and_wnids = self._extract_categories_and_wnids(devkit_dp)
+        categories_and_wnids = cast(List[Tuple[str, ...]], next(iter(meta_dp)))
         categories_and_wnids.sort(key=lambda category_and_wnid: category_and_wnid[1])
         return categories_and_wnids

From b77c5b87592d942bb62ab7d32324f4a89780120f Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Thu, 26 May 2022 09:45:04 +0200
Subject: [PATCH 05/12] use map datapipe instead

---
 .../prototype/datasets/_builtin/cub200.py     | 13 +++--
 .../prototype/datasets/_builtin/imagenet.py   | 57 +++++++++++--------
 2 files changed, 41 insertions(+), 29 deletions(-)

diff --git a/torchvision/prototype/datasets/_builtin/cub200.py b/torchvision/prototype/datasets/_builtin/cub200.py
index 0e5a80de825..c38a51265a1 100644
--- a/torchvision/prototype/datasets/_builtin/cub200.py
+++ b/torchvision/prototype/datasets/_builtin/cub200.py
@@ -13,6 +13,7 @@
     CSVParser,
     CSVDictParser,
 )
+from torchdata.datapipes.map import IterToMapConverter
 from torchvision.prototype.datasets.utils import Dataset, GDriveResource, OnlineResource
 from torchvision.prototype.datasets.utils._internal import (
     INFINITE_BUFFER_SIZE,
@@ -114,6 +115,9 @@ def _2011_classify_archive(self, data: Tuple[str, Any]) -> Optional[int]:
         else:
             return None
 
+    def _2011_extract_file_name(self, rel_posix_path: str) -> str:
+        return rel_posix_path.rsplit("/", maxsplit=1)[1]
+
     def _2011_filter_split(self, row: List[str]) -> bool:
         _, split_id = row
         return {
@@ -185,17 +189,16 @@ def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str,
             )
 
             image_files_dp = CSVParser(image_files_dp, dialect="cub200")
-            image_files_map = dict(
-                (image_id, rel_posix_path.rsplit("/", maxsplit=1)[1]) for image_id, rel_posix_path in image_files_dp
-            )
+            image_files_dp = Mapper(image_files_dp, self._2011_extract_file_name, input_col=1)
+            image_files_map = IterToMapConverter(image_files_dp)
 
             split_dp = CSVParser(split_dp, dialect="cub200")
             split_dp = Filter(split_dp, self._2011_filter_split)
             split_dp = Mapper(split_dp, getitem(0))
-            split_dp = Mapper(split_dp, image_files_map.get)
+            split_dp = Mapper(split_dp, image_files_map.__getitem__)
 
             bounding_boxes_dp = CSVParser(bounding_boxes_dp, dialect="cub200")
-            bounding_boxes_dp = Mapper(bounding_boxes_dp, image_files_map.get, input_col=0)
+            bounding_boxes_dp = Mapper(bounding_boxes_dp, image_files_map.__getitem__, input_col=0)
 
             anns_dp = IterKeyZipper(
                 bounding_boxes_dp,
diff --git a/torchvision/prototype/datasets/_builtin/imagenet.py b/torchvision/prototype/datasets/_builtin/imagenet.py
index 1307757cef6..a8d9e6b9880 100644
--- a/torchvision/prototype/datasets/_builtin/imagenet.py
+++ b/torchvision/prototype/datasets/_builtin/imagenet.py
@@ -1,8 +1,7 @@
 import enum
-import functools
 import pathlib
 import re
-from typing import Any, Dict, List, Optional, Tuple, BinaryIO, Match, cast, Union
+from typing import Any, Dict, List, Optional, Tuple, BinaryIO, Match, cast, Union, Iterator
 
 from torchdata.datapipes.iter import (
     IterDataPipe,
@@ -14,6 +13,7 @@
     TarArchiveLoader,
     Enumerator,
 )
+from torchdata.datapipes.map import IterToMapConverter
 from torchvision.prototype.datasets.utils import (
     OnlineResource,
     ManualDownloadResource,
@@ -51,6 +51,28 @@ class ImageNetDemux(enum.IntEnum):
     LABEL = 1
 
 
+class CategoryAndWordNetIDExtractor(IterDataPipe):
+    # Although the WordNet IDs (wnids) are unique, the corresponding categories are not. For example, both n02012849
+    # and n03126707 are labeled 'crane' while the first means the bird and the latter means the construction equipment
+    _WNID_MAP = {
+        "n03126707": "construction crane",
+        "n03710721": "tank suit",
+    }
+
+    def __init__(self, datapipe: IterDataPipe[Tuple[str, BinaryIO]]) -> None:
+        self.datapipe = datapipe
+
+    def __iter__(self) -> Iterator[Tuple[str, str]]:
+        for _, stream in self.datapipe:
+            synsets = read_mat(stream, squeeze_me=True)["synsets"]
+            for _, wnid, category, _, num_children, *_ in synsets:
+                if num_children > 0:
+                    # we are looking at a superclass that has no direct instance
+                    continue
+
+                yield self._WNID_MAP.get(wnid, category.split(",", 1)[0]), wnid
+
+
 @register_dataset(NAME)
 class ImageNet(Dataset):
     """
@@ -114,22 +136,6 @@ def _classifiy_devkit(self, data: Tuple[str, BinaryIO]) -> Optional[int]:
             "ILSVRC2012_validation_ground_truth.txt": ImageNetDemux.LABEL,
         }.get(pathlib.Path(data[0]).name)
 
-    # Although the WordNet IDs (wnids) are unique, the corresponding categories are not. For example, both n02012849
-    # and n03126707 are labeled 'crane' while the first means the bird and the latter means the construction equipment
-    _WNID_MAP = {
-        "n03126707": "construction crane",
-        "n03710721": "tank suit",
-    }
-
-    def _extract_categories_and_wnids(self, data: Tuple[str, BinaryIO]) -> List[Tuple[str, str]]:
-        synsets = read_mat(data[1], squeeze_me=True)["synsets"]
-        return [
-            (self._WNID_MAP.get(wnid, category.split(",", 1)[0]), wnid)
-            for _, wnid, category, _, num_children, *_ in synsets
-            # if num_children > 0, we are looking at a superclass that has no direct instance
-            if num_children == 0
-        ]
-
     def _imagenet_label_to_wnid(self, imagenet_label: str, *, wnids: Tuple[str, ...]) -> str:
         return wnids[int(imagenet_label) - 1]
 
@@ -176,12 +182,15 @@ def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str,
                 devkit_dp, 2, self._classifiy_devkit, drop_none=True, buffer_size=INFINITE_BUFFER_SIZE
             )
 
-            meta_dp = Mapper(meta_dp, self._extract_categories_and_wnids)
-            _, wnids = zip(*next(iter(meta_dp)))
+            # We cannot use self._wnids here, since we use a different order than the dataset
+            meta_dp = CategoryAndWordNetIDExtractor(meta_dp)
+            wnid_dp = Mapper(meta_dp, getitem(1))
+            wnid_dp = Enumerator(wnid_dp, 1)
+            wnid_dp = Mapper(wnid_dp, str, input_col=0)
+            wnid_map = IterToMapConverter(wnid_dp)
 
             label_dp = LineReader(label_dp, decode=True, return_path=False)
-            # We cannot use self._wnids here, since we use a different order than the dataset
-            label_dp = Mapper(label_dp, functools.partial(self._imagenet_label_to_wnid, wnids=wnids))
+            label_dp = Mapper(label_dp, wnid_map.__getitem__)
             label_dp: IterDataPipe[Tuple[int, str]] = Enumerator(label_dp, 1)
             label_dp = hint_shuffling(label_dp)
             label_dp = hint_sharding(label_dp)
@@ -213,8 +222,8 @@ def _generate_categories(self) -> List[Tuple[str, ...]]:
 
         devkit_dp = resources[1].load(self._root)
         meta_dp = Filter(devkit_dp, self._filter_meta)
-        meta_dp = Mapper(meta_dp, self._extract_categories_and_wnids)
+        meta_dp = CategoryAndWordNetIDExtractor(meta_dp)
 
-        categories_and_wnids = cast(List[Tuple[str, ...]], next(iter(meta_dp)))
+        categories_and_wnids = cast(List[Tuple[str, ...]], list(meta_dp))
         categories_and_wnids.sort(key=lambda category_and_wnid: category_and_wnid[1])
         return categories_and_wnids

From d8f3d07ba434cb822f7710a788c977f1ba79307d Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Wed, 31 Aug 2022 08:36:13 +0200
Subject: [PATCH 06/12] [DEBUG] run tests on full CI matrix

---
 .github/workflows/prototype-tests.yml | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/prototype-tests.yml b/.github/workflows/prototype-tests.yml
index ff29168d9a7..bc9cb5402c4 100644
--- a/.github/workflows/prototype-tests.yml
+++ b/.github/workflows/prototype-tests.yml
@@ -7,6 +7,11 @@ jobs:
   prototype:
     strategy:
       matrix:
+        python-version:
+          - "3.7"
+          - "3.8"
+          - "3.9"
+          - "3.10"
         os:
           - ubuntu-latest
           - windows-latest
@@ -19,7 +24,7 @@ jobs:
       - name: Set up python
         uses: actions/setup-python@v3
         with:
-          python-version: 3.7
+          python-version: ${{ matrix.python-version }}
 
       - name: Upgrade system packages
         run: python -m pip install --upgrade pip setuptools wheel

From 797fa81dd483cfc6c8ecf9240a3b27697eec2284 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Wed, 31 Aug 2022 08:36:37 +0200
Subject: [PATCH 07/12] [SKIP CI] CircleCI


From 456dcf04f47adfb473877689dda676cc79428e81 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Wed, 31 Aug 2022 09:26:44 +0200
Subject: [PATCH 08/12] [SKIP CI] add temp fix for unnecessarily strict
 torchdata check

---
 torchvision/prototype/datasets/_builtin/imagenet.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/torchvision/prototype/datasets/_builtin/imagenet.py b/torchvision/prototype/datasets/_builtin/imagenet.py
index 5b24a613a55..d3bf748defe 100644
--- a/torchvision/prototype/datasets/_builtin/imagenet.py
+++ b/torchvision/prototype/datasets/_builtin/imagenet.py
@@ -161,6 +161,10 @@ def _prepare_sample(
             image=EncodedImage.from_file(buffer),
         )
 
+    def _to_str(self, obj: Any):
+        # FIXME: remove this wrapper as soon as https://github.com/pytorch/pytorch/pull/84279 is landed
+        return str(obj)
+
     def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
         if self._split in {"train", "test"}:
             dp = resource_dps[0]
@@ -183,7 +187,7 @@ def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str,
             meta_dp = CategoryAndWordNetIDExtractor(meta_dp)
             wnid_dp = Mapper(meta_dp, getitem(1))
             wnid_dp = Enumerator(wnid_dp, 1)
-            wnid_dp = Mapper(wnid_dp, str, input_col=0)
+            wnid_dp = Mapper(wnid_dp, self._to_str, input_col=0)
             wnid_map = IterToMapConverter(wnid_dp)
 
             label_dp = LineReader(label_dp, decode=True, return_path=False)

From b961415cf65f5d7ef9d27750287a8abc892082b8 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Wed, 31 Aug 2022 08:36:13 +0200
Subject: [PATCH 09/12] Revert "[DEBUG] run tests on full CI matrix"

This reverts commit d8f3d07ba434cb822f7710a788c977f1ba79307d.
---
 .github/workflows/prototype-tests.yml | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/.github/workflows/prototype-tests.yml b/.github/workflows/prototype-tests.yml
index bc9cb5402c4..ff29168d9a7 100644
--- a/.github/workflows/prototype-tests.yml
+++ b/.github/workflows/prototype-tests.yml
@@ -7,11 +7,6 @@ jobs:
   prototype:
     strategy:
       matrix:
-        python-version:
-          - "3.7"
-          - "3.8"
-          - "3.9"
-          - "3.10"
         os:
           - ubuntu-latest
           - windows-latest
@@ -24,7 +19,7 @@ jobs:
       - name: Set up python
         uses: actions/setup-python@v3
         with:
-          python-version: ${{ matrix.python-version }}
+          python-version: 3.7
 
       - name: Upgrade system packages
         run: python -m pip install --upgrade pip setuptools wheel

From e6cf29d9134e76676cc7f77c75e02070ab031f31 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Wed, 31 Aug 2022 09:26:44 +0200
Subject: [PATCH 10/12] Revert "[SKIP CI] add temp fix for unnecessarily strict
 torchdata check"

This reverts commit 456dcf04f47adfb473877689dda676cc79428e81.
---
 torchvision/prototype/datasets/_builtin/imagenet.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/torchvision/prototype/datasets/_builtin/imagenet.py b/torchvision/prototype/datasets/_builtin/imagenet.py
index d3bf748defe..5b24a613a55 100644
--- a/torchvision/prototype/datasets/_builtin/imagenet.py
+++ b/torchvision/prototype/datasets/_builtin/imagenet.py
@@ -161,10 +161,6 @@ def _prepare_sample(
             image=EncodedImage.from_file(buffer),
         )
 
-    def _to_str(self, obj: Any):
-        # FIXME: remove this wrapper as soon as https://github.com/pytorch/pytorch/pull/84279 is landed
-        return str(obj)
-
     def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
         if self._split in {"train", "test"}:
             dp = resource_dps[0]
@@ -187,7 +183,7 @@ def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str,
             meta_dp = CategoryAndWordNetIDExtractor(meta_dp)
             wnid_dp = Mapper(meta_dp, getitem(1))
             wnid_dp = Enumerator(wnid_dp, 1)
-            wnid_dp = Mapper(wnid_dp, self._to_str, input_col=0)
+            wnid_dp = Mapper(wnid_dp, str, input_col=0)
             wnid_map = IterToMapConverter(wnid_dp)
 
             label_dp = LineReader(label_dp, decode=True, return_path=False)

From 5217e2df1873fc91ef510225a19ae540ad8724f7 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Tue, 13 Sep 2022 15:39:27 +0200
Subject: [PATCH 11/12] use int key

---
 torchvision/prototype/datasets/_builtin/imagenet.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchvision/prototype/datasets/_builtin/imagenet.py b/torchvision/prototype/datasets/_builtin/imagenet.py
index 5b24a613a55..a64f6596be7 100644
--- a/torchvision/prototype/datasets/_builtin/imagenet.py
+++ b/torchvision/prototype/datasets/_builtin/imagenet.py
@@ -183,10 +183,10 @@ def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str,
             meta_dp = CategoryAndWordNetIDExtractor(meta_dp)
             wnid_dp = Mapper(meta_dp, getitem(1))
             wnid_dp = Enumerator(wnid_dp, 1)
-            wnid_dp = Mapper(wnid_dp, str, input_col=0)
             wnid_map = IterToMapConverter(wnid_dp)
 
             label_dp = LineReader(label_dp, decode=True, return_path=False)
+            label_dp = Mapper(label_dp, int)
             label_dp = Mapper(label_dp, wnid_map.__getitem__)
             label_dp: IterDataPipe[Tuple[int, str]] = Enumerator(label_dp, 1)
             label_dp = hint_shuffling(label_dp)

From c057cf3e2a2946948f866826fb235d75dc574de0 Mon Sep 17 00:00:00 2001
From: Philip Meier <github.pmeier@posteo.de>
Date: Tue, 13 Sep 2022 15:44:07 +0200
Subject: [PATCH 12/12] remove unused function

---
 torchvision/prototype/datasets/_builtin/imagenet.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/torchvision/prototype/datasets/_builtin/imagenet.py b/torchvision/prototype/datasets/_builtin/imagenet.py
index a64f6596be7..3192f1f5503 100644
--- a/torchvision/prototype/datasets/_builtin/imagenet.py
+++ b/torchvision/prototype/datasets/_builtin/imagenet.py
@@ -133,9 +133,6 @@ def _classifiy_devkit(self, data: Tuple[str, BinaryIO]) -> Optional[int]:
             "ILSVRC2012_validation_ground_truth.txt": ImageNetDemux.LABEL,
         }.get(pathlib.Path(data[0]).name)
 
-    def _imagenet_label_to_wnid(self, imagenet_label: str, *, wnids: Tuple[str, ...]) -> str:
-        return wnids[int(imagenet_label) - 1]
-
     _VAL_TEST_IMAGE_NAME_PATTERN = re.compile(r"ILSVRC2012_(val|test)_(?P<id>\d{8})[.]JPEG")
 
     def _val_test_image_key(self, path: pathlib.Path) -> int: