Skip to content

Commit 74c5ba3

Browse files
authored
Adding an 's' to the functional names of open/list DataPipes (#479) (#489)
Summary: Pull Request resolved: #479 Test Plan: Imported from OSS Reviewed By: ejguan Differential Revision: D36785643 Pulled By: NivekT fbshipit-source-id: 02c3071047ac00dd34cb83a9b392be0cfa3565b0
1 parent 38b77c1 commit 74c5ba3

File tree

6 files changed

+25
-17
lines changed

6 files changed

+25
-17
lines changed

test/test_fsspec.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -94,10 +94,10 @@ def test_fsspec_file_loader_iterdatapipe(self):
9494
# Reset Test: Ensure the resulting streams are still readable after the DataPipe is reset/exhausted
9595
self._write_text_files()
9696
lister_dp = FileLister(self.temp_dir.name, "*.text")
97-
fsspec_file_loader_dp = FSSpecFileOpener(lister_dp, mode="rb")
97+
fsspec_file_opener_dp = lister_dp.open_files_by_fsspec(mode="rb")
9898

9999
n_elements_before_reset = 2
100-
res_before_reset, res_after_reset = reset_after_n_next_calls(fsspec_file_loader_dp, n_elements_before_reset)
100+
res_before_reset, res_after_reset = reset_after_n_next_calls(fsspec_file_opener_dp, n_elements_before_reset)
101101
self.assertEqual(2, len(res_before_reset))
102102
self.assertEqual(3, len(res_after_reset))
103103
for _name, stream in res_before_reset:

test/test_local_io.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -684,10 +684,10 @@ def test_io_path_file_loader_iterdatapipe(self):
684684
# Reset Test: Ensure the resulting streams are still readable after the DataPipe is reset/exhausted
685685
self._write_text_files()
686686
lister_dp = FileLister(self.temp_dir.name, "*.text")
687-
iopath_file_loader_dp = IoPathFileOpener(lister_dp, mode="rb")
687+
iopath_file_opener_dp = lister_dp.open_files_by_iopath(mode="rb")
688688

689689
n_elements_before_reset = 2
690-
res_before_reset, res_after_reset = reset_after_n_next_calls(iopath_file_loader_dp, n_elements_before_reset)
690+
res_before_reset, res_after_reset = reset_after_n_next_calls(iopath_file_opener_dp, n_elements_before_reset)
691691
self.assertEqual(2, len(res_before_reset))
692692
self.assertEqual(3, len(res_after_reset))
693693
for _name, stream in res_before_reset:

torchdata/datapipes/iter/load/README.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ Note: refer to the official documentation for detailed installation instructions
3535

3636
### S3FileLister
3737

38-
`S3FileLister` accepts a list of S3 prefixes and iterates all matching s3 urls. The functional API is `list_file_by_s3`.
39-
Acceptable prefixes include `s3://bucket-name`, `s3://bucket-name/`, `s3://bucket-name/folder`,
38+
`S3FileLister` accepts a list of S3 prefixes and iterates all matching s3 urls. The functional API is
39+
`list_files_by_s3`. Acceptable prefixes include `s3://bucket-name`, `s3://bucket-name/`, `s3://bucket-name/folder`,
4040
`s3://bucket-name/folder/`, and `s3://bucket-name/prefix`. You may also set `length`, `request_timeout_ms` (default 3000
4141
ms in aws-sdk-cpp), and `region`. Note that:
4242

@@ -48,7 +48,7 @@ ms in aws-sdk-cpp), and `region`. Note that:
4848
### S3FileLoader
4949

5050
`S3FileLoader` accepts a list of S3 URLs and iterates all files in `BytesIO` format with `(url, BytesIO)` tuples. The
51-
functional API is `load_file_by_s3`. You may also set `request_timeout_ms` (default 3000 ms in aws-sdk-cpp), `region`,
51+
functional API is `load_files_by_s3`. You may also set `request_timeout_ms` (default 3000 ms in aws-sdk-cpp), `region`,
5252
`buffer_size` (default 120Mb), and `multi_part_download` (defaults to using multi-part downloading). Note that:
5353

5454
1. Input **must** be a list and S3 URLs must be valid.

torchdata/datapipes/iter/load/fsspec.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -101,11 +101,11 @@ def __iter__(self) -> Iterator[str]:
101101
yield abs_path
102102

103103

104-
@functional_datapipe("open_file_by_fsspec")
104+
@functional_datapipe("open_files_by_fsspec")
105105
class FSSpecFileOpenerIterDataPipe(IterDataPipe[Tuple[str, StreamWrapper]]):
106106
r"""
107107
Opens files from input datapipe which contains `fsspec` paths and yields a tuple of
108-
pathname and opened file stream (functional name: ``open_file_by_fsspec``).
108+
pathname and opened file stream (functional name: ``open_files_by_fsspec``).
109109
110110
Args:
111111
source_datapipe: Iterable DataPipe that provides the pathnames or URLs
@@ -114,7 +114,7 @@ class FSSpecFileOpenerIterDataPipe(IterDataPipe[Tuple[str, StreamWrapper]]):
114114
Example:
115115
>>> from torchdata.datapipes.iter import FSSpecFileLister
116116
>>> datapipe = FSSpecFileLister(root=dir_path)
117-
>>> file_dp = datapipe.open_file_by_fsspec()
117+
>>> file_dp = datapipe.open_files_by_fsspec()
118118
"""
119119

120120
def __init__(self, source_datapipe: IterDataPipe[str], mode: str = "r") -> None:
@@ -133,6 +133,10 @@ def __len__(self) -> int:
133133
return len(self.source_datapipe)
134134

135135

136+
# Also register the old singular functional name so existing callers keep working (backward compatibility)
137+
IterDataPipe.register_datapipe_as_function("open_file_by_fsspec", FSSpecFileOpenerIterDataPipe)
138+
139+
136140
@functional_datapipe("save_by_fsspec")
137141
class FSSpecSaverIterDataPipe(IterDataPipe[str]):
138142
r"""

torchdata/datapipes/iter/load/iopath.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -96,11 +96,11 @@ def __iter__(self) -> Iterator[str]:
9696
yield os.path.join(path, file_name)
9797

9898

99-
@functional_datapipe("open_file_by_iopath")
99+
@functional_datapipe("open_files_by_iopath")
100100
class IoPathFileOpenerIterDataPipe(IterDataPipe[Tuple[str, StreamWrapper]]):
101101
r"""
102102
Opens files from input datapipe which contains pathnames or URLs,
103-
and yields a tuple of pathname and opened file stream (functional name: ``open_file_by_iopath``).
103+
and yields a tuple of pathname and opened file stream (functional name: ``open_files_by_iopath``).
104104
105105
Args:
106106
source_datapipe: Iterable DataPipe that provides the pathnames or URLs
@@ -114,7 +114,7 @@ class IoPathFileOpenerIterDataPipe(IterDataPipe[Tuple[str, StreamWrapper]]):
114114
Example:
115115
>>> from torchdata.datapipes.iter import IoPathFileLister
116116
>>> datapipe = IoPathFileLister(root=S3URL)
117-
>>> file_dp = datapipe.open_file_by_iopath()
117+
>>> file_dp = datapipe.open_files_by_iopath()
118118
"""
119119

120120
def __init__(self, source_datapipe: IterDataPipe[str], mode: str = "r", pathmgr=None) -> None:
@@ -141,6 +141,10 @@ def __len__(self) -> int:
141141
return len(self.source_datapipe)
142142

143143

144+
# Also register the old singular functional name so existing callers keep working (backward compatibility)
145+
IterDataPipe.register_datapipe_as_function("open_file_by_iopath", IoPathFileOpenerIterDataPipe)
146+
147+
144148
@functional_datapipe("save_by_iopath")
145149
class IoPathSaverIterDataPipe(IterDataPipe[str]):
146150

torchdata/datapipes/iter/load/s3io.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,10 @@
1313
from torchdata.datapipes.utils import StreamWrapper
1414

1515

16-
@functional_datapipe("list_file_by_s3")
16+
@functional_datapipe("list_files_by_s3")
1717
class S3FileListerIterDataPipe(IterDataPipe[str]):
1818
r"""
19-
Iterable DataPipe that lists Amazon S3 file URLs with the given prefixes (functional name: ``list_file_by_s3``).
19+
Iterable DataPipe that lists Amazon S3 file URLs with the given prefixes (functional name: ``list_files_by_s3``).
2020
Acceptable prefixes include ``s3://bucket-name``, ``s3://bucket-name/``, ``s3://bucket-name/folder``,
2121
``s3://bucket-name/folder/``, and ``s3://bucket-name/prefix``. You may also set ``length``, ``request_timeout_ms``
2222
(default 3000 ms in aws-sdk-cpp), and ``region``.
@@ -72,10 +72,10 @@ def __len__(self) -> int:
7272
return self.length
7373

7474

75-
@functional_datapipe("load_file_by_s3")
75+
@functional_datapipe("load_files_by_s3")
7676
class S3FileLoaderIterDataPipe(IterDataPipe[Tuple[str, StreamWrapper]]):
7777
r"""
78-
Iterable DataPipe that loads Amazon S3 files from the given S3 URLs (functional name: ``load_file_by_s3``).
78+
Iterable DataPipe that loads Amazon S3 files from the given S3 URLs (functional name: ``load_files_by_s3``).
7979
``S3FileLoader`` iterates all given S3 URLs in ``BytesIO`` format with ``(url, BytesIO)`` tuples.
8080
You may also set ``request_timeout_ms`` (default 3000 ms in aws-sdk-cpp), ``region``,
8181
``buffer_size`` (default 120Mb), and ``multi_part_download`` (defaults to using multi-part downloading).

0 commit comments

Comments
 (0)