From 4283b952b72e5d47e23b1f7329e7268abe610349 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Mon, 21 Nov 2022 12:47:45 -0500 Subject: [PATCH 1/6] Reset file pointer to 0 when reading file stream Instead of raising a ValueError about the file pointer not being at the start of the file, reset the file pointer automatically to zero, and warn that the pointer has been reset. --- xarray/core/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 979075efe5e..fb0acf68f6d 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -654,9 +654,11 @@ def read_magic_number_from_file(filename_or_obj, count=8) -> bytes: magic_number = filename_or_obj[:count] elif isinstance(filename_or_obj, io.IOBase): if filename_or_obj.tell() != 0: - raise ValueError( + filename_or_obj.seek(0) + warnings.warn( "cannot guess the engine, " "file-like object read/write pointer not at the start of the file, " + "so resetting file pointer to zero. 
If this does not work, " "please close and reopen, or use a context manager" ) magic_number = filename_or_obj.read(count) From 68989c3c42636bc6c4f4f1d2a41966781521f61f Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Mon, 21 Nov 2022 13:13:58 -0500 Subject: [PATCH 2/6] Expect warning rather than ValueError for test_open_twice Fixes the `Failed: DID NOT RAISE <class 'ValueError'>` --- xarray/tests/test_backends.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 64030b3f595..dfc7f867bbb 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -3031,7 +3031,7 @@ def test_open_badbytes(self) -> None: def test_open_twice(self) -> None: expected = create_test_data() expected.attrs["foo"] = "bar" - with pytest.raises(ValueError, match=r"read/write pointer not at the start"): + with pytest.warns(match=r"read/write pointer not at the start"): with create_tmp_file() as tmp_file: expected.to_netcdf(tmp_file, engine="h5netcdf") with open(tmp_file, "rb") as f: From befda318e6f696519b9ff0c7ece8511509ec0444 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Mon, 21 Nov 2022 13:19:01 -0500 Subject: [PATCH 3/6] Remove checks for errors raised in test_open_fileobj The ValueError and RuntimeWarning aren't raised anymore. --- xarray/tests/test_backends.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index dfc7f867bbb..b6930d891d5 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -3069,15 +3069,7 @@ def test_open_fileobj(self) -> None: # `raises_regex`?). 
Ref https://github.com/pydata/xarray/pull/5191 with open(tmp_file, "rb") as f: f.seek(8) - with pytest.raises( - ValueError, - match="match in any of xarray's currently installed IO", - ): - with pytest.warns( - RuntimeWarning, - match=re.escape("'h5netcdf' fails while guessing"), - ): - open_dataset(f) + open_dataset(f) @requires_h5netcdf From 6f6fbe6f58d5eb7de015cc3bf123e8632ee578c9 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Mon, 21 Nov 2022 13:41:04 -0500 Subject: [PATCH 4/6] Fix typo form -> from --- xarray/core/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index fb0acf68f6d..e50c3838105 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -664,7 +664,7 @@ def read_magic_number_from_file(filename_or_obj, count=8) -> bytes: magic_number = filename_or_obj.read(count) filename_or_obj.seek(0) else: - raise TypeError(f"cannot read the magic number form {type(filename_or_obj)}") + raise TypeError(f"cannot read the magic number from {type(filename_or_obj)}") return magic_number From 93711b7976a823f98084bc56003e81334f6b1ba5 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Mon, 21 Nov 2022 13:44:04 -0500 Subject: [PATCH 5/6] Add changelog entry for bugfix --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 13867daebf4..215e2f3ba8a 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -56,6 +56,8 @@ Bug fixes - Import ``nc_time_axis`` when needed (:issue:`7275`, :pull:`7276`). By `Michael Niklas `_. +- Fix multiple reads on fsspec S3 files by resetting file pointer to 0 when reading file streams (:issue:`6813`, :pull:`7304`). + By `David Hoese `_ and `Wei Ji Leong `_. 
Documentation ~~~~~~~~~~~~~ From 929cb62977d630a00ace9747bc86066555b83d0d Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Tue, 29 Nov 2022 10:07:51 -0500 Subject: [PATCH 6/6] Remove warning about resetting file pointer to zero The file pointer is reset to zero after reading the magic number anyway, so it should be OK not to warn about this. --- xarray/core/utils.py | 6 ------ xarray/tests/test_backends.py | 11 +++++------ 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index e50c3838105..caa4be08290 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -655,12 +655,6 @@ def read_magic_number_from_file(filename_or_obj, count=8) -> bytes: elif isinstance(filename_or_obj, io.IOBase): if filename_or_obj.tell() != 0: filename_or_obj.seek(0) - warnings.warn( - "cannot guess the engine, " - "file-like object read/write pointer not at the start of the file, " - "so resetting file pointer to zero. 
If this does not work, " - "please close and reopen, or use a context manager" - ) magic_number = filename_or_obj.read(count) filename_or_obj.seek(0) else: diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 621bac0530f..e3faeedbd13 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -3159,13 +3159,12 @@ def test_open_badbytes(self) -> None: def test_open_twice(self) -> None: expected = create_test_data() expected.attrs["foo"] = "bar" - with pytest.warns(match=r"read/write pointer not at the start"): - with create_tmp_file() as tmp_file: - expected.to_netcdf(tmp_file, engine="h5netcdf") - with open(tmp_file, "rb") as f: + with create_tmp_file() as tmp_file: + expected.to_netcdf(tmp_file, engine="h5netcdf") + with open(tmp_file, "rb") as f: + with open_dataset(f, engine="h5netcdf"): with open_dataset(f, engine="h5netcdf"): - with open_dataset(f, engine="h5netcdf"): - pass + pass @requires_scipy def test_open_fileobj(self) -> None: