ENH: storage_options for to_excel (#37818)

twoertwein · web-flow · commit 3c23e6ec9743 · 2020-11-14T12:05:50.000-05:00
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -2028,9 +2028,9 @@ def _repr_data_resource_(self):
     def to_excel(
         self,
         excel_writer,
-        sheet_name="Sheet1",
-        na_rep="",
-        float_format=None,
+        sheet_name: str = "Sheet1",
+        na_rep: str = "",
+        float_format: Optional[str] = None,
         columns=None,
         header=True,
         index=True,
@@ -2043,6 +2043,7 @@ def to_excel(
         inf_rep="inf",
         verbose=True,
         freeze_panes=None,
+        storage_options: StorageOptions = None,
     ) -> None:
         """
         Write {klass} to an Excel sheet.
@@ -2059,7 +2060,7 @@ def to_excel(
 
         Parameters
         ----------
-        excel_writer : str or ExcelWriter object
+        excel_writer : path-like, file-like, or ExcelWriter object
             File path or existing ExcelWriter.
         sheet_name : str, default 'Sheet1'
             Name of sheet which will contain DataFrame.
@@ -2100,6 +2101,12 @@ def to_excel(
         freeze_panes : tuple of int (length 2), optional
             Specifies the one-based bottommost row and rightmost column that
             is to be frozen.
+        storage_options : dict, optional
+            Extra options that make sense for a particular storage connection, e.g.
+            host, port, username, password, etc., if using a URL that will
+            be parsed by ``fsspec``, e.g., starting "s3://", "gcs://".
+
+            .. versionadded:: 1.2.0
 
         See Also
         --------
@@ -2174,6 +2181,7 @@ def to_excel(
             startcol=startcol,
             freeze_panes=freeze_panes,
             engine=engine,
+            storage_options=storage_options,
         )
 
     @final
diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
@@ -3,12 +3,12 @@
 from io import BufferedIOBase, BytesIO, RawIOBase
 import os
 from textwrap import fill
-from typing import Any, Mapping, Union
+from typing import Any, Dict, Mapping, Union, cast
 
 from pandas._config import config
 
 from pandas._libs.parsers import STR_NA_VALUES
-from pandas._typing import StorageOptions
+from pandas._typing import Buffer, FilePathOrBuffer, StorageOptions
 from pandas.errors import EmptyDataError
 from pandas.util._decorators import Appender, deprecate_nonkeyword_arguments
 
@@ -567,6 +567,12 @@ class ExcelWriter(metaclass=abc.ABCMeta):
         File mode to use (write or append). Append does not work with fsspec URLs.
 
         .. versionadded:: 0.24.0
+    storage_options : dict, optional
+        Extra options that make sense for a particular storage connection, e.g.
+        host, port, username, password, etc., if using a URL that will
+        be parsed by ``fsspec``, e.g., starting "s3://", "gcs://".
+
+        .. versionadded:: 1.2.0
 
     Attributes
     ----------
@@ -710,11 +716,12 @@ def save(self):
 
     def __init__(
         self,
-        path,
+        path: Union[FilePathOrBuffer, "ExcelWriter"],
         engine=None,
         date_format=None,
         datetime_format=None,
-        mode="w",
+        mode: str = "w",
+        storage_options: StorageOptions = None,
         **engine_kwargs,
     ):
         # validate that this engine can handle the extension
@@ -729,10 +736,13 @@ def __init__(
         # the excel backend first read the existing file and then write any data to it
         mode = mode.replace("a", "r+")
 
-        self.handles = IOHandles(path, compression={"copression": None})
+        # placeholder handles (path cast for typing) so self.handles is never None
+        self.handles = IOHandles(cast(Buffer, path), compression={"copression": None})
         if not isinstance(path, ExcelWriter):
-            self.handles = get_handle(path, mode, is_text=False)
-        self.sheets = {}
+            self.handles = get_handle(
+                path, mode, storage_options=storage_options, is_text=False
+            )
+        self.sheets: Dict[str, Any] = {}
         self.cur_sheet = None
 
         if date_format is None:
diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py
@@ -3,6 +3,7 @@
 from typing import Any, DefaultDict, Dict, List, Optional, Tuple, Union
 
 import pandas._libs.json as json
+from pandas._typing import StorageOptions
 
 from pandas.io.excel._base import ExcelWriter
 from pandas.io.excel._util import validate_freeze_panes
@@ -14,7 +15,12 @@ class ODSWriter(ExcelWriter):
     supported_extensions = (".ods",)
 
     def __init__(
-        self, path: str, engine: Optional[str] = None, mode: str = "w", **engine_kwargs
+        self,
+        path: str,
+        engine: Optional[str] = None,
+        mode: str = "w",
+        storage_options: StorageOptions = None,
+        **engine_kwargs,
     ):
         from odf.opendocument import OpenDocumentSpreadsheet
 
@@ -23,7 +29,9 @@ def __init__(
         if mode == "a":
             raise ValueError("Append mode is not supported with odf!")
 
-        super().__init__(path, mode=mode, **engine_kwargs)
+        super().__init__(
+            path, mode=mode, storage_options=storage_options, **engine_kwargs
+        )
 
         self.book = OpenDocumentSpreadsheet()
         self._style_dict: Dict[str, str] = {}
diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
@@ -16,11 +16,20 @@ class OpenpyxlWriter(ExcelWriter):
     engine = "openpyxl"
     supported_extensions = (".xlsx", ".xlsm")
 
-    def __init__(self, path, engine=None, mode="w", **engine_kwargs):
+    def __init__(
+        self,
+        path,
+        engine=None,
+        mode: str = "w",
+        storage_options: StorageOptions = None,
+        **engine_kwargs,
+    ):
         # Use the openpyxl module as the Excel writer.
         from openpyxl.workbook import Workbook
 
-        super().__init__(path, mode=mode, **engine_kwargs)
+        super().__init__(
+            path, mode=mode, storage_options=storage_options, **engine_kwargs
+        )
 
         # ExcelWriter replaced "a" by "r+" to allow us to first read the excel file from
         # the file and later write to it
diff --git a/pandas/io/excel/_xlsxwriter.py b/pandas/io/excel/_xlsxwriter.py
@@ -1,6 +1,7 @@
 from typing import Dict, List, Tuple
 
 import pandas._libs.json as json
+from pandas._typing import StorageOptions
 
 from pandas.io.excel._base import ExcelWriter
 from pandas.io.excel._util import validate_freeze_panes
@@ -168,7 +169,8 @@ def __init__(
         engine=None,
         date_format=None,
         datetime_format=None,
-        mode="w",
+        mode: str = "w",
+        storage_options: StorageOptions = None,
         **engine_kwargs,
     ):
         # Use the xlsxwriter module as the Excel writer.
@@ -183,6 +185,7 @@ def __init__(
             date_format=date_format,
             datetime_format=datetime_format,
             mode=mode,
+            storage_options=storage_options,
             **engine_kwargs,
         )
 
diff --git a/pandas/io/excel/_xlwt.py b/pandas/io/excel/_xlwt.py
@@ -1,6 +1,7 @@
 from typing import TYPE_CHECKING, Dict
 
 import pandas._libs.json as json
+from pandas._typing import StorageOptions
 
 from pandas.io.excel._base import ExcelWriter
 from pandas.io.excel._util import validate_freeze_panes
@@ -13,7 +14,15 @@ class XlwtWriter(ExcelWriter):
     engine = "xlwt"
     supported_extensions = (".xls",)
 
-    def __init__(self, path, engine=None, encoding=None, mode="w", **engine_kwargs):
+    def __init__(
+        self,
+        path,
+        engine=None,
+        encoding=None,
+        mode: str = "w",
+        storage_options: StorageOptions = None,
+        **engine_kwargs,
+    ):
         # Use the xlwt module as the Excel writer.
         import xlwt
 
@@ -22,7 +31,9 @@ def __init__(self, path, engine=None, encoding=None, mode="w", **engine_kwargs):
         if mode == "a":
             raise ValueError("Append mode is not supported with xlwt!")
 
-        super().__init__(path, mode=mode, **engine_kwargs)
+        super().__init__(
+            path, mode=mode, storage_options=storage_options, **engine_kwargs
+        )
 
         if encoding is None:
             encoding = "ascii"
diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py
@@ -10,7 +10,7 @@
 
 import numpy as np
 
-from pandas._typing import Label
+from pandas._typing import Label, StorageOptions
 
 from pandas.core.dtypes import missing
 from pandas.core.dtypes.common import is_float, is_scalar
@@ -19,7 +19,6 @@
 from pandas import DataFrame, Index, MultiIndex, PeriodIndex
 import pandas.core.common as com
 
-from pandas.io.common import stringify_path
 from pandas.io.formats.css import CSSResolver, CSSWarning
 from pandas.io.formats.format import get_level_lengths
 from pandas.io.formats.printing import pprint_thing
@@ -785,9 +784,10 @@ def write(
         startcol=0,
         freeze_panes=None,
         engine=None,
+        storage_options: StorageOptions = None,
     ):
         """
-        writer : string or ExcelWriter object
+        writer : path-like, file-like, or ExcelWriter object
             File path or existing ExcelWriter
         sheet_name : string, default 'Sheet1'
             Name of sheet which will contain DataFrame
@@ -802,6 +802,12 @@ def write(
             write engine to use if writer is a path - you can also set this
             via the options ``io.excel.xlsx.writer``, ``io.excel.xls.writer``,
             and ``io.excel.xlsm.writer``.
+        storage_options : dict, optional
+            Extra options that make sense for a particular storage connection, e.g.
+            host, port, username, password, etc., if using a URL that will
+            be parsed by ``fsspec``, e.g., starting "s3://", "gcs://".
+
+            .. versionadded:: 1.2.0
         """
         from pandas.io.excel import ExcelWriter
 
@@ -819,7 +825,7 @@ def write(
             # abstract class 'ExcelWriter' with abstract attributes 'engine',
             # 'save', 'supported_extensions' and 'write_cells'  [abstract]
             writer = ExcelWriter(  # type: ignore[abstract]
-                stringify_path(writer), engine=engine
+                writer, engine=engine, storage_options=storage_options
             )
             need_save = True
 
diff --git a/pandas/tests/io/test_fsspec.py b/pandas/tests/io/test_fsspec.py
@@ -124,6 +124,18 @@ def test_csv_options(fsspectest):
     assert fsspectest.test[0] == "csv_read"
 
 
+@pytest.mark.parametrize("extension", ["xlsx", "xls"])
+def test_excel_options(fsspectest, extension):
+    """Round-trip an Excel file, checking storage_options on write and read."""
+    target = f"testmem://test/test.{extension}"
+    frame = DataFrame({"a": [0]})
+
+    frame.to_excel(target, storage_options={"test": "write"}, index=False)
+    assert fsspectest.test[0] == "write"
+    read_excel(target, storage_options={"test": "read"})
+    assert fsspectest.test[0] == "read"
+
+
 @td.skip_if_no("fastparquet")
 def test_to_parquet_new_file(monkeypatch, cleared_fs):
     """Regression test for writing to a not-yet-existent GCS Parquet file."""