diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index c7820a8cb9de1..31a6648e3eb7d 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1267,6 +1267,7 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form - Bug in :meth:`detect_client_encoding` where potential ``IOError`` goes unhandled when importing in a mod_wsgi process due to restricted access to stdout. (:issue:`21552`) - Bug in :func:`to_string()` that broke column alignment when ``index=False`` and width of first column's values is greater than the width of first column's header (:issue:`16839`, :issue:`13032`) - Bug in :func:`DataFrame.to_csv` where a single level MultiIndex incorrectly wrote a tuple. Now just the value of the index is written (:issue:`19589`). +- Bug in :meth:`HDFStore.append` when appending a :class:`DataFrame` with an empty string column and ``min_itemsize`` < 8 (:issue:`12242`) Plotting ^^^^^^^^ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 56b63fddd96ad..41e14e482d061 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -4637,7 +4637,7 @@ def _convert_string_array(data, encoding, errors, itemsize=None): # create the sized dtype if itemsize is None: ensured = ensure_object(data.ravel()) - itemsize = libwriters.max_len_string_array(ensured) + itemsize = max(1, libwriters.max_len_string_array(ensured)) data = np.asarray(data, dtype="S%d" % itemsize) return data diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 337eb74b3b51a..b6cf660cf171e 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -1482,6 +1482,16 @@ def check_col(key, name, size): pytest.raises(ValueError, store.append, 'df', df, min_itemsize={'foo': 20, 'foobar': 20}) + def test_append_with_empty_string(self): + + with ensure_clean_store(self.path) as store: + + # second append holds only an empty string (GH 12242) + df = DataFrame({'x': ['a', 'b', 
'c', 'd', 'e', 'f', '']}) + store.append('df', df[:-1], min_itemsize={'x': 1}) + store.append('df', df[-1:], min_itemsize={'x': 1}) + tm.assert_frame_equal(store.select('df'), df) + def test_to_hdf_with_min_itemsize(self): with ensure_clean_path(self.path) as path: