diff --git a/pandas/tests/io/msgpack/test_sequnpack.py b/pandas/tests/io/msgpack/test_sequnpack.py index 48f9817142762..91f5778a7ce6c 100644 --- a/pandas/tests/io/msgpack/test_sequnpack.py +++ b/pandas/tests/io/msgpack/test_sequnpack.py @@ -30,7 +30,9 @@ def test_foobar(self): assert unpacker.unpack() == ord(b'b') assert unpacker.unpack() == ord(b'a') assert unpacker.unpack() == ord(b'r') - pytest.raises(OutOfData, unpacker.unpack) + msg = "No more data to unpack" + with pytest.raises(OutOfData, match=msg): + unpacker.unpack() unpacker.feed(b'foo') unpacker.feed(b'bar') @@ -50,14 +52,24 @@ def test_foobar_skip(self): unpacker.skip() assert unpacker.unpack() == ord(b'a') unpacker.skip() - pytest.raises(OutOfData, unpacker.unpack) + msg = "No more data to unpack" + with pytest.raises(OutOfData, match=msg): + unpacker.unpack() + + def test_maxbuffersize_read_size_exceeds_max_buffer_size(self): + msg = "read_size should be less or equal to max_buffer_size" + with pytest.raises(ValueError, match=msg): + Unpacker(read_size=5, max_buffer_size=3) + + def test_maxbuffersize_bufferfull(self): + unpacker = Unpacker(read_size=3, max_buffer_size=3, use_list=1) + unpacker.feed(b'foo') + with pytest.raises(BufferFull, match=r'^$'): + unpacker.feed(b'b') def test_maxbuffersize(self): - pytest.raises(ValueError, Unpacker, read_size=5, max_buffer_size=3) unpacker = Unpacker(read_size=3, max_buffer_size=3, use_list=1) - unpacker.feed(b'fo') - pytest.raises(BufferFull, unpacker.feed, b'ob') - unpacker.feed(b'o') + unpacker.feed(b'foo') assert ord('f') == next(unpacker) unpacker.feed(b'b') assert ord('o') == next(unpacker) diff --git a/pandas/tests/io/msgpack/test_unpack.py b/pandas/tests/io/msgpack/test_unpack.py index e63631a97bbb4..356156296c067 100644 --- a/pandas/tests/io/msgpack/test_unpack.py +++ b/pandas/tests/io/msgpack/test_unpack.py @@ -16,7 +16,9 @@ def test_unpack_array_header_from_file(self): assert unpacker.unpack() == 2 assert unpacker.unpack() == 3 assert unpacker.unpack() == 4 - pytest.raises(OutOfData, unpacker.unpack) + msg = "No more data to unpack" + with pytest.raises(OutOfData, match=msg): + unpacker.unpack() def test_unpacker_hook_refcnt(self): if not hasattr(sys, 'getrefcount'): diff --git a/pandas/tests/io/parser/test_textreader.py b/pandas/tests/io/parser/test_textreader.py index 0fd92cb496df3..8119de67890a5 100644 --- a/pandas/tests/io/parser/test_textreader.py +++ b/pandas/tests/io/parser/test_textreader.py @@ -145,7 +145,10 @@ def test_skip_bad_lines(self, capsys): reader = TextReader(StringIO(data), delimiter=':', header=None) - pytest.raises(parser.ParserError, reader.read) + msg = (r"Error tokenizing data\. C error: Expected 3 fields in" + " line 4, saw 4") + with pytest.raises(parser.ParserError, match=msg): + reader.read() reader = TextReader(StringIO(data), delimiter=':', header=None, diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 13a8b1a0edfd3..3354bca63be92 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -3,7 +3,6 @@ """ import mmap import os -import re import pytest @@ -146,7 +145,16 @@ def test_read_non_existant(self, reader, module, error_class, fn_ext): pytest.importorskip(module) path = os.path.join(HERE, 'data', 'does_not_exist.' + fn_ext) - with pytest.raises(error_class): + msg1 = (r"File (b')?.+does_not_exist\.{}'? does not exist" + .format(fn_ext)) + msg2 = (r"\[Errno 2\] No such file or directory: '.+does_not_exist" + r"\.{}'").format(fn_ext) + msg3 = "Expected object or value" + msg4 = "path_or_buf needs to be a string file path or file-like" + msg5 = (r"\[Errno 2\] File .+does_not_exist\.{} does not exist:" + r" '.+does_not_exist\.{}'").format(fn_ext, fn_ext) + with pytest.raises(error_class, match=r"({}|{}|{}|{}|{})".format( + msg1, msg2, msg3, msg4, msg5)): reader(path) @pytest.mark.parametrize('reader, module, error_class, fn_ext', [ @@ -169,14 +177,26 @@ def test_read_expands_user_home_dir(self, reader, module, monkeypatch.setattr(icom, '_expand_user', lambda x: os.path.join('foo', x)) - message = "".join(["foo", os.path.sep, "does_not_exist.", fn_ext]) - - with pytest.raises(error_class, message=re.escape(message)): + msg1 = (r"File (b')?.+does_not_exist\.{}'? does not exist" + .format(fn_ext)) + msg2 = (r"\[Errno 2\] No such file or directory:" + r" '.+does_not_exist\.{}'").format(fn_ext) + msg3 = "Unexpected character found when decoding 'false'" + msg4 = "path_or_buf needs to be a string file path or file-like" + msg5 = (r"\[Errno 2\] File .+does_not_exist\.{} does not exist:" + r" '.+does_not_exist\.{}'").format(fn_ext, fn_ext) + + with pytest.raises(error_class, match=r"({}|{}|{}|{}|{})".format( + msg1, msg2, msg3, msg4, msg5)): reader(path) def test_read_non_existant_read_table(self): path = os.path.join(HERE, 'data', 'does_not_exist.' + 'csv') - with pytest.raises(FileNotFoundError): + msg1 = r"File b'.+does_not_exist\.csv' does not exist" + msg2 = (r"\[Errno 2\] File .+does_not_exist\.csv does not exist:" + r" '.+does_not_exist\.csv'") + with pytest.raises(FileNotFoundError, match=r"({}|{})".format( + msg1, msg2)): with tm.assert_produces_warning(FutureWarning): pd.read_table(path) @@ -326,7 +346,8 @@ def test_next(self, mmap_file): next_line = next(wrapper) assert next_line.strip() == line.strip() - pytest.raises(StopIteration, next, wrapper) + with pytest.raises(StopIteration, match=r'^$'): + next(wrapper) def test_unknown_engine(self): with tm.ensure_clean() as path: diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py index da0a9ed4ba7ed..9eb6d327be025 100644 --- a/pandas/tests/io/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -156,9 +156,14 @@ class A(object): def __init__(self): self.read = 0 - pytest.raises(ValueError, read_msgpack, path_or_buf=None) - pytest.raises(ValueError, read_msgpack, path_or_buf={}) - pytest.raises(ValueError, read_msgpack, path_or_buf=A()) + msg = (r"Invalid file path or buffer object type: <(class|type)" + r" '{}'>") + with pytest.raises(ValueError, match=msg.format('NoneType')): + read_msgpack(path_or_buf=None) + with pytest.raises(ValueError, match=msg.format('dict')): + read_msgpack(path_or_buf={}) + with pytest.raises(ValueError, match=msg.format(r'.*\.A')): + read_msgpack(path_or_buf=A()) class TestNumpy(TestPackers): @@ -567,7 +572,9 @@ def _check_roundtrip(self, obj, comparator, **kwargs): # currently these are not implemetned # i_rec = self.encode_decode(obj) # comparator(obj, i_rec, **kwargs) - pytest.raises(NotImplementedError, self.encode_decode, obj) + msg = r"msgpack sparse (series|frame) is not implemented" + with pytest.raises(NotImplementedError, match=msg): + self.encode_decode(obj) def test_sparse_series(self): diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index ce9be6a7857bf..586297d2e3872 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -506,7 +506,8 @@ def test_invalid_timestamp(self, version): original = DataFrame([(1,)], columns=['variable']) time_stamp = '01 Jan 2000, 00:00:00' with tm.ensure_clean() as path: - with pytest.raises(ValueError): + msg = "time_stamp should be datetime type" + with pytest.raises(ValueError, match=msg): original.to_stata(path, time_stamp=time_stamp, version=version) @@ -547,8 +548,8 @@ def test_no_index(self): with tm.ensure_clean() as path: original.to_stata(path, write_index=False) written_and_read_again = self.read_dta(path) - pytest.raises( - KeyError, lambda: written_and_read_again['index_not_written']) + with pytest.raises(KeyError, match=original.index.name): + written_and_read_again['index_not_written'] def test_string_no_dates(self): s1 = Series(['a', 'A longer string']) @@ -713,7 +714,11 @@ def test_excessively_long_string(self): s['s' + str(str_len)] = Series(['a' * str_len, 'b' * str_len, 'c' * str_len]) original = DataFrame(s) - with pytest.raises(ValueError): + msg = (r"Fixed width strings in Stata \.dta files are limited to 244" + r" \(or fewer\)\ncharacters\. Column 's500' does not satisfy" + r" this restriction\. Use the\n'version=117' parameter to write" + r" the newer \(Stata 13 and later\) format\.") + with pytest.raises(ValueError, match=msg): with tm.ensure_clean() as path: original.to_stata(path) @@ -864,11 +869,14 @@ def test_drop_column(self): columns=columns) tm.assert_frame_equal(expected, reordered) - with pytest.raises(ValueError): + msg = "columns contains duplicate entries" + with pytest.raises(ValueError, match=msg): columns = ['byte_', 'byte_'] read_stata(self.dta15_117, convert_dates=True, columns=columns) - with pytest.raises(ValueError): + msg = ("The following columns were not found in the Stata data set:" + " not_found") + with pytest.raises(ValueError, match=msg): columns = ['byte_', 'int_', 'long_', 'not_found'] read_stata(self.dta15_117, convert_dates=True, columns=columns) @@ -924,7 +932,10 @@ def test_categorical_warnings_and_errors(self): original = pd.concat([original[col].astype('category') for col in original], axis=1) with tm.ensure_clean() as path: - pytest.raises(ValueError, original.to_stata, path) + msg = ("Stata value labels for a single variable must have" + r" a combined length less than 32,000 characters\.") + with pytest.raises(ValueError, match=msg): + original.to_stata(path) original = pd.DataFrame.from_records( [['a'], @@ -1196,14 +1207,17 @@ def test_invalid_variable_labels(self, version): 'b': 'City Exponent', 'c': 'City'} with tm.ensure_clean() as path: - with pytest.raises(ValueError): + msg = "Variable labels must be 80 characters or fewer" + with pytest.raises(ValueError, match=msg): original.to_stata(path, variable_labels=variable_labels, version=version) variable_labels['a'] = u'invalid character Œ' with tm.ensure_clean() as path: - with pytest.raises(ValueError): + msg = ("Variable labels must contain only characters that can be" + " encoded in Latin-1") + with pytest.raises(ValueError, match=msg): original.to_stata(path, variable_labels=variable_labels, version=version) @@ -1221,7 +1235,9 @@ def test_write_variable_label_errors(self): 'b': 'City Exponent', 'c': u''.join(values)} - with pytest.raises(ValueError): + msg = ("Variable labels must contain only characters that can be" + " encoded in Latin-1") + with pytest.raises(ValueError, match=msg): with tm.ensure_clean() as path: original.to_stata(path, variable_labels=variable_labels_utf8) @@ -1231,7 +1247,8 @@ def test_write_variable_label_errors(self): 'that is too long for Stata which means ' 'that it has more than 80 characters'} - with pytest.raises(ValueError): + msg = "Variable labels must be 80 characters or fewer" + with pytest.raises(ValueError, match=msg): with tm.ensure_clean() as path: original.to_stata(path, variable_labels=variable_labels_long) @@ -1265,7 +1282,8 @@ def test_default_date_conversion(self): def test_unsupported_type(self): original = pd.DataFrame({'a': [1 + 2j, 2 + 4j]}) - with pytest.raises(NotImplementedError): + msg = "Data type complex128 not supported" + with pytest.raises(NotImplementedError, match=msg): with tm.ensure_clean() as path: original.to_stata(path) @@ -1277,7 +1295,8 @@ def test_unsupported_datetype(self): 'strs': ['apple', 'banana', 'cherry'], 'dates': dates}) - with pytest.raises(NotImplementedError): + msg = "Format %tC not implemented" + with pytest.raises(NotImplementedError, match=msg): with tm.ensure_clean() as path: original.to_stata(path, convert_dates={'dates': 'tC'}) @@ -1291,9 +1310,10 @@ def test_unsupported_datetype(self): def test_repeated_column_labels(self): # GH 13923 - with pytest.raises(ValueError) as cm: + msg = (r"Value labels for column ethnicsn are not unique\. The" + r" repeated labels are:\n\n-+wolof") + with pytest.raises(ValueError, match=msg): read_stata(self.dta23, convert_categoricals=True) - assert 'wolof' in cm.exception def test_stata_111(self): # 111 is an old version but still used by current versions of @@ -1316,17 +1336,18 @@ def test_out_of_range_double(self): 'ColumnTooBig': [0.0, np.finfo(np.double).eps, np.finfo(np.double).max]}) - with pytest.raises(ValueError) as cm: + msg = (r"Column ColumnTooBig has a maximum value \(.+\)" + r" outside the range supported by Stata \(.+\)") + with pytest.raises(ValueError, match=msg): with tm.ensure_clean() as path: df.to_stata(path) - assert 'ColumnTooBig' in cm.exception df.loc[2, 'ColumnTooBig'] = np.inf - with pytest.raises(ValueError) as cm: + msg = ("Column ColumnTooBig has a maximum value of infinity which" + " is outside the range supported by Stata") + with pytest.raises(ValueError, match=msg): with tm.ensure_clean() as path: df.to_stata(path) - assert 'ColumnTooBig' in cm.exception - assert 'infinity' in cm.exception def test_out_of_range_float(self): original = DataFrame({'ColumnOk': [0.0, @@ -1348,11 +1369,11 @@ def test_out_of_range_float(self): reread.set_index('index')) original.loc[2, 'ColumnTooBig'] = np.inf - with pytest.raises(ValueError) as cm: + msg = ("Column ColumnTooBig has a maximum value of infinity which" + " is outside the range supported by Stata") + with pytest.raises(ValueError, match=msg): with tm.ensure_clean() as path: original.to_stata(path) - assert 'ColumnTooBig' in cm.exception - assert 'infinity' in cm.exception def test_path_pathlib(self): df = tm.makeDataFrame() @@ -1466,7 +1487,8 @@ def test_invalid_date_conversion(self): 'dates': dates}) with tm.ensure_clean() as path: - with pytest.raises(ValueError): + msg = "convert_dates key must be a column or an integer" + with pytest.raises(ValueError, match=msg): original.to_stata(path, convert_dates={'wrong_name': 'tc'}) @@ -1546,10 +1568,14 @@ def test_all_none_exception(self, version): output = pd.DataFrame(output) output.loc[:, 'none'] = None with tm.ensure_clean() as path: - with pytest.raises(ValueError) as excinfo: + msg = (r"Column `none` cannot be exported\.\n\n" + "Only string-like object arrays containing all strings or a" + r" mix of strings and None can be exported\. Object arrays" + r" containing only null values are prohibited\. Other" + " object typescannot be exported and must first be" + r" converted to one of the supported types\.") + with pytest.raises(ValueError, match=msg): output.to_stata(path, version=version) - assert 'Only string-like' in excinfo.value.args[0] - assert 'Column `none`' in excinfo.value.args[0] @pytest.mark.parametrize('version', [114, 117]) def test_invalid_file_not_written(self, version): @@ -1557,7 +1583,12 @@ def test_invalid_file_not_written(self, version): df = DataFrame([content], columns=['invalid']) expected_exc = UnicodeEncodeError if PY3 else UnicodeDecodeError with tm.ensure_clean() as path: - with pytest.raises(expected_exc): + msg1 = (r"'latin-1' codec can't encode character '\\ufffd'" + r" in position 14: ordinal not in range\(256\)") + msg2 = ("'ascii' codec can't decode byte 0xef in position 14:" + r" ordinal not in range\(128\)") + with pytest.raises(expected_exc, match=r'{}|{}'.format( + msg1, msg2)): with tm.assert_produces_warning(ResourceWarning): df.to_stata(path)