10
10
11
11
from pandas .compat import long
12
12
13
- from pandas import DataFrame , MultiIndex , Series , Timestamp , compat , date_range
13
+ from pandas import (DataFrame , MultiIndex , Series , Timestamp , compat ,
14
+ date_range , CategoricalDtype )
14
15
from pandas .tests .frame .common import TestData
15
16
import pandas .util .testing as tm
16
17
@@ -220,6 +221,12 @@ def test_to_records_with_categorical(self):
220
221
dtype = [("index" , "<i8" ), ("A" , "<U" ),
221
222
("B" , "<U" ), ("C" , "<U" )])),
222
223
224
+ # Pass in a dtype instance.
225
+ (dict (column_dtypes = np .dtype ('unicode' )),
226
+ np .rec .array ([("0" , "1" , "0.2" , "a" ), ("1" , "2" , "1.5" , "bc" )],
227
+ dtype = [("index" , "<i8" ), ("A" , "<U" ),
228
+ ("B" , "<U" ), ("C" , "<U" )])),
229
+
223
230
# Pass in a dictionary (name-only).
224
231
(dict (column_dtypes = {"A" : np .int8 , "B" : np .float32 , "C" : "<U2" }),
225
232
np .rec .array ([("0" , "1" , "0.2" , "a" ), ("1" , "2" , "1.5" , "bc" )],
@@ -249,6 +256,12 @@ def test_to_records_with_categorical(self):
249
256
dtype = [("index" , "<i8" ), ("A" , "i1" ),
250
257
("B" , "<f4" ), ("C" , "O" )])),
251
258
259
+ # Names / indices not in dtype mapping default to array dtype.
260
+ (dict (column_dtypes = {"A" : np .dtype ('int8' ), "B" : np .dtype ('float32' )}),
261
+ np .rec .array ([("0" , "1" , "0.2" , "a" ), ("1" , "2" , "1.5" , "bc" )],
262
+ dtype = [("index" , "<i8" ), ("A" , "i1" ),
263
+ ("B" , "<f4" ), ("C" , "O" )]))])
264
+
252
265
# Mixture of everything.
253
266
(dict (column_dtypes = {"A" : np .int8 , "B" : np .float32 },
254
267
index_dtypes = "<U2" ),
@@ -258,17 +271,26 @@ def test_to_records_with_categorical(self):
258
271
259
272
# Invalid dtype values.
260
273
(dict (index = False , column_dtypes = list ()),
261
- "Invalid dtype \\ [\\ ] specified for column A" ),
274
+ ( ValueError , "Invalid dtype \\ [\\ ] specified for column A" ) ),
262
275
263
276
(dict (index = False , column_dtypes = {"A" : "int32" , "B" : 5 }),
264
- "Invalid dtype 5 specified for column B" ),
277
+ (ValueError , "Invalid dtype 5 specified for column B" )),
278
+
279
+ # Numpy can't handle EA types, so check error is raised
280
+ (dict (index = False , column_dtypes = {"A" : "int32" ,
281
+ "B" : CategoricalDtype (['a' , 'b' ])}),
282
+ (ValueError , 'Invalid dtype category specified for column B' )),
283
+
284
+ # Check that bad types raise
285
+ (dict (index = False , column_dtypes = {"A" : "int32" , "B" : "foo" }),
286
+ (TypeError , 'data type "foo" not understood' )),
265
287
])
266
288
def test_to_records_dtype (self , kwargs , expected ):
267
289
# see gh-18146
268
290
df = DataFrame ({"A" : [1 , 2 ], "B" : [0.2 , 1.5 ], "C" : ["a" , "bc" ]})
269
291
270
- if isinstance (expected , str ):
271
- with pytest .raises (ValueError , match = expected ):
292
+ if not isinstance (expected , np . recarray ):
293
+ with pytest .raises (expected [ 0 ] , match = expected [ 1 ] ):
272
294
df .to_records (** kwargs )
273
295
else :
274
296
result = df .to_records (** kwargs )
0 commit comments