@@ -821,6 +821,41 @@ def test_dataframe_to_json_generator(module_under_test):
     assert list(rows) == expected
 
 
+def test_dataframe_to_json_generator_repeated_field(module_under_test):
+    pytest.importorskip(
+        "pandas",
+        minversion=str(PANDAS_MINIUM_VERSION),
+        reason=(
+            f"Requires `pandas version >= {PANDAS_MINIUM_VERSION}` "
+            "which introduces pandas.NA"
+        ),
+    )
+
+    df_data = [
+        collections.OrderedDict(
+            [("repeated_col", [pandas.NA, 2, None, 4]), ("not_repeated_col", "first")]
+        ),
+        collections.OrderedDict(
+            [
+                ("repeated_col", ["a", "b", mock.sentinel.foo, "d"]),
+                ("not_repeated_col", "second"),
+            ]
+        ),
+    ]
+    dataframe = pandas.DataFrame(df_data)
+
+    rows = module_under_test.dataframe_to_json_generator(dataframe)
+
+    expected = [
+        {"repeated_col": [pandas.NA, 2, None, 4], "not_repeated_col": "first"},
+        {
+            "repeated_col": ["a", "b", mock.sentinel.foo, "d"],
+            "not_repeated_col": "second",
+        },
+    ]
+    assert list(rows) == expected
+
+
 @pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
 def test_list_columns_and_indexes_with_named_index(module_under_test):
     df_data = collections.OrderedDict(
@@ -882,7 +917,7 @@ def test_list_columns_and_indexes_with_multiindex(module_under_test):
 def test_dataframe_to_bq_schema_dict_sequence(module_under_test):
     df_data = collections.OrderedDict(
         [
-            ("str_column", [u"hello", u"world"]),
+            ("str_column", ["hello", "world"]),
             ("int_column", [42, 8]),
             ("bool_column", [True, False]),
         ]
@@ -1070,7 +1105,7 @@ def test_dataframe_to_arrow_dict_sequence_schema(module_under_test):
     ]
 
     dataframe = pandas.DataFrame(
-        {"field01": [u"hello", u"world"], "field02": [True, False]}
+        {"field01": ["hello", "world"], "field02": [True, False]}
     )
 
     arrow_table = module_under_test.dataframe_to_arrow(dataframe, dict_schema)
@@ -1139,8 +1174,8 @@ def test_dataframe_to_parquet_compression_method(module_under_test):
 def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test):
     dataframe = pandas.DataFrame(
         data=[
-            {"id": 10, "status": u"FOO", "execution_date": datetime.date(2019, 5, 10)},
-            {"id": 20, "status": u"BAR", "created_at": datetime.date(2018, 9, 12)},
+            {"id": 10, "status": "FOO", "execution_date": datetime.date(2019, 5, 10)},
+            {"id": 20, "status": "BAR", "created_at": datetime.date(2018, 9, 12)},
         ]
     )
 
@@ -1167,8 +1202,8 @@ def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test):
 def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test):
     dataframe = pandas.DataFrame(
         data=[
-            {"id": 10, "status": u"FOO", "created_at": datetime.date(2019, 5, 10)},
-            {"id": 20, "status": u"BAR", "created_at": datetime.date(2018, 9, 12)},
+            {"id": 10, "status": "FOO", "created_at": datetime.date(2019, 5, 10)},
+            {"id": 20, "status": "BAR", "created_at": datetime.date(2018, 9, 12)},
         ]
     )
 
@@ -1197,8 +1232,8 @@ def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test):
 def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test):
     dataframe = pandas.DataFrame(
         data=[
-            {"struct_field": {"one": 2}, "status": u"FOO"},
-            {"struct_field": {"two": u"222"}, "status": u"BAR"},
+            {"struct_field": {"one": 2}, "status": "FOO"},
+            {"struct_field": {"two": "222"}, "status": "BAR"},
         ]
     )
 
@@ -1252,7 +1287,7 @@ def test_augment_schema_type_detection_succeeds(module_under_test):
                 "timestamp_field": datetime.datetime(2005, 5, 31, 14, 25, 55),
                 "date_field": datetime.date(2005, 5, 31),
                 "bytes_field": b"some bytes",
-                "string_field": u"some characters",
+                "string_field": "some characters",
                 "numeric_field": decimal.Decimal("123.456"),
                 "bignumeric_field": decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)),
             }
@@ -1312,13 +1347,13 @@ def test_augment_schema_type_detection_fails(module_under_test):
     dataframe = pandas.DataFrame(
         data=[
             {
-                "status": u"FOO",
+                "status": "FOO",
                 "struct_field": {"one": 1},
-                "struct_field_2": {"foo": u"123"},
+                "struct_field_2": {"foo": "123"},
             },
             {
-                "status": u"BAR",
-                "struct_field": {"two": u"111"},
+                "status": "BAR",
+                "struct_field": {"two": "111"},
                 "struct_field_2": {"bar": 27},
             },
         ]
@@ -1351,7 +1386,7 @@ def test_dataframe_to_parquet_dict_sequence_schema(module_under_test):
     ]
 
     dataframe = pandas.DataFrame(
-        {"field01": [u"hello", u"world"], "field02": [True, False]}
+        {"field01": ["hello", "world"], "field02": [True, False]}
     )
 
     write_table_patch = mock.patch.object(
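
For context, a minimal usage sketch (not part of the diff) of the behaviour the new test exercises. The import path is an assumption; the tests themselves reach the helpers through the module_under_test fixture.

import pandas

# Assumed import path for the helpers; the diff never names it explicitly.
from google.cloud.bigquery import _pandas_helpers

dataframe = pandas.DataFrame(
    [{"repeated_col": [pandas.NA, 2, None, 4], "not_repeated_col": "first"}]
)

# Per the new test, a repeated (list-valued) cell is expected to pass through
# unchanged rather than being NaN-filtered element by element:
rows = list(_pandas_helpers.dataframe_to_json_generator(dataframe))
# [{"repeated_col": [pandas.NA, 2, None, 4], "not_repeated_col": "first"}]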