1
1
from rest_framework import serializers
2
2
from pandas import DataFrame
3
+ from pandas .api .types import is_numeric_dtype
3
4
from django .core .exceptions import ImproperlyConfigured
4
5
import datetime
6
+ from collections import OrderedDict
5
7
6
8
7
9
class PandasSerializer (serializers .ListSerializer ):
@@ -222,12 +224,15 @@ def get_index(self, dataframe):
222
224
group_field = self .get_group_field ()
223
225
date_field = self .get_date_field ()
224
226
header_fields = self .get_header_fields ()
227
+ extra_index_fields = self .get_extra_index_fields ()
225
228
229
+ index = []
226
230
if date_field :
227
- group_fields = [date_field , group_field ]
228
- else :
229
- group_fields = [group_field ]
230
- return group_fields + header_fields
231
+ index .append (date_field )
232
+ index += extra_index_fields
233
+ index .append (group_field )
234
+ index += header_fields
235
+ return index
231
236
232
237
def transform_dataframe (self , dataframe ):
233
238
"""
@@ -255,35 +260,30 @@ def transform_dataframe(self, dataframe):
255
260
interval = None
256
261
257
262
# Compute stats for each column, potentially grouped by year
258
- all_stats = []
263
+ series_infos = OrderedDict ()
259
264
for header , series in groups .items ():
260
265
if interval :
261
266
series_stats = self .boxplots_for_interval (series , interval )
262
267
else :
263
- interval = None
264
268
series_stats = [self .compute_boxplot (series )]
265
269
266
- series_infos = []
267
270
for series_stat in series_stats :
268
- series_info = {}
269
271
if isinstance (header , tuple ):
270
272
value_name = header [0 ]
271
273
col_values = header [1 :]
272
274
else :
273
275
value_name = header
274
276
col_values = []
275
- col_names = zip (dataframe .columns .names [1 :], col_values )
276
- for col_name , value in col_names :
277
- series_info [col_name ] = value
277
+ col_names = tuple (zip (dataframe .columns .names [1 :], col_values ))
278
+ if interval in series_stat :
279
+ col_names += ((interval , series_stat [interval ]),)
280
+ series_infos .setdefault (col_names , dict (col_names ))
281
+ series_info = series_infos [col_names ]
278
282
for stat_name , val in series_stat .items ():
279
- if stat_name == interval :
280
- series_info [stat_name ] = val
281
- else :
283
+ if stat_name != interval :
282
284
series_info [value_name + '-' + stat_name ] = val
283
- series_infos .append (series_info )
284
- all_stats += series_infos
285
285
286
- dataframe = DataFrame (all_stats )
286
+ dataframe = DataFrame (list ( series_infos . values ()) )
287
287
if 'series' in grouping :
288
288
index = header_fields + [group_field ]
289
289
unstack = len (header_fields )
@@ -336,11 +336,19 @@ def compute_boxplot(self, series):
336
336
series = series [series .notnull ()]
337
337
if len (series .values ) == 0 :
338
338
return {}
339
+ elif not is_numeric_dtype (series ):
340
+ return self .non_numeric_stats (series )
339
341
stats = boxplot_stats (list (series .values ))[0 ]
340
342
stats ['count' ] = len (series .values )
341
343
stats ['fliers' ] = "|" .join (map (str , stats ['fliers' ]))
342
344
return stats
343
345
346
def non_numeric_stats(self, series):
    """
    Summary statistics for a series that cannot be box-plotted.

    Returns a dict with the number of non-null values ('count') and the
    most frequent value ('mode').  Returns an empty dict when the series
    has no non-null values, matching what compute_boxplot() returns for
    an empty series.
    """
    # Drop nulls first: mode() ignores them, so counting them would make
    # 'count' disagree with the values 'mode' was computed from.
    values = series[series.notnull()]
    if len(values) == 0:
        # mode() of an empty series is itself empty; indexing it would
        # raise instead of reporting "no stats".
        return {}

    # mode() may return several values on a tie; take the first one by
    # position rather than relying on the label 0 being present.
    return {
        'count': len(values),
        'mode': values.mode().iloc[0],
    }
351
+
344
352
def get_group_field (self ):
345
353
"""
346
354
Categorical field to group datasets by.
@@ -359,6 +367,12 @@ def get_header_fields(self):
359
367
"""
360
368
return self .get_meta_option ('boxplot_header' , [])
361
369
370
def get_extra_index_fields(self):
    """
    Additional row-identifying fields that are not themselves plotted.

    Read from the 'boxplot_extra_index' meta option; an empty list is
    used when the option is not set.
    """
    extra_fields = self.get_meta_option('boxplot_extra_index', [])
    return extra_fields
375
+
362
376
363
377
class SimpleSerializer (serializers .Serializer ):
364
378
"""
0 commit comments