@@ -1018,7 +1018,7 @@ def _get_reserved_col_names(args):
1018
1018
return reserved_names
1019
1019
1020
1020
1021
- def _is_col_list (df_input , arg ):
1021
+ def _is_col_list (columns , arg ):
1022
1022
"""Returns True if arg looks like it's a list of columns or references to columns
1023
1023
in df_input, and False otherwise (in which case it's assumed to be a single column
1024
1024
or reference to a column).
@@ -1033,7 +1033,7 @@ def _is_col_list(df_input, arg):
1033
1033
return False # not iterable
1034
1034
for c in arg :
1035
1035
if isinstance (c , str ) or isinstance (c , int ):
1036
- if df_input is None or c not in df_input . columns :
1036
+ if columns is None or c not in columns :
1037
1037
return False
1038
1038
else :
1039
1039
try :
@@ -1059,8 +1059,8 @@ def _isinstance_listlike(x):
1059
1059
return True
1060
1060
1061
1061
1062
- def _escape_col_name (df_input , col_name , extra ):
1063
- while df_input is not None and (col_name in df_input . columns or col_name in extra ):
1062
+ def _escape_col_name (columns , col_name , extra ):
1063
+ while columns is not None and (col_name in columns or col_name in extra ):
1064
1064
col_name = "_" + col_name
1065
1065
return col_name
1066
1066
@@ -1307,37 +1307,36 @@ def build_dataframe(args, constructor):
1307
1307
1308
1308
# Cast data_frame argument to DataFrame (it could be a numpy array, dict etc.)
1309
1309
df_provided = args ["data_frame" ] is not None
1310
+ needs_interchanging = False
1310
1311
if df_provided and not isinstance (args ["data_frame" ], pd .DataFrame ):
1311
1312
if hasattr (args ["data_frame" ], "__dataframe__" ) and version .parse (
1312
1313
pd .__version__
1313
1314
) >= version .parse ("2.0.2" ):
1314
1315
import pandas .api .interchange
1315
1316
1316
1317
df_not_pandas = args ["data_frame" ]
1317
- try :
1318
- df_pandas = pandas .api .interchange .from_dataframe (df_not_pandas )
1319
- except (ImportError , NotImplementedError ) as exc :
1320
- # temporary workaround; developers of third-party libraries themselves
1321
- # should try a different implementation, if available. For example:
1322
- # def __dataframe__(self, ...):
1323
- # if not some_condition:
1324
- # self.to_pandas(...)
1325
- if not hasattr (df_not_pandas , "to_pandas" ):
1326
- raise exc
1327
- df_pandas = df_not_pandas .to_pandas ()
1328
- args ["data_frame" ] = df_pandas
1318
+ args ["data_frame" ] = df_not_pandas .__dataframe__ ()
1319
+ columns = args ["data_frame" ].column_names ()
1320
+ needs_interchanging = True
1329
1321
elif hasattr (args ["data_frame" ], "to_pandas" ):
1330
1322
args ["data_frame" ] = args ["data_frame" ].to_pandas ()
1323
+ columns = args ["data_frame" ].columns
1331
1324
else :
1332
1325
args ["data_frame" ] = pd .DataFrame (args ["data_frame" ])
1326
+ columns = args ["data_frame" ].columns
1327
+ elif df_provided :
1328
+ columns = args ["data_frame" ].columns
1329
+ else :
1330
+ columns = None
1331
+
1333
1332
df_input = args ["data_frame" ]
1334
1333
1335
1334
# now we handle special cases like wide-mode or x-xor-y specification
1336
1335
# by rearranging args to tee things up for process_args_into_dataframe to work
1337
1336
no_x = args .get ("x" ) is None
1338
1337
no_y = args .get ("y" ) is None
1339
- wide_x = False if no_x else _is_col_list (df_input , args ["x" ])
1340
- wide_y = False if no_y else _is_col_list (df_input , args ["y" ])
1338
+ wide_x = False if no_x else _is_col_list (columns , args ["x" ])
1339
+ wide_y = False if no_y else _is_col_list (columns , args ["y" ])
1341
1340
1342
1341
wide_mode = False
1343
1342
var_name = None # will likely be "variable" in wide_mode
@@ -1352,15 +1351,18 @@ def build_dataframe(args, constructor):
1352
1351
)
1353
1352
if df_provided and no_x and no_y :
1354
1353
wide_mode = True
1355
- if isinstance (df_input . columns , pd .MultiIndex ):
1354
+ if isinstance (columns , pd .MultiIndex ):
1356
1355
raise TypeError (
1357
1356
"Data frame columns is a pandas MultiIndex. "
1358
1357
"pandas MultiIndex is not supported by plotly express "
1359
1358
"at the moment."
1360
1359
)
1361
- args ["wide_variable" ] = list (df_input .columns )
1362
- var_name = df_input .columns .name
1363
- if var_name in [None , "value" , "index" ] or var_name in df_input :
1360
+ args ["wide_variable" ] = list (columns )
1361
+ if isinstance (columns , pd .Index ):
1362
+ var_name = columns .name
1363
+ else :
1364
+ var_name = None
1365
+ if var_name in [None , "value" , "index" ] or var_name in columns :
1364
1366
var_name = "variable"
1365
1367
if constructor == go .Funnel :
1366
1368
wide_orientation = args .get ("orientation" ) or "h"
@@ -1371,12 +1373,12 @@ def build_dataframe(args, constructor):
1371
1373
elif wide_x != wide_y :
1372
1374
wide_mode = True
1373
1375
args ["wide_variable" ] = args ["y" ] if wide_y else args ["x" ]
1374
- if df_provided and args ["wide_variable" ] is df_input . columns :
1375
- var_name = df_input . columns .name
1376
+ if df_provided and args ["wide_variable" ] is columns :
1377
+ var_name = columns .name
1376
1378
if isinstance (args ["wide_variable" ], pd .Index ):
1377
1379
args ["wide_variable" ] = list (args ["wide_variable" ])
1378
1380
if var_name in [None , "value" , "index" ] or (
1379
- df_provided and var_name in df_input
1381
+ df_provided and var_name in columns
1380
1382
):
1381
1383
var_name = "variable"
1382
1384
if hist1d_orientation :
@@ -1389,8 +1391,35 @@ def build_dataframe(args, constructor):
1389
1391
wide_cross_name = "__x__" if wide_y else "__y__"
1390
1392
1391
1393
if wide_mode :
1392
- value_name = _escape_col_name (df_input , "value" , [])
1393
- var_name = _escape_col_name (df_input , var_name , [])
1394
+ value_name = _escape_col_name (columns , "value" , [])
1395
+ var_name = _escape_col_name (columns , var_name , [])
1396
+
1397
+ if needs_interchanging :
1398
+ try :
1399
+ if wide_mode or not hasattr (args ["data_frame" ], "select_columns_by_name" ):
1400
+ args ["data_frame" ] = pd .api .interchange .from_dataframe (
1401
+ args ["data_frame" ]
1402
+ )
1403
+ else :
1404
+ # Save precious resources by only interchanging columns that are
1405
+ # actually going to be plotted.
1406
+ columns = [
1407
+ i for i in args .values () if isinstance (i , str ) and i in columns
1408
+ ]
1409
+ args ["data_frame" ] = pd .api .interchange .from_dataframe (
1410
+ args ["data_frame" ].select_columns_by_name (columns )
1411
+ )
1412
+ except (ImportError , NotImplementedError ) as exc :
1413
+ # temporary workaround; developers of third-party libraries themselves
1414
+ # should try a different implementation, if available. For example:
1415
+ # def __dataframe__(self, ...):
1416
+ # if not some_condition:
1417
+ # self.to_pandas(...)
1418
+ if not hasattr (df_not_pandas , "to_pandas" ):
1419
+ raise exc
1420
+ args ["data_frame" ] = df_not_pandas .to_pandas ()
1421
+
1422
+ df_input = args ["data_frame" ]
1394
1423
1395
1424
missing_bar_dim = None
1396
1425
if (
0 commit comments