From ec10b7b17096244a2d847421a2ab88cff7389304 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Mon, 30 Mar 2020 22:23:02 -0400 Subject: [PATCH 01/69] initial cut at wide-form support --- .../python/plotly/plotly/express/_core.py | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 613920d05fa..f79b2ec3603 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -869,7 +869,7 @@ def _get_reserved_col_names(args, attrables, array_attrables): return reserved_names -def build_dataframe(args, attrables, array_attrables): +def build_dataframe(args, attrables, array_attrables, constructor): """ Constructs a dataframe and modifies `args` in-place. @@ -898,6 +898,22 @@ def build_dataframe(args, attrables, array_attrables): df_provided = args["data_frame"] is not None if df_provided and not isinstance(args["data_frame"], pd.DataFrame): args["data_frame"] = pd.DataFrame(args["data_frame"]) + + if not args.get("x", None) and not args.get("y", None) and df_provided: + if constructor in [go.Scatter, go.Bar]: + args["data_frame"] = args["data_frame"].reset_index().melt(id_vars="index") + args["x"] = "index" + args["y"] = "value" + args["color"] = "variable" + if constructor in [go.Violin, go.Box]: + args["data_frame"] = args["data_frame"].reset_index().melt(id_vars="index") + args["x"] = "variable" + args["y"] = "value" + if constructor in [go.Histogram]: + args["data_frame"] = args["data_frame"].reset_index().melt(id_vars="index") + args["x"] = "value" + args["color"] = "variable" + df_input = args["data_frame"] # We start from an empty DataFrame @@ -1208,7 +1224,7 @@ def infer_config(args, constructor, trace_patch): if group_attr in args: all_attrables += [group_attr] - args = build_dataframe(args, all_attrables, array_attrables) + args = build_dataframe(args, all_attrables, 
array_attrables, constructor) if constructor in [go.Treemap, go.Sunburst] and args["path"] is not None: args = process_dataframe_hierarchy(args) From 2168ec8f499402b9fa29ad8c39140288edb07176 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Thu, 2 Apr 2020 09:11:08 -0400 Subject: [PATCH 02/69] wip --- .../python/plotly/plotly/express/_core.py | 30 ++++++++++++++----- .../tests/test_core/test_px/test_px_input.py | 12 ++++---- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index f79b2ec3603..e239cdc5eb1 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -899,20 +899,36 @@ def build_dataframe(args, attrables, array_attrables, constructor): if df_provided and not isinstance(args["data_frame"], pd.DataFrame): args["data_frame"] = pd.DataFrame(args["data_frame"]) - if not args.get("x", None) and not args.get("y", None) and df_provided: + wide_traces = [go.Scatter, go.Bar, go.Violin, go.Box, go.Histogram] + has_x = args.get("x", None) is not None + has_y = args.get("y", None) is not None + if not has_x and not has_y and df_provided and constructor in wide_traces: + index_name = args["data_frame"].index.name or "index" + id_vars = [index_name] + # TODO multi-level index + # TODO multi-level columns + # TODO orientation + + # TODO do we need to add everything to this candidate list basically? array_attrables? + # TODO will we need to be able to glue in non-string values here, like arrays and stuff? + # ...like maybe this needs to run after we've glued together the data frame? 
+ for candidate in ["color", "symbol", "line_dash", "facet_row", "facet_col"] + [ + "line_group", + "animation_group", + ]: + if args.get(candidate, None) not in [None, index_name, "value", "variable"]: + id_vars.append(args[candidate]) + args["data_frame"] = args["data_frame"].reset_index().melt(id_vars=id_vars) if constructor in [go.Scatter, go.Bar]: - args["data_frame"] = args["data_frame"].reset_index().melt(id_vars="index") - args["x"] = "index" + args["x"] = index_name args["y"] = "value" - args["color"] = "variable" + args["color"] = args["color"] or "variable" if constructor in [go.Violin, go.Box]: - args["data_frame"] = args["data_frame"].reset_index().melt(id_vars="index") args["x"] = "variable" args["y"] = "value" if constructor in [go.Histogram]: - args["data_frame"] = args["data_frame"].reset_index().melt(id_vars="index") args["x"] = "value" - args["color"] = "variable" + args["color"] = args["color"] or "variable" df_input = args["data_frame"] diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py index e3786f6af90..8d630997e48 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py @@ -225,7 +225,7 @@ def test_build_df_from_lists(): output = {key: key for key in args} df = pd.DataFrame(args) args["data_frame"] = None - out = build_dataframe(args, all_attrables, array_attrables) + out = build_dataframe(args, all_attrables, array_attrables, None) assert_frame_equal(df.sort_index(axis=1), out["data_frame"].sort_index(axis=1)) out.pop("data_frame") assert out == output @@ -235,7 +235,7 @@ def test_build_df_from_lists(): output = {key: key for key in args} df = pd.DataFrame(args) args["data_frame"] = None - out = build_dataframe(args, all_attrables, array_attrables) + out = build_dataframe(args, all_attrables, array_attrables, None) 
assert_frame_equal(df.sort_index(axis=1), out["data_frame"].sort_index(axis=1)) out.pop("data_frame") assert out == output @@ -244,7 +244,7 @@ def test_build_df_from_lists(): def test_build_df_with_index(): tips = px.data.tips() args = dict(data_frame=tips, x=tips.index, y="total_bill") - out = build_dataframe(args, all_attrables, array_attrables) + out = build_dataframe(args, all_attrables, array_attrables, None) assert_frame_equal(tips.reset_index()[out["data_frame"].columns], out["data_frame"]) @@ -254,15 +254,15 @@ def test_non_matching_index(): expected = pd.DataFrame(dict(x=["a", "b", "c"], y=[1, 2, 3])) args = dict(data_frame=df, x=df.index, y="y") - out = build_dataframe(args, all_attrables, array_attrables) + out = build_dataframe(args, all_attrables, array_attrables, None) assert_frame_equal(expected, out["data_frame"]) args = dict(data_frame=None, x=df.index, y=df.y) - out = build_dataframe(args, all_attrables, array_attrables) + out = build_dataframe(args, all_attrables, array_attrables, None) assert_frame_equal(expected, out["data_frame"]) args = dict(data_frame=None, x=["a", "b", "c"], y=df.y) - out = build_dataframe(args, all_attrables, array_attrables) + out = build_dataframe(args, all_attrables, array_attrables, None) assert_frame_equal(expected, out["data_frame"]) From 79be641c2b57bf3fc77ade4b3d9db180b1f3329c Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Thu, 2 Apr 2020 17:07:06 -0400 Subject: [PATCH 03/69] wip --- .../python/plotly/plotly/express/_core.py | 71 ++++++++++--------- 1 file changed, 38 insertions(+), 33 deletions(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index e239cdc5eb1..8aae2f14f28 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -887,6 +887,8 @@ def build_dataframe(args, attrables, array_attrables, constructor): array_attrables : list argument names corresponding to iterables, such as 
`hover_data`, ... """ + + # make copies of all the fields via dict() and list() for field in args: if field in array_attrables and args[field] is not None: args[field] = ( @@ -894,46 +896,26 @@ def build_dataframe(args, attrables, array_attrables, constructor): if isinstance(args[field], dict) else list(args[field]) ) + # Cast data_frame argument to DataFrame (it could be a numpy array, dict etc.) df_provided = args["data_frame"] is not None if df_provided and not isinstance(args["data_frame"], pd.DataFrame): args["data_frame"] = pd.DataFrame(args["data_frame"]) - wide_traces = [go.Scatter, go.Bar, go.Violin, go.Box, go.Histogram] - has_x = args.get("x", None) is not None - has_y = args.get("y", None) is not None - if not has_x and not has_y and df_provided and constructor in wide_traces: - index_name = args["data_frame"].index.name or "index" - id_vars = [index_name] - # TODO multi-level index - # TODO multi-level columns - # TODO orientation - - # TODO do we need to add everything to this candidate list basically? array_attrables? - # TODO will we need to be able to glue in non-string values here, like arrays and stuff? - # ...like maybe this needs to run after we've glued together the data frame? 
- for candidate in ["color", "symbol", "line_dash", "facet_row", "facet_col"] + [ - "line_group", - "animation_group", - ]: - if args.get(candidate, None) not in [None, index_name, "value", "variable"]: - id_vars.append(args[candidate]) - args["data_frame"] = args["data_frame"].reset_index().melt(id_vars=id_vars) - if constructor in [go.Scatter, go.Bar]: - args["x"] = index_name - args["y"] = "value" - args["color"] = args["color"] or "variable" - if constructor in [go.Violin, go.Box]: - args["x"] = "variable" - args["y"] = "value" - if constructor in [go.Histogram]: - args["x"] = "value" - args["color"] = args["color"] or "variable" - df_input = args["data_frame"] - # We start from an empty DataFrame - df_output = pd.DataFrame() + wide_mode = ( + df_provided + and args.get("x", None) is None + and args.get("y", None) is None + and constructor in [go.Scatter, go.Bar, go.Violin, go.Box, go.Histogram] + ) + wide_id_vars = set() + + if wide_mode: + df_output = df_input + else: + df_output = pd.DataFrame() # Initialize set of column names # These are reserved names @@ -1063,6 +1045,29 @@ def build_dataframe(args, attrables, array_attrables, constructor): args[field_name] = str(col_name) else: args[field_name][i] = str(col_name) + wide_id_vars.add(str(col_name)) + + if wide_mode: + # TODO multi-level index + # TODO multi-level columns + index_name = df_output.index.name or "index" + wide_id_vars.add(index_name) + if index_name not in df_output.columns: + df_output = df_output.reset_index() + df_output = df_output.melt(id_vars=wide_id_vars) + orient_v = "v" == (args.get("orientation", None) or "v") + if "orientation" in args: + args["orientation"] = "v" if orient_v else "h" + if constructor in [go.Scatter, go.Bar]: + args["x" if orient_v else "y"] = index_name + args["y" if orient_v else "x"] = "value" + args["color"] = args["color"] or "variable" + if constructor in [go.Violin, go.Box]: + args["x" if orient_v else "y"] = "variable" + args["y" if orient_v else "x"] = 
"value" + if constructor in [go.Histogram]: + args["x" if orient_v else "y"] = "value" + args["color"] = args["color"] or "variable" args["data_frame"] = df_output return args From b99c2fa351c10e94a0b18f7a152c1a17b6b02c54 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Mon, 30 Mar 2020 21:36:55 -0400 Subject: [PATCH 04/69] initial pass at PX auto-orientation --- .../plotly/plotly/express/_chart_types.py | 40 +++++------------ .../python/plotly/plotly/express/_core.py | 43 ++++++++++++++++--- 2 files changed, 48 insertions(+), 35 deletions(-) diff --git a/packages/python/plotly/plotly/express/_chart_types.py b/packages/python/plotly/plotly/express/_chart_types.py index f7e7da8cbfd..789f02af829 100644 --- a/packages/python/plotly/plotly/express/_chart_types.py +++ b/packages/python/plotly/plotly/express/_chart_types.py @@ -236,7 +236,7 @@ def area( labels={}, color_discrete_sequence=None, color_discrete_map={}, - orientation="v", + orientation=None, groupnorm=None, log_x=False, log_y=False, @@ -256,9 +256,7 @@ def area( return make_figure( args=locals(), constructor=go.Scatter, - trace_patch=dict( - stackgroup=1, mode="lines", orientation=orientation, groupnorm=groupnorm - ), + trace_patch=dict(stackgroup=1, mode="lines", groupnorm=groupnorm), ) @@ -291,7 +289,7 @@ def bar( range_color=None, color_continuous_midpoint=None, opacity=None, - orientation="v", + orientation=None, barmode="relative", log_x=False, log_y=False, @@ -335,7 +333,7 @@ def histogram( color_discrete_map={}, marginal=None, opacity=None, - orientation="v", + orientation=None, barmode="relative", barnorm=None, histnorm=None, @@ -361,13 +359,7 @@ def histogram( args=locals(), constructor=go.Histogram, trace_patch=dict( - orientation=orientation, - histnorm=histnorm, - histfunc=histfunc, - nbinsx=nbins if orientation == "v" else None, - nbinsy=None if orientation == "v" else nbins, - cumulative=dict(enabled=cumulative), - bingroup="x" if orientation == "v" else "y", + histnorm=histnorm, 
histfunc=histfunc, cumulative=dict(enabled=cumulative), ), layout_patch=dict(barmode=barmode, barnorm=barnorm), ) @@ -393,7 +385,7 @@ def violin( labels={}, color_discrete_sequence=None, color_discrete_map={}, - orientation="v", + orientation=None, violinmode="group", log_x=False, log_y=False, @@ -414,12 +406,7 @@ def violin( args=locals(), constructor=go.Violin, trace_patch=dict( - orientation=orientation, - points=points, - box=dict(visible=box), - scalegroup=True, - x0=" ", - y0=" ", + points=points, box=dict(visible=box), scalegroup=True, x0=" ", y0=" ", ), layout_patch=dict(violinmode=violinmode), ) @@ -445,7 +432,7 @@ def box( labels={}, color_discrete_sequence=None, color_discrete_map={}, - orientation="v", + orientation=None, boxmode="group", log_x=False, log_y=False, @@ -470,9 +457,7 @@ def box( return make_figure( args=locals(), constructor=go.Box, - trace_patch=dict( - orientation=orientation, boxpoints=points, notched=notched, x0=" ", y0=" " - ), + trace_patch=dict(boxpoints=points, notched=notched, x0=" ", y0=" "), layout_patch=dict(boxmode=boxmode), ) @@ -497,7 +482,7 @@ def strip( labels={}, color_discrete_sequence=None, color_discrete_map={}, - orientation="v", + orientation=None, stripmode="group", log_x=False, log_y=False, @@ -516,7 +501,6 @@ def strip( args=locals(), constructor=go.Box, trace_patch=dict( - orientation=orientation, boxpoints="all", pointpos=0, hoveron="points", @@ -1398,9 +1382,7 @@ def funnel( In a funnel plot, each row of `data_frame` is represented as a rectangular sector of a funnel. 
""" - return make_figure( - args=locals(), constructor=go.Funnel, trace_patch=dict(orientation=orientation), - ) + return make_figure(args=locals(), constructor=go.Funnel,) funnel.__doc__ = make_docstring(funnel) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 8aae2f14f28..55decefa435 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -92,6 +92,10 @@ def get_label(args, column): return column +def _is_continuous(df, col_name): + return df[col_name].dtype.kind in "ifc" + + def get_decorated_label(args, column, role): label = get_label(args, column) if "histfunc" in args and ( @@ -188,7 +192,7 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref): if ((not attr_value) or (name in attr_value)) and ( trace_spec.constructor != go.Parcoords - or args["data_frame"][name].dtype.kind in "ifc" + or _is_continuous(args["data_frame"], name) ) and ( trace_spec.constructor != go.Parcats @@ -1161,7 +1165,7 @@ def aggfunc_discrete(x): agg_f[count_colname] = "sum" if args["color"]: - if df[args["color"]].dtype.kind not in "ifc": + if not _is_continuous(df, args["color"]): aggfunc_color = aggfunc_discrete discrete_color = True elif not aggfunc_color: @@ -1249,6 +1253,36 @@ def infer_config(args, constructor, trace_patch): if constructor in [go.Treemap, go.Sunburst] and args["path"] is not None: args = process_dataframe_hierarchy(args) + if "orientation" in args: + has_x = args["x"] is not None + has_y = args["y"] is not None + if args["orientation"] is None: + if constructor in [go.Histogram, go.Scatter]: + if has_y and not has_x: + args["orientation"] = "h" + elif constructor in [go.Violin, go.Box, go.Bar, go.Funnel]: + if has_x and not has_y: + args["orientation"] = "h" + + if args["orientation"] is None and has_x and has_y: + x_is_continuous = _is_continuous(args["data_frame"], args["x"]) + y_is_continuous = 
_is_continuous(args["data_frame"], args["y"]) + if x_is_continuous and not y_is_continuous: + args["orientation"] = "h" + if y_is_continuous and not x_is_continuous: + args["orientation"] = "v" + + if args["orientation"] is None: + args["orientation"] = "v" + + if constructor == go.Histogram: + orientation = args["orientation"] + nbins = args["nbins"] + trace_patch["nbinsx"] = nbins if orientation == "v" else None + trace_patch["nbinsy"] = None if orientation == "v" else nbins + trace_patch["bingroup"] = "x" if orientation == "v" else "y" + trace_patch["orientation"] = args["orientation"] + attrs = [k for k in attrables if k in args] grouped_attrs = [] @@ -1263,10 +1297,7 @@ def infer_config(args, constructor, trace_patch): if "color_discrete_sequence" not in args: attrs.append("color") else: - if ( - args["color"] - and args["data_frame"][args["color"]].dtype.kind in "ifc" - ): + if args["color"] and _is_continuous(args["data_frame"], args["color"]): attrs.append("color") args["color_is_continuous"] = True elif constructor in [go.Sunburst, go.Treemap]: From df0b4d1d0b1741311edd8b18c52984344d3b76e3 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Tue, 31 Mar 2020 17:25:48 -0400 Subject: [PATCH 05/69] cleanup --- packages/python/plotly/plotly/express/_chart_types.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/python/plotly/plotly/express/_chart_types.py b/packages/python/plotly/plotly/express/_chart_types.py index 789f02af829..223377b2ac8 100644 --- a/packages/python/plotly/plotly/express/_chart_types.py +++ b/packages/python/plotly/plotly/express/_chart_types.py @@ -307,7 +307,7 @@ def bar( return make_figure( args=locals(), constructor=go.Bar, - trace_patch=dict(orientation=orientation, textposition="auto"), + trace_patch=dict(textposition="auto"), layout_patch=dict(barmode=barmode), ) @@ -1368,7 +1368,7 @@ def funnel( color_discrete_sequence=None, color_discrete_map={}, opacity=None, - orientation="h", + orientation=None, 
log_x=False, log_y=False, range_x=None, @@ -1382,7 +1382,7 @@ def funnel( In a funnel plot, each row of `data_frame` is represented as a rectangular sector of a funnel. """ - return make_figure(args=locals(), constructor=go.Funnel,) + return make_figure(args=locals(), constructor=go.Funnel) funnel.__doc__ = make_docstring(funnel) From 4a2ddd11ce8dd4314ab82802b3a4a8176dc014b2 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Tue, 31 Mar 2020 17:38:45 -0400 Subject: [PATCH 06/69] fix for odd box/violin spacing when axis matches color --- .../python/plotly/plotly/express/_chart_types.py | 6 +++--- packages/python/plotly/plotly/express/_core.py | 14 ++++++++++++-- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/packages/python/plotly/plotly/express/_chart_types.py b/packages/python/plotly/plotly/express/_chart_types.py index 223377b2ac8..bff581e6ff8 100644 --- a/packages/python/plotly/plotly/express/_chart_types.py +++ b/packages/python/plotly/plotly/express/_chart_types.py @@ -386,7 +386,7 @@ def violin( color_discrete_sequence=None, color_discrete_map={}, orientation=None, - violinmode="group", + violinmode=None, log_x=False, log_y=False, range_x=None, @@ -433,7 +433,7 @@ def box( color_discrete_sequence=None, color_discrete_map={}, orientation=None, - boxmode="group", + boxmode=None, log_x=False, log_y=False, range_x=None, @@ -483,7 +483,7 @@ def strip( color_discrete_sequence=None, color_discrete_map={}, orientation=None, - stripmode="group", + stripmode=None, log_x=False, log_y=False, range_x=None, diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 55decefa435..f8ac12d48a0 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1231,7 +1231,7 @@ def aggfunc_continuous(x): return args -def infer_config(args, constructor, trace_patch): +def infer_config(args, constructor, trace_patch, layout_patch): # Declare all supported 
attributes, across all plot types attrables = ( ["x", "y", "z", "a", "b", "c", "r", "theta", "size", "dimensions"] @@ -1283,6 +1283,16 @@ def infer_config(args, constructor, trace_patch): trace_patch["bingroup"] = "x" if orientation == "v" else "y" trace_patch["orientation"] = args["orientation"] + if constructor in [go.Violin, go.Box]: + mode = "boxmode" if constructor == go.Box else "violinmode" + if layout_patch[mode] is None and args["color"] is not None: + if args["y"] == args["color"] and args["orientation"] == "h": + layout_patch[mode] = "overlay" + elif args["x"] == args["color"] and args["orientation"] == "v": + layout_patch[mode] = "overlay" + if layout_patch[mode] is None: + layout_patch[mode] = "group" + attrs = [k for k in attrables if k in args] grouped_attrs = [] @@ -1433,7 +1443,7 @@ def make_figure(args, constructor, trace_patch={}, layout_patch={}): apply_default_cascade(args) args, trace_specs, grouped_mappings, sizeref, show_colorbar = infer_config( - args, constructor, trace_patch + args, constructor, trace_patch, layout_patch ) grouper = [x.grouper or one_group for x in grouped_mappings] or [one_group] grouped = args["data_frame"].groupby(grouper, sort=False) From a2ca3c5f68b9532decdba5f8fddda42513ac4564 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Tue, 31 Mar 2020 21:14:58 -0400 Subject: [PATCH 07/69] clean up default {} --- .../python/plotly/plotly/express/_core.py | 89 +++++++++---------- 1 file changed, 44 insertions(+), 45 deletions(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index f8ac12d48a0..2d24de3bf31 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1253,46 +1253,6 @@ def infer_config(args, constructor, trace_patch, layout_patch): if constructor in [go.Treemap, go.Sunburst] and args["path"] is not None: args = process_dataframe_hierarchy(args) - if "orientation" in args: - has_x = args["x"] 
is not None - has_y = args["y"] is not None - if args["orientation"] is None: - if constructor in [go.Histogram, go.Scatter]: - if has_y and not has_x: - args["orientation"] = "h" - elif constructor in [go.Violin, go.Box, go.Bar, go.Funnel]: - if has_x and not has_y: - args["orientation"] = "h" - - if args["orientation"] is None and has_x and has_y: - x_is_continuous = _is_continuous(args["data_frame"], args["x"]) - y_is_continuous = _is_continuous(args["data_frame"], args["y"]) - if x_is_continuous and not y_is_continuous: - args["orientation"] = "h" - if y_is_continuous and not x_is_continuous: - args["orientation"] = "v" - - if args["orientation"] is None: - args["orientation"] = "v" - - if constructor == go.Histogram: - orientation = args["orientation"] - nbins = args["nbins"] - trace_patch["nbinsx"] = nbins if orientation == "v" else None - trace_patch["nbinsy"] = None if orientation == "v" else nbins - trace_patch["bingroup"] = "x" if orientation == "v" else "y" - trace_patch["orientation"] = args["orientation"] - - if constructor in [go.Violin, go.Box]: - mode = "boxmode" if constructor == go.Box else "violinmode" - if layout_patch[mode] is None and args["color"] is not None: - if args["y"] == args["color"] and args["orientation"] == "h": - layout_patch[mode] = "overlay" - elif args["x"] == args["color"] and args["orientation"] == "v": - layout_patch[mode] = "overlay" - if layout_patch[mode] is None: - layout_patch[mode] = "group" - attrs = [k for k in attrables if k in args] grouped_attrs = [] @@ -1346,8 +1306,45 @@ def infer_config(args, constructor, trace_patch, layout_patch): if "symbol" in args: grouped_attrs.append("marker.symbol") - # Compute final trace patch - trace_patch = trace_patch.copy() + if "orientation" in args: + has_x = args["x"] is not None + has_y = args["y"] is not None + if args["orientation"] is None: + if constructor in [go.Histogram, go.Scatter]: + if has_y and not has_x: + args["orientation"] = "h" + elif constructor in [go.Violin, 
go.Box, go.Bar, go.Funnel]: + if has_x and not has_y: + args["orientation"] = "h" + + if args["orientation"] is None and has_x and has_y: + x_is_continuous = _is_continuous(args["data_frame"], args["x"]) + y_is_continuous = _is_continuous(args["data_frame"], args["y"]) + if x_is_continuous and not y_is_continuous: + args["orientation"] = "h" + if y_is_continuous and not x_is_continuous: + args["orientation"] = "v" + + if args["orientation"] is None: + args["orientation"] = "v" + + if constructor == go.Histogram: + orientation = args["orientation"] + nbins = args["nbins"] + trace_patch["nbinsx"] = nbins if orientation == "v" else None + trace_patch["nbinsy"] = None if orientation == "v" else nbins + trace_patch["bingroup"] = "x" if orientation == "v" else "y" + trace_patch["orientation"] = args["orientation"] + + if constructor in [go.Violin, go.Box]: + mode = "boxmode" if constructor == go.Box else "violinmode" + if layout_patch[mode] is None and args["color"] is not None: + if args["y"] == args["color"] and args["orientation"] == "h": + layout_patch[mode] = "overlay" + elif args["x"] == args["color"] and args["orientation"] == "v": + layout_patch[mode] = "overlay" + if layout_patch[mode] is None: + layout_patch[mode] = "group" if constructor in [go.Histogram2d, go.Densitymapbox]: show_colorbar = True @@ -1395,7 +1392,7 @@ def infer_config(args, constructor, trace_patch, layout_patch): # Create trace specs trace_specs = make_trace_spec(args, constructor, attrs, trace_patch) - return args, trace_specs, grouped_mappings, sizeref, show_colorbar + return trace_specs, grouped_mappings, sizeref, show_colorbar def get_orderings(args, grouper, grouped): @@ -1439,10 +1436,12 @@ def get_orderings(args, grouper, grouped): return orders, group_names, group_values -def make_figure(args, constructor, trace_patch={}, layout_patch={}): +def make_figure(args, constructor, trace_patch=None, layout_patch=None): + trace_patch = trace_patch or {} + layout_patch = layout_patch or {} 
apply_default_cascade(args) - args, trace_specs, grouped_mappings, sizeref, show_colorbar = infer_config( + trace_specs, grouped_mappings, sizeref, show_colorbar = infer_config( args, constructor, trace_patch, layout_patch ) grouper = [x.grouper or one_group for x in grouped_mappings] or [one_group] From 442f3a1e9a16d129fd97c80856ba74c389c47fae Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Tue, 31 Mar 2020 22:21:11 -0400 Subject: [PATCH 08/69] smarter histfunc defaults --- packages/python/plotly/plotly/express/_core.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 2d24de3bf31..622fe7a51ff 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1329,6 +1329,9 @@ def infer_config(args, constructor, trace_patch, layout_patch): args["orientation"] = "v" if constructor == go.Histogram: + if has_x and has_y and args["histfunc"] is None: + args["histfunc"] = trace_patch["histfunc"] = "sum" + orientation = args["orientation"] nbins = args["nbins"] trace_patch["nbinsx"] = nbins if orientation == "v" else None @@ -1346,6 +1349,13 @@ def infer_config(args, constructor, trace_patch, layout_patch): if layout_patch[mode] is None: layout_patch[mode] = "group" + if ( + constructor == go.Histogram2d + and args["z"] is not None + and args["histfunc"] is None + ): + args["histfunc"] = trace_patch["histfunc"] = "sum" + if constructor in [go.Histogram2d, go.Densitymapbox]: show_colorbar = True trace_patch["coloraxis"] = "coloraxis1" From 453e9e0d72296f9cac401b343f11389b3eb0b210 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Sun, 29 Mar 2020 14:51:12 -0400 Subject: [PATCH 09/69] px.IdentityMap --- .../python/plotly/plotly/express/__init__.py | 2 ++ .../python/plotly/plotly/express/_core.py | 25 ++++++++++++++++--- .../tests/test_core/test_px/test_px_input.py | 15 +++++++++++ 3 files changed, 39 
insertions(+), 3 deletions(-) diff --git a/packages/python/plotly/plotly/express/__init__.py b/packages/python/plotly/plotly/express/__init__.py index fb334c1b973..22a6914c847 100644 --- a/packages/python/plotly/plotly/express/__init__.py +++ b/packages/python/plotly/plotly/express/__init__.py @@ -53,6 +53,7 @@ set_mapbox_access_token, defaults, get_trendline_results, + IdentityMap, ) from . import data, colors # noqa: F401 @@ -95,4 +96,5 @@ "colors", "set_mapbox_access_token", "get_trendline_results", + "IdentityMap", ] diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 622fe7a51ff..f04576a31d4 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -41,6 +41,24 @@ def __init__(self): defaults = PxDefaults() del PxDefaults + +class IdentityMap(object): + """ + `dict`-like object which can be passed in to arguments like `color_discrete_map` to + use the provided data values as colors, rather than mapping them to colors cycled + from `color_discrete_sequence`. 
+ """ + + def __getitem__(self, key): + return key + + def __contains__(self, key): + return True + + def copy(self): + return self + + MAPBOX_TOKEN = None @@ -1491,9 +1509,10 @@ def make_figure(args, constructor, trace_patch=None, layout_patch=None): for col, val, m in zip(grouper, group_name, grouped_mappings): if col != one_group: key = get_label(args, col) - mapping_labels[key] = str(val) - if m.show_in_trace_name: - trace_name_labels[key] = str(val) + if not isinstance(m.val_map, IdentityMap): + mapping_labels[key] = str(val) + if m.show_in_trace_name: + trace_name_labels[key] = str(val) if m.variable == "animation_frame": frame_name = val trace_name = ", ".join(trace_name_labels.values()) diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py index 8d630997e48..9c3e4d4b822 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py @@ -323,3 +323,18 @@ def test_size_column(): df = px.data.tips() fig = px.scatter(df, x=df["size"], y=df.tip) assert fig.data[0].hovertemplate == "size=%{x}
tip=%{y}" + + +def test_identity_map(): + fig = px.scatter( + x=[1, 2], + y=[1, 2], + symbol=["a", "b"], + color=["red", "blue"], + color_discrete_map=px.IdentityMap(), + ) + assert fig.data[0].marker.color == "red" + assert fig.data[1].marker.color == "blue" + assert "color" not in fig.data[0].hovertemplate + assert "symbol" in fig.data[0].hovertemplate + assert fig.layout.legend.title.text == "symbol" From c08f0bfa27b702f17a62035f1e51c6de9bc6a10c Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Sun, 29 Mar 2020 15:15:11 -0400 Subject: [PATCH 10/69] px.Constant --- .../python/plotly/plotly/express/__init__.py | 2 ++ .../python/plotly/plotly/express/_core.py | 20 ++++++++++- .../tests/test_core/test_px/test_px_input.py | 34 +++++++++++++++++-- 3 files changed, 53 insertions(+), 3 deletions(-) diff --git a/packages/python/plotly/plotly/express/__init__.py b/packages/python/plotly/plotly/express/__init__.py index 22a6914c847..d27ebcc5069 100644 --- a/packages/python/plotly/plotly/express/__init__.py +++ b/packages/python/plotly/plotly/express/__init__.py @@ -54,6 +54,7 @@ defaults, get_trendline_results, IdentityMap, + Constant, ) from . 
import data, colors # noqa: F401 @@ -97,4 +98,5 @@ "set_mapbox_access_token", "get_trendline_results", "IdentityMap", + "Constant", ] diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index f04576a31d4..507e57fd8b4 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -59,6 +59,12 @@ def copy(self): return self +class Constant(object): + def __init__(self, value, label=None): + self.value = value + self.label = label + + MAPBOX_TOKEN = None @@ -955,6 +961,8 @@ def build_dataframe(args, attrables, array_attrables, constructor): else: df_output[df_input.columns] = df_input[df_input.columns] + constants = dict() + # Loop over possible arguments for field_name in attrables: # Massaging variables @@ -986,8 +994,15 @@ def build_dataframe(args, attrables, array_attrables, constructor): "pandas MultiIndex is not supported by plotly express " "at the moment." % field ) + # ----------------- argument is a constant ---------------------- + if isinstance(argument, Constant): + col_name = _check_name_not_reserved( + str(argument.label) if argument.label is not None else field, + reserved_names, + ) + constants[col_name] = argument.value # ----------------- argument is a col name ---------------------- - if isinstance(argument, str) or isinstance( + elif isinstance(argument, str) or isinstance( argument, int ): # just a column name given as str or int if not df_provided: @@ -1091,6 +1106,9 @@ def build_dataframe(args, attrables, array_attrables, constructor): args["x" if orient_v else "y"] = "value" args["color"] = args["color"] or "variable" + for col_name in constants: + df_output[col_name] = constants[col_name] + args["data_frame"] = df_output return args diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py index 9c3e4d4b822..d4a1b39992e 100644 --- 
a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py @@ -335,6 +335,36 @@ def test_identity_map(): ) assert fig.data[0].marker.color == "red" assert fig.data[1].marker.color == "blue" - assert "color" not in fig.data[0].hovertemplate - assert "symbol" in fig.data[0].hovertemplate + assert "color=" not in fig.data[0].hovertemplate + assert "symbol=" in fig.data[0].hovertemplate + assert fig.layout.legend.title.text == "symbol" + + +def test_constants(): + fig = px.scatter(x=px.Constant(1), y=[1, 2]) + assert fig.data[0].x[0] == 1 + assert fig.data[0].x[1] == 1 + assert "x=" in fig.data[0].hovertemplate + + fig = px.scatter(x=px.Constant(1, label="time"), y=[1, 2]) + assert fig.data[0].x[0] == 1 + assert fig.data[0].x[1] == 1 + assert "x=" not in fig.data[0].hovertemplate + assert "time=" in fig.data[0].hovertemplate + + fig = px.scatter( + x=[1, 2], + y=[1, 2], + symbol=["a", "b"], + color=px.Constant("red", label="the_identity_label"), + hover_data=[px.Constant("data", label="the_data")], + color_discrete_map=px.IdentityMap(), + ) + assert fig.data[0].marker.color == "red" + assert fig.data[0].customdata[0][0] == "data" + assert fig.data[1].marker.color == "red" + assert "color=" not in fig.data[0].hovertemplate + assert "the_identity_label=" not in fig.data[0].hovertemplate + assert "symbol=" in fig.data[0].hovertemplate + assert "the_data=" in fig.data[0].hovertemplate assert fig.layout.legend.title.text == "symbol" From 77139b9fab45fcb1f0adc233d3d8a456b06bdfe4 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Mon, 30 Mar 2020 09:14:22 -0400 Subject: [PATCH 11/69] PR feedback --- .../python/plotly/plotly/express/__init__.py | 3 ++ .../python/plotly/plotly/express/_core.py | 30 ++++--------------- .../plotly/plotly/express/_special_inputs.py | 29 ++++++++++++++++++ .../tests/test_core/test_px/test_px_input.py | 13 ++++++++ 4 files changed, 51 insertions(+), 24 
deletions(-) create mode 100644 packages/python/plotly/plotly/express/_special_inputs.py diff --git a/packages/python/plotly/plotly/express/__init__.py b/packages/python/plotly/plotly/express/__init__.py index d27ebcc5069..9df5b21ac8a 100644 --- a/packages/python/plotly/plotly/express/__init__.py +++ b/packages/python/plotly/plotly/express/__init__.py @@ -53,6 +53,9 @@ set_mapbox_access_token, defaults, get_trendline_results, +) + +from ._special_inputs import ( # noqa: F401 IdentityMap, Constant, ) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 507e57fd8b4..8e07f826a9d 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1,6 +1,7 @@ import plotly.graph_objs as go import plotly.io as pio from collections import namedtuple, OrderedDict +from ._special_inputs import IdentityMap, Constant from _plotly_utils.basevalidators import ColorscaleValidator from .colors import qualitative, sequential @@ -42,29 +43,6 @@ def __init__(self): del PxDefaults -class IdentityMap(object): - """ - `dict`-like object which can be passed in to arguments like `color_discrete_map` to - use the provided data values as colors, rather than mapping them to colors cycled - from `color_discrete_sequence`. 
- """ - - def __getitem__(self, key): - return key - - def __contains__(self, key): - return True - - def copy(self): - return self - - -class Constant(object): - def __init__(self, value, label=None): - self.value = value - self.label = label - - MAPBOX_TOKEN = None @@ -165,11 +143,15 @@ def make_mapping(args, variable): if variable == "dash": arg_name = "line_dash" vprefix = "line_dash" + if args[vprefix + "_map"] == "identity": + val_map = IdentityMap() + else: + val_map = args[vprefix + "_map"].copy() return Mapping( show_in_trace_name=True, variable=variable, grouper=args[arg_name], - val_map=args[vprefix + "_map"].copy(), + val_map=val_map, sequence=args[vprefix + "_sequence"], updater=lambda trace, v: trace.update({parent: {variable: v}}), facet=None, diff --git a/packages/python/plotly/plotly/express/_special_inputs.py b/packages/python/plotly/plotly/express/_special_inputs.py new file mode 100644 index 00000000000..8118dafe0a0 --- /dev/null +++ b/packages/python/plotly/plotly/express/_special_inputs.py @@ -0,0 +1,29 @@ + +class IdentityMap(object): + """ + `dict`-like object which acts as if the value for any key is the key itself. Objects + of this class can be passed in to arguments like `color_discrete_map` to + use the provided data values as colors, rather than mapping them to colors cycled + from `color_discrete_sequence`. This works for any `_map` argument to Plotly Express + functions, such as `line_dash_map` and `symbol_map`. + """ + + def __getitem__(self, key): + return key + + def __contains__(self, key): + return True + + def copy(self): + return self + + +class Constant(object): + """ + Objects of this class can be passed to Plotly Express functions that expect column + identifiers or list-like objects to indicate that this attribute should take on a + constant value. An optional label can be provided. 
+ """ + def __init__(self, value, label=None): + self.value = value + self.label = label diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py index d4a1b39992e..6c8147443ed 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py @@ -339,6 +339,19 @@ def test_identity_map(): assert "symbol=" in fig.data[0].hovertemplate assert fig.layout.legend.title.text == "symbol" + fig = px.scatter( + x=[1, 2], + y=[1, 2], + symbol=["a", "b"], + color=["red", "blue"], + color_discrete_map="identity", + ) + assert fig.data[0].marker.color == "red" + assert fig.data[1].marker.color == "blue" + assert "color=" not in fig.data[0].hovertemplate + assert "symbol=" in fig.data[0].hovertemplate + assert fig.layout.legend.title.text == "symbol" + def test_constants(): fig = px.scatter(x=px.Constant(1), y=[1, 2]) From ba36fa97d4bc9f54183df679f9fdf1e7d0665943 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Mon, 30 Mar 2020 09:34:26 -0400 Subject: [PATCH 12/69] black --- packages/python/plotly/plotly/express/_special_inputs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/python/plotly/plotly/express/_special_inputs.py b/packages/python/plotly/plotly/express/_special_inputs.py index 8118dafe0a0..3dfff0f3c8e 100644 --- a/packages/python/plotly/plotly/express/_special_inputs.py +++ b/packages/python/plotly/plotly/express/_special_inputs.py @@ -1,4 +1,3 @@ - class IdentityMap(object): """ `dict`-like object which acts as if the value for any key is the key itself. Objects @@ -24,6 +23,7 @@ class Constant(object): identifiers or list-like objects to indicate that this attribute should take on a constant value. An optional label can be provided. 
""" + def __init__(self, value, label=None): self.value = value self.label = label From dd043d6542afeab94a1171f509cee24fbaa3733b Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Fri, 3 Apr 2020 16:10:02 -0400 Subject: [PATCH 13/69] satisfy flake8 --- .../tests/test_core/test_px/test_px_input.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py index 6c8147443ed..9d1f0de3d27 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py @@ -167,7 +167,7 @@ def test_arrayattrable_numpy(): def test_wrong_column_name(): with pytest.raises(ValueError) as err_msg: - fig = px.scatter(px.data.tips(), x="bla", y="wrong") + px.scatter(px.data.tips(), x="bla", y="wrong") assert "Value of 'x' is not the name of a column in 'data_frame'" in str( err_msg.value ) @@ -175,33 +175,33 @@ def test_wrong_column_name(): def test_missing_data_frame(): with pytest.raises(ValueError) as err_msg: - fig = px.scatter(x="arg1", y="arg2") + px.scatter(x="arg1", y="arg2") assert "String or int arguments are only possible" in str(err_msg.value) def test_wrong_dimensions_of_array(): with pytest.raises(ValueError) as err_msg: - fig = px.scatter(x=[1, 2, 3], y=[2, 3, 4, 5]) + px.scatter(x=[1, 2, 3], y=[2, 3, 4, 5]) assert "All arguments should have the same length." in str(err_msg.value) def test_wrong_dimensions_mixed_case(): with pytest.raises(ValueError) as err_msg: df = pd.DataFrame(dict(time=[1, 2, 3], temperature=[20, 30, 25])) - fig = px.scatter(df, x="time", y="temperature", color=[1, 3, 9, 5]) + px.scatter(df, x="time", y="temperature", color=[1, 3, 9, 5]) assert "All arguments should have the same length." 
in str(err_msg.value) def test_wrong_dimensions(): with pytest.raises(ValueError) as err_msg: - fig = px.scatter(px.data.tips(), x="tip", y=[1, 2, 3]) + px.scatter(px.data.tips(), x="tip", y=[1, 2, 3]) assert "All arguments should have the same length." in str(err_msg.value) # the order matters with pytest.raises(ValueError) as err_msg: - fig = px.scatter(px.data.tips(), x=[1, 2, 3], y="tip") + px.scatter(px.data.tips(), x=[1, 2, 3], y="tip") assert "All arguments should have the same length." in str(err_msg.value) with pytest.raises(ValueError): - fig = px.scatter(px.data.tips(), x=px.data.iris().index, y="tip") + px.scatter(px.data.tips(), x=px.data.iris().index, y="tip") # assert "All arguments should have the same length." in str(err_msg.value) @@ -211,9 +211,9 @@ def test_multiindex_raise_error(): ) df = pd.DataFrame(np.random.random((6, 3)), index=index, columns=["A", "B", "C"]) # This is ok - fig = px.scatter(df, x="A", y="B") + px.scatter(df, x="A", y="B") with pytest.raises(TypeError) as err_msg: - fig = px.scatter(df, x=df.index, y="B") + px.scatter(df, x=df.index, y="B") assert "pandas MultiIndex is not supported by plotly express" in str( err_msg.value ) @@ -299,7 +299,7 @@ def test_arguments_not_modified(): iris = px.data.iris() petal_length = iris.petal_length hover_data = [iris.sepal_length] - fig = px.scatter(iris, x=petal_length, y="petal_width", hover_data=hover_data) + px.scatter(iris, x=petal_length, y="petal_width", hover_data=hover_data) assert iris.petal_length.equals(petal_length) assert iris.sepal_length.equals(hover_data[0]) From f952e64d98124791586ccb24953f60d11d9f6c87 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Fri, 3 Apr 2020 16:27:57 -0400 Subject: [PATCH 14/69] test auto orient box-like --- .../tests/test_core/test_px/test_px_input.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py 
b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py index 9d1f0de3d27..c9b3054cd26 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py @@ -381,3 +381,23 @@ def test_constants(): assert "symbol=" in fig.data[0].hovertemplate assert "the_data=" in fig.data[0].hovertemplate assert fig.layout.legend.title.text == "symbol" + + +def test_auto_orient(): + categorical = ["a", "a", "b", "b"] + numerical = [1, 2, 3, 4] + + pattern = [ + (numerical, numerical, "v"), # default + (numerical, categorical, "h"), # auto + (numerical, None, "h"), # auto + (categorical, numerical, "v"), # auto/default + (categorical, categorical, "v"), # default + (categorical, None, "h"), # auto + (None, categorical, "v"), # auto/default + (None, numerical, "v"), # auto/default + ] + + for fn in [px.violin, px.box, px.strip]: + for x, y, result in pattern: + assert fn(x=x, y=y).data[0].orientation == result From f58c3b5ed0f1fce598200615a9d1c36d512e5c87 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Fri, 3 Apr 2020 17:03:33 -0400 Subject: [PATCH 15/69] locking down auto_orientation with tests --- .../tests/test_core/test_px/test_px_input.py | 72 +++++++++++++++++-- 1 file changed, 65 insertions(+), 7 deletions(-) diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py index c9b3054cd26..5a210284578 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py @@ -387,17 +387,75 @@ def test_auto_orient(): categorical = ["a", "a", "b", "b"] numerical = [1, 2, 3, 4] - pattern = [ - (numerical, numerical, "v"), # default - (numerical, categorical, "h"), # auto + pattern_x_or_y = [ (numerical, None, "h"), # auto - (categorical, numerical, "v"), # auto/default - 
(categorical, categorical, "v"), # default (categorical, None, "h"), # auto (None, categorical, "v"), # auto/default (None, numerical, "v"), # auto/default ] - for fn in [px.violin, px.box, px.strip]: - for x, y, result in pattern: + pattern_x_and_y = [ + (numerical, categorical, "h"), # auto + (categorical, numerical, "v"), # auto/default + (categorical, categorical, "v"), # default + (numerical, numerical, "v"), # default + ] + + for fn in [px.violin, px.box, px.strip, px.bar, px.funnel]: + for x, y, result in pattern_x_or_y: + assert fn(x=x, y=y).data[0].orientation == result + + # these ones are the opposite of the ones above in the "or" cases + for fn in [px.area, px.histogram]: + for x, y, result in pattern_x_or_y: + assert fn(x=x, y=y).data[0].orientation != result + + # all behave the same for the "and" cases + for fn in [px.violin, px.box, px.strip, px.bar, px.funnel, px.area, px.histogram]: + for x, y, result in pattern_x_and_y: assert fn(x=x, y=y).data[0].orientation == result + + assert px.histogram(x=numerical, nbins=5).data[0].nbinsx == 5 + assert px.histogram(y=numerical, nbins=5).data[0].nbinsy == 5 + assert px.histogram(x=numerical, y=numerical, nbins=5).data[0].nbinsx == 5 + + +def test_auto_histfunc(): + a = [1, 2] + assert px.histogram(x=a).data[0].histfunc is None + assert px.histogram(y=a).data[0].histfunc is None + assert px.histogram(x=a, y=a).data[0].histfunc == "sum" + assert px.histogram(x=a, y=a, histfunc="avg").data[0].histfunc == "avg" + + assert px.density_heatmap(x=a, y=a).data[0].histfunc is None + assert px.density_heatmap(x=a, y=a, z=a).data[0].histfunc == "sum" + assert px.density_heatmap(x=a, y=a, z=a, histfunc="avg").data[0].histfunc == "avg" + + +def test_auto_boxlike_overlay(): + df = pd.DataFrame( + dict( + categorical1=["a", "a", "b", "b"], + categorical2=["a", "a", "b", "b"], + numerical=[1, 2, 3, 4], + ) + ) + + pattern = [ + ("categorical1", "numerical", None, "group"), + ("categorical1", "numerical", "categorical2", 
"group"), + ("categorical1", "numerical", "categorical1", "overlay"), + ("numerical", "categorical1", None, "group"), + ("numerical", "categorical1", "categorical2", "group"), + ("numerical", "categorical1", "categorical1", "overlay"), + ] + + fn_and_mode = [ + (px.violin, "violinmode"), + (px.box, "boxmode"), + (px.strip, "boxmode"), + ] + + for fn, mode in fn_and_mode: + for x, y, color, result in pattern: + assert fn(df, x=x, y=y, color=color).layout[mode] == result From 12e6aaadbb1135607108aff6d471a5dbdb741668 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Sun, 5 Apr 2020 22:05:50 -0400 Subject: [PATCH 16/69] respect column index name and pass through melted names --- .../python/plotly/plotly/express/_core.py | 42 ++++++++++--------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 8e07f826a9d..c65604798c0 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -924,6 +924,7 @@ def build_dataframe(args, attrables, array_attrables, constructor): if wide_mode: df_output = df_input + var_name = df_output.columns.name or "_column_" else: df_output = pd.DataFrame() @@ -996,16 +997,17 @@ def build_dataframe(args, attrables, array_attrables, constructor): ) # Check validity of column name if argument not in df_input.columns: - err_msg = ( - "Value of '%s' is not the name of a column in 'data_frame'. " - "Expected one of %s but received: %s" - % (field, str(list(df_input.columns)), argument) - ) - if argument == "index": - err_msg += ( - "\n To use the index, pass it in directly as `df.index`." + if wide_mode and argument in ("_value_", var_name): + continue + else: + err_msg = ( + "Value of '%s' is not the name of a column in 'data_frame'. 
" + "Expected one of %s but received: %s" + % (field, str(list(df_input.columns)), argument) ) - raise ValueError(err_msg) + if argument == "index": + err_msg += "\n To use the index, pass it in directly as `df.index`." + raise ValueError(err_msg) if length and len(df_input[argument]) != length: raise ValueError( "All arguments should have the same length. " @@ -1066,6 +1068,9 @@ def build_dataframe(args, attrables, array_attrables, constructor): args[field_name][i] = str(col_name) wide_id_vars.add(str(col_name)) + for col_name in constants: + df_output[col_name] = constants[col_name] + if wide_mode: # TODO multi-level index # TODO multi-level columns @@ -1073,23 +1078,22 @@ def build_dataframe(args, attrables, array_attrables, constructor): wide_id_vars.add(index_name) if index_name not in df_output.columns: df_output = df_output.reset_index() - df_output = df_output.melt(id_vars=wide_id_vars) + df_output = df_output.melt( + id_vars=wide_id_vars, var_name=var_name, value_name="_value_" + ) orient_v = "v" == (args.get("orientation", None) or "v") if "orientation" in args: args["orientation"] = "v" if orient_v else "h" if constructor in [go.Scatter, go.Bar]: args["x" if orient_v else "y"] = index_name - args["y" if orient_v else "x"] = "value" - args["color"] = args["color"] or "variable" + args["y" if orient_v else "x"] = "_value_" + args["color"] = args["color"] or var_name if constructor in [go.Violin, go.Box]: - args["x" if orient_v else "y"] = "variable" - args["y" if orient_v else "x"] = "value" + args["x" if orient_v else "y"] = var_name + args["y" if orient_v else "x"] = "_value_" if constructor in [go.Histogram]: - args["x" if orient_v else "y"] = "value" - args["color"] = args["color"] or "variable" - - for col_name in constants: - df_output[col_name] = constants[col_name] + args["x" if orient_v else "y"] = "_value_" + args["color"] = args["color"] or var_name args["data_frame"] = df_output return args From 767c3bdd10417fe937f354b28bc097f99e459a1f Mon Sep 
17 00:00:00 2001 From: Nicolas Kruchten Date: Sun, 5 Apr 2020 22:06:10 -0400 Subject: [PATCH 17/69] fix too-narrow type check bug --- packages/python/plotly/plotly/express/_core.py | 2 +- .../plotly/plotly/tests/test_core/test_px/test_px_input.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index c65604798c0..dab20345ea3 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1024,7 +1024,7 @@ def build_dataframe(args, attrables, array_attrables, constructor): df_output[col_name] = df_input[argument].values # ----------------- argument is a column / array / list.... ------- else: - is_index = isinstance(argument, pd.RangeIndex) + is_index = isinstance(argument, pd.Index) # First pandas # pandas series have a name but it's None if ( diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py index 5a210284578..ebaff96bd71 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py @@ -251,12 +251,14 @@ def test_build_df_with_index(): def test_non_matching_index(): df = pd.DataFrame(dict(y=[1, 2, 3]), index=["a", "b", "c"]) - expected = pd.DataFrame(dict(x=["a", "b", "c"], y=[1, 2, 3])) + expected = pd.DataFrame(dict(index=["a", "b", "c"], y=[1, 2, 3])) args = dict(data_frame=df, x=df.index, y="y") out = build_dataframe(args, all_attrables, array_attrables, None) assert_frame_equal(expected, out["data_frame"]) + expected = pd.DataFrame(dict(x=["a", "b", "c"], y=[1, 2, 3])) + args = dict(data_frame=None, x=df.index, y=df.y) out = build_dataframe(args, all_attrables, array_attrables, None) assert_frame_equal(expected, out["data_frame"]) From c06c5554c9df379fb72e047725265605cf1cd341 Mon Sep 17 
00:00:00 2001 From: Nicolas Kruchten Date: Tue, 7 Apr 2020 09:18:46 -0400 Subject: [PATCH 18/69] tests wip --- .../tests/test_core/test_px/test_px_wide.py | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py new file mode 100644 index 00000000000..5224a5067ad --- /dev/null +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py @@ -0,0 +1,99 @@ +import plotly.express as px +import plotly.graph_objects as go +import pandas as pd +from plotly.express._core import build_dataframe +from pandas.util.testing import assert_frame_equal + +attrables = ( + ["x", "y", "z", "a", "b", "c", "r", "theta", "size", "dimensions"] + + ["custom_data", "hover_name", "hover_data", "text"] + + ["names", "values", "parents", "ids"] + + ["error_x", "error_x_minus"] + + ["error_y", "error_y_minus", "error_z", "error_z_minus"] + + ["lat", "lon", "locations", "animation_group", "path"] +) +array_attrables = ["dimensions", "custom_data", "hover_data", "path"] +group_attrables = ["animation_frame", "facet_row", "facet_col", "line_group"] +all_attrables = attrables + group_attrables + ["color"] + + +def test_wide_mode_external(): + df = pd.DataFrame(dict(a=[1, 2, 3], b=[4, 5, 6], c=[7, 8, 9]), index=[11, 12, 13]) + for px_fn in [px.scatter, px.line, px.area, px.bar]: + fig = px_fn(df) + assert len(fig.data) == 3 + assert list(fig.data[0].x) == [11, 12, 13] + assert list(fig.data[0].y) == [1, 2, 3] + assert list(fig.data[1].x) == [11, 12, 13] + assert list(fig.data[1].y) == [4, 5, 6] + assert fig.layout.xaxis.title.text == "index" + assert fig.layout.yaxis.title.text == "_value_" + assert fig.layout.legend.title.text == "_column_" + if px_fn in [px.area, px.bar]: + fig = px_fn(df, orientation="h") + assert len(fig.data) == 3 + 
assert list(fig.data[0].y) == [11, 12, 13] + assert list(fig.data[0].x) == [1, 2, 3] + assert list(fig.data[1].y) == [11, 12, 13] + assert list(fig.data[1].x) == [4, 5, 6] + assert fig.layout.yaxis.title.text == "index" + assert fig.layout.xaxis.title.text == "_value_" + assert fig.layout.legend.title.text == "_column_" + for px_fn in [px.violin, px.box, px.strip]: + fig = px_fn(df) + assert len(fig.data) == 1 + assert list(fig.data[0].x) == ["a"] * 3 + ["b"] * 3 + ["c"] * 3 + assert list(fig.data[0].y) == list(range(1, 10)) + assert fig.layout.yaxis.title.text == "_value_" + assert fig.layout.xaxis.title.text == "_column_" + fig = px_fn(df, orientation="h") + assert len(fig.data) == 1 + assert list(fig.data[0].y) == ["a"] * 3 + ["b"] * 3 + ["c"] * 3 + assert list(fig.data[0].x) == list(range(1, 10)) + assert fig.layout.xaxis.title.text == "_value_" + assert fig.layout.yaxis.title.text == "_column_" + for px_fn in [px.histogram]: + fig = px_fn(df) + assert len(fig.data) == 3 + assert list(fig.data[1].x) == [4, 5, 6] + assert fig.layout.legend.title.text == "_column_" + assert fig.layout.xaxis.title.text == "_value_" + fig = px_fn(df, orientation="h") + assert len(fig.data) == 3 + assert list(fig.data[1].y) == [4, 5, 6] + assert fig.layout.legend.title.text == "_column_" + assert fig.layout.yaxis.title.text == "_value_" + + +def test_wide_mode_labels_external(): + df = pd.DataFrame(dict(a=[1, 2, 3], b=[4, 5, 6], c=[7, 8, 9]), index=[11, 12, 13]) + fig = px.bar(df) + assert fig.layout.xaxis.title.text == "index" + assert fig.layout.yaxis.title.text == "_value_" + assert fig.layout.legend.title.text == "_column_" + labels = dict(index="my index", _value_="my value", _column_="my column") + fig = px.bar(df, labels=labels) + assert fig.layout.xaxis.title.text == "my index" + assert fig.layout.yaxis.title.text == "my value" + assert fig.layout.legend.title.text == "my column" + df.index.name = "my index" + df.columns.name = "my column" + fig = px.bar(df) + assert 
fig.layout.xaxis.title.text == "my index" + assert fig.layout.yaxis.title.text == "_value_" + assert fig.layout.legend.title.text == "my column" + + +def test_wide_mode_internal(): + df_in = pd.DataFrame(dict(a=[1, 2, 3], b=[4, 5, 6]), index=[11, 12, 13]) + args_in = dict(data_frame=df_in, color=None) + args_out = build_dataframe(args_in, all_attrables, array_attrables, go.Scatter) + df_out = args_out["data_frame"] + df_out_expected = pd.DataFrame( + dict( + index=[11, 12, 13, 11, 12, 13], + _column_=["a", "a", "a", "b", "b", "b"], + _value_=[1, 2, 3, 4, 5, 6], + ) + ) + assert_frame_equal(df_out, df_out_expected) From e28fbf789425898cee6ec9e9c7321f74d4b44e09 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Tue, 7 Apr 2020 10:07:04 -0400 Subject: [PATCH 19/69] DRY up attrable lists --- .../python/plotly/plotly/express/_core.py | 60 +++++++++---------- .../tests/test_core/test_px/test_px_input.py | 25 +++----- .../tests/test_core/test_px/test_px_wide.py | 14 +---- 3 files changed, 36 insertions(+), 63 deletions(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index dab20345ea3..6e48a58a1f9 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -16,6 +16,22 @@ ) +# Declare all supported attributes, across all plot types + +attrables = ( + ["x", "y", "z", "a", "b", "c", "r", "theta", "size", "dimensions"] + + ["custom_data", "hover_name", "hover_data", "text"] + + ["names", "values", "parents", "ids"] + + ["error_x", "error_x_minus"] + + ["error_y", "error_y_minus", "error_z", "error_z_minus"] + + ["lat", "lon", "locations", "animation_group", "path"] +) +group_attrables = ["animation_frame", "facet_row", "facet_col", "line_group"] +renameable_group_attrables = ["color", "symbol", "line_dash"] +non_array_attrables = attrables + group_attrables + renameable_group_attrables +array_attrables = ["dimensions", "custom_data", "hover_data", "path"] 
+ + class PxDefaults(object): __slots__ = [ "template", @@ -850,7 +866,7 @@ def _check_name_not_reserved(field_name, reserved_names): ) -def _get_reserved_col_names(args, attrables, array_attrables): +def _get_reserved_col_names(args): """ This function builds a list of columns of the data_frame argument used as arguments, either as str/int arguments or given as columns @@ -859,7 +875,7 @@ def _get_reserved_col_names(args, attrables, array_attrables): df = args["data_frame"] reserved_names = set() for field in args: - if field not in attrables: + if field not in non_array_attrables: continue names = args[field] if field in array_attrables else [args[field]] if names is None: @@ -879,7 +895,7 @@ def _get_reserved_col_names(args, attrables, array_attrables): return reserved_names -def build_dataframe(args, attrables, array_attrables, constructor): +def build_dataframe(args, constructor): """ Constructs a dataframe and modifies `args` in-place. @@ -891,11 +907,8 @@ def build_dataframe(args, attrables, array_attrables, constructor): ---------- args : OrderedDict arguments passed to the px function and subsequently modified - attrables : list - list of keys into `args`, all of whose corresponding values are - converted into columns of a dataframe. - array_attrables : list - argument names corresponding to iterables, such as `hover_data`, ... 
+ constructor : graph_object trace class + the trace type selected for this figure """ # make copies of all the fields via dict() and list() @@ -931,7 +944,7 @@ def build_dataframe(args, attrables, array_attrables, constructor): # Initialize set of column names # These are reserved names if df_provided: - reserved_names = _get_reserved_col_names(args, attrables, array_attrables) + reserved_names = _get_reserved_col_names(args) else: reserved_names = set() @@ -947,7 +960,7 @@ def build_dataframe(args, attrables, array_attrables, constructor): constants = dict() # Loop over possible arguments - for field_name in attrables: + for field_name in non_array_attrables: # Massaging variables argument_list = ( [args.get(field_name)] @@ -1254,27 +1267,6 @@ def aggfunc_continuous(x): def infer_config(args, constructor, trace_patch, layout_patch): - # Declare all supported attributes, across all plot types - attrables = ( - ["x", "y", "z", "a", "b", "c", "r", "theta", "size", "dimensions"] - + ["custom_data", "hover_name", "hover_data", "text"] - + ["names", "values", "parents", "ids"] - + ["error_x", "error_x_minus"] - + ["error_y", "error_y_minus", "error_z", "error_z_minus"] - + ["lat", "lon", "locations", "animation_group", "path"] - ) - array_attrables = ["dimensions", "custom_data", "hover_data", "path"] - group_attrables = ["animation_frame", "facet_row", "facet_col", "line_group"] - all_attrables = attrables + group_attrables + ["color"] - group_attrs = ["symbol", "line_dash"] - for group_attr in group_attrs: - if group_attr in args: - all_attrables += [group_attr] - - args = build_dataframe(args, all_attrables, array_attrables, constructor) - if constructor in [go.Treemap, go.Sunburst] and args["path"] is not None: - args = process_dataframe_hierarchy(args) - attrs = [k for k in attrables if k in args] grouped_attrs = [] @@ -1473,6 +1465,10 @@ def make_figure(args, constructor, trace_patch=None, layout_patch=None): layout_patch = layout_patch or {} 
apply_default_cascade(args) + args = build_dataframe(args, constructor) + if constructor in [go.Treemap, go.Sunburst] and args["path"] is not None: + args = process_dataframe_hierarchy(args) + trace_specs, grouped_mappings, sizeref, show_colorbar = infer_config( args, constructor, trace_patch, layout_patch ) @@ -1651,7 +1647,7 @@ def make_figure(args, constructor, trace_patch=None, layout_patch=None): frame_list = sorted( frame_list, key=lambda f: orders[args["animation_frame"]].index(f["name"]) ) - layout_patch = layout_patch.copy() + if show_colorbar: colorvar = "z" if constructor in [go.Histogram2d, go.Densitymapbox] else "color" range_color = args["range_color"] or [None, None] diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py index ebaff96bd71..2e7152e46f0 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py @@ -1,22 +1,11 @@ import plotly.express as px +import plotly.graph_objects as go import numpy as np import pandas as pd import pytest from plotly.express._core import build_dataframe from pandas.util.testing import assert_frame_equal -attrables = ( - ["x", "y", "z", "a", "b", "c", "r", "theta", "size", "dimensions"] - + ["custom_data", "hover_name", "hover_data", "text"] - + ["error_x", "error_x_minus"] - + ["error_y", "error_y_minus", "error_z", "error_z_minus"] - + ["lat", "lon", "locations", "animation_group"] -) -array_attrables = ["dimensions", "custom_data", "hover_data"] -group_attrables = ["animation_frame", "facet_row", "facet_col", "line_group"] - -all_attrables = attrables + group_attrables + ["color"] - def test_numpy(): fig = px.scatter(x=[1, 2, 3], y=[2, 3, 4], color=[1, 3, 9]) @@ -225,7 +214,7 @@ def test_build_df_from_lists(): output = {key: key for key in args} df = pd.DataFrame(args) args["data_frame"] = None - out = 
build_dataframe(args, all_attrables, array_attrables, None) + out = build_dataframe(args, go.Scatter) assert_frame_equal(df.sort_index(axis=1), out["data_frame"].sort_index(axis=1)) out.pop("data_frame") assert out == output @@ -235,7 +224,7 @@ def test_build_df_from_lists(): output = {key: key for key in args} df = pd.DataFrame(args) args["data_frame"] = None - out = build_dataframe(args, all_attrables, array_attrables, None) + out = build_dataframe(args, go.Scatter) assert_frame_equal(df.sort_index(axis=1), out["data_frame"].sort_index(axis=1)) out.pop("data_frame") assert out == output @@ -244,7 +233,7 @@ def test_build_df_from_lists(): def test_build_df_with_index(): tips = px.data.tips() args = dict(data_frame=tips, x=tips.index, y="total_bill") - out = build_dataframe(args, all_attrables, array_attrables, None) + out = build_dataframe(args, go.Scatter) assert_frame_equal(tips.reset_index()[out["data_frame"].columns], out["data_frame"]) @@ -254,17 +243,17 @@ def test_non_matching_index(): expected = pd.DataFrame(dict(index=["a", "b", "c"], y=[1, 2, 3])) args = dict(data_frame=df, x=df.index, y="y") - out = build_dataframe(args, all_attrables, array_attrables, None) + out = build_dataframe(args, go.Scatter) assert_frame_equal(expected, out["data_frame"]) expected = pd.DataFrame(dict(x=["a", "b", "c"], y=[1, 2, 3])) args = dict(data_frame=None, x=df.index, y=df.y) - out = build_dataframe(args, all_attrables, array_attrables, None) + out = build_dataframe(args, go.Scatter) assert_frame_equal(expected, out["data_frame"]) args = dict(data_frame=None, x=["a", "b", "c"], y=df.y) - out = build_dataframe(args, all_attrables, array_attrables, None) + out = build_dataframe(args, go.Scatter) assert_frame_equal(expected, out["data_frame"]) diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py index 5224a5067ad..119e9d46d2d 100644 --- 
a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py @@ -4,18 +4,6 @@ from plotly.express._core import build_dataframe from pandas.util.testing import assert_frame_equal -attrables = ( - ["x", "y", "z", "a", "b", "c", "r", "theta", "size", "dimensions"] - + ["custom_data", "hover_name", "hover_data", "text"] - + ["names", "values", "parents", "ids"] - + ["error_x", "error_x_minus"] - + ["error_y", "error_y_minus", "error_z", "error_z_minus"] - + ["lat", "lon", "locations", "animation_group", "path"] -) -array_attrables = ["dimensions", "custom_data", "hover_data", "path"] -group_attrables = ["animation_frame", "facet_row", "facet_col", "line_group"] -all_attrables = attrables + group_attrables + ["color"] - def test_wide_mode_external(): df = pd.DataFrame(dict(a=[1, 2, 3], b=[4, 5, 6], c=[7, 8, 9]), index=[11, 12, 13]) @@ -87,7 +75,7 @@ def test_wide_mode_labels_external(): def test_wide_mode_internal(): df_in = pd.DataFrame(dict(a=[1, 2, 3], b=[4, 5, 6]), index=[11, 12, 13]) args_in = dict(data_frame=df_in, color=None) - args_out = build_dataframe(args_in, all_attrables, array_attrables, go.Scatter) + args_out = build_dataframe(args_in, go.Scatter) df_out = args_out["data_frame"] df_out_expected = pd.DataFrame( dict( From fa2fe51dd49e072838769b7e3e2b33aa4bc1e6ce Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Tue, 7 Apr 2020 21:05:03 -0400 Subject: [PATCH 20/69] testing wip --- .../python/plotly/plotly/express/_core.py | 31 ++++++----- .../tests/test_core/test_px/test_px_wide.py | 55 +++++++++++++++---- 2 files changed, 62 insertions(+), 24 deletions(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 6e48a58a1f9..f756cf7a6c0 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -17,19 +17,22 @@ # Declare all supported attributes, across 
all plot types - -attrables = ( - ["x", "y", "z", "a", "b", "c", "r", "theta", "size", "dimensions"] - + ["custom_data", "hover_name", "hover_data", "text"] - + ["names", "values", "parents", "ids"] - + ["error_x", "error_x_minus"] - + ["error_y", "error_y_minus", "error_z", "error_z_minus"] - + ["lat", "lon", "locations", "animation_group", "path"] +direct_attrables = ( + ["x", "y", "z", "a", "b", "c", "r", "theta", "size"] + + ["hover_name", "text", "names", "values", "parents"] + + ["ids", "error_x", "error_x_minus", "error_y", "error_y_minus", "error_z"] + + ["error_z_minus", "lat", "lon", "locations", "animation_group"] ) -group_attrables = ["animation_frame", "facet_row", "facet_col", "line_group"] -renameable_group_attrables = ["color", "symbol", "line_dash"] -non_array_attrables = attrables + group_attrables + renameable_group_attrables array_attrables = ["dimensions", "custom_data", "hover_data", "path"] +group_attrables = ["animation_frame", "facet_row", "facet_col", "line_group"] +renameable_group_attrables = [ + "color", # renamed to marker.color or line.color in infer_config + "symbol", # renamed to marker.symbol in infer_config + "line_dash", # renamed to line.dash in infer_config +] +all_attrables = ( + direct_attrables + array_attrables + group_attrables + renameable_group_attrables +) class PxDefaults(object): @@ -875,7 +878,7 @@ def _get_reserved_col_names(args): df = args["data_frame"] reserved_names = set() for field in args: - if field not in non_array_attrables: + if field not in all_attrables: continue names = args[field] if field in array_attrables else [args[field]] if names is None: @@ -960,7 +963,7 @@ def build_dataframe(args, constructor): constants = dict() # Loop over possible arguments - for field_name in non_array_attrables: + for field_name in all_attrables: # Massaging variables argument_list = ( [args.get(field_name)] @@ -1267,7 +1270,7 @@ def aggfunc_continuous(x): def infer_config(args, constructor, trace_patch, layout_patch): - 
attrs = [k for k in attrables if k in args] + attrs = [k for k in direct_attrables + array_attrables if k in args] grouped_attrs = [] # Compute sizeref diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py index 119e9d46d2d..a6dc955b124 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py @@ -74,14 +74,49 @@ def test_wide_mode_labels_external(): def test_wide_mode_internal(): df_in = pd.DataFrame(dict(a=[1, 2, 3], b=[4, 5, 6]), index=[11, 12, 13]) - args_in = dict(data_frame=df_in, color=None) - args_out = build_dataframe(args_in, go.Scatter) - df_out = args_out["data_frame"] - df_out_expected = pd.DataFrame( - dict( - index=[11, 12, 13, 11, 12, 13], - _column_=["a", "a", "a", "b", "b", "b"], - _value_=[1, 2, 3, 4, 5, 6], + + def extract_and_check_df(args_out): + df_out = args_out.pop("data_frame") + assert_frame_equal( + df_out, + pd.DataFrame( + dict( + index=[11, 12, 13, 11, 12, 13], + _column_=["a", "a", "a", "b", "b", "b"], + _value_=[1, 2, 3, 4, 5, 6], + ) + ), + ) + return args_out + + for trace_type in [go.Scatter, go.Bar]: + args_in = dict(data_frame=df_in.copy(), color=None) + args_out = extract_and_check_df(build_dataframe(args_in, trace_type)) + assert args_out == dict(x="index", y="_value_", color="_column_") + + # now we check with orientation + args_in = dict(data_frame=df_in.copy(), color=None, orientation="h") + args_out = extract_and_check_df(build_dataframe(args_in, trace_type)) + assert args_out == dict( + y="index", x="_value_", color="_column_", orientation="h" ) - ) - assert_frame_equal(df_out, df_out_expected) + + for trace_type in [go.Violin, go.Box]: + args_in = dict(data_frame=df_in.copy(), color=None) + args_out = extract_and_check_df(build_dataframe(args_in, trace_type)) + assert args_out == dict(x="_column_", y="_value_", 
color=None) + + # now we check with orientation + args_in = dict(data_frame=df_in.copy(), color=None, orientation="h") + args_out = extract_and_check_df(build_dataframe(args_in, trace_type)) + assert args_out == dict(y="_column_", x="_value_", color=None, orientation="h") + + for trace_type in [go.Histogram]: + args_in = dict(data_frame=df_in.copy(), color=None) + args_out = extract_and_check_df(build_dataframe(args_in, trace_type)) + assert args_out == dict(x="_value_", color="_column_") + + # now we check with orientation + args_in = dict(data_frame=df_in.copy(), color=None, orientation="h") + args_out = extract_and_check_df(build_dataframe(args_in, trace_type)) + assert args_out == dict(y="_value_", color="_column_", orientation="h") From b174d7f6a09a4f15e15c1b4b1ca1c5a50940a99f Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Wed, 8 Apr 2020 08:12:23 -0400 Subject: [PATCH 21/69] sort columns for python < 3.6 --- .../plotly/plotly/tests/test_core/test_px/test_px_wide.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py index a6dc955b124..0c2488657c7 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py @@ -78,14 +78,14 @@ def test_wide_mode_internal(): def extract_and_check_df(args_out): df_out = args_out.pop("data_frame") assert_frame_equal( - df_out, + df_out.sort_index(axis=1), pd.DataFrame( dict( index=[11, 12, 13, 11, 12, 13], _column_=["a", "a", "a", "b", "b", "b"], _value_=[1, 2, 3, 4, 5, 6], ) - ), + ).sort_index(axis=1), ) return args_out From 48e56eff991ce574deb7e027d472e3164a907fa2 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Wed, 8 Apr 2020 09:18:16 -0400 Subject: [PATCH 22/69] wide mode special case tests --- .../python/plotly/plotly/express/_core.py | 1 + 
.../tests/test_core/test_px/test_px_wide.py | 285 ++++++++++++++++++ 2 files changed, 286 insertions(+) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index f756cf7a6c0..1193caee308 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1097,6 +1097,7 @@ def build_dataframe(args, constructor): df_output = df_output.melt( id_vars=wide_id_vars, var_name=var_name, value_name="_value_" ) + df_output[var_name] = df_output[var_name].astype(str) orient_v = "v" == (args.get("orientation", None) or "v") if "orientation" in args: args["orientation"] = "v" if orient_v else "h" diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py index 0c2488657c7..27df43123e8 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py @@ -120,3 +120,288 @@ def extract_and_check_df(args_out): args_in = dict(data_frame=df_in.copy(), color=None, orientation="h") args_out = extract_and_check_df(build_dataframe(args_in, trace_type)) assert args_out == dict(y="_value_", color="_column_", orientation="h") + + +def test_wide_mode_internal_special_cases(): + def assert_df_and_args(df_in, args_in, args_expect, df_expect): + args_in["data_frame"] = df_in + args_out = build_dataframe(args_in, go.Scatter) + df_out = args_out.pop("data_frame") + # print(df_out.info()) + # print(df_expect.info()) + assert_frame_equal( + df_out.sort_index(axis=1), df_expect.sort_index(axis=1), + ) + assert args_out == args_expect + + # input is single bare array: column comes out as string "0" + assert_df_and_args( + df_in=[1, 2, 3], + args_in=dict(x=None, y=None, color=None), + args_expect=dict(x="index", y="_value_", color="_column_"), + df_expect=pd.DataFrame( + dict(index=[0, 1, 2], _value_=[1, 2, 3], 
_column_=["0", "0", "0"]) + ), + ) + + # input is single bare Series: column comes out as string "0" + assert_df_and_args( + df_in=pd.Series([1, 2, 3]), + args_in=dict(x=None, y=None, color=None), + args_expect=dict(x="index", y="_value_", color="_column_"), + df_expect=pd.DataFrame( + dict(index=[0, 1, 2], _value_=[1, 2, 3], _column_=["0", "0", "0"]) + ), + ) + + # input is a Series from a DF: we pick up the name and index values automatically + df = pd.DataFrame(dict(my_col=[1, 2, 3]), index=["a", "b", "c"]) + assert_df_and_args( + df_in=df["my_col"], + args_in=dict(x=None, y=None, color=None), + args_expect=dict(x="index", y="_value_", color="_column_"), + df_expect=pd.DataFrame( + dict( + index=["a", "b", "c"], + _value_=[1, 2, 3], + _column_=["my_col", "my_col", "my_col"], + ) + ), + ) + + # input is an index from a DF: treated like a Series basically + df = pd.DataFrame(dict(my_col=[1, 2, 3]), index=["a", "b", "c"]) + df.index.name = "my_index" + assert_df_and_args( + df_in=df.index, + args_in=dict(x=None, y=None, color=None), + args_expect=dict(x="index", y="_value_", color="_column_"), + df_expect=pd.DataFrame( + dict( + index=[0, 1, 2], + _value_=["a", "b", "c"], + _column_=["my_index", "my_index", "my_index"], + ) + ), + ) + + # input is a data frame with named row and col indices: we grab those + df = pd.DataFrame(dict(my_col=[1, 2, 3]), index=["a", "b", "c"]) + df.index.name = "my_index" + df.columns.name = "my_col_name" + assert_df_and_args( + df_in=df, + args_in=dict(x=None, y=None, color=None), + args_expect=dict(x="my_index", y="_value_", color="my_col_name"), + df_expect=pd.DataFrame( + dict( + my_index=["a", "b", "c"], + _value_=[1, 2, 3], + my_col_name=["my_col", "my_col", "my_col"], + ) + ), + ) + + # input is array of arrays: treated as rows, columns come out as string "0", "1" + assert_df_and_args( + df_in=[[1, 2], [4, 5]], + args_in=dict(x=None, y=None, color=None), + args_expect=dict(x="index", y="_value_", color="_column_"), + 
df_expect=pd.DataFrame( + dict( + index=[0, 1, 0, 1], _value_=[1, 4, 2, 5], _column_=["0", "0", "1", "1"], + ) + ), + ) + + # partial-melting by assigning symbol: we pick up that column and don't melt it + assert_df_and_args( + df_in=pd.DataFrame(dict(a=[1, 2], b=[3, 4], symbol_col=["q", "r"])), + args_in=dict(x=None, y=None, color=None, symbol="symbol_col"), + args_expect=dict(x="index", y="_value_", color="_column_", symbol="symbol_col"), + df_expect=pd.DataFrame( + dict( + index=[0, 1, 0, 1], + _value_=[1, 2, 3, 4], + _column_=["a", "a", "b", "b"], + symbol_col=["q", "r", "q", "r"], + ) + ), + ) + + # partial-melting by assigning the same column twice: we pick it up once + assert_df_and_args( + df_in=pd.DataFrame(dict(a=[1, 2], b=[3, 4], symbol_col=["q", "r"])), + args_in=dict( + x=None, y=None, color=None, symbol="symbol_col", custom_data=["symbol_col"], + ), + args_expect=dict( + x="index", + y="_value_", + color="_column_", + symbol="symbol_col", + custom_data=["symbol_col"], + ), + df_expect=pd.DataFrame( + dict( + index=[0, 1, 0, 1], + _value_=[1, 2, 3, 4], + _column_=["a", "a", "b", "b"], + symbol_col=["q", "r", "q", "r"], + ) + ), + ) + + # partial-melting by assigning more than one column: we pick them both up + assert_df_and_args( + df_in=pd.DataFrame( + dict(a=[1, 2], b=[3, 4], symbol_col=["q", "r"], data_col=["i", "j"]) + ), + args_in=dict( + x=None, y=None, color=None, symbol="symbol_col", custom_data=["data_col"], + ), + args_expect=dict( + x="index", + y="_value_", + color="_column_", + symbol="symbol_col", + custom_data=["data_col"], + ), + df_expect=pd.DataFrame( + dict( + index=[0, 1, 0, 1], + _value_=[1, 2, 3, 4], + _column_=["a", "a", "b", "b"], + symbol_col=["q", "r", "q", "r"], + data_col=["i", "j", "i", "j"], + ) + ), + ) + + # partial-melting by assigning symbol to a bare array: we pick it up with the attr name + assert_df_and_args( + df_in=pd.DataFrame(dict(a=[1, 2], b=[3, 4])), + args_in=dict(x=None, y=None, color=None, symbol=["q", 
"r"]), + args_expect=dict(x="index", y="_value_", color="_column_", symbol="symbol"), + df_expect=pd.DataFrame( + dict( + index=[0, 1, 0, 1], + _value_=[1, 2, 3, 4], + _column_=["a", "a", "b", "b"], + symbol=["q", "r", "q", "r"], + ) + ), + ) + + # assigning color to _column_ explicitly: just works + assert_df_and_args( + df_in=pd.DataFrame(dict(a=[1, 2], b=[3, 4])), + args_in=dict(x=None, y=None, color="_column_"), + args_expect=dict(x="index", y="_value_", color="_column_"), + df_expect=pd.DataFrame( + dict( + index=[0, 1, 0, 1], _value_=[1, 2, 3, 4], _column_=["a", "a", "b", "b"] + ) + ), + ) + + # assigning color to a different column: _column_ drops out of args + assert_df_and_args( + df_in=pd.DataFrame(dict(a=[1, 2], b=[3, 4], color_col=["q", "r"])), + args_in=dict(x=None, y=None, color="color_col"), + args_expect=dict(x="index", y="_value_", color="color_col"), + df_expect=pd.DataFrame( + dict( + index=[0, 1, 0, 1], + _value_=[1, 2, 3, 4], + _column_=["a", "a", "b", "b"], + color_col=["q", "r", "q", "r"], + ) + ), + ) + + # assigning _column_ to something else: just works + assert_df_and_args( + df_in=pd.DataFrame(dict(a=[1, 2], b=[3, 4])), + args_in=dict(x=None, y=None, color=None, symbol="_column_"), + args_expect=dict(x="index", y="_value_", color="_column_", symbol="_column_"), + df_expect=pd.DataFrame( + dict( + index=[0, 1, 0, 1], _value_=[1, 2, 3, 4], _column_=["a", "a", "b", "b"], + ) + ), + ) + + # swapping symbol and color: just works + assert_df_and_args( + df_in=pd.DataFrame(dict(a=[1, 2], b=[3, 4], color_col=["q", "r"])), + args_in=dict(x=None, y=None, color="color_col", symbol="_column_"), + args_expect=dict(x="index", y="_value_", color="color_col", symbol="_column_"), + df_expect=pd.DataFrame( + dict( + index=[0, 1, 0, 1], + _value_=[1, 2, 3, 4], + _column_=["a", "a", "b", "b"], + color_col=["q", "r", "q", "r"], + ) + ), + ) + + # a DF with a named column index: have to use that instead of _column_ + df = pd.DataFrame(dict(a=[1, 2], b=[3, 
4])) + df.columns.name = "my_col_name" + assert_df_and_args( + df_in=df, + args_in=dict(x=None, y=None, color=None, facet_row="my_col_name"), + args_expect=dict( + x="index", y="_value_", color="my_col_name", facet_row="my_col_name" + ), + df_expect=pd.DataFrame( + dict( + index=[0, 1, 0, 1], + _value_=[1, 2, 3, 4], + my_col_name=["a", "a", "b", "b"], + ) + ), + ) + + # passing the DF index into some other attr: works + df = pd.DataFrame(dict(a=[1, 2], b=[3, 4])) + df.columns.name = "my_col_name" + df.index.name = "my_index_name" + assert_df_and_args( + df_in=df, + args_in=dict(x=None, y=None, color=None, hover_name=df.index), + args_expect=dict( + x="my_index_name", + y="_value_", + color="my_col_name", + hover_name="my_index_name", + ), + df_expect=pd.DataFrame( + dict( + my_index_name=[0, 1, 0, 1], + _value_=[1, 2, 3, 4], + my_col_name=["a", "a", "b", "b"], + ) + ), + ) + + # assigning _value_ to something: works + df = pd.DataFrame(dict(a=[1, 2], b=[3, 4])) + df.columns.name = "my_col_name" + df.index.name = "my_index_name" + assert_df_and_args( + df_in=df, + args_in=dict(x=None, y=None, color=None, hover_name="_value_"), + args_expect=dict( + x="my_index_name", y="_value_", color="my_col_name", hover_name="_value_", + ), + df_expect=pd.DataFrame( + dict( + my_index_name=[0, 1, 0, 1], + _value_=[1, 2, 3, 4], + my_col_name=["a", "a", "b", "b"], + ) + ), + ) From d57d03573c77ba023565beb8928cfdbef87f5b70 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Wed, 8 Apr 2020 15:42:13 -0400 Subject: [PATCH 23/69] extra comments and another test --- .../tests/test_core/test_px/test_px_wide.py | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py index 27df43123e8..bf7a5177ff7 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py +++ 
b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py @@ -6,6 +6,10 @@ def test_wide_mode_external(): + # here we test this feature "black box" style by calling actual PX functions and + # inspecting the figure... this is important but clunky, and is mostly a smoke test + # allowing us to do more "white box" testing below + df = pd.DataFrame(dict(a=[1, 2, 3], b=[4, 5, 6], c=[7, 8, 9]), index=[11, 12, 13]) for px_fn in [px.scatter, px.line, px.area, px.bar]: fig = px_fn(df) @@ -54,6 +58,7 @@ def test_wide_mode_external(): def test_wide_mode_labels_external(): + # here we prove that the _uglylabels_ can be renamed using the usual labels kwarg df = pd.DataFrame(dict(a=[1, 2, 3], b=[4, 5, 6], c=[7, 8, 9]), index=[11, 12, 13]) fig = px.bar(df) assert fig.layout.xaxis.title.text == "index" @@ -73,6 +78,13 @@ def test_wide_mode_labels_external(): def test_wide_mode_internal(): + # here we do basic exhaustive testing of the various graph_object permutations + # via build_dataframe directly, which leads to more compact test code: + # we pass in args (which includes df) and look at how build_dataframe mutates + # both args and the df, and assume that since the rest of the downstream PX + # machinery has no wide-mode-specific code, and the tests above pass, that this is + # enough to prove things work + df_in = pd.DataFrame(dict(a=[1, 2, 3], b=[4, 5, 6]), index=[11, 12, 13]) def extract_and_check_df(args_out): @@ -123,6 +135,11 @@ def extract_and_check_df(args_out): def test_wide_mode_internal_special_cases(): + # given all of the above tests, and given that the melt() code is not sensitive + # to the trace type, we can do all sorts of special-case testing just by focusing + # on build_dataframe(args, go.Scatter) for various values of args, and looking at + # how args and df get mutated + def assert_df_and_args(df_in, args_in, args_expect, df_expect): args_in["data_frame"] = df_in args_out = build_dataframe(args_in, go.Scatter) @@ -405,3 +422,23 @@ def
assert_df_and_args(df_in, args_in, args_expect, df_expect): ) ), ) + + # assigning a px.Constant: works + df = pd.DataFrame(dict(a=[1, 2], b=[3, 4])) + df.columns.name = "my_col_name" + df.index.name = "my_index_name" + assert_df_and_args( + df_in=df, + args_in=dict(x=None, y=None, color=None, symbol=px.Constant(1)), + args_expect=dict( + x="my_index_name", y="_value_", color="my_col_name", symbol="symbol", + ), + df_expect=pd.DataFrame( + dict( + my_index_name=[0, 1, 0, 1], + _value_=[1, 2, 3, 4], + my_col_name=["a", "a", "b", "b"], + symbol=[1, 1, 1, 1], + ) + ), + ) From e4071a9bdecfc302bd0ee55bbe50976cf7d4be2b Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Thu, 9 Apr 2020 15:56:43 -0400 Subject: [PATCH 24/69] px.Range --- packages/python/plotly/plotly/express/__init__.py | 6 ++---- packages/python/plotly/plotly/express/_core.py | 15 +++++++++++---- .../plotly/plotly/express/_special_inputs.py | 11 +++++++++++ .../tests/test_core/test_px/test_px_input.py | 15 +++++++++++++++ 4 files changed, 39 insertions(+), 8 deletions(-) diff --git a/packages/python/plotly/plotly/express/__init__.py b/packages/python/plotly/plotly/express/__init__.py index 9df5b21ac8a..72d0b445548 100644 --- a/packages/python/plotly/plotly/express/__init__.py +++ b/packages/python/plotly/plotly/express/__init__.py @@ -55,10 +55,7 @@ get_trendline_results, ) -from ._special_inputs import ( # noqa: F401 - IdentityMap, - Constant, -) +from ._special_inputs import IdentityMap, Constant, Range # noqa: F401 from . 
import data, colors # noqa: F401 @@ -102,4 +99,5 @@ "get_trendline_results", "IdentityMap", "Constant", + "Range", ] diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 1193caee308..17ca549e804 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1,7 +1,7 @@ import plotly.graph_objs as go import plotly.io as pio from collections import namedtuple, OrderedDict -from ._special_inputs import IdentityMap, Constant +from ._special_inputs import IdentityMap, Constant, Range from _plotly_utils.basevalidators import ColorscaleValidator from .colors import qualitative, sequential @@ -961,6 +961,7 @@ def build_dataframe(args, constructor): df_output[df_input.columns] = df_input[df_input.columns] constants = dict() + ranges = list() # Loop over possible arguments for field_name in all_attrables: @@ -993,13 +994,16 @@ def build_dataframe(args, constructor): "pandas MultiIndex is not supported by plotly express " "at the moment." 
% field ) - # ----------------- argument is a constant ---------------------- - if isinstance(argument, Constant): + # ----------------- argument is a special value ---------------------- + if isinstance(argument, Constant) or isinstance(argument, Range): col_name = _check_name_not_reserved( str(argument.label) if argument.label is not None else field, reserved_names, ) - constants[col_name] = argument.value + if isinstance(argument, Constant): + constants[col_name] = argument.value + else: + ranges.append(col_name) # ----------------- argument is a col name ---------------------- elif isinstance(argument, str) or isinstance( argument, int @@ -1087,6 +1091,9 @@ def build_dataframe(args, constructor): for col_name in constants: df_output[col_name] = constants[col_name] + for col_name in ranges: + df_output[col_name] = range(len(df_output)) + if wide_mode: # TODO multi-level index # TODO multi-level columns diff --git a/packages/python/plotly/plotly/express/_special_inputs.py b/packages/python/plotly/plotly/express/_special_inputs.py index 3dfff0f3c8e..c1b3d4d102f 100644 --- a/packages/python/plotly/plotly/express/_special_inputs.py +++ b/packages/python/plotly/plotly/express/_special_inputs.py @@ -27,3 +27,14 @@ class Constant(object): def __init__(self, value, label=None): self.value = value self.label = label + + +class Range(object): + """ + Objects of this class can be passed to Plotly Express functions that expect column + identifiers or list-like objects to indicate that this attribute should be mapped + onto integers starting at 0. An optional label can be provided. 
+ """ + + def __init__(self, label=None): + self.label = label diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py index 2e7152e46f0..9df6d085a23 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py @@ -374,6 +374,21 @@ def test_constants(): assert fig.layout.legend.title.text == "symbol" +def test_ranges(): + fig = px.scatter(x=px.Range(), y=[1, 2], hover_data=[px.Range()]) + assert fig.data[0].x[0] == 0 + assert fig.data[0].x[1] == 1 + assert fig.data[0].customdata[0][0] == 0 + assert fig.data[0].customdata[1][0] == 1 + assert "x=" in fig.data[0].hovertemplate + + fig = px.scatter(x=px.Range(label="time"), y=[1, 2]) + assert fig.data[0].x[0] == 0 + assert fig.data[0].x[1] == 1 + assert "x=" not in fig.data[0].hovertemplate + assert "time=" in fig.data[0].hovertemplate + + def test_auto_orient(): categorical = ["a", "a", "b", "b"] numerical = [1, 2, 3, 4] From ea14fc9eaa32e9cd503d7ed9aee1c88719b26a0a Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Fri, 10 Apr 2020 16:01:28 -0400 Subject: [PATCH 25/69] smarter x or y behaviour --- .../plotly/plotly/express/_chart_types.py | 2 + .../python/plotly/plotly/express/_core.py | 45 ++++++--- .../tests/test_core/test_px/test_px_input.py | 99 +++++++++++++++---- .../tests/test_core/test_px/test_px_wide.py | 71 +++++++++---- 4 files changed, 171 insertions(+), 46 deletions(-) diff --git a/packages/python/plotly/plotly/express/_chart_types.py b/packages/python/plotly/plotly/express/_chart_types.py index bff581e6ff8..190ecab28b9 100644 --- a/packages/python/plotly/plotly/express/_chart_types.py +++ b/packages/python/plotly/plotly/express/_chart_types.py @@ -25,6 +25,7 @@ def scatter( animation_group=None, category_orders={}, labels={}, + orientation=None, color_discrete_sequence=None, color_discrete_map={}, 
color_continuous_scale=None, @@ -192,6 +193,7 @@ def line( animation_group=None, category_orders={}, labels={}, + orientation=None, color_discrete_sequence=None, color_discrete_map={}, line_dash_sequence=None, diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 17ca549e804..9c82b7383f0 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -930,12 +930,10 @@ def build_dataframe(args, constructor): df_input = args["data_frame"] - wide_mode = ( - df_provided - and args.get("x", None) is None - and args.get("y", None) is None - and constructor in [go.Scatter, go.Bar, go.Violin, go.Box, go.Histogram] - ) + no_x = args.get("x", None) is None + no_y = args.get("y", None) is None + wideable = [go.Scatter, go.Bar, go.Violin, go.Box, go.Histogram] + wide_mode = df_provided and no_x and no_y and constructor in wideable wide_id_vars = set() if wide_mode: @@ -944,6 +942,17 @@ def build_dataframe(args, constructor): else: df_output = pd.DataFrame() + missing_bar_dim = None + if constructor in [go.Scatter, go.Bar] and (no_x != no_y): + for ax in ["x", "y"]: + if args.get(ax, None) is None: + args[ax] = df_input.index if df_provided else Range() + if constructor == go.Scatter: + if args["orientation"] is None: + args["orientation"] = "v" if ax == "x" else "h" + if constructor == go.Bar: + missing_bar_dim = ax + # Initialize set of column names # These are reserved names if df_provided: @@ -1088,12 +1097,27 @@ def build_dataframe(args, constructor): args[field_name][i] = str(col_name) wide_id_vars.add(str(col_name)) - for col_name in constants: - df_output[col_name] = constants[col_name] + if missing_bar_dim and constructor == go.Bar: + # now that we've populated df_output, we check to see if the non-missing + # dimension is categorical: if so, then setting the missing dimension to a + # constant 1 is a less-insane thing to do than setting it to the index by +
# default and we let the normal auto-orientation-code do its thing later + other_dim = "x" if missing_bar_dim == "y" else "y" + if not _is_continuous(df_output, args[other_dim]): + args[missing_bar_dim] = missing_bar_dim + constants[missing_bar_dim] = 1 + else: + # on the other hand, if the non-missing dimension is continuous, then we + # can use this information to override the normal auto-orientation code + if args["orientation"] is None: + args["orientation"] = "v" if missing_bar_dim == "x" else "h" for col_name in ranges: df_output[col_name] = range(len(df_output)) + for col_name in constants: + df_output[col_name] = constants[col_name] + if wide_mode: # TODO multi-level index # TODO multi-level columns @@ -1105,9 +1129,8 @@ def build_dataframe(args, constructor): id_vars=wide_id_vars, var_name=var_name, value_name="_value_" ) df_output[var_name] = df_output[var_name].astype(str) - orient_v = "v" == (args.get("orientation", None) or "v") - if "orientation" in args: - args["orientation"] = "v" if orient_v else "h" + args["orientation"] = args.get("orientation", None) or "v" + orient_v = args["orientation"] == "v" if constructor in [go.Scatter, go.Bar]: args["x" if orient_v else "y"] = index_name args["y" if orient_v else "x"] = "_value_" diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py index 9df6d085a23..848950aa426 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py @@ -393,12 +393,8 @@ def test_auto_orient(): categorical = ["a", "a", "b", "b"] numerical = [1, 2, 3, 4] - pattern_x_or_y = [ - (numerical, None, "h"), # auto - (categorical, None, "h"), # auto - (None, categorical, "v"), # auto/default - (None, numerical, "v"), # auto/default - ] + auto_orientable = [px.scatter, px.line, px.area, px.violin, px.box, px.strip] + auto_orientable += 
[px.bar, px.funnel, px.histogram] pattern_x_and_y = [ (numerical, categorical, "h"), # auto @@ -406,20 +402,23 @@ def test_auto_orient(): (categorical, categorical, "v"), # default (numerical, numerical, "v"), # default ] - - for fn in [px.violin, px.box, px.strip, px.bar, px.funnel]: - for x, y, result in pattern_x_or_y: + for fn in auto_orientable: + for x, y, result in pattern_x_and_y: assert fn(x=x, y=y).data[0].orientation == result - # these ones are the opposite of the ones above in the "or" cases - for fn in [px.area, px.histogram]: - for x, y, result in pattern_x_or_y: - assert fn(x=x, y=y).data[0].orientation != result + pattern_x_or_y = [ + (numerical, None, "h"), # auto + (categorical, None, "h"), # auto + (None, categorical, "v"), # auto/default + (None, numerical, "v"), # auto/default + ] - # all behave the same for the "and" cases - for fn in [px.violin, px.box, px.strip, px.bar, px.funnel, px.area, px.histogram]: - for x, y, result in pattern_x_and_y: - assert fn(x=x, y=y).data[0].orientation == result + for fn in auto_orientable: + for x, y, result in pattern_x_or_y: + if fn == px.histogram or (fn == px.bar and categorical in [x, y]): + assert fn(x=x, y=y).data[0].orientation != result + else: + assert fn(x=x, y=y).data[0].orientation == result assert px.histogram(x=numerical, nbins=5).data[0].nbinsx == 5 assert px.histogram(y=numerical, nbins=5).data[0].nbinsy == 5 @@ -465,3 +464,69 @@ def test_auto_boxlike_overlay(): for fn, mode in fn_and_mode: for x, y, color, result in pattern: assert fn(df, x=x, y=y, color=color).layout[mode] == result + + +def test_x_or_y(): + categorical = ["a", "a", "b", "b"] + numerical = [1, 2, 3, 4] + constant = [1, 1, 1, 1] + range_4 = [0, 1, 2, 3] + index = [11, 12, 13, 14] + numerical_df = pd.DataFrame(dict(col=numerical), index=index) + categorical_df = pd.DataFrame(dict(col=categorical), index=index) + scatter_like = [px.scatter, px.line, px.area] + bar_like = [px.bar] + + for fn in scatter_like + bar_like: + fig = 
fn(x=numerical) + assert list(fig.data[0].x) == numerical + assert list(fig.data[0].y) == range_4 + assert fig.data[0].orientation == "h" + fig = fn(y=numerical) + assert list(fig.data[0].x) == range_4 + assert list(fig.data[0].y) == numerical + assert fig.data[0].orientation == "v" + fig = fn(numerical_df, x="col") + assert list(fig.data[0].x) == numerical + assert list(fig.data[0].y) == index + assert fig.data[0].orientation == "h" + fig = fn(numerical_df, y="col") + assert list(fig.data[0].x) == index + assert list(fig.data[0].y) == numerical + assert fig.data[0].orientation == "v" + + for fn in scatter_like: + fig = fn(x=categorical) + assert list(fig.data[0].x) == categorical + assert list(fig.data[0].y) == range_4 + assert fig.data[0].orientation == "h" + fig = fn(y=categorical) + assert list(fig.data[0].x) == range_4 + assert list(fig.data[0].y) == categorical + assert fig.data[0].orientation == "v" + fig = fn(categorical_df, x="col") + assert list(fig.data[0].x) == categorical + assert list(fig.data[0].y) == index + assert fig.data[0].orientation == "h" + fig = fn(categorical_df, y="col") + assert list(fig.data[0].x) == index + assert list(fig.data[0].y) == categorical + assert fig.data[0].orientation == "v" + + for fn in bar_like: + fig = fn(x=categorical) + assert list(fig.data[0].x) == categorical + assert list(fig.data[0].y) == constant + assert fig.data[0].orientation == "v" + fig = fn(y=categorical) + assert list(fig.data[0].x) == constant + assert list(fig.data[0].y) == categorical + assert fig.data[0].orientation == "h" + fig = fn(categorical_df, x="col") + assert list(fig.data[0].x) == categorical + assert list(fig.data[0].y) == constant + assert fig.data[0].orientation == "v" + fig = fn(categorical_df, y="col") + assert list(fig.data[0].x) == constant + assert list(fig.data[0].y) == categorical + assert fig.data[0].orientation == "h" diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py 
b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py index bf7a5177ff7..901f14a9747 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py @@ -104,7 +104,9 @@ def extract_and_check_df(args_out): for trace_type in [go.Scatter, go.Bar]: args_in = dict(data_frame=df_in.copy(), color=None) args_out = extract_and_check_df(build_dataframe(args_in, trace_type)) - assert args_out == dict(x="index", y="_value_", color="_column_") + assert args_out == dict( + x="index", y="_value_", color="_column_", orientation="v" + ) # now we check with orientation args_in = dict(data_frame=df_in.copy(), color=None, orientation="h") @@ -116,7 +118,7 @@ def extract_and_check_df(args_out): for trace_type in [go.Violin, go.Box]: args_in = dict(data_frame=df_in.copy(), color=None) args_out = extract_and_check_df(build_dataframe(args_in, trace_type)) - assert args_out == dict(x="_column_", y="_value_", color=None) + assert args_out == dict(x="_column_", y="_value_", color=None, orientation="v") # now we check with orientation args_in = dict(data_frame=df_in.copy(), color=None, orientation="h") @@ -126,7 +128,7 @@ def extract_and_check_df(args_out): for trace_type in [go.Histogram]: args_in = dict(data_frame=df_in.copy(), color=None) args_out = extract_and_check_df(build_dataframe(args_in, trace_type)) - assert args_out == dict(x="_value_", color="_column_") + assert args_out == dict(x="_value_", color="_column_", orientation="v") # now we check with orientation args_in = dict(data_frame=df_in.copy(), color=None, orientation="h") @@ -155,7 +157,7 @@ def assert_df_and_args(df_in, args_in, args_expect, df_expect): assert_df_and_args( df_in=[1, 2, 3], args_in=dict(x=None, y=None, color=None), - args_expect=dict(x="index", y="_value_", color="_column_"), + args_expect=dict(x="index", y="_value_", color="_column_", orientation="v"), df_expect=pd.DataFrame( dict(index=[0, 1, 2], 
_value_=[1, 2, 3], _column_=["0", "0", "0"]) ), @@ -165,7 +167,7 @@ def assert_df_and_args(df_in, args_in, args_expect, df_expect): assert_df_and_args( df_in=pd.Series([1, 2, 3]), args_in=dict(x=None, y=None, color=None), - args_expect=dict(x="index", y="_value_", color="_column_"), + args_expect=dict(x="index", y="_value_", color="_column_", orientation="v"), df_expect=pd.DataFrame( dict(index=[0, 1, 2], _value_=[1, 2, 3], _column_=["0", "0", "0"]) ), @@ -176,7 +178,7 @@ def assert_df_and_args(df_in, args_in, args_expect, df_expect): assert_df_and_args( df_in=df["my_col"], args_in=dict(x=None, y=None, color=None), - args_expect=dict(x="index", y="_value_", color="_column_"), + args_expect=dict(x="index", y="_value_", color="_column_", orientation="v"), df_expect=pd.DataFrame( dict( index=["a", "b", "c"], @@ -192,7 +194,7 @@ def assert_df_and_args(df_in, args_in, args_expect, df_expect): assert_df_and_args( df_in=df.index, args_in=dict(x=None, y=None, color=None), - args_expect=dict(x="index", y="_value_", color="_column_"), + args_expect=dict(x="index", y="_value_", color="_column_", orientation="v"), df_expect=pd.DataFrame( dict( index=[0, 1, 2], @@ -209,7 +211,9 @@ def assert_df_and_args(df_in, args_in, args_expect, df_expect): assert_df_and_args( df_in=df, args_in=dict(x=None, y=None, color=None), - args_expect=dict(x="my_index", y="_value_", color="my_col_name"), + args_expect=dict( + x="my_index", y="_value_", color="my_col_name", orientation="v" + ), df_expect=pd.DataFrame( dict( my_index=["a", "b", "c"], @@ -223,7 +227,7 @@ def assert_df_and_args(df_in, args_in, args_expect, df_expect): assert_df_and_args( df_in=[[1, 2], [4, 5]], args_in=dict(x=None, y=None, color=None), - args_expect=dict(x="index", y="_value_", color="_column_"), + args_expect=dict(x="index", y="_value_", color="_column_", orientation="v"), df_expect=pd.DataFrame( dict( index=[0, 1, 0, 1], _value_=[1, 4, 2, 5], _column_=["0", "0", "1", "1"], @@ -235,7 +239,13 @@ def 
assert_df_and_args(df_in, args_in, args_expect, df_expect): assert_df_and_args( df_in=pd.DataFrame(dict(a=[1, 2], b=[3, 4], symbol_col=["q", "r"])), args_in=dict(x=None, y=None, color=None, symbol="symbol_col"), - args_expect=dict(x="index", y="_value_", color="_column_", symbol="symbol_col"), + args_expect=dict( + x="index", + y="_value_", + color="_column_", + symbol="symbol_col", + orientation="v", + ), df_expect=pd.DataFrame( dict( index=[0, 1, 0, 1], @@ -258,6 +268,7 @@ def assert_df_and_args(df_in, args_in, args_expect, df_expect): color="_column_", symbol="symbol_col", custom_data=["symbol_col"], + orientation="v", ), df_expect=pd.DataFrame( dict( @@ -283,6 +294,7 @@ def assert_df_and_args(df_in, args_in, args_expect, df_expect): color="_column_", symbol="symbol_col", custom_data=["data_col"], + orientation="v", ), df_expect=pd.DataFrame( dict( @@ -299,7 +311,9 @@ def assert_df_and_args(df_in, args_in, args_expect, df_expect): assert_df_and_args( df_in=pd.DataFrame(dict(a=[1, 2], b=[3, 4])), args_in=dict(x=None, y=None, color=None, symbol=["q", "r"]), - args_expect=dict(x="index", y="_value_", color="_column_", symbol="symbol"), + args_expect=dict( + x="index", y="_value_", color="_column_", symbol="symbol", orientation="v" + ), df_expect=pd.DataFrame( dict( index=[0, 1, 0, 1], @@ -314,7 +328,7 @@ def assert_df_and_args(df_in, args_in, args_expect, df_expect): assert_df_and_args( df_in=pd.DataFrame(dict(a=[1, 2], b=[3, 4])), args_in=dict(x=None, y=None, color="_column_"), - args_expect=dict(x="index", y="_value_", color="_column_"), + args_expect=dict(x="index", y="_value_", color="_column_", orientation="v"), df_expect=pd.DataFrame( dict( index=[0, 1, 0, 1], _value_=[1, 2, 3, 4], _column_=["a", "a", "b", "b"] @@ -326,7 +340,7 @@ def assert_df_and_args(df_in, args_in, args_expect, df_expect): assert_df_and_args( df_in=pd.DataFrame(dict(a=[1, 2], b=[3, 4], color_col=["q", "r"])), args_in=dict(x=None, y=None, color="color_col"), - args_expect=dict(x="index", 
y="_value_", color="color_col"), + args_expect=dict(x="index", y="_value_", color="color_col", orientation="v"), df_expect=pd.DataFrame( dict( index=[0, 1, 0, 1], @@ -341,7 +355,9 @@ def assert_df_and_args(df_in, args_in, args_expect, df_expect): assert_df_and_args( df_in=pd.DataFrame(dict(a=[1, 2], b=[3, 4])), args_in=dict(x=None, y=None, color=None, symbol="_column_"), - args_expect=dict(x="index", y="_value_", color="_column_", symbol="_column_"), + args_expect=dict( + x="index", y="_value_", color="_column_", symbol="_column_", orientation="v" + ), df_expect=pd.DataFrame( dict( index=[0, 1, 0, 1], _value_=[1, 2, 3, 4], _column_=["a", "a", "b", "b"], @@ -353,7 +369,13 @@ def assert_df_and_args(df_in, args_in, args_expect, df_expect): assert_df_and_args( df_in=pd.DataFrame(dict(a=[1, 2], b=[3, 4], color_col=["q", "r"])), args_in=dict(x=None, y=None, color="color_col", symbol="_column_"), - args_expect=dict(x="index", y="_value_", color="color_col", symbol="_column_"), + args_expect=dict( + x="index", + y="_value_", + color="color_col", + symbol="_column_", + orientation="v", + ), df_expect=pd.DataFrame( dict( index=[0, 1, 0, 1], @@ -371,7 +393,11 @@ def assert_df_and_args(df_in, args_in, args_expect, df_expect): df_in=df, args_in=dict(x=None, y=None, color=None, facet_row="my_col_name"), args_expect=dict( - x="index", y="_value_", color="my_col_name", facet_row="my_col_name" + x="index", + y="_value_", + color="my_col_name", + facet_row="my_col_name", + orientation="v", ), df_expect=pd.DataFrame( dict( @@ -394,6 +420,7 @@ def assert_df_and_args(df_in, args_in, args_expect, df_expect): y="_value_", color="my_col_name", hover_name="my_index_name", + orientation="v", ), df_expect=pd.DataFrame( dict( @@ -412,7 +439,11 @@ def assert_df_and_args(df_in, args_in, args_expect, df_expect): df_in=df, args_in=dict(x=None, y=None, color=None, hover_name="_value_"), args_expect=dict( - x="my_index_name", y="_value_", color="my_col_name", hover_name="_value_", + 
x="my_index_name", + y="_value_", + color="my_col_name", + hover_name="_value_", + orientation="v", ), df_expect=pd.DataFrame( dict( @@ -431,7 +462,11 @@ def assert_df_and_args(df_in, args_in, args_expect, df_expect): df_in=df, args_in=dict(x=None, y=None, color=None, symbol=px.Constant(1)), args_expect=dict( - x="my_index_name", y="_value_", color="my_col_name", symbol="symbol", + x="my_index_name", + y="_value_", + color="my_col_name", + symbol="symbol", + orientation="v", ), df_expect=pd.DataFrame( dict( From a376c6dd69ca3cd1dbdaab72d03ec7ee91da3e6e Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Fri, 10 Apr 2020 16:02:08 -0400 Subject: [PATCH 26/69] smarter x or y behaviour --- .../tests/test_core/test_px/test_px_wide.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py index 901f14a9747..1ec0cb28cd1 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py @@ -21,16 +21,15 @@ def test_wide_mode_external(): assert fig.layout.xaxis.title.text == "index" assert fig.layout.yaxis.title.text == "_value_" assert fig.layout.legend.title.text == "_column_" - if px_fn in [px.area, px.bar]: - fig = px_fn(df, orientation="h") - assert len(fig.data) == 3 - assert list(fig.data[0].y) == [11, 12, 13] - assert list(fig.data[0].x) == [1, 2, 3] - assert list(fig.data[1].y) == [11, 12, 13] - assert list(fig.data[1].x) == [4, 5, 6] - assert fig.layout.yaxis.title.text == "index" - assert fig.layout.xaxis.title.text == "_value_" - assert fig.layout.legend.title.text == "_column_" + fig = px_fn(df, orientation="h") + assert len(fig.data) == 3 + assert list(fig.data[0].y) == [11, 12, 13] + assert list(fig.data[0].x) == [1, 2, 3] + assert list(fig.data[1].y) == [11, 12, 13] + assert list(fig.data[1].x) 
== [4, 5, 6] + assert fig.layout.yaxis.title.text == "index" + assert fig.layout.xaxis.title.text == "_value_" + assert fig.layout.legend.title.text == "_column_" for px_fn in [px.violin, px.box, px.strip]: fig = px_fn(df) assert len(fig.data) == 1 From 6f3b5c44565debd82990304b5f48361adcf0d393 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Fri, 10 Apr 2020 21:46:19 -0400 Subject: [PATCH 27/69] scattergl doesn't support orientation --- packages/python/plotly/plotly/express/_core.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 9c82b7383f0..c7ffbb39251 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1565,11 +1565,12 @@ def make_figure(args, constructor, trace_patch=None, layout_patch=None): and args["animation_frame"] is None ) ): - constructor_to_use = ( - go.Scattergl - if constructor_to_use == go.Scatter - else go.Scatterpolargl - ) + if constructor_to_use == go.Scatter: + constructor_to_use = go.Scattergl + if "orientation" in trace_patch: + del trace_patch["orientation"] + else: + constructor_to_use = go.Scatterpolargl # Create the trace trace = constructor_to_use(name=trace_name) if trace_spec.constructor not in [ From e5b96796b1952f0a62b5775250f9de6e227e955e Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Mon, 13 Apr 2020 23:17:56 -0400 Subject: [PATCH 28/69] move gl switch --- .../python/plotly/plotly/express/_core.py | 33 +++++++++---------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index c7ffbb39251..e31860f56eb 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -708,6 +708,21 @@ def frame_args(duration): def make_trace_spec(args, constructor, attrs, trace_patch): + 
if constructor in [go.Scatter, go.Scatterpolar]: + if "render_mode" in args and ( + args["render_mode"] == "webgl" + or ( + args["render_mode"] == "auto" + and len(args["data_frame"]) > 1000 + and args["animation_frame"] is None + ) + ): + if constructor == go.Scatter: + constructor = go.Scattergl + if "orientation" in trace_patch: + del trace_patch["orientation"] + else: + constructor = go.Scatterpolargl # Create base trace specification result = [TraceSpec(constructor, attrs, trace_patch, None)] @@ -1555,24 +1570,8 @@ def make_figure(args, constructor, trace_patch=None, layout_patch=None): trace_names = trace_names_by_frame[frame_name] for trace_spec in trace_specs: - constructor_to_use = trace_spec.constructor - if constructor_to_use in [go.Scatter, go.Scatterpolar]: - if "render_mode" in args and ( - args["render_mode"] == "webgl" - or ( - args["render_mode"] == "auto" - and len(args["data_frame"]) > 1000 - and args["animation_frame"] is None - ) - ): - if constructor_to_use == go.Scatter: - constructor_to_use = go.Scattergl - if "orientation" in trace_patch: - del trace_patch["orientation"] - else: - constructor_to_use = go.Scatterpolargl # Create the trace - trace = constructor_to_use(name=trace_name) + trace = trace_spec.constructor(name=trace_name) if trace_spec.constructor not in [ go.Parcats, go.Parcoords, From 6b28eda96cc8d1ec8a7b33efe185afc529202667 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Wed, 15 Apr 2020 13:10:08 -0400 Subject: [PATCH 29/69] fix Pandas warning, use parameterized tests --- .../tests/test_core/test_px/test_px_input.py | 143 +++++++++--------- 1 file changed, 69 insertions(+), 74 deletions(-) diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py index 848950aa426..7f2ab7606d2 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py +++ 
b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py @@ -4,7 +4,7 @@ import pandas as pd import pytest from plotly.express._core import build_dataframe -from pandas.util.testing import assert_frame_equal +from pandas.testing import assert_frame_equal def test_numpy(): @@ -389,37 +389,38 @@ def test_ranges(): assert "time=" in fig.data[0].hovertemplate -def test_auto_orient(): - categorical = ["a", "a", "b", "b"] +@pytest.mark.parametrize( + "fn", + [px.scatter, px.line, px.area, px.violin, px.box, px.strip] + + [px.bar, px.funnel, px.histogram], +) +@pytest.mark.parametrize( + "x,y,result", + [ + ("numerical", "categorical", "h"), + ("categorical", "numerical", "v"), + ("categorical", "categorical", "v"), + ("numerical", "numerical", "v"), + ("numerical", "none", "h"), + ("categorical", "none", "h"), + ("none", "categorical", "v"), + ("none", "numerical", "v"), + ], +) +def test_auto_orient_x_and_y(fn, x, y, result): + series = dict(categorical=["a", "a", "b", "b"], numerical=[1, 2, 3, 4], none=None) + + if "none" not in [x, y]: + assert fn(x=series[x], y=series[y]).data[0].orientation == result + else: + if fn == px.histogram or (fn == px.bar and "categorical" in [x, y]): + assert fn(x=series[x], y=series[y]).data[0].orientation != result + else: + assert fn(x=series[x], y=series[y]).data[0].orientation == result + + +def test_histogram_auto_orient(): numerical = [1, 2, 3, 4] - - auto_orientable = [px.scatter, px.line, px.area, px.violin, px.box, px.strip] - auto_orientable += [px.bar, px.funnel, px.histogram] - - pattern_x_and_y = [ - (numerical, categorical, "h"), # auto - (categorical, numerical, "v"), # auto/default - (categorical, categorical, "v"), # default - (numerical, numerical, "v"), # default - ] - for fn in auto_orientable: - for x, y, result in pattern_x_and_y: - assert fn(x=x, y=y).data[0].orientation == result - - pattern_x_or_y = [ - (numerical, None, "h"), # auto - (categorical, None, "h"), # auto - (None, categorical, "v"), # 
auto/default - (None, numerical, "v"), # auto/default - ] - - for fn in auto_orientable: - for x, y, result in pattern_x_or_y: - if fn == px.histogram or (fn == px.bar and categorical in [x, y]): - assert fn(x=x, y=y).data[0].orientation != result - else: - assert fn(x=x, y=y).data[0].orientation == result - assert px.histogram(x=numerical, nbins=5).data[0].nbinsx == 5 assert px.histogram(y=numerical, nbins=5).data[0].nbinsy == 5 assert px.histogram(x=numerical, y=numerical, nbins=5).data[0].nbinsx == 5 @@ -437,7 +438,21 @@ def test_auto_histfunc(): assert px.density_heatmap(x=a, y=a, z=a, histfunc="avg").data[0].histfunc == "avg" -def test_auto_boxlike_overlay(): +@pytest.mark.parametrize( + "fn,mode", [(px.violin, "violinmode"), (px.box, "boxmode"), (px.strip, "boxmode")] +) +@pytest.mark.parametrize( + "x,y,color,result", + [ + ("categorical1", "numerical", None, "group"), + ("categorical1", "numerical", "categorical2", "group"), + ("categorical1", "numerical", "categorical1", "overlay"), + ("numerical", "categorical1", None, "group"), + ("numerical", "categorical1", "categorical2", "group"), + ("numerical", "categorical1", "categorical1", "overlay"), + ], +) +def test_auto_boxlike_overlay(fn, mode, x, y, color, result): df = pd.DataFrame( dict( categorical1=["a", "a", "b", "b"], @@ -445,28 +460,11 @@ def test_auto_boxlike_overlay(): numerical=[1, 2, 3, 4], ) ) - - pattern = [ - ("categorical1", "numerical", None, "group"), - ("categorical1", "numerical", "categorical2", "group"), - ("categorical1", "numerical", "categorical1", "overlay"), - ("numerical", "categorical1", None, "group"), - ("numerical", "categorical1", "categorical2", "group"), - ("numerical", "categorical1", "categorical1", "overlay"), - ] - - fn_and_mode = [ - (px.violin, "violinmode"), - (px.box, "boxmode"), - (px.strip, "boxmode"), - ] - - for fn, mode in fn_and_mode: - for x, y, color, result in pattern: - assert fn(df, x=x, y=y, color=color).layout[mode] == result + assert fn(df, x=x, y=y, 
color=color).layout[mode] == result -def test_x_or_y(): +@pytest.mark.parametrize("fn", [px.scatter, px.line, px.area, px.bar]) +def test_x_or_y(fn): categorical = ["a", "a", "b", "b"] numerical = [1, 2, 3, 4] constant = [1, 1, 1, 1] @@ -474,28 +472,25 @@ def test_x_or_y(): index = [11, 12, 13, 14] numerical_df = pd.DataFrame(dict(col=numerical), index=index) categorical_df = pd.DataFrame(dict(col=categorical), index=index) - scatter_like = [px.scatter, px.line, px.area] - bar_like = [px.bar] - - for fn in scatter_like + bar_like: - fig = fn(x=numerical) - assert list(fig.data[0].x) == numerical - assert list(fig.data[0].y) == range_4 - assert fig.data[0].orientation == "h" - fig = fn(y=numerical) - assert list(fig.data[0].x) == range_4 - assert list(fig.data[0].y) == numerical - assert fig.data[0].orientation == "v" - fig = fn(numerical_df, x="col") - assert list(fig.data[0].x) == numerical - assert list(fig.data[0].y) == index - assert fig.data[0].orientation == "h" - fig = fn(numerical_df, y="col") - assert list(fig.data[0].x) == index - assert list(fig.data[0].y) == numerical - assert fig.data[0].orientation == "v" - for fn in scatter_like: + fig = fn(x=numerical) + assert list(fig.data[0].x) == numerical + assert list(fig.data[0].y) == range_4 + assert fig.data[0].orientation == "h" + fig = fn(y=numerical) + assert list(fig.data[0].x) == range_4 + assert list(fig.data[0].y) == numerical + assert fig.data[0].orientation == "v" + fig = fn(numerical_df, x="col") + assert list(fig.data[0].x) == numerical + assert list(fig.data[0].y) == index + assert fig.data[0].orientation == "h" + fig = fn(numerical_df, y="col") + assert list(fig.data[0].x) == index + assert list(fig.data[0].y) == numerical + assert fig.data[0].orientation == "v" + + if fn != px.bar: fig = fn(x=categorical) assert list(fig.data[0].x) == categorical assert list(fig.data[0].y) == range_4 @@ -513,7 +508,7 @@ def test_x_or_y(): assert list(fig.data[0].y) == categorical assert fig.data[0].orientation 
== "v" - for fn in bar_like: + else: fig = fn(x=categorical) assert list(fig.data[0].x) == categorical assert list(fig.data[0].y) == constant From b96f8c15569657a565f47bbf5d989ee3a2c5ef40 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Wed, 15 Apr 2020 16:54:33 -0400 Subject: [PATCH 30/69] fix Pandas warning, use parameterized tests --- .../tests/test_core/test_px/test_px_wide.py | 99 ++++++++----------- 1 file changed, 42 insertions(+), 57 deletions(-) diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py index 1ec0cb28cd1..2d0ec350c54 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py @@ -2,7 +2,8 @@ import plotly.graph_objects as go import pandas as pd from plotly.express._core import build_dataframe -from pandas.util.testing import assert_frame_equal +from pandas.testing import assert_frame_equal +import pytest def test_wide_mode_external(): @@ -76,63 +77,47 @@ def test_wide_mode_labels_external(): assert fig.layout.legend.title.text == "my column" -def test_wide_mode_internal(): - # here we do basic exhaustive testing of the various graph_object permutations - # via build_dataframe directly, which leads to more compact test code: - # we pass in args (which includes df) and look at how build_dataframe mutates - # both args and the df, and assume that since the rest of the downstream PX - # machinery has not wide-mode-specific code, and the tests above pass, that this is - # enough to prove things work - +# here we do basic exhaustive testing of the various graph_object permutations +# via build_dataframe directly, which leads to more compact test code: +# we pass in args (which includes df) and look at how build_dataframe mutates +# both args and the df, and assume that since the rest of the downstream PX +# machinery has not wide-mode-specific code, 
and the tests above pass, that this is +# enough to prove things work +@pytest.mark.parametrize( + "trace_type,x,y,color", + [ + (go.Scatter, "index", "_value_", "_column_"), + (go.Bar, "index", "_value_", "_column_"), + (go.Box, "_column_", "_value_", None), + (go.Violin, "_column_", "_value_", None), + (go.Histogram, "_value_", None, "_column_"), + ], +) +@pytest.mark.parametrize( + "orientation", [None, "v", "h"], +) +def test_wide_mode_internal(trace_type, x, y, color, orientation): df_in = pd.DataFrame(dict(a=[1, 2, 3], b=[4, 5, 6]), index=[11, 12, 13]) - - def extract_and_check_df(args_out): - df_out = args_out.pop("data_frame") - assert_frame_equal( - df_out.sort_index(axis=1), - pd.DataFrame( - dict( - index=[11, 12, 13, 11, 12, 13], - _column_=["a", "a", "a", "b", "b", "b"], - _value_=[1, 2, 3, 4, 5, 6], - ) - ).sort_index(axis=1), - ) - return args_out - - for trace_type in [go.Scatter, go.Bar]: - args_in = dict(data_frame=df_in.copy(), color=None) - args_out = extract_and_check_df(build_dataframe(args_in, trace_type)) - assert args_out == dict( - x="index", y="_value_", color="_column_", orientation="v" - ) - - # now we check with orientation - args_in = dict(data_frame=df_in.copy(), color=None, orientation="h") - args_out = extract_and_check_df(build_dataframe(args_in, trace_type)) - assert args_out == dict( - y="index", x="_value_", color="_column_", orientation="h" - ) - - for trace_type in [go.Violin, go.Box]: - args_in = dict(data_frame=df_in.copy(), color=None) - args_out = extract_and_check_df(build_dataframe(args_in, trace_type)) - assert args_out == dict(x="_column_", y="_value_", color=None, orientation="v") - - # now we check with orientation - args_in = dict(data_frame=df_in.copy(), color=None, orientation="h") - args_out = extract_and_check_df(build_dataframe(args_in, trace_type)) - assert args_out == dict(y="_column_", x="_value_", color=None, orientation="h") - - for trace_type in [go.Histogram]: - args_in = dict(data_frame=df_in.copy(), 
color=None) - args_out = extract_and_check_df(build_dataframe(args_in, trace_type)) - assert args_out == dict(x="_value_", color="_column_", orientation="v") - - # now we check with orientation - args_in = dict(data_frame=df_in.copy(), color=None, orientation="h") - args_out = extract_and_check_df(build_dataframe(args_in, trace_type)) - assert args_out == dict(y="_value_", color="_column_", orientation="h") + args_in = dict(data_frame=df_in, color=None, orientation=orientation) + args_out = build_dataframe(args_in, trace_type) + df_out = args_out.pop("data_frame") + assert_frame_equal( + df_out.sort_index(axis=1), + pd.DataFrame( + dict( + index=[11, 12, 13, 11, 12, 13], + _column_=["a", "a", "a", "b", "b", "b"], + _value_=[1, 2, 3, 4, 5, 6], + ) + ).sort_index(axis=1), + ) + for arg in ["x", "y"]: + if arg not in args_out: + args_out[arg] = None # so this doesn't fail for histogram + if orientation is None or orientation == "v": + assert args_out == dict(x=x, y=y, color=color, orientation="v") + else: + assert args_out == dict(x=y, y=x, color=color, orientation="h") def test_wide_mode_internal_special_cases(): From 7e129b203a7ffe810d999b0fd0ddbc0718486c73 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Wed, 15 Apr 2020 21:49:18 -0400 Subject: [PATCH 31/69] align bar and histogram behaviours in wide mode with categorical values --- .../python/plotly/plotly/express/_core.py | 20 +++++++++---- .../tests/test_core/test_px/test_px_wide.py | 29 +++++++++++++++++++ 2 files changed, 44 insertions(+), 5 deletions(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index e31860f56eb..73d6a3a8001 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1114,13 +1114,13 @@ def build_dataframe(args, constructor): if missing_bar_dim and constructor == go.Bar: # now that we've populated df_output, we check to see if the non-missing - # dimensio is 
categorical: if so, then setting the missing dimension to a + # dimension is categorical: if so, then setting the missing dimension to a # constant 1 is a less-insane thing to do than setting it to the index by # default and we let the normal auto-orientation-code do its thing later other_dim = "x" if missing_bar_dim == "y" else "y" if not _is_continuous(df_output, args[other_dim]): - args[missing_bar_dim] = missing_bar_dim - constants[missing_bar_dim] = 1 + args[missing_bar_dim] = "_count_" + constants["_count_"] = 1 else: # on the other hand, if the non-missing dimension is continuous, then we # can use this information to override the normal auto-orientation code @@ -1146,14 +1146,24 @@ def build_dataframe(args, constructor): df_output[var_name] = df_output[var_name].astype(str) args["orientation"] = args.get("orientation", None) or "v" orient_v = args["orientation"] == "v" - if constructor in [go.Scatter, go.Bar]: + if constructor == go.Scatter: args["x" if orient_v else "y"] = index_name args["y" if orient_v else "x"] = "_value_" args["color"] = args["color"] or var_name + if constructor == go.Bar: + if _is_continuous(df_output, "_value_"): + args["x" if orient_v else "y"] = index_name + args["y" if orient_v else "x"] = "_value_" + args["color"] = args["color"] or var_name + else: + args["x" if orient_v else "y"] = "_value_" + args["y" if orient_v else "x"] = "_count_" + df_output["_count_"] = 1 + args["color"] = args["color"] or var_name if constructor in [go.Violin, go.Box]: args["x" if orient_v else "y"] = var_name args["y" if orient_v else "x"] = "_value_" - if constructor in [go.Histogram]: + if constructor == go.Histogram: args["x" if orient_v else "y"] = "_value_" args["color"] = args["color"] or var_name diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py index 2d0ec350c54..6a8dddb6ae5 100644 --- 
a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py @@ -120,6 +120,35 @@ def test_wide_mode_internal(trace_type, x, y, color, orientation): assert args_out == dict(x=y, y=x, color=color, orientation="h") +@pytest.mark.parametrize( + "orientation", [None, "v", "h"], +) +def test_wide_mode_internal_bar_exception(orientation): + df_in = pd.DataFrame(dict(a=["q", "r", "s"], b=["t", "u", "v"]), index=[11, 12, 13]) + args_in = dict(data_frame=df_in, color=None, orientation=orientation) + args_out = build_dataframe(args_in, go.Bar) + df_out = args_out.pop("data_frame") + assert_frame_equal( + df_out.sort_index(axis=1), + pd.DataFrame( + dict( + index=[11, 12, 13, 11, 12, 13], + _column_=["a", "a", "a", "b", "b", "b"], + _value_=["q", "r", "s", "t", "u", "v"], + _count_=[1, 1, 1, 1, 1, 1], + ) + ).sort_index(axis=1), + ) + if orientation is None or orientation == "v": + assert args_out == dict( + x="_value_", y="_count_", color="_column_", orientation="v" + ) + else: + assert args_out == dict( + x="_count_", y="_value_", color="_column_", orientation="h" + ) + + def test_wide_mode_internal_special_cases(): # given all of the above tests, and given that the melt() code is not sensitive # to the trace type, we can do all sorts of special-case testing just by focusing From 9d45dc8e05ba7cf0b0e0a346bf7b1941ab4811e4 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Thu, 16 Apr 2020 14:11:46 -0400 Subject: [PATCH 32/69] wip --- .../python/plotly/plotly/express/_core.py | 52 ++++++++++++++----- 1 file changed, 38 insertions(+), 14 deletions(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 73d6a3a8001..bc8d8e3c34c 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -23,7 +23,7 @@ + ["ids", "error_x", "error_x_minus", "error_y", "error_y_minus", 
"error_z"] + ["error_z_minus", "lat", "lon", "locations", "animation_group"] ) -array_attrables = ["dimensions", "custom_data", "hover_data", "path"] +array_attrables = ["dimensions", "custom_data", "hover_data", "path", "wide_cols"] group_attrables = ["animation_frame", "facet_row", "facet_col", "line_group"] renameable_group_attrables = [ "color", # renamed to marker.color or line.color in infer_config @@ -942,7 +942,6 @@ def build_dataframe(args, constructor): df_provided = args["data_frame"] is not None if df_provided and not isinstance(args["data_frame"], pd.DataFrame): args["data_frame"] = pd.DataFrame(args["data_frame"]) - df_input = args["data_frame"] no_x = args.get("x", None) is None @@ -952,10 +951,35 @@ def build_dataframe(args, constructor): wide_id_vars = set() if wide_mode: - df_output = df_input - var_name = df_output.columns.name or "_column_" - else: - df_output = pd.DataFrame() + # currently assuming that df_provided == True + args["wide_cols"] = [df_input.index] + list(df_input.columns) + var_name = df_input.columns.name or "_column_" + index_name = df_input.index.name or "index" + wide_id_vars.add(index_name) + + """ + wide_x detection + - if scalar = False + - else if list of lists = True + - else if not df_provided = False + - else if contents are unique and are contained in columns = True + - else = False + + + wide detection: + - if no_x and no_y = wide mode + - else if wide_x and wide_y = error + - else if wide_x xor wide_y = wide mode + - else = long mode + + so what we want is: + - y = [col col] -> melt just those + - x = [col col] -> melt just those but swap the orientation? except in hist mode + - y = [col col] / x=col -> melt just those and force x to not be the index ... 
what about hist + - y = [col col] / x=[col col] -> error + """ + + df_output = pd.DataFrame() missing_bar_dim = None if constructor in [go.Scatter, go.Bar] and (no_x != no_y): @@ -1110,7 +1134,8 @@ def build_dataframe(args, constructor): args[field_name] = str(col_name) else: args[field_name][i] = str(col_name) - wide_id_vars.add(str(col_name)) + if field_name != "wide_cols": + wide_id_vars.add(str(col_name)) if missing_bar_dim and constructor == go.Bar: # now that we've populated df_output, we check to see if the non-missing @@ -1134,14 +1159,13 @@ def build_dataframe(args, constructor): df_output[col_name] = constants[col_name] if wide_mode: - # TODO multi-level index - # TODO multi-level columns - index_name = df_output.index.name or "index" - wide_id_vars.add(index_name) - if index_name not in df_output.columns: - df_output = df_output.reset_index() + wide_value_vars = [c for c in args["wide_cols"] if c not in wide_id_vars] + del args["wide_cols"] df_output = df_output.melt( - id_vars=wide_id_vars, var_name=var_name, value_name="_value_" + id_vars=wide_id_vars, + value_vars=wide_value_vars, + var_name=var_name, + value_name="_value_", ) df_output[var_name] = df_output[var_name].astype(str) args["orientation"] = args.get("orientation", None) or "v" From f2a00793242dd1091eca0458ce86eca109b1bbcf Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Thu, 16 Apr 2020 16:39:03 -0400 Subject: [PATCH 33/69] wip --- .../python/plotly/plotly/express/_core.py | 26 +++++++++++-------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index bc8d8e3c34c..b4d74b7500b 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -19,7 +19,7 @@ # Declare all supported attributes, across all plot types direct_attrables = ( ["x", "y", "z", "a", "b", "c", "r", "theta", "size"] - + ["hover_name", "text", "names", 
"values", "parents"] + + ["hover_name", "text", "names", "values", "parents", "wide_cross"] + ["ids", "error_x", "error_x_minus", "error_y", "error_y_minus", "error_z"] + ["error_z_minus", "lat", "lon", "locations", "animation_group"] ) @@ -952,10 +952,11 @@ def build_dataframe(args, constructor): if wide_mode: # currently assuming that df_provided == True - args["wide_cols"] = [df_input.index] + list(df_input.columns) + args["wide_cols"] = list(df_input.columns) + args["wide_cross"] = df_input.index var_name = df_input.columns.name or "_column_" - index_name = df_input.index.name or "index" - wide_id_vars.add(index_name) + wide_orientation = args.get("orientation", None) or "v" + args["orientation"] = wide_orientation """ wide_x detection @@ -973,10 +974,11 @@ def build_dataframe(args, constructor): - else = long mode so what we want is: - - y = [col col] -> melt just those - - x = [col col] -> melt just those but swap the orientation? except in hist mode - - y = [col col] / x=col -> melt just those and force x to not be the index ... 
what about hist + - y = [col col] -> melt just those, wide_orientation = 'v'/no override, cross_dim = index or range + - y = [col col] / x=col -> wide_orientation = 'h'/no override, cross_dim = x - y = [col col] / x=[col col] -> error + + need to merge wide logic into no_x/no_y logic below for range() etc """ df_output = pd.DataFrame() @@ -1161,6 +1163,8 @@ def build_dataframe(args, constructor): if wide_mode: wide_value_vars = [c for c in args["wide_cols"] if c not in wide_id_vars] del args["wide_cols"] + wide_cross = args["wide_cross"] + del args["wide_cross"] df_output = df_output.melt( id_vars=wide_id_vars, value_vars=wide_value_vars, @@ -1168,15 +1172,15 @@ def build_dataframe(args, constructor): value_name="_value_", ) df_output[var_name] = df_output[var_name].astype(str) - args["orientation"] = args.get("orientation", None) or "v" - orient_v = args["orientation"] == "v" + orient_v = wide_orientation == "v" + if constructor == go.Scatter: - args["x" if orient_v else "y"] = index_name + args["x" if orient_v else "y"] = wide_cross args["y" if orient_v else "x"] = "_value_" args["color"] = args["color"] or var_name if constructor == go.Bar: if _is_continuous(df_output, "_value_"): - args["x" if orient_v else "y"] = index_name + args["x" if orient_v else "y"] = wide_cross args["y" if orient_v else "x"] = "_value_" args["color"] = args["color"] or var_name else: From 977dbcf7bed26eb1b016f65e192527287d982c08 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Fri, 17 Apr 2020 11:29:41 -0400 Subject: [PATCH 34/69] wip wide_y --- .../python/plotly/plotly/express/_core.py | 145 ++++++++++-------- .../tests/test_core/test_px/__init__.py | 0 .../tests/test_core/test_px/test_px_wide.py | 75 +++++++-- 3 files changed, 145 insertions(+), 75 deletions(-) create mode 100644 packages/python/plotly/plotly/tests/test_core/test_px/__init__.py diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 
b4d74b7500b..560ecd0d0f1 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -23,7 +23,7 @@ + ["ids", "error_x", "error_x_minus", "error_y", "error_y_minus", "error_z"] + ["error_z_minus", "lat", "lon", "locations", "animation_group"] ) -array_attrables = ["dimensions", "custom_data", "hover_data", "path", "wide_cols"] +array_attrables = ["dimensions", "custom_data", "hover_data", "path", "_column_"] group_attrables = ["animation_frame", "facet_row", "facet_col", "line_group"] renameable_group_attrables = [ "color", # renamed to marker.color or line.color in infer_config @@ -913,6 +913,27 @@ def _get_reserved_col_names(args): return reserved_names +def _is_col_list(df_input, arg): + if arg is None or isinstance(arg, str) or isinstance(arg, int): + return False + if isinstance(arg, pd.MultiIndex): + return False # just to keep existing behaviour for now + try: + iter(arg) + except TypeError: + return False # not iterable + for c in arg: + if isinstance(c, str) or isinstance(c, int): + if df_input is None or c not in df_input.columns: + return False + else: + try: + iter(c) + except TypeError: + return False # not iterable + return True + + def build_dataframe(args, constructor): """ Constructs a dataframe and modifies `args` in-place. 
@@ -946,60 +967,60 @@ def build_dataframe(args, constructor): no_x = args.get("x", None) is None no_y = args.get("y", None) is None - wideable = [go.Scatter, go.Bar, go.Violin, go.Box, go.Histogram] - wide_mode = df_provided and no_x and no_y and constructor in wideable - wide_id_vars = set() + wide_x = False if no_x else _is_col_list(df_input, args["x"]) + wide_y = False if no_y else _is_col_list(df_input, args["y"]) - if wide_mode: - # currently assuming that df_provided == True - args["wide_cols"] = list(df_input.columns) - args["wide_cross"] = df_input.index - var_name = df_input.columns.name or "_column_" - wide_orientation = args.get("orientation", None) or "v" - args["orientation"] = wide_orientation + wide_mode = False + if constructor in [go.Scatter, go.Bar, go.Violin, go.Box, go.Histogram]: + wide_cross_name = None + if wide_x and wide_y: + raise ValueError( + "Cannot accept list of column references or list of columns for both `x` and `y`." + ) + if df_provided and no_x and no_y: + wide_mode = True + args["_column_"] = list(df_input.columns) + var_name = df_input.columns.name or "_column_" + wide_orientation = args.get("orientation", None) or "v" + args["orientation"] = wide_orientation + args["wide_cross"] = None + elif wide_x != wide_y: + wide_mode = True + args["_column_"] = args["y"] if wide_y else args["x"] + var_name = "_column_" + if constructor == go.Histogram: + wide_orientation = "v" if wide_x else "h" + else: + wide_orientation = "v" if wide_y else "h" + args["y" if wide_y else "x"] = None + args["wide_cross"] = None + if not no_x and not no_y: + wide_cross_name = "__x__" if wide_y else "__y__" - """ - wide_x detection - - if scalar = False - - else if list of lists = True - - else if not df_provided = False - - else if contents are unique and are contained in columns = True - - else = False - - - wide detection: - - if no_x and no_y = wide mode - - else if wide_x and wide_y = error - - else if wide_x xor wide_y = wide mode - - else = long 
mode - - so what we want is: - - y = [col col] -> melt just those, wide_orientation = 'v'/no override, cross_dim = index or range - - y = [col col] / x=col -> wide_orientation = 'h'/no override, cross_dim = x - - y = [col col] / x=[col col] -> error - - need to merge wide logic into no_x/no_y logic below for range() etc - """ + missing_bar_dim = None + if constructor in [go.Scatter, go.Bar]: + if not wide_mode and (no_x != no_y): + for ax in ["x", "y"]: + if args.get(ax, None) is None: + args[ax] = df_input.index if df_provided else Range() + if constructor == go.Scatter: + if args["orientation"] is None: + args["orientation"] = "v" if ax == "x" else "h" + if constructor == go.Bar: + missing_bar_dim = ax + if wide_mode and wide_cross_name is None: + if df_provided: + args["wide_cross"] = df_input.index + wide_cross_name = df_input.index.name or "index" + else: + args["wide_cross"] = Range(label="index") + wide_cross_name = "index" df_output = pd.DataFrame() - - missing_bar_dim = None - if constructor in [go.Scatter, go.Bar] and (no_x != no_y): - for ax in ["x", "y"]: - if args.get(ax, None) is None: - args[ax] = df_input.index if df_provided else Range() - if constructor == go.Scatter: - if args["orientation"] is None: - args["orientation"] = "v" if ax == "x" else "h" - if constructor == go.Bar: - missing_bar_dim = ax - - # Initialize set of column names - # These are reserved names - if df_provided: - reserved_names = _get_reserved_col_names(args) - else: - reserved_names = set() + constants = dict() + ranges = list() + wide_id_vars = set() + reserved_names = _get_reserved_col_names(args) if df_provided else set() # Case of functions with a "dimensions" kw: scatter_matrix, parcats, parcoords if "dimensions" in args and args["dimensions"] is None: @@ -1010,8 +1031,6 @@ def build_dataframe(args, constructor): else: df_output[df_input.columns] = df_input[df_input.columns] - constants = dict() - ranges = list() # Loop over possible arguments for field_name in 
all_attrables: @@ -1136,10 +1155,10 @@ def build_dataframe(args, constructor): args[field_name] = str(col_name) else: args[field_name][i] = str(col_name) - if field_name != "wide_cols": + if field_name != "_column_": wide_id_vars.add(str(col_name)) - if missing_bar_dim and constructor == go.Bar: + if not wide_mode and missing_bar_dim and constructor == go.Bar: # now that we've populated df_output, we check to see if the non-missing # dimension is categorical: if so, then setting the missing dimension to a # constant 1 is a less-insane thing to do than setting it to the index by @@ -1161,9 +1180,8 @@ def build_dataframe(args, constructor): df_output[col_name] = constants[col_name] if wide_mode: - wide_value_vars = [c for c in args["wide_cols"] if c not in wide_id_vars] - del args["wide_cols"] - wide_cross = args["wide_cross"] + wide_value_vars = [c for c in args["_column_"] if c not in wide_id_vars] + del args["_column_"] del args["wide_cross"] df_output = df_output.melt( id_vars=wide_id_vars, @@ -1173,14 +1191,18 @@ def build_dataframe(args, constructor): ) df_output[var_name] = df_output[var_name].astype(str) orient_v = wide_orientation == "v" + if wide_cross_name == "__x__": + wide_cross_name = args["x"] + if wide_cross_name == "__y__": + wide_cross_name = args["y"] if constructor == go.Scatter: - args["x" if orient_v else "y"] = wide_cross + args["x" if orient_v else "y"] = wide_cross_name args["y" if orient_v else "x"] = "_value_" args["color"] = args["color"] or var_name if constructor == go.Bar: if _is_continuous(df_output, "_value_"): - args["x" if orient_v else "y"] = wide_cross + args["x" if orient_v else "y"] = wide_cross_name args["y" if orient_v else "x"] = "_value_" args["color"] = args["color"] or var_name else: @@ -1189,10 +1211,11 @@ def build_dataframe(args, constructor): df_output["_count_"] = 1 args["color"] = args["color"] or var_name if constructor in [go.Violin, go.Box]: - args["x" if orient_v else "y"] = var_name + args["x" if orient_v else 
"y"] = wide_cross_name or var_name args["y" if orient_v else "x"] = "_value_" if constructor == go.Histogram: args["x" if orient_v else "y"] = "_value_" + args["y" if orient_v else "x"] = wide_cross_name args["color"] = args["color"] or var_name args["data_frame"] = df_output diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/__init__.py b/packages/python/plotly/plotly/tests/test_core/test_px/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py index 6a8dddb6ae5..0e596a3a17c 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py @@ -1,11 +1,50 @@ import plotly.express as px import plotly.graph_objects as go import pandas as pd -from plotly.express._core import build_dataframe +from plotly.express._core import build_dataframe, _is_col_list from pandas.testing import assert_frame_equal import pytest +def test_is_col_list(): + df_input = pd.DataFrame(dict(a=[1, 2], b=[1, 2])) + assert _is_col_list(df_input, ["a"]) + assert _is_col_list(df_input, ["a", "b"]) + assert _is_col_list(df_input, [[3, 4]]) + assert _is_col_list(df_input, [[3, 4], [3, 4]]) + assert not _is_col_list(df_input, pytest) + assert not _is_col_list(df_input, False) + assert not _is_col_list(df_input, ["a", 1]) + assert not _is_col_list(df_input, "a") + assert not _is_col_list(df_input, 1) + assert not _is_col_list(df_input, ["a", "b", "c"]) + assert not _is_col_list(df_input, [1, 2]) + df_input = pd.DataFrame([[1, 2], [1, 2]]) + assert _is_col_list(df_input, [0]) + assert _is_col_list(df_input, [0, 1]) + assert _is_col_list(df_input, [[3, 4]]) + assert _is_col_list(df_input, [[3, 4], [3, 4]]) + assert not _is_col_list(df_input, pytest) + assert not _is_col_list(df_input, False) + assert not _is_col_list(df_input, ["a", 
1]) + assert not _is_col_list(df_input, "a") + assert not _is_col_list(df_input, 1) + assert not _is_col_list(df_input, [0, 1, 2]) + assert not _is_col_list(df_input, ["a", "b"]) + df_input = None + assert _is_col_list(df_input, [[3, 4]]) + assert _is_col_list(df_input, [[3, 4], [3, 4]]) + assert not _is_col_list(df_input, [0]) + assert not _is_col_list(df_input, [0, 1]) + assert not _is_col_list(df_input, pytest) + assert not _is_col_list(df_input, False) + assert not _is_col_list(df_input, ["a", 1]) + assert not _is_col_list(df_input, "a") + assert not _is_col_list(df_input, 1) + assert not _is_col_list(df_input, [0, 1, 2]) + assert not _is_col_list(df_input, ["a", "b"]) + + def test_wide_mode_external(): # here we test this feature "black box" style by calling actual PX functions and # inspecting the figure... this is important but clunky, and is mostly a smoke test @@ -101,25 +140,35 @@ def test_wide_mode_internal(trace_type, x, y, color, orientation): args_in = dict(data_frame=df_in, color=None, orientation=orientation) args_out = build_dataframe(args_in, trace_type) df_out = args_out.pop("data_frame") + expected = dict( + _column_=["a", "a", "a", "b", "b", "b"], _value_=[1, 2, 3, 4, 5, 6], + ) + if x == "index": + expected["index"] = [11, 12, 13, 11, 12, 13] assert_frame_equal( - df_out.sort_index(axis=1), - pd.DataFrame( - dict( - index=[11, 12, 13, 11, 12, 13], - _column_=["a", "a", "a", "b", "b", "b"], - _value_=[1, 2, 3, 4, 5, 6], - ) - ).sort_index(axis=1), + df_out.sort_index(axis=1), pd.DataFrame(expected).sort_index(axis=1), ) - for arg in ["x", "y"]: - if arg not in args_out: - args_out[arg] = None # so this doesn't fail for histogram if orientation is None or orientation == "v": assert args_out == dict(x=x, y=y, color=color, orientation="v") else: assert args_out == dict(x=y, y=x, color=color, orientation="h") +def test_wide_x_or_y(): + args_in = dict(data_frame=None, y=[[1, 2], [3, 4]], color=None, orientation=None) + args_out = 
build_dataframe(args_in, go.Scatter) + df_out = args_out.pop("data_frame") + expected = dict( + _column_=["_column__0", "_column__0", "_column__1", "_column__1"], + _value_=[1, 2, 3, 4], + # x=["a", "b", "a", "b"], + index=[0, 1, 0, 1], + ) + assert_frame_equal( + df_out.sort_index(axis=1), pd.DataFrame(expected).sort_index(axis=1), + ) + + @pytest.mark.parametrize( "orientation", [None, "v", "h"], ) @@ -159,8 +208,6 @@ def assert_df_and_args(df_in, args_in, args_expect, df_expect): args_in["data_frame"] = df_in args_out = build_dataframe(args_in, go.Scatter) df_out = args_out.pop("data_frame") - # print(df_out.info()) - # print(df_expect.info()) assert_frame_equal( df_out.sort_index(axis=1), df_expect.sort_index(axis=1), ) From 553bd0b95ea457dc204a45ba9bd8961e2ba5dddb Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Fri, 17 Apr 2020 16:52:37 -0400 Subject: [PATCH 35/69] wip --- .../python/plotly/plotly/data/__init__.py | 29 ++- .../python/plotly/plotly/express/_core.py | 200 ++++++++++-------- .../package_data/datasets/experiment.csv.gz | Bin 0 -> 3114 bytes .../package_data/datasets/timeseries.csv.gz | Bin 0 -> 6165 bytes .../tests/test_core/test_px/test_px_wide.py | 9 + 5 files changed, 149 insertions(+), 89 deletions(-) create mode 100644 packages/python/plotly/plotly/package_data/datasets/experiment.csv.gz create mode 100644 packages/python/plotly/plotly/package_data/datasets/timeseries.csv.gz diff --git a/packages/python/plotly/plotly/data/__init__.py b/packages/python/plotly/plotly/data/__init__.py index dfedcfd28a7..cf51e3b6595 100644 --- a/packages/python/plotly/plotly/data/__init__.py +++ b/packages/python/plotly/plotly/data/__init__.py @@ -102,7 +102,31 @@ def carshare(): return _get_dataset("carshare") -def _get_dataset(d): +def timeseries(): + """ +Each row in this wide dataset represents values from 6 random walk time-series. The +index contains dates. 
+ +Returns: + A `pandas.DataFrame` with 100 rows and the following columns: + `['MOL.AA', 'JJK.OB', 'LFF.KP', 'UJS.PX', 'BTK.HH', 'SHX.QQ']`. +""" + return _get_dataset("timeseries", index_col=0) + + +def experiment(): + """ +Each row in this wide dataset represents the results of 100 simulated participants +on three hypothetical experiments, along with their gender and smoker status. + +Returns: + A `pandas.DataFrame` with 100 rows and the following columns: + `['experiment_1', 'experiment_2', 'experiment_3', 'gender', 'smoker']`. +""" + return _get_dataset("experiment") + + +def _get_dataset(d, index_col=None): import pandas import os @@ -112,5 +136,6 @@ def _get_dataset(d): "package_data", "datasets", d + ".csv.gz", - ) + ), + index_col=index_col, ) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 560ecd0d0f1..583b4ed271a 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -934,88 +934,16 @@ def _is_col_list(df_input, arg): return True -def build_dataframe(args, constructor): +def process_args_into_dataframe(args, wide_mode, var_name): """ - Constructs a dataframe and modifies `args` in-place. - - The argument values in `args` can be either strings corresponding to - existing columns of a dataframe, or data arrays (lists, numpy arrays, - pandas columns, series). - - Parameters - ---------- - args : OrderedDict - arguments passed to the px function and subsequently modified - constructor : graph_object trace class - the trace type selected for this figure + After this function runs, the `all_attrables` keys of `args` all contain only + references to columns of `df_output`. This function handles the extraction of data + from `args["attrable"]` and column-name-generation as appropriate, and adds the + data to `df_output` and then replaces `args["attrable"]` with the appropriate + reference. 
""" - - # make copies of all the fields via dict() and list() - for field in args: - if field in array_attrables and args[field] is not None: - args[field] = ( - dict(args[field]) - if isinstance(args[field], dict) - else list(args[field]) - ) - - # Cast data_frame argument to DataFrame (it could be a numpy array, dict etc.) - df_provided = args["data_frame"] is not None - if df_provided and not isinstance(args["data_frame"], pd.DataFrame): - args["data_frame"] = pd.DataFrame(args["data_frame"]) df_input = args["data_frame"] - - no_x = args.get("x", None) is None - no_y = args.get("y", None) is None - wide_x = False if no_x else _is_col_list(df_input, args["x"]) - wide_y = False if no_y else _is_col_list(df_input, args["y"]) - - wide_mode = False - if constructor in [go.Scatter, go.Bar, go.Violin, go.Box, go.Histogram]: - wide_cross_name = None - if wide_x and wide_y: - raise ValueError( - "Cannot accept list of column references or list of columns for both `x` and `y`." - ) - if df_provided and no_x and no_y: - wide_mode = True - args["_column_"] = list(df_input.columns) - var_name = df_input.columns.name or "_column_" - wide_orientation = args.get("orientation", None) or "v" - args["orientation"] = wide_orientation - args["wide_cross"] = None - elif wide_x != wide_y: - wide_mode = True - args["_column_"] = args["y"] if wide_y else args["x"] - var_name = "_column_" - if constructor == go.Histogram: - wide_orientation = "v" if wide_x else "h" - else: - wide_orientation = "v" if wide_y else "h" - args["y" if wide_y else "x"] = None - args["wide_cross"] = None - if not no_x and not no_y: - wide_cross_name = "__x__" if wide_y else "__y__" - - missing_bar_dim = None - if constructor in [go.Scatter, go.Bar]: - if not wide_mode and (no_x != no_y): - for ax in ["x", "y"]: - if args.get(ax, None) is None: - args[ax] = df_input.index if df_provided else Range() - if constructor == go.Scatter: - if args["orientation"] is None: - args["orientation"] = "v" if ax == "x" else 
"h" - if constructor == go.Bar: - missing_bar_dim = ax - if wide_mode and wide_cross_name is None: - if df_provided: - args["wide_cross"] = df_input.index - wide_cross_name = df_input.index.name or "index" - else: - args["wide_cross"] = Range(label="index") - wide_cross_name = "index" - + df_provided = df_input is not None df_output = pd.DataFrame() constants = dict() ranges = list() @@ -1031,7 +959,6 @@ def build_dataframe(args, constructor): else: df_output[df_input.columns] = df_input[df_input.columns] - # Loop over possible arguments for field_name in all_attrables: # Massaging variables @@ -1158,6 +1085,108 @@ def build_dataframe(args, constructor): if field_name != "_column_": wide_id_vars.add(str(col_name)) + for col_name in ranges: + df_output[col_name] = range(len(df_output)) + + for col_name in constants: + df_output[col_name] = constants[col_name] + + return df_output, wide_id_vars + + +def build_dataframe(args, constructor): + """ + Constructs a dataframe and modifies `args` in-place. + + The argument values in `args` can be either strings corresponding to + existing columns of a dataframe, or data arrays (lists, numpy arrays, + pandas columns, series). + + Parameters + ---------- + args : OrderedDict + arguments passed to the px function and subsequently modified + constructor : graph_object trace class + the trace type selected for this figure + """ + + # make copies of all the fields via dict() and list() + for field in args: + if field in array_attrables and args[field] is not None: + args[field] = ( + dict(args[field]) + if isinstance(args[field], dict) + else list(args[field]) + ) + + # Cast data_frame argument to DataFrame (it could be a numpy array, dict etc.) 
+ df_provided = args["data_frame"] is not None + if df_provided and not isinstance(args["data_frame"], pd.DataFrame): + args["data_frame"] = pd.DataFrame(args["data_frame"]) + df_input = args["data_frame"] + + # now we handle special cases like wide-mode or x-xor-y specification + # by rearranging args to tee things up for process_args_into_dataframe to work + no_x = args.get("x", None) is None + no_y = args.get("y", None) is None + wide_x = False if no_x else _is_col_list(df_input, args["x"]) + wide_y = False if no_y else _is_col_list(df_input, args["y"]) + + wide_mode = False + var_name = None + if constructor in [go.Scatter, go.Bar, go.Violin, go.Box, go.Histogram]: + wide_cross_name = None + if wide_x and wide_y: + raise ValueError( + "Cannot accept list of column references or list of columns for both `x` and `y`." + ) + if df_provided and no_x and no_y: + wide_mode = True + args["_column_"] = list(df_input.columns) + var_name = df_input.columns.name or "_column_" + wide_orientation = args.get("orientation", None) or "v" + args["orientation"] = wide_orientation + args["wide_cross"] = None + elif wide_x != wide_y: + wide_mode = True + args["_column_"] = args["y"] if wide_y else args["x"] + var_name = "_column_" + if constructor == go.Histogram: + wide_orientation = "v" if wide_x else "h" + else: + wide_orientation = "v" if wide_y else "h" + args["y" if wide_y else "x"] = None + args["wide_cross"] = None + if not no_x and not no_y: + wide_cross_name = "__x__" if wide_y else "__y__" + + missing_bar_dim = None + if constructor in [go.Scatter, go.Bar]: + if not wide_mode and (no_x != no_y): + for ax in ["x", "y"]: + if args.get(ax, None) is None: + args[ax] = df_input.index if df_provided else Range() + if constructor == go.Scatter: + if args["orientation"] is None: + args["orientation"] = "v" if ax == "x" else "h" + if constructor == go.Bar: + missing_bar_dim = ax + if wide_mode and wide_cross_name is None: + if df_provided: + args["wide_cross"] = df_input.index + 
wide_cross_name = df_input.index.name or "index" + else: + args["wide_cross"] = Range(label="index") + wide_cross_name = "index" + + # now that things have been prepped, we do the systematic rewriting of `args` + + df_output, wide_id_vars = process_args_into_dataframe(args, wide_mode, var_name) + + # now that `df_output` exists and `args` contains only references, we complete + # the special-case and wide-mode handling by further rewriting args and/or mutating + # df_output + if not wide_mode and missing_bar_dim and constructor == go.Bar: # now that we've populated df_output, we check to see if the non-missing # dimension is categorical: if so, then setting the missing dimension to a @@ -1166,20 +1195,17 @@ def build_dataframe(args, constructor): other_dim = "x" if missing_bar_dim == "y" else "y" if not _is_continuous(df_output, args[other_dim]): args[missing_bar_dim] = "_count_" - constants["_count_"] = 1 + df_output["_count_"] = 1 else: # on the other hand, if the non-missing dimension is continuous, then we # can use this information to override the normal auto-orientation code if args["orientation"] is None: args["orientation"] = "v" if missing_bar_dim == "x" else "h" - for col_name in ranges: - df_output[col_name] = range(len(df_output)) - - for col_name in constants: - df_output[col_name] = constants[col_name] - if wide_mode: + # at this point, `df_output` is semi-long/semi-wide, but we know which columns + # are which, so we melt it and reassign `args` to refer to the newly-tidy + # columns, keeping track of various names and manglings set up above wide_value_vars = [c for c in args["_column_"] if c not in wide_id_vars] del args["_column_"] del args["wide_cross"] diff --git a/packages/python/plotly/plotly/package_data/datasets/experiment.csv.gz b/packages/python/plotly/plotly/package_data/datasets/experiment.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..7ee1e69de5facd99e20c04556d6638c107038b43 GIT binary patch literal 3114 
zcmV+_4At`=iwFoF|CwF@17&z{WpZh4Wo~pXV{>)@WtrQK9JdjM@AWJVf-I7Cc$y$U zHbIa`4&+=UZ=dhythL<}TY$B*Jx#LeKNbD)pMQP){QLKhAOHT-xIVsqu=@}5`rF5k zzkK|>e*ga4KR$kbD{mRzecKv+En8R3wno$%>DG5nzhbsN=P2`$-ACB<{m=jWxc>X` z`?riX`kSpnYs_KY*P7=08r|E- zsKHBPMf&0+Hf0uFOH^j$YYRVq`!>3@iX7W!*yJSV1UZ)Hh}kz=xwf~eozX|u96kMy z4e)(Vz=Bm4#(`G^i$&|BD))+V^B7lH8>?H{7;|@< zsFCwt0#INQVlR2#vF-%`T`{Mzxz6g8 z>;TRIFo2^2oI9?v<~+sw#~yx%fH)-Zlml2cJLy1Dt=vO^%(dJuuhyxL`UEdH=o<-^ zn#UH|Ec>Q{FAxLZ@Ni&vNQ<2jqPtmunx$Pk%ihU zL54>S+Jy~@v9$%YYV&ZXH2L5`&$bJ!7bx5#h7PgX-Wz$*8_YAVSOOA^?*@(l$4v~; zxejG(1P5?25pkl6IU(XgIBXnwtxiXhLK@|MiUGK>o>r86+1`1mk1En!$s@==Oq3dP z*uAAQHzuH5Mi&b-O0CxQ;^u_HYp`qwoGvX0`Ybg=mf}$Z7i>Tr>qVrfs4h&`h5-fw z9On$6oeIENv~SdRlInE&|DO7odc3^>$vsD-A9)ei3k+QN^z=vnl zOAJ_k&o~g{4Y|Yx&PZ?OqPRC0NPnpU1nImb4iX29)A`Nm`MPql0XR@d5uUK`OsIzF zEHPc;YjnUMP99#-UECv80M~v1GjH5wcP1F>y$;535IyXTv-2|#v!>*s7h=dVB7B14 zCkB=<01UAcmRly~Zb=j3LMI&q=KKL&IU-&-@8_@Hl2$`QuodCQkm)4n9tAbFK9Zew z5FLB1%}@99L*&9|vS98ul`n)?UP>_*sJjAt8Pa6>3^Q(Lr`00wLc$?D@_UNM0szJF z4O;fHr;QIe1g7`y^*H{phHYXrY>=Xd$^~L+v}nkYkc2Bck-{)2&V{Y-uj2#vl*L7t z&~;a@N@1b{6UzB%E(W2cq9`GaeK25?Ur-yQcqWho02mB}<4iCX2)8=odtXpu2rt!3zU|@(~H?O#$eRUJEbLT$CKqWkZJP z0YPaXD7tVLRtE%ZN3PTTS%5tUwE?OKmfQ#xqWY&E7$on%9Gvn3C;>^>Sv)jgwbzDS zI|K`yVz8~@p%lhxa4*WrDP)(@n!JMm#wFh6WK-)zCce%F6o3%!lzW&Gf#OxEfspY| zfzJMSr`<^*@(_ml)36X7q4miVbf1GjDB!i%bvJW`9OM&pWqs03z84D_z|ZI&EU*&#R_HMqimWKO4`OtMEP3*cti1gZ22CfwPSk& z$dTygDcGfKU|0qHgDP<4%?a@~Nfadlym4TJ>#^RXVH{*gi-_%P9V2j|;;jV43ygHV z>_3lnLhL1cvt0()D-tg_W_t|i`!Ws%23gd9)bGR(=P~2tRD`m#G#oiJejXzH} zU;qWLOcAnQngH|V!q^GJiX+7vu!A71v_ymb(88euYd}Oz6g}jT8qH3Aa_ITe2Y{Vy zUo%_odks>tm};Z3&>=vPk~s?Kny48E&{L^Vibl8wYPaXO@c``sz2tUD2 z!zfZRPn4X=9Nq+lwSz-+mKK%fTRQd}4TLgEMj=Q=$ft^u@R$X2YZ}uX1u)kPwobOb z=b^(DNde1=MPvshS!n?tm!cK5Q5u71*uet;C@*S36kxz&4zzIs5n2v`Od_kNwQZY0k3?W z#p?u$Oatfn@IDqS=?;UWO!ZumXgPF15#L( zXYUt^r2N!Xi6CD@0>-*10SA)hCl8ohm3Ghrd~lqa@#t2F8oFl5v#ZVF31w5a4)I8a z=&2I42I}5VYS)-e$bbq2GxQu+(jd!jD5a{1%{;CN6qfL2k}H3CF`uf^`v~1gh}y-! 
zoL$!Ik5Hf+jgpL)wqP$mq1!Z6%GKkLo8<|#Shdo#=Ea96D(om)QAK3`lsq^mUjR@H z=XFZJPU&dZ9If#5ECCfu4e0s@E&**NLt+{!L@rgwr8K_+H&y}Yw-Tg+9J3Jhf@Snf z_Y4pTr_dZjn_gjqWRsfwMe!UDK~$V(dXyZii_>*20>0(o^Ni@FWOqj1L<{0R9SY=< zQ``VSJ7SbgH)BKuWn;Q|Whj8_n!_;`u=JJ;5u;Oe)B;^(2hjv5T`Uq~;@Bk9EjZ-s zv|!#-fs!wV%2!EUWLMowfr-~O$qAc60UaF@9Oht(dcQ41dQ(%sCRy|%K9doeASono zXX*t?J(@R`uyHRywbLk814=`e91}lK-;fQ0W$K=VDlJ_x$$p`SzkA|3cM_mX%e}+^ zSm>H4j#U3)v^0g9k`;K(tpOZsy{>Y7jR zDpir3pU2d&4QbyxS2%ecwqoianPTqwP{cDP=FnxbW`~oEWOB?T7&8M7hr+vwM34i5 z>eBUD1d0~tmW67j3{N8Gx*yQ6scew)1c(T&K*M%j2;GTr6qzp3IOO2K2z{Mf7l_1W zwFb0wT>?Xab%g3s1;j!-#n-HBNIB2zZLo1_)L>U3QSNe{()8)`2?aPxBOy6M_*ThC z`vk*tRWmdvD)HhOWKd0TeZJ2L=oVil!m*(86|;VaGY~S!wPfdOo8X0I^U0Zv--_TW zk4#Tv1_vlzBr?((Hmos?LB-D1H(skr<1UHdoTbmNI6=Pdsrjpv1DV3D^nH`3Oxd?C z6K@3&7wML#oK&|U#CbG)>Ry7zYN!mHpK+HtUY*g{_C^IPe`y{D{U=@NZoxE|8>Io_ zG}cH!8W5FmxBarEm!`AMaRiWaZkhlQy&&A7JyZWL0b`e;mgb&S`+imQKQ2mz0C5=r E01kfBRR910 literal 0 HcmV?d00001 diff --git a/packages/python/plotly/plotly/package_data/datasets/timeseries.csv.gz b/packages/python/plotly/plotly/package_data/datasets/timeseries.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..ee3e5f6a3bd2f21e9eff2a1e447d61fc4d87971b GIT binary patch literal 6165 zcmV+w80zOAiwFn+{Fz<=18iw-WpgfLb9Mktn%k}{Ns&a~`Ib6calbsm146(E5Hk}0 z|0T1QWz$)7)6jLQ(j)w0X6~M!|M|z?fByK#&tL!gx1WFf>GStL|M}-{|NQ*lU;q8{ z&;NY>^uK@m`O9BE|NhH=e*X2ZKeSTkuTuH@Q+{rlrQCh?ZFlUtKEKLO9lexMPAmJa zJI~;IW1Mnxw)vx7O7~2&&E|7=+55EmX+Ni*F?S#5?m7Ehe(UyLFw@7vcHLT^tBifht$Vat zX8ZhVKdUTGGx@ai+v;n)f9g-|r|ddQn;fECKAENVvR31xzH2>vbjzOSaQ}7Byz^pw za;3!q`fmJA47wmIf#wc@ni+WGt%KdXAdy4Kiw=`}yNR@r6Fb@p24jyg6UwaGJx z$llm*)kAWOvRYeQsu31l&$IBz-gPg+WPobx#F9U+t>Tc)kruxsGt^Lb#C>V*Vw@?d*9LLCidQQl1znw-oi6V(HrEN|M3d750isH+*|o$5^NFB z$MYmFMhVCvGWC*n{paHI)ee4Lh1`Cv``~liT4py7J5%c&?;RX^E+?ZR;&>cK(&CL; zN8*n0JL}-$b(G{+>l?S@#!<^SvD`MnAKYP_V#>az&u8$|!}rZN>g{%8PG zF*roAHbUA6FQL%yo6DTxL-}joW5N?>Duh6^TKIW0`v)zYwT*9Qlz49Kd0lIKAxM;I}ArL55G#i 
zmekhOlE$Xh=z;q6xF7|o&@8}ps&$(1Q1Uv%$e8I=3Po5`)wq~XD8F5F59wwp5pg25 zFIHdE)kknpN@z6-m{jrNp0f!kyPrh@_8+2kS81JMv#yC1aXpRlB~XXBaKl=#jLFHx zr+7KROR}VIzOWkE_8M+J6EmshwF;GVta_|SAq~k8EZ~cU z6NKd=lnB*SRoyie&K)NIGAZ#%nZ331^wNOR^de;ctzL4dwy#RAu+{w9$I z^nnvL@Ul#Y?d3qS0_)&DP6!lEQnLvRoUEnVIY%L-$PY;E^J~K}pabQF$BE*3ZJSYx zkE#KoBsRq)M{eP$ij5Z(+(N2_4S;N(i%CAgCh-6UVONqbXFk0z)0Ja3n4%VbAB!m#1to4}PA zyJVqlqAmY1AM1P#Sb>Mz7oJipl;6Qm$O?)S_eBTrE9jzJKJM~9H@^$Gf~lLHm{umW zYROJB4))|b)IlX}E9eQnJQT{er9M;*g)?9NZUxh%0tvGl+y}hGv>GSn0~bgh#s;ni z;wf@e!4UyG9j59U6SwOcPsc$)ZK`=2ZkUf~9S9Whv|sK<4^4{i$kBw$yk1jCRB}O2 zAZ(?wn`w>~0h~99V}gmUo)=8#yTD2dv{&VtbPkO@s^!2TD1VJxOvRyhhBL)}y1-WS*ipQ3uO zROAQfm?CX_x{L&VkVZ&@1Jlmb7zxKsij6=QGGFuSBl?h{{-{S@syMs81D1hEKJ8Wz z4uW0?I%!Kf|As?AARecrC<5|H`y7VT0ltKu{L?<9-xcH4N*`Zg-yahafE1pj$UGL3 zm})4p7V|y$Td9T<{o8i8y}|9KDfv->Y;q=2peyG9WO|>aV%tCFAisgY@%{Po{Nd_= zt|;8p{2bw<+{L0K96fkFjvoj_9m&+zBgvPX3Takp6m;f@Rt_F&11^L5dN`#63y3h{ zK%evF_xte=m0XB0RHNK2Es?fMTLIVA#aw{Y3t;10lmUUU2}lcflz^9}5=}|; z!!z752d9^TRfaHQ2@4cm3JgOU#_otnF&RDsa`n!N>|a} zihj-DL>N4jkH{fp{1eyGmBMHs9tZ(|;Bz1`_zIdpfr~% z4T;UyxX`&2)npE5h`+tS6lHj>rDVV7+<~OQ>!bsWo=EIwuM+{)(0?N#=SXz+_9p!i zZy+?>4u+UK>+9lD1~j1L=3Z*?aFZ=J5Y_FvEnl*r$O?F8inl!^yX;`FDxPBAUy?rJ z64^WO-ZkGAzW@ZmB0fG}wgoPwKvChq{XHHgKG0btEX=YVMODozWaAbM!Epr05t5N| zV(V-Z~0}0C#X<3*p>aPP#%OH*(a$tr1{c5cPm%+GlO=n`svLPt3ZIBgXUS!Vf8y z-L%aOz@=*EfH;1!jedCjct3ny4**wg&GchP3sXj(>9Pfad-OxGV>k+4(Yrk-;)u4$ zTopseV`qEu_LYPbSavS2SzBSzl%znS9j>}fUOcdMFQ??2J7yQ7PFSMa$W<;s(4aF89wf zJ`k1nxX;&rBN`KvH|G;=j$YgZ}MouRYLd0 z!aFjEYWJKc)6yk?GDTjwU)H0!h-9(lxibHJJACy zE)KXE;NzBUn_#PN3HLcK`qC%H!;Kn)jlWSGr9k$|Wxt$dibrDCP zTI^<)LctMeb4>K`CXz7~W|Kj5l4~RhzqFfkeDHPe5RROt5C_ihwz^%>V0l`5VVap6mCsgXZq%_#__2A)K#w`(_h<)s}6P#JFt`PE9vI` zs74IgBjig&4cd1@b8;SMaW}29!|A@mt}cumHaVLsPb-Lbpt)^_j2eBRy#3bOLuyLU$$jD9$+PXhJ@OJz!9X0SZHHhZeyKqLPDAV*D>;|7Tl^(912rp*EGJe$Ei?Wm9RP%~C6S%&#B#w>o$nL6 zqh>^cEfQQGICuQQmZmJ8ot!p0J&SFQ>m)@eqxQq= z&eD;;&^D=klH>v|DUWYe?*F}Z|CLd+&ErrLBR$unIXQ%S$I{Nd@wi-wU@O*{)5i+` 
z(9VK;Tltt76-NYt5hfs{3g$N+*;x@CN5gl$w(Y`9i*2(`NZ_c*Z4h`!KDJe9JV1r! zf(pnmlGvU$uC`os7%qeY`qh!hP!iLZ)scy6ci!QEFUybp8t)m7gVp$hHWf)}l1rUR zWPVO=x;B75ECVN#cAc7*-(Ab%i?d@Hp5G(8Lz|Y2&*>I8@Z=}6ecLsTL)x}c8TP7~ zl^=1`kqWMBkq1E867A7(+pS!U#z8B<_@H-t#&U2Gb;M(ulhefd+E&v_*FrS%cayoU)|J;6qZ|Xv&sdf;?yQjOfm*xNvvHlG*K6YgJpj`YeTx6PHV-RyT1%?2#|g zUg|+7AgM9Ok=wDPT6sUiWEaXDLH#v;FYo^A>F*uP&1Vfp&sw8envz^)JY0rFOOir0 zd(I40VMYW2HXSSy`s7;nqBsoiJJiacgXg+lu)Th_a^3dJmKIl0I+Osl2_o2>0Mei` z6l~v-HP)1V@1#=i6xKFyhz-ce4sh>BKrbP`s;C|Pr^LuaB1`?KMfm%7>oi~@p#zv> z6}TW)x2%jr_`wxQ(kp=g93SOk0hd^((co4k2Ja(f!Mf>$b2!c1F;1%92vklAp}n@V zz?9MXz^%&+hLTFY(4dr=a*W5pK*cCY+g4U62d=YskUVU!ZF-%qI~p~*E*3_>1w^eb z!d!WL6?siF?PwmkZ6|VvC!tk_qu3u6Kr$wsh4oMSer^s$K9E1Q&o_) zTP5Fo$e3le^aY#=_0_(-p5nCM?E-c8{Q+9Q!!O7su))b$b=WN`D}JYYQpY)4y+-3v zlNWuZ+24*gidtZ0rnlXY0u7Me>gVgyNf4JIt-C4MzVfltVr8@hbl`TPc1ht~v+@82 zIs21qPL+yty!9-@)uzat>E-TtH0p8vuGDdj{Z(Smu$r3;t9vVwvRU@M1EL5pqZBy4 zLlRMg9);5bw7D~C%x1&73OthpT@i6gR`?m)13gZ~m)@ISX%;NfB_LZ9?_JV4w83MT z6`Or$L(9&3Oh~B~RF7%f26O|J>lB0o%Ob@*oMP#7X9qu|J$ix&zNXVmla{uoeN^X4 zS>(H=MwbKT*6qxt)sWzs^HyoC+PJhNmmco`As?HU$Fl^Pq^R50B<>bWum1O7H z3DM=_&py*@s)wd<5+k<=u!hE26WGeR(}>M>cn!~pqLv8dht~S=?ba8Uca~JO)t*>bZ=Jwjywm>PV|CJy~c4T zOzbYrG1fNZIJP>R?8DbOvNlrwZyWMr!ks{BqorJOJm4_aTjA;ppDRl9$FT1VVIu67 zh{Y_p^s*KSX?b>DX&kE{yGm~V8M7%oQf%>A2Ebe<~NIhT1Yk0;gnwxbMW zn|l(zPnnVS&E5C4U+YeS5=at%*?U)eda%;8cgK>=yX)!1?dGMIOtzXeBd9H-_?muJ z&O3Vs^`~9DgF4D9d(AG9%%|s5J)_OZ7nDnq4Y#2%yyZYEpBjXAVD z9HRa{Y3a6NnGgbmf`o%2o~1%@eaF|2?PmF^dzwnoLF2ex=%eqP^X;SKDLi&@M@)2H+(!h$+|X4hnr?W4EBbouKnOer65b zN2_*6@5?sYP~1#V4HZ@IQ1sjO(Ke_;3QmW`PPRpKx>|Q+F4*V*l34JF!x9dUV;{WP zigm<{8$c;hbWw1e3^z0Nh2rU6UHSPg6Fzxu2ih7eGTdePuEDcBaH0Z*w)7dfwyP0z z;Z;1Bm1{@R`|4Q91kgtbyUoBljm#Tm)Sc~Kcj#pE@Ls$}uDKTX<${nBMI4RZ47u^P z<&iy9njpNT)>qEsAZ#dFK=x#{I~yn!Q^Uzq?tSxq>XDcnkG!UzcAW|VS1A+@s0wuV zQL=}zlXm?KXF3xw-)OmwX5xL)SvuEkPU}Tz-?j`n72o~v-sw}S1FkF0{bqdF0}J?0 ztfa^lwc@)K3Pty$&g^DvFJD`n&h~uMHhAN)tf2T3(Mew@FpN8CPCUkdX{ 
z=QZ7)SKAzpLnppqc_Un0Z+iaWzWAmSaJ*c7kp(cb)5GU3oSz1|?)~34X&g@ZcC5&A zbUL`)GU>Ur^1r`kGHij(5MR|uMK;$oX?d3{;_lD|cyysH2$v$Xy=Qp^vR&uaOKCt! zjOU#a1QgA?FYQ^iPr9MwfhLzE*Bfrh^0lUJYRPJ;t+Dvz<<9evIukFRlm=UY^r zgB@RYCEI-iOV!pRKSs|xYx}Y7OPM3nuDkm)_wb!np6gWX#_6se9DTaHN&}Gn__K|h zc`NF*ePY{pZ+(p*gY(q*w$7aG3}4MUwm?Abcs4WOb`9bvV?^TRf;^AP>tJrDoocVg z_()sqLf@6{SF+tlwm}`7w%Uti{1ENv>xwOdR+CLFUVV*+;6cFE`n-b+;l nnARNJJFgI_v$CY<@N2(M#IIV^r~?!5+#miQOa91;t1tinbe=Wf literal 0 HcmV?d00001 diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py index 0e596a3a17c..410afa967f4 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py @@ -168,6 +168,15 @@ def test_wide_x_or_y(): df_out.sort_index(axis=1), pd.DataFrame(expected).sort_index(axis=1), ) + """ + for each orientation + for each trace type + with and without df + wide x or y + numerical or categorical wide values + with and without cross value + """ + @pytest.mark.parametrize( "orientation", [None, "v", "h"], From 9524d94300b6190e67f84758cc372bd5c0c3e35b Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Sat, 18 Apr 2020 22:05:11 -0400 Subject: [PATCH 36/69] more tests, more parameterization --- .../tests/test_core/test_px/test_px_wide.py | 825 ++++++++++-------- 1 file changed, 439 insertions(+), 386 deletions(-) diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py index 410afa967f4..aadd7cd431a 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py @@ -45,55 +45,50 @@ def test_is_col_list(): assert not _is_col_list(df_input, ["a", "b"]) -def test_wide_mode_external(): +@pytest.mark.parametrize( + "px_fn", + [px.scatter, px.line, 
px.area, px.bar, px.violin, px.box, px.strip, px.histogram], +) +@pytest.mark.parametrize("orientation", [None, "v", "h"]) +@pytest.mark.parametrize("style", ["implicit", "explicit"]) +def test_wide_mode_external(px_fn, orientation, style): # here we test this feature "black box" style by calling actual PX functions and # inspecting the figure... this is important but clunky, and is mostly a smoke test # allowing us to do more "white box" testing below + x, y = ("y", "x") if orientation == "h" else ("x", "y") + xaxis, yaxis = x + "axis", y + "axis" + df = pd.DataFrame(dict(a=[1, 2, 3], b=[4, 5, 6], c=[7, 8, 9]), index=[11, 12, 13]) - for px_fn in [px.scatter, px.line, px.area, px.bar]: - fig = px_fn(df) - assert len(fig.data) == 3 - assert list(fig.data[0].x) == [11, 12, 13] - assert list(fig.data[0].y) == [1, 2, 3] - assert list(fig.data[1].x) == [11, 12, 13] - assert list(fig.data[1].y) == [4, 5, 6] - assert fig.layout.xaxis.title.text == "index" - assert fig.layout.yaxis.title.text == "_value_" - assert fig.layout.legend.title.text == "_column_" - fig = px_fn(df, orientation="h") + if style == "implicit": + fig = px_fn(df, orientation=orientation) + + if px_fn in [px.scatter, px.line, px.area, px.bar]: + if style == "explicit": + fig = px_fn(**{"data_frame": df, y: list(df.columns), x: df.index}) assert len(fig.data) == 3 - assert list(fig.data[0].y) == [11, 12, 13] - assert list(fig.data[0].x) == [1, 2, 3] - assert list(fig.data[1].y) == [11, 12, 13] - assert list(fig.data[1].x) == [4, 5, 6] - assert fig.layout.yaxis.title.text == "index" - assert fig.layout.xaxis.title.text == "_value_" + assert list(fig.data[0][x]) == [11, 12, 13] + assert list(fig.data[0][y]) == [1, 2, 3] + assert list(fig.data[1][x]) == [11, 12, 13] + assert list(fig.data[1][y]) == [4, 5, 6] + assert fig.layout[xaxis].title.text == "index" + assert fig.layout[yaxis].title.text == "_value_" assert fig.layout.legend.title.text == "_column_" - for px_fn in [px.violin, px.box, px.strip]: - fig 
= px_fn(df) + if px_fn in [px.violin, px.box, px.strip]: + if style == "explicit": + fig = px_fn(**{"data_frame": df, y: list(df.columns)}) assert len(fig.data) == 1 - assert list(fig.data[0].x) == ["a"] * 3 + ["b"] * 3 + ["c"] * 3 - assert list(fig.data[0].y) == list(range(1, 10)) - assert fig.layout.yaxis.title.text == "_value_" - assert fig.layout.xaxis.title.text == "_column_" - fig = px_fn(df, orientation="h") - assert len(fig.data) == 1 - assert list(fig.data[0].y) == ["a"] * 3 + ["b"] * 3 + ["c"] * 3 - assert list(fig.data[0].x) == list(range(1, 10)) - assert fig.layout.xaxis.title.text == "_value_" - assert fig.layout.yaxis.title.text == "_column_" - for px_fn in [px.histogram]: - fig = px_fn(df) - assert len(fig.data) == 3 - assert list(fig.data[1].x) == [4, 5, 6] - assert fig.layout.legend.title.text == "_column_" - assert fig.layout.xaxis.title.text == "_value_" - fig = px_fn(df, orientation="h") + assert list(fig.data[0][x]) == ["a"] * 3 + ["b"] * 3 + ["c"] * 3 + assert list(fig.data[0][y]) == list(range(1, 10)) + assert fig.layout[yaxis].title.text == "_value_" + assert fig.layout[xaxis].title.text == "_column_" + if px_fn in [px.histogram]: + if style == "explicit": + fig = px_fn(**{"data_frame": df, x: list(df.columns)}) assert len(fig.data) == 3 - assert list(fig.data[1].y) == [4, 5, 6] + assert list(fig.data[1][x]) == [4, 5, 6] assert fig.layout.legend.title.text == "_column_" - assert fig.layout.yaxis.title.text == "_value_" + assert fig.layout[xaxis].title.text == "_value_" def test_wide_mode_labels_external(): @@ -120,7 +115,7 @@ def test_wide_mode_labels_external(): # via build_dataframe directly, which leads to more compact test code: # we pass in args (which includes df) and look at how build_dataframe mutates # both args and the df, and assume that since the rest of the downstream PX -# machinery has not wide-mode-specific code, and the tests above pass, that this is +# machinery has no wide-mode-specific code, and the tests above pass, that 
this is # enough to prove things work @pytest.mark.parametrize( "trace_type,x,y,color", @@ -132,9 +127,7 @@ def test_wide_mode_labels_external(): (go.Histogram, "_value_", None, "_column_"), ], ) -@pytest.mark.parametrize( - "orientation", [None, "v", "h"], -) +@pytest.mark.parametrize("orientation", [None, "v", "h"]) def test_wide_mode_internal(trace_type, x, y, color, orientation): df_in = pd.DataFrame(dict(a=[1, 2, 3], b=[4, 5, 6]), index=[11, 12, 13]) args_in = dict(data_frame=df_in, color=None, orientation=orientation) @@ -154,33 +147,104 @@ def test_wide_mode_internal(trace_type, x, y, color, orientation): assert args_out == dict(x=y, y=x, color=color, orientation="h") -def test_wide_x_or_y(): - args_in = dict(data_frame=None, y=[[1, 2], [3, 4]], color=None, orientation=None) - args_out = build_dataframe(args_in, go.Scatter) - df_out = args_out.pop("data_frame") - expected = dict( - _column_=["_column__0", "_column__0", "_column__1", "_column__1"], - _value_=[1, 2, 3, 4], - # x=["a", "b", "a", "b"], - index=[0, 1, 0, 1], - ) - assert_frame_equal( - df_out.sort_index(axis=1), pd.DataFrame(expected).sort_index(axis=1), - ) +cases = [] +for transpose in [True, False]: + for tt in [go.Scatter, go.Bar]: + df_in = dict(a=[1, 2], b=[3, 4]) + args = dict(x=None, y=["a", "b"], color=None, orientation=None) + df_exp = dict( + _column_=["a", "a", "b", "b"], _value_=[1, 2, 3, 4], index=[0, 1, 0, 1], + ) + cases.append( + (tt, df_in, args, "index", "_value_", "_column_", df_exp, transpose) + ) - """ - for each orientation - for each trace type - with and without df - wide x or y - numerical or categorical wide values - with and without cross value - """ + df_in = dict(a=[1, 2], b=[3, 4], c=[5, 6]) + args = dict(x="c", y=["a", "b"], color=None, orientation=None) + df_exp = dict( + _column_=["a", "a", "b", "b"], _value_=[1, 2, 3, 4], c=[5, 6, 5, 6], + ) + cases.append((tt, df_in, args, "c", "_value_", "_column_", df_exp, transpose)) + args = dict(x=None, y=[[1, 2], [3, 4]], 
color=None, orientation=None) + df_exp = dict( + _column_=["_column__0", "_column__0", "_column__1", "_column__1"], + _value_=[1, 2, 3, 4], + index=[0, 1, 0, 1], + ) + cases.append( + (tt, None, args, "index", "_value_", "_column_", df_exp, transpose) + ) -@pytest.mark.parametrize( - "orientation", [None, "v", "h"], -) + for tt in [go.Bar]: # bar categorical exception + df_in = dict(a=["q", "r"], b=["s", "t"]) + args = dict(x=None, y=["a", "b"], color=None, orientation=None) + df_exp = dict( + _column_=["a", "a", "b", "b"], + _value_=["q", "r", "s", "t"], + index=[0, 1, 0, 1], + _count_=[1, 1, 1, 1], + ) + cases.append( + (tt, df_in, args, "_value_", "_count_", "_column_", df_exp, transpose) + ) + + for tt in [go.Violin, go.Box]: + df_in = dict(a=[1, 2], b=[3, 4]) + args = dict(x=None, y=["a", "b"], color=None, orientation=None) + df_exp = dict(_column_=["a", "a", "b", "b"], _value_=[1, 2, 3, 4],) + cases.append((tt, df_in, args, "_column_", "_value_", None, df_exp, transpose)) + + df_in = dict(a=[1, 2], b=[3, 4], c=[5, 6]) + args = dict(x="c", y=["a", "b"], color=None, orientation=None) + df_exp = dict( + _column_=["a", "a", "b", "b"], _value_=[1, 2, 3, 4], c=[5, 6, 5, 6], + ) + cases.append((tt, df_in, args, "c", "_value_", None, df_exp, transpose)) + + args = dict(x=None, y=[[1, 2], [3, 4]], color=None, orientation=None) + df_exp = dict( + _column_=["_column__0", "_column__0", "_column__1", "_column__1"], + _value_=[1, 2, 3, 4], + ) + cases.append((tt, None, args, "_column_", "_value_", None, df_exp, transpose)) + + for tt in [go.Histogram]: + df_in = dict(a=[1, 2], b=[3, 4]) + args = dict(x=None, y=["a", "b"], color=None, orientation=None) + df_exp = dict(_column_=["a", "a", "b", "b"], _value_=[1, 2, 3, 4],) + cases.append((tt, df_in, args, None, "_value_", "_column_", df_exp, transpose)) + + df_in = dict(a=[1, 2], b=[3, 4], c=[5, 6]) + args = dict(x="c", y=["a", "b"], color=None, orientation=None) + df_exp = dict( + _column_=["a", "a", "b", "b"], _value_=[1, 
2, 3, 4], c=[5, 6, 5, 6], + ) + cases.append((tt, df_in, args, "c", "_value_", "_column_", df_exp, transpose)) + + args = dict(x=None, y=[[1, 2], [3, 4]], color=None, orientation=None) + df_exp = dict( + _column_=["_column__0", "_column__0", "_column__1", "_column__1"], + _value_=[1, 2, 3, 4], + ) + cases.append((tt, None, args, None, "_value_", "_column_", df_exp, transpose)) + + +@pytest.mark.parametrize("tt,df_in,args_in,x,y,color,df_out_exp,transpose", cases) +def test_wide_x_or_y(tt, df_in, args_in, x, y, color, df_out_exp, transpose): + if transpose: + args_in["y"], args_in["x"] = args_in["x"], args_in["y"] + args_in["data_frame"] = df_in + args_out = build_dataframe(args_in, tt) + df_out = args_out.pop("data_frame").sort_index(axis=1) + assert_frame_equal(df_out, pd.DataFrame(df_out_exp).sort_index(axis=1)) + if transpose: + assert args_out == dict(x=y, y=x, color=color, orientation=None) + else: + assert args_out == dict(x=x, y=y, color=color, orientation=None) + + +@pytest.mark.parametrize("orientation", [None, "v", "h"]) def test_wide_mode_internal_bar_exception(orientation): df_in = pd.DataFrame(dict(a=["q", "r", "s"], b=["t", "u", "v"]), index=[11, 12, 13]) args_in = dict(data_frame=df_in, color=None, orientation=orientation) @@ -207,342 +271,331 @@ def test_wide_mode_internal_bar_exception(orientation): ) -def test_wide_mode_internal_special_cases(): - # given all of the above tests, and given that the melt() code is not sensitive - # to the trace type, we can do all sorts of special-case testing just by focusing - # on build_dataframe(args, go.Scatter) for various values of args, and looking at - # how args and df get mutated +# given all of the above tests, and given that the melt() code is not sensitive +# to the trace type, we can do all sorts of special-case testing just by focusing +# on build_dataframe(args, go.Scatter) for various values of args, and looking at +# how args and df get mutated +special_cases = [] + + +def 
append_special_case(df_in, args_in, args_expect, df_expect): + special_cases.append((df_in, args_in, args_expect, df_expect)) - def assert_df_and_args(df_in, args_in, args_expect, df_expect): - args_in["data_frame"] = df_in - args_out = build_dataframe(args_in, go.Scatter) - df_out = args_out.pop("data_frame") - assert_frame_equal( - df_out.sort_index(axis=1), df_expect.sort_index(axis=1), + +# input is single bare array: column comes out as string "0" +append_special_case( + df_in=[1, 2, 3], + args_in=dict(x=None, y=None, color=None), + args_expect=dict(x="index", y="_value_", color="_column_", orientation="v"), + df_expect=pd.DataFrame( + dict(index=[0, 1, 2], _value_=[1, 2, 3], _column_=["0", "0", "0"]) + ), +) + +# input is single bare Series: column comes out as string "0" +append_special_case( + df_in=pd.Series([1, 2, 3]), + args_in=dict(x=None, y=None, color=None), + args_expect=dict(x="index", y="_value_", color="_column_", orientation="v"), + df_expect=pd.DataFrame( + dict(index=[0, 1, 2], _value_=[1, 2, 3], _column_=["0", "0", "0"]) + ), +) + +# input is a Series from a DF: we pick up the name and index values automatically +df = pd.DataFrame(dict(my_col=[1, 2, 3]), index=["a", "b", "c"]) +append_special_case( + df_in=df["my_col"], + args_in=dict(x=None, y=None, color=None), + args_expect=dict(x="index", y="_value_", color="_column_", orientation="v"), + df_expect=pd.DataFrame( + dict( + index=["a", "b", "c"], + _value_=[1, 2, 3], + _column_=["my_col", "my_col", "my_col"], ) - assert args_out == args_expect - - # input is single bare array: column comes out as string "0" - assert_df_and_args( - df_in=[1, 2, 3], - args_in=dict(x=None, y=None, color=None), - args_expect=dict(x="index", y="_value_", color="_column_", orientation="v"), - df_expect=pd.DataFrame( - dict(index=[0, 1, 2], _value_=[1, 2, 3], _column_=["0", "0", "0"]) - ), - ) + ), +) - # input is single bare Series: column comes out as string "0" - assert_df_and_args( - df_in=pd.Series([1, 2, 3]), 
- args_in=dict(x=None, y=None, color=None), - args_expect=dict(x="index", y="_value_", color="_column_", orientation="v"), - df_expect=pd.DataFrame( - dict(index=[0, 1, 2], _value_=[1, 2, 3], _column_=["0", "0", "0"]) - ), - ) +# input is an index from a DF: treated like a Series basically +df = pd.DataFrame(dict(my_col=[1, 2, 3]), index=["a", "b", "c"]) +df.index.name = "my_index" +append_special_case( + df_in=df.index, + args_in=dict(x=None, y=None, color=None), + args_expect=dict(x="index", y="_value_", color="_column_", orientation="v"), + df_expect=pd.DataFrame( + dict( + index=[0, 1, 2], + _value_=["a", "b", "c"], + _column_=["my_index", "my_index", "my_index"], + ) + ), +) - # input is a Series from a DF: we pick up the name and index values automatically - df = pd.DataFrame(dict(my_col=[1, 2, 3]), index=["a", "b", "c"]) - assert_df_and_args( - df_in=df["my_col"], - args_in=dict(x=None, y=None, color=None), - args_expect=dict(x="index", y="_value_", color="_column_", orientation="v"), - df_expect=pd.DataFrame( - dict( - index=["a", "b", "c"], - _value_=[1, 2, 3], - _column_=["my_col", "my_col", "my_col"], - ) - ), - ) +# input is a data frame with named row and col indices: we grab those +df = pd.DataFrame(dict(my_col=[1, 2, 3]), index=["a", "b", "c"]) +df.index.name = "my_index" +df.columns.name = "my_col_name" +append_special_case( + df_in=df, + args_in=dict(x=None, y=None, color=None), + args_expect=dict(x="my_index", y="_value_", color="my_col_name", orientation="v"), + df_expect=pd.DataFrame( + dict( + my_index=["a", "b", "c"], + _value_=[1, 2, 3], + my_col_name=["my_col", "my_col", "my_col"], + ) + ), +) - # input is an index from a DF: treated like a Series basically - df = pd.DataFrame(dict(my_col=[1, 2, 3]), index=["a", "b", "c"]) - df.index.name = "my_index" - assert_df_and_args( - df_in=df.index, - args_in=dict(x=None, y=None, color=None), - args_expect=dict(x="index", y="_value_", color="_column_", orientation="v"), - df_expect=pd.DataFrame( - 
dict( - index=[0, 1, 2], - _value_=["a", "b", "c"], - _column_=["my_index", "my_index", "my_index"], - ) - ), - ) +# input is array of arrays: treated as rows, columns come out as string "0", "1" +append_special_case( + df_in=[[1, 2], [4, 5]], + args_in=dict(x=None, y=None, color=None), + args_expect=dict(x="index", y="_value_", color="_column_", orientation="v"), + df_expect=pd.DataFrame( + dict(index=[0, 1, 0, 1], _value_=[1, 4, 2, 5], _column_=["0", "0", "1", "1"],) + ), +) - # input is a data frame with named row and col indices: we grab those - df = pd.DataFrame(dict(my_col=[1, 2, 3]), index=["a", "b", "c"]) - df.index.name = "my_index" - df.columns.name = "my_col_name" - assert_df_and_args( - df_in=df, - args_in=dict(x=None, y=None, color=None), - args_expect=dict( - x="my_index", y="_value_", color="my_col_name", orientation="v" - ), - df_expect=pd.DataFrame( - dict( - my_index=["a", "b", "c"], - _value_=[1, 2, 3], - my_col_name=["my_col", "my_col", "my_col"], - ) - ), - ) +# partial-melting by assigning symbol: we pick up that column and don't melt it +append_special_case( + df_in=pd.DataFrame(dict(a=[1, 2], b=[3, 4], symbol_col=["q", "r"])), + args_in=dict(x=None, y=None, color=None, symbol="symbol_col"), + args_expect=dict( + x="index", y="_value_", color="_column_", symbol="symbol_col", orientation="v", + ), + df_expect=pd.DataFrame( + dict( + index=[0, 1, 0, 1], + _value_=[1, 2, 3, 4], + _column_=["a", "a", "b", "b"], + symbol_col=["q", "r", "q", "r"], + ) + ), +) - # input is array of arrays: treated as rows, columns come out as string "0", "1" - assert_df_and_args( - df_in=[[1, 2], [4, 5]], - args_in=dict(x=None, y=None, color=None), - args_expect=dict(x="index", y="_value_", color="_column_", orientation="v"), - df_expect=pd.DataFrame( - dict( - index=[0, 1, 0, 1], _value_=[1, 4, 2, 5], _column_=["0", "0", "1", "1"], - ) - ), - ) +# partial-melting by assigning the same column twice: we pick it up once +append_special_case( + 
df_in=pd.DataFrame(dict(a=[1, 2], b=[3, 4], symbol_col=["q", "r"])), + args_in=dict( + x=None, y=None, color=None, symbol="symbol_col", custom_data=["symbol_col"], + ), + args_expect=dict( + x="index", + y="_value_", + color="_column_", + symbol="symbol_col", + custom_data=["symbol_col"], + orientation="v", + ), + df_expect=pd.DataFrame( + dict( + index=[0, 1, 0, 1], + _value_=[1, 2, 3, 4], + _column_=["a", "a", "b", "b"], + symbol_col=["q", "r", "q", "r"], + ) + ), +) - # partial-melting by assigning symbol: we pick up that column and don't melt it - assert_df_and_args( - df_in=pd.DataFrame(dict(a=[1, 2], b=[3, 4], symbol_col=["q", "r"])), - args_in=dict(x=None, y=None, color=None, symbol="symbol_col"), - args_expect=dict( - x="index", - y="_value_", - color="_column_", - symbol="symbol_col", - orientation="v", - ), - df_expect=pd.DataFrame( - dict( - index=[0, 1, 0, 1], - _value_=[1, 2, 3, 4], - _column_=["a", "a", "b", "b"], - symbol_col=["q", "r", "q", "r"], - ) - ), - ) +# partial-melting by assigning more than one column: we pick them both up +append_special_case( + df_in=pd.DataFrame( + dict(a=[1, 2], b=[3, 4], symbol_col=["q", "r"], data_col=["i", "j"]) + ), + args_in=dict( + x=None, y=None, color=None, symbol="symbol_col", custom_data=["data_col"], + ), + args_expect=dict( + x="index", + y="_value_", + color="_column_", + symbol="symbol_col", + custom_data=["data_col"], + orientation="v", + ), + df_expect=pd.DataFrame( + dict( + index=[0, 1, 0, 1], + _value_=[1, 2, 3, 4], + _column_=["a", "a", "b", "b"], + symbol_col=["q", "r", "q", "r"], + data_col=["i", "j", "i", "j"], + ) + ), +) - # partial-melting by assigning the same column twice: we pick it up once - assert_df_and_args( - df_in=pd.DataFrame(dict(a=[1, 2], b=[3, 4], symbol_col=["q", "r"])), - args_in=dict( - x=None, y=None, color=None, symbol="symbol_col", custom_data=["symbol_col"], - ), - args_expect=dict( - x="index", - y="_value_", - color="_column_", - symbol="symbol_col", - 
custom_data=["symbol_col"], - orientation="v", - ), - df_expect=pd.DataFrame( - dict( - index=[0, 1, 0, 1], - _value_=[1, 2, 3, 4], - _column_=["a", "a", "b", "b"], - symbol_col=["q", "r", "q", "r"], - ) - ), - ) +# partial-melting by assigning symbol to a bare array: we pick it up with the attr name +append_special_case( + df_in=pd.DataFrame(dict(a=[1, 2], b=[3, 4])), + args_in=dict(x=None, y=None, color=None, symbol=["q", "r"]), + args_expect=dict( + x="index", y="_value_", color="_column_", symbol="symbol", orientation="v" + ), + df_expect=pd.DataFrame( + dict( + index=[0, 1, 0, 1], + _value_=[1, 2, 3, 4], + _column_=["a", "a", "b", "b"], + symbol=["q", "r", "q", "r"], + ) + ), +) - # partial-melting by assigning more than one column: we pick them both up - assert_df_and_args( - df_in=pd.DataFrame( - dict(a=[1, 2], b=[3, 4], symbol_col=["q", "r"], data_col=["i", "j"]) - ), - args_in=dict( - x=None, y=None, color=None, symbol="symbol_col", custom_data=["data_col"], - ), - args_expect=dict( - x="index", - y="_value_", - color="_column_", - symbol="symbol_col", - custom_data=["data_col"], - orientation="v", - ), - df_expect=pd.DataFrame( - dict( - index=[0, 1, 0, 1], - _value_=[1, 2, 3, 4], - _column_=["a", "a", "b", "b"], - symbol_col=["q", "r", "q", "r"], - data_col=["i", "j", "i", "j"], - ) - ), - ) +# assigning color to _column_ explicitly: just works +append_special_case( + df_in=pd.DataFrame(dict(a=[1, 2], b=[3, 4])), + args_in=dict(x=None, y=None, color="_column_"), + args_expect=dict(x="index", y="_value_", color="_column_", orientation="v"), + df_expect=pd.DataFrame( + dict(index=[0, 1, 0, 1], _value_=[1, 2, 3, 4], _column_=["a", "a", "b", "b"]) + ), +) - # partial-melting by assigning symbol to a bare array: we pick it up with the attr name - assert_df_and_args( - df_in=pd.DataFrame(dict(a=[1, 2], b=[3, 4])), - args_in=dict(x=None, y=None, color=None, symbol=["q", "r"]), - args_expect=dict( - x="index", y="_value_", color="_column_", symbol="symbol", 
orientation="v" - ), - df_expect=pd.DataFrame( - dict( - index=[0, 1, 0, 1], - _value_=[1, 2, 3, 4], - _column_=["a", "a", "b", "b"], - symbol=["q", "r", "q", "r"], - ) - ), - ) +# assigning color to a different column: _column_ drops out of args +append_special_case( + df_in=pd.DataFrame(dict(a=[1, 2], b=[3, 4], color_col=["q", "r"])), + args_in=dict(x=None, y=None, color="color_col"), + args_expect=dict(x="index", y="_value_", color="color_col", orientation="v"), + df_expect=pd.DataFrame( + dict( + index=[0, 1, 0, 1], + _value_=[1, 2, 3, 4], + _column_=["a", "a", "b", "b"], + color_col=["q", "r", "q", "r"], + ) + ), +) - # assigning color to _column_ explicitly: just works - assert_df_and_args( - df_in=pd.DataFrame(dict(a=[1, 2], b=[3, 4])), - args_in=dict(x=None, y=None, color="_column_"), - args_expect=dict(x="index", y="_value_", color="_column_", orientation="v"), - df_expect=pd.DataFrame( - dict( - index=[0, 1, 0, 1], _value_=[1, 2, 3, 4], _column_=["a", "a", "b", "b"] - ) - ), - ) +# assigning _column_ to something else: just works +append_special_case( + df_in=pd.DataFrame(dict(a=[1, 2], b=[3, 4])), + args_in=dict(x=None, y=None, color=None, symbol="_column_"), + args_expect=dict( + x="index", y="_value_", color="_column_", symbol="_column_", orientation="v" + ), + df_expect=pd.DataFrame( + dict(index=[0, 1, 0, 1], _value_=[1, 2, 3, 4], _column_=["a", "a", "b", "b"],) + ), +) - # assigning color to a different column: _column_ drops out of args - assert_df_and_args( - df_in=pd.DataFrame(dict(a=[1, 2], b=[3, 4], color_col=["q", "r"])), - args_in=dict(x=None, y=None, color="color_col"), - args_expect=dict(x="index", y="_value_", color="color_col", orientation="v"), - df_expect=pd.DataFrame( - dict( - index=[0, 1, 0, 1], - _value_=[1, 2, 3, 4], - _column_=["a", "a", "b", "b"], - color_col=["q", "r", "q", "r"], - ) - ), - ) +# swapping symbol and color: just works +append_special_case( + df_in=pd.DataFrame(dict(a=[1, 2], b=[3, 4], color_col=["q", "r"])), + 
args_in=dict(x=None, y=None, color="color_col", symbol="_column_"), + args_expect=dict( + x="index", y="_value_", color="color_col", symbol="_column_", orientation="v", + ), + df_expect=pd.DataFrame( + dict( + index=[0, 1, 0, 1], + _value_=[1, 2, 3, 4], + _column_=["a", "a", "b", "b"], + color_col=["q", "r", "q", "r"], + ) + ), +) - # assigning _column_ to something else: just works - assert_df_and_args( - df_in=pd.DataFrame(dict(a=[1, 2], b=[3, 4])), - args_in=dict(x=None, y=None, color=None, symbol="_column_"), - args_expect=dict( - x="index", y="_value_", color="_column_", symbol="_column_", orientation="v" - ), - df_expect=pd.DataFrame( - dict( - index=[0, 1, 0, 1], _value_=[1, 2, 3, 4], _column_=["a", "a", "b", "b"], - ) - ), - ) +# a DF with a named column index: have to use that instead of _column_ +df = pd.DataFrame(dict(a=[1, 2], b=[3, 4])) +df.columns.name = "my_col_name" +append_special_case( + df_in=df, + args_in=dict(x=None, y=None, color=None, facet_row="my_col_name"), + args_expect=dict( + x="index", + y="_value_", + color="my_col_name", + facet_row="my_col_name", + orientation="v", + ), + df_expect=pd.DataFrame( + dict( + index=[0, 1, 0, 1], _value_=[1, 2, 3, 4], my_col_name=["a", "a", "b", "b"], + ) + ), +) - # swapping symbol and color: just works - assert_df_and_args( - df_in=pd.DataFrame(dict(a=[1, 2], b=[3, 4], color_col=["q", "r"])), - args_in=dict(x=None, y=None, color="color_col", symbol="_column_"), - args_expect=dict( - x="index", - y="_value_", - color="color_col", - symbol="_column_", - orientation="v", - ), - df_expect=pd.DataFrame( - dict( - index=[0, 1, 0, 1], - _value_=[1, 2, 3, 4], - _column_=["a", "a", "b", "b"], - color_col=["q", "r", "q", "r"], - ) - ), - ) +# passing the DF index into some other attr: works +df = pd.DataFrame(dict(a=[1, 2], b=[3, 4])) +df.columns.name = "my_col_name" +df.index.name = "my_index_name" +append_special_case( + df_in=df, + args_in=dict(x=None, y=None, color=None, hover_name=df.index), + 
args_expect=dict( + x="my_index_name", + y="_value_", + color="my_col_name", + hover_name="my_index_name", + orientation="v", + ), + df_expect=pd.DataFrame( + dict( + my_index_name=[0, 1, 0, 1], + _value_=[1, 2, 3, 4], + my_col_name=["a", "a", "b", "b"], + ) + ), +) - # a DF with a named column index: have to use that instead of _column_ - df = pd.DataFrame(dict(a=[1, 2], b=[3, 4])) - df.columns.name = "my_col_name" - assert_df_and_args( - df_in=df, - args_in=dict(x=None, y=None, color=None, facet_row="my_col_name"), - args_expect=dict( - x="index", - y="_value_", - color="my_col_name", - facet_row="my_col_name", - orientation="v", - ), - df_expect=pd.DataFrame( - dict( - index=[0, 1, 0, 1], - _value_=[1, 2, 3, 4], - my_col_name=["a", "a", "b", "b"], - ) - ), - ) +# assigning _value_ to something: works +df = pd.DataFrame(dict(a=[1, 2], b=[3, 4])) +df.columns.name = "my_col_name" +df.index.name = "my_index_name" +append_special_case( + df_in=df, + args_in=dict(x=None, y=None, color=None, hover_name="_value_"), + args_expect=dict( + x="my_index_name", + y="_value_", + color="my_col_name", + hover_name="_value_", + orientation="v", + ), + df_expect=pd.DataFrame( + dict( + my_index_name=[0, 1, 0, 1], + _value_=[1, 2, 3, 4], + my_col_name=["a", "a", "b", "b"], + ) + ), +) - # passing the DF index into some other attr: works - df = pd.DataFrame(dict(a=[1, 2], b=[3, 4])) - df.columns.name = "my_col_name" - df.index.name = "my_index_name" - assert_df_and_args( - df_in=df, - args_in=dict(x=None, y=None, color=None, hover_name=df.index), - args_expect=dict( - x="my_index_name", - y="_value_", - color="my_col_name", - hover_name="my_index_name", - orientation="v", - ), - df_expect=pd.DataFrame( - dict( - my_index_name=[0, 1, 0, 1], - _value_=[1, 2, 3, 4], - my_col_name=["a", "a", "b", "b"], - ) - ), - ) +# assigning a px.Constant: works +df = pd.DataFrame(dict(a=[1, 2], b=[3, 4])) +df.columns.name = "my_col_name" +df.index.name = "my_index_name" +append_special_case( + 
df_in=df, + args_in=dict(x=None, y=None, color=None, symbol=px.Constant(1)), + args_expect=dict( + x="my_index_name", + y="_value_", + color="my_col_name", + symbol="symbol", + orientation="v", + ), + df_expect=pd.DataFrame( + dict( + my_index_name=[0, 1, 0, 1], + _value_=[1, 2, 3, 4], + my_col_name=["a", "a", "b", "b"], + symbol=[1, 1, 1, 1], + ) + ), +) - # assigning _value_ to something: works - df = pd.DataFrame(dict(a=[1, 2], b=[3, 4])) - df.columns.name = "my_col_name" - df.index.name = "my_index_name" - assert_df_and_args( - df_in=df, - args_in=dict(x=None, y=None, color=None, hover_name="_value_"), - args_expect=dict( - x="my_index_name", - y="_value_", - color="my_col_name", - hover_name="_value_", - orientation="v", - ), - df_expect=pd.DataFrame( - dict( - my_index_name=[0, 1, 0, 1], - _value_=[1, 2, 3, 4], - my_col_name=["a", "a", "b", "b"], - ) - ), - ) - # assigning a px.Constant: works - df = pd.DataFrame(dict(a=[1, 2], b=[3, 4])) - df.columns.name = "my_col_name" - df.index.name = "my_index_name" - assert_df_and_args( - df_in=df, - args_in=dict(x=None, y=None, color=None, symbol=px.Constant(1)), - args_expect=dict( - x="my_index_name", - y="_value_", - color="my_col_name", - symbol="symbol", - orientation="v", - ), - df_expect=pd.DataFrame( - dict( - my_index_name=[0, 1, 0, 1], - _value_=[1, 2, 3, 4], - my_col_name=["a", "a", "b", "b"], - symbol=[1, 1, 1, 1], - ) - ), +@pytest.mark.parametrize("df_in, args_in, args_expect, df_expect", special_cases) +def test_wide_mode_internal_special_cases(df_in, args_in, args_expect, df_expect): + args_in["data_frame"] = df_in + args_out = build_dataframe(args_in, go.Scatter) + df_out = args_out.pop("data_frame") + assert_frame_equal( + df_out.sort_index(axis=1), df_expect.sort_index(axis=1), ) + assert args_out == args_expect From 7b022f19f3088df2e07485d39cf1e6e490a255ee Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Sun, 19 Apr 2020 20:45:23 -0400 Subject: [PATCH 37/69] funnel is wideable --- 
.../python/plotly/plotly/express/_core.py | 31 ++++++++++--------- .../tests/test_core/test_px/test_px_wide.py | 19 ++++++++---- 2 files changed, 29 insertions(+), 21 deletions(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 583b4ed271a..2552f0ef57a 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -424,14 +424,6 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref): def configure_axes(args, constructor, fig, orders): configurators = { - go.Scatter: configure_cartesian_axes, - go.Scattergl: configure_cartesian_axes, - go.Bar: configure_cartesian_axes, - go.Box: configure_cartesian_axes, - go.Violin: configure_cartesian_axes, - go.Histogram: configure_cartesian_axes, - go.Histogram2dContour: configure_cartesian_axes, - go.Histogram2d: configure_cartesian_axes, go.Scatter3d: configure_3d_axes, go.Scatterternary: configure_ternary_axes, go.Scatterpolar: configure_polar_axes, @@ -443,6 +435,10 @@ def configure_axes(args, constructor, fig, orders): go.Scattergeo: configure_geo, go.Choropleth: configure_geo, } + cartesians = [go.Scatter, go.Scattergl, go.Bar, go.Funnel, go.Box, go.Violin] + cartesians += [go.Histogram, go.Histogram2d, go.Histogram2dContour] + for c in cartesians: + configurators[c] = configure_cartesian_axes if constructor in configurators: configurators[constructor](args, fig, orders) @@ -1134,7 +1130,7 @@ def build_dataframe(args, constructor): wide_mode = False var_name = None - if constructor in [go.Scatter, go.Bar, go.Violin, go.Box, go.Histogram]: + if constructor in [go.Scatter, go.Bar, go.Violin, go.Box, go.Histogram, go.Funnel]: wide_cross_name = None if wide_x and wide_y: raise ValueError( @@ -1144,7 +1140,10 @@ def build_dataframe(args, constructor): wide_mode = True args["_column_"] = list(df_input.columns) var_name = df_input.columns.name or "_column_" - wide_orientation = 
args.get("orientation", None) or "v" + if constructor == go.Funnel: + wide_orientation = args.get("orientation", None) or "h" + else: + wide_orientation = args.get("orientation", None) or "v" args["orientation"] = wide_orientation args["wide_cross"] = None elif wide_x != wide_y: @@ -1161,17 +1160,19 @@ def build_dataframe(args, constructor): wide_cross_name = "__x__" if wide_y else "__y__" missing_bar_dim = None - if constructor in [go.Scatter, go.Bar]: + if constructor in [go.Scatter, go.Bar, go.Funnel]: if not wide_mode and (no_x != no_y): for ax in ["x", "y"]: if args.get(ax, None) is None: args[ax] = df_input.index if df_provided else Range() - if constructor == go.Scatter: - if args["orientation"] is None: - args["orientation"] = "v" if ax == "x" else "h" if constructor == go.Bar: missing_bar_dim = ax + else: + if args["orientation"] is None: + args["orientation"] = "v" if ax == "x" else "h" if wide_mode and wide_cross_name is None: + if no_x != no_y and args["orientation"] is None: + args["orientation"] = "v" if no_x else "h" if df_provided: args["wide_cross"] = df_input.index wide_cross_name = df_input.index.name or "index" @@ -1222,7 +1223,7 @@ def build_dataframe(args, constructor): if wide_cross_name == "__y__": wide_cross_name = args["y"] - if constructor == go.Scatter: + if constructor in [go.Scatter, go.Funnel]: args["x" if orient_v else "y"] = wide_cross_name args["y" if orient_v else "x"] = "_value_" args["color"] = args["color"] or var_name diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py index aadd7cd431a..838924eb7d8 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py @@ -47,7 +47,8 @@ def test_is_col_list(): @pytest.mark.parametrize( "px_fn", - [px.scatter, px.line, px.area, px.bar, px.violin, px.box, px.strip, px.histogram], + 
[px.scatter, px.line, px.area, px.bar, px.violin, px.box, px.strip] + + [px.histogram, px.funnel], ) @pytest.mark.parametrize("orientation", [None, "v", "h"]) @pytest.mark.parametrize("style", ["implicit", "explicit"]) @@ -56,14 +57,17 @@ def test_wide_mode_external(px_fn, orientation, style): # inspecting the figure... this is important but clunky, and is mostly a smoke test # allowing us to do more "white box" testing below - x, y = ("y", "x") if orientation == "h" else ("x", "y") + if px_fn != px.funnel: + x, y = ("y", "x") if orientation == "h" else ("x", "y") + else: + x, y = ("y", "x") if orientation != "v" else ("x", "y") xaxis, yaxis = x + "axis", y + "axis" df = pd.DataFrame(dict(a=[1, 2, 3], b=[4, 5, 6], c=[7, 8, 9]), index=[11, 12, 13]) if style == "implicit": fig = px_fn(df, orientation=orientation) - if px_fn in [px.scatter, px.line, px.area, px.bar]: + if px_fn in [px.scatter, px.line, px.area, px.bar, px.funnel]: if style == "explicit": fig = px_fn(**{"data_frame": df, y: list(df.columns), x: df.index}) assert len(fig.data) == 3 @@ -149,7 +153,7 @@ def test_wide_mode_internal(trace_type, x, y, color, orientation): cases = [] for transpose in [True, False]: - for tt in [go.Scatter, go.Bar]: + for tt in [go.Scatter, go.Bar, go.Funnel]: df_in = dict(a=[1, 2], b=[3, 4]) args = dict(x=None, y=["a", "b"], color=None, orientation=None) df_exp = dict( @@ -238,10 +242,13 @@ def test_wide_x_or_y(tt, df_in, args_in, x, y, color, df_out_exp, transpose): args_out = build_dataframe(args_in, tt) df_out = args_out.pop("data_frame").sort_index(axis=1) assert_frame_equal(df_out, pd.DataFrame(df_out_exp).sort_index(axis=1)) + orientation_exp = args_in["orientation"] + if (args_in["x"] is None) != (args_in["y"] is None) and tt != go.Histogram: + orientation_exp = "h" if transpose else "v" if transpose: - assert args_out == dict(x=y, y=x, color=color, orientation=None) + assert args_out == dict(x=y, y=x, color=color, orientation=orientation_exp) else: - assert args_out 
== dict(x=x, y=y, color=color, orientation=None) + assert args_out == dict(x=x, y=y, color=color, orientation=orientation_exp) @pytest.mark.parametrize("orientation", [None, "v", "h"]) From e8027f0cfbca25b582b591a976095257a8062255 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Sun, 19 Apr 2020 21:50:51 -0400 Subject: [PATCH 38/69] all cartesians now support wide mode --- .../plotly/plotly/express/_chart_types.py | 2 + .../python/plotly/plotly/express/_core.py | 18 ++++--- .../tests/test_core/test_px/test_px_wide.py | 53 ++++++++++++------- 3 files changed, 49 insertions(+), 24 deletions(-) diff --git a/packages/python/plotly/plotly/express/_chart_types.py b/packages/python/plotly/plotly/express/_chart_types.py index 190ecab28b9..446246b8f8d 100644 --- a/packages/python/plotly/plotly/express/_chart_types.py +++ b/packages/python/plotly/plotly/express/_chart_types.py @@ -74,6 +74,7 @@ def density_contour( animation_group=None, category_orders={}, labels={}, + orientation=None, color_discrete_sequence=None, color_discrete_map={}, marginal_x=None, @@ -130,6 +131,7 @@ def density_heatmap( animation_group=None, category_orders={}, labels={}, + orientation=None, color_continuous_scale=None, range_color=None, color_continuous_midpoint=None, diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 2552f0ef57a..22f87bfa043 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -34,6 +34,9 @@ direct_attrables + array_attrables + group_attrables + renameable_group_attrables ) +cartesians = [go.Scatter, go.Scattergl, go.Bar, go.Funnel, go.Box, go.Violin] +cartesians += [go.Histogram, go.Histogram2d, go.Histogram2dContour] + class PxDefaults(object): __slots__ = [ @@ -435,8 +438,6 @@ def configure_axes(args, constructor, fig, orders): go.Scattergeo: configure_geo, go.Choropleth: configure_geo, } - cartesians = [go.Scatter, go.Scattergl, go.Bar, go.Funnel, 
go.Box, go.Violin] - cartesians += [go.Histogram, go.Histogram2d, go.Histogram2dContour] for c in cartesians: configurators[c] = configure_cartesian_axes if constructor in configurators: @@ -1130,7 +1131,8 @@ def build_dataframe(args, constructor): wide_mode = False var_name = None - if constructor in [go.Scatter, go.Bar, go.Violin, go.Box, go.Histogram, go.Funnel]: + hist2d_types = [go.Histogram2d, go.Histogram2dContour] + if constructor in cartesians: wide_cross_name = None if wide_x and wide_y: raise ValueError( @@ -1160,7 +1162,7 @@ def build_dataframe(args, constructor): wide_cross_name = "__x__" if wide_y else "__y__" missing_bar_dim = None - if constructor in [go.Scatter, go.Bar, go.Funnel]: + if constructor in [go.Scatter, go.Bar, go.Funnel] + hist2d_types: if not wide_mode and (no_x != no_y): for ax in ["x", "y"]: if args.get(ax, None) is None: @@ -1203,6 +1205,9 @@ def build_dataframe(args, constructor): if args["orientation"] is None: args["orientation"] = "v" if missing_bar_dim == "x" else "h" + if constructor in hist2d_types: + del args["orientation"] + if wide_mode: # at this point, `df_output` is semi-long/semi-wide, but we know which columns # are which, so we melt it and reassign `args` to refer to the newly-tidy @@ -1223,10 +1228,11 @@ def build_dataframe(args, constructor): if wide_cross_name == "__y__": wide_cross_name = args["y"] - if constructor in [go.Scatter, go.Funnel]: + if constructor in [go.Scatter, go.Funnel] + hist2d_types: args["x" if orient_v else "y"] = wide_cross_name args["y" if orient_v else "x"] = "_value_" - args["color"] = args["color"] or var_name + if constructor != go.Histogram2d: + args["color"] = args["color"] or var_name if constructor == go.Bar: if _is_continuous(df_output, "_value_"): args["x" if orient_v else "y"] = wide_cross_name diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py index 838924eb7d8..2e7a64b1f1c 
100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py @@ -48,7 +48,7 @@ def test_is_col_list(): @pytest.mark.parametrize( "px_fn", [px.scatter, px.line, px.area, px.bar, px.violin, px.box, px.strip] - + [px.histogram, px.funnel], + + [px.histogram, px.funnel, px.density_contour, px.density_heatmap], ) @pytest.mark.parametrize("orientation", [None, "v", "h"]) @pytest.mark.parametrize("style", ["implicit", "explicit"]) @@ -67,7 +67,7 @@ def test_wide_mode_external(px_fn, orientation, style): if style == "implicit": fig = px_fn(df, orientation=orientation) - if px_fn in [px.scatter, px.line, px.area, px.bar, px.funnel]: + if px_fn in [px.scatter, px.line, px.area, px.bar, px.funnel, px.density_contour]: if style == "explicit": fig = px_fn(**{"data_frame": df, y: list(df.columns), x: df.index}) assert len(fig.data) == 3 @@ -78,6 +78,14 @@ def test_wide_mode_external(px_fn, orientation, style): assert fig.layout[xaxis].title.text == "index" assert fig.layout[yaxis].title.text == "_value_" assert fig.layout.legend.title.text == "_column_" + if px_fn in [px.density_heatmap]: + if style == "explicit": + fig = px_fn(**{"data_frame": df, y: list(df.columns), x: df.index}) + assert len(fig.data) == 1 + assert list(fig.data[0][x]) == [11, 12, 13, 11, 12, 13, 11, 12, 13] + assert list(fig.data[0][y]) == [1, 2, 3, 4, 5, 6, 7, 8, 9] + assert fig.layout[xaxis].title.text == "index" + assert fig.layout[yaxis].title.text == "_value_" if px_fn in [px.violin, px.box, px.strip]: if style == "explicit": fig = px_fn(**{"data_frame": df, y: list(df.columns)}) @@ -125,7 +133,10 @@ def test_wide_mode_labels_external(): "trace_type,x,y,color", [ (go.Scatter, "index", "_value_", "_column_"), + (go.Histogram2dContour, "index", "_value_", "_column_"), + (go.Histogram2d, "index", "_value_", None), (go.Bar, "index", "_value_", "_column_"), + (go.Funnel, "index", "_value_", "_column_"), 
(go.Box, "_column_", "_value_", None), (go.Violin, "_column_", "_value_", None), (go.Histogram, "_value_", None, "_column_"), @@ -145,30 +156,35 @@ def test_wide_mode_internal(trace_type, x, y, color, orientation): assert_frame_equal( df_out.sort_index(axis=1), pd.DataFrame(expected).sort_index(axis=1), ) - if orientation is None or orientation == "v": - assert args_out == dict(x=x, y=y, color=color, orientation="v") + if trace_type in [go.Histogram2dContour, go.Histogram2d]: + if orientation is None or orientation == "v": + assert args_out == dict(x=x, y=y, color=color) + else: + assert args_out == dict(x=y, y=x, color=color) else: - assert args_out == dict(x=y, y=x, color=color, orientation="h") + if (orientation is None and trace_type != go.Funnel) or orientation == "v": + assert args_out == dict(x=x, y=y, color=color, orientation="v") + else: + assert args_out == dict(x=y, y=x, color=color, orientation="h") cases = [] for transpose in [True, False]: - for tt in [go.Scatter, go.Bar, go.Funnel]: + for tt in [go.Scatter, go.Bar, go.Funnel, go.Histogram2dContour, go.Histogram2d]: + color = None if tt == go.Histogram2d else "_column_" df_in = dict(a=[1, 2], b=[3, 4]) args = dict(x=None, y=["a", "b"], color=None, orientation=None) df_exp = dict( _column_=["a", "a", "b", "b"], _value_=[1, 2, 3, 4], index=[0, 1, 0, 1], ) - cases.append( - (tt, df_in, args, "index", "_value_", "_column_", df_exp, transpose) - ) + cases.append((tt, df_in, args, "index", "_value_", color, df_exp, transpose)) df_in = dict(a=[1, 2], b=[3, 4], c=[5, 6]) args = dict(x="c", y=["a", "b"], color=None, orientation=None) df_exp = dict( _column_=["a", "a", "b", "b"], _value_=[1, 2, 3, 4], c=[5, 6, 5, 6], ) - cases.append((tt, df_in, args, "c", "_value_", "_column_", df_exp, transpose)) + cases.append((tt, df_in, args, "c", "_value_", color, df_exp, transpose)) args = dict(x=None, y=[[1, 2], [3, 4]], color=None, orientation=None) df_exp = dict( @@ -176,9 +192,7 @@ def 
test_wide_mode_internal(trace_type, x, y, color, orientation): _value_=[1, 2, 3, 4], index=[0, 1, 0, 1], ) - cases.append( - (tt, None, args, "index", "_value_", "_column_", df_exp, transpose) - ) + cases.append((tt, None, args, "index", "_value_", color, df_exp, transpose)) for tt in [go.Bar]: # bar categorical exception df_in = dict(a=["q", "r"], b=["s", "t"]) @@ -242,13 +256,16 @@ def test_wide_x_or_y(tt, df_in, args_in, x, y, color, df_out_exp, transpose): args_out = build_dataframe(args_in, tt) df_out = args_out.pop("data_frame").sort_index(axis=1) assert_frame_equal(df_out, pd.DataFrame(df_out_exp).sort_index(axis=1)) - orientation_exp = args_in["orientation"] - if (args_in["x"] is None) != (args_in["y"] is None) and tt != go.Histogram: - orientation_exp = "h" if transpose else "v" if transpose: - assert args_out == dict(x=y, y=x, color=color, orientation=orientation_exp) + args_exp = dict(x=y, y=x, color=color) else: - assert args_out == dict(x=x, y=y, color=color, orientation=orientation_exp) + args_exp = dict(x=x, y=y, color=color) + if tt not in [go.Histogram2dContour, go.Histogram2d]: + orientation_exp = args_in["orientation"] + if (args_in["x"] is None) != (args_in["y"] is None) and tt != go.Histogram: + orientation_exp = "h" if transpose else "v" + args_exp["orientation"] = orientation_exp + assert args_out == args_exp @pytest.mark.parametrize("orientation", [None, "v", "h"]) From a85acf6e3c467f5dd94f93318e0bff99316d65c3 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Thu, 30 Apr 2020 13:51:42 -0400 Subject: [PATCH 39/69] get rid of --- .../python/plotly/plotly/express/_core.py | 40 +-- .../tests/test_core/test_px/test_px_wide.py | 239 +++++++++--------- 2 files changed, 142 insertions(+), 137 deletions(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 67baade4dda..fe5090cbaf5 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ 
b/packages/python/plotly/plotly/express/_core.py @@ -23,7 +23,7 @@ + ["ids", "error_x", "error_x_minus", "error_y", "error_y_minus", "error_z"] + ["error_z_minus", "lat", "lon", "locations", "animation_group"] ) -array_attrables = ["dimensions", "custom_data", "hover_data", "path", "_column_"] +array_attrables = ["dimensions", "custom_data", "hover_data", "path", "wide_variable"] group_attrables = ["animation_frame", "facet_row", "facet_col", "line_group"] renameable_group_attrables = [ "color", # renamed to marker.color or line.color in infer_config @@ -1083,7 +1083,7 @@ def process_args_into_dataframe(args, wide_mode, var_name): ) # Check validity of column name if argument not in df_input.columns: - if wide_mode and argument in ("_value_", var_name): + if wide_mode and argument in ("value", var_name): continue else: err_msg = ( @@ -1154,7 +1154,7 @@ def process_args_into_dataframe(args, wide_mode, var_name): pass else: args[field_name][i] = str(col_name) - if field_name != "_column_": + if field_name != "wide_variable": wide_id_vars.add(str(col_name)) for col_name in ranges: @@ -1215,8 +1215,8 @@ def build_dataframe(args, constructor): ) if df_provided and no_x and no_y: wide_mode = True - args["_column_"] = list(df_input.columns) - var_name = df_input.columns.name or "_column_" + args["wide_variable"] = list(df_input.columns) + var_name = df_input.columns.name or "variable" if constructor == go.Funnel: wide_orientation = args.get("orientation", None) or "h" else: @@ -1225,8 +1225,8 @@ def build_dataframe(args, constructor): args["wide_cross"] = None elif wide_x != wide_y: wide_mode = True - args["_column_"] = args["y"] if wide_y else args["x"] - var_name = "_column_" + args["wide_variable"] = args["y"] if wide_y else args["x"] + var_name = "variable" if constructor == go.Histogram: wide_orientation = "v" if wide_x else "h" else: @@ -1272,8 +1272,8 @@ def build_dataframe(args, constructor): # default and we let the normal auto-orientation-code do its thing later 
other_dim = "x" if missing_bar_dim == "y" else "y" if not _is_continuous(df_output, args[other_dim]): - args[missing_bar_dim] = "_count_" - df_output["_count_"] = 1 + args[missing_bar_dim] = "count" + df_output["count"] = 1 else: # on the other hand, if the non-missing dimension is continuous, then we # can use this information to override the normal auto-orientation code @@ -1287,14 +1287,14 @@ def build_dataframe(args, constructor): # at this point, `df_output` is semi-long/semi-wide, but we know which columns # are which, so we melt it and reassign `args` to refer to the newly-tidy # columns, keeping track of various names and manglings set up above - wide_value_vars = [c for c in args["_column_"] if c not in wide_id_vars] - del args["_column_"] + wide_value_vars = [c for c in args["wide_variable"] if c not in wide_id_vars] + del args["wide_variable"] del args["wide_cross"] df_output = df_output.melt( id_vars=wide_id_vars, value_vars=wide_value_vars, var_name=var_name, - value_name="_value_", + value_name="value", ) df_output[var_name] = df_output[var_name].astype(str) orient_v = wide_orientation == "v" @@ -1305,24 +1305,24 @@ def build_dataframe(args, constructor): if constructor in [go.Scatter, go.Funnel] + hist2d_types: args["x" if orient_v else "y"] = wide_cross_name - args["y" if orient_v else "x"] = "_value_" + args["y" if orient_v else "x"] = "value" if constructor != go.Histogram2d: args["color"] = args["color"] or var_name if constructor == go.Bar: - if _is_continuous(df_output, "_value_"): + if _is_continuous(df_output, "value"): args["x" if orient_v else "y"] = wide_cross_name - args["y" if orient_v else "x"] = "_value_" + args["y" if orient_v else "x"] = "value" args["color"] = args["color"] or var_name else: - args["x" if orient_v else "y"] = "_value_" - args["y" if orient_v else "x"] = "_count_" - df_output["_count_"] = 1 + args["x" if orient_v else "y"] = "value" + args["y" if orient_v else "x"] = "count" + df_output["count"] = 1 args["color"] = 
args["color"] or var_name if constructor in [go.Violin, go.Box]: args["x" if orient_v else "y"] = wide_cross_name or var_name - args["y" if orient_v else "x"] = "_value_" + args["y" if orient_v else "x"] = "value" if constructor == go.Histogram: - args["x" if orient_v else "y"] = "_value_" + args["x" if orient_v else "y"] = "value" args["y" if orient_v else "x"] = wide_cross_name args["color"] = args["color"] or var_name diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py index 2e7a64b1f1c..ee83467d38a 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py @@ -76,8 +76,8 @@ def test_wide_mode_external(px_fn, orientation, style): assert list(fig.data[1][x]) == [11, 12, 13] assert list(fig.data[1][y]) == [4, 5, 6] assert fig.layout[xaxis].title.text == "index" - assert fig.layout[yaxis].title.text == "_value_" - assert fig.layout.legend.title.text == "_column_" + assert fig.layout[yaxis].title.text == "value" + assert fig.layout.legend.title.text == "variable" if px_fn in [px.density_heatmap]: if style == "explicit": fig = px_fn(**{"data_frame": df, y: list(df.columns), x: df.index}) @@ -85,22 +85,22 @@ def test_wide_mode_external(px_fn, orientation, style): assert list(fig.data[0][x]) == [11, 12, 13, 11, 12, 13, 11, 12, 13] assert list(fig.data[0][y]) == [1, 2, 3, 4, 5, 6, 7, 8, 9] assert fig.layout[xaxis].title.text == "index" - assert fig.layout[yaxis].title.text == "_value_" + assert fig.layout[yaxis].title.text == "value" if px_fn in [px.violin, px.box, px.strip]: if style == "explicit": fig = px_fn(**{"data_frame": df, y: list(df.columns)}) assert len(fig.data) == 1 assert list(fig.data[0][x]) == ["a"] * 3 + ["b"] * 3 + ["c"] * 3 assert list(fig.data[0][y]) == list(range(1, 10)) - assert fig.layout[yaxis].title.text == "_value_" - assert 
fig.layout[xaxis].title.text == "_column_" + assert fig.layout[yaxis].title.text == "value" + assert fig.layout[xaxis].title.text == "variable" if px_fn in [px.histogram]: if style == "explicit": fig = px_fn(**{"data_frame": df, x: list(df.columns)}) assert len(fig.data) == 3 assert list(fig.data[1][x]) == [4, 5, 6] - assert fig.layout.legend.title.text == "_column_" - assert fig.layout[xaxis].title.text == "_value_" + assert fig.layout.legend.title.text == "variable" + assert fig.layout[xaxis].title.text == "value" def test_wide_mode_labels_external(): @@ -108,9 +108,9 @@ def test_wide_mode_labels_external(): df = pd.DataFrame(dict(a=[1, 2, 3], b=[4, 5, 6], c=[7, 8, 9]), index=[11, 12, 13]) fig = px.bar(df) assert fig.layout.xaxis.title.text == "index" - assert fig.layout.yaxis.title.text == "_value_" - assert fig.layout.legend.title.text == "_column_" - labels = dict(index="my index", _value_="my value", _column_="my column") + assert fig.layout.yaxis.title.text == "value" + assert fig.layout.legend.title.text == "variable" + labels = dict(index="my index", value="my value", variable="my column") fig = px.bar(df, labels=labels) assert fig.layout.xaxis.title.text == "my index" assert fig.layout.yaxis.title.text == "my value" @@ -119,7 +119,7 @@ def test_wide_mode_labels_external(): df.columns.name = "my column" fig = px.bar(df) assert fig.layout.xaxis.title.text == "my index" - assert fig.layout.yaxis.title.text == "_value_" + assert fig.layout.yaxis.title.text == "value" assert fig.layout.legend.title.text == "my column" @@ -132,14 +132,14 @@ def test_wide_mode_labels_external(): @pytest.mark.parametrize( "trace_type,x,y,color", [ - (go.Scatter, "index", "_value_", "_column_"), - (go.Histogram2dContour, "index", "_value_", "_column_"), - (go.Histogram2d, "index", "_value_", None), - (go.Bar, "index", "_value_", "_column_"), - (go.Funnel, "index", "_value_", "_column_"), - (go.Box, "_column_", "_value_", None), - (go.Violin, "_column_", "_value_", None), - 
(go.Histogram, "_value_", None, "_column_"), + (go.Scatter, "index", "value", "variable"), + (go.Histogram2dContour, "index", "value", "variable"), + (go.Histogram2d, "index", "value", None), + (go.Bar, "index", "value", "variable"), + (go.Funnel, "index", "value", "variable"), + (go.Box, "variable", "value", None), + (go.Violin, "variable", "value", None), + (go.Histogram, "value", None, "variable"), ], ) @pytest.mark.parametrize("orientation", [None, "v", "h"]) @@ -148,9 +148,7 @@ def test_wide_mode_internal(trace_type, x, y, color, orientation): args_in = dict(data_frame=df_in, color=None, orientation=orientation) args_out = build_dataframe(args_in, trace_type) df_out = args_out.pop("data_frame") - expected = dict( - _column_=["a", "a", "a", "b", "b", "b"], _value_=[1, 2, 3, 4, 5, 6], - ) + expected = dict(variable=["a", "a", "a", "b", "b", "b"], value=[1, 2, 3, 4, 5, 6],) if x == "index": expected["index"] = [11, 12, 13, 11, 12, 13] assert_frame_equal( @@ -171,81 +169,94 @@ def test_wide_mode_internal(trace_type, x, y, color, orientation): cases = [] for transpose in [True, False]: for tt in [go.Scatter, go.Bar, go.Funnel, go.Histogram2dContour, go.Histogram2d]: - color = None if tt == go.Histogram2d else "_column_" + color = None if tt == go.Histogram2d else "variable" df_in = dict(a=[1, 2], b=[3, 4]) args = dict(x=None, y=["a", "b"], color=None, orientation=None) df_exp = dict( - _column_=["a", "a", "b", "b"], _value_=[1, 2, 3, 4], index=[0, 1, 0, 1], + variable=["a", "a", "b", "b"], value=[1, 2, 3, 4], index=[0, 1, 0, 1], ) - cases.append((tt, df_in, args, "index", "_value_", color, df_exp, transpose)) + cases.append((tt, df_in, args, "index", "value", color, df_exp, transpose)) df_in = dict(a=[1, 2], b=[3, 4], c=[5, 6]) args = dict(x="c", y=["a", "b"], color=None, orientation=None) df_exp = dict( - _column_=["a", "a", "b", "b"], _value_=[1, 2, 3, 4], c=[5, 6, 5, 6], + variable=["a", "a", "b", "b"], value=[1, 2, 3, 4], c=[5, 6, 5, 6], ) - cases.append((tt, 
df_in, args, "c", "_value_", color, df_exp, transpose)) + cases.append((tt, df_in, args, "c", "value", color, df_exp, transpose)) args = dict(x=None, y=[[1, 2], [3, 4]], color=None, orientation=None) df_exp = dict( - _column_=["_column__0", "_column__0", "_column__1", "_column__1"], - _value_=[1, 2, 3, 4], + variable=[ + "wide_variable_0", + "wide_variable_0", + "wide_variable_1", + "wide_variable_1", + ], + value=[1, 2, 3, 4], index=[0, 1, 0, 1], ) - cases.append((tt, None, args, "index", "_value_", color, df_exp, transpose)) + cases.append((tt, None, args, "index", "value", color, df_exp, transpose)) for tt in [go.Bar]: # bar categorical exception df_in = dict(a=["q", "r"], b=["s", "t"]) args = dict(x=None, y=["a", "b"], color=None, orientation=None) df_exp = dict( - _column_=["a", "a", "b", "b"], - _value_=["q", "r", "s", "t"], + variable=["a", "a", "b", "b"], + value=["q", "r", "s", "t"], index=[0, 1, 0, 1], - _count_=[1, 1, 1, 1], - ) - cases.append( - (tt, df_in, args, "_value_", "_count_", "_column_", df_exp, transpose) + count=[1, 1, 1, 1], ) + cases.append((tt, df_in, args, "value", "count", "variable", df_exp, transpose)) for tt in [go.Violin, go.Box]: df_in = dict(a=[1, 2], b=[3, 4]) args = dict(x=None, y=["a", "b"], color=None, orientation=None) - df_exp = dict(_column_=["a", "a", "b", "b"], _value_=[1, 2, 3, 4],) - cases.append((tt, df_in, args, "_column_", "_value_", None, df_exp, transpose)) + df_exp = dict(variable=["a", "a", "b", "b"], value=[1, 2, 3, 4],) + cases.append((tt, df_in, args, "variable", "value", None, df_exp, transpose)) df_in = dict(a=[1, 2], b=[3, 4], c=[5, 6]) args = dict(x="c", y=["a", "b"], color=None, orientation=None) df_exp = dict( - _column_=["a", "a", "b", "b"], _value_=[1, 2, 3, 4], c=[5, 6, 5, 6], + variable=["a", "a", "b", "b"], value=[1, 2, 3, 4], c=[5, 6, 5, 6], ) - cases.append((tt, df_in, args, "c", "_value_", None, df_exp, transpose)) + cases.append((tt, df_in, args, "c", "value", None, df_exp, transpose)) args = 
dict(x=None, y=[[1, 2], [3, 4]], color=None, orientation=None) df_exp = dict( - _column_=["_column__0", "_column__0", "_column__1", "_column__1"], - _value_=[1, 2, 3, 4], + variable=[ + "wide_variable_0", + "wide_variable_0", + "wide_variable_1", + "wide_variable_1", + ], + value=[1, 2, 3, 4], ) - cases.append((tt, None, args, "_column_", "_value_", None, df_exp, transpose)) + cases.append((tt, None, args, "variable", "value", None, df_exp, transpose)) for tt in [go.Histogram]: df_in = dict(a=[1, 2], b=[3, 4]) args = dict(x=None, y=["a", "b"], color=None, orientation=None) - df_exp = dict(_column_=["a", "a", "b", "b"], _value_=[1, 2, 3, 4],) - cases.append((tt, df_in, args, None, "_value_", "_column_", df_exp, transpose)) + df_exp = dict(variable=["a", "a", "b", "b"], value=[1, 2, 3, 4],) + cases.append((tt, df_in, args, None, "value", "variable", df_exp, transpose)) df_in = dict(a=[1, 2], b=[3, 4], c=[5, 6]) args = dict(x="c", y=["a", "b"], color=None, orientation=None) df_exp = dict( - _column_=["a", "a", "b", "b"], _value_=[1, 2, 3, 4], c=[5, 6, 5, 6], + variable=["a", "a", "b", "b"], value=[1, 2, 3, 4], c=[5, 6, 5, 6], ) - cases.append((tt, df_in, args, "c", "_value_", "_column_", df_exp, transpose)) + cases.append((tt, df_in, args, "c", "value", "variable", df_exp, transpose)) args = dict(x=None, y=[[1, 2], [3, 4]], color=None, orientation=None) df_exp = dict( - _column_=["_column__0", "_column__0", "_column__1", "_column__1"], - _value_=[1, 2, 3, 4], + variable=[ + "wide_variable_0", + "wide_variable_0", + "wide_variable_1", + "wide_variable_1", + ], + value=[1, 2, 3, 4], ) - cases.append((tt, None, args, None, "_value_", "_column_", df_exp, transpose)) + cases.append((tt, None, args, None, "value", "variable", df_exp, transpose)) @pytest.mark.parametrize("tt,df_in,args_in,x,y,color,df_out_exp,transpose", cases) @@ -279,20 +290,16 @@ def test_wide_mode_internal_bar_exception(orientation): pd.DataFrame( dict( index=[11, 12, 13, 11, 12, 13], - _column_=["a", 
"a", "a", "b", "b", "b"], - _value_=["q", "r", "s", "t", "u", "v"], - _count_=[1, 1, 1, 1, 1, 1], + variable=["a", "a", "a", "b", "b", "b"], + value=["q", "r", "s", "t", "u", "v"], + count=[1, 1, 1, 1, 1, 1], ) ).sort_index(axis=1), ) if orientation is None or orientation == "v": - assert args_out == dict( - x="_value_", y="_count_", color="_column_", orientation="v" - ) + assert args_out == dict(x="value", y="count", color="variable", orientation="v") else: - assert args_out == dict( - x="_count_", y="_value_", color="_column_", orientation="h" - ) + assert args_out == dict(x="count", y="value", color="variable", orientation="h") # given all of the above tests, and given that the melt() code is not sensitive @@ -310,9 +317,9 @@ def append_special_case(df_in, args_in, args_expect, df_expect): append_special_case( df_in=[1, 2, 3], args_in=dict(x=None, y=None, color=None), - args_expect=dict(x="index", y="_value_", color="_column_", orientation="v"), + args_expect=dict(x="index", y="value", color="variable", orientation="v"), df_expect=pd.DataFrame( - dict(index=[0, 1, 2], _value_=[1, 2, 3], _column_=["0", "0", "0"]) + dict(index=[0, 1, 2], value=[1, 2, 3], variable=["0", "0", "0"]) ), ) @@ -320,9 +327,9 @@ def append_special_case(df_in, args_in, args_expect, df_expect): append_special_case( df_in=pd.Series([1, 2, 3]), args_in=dict(x=None, y=None, color=None), - args_expect=dict(x="index", y="_value_", color="_column_", orientation="v"), + args_expect=dict(x="index", y="value", color="variable", orientation="v"), df_expect=pd.DataFrame( - dict(index=[0, 1, 2], _value_=[1, 2, 3], _column_=["0", "0", "0"]) + dict(index=[0, 1, 2], value=[1, 2, 3], variable=["0", "0", "0"]) ), ) @@ -331,12 +338,12 @@ def append_special_case(df_in, args_in, args_expect, df_expect): append_special_case( df_in=df["my_col"], args_in=dict(x=None, y=None, color=None), - args_expect=dict(x="index", y="_value_", color="_column_", orientation="v"), + args_expect=dict(x="index", y="value", 
color="variable", orientation="v"), df_expect=pd.DataFrame( dict( index=["a", "b", "c"], - _value_=[1, 2, 3], - _column_=["my_col", "my_col", "my_col"], + value=[1, 2, 3], + variable=["my_col", "my_col", "my_col"], ) ), ) @@ -347,12 +354,12 @@ def append_special_case(df_in, args_in, args_expect, df_expect): append_special_case( df_in=df.index, args_in=dict(x=None, y=None, color=None), - args_expect=dict(x="index", y="_value_", color="_column_", orientation="v"), + args_expect=dict(x="index", y="value", color="variable", orientation="v"), df_expect=pd.DataFrame( dict( index=[0, 1, 2], - _value_=["a", "b", "c"], - _column_=["my_index", "my_index", "my_index"], + value=["a", "b", "c"], + variable=["my_index", "my_index", "my_index"], ) ), ) @@ -364,11 +371,11 @@ def append_special_case(df_in, args_in, args_expect, df_expect): append_special_case( df_in=df, args_in=dict(x=None, y=None, color=None), - args_expect=dict(x="my_index", y="_value_", color="my_col_name", orientation="v"), + args_expect=dict(x="my_index", y="value", color="my_col_name", orientation="v"), df_expect=pd.DataFrame( dict( my_index=["a", "b", "c"], - _value_=[1, 2, 3], + value=[1, 2, 3], my_col_name=["my_col", "my_col", "my_col"], ) ), @@ -378,9 +385,9 @@ def append_special_case(df_in, args_in, args_expect, df_expect): append_special_case( df_in=[[1, 2], [4, 5]], args_in=dict(x=None, y=None, color=None), - args_expect=dict(x="index", y="_value_", color="_column_", orientation="v"), + args_expect=dict(x="index", y="value", color="variable", orientation="v"), df_expect=pd.DataFrame( - dict(index=[0, 1, 0, 1], _value_=[1, 4, 2, 5], _column_=["0", "0", "1", "1"],) + dict(index=[0, 1, 0, 1], value=[1, 4, 2, 5], variable=["0", "0", "1", "1"],) ), ) @@ -389,13 +396,13 @@ def append_special_case(df_in, args_in, args_expect, df_expect): df_in=pd.DataFrame(dict(a=[1, 2], b=[3, 4], symbol_col=["q", "r"])), args_in=dict(x=None, y=None, color=None, symbol="symbol_col"), args_expect=dict( - x="index", 
y="_value_", color="_column_", symbol="symbol_col", orientation="v", + x="index", y="value", color="variable", symbol="symbol_col", orientation="v", ), df_expect=pd.DataFrame( dict( index=[0, 1, 0, 1], - _value_=[1, 2, 3, 4], - _column_=["a", "a", "b", "b"], + value=[1, 2, 3, 4], + variable=["a", "a", "b", "b"], symbol_col=["q", "r", "q", "r"], ) ), @@ -409,8 +416,8 @@ def append_special_case(df_in, args_in, args_expect, df_expect): ), args_expect=dict( x="index", - y="_value_", - color="_column_", + y="value", + color="variable", symbol="symbol_col", custom_data=["symbol_col"], orientation="v", @@ -418,8 +425,8 @@ def append_special_case(df_in, args_in, args_expect, df_expect): df_expect=pd.DataFrame( dict( index=[0, 1, 0, 1], - _value_=[1, 2, 3, 4], - _column_=["a", "a", "b", "b"], + value=[1, 2, 3, 4], + variable=["a", "a", "b", "b"], symbol_col=["q", "r", "q", "r"], ) ), @@ -435,8 +442,8 @@ def append_special_case(df_in, args_in, args_expect, df_expect): ), args_expect=dict( x="index", - y="_value_", - color="_column_", + y="value", + color="variable", symbol="symbol_col", custom_data=["data_col"], orientation="v", @@ -444,8 +451,8 @@ def append_special_case(df_in, args_in, args_expect, df_expect): df_expect=pd.DataFrame( dict( index=[0, 1, 0, 1], - _value_=[1, 2, 3, 4], - _column_=["a", "a", "b", "b"], + value=[1, 2, 3, 4], + variable=["a", "a", "b", "b"], symbol_col=["q", "r", "q", "r"], data_col=["i", "j", "i", "j"], ) @@ -457,73 +464,73 @@ def append_special_case(df_in, args_in, args_expect, df_expect): df_in=pd.DataFrame(dict(a=[1, 2], b=[3, 4])), args_in=dict(x=None, y=None, color=None, symbol=["q", "r"]), args_expect=dict( - x="index", y="_value_", color="_column_", symbol="symbol", orientation="v" + x="index", y="value", color="variable", symbol="symbol", orientation="v" ), df_expect=pd.DataFrame( dict( index=[0, 1, 0, 1], - _value_=[1, 2, 3, 4], - _column_=["a", "a", "b", "b"], + value=[1, 2, 3, 4], + variable=["a", "a", "b", "b"], symbol=["q", "r", 
"q", "r"], ) ), ) -# assigning color to _column_ explicitly: just works +# assigning color to variable explicitly: just works append_special_case( df_in=pd.DataFrame(dict(a=[1, 2], b=[3, 4])), - args_in=dict(x=None, y=None, color="_column_"), - args_expect=dict(x="index", y="_value_", color="_column_", orientation="v"), + args_in=dict(x=None, y=None, color="variable"), + args_expect=dict(x="index", y="value", color="variable", orientation="v"), df_expect=pd.DataFrame( - dict(index=[0, 1, 0, 1], _value_=[1, 2, 3, 4], _column_=["a", "a", "b", "b"]) + dict(index=[0, 1, 0, 1], value=[1, 2, 3, 4], variable=["a", "a", "b", "b"]) ), ) -# assigning color to a different column: _column_ drops out of args +# assigning color to a different column: variable drops out of args append_special_case( df_in=pd.DataFrame(dict(a=[1, 2], b=[3, 4], color_col=["q", "r"])), args_in=dict(x=None, y=None, color="color_col"), - args_expect=dict(x="index", y="_value_", color="color_col", orientation="v"), + args_expect=dict(x="index", y="value", color="color_col", orientation="v"), df_expect=pd.DataFrame( dict( index=[0, 1, 0, 1], - _value_=[1, 2, 3, 4], - _column_=["a", "a", "b", "b"], + value=[1, 2, 3, 4], + variable=["a", "a", "b", "b"], color_col=["q", "r", "q", "r"], ) ), ) -# assigning _column_ to something else: just works +# assigning variable to something else: just works append_special_case( df_in=pd.DataFrame(dict(a=[1, 2], b=[3, 4])), - args_in=dict(x=None, y=None, color=None, symbol="_column_"), + args_in=dict(x=None, y=None, color=None, symbol="variable"), args_expect=dict( - x="index", y="_value_", color="_column_", symbol="_column_", orientation="v" + x="index", y="value", color="variable", symbol="variable", orientation="v" ), df_expect=pd.DataFrame( - dict(index=[0, 1, 0, 1], _value_=[1, 2, 3, 4], _column_=["a", "a", "b", "b"],) + dict(index=[0, 1, 0, 1], value=[1, 2, 3, 4], variable=["a", "a", "b", "b"],) ), ) # swapping symbol and color: just works append_special_case( 
df_in=pd.DataFrame(dict(a=[1, 2], b=[3, 4], color_col=["q", "r"])), - args_in=dict(x=None, y=None, color="color_col", symbol="_column_"), + args_in=dict(x=None, y=None, color="color_col", symbol="variable"), args_expect=dict( - x="index", y="_value_", color="color_col", symbol="_column_", orientation="v", + x="index", y="value", color="color_col", symbol="variable", orientation="v", ), df_expect=pd.DataFrame( dict( index=[0, 1, 0, 1], - _value_=[1, 2, 3, 4], - _column_=["a", "a", "b", "b"], + value=[1, 2, 3, 4], + variable=["a", "a", "b", "b"], color_col=["q", "r", "q", "r"], ) ), ) -# a DF with a named column index: have to use that instead of _column_ +# a DF with a named column index: have to use that instead of variable df = pd.DataFrame(dict(a=[1, 2], b=[3, 4])) df.columns.name = "my_col_name" append_special_case( @@ -531,15 +538,13 @@ def append_special_case(df_in, args_in, args_expect, df_expect): args_in=dict(x=None, y=None, color=None, facet_row="my_col_name"), args_expect=dict( x="index", - y="_value_", + y="value", color="my_col_name", facet_row="my_col_name", orientation="v", ), df_expect=pd.DataFrame( - dict( - index=[0, 1, 0, 1], _value_=[1, 2, 3, 4], my_col_name=["a", "a", "b", "b"], - ) + dict(index=[0, 1, 0, 1], value=[1, 2, 3, 4], my_col_name=["a", "a", "b", "b"],) ), ) @@ -552,7 +557,7 @@ def append_special_case(df_in, args_in, args_expect, df_expect): args_in=dict(x=None, y=None, color=None, hover_name=df.index), args_expect=dict( x="my_index_name", - y="_value_", + y="value", color="my_col_name", hover_name="my_index_name", orientation="v", @@ -560,30 +565,30 @@ def append_special_case(df_in, args_in, args_expect, df_expect): df_expect=pd.DataFrame( dict( my_index_name=[0, 1, 0, 1], - _value_=[1, 2, 3, 4], + value=[1, 2, 3, 4], my_col_name=["a", "a", "b", "b"], ) ), ) -# assigning _value_ to something: works +# assigning value to something: works df = pd.DataFrame(dict(a=[1, 2], b=[3, 4])) df.columns.name = "my_col_name" df.index.name = 
"my_index_name" append_special_case( df_in=df, - args_in=dict(x=None, y=None, color=None, hover_name="_value_"), + args_in=dict(x=None, y=None, color=None, hover_name="value"), args_expect=dict( x="my_index_name", - y="_value_", + y="value", color="my_col_name", - hover_name="_value_", + hover_name="value", orientation="v", ), df_expect=pd.DataFrame( dict( my_index_name=[0, 1, 0, 1], - _value_=[1, 2, 3, 4], + value=[1, 2, 3, 4], my_col_name=["a", "a", "b", "b"], ) ), @@ -598,7 +603,7 @@ def append_special_case(df_in, args_in, args_expect, df_expect): args_in=dict(x=None, y=None, color=None, symbol=px.Constant(1)), args_expect=dict( x="my_index_name", - y="_value_", + y="value", color="my_col_name", symbol="symbol", orientation="v", @@ -606,7 +611,7 @@ def append_special_case(df_in, args_in, args_expect, df_expect): df_expect=pd.DataFrame( dict( my_index_name=[0, 1, 0, 1], - _value_=[1, 2, 3, 4], + value=[1, 2, 3, 4], my_col_name=["a", "a", "b", "b"], symbol=[1, 1, 1, 1], ) From dff2c119e97f34beb6e12db10e39c716807fd537 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Thu, 30 Apr 2020 15:09:50 -0400 Subject: [PATCH 40/69] raise errors on multi-index in column/index in wide mode --- packages/python/plotly/plotly/express/_core.py | 12 ++++++++++++ .../tests/test_core/test_px/test_px_wide.py | 18 ++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index fe5090cbaf5..d0dbacdc042 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1215,6 +1215,12 @@ def build_dataframe(args, constructor): ) if df_provided and no_x and no_y: wide_mode = True + if isinstance(df_input.columns, pd.MultiIndex): + raise TypeError( + "Data frame columns is a pandas MultiIndex. " + "pandas MultiIndex is not supported by plotly express " + "at the moment." 
+ ) args["wide_variable"] = list(df_input.columns) var_name = df_input.columns.name or "variable" if constructor == go.Funnel: @@ -1251,6 +1257,12 @@ def build_dataframe(args, constructor): if no_x != no_y and args["orientation"] is None: args["orientation"] = "v" if no_x else "h" if df_provided: + if isinstance(df_input.index, pd.MultiIndex): + raise TypeError( + "Data frame index is a pandas MultiIndex. " + "pandas MultiIndex is not supported by plotly express " + "at the moment." + ) args["wide_cross"] = df_input.index wide_cross_name = df_input.index.name or "index" else: diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py index ee83467d38a..68666979758 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py @@ -628,3 +628,21 @@ def test_wide_mode_internal_special_cases(df_in, args_in, args_expect, df_expect df_out.sort_index(axis=1), df_expect.sort_index(axis=1), ) assert args_out == args_expect + + +def test_multi_index(): + df = pd.DataFrame([[1, 2, 3, 4], [3, 4, 5, 6], [1, 2, 3, 4], [3, 4, 5, 6]]) + df.index = [["a", "a", "b", "b"], ["c", "d", "c", "d"]] + with pytest.raises(TypeError) as err_msg: + px.scatter(df) + assert "pandas MultiIndex is not supported by plotly express" in str( + err_msg.value + ) + + df = pd.DataFrame([[1, 2, 3, 4], [3, 4, 5, 6], [1, 2, 3, 4], [3, 4, 5, 6]]) + df.columns = [["e", "e", "f", "f"], ["g", "h", "g", "h"]] + with pytest.raises(TypeError) as err_msg: + px.scatter(df) + assert "pandas MultiIndex is not supported by plotly express" in str( + err_msg.value + ) From f3039ac4fa69ab5b7b93a2a97f8b8f989be970b3 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Thu, 30 Apr 2020 15:59:15 -0400 Subject: [PATCH 41/69] parameterize test_px --- .../plotly/tests/test_core/test_px/test_px.py | 73 +++++++++---------- 1 file changed, 
36 insertions(+), 37 deletions(-) diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px.py index 60699e6e21d..21b8b0ba0b7 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px.py @@ -1,6 +1,7 @@ import plotly.express as px import numpy as np import pytest +from itertools import permutations def test_scatter(): @@ -185,44 +186,40 @@ def test_px_templates(): assert fig.layout.yaxis3.showgrid -def test_orthogonal_orderings(): - from itertools import permutations - - df = px.data.tips() - +def assert_orderings(days_order, days_check, times_order, times_check): symbol_sequence = ["circle", "diamond", "square", "cross"] color_sequence = ["red", "blue"] + fig = px.scatter( + px.data.tips(), + x="total_bill", + y="tip", + facet_row="time", + facet_col="day", + color="time", + symbol="day", + symbol_sequence=symbol_sequence, + color_discrete_sequence=color_sequence, + category_orders=dict(day=days_order, time=times_order), + ) + + for col in range(len(days_check)): + for trace in fig.select_traces(col=col + 1): + assert days_check[col] in trace.hovertemplate - def assert_orderings(days_order, days_check, times_order, times_check): - fig = px.scatter( - df, - x="total_bill", - y="tip", - facet_row="time", - facet_col="day", - color="time", - symbol="day", - symbol_sequence=symbol_sequence, - color_discrete_sequence=color_sequence, - category_orders=dict(day=days_order, time=times_order), - ) - - for col in range(len(days_check)): - for trace in fig.select_traces(col=col + 1): - assert days_check[col] in trace.hovertemplate - - for row in range(len(times_check)): - for trace in fig.select_traces(row=2 - row): - assert times_check[row] in trace.hovertemplate - - for trace in fig.data: - for i, day in enumerate(days_check): - if day in trace.name: - assert trace.marker.symbol == symbol_sequence[i] - 
for i, time in enumerate(times_check): - if time in trace.name: - assert trace.marker.color == color_sequence[i] + for row in range(len(times_check)): + for trace in fig.select_traces(row=2 - row): + assert times_check[row] in trace.hovertemplate + for trace in fig.data: + for i, day in enumerate(days_check): + if day in trace.name: + assert trace.marker.symbol == symbol_sequence[i] + for i, time in enumerate(times_check): + if time in trace.name: + assert trace.marker.color == color_sequence[i] + + +def test_noisy_orthogonal_orderings(): assert_orderings( ["x", "Sun", "Sat", "y", "Fri", "z"], # add extra noise, missing Thur ["Sun", "Sat", "Fri", "Thur"], # Thur is at the back @@ -230,9 +227,11 @@ def assert_orderings(days_order, days_check, times_order, times_check): ["Lunch", "Dinner"], # Dinner is at the back ) - for days in permutations(df["day"].unique()): - for times in permutations(df["time"].unique()): - assert_orderings(days, days, times, times) + +@pytest.mark.parametrize("days", permutations(["Sun", "Sat", "Fri", "Thur"])) +@pytest.mark.parametrize("times", permutations(["Lunch", "Dinner"])) +def test_orthogonal_orderings(days, times): + assert_orderings(days, days, times, times) def test_permissive_defaults(): From f73383101c49bd6506bf3477e24b9b0694faa3de Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Thu, 30 Apr 2020 16:33:24 -0400 Subject: [PATCH 42/69] manage ugly name collisions --- .../python/plotly/plotly/express/_core.py | 63 ++++++++++--------- .../tests/test_core/test_px/test_px_input.py | 30 ++++----- .../tests/test_core/test_px/test_px_wide.py | 18 ++++-- 3 files changed, 59 insertions(+), 52 deletions(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index d0dbacdc042..e2069172d39 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -970,7 +970,13 @@ def _isinstance_listlike(x): return True -def 
process_args_into_dataframe(args, wide_mode, var_name): +def _escape_col_name(df_input, col_name): + while df_input is not None and col_name in df_input.columns: + col_name = "_" + col_name + return col_name + + +def process_args_into_dataframe(args, wide_mode, var_name, value_name): """ After this function runs, the `all_attrables` keys of `args` all contain only references to columns of `df_output`. This function handles the extraction of data @@ -978,19 +984,10 @@ def process_args_into_dataframe(args, wide_mode, var_name): data to `df_output` and then replaces `args["attrable"]` with the appropriate reference. """ - for field in args: - if field in array_attrables and args[field] is not None: - args[field] = ( - OrderedDict(args[field]) - if isinstance(args[field], dict) - else list(args[field]) - ) - # Cast data_frame argument to DataFrame (it could be a numpy array, dict etc.) - df_provided = args["data_frame"] is not None - if df_provided and not isinstance(args["data_frame"], pd.DataFrame): - args["data_frame"] = pd.DataFrame(args["data_frame"]) + df_input = args["data_frame"] df_provided = df_input is not None + df_output = pd.DataFrame() constants = dict() ranges = list() @@ -1083,7 +1080,7 @@ def process_args_into_dataframe(args, wide_mode, var_name): ) # Check validity of column name if argument not in df_input.columns: - if wide_mode and argument in ("value", var_name): + if wide_mode and argument in (value_name, var_name): continue else: err_msg = ( @@ -1205,10 +1202,11 @@ def build_dataframe(args, constructor): wide_y = False if no_y else _is_col_list(df_input, args["y"]) wide_mode = False - var_name = None + var_name = None # will likely be "variable" in wide_mode + wide_cross_name = None # will likely be "index" in wide_mode + value_name = "value" hist2d_types = [go.Histogram2d, go.Histogram2dContour] if constructor in cartesians: - wide_cross_name = None if wide_x and wide_y: raise ValueError( "Cannot accept list of column references or list of 
columns for both `x` and `y`." @@ -1266,17 +1264,24 @@ def build_dataframe(args, constructor): args["wide_cross"] = df_input.index wide_cross_name = df_input.index.name or "index" else: - args["wide_cross"] = Range(label="index") - wide_cross_name = "index" + wide_cross_name = _escape_col_name(df_input, "index") + args["wide_cross"] = Range(label=wide_cross_name) + + if wide_mode: + var_name = _escape_col_name(df_input, var_name) + value_name = _escape_col_name(df_input, value_name) # now that things have been prepped, we do the systematic rewriting of `args` - df_output, wide_id_vars = process_args_into_dataframe(args, wide_mode, var_name) + df_output, wide_id_vars = process_args_into_dataframe( + args, wide_mode, var_name, value_name + ) # now that `df_output` exists and `args` contains only references, we complete # the special-case and wide-mode handling by further rewriting args and/or mutating # df_output + count_name = _escape_col_name(df_output, "count") if not wide_mode and missing_bar_dim and constructor == go.Bar: # now that we've populated df_output, we check to see if the non-missing # dimension is categorical: if so, then setting the missing dimension to a @@ -1284,8 +1289,8 @@ def build_dataframe(args, constructor): # default and we let the normal auto-orientation-code do its thing later other_dim = "x" if missing_bar_dim == "y" else "y" if not _is_continuous(df_output, args[other_dim]): - args[missing_bar_dim] = "count" - df_output["count"] = 1 + args[missing_bar_dim] = count_name + df_output[count_name] = 1 else: # on the other hand, if the non-missing dimension is continuous, then we # can use this information to override the normal auto-orientation code @@ -1306,7 +1311,7 @@ def build_dataframe(args, constructor): id_vars=wide_id_vars, value_vars=wide_value_vars, var_name=var_name, - value_name="value", + value_name=value_name, ) df_output[var_name] = df_output[var_name].astype(str) orient_v = wide_orientation == "v" @@ -1317,24 +1322,24 @@ def 
build_dataframe(args, constructor): if constructor in [go.Scatter, go.Funnel] + hist2d_types: args["x" if orient_v else "y"] = wide_cross_name - args["y" if orient_v else "x"] = "value" + args["y" if orient_v else "x"] = value_name if constructor != go.Histogram2d: args["color"] = args["color"] or var_name if constructor == go.Bar: - if _is_continuous(df_output, "value"): + if _is_continuous(df_output, value_name): args["x" if orient_v else "y"] = wide_cross_name - args["y" if orient_v else "x"] = "value" + args["y" if orient_v else "x"] = value_name args["color"] = args["color"] or var_name else: - args["x" if orient_v else "y"] = "value" - args["y" if orient_v else "x"] = "count" - df_output["count"] = 1 + args["x" if orient_v else "y"] = value_name + args["y" if orient_v else "x"] = count_name + df_output[count_name] = 1 args["color"] = args["color"] or var_name if constructor in [go.Violin, go.Box]: args["x" if orient_v else "y"] = wide_cross_name or var_name - args["y" if orient_v else "x"] = "value" + args["y" if orient_v else "x"] = value_name if constructor == go.Histogram: - args["x" if orient_v else "y"] = "value" + args["x" if orient_v else "y"] = value_name args["y" if orient_v else "x"] = wide_cross_name args["color"] = args["color"] or var_name diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py index 7f2ab7606d2..89d3b4027ff 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py @@ -38,9 +38,7 @@ def test_with_index(): # We do not allow "x=index" with pytest.raises(ValueError) as err_msg: fig = px.scatter(tips, x="index", y="total_bill") - assert "To use the index, pass it in directly as `df.index`." in str( - err_msg.value - ) + assert "To use the index, pass it in directly as `df.index`." 
in str(err_msg.value) tips = px.data.tips() tips.index.name = "item" fig = px.scatter(tips, x=tips.index, y="total_bill") @@ -75,10 +73,10 @@ def test_several_dataframes(): # Name conflict with pytest.raises(NameError) as err_msg: fig = px.scatter(df, x="z", y=df2.money, size="y") - assert "A name conflict was encountered for argument y" in str(err_msg.value) + assert "A name conflict was encountered for argument y" in str(err_msg.value) with pytest.raises(NameError) as err_msg: fig = px.scatter(df, x="z", y=df2.money, size=df.y) - assert "A name conflict was encountered for argument y" in str(err_msg.value) + assert "A name conflict was encountered for argument y" in str(err_msg.value) # No conflict when the dataframe is not given, fields are used df = pd.DataFrame(dict(x=[0, 1], y=[3, 4])) @@ -157,41 +155,41 @@ def test_arrayattrable_numpy(): def test_wrong_column_name(): with pytest.raises(ValueError) as err_msg: px.scatter(px.data.tips(), x="bla", y="wrong") - assert "Value of 'x' is not the name of a column in 'data_frame'" in str( - err_msg.value - ) + assert "Value of 'x' is not the name of a column in 'data_frame'" in str( + err_msg.value + ) def test_missing_data_frame(): with pytest.raises(ValueError) as err_msg: px.scatter(x="arg1", y="arg2") - assert "String or int arguments are only possible" in str(err_msg.value) + assert "String or int arguments are only possible" in str(err_msg.value) def test_wrong_dimensions_of_array(): with pytest.raises(ValueError) as err_msg: px.scatter(x=[1, 2, 3], y=[2, 3, 4, 5]) - assert "All arguments should have the same length." in str(err_msg.value) + assert "All arguments should have the same length." in str(err_msg.value) def test_wrong_dimensions_mixed_case(): with pytest.raises(ValueError) as err_msg: df = pd.DataFrame(dict(time=[1, 2, 3], temperature=[20, 30, 25])) px.scatter(df, x="time", y="temperature", color=[1, 3, 9, 5]) - assert "All arguments should have the same length." 
in str(err_msg.value) + assert "All arguments should have the same length." in str(err_msg.value) def test_wrong_dimensions(): with pytest.raises(ValueError) as err_msg: px.scatter(px.data.tips(), x="tip", y=[1, 2, 3]) - assert "All arguments should have the same length." in str(err_msg.value) + assert "All arguments should have the same length." in str(err_msg.value) # the order matters with pytest.raises(ValueError) as err_msg: px.scatter(px.data.tips(), x=[1, 2, 3], y="tip") - assert "All arguments should have the same length." in str(err_msg.value) + assert "All arguments should have the same length." in str(err_msg.value) with pytest.raises(ValueError): px.scatter(px.data.tips(), x=px.data.iris().index, y="tip") - # assert "All arguments should have the same length." in str(err_msg.value) + assert "All arguments should have the same length." in str(err_msg.value) def test_multiindex_raise_error(): @@ -203,9 +201,7 @@ def test_multiindex_raise_error(): px.scatter(df, x="A", y="B") with pytest.raises(TypeError) as err_msg: px.scatter(df, x=df.index, y="B") - assert "pandas MultiIndex is not supported by plotly express" in str( - err_msg.value - ) + assert "pandas MultiIndex is not supported by plotly express" in str(err_msg.value) def test_build_df_from_lists(): diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py index 68666979758..544427c3284 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py @@ -635,14 +635,20 @@ def test_multi_index(): df.index = [["a", "a", "b", "b"], ["c", "d", "c", "d"]] with pytest.raises(TypeError) as err_msg: px.scatter(df) - assert "pandas MultiIndex is not supported by plotly express" in str( - err_msg.value - ) + assert "pandas MultiIndex is not supported by plotly express" in str(err_msg.value) df = pd.DataFrame([[1, 2, 3, 
4], [3, 4, 5, 6], [1, 2, 3, 4], [3, 4, 5, 6]]) df.columns = [["e", "e", "f", "f"], ["g", "h", "g", "h"]] with pytest.raises(TypeError) as err_msg: px.scatter(df) - assert "pandas MultiIndex is not supported by plotly express" in str( - err_msg.value - ) + assert "pandas MultiIndex is not supported by plotly express" in str(err_msg.value) + + +def test_special_name_collisions(): + df = pd.DataFrame( + dict(a=range(10), b=range(10), value=range(10), variable=range(10)) + ) + args_in = dict(data_frame=df, color="value", symbol="variable") + args_out = build_dataframe(args_in, go.Scatter) + df_out = args_out["data_frame"] + assert len(set(df_out.columns)) == len(df_out.columns) From e78cb508f3c979b6e13d46b6130551993a8f67cc Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Fri, 1 May 2020 09:10:36 -0400 Subject: [PATCH 43/69] lock down edge cases around name collisions --- .../python/plotly/plotly/express/_core.py | 93 +++++++++++-------- .../tests/test_core/test_px/test_px_input.py | 4 +- .../tests/test_core/test_px/test_px_wide.py | 57 +++++++++--- 3 files changed, 101 insertions(+), 53 deletions(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index e2069172d39..a96c53c2c9e 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -899,8 +899,8 @@ def _check_name_not_reserved(field_name, reserved_names): return field_name else: raise NameError( - "A name conflict was encountered for argument %s. " - "A column with name %s is already used." % (field_name, field_name) + "A name conflict was encountered for argument '%s'. " + "A column or index with name '%s' is ambiguous." 
% (field_name, field_name) ) @@ -929,6 +929,8 @@ def _get_reserved_col_names(args): in_df = arg is df[arg_name] if in_df: reserved_names.add(arg_name) + elif arg is df.index and arg.name is not None: + reserved_names.add(arg.name) return reserved_names @@ -970,8 +972,8 @@ def _isinstance_listlike(x): return True -def _escape_col_name(df_input, col_name): - while df_input is not None and col_name in df_input.columns: +def _escape_col_name(df_input, col_name, extra): + while df_input is not None and (col_name in df_input.columns or col_name in extra): col_name = "_" + col_name return col_name @@ -1040,6 +1042,7 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name): length = len(df_output) if argument is None: continue + col_name = None # Case of multiindex if isinstance(argument, pd.MultiIndex): raise TypeError( @@ -1107,31 +1110,25 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name): df_output[col_name] = df_input[argument].values # ----------------- argument is a column / array / list.... ------- else: - is_index = isinstance(argument, pd.Index) - # First pandas - # pandas series have a name but it's None - if ( - hasattr(argument, "name") and argument.name is not None - ) or is_index: - col_name = argument.name # pandas df - if col_name is None and is_index: - col_name = "index" - if not df_provided: - col_name = field - else: - if is_index: - keep_name = df_provided and argument is df_input.index + if df_provided and hasattr(argument, "name"): + if argument is df_input.index: + if argument.name is None or argument.name in df_input: + col_name = "index" else: - keep_name = ( - col_name in df_input and argument is df_input[col_name] - ) - col_name = ( - col_name - if keep_name - else _check_name_not_reserved(field, reserved_names) + col_name = argument.name + col_name = _escape_col_name( + df_input, col_name, [var_name, value_name] ) - else: # numpy array, list... 
+ else: + if ( + argument.name is not None + and argument.name in df_input + and argument is df_input[argument.name] + ): + col_name = argument.name + if col_name is None: # numpy array, list... col_name = _check_name_not_reserved(field, reserved_names) + if length and len(argument) != length: raise ValueError( "All arguments should have the same length. " @@ -1145,6 +1142,12 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name): df_output[str(col_name)] = np.array(argument) # Finally, update argument with column name now that column exists + assert col_name is not None, ( + "Data-frame processing failure, likely due to a internal bug. " + "Please report this to " + "https://github.com/plotly/plotly.py/issues/new and we will try to " + "replicate and fix it." + ) if field_name not in array_attrables: args[field_name] = str(col_name) elif isinstance(args[field_name], dict): @@ -1204,7 +1207,7 @@ def build_dataframe(args, constructor): wide_mode = False var_name = None # will likely be "variable" in wide_mode wide_cross_name = None # will likely be "index" in wide_mode - value_name = "value" + value_name = None # will likely be "value" in wide_mode hist2d_types = [go.Histogram2d, go.Histogram2dContour] if constructor in cartesians: if wide_x and wide_y: @@ -1220,7 +1223,9 @@ def build_dataframe(args, constructor): "at the moment." 
) args["wide_variable"] = list(df_input.columns) - var_name = df_input.columns.name or "variable" + var_name = df_input.columns.name + if var_name in [None, "value", "index"] or var_name in df_input: + var_name = "variable" if constructor == go.Funnel: wide_orientation = args.get("orientation", None) or "h" else: @@ -1240,6 +1245,10 @@ def build_dataframe(args, constructor): if not no_x and not no_y: wide_cross_name = "__x__" if wide_y else "__y__" + if wide_mode: + value_name = _escape_col_name(df_input, "value", []) + var_name = _escape_col_name(df_input, var_name, []) + missing_bar_dim = None if constructor in [go.Scatter, go.Bar, go.Funnel] + hist2d_types: if not wide_mode and (no_x != no_y): @@ -1262,14 +1271,10 @@ def build_dataframe(args, constructor): "at the moment." ) args["wide_cross"] = df_input.index - wide_cross_name = df_input.index.name or "index" else: - wide_cross_name = _escape_col_name(df_input, "index") - args["wide_cross"] = Range(label=wide_cross_name) - - if wide_mode: - var_name = _escape_col_name(df_input, var_name) - value_name = _escape_col_name(df_input, value_name) + args["wide_cross"] = Range( + label=_escape_col_name(df_input, "index", [var_name, value_name]) + ) # now that things have been prepped, we do the systematic rewriting of `args` @@ -1281,7 +1286,7 @@ def build_dataframe(args, constructor): # the special-case and wide-mode handling by further rewriting args and/or mutating # df_output - count_name = _escape_col_name(df_output, "count") + count_name = _escape_col_name(df_output, "count", [var_name, value_name]) if not wide_mode and missing_bar_dim and constructor == go.Bar: # now that we've populated df_output, we check to see if the non-missing # dimension is categorical: if so, then setting the missing dimension to a @@ -1306,6 +1311,12 @@ def build_dataframe(args, constructor): # columns, keeping track of various names and manglings set up above wide_value_vars = [c for c in args["wide_variable"] if c not in wide_id_vars] 
del args["wide_variable"] + if wide_cross_name == "__x__": + wide_cross_name = args["x"] + elif wide_cross_name == "__y__": + wide_cross_name = args["y"] + else: + wide_cross_name = args["wide_cross"] del args["wide_cross"] df_output = df_output.melt( id_vars=wide_id_vars, @@ -1313,12 +1324,14 @@ def build_dataframe(args, constructor): var_name=var_name, value_name=value_name, ) + assert len(df_output.columns) == len(set(df_output.columns)), ( + "Wide-mode name-inference failure, likely due to a internal bug. " + "Please report this to " + "https://github.com/plotly/plotly.py/issues/new and we will try to " + "replicate and fix it." + ) df_output[var_name] = df_output[var_name].astype(str) orient_v = wide_orientation == "v" - if wide_cross_name == "__x__": - wide_cross_name = args["x"] - if wide_cross_name == "__y__": - wide_cross_name = args["y"] if constructor in [go.Scatter, go.Funnel] + hist2d_types: args["x" if orient_v else "y"] = wide_cross_name diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py index 89d3b4027ff..e440fefbe89 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py @@ -73,10 +73,10 @@ def test_several_dataframes(): # Name conflict with pytest.raises(NameError) as err_msg: fig = px.scatter(df, x="z", y=df2.money, size="y") - assert "A name conflict was encountered for argument y" in str(err_msg.value) + assert "A name conflict was encountered for argument 'y'" in str(err_msg.value) with pytest.raises(NameError) as err_msg: fig = px.scatter(df, x="z", y=df2.money, size=df.y) - assert "A name conflict was encountered for argument y" in str(err_msg.value) + assert "A name conflict was encountered for argument 'y'" in str(err_msg.value) # No conflict when the dataframe is not given, fields are used df = pd.DataFrame(dict(x=[0, 1], y=[3, 
4])) diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py index 544427c3284..adb9f3c749a 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py @@ -618,16 +618,61 @@ def append_special_case(df_in, args_in, args_expect, df_expect): ), ) +# df has columns named after every special string +df = pd.DataFrame(dict(variable=[1, 2], index=[3, 4], value=[5, 6]), index=[7, 8]) +append_special_case( + df_in=df, + args_in=dict(x=None, y=None, color=None), + args_expect=dict(x="_index", y="_value", color="_variable", orientation="v",), + df_expect=pd.DataFrame( + dict( + _index=[7, 8, 7, 8, 7, 8], + _value=[1, 2, 3, 4, 5, 6], + _variable=["variable", "variable", "index", "index", "value", "value"], + ) + ), +) + +# df has columns with name collisions with indexes +df = pd.DataFrame(dict(a=[1, 2], b=[3, 4]), index=[7, 8]) +df.index.name = "a" +df.columns.name = "b" +append_special_case( + df_in=df, + args_in=dict(x=None, y=None, color=None), + args_expect=dict(x="index", y="value", color="variable", orientation="v",), + df_expect=pd.DataFrame( + dict(index=[7, 8, 7, 8], value=[1, 2, 3, 4], variable=["a", "a", "b", "b"],) + ), +) + +# everything is called value, OMG +df = pd.DataFrame(dict(value=[1, 2], b=[3, 4]), index=[7, 8]) +df.index.name = "value" +df.columns.name = "value" +append_special_case( + df_in=df, + args_in=dict(x=None, y=None, color=None), + args_expect=dict(x="index", y="_value", color="variable", orientation="v",), + df_expect=pd.DataFrame( + dict( + index=[7, 8, 7, 8], + _value=[1, 2, 3, 4], + variable=["value", "value", "b", "b"], + ) + ), +) + @pytest.mark.parametrize("df_in, args_in, args_expect, df_expect", special_cases) def test_wide_mode_internal_special_cases(df_in, args_in, args_expect, df_expect): args_in["data_frame"] = df_in args_out = 
build_dataframe(args_in, go.Scatter) df_out = args_out.pop("data_frame") + assert args_out == args_expect assert_frame_equal( df_out.sort_index(axis=1), df_expect.sort_index(axis=1), ) - assert args_out == args_expect def test_multi_index(): @@ -642,13 +687,3 @@ def test_multi_index(): with pytest.raises(TypeError) as err_msg: px.scatter(df) assert "pandas MultiIndex is not supported by plotly express" in str(err_msg.value) - - -def test_special_name_collisions(): - df = pd.DataFrame( - dict(a=range(10), b=range(10), value=range(10), variable=range(10)) - ) - args_in = dict(data_frame=df, color="value", symbol="variable") - args_out = build_dataframe(args_in, go.Scatter) - df_out = args_out["data_frame"] - assert len(set(df_out.columns)) == len(df_out.columns) From b1bcd0888227df95437a0ff1ba82da5d304dc1f0 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Thu, 14 May 2020 13:38:27 -0400 Subject: [PATCH 44/69] PR comments --- .../python/plotly/plotly/data/__init__.py | 4 ++-- .../python/plotly/plotly/express/_core.py | 4 ++++ .../package_data/datasets/experiment.csv.gz | Bin 3114 -> 3154 bytes 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/packages/python/plotly/plotly/data/__init__.py b/packages/python/plotly/plotly/data/__init__.py index cf51e3b6595..b3ddffcd153 100644 --- a/packages/python/plotly/plotly/data/__init__.py +++ b/packages/python/plotly/plotly/data/__init__.py @@ -117,11 +117,11 @@ def timeseries(): def experiment(): """ Each row in this wide dataset represents the results of 100 simulated participants -on three hypothetical experiments, along with their gender and smoker status. +on three hypothetical experiments, along with their gender and control/treatment group. Returns: A `pandas.DataFrame` with 100 rows and the following columns: - `['experiment_1', 'experiment_2', 'experiment_3', 'gender', 'smoker']`. + `['experiment_1', 'experiment_2', 'experiment_3', 'gender', 'group']`. 
""" return _get_dataset("experiment") diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index a96c53c2c9e..02f8f720f55 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -936,6 +936,10 @@ def _get_reserved_col_names(args): def _is_col_list(df_input, arg): + """Returns True if arg looks like it's a list of columns or references to columns + in df_input, and False otherwise (in which case it's assumed to be a single column + or reference to a column). + """ if arg is None or isinstance(arg, str) or isinstance(arg, int): return False if isinstance(arg, pd.MultiIndex): diff --git a/packages/python/plotly/plotly/package_data/datasets/experiment.csv.gz b/packages/python/plotly/plotly/package_data/datasets/experiment.csv.gz index 7ee1e69de5facd99e20c04556d6638c107038b43..427e5409901d873ae4048ec2566fb28b3d10c0be 100644 GIT binary patch literal 3154 zcmV-Y46XAYiwFo^fxTV;17&z{WpZh4Wo~pXV{>)@ZJF(k9k&gJ|NB}7f<%cTc{N4Q zZi51e9T+aq%kT5*Y?}JaBmv?*KBJMy_lw$(|NQas{a?R-eEspaaee&tgZ=*x^ZNDU z>pwoeU%!6;_U}Kw)NUEOZ?`r2TDGp5ZH=fka$EPD`-<86oTF@y?0bY=zyI>v$MyGb zUw?f6_S=_?Hv5>9@4D4;yM|TUJW{@9ujIdL@78_0&s@o0t#$qLax!)=C54t5TdW9=LS%pbrLU-MJH{2(#_Co!#`JD;{9G69e>;ay zz8`&FwOAo++nk{S`xSh{*6M>@^y)p|t1|j%wSyH{E!>h-ymgiwNiSmT+SeF^6Yg{y zd(TaUZ&ex5BH8O$?6vo$hrG>V|9qOmcK55Y?Nu{5a?PMZ_qyn*HEhyYijzIn6K{jB z&)DV^SqI&XzoY#iOAVjzg#j@ZfFS$wHQ3BtZB>@aI;qx}SMIhz1IyaVp&h>i28>Zn zp66YCe&U5C=3X%>JnaHhRBm;*K8Lz^LrZT(>4*MX1&URKRl(+b_E>Y(m7{Tx#kTS+ zwI0d>E@Mt(yPdsI&jZyB{DCT6q8f3PwavG9&G50`;UXpqY_)+q+n=;3sbl&Oj&rST z*RIy7ni>hSIQj#PmO9Li**nLewlDmH{n)XA>XqISq`);;QoBFkWwaQ+_82r{aP*58 zHXsJ@plKCHP^qc}J5h1&S@*(lWGF+EBrnrkxT^*RL5!MoaL0Ch&IvTxjy`^ei}WWt zouwEe3D47{PJ=vVawKZYs?_W>30P3vcP5#&<*aUIwAu!gHeMXyzsCTF6q=QLLpy42 zEgZg|OJg0Hpi&D#TS6I67#t28l*qLk_0surr(pTSq@K+SwiNjF5ktpX&D~KiEo3|6 ziY4r!BHe%PAL9b5Bk`fz@xQh%pvz}HoWSM&& zNkM}_fW#Q?98SvvIJ>(v?9L?w(r9%7Vu|9qUPPa0!W!(LgXt}e3BVxHMET;WGFwOk 
ztGqoTMQwFzZXE#-A^ver1y`sG8i30B8=JDBvh*4i-$kKWqqmKNNu~-rSfR~W;3VvN zi@G=YEqYDS**bv5yXqw}Y{O?9NU};Uv56zoxw$C&9WoMrY6H_ci^T!(fWE4VZSb+K zoa_Y_6>5hs`2&}#A#FWH2xfK|ujVmMY8E?w|q-d)Yge4EI>k6Ws(Ay^dVfddlM0;N-56w*jww+eSJ-}c+ke#<-VMDrRa5;!O0SfxMf<=+afwJP-bvGm>7$Vnb_jW`Pu6V|}=Y!Yr!yUYEe^?N}8@a+WZ*CTGFz2M0t)Qpto3@DVxh zSy||!UIN$Adz=KpXM2Vn170aEz`HOy_5xtooD8tu)k2&8Z{mCPf&AYEOW%Dh1miqQ! z(X&lC7w2EZ?Deu%se!pB1N^|xN2pVmxU-x75bG2wa0^@Xx#+?=IzUZu4Ooqzv9zWM zDn&v8Kv;{t73P2royWNlTD~xkNfV7R6wM`!IH5@(k0JwL$nB!%PjHs9?d_gSiYoC-yJUC*3$|=ln;5q!3U67ZhH{iV#2jM*}0)<;?NU8q_ zs3xY8gtC@2E8Eqlxp)kZEY<|7sN*-fJDVC_g?iu>8Nc`xG?WTS|%+q5lfg&!Q{kf{F~Wm(hAPDb8ik@LE#Pd7a2p$Nr~>Hl`Vx`+ zro2OC&`^z1DK*KxHN&1HxZ(Oscc4XbAPu+a@0Cdn0`vl9hfo3PG?SpWTlW);MJTw` zE=5M13sLoXbEG400H47{x;)Mm0ay%LraXyb6E&3CD9{Nai1CB;DrvhW7eR(e?<7?U z!(J%K&@GH|1I_@*(RkQ`^1KmA`i-(+SV$SuS;^XQ5+}YaW-2g!;ES@;!_qAQExnIO zLMFweJY+irTct{bffuK1X4PE~4AGRj&P4H$>JI57F}zuHB=RVLW%(7p72VXB#Kh;UzH9r0klfrCjseA#g(V!taA4>cPI@BLS*j^BubFULt_+ zdfh2?875w>Sto8(BAPQ#r5s=)dG?A@K%FJ%J{vrjObi4X^$-z0d>D_Gq>UL|u7573 z^k$@v!5Qn&mDP9+24u4$z#1|Aai&IwutY_3A`1RO^~I*;CE)7>SJM$-UYzcMs- z=wfMOaA5Bt^&)nsCnymr%ub>SfVvbWg~c)O#le^vx-LQHLyA$>$MIQ#7)eambs_k9 zT~3`mEyU3FAwAoWE*^>k$Pgu6q7uW9qxj5sXpMxIK_gS^D7R_+TEfwLS*o3e@?xMj zbgeY;B=r|bA~>gRxTxLIm7l@^1bY~%iQDJy2W0BCFOdO4y0VL75P!x1T1K_WHY|0o zjAj!Ct04-KRMtr{OLyf6FuD&zL@pc`Y?>~q`s4iAHMQZODlxN89;?N{q_pc?qvdtj zPXK`jRNCiFf6r*0L%%f89Ch-Ytd}v3^uXxHF$ow1JLm=5b&Y;64dsk;zeSx>RxB;u zx_{A-t}K)CB`6I!L5^?T$~{o&SOKl1yU5Hz9-=;77rzV&>K(}Hy5WaJ>uBLqZO|g^ z9k;cv(d|5@MPc{UiVs@X5JccrMLW?PSNAx;Rhi0=P5lkQFq~ zGP~LNZ3kgMgV%!q!xT1hzQLLD^_vO)C}Bbj@lP#dvbU+2p-bSmvWVDpb*Fq@cTbEo zc*xWh5CG6H9;B2`MX*p>r}_4QOhlBWn+5oDa-!~UOjjL7!C(j7Hv*i7YK3{X|5@Zu shQce4D*?81`3LT37Ks&koBG`n2}F)x8V6VT!?oRi0i=clNR}G_03%rRp8x;= literal 3114 zcmV+_4At`=iwFoF|CwF@17&z{WpZh4Wo~pXV{>)@WtrQK9JdjM@AWJVf-I7Cc$y$U zHbIa`4&+=UZ=dhythL<}TY$B*Jx#LeKNbD)pMQP){QLKhAOHT-xIVsqu=@}5`rF5k zzkK|>e*ga4KR$kbD{mRzecKv+En8R3wno$%>DG5nzhbsN=P2`$-ACB<{m=jWxc>X` z`?riX`kSpnYs_KY*P7=08r|E- 
zsKHBPMf&0+Hf0uFOH^j$YYRVq`!>3@iX7W!*yJSV1UZ)Hh}kz=xwf~eozX|u96kMy z4e)(Vz=Bm4#(`G^i$&|BD))+V^B7lH8>?H{7;|@< zsFCwt0#INQVlR2#vF-%`T`{Mzxz6g8 z>;TRIFo2^2oI9?v<~+sw#~yx%fH)-Zlml2cJLy1Dt=vO^%(dJuuhyxL`UEdH=o<-^ zn#UH|Ec>Q{FAxLZ@Ni&vNQ<2jqPtmunx$Pk%ihU zL54>S+Jy~@v9$%YYV&ZXH2L5`&$bJ!7bx5#h7PgX-Wz$*8_YAVSOOA^?*@(l$4v~; zxejG(1P5?25pkl6IU(XgIBXnwtxiXhLK@|MiUGK>o>r86+1`1mk1En!$s@==Oq3dP z*uAAQHzuH5Mi&b-O0CxQ;^u_HYp`qwoGvX0`Ybg=mf}$Z7i>Tr>qVrfs4h&`h5-fw z9On$6oeIENv~SdRlInE&|DO7odc3^>$vsD-A9)ei3k+QN^z=vnl zOAJ_k&o~g{4Y|Yx&PZ?OqPRC0NPnpU1nImb4iX29)A`Nm`MPql0XR@d5uUK`OsIzF zEHPc;YjnUMP99#-UECv80M~v1GjH5wcP1F>y$;535IyXTv-2|#v!>*s7h=dVB7B14 zCkB=<01UAcmRly~Zb=j3LMI&q=KKL&IU-&-@8_@Hl2$`QuodCQkm)4n9tAbFK9Zew z5FLB1%}@99L*&9|vS98ul`n)?UP>_*sJjAt8Pa6>3^Q(Lr`00wLc$?D@_UNM0szJF z4O;fHr;QIe1g7`y^*H{phHYXrY>=Xd$^~L+v}nkYkc2Bck-{)2&V{Y-uj2#vl*L7t z&~;a@N@1b{6UzB%E(W2cq9`GaeK25?Ur-yQcqWho02mB}<4iCX2)8=odtXpu2rt!3zU|@(~H?O#$eRUJEbLT$CKqWkZJP z0YPaXD7tVLRtE%ZN3PTTS%5tUwE?OKmfQ#xqWY&E7$on%9Gvn3C;>^>Sv)jgwbzDS zI|K`yVz8~@p%lhxa4*WrDP)(@n!JMm#wFh6WK-)zCce%F6o3%!lzW&Gf#OxEfspY| zfzJMSr`<^*@(_ml)36X7q4miVbf1GjDB!i%bvJW`9OM&pWqs03z84D_z|ZI&EU*&#R_HMqimWKO4`OtMEP3*cti1gZ22CfwPSk& z$dTygDcGfKU|0qHgDP<4%?a@~Nfadlym4TJ>#^RXVH{*gi-_%P9V2j|;;jV43ygHV z>_3lnLhL1cvt0()D-tg_W_t|i`!Ws%23gd9)bGR(=P~2tRD`m#G#oiJejXzH} zU;qWLOcAnQngH|V!q^GJiX+7vu!A71v_ymb(88euYd}Oz6g}jT8qH3Aa_ITe2Y{Vy zUo%_odks>tm};Z3&>=vPk~s?Kny48E&{L^Vibl8wYPaXO@c``sz2tUD2 z!zfZRPn4X=9Nq+lwSz-+mKK%fTRQd}4TLgEMj=Q=$ft^u@R$X2YZ}uX1u)kPwobOb z=b^(DNde1=MPvshS!n?tm!cK5Q5u71*uet;C@*S36kxz&4zzIs5n2v`Od_kNwQZY0k3?W z#p?u$Oatfn@IDqS=?;UWO!ZumXgPF15#L( zXYUt^r2N!Xi6CD@0>-*10SA)hCl8ohm3Ghrd~lqa@#t2F8oFl5v#ZVF31w5a4)I8a z=&2I42I}5VYS)-e$bbq2GxQu+(jd!jD5a{1%{;CN6qfL2k}H3CF`uf^`v~1gh}y-! 
zoL$!Ik5Hf+jgpL)wqP$mq1!Z6%GKkLo8<|#Shdo#=Ea96D(om)QAK3`lsq^mUjR@H z=XFZJPU&dZ9If#5ECCfu4e0s@E&**NLt+{!L@rgwr8K_+H&y}Yw-Tg+9J3Jhf@Snf z_Y4pTr_dZjn_gjqWRsfwMe!UDK~$V(dXyZii_>*20>0(o^Ni@FWOqj1L<{0R9SY=< zQ``VSJ7SbgH)BKuWn;Q|Whj8_n!_;`u=JJ;5u;Oe)B;^(2hjv5T`Uq~;@Bk9EjZ-s zv|!#-fs!wV%2!EUWLMowfr-~O$qAc60UaF@9Oht(dcQ41dQ(%sCRy|%K9doeASono zXX*t?J(@R`uyHRywbLk814=`e91}lK-;fQ0W$K=VDlJ_x$$p`SzkA|3cM_mX%e}+^ zSm>H4j#U3)v^0g9k`;K(tpOZsy{>Y7jR zDpir3pU2d&4QbyxS2%ecwqoianPTqwP{cDP=FnxbW`~oEWOB?T7&8M7hr+vwM34i5 z>eBUD1d0~tmW67j3{N8Gx*yQ6scew)1c(T&K*M%j2;GTr6qzp3IOO2K2z{Mf7l_1W zwFb0wT>?Xab%g3s1;j!-#n-HBNIB2zZLo1_)L>U3QSNe{()8)`2?aPxBOy6M_*ThC z`vk*tRWmdvD)HhOWKd0TeZJ2L=oVil!m*(86|;VaGY~S!wPfdOo8X0I^U0Zv--_TW zk4#Tv1_vlzBr?((Hmos?LB-D1H(skr<1UHdoTbmNI6=Pdsrjpv1DV3D^nH`3Oxd?C z6K@3&7wML#oK&|U#CbG)>Ry7zYN!mHpK+HtUY*g{_C^IPe`y{D{U=@NZoxE|8>Io_ zG}cH!8W5FmxBarEm!`AMaRiWaZkhlQy&&A7JyZWL0b`e;mgb&S`+imQKQ2mz0C5=r E01kfBRR910 From 3a87007cc491efe9343fb895f4f334c0bce2dac8 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Thu, 14 May 2020 22:21:23 -0400 Subject: [PATCH 45/69] wide-mode docstrings --- .../plotly/plotly/express/_chart_types.py | 54 +++++++++++++++---- packages/python/plotly/plotly/express/_doc.py | 13 +++-- 2 files changed, 49 insertions(+), 18 deletions(-) diff --git a/packages/python/plotly/plotly/express/_chart_types.py b/packages/python/plotly/plotly/express/_chart_types.py index 446246b8f8d..1227979e469 100644 --- a/packages/python/plotly/plotly/express/_chart_types.py +++ b/packages/python/plotly/plotly/express/_chart_types.py @@ -2,6 +2,12 @@ from ._doc import make_docstring import plotly.graph_objs as go +_wide_mode_xy_append = [ + "Either `x` or `y` can optionally be a list of column references or array_likes, ", + "in which case the data will be treated as if it were 'wide' rather than 'long'.", +] +_cartesian_append_dict = dict(x=_wide_mode_xy_append, y=_wide_mode_xy_append) + def scatter( data_frame=None, @@ -56,7 +62,7 @@ def scatter( return 
make_figure(args=locals(), constructor=go.Scatter) -scatter.__doc__ = make_docstring(scatter) +scatter.__doc__ = make_docstring(scatter, append_dict=_cartesian_append_dict) def density_contour( @@ -114,7 +120,16 @@ def density_contour( ) -density_contour.__doc__ = make_docstring(density_contour) +density_contour.__doc__ = make_docstring( + density_contour, + append_dict=dict( + x=_wide_mode_xy_append, + y=_wide_mode_xy_append, + z=[ + "For `density_heatmap` and `density_contour` these values are used as the inputs to `histfunc`.", + ], + ), +) def density_heatmap( @@ -170,7 +185,16 @@ def density_heatmap( ) -density_heatmap.__doc__ = make_docstring(density_heatmap) +density_heatmap.__doc__ = make_docstring( + density_heatmap, + append_dict=dict( + x=_wide_mode_xy_append, + y=_wide_mode_xy_append, + z=[ + "For `density_heatmap` and `density_contour` these values are used as the inputs to `histfunc`.", + ], + ), +) def line( @@ -218,7 +242,7 @@ def line( return make_figure(args=locals(), constructor=go.Scatter) -line.__doc__ = make_docstring(line) +line.__doc__ = make_docstring(line, append_dict=_cartesian_append_dict) def area( @@ -264,7 +288,7 @@ def area( ) -area.__doc__ = make_docstring(area) +area.__doc__ = make_docstring(area, append_dict=_cartesian_append_dict) def bar( @@ -316,7 +340,7 @@ def bar( ) -bar.__doc__ = make_docstring(bar) +bar.__doc__ = make_docstring(bar, append_dict=_cartesian_append_dict) def histogram( @@ -369,7 +393,15 @@ def histogram( ) -histogram.__doc__ = make_docstring(histogram) +histogram.__doc__ = make_docstring( + histogram, + append_dict=dict( + x=["For horizontal histograms, these values are used as inputs to `histfunc`."] + + _wide_mode_xy_append, + y=["For vertical histograms, these values are used as inputs to `histfunc`."] + + _wide_mode_xy_append, + ), +) def violin( @@ -416,7 +448,7 @@ def violin( ) -violin.__doc__ = make_docstring(violin) +violin.__doc__ = make_docstring(violin, append_dict=_cartesian_append_dict) def box( 
@@ -466,7 +498,7 @@ def box( ) -box.__doc__ = make_docstring(box) +box.__doc__ = make_docstring(box, append_dict=_cartesian_append_dict) def strip( @@ -517,7 +549,7 @@ def strip( ) -strip.__doc__ = make_docstring(strip) +strip.__doc__ = make_docstring(strip, append_dict=_cartesian_append_dict) def scatter_3d( @@ -1389,7 +1421,7 @@ def funnel( return make_figure(args=locals(), constructor=go.Funnel) -funnel.__doc__ = make_docstring(funnel) +funnel.__doc__ = make_docstring(funnel, append_dict=_cartesian_append_dict) def funnel_area( diff --git a/packages/python/plotly/plotly/express/_doc.py b/packages/python/plotly/plotly/express/_doc.py index 05a5b214cad..c1df8127de2 100644 --- a/packages/python/plotly/plotly/express/_doc.py +++ b/packages/python/plotly/plotly/express/_doc.py @@ -25,19 +25,16 @@ colref_type, colref_desc, "Values from this column or array_like are used to position marks along the x axis in cartesian coordinates.", - "For horizontal histograms, these values are used as inputs to `histfunc`.", ], y=[ colref_type, colref_desc, "Values from this column or array_like are used to position marks along the y axis in cartesian coordinates.", - "For vertical histograms, these values are used as inputs to `histfunc`.", ], z=[ colref_type, colref_desc, "Values from this column or array_like are used to position marks along the z axis in cartesian coordinates.", - "For `density_heatmap` and `density_contour` these values are used as the inputs to `histfunc`.", ], a=[ colref_type, @@ -173,7 +170,7 @@ colref_desc, "Values from this column or array_like are used to assign mark sizes.", ], - radius=["int (default is 30)", "Sets the radius of influence of each point.",], + radius=["int (default is 30)", "Sets the radius of influence of each point."], hover_name=[ colref_type, colref_desc, @@ -518,14 +515,16 @@ ) -def make_docstring(fn, override_dict={}): +def make_docstring(fn, override_dict={}, append_dict={}): tw = TextWrapper(width=75, initial_indent=" ", 
subsequent_indent=" ") result = (fn.__doc__ or "") + "\nParameters\n----------\n" for param in getfullargspec(fn)[0]: if override_dict.get(param): - param_doc = override_dict[param] + param_doc = override_dict[param].copy() else: - param_doc = docs[param] + param_doc = docs[param].copy() + if append_dict.get(param): + param_doc += append_dict[param] param_desc_list = param_doc[1:] param_desc = ( tw.fill(" ".join(param_desc_list or "")) From 3129e639e967f2e9e20fab474374b2c69080b6a2 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Thu, 14 May 2020 22:27:13 -0400 Subject: [PATCH 46/69] fix ordering in tests in py < 3.6 --- .../plotly/plotly/tests/test_core/test_px/test_px_wide.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py index adb9f3c749a..c4d386d4bdd 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py @@ -619,7 +619,7 @@ def append_special_case(df_in, args_in, args_expect, df_expect): ) # df has columns named after every special string -df = pd.DataFrame(dict(variable=[1, 2], index=[3, 4], value=[5, 6]), index=[7, 8]) +df = pd.DataFrame(dict(index=[1, 2], value=[3, 4], variable=[5, 6]), index=[7, 8]) append_special_case( df_in=df, args_in=dict(x=None, y=None, color=None), @@ -628,7 +628,7 @@ def append_special_case(df_in, args_in, args_expect, df_expect): dict( _index=[7, 8, 7, 8, 7, 8], _value=[1, 2, 3, 4, 5, 6], - _variable=["variable", "variable", "index", "index", "value", "value"], + _variable=["index", "index", "value", "value", "variable", "variable"], ) ), ) @@ -647,7 +647,7 @@ def append_special_case(df_in, args_in, args_expect, df_expect): ) # everything is called value, OMG -df = pd.DataFrame(dict(value=[1, 2], b=[3, 4]), index=[7, 8]) +df = pd.DataFrame(dict(b=[1, 2], 
value=[3, 4]), index=[7, 8]) df.index.name = "value" df.columns.name = "value" append_special_case( @@ -658,7 +658,7 @@ def append_special_case(df_in, args_in, args_expect, df_expect): dict( index=[7, 8, 7, 8], _value=[1, 2, 3, 4], - variable=["value", "value", "b", "b"], + variable=["b", "b", "value", "value",], ) ), ) From bd551c00d7c32a16c1de4d97e6354c984746f4c9 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Thu, 14 May 2020 22:34:06 -0400 Subject: [PATCH 47/69] fix tests py2.7 --- packages/python/plotly/plotly/express/_doc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/python/plotly/plotly/express/_doc.py b/packages/python/plotly/plotly/express/_doc.py index c1df8127de2..6d4399ad3c1 100644 --- a/packages/python/plotly/plotly/express/_doc.py +++ b/packages/python/plotly/plotly/express/_doc.py @@ -520,9 +520,9 @@ def make_docstring(fn, override_dict={}, append_dict={}): result = (fn.__doc__ or "") + "\nParameters\n----------\n" for param in getfullargspec(fn)[0]: if override_dict.get(param): - param_doc = override_dict[param].copy() + param_doc = list(override_dict[param]) else: - param_doc = docs[param].copy() + param_doc = list(docs[param]) if append_dict.get(param): param_doc += append_dict[param] param_desc_list = param_doc[1:] From 18276cfbf03ba8724f03da30ef67307b1b4164d3 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Thu, 14 May 2020 22:46:22 -0400 Subject: [PATCH 48/69] expand docstrings a bit --- packages/python/plotly/plotly/express/_chart_types.py | 9 +++++++-- packages/python/plotly/plotly/express/_doc.py | 11 +++++------ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/packages/python/plotly/plotly/express/_chart_types.py b/packages/python/plotly/plotly/express/_chart_types.py index 1227979e469..2d41c40590c 100644 --- a/packages/python/plotly/plotly/express/_chart_types.py +++ b/packages/python/plotly/plotly/express/_chart_types.py @@ -128,6 +128,7 @@ def density_contour( z=[ "For 
`density_heatmap` and `density_contour` these values are used as the inputs to `histfunc`.", ], + histfunc=["The arguments to this function are the values of `z`."], ), ) @@ -193,6 +194,7 @@ def density_heatmap( z=[ "For `density_heatmap` and `density_contour` these values are used as the inputs to `histfunc`.", ], + histfunc=["The arguments to this function are the values of `z`.",], ), ) @@ -396,10 +398,13 @@ def histogram( histogram.__doc__ = make_docstring( histogram, append_dict=dict( - x=["For horizontal histograms, these values are used as inputs to `histfunc`."] + x=["If `orientation` is `'h'`, these values are used as inputs to `histfunc`."] + _wide_mode_xy_append, - y=["For vertical histograms, these values are used as inputs to `histfunc`."] + y=["If `orientation` is `'v'`, these values are used as inputs to `histfunc`."] + _wide_mode_xy_append, + histfunc=[ + "The arguments to this function are the values of `y`(`x`) if `orientation` is `'v'`(`'h'`).", + ], ), ) diff --git a/packages/python/plotly/plotly/express/_doc.py b/packages/python/plotly/plotly/express/_doc.py index 6d4399ad3c1..d240a5c560b 100644 --- a/packages/python/plotly/plotly/express/_doc.py +++ b/packages/python/plotly/plotly/express/_doc.py @@ -383,12 +383,9 @@ "Sets start angle for the angular axis, with 0 being due east and 90 being due north.", ], histfunc=[ - "str (default `'count'`)", + "str (default `'count'` if no arguments are provided, else `'sum'`)", "One of `'count'`, `'sum'`, `'avg'`, `'min'`, or `'max'`." 
"Function used to aggregate values for summarization (note: can be normalized with `histnorm`).", - "The arguments to this function for `histogram` are the values of `y` if `orientation` is `'v'`,", - "otherwise the arguements are the values of `x`.", - "The arguments to this function for `density_heatmap` and `density_contour` are the values of `z`.", ], histnorm=[ "str (default `None`)", @@ -440,8 +437,10 @@ ], zoom=["int (default `8`)", "Between 0 and 20.", "Sets map zoom level."], orientation=[ - "str (default `'v'`)", - "One of `'h'` for horizontal or `'v'` for vertical)", + "str, one of `'h'` for horizontal or `'v'` for vertical. ", + "(default `'v'` if `x` and `y` are provided and both continous or both categorical, ", + "otherwise `'v'`(`'h'`) if `x`(`y`) is categorical and `y`(`x`) is continuous, ", + "otherwise `'v'`(`'h'`) if only `x`(`y`) is provided) ", ], line_close=[ "boolean (default `False`)", From b788721ef34ae7dbb878672f42ce7db7376d504f Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Fri, 15 May 2020 08:57:42 -0400 Subject: [PATCH 49/69] wide datasets --- .../python/plotly/plotly/data/__init__.py | 64 +++++++++++++++--- .../package_data/datasets/short_track.csv.gz | Bin 0 -> 110 bytes .../package_data/datasets/timeseries.csv.gz | Bin 6165 -> 6178 bytes 3 files changed, 55 insertions(+), 9 deletions(-) create mode 100644 packages/python/plotly/plotly/package_data/datasets/short_track.csv.gz diff --git a/packages/python/plotly/plotly/data/__init__.py b/packages/python/plotly/plotly/data/__init__.py index b3ddffcd153..43dcf79f3ba 100644 --- a/packages/python/plotly/plotly/data/__init__.py +++ b/packages/python/plotly/plotly/data/__init__.py @@ -93,7 +93,7 @@ def election_geojson(): def carshare(): """ Each row represents the availability of car-sharing services near the centroid of a zone -in Montreal. +in Montreal over a month-long period. 
Returns: A `pandas.DataFrame` with 249 rows and the following columns: @@ -102,31 +102,78 @@ def carshare(): return _get_dataset("carshare") -def timeseries(): +def timeseries(indexed=False): """ Each row in this wide dataset represents values from 6 random walk time-series. The index contains dates. Returns: A `pandas.DataFrame` with 100 rows and the following columns: - `['MOL.AA', 'JJK.OB', 'LFF.KP', 'UJS.PX', 'BTK.HH', 'SHX.QQ']`. + `['day', 'MOL.AA', 'JJK.OB', 'LFF.KP', 'UJS.PX', 'BTK.HH', 'SHX.QQ']`. + If `indexed` is True, the 'day' column is used as the index and the column index + is named 'ticker' """ - return _get_dataset("timeseries", index_col=0) + df = _get_dataset("timeseries") + if indexed: + df = df.set_index("day") + df.columns.name = "ticker" + return df -def experiment(): +def experiment(indexed=False): """ Each row in this wide dataset represents the results of 100 simulated participants on three hypothetical experiments, along with their gender and control/treatment group. + Returns: A `pandas.DataFrame` with 100 rows and the following columns: `['experiment_1', 'experiment_2', 'experiment_3', 'gender', 'group']`. + If `indexed` is True, the data frame index is named "participant" +""" + df = _get_dataset("experiment") + if indexed: + df.index.name = "participant" + return df + + +def short_track_wide(indexed=False): + """ +This dataset represents the medal table for Olympic Short Track Speed Skating for the +top three nations as of 2020. + +Returns: + A `pandas.DataFrame` with 3 rows and the following columns: + `['nation', 'gold', 'silver', 'bronze']`. 
+ If `indexed` is True, the 'nation' column is used as the index and the column index + is named 'medal' """ - return _get_dataset("experiment") + df = _get_dataset("short_track") + if indexed: + df = df.set_index("nation") + df.index.name = "medal" + return df + + +def short_track_long(indexed=False): + """ +This dataset represents the medal table for Olympic Short Track Speed Skating for the +top three nations as of 2020. + +Returns: + A `pandas.DataFrame` with 9 rows and the following columns: + `['nation', 'medal', 'count']`. + If `indexed` is True, the 'nation' column is used as the index. +""" + df = _get_dataset("short_track").melt( + id_vars=["nation"], value_name="count", var_name="medal" + ) + if indexed: + df = df.set_index("nation") + return df -def _get_dataset(d, index_col=None): +def _get_dataset(d): import pandas import os @@ -136,6 +183,5 @@ def _get_dataset(d, index_col=None): "package_data", "datasets", d + ".csv.gz", - ), - index_col=index_col, + ) ) diff --git a/packages/python/plotly/plotly/package_data/datasets/short_track.csv.gz b/packages/python/plotly/plotly/package_data/datasets/short_track.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..ae2e1dd90f8650ec3a8e4fd09811d7f1acd2d36f GIT binary patch literal 110 zcmV-!0FnP6iwFp5k-lC419NC^a&%vGa$#d@E@N|c0Lx1($;{8wNzczo(J9W%DN8NV zNh-?Et4iex&Mz&=Q1H$#N=?);GSM+K)-g2Xa?Z%iOVlwm&@nXCvEXt}%u7s3)Unht QGy))@O`6TlY{d;l=R8Y)t@?M>Fd0M$ zhyamDOe5tb^7dKFb-AYlA)#;gag~4e-nP$w|MS1sKY#t@{kOlpe){R>_pg6{{qm20 zynp`f_3xklc>nh2>+k>i`TgUMuRnhL^ZonpKeSTkSE>B{D(_QfDJOQEc1G0o`YP`_ zdMTsqR^qHP_uzYD?7C|ytB(<*jLBzfma|VkbF5Ko=ePCUdg<$oJ=&gamwaBwM2phP z-lLB*pA)ovGB#!l{wPZ6o@ut(d~TG8-Ri5oci&@1AN$0deJ;OsdM}viV`007*5@iC zb~!ahn`O4wS9`CrIL+kK(od_8@&2ms+INXMOPd^`96p()_Oe#vqaL+xJ~}1l-rRqk zJc(3XO>sll9(qn#btrBIw)$gy zzWSTf9|E~1KH$dK?yYdf*(>RQdl$CH-od@rCaSab#z(l_OO1B+o}ALVx9y3%H!(aU z!|Q9l4dGs6;k4Gd)ni^`8^1)H(Pt2Q?>R}PLO@U98KvkAa?SsEh1!S7K_Jel{4oi( z2e7@Soud9&Tk98lsPg~3E=3#qkz2m)&L-*liR75%+$C0#n 
zqt=me$M~6baPc}y=2z>1+i~NlW$d)vg5VGCFm^FzAJh9acQlVLZYgg+u!$ZmI3?pNvQz;Z-&8o)5d_ws}(LJP_r9{Mu zR2-~6rmK(Opp?*R6fmjc#XV;eP@?Ze0`@o2I;*r!v02x|in#7Z`4Xs2T)1H^SjOb! z;#0hw;3ZkogD+#T+EzW*I?lI-a2r;r*h10~N6vQmq>zSW2o~_g z!U@815lVz=s;WlK3g-@!f0>l{q|6@Mou^RXMq*Gb^WZn|E|C$Zb+1943?PsXDLT>! zeAidRhu`6?iT!p#X$RgH`w!^9w2Y%FKkCC`9Otn-ytUv+rE~(lR#q2Y;{UF^*z`L% z1W*og!6My$1P!#Xiivig0^OEJAOoO}Tdce@WiSg#L!>$Lhrf+jqaZ+Evtog3Q-704 z1Ny)T0lX~JA-o(&R$v|6#|eSLNoqEMfs?hYcFs{qDe?nSdwm5A13FM%c$_Hi$F>=@ z_^28XN+KvGIWmN!DmETaFoaYK8vqHOi%CAgCh-6UVONqb&%AqIrYpy6#y{TR5{6-t z=wS9!*wvCHACme=ST3FUqHk8BVrJIKVwcr{d5{jE9v-JkzGM~dIxyaNWo65&Kq-_= zY3p=sCtgw-JnX~zmP#skLwX-10}zo)0A8L&+M||zGS4M-i9V>d_%hj%A7R+=@J--K zj9s!&m}tv?%xj&m0W0uu`@mCbh4S0@30Xmr;=a@Y{0h1#hmX6w&&|&Qu3+k>C#IE2 zty;3vjDtP-4s}pT+X{MuFPB33w$z8Jp>XEI->qPpR3KrZ!F|9>npWdv`M?E|hp~aH zfq04>Rj@?>Plu_x#>DNq#?x_7P@8HF!wvHhtqp-9p5ozd^w6aEwj52k%ZD;2d@u4*}(<5!^= z68X4!$GA^G-vRsLX-gs^K%fONga&bGDw(7L>I6p@k&kSGQb{wELf}2IWht{Z*+)jM zc5QPtNEa{#+18V3ZM-(XL%nvQiqtD7g%8RTG_#(nn69WhstCSNKu~fkyd@FfWav-| zMUExQ34&gnWV>{1$jxtgg|x;0AGtO$KC4euZCFDi#Wnz?6f2_hN$Q>o_rAbJ_!QNH zr6NB-#}sMf(`6*^gET@KY?yXtjgfHNq}T{_A@ec6UZM{v>W_NLOBH9=cfc|b$*0{4 z!a>jrK__iV=WjR!1mbZ@iXtGNw9jEU9pFpo$v^Ew`du+zt@QB`_IR0)0Hp9FMdo86 ziK&JnYcbywe=F6nQ~$Q;wl}!lG$lVOkWJ2{6zIx10GZxrso3Ie4)Pla9N+H`&u^{{ z=!(Kk&Cd~D%3UlkK2h|P@c;g58F{1ATyahge0N3n}rQi%iG=r=8#P- zI`aTdC%r5TaLwx1eIhKZyT?ViV`m`ms8!ZC)6K3Jg)xPvk|5r9HnE5uqE79S%2tSa zv2I9__6caAI8tWv2SI8PFBJL8^2-i!3H}&Yz3FxwRF!umJYTMld6@}CdZ=DF{YY1- zzZLzO!HF<z zT&TziNy=3&K%$Y@Qa1;xGdp7N8%E^`q+m}nzvtY6q`~W?1B{-Lh-R;o0;-|^Muwat(b?OZ z^ef&#Xt*5=F?rU<#ia~rK*`O$)Z)oaw%kBex9hfi$$}y);GHSn_9WS52ZL4d6!ZR) z^bwcH-huay`L_54AP5%m@&2$aa47|f3J31*>0#mnou!0@S=Lol)vQ7`hG+93IoS+@t?C2kDox~0wyBns zQxK|)h$^<5Y8U7#NhNo9L?WQ+RXjE){

L*pOm$+EK=1--cM1Qmz0Z7Yk$2)Njkd zl#G~dw)jcKY}1=OR$=foCnqzHIjuG52Ua2K0juTosS3kbAGR)zOdiwrSu}%+hp?&% zOs2LolVNuWvh$I)i3E8Irq~dVtP*)M5*bkfJtYT-LT)Tn(H#kGtJdN{&R}Eh@3!fN z2mL%(L@XwRSVFjt=C@ACtJMrS89*+g0)-9{7`aQ9P#=N{(kYuQbO&kQok?Il4ER(i z$ZkCm_OUFybrcE#?%+ZQ;oMqIIzl2ha@4`C5nx;p^?+sCXKlyHX%_iU%({>x#`EaH z4=I*t+U5q}QnhnH96#7bKfK=F4`0^}z?D;T`Z1)1DI?G9vIT;(^+T~^*a}|JyWJCU zL|bI8iXr5&v%U28k%Sajb}o-uTVc_Zq(Gu=t{NsU9thpbDfwok*@={L_LoDA(5`+_ zt0kB0oJQQ91%4Qbz-by@?M_Wp3GJyb+Nr4uC;jPg9kk6HD{LV~rDS)MTF#yoH{eZi zxxeoCOiV&M0F)_faWdWVa%Pr;KrC0tz+RT3i-*74ZHBqA+HfmZGk~HHL2RlCQ0v?9 ztQ&RPx+$UXH9Rs#wy_{aT{$@5D2h`;RB8?Yj>HV$32E&EO{-G{%Hs$(PzEF*R9VT@ zLLzzH!Oa;hJF#ou;uz&&WR94Fn)eQAmu;z5RpR%}lPY05Rfm6K@>MjKxo_u!?S!ybIHoJP)59A7@B z?ewNcbb=`{IV5)~g?sOe9BGk(P6(Yw8BSARQo1kzas)IXDRSKA#D;!}?x6T3M?KRn zedOG*e@KRE9fnOT!2|RPwWlxDA4CsNCROWj{EA3Kee!TRz|eLz#Ytp`Y&n3V@C1MP z7co6^lw0!I}p!1*}I;hQ{~ zaFwL{V&QEWM76v3&9v+iK$#-1oDb`%xrk)3bQp-O%hjkF&Uwt+K=E9?7mQj2t#On=7{!#5>Sj*de1vA1H6% zdV5HXD+3kunL-Oz$bzn5rqs;|X%B31v^Z=Ny5%js;xCv)MKkQJ@HDAv*lkwl+y2o~ zq;S*~kLBArs%%=xY2Ga|{Y>r)|4yE5|Lu{NfC>iUKy5n&8|jytD8Olmons~EvUZDK zVlz-9vxK7d5erR?X8T1$rzQ`f95}#7nLpLNSWYmV?!t0n71T1*-=qV8P(l)k+)gYP zEYTgIx_1A{QMAqDP!l6P*VUXHLcL>Y=e_Z`T!>&R z)|u1C3jff~f_tHS%o!C&1c4DIAfyWB8;?Y;h_<8QvmV=aVWvgctP>JAD&;l^JR~36 zs%$(!h30|^$S{)Fo;I$wTyz*Ngc9_tBaxvbrZ1}_C#v0fhXcM`e#B$E=WraX#vin) zNJ^7j>Qo~0b8^$Q0rX)RIGMET)U^EWS{7gINXzhijNBdCwB-1lZh-?&elpvqUE?^U zZ5x$gubQ*+Esi=;!F4U=0Z_I?dvsj5m8+?7&AUhqv_lQ}RY8o)rPM)R-PDT|mRoSM$_1t3)s;$p)p$Ed0;MT1 zpmwaw@N~Qzw{MqiNC};OOh2g3WaaRW{kUU9cV5MXyDOHQ-G*AL+S=8-6xvQ)4w+ir z#0jxmzC?Se2c3XqjX91C$C7I0xWi-@${a!cHNKa3|GNFXgSmOvVDzjtRZCNntBi-s zu+);IP|ZDO4pd=A1OYZ3EE4+6wcLy1Fudz29=>;`;J^=P3iYcD)mfZEr3I8Ku&gm5m!JjA-}4q9sOsCk%>f>`caGU_jl_w zU?QOdm~9ofAXc}m9EtFQBa~#X1Ojk;l#2yi(mIU>w<>AyTqz6IO(&egX=bExQuU-j z<)jeWV>=g^GCCi)b(x8wq>?W*C}pM`<8d%hF-p?5l`E7D*V#Kr9=6vuy-wF1jT&7S z3nRe=M6E8uTzPyHdCg|p(L8d=w*~Z&bM?8@M?J^KLYoucV2fZ7MY# zcjsiAs)DTDD*5I^j#*~QzJN2KzS@V^vpDT{yFlH2e}EQn^9ynbY;bZ`9d?V#ir?v; 
z)NxLz*JwOy=0#uG?1$rxq83;=(+fAGKm%mA`u@0dCWyEe@Z{20M+7y{Hz1$gBqi)ylN*&kOA0>8&)!bxQ-JwXz&9d(u z5Ji9)rNHqWl874gD4ZUk%{!yUY&NW`glCeVD4*9a*^+r8eI;UTemYWt%d~8oVQAA)yAb|g1LZ1Ybk80q3T=_3&U0&JP-A+ zC?_YzN0RNk6Qaw(SdKYI_6frmUAVWyMO3=RfqQVzWyOV&>T|<4aH;~i<-}e^<;xkm zPa61A2lSdFUN=tl(G8B&yDcKjr>FR<63 zm8QXMRqHU6k>gKKrDg?z%+!(k7_*$DxCJKLJ`lCl`_7F`n=>0gD?qg-}K`Aw|letBjq`8 zwo@<9Akwc$FbGnWFNlPk!vI6f7p;06K*G@Hd@Lh#{&*yJr%CL@VTNi ze+>K1Bus?ek{pXoxm^IgnQfMo>A_0V-W^MB-d#^;+)lpql9R1w z%?N7CC_bj|$~kh+p#HRrcTh)pyg#f>?%JRG8apR{yav78VBgo1>FBA%r}aec?vx9=(;0Cai2@8GSe0Xxuf7sxB6 zlxz8viz^jmI7n7JJH#wb{Ay+@s`ew*;zm2%v{&0>>d-DprUu|8GKeX!*&P)6!bY@2 zeLF$tcl^v6I#;W9NAJrv+EARFpc*Qw-l6EH?X7K4g%q3)i=AwX=ybL2$Xu||0c2vq zBMwVAJdVA16N+`jj2l2HQFKvo>>O_9)EA1Udv)dKyG;1xv7OM?V3FZ2%Xba#^1z7- z6xy=S$ZNYAK^Gpyb6I)ql=QwjR&oO9qlDdNV4X(h8)ej;?Ou22Wb^P`yr*1qE$+(& zNlFxPYV>Bvjkhh2?4i;G;Vrek@;na0hN1;z&#ZQ51EpeWIC;u@pL{=cOH7VO9@Dp7 zr$WG03WWoz0^NO-+{4&OyMBfPF>m&>Zuj7gQS`r zKhAIK5w{Q3m%@C~eoVLfYMaAx=)@N+PYM^;o1TBTFFx4`I9{&4$O4$z>EUx1&bNWC z5r4f&<8aEiV?~~=)4}DIlb%B>|NS+SVGC@A_^L)$WOGfEE$@;=+#R|APhDsW!lejp z@437J*^YDTr8J-<#(k#*0Y&rfOM6!BO*eEr(BzVY{9ljx8z%>Uv;m|KWwo*eA}hm* zfSNaDlHHv6L_vhB7fIZ=koxA?4#pgw5_%a1bnSPQ=R3~MC43ofJzO$ugtbgrPhUiF z-eRMduauq~)LW_YGp&GkIXU5S;^f7T@9>fN)7QIdQYQX&i0F0=foNk6V?d}!XAFt7 zJ=RlS5JiX2cPiWM%%L4$x1+P#u9me;S5R}p;dJD(2{(bmKttd0$ycA{PJ;t+Dvz<< z9evJB9$(}B%(ql^4t9Lqm2CG7ELB@i`7wIFvliF3FJ+FLcAe;V?%_MDJlCn%NvFGd zaP;Z&DjR^@kKb+F%(tQ*+dFOh?yav8r=4oAYkXu|>_Xp_?nknnE8CzBPFw9oa{Q3m(bpAQ4q8n%wRrV49)bq} zSL^dQlDBnB{KK}JeG0}ch&lk6(^5F90BNfu zYbOo%D%@RV9MyZzl4DwPFd|GStL|M}-{|NQ*lU;q8{ z&;NY>^uK@m`O9BE|NhH=e*X2ZKeSTkuTuH@Q+{rlrQCh?ZFlUtKEKLO9lexMPAmJa zJI~;IW1Mnxw)vx7O7~2&&E|7=+55EmX+Ni*F?S#5?m7Ehe(UyLFw@7vcHLT^tBifht$Vat zX8ZhVKdUTGGx@ai+v;n)f9g-|r|ddQn;fECKAENVvR31xzH2>vbjzOSaQ}7Byz^pw za;3!q`fmJA47wmIf#wc@ni+WGt%KdXAdy4Kiw=`}yNR@r6Fb@p24jyg6UwaGJx z$llm*)kAWOvRYeQsu31l&$IBz-gPg+WPobx#F9U+t>Tc)kruxsGt^Lb#C>V*Vw@?d*9LLCidQQl1znw-oi6V(HrEN|M3d750isH+*|o$5^NFB 
z$MYmFMhVCvGWC*n{paHI)ee4Lh1`Cv``~liT4py7J5%c&?;RX^E+?ZR;&>cK(&CL; zN8*n0JL}-$b(G{+>l?S@#!<^SvD`MnAKYP_V#>az&u8$|!}rZN>g{%8PG zF*roAHbUA6FQL%yo6DTxL-}joW5N?>Duh6^TKIW0`v)zYwT*9Qlz49Kd0lIKAxM;I}ArL55G#i zmekhOlE$Xh=z;q6xF7|o&@8}ps&$(1Q1Uv%$e8I=3Po5`)wq~XD8F5F59wwp5pg25 zFIHdE)kknpN@z6-m{jrNp0f!kyPrh@_8+2kS81JMv#yC1aXpRlB~XXBaKl=#jLFHx zr+7KROR}VIzOWkE_8M+J6EmshwF;GVta_|SAq~k8EZ~cU z6NKd=lnB*SRoyie&K)NIGAZ#%nZ331^wNOR^de;ctzL4dwy#RAu+{w9$I z^nnvL@Ul#Y?d3qS0_)&DP6!lEQnLvRoUEnVIY%L-$PY;E^J~K}pabQF$BE*3ZJSYx zkE#KoBsRq)M{eP$ij5Z(+(N2_4S;N(i%CAgCh-6UVONqbXFk0z)0Ja3n4%VbAB!m#1to4}PA zyJVqlqAmY1AM1P#Sb>Mz7oJipl;6Qm$O?)S_eBTrE9jzJKJM~9H@^$Gf~lLHm{umW zYROJB4))|b)IlX}E9eQnJQT{er9M;*g)?9NZUxh%0tvGl+y}hGv>GSn0~bgh#s;ni z;wf@e!4UyG9j59U6SwOcPsc$)ZK`=2ZkUf~9S9Whv|sK<4^4{i$kBw$yk1jCRB}O2 zAZ(?wn`w>~0h~99V}gmUo)=8#yTD2dv{&VtbPkO@s^!2TD1VJxOvRyhhBL)}y1-WS*ipQ3uO zROAQfm?CX_x{L&VkVZ&@1Jlmb7zxKsij6=QGGFuSBl?h{{-{S@syMs81D1hEKJ8Wz z4uW0?I%!Kf|As?AARecrC<5|H`y7VT0ltKu{L?<9-xcH4N*`Zg-yahafE1pj$UGL3 zm})4p7V|y$Td9T<{o8i8y}|9KDfv->Y;q=2peyG9WO|>aV%tCFAisgY@%{Po{Nd_= zt|;8p{2bw<+{L0K96fkFjvoj_9m&+zBgvPX3Takp6m;f@Rt_F&11^L5dN`#63y3h{ zK%evF_xte=m0XB0RHNK2Es?fMTLIVA#aw{Y3t;10lmUUU2}lcflz^9}5=}|; z!!z752d9^TRfaHQ2@4cm3JgOU#_otnF&RDsa`n!N>|a} zihj-DL>N4jkH{fp{1eyGmBMHs9tZ(|;Bz1`_zIdpfr~% z4T;UyxX`&2)npE5h`+tS6lHj>rDVV7+<~OQ>!bsWo=EIwuM+{)(0?N#=SXz+_9p!i zZy+?>4u+UK>+9lD1~j1L=3Z*?aFZ=J5Y_FvEnl*r$O?F8inl!^yX;`FDxPBAUy?rJ z64^WO-ZkGAzW@ZmB0fG}wgoPwKvChq{XHHgKG0btEX=YVMODozWaAbM!Epr05t5N| zV(V-Z~0}0C#X<3*p>aPP#%OH*(a$tr1{c5cPm%+GlO=n`svLPt3ZIBgXUS!Vf8y z-L%aOz@=*EfH;1!jedCjct3ny4**wg&GchP3sXj(>9Pfad-OxGV>k+4(Yrk-;)u4$ zTopseV`qEu_LYPbSavS2SzBSzl%znS9j>}fUOcdMFQ??2J7yQ7PFSMa$W<;s(4aF89wf zJ`k1nxX;&rBN`KvH|G;=j$YgZ}MouRYLd0 z!aFjEYWJKc)6yk?GDTjwU)H0!h-9(lxibHJJACy zE)KXE;NzBUn_#PN3HLcK`qC%H!;Kn)jlWSGr9k$|Wxt$dibrDCP zTI^<)LctMeb4>K`CXz7~W|Kj5l4~RhzqFfkeDHPe5RROt5C_ihwz^%>V0l`5VVap6mCsgXZq%_#__2A)K#w`(_h<)s}6P#JFt`PE9vI` 
zs74IgBjig&4cd1@b8;SMaW}29!|A@mt}cumHaVLsPb-Lbpt)^_j2eBRy#3bOLuyLU$$jD9$+PXhJ@OJz!9X0SZHHhZeyKqLPDAV*D>;|7Tl^(912rp*EGJe$Ei?Wm9RP%~C6S%&#B#w>o$nL6 zqh>^cEfQQGICuQQmZmJ8ot!p0J&SFQ>m)@eqxQq= z&eD;;&^D=klH>v|DUWYe?*F}Z|CLd+&ErrLBR$unIXQ%S$I{Nd@wi-wU@O*{)5i+` z(9VK;Tltt76-NYt5hfs{3g$N+*;x@CN5gl$w(Y`9i*2(`NZ_c*Z4h`!KDJe9JV1r! zf(pnmlGvU$uC`os7%qeY`qh!hP!iLZ)scy6ci!QEFUybp8t)m7gVp$hHWf)}l1rUR zWPVO=x;B75ECVN#cAc7*-(Ab%i?d@Hp5G(8Lz|Y2&*>I8@Z=}6ecLsTL)x}c8TP7~ zl^=1`kqWMBkq1E867A7(+pS!U#z8B<_@H-t#&U2Gb;M(ulhefd+E&v_*FrS%cayoU)|J;6qZ|Xv&sdf;?yQjOfm*xNvvHlG*K6YgJpj`YeTx6PHV-RyT1%?2#|g zUg|+7AgM9Ok=wDPT6sUiWEaXDLH#v;FYo^A>F*uP&1Vfp&sw8envz^)JY0rFOOir0 zd(I40VMYW2HXSSy`s7;nqBsoiJJiacgXg+lu)Th_a^3dJmKIl0I+Osl2_o2>0Mei` z6l~v-HP)1V@1#=i6xKFyhz-ce4sh>BKrbP`s;C|Pr^LuaB1`?KMfm%7>oi~@p#zv> z6}TW)x2%jr_`wxQ(kp=g93SOk0hd^((co4k2Ja(f!Mf>$b2!c1F;1%92vklAp}n@V zz?9MXz^%&+hLTFY(4dr=a*W5pK*cCY+g4U62d=YskUVU!ZF-%qI~p~*E*3_>1w^eb z!d!WL6?siF?PwmkZ6|VvC!tk_qu3u6Kr$wsh4oMSer^s$K9E1Q&o_) zTP5Fo$e3le^aY#=_0_(-p5nCM?E-c8{Q+9Q!!O7su))b$b=WN`D}JYYQpY)4y+-3v zlNWuZ+24*gidtZ0rnlXY0u7Me>gVgyNf4JIt-C4MzVfltVr8@hbl`TPc1ht~v+@82 zIs21qPL+yty!9-@)uzat>E-TtH0p8vuGDdj{Z(Smu$r3;t9vVwvRU@M1EL5pqZBy4 zLlRMg9);5bw7D~C%x1&73OthpT@i6gR`?m)13gZ~m)@ISX%;NfB_LZ9?_JV4w83MT z6`Or$L(9&3Oh~B~RF7%f26O|J>lB0o%Ob@*oMP#7X9qu|J$ix&zNXVmla{uoeN^X4 zS>(H=MwbKT*6qxt)sWzs^HyoC+PJhNmmco`As?HU$Fl^Pq^R50B<>bWum1O7H z3DM=_&py*@s)wd<5+k<=u!hE26WGeR(}>M>cn!~pqLv8dht~S=?ba8Uca~JO)t*>bZ=Jwjywm>PV|CJy~c4T zOzbYrG1fNZIJP>R?8DbOvNlrwZyWMr!ks{BqorJOJm4_aTjA;ppDRl9$FT1VVIu67 zh{Y_p^s*KSX?b>DX&kE{yGm~V8M7%oQf%>A2Ebe<~NIhT1Yk0;gnwxbMW zn|l(zPnnVS&E5C4U+YeS5=at%*?U)eda%;8cgK>=yX)!1?dGMIOtzXeBd9H-_?muJ z&O3Vs^`~9DgF4D9d(AG9%%|s5J)_OZ7nDnq4Y#2%yyZYEpBjXAVD z9HRa{Y3a6NnGgbmf`o%2o~1%@eaF|2?PmF^dzwnoLF2ex=%eqP^X;SKDLi&@M@)2H+(!h$+|X4hnr?W4EBbouKnOer65b zN2_*6@5?sYP~1#V4HZ@IQ1sjO(Ke_;3QmW`PPRpKx>|Q+F4*V*l34JF!x9dUV;{WP zigm<{8$c;hbWw1e3^z0Nh2rU6UHSPg6Fzxu2ih7eGTdePuEDcBaH0Z*w)7dfwyP0z z;Z;1Bm1{@R`|4Q91kgtbyUoBljm#Tm)Sc~Kcj#pE@Ls$}uDKTX<${nBMI4RZ47u^P 
z<&iy9njpNT)>qEsAZ#dFK=x#{I~yn!Q^Uzq?tSxq>XDcnkG!UzcAW|VS1A+@s0wuV zQL=}zlXm?KXF3xw-)OmwX5xL)SvuEkPU}Tz-?j`n72o~v-sw}S1FkF0{bqdF0}J?0 ztfa^lwc@)K3Pty$&g^DvFJD`n&h~uMHhAN)tf2T3(Mew@FpN8CPCUkdX{ z=QZ7)SKAzpLnppqc_Un0Z+iaWzWAmSaJ*c7kp(cb)5GU3oSz1|?)~34X&g@ZcC5&A zbUL`)GU>Ur^1r`kGHij(5MR|uMK;$oX?d3{;_lD|cyysH2$v$Xy=Qp^vR&uaOKCt! zjOU#a1QgA?FYQ^iPr9MwfhLzE*Bfrh^0lUJYRPJ;t+Dvz<<9evIukFRlm=UY^r zgB@RYCEI-iOV!pRKSs|xYx}Y7OPM3nuDkm)_wb!np6gWX#_6se9DTaHN&}Gn__K|h zc`NF*ePY{pZ+(p*gY(q*w$7aG3}4MUwm?Abcs4WOb`9bvV?^TRf;^AP>tJrDoocVg z_()sqLf@6{SF+tlwm}`7w%Uti{1ENv>xwOdR+CLFUVV*+;6cFE`n-b+;l nnARNJJFgI_v$CY<@N2(M#IIV^r~?!5+#miQOa91;t1tinbe=Wf From be36f37f996353f130377ef21b821966a56d5e21 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Fri, 15 May 2020 22:58:47 -0400 Subject: [PATCH 50/69] better timeseries dataset --- packages/python/plotly/plotly/data/__init__.py | 17 ++++++++--------- .../plotly/package_data/datasets/stocks.csv.gz | Bin 0 -> 5895 bytes .../package_data/datasets/timeseries.csv.gz | Bin 6178 -> 0 bytes 3 files changed, 8 insertions(+), 9 deletions(-) create mode 100644 packages/python/plotly/plotly/package_data/datasets/stocks.csv.gz delete mode 100644 packages/python/plotly/plotly/package_data/datasets/timeseries.csv.gz diff --git a/packages/python/plotly/plotly/data/__init__.py b/packages/python/plotly/plotly/data/__init__.py index 43dcf79f3ba..8960bbf0f24 100644 --- a/packages/python/plotly/plotly/data/__init__.py +++ b/packages/python/plotly/plotly/data/__init__.py @@ -102,21 +102,20 @@ def carshare(): return _get_dataset("carshare") -def timeseries(indexed=False): +def stocks(indexed=False): """ -Each row in this wide dataset represents values from 6 random walk time-series. The -index contains dates. +Each row in this wide dataset represents closing prices from 6 tech stocks in 2018/2019. Returns: A `pandas.DataFrame` with 100 rows and the following columns: - `['day', 'MOL.AA', 'JJK.OB', 'LFF.KP', 'UJS.PX', 'BTK.HH', 'SHX.QQ']`. 
- If `indexed` is True, the 'day' column is used as the index and the column index - is named 'ticker' + `['date', 'GOOG', 'AAPL', 'AMZN', 'FB', 'NFLX', 'MSFT']`. + If `indexed` is True, the 'date' column is used as the index and the column index + is named 'company' """ - df = _get_dataset("timeseries") + df = _get_dataset("stocks") if indexed: - df = df.set_index("day") - df.columns.name = "ticker" + df = df.set_index("date") + df.columns.name = "company" return df diff --git a/packages/python/plotly/plotly/package_data/datasets/stocks.csv.gz b/packages/python/plotly/plotly/package_data/datasets/stocks.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..4f178a4d49853b5492d47053a205e26a6564a944 GIT binary patch literal 5895 zcmV+i7x?HOiwFpzOTS(K19Nn5V{3CRV{>)@b(_hqT-k9&=lT}1aQbFg9+W2P0DE9~ zX2AXf|3AD|#C@6hsw7fHkoj)jI47DNI~x7ZfB)bA{`m9X|NiG6zyA7ZS= z%w0$GqgBu0kGjj8d(I!_mpkuWe65x{$L-svW}j_!zOqK|XU4ncJ*TcZ7e8Gu*)@LH zTPgdjHTc@i)7*{7m4eU7m$p&($LM9R9in$>qmR*AJ#+2hQ{$d_XPaX#4ijH&Www6C z+Dzm> z-O=lg@9nz645RF_XXARaj`7HDKT8N2bvE?qt2x8wql3cu{(MA-WzIc|r|z5wEw1=t zo2~5H*U(_g6XFv**)F1L4;@~ty(X_Y9I~C4?Aqd|z0F$>+MKBUhSI10GQKYETk!}4 zt$I-rze4tHRpYF(c(K~)INCTH2{EvJVWJ`^7I zZmnRGF)(%KqAR`PqUkzNxHnVu(sVin8l1QNYkRk!sk*QrM{_Q6&#)%~cR+loEQr;u56wHe!>ADeWaQ-dcL;xJ9pR zED<4GcYE3OVQ8Xn;R|OBUN%Q)3bEi!Q#AJ1s*~^FWdtI+xoPOlxB4aS0D{}!vcseJ zSwj&Ky132bx=6hr!PN*AfTeOQw9)lWJQ3|xA{|Jnd#ADqPi#>ZD0jn$G36k3fJpRI z-^xx)qbHnPv)$6uhVyL2e)(B=IMK;N#5V9aPY4G~bQ8{iPkBTxfYV``adFvO$?(CIfqh>-R@WeQ4{pL60&os+-H0zecut=goC08I zXqF&*yc)m7=ft(q`b%QXkOKdSOH&eInO0s}xPQj;(C?AMn{FBz*s0Yh97IvX9f6By zZ30@;ukhbZ#9#C%JQ0sC*{#QI#Y!LmAOZeDHc*S)`=RN!F9$@SxDS5`bZj~W%4!`< z*(>k82H<%x+e{;S9p%@q(%$#(CA^8uLkMC#Tx_PIikhC!y+=efY8${{?fU*mDB(t+ z6iBkcA}>x-tOgr8BrY?u;)J*yd6g{n2%kU84x0i!hn!k?SKW`M)??;Q$lYSrAbM9O zS<*X}#-J)Yv4lX^h0%Y-IFUvGPKX09*9Xpc39WH(qETD~Zn$Ylw8bq>q@af4wlPXT zK^>AEkd7PZ)kKF8CMMUBa+~=_n+AXZGJKcw5P|2ugeGF_c{t%V!_9v8b5rh9Ke8l{ zYI;~A1lCFtaa9~H&)4}GPc8BwUP3UTh!bpu0|Ou=^VSLJ`d0Ik<>&GSD2hN2*@t(| 
z{UQj;?o%Upbo3K~N(cBr+-}~yezJ`@%(=$~t8a*wC*XlN&^4Dlq8A>P?1u2D2go6f zCMCP!S@R>T`YuN2g8D%n7e!`9aq6mn>+(=A9+Gf!p9syE%6ayDMLE5pokIhMu1O2sF_01 z>`Qotd+iq}>vAwC5cb0};wcj1hCPVhz*StFV-eJ-n70MKVa}=# zV_N>a^O3!OW-Q60d4#C-p^RyBdb^*VZn$4)&m;;#mGsHAq}SC4gKD5%9Ij3vOq2?( zi&Vt4L~*()%oIIkC%{0xnU&yX(*xlyJ@mJwDS#Z; zL$}+np^Rb-fr1NcdS5ysg*Q+l;Xddc<)r8uMbn3;jm?b!`Yw?tr$^e6JE~vEJ6TO= z0RZq){jJ=f#gs&VlweVyC*!e{a<{3f}3SPc62);o>0UEM3e)5Rc&Ewk8Z_XXW?L;b|csm~SL*s-7Y|rwMS6fPn z|8l`nOg>~?pW+i!#sb(#G(E^O%2}Qbln0&0OZelf+yngM`n+a_k;XSii+m`2$TaaQ z94m53SKXkA)brt^MFL2yOPda>}z(_rmF3p=}v> zj3UxImrKb;&ZQ_IzT+YDF>!<%`FIBSG;PTxZdhl(Z{5OIq5j1n=n}$!CNE}P{b)Rt z$@d6c+(?!jX@11FP~(aU5z*pkkx&3oE#5@#M$^?gGRVjy`u7~kt3NQU19mipEwCO2E+lXsQntO3_PJQ(Ai&<`2Nh2 zj3SQ+=*Eu;C=Pq?hq(hXc!}i}+<(IESJH9H1QXK&al*RbB9Gu@((GDNIM8T$hQi3A z53Ay`XibtExHxf8<-LxxkibiYgCkw2(Wp? zn?|tYBjsGRy(MSc1|APxx4wby1wJ&Xoo$emon<7VhX?`EquLNnB#?Ya)t8Hphz1hz zky=O~0uJ%TTI~Sfl{T|p!dp~)c!*UtpIOaCkaam~sqggJZ88xykeV7bNBM<1vv_Gj zQSArhQ;RnnU>lmU!U`k`0H5lGpqJvlW#8=nEo(#U&B;bhI5%qdN0kjnMUd?ru=hmU zkkSv&;wZWoa38tO06TU8tXRX|L?j1+bl9GJf~?l3g@x}!Jq#4sXLh0%IPl%xppU5; zwQuX^9v7c>bExSI{q%~Vx8n66G)LY4+T!);VJ)8Hde%qOVJC6+Gmotd7P~~(-l3FQ z;kd1^DzoLf!RQD^o*_tKYby#cJ+j(mfzd#m?pt{FHqbY&A%n0_5(P&<%O|Yj-f+MH zY!Wt9^$IsWyL=gpo7wq+9YbTlAE2}*HQ|R1Q%cFrvm%8T(ZGs^d%P&2*9(soRnQ9Rbg@15ohM z!Sl6XS0l|p=?ZCASJ~Z$Fg}H6w}@~Vc(2hawr{l`3IvPMd5rw|l3hy{&cjw4sa9Og zPQy1(ZwbvLS?B|$v@W2tL0QO>B(8c&k`#gzlh{@W9<}2!3b~Z}Tl12wG zxqe{Q`oWxHU))N8DjQT5fW&HSR(@MD4v_X%@@Vrbb}0R|52MjdHW*1a1So)S2;V{0`iF3oDK;&uED9%O172h@-2Bd*3s9#n`2|^%;%p^ON!OuQ z!LHKPLUgt|e&1hFGk3jf-EKUN(0r1#bGl zY*F`5TjzDXWj_ggw`eU-8wo8dj`>W0I>~RDkgb0RuLum#qpEH0QKP}XC|9Z!U^*HI zYKyjk4g2an3ZzG9`&m-keZaTv22g6OlU>E%PeOdZ)vthvKjyq7s!!VkOD9*jCLa!= zz?KzsOg;w`QX<>yAL}JNdmS8gSxML~UICO>?|pJt0SE2nS6fyI1+=9lYNvYf9NCn2yZ2+0$)ArzZmpk zN@96tGaih!vnIncoPsz3&>|()IbUPt8`Fr;IoNSboy9URn-A|f-&b2n=FYjDddW$@ zSO&Ph<#-e!-Wz6LW3aNw&&hD6t#sZ4WZ5u#%T6vhEXJb;J@Vpy{Ude7#jP9c0ogPp zN#vNwA@8xbjpm~=$KX3s1p&T$(yM5wH9Nd!v6t|`Doo+~rdg27B&G8_-+Fn@F5phX 
zlA92Zo1+D?dXQF_ZIr&m80L3~LwOQ&Ml@~_LBhtRO7Ab>(d(~j(&I)QyHj|ytj~w1 z&l5N9Ld%fi2kuZ0HjN#1TQp_r-IdzNLB95f1PhR7*#o zM!~flyW*-TYgkG*{b!VPLfh4xXBwv?t0*4gw^0X8E3dGc*~!8$o!Iu`=xS&A(c-2b zW!IG53_mzeXV#Vy4~aVNO{9*kQ7i$#*MI1Yr$HO;>l%oNq zk>W7B8xpQ5JLluL`?XaEpkTiUVj!tGK1QeZNa{;m^sY*9y(!;YI}ryW zv>wW9n^0)-By%Mv=M_9}$bcs`Wl|rLv!XnY0J7N#!5~}2$Y;G}#(m^au;}Tq@aCAl zn4?lV!$lHO$hes^!F*XZF0nz`ARk!Ke7 zC?Wi7+eVoV(#fPz9`U3o`58sjz_tgYa_pJQ#WI^F)80*p#uuo2hMcVA=zN;>kbW#w z&zI!=qz9V^wwjExoAVF~E5EqBW!70lc)BmFa$QLMP|= zU$OV*2;wa~vS5$mi1ROd9Ep;D$jRa`H#IfNc|+$m#GQ@^mO1t!cLEhrSAkoR@|{f- zDIK`y8p%s|ZuMA+gd%sj$Hpdr7WGVV;-Z+`(C=TZ;15?dDS`JRsi*46!VE-#x1UCj_%NL3M*K^APoj zlUzb;1}IWXsrcxu;-330E>t|6LR<0xcWpn;bw4&=0t{_55V|szHFMB`|6VLhE<7`b zIo(17P=t9CDiI_+G#%DlcU*}7m++S3yzRcymB{%wh2L-ep|4(_jRLXm6qq1>ooBA8iH5Z$-j{6On(|WAa zxk0sC-nrVp?+#CQ%dIQY&2qSC6SefW;B5NX5O>9dP|OZmNOEkpoR6TW$CYYOkZV^t zt0lW(lvz!Rva&JH@m-v{l#%dK@K++m_tsdw>1fJ~X7ED9=&=8MRv% z<8eEit0x9qCq~qH8r$G5Lyo9KUkUO)<5CzWc|{soM7p@?n-B;J%TX+@TC#sLb`;NY z0?r#{vAdAf0J^SCjEtQ z?46sGJ2nGPOlhMb>H?p=WMuV{_-c>|RgloHhKR$A$buZzT@~Y_28%|gZYh3tw!vvz z5#S?O-*Y(ZbpAw{RAugd(biga$Z+ZX4863o$(%g#RVO=7c=iOgi-+jj=GT1gsM$Z_ zsJr81Cz+>`EP9UHMofRhBRB$X)uQsWUXFBO_!7>p$CR(!pyVjeW^}4YE+xg{9p-A$ zQwh-lzM54Ic|@fwCAvkks}uN9cC`gd&Rk*>?HRVgHCn{JROXRfvoBFy-Dy0t?aJQB zKn|1oHz&hfw$uq@MCYbLbC>1DMkmUO2Exhl2`KjTxm_*x)k9)bzR=o1Lasj&fK)K9 ze$4tHtg!XkK{)B);`wndytC(2WZ>Q9GjOZtnxYF5Kt2k;Ir4NpNS@YldF#Bl>`?f2 zfZAP@h-?Ak zzxx)Qo5kQ?iQU?h>z0%i!glt5;KQ-iF8t>DV?Uu+w}zounYdvZ=9LrOXGKo+JPi3Q zA%2O>0c>Q5?F4Pb3@WPe^RYme<)=)mNelsyET!Ed5 djV=p~4j_)9+xdS%u+{sg{|8Zq;T;b$004~`n?3*l literal 0 HcmV?d00001 diff --git a/packages/python/plotly/plotly/package_data/datasets/timeseries.csv.gz b/packages/python/plotly/plotly/package_data/datasets/timeseries.csv.gz deleted file mode 100644 index a34fe16fcdc188e9c599b2c62138ddeb0d4fe744..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6178 zcmV+-7~SU|iwFpvjlNz019WL^Wpib6X=QURV{>)@O`6TlY{d;l=R8Y)t@?M>Fd0M$ 
zhyamDOe5tb^7dKFb-AYlA)#;gag~4e-nP$w|MS1sKY#t@{kOlpe){R>_pg6{{qm20 zynp`f_3xklc>nh2>+k>i`TgUMuRnhL^ZonpKeSTkSE>B{D(_QfDJOQEc1G0o`YP`_ zdMTsqR^qHP_uzYD?7C|ytB(<*jLBzfma|VkbF5Ko=ePCUdg<$oJ=&gamwaBwM2phP z-lLB*pA)ovGB#!l{wPZ6o@ut(d~TG8-Ri5oci&@1AN$0deJ;OsdM}viV`007*5@iC zb~!ahn`O4wS9`CrIL+kK(od_8@&2ms+INXMOPd^`96p()_Oe#vqaL+xJ~}1l-rRqk zJc(3XO>sll9(qn#btrBIw)$gy zzWSTf9|E~1KH$dK?yYdf*(>RQdl$CH-od@rCaSab#z(l_OO1B+o}ALVx9y3%H!(aU z!|Q9l4dGs6;k4Gd)ni^`8^1)H(Pt2Q?>R}PLO@U98KvkAa?SsEh1!S7K_Jel{4oi( z2e7@Soud9&Tk98lsPg~3E=3#qkz2m)&L-*liR75%+$C0#n zqt=me$M~6baPc}y=2z>1+i~NlW$d)vg5VGCFm^FzAJh9acQlVLZYgg+u!$ZmI3?pNvQz;Z-&8o)5d_ws}(LJP_r9{Mu zR2-~6rmK(Opp?*R6fmjc#XV;eP@?Ze0`@o2I;*r!v02x|in#7Z`4Xs2T)1H^SjOb! z;#0hw;3ZkogD+#T+EzW*I?lI-a2r;r*h10~N6vQmq>zSW2o~_g z!U@815lVz=s;WlK3g-@!f0>l{q|6@Mou^RXMq*Gb^WZn|E|C$Zb+1943?PsXDLT>! zeAidRhu`6?iT!p#X$RgH`w!^9w2Y%FKkCC`9Otn-ytUv+rE~(lR#q2Y;{UF^*z`L% z1W*og!6My$1P!#Xiivig0^OEJAOoO}Tdce@WiSg#L!>$Lhrf+jqaZ+Evtog3Q-704 z1Ny)T0lX~JA-o(&R$v|6#|eSLNoqEMfs?hYcFs{qDe?nSdwm5A13FM%c$_Hi$F>=@ z_^28XN+KvGIWmN!DmETaFoaYK8vqHOi%CAgCh-6UVONqb&%AqIrYpy6#y{TR5{6-t z=wS9!*wvCHACme=ST3FUqHk8BVrJIKVwcr{d5{jE9v-JkzGM~dIxyaNWo65&Kq-_= zY3p=sCtgw-JnX~zmP#skLwX-10}zo)0A8L&+M||zGS4M-i9V>d_%hj%A7R+=@J--K zj9s!&m}tv?%xj&m0W0uu`@mCbh4S0@30Xmr;=a@Y{0h1#hmX6w&&|&Qu3+k>C#IE2 zty;3vjDtP-4s}pT+X{MuFPB33w$z8Jp>XEI->qPpR3KrZ!F|9>npWdv`M?E|hp~aH zfq04>Rj@?>Plu_x#>DNq#?x_7P@8HF!wvHhtqp-9p5ozd^w6aEwj52k%ZD;2d@u4*}(<5!^= z68X4!$GA^G-vRsLX-gs^K%fONga&bGDw(7L>I6p@k&kSGQb{wELf}2IWht{Z*+)jM zc5QPtNEa{#+18V3ZM-(XL%nvQiqtD7g%8RTG_#(nn69WhstCSNKu~fkyd@FfWav-| zMUExQ34&gnWV>{1$jxtgg|x;0AGtO$KC4euZCFDi#Wnz?6f2_hN$Q>o_rAbJ_!QNH zr6NB-#}sMf(`6*^gET@KY?yXtjgfHNq}T{_A@ec6UZM{v>W_NLOBH9=cfc|b$*0{4 z!a>jrK__iV=WjR!1mbZ@iXtGNw9jEU9pFpo$v^Ew`du+zt@QB`_IR0)0Hp9FMdo86 ziK&JnYcbywe=F6nQ~$Q;wl}!lG$lVOkWJ2{6zIx10GZxrso3Ie4)Pla9N+H`&u^{{ z=!(Kk&Cd~D%3UlkK2h|P@c;g58F{1ATyahge0N3n}rQi%iG=r=8#P- zI`aTdC%r5TaLwx1eIhKZyT?ViV`m`ms8!ZC)6K3Jg)xPvk|5r9HnE5uqE79S%2tSa 
zv2I9__6caAI8tWv2SI8PFBJL8^2-i!3H}&Yz3FxwRF!umJYTMld6@}CdZ=DF{YY1- zzZLzO!HF<z zT&TziNy=3&K%$Y@Qa1;xGdp7N8%E^`q+m}nzvtY6q`~W?1B{-Lh-R;o0;-|^Muwat(b?OZ z^ef&#Xt*5=F?rU<#ia~rK*`O$)Z)oaw%kBex9hfi$$}y);GHSn_9WS52ZL4d6!ZR) z^bwcH-huay`L_54AP5%m@&2$aa47|f3J31*>0#mnou!0@S=Lol)vQ7`hG+93IoS+@t?C2kDox~0wyBns zQxK|)h$^<5Y8U7#NhNo9L?WQ+RXjE){

L*pOm$+EK=1--cM1Qmz0Z7Yk$2)Njkd zl#G~dw)jcKY}1=OR$=foCnqzHIjuG52Ua2K0juTosS3kbAGR)zOdiwrSu}%+hp?&% zOs2LolVNuWvh$I)i3E8Irq~dVtP*)M5*bkfJtYT-LT)Tn(H#kGtJdN{&R}Eh@3!fN z2mL%(L@XwRSVFjt=C@ACtJMrS89*+g0)-9{7`aQ9P#=N{(kYuQbO&kQok?Il4ER(i z$ZkCm_OUFybrcE#?%+ZQ;oMqIIzl2ha@4`C5nx;p^?+sCXKlyHX%_iU%({>x#`EaH z4=I*t+U5q}QnhnH96#7bKfK=F4`0^}z?D;T`Z1)1DI?G9vIT;(^+T~^*a}|JyWJCU zL|bI8iXr5&v%U28k%Sajb}o-uTVc_Zq(Gu=t{NsU9thpbDfwok*@={L_LoDA(5`+_ zt0kB0oJQQ91%4Qbz-by@?M_Wp3GJyb+Nr4uC;jPg9kk6HD{LV~rDS)MTF#yoH{eZi zxxeoCOiV&M0F)_faWdWVa%Pr;KrC0tz+RT3i-*74ZHBqA+HfmZGk~HHL2RlCQ0v?9 ztQ&RPx+$UXH9Rs#wy_{aT{$@5D2h`;RB8?Yj>HV$32E&EO{-G{%Hs$(PzEF*R9VT@ zLLzzH!Oa;hJF#ou;uz&&WR94Fn)eQAmu;z5RpR%}lPY05Rfm6K@>MjKxo_u!?S!ybIHoJP)59A7@B z?ewNcbb=`{IV5)~g?sOe9BGk(P6(Yw8BSARQo1kzas)IXDRSKA#D;!}?x6T3M?KRn zedOG*e@KRE9fnOT!2|RPwWlxDA4CsNCROWj{EA3Kee!TRz|eLz#Ytp`Y&n3V@C1MP z7co6^lw0!I}p!1*}I;hQ{~ zaFwL{V&QEWM76v3&9v+iK$#-1oDb`%xrk)3bQp-O%hjkF&Uwt+K=E9?7mQj2t#On=7{!#5>Sj*de1vA1H6% zdV5HXD+3kunL-Oz$bzn5rqs;|X%B31v^Z=Ny5%js;xCv)MKkQJ@HDAv*lkwl+y2o~ zq;S*~kLBArs%%=xY2Ga|{Y>r)|4yE5|Lu{NfC>iUKy5n&8|jytD8Olmons~EvUZDK zVlz-9vxK7d5erR?X8T1$rzQ`f95}#7nLpLNSWYmV?!t0n71T1*-=qV8P(l)k+)gYP zEYTgIx_1A{QMAqDP!l6P*VUXHLcL>Y=e_Z`T!>&R z)|u1C3jff~f_tHS%o!C&1c4DIAfyWB8;?Y;h_<8QvmV=aVWvgctP>JAD&;l^JR~36 zs%$(!h30|^$S{)Fo;I$wTyz*Ngc9_tBaxvbrZ1}_C#v0fhXcM`e#B$E=WraX#vin) zNJ^7j>Qo~0b8^$Q0rX)RIGMET)U^EWS{7gINXzhijNBdCwB-1lZh-?&elpvqUE?^U zZ5x$gubQ*+Esi=;!F4U=0Z_I?dvsj5m8+?7&AUhqv_lQ}RY8o)rPM)R-PDT|mRoSM$_1t3)s;$p)p$Ed0;MT1 zpmwaw@N~Qzw{MqiNC};OOh2g3WaaRW{kUU9cV5MXyDOHQ-G*AL+S=8-6xvQ)4w+ir z#0jxmzC?Se2c3XqjX91C$C7I0xWi-@${a!cHNKa3|GNFXgSmOvVDzjtRZCNntBi-s zu+);IP|ZDO4pd=A1OYZ3EE4+6wcLy1Fudz29=>;`;J^=P3iYcD)mfZEr3I8Ku&gm5m!JjA-}4q9sOsCk%>f>`caGU_jl_w zU?QOdm~9ofAXc}m9EtFQBa~#X1Ojk;l#2yi(mIU>w<>AyTqz6IO(&egX=bExQuU-j z<)jeWV>=g^GCCi)b(x8wq>?W*C}pM`<8d%hF-p?5l`E7D*V#Kr9=6vuy-wF1jT&7S z3nRe=M6E8uTzPyHdCg|p(L8d=w*~Z&bM?8@M?J^KLYoucV2fZ7MY# zcjsiAs)DTDD*5I^j#*~QzJN2KzS@V^vpDT{yFlH2e}EQn^9ynbY;bZ`9d?V#ir?v; 
z)NxLz*JwOy=0#uG?1$rxq83;=(+fAGKm%mA`u@0dCWyEe@Z{20M+7y{Hz1$gBqi)ylN*&kOA0>8&)!bxQ-JwXz&9d(u z5Ji9)rNHqWl874gD4ZUk%{!yUY&NW`glCeVD4*9a*^+r8eI;UTemYWt%d~8oVQAA)yAb|g1LZ1Ybk80q3T=_3&U0&JP-A+ zC?_YzN0RNk6Qaw(SdKYI_6frmUAVWyMO3=RfqQVzWyOV&>T|<4aH;~i<-}e^<;xkm zPa61A2lSdFUN=tl(G8B&yDcKjr>FR<63 zm8QXMRqHU6k>gKKrDg?z%+!(k7_*$DxCJKLJ`lCl`_7F`n=>0gD?qg-}K`Aw|letBjq`8 zwo@<9Akwc$FbGnWFNlPk!vI6f7p;06K*G@Hd@Lh#{&*yJr%CL@VTNi ze+>K1Bus?ek{pXoxm^IgnQfMo>A_0V-W^MB-d#^;+)lpql9R1w z%?N7CC_bj|$~kh+p#HRrcTh)pyg#f>?%JRG8apR{yav78VBgo1>FBA%r}aec?vx9=(;0Cai2@8GSe0Xxuf7sxB6 zlxz8viz^jmI7n7JJH#wb{Ay+@s`ew*;zm2%v{&0>>d-DprUu|8GKeX!*&P)6!bY@2 zeLF$tcl^v6I#;W9NAJrv+EARFpc*Qw-l6EH?X7K4g%q3)i=AwX=ybL2$Xu||0c2vq zBMwVAJdVA16N+`jj2l2HQFKvo>>O_9)EA1Udv)dKyG;1xv7OM?V3FZ2%Xba#^1z7- z6xy=S$ZNYAK^Gpyb6I)ql=QwjR&oO9qlDdNV4X(h8)ej;?Ou22Wb^P`yr*1qE$+(& zNlFxPYV>Bvjkhh2?4i;G;Vrek@;na0hN1;z&#ZQ51EpeWIC;u@pL{=cOH7VO9@Dp7 zr$WG03WWoz0^NO-+{4&OyMBfPF>m&>Zuj7gQS`r zKhAIK5w{Q3m%@C~eoVLfYMaAx=)@N+PYM^;o1TBTFFx4`I9{&4$O4$z>EUx1&bNWC z5r4f&<8aEiV?~~=)4}DIlb%B>|NS+SVGC@A_^L)$WOGfEE$@;=+#R|APhDsW!lejp z@437J*^YDTr8J-<#(k#*0Y&rfOM6!BO*eEr(BzVY{9ljx8z%>Uv;m|KWwo*eA}hm* zfSNaDlHHv6L_vhB7fIZ=koxA?4#pgw5_%a1bnSPQ=R3~MC43ofJzO$ugtbgrPhUiF z-eRMduauq~)LW_YGp&GkIXU5S;^f7T@9>fN)7QIdQYQX&i0F0=foNk6V?d}!XAFt7 zJ=RlS5JiX2cPiWM%%L4$x1+P#u9me;S5R}p;dJD(2{(bmKttd0$ycA{PJ;t+Dvz<< z9evJB9$(}B%(ql^4t9Lqm2CG7ELB@i`7wIFvliF3FJ+FLcAe;V?%_MDJlCn%NvFGd zaP;Z&DjR^@kKb+F%(tQ*+dFOh?yav8r=4oAYkXu|>_Xp_?nknnE8CzBPFw9oa{Q3m(bpAQ4q8n%wRrV49)bq} zSL^dQlDBnB{KK}JeG0}ch&lk6(^5F90BNfu zYbOo%D%@RV9MyZzl4DwPFd| Date: Fri, 15 May 2020 22:58:54 -0400 Subject: [PATCH 51/69] pandas backend --- .../plotly/plotly/express/pandas_backend.py | 81 +++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 packages/python/plotly/plotly/express/pandas_backend.py diff --git a/packages/python/plotly/plotly/express/pandas_backend.py b/packages/python/plotly/plotly/express/pandas_backend.py new file mode 100644 index 00000000000..ac5410d9e7a --- 
/dev/null +++ b/packages/python/plotly/plotly/express/pandas_backend.py @@ -0,0 +1,81 @@ +""" +Pandas plotting backend functions, see: +https://github.com/pandas-dev/pandas/blob/master/pandas/plotting/__init__.py + +To activate, set pandas.options.plotting.backend="plotly.express.pandas_backend" +""" +from ._chart_types import scatter, line, area, bar, box, histogram + + +def plot(data, kind, **kwargs): + if kind == "scatter": + del kwargs["s"] + del kwargs["c"] + return scatter(data, **kwargs) + if kind == "line": + return line(data, **kwargs) + if kind == "area": + return area(data, **kwargs) + if kind == "bar": + return bar(data, **kwargs) + if kind == "barh": + return bar(data, orientation="h", **kwargs) + if kind == "box": + del kwargs["by"] + return box(data, **kwargs) + if kind in "hist": + del kwargs["by"] + if kwargs.get("bins"): + kwargs["nbins"] = kwargs["bins"] + del kwargs["bins"] + return histogram(data, **kwargs) + raise NotImplementedError( + "The plotly.express backend doesn't yet support kind='%s'" % kind + ) + + +def boxplot_frame(data, **kwargs): + del kwargs["by"] + del kwargs["column"] + del kwargs["ax"] + del kwargs["fontsize"] + del kwargs["rot"] + del kwargs["grid"] + del kwargs["figsize"] + del kwargs["layout"] + del kwargs["return_type"] + return box(data, **kwargs) + + +def hist_frame(data, **kwargs): + del kwargs["column"] + del kwargs["by"] + del kwargs["grid"] + del kwargs["xlabelsize"] + del kwargs["xrot"] + del kwargs["ylabelsize"] + del kwargs["yrot"] + del kwargs["ax"] + del kwargs["sharex"] + del kwargs["sharey"] + del kwargs["figsize"] + del kwargs["layout"] + if kwargs.get("bins"): + kwargs["nbins"] = kwargs["bins"] + del kwargs["bins"] + return histogram(data, **kwargs) + + +def hist_series(data, **kwargs): + del kwargs["by"] + del kwargs["grid"] + del kwargs["xlabelsize"] + del kwargs["xrot"] + del kwargs["ylabelsize"] + del kwargs["yrot"] + del kwargs["ax"] + del kwargs["figsize"] + if kwargs.get("bins"): + kwargs["nbins"] = 
kwargs["bins"] + del kwargs["bins"] + return histogram(data, **kwargs) From 0b442b1894bd77851e563d539a949f7cea7087b8 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Sat, 16 May 2020 21:40:59 -0400 Subject: [PATCH 52/69] cleaner pandas backend --- packages/python/plotly/plotly/__init__.py | 72 +++++++++++++++++ .../plotly/plotly/express/pandas_backend.py | 81 ------------------- .../test_core/test_px/test_pandas_backend.py | 26 ++++++ 3 files changed, 98 insertions(+), 81 deletions(-) delete mode 100644 packages/python/plotly/plotly/express/pandas_backend.py create mode 100644 packages/python/plotly/plotly/tests/test_core/test_px/test_pandas_backend.py diff --git a/packages/python/plotly/plotly/__init__.py b/packages/python/plotly/plotly/__init__.py index b6e35378c44..60b33e95dd7 100644 --- a/packages/python/plotly/plotly/__init__.py +++ b/packages/python/plotly/plotly/__init__.py @@ -75,3 +75,75 @@ ], [".version.__version__"], ) + + +def plot(data_frame, kind, **kwargs): + """ + Pandas plotting backend function, not meant to be called directly. 
+ To activate, set pandas.options.plotting.backend="plotly.express.pandas_backend" + See https://github.com/pandas-dev/pandas/blob/master/pandas/plotting/__init__.py + """ + from .express import scatter, line, area, bar, box, histogram + + if kind == "scatter": + new_kwargs = {k: kwargs[k] for k in kwargs if k not in ["s", "c"]} + return scatter(data_frame, **new_kwargs) + if kind == "line": + return line(data_frame, **kwargs) + if kind == "area": + return area(data_frame, **kwargs) + if kind == "bar": + return bar(data_frame, **kwargs) + if kind == "barh": + return bar(data_frame, orientation="h", **kwargs) + if kind == "box": + new_kwargs = {k: kwargs[k] for k in kwargs if k not in ["by"]} + return box(data_frame, **new_kwargs) + if kind in "hist": + new_kwargs = {k: kwargs[k] for k in kwargs if k not in ["by", "bins"]} + return histogram(data_frame, **new_kwargs) + raise NotImplementedError( + "The plotly.express backend doesn't yet support kind='%s'" % kind + ) + + +def boxplot_frame(data_frame, **kwargs): + """ + Pandas plotting backend function, not meant to be called directly. + To activate, set pandas.options.plotting.backend="plotly.express.pandas_backend" + See https://github.com/pandas-dev/pandas/blob/master/pandas/plotting/__init__.py + """ + from .express import box + + skip = ["by", "column", "ax", "fontsize", "rot", "grid", "figsize", "layout"] + skip += ["return_type"] + new_kwargs = {k: kwargs[k] for k in kwargs if k not in skip} + return box(data_frame, **new_kwargs) + + +def hist_frame(data_frame, **kwargs): + """ + Pandas plotting backend function, not meant to be called directly. 
+ To activate, set pandas.options.plotting.backend="plotly.express.pandas_backend" + See https://github.com/pandas-dev/pandas/blob/master/pandas/plotting/__init__.py + """ + from .express import histogram + + skip = ["column", "by", "grid", "xlabelsize", "xrot", "ylabelsize", "yrot"] + skip += ["ax", "sharex", "sharey", "figsize", "layout", "bins"] + new_kwargs = {k: kwargs[k] for k in kwargs if k not in skip} + return histogram(data_frame, **new_kwargs) + + +def hist_series(data_frame, **kwargs): + """ + Pandas plotting backend function, not meant to be called directly. + To activate, set pandas.options.plotting.backend="plotly.express.pandas_backend" + See https://github.com/pandas-dev/pandas/blob/master/pandas/plotting/__init__.py + """ + from .express import histogram + + skip = ["by", "grid", "xlabelsize", "xrot", "ylabelsize", "yrot", "ax"] + skip += ["figsize", "bins"] + new_kwargs = {k: kwargs[k] for k in kwargs if k not in skip} + return histogram(data_frame, **new_kwargs) diff --git a/packages/python/plotly/plotly/express/pandas_backend.py b/packages/python/plotly/plotly/express/pandas_backend.py deleted file mode 100644 index ac5410d9e7a..00000000000 --- a/packages/python/plotly/plotly/express/pandas_backend.py +++ /dev/null @@ -1,81 +0,0 @@ -""" -Pandas plotting backend functions, see: -https://github.com/pandas-dev/pandas/blob/master/pandas/plotting/__init__.py - -To activate, set pandas.options.plotting.backend="plotly.express.pandas_backend" -""" -from ._chart_types import scatter, line, area, bar, box, histogram - - -def plot(data, kind, **kwargs): - if kind == "scatter": - del kwargs["s"] - del kwargs["c"] - return scatter(data, **kwargs) - if kind == "line": - return line(data, **kwargs) - if kind == "area": - return area(data, **kwargs) - if kind == "bar": - return bar(data, **kwargs) - if kind == "barh": - return bar(data, orientation="h", **kwargs) - if kind == "box": - del kwargs["by"] - return box(data, **kwargs) - if kind in "hist": - del 
kwargs["by"] - if kwargs.get("bins"): - kwargs["nbins"] = kwargs["bins"] - del kwargs["bins"] - return histogram(data, **kwargs) - raise NotImplementedError( - "The plotly.express backend doesn't yet support kind='%s'" % kind - ) - - -def boxplot_frame(data, **kwargs): - del kwargs["by"] - del kwargs["column"] - del kwargs["ax"] - del kwargs["fontsize"] - del kwargs["rot"] - del kwargs["grid"] - del kwargs["figsize"] - del kwargs["layout"] - del kwargs["return_type"] - return box(data, **kwargs) - - -def hist_frame(data, **kwargs): - del kwargs["column"] - del kwargs["by"] - del kwargs["grid"] - del kwargs["xlabelsize"] - del kwargs["xrot"] - del kwargs["ylabelsize"] - del kwargs["yrot"] - del kwargs["ax"] - del kwargs["sharex"] - del kwargs["sharey"] - del kwargs["figsize"] - del kwargs["layout"] - if kwargs.get("bins"): - kwargs["nbins"] = kwargs["bins"] - del kwargs["bins"] - return histogram(data, **kwargs) - - -def hist_series(data, **kwargs): - del kwargs["by"] - del kwargs["grid"] - del kwargs["xlabelsize"] - del kwargs["xrot"] - del kwargs["ylabelsize"] - del kwargs["yrot"] - del kwargs["ax"] - del kwargs["figsize"] - if kwargs.get("bins"): - kwargs["nbins"] = kwargs["bins"] - del kwargs["bins"] - return histogram(data, **kwargs) diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_pandas_backend.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_pandas_backend.py new file mode 100644 index 00000000000..ca2f6955425 --- /dev/null +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_pandas_backend.py @@ -0,0 +1,26 @@ +import plotly.express as px +import numpy as np +import pandas as pd +import pytest + + +@pytest.mark.parametrize( + "pandas_fn,px_fn", + [ + (lambda df: df.plot(), px.line), + (lambda df: df.plot.scatter("A", "B"), lambda df: px.scatter(df, "A", "B"),), + (lambda df: df.plot.line(), px.line), + (lambda df: df.plot.area(), px.area), + (lambda df: df.plot.bar(), px.bar), + (lambda df: df.plot.barh(), 
lambda df: px.bar(df, orientation="h")), + (lambda df: df.plot.box(), px.box), + (lambda df: df.plot.hist(), px.histogram), + (lambda df: df.boxplot(), px.box), + (lambda df: df.hist(), px.histogram), + (lambda df: df["A"].hist(), lambda df: px.histogram(df["A"])), + ], +) +def test_pandas_equiv(pandas_fn, px_fn): + pd.options.plotting.backend = "plotly" + df = pd.DataFrame(np.random.randn(100, 4), columns=list("ABCD")).cumsum() + assert pandas_fn(df).to_json() == px_fn(df).to_json() From 773b669637bacef5eb910e99147709af134c18e8 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Sat, 16 May 2020 22:02:42 -0400 Subject: [PATCH 53/69] special case for wide-var=columns --- packages/python/plotly/plotly/express/_core.py | 8 +++++++- .../plotly/tests/test_core/test_px/test_px_wide.py | 13 +++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 02f8f720f55..fb5ff218654 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1239,7 +1239,13 @@ def build_dataframe(args, constructor): elif wide_x != wide_y: wide_mode = True args["wide_variable"] = args["y"] if wide_y else args["x"] - var_name = "variable" + if df_provided and args["wide_variable"] is df_input.columns: + var_name = df_input.columns.name + args["wide_variable"] = list(args["wide_variable"]) + if var_name in [None, "value", "index"] or ( + df_provided and var_name in df_input + ): + var_name = "variable" if constructor == go.Histogram: wide_orientation = "v" if wide_x else "h" else: diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py index c4d386d4bdd..4fe57a6e404 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py @@ 
-663,6 +663,19 @@ def append_special_case(df_in, args_in, args_expect, df_expect): ), ) +# y = columns +df = pd.DataFrame(dict(a=[1, 2], b=[3, 4]), index=[7, 8]) +df.index.name = "c" +df.columns.name = "d" +append_special_case( + df_in=df, + args_in=dict(x=df.index, y=df.columns, color=None), + args_expect=dict(x="c", y="value", color="d"), + df_expect=pd.DataFrame( + dict(c=[7, 8, 7, 8], d=["a", "a", "b", "b"], value=[1, 2, 3, 4]) + ), +) + @pytest.mark.parametrize("df_in, args_in, args_expect, df_expect", special_cases) def test_wide_mode_internal_special_cases(df_in, args_in, args_expect, df_expect): From 7745f2382d1e72865f19e205284c95cd70fef31e Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Sat, 16 May 2020 22:08:04 -0400 Subject: [PATCH 54/69] straight equality test --- .../plotly/tests/test_core/test_px/test_pandas_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_pandas_backend.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_pandas_backend.py index ca2f6955425..d7310a94590 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_pandas_backend.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_pandas_backend.py @@ -23,4 +23,4 @@ def test_pandas_equiv(pandas_fn, px_fn): pd.options.plotting.backend = "plotly" df = pd.DataFrame(np.random.randn(100, 4), columns=list("ABCD")).cumsum() - assert pandas_fn(df).to_json() == px_fn(df).to_json() + assert pandas_fn(df) == px_fn(df) From 5dfcd4e8cea8d821d2476700f2c661c27cae3557 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Sat, 16 May 2020 22:20:58 -0400 Subject: [PATCH 55/69] make CI pass for now --- .../plotly/tests/test_core/test_px/test_pandas_backend.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_pandas_backend.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_pandas_backend.py index 
d7310a94590..31213410147 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_pandas_backend.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_pandas_backend.py @@ -4,6 +4,10 @@ import pytest +@pytest.mark.skipif( + not hasattr(pd.options.plotting, "backend"), + reason="Currently installed pandas doesn't support plotting backends.", +) @pytest.mark.parametrize( "pandas_fn,px_fn", [ From f2e64f402f35286cdf5487104062a4325471ce65 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Sun, 17 May 2020 21:58:22 -0400 Subject: [PATCH 56/69] changelog and more docstring --- CHANGELOG.md | 21 +++++++++++++++++++ packages/python/plotly/plotly/express/_doc.py | 3 +++ 2 files changed, 24 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 599942e04c9..87d1c5bfc76 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,27 @@ All notable changes to this project will be documented in this file. This project adheres to [Semantic Versioning](http://semver.org/). +## [4.8.0] - not yet released + +### Added + +- `plotly` now provides a Plotly Express-backed Pandas-compatible plotting backend, which can be activated via `pandas.options.plotting.backend = "plotly"`. Note that it is not intended to implement every Pandas plotting function, nor is it intended to replicate the behaviour of every argument, although per the changes below, `x` and `y` should behave similarly. +- New datasets have been added to `plotly.express.data`: `stocks`, `experiment`, `short_track_wide` and `short_track_long` + +### Updated + +- The behaviour of the `x`, `y`, `orientation`, `histfunc`, `violinmode`, `boxmode` and `stripmode` arguments for 2d-cartesian functions in Plotly Express (i.e. 
`scatter`, `line`, `area`, `bar`, `histogram`, `violin`, `box`, `strip`, `funnel`, `density_heatmap` and `density_contour`) has been refined: + - if `x` or `y` is missing, it is inferred to be the index of `data_frame` if `data_frame` provided, otherwise a stable index of integers starting at 0. In the case of `px.bar`, if the provided value is not continuous, the missing value is treated as a column of 1s named "count", so as to behave more like `px.histogram` and to avoid sizing the resulting bars differently based on their position in the column. Previously, missing values defaulted to integers starting at 0 *per trace* which made it potentially inconsistent or misleading. + - if `x` (`y`) is missing, `orientation` now defaults to `v` (`h`). Previously it always defaulted to `v` but this is not considered a breaking change, as the cases in which it now defaults to `h` caused unreadable output if set to `v`. + - if both `x` and `y` are provided and one of them does not contain continuous values, `orientation` defaults to the value perpendicular to that axis. Previously it always defaulted to `v` but this is not considered a breaking change, as the cases in which it now defaults to `h` caused unreadable output if set to `v`. + - either `x` or `y` (but not both) may now be provided as a list of column references into `data_frame` or columns of data, in which case the imputed data frame will be treated as "wide" data and `melt()`ed internally before applying the usual mapping rules, with function-specific defaults. + - if neither `x` nor `y` is provided but `data_frame` is, the data frame will be treated as "wide" with defaults depending on the value of `orientation` (and `orientation` has accordingly been added to `scatter`, `line`, `density_heatmap`, and `density_contour` for this purpose). Previously this would have resulted in an empty figure.
+ - if both `x` and `y` are provided to `histogram`, and if `x`, `y` and `z` are provided to `density_heatmap` or `density_contour`, then `histfunc` now defaults to `sum` so as to avoid ignoring the provided data, and to cause `histogram` and `bar` to behave more similarly. + - `violinmode`, `boxmode` and `stripmode` now default to `overlay` if `x` (`y`) in `v` (`h`) orientation is also mapped to `color`, to avoid strange spacing issues with the previous default of `group` in all cases. +- The Plotly Express arguments `color_discrete_map`, `symbol_map` and `line_dash_map` now accept the string `"identity"` which causes the corresponding input data to be used as-is rather than mapped into `color_discrete_sequence`, `symbol_sequence` or `line_dash_sequence`, respectively. +- Plotly Express now accepts `px.Constant` or `px.Range` objects in the place of column references so as to express constant or increasing integer values. + + ## [4.7.1] - 2020-05-08 ### Fixed diff --git a/packages/python/plotly/plotly/express/_doc.py b/packages/python/plotly/plotly/express/_doc.py index d240a5c560b..4c7b591f785 100644 --- a/packages/python/plotly/plotly/express/_doc.py +++ b/packages/python/plotly/plotly/express/_doc.py @@ -244,12 +244,14 @@ "String values should define plotly.js symbols", "Used to override `symbol_sequence` to assign a specific symbols to marks corresponding with specific values.", "Keys in `symbol_map` should be values in the column denoted by `symbol`.", + "Alternatively, if the values of `symbol` are valid symbol names, the string `'identity'` may be passed to cause them to be used directly.", ], line_dash_map=[ "dict with str keys and str values (default `{}`)", "Strings values define plotly.js dash-patterns.", "Used to override `line_dash_sequences` to assign a specific dash-patterns to lines corresponding with specific values.", "Keys in `line_dash_map` should be values in the column denoted by `line_dash`.", + "Alternatively, if the values of
`line_dash` are valid line-dash names, the string `'identity'` may be passed to cause them to be used directly.", ], line_dash_sequence=[ "list of str", @@ -267,6 +269,7 @@ "String values should define valid CSS-colors", "Used to override `color_discrete_sequence` to assign a specific colors to marks corresponding with specific values.", "Keys in `color_discrete_map` should be values in the column denoted by `color`.", + "Alternatively, if the values of `color` are valid colors, the string `'identity'` may be passed to cause them to be used directly.", ], color_continuous_scale=[ "list of str", From 58999bfb9ea9de4cd663087b2bd219235abfd14d Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Sun, 17 May 2020 22:00:10 -0400 Subject: [PATCH 57/69] changelog --- CHANGELOG.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 87d1c5bfc76..2c2854ccde8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,12 +6,12 @@ This project adheres to [Semantic Versioning](http://semver.org/). ### Added -- `plotly` now provides a Plotly Express-backed Pandas-compatible plotting backend, which can be activated via `pandas.options.plotting.backend = "plotly"`. Note that it is not intended to implement every Pandas plotting function, nor is it intended to replicate the behaviour of every argument, although per the changes below, `x` and `y` should behave similarly. -- New datasets have been added to `plotly.express.data`: `stocks`, `experiment`, `short_track_wide` and `short_track_long` +- `plotly` now provides a Plotly Express-backed Pandas-compatible plotting backend, which can be activated via `pandas.options.plotting.backend = "plotly"`. Note that it is not intended to implement every Pandas plotting function, nor is it intended to replicate the behaviour of every argument, although per the changes below, `x` and `y` should behave similarly. 
([#2336](https://github.com/plotly/plotly.py/pull/2336)) +- New datasets have been added to `plotly.express.data`: `stocks`, `experiment`, `short_track_wide` and `short_track_long`. ([#2336](https://github.com/plotly/plotly.py/pull/2336)) ### Updated -- The behaviour of the `x`, `y`, `orientation`, `histfunc`, `violinmode`, `boxmode` and `stripmode` arguments for 2d-cartesian functions in Plotly Express (i.e. `scatter`, `line`, `area`, `bar`, `histogram`, `violin`, `box`, `strip`, `funnel`, `density_heatmap` and `density_contour`) has been refined: +- The behaviour of the `x`, `y`, `orientation`, `histfunc`, `violinmode`, `boxmode` and `stripmode` arguments for 2d-cartesian functions in Plotly Express (i.e. `scatter`, `line`, `area`, `bar`, `histogram`, `violin`, `box`, `strip`, `funnel`, `density_heatmap` and `density_contour`) has been refined ([#2336](https://github.com/plotly/plotly.py/pull/2336)): - if `x` or `y` is missing, it is inferred to be the index of `data_frame` if `data_frame` provided, otherwise a stable index of integers starting at 0. In the case of `px.bar`, if the provided value is not continuous, the missing value is treated as a column of 1s named "count", so as to behave more like `px.histogram` and to avoid sizing the resulting bars differently based on their position in the column. Previously, missing values defaulted to integers starting at 0 *per trace* which made it potentially inconsistent or misleading. - if `x` (`y`) is missing, `orientation` now defaults to `v` (`h`). Previously it always defaulted to `v` but this is not considered a breaking change, as the cases in which it now defaults to `h` caused unreadable output if set to `v`. - if both `x` and `y` are provided and one of them does not contain continuous values, `orientation` defaults to the value perpendicular to that axis. 
Previously it always defaulted to `v` but this is not considered a breaking change, as the cases in which it now defaults to `h` caused unreadable output if set to `v`. @@ -19,8 +19,8 @@ This project adheres to [Semantic Versioning](http://semver.org/). - if neither `x` nor `y` is provided but `data_frame` is, the data frame will be treated as "wide" with defaults depending on the value of `orientation` (and `orientation` has accordingly been added to `scatter`, `line`, `density_heatmap`, and `density_contour` for this purpose). Previously this would have resulted in an empty figure. - if both `x` and `y` are provided to `histogram`, and if `x`, `y` and `z` are provided to `density_heatmap` or `density_contour`, then `histfunc` now defaults to `sum` so as to avoid ignoring the provided data, and to cause `histogram` and `bar` to behave more similarly. - `violinmode`, `boxmode` and `stripmode` now default to `overlay` if `x` (`y`) in `v` (`h`) orientation is also mapped to `color`, to avoid strange spacing issues with the previous default of `group` in all cases. -- The Plotly Express arguments `color_discrete_map`, `symbol_map` and `line_dash_map` now accept the string `"identity"` which causes the corresponding input data to be used as-is rather than mapped into `color_discrete_sequence`, `symbol_sequence` or `line_dash_sequence`, respectively. -- Plotly Express now accepts `px.Constant` or `px.Range` objects in the place of column references so as to express constant or increasing integer values. +- The Plotly Express arguments `color_discrete_map`, `symbol_map` and `line_dash_map` now accept the string `"identity"` which causes the corresponding input data to be used as-is rather than mapped into `color_discrete_sequence`, `symbol_sequence` or `line_dash_sequence`, respectively.
([#2336](https://github.com/plotly/plotly.py/pull/2336)) +- Plotly Express now accepts `px.Constant` or `px.Range` objects in the place of column references so as to express constant or increasing integer values. ([#2336](https://github.com/plotly/plotly.py/pull/2336)) ## [4.7.1] - 2020-05-08 From 883f02efce488300bc0d5f0ee78a36e4d4b452a3 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Thu, 21 May 2020 09:35:25 -0400 Subject: [PATCH 58/69] long and wide --- doc/python/2D-Histogram.md | 2 +- doc/python/2d-histogram-contour.md | 2 +- doc/python/3d-scatter-plots.md | 2 +- doc/python/bar-charts.md | 2 +- doc/python/box-plots.md | 2 +- doc/python/bubble-charts.md | 2 +- doc/python/bubble-maps.md | 2 +- doc/python/choropleth-maps.md | 3 +- doc/python/distplot.md | 2 +- doc/python/dot-plots.md | 4 +- doc/python/error-bars.md | 2 +- doc/python/filled-area-plots.md | 2 +- doc/python/funnel-charts.md | 2 +- doc/python/heatmaps.md | 2 +- doc/python/histograms.md | 2 +- doc/python/horizontal-bar-charts.md | 4 +- doc/python/line-and-scatter.md | 2 +- doc/python/line-charts.md | 2 +- doc/python/linear-fits.md | 2 +- doc/python/lines-on-maps.md | 10 +-- doc/python/mapbox-county-choropleth.md | 2 +- doc/python/mapbox-density-heatmaps.md | 2 +- doc/python/parallel-coordinates-plot.md | 2 +- doc/python/pie-charts.md | 2 +- doc/python/plotly-express.md | 2 +- doc/python/polar-chart.md | 2 +- doc/python/px-arguments.md | 88 ++++++++++++++----- doc/python/radar-chart.md | 4 +- doc/python/scatter-plots-on-maps.md | 2 +- doc/python/scattermapbox.md | 2 +- doc/python/splom.md | 2 +- doc/python/styling-plotly-express.md | 2 +- doc/python/sunburst-charts.md | 2 +- doc/python/ternary-plots.md | 4 +- doc/python/treemaps.md | 2 +- doc/python/violin.md | 2 +- doc/python/wind-rose-charts.md | 2 +- .../python/plotly/plotly/data/__init__.py | 2 +- 38 files changed, 110 insertions(+), 69 deletions(-) diff --git a/doc/python/2D-Histogram.md b/doc/python/2D-Histogram.md index 
8150ab29365..8f52f3d2460 100644 --- a/doc/python/2D-Histogram.md +++ b/doc/python/2D-Histogram.md @@ -42,7 +42,7 @@ A 2D histogram, also known as a density heatmap, is the 2-dimensional generaliza ## Density Heatmaps with Plotly Express -[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). The Plotly Express function `density_heatmap()` can be used to produce density heatmaps. +[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). The Plotly Express function `density_heatmap()` can be used to produce density heatmaps. ```python import plotly.express as px diff --git a/doc/python/2d-histogram-contour.md b/doc/python/2d-histogram-contour.md index 35955911489..50221b18f96 100644 --- a/doc/python/2d-histogram-contour.md +++ b/doc/python/2d-histogram-contour.md @@ -40,7 +40,7 @@ A 2D histogram contour plot, also known as a density contour plot, is a 2-dimens ## Density Contours with Plotly Express -[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). The Plotly Express function `density_contour()` can be used to produce density contours. +[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). The Plotly Express function `density_contour()` can be used to produce density contours. 
```python import plotly.express as px diff --git a/doc/python/3d-scatter-plots.md b/doc/python/3d-scatter-plots.md index eedc437f719..2140e4ce908 100644 --- a/doc/python/3d-scatter-plots.md +++ b/doc/python/3d-scatter-plots.md @@ -35,7 +35,7 @@ jupyter: ## 3D scatter plot with Plotly Express -[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). +[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). Like the [2D scatter plot](https://plotly.com/python/line-and-scatter/) `px.scatter`, the 3D function `px.scatter_3d` plots individual data in three-dimensional space. diff --git a/doc/python/bar-charts.md b/doc/python/bar-charts.md index c137c0b213c..e63dc20d61a 100644 --- a/doc/python/bar-charts.md +++ b/doc/python/bar-charts.md @@ -35,7 +35,7 @@ jupyter: ### Bar chart with Plotly Express -[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). +[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). With `px.bar`, each row of the DataFrame is represented as a rectangular mark. 
diff --git a/doc/python/box-plots.md b/doc/python/box-plots.md index 55952b6727c..694198ee7c8 100644 --- a/doc/python/box-plots.md +++ b/doc/python/box-plots.md @@ -40,7 +40,7 @@ A [box plot](https://en.wikipedia.org/wiki/Box_plot) is a statistical representa ## Box Plot with `plotly.express` -[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). +[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). In a box plot created by `px.box`, the distribution of the column given as `y` argument is represented. diff --git a/doc/python/bubble-charts.md b/doc/python/bubble-charts.md index 348825851da..5d3309e7cc3 100644 --- a/doc/python/bubble-charts.md +++ b/doc/python/bubble-charts.md @@ -38,7 +38,7 @@ jupyter: A [bubble chart](https://en.wikipedia.org/wiki/Bubble_chart) is a scatter plot in which a third dimension of the data is shown through the size of markers. For other types of scatter plot, see the [line and scatter page](https://plotly.com/python/line-and-scatter/). -We first show a bubble chart example using Plotly Express. [Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). The size of markers is set from the dataframe column given as the `size` parameter. +We first show a bubble chart example using Plotly Express. [Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). 
The size of markers is set from the dataframe column given as the `size` parameter. ```python import plotly.express as px diff --git a/doc/python/bubble-maps.md b/doc/python/bubble-maps.md index d9c1b78a20f..6c72e2f197f 100644 --- a/doc/python/bubble-maps.md +++ b/doc/python/bubble-maps.md @@ -39,7 +39,7 @@ Plotly figures made with `px.scatter_geo`, `px.line_geo` or `px.choropleth` func ### Bubble map with Plotly Express -[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). With `px.scatter_geo`, each line of the dataframe is represented as a marker point. The column set as the `size` argument gives the size of markers. +[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). With `px.scatter_geo`, each line of the dataframe is represented as a marker point. The column set as the `size` argument gives the size of markers. ```python import plotly.express as px diff --git a/doc/python/choropleth-maps.md b/doc/python/choropleth-maps.md index bd3535e2e6e..67ccb580f6c 100644 --- a/doc/python/choropleth-maps.md +++ b/doc/python/choropleth-maps.md @@ -56,7 +56,7 @@ The GeoJSON data is passed to the `geojson` argument, and the data is passed int ### Choropleth Map with plotly.express -[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). +[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). 
#### GeoJSON with `feature.id` @@ -208,7 +208,6 @@ fig.show() ```python import plotly.graph_objects as go -# Load data frame and tidy it. import pandas as pd df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2011_us_ag_exports.csv') diff --git a/doc/python/distplot.md b/doc/python/distplot.md index 1b3f2256fd2..3a212bd174c 100644 --- a/doc/python/distplot.md +++ b/doc/python/distplot.md @@ -37,7 +37,7 @@ jupyter: Several representations of statistical distributions are available in plotly, such as [histograms](https://plotly.com/python/histograms/), [violin plots](https://plotly.com/python/violin/), [box plots](https://plotly.com/python/box-plots/) (see [the complete list here](https://plotly.com/python/statistical-charts/)). It is also possible to combine several representations in the same plot. -For example, the `plotly.express` function `px.histogram` can add a subplot with a different statistical representation than the histogram, given by the parameter `marginal`. [Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). +For example, the `plotly.express` function `px.histogram` can add a subplot with a different statistical representation than the histogram, given by the parameter `marginal`. [Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). 
```python import plotly.express as px diff --git a/doc/python/dot-plots.md b/doc/python/dot-plots.md index bb212eb4069..70d1b773dea 100644 --- a/doc/python/dot-plots.md +++ b/doc/python/dot-plots.md @@ -37,9 +37,9 @@ jupyter: Dot plots (also known as [Cleveland dot plots]()) show changes between two (or more) points in time or between two (or more) conditions. Compared to a [bar chart](/python/bar-charts/), dot plots can be less cluttered and allow for an easier comparison between conditions. -For the same data, we show below how to create a dot plot using either `px.scatter` (for a tidy pandas DataFrame) or `go.Scatter`. +For the same data, we show below how to create a dot plot using either `px.scatter` or `go.Scatter`. -[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). +[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). ```python import plotly.express as px diff --git a/doc/python/error-bars.md b/doc/python/error-bars.md index bd3cf91b6a2..4dcfb101456 100644 --- a/doc/python/error-bars.md +++ b/doc/python/error-bars.md @@ -35,7 +35,7 @@ jupyter: ### Error Bars with Plotly Express -[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). 
For functions representing 2D data points such as [`px.scatter`](https://plotly.com/python/line-and-scatter/), [`px.line`](https://plotly.com/python/line-charts/), [`px.bar`](https://plotly.com/python/bar-charts/) etc., error bars are given as a column name which is the value of the `error_x` (for the error on x position) and `error_y` (for the error on y position). +[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). For functions representing 2D data points such as [`px.scatter`](https://plotly.com/python/line-and-scatter/), [`px.line`](https://plotly.com/python/line-charts/), [`px.bar`](https://plotly.com/python/bar-charts/) etc., error bars are given as a column name which is the value of the `error_x` (for the error on x position) and `error_y` (for the error on y position). ```python import plotly.express as px diff --git a/doc/python/filled-area-plots.md b/doc/python/filled-area-plots.md index 347fbeb1efb..63f578c741a 100644 --- a/doc/python/filled-area-plots.md +++ b/doc/python/filled-area-plots.md @@ -37,7 +37,7 @@ This example shows how to fill the area enclosed by traces. ## Filled area plot with plotly.express -[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). +[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). `px.area` creates a stacked area plot. Each filled area corresponds to one value of the column given by the `line_group` parameter. 
diff --git a/doc/python/funnel-charts.md b/doc/python/funnel-charts.md index cf9c6898f93..ad6e5b67660 100644 --- a/doc/python/funnel-charts.md +++ b/doc/python/funnel-charts.md @@ -30,7 +30,7 @@ Funnel charts are often used to represent data in different stages of a business ### Basic Funnel Plot with plotly.express -[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). +[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). With `px.funnel`, each row of the DataFrame is represented as a stage of the funnel. diff --git a/doc/python/heatmaps.md b/doc/python/heatmaps.md index a4d401698fb..a92eb30a8b3 100644 --- a/doc/python/heatmaps.md +++ b/doc/python/heatmaps.md @@ -36,7 +36,7 @@ jupyter: ### Heatmap with `plotly.express` and `px.imshow` -[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). With `px.imshow`, each value of the input array is represented as a heatmap pixel. +[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). With `px.imshow`, each value of the input array is represented as a heatmap pixel. For more examples using `px.imshow`, see the [tutorial on displaying image data with plotly](/python/imshow). 
diff --git a/doc/python/histograms.md b/doc/python/histograms.md index ea60960f3a5..b40bf2f2667 100644 --- a/doc/python/histograms.md +++ b/doc/python/histograms.md @@ -41,7 +41,7 @@ bar, go to the [Bar Chart tutorial](/python/bar-charts/). ## Histogram with Plotly Express -[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). +[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). ```python import plotly.express as px diff --git a/doc/python/horizontal-bar-charts.md b/doc/python/horizontal-bar-charts.md index d80525f3c58..2578a5c7b9e 100644 --- a/doc/python/horizontal-bar-charts.md +++ b/doc/python/horizontal-bar-charts.md @@ -37,7 +37,7 @@ See more examples of bar charts (including vertical bar charts) and styling opti ### Horizontal Bar Chart with Plotly Express -[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). For a horizontal bar char, use the `px.bar` function with `orientation='h'`. +[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). For a horizontal bar chart, use the `px.bar` function with `orientation='h'`. #### Basic Horizontal Bar Chart with Plotly Express @@ -64,7 +64,7 @@ fig.show() ### Horizontal Bar Chart with go.Bar -When data are not available as a tidy dataframe, you can use the more generic function `go.Bar` from `plotly.graph_objects`.
All the options of `go.Bar` are documented in the reference https://plotly.com/python/reference/#bar +You can also use the more generic function `go.Bar` from `plotly.graph_objects`. All the options of `go.Bar` are documented in the reference https://plotly.com/python/reference/#bar #### Basic Horizontal Bar Chart diff --git a/doc/python/line-and-scatter.md b/doc/python/line-and-scatter.md index 1bd9fe4fdcd..28cfbe4cf44 100644 --- a/doc/python/line-and-scatter.md +++ b/doc/python/line-and-scatter.md @@ -36,7 +36,7 @@ jupyter: ## Scatter plot with Plotly Express -[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). +[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). With `px.scatter`, each data point is represented as a marker point, whose location is given by the `x` and `y` columns. diff --git a/doc/python/line-charts.md b/doc/python/line-charts.md index 31c11a8dfab..dcc9c831503 100644 --- a/doc/python/line-charts.md +++ b/doc/python/line-charts.md @@ -37,7 +37,7 @@ jupyter: ### Line Plot with plotly.express -[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). With `px.line`, each data point is represented as a vertex (which location is given by the `x` and `y` columns) of a **polyline mark** in 2D space. +[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). 
With `px.line`, each data point is represented as a vertex (whose location is given by the `x` and `y` columns) of a **polyline mark** in 2D space. For more examples of line plots, see the [line and scatter notebook](https://plotly.com/python/line-and-scatter/). diff --git a/doc/python/linear-fits.md b/doc/python/linear-fits.md index 7d1c970efca..75e27c93e54 100644 --- a/doc/python/linear-fits.md +++ b/doc/python/linear-fits.md @@ -37,7 +37,7 @@ jupyter: ### Linear fit trendlines with Plotly Express -[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). +[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). Plotly Express allows you to add [Ordinary Least](https://en.wikipedia.org/wiki/Ordinary_least_squares) Squares regression trendline to scatterplots with the `trendline` argument. In order to do so, you will need to install `statsmodels` and its dependencies. Hovering over the trendline will show the equation of the line and its R-squared value. diff --git a/doc/python/lines-on-maps.md b/doc/python/lines-on-maps.md index f224d37e778..5f00ee0a950 100644 --- a/doc/python/lines-on-maps.md +++ b/doc/python/lines-on-maps.md @@ -41,7 +41,7 @@ Plotly figures made with `px.scatter_geo`, `px.line_geo` or `px.choropleth` func ## Lines on Maps with Plotly Express -[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/).
+[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). ```python import plotly.express as px @@ -112,11 +112,11 @@ fig.update_layout( fig.show() ``` ### Performance improvement: put many lines in the same trace -For very large amounts (>1000) of lines, performance may become critcal. If you can relinquish setting individual line styles (e.g. opacity), you can put multiple paths into one trace. This makes the map render faster and reduces the script execution time and memory consumption. +For very large amounts (>1000) of lines, performance may become critical. If you can relinquish setting individual line styles (e.g. opacity), you can put multiple paths into one trace. This makes the map render faster and reduces the script execution time and memory consumption. -Use ```None``` between path coordinates to create a break in the otherwise connected paths. +Use ```None``` between path coordinates to create a break in the otherwise connected paths. -```python +```python import plotly.graph_objects as go import pandas as pd @@ -164,7 +164,7 @@ fig.add_trace( lat = lats, mode = 'lines', line = dict(width = 1,color = 'red'), - opacity = 0.5 + opacity = 0.5 ) ) diff --git a/doc/python/mapbox-county-choropleth.md b/doc/python/mapbox-county-choropleth.md index a6d73afe769..d6484a9d8a9 100644 --- a/doc/python/mapbox-county-choropleth.md +++ b/doc/python/mapbox-county-choropleth.md @@ -80,7 +80,7 @@ df.head() ### Choropleth map using plotly.express and carto base map (no token needed) -[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/).
+[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). With `px.choropleth_mapbox`, each row of the DataFrame is represented as a region of the choropleth. diff --git a/doc/python/mapbox-density-heatmaps.md b/doc/python/mapbox-density-heatmaps.md index ab96b9fd3c0..52e32eb46a9 100644 --- a/doc/python/mapbox-density-heatmaps.md +++ b/doc/python/mapbox-density-heatmaps.md @@ -39,7 +39,7 @@ To plot on Mapbox maps with Plotly you _may_ need a Mapbox account and a public ### Stamen Terrain base map (no token needed): density mapbox with `plotly.express` -[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). +[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). With `px.density_mapbox`, each row of the DataFrame is represented as a point smoothed with a given radius of influence. diff --git a/doc/python/parallel-coordinates-plot.md b/doc/python/parallel-coordinates-plot.md index 034f372e0fb..327f9702d08 100644 --- a/doc/python/parallel-coordinates-plot.md +++ b/doc/python/parallel-coordinates-plot.md @@ -37,7 +37,7 @@ jupyter: ## Parallel Coordinates plot with Plotly Express -[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). 
In a parallel coordinates plot with `px.parallel_coordinates`, each row of the DataFrame is represented by a polyline mark which traverses a set of parallel axes, one for each of the dimensions. For other representations of multivariate data, also see [parallel categories](/python/parallel-categories-diagram/), [radar charts](/python/radar-chart/) and [scatterplot matrix (SPLOM)](/python/splom/). +[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). In a parallel coordinates plot with `px.parallel_coordinates`, each row of the DataFrame is represented by a polyline mark which traverses a set of parallel axes, one for each of the dimensions. For other representations of multivariate data, also see [parallel categories](/python/parallel-categories-diagram/), [radar charts](/python/radar-chart/) and [scatterplot matrix (SPLOM)](/python/splom/). ```python import plotly.express as px diff --git a/doc/python/pie-charts.md b/doc/python/pie-charts.md index dce6a8dea13..cd9ffbf348e 100644 --- a/doc/python/pie-charts.md +++ b/doc/python/pie-charts.md @@ -40,7 +40,7 @@ If you're looking instead for a multilevel hierarchical pie-like chart, go to th ### Pie chart with plotly express -[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). +[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). In `px.pie`, data visualized by the sectors of the pie is set in `values`. The sector labels are set in `names`. 
diff --git a/doc/python/plotly-express.md b/doc/python/plotly-express.md index e0396d9c213..d46a2c3733a 100644 --- a/doc/python/plotly-express.md +++ b/doc/python/plotly-express.md @@ -37,7 +37,7 @@ jupyter: ### Plotly Express -Plotly Express is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). Every Plotly Express function returns a `graph_objects.Figure` object whose `data` and `layout` has been pre-populated according to the provided arguments. +Plotly Express is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). Every Plotly Express function returns a `graph_objects.Figure` object whose `data` and `layout` has been pre-populated according to the provided arguments. > **Note**: Plotly Express was previously its own separately-installed `plotly_express` package but is now part of `plotly` and importable via `import plotly.express as px`. diff --git a/doc/python/polar-chart.md b/doc/python/polar-chart.md index ad8b8671ea5..3430fe6b51d 100644 --- a/doc/python/polar-chart.md +++ b/doc/python/polar-chart.md @@ -37,7 +37,7 @@ jupyter: A polar chart represents data along radial and angular axes. With Plotly Express, it is possible to represent polar data as scatter markers with `px.scatter_polar`, and as lines with `px.line_polar`. -[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). +[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). 
For other types of arguments, see the section below using `go.Scatterpolar`. diff --git a/doc/python/px-arguments.md b/doc/python/px-arguments.md index 9cb7827738e..b7848f8523e 100644 --- a/doc/python/px-arguments.md +++ b/doc/python/px-arguments.md @@ -5,8 +5,8 @@ jupyter: text_representation: extension: .md format_name: markdown - format_version: "1.1" - jupytext_version: 1.1.1 + format_version: '1.2' + jupytext_version: 1.4.2 kernelspec: display_name: Python 3 language: python @@ -20,7 +20,7 @@ jupyter: name: python nbconvert_exporter: python pygments_lexer: ipython3 - version: 3.6.8 + version: 3.7.7 plotly: description: Arguments accepted by Plotly Express functions display_as: file_settings @@ -33,41 +33,73 @@ jupyter: thumbnail: thumbnail/plotly-express.png --- -### Tidy Data +### Column-oriented, Matrix or Geographic Data -[Plotly Express](/python/plotly-express) operates on "tidy" or "long" data rather than "wide" data. You may pass data in either as a Pandas `DataFrame` objects or as individual array-like objects which `px` will assemble into a data frame internally, such as lists, `numpy` arrays or Pandas `Series` objects. +Plotly Express provides functions to visualize a variety of types of data. Most functions such as `px.bar` or `px.scatter` expect to operate on column-oriented data of the type you might store in a Pandas `DataFrame` (in either "long" or "wide" format, see below). [`px.imshow` operates on matrix-like data](/python/imshow/) you might store in a `numpy` or `xarray` array and functions like [`px.choropleth` and `px.choropleth_mapbox` can operate on geographic data](/python/maps/) of the kind you might store in a GeoPandas `GeoDataFrame`. This page details how to provide column-oriented data to most Plotly Express functions. 
-What follows is a very short example of the difference between wide and tidy/long data, and the excellent [Tidy Data in Python blog post](https://www.jeannicholashould.com/tidy-data-in-python.html) contains much more information about the tidy approach to structuring data. + + +### Long-, Wide-, and Mixed-Form Data + +*Until version 4.8, Plotly Express only operated on long-form (previously called "tidy") data, but [now accepts wide-form and mixed-form data](/python/wide-form/) as well.* + +There are three common conventions for storing data in a data frame: + +* **long-form data** is suitable for storing multivariate data (i.e. dimensions greater than 2), with one row per observation, and one column per variable. +* **wide-form data** is suitable for storing 2-dimensional data, with one row per value of one of the first variable, and one column per value of the second variable. +* **mixed-form data** is a hybrid of long-form and wide-form data, with one row per value of one variable, and some columns representing values of another, and some columns representing more variables (see our [wide-form documentation](/python/wide-form/) for examples of how to use Plotly Express to visualize this kind of data) + +All Plotly Express functions can operate on long-form data, and the following 2D-Cartesian functions can operate on wide-form data as well:: `px.scatter`, `px.line`, `px.area`, `px.bar`, `px.histogram`, `px.violin`, `px.box`, `px.strip`, `px.funnel`, `px.density_heatmap` and `px.density_contour`. Read on for a short example of the differences between these forms, or check out our [detailed documentation about wide-form support](/python/wide-form/). 
+ +By way of example here is the same data, represented in long-form first, and then in wide-form: ```python -import pandas as pd -print("This is 'wide' data, unsuitable as-is for Plotly Express:") -wide_df = pd.DataFrame(dict(Month=["Jan", "Feb", "Mar"], London=[1,2,3], Paris=[3,1,2])) +import plotly.express as px +long_df = px.data.short_track_long() +long_df +``` + +```python +import plotly.express as px +wide_df = px.data.short_track_wide() wide_df ``` +Plotly Express can produce the same plot from either form: + ```python -import pandas as pd -print("This is the same data in 'long' format, ready for Plotly Express:") -wide_df = pd.DataFrame(dict(Month=["Jan", "Feb", "Mar"], London=[1,2,3], Paris=[3,1,2])) -tidy_df = wide_df.melt(id_vars="Month") -tidy_df +import plotly.express as px +long_df = px.data.short_track_long() + +fig = px.bar(long_df, x="nation", y="count", color="medal", title="Long-Form Input") +fig.show() ``` ```python import plotly.express as px -import pandas as pd +wide_df = px.data.short_track_wide() + +fig = px.bar(wide_df, x="nation", y=["gold", "silver", "bronze"], title="Wide-Form Input") +fig.show() +``` + +You might notice that y-axis and legend labels are slightly different for the second plot: they are "value" and "variable", respectively. This is because Plotly Express performed an [internal Pandas `melt()` operation](https://pandas.pydata.org/docs/reference/api/pandas.melt.html) to convert the wide-form data into long-form for plotting, and used the Pandas convention for assign column names to the intermediate long-form data. Note that the labels "medal" and "count" do not appear in the wide-form data frame, so in this case, you must supply these yourself, or [you can use a data frame with named row- and column-indexes](/python/wide-form/). 
You can [rename these labels with the `labels` argument](/python/styling-plotly-express/): -wide_df = pd.DataFrame(dict(Month=["Jan", "Feb", "Mar"], London=[1,2,3], Paris=[3,1,2])) -tidy_df = wide_df.melt(id_vars="Month") +```python +import plotly.express as px +wide_df = px.data.short_track_wide() -fig = px.bar(tidy_df, x="Month", y="value", color="variable", barmode="group") +fig = px.bar(wide_df, x="nation", y=["gold", "silver", "bronze"], title="Wide-Form Input, relabelled", + labels={"value": "count", "variable": "medals"}) fig.show() ``` -### pandas DataFrame input data +Many more examples of wide-form and messy data input can be found in our [detailed wide-form support documentation](/python/wide-form/). -`px` functions supports natively pandas DataFrame. Arguments can either be passed as dataframe columns, or as column names if the `data_frame` argument is provided. + +### Input Data as Pandas `DataFrame`s + +As shown above, `px` functions supports natively pandas DataFrame. Arguments can either be passed as dataframe columns, or as column names if the `data_frame` argument is provided. #### Passing columns as arguments @@ -101,7 +133,7 @@ fig = px.scatter(df, x=df.sepal_length, y=df.sepal_width, size=df.petal_length, fig.show() ``` -### Columns not in the data_frame argument +### Columns not in the `data_frame` argument In the addition to columns from the `data_frame` argument, one may also pass columns from a different DataFrame, _as long as all columns have the same length_. It is also possible to pass columns without passing the `data_frame` argument. @@ -132,9 +164,9 @@ fig = px.bar(df, x='year', y=gdp, color='continent', labels={'y':'gdp'}, fig.show() ``` -### Using array-like arguments: NumPy arrays, lists... +### Input Data as array-like columns: NumPy arrays, lists... -`px` arguments can also be array-like objects such as lists, NumPy arrays. 
+`px` arguments can also be array-like objects such as lists, NumPy arrays, in both long-form or wide-form (for certain functions). ```python import plotly.express as px @@ -144,6 +176,16 @@ fig = px.line(x=[1, 2, 3, 4], y=[3, 5, 4, 8]) fig.show() ``` +```python +import plotly.express as px + +# List arguments in wide form +series1 = [3, 5, 4, 8] +series2 = [5, 4, 8, 3] +fig = px.line(x=[1, 2, 3, 4], y=[series1, series2]) +fig.show() +``` + ```python import numpy as np import plotly.express as px diff --git a/doc/python/radar-chart.md b/doc/python/radar-chart.md index 22e021a2312..51fe5835466 100644 --- a/doc/python/radar-chart.md +++ b/doc/python/radar-chart.md @@ -35,11 +35,11 @@ jupyter: A [Radar Chart](https://en.wikipedia.org/wiki/Radar_chart) (also known as a spider plot or star plot) displays multivariate data in the form of a two-dimensional chart of quantitative variables represented on axes originating from the center. The relative position and angle of the axes is typically uninformative. It is equivalent to a [parallel coordinates plot](/python/parallel-coordinates-plot/) with the axes arranged radially. -For a Radar Chart, use a [polar chart](/python/polar-chart/) with categorical angular variables, with `px.line_polar` for data available as a tidy pandas DataFrame, or with `go.Scatterpolar` in the general case. See more examples of [polar charts here](/python/polar-chart/). +For a Radar Chart, use a [polar chart](/python/polar-chart/) with categorical angular variables, with `px.line_polar`, or with `go.Scatterpolar`. See more examples of [polar charts here](/python/polar-chart/). #### Radar Chart with Plotly Express -[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). 
+[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). Use `line_close=True` for closed lines. diff --git a/doc/python/scatter-plots-on-maps.md b/doc/python/scatter-plots-on-maps.md index 6fa0a7bde1c..c2130aa74bf 100644 --- a/doc/python/scatter-plots-on-maps.md +++ b/doc/python/scatter-plots-on-maps.md @@ -43,7 +43,7 @@ Plotly figures made with `px.scatter_geo`, `px.line_geo` or `px.choropleth` func Here we show the [Plotly Express](/python/plotly-express/) function `px.scatter_geo` for a geographical scatter plot. The `size` argument is used to set the size of markers from a given column of the DataFrame. -[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). +[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). ```python import plotly.express as px diff --git a/doc/python/scattermapbox.md b/doc/python/scattermapbox.md index e6eff5d373c..03789fba8f0 100644 --- a/doc/python/scattermapbox.md +++ b/doc/python/scattermapbox.md @@ -41,7 +41,7 @@ To plot on Mapbox maps with Plotly you _may_ need a Mapbox account and a public Here we show the [Plotly Express](/python/plotly-express/) function `px.scatter_mapbox` for a scatter plot on a tile map. -[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). 
+[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). ```python import plotly.express as px diff --git a/doc/python/splom.md b/doc/python/splom.md index 0a35089ac33..4176719b1ec 100644 --- a/doc/python/splom.md +++ b/doc/python/splom.md @@ -42,7 +42,7 @@ A scatterplot matrix is a matrix associated to n numerical arrays (data variable Here we show the Plotly Express function `px.scatter_matrix` to plot the scatter matrix for the columns of the dataframe. By default, all columns are considered. -[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). +[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). ```python import plotly.express as px diff --git a/doc/python/styling-plotly-express.md b/doc/python/styling-plotly-express.md index 3151b09f15d..c4e98f0cb14 100644 --- a/doc/python/styling-plotly-express.md +++ b/doc/python/styling-plotly-express.md @@ -35,7 +35,7 @@ jupyter: ### Styling Figures made with Plotly Express -[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/). Every Plotly Express function returns a `graph_objects.Figure` object whose `data` and `layout` has been pre-populated according to the provided arguments. +[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/). 
Every Plotly Express function returns a `graph_objects.Figure` object whose `data` and `layout` have been pre-populated according to the provided arguments. > You can style and customize figures made with Plotly Express _in all the same ways_ as you can style figures made more manually by explicitly assembling `graph_objects` into a figure. diff --git a/doc/python/sunburst-charts.md b/doc/python/sunburst-charts.md index cef5dbaface..a6ec9f59802 100644 --- a/doc/python/sunburst-charts.md +++ b/doc/python/sunburst-charts.md @@ -43,7 +43,7 @@ Main arguments: ### Basic Sunburst Plot with plotly.express -[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). +[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). With `px.sunburst`, each row of the DataFrame is represented as a sector of the sunburst. diff --git a/doc/python/ternary-plots.md b/doc/python/ternary-plots.md index efd01136dce..66f7e7f9281 100644 --- a/doc/python/ternary-plots.md +++ b/doc/python/ternary-plots.md @@ -39,7 +39,7 @@ A ternary plot depicts the ratios of three variables as positions in an equilate ## Ternary scatter plot with Plotly Express -[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). +[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/).
Here we use `px.scatter_ternary` to visualize thre three-way split between the three major candidates in a municipal election. @@ -55,7 +55,7 @@ We can scale and color the markers to produce a ternary bubble chart. ```python import plotly.express as px df = px.data.election() -fig = px.scatter_ternary(df, a="Joly", b="Coderre", c="Bergeron", hover_name="district", +fig = px.scatter_ternary(df, a="Joly", b="Coderre", c="Bergeron", hover_name="district", color="winner", size="total", size_max=15, color_discrete_map = {"Joly": "blue", "Bergeron": "green", "Coderre":"red"} ) fig.show() diff --git a/doc/python/treemaps.md b/doc/python/treemaps.md index 2d73b895bbb..33f7df7bf9c 100644 --- a/doc/python/treemaps.md +++ b/doc/python/treemaps.md @@ -37,7 +37,7 @@ jupyter: ### Basic Treemap with plotly.express -[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). +[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). With `px.treemap`, each row of the DataFrame is represented as a sector of the treemap. diff --git a/doc/python/violin.md b/doc/python/violin.md index 7f6be074e98..f9e791be931 100644 --- a/doc/python/violin.md +++ b/doc/python/violin.md @@ -42,7 +42,7 @@ See also the [list of other statistical charts](https://plotly.com/python/statis ### Basic Violin Plot with Plotly Express -[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). 
+[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). ```python import plotly.express as px diff --git a/doc/python/wind-rose-charts.md b/doc/python/wind-rose-charts.md index 1ed4fda77b3..ed1c98deef1 100644 --- a/doc/python/wind-rose-charts.md +++ b/doc/python/wind-rose-charts.md @@ -39,7 +39,7 @@ jupyter: A [wind rose chart](https://en.wikipedia.org/wiki/Wind_rose) (also known as a polar bar chart) is a graphical tool used to visualize how wind speed and direction are typically distributed at a given location. You can use the `px.bar_polar` function from Plotly Express as below, otherwise use `go.Barpolar` as explained in the next section. -[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on "tidy" data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). +[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). 
```python import plotly.express as px diff --git a/packages/python/plotly/plotly/data/__init__.py b/packages/python/plotly/plotly/data/__init__.py index 8960bbf0f24..0dcaff7eb55 100644 --- a/packages/python/plotly/plotly/data/__init__.py +++ b/packages/python/plotly/plotly/data/__init__.py @@ -150,7 +150,7 @@ def short_track_wide(indexed=False): df = _get_dataset("short_track") if indexed: df = df.set_index("nation") - df.index.name = "medal" + df.columns.name = "medal" return df From b1b2d7cc495fe8f6d2d33ba1b78cb97e94d341dd Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Thu, 21 May 2020 12:47:56 -0400 Subject: [PATCH 59/69] wide form docs draft --- doc/python/px-arguments.md | 8 +- doc/python/wide-form.md | 260 +++++++++++++++++++++++++++++++++++++ 2 files changed, 264 insertions(+), 4 deletions(-) create mode 100644 doc/python/wide-form.md diff --git a/doc/python/px-arguments.md b/doc/python/px-arguments.md index b7848f8523e..bbf6d63ceac 100644 --- a/doc/python/px-arguments.md +++ b/doc/python/px-arguments.md @@ -22,7 +22,7 @@ jupyter: pygments_lexer: ipython3 version: 3.7.7 plotly: - description: Arguments accepted by Plotly Express functions + description: Input data arguments accepted by Plotly Express functions display_as: file_settings language: python layout: base @@ -43,7 +43,7 @@ Plotly Express provides functions to visualize a variety of types of data. Most *Until version 4.8, Plotly Express only operated on long-form (previously called "tidy") data, but [now accepts wide-form and mixed-form data](/python/wide-form/) as well.* -There are three common conventions for storing data in a data frame: +There are three common conventions for storing column-oriented data, usually in a data frame with column names: * **long-form data** is suitable for storing multivariate data (i.e. dimensions greater than 2), with one row per observation, and one column per variable. 
* **wide-form data** is suitable for storing 2-dimensional data, with one row per value of one of the first variable, and one column per value of the second variable. @@ -83,14 +83,14 @@ fig = px.bar(wide_df, x="nation", y=["gold", "silver", "bronze"], title="Wide-Fo fig.show() ``` -You might notice that y-axis and legend labels are slightly different for the second plot: they are "value" and "variable", respectively. This is because Plotly Express performed an [internal Pandas `melt()` operation](https://pandas.pydata.org/docs/reference/api/pandas.melt.html) to convert the wide-form data into long-form for plotting, and used the Pandas convention for assign column names to the intermediate long-form data. Note that the labels "medal" and "count" do not appear in the wide-form data frame, so in this case, you must supply these yourself, or [you can use a data frame with named row- and column-indexes](/python/wide-form/). You can [rename these labels with the `labels` argument](/python/styling-plotly-express/): +You might notice that y-axis and legend labels are slightly different for the second plot: they are "value" and "variable", respectively, and this is also reflected in the hoverlabel text. This is because Plotly Express performed an [internal Pandas `melt()` operation](https://pandas.pydata.org/docs/reference/api/pandas.melt.html) to convert the wide-form data into long-form for plotting, and used the Pandas convention for assign column names to the intermediate long-form data. Note that the labels "medal" and "count" do not appear in the wide-form data frame, so in this case, you must supply these yourself, or [you can use a data frame with named row- and column-indexes](/python/wide-form/). 
You can [rename these labels with the `labels` argument](/python/styling-plotly-express/): ```python import plotly.express as px wide_df = px.data.short_track_wide() fig = px.bar(wide_df, x="nation", y=["gold", "silver", "bronze"], title="Wide-Form Input, relabelled", - labels={"value": "count", "variable": "medals"}) + labels={"value": "count", "variable": "medal"}) fig.show() ``` diff --git a/doc/python/wide-form.md b/doc/python/wide-form.md new file mode 100644 index 00000000000..dd040184bdf --- /dev/null +++ b/doc/python/wide-form.md @@ -0,0 +1,260 @@ +--- +jupyter: + jupytext: + notebook_metadata_filter: all + text_representation: + extension: .md + format_name: markdown + format_version: '1.2' + jupytext_version: 1.4.2 + kernelspec: + display_name: Python 3 + language: python + name: python3 + language_info: + codemirror_mode: + name: ipython + version: 3 + file_extension: .py + mimetype: text/x-python + name: python + nbconvert_exporter: python + pygments_lexer: ipython3 + version: 3.7.7 + plotly: + description: Plotly Express' 2D-Cartesian functions accept data in long-, wide-, + and mixed-form. + display_as: file_settings + language: python + layout: base + name: Plotly Express Wide-Form Support + order: 33 + page_type: u-guide + permalink: python/wide-form/ + thumbnail: thumbnail/plotly-express.png +--- + +### Column-oriented, Matrix or Geographic Data + +Plotly Express provides functions to visualize a variety of types of data. Most functions such as `px.bar` or `px.scatter` expect to operate on column-oriented data of the type you might store in a Pandas `DataFrame` (in either "long" or "wide" format, see below). [`px.imshow` operates on matrix-like data](/python/imshow/) you might store in a `numpy` or `xarray` array and functions like [`px.choropleth` and `px.choropleth_mapbox` can operate on geographic data](/python/maps/) of the kind you might store in a GeoPandas `GeoDataFrame`. 
This page details how to provide a specific form of column-oriented data to 2D-Cartesian Plotly Express functions, but you can also check out our [detailed column-input-format documentation](/python/px-arguments/). + +### Long-, Wide-, and Mixed-Form Data + +*Until version 4.8, Plotly Express only operated on long-form (previously called "tidy") data, but now accepts wide-form and mixed-form data as well.* + +There are three common conventions for storing column-oriented data, usually in a data frame with column names: + +* **long-form data** is suitable for storing multivariate data (i.e. dimensions greater than 2), with one row per observation, and one column per variable. +* **wide-form data** is suitable for storing 2-dimensional data, with one row per value of the first variable, and one column per value of the second variable. +* **mixed-form data** is a hybrid of long-form and wide-form data, with one row per value of one variable, and some columns representing values of another, and some columns representing more variables. + +All Plotly Express functions can operate on long-form data, and the following 2D-Cartesian functions can operate on wide-form data as well: `px.scatter`, `px.line`, `px.area`, `px.bar`, `px.histogram`, `px.violin`, `px.box`, `px.strip`, `px.funnel`, `px.density_heatmap` and `px.density_contour`.
+ +By way of example here is the same data, represented in long-form first, and then in wide-form: + +```python +import plotly.express as px +long_df = px.data.short_track_long() +long_df +``` + +```python +import plotly.express as px +wide_df = px.data.short_track_wide() +wide_df +``` + +Plotly Express can produce the same plot from either form: + +```python +import plotly.express as px +long_df = px.data.short_track_long() + +fig = px.bar(long_df, x="nation", y="count", color="medal", title="Long-Form Input") +fig.show() +``` + +```python +import plotly.express as px +wide_df = px.data.short_track_wide() + +fig = px.bar(wide_df, x="nation", y=["gold", "silver", "bronze"], title="Wide-Form Input") +fig.show() +``` + +### Labeling axes, legends and hover text + +You might notice that y-axis and legend labels are slightly different for the second plot: they are "value" and "variable", respectively, and this is also reflected in the hoverlabel text. This is because Plotly Express performed an [internal Pandas `melt()` operation](https://pandas.pydata.org/docs/reference/api/pandas.melt.html) to convert the wide-form data into long-form for plotting, and used the Pandas convention for assigning column names to the intermediate long-form data. Note that the labels "medal" and "count" do not appear in the wide-form data frame, so in this case, you must supply these yourself (or see below regarding using a data frame with named row- and column-indexes).
You can [rename these labels with the `labels` argument](/python/styling-plotly-express/): + +```python +import plotly.express as px +wide_df = px.data.short_track_wide() + +fig = px.bar(wide_df, x="nation", y=["gold", "silver", "bronze"], title="Wide-Form Input, relabelled", + labels={"value": "count", "variable": "medal"}) +fig.show() +``` + +Plotly Express figures created using wide-form data can be [styled just like any other Plotly Express figure](/python/styling-plotly-express/): + +```python +import plotly.express as px +wide_df = px.data.short_track_wide() + +fig = px.bar(wide_df, x="nation", y=["gold", "silver", "bronze"], + title="Wide-Form Input, styled", + labels={"value": "Medal Count", "variable": "Medal", "nation": "Olympic Nation"}, + color_discrete_map={"gold":"gold", "silver": "silver", "bronze": "#c96"}, + template="simple_white" + ) +fig.update_layout(font_family="Rockwell", showlegend=False) +fig.show() +``` + +### Data Frames with Named Indexes + +Pandas `DataFrames` support not only column names and "row names" via the value of `index`, but the indexes themselves can be named. Here is how to assign one column of the wide sample data frame above as the index, and to name the column index. 
The result "indexed" sample data frame can also be obtained by calling `px.data.short_track_wide(indexed=True)` + +```python +import plotly.express as px +wide_df = px.data.short_track_wide() +wide_df = wide_df.set_index("nation") +wide_df.columns.name = "medals" +wide_df +``` + +When working with a data frame like the one above, you can pass the index references directly as arguments, to benefit from automatic labelling for everything except the y axis label, which will default to "value", but this can be overridden with the `labels` argument as above: + +```python +import plotly.express as px +wide_df = px.data.short_track_wide(indexed=True) + +fig = px.bar(wide_df, x=wide_df.index, y=wide_df.columns) +fig.show() +``` + +If you transpose `x` and `y`, thereby assigning the columns to `x`, the orientation will be switched to horizontal: + +```python +import plotly.express as px +wide_df = px.data.short_track_wide(indexed=True) + +fig = px.bar(wide_df, x=wide_df.columns, y=wide_df.index) +fig.show() +``` + +### Wide-Form Defaults + +For bar, scatter, line and area charts, this pattern of assigning `x=df.index` and `y=df.columns` is so common that if you provide neither `x` nor `y` this is the default behaviour: + +```python +import plotly.express as px +wide_df = px.data.short_track_wide(indexed=True) + +fig = px.bar(wide_df) +fig.show() + +fig = px.area(wide_df) +fig.show() + +fig = px.line(wide_df) +fig.show() + +fig = px.scatter(wide_df) +fig.show() +``` + +### Orientation Control When Using Defaults + +If you specify neither `x` nor `y`, you can specify whether the Y or X axis is assigned to the index with `orientation`. + +```python +import plotly.express as px +wide_df = px.data.short_track_wide(indexed=True) + +fig = px.bar(wide_df, orientation="h") +fig.show() +``` + +### Assigning Columns to Non-Color Arguments + + +In the examples above, the columns of the wide data frame are always assigned to the `color` argument, but this is not a hard constraint.
The columns can be assigned to any Plotly Express argument, for example to accomplish faceting, and `color` can be reassigned to any other value. When plotting with a data frame without named indexes, you can reassign the inferred columns named `"variable"` and `"value"` to any argument: + +```python +import plotly.express as px +wide_df = px.data.short_track_wide(indexed=False) + +fig = px.bar(wide_df, x="nation", y=["gold", "silver", "bronze"], facet_col="variable", color="nation") +fig.show() +``` + +If using a data frame's named indexes, either explicitly or relying on the defaults, the index references or names must be used: + +```python +import plotly.express as px +wide_df = px.data.short_track_wide(indexed=True) + +fig = px.bar(wide_df, facet_col="medal", color=wide_df.index) +fig.show() +``` + +### Mixed-Form Data + +In some cases, a data frame is neither clearly long-form nor wide-form, and we can call this "mixed-form". For example, in the data frame below, if it contained only the `experiment` columns, the data could be described as wide-form, and if it contained only `gender` and `group` it could be described as long-form, but it contains both: + +```python +import plotly.express as px +mixed_df = px.data.experiment(indexed=True) +mixed_df.head() +``` + +We can visualize just the wide-form portion of the data frame easily with a [violin chart](/python/violin/). As a special note, we'll assign the index, which is the participant ID, to the hover_data, so that hovering over outlier points will identify their row.
+ +```python +import plotly.express as px +mixed_df = px.data.experiment(indexed=True) + +fig = px.violin(mixed_df, y=["experiment_1", "experiment_2", "experiment_3"], hover_data=[mixed_df.index]) +fig.show() +``` + + + + +We can also leverage the long-form portion of the data frame, for example to color by `gender` and facet by `group`: + +```python +import plotly.express as px +mixed_df = px.data.experiment(indexed=True) + +fig = px.violin(mixed_df, y=["experiment_1", "experiment_2", "experiment_3"], + color="gender", facet_col="group", hover_data=[mixed_df.index]) +fig.show() +``` + +And of course, we can reassign `variable` to another argument as well, in this case we'll assign it to `x` and facet by the wide variable, and we'll switch to a [box plot](/python/box-plots/) for variety. + +```python +import plotly.express as px +mixed_df = px.data.experiment(indexed=True) + +fig = px.box(mixed_df, x="group", y=["experiment_1", "experiment_2", "experiment_3"], + color="gender", facet_col="variable", hover_data=[mixed_df.index]) +fig.show() +``` + +One interesting thing about a mixed-form data frame like this is that it remains easy to plot, say, one experiment against another, which would require some preliminary data wrangling if this was represented as a pure long-form dataset: + +```python +import plotly.express as px +mixed_df = px.data.experiment(indexed=True) + +fig = px.scatter(mixed_df, x="experiment_1", y="experiment_2", + color="group", facet_col="gender", hover_data=[mixed_df.index]) +fig.show() +``` + +```python + +``` From 65822bc32116ba4d7a9a6be40a90452846897ae6 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Thu, 21 May 2020 12:53:11 -0400 Subject: [PATCH 60/69] Emma's changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c2854ccde8..040fa9e84b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ This project adheres to [Semantic Versioning](http://semver.org/). 
- `plotly` now provides a Plotly Express-backed Pandas-compatible plotting backend, which can be activated via `pandas.options.plotting.backend = "plotly"`. Note that it is not intended to implement every Pandas plotting function, nor is it intended to replicate the behaviour of every argument, although per the changes below, `x` and `y` should behave similarly. ([#2336](https://github.com/plotly/plotly.py/pull/2336)) - New datasets have been added to `plotly.express.data`: `stocks`, `experiment`, `short_track_wide` and `short_track_long`. ([#2336](https://github.com/plotly/plotly.py/pull/2336)) +- plotly `go.Figure` and `go.FigureWidget` now have a `_repr_html_` and a `_repr_mimebundle_` method, which are [standard hooks for integration in systems based on IPython](https://ipython.readthedocs.io/en/stable/config/integrating.html). In particular, with `_repr_html_` plotly figures can now be used within [sphinx-gallery](https://sphinx-gallery.github.io/stable/index.html) without any scraper. These additions should not change anything to the way plotly figures are displayed in notebook environments, since the `_ipython_display_` method (already present in earlier versions) takes precedence over the new methods. 
### Updated From 2eaea98f230b9cc05523b8eb9b546a2a67515521 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Sun, 24 May 2020 12:25:20 -0400 Subject: [PATCH 61/69] accept indexes as wide-mode spec --- packages/python/plotly/plotly/express/_core.py | 1 + .../plotly/tests/test_core/test_px/test_px_wide.py | 12 +++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index fb5ff218654..4e845ed30bf 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1241,6 +1241,7 @@ def build_dataframe(args, constructor): args["wide_variable"] = args["y"] if wide_y else args["x"] if df_provided and args["wide_variable"] is df_input.columns: var_name = df_input.columns.name + if isinstance(args["wide_variable"], pd.Index): args["wide_variable"] = list(args["wide_variable"]) if var_name in [None, "value", "index"] or ( df_provided and var_name in df_input diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py index 4fe57a6e404..a29a755e151 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py @@ -658,7 +658,7 @@ def append_special_case(df_in, args_in, args_expect, df_expect): dict( index=[7, 8, 7, 8], _value=[1, 2, 3, 4], - variable=["b", "b", "value", "value",], + variable=["b", "b", "value", "value"], ) ), ) @@ -675,6 +675,16 @@ def append_special_case(df_in, args_in, args_expect, df_expect): dict(c=[7, 8, 7, 8], d=["a", "a", "b", "b"], value=[1, 2, 3, 4]) ), ) +# y = columns +df = pd.DataFrame(dict(a=[1, 2], b=[3, 4]), index=[7, 8]) +df.index.name = "c" +df.columns.name = "d" +append_special_case( + df_in=df, + args_in=dict(x=df.index, y=df.columns[:1], color=None), + args_expect=dict(x="c", y="value", 
color="variable"), + df_expect=pd.DataFrame(dict(c=[7, 8], variable=["a", "a"], value=[1, 2])), +) @pytest.mark.parametrize("df_in, args_in, args_expect, df_expect", special_cases) From 3adcfd256a3216af2942967540996c0fd9d3e668 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Sun, 24 May 2020 12:29:45 -0400 Subject: [PATCH 62/69] reject wide-mode with different types --- packages/python/plotly/plotly/express/_core.py | 8 ++++++++ .../plotly/tests/test_core/test_px/test_px_wide.py | 10 ++++++++++ 2 files changed, 18 insertions(+) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 4e845ed30bf..fe247e2825e 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1329,6 +1329,14 @@ def build_dataframe(args, constructor): else: wide_cross_name = args["wide_cross"] del args["wide_cross"] + dtype = None + for v in wide_value_vars: + if dtype is None: + dtype = df_output[v].dtype + elif dtype != df_output[v].dtype: + raise ValueError( + "Plotly Express cannot process wide-form data with columns of different type." 
+ ) df_output = df_output.melt( id_vars=wide_id_vars, value_vars=wide_value_vars, diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py index a29a755e151..c639e04fe6f 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py @@ -710,3 +710,13 @@ def test_multi_index(): with pytest.raises(TypeError) as err_msg: px.scatter(df) assert "pandas MultiIndex is not supported by plotly express" in str(err_msg.value) + + +@pytest.mark.parametrize("df", [px.data.stocks(), dict(a=[1, 2], b=["1", "2"])]) +def test_mixed_input_error(df): + with pytest.raises(ValueError) as err_msg: + px.line(df) + assert ( + "Plotly Express cannot process wide-form data with columns of different type" + in str(err_msg.value) + ) From f8f08803f92fc1df8d43203eae31366e4e77a9af Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Sun, 24 May 2020 13:35:20 -0400 Subject: [PATCH 63/69] make list-like hover_data more robust --- .../python/plotly/plotly/express/_core.py | 43 +++++++++---- .../tests/test_core/test_px/test_px_hover.py | 60 ++++++++++++++++--- .../tests/test_core/test_px/test_px_wide.py | 24 +++++++- 3 files changed, 108 insertions(+), 19 deletions(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index fe247e2825e..216b16716c9 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1022,6 +1022,11 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name): args["hover_data"][k] = (True, args["hover_data"][k]) if not isinstance(args["hover_data"][k], tuple): args["hover_data"][k] = (args["hover_data"][k], None) + if df_provided and args["hover_data"][k][1] is not None and k in df_input: + raise ValueError( + "Ambiguous input: values for '%s' appear both 
in hover_data and data_frame" + % k + ) # Loop over possible arguments for field_name in all_attrables: # Massaging variables @@ -1074,11 +1079,28 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name): and hover_data_is_dict and args["hover_data"][str(argument)][1] is not None ): + # hover_data has onboard data + # previously-checked to have no name-conflict with data_frame col_name = str(argument) - df_output[col_name] = args["hover_data"][col_name][1] - continue - - if not df_provided: + real_argument = args["hover_data"][col_name][1] + + if length and len(real_argument) != length: + raise ValueError( + "All arguments should have the same length. " + "The length of hover_data key `%s` is %d, whereas the " + "length of previously-processed arguments %s is %d" + % ( + argument, + len(real_argument), + str(list(df_output.columns)), + length, + ) + ) + if hasattr(real_argument, "values"): + df_output[col_name] = real_argument.values + else: + df_output[col_name] = np.array(real_argument) + elif not df_provided: raise ValueError( "String or int arguments are only possible when a " "DataFrame or an array is provided in the `data_frame` " @@ -1086,7 +1108,7 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name): "'%s' is of type str or int." % field ) # Check validity of column name - if argument not in df_input.columns: + elif argument not in df_input.columns: if wide_mode and argument in (value_name, var_name): continue else: @@ -1098,11 +1120,11 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name): if argument == "index": err_msg += "\n To use the index, pass it in directly as `df.index`." raise ValueError(err_msg) - if length and len(df_input[argument]) != length: + elif length and len(df_input[argument]) != length: raise ValueError( "All arguments should have the same length. 
" "The length of column argument `df[%s]` is %d, whereas the " - "length of previous arguments %s is %d" + "length of previously-processed arguments %s is %d" % ( field, len(df_input[argument]), @@ -1110,8 +1132,9 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name): length, ) ) - col_name = str(argument) - df_output[col_name] = df_input[argument].values + else: + col_name = str(argument) + df_output[col_name] = df_input[argument].values # ----------------- argument is a column / array / list.... ------- else: if df_provided and hasattr(argument, "name"): @@ -1137,7 +1160,7 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name): raise ValueError( "All arguments should have the same length. " "The length of argument `%s` is %d, whereas the " - "length of previous arguments %s is %d" + "length of previously-processed arguments %s is %d" % (field, len(argument), str(list(df_output.columns)), length) ) if hasattr(argument, "values"): diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_hover.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_hover.py index 509f48d1991..f63696e8dee 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_hover.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_hover.py @@ -2,7 +2,6 @@ import numpy as np import pandas as pd import pytest -import plotly.graph_objects as go from collections import OrderedDict # an OrderedDict is needed for Python 2 @@ -74,24 +73,69 @@ def test_newdatain_hover_data(): def test_fail_wrong_column(): - with pytest.raises(ValueError): - fig = px.scatter( + with pytest.raises(ValueError) as err_msg: + px.scatter( {"a": [1, 2], "b": [3, 4], "c": [2, 1]}, x="a", y="b", hover_data={"d": True}, ) - with pytest.raises(ValueError): - fig = px.scatter( + assert ( + "Value of 'hover_data_0' is not the name of a column in 'data_frame'." 
+ in str(err_msg.value) + ) + with pytest.raises(ValueError) as err_msg: + px.scatter( {"a": [1, 2], "b": [3, 4], "c": [2, 1]}, x="a", y="b", hover_data={"d": ":.1f"}, ) - with pytest.raises(ValueError): - fig = px.scatter( + assert ( + "Value of 'hover_data_0' is not the name of a column in 'data_frame'." + in str(err_msg.value) + ) + with pytest.raises(ValueError) as err_msg: + px.scatter( + {"a": [1, 2], "b": [3, 4], "c": [2, 1]}, + x="a", + y="b", + hover_data={"d": [3, 4, 5]}, # d is too long + ) + assert ( + "All arguments should have the same length. The length of hover_data key `d` is 3" + in str(err_msg.value) + ) + with pytest.raises(ValueError) as err_msg: + px.scatter( + {"a": [1, 2], "b": [3, 4], "c": [2, 1]}, + x="a", + y="b", + hover_data={"d": (True, [3, 4, 5])}, # d is too long + ) + assert ( + "All arguments should have the same length. The length of hover_data key `d` is 3" + in str(err_msg.value) + ) + with pytest.raises(ValueError) as err_msg: + px.scatter( + {"a": [1, 2], "b": [3, 4], "c": [2, 1]}, + x="a", + y="b", + hover_data={"c": [3, 4]}, + ) + assert ( + "Ambiguous input: values for 'c' appear both in hover_data and data_frame" + in str(err_msg.value) + ) + with pytest.raises(ValueError) as err_msg: + px.scatter( {"a": [1, 2], "b": [3, 4], "c": [2, 1]}, x="a", y="b", - hover_data={"d": (True, [3, 4, 5])}, + hover_data={"c": (True, [3, 4])}, ) + assert ( + "Ambiguous input: values for 'c' appear both in hover_data and data_frame" + in str(err_msg.value) + ) diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py index c639e04fe6f..2c49b4bb63b 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py @@ -675,7 +675,8 @@ def append_special_case(df_in, args_in, args_expect, df_expect): dict(c=[7, 8, 7, 8], d=["a", "a", "b", "b"], value=[1, 2, 3, 
4]) ), ) -# y = columns + +# y = columns subset df = pd.DataFrame(dict(a=[1, 2], b=[3, 4]), index=[7, 8]) df.index.name = "c" df.columns.name = "d" @@ -686,6 +687,27 @@ def append_special_case(df_in, args_in, args_expect, df_expect): df_expect=pd.DataFrame(dict(c=[7, 8], variable=["a", "a"], value=[1, 2])), ) +# list-like hover_data +df = pd.DataFrame(dict(a=[1, 2], b=[3, 4]), index=[7, 8]) +df.index.name = "c" +df.columns.name = "d" +append_special_case( + df_in=df, + args_in=dict(x=None, y=None, color=None, hover_data=dict(new=[5, 6])), + args_expect=dict( + x="c", + y="value", + color="d", + orientation="v", + hover_data=dict(new=(True, [5, 6])), + ), + df_expect=pd.DataFrame( + dict( + c=[7, 8, 7, 8], d=["a", "a", "b", "b"], new=[5, 6, 5, 6], value=[1, 2, 3, 4] + ) + ), +) + @pytest.mark.parametrize("df_in, args_in, args_expect, df_expect", special_cases) def test_wide_mode_internal_special_cases(df_in, args_in, args_expect, df_expect): From 3479c6840bd06f231a41d30644f8e94ffb8aa0ec Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Sun, 24 May 2020 13:38:33 -0400 Subject: [PATCH 64/69] Update doc/python/wide-form.md Co-authored-by: Chris Parmer --- doc/python/wide-form.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/python/wide-form.md b/doc/python/wide-form.md index dd040184bdf..8c35df0e5ea 100644 --- a/doc/python/wide-form.md +++ b/doc/python/wide-form.md @@ -104,7 +104,7 @@ wide_df = px.data.short_track_wide() fig = px.bar(wide_df, x="nation", y=["gold", "silver", "bronze"], title="Wide-Form Input, styled", labels={"value": "Medal Count", "variable": "Medal", "nation": "Olympic Nation"}, - color_discrete_map={"gold":"gold", "silver": "silver", "bronze": "#c96"}, + color_discrete_map={"gold": "gold", "silver": "silver", "bronze": "#c96"}, template="simple_white" ) fig.update_layout(font_family="Rockwell", showlegend=False) From 45994151492fbcd153f3de7d520de37e99657265 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Sun, 24 
May 2020 13:38:53 -0400 Subject: [PATCH 65/69] Update doc/python/radar-chart.md Co-authored-by: Chris Parmer --- doc/python/radar-chart.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/python/radar-chart.md b/doc/python/radar-chart.md index 51fe5835466..eb753aec3be 100644 --- a/doc/python/radar-chart.md +++ b/doc/python/radar-chart.md @@ -35,7 +35,7 @@ jupyter: A [Radar Chart](https://en.wikipedia.org/wiki/Radar_chart) (also known as a spider plot or star plot) displays multivariate data in the form of a two-dimensional chart of quantitative variables represented on axes originating from the center. The relative position and angle of the axes is typically uninformative. It is equivalent to a [parallel coordinates plot](/python/parallel-coordinates-plot/) with the axes arranged radially. -For a Radar Chart, use a [polar chart](/python/polar-chart/) with categorical angular variables, with `px.line_polar`, or with `go.Scatterpolar`. See more examples of [polar charts here](/python/polar-chart/). +For a Radar Chart, use a [polar chart](/python/polar-chart/) with categorical angular variables, with `px.line_polar`, or with `go.Scatterpolar`. See [more examples of polar charts](/python/polar-chart/). #### Radar Chart with Plotly Express From 5430490dda349b2542d35d9cd0fa8ba13c733d52 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Sun, 24 May 2020 13:41:32 -0400 Subject: [PATCH 66/69] PR feedback --- packages/python/plotly/plotly/__init__.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/python/plotly/plotly/__init__.py b/packages/python/plotly/plotly/__init__.py index 60b33e95dd7..118c00dce4e 100644 --- a/packages/python/plotly/plotly/__init__.py +++ b/packages/python/plotly/plotly/__init__.py @@ -80,7 +80,7 @@ def plot(data_frame, kind, **kwargs): """ Pandas plotting backend function, not meant to be called directly. 
- To activate, set pandas.options.plotting.backend="plotly.express.pandas_backend" + To activate, set pandas.options.plotting.backend="plotly" See https://github.com/pandas-dev/pandas/blob/master/pandas/plotting/__init__.py """ from .express import scatter, line, area, bar, box, histogram @@ -103,14 +103,14 @@ def plot(data_frame, kind, **kwargs): new_kwargs = {k: kwargs[k] for k in kwargs if k not in ["by", "bins"]} return histogram(data_frame, **new_kwargs) raise NotImplementedError( - "The plotly.express backend doesn't yet support kind='%s'" % kind + "kind='%s' not yet supported for plotting.backend='plotly'" % kind ) def boxplot_frame(data_frame, **kwargs): """ Pandas plotting backend function, not meant to be called directly. - To activate, set pandas.options.plotting.backend="plotly.express.pandas_backend" + To activate, set pandas.options.plotting.backend="plotly" See https://github.com/pandas-dev/pandas/blob/master/pandas/plotting/__init__.py """ from .express import box @@ -124,7 +124,7 @@ def boxplot_frame(data_frame, **kwargs): def hist_frame(data_frame, **kwargs): """ Pandas plotting backend function, not meant to be called directly. - To activate, set pandas.options.plotting.backend="plotly.express.pandas_backend" + To activate, set pandas.options.plotting.backend="plotly" See https://github.com/pandas-dev/pandas/blob/master/pandas/plotting/__init__.py """ from .express import histogram @@ -138,7 +138,7 @@ def hist_frame(data_frame, **kwargs): def hist_series(data_frame, **kwargs): """ Pandas plotting backend function, not meant to be called directly. 
- To activate, set pandas.options.plotting.backend="plotly.express.pandas_backend" + To activate, set pandas.options.plotting.backend="plotly" See https://github.com/pandas-dev/pandas/blob/master/pandas/plotting/__init__.py """ from .express import histogram From 0bc7ba5951434778d27c2f6cef384d0917f0af74 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Sun, 24 May 2020 13:44:19 -0400 Subject: [PATCH 67/69] PR feedback --- CHANGELOG.md | 2 +- doc/python/px-arguments.md | 10 +++--- doc/python/wide-form.md | 34 +++++++++--------- .../python/plotly/plotly/data/__init__.py | 8 ++--- .../{short_track.csv.gz => medals.csv.gz} | Bin 5 files changed, 27 insertions(+), 27 deletions(-) rename packages/python/plotly/plotly/package_data/datasets/{short_track.csv.gz => medals.csv.gz} (100%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 040fa9e84b0..26b34bbec8c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ This project adheres to [Semantic Versioning](http://semver.org/). ### Added - `plotly` now provides a Plotly Express-backed Pandas-compatible plotting backend, which can be activated via `pandas.options.plotting.backend = "plotly"`. Note that it is not intended to implement every Pandas plotting function, nor is it intended to replicate the behaviour of every argument, although per the changes below, `x` and `y` should behave similarly. ([#2336](https://github.com/plotly/plotly.py/pull/2336)) -- New datasets have been added to `plotly.express.data`: `stocks`, `experiment`, `short_track_wide` and `short_track_long`. ([#2336](https://github.com/plotly/plotly.py/pull/2336)) +- New datasets have been added to `plotly.express.data`: `stocks`, `experiment`, `medals_wide` and `medals_long`. 
([#2336](https://github.com/plotly/plotly.py/pull/2336)) - plotly `go.Figure` and `go.FigureWidget` now have a `_repr_html_` and a `_repr_mimebundle_` method, which are [standard hooks for integration in systems based on IPython](https://ipython.readthedocs.io/en/stable/config/integrating.html). In particular, with `_repr_html_` plotly figures can now be used within [sphinx-gallery](https://sphinx-gallery.github.io/stable/index.html) without any scraper. These additions should not change anything to the way plotly figures are displayed in notebook environments, since the `_ipython_display_` method (already present in earlier versions) takes precedence over the new methods. ### Updated diff --git a/doc/python/px-arguments.md b/doc/python/px-arguments.md index bbf6d63ceac..4b140759ab8 100644 --- a/doc/python/px-arguments.md +++ b/doc/python/px-arguments.md @@ -55,13 +55,13 @@ By way of example here is the same data, represented in long-form first, and the ```python import plotly.express as px -long_df = px.data.short_track_long() +long_df = px.data.medals_long() long_df ``` ```python import plotly.express as px -wide_df = px.data.short_track_wide() +wide_df = px.data.medals_wide() wide_df ``` @@ -69,7 +69,7 @@ Plotly Express can produce the same plot from either form: ```python import plotly.express as px -long_df = px.data.short_track_long() +long_df = px.data.medals_long() fig = px.bar(long_df, x="nation", y="count", color="medal", title="Long-Form Input") fig.show() @@ -77,7 +77,7 @@ fig.show() ```python import plotly.express as px -wide_df = px.data.short_track_wide() +wide_df = px.data.medals_wide() fig = px.bar(wide_df, x="nation", y=["gold", "silver", "bronze"], title="Wide-Form Input") fig.show() @@ -87,7 +87,7 @@ You might notice that y-axis and legend labels are slightly different for the se ```python import plotly.express as px -wide_df = px.data.short_track_wide() +wide_df = px.data.medals_wide() fig = px.bar(wide_df, x="nation", y=["gold", "silver", 
"bronze"], title="Wide-Form Input, relabelled", labels={"value": "count", "variable": "medal"}) diff --git a/doc/python/wide-form.md b/doc/python/wide-form.md index 8c35df0e5ea..b989e727f09 100644 --- a/doc/python/wide-form.md +++ b/doc/python/wide-form.md @@ -54,13 +54,13 @@ By way of example here is the same data, represented in long-form first, and the ```python import plotly.express as px -long_df = px.data.short_track_long() +long_df = px.data.medals_long() long_df ``` ```python import plotly.express as px -wide_df = px.data.short_track_wide() +wide_df = px.data.medals_wide() wide_df ``` @@ -68,7 +68,7 @@ Plotly Express can produce the same plot from either form: ```python import plotly.express as px -long_df = px.data.short_track_long() +long_df = px.data.medals_long() fig = px.bar(long_df, x="nation", y="count", color="medal", title="Long-Form Input") fig.show() @@ -76,7 +76,7 @@ fig.show() ```python import plotly.express as px -wide_df = px.data.short_track_wide() +wide_df = px.data.medals_wide() fig = px.bar(wide_df, x="nation", y=["gold", "silver", "bronze"], title="Wide-Form Input") fig.show() @@ -88,7 +88,7 @@ You might notice that y-axis and legend labels are slightly different for the se ```python import plotly.express as px -wide_df = px.data.short_track_wide() +wide_df = px.data.medals_wide() fig = px.bar(wide_df, x="nation", y=["gold", "silver", "bronze"], title="Wide-Form Input, relabelled", labels={"value": "count", "variable": "medal"}) @@ -99,9 +99,9 @@ Plotly Express figures created using wide-form data can be [styled just like any ```python import plotly.express as px -wide_df = px.data.short_track_wide() +wide_df = px.data.medals_wide() -fig = px.bar(wide_df, x="nation", y=["gold", "silver", "bronze"], +fig = px.bar(wide_df, x="nation", y=["gold", "silver", "bronze"], title="Wide-Form Input, styled", labels={"value": "Medal Count", "variable": "Medal", "nation": "Olympic Nation"}, color_discrete_map={"gold": "gold", "silver": "silver", 
"bronze": "#c96"}, @@ -113,11 +113,11 @@ fig.show() ### Data Frames with Named Indexes -Pandas `DataFrames` support not only column names and "row names" via the value of `index`, but the indexes themselves can be named. Here is how to assign one column of the wide sample data frame above as the index, and to name the column index. The result "indexed" sample data frame can also be obtained by calling `px.data.short_track_wide(indexed=True)` +Pandas `DataFrames` support not only column names and "row names" via the value of `index`, but the indexes themselves can be named. Here is how to assign one column of the wide sample data frame above as the index, and to name the column index. The result "indexed" sample data frame can also be obtained by calling `px.data.medals_wide(indexed=True)` ```python import plotly.express as px -wide_df = px.data.short_track_wide() +wide_df = px.data.medals_wide() wide_df = wide_df.set_index("nation") wide_df.columns.name = "medals" wide_df @@ -127,7 +127,7 @@ When working with a data frame like the one above, you can pass the index refere ```python import plotly.express as px -wide_df = px.data.short_track_wide(indexed=True) +wide_df = px.data.medals_wide(indexed=True) fig = px.bar(wide_df, x=wide_df.index, y=wide_df.columns) fig.show() @@ -137,7 +137,7 @@ If you transpose `x` and `y`, thereby assigning the columns to `x`, the orientat ```python import plotly.express as px -wide_df = px.data.short_track_wide(indexed=True) +wide_df = px.data.medals_wide(indexed=True) fig = px.bar(wide_df, x=wide_df.columns, y=wide_df.index) fig.show() @@ -149,7 +149,7 @@ For bar, scatter, line and area charts, this pattern of assigning `x=df.index` a ```python import plotly.express as px -wide_df = px.data.short_track_wide(indexed=True) +wide_df = px.data.medals_wide(indexed=True) fig = px.bar(wide_df) fig.show() @@ -170,7 +170,7 @@ If you specify neither `x` nor `y`, you can specify whether the Y or X xaxis is ```python import plotly.express as px 
-wide_df = px.data.short_track_wide(indexed=True) +wide_df = px.data.medals_wide(indexed=True) fig = px.bar(wide_df, orientation="h") fig.show() @@ -183,7 +183,7 @@ In the examples above, the columns of the wide data frame are always assigned to ```python import plotly.express as px -wide_df = px.data.short_track_wide(indexed=False) +wide_df = px.data.medals_wide(indexed=False) fig = px.bar(wide_df, x="nation", y=["gold", "silver", "bronze"], facet_col="variable", color="nation") fig.show() @@ -193,7 +193,7 @@ If using a data frame's named indexes, either explicitly or relying on the defau ```python import plotly.express as px -wide_df = px.data.short_track_wide(indexed=True) +wide_df = px.data.medals_wide(indexed=True) fig = px.bar(wide_df, facet_col="medal", color=wide_df.index) fig.show() @@ -228,7 +228,7 @@ We can also leverage the long-form portion of the data frame, for example to col import plotly.express as px mixed_df = px.data.experiment(indexed=True) -fig = px.violin(mixed_df, y=["experiment_1", "experiment_2", "experiment_3"], +fig = px.violin(mixed_df, y=["experiment_1", "experiment_2", "experiment_3"], color="gender", facet_col="group", hover_data=[mixed_df.index]) fig.show() ``` @@ -239,7 +239,7 @@ And of course, we can reassign `variable` to another argument as well, in this c import plotly.express as px mixed_df = px.data.experiment(indexed=True) -fig = px.box(mixed_df, x="group", y=["experiment_1", "experiment_2", "experiment_3"], +fig = px.box(mixed_df, x="group", y=["experiment_1", "experiment_2", "experiment_3"], color="gender", facet_col="variable", hover_data=[mixed_df.index]) fig.show() ``` diff --git a/packages/python/plotly/plotly/data/__init__.py b/packages/python/plotly/plotly/data/__init__.py index 0dcaff7eb55..f64e326c9d7 100644 --- a/packages/python/plotly/plotly/data/__init__.py +++ b/packages/python/plotly/plotly/data/__init__.py @@ -136,7 +136,7 @@ def experiment(indexed=False): return df -def short_track_wide(indexed=False): +def 
medals_wide(indexed=False): """ This dataset represents the medal table for Olympic Short Track Speed Skating for the top three nations as of 2020. @@ -147,14 +147,14 @@ def short_track_wide(indexed=False): If `indexed` is True, the 'nation' column is used as the index and the column index is named 'medal' """ - df = _get_dataset("short_track") + df = _get_dataset("medals") if indexed: df = df.set_index("nation") df.columns.name = "medal" return df -def short_track_long(indexed=False): +def medals_long(indexed=False): """ This dataset represents the medal table for Olympic Short Track Speed Skating for the top three nations as of 2020. @@ -164,7 +164,7 @@ def short_track_long(indexed=False): `['nation', 'medal', 'count']`. If `indexed` is True, the 'nation' column is used as the index. """ - df = _get_dataset("short_track").melt( + df = _get_dataset("medals").melt( id_vars=["nation"], value_name="count", var_name="medal" ) if indexed: diff --git a/packages/python/plotly/plotly/package_data/datasets/short_track.csv.gz b/packages/python/plotly/plotly/package_data/datasets/medals.csv.gz similarity index 100% rename from packages/python/plotly/plotly/package_data/datasets/short_track.csv.gz rename to packages/python/plotly/plotly/package_data/datasets/medals.csv.gz From 51dbf60eeacaca8d285a4dbdc132c506b359f39d Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Mon, 25 May 2020 12:22:46 -0400 Subject: [PATCH 68/69] docs --- doc/python/cufflinks.md | 166 ----------------------------- doc/python/discrete-color.md | 13 ++- doc/python/pandas-backend.md | 201 +++++++++++++++++++++++++++++++++++ doc/python/wide-form.md | 131 +++++++++++++++-------- 4 files changed, 300 insertions(+), 211 deletions(-) delete mode 100644 doc/python/cufflinks.md create mode 100644 doc/python/pandas-backend.md diff --git a/doc/python/cufflinks.md b/doc/python/cufflinks.md deleted file mode 100644 index 5683bfcd422..00000000000 --- a/doc/python/cufflinks.md +++ /dev/null @@ -1,166 +0,0 @@ ---- 
-jupyter: - jupytext: - notebook_metadata_filter: all - text_representation: - extension: .md - format_name: markdown - format_version: "1.2" - jupytext_version: 1.3.1 - kernelspec: - display_name: Python 3 - language: python - name: python3 - language_info: - codemirror_mode: - name: ipython - version: 3 - file_extension: .py - mimetype: text/x-python - name: python - nbconvert_exporter: python - pygments_lexer: ipython3 - version: 3.6.8 - plotly: - description: - Cufflinks is a third-party wrapper library around Plotly, inspired by the Pandas .plot() API. - display_as: file_settings - language: python - layout: base - name: Cufflinks - order: 31 - permalink: python/cufflinks/ - thumbnail: thumbnail/plotly-express.png ---- - -### Introduction - -[Cufflinks](https://github.com/santosjorge/cufflinks) is a third-party wrapper library around Plotly, maintained by [Santos Jorge](https://github.com/santosjorge). - -When you import cufflinks, all [Pandas](https://pandas.pydata.org/) data frames and series objects have a new method attached to them called `.iplot()` which has a similar API to Pandas' built-in `.plot()` method. - -By passing the `asFigure=True` argument to `.iplot()`, Cufflinks works similarly to [Plotly Express](/python/plotly-express/), by returning [customizable `go.Figure` objects](/python/styling-plotly-express/) which are compatible with [Dash](https://dash.plot.ly)'s [`dcc.Graph` component](https://dash.plotly.com/dash-core-components/graph). Cufflinks also adds a `.figure()` method which has the same signature as `.iplot()` except that it has `asFigure=True` set as the default. - -This page shows some high-level examples of how to use Cufflinks, and more examples and documentation are available in the [Cufflinks Github repository](https://github.com/santosjorge/cufflinks). - -> Issues and questions regarding Cufflinks should be [raised in the Cufflinks repository](https://github.com/santosjorge/cufflinks/issues/new). 
- -```python -import cufflinks as cf -import pandas as pd -import numpy as np - -df = pd.DataFrame(np.random.randn(1000, 2), columns=['A', 'B']).cumsum() -fig = df.iplot(asFigure=True, xTitle="The X Axis", - yTitle="The Y Axis", title="The Figure Title") -fig.show() -``` - -Cufflinks has a `datagen` module for generating demo data. - -```python -import cufflinks as cf - -df = cf.datagen.lines() -fig = df.iplot(asFigure=True) -fig.show() -df.head() -``` - -### Scatter Plots - -```python -import cufflinks as cf -import pandas as pd -import numpy as np - -df = pd.DataFrame(np.random.randn(1000, 2), columns=['A', 'B']).cumsum() -fig = df.iplot(asFigure=True, x='A', y='B', mode='markers') -fig.show() -``` - -### Bar Charts - -```python -import cufflinks as cf -import pandas as pd -df = pd.DataFrame(np.random.rand(10, 4), columns=['A', 'B', 'C', 'D']) -fig = df.iplot(asFigure=True, kind="bar") -fig.show() -``` - -### Histograms - -```python -import cufflinks as cf -import pandas as pd -df = pd.DataFrame({'a': np.random.randn(1000) + 1, - 'b': np.random.randn(1000), - 'c': np.random.randn(1000) - 1}) - -fig = df.iplot(asFigure=True, kind="histogram") -fig.show() -``` - -### Box Plots - -```python -import cufflinks as cf -import pandas as pd -df = pd.DataFrame({'a': np.random.randn(1000) + 1, - 'b': np.random.randn(1000), - 'c': np.random.randn(1000) - 1}) - -fig = df.iplot(asFigure=True, kind="box") -fig.show() -``` - -### Subplots - -```python -import cufflinks as cf - -df=cf.datagen.lines(4) -fig = df.iplot(asFigure=True, subplots=True, shape=(4,1), shared_xaxes=True, fill=True) -fig.show() -``` - -```python -import cufflinks as cf - -df=cf.datagen.lines(4) -fig = df.iplot(asFigure=True, subplots=True, subplot_titles=True, legend=False) -fig.show() -``` - -### Line and Box Annotations - -```python -import cufflinks as cf - -df=cf.datagen.lines(4) -fig = df.iplot(asFigure=True, hline=[2,4], vline=['2015-02-10']) -fig.show() -``` - -```python -import cufflinks as cf - 
-df=cf.datagen.lines(4) -fig = df.iplot(asFigure=True, hspan=[(-1,1),(2,5)]) -fig.show() -``` - -```python -import cufflinks as cf - -df=cf.datagen.lines(4) -fig = df.iplot(asFigure=True, - vspan={'x0':'2015-02-15','x1':'2015-03-15', - 'color':'rgba(30,30,30,0.3)','fill':True,'opacity':.4}) -fig.show() -``` - -### More Examples - -More documentation and examples for Cufflinks can be found in its [Github repository](https://github.com/santosjorge/cufflinks). diff --git a/doc/python/discrete-color.md b/doc/python/discrete-color.md index 1afd6b2fbc8..5177bfb5be6 100644 --- a/doc/python/discrete-color.md +++ b/doc/python/discrete-color.md @@ -6,7 +6,7 @@ jupyter: extension: .md format_name: markdown format_version: '1.2' - jupytext_version: 1.3.1 + jupytext_version: 1.4.2 kernelspec: display_name: Python 3 language: python @@ -20,7 +20,7 @@ jupyter: name: python nbconvert_exporter: python pygments_lexer: ipython3 - version: 3.6.8 + version: 3.7.7 plotly: description: How to use and configure discrete color sequences, also known as categorical or qualitative color scales. @@ -178,6 +178,15 @@ fig = px.bar(df, y="continent", x="pop", color="continent", orientation="h", hov fig.show() ``` +If your data set already contains valid CSS colors which you wish to use directly, you can pass the special value `"identity"` to `color_discrete_map`, in which case the legend is hidden by default, and the color does not appear in the hover label: + +```python +import plotly.express as px + +fig = px.bar(x=["a","b","c"], y=[1,3,2], color=["red", "goldenrod", "#00D"], color_discrete_map="identity") +fig.show() +``` + ### Controlling Discrete Color Order Plotly Express lets you specify an ordering over categorical variables with `category_orders`, which will apply to colors and legends as well as symbols, [axes](/python/axes/) and [facets](/python/facet-plots/). This can be used with either `color_discrete_sequence` or `color_discrete_map`. 
diff --git a/doc/python/pandas-backend.md b/doc/python/pandas-backend.md new file mode 100644 index 00000000000..17eb84a7110 --- /dev/null +++ b/doc/python/pandas-backend.md @@ -0,0 +1,201 @@ +--- +jupyter: + jupytext: + notebook_metadata_filter: all + text_representation: + extension: .md + format_name: markdown + format_version: '1.2' + jupytext_version: 1.4.2 + kernelspec: + display_name: Python 3 + language: python + name: python3 + language_info: + codemirror_mode: + name: ipython + version: 3 + file_extension: .py + mimetype: text/x-python + name: python + nbconvert_exporter: python + pygments_lexer: ipython3 + version: 3.7.7 + plotly: + description: Cufflinks is a third-party wrapper library around Plotly, inspired + by the Pandas .plot() API. + display_as: file_settings + language: python + layout: base + name: Pandas Plotting Backend + order: 31 + permalink: python/pandas-backend/ + redirect_from: python/cufflinks/ + thumbnail: thumbnail/plotly-express.png +--- + +### Introduction + +The popular [Pandas](https://pandas.pydata.org/) data analysis and manipulation tool provides plotting functions on its `DataFrame` and `Series` objects, which have historically produced `matplotlib` plots. Since version 0.25, Pandas has provided a mechanism to use different backends, and as of version 4.8 of `plotly`, you can now use a [Plotly Express-powered](/python/plotly-express/) backend for Pandas plotting. + +To activate it, you just need to set `pd.options.plotting.backend` to `"plotly"` and call `.plot()` to get a `plotly.graph_objects.Figure` object back, just like if you had called Plotly Express directly: + +```python +import pandas as pd +pd.options.plotting.backend = "plotly" + +df = pd.DataFrame(dict(a=[1,3,2], b=[3,2,1])) +fig = df.plot() +fig.show() +``` + +This functionality wraps [Plotly Express](/python/plotly-express/) and so you can use any of the [styling options available to Plotly Express methods](/python/styling-plotly-express/).
Since what you get back is a regular `Figure` object, you can use any of the update mechanisms supported by these objects to apply [templates](/python/templates/) or further customize [axes](/python/axes/), [colors](/python/colorscales/), [legends](/python/legend/), [fonts](/python/figure-labels/), [hover labels](/python/hover-text-and-formatting/) etc. [Faceting](/python/facet-plots/) is also supported. + +```python +import pandas as pd +pd.options.plotting.backend = "plotly" + +df = pd.DataFrame(dict(a=[1,3,2], b=[3,2,1])) +fig = df.plot(title="Pandas Backend Example", template="simple_white", + labels=dict(index="time", value="money", variable="option")) +fig.update_yaxes(tickprefix="$") +fig.show() +``` + +### A Note on API Compatibility + +> The Plotly plotting backend for Pandas is *not intended* to be a drop-in replacement for the default; it does not implement all or even most of the same keyword arguments, such as `subplots=True` etc. + +The Plotly plotting backend for Pandas is a more convenient way to invoke certain [Plotly Express](/python/plotly-express/) functions by chaining a `.plot()` call without having to import Plotly Express directly. Plotly Express, as of version 4.8 with [wide-form data support](/python/wide-form/) implements behaviour for the `x` and `y` keywords that are very similar to the `matplotlib` backend.
+ +In practice, this means that the following two ways of making a chart are identical and support the same additional arguments, because they call the same underlying code: + +```python +import pandas as pd +pd.options.plotting.backend = "plotly" +df = pd.DataFrame(dict(a=[1,3,2], b=[3,2,1])) + +# using Plotly Express via the Pandas backend +fig1 = df.plot.bar() +fig1.show() + +# using Plotly Express directly +import plotly.express as px +fig2 = px.bar(df) +fig2.show() +``` + +To achieve a similar effect to `subplots=True`, the [Plotly Express `facet_row` and `facet_col` options](/python/facet-plots/) can be used, the same way as they work when directly calling [Plotly Express with wide-form data](/python/wide-form/): + +```python +import pandas as pd +pd.options.plotting.backend = "plotly" +df = pd.DataFrame(dict(a=[1,3,2], b=[3,2,1])) + +fig = df.plot.bar(facet_row="variable") +fig.show() +``` + +### Supported Methods + +The Plotly backend supports the following `kind`s of Pandas plots: `scatter`, `line`, `area`, `bar`, `barh`, `hist` and `box`, via the call pattern `df.plot(kind='scatter')` or `df.plot.scatter()`.
+ +```python +import pandas as pd +import numpy as np +pd.options.plotting.backend = "plotly" +np.random.seed(1) + +df = pd.DataFrame(dict( + a=np.random.normal(loc=1, scale=2, size=100), + b=np.random.normal(loc=2, scale=1, size=100) +)) +fig = df.plot.scatter(x="a", y="b") +fig.show() +``` + +```python +import pandas as pd +pd.options.plotting.backend = "plotly" + +df = pd.DataFrame(dict(a=[1,3,2], b=[3,2,1])) +fig = df.plot.line() +fig.show() +``` + +```python +import pandas as pd +pd.options.plotting.backend = "plotly" + +df = pd.DataFrame(dict(a=[1,3,2], b=[3,2,1])) +fig = df.plot.area() +fig.show() +``` + +```python +import pandas as pd +pd.options.plotting.backend = "plotly" + +df = pd.DataFrame(dict(a=[1,3,2], b=[3,2,1])) +fig = df.plot.bar() +fig.show() +``` + +```python +import pandas as pd +pd.options.plotting.backend = "plotly" + +df = pd.DataFrame(dict(a=[1,3,2], b=[3,2,1])) +fig = df.plot.barh() +fig.show() +``` + +```python +import pandas as pd +import numpy as np +pd.options.plotting.backend = "plotly" +np.random.seed(1) + +df = pd.DataFrame(dict( + a=np.random.normal(loc=1, scale=2, size=100), + b=np.random.normal(loc=2, scale=1, size=100) +)) +fig = df.plot.hist() +fig.show() +``` + +```python +import pandas as pd +import numpy as np +pd.options.plotting.backend = "plotly" +np.random.seed(1) + +df = pd.DataFrame(dict( + a=np.random.normal(loc=1, scale=2, size=100), + b=np.random.normal(loc=2, scale=1, size=100) +)) +fig = df.plot.box() +fig.show() +``` + +### `Series` and `DataFrame` functions: `hist` and `boxplot` + +The Pandas plotting API also exposes `.hist()` on `DataFrame`s and `Series` objects, and `.boxplot()` on `DataFrames`, which can also be used with the Plotly backend. 
+ +```python +import pandas as pd +import numpy as np +pd.options.plotting.backend = "plotly" +np.random.seed(1) + +df = pd.DataFrame(dict( + a=np.random.normal(loc=1, scale=2, size=100), + b=np.random.normal(loc=2, scale=1, size=100) +)) +fig = df.boxplot() +fig.show() +``` + +### What about Cufflinks? + +There also exists an independent third-party wrapper library around Plotly called [Cufflinks](https://github.com/santosjorge/cufflinks), which provides similar functionality (with an API closer to that of Pandas' default `matplotlib` backend) by adding a `.iplot()` method to Pandas dataframes, as it was developed before Pandas supported configurable backends. Issues and questions regarding Cufflinks should be [raised in the Cufflinks repository](https://github.com/santosjorge/cufflinks/issues/new). diff --git a/doc/python/wide-form.md b/doc/python/wide-form.md index b989e727f09..9097b4cb6ea 100644 --- a/doc/python/wide-form.md +++ b/doc/python/wide-form.md @@ -34,11 +34,11 @@ jupyter: thumbnail: thumbnail/plotly-express.png --- -### Column-oriented, Matrix or Geographic Data +### Plotly Express with Column-oriented, Matrix or Geographic Data Plotly Express provides functions to visualize a variety of types of data. Most functions such as `px.bar` or `px.scatter` expect to operate on column-oriented data of the type you might store in a Pandas `DataFrame` (in either "long" or "wide" format, see below). [`px.imshow` operates on matrix-like data](/python/imshow/) you might store in a `numpy` or `xarray` array and functions like [`px.choropleth` and `px.choropleth_mapbox` can operate on geographic data](/python/maps/) of the kind you might store in a GeoPandas `GeoDataFrame`. This page details how to provide a specific form of column-oriented data to 2D-Cartesian Plotly Express functions, but you can also check out our [detailed column-input-format documentation](/python/px-arguments/). 
-### Long-, Wide-, and Mixed-Form Data +### Plotly Express with Long-, Wide-, and Mixed-Form Data *Until version 4.8, Plotly Express only operated on long-form (previously called "tidy") data, but now accepts wide-form and mixed-form data as well.* @@ -48,7 +48,7 @@ There are three common conventions for storing column-oriented data, usually in * **wide-form data** is suitable for storing 2-dimensional data, with one row per value of one of the first variable, and one column per value of the second variable. * **mixed-form data** is a hybrid of long-form and wide-form data, with one row per value of one variable, and some columns representing values of another, and some columns representing more variables -All Plotly Express functions can operate on long-form data, and the following 2D-Cartesian functions can operate on wide-form data as well:: `px.scatter`, `px.line`, `px.area`, `px.bar`, `px.histogram`, `px.violin`, `px.box`, `px.strip`, `px.funnel`, `px.density_heatmap` and `px.density_contour`. +All Plotly Express functions other than `imshow` can operate on long-form data, and in addition, the following 2D-Cartesian functions can operate on wide-form and mixed-form data: `px.scatter`, `px.line`, `px.area`, `px.bar`, `px.histogram`, `px.violin`, `px.box`, `px.strip`, `px.funnel`, `px.density_heatmap` and `px.density_contour`. By way of example here is the same data, represented in long-form first, and then in wide-form: @@ -64,7 +64,7 @@ wide_df = px.data.medals_wide() wide_df ``` -Plotly Express can produce the same plot from either form: +Plotly Express can produce **the same plot from either form**. For the long-form input, `x` and `y` are set to the respective column names. ```python import plotly.express as px @@ -74,6 +74,8 @@ fig = px.bar(long_df, x="nation", y="count", color="medal", title="Long-Form Inp fig.show() ``` +For the wide-form input, we **pass in a list of column-names `y`**, which is enough to trigger the wide-form processing mode. 
Wide-form mode is also the default if neither `x` nor `y` are specified, see section at bottom regarding Wide-Form Defaults. + ```python import plotly.express as px wide_df = px.data.medals_wide() @@ -143,118 +145,161 @@ fig = px.bar(wide_df, x=wide_df.columns, y=wide_df.index) fig.show() ``` -### Wide-Form Defaults +### Assigning Inferred Columns to Non-Default Arguments + -For bar, scatter, line and area charts, this pattern of assigning `x=df.index` and `y=df.columns` is so common that if you provide neither `x` nor `y` this is the default behaviour +In the examples above, the columns of the wide data frame are assigned by default as an "inferred" column named `variable` to the `color` argument (see section below for documentation of the default behaviours), but this is not a hard constraint. The `variable` column can be assigned to any Plotly Express argument, for example to accomplish faceting, and `color` can be reassigned to any other value. More generally, when plotting with a data frame without named indexes, you can reassign the inferred column named `variable` and `value` to any argument: ```python import plotly.express as px -wide_df = px.data.medals_wide(indexed=True) +wide_df = px.data.medals_wide(indexed=False) -fig = px.bar(wide_df) +fig = px.bar(wide_df, x="nation", y=["gold", "silver", "bronze"], facet_col="variable", color="nation") fig.show() +``` -fig = px.area(wide_df) -fig.show() +If using a data frame's named indexes, either explicitly or relying on the defaults, the row-index references (i.e. `df.index`) or column-index names (i.e. 
the value of `df.columns.name`) must be used: -fig = px.line(wide_df) -fig.show() +```python +import plotly.express as px +wide_df = px.data.medals_wide(indexed=True) -fig = px.scatter(wide_df) +fig = px.bar(wide_df, facet_col="medal", color=wide_df.index) fig.show() ``` -### Orientation Control When Using Defaults +### Mixed-Form Data -If you specify neither `x` nor `y`, you can specify whether the Y or X xaxis is assigned to the index with `orientation`. +In some cases, a data frame is neither clearly long-form nor wide-form, and we can call this "mixed-form". For example, in the data frame below, if it contained only the `experiment` columns, the data could be described as wide-form, and if it contained only `gender` and `group` it could be described as long-form, but it contains both, so it is best described as mixed-form data: ```python import plotly.express as px -wide_df = px.data.medals_wide(indexed=True) +mixed_df = px.data.experiment(indexed=True) +mixed_df.head() +``` -fig = px.bar(wide_df, orientation="h") +We can visualize just the wide-form portion of the data frame easily with a [violin chart](/python/violin/). As a special note, we'll assign the index, which is the participant ID, to the `hover_data`, so that hovering over outlier points will identify their row. + +```python +import plotly.express as px +mixed_df = px.data.experiment(indexed=True) + +fig = px.violin(mixed_df, y=["experiment_1", "experiment_2", "experiment_3"], hover_data=[mixed_df.index]) fig.show() ``` -### Assigning Columns to Non-Color Arguments -In the examples above, the columns of the wide data frame are always assigned to the `color` argument, but this is not a hard constraint. The columns can be assigned to any Plotly Express argument, for example to accomplish faceting, and `color` can be reassigned to any other value. 
When plotting with a data frame without named indexes, you can reassign the inferred column named `"variable"` and `"value"` to any argument: + +We are not limited to visualizing only the wide-form portion of the data, however. We can also leverage the long-form portion of the data frame, for example to color by participant `gender` and facet by participant `group`, all without having to manipulate the data frame: ```python import plotly.express as px -wide_df = px.data.medals_wide(indexed=False) +mixed_df = px.data.experiment(indexed=True) -fig = px.bar(wide_df, x="nation", y=["gold", "silver", "bronze"], facet_col="variable", color="nation") +fig = px.violin(mixed_df, y=["experiment_1", "experiment_2", "experiment_3"], + color="gender", facet_col="group", hover_data=[mixed_df.index]) fig.show() ``` -If using a data frame's named indexes, either explicitly or relying on the defaults, the index references or names must be used: +In the plots above, the column names provided to `y` are internally mapped to long-form column called `variable`, as is apparent in the x-axis labels. We can reassign `variable` to another argument as well, in this case we'll assign it to `facet_col` and reassign `group` to the `x` axis. We'll switch to a [box plot](/python/box-plots/) for variety. ```python import plotly.express as px -wide_df = px.data.medals_wide(indexed=True) +mixed_df = px.data.experiment(indexed=True) -fig = px.bar(wide_df, facet_col="medal", color=wide_df.index) +fig = px.box(mixed_df, x="group", y=["experiment_1", "experiment_2", "experiment_3"], + color="gender", facet_col="variable", hover_data=[mixed_df.index]) fig.show() ``` -### Mixed-Form Data - -In some cases, a data frame is neither clearly long-form nor wide-form, and we can call this "mixed-form". 
For example, in the data frame below, if it contained only the `experiment` columns, the data could be described as wide-form, and if it contained only `gender` and `group` it could be described as long-form, but it contains both: +One interesting thing about a mixed-form data frame like this is that it remains easy to plot, say, one experiment against another, which would require some preliminary data wrangling if this was represented as a pure long-form dataset: ```python import plotly.express as px mixed_df = px.data.experiment(indexed=True) -mixed_df.head() + +fig = px.scatter(mixed_df, x="experiment_1", y="experiment_2", + color="group", facet_col="gender", hover_data=[mixed_df.index]) +fig.show() ``` -We can visualize just the wide-form portion of the data frame easily with a [violin chart](/python/violin/). As a special note, we'll assign the index, which is the participant ID, to the hover_data, so that hovering over outlier points will identify their row. +In fact, we can even visualize the results of every experiment against every other, using a [scatterplot matrix](/python/splom/): ```python import plotly.express as px mixed_df = px.data.experiment(indexed=True) -fig = px.violin(mixed_df, y=["experiment_1", "experiment_2", "experiment_3"], hover_data=[mixed_df.index]) +fig = px.scatter_matrix(mixed_df, dimensions=["experiment_1", "experiment_2", "experiment_3"], color="gender") fig.show() ``` +### Wide-Form Defaults +For bar, scatter, line and area charts, the pattern of assigning `x=df.index`, `y=df.columns`, `color="variable"` is so common that if you provide neither `x` nor `y` this is the default behaviour. An exception is made for bar charts when the values are not continuous variables, in which case the default is similar to histograms, with `x=df.columns`, `color="variable"` and `y=<constant value of 1 labeled "count">`.
+For violin and box plots, the default is to assign `x=variable`, `y=df.columns` and for histograms the default is `x=df.columns`, `color="variable"` -We can also leverage the long-form portion of the data frame, for example to color by `gender` and facet by `group`: +These defaults are also filled in if you specify only `y` (`x` for histograms) as a list-of-columns. See below for orientation control. ```python import plotly.express as px -mixed_df = px.data.experiment(indexed=True) +wide_df = px.data.medals_wide(indexed=True) -fig = px.violin(mixed_df, y=["experiment_1", "experiment_2", "experiment_3"], - color="gender", facet_col="group", hover_data=[mixed_df.index]) +fig = px.bar(wide_df) fig.show() -``` -And of course, we can reassign `variable` to another argument as well, in this case we'll assign it to `x` and facet by the wide variable, and we'll switch to a [box plot](/python/box-plots/) for variety. +fig = px.area(wide_df) +fig.show() + +fig = px.line(wide_df) +fig.show() + +fig = px.scatter(wide_df) +fig.show() +``` ```python import plotly.express as px + mixed_df = px.data.experiment(indexed=True) +wide_df = mixed_df[["experiment_1", "experiment_2", "experiment_3"]] -fig = px.box(mixed_df, x="group", y=["experiment_1", "experiment_2", "experiment_3"], - color="gender", facet_col="variable", hover_data=[mixed_df.index]) +fig = px.histogram(wide_df) +fig.show() + +fig = px.violin(wide_df) +fig.show() + +fig = px.box(wide_df) fig.show() ``` -One interesting thing about a mixed-form data frame like this is that it remains easy to plot, say, one experiment against another, which would require some preliminary data wrangling if this was represented as a pure long-form dataset: +### Orientation Control When Using Defaults + +If you specify neither `x` nor `y`, you can swap the default behaviour of `x` and `y` by setting `orientation="h"`. 
+ +If you specify only `x` as a list-of-columns (`y` in the case of histograms), then the defaults are filled in as if `orientation="h"` ```python import plotly.express as px +wide_df = px.data.medals_wide(indexed=True) + +fig = px.bar(wide_df, orientation="h") +fig.show() + +fig = px.area(wide_df, x=wide_df.columns) +fig.show() + mixed_df = px.data.experiment(indexed=True) +wide_df = mixed_df[["experiment_1", "experiment_2", "experiment_3"]] -fig = px.scatter(mixed_df, x="experiment_1", y="experiment_2", - color="group", facet_col="gender", hover_data=[mixed_df.index]) +fig = px.histogram(wide_df, orientation="h") fig.show() -``` -```python +fig = px.violin(wide_df, orientation="h") +fig.show() +fig = px.box(wide_df, orientation="h") +fig.show() ``` From b19d2c6c627adc3e7c473f4d258ef3569e78d0c0 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Mon, 25 May 2020 12:35:33 -0400 Subject: [PATCH 69/69] bump doc pandas --- binder/requirements.txt | 2 +- doc/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/binder/requirements.txt b/binder/requirements.txt index 9e5793b32e9..07219544030 100644 --- a/binder/requirements.txt +++ b/binder/requirements.txt @@ -2,7 +2,7 @@ jupytext plotly==4.7.0 jupyter notebook -pandas +pandas==1.0.3 statsmodels==0.10.1 scipy patsy==0.5.1 diff --git a/doc/requirements.txt b/doc/requirements.txt index d28ddc9c15d..62874967f19 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -2,7 +2,7 @@ plotly==4.7.1 jupytext==1.1.1 jupyter notebook -pandas==0.23.0 +pandas==1.0.3 statsmodels==0.10.1 scipy==1.3.1 patsy==0.5.1