From 31c71fc83e7ca31df56151909720e77f7393e476 Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Tue, 23 Jun 2020 17:05:44 +0200 Subject: [PATCH 1/6] solve aggregation bug when color and values correspond to the same color --- .../python/plotly/plotly/express/_core.py | 23 +++++++++---- .../test_core/test_px/test_px_functions.py | 32 ++++--------------- 2 files changed, 22 insertions(+), 33 deletions(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index d89794a5a49..6dba7b55052 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -473,12 +473,16 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref): for k, v in mapping_labels.items(): # We need to invert the mapping here k_args = invert_label(args, k) + print(k, k_args, args["hover_data"]) if k_args in args["hover_data"]: - if args["hover_data"][k_args][0]: - if isinstance(args["hover_data"][k_args][0], str): - mapping_labels_copy[k] = v.replace( - "}", "%s}" % args["hover_data"][k_args][0] - ) + formatter = ( + args["hover_data"][k_args][0] + if isinstance(args["hover_data"][k_args], tuple) + else args["hover_data"][k_args] + ) + if formatter: + if isinstance(formatter, str): + mapping_labels_copy[k] = v.replace("}", "%s}" % formatter) else: _ = mapping_labels_copy.pop(k) hover_lines = [k + "=" + v for k, v in mapping_labels_copy.items()] @@ -1499,7 +1503,9 @@ def aggfunc_discrete(x): if args["color"]: if args["color"] == args["values"]: - aggfunc_color = "sum" + new_value_col_name = args["values"] + "_total" + df[new_value_col_name] = df[args["values"]] + args["values"] = new_value_col_name count_colname = args["values"] else: # we need a count column for the first groupby and the weighted mean of color @@ -1518,7 +1524,7 @@ def aggfunc_discrete(x): if not _is_continuous(df, args["color"]): aggfunc_color = aggfunc_discrete discrete_color = True - elif not aggfunc_color: + else: def aggfunc_continuous(x): return np.average(x, weights=df.loc[x.index, count_colname]) @@ -1576,6 +1582,9 @@ def aggfunc_continuous(x): if args["color"]: if not args["hover_data"]: args["hover_data"] = [args["color"]] + elif isinstance(args["hover_data"], dict): + if not args["hover_data"].get(args["color"]): + args["hover_data"][args["color"]] = True else: args["hover_data"].append(args["color"]) return args diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_functions.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_functions.py index 9c89fcd25f0..02f9e1c6b83 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_functions.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_functions.py @@ -149,10 +149,6 @@ def test_sunburst_treemap_with_path(): fig = px.sunburst(df, path=path, values="values") assert fig.data[0].branchvalues == "total" assert fig.data[0].values[-1] == np.sum(values) - # Continuous colorscale - fig = px.sunburst(df, path=path, values="values", color="values") - assert "coloraxis" in fig.data[0].marker - assert np.all(np.array(fig.data[0].marker.colors) == np.array(fig.data[0].values)) # Error when values cannot be converted to numerical data type df["values"] = ["1 000", "3 000", "2", "4", "2", "2", "1 000", "4 000"] msg = "Column `values` of `df` could not be converted to a numerical data type." @@ -162,6 +158,12 @@ def test_sunburst_treemap_with_path(): path = [df.total, "regions", df.sectors, "vendors"] fig = px.sunburst(df, path=path) assert fig.data[0].branchvalues == "total" + # Continuous colorscale + df["values"] = 1 + fig = px.sunburst(df, path=path, values="values", color="values") + assert "coloraxis" in fig.data[0].marker + assert np.all(np.array(fig.data[0].marker.colors) == 1) + assert fig.data[0].values[-1] == 8 def test_sunburst_treemap_with_path_and_hover(): @@ -329,28 +331,6 @@ def test_parcats_dimensions_max(): # default behaviour fig = px.parallel_categories(df) - assert [d.label for d in fig.data[0].dimensions] == [ - "sex", - "smoker", - "day", - "time", - "size", - ] - - # explicit subset of default - fig = px.parallel_categories(df, dimensions=["sex", "smoker", "day"]) - assert [d.label for d in fig.data[0].dimensions] == ["sex", "smoker", "day"] - - # shrinking max - fig = px.parallel_categories(df, dimensions_max_cardinality=4) - assert [d.label for d in fig.data[0].dimensions] == [ - "sex", - "smoker", - "day", - "time", - ] - - # explicit superset of default, violating the max fig = px.parallel_categories( df, dimensions=["sex", "smoker", "day", "size"], dimensions_max_cardinality=4 ) From ad011471326f6e6c7fc28121c14de46f42f3e97d Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Thu, 25 Jun 2020 15:17:47 +0200 Subject: [PATCH 2/6] Update packages/python/plotly/plotly/express/_core.py --- packages/python/plotly/plotly/express/_core.py | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 6dba7b55052..175e280f30b 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -473,7 +473,6 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref): for k, v in mapping_labels.items(): # We need to invert the mapping here k_args = invert_label(args, k) - print(k, k_args, args["hover_data"]) if k_args in args["hover_data"]: formatter = ( args["hover_data"][k_args][0] From 06b6e08a5942cb79f50a4336a23d95e08b095b97 Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Thu, 25 Jun 2020 15:54:09 +0200 Subject: [PATCH 3/6] put back deleted code --- .../test_core/test_px/test_px_functions.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_functions.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_functions.py index 02f9e1c6b83..8dde6a7be4e 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_functions.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_functions.py @@ -331,6 +331,28 @@ def test_parcats_dimensions_max(): # default behaviour fig = px.parallel_categories(df) + assert [d.label for d in fig.data[0].dimensions] == [ + "sex", + "smoker", + "day", + "time", + "size", + ] + + # explicit subset of default + fig = px.parallel_categories(df, dimensions=["sex", "smoker", "day"]) + assert [d.label for d in fig.data[0].dimensions] == ["sex", "smoker", "day"] + + # shrinking max + fig = px.parallel_categories(df, dimensions_max_cardinality=4) + assert [d.label for d in fig.data[0].dimensions] == [ + "sex", + "smoker", + "day", + "time", + ] + + # explicit superset of default, violating the max fig = px.parallel_categories( df, dimensions=["sex", "smoker", "day", "size"], dimensions_max_cardinality=4 ) From 32432d43baf1aebec2fdbfd672486bfd506fc49c Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Thu, 25 Jun 2020 20:45:09 +0200 Subject: [PATCH 4/6] added test --- .../plotly/tests/test_core/test_px/test_px_hover.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_hover.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_hover.py index 07d0e201a43..6e1b57cba34 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_hover.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_hover.py @@ -151,3 +151,15 @@ def test_fail_wrong_column(): "Ambiguous input: values for 'c' appear both in hover_data and data_frame" in str(err_msg.value) ) + + +def test_sunburst_hoverdict_color(): + df = px.data.gapminder().query("year == 2007") + fig = px.sunburst( + df, + path=["continent", "country"], + values="pop", + color="lifeExp", + hover_data={"pop": ":,"}, + ) + assert "color" in fig.data[0].hovertemplate From fc8a93f8a7cabe6ee2a118cfcd46f2daccb035e5 Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Thu, 25 Jun 2020 21:09:41 +0200 Subject: [PATCH 5/6] code improvement --- packages/python/plotly/plotly/express/_core.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 175e280f30b..96d8083bbd3 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -474,11 +474,7 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref): # We need to invert the mapping here k_args = invert_label(args, k) if k_args in args["hover_data"]: - formatter = ( - args["hover_data"][k_args][0] - if isinstance(args["hover_data"][k_args], tuple) - else args["hover_data"][k_args] - ) + formatter = args["hover_data"][k_args][0] if formatter: if isinstance(formatter, str): mapping_labels_copy[k] = v.replace("}", "%s}" % formatter) @@ -1583,7 +1579,7 @@ def aggfunc_continuous(x): args["hover_data"] = [args["color"]] elif isinstance(args["hover_data"], dict): if not args["hover_data"].get(args["color"]): - args["hover_data"][args["color"]] = True + args["hover_data"][args["color"]] = (True, None) else: args["hover_data"].append(args["color"]) return args From f896eb42391188e4907446d88d479ade4c8c38de Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Fri, 26 Jun 2020 14:23:05 +0200 Subject: [PATCH 6/6] changelog entry + improve name of new column --- CHANGELOG.md | 3 +++ packages/python/plotly/plotly/express/_core.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f6e9fdc3702..69ff4b50353 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,9 @@ This project adheres to [Semantic Versioning](http://semver.org/). - Fixed special cases with `px.sunburst` and `px.treemap` with `path` input ([#2524](https://github.com/plotly/plotly.py/pull/2524)) - Fixed bug in `hover_data` argument of `px` functions, when the column name is changed with labels and `hover_data` is a dictionary setting up a specific format for the hover data ([#2544](https://github.com/plotly/plotly.py/pull/2544)). - Made the Plotly Express `trendline` argument more robust and made it work with datetime `x` values ([#2554](https://github.com/plotly/plotly.py/pull/2554)) +- Fixed bug in `px.sunburst` and `px.treemap`: when the `color` and `values` + arguments correspond to the same column, a different aggregation function has +to be used for the two arguments ([#2591](https://github.com/plotly/plotly.py/pull/2591))#2591 ## [4.8.1] - 2020-05-28 diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 96d8083bbd3..0f2def689ff 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1498,7 +1498,7 @@ def aggfunc_discrete(x): if args["color"]: if args["color"] == args["values"]: - new_value_col_name = args["values"] + "_total" + new_value_col_name = args["values"] + "_sum" df[new_value_col_name] = df[args["values"]] args["values"] = new_value_col_name count_colname = args["values"]