fix PX timezone treatment

nicolaskruchten · nicolaskruchten · commit 36013228bc2a · 2020-09-05T19:27:06.000-04:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -16,9 +16,15 @@ This project adheres to [Semantic Versioning](http://semver.org/).
   `binary_backend`, `binary_format` and `binary_compression_level` control
   how to generate the b64 string ([#2691](https://github.com/plotly/plotly.py/pull/2691)
 - `px.imshow` has a new `constrast_rescaling` argument in order to choose how
-  to set data values corresponding to the bounds of the color range 
+  to set data values corresponding to the bounds of the color range
   ([#2691](https://github.com/plotly/plotly.py/pull/2691)
 
+### Fixed
+
+- Plotly Express no longer converts datetime columns of input dataframes to UTC ([#2749](https://github.com/plotly/plotly.py/pull/2749))
+- Plotly Express has more complete support for datetimes as additional `hover_data` ([#2749](https://github.com/plotly/plotly.py/pull/2749))
+
+
 ## [4.9.0] - 2020-07-16
 
 ### Added
diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py
@@ -222,7 +222,6 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref):
     trace_patch = trace_spec.trace_patch.copy() or {}
     fit_results = None
     hover_header = ""
-    custom_data_len = 0
     for attr_name in trace_spec.attrs:
         attr_value = args[attr_name]
         attr_label = get_decorated_label(args, attr_value, attr_name)
@@ -243,7 +242,7 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref):
                 )
             ]
             trace_patch["dimensions"] = [
-                dict(label=get_label(args, name), values=column.values)
+                dict(label=get_label(args, name), values=column)
                 for (name, column) in dims
             ]
             if trace_spec.constructor == go.Splom:
@@ -287,10 +286,8 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref):
                     y = sorted_trace_data[args["y"]].values
                     x = sorted_trace_data[args["x"]].values
 
-                    x_is_date = False
                     if x.dtype.type == np.datetime64:
                         x = x.astype(int) / 10 ** 9  # convert to unix epoch seconds
-                        x_is_date = True
                     elif x.dtype.type == np.object_:
                         try:
                             x = x.astype(np.float64)
@@ -308,21 +305,22 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref):
                                 "Could not convert value of 'y' into a numeric type."
                             )
 
+                    # preserve original values of "x" in case they're dates
+                    trace_patch["x"] = sorted_trace_data[args["x"]][
+                        np.logical_not(np.logical_or(np.isnan(y), np.isnan(x)))
+                    ]
+
                     if attr_value == "lowess":
                         # missing ='drop' is the default value for lowess but not for OLS (None)
                         # we force it here in case statsmodels change their defaults
                         trendline = sm.nonparametric.lowess(y, x, missing="drop")
-                        trace_patch["x"] = trendline[:, 0]
                         trace_patch["y"] = trendline[:, 1]
                         hover_header = "<b>LOWESS trendline</b><br><br>"
                     elif attr_value == "ols":
                         fit_results = sm.OLS(
                             y, sm.add_constant(x), missing="drop"
                         ).fit()
                         trace_patch["y"] = fit_results.predict()
-                        trace_patch["x"] = x[
-                            np.logical_not(np.logical_or(np.isnan(y), np.isnan(x)))
-                        ]
                         hover_header = "<b>OLS trendline</b><br>"
                         if len(fit_results.params) == 2:
                             hover_header += "%s = %g * %s + %g<br>" % (
@@ -339,8 +337,6 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref):
                         hover_header += (
                             "R<sup>2</sup>=%f<br><br>" % fit_results.rsquared
                         )
-                    if x_is_date:
-                        trace_patch["x"] = pd.to_datetime(trace_patch["x"] * 10 ** 9)
                     mapping_labels[get_label(args, args["x"])] = "%{x}"
                     mapping_labels[get_label(args, args["y"])] = "%{y} <b>(trend)</b>"
             elif attr_name.startswith("error"):
@@ -350,8 +346,9 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref):
                     trace_patch[error_xy] = {}
                 trace_patch[error_xy][arr] = trace_data[attr_value]
             elif attr_name == "custom_data":
-                trace_patch["customdata"] = trace_data[attr_value].values
-                custom_data_len = len(attr_value)  # number of custom data columns
+                # here we store a data frame in customdata, and it's serialized
+                # as a list of row lists, which is what we want
+                trace_patch["customdata"] = trace_data[attr_value]
             elif attr_name == "hover_name":
                 if trace_spec.constructor not in [
                     go.Histogram,
@@ -368,29 +365,23 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref):
                     go.Histogram2dContour,
                 ]:
                     hover_is_dict = isinstance(attr_value, dict)
+                    customdata_cols = args.get("custom_data") or []
                     for col in attr_value:
                         if hover_is_dict and not attr_value[col]:
                             continue
                         try:
                             position = args["custom_data"].index(col)
                         except (ValueError, AttributeError, KeyError):
-                            position = custom_data_len
-                            custom_data_len += 1
-                            if "customdata" in trace_patch:
-                                trace_patch["customdata"] = np.hstack(
-                                    (
-                                        trace_patch["customdata"],
-                                        trace_data[col].values[:, None],
-                                    )
-                                )
-                            else:
-                                trace_patch["customdata"] = trace_data[col].values[
-                                    :, None
-                                ]
+                            position = len(customdata_cols)
+                            customdata_cols.append(col)
                         attr_label_col = get_decorated_label(args, col, None)
                         mapping_labels[attr_label_col] = "%%{customdata[%d]}" % (
                             position
                         )
+
+                    # here we store a data frame in customdata, and it's serialized
+                    # as a list of row lists, which is what we want
+                    trace_patch["customdata"] = trace_data[customdata_cols]
             elif attr_name == "color":
                 if trace_spec.constructor in [go.Choropleth, go.Choroplethmapbox]:
                     trace_patch["z"] = trace_data[attr_value]
@@ -1029,6 +1020,16 @@ def _escape_col_name(df_input, col_name, extra):
     return col_name
 
 
+def to_unindexed_series(x):
+    """
+    assuming x is list-like or even an existing pd.Series, return a new pd.Series with
+    no index, without extracting the data from an existing Series via numpy, which
+    seems to mangle datetime columns. Stripping the index from existing pd.Series is
+    required to get things to match up right in the new DataFrame we're building
+    """
+    return pd.Series(x).reset_index(drop=True)
+
+
 def process_args_into_dataframe(args, wide_mode, var_name, value_name):
     """
     After this function runs, the `all_attrables` keys of `args` all contain only
@@ -1140,10 +1141,7 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
                                 length,
                             )
                         )
-                    if hasattr(real_argument, "values"):
-                        df_output[col_name] = real_argument.values
-                    else:
-                        df_output[col_name] = np.array(real_argument)
+                    df_output[col_name] = to_unindexed_series(real_argument)
                 elif not df_provided:
                     raise ValueError(
                         "String or int arguments are only possible when a "
@@ -1178,7 +1176,7 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
                     )
                 else:
                     col_name = str(argument)
-                    df_output[col_name] = df_input[argument].values
+                    df_output[col_name] = to_unindexed_series(df_input[argument])
             # ----------------- argument is likely a column / array / list.... -------
             else:
                 if df_provided and hasattr(argument, "name"):
@@ -1207,10 +1205,7 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
                         "length of  previously-processed arguments %s is %d"
                         % (field, len(argument), str(list(df_output.columns)), length)
                     )
-                if hasattr(argument, "values"):
-                    df_output[str(col_name)] = argument.values
-                else:
-                    df_output[str(col_name)] = np.array(argument)
+                df_output[str(col_name)] = to_unindexed_series(argument)
 
             # Finally, update argument with column name now that column exists
             assert col_name is not None, (
diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_hover.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_hover.py
@@ -163,3 +163,10 @@ def test_sunburst_hoverdict_color():
         hover_data={"pop": ":,"},
     )
     assert "color" in fig.data[0].hovertemplate
+
+
+def test_date_in_hover():
+    df = pd.DataFrame({"date": ["2015-04-04 19:31:30+1:00"], "value": [3]})
+    df["date"] = pd.to_datetime(df["date"])
+    fig = px.scatter(df, x="value", y="value", hover_data=["date"])
+    assert str(fig.data[0].customdata[0][0]) == str(df["date"][0])
diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py
@@ -233,6 +233,14 @@ def test_build_df_with_index():
     assert_frame_equal(tips.reset_index()[out["data_frame"].columns], out["data_frame"])
 
 
+def test_timezones():
+    df = pd.DataFrame({"date": ["2015-04-04 19:31:30+1:00"], "value": [3]})
+    df["date"] = pd.to_datetime(df["date"])
+    args = dict(data_frame=df, x="date", y="value")
+    out = build_dataframe(args, go.Scatter)
+    assert str(out["data_frame"]["date"][0]) == str(df["date"][0])
+
+
 def test_non_matching_index():
     df = pd.DataFrame(dict(y=[1, 2, 3]), index=["a", "b", "c"])
 
diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_trendline.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_trendline.py
@@ -102,9 +102,11 @@ def test_trendline_on_timeseries(mode):
     )
 
     df["date"] = pd.to_datetime(df["date"])
+    df["date"] = df["date"].dt.tz_localize("CET")  # force a timezone
     fig = px.scatter(df, x="date", y="GOOG", trendline=mode)
     assert len(fig.data) == 2
     assert len(fig.data[0].x) == len(fig.data[1].x)
     assert type(fig.data[0].x[0]) == datetime
     assert type(fig.data[1].x[0]) == datetime
     assert np.all(fig.data[0].x == fig.data[1].x)
+    assert str(fig.data[0].x[0]) == str(fig.data[1].x[0])
diff --git a/packages/python/plotly/tox.ini b/packages/python/plotly/tox.ini
@@ -24,13 +24,13 @@
 ; PASSING ADDITONAL ARGUMENTS TO TEST COMMANDS
 ; The {posargs} is tox-specific and passes in any command line args after `--`.
 ; For example, given the testing command in *this* file:
-;   pytest {posargs} -x plotly/tests/test_core
+;   pytest {posargs} plotly/tests/test_core
 ;
 ; The following command:
 ;   tox -- -k 'not nodev'
 ;
 ; Tells tox to call:
-;   pytest -k 'not nodev' -x plotly/tests/test_core
+;   pytest -k 'not nodev' plotly/tests/test_core
 ;
 
 [tox]
@@ -81,25 +81,25 @@ deps=
 basepython={env:PLOTLY_TOX_PYTHON_27:}
 commands=
     python --version
-    pytest {posargs} -x plotly/tests/test_core
+    pytest {posargs} plotly/tests/test_core
 
 [testenv:py35-core]
 basepython={env:PLOTLY_TOX_PYTHON_35:}
 commands=
     python --version
-    pytest {posargs} -x plotly/tests/test_core
+    pytest {posargs} plotly/tests/test_core
 
 [testenv:py36-core]
 basepython={env:PLOTLY_TOX_PYTHON_36:}
 commands=
     python --version
-    pytest {posargs} -x plotly/tests/test_core
+    pytest {posargs} plotly/tests/test_core
 
 [testenv:py37-core]
 basepython={env:PLOTLY_TOX_PYTHON_37:}
 commands=
     python --version
-    pytest {posargs} -x plotly/tests/test_core
+    pytest {posargs} plotly/tests/test_core
     pytest {posargs} -x test_init/test_dependencies_not_imported.py
     pytest {posargs} -x test_init/test_lazy_imports.py
 
@@ -111,41 +111,41 @@ commands=
 ;;   Do some coverage reporting. No need to do this for all environments.
 ;    mkdir -p {envbindir}/../../coverage-reports/{envname}
 ;    coverage erase
-;    coverage run --include="*/plotly/*" --omit="*/tests*" {envbindir}/nosetests {posargs} -x plotly/tests
+;    coverage run --include="*/plotly/*" --omit="*/tests*" {envbindir}/nosetests {posargs} plotly/tests
 ;    coverage html -d "{envbindir}/../../coverage-reports/{envname}" --title={envname}
 
 [testenv:py27-optional]
 basepython={env:PLOTLY_TOX_PYTHON_27:}
 commands=
     python --version
-    pytest {posargs} -x plotly/tests/test_core
-    pytest {posargs} -x plotly/tests/test_optional
+    pytest {posargs} plotly/tests/test_core
+    pytest {posargs} plotly/tests/test_optional
     pytest _plotly_utils/tests/
     pytest plotly/tests/test_io
 
 [testenv:py35-optional]
 basepython={env:PLOTLY_TOX_PYTHON_35:}
 commands=
     python --version
-    pytest {posargs} -x plotly/tests/test_core
-    pytest {posargs} -x plotly/tests/test_optional
+    pytest {posargs} plotly/tests/test_core
+    pytest {posargs} plotly/tests/test_optional
     pytest _plotly_utils/tests/
     pytest plotly/tests/test_io
 
 [testenv:py36-optional]
 basepython={env:PLOTLY_TOX_PYTHON_36:}
 commands=
     python --version
-    pytest {posargs} -x plotly/tests/test_core
-    pytest {posargs} -x plotly/tests/test_optional
+    pytest {posargs} plotly/tests/test_core
+    pytest {posargs} plotly/tests/test_optional
     pytest _plotly_utils/tests/
     pytest plotly/tests/test_io
 
 [testenv:py37-optional]
 basepython={env:PLOTLY_TOX_PYTHON_37:}
 commands=
     python --version
-    pytest {posargs} -x plotly/tests/test_core
-    pytest {posargs} -x plotly/tests/test_optional
+    pytest {posargs} plotly/tests/test_core
+    pytest {posargs} plotly/tests/test_optional
     pytest _plotly_utils/tests/
     pytest plotly/tests/test_io