predict-idlab · jvdd · Dec 2, 2022 · Sep 7, 2022 · Sep 7, 2022 · Sep 7, 2022
diff --git a/plotly_resampler/figure_resampler/figure_resampler_interface.py b/plotly_resampler/figure_resampler/figure_resampler_interface.py
@@ -13,12 +13,14 @@
 __author__ = "Jonas Van Der Donckt, Jeroen Van Der Donckt, Emiel Deprost"
 
 import re
+import warnings
 from copy import copy
 from typing import Dict, Iterable, List, Optional, Tuple, Union
 from uuid import uuid4
 from collections import namedtuple
 
 import dash
+import datetime
 import numpy as np
 import pandas as pd
 import plotly.graph_objects as go
@@ -689,6 +691,46 @@ def _parse_get_trace_props(
                 if isinstance(hf_hovertext, np.ndarray):
                     hf_hovertext = hf_hovertext[not_nan_mask]
 
+            # Try to parse the hf_x data if it is of object or string type
+            if len(hf_x) and (hf_x.dtype.type is np.str_ or hf_x.dtype == "object"):# and not isinstance(hf_x[0], (pd.Timestamp, datetime.datetime)):
+                try:
+                    # Try to parse to numeric
+                    hf_x = pd.to_numeric(hf_x, errors="raise")
+                except (ValueError, TypeError):
+                    try:
+                        # Try to parse to datetime
+                        hf_x = pd.to_datetime(hf_x, utc=False, errors="raise")
+                        # Will be cast to object array if it contains multiple timezones.
+                        if hf_x.dtype == "object":
+                            raise ValueError(
+                                "The x-data contains multiple timezones, which is not "
+                                "supported by plotly-resampler!"
+                            )
+                    except (ValueError, TypeError):
+                        raise ValueError(
+                            "plotly-resampler requires the x-data to be numeric or datetime-like"
+                            "\nMore details in the stacktrace above."
+                        )
+
+            # # Check and update timezones of the hf_x data when there are multiple
+            # # timezones in the data
+            # if len(hf_x) and hf_x.dtype == "object" and isinstance(hf_x[0], (pd.Timestamp, datetime.datetime)):
+            #     # Assumes that all values in hf_x are either pd.Timestamp or datetime.datetime
+            #     try:
+            #         hf_x = pd.to_datetime(hf_x, utc=False)
+            #     except ValueError:
+            #         # ValueError will be thrown when there are multiple timezones in the data
+            #         # => remove the timezone data for plotting when multiple timezones
+            #         warnings.warn(
+            #             "x-data of multiple timezones / fixedoffsets is passed, "
+            #             + "omitting the timezone data for plotting\n"
+            #             + "If you are dealing with daylight savings time we suggest converting to one and the same timezone.",
+            #             UserWarning,
+            #         )
+
+            #         hf_x = [x.replace(tzinfo=None) for x in hf_x]
+            #         hf_x = pd.to_datetime(hf_x, utc=False)
+
             # If the categorical or string-like hf_y data is of type object (happens
             # when y argument is used for the trace constructor instead of hf_y), we
             # transform it to type string as such it will be sent as categorical data

diff --git a/plotly_resampler/registering.py b/plotly_resampler/registering.py
@@ -33,7 +33,7 @@ def _get_plotly_constr(constr):
     Parameters
     ----------
     constr : callable
-        The constructor of a instantiatedplotly-object.
+        The constructor of an instantiated plotly-object.
 
     Returns
     -------
@@ -98,10 +98,10 @@ def register_plotly_resampler(mode="auto", **aggregator_kwargs):
         The mode of the plotly-resampler.
         Possible values are: 'auto', 'figure', 'widget', None.
         If 'auto' is used, the mode is determined based on the environment; if it is in
-        an ipython environment, the mode is 'widget', otherwise it is 'figure'.
+        an IPython environment, the mode is 'widget', otherwise it is 'figure'.
         If 'figure' is used, all plotly figures are wrapped as FigureResampler objects.
         If 'widget' is used, all plotly figure widgets are wrapped as
-        FigureWidgetResampler objects (we advise to use this mode in ipython environment
+        FigureWidgetResampler objects (we advise this mode in an IPython environment
         with a kernel).
         If None is used, wrapping is done as expected (go.Figure -> FigureResampler,
         go.FigureWidget -> FigureWidgetResampler).

diff --git a/tests/test_figure_resampler.py b/tests/test_figure_resampler.py
@@ -5,6 +5,7 @@
 
 import pytest
 import time
+import datetime
 import multiprocessing
 
 import numpy as np
@@ -479,6 +480,9 @@ def test_multiple_timezones():
         dr.tz_convert("Australia/Canberra"),
     ]
 
+    plain_plotly_fig = make_subplots(rows=len(cs), cols=1, shared_xaxes=True)
+    plain_plotly_fig.update_layout(height=min(300, 250 * len(cs)))
+
     fr_fig = FigureResampler(
         make_subplots(rows=len(cs), cols=1, shared_xaxes=True),
         default_n_shown_samples=500,
@@ -488,14 +492,72 @@ def test_multiple_timezones():
     fr_fig.update_layout(height=min(300, 250 * len(cs)))
 
     for i, date_range in enumerate(cs, 1):
+        name = date_range.dtype.name.split(", ")[-1][:-1]
+        plain_plotly_fig.add_trace(
+            go.Scattergl(x=date_range, y=dr_v, name=name), row=i, col=1
+        )
         fr_fig.add_trace(
-            go.Scattergl(name=date_range.dtype.name.split(", ")[-1]),
+            go.Scattergl(name=name),
             hf_x=date_range,
             hf_y=dr_v,
             row=i,
             col=1,
         )
-
+        # Assert that the time parsing is exactly the same
+        assert plain_plotly_fig.data[0].x[0] == fr_fig.data[0].x[0]
+
+
+def test_multiple_timezones_in_single_x_index__datetimes_and_timestamps():
+    # TODO: can be improved with pytest parametrize
+    y = np.arange(20)
+
+    index1 = pd.date_range('2018-01-01', periods=10, freq='H', tz="US/Eastern")
+    index2 = pd.date_range('2018-01-02', periods=10, freq='H', tz="Asia/Dubai")
+    index_timestamps = index1.append(index2)
+    assert all(isinstance(x, pd.Timestamp) for x in index_timestamps)
+    index_datetimes = pd.Index([x.to_pydatetime() for x in index_timestamps])
+    assert not any(isinstance(x, pd.Timestamp) for x in index_datetimes)
+    assert all(isinstance(x, datetime.datetime) for x in index_datetimes)
+
+    ## Test why we throw ValueError if array is still of object type after
+    ## successful pd.to_datetime call
+    # String array of datetimes with same tz -> NOT object array
+    assert not pd.to_datetime(index1.astype("str")).dtype == "object"
+    # String array of datetimes with multiple tz -> object array
+    assert pd.to_datetime(index_timestamps.astype("str")).dtype == "object"
+    assert pd.to_datetime(index_datetimes.astype("str")).dtype == "object"
+
+    for index in [index_timestamps, index_datetimes]:
+        fig = go.Figure()
+        fig.add_trace(go.Scattergl(x=index, y=y))
+        with pytest.raises(ValueError):
+            fr_fig = FigureResampler(fig, default_n_shown_samples=10)
+        # Add via hf_x, as an index
+        fr_fig = FigureResampler(default_n_shown_samples=10)
+        with pytest.raises(ValueError):
+            fr_fig.add_trace(go.Scattergl(), hf_x=index, hf_y=y)
+        # Add via hf_x, as an object array of datetime values
+        fr_fig = FigureResampler(default_n_shown_samples=10)
+        with pytest.raises(ValueError):
+            fr_fig.add_trace(go.Scattergl(), hf_x=index.values.astype("object"), hf_y=y)
+        # Add via hf_x, as a string array
+        fr_fig = FigureResampler(default_n_shown_samples=10)
+        with pytest.raises(ValueError):
+            fr_fig.add_trace(go.Scattergl(), hf_x=index.astype(str), hf_y=y)
+        # Add via hf_x, as an object array of strings
+        fr_fig = FigureResampler(default_n_shown_samples=10)
+        with pytest.raises(ValueError):
+            fr_fig.add_trace(go.Scattergl(), hf_x=index.astype(str).astype("object"), hf_y=y)
+
+        fig = go.Figure()
+        fig.add_trace(go.Scattergl(x=index.astype("object"), y=y))
+        with pytest.raises(ValueError):
+            fr_fig = FigureResampler(fig, default_n_shown_samples=10)
+
+        fig = go.Figure()
+        fig.add_trace(go.Scattergl(x=index.astype("str"), y=y))
+        with pytest.raises(ValueError):
+            fr_fig = FigureResampler(fig, default_n_shown_samples=10)
 
 def test_proper_copy_of_wrapped_fig(float_series):
     plotly_fig = go.Figure()
@@ -541,6 +603,82 @@ def test_2d_input_y():
         assert "1 dimensional" in e_info
 
 
+def test_hf_x_object_array():
+    y = np.random.randn(100)
+
+    ## Object array of datetime
+    ### Should be parsed to a pd.DatetimeIndex (is more efficient than object array)
+    x = pd.date_range("2020-01-01", freq="s", periods=100).astype("object")
+    assert x.dtype == "object"
+    assert isinstance(x[0], pd.Timestamp)
+    # Add in the scatter
+    fig = FigureResampler(default_n_shown_samples=50)
+    fig.add_trace(go.Scatter(name="blabla", x=x, y=y))
+    assert isinstance(fig.hf_data[0]["x"], pd.DatetimeIndex)
+    assert isinstance(fig.hf_data[0]["x"][0], pd.Timestamp)
+    # Add as hf_x
+    fig = FigureResampler(default_n_shown_samples=50)
+    fig.add_trace(go.Scatter(name="blabla"), hf_x=x, hf_y=y)
+    assert isinstance(fig.hf_data[0]["x"], pd.DatetimeIndex)
+    assert isinstance(fig.hf_data[0]["x"][0], pd.Timestamp)
+
+    ## Object array of datetime strings
+    ### Should be parsed to a pd.DatetimeIndex (is more efficient than object array)
+    x = pd.date_range("2020-01-01", freq="s", periods=100).astype(str).astype("object")
+    assert x.dtype == "object"
+    assert isinstance(x[0], str)
+    # Add in the scatter
+    fig = FigureResampler(default_n_shown_samples=50)
+    fig.add_trace(go.Scatter(name="blabla", x=x, y=y))
+    assert isinstance(fig.hf_data[0]["x"], pd.DatetimeIndex)
+    assert isinstance(fig.hf_data[0]["x"][0], pd.Timestamp)
+    # Add as hf_x
+    fig = FigureResampler(default_n_shown_samples=50)
+    fig.add_trace(go.Scatter(name="blabla"), hf_x=x, hf_y=y)
+    assert isinstance(fig.hf_data[0]["x"], pd.DatetimeIndex)
+    assert isinstance(fig.hf_data[0]["x"][0], pd.Timestamp)
+
+    ## Object array of ints
+    ### Should be parsed to an int array (is more efficient than object array)
+    x = np.arange(100).astype("object")
+    assert x.dtype == "object"
+    assert isinstance(x[0], int)
+    # Add in the scatter
+    fig = FigureResampler(default_n_shown_samples=50)
+    fig.add_trace(go.Scatter(name="blabla", x=x, y=y))
+    assert np.issubdtype(fig.hf_data[0]["x"].dtype, np.integer)
+    # Add as hf_x
+    fig = FigureResampler(default_n_shown_samples=50)
+    fig.add_trace(go.Scatter(name="blabla"), hf_x=x, hf_y=y)
+    assert np.issubdtype(fig.hf_data[0]["x"].dtype, np.integer)
+
+    ## Object array of ints as strings
+    ### Should be parsed to an int array (is more efficient than object array)
+    x = np.arange(100).astype(str).astype("object")
+    assert x.dtype == "object"
+    assert isinstance(x[0], str)
+    # Add in the scatter
+    fig = FigureResampler(default_n_shown_samples=50)
+    fig.add_trace(go.Scatter(name="blabla", x=x, y=y))
+    assert np.issubdtype(fig.hf_data[0]["x"].dtype, np.integer)
+    # Add as hf_x
+    fig = FigureResampler(default_n_shown_samples=50)
+    fig.add_trace(go.Scatter(name="blabla"), hf_x=x, hf_y=y)
+    assert np.issubdtype(fig.hf_data[0]["x"].dtype, np.integer)
+
+    ## Object array of non-numeric strings -> should raise a ValueError
+    x = np.array(["x", "y"]*50).astype("object")
+    assert x.dtype == "object"
+    # Add in the scatter
+    with pytest.raises(ValueError):
+        fig = FigureResampler(default_n_shown_samples=50)
+        fig.add_trace(go.Scatter(name="blabla", x=x, y=y))
+    # Add as hf_x
+    with pytest.raises(ValueError):
+        fig = FigureResampler(default_n_shown_samples=50)
+        fig.add_trace(go.Scatter(name="blabla"), hf_x=x, hf_y=y)
+
+
 def test_time_tz_slicing():
     n = 5050
     dr = pd.Series(