Skip to content

🐛 parse object arrays for hf_x #116

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Dec 2, 2022
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions plotly_resampler/figure_resampler/figure_resampler_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@
__author__ = "Jonas Van Der Donckt, Jeroen Van Der Donckt, Emiel Deprost"

import re
import warnings
from copy import copy
from typing import Dict, Iterable, List, Optional, Tuple, Union
from uuid import uuid4
from collections import namedtuple

import dash
import datetime
import numpy as np
import pandas as pd
import plotly.graph_objects as go
Expand Down Expand Up @@ -689,6 +691,46 @@ def _parse_get_trace_props(
if isinstance(hf_hovertext, np.ndarray):
hf_hovertext = hf_hovertext[not_nan_mask]

# Try to parse the hf_x data if it is of string or object dtype
if len(hf_x) and (hf_x.dtype.type is np.str_ or hf_x.dtype == "object"):# and not isinstance(hf_x[0], (pd.Timestamp, datetime.datetime)):
try:
# Try to parse to numeric
hf_x = pd.to_numeric(hf_x, errors="raise")
except (ValueError, TypeError):
try:
# Try to parse to datetime
hf_x = pd.to_datetime(hf_x, utc=False, errors="raise")
# Will be cast to object array if it contains multiple timezones.
if hf_x.dtype == "object":
raise ValueError(
"The x-data contains multiple timezones, which is not "
"supported by plotly-resampler!"
)
except (ValueError, TypeError):
raise ValueError(
"plotly-resampler requires the x-data to be numeric or datetime-like"
"\nMore details in the stacktrace above."
)

# # Check and update timezones of the hf_x data when there are multiple
# # timezones in the data
# if len(hf_x) and hf_x.dtype == "object" and isinstance(hf_x[0], (pd.Timestamp, datetime.datetime)):
# # Assumes that all values in hf_x are either pd.Timestamp or datetime.datetime
# try:
# hf_x = pd.to_datetime(hf_x, utc=False)
# except ValueError:
# # ValueError will be thrown when there are multiple timezones in the data
# # => remove the timezone data for plotting when multiple timezones
# warnings.warn(
# "x-data of multiple timezones / fixedoffsets is passed, "
# + "omitting the timezone data for plotting\n"
# + "If you are dealing with daylight savings time we suggest converting to one and the same timezone.",
# UserWarning,
# )

# hf_x = [x.replace(tzinfo=None) for x in hf_x]
# hf_x = pd.to_datetime(hf_x, utc=False)

# If the categorical or string-like hf_y data is of type object (happens
# when y argument is used for the trace constructor instead of hf_y), we
# transform it to type string as such it will be sent as categorical data
Expand Down
6 changes: 3 additions & 3 deletions plotly_resampler/registering.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def _get_plotly_constr(constr):
Parameters
----------
constr : callable
The constructor of a instantiatedplotly-object.
The constructor of an instantiated plotly-object.

Returns
-------
Expand Down Expand Up @@ -98,10 +98,10 @@ def register_plotly_resampler(mode="auto", **aggregator_kwargs):
The mode of the plotly-resampler.
Possible values are: 'auto', 'figure', 'widget', None.
If 'auto' is used, the mode is determined based on the environment; if it is in
an ipython environment, the mode is 'widget', otherwise it is 'figure'.
an IPython environment, the mode is 'widget', otherwise it is 'figure'.
If 'figure' is used, all plotly figures are wrapped as FigureResampler objects.
If 'widget' is used, all plotly figure widgets are wrapped as
FigureWidgetResampler objects (we advise to use this mode in ipython environment
FigureWidgetResampler objects (we advise to use this mode in an IPython environment
with a kernel).
If None is used, wrapping is done as expected (go.Figure -> FigureResampler,
go.FigureWidget -> FigureWidgetResampler).
Expand Down
142 changes: 140 additions & 2 deletions tests/test_figure_resampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import pytest
import time
import datetime
import multiprocessing

import numpy as np
Expand Down Expand Up @@ -479,6 +480,9 @@ def test_multiple_timezones():
dr.tz_convert("Australia/Canberra"),
]

plain_plotly_fig = make_subplots(rows=len(cs), cols=1, shared_xaxes=True)
plain_plotly_fig.update_layout(height=min(300, 250 * len(cs)))

fr_fig = FigureResampler(
make_subplots(rows=len(cs), cols=1, shared_xaxes=True),
default_n_shown_samples=500,
Expand All @@ -488,14 +492,72 @@ def test_multiple_timezones():
fr_fig.update_layout(height=min(300, 250 * len(cs)))

for i, date_range in enumerate(cs, 1):
name = date_range.dtype.name.split(", ")[-1][:-1]
plain_plotly_fig.add_trace(
go.Scattergl(x=date_range, y=dr_v, name=name), row=i, col=1
)
fr_fig.add_trace(
go.Scattergl(name=date_range.dtype.name.split(", ")[-1]),
go.Scattergl(name=name),
hf_x=date_range,
hf_y=dr_v,
row=i,
col=1,
)

# Assert that the time parsing is exactly the same
assert plain_plotly_fig.data[0].x[0] == fr_fig.data[0].x[0]


def test_multiple_timezones_in_single_x_index__datetimes_and_timestamps():
    """Reject x-data that mixes several timezones within a single index.

    Two hourly ranges in different timezones are concatenated, once holding
    ``pd.Timestamp`` objects and once holding ``datetime.datetime`` objects.
    Every way of handing such data to ``FigureResampler`` that is exercised
    below is expected to raise a ``ValueError``.
    """
    # TODO: can be improved with pytest parametrize
    y = np.arange(20)

    tz_eastern = pd.date_range("2018-01-01", periods=10, freq="H", tz="US/Eastern")
    tz_dubai = pd.date_range("2018-01-02", periods=10, freq="H", tz="Asia/Dubai")
    index_timestamps = tz_eastern.append(tz_dubai)
    assert all(isinstance(ts, pd.Timestamp) for ts in index_timestamps)
    index_datetimes = pd.Index([ts.to_pydatetime() for ts in index_timestamps])
    assert not any(isinstance(dt, pd.Timestamp) for dt in index_datetimes)
    assert all(isinstance(dt, datetime.datetime) for dt in index_datetimes)

    ## Test why we throw ValueError if array is still of object type after
    ## successful pd.to_datetime call
    # String array of datetimes with same tz -> NOT object array
    assert not (pd.to_datetime(tz_eastern.astype("str")).dtype == "object")
    # String array of datetimes with multiple tz -> object array
    assert pd.to_datetime(index_timestamps.astype("str")).dtype == "object"
    assert pd.to_datetime(index_datetimes.astype("str")).dtype == "object"

    def expect_wrapping_to_fail(x):
        # Wrapping an existing plotly figure that already holds the x-data
        plain_fig = go.Figure()
        plain_fig.add_trace(go.Scattergl(x=x, y=y))
        with pytest.raises(ValueError):
            FigureResampler(plain_fig, default_n_shown_samples=10)

    def expect_hf_x_to_fail(hf_x):
        # Passing the x-data through the hf_x argument of add_trace
        resampler = FigureResampler(default_n_shown_samples=10)
        with pytest.raises(ValueError):
            resampler.add_trace(go.Scattergl(), hf_x=hf_x, hf_y=y)

    for index in (index_timestamps, index_datetimes):
        expect_wrapping_to_fail(index)
        # Add as hf_x as index
        expect_hf_x_to_fail(index)
        # Add as hf_x as object array of datetime values
        expect_hf_x_to_fail(index.values.astype("object"))
        # Add as hf_x as string array
        expect_hf_x_to_fail(index.astype(str))
        # Add as hf_x as object array of strings
        expect_hf_x_to_fail(index.astype(str).astype("object"))

        # Same data inside a plain figure, as object index and as string index
        expect_wrapping_to_fail(index.astype("object"))
        expect_wrapping_to_fail(index.astype("str"))

def test_proper_copy_of_wrapped_fig(float_series):
plotly_fig = go.Figure()
Expand Down Expand Up @@ -541,6 +603,82 @@ def test_2d_input_y():
assert "1 dimensional" in e_info


def test_hf_x_object_array():
    """Check how object-dtype x-data is handled when a trace is added.

    Every object array is added in two ways: through the ``x`` property of the
    trace and through the ``hf_x`` argument of ``add_trace``. Datetime-like and
    integer-like object arrays must end up as a ``pd.DatetimeIndex`` or an
    integer array respectively; an object array of arbitrary strings must be
    rejected with a ``ValueError``.
    """
    y = np.random.randn(100)

    def add_as_trace_x(fig, x):
        # Add in the scatter
        fig.add_trace(go.Scatter(name="blabla", x=x, y=y))

    def add_as_hf_x(fig, x):
        # Add as hf_x
        fig.add_trace(go.Scatter(name="blabla"), hf_x=x, hf_y=y)

    add_methods = (add_as_trace_x, add_as_hf_x)

    ## Object array of datetime
    ### Should be parsed to a pd.DatetimeIndex (is more efficient than object array)
    x = pd.date_range("2020-01-01", freq="s", periods=100).astype("object")
    assert x.dtype == "object"
    assert isinstance(x[0], pd.Timestamp)
    for add in add_methods:
        fig = FigureResampler(default_n_shown_samples=50)
        add(fig, x)
        assert isinstance(fig.hf_data[0]["x"], pd.DatetimeIndex)
        assert isinstance(fig.hf_data[0]["x"][0], pd.Timestamp)

    ## Object array of datetime strings
    ### Should be parsed to a pd.DatetimeIndex (is more efficient than object array)
    x = pd.date_range("2020-01-01", freq="s", periods=100).astype(str).astype("object")
    assert x.dtype == "object"
    assert isinstance(x[0], str)
    for add in add_methods:
        fig = FigureResampler(default_n_shown_samples=50)
        add(fig, x)
        assert isinstance(fig.hf_data[0]["x"], pd.DatetimeIndex)
        assert isinstance(fig.hf_data[0]["x"][0], pd.Timestamp)

    ## Object array of ints
    ### Should be parsed to an int array (is more efficient than object array)
    x = np.arange(100).astype("object")
    assert x.dtype == "object"
    assert isinstance(x[0], int)
    for add in add_methods:
        fig = FigureResampler(default_n_shown_samples=50)
        add(fig, x)
        assert np.issubdtype(fig.hf_data[0]["x"].dtype, np.integer)

    ## Object array of ints as strings
    ### Should be parsed to an integer array as well
    x = np.arange(100).astype(str).astype("object")
    assert x.dtype == "object"
    assert isinstance(x[0], str)
    for add in add_methods:
        fig = FigureResampler(default_n_shown_samples=50)
        add(fig, x)
        assert np.issubdtype(fig.hf_data[0]["x"].dtype, np.integer)

    ## Object array of strings
    x = np.array(["x", "y"] * 50).astype("object")
    assert x.dtype == "object"
    for add in add_methods:
        # Build the figure outside of the raises-context so that only the
        # add_trace call (where the x-data is passed) can satisfy the check.
        fig = FigureResampler(default_n_shown_samples=50)
        with pytest.raises(ValueError):
            add(fig, x)


def test_time_tz_slicing():
n = 5050
dr = pd.Series(
Expand Down
Loading