Skip to content

Commit 71b4efe

Browse files
authored
Merge pull request #140 from predict-idlab/tsdownsample
✨ add check_nans to add_trace(s)
2 parents 7f80ef8 + 7b8e71c commit 71b4efe

File tree

4 files changed

+115
-19
lines changed

4 files changed

+115
-19
lines changed

plotly_resampler/figure_resampler/figure_resampler_interface.py

Lines changed: 40 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -595,6 +595,7 @@ def _parse_get_trace_props(
595595
hf_y: Iterable = None,
596596
hf_text: Iterable = None,
597597
hf_hovertext: Iterable = None,
598+
check_nans: bool = True,
598599
) -> _hf_data_container:
599600
"""Parse and capture the possibly high-frequency trace-props in a datacontainer.
600601
@@ -603,14 +604,19 @@ def _parse_get_trace_props(
603604
trace : BaseTraceType
604605
The trace which will be parsed.
605606
hf_x : Iterable, optional
606-
high-frequency trace "x" data, overrides the current trace its x-data.
607+
High-frequency trace "x" data, overrides the current trace its x-data.
607608
hf_y : Iterable, optional
608-
high-frequency trace "y" data, overrides the current trace its y-data.
609+
High-frequency trace "y" data, overrides the current trace its y-data.
609610
hf_text : Iterable, optional
610-
high-frequency trace "text" data, overrides the current trace its text-data.
611+
High-frequency trace "text" data, overrides the current trace its text-data.
611612
hf_hovertext : Iterable, optional
612-
high-frequency trace "hovertext" data, overrides the current trace its
613+
High-frequency trace "hovertext" data, overrides the current trace its
613614
hovertext data.
615+
check_nans: bool, optional
616+
Whether the `hf_y` should be checked for NaNs, by default True.
617+
As checking for NaNs is expensive, this can be disabled when the `hf_y` is
618+
already known to contain no NaNs (or when the downsampler can handle NaNs,
619+
e.g., EveryNthPoint).
614620
615621
Returns
616622
-------
@@ -680,7 +686,7 @@ def _parse_get_trace_props(
680686
# Remove NaNs for efficiency (storing less meaningless data)
681687
# NaNs introduce gaps between enclosing non-NaN data points & might distort
682688
# the resampling algorithms
683-
if pd.isna(hf_y).any():
689+
if check_nans and pd.isna(hf_y).any():
684690
not_nan_mask = ~pd.isna(hf_y)
685691
hf_x = hf_x[not_nan_mask]
686692
hf_y = hf_y[not_nan_mask]
@@ -821,6 +827,7 @@ def add_trace(
821827
hf_y: Iterable = None,
822828
hf_text: Union[str, Iterable] = None,
823829
hf_hovertext: Union[str, Iterable] = None,
830+
check_nans: bool = True,
824831
**trace_kwargs,
825832
):
826833
"""Add a trace to the figure.
@@ -848,7 +855,7 @@ def add_trace(
848855
.. note::
849856
If this variable is not set, ``_global_downsampler`` will be used.
850857
limit_to_view: boolean, optional
851-
If set to True the trace's datapoints will be cut to the corresponding
858+
If set to True, the trace's datapoints will be cut to the corresponding
852859
front-end view, even if the total number of samples is lower than
853860
``max_n_samples``. By default False.\n
854861
Remark that setting this parameter to True ensures that low frequency traces
@@ -866,6 +873,13 @@ def add_trace(
866873
hf_hovertext: Iterable, optional
867874
The original high frequency hovertext. If set, this has priority over the
868875
trace its ``hovertext`` argument.
876+
check_nans: boolean, optional
877+
If set to True, the trace's data will be checked for NaNs - which will be
878+
removed. By default True.
879+
As this is a costly operation, it is recommended to set this parameter to
880+
False if you are sure that your data does not contain NaNs (or when the
881+
downsampler can handle NaNs, e.g., EveryNthPoint). This should considerably
882+
speed up the graph construction time.
869883
**trace_kwargs: dict
870884
Additional trace related keyword arguments.
871885
e.g.: row=.., col=..., secondary_y=...
@@ -937,7 +951,7 @@ def add_trace(
937951

938952
# construct the hf_data_container
939953
# TODO in future version -> maybe regex on kwargs which start with `hf_`
940-
dc = self._parse_get_trace_props(trace, hf_x, hf_y, hf_text, hf_hovertext)
954+
dc = self._parse_get_trace_props(trace, hf_x, hf_y, hf_text, hf_hovertext, check_nans)
941955

942956
# These traces will determine the autoscale RANGE!
943957
# -> so also store when `limit_to_view` is set.
@@ -996,6 +1010,7 @@ def add_traces(
9961010
| List[AbstractSeriesAggregator]
9971011
| AbstractFigureAggregator = None,
9981012
limit_to_views: List[bool] | bool = False,
1013+
check_nans: List[bool] | bool = True,
9991014
**traces_kwargs,
10001015
):
10011016
"""Add traces to the figure.
@@ -1030,13 +1045,22 @@ def add_traces(
10301045
aggregator is passed, all traces will use this aggregator.
10311046
If this variable is not set, ``_global_downsampler`` will be used.
10321047
limit_to_views : None | List[bool] | bool, optional
1033-
List of limit_to_view booleans for the added traces. If set to True
1034-
the trace's datapoints will be cut to the corresponding front-end view,
1035-
even if the total number of samples is lower than ``max_n_samples``. If a
1036-
single boolean is passed, all to be added traces will use this value,
1048+
List of limit_to_view booleans for the added traces. If set to True the
1049+
trace's datapoints will be cut to the corresponding front-end view, even if
1050+
the total number of samples is lower than ``max_n_samples``.
1051+
If a single boolean is passed, all to be added traces will use this value,
10371052
by default False.\n
10381053
Remark that setting this parameter to True ensures that low frequency traces
10391054
are added to the ``hf_data`` property.
1055+
check_nans : None | List[bool] | bool, optional
1056+
List of check_nans booleans for the added traces. If set to True, the
1057+
trace's datapoints will be checked for NaNs. If a single boolean is passed,
1058+
all to be added traces will use this value, by default True.\n
1059+
As this is a costly operation, it is recommended to set this parameter to
1060+
False if the data is known to contain no NaNs (or when the downsampler can
1061+
handle NaNs, e.g., EveryNthPoint). This will considerably speed up the graph
1062+
construction time.
1063+
10401064
**traces_kwargs: dict
10411065
Additional trace related keyword arguments.
10421066
e.g.: rows=.., cols=..., secondary_ys=...
@@ -1076,9 +1100,11 @@ def add_traces(
10761100
downsamplers = [downsamplers] * len(data)
10771101
if isinstance(limit_to_views, bool):
10781102
limit_to_views = [limit_to_views] * len(data)
1103+
if isinstance(check_nans, bool):
1104+
check_nans = [check_nans] * len(data)
10791105

1080-
for i, (trace, max_out, downsampler, limit_to_view) in enumerate(
1081-
zip(data, max_n_samples, downsamplers, limit_to_views)
1106+
for i, (trace, max_out, downsampler, limit_to_view, check_nan) in enumerate(
1107+
zip(data, max_n_samples, downsamplers, limit_to_views, check_nans)
10821108
):
10831109
if (
10841110
trace.type.lower() not in self._high_frequency_traces
@@ -1090,7 +1116,7 @@ def add_traces(
10901116
if not limit_to_view and (trace.y is None or len(trace.y) <= max_out_s):
10911117
continue
10921118

1093-
dc = self._parse_get_trace_props(trace)
1119+
dc = self._parse_get_trace_props(trace, check_nans=check_nan)
10941120
self._hf_data[trace.uid] = self._construct_hf_data_dict(
10951121
dc,
10961122
trace=trace,

plotly_resampler/registering.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def _get_plotly_constr(constr):
3333
Parameters
3434
----------
3535
constr : callable
36-
The constructor of a instantiatedplotly-object.
36+
The constructor of an instantiated plotly-object.
3737
3838
Returns
3939
-------
@@ -98,10 +98,10 @@ def register_plotly_resampler(mode="auto", **aggregator_kwargs):
9898
The mode of the plotly-resampler.
9999
Possible values are: 'auto', 'figure', 'widget', None.
100100
If 'auto' is used, the mode is determined based on the environment; if it is in
101-
an ipython environment, the mode is 'widget', otherwise it is 'figure'.
101+
an IPython environment, the mode is 'widget', otherwise it is 'figure'.
102102
If 'figure' is used, all plotly figures are wrapped as FigureResampler objects.
103103
If 'widget' is used, all plotly figure widgets are wrapped as
104-
FigureWidgetResampler objects (we advise to use this mode in ipython environment
104+
FigureWidgetResampler objects (we advise to use this mode in IPython environment
105105
with a kernel).
106106
If None is used, wrapping is done as expected (go.Figure -> FigureResampler,
107107
go.FigureWidget -> FigureWidgetResampler).

tests/test_figure_resampler.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,14 +343,17 @@ def test_nan_removed_input(float_series):
343343
)
344344

345345
float_series = float_series.copy()
346-
float_series.iloc[np.random.choice(len(float_series), 100)] = np.nan
346+
float_series.iloc[np.random.choice(len(float_series), 100, replace=False)] = np.nan
347347
fig.add_trace(
348348
go.Scatter(x=float_series.index, y=float_series, name="float_series"),
349349
row=1,
350350
col=1,
351351
hf_text="text",
352352
hf_hovertext="hovertext",
353353
)
354+
# Check the desired behavior
355+
assert len(fig.hf_data[0]["y"]) == len(float_series) - 100
356+
assert ~pd.isna(fig.hf_data[0]["y"]).any()
354357

355358
# here we test whether we are able to deal with not-nan output
356359
float_series.iloc[np.random.choice(len(float_series), 100)] = np.nan
@@ -374,6 +377,37 @@ def test_nan_removed_input(float_series):
374377
col=2,
375378
)
376379

380+
def test_nan_removed_input_check_nans_false(float_series):
381+
# see: https://plotly.com/python/subplots/#custom-sized-subplot-with-subplot-titles
382+
base_fig = make_subplots(
383+
rows=2,
384+
cols=2,
385+
specs=[[{}, {}], [{"colspan": 2}, None]],
386+
)
387+
388+
fig = FigureResampler(
389+
base_fig,
390+
default_n_shown_samples=1000,
391+
resampled_trace_prefix_suffix=(
392+
'<b style="color:sandybrown">[R]</b>',
393+
'<b style="color:sandybrown">[R]</b>',
394+
),
395+
)
396+
397+
float_series = float_series.copy()
398+
float_series.iloc[np.random.choice(len(float_series), 100)] = np.nan
399+
fig.add_trace(
400+
go.Scatter(x=float_series.index, y=float_series, name="float_series"),
401+
row=1,
402+
col=1,
403+
hf_text="text",
404+
hf_hovertext="hovertext",
405+
check_nans=False
406+
)
407+
# Check the undesired behavior
408+
assert len(fig.hf_data[0]["y"]) == len(float_series)
409+
assert pd.isna(fig.hf_data[0]["y"]).any()
410+
377411

378412
def test_hf_text():
379413
y = np.arange(10_000)

tests/test_figurewidget_resampler.py

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,14 +263,17 @@ def test_nan_removed_input(float_series):
263263
)
264264

265265
float_series = float_series.copy()
266-
float_series.iloc[np.random.choice(len(float_series), 100)] = np.nan
266+
float_series.iloc[np.random.choice(len(float_series), 100, replace=False)] = np.nan
267267
fig.add_trace(
268268
go.Scatter(x=float_series.index, y=float_series, name="float_series"),
269269
row=1,
270270
col=1,
271271
hf_text="text",
272272
hf_hovertext="hovertext",
273273
)
274+
# Check the desired behavior
275+
assert len(fig.hf_data[0]["y"]) == len(float_series) - 100
276+
assert ~pd.isna(fig.hf_data[0]["y"]).any()
274277

275278
# here we test whether we are able to deal with not-nan output
276279
float_series.iloc[np.random.choice(len(float_series), 100)] = np.nan
@@ -295,6 +298,38 @@ def test_nan_removed_input(float_series):
295298
)
296299

297300

301+
def test_nan_removed_input_check_nans_false(float_series):
302+
# see: https://plotly.com/python/subplots/#custom-sized-subplot-with-subplot-titles
303+
base_fig = make_subplots(
304+
rows=2,
305+
cols=2,
306+
specs=[[{}, {}], [{"colspan": 2}, None]],
307+
)
308+
309+
fig = FigureWidgetResampler(
310+
base_fig,
311+
default_n_shown_samples=1000,
312+
resampled_trace_prefix_suffix=(
313+
'<b style="color:sandybrown">[R]</b>',
314+
'<b style="color:sandybrown">[R]</b>',
315+
),
316+
)
317+
318+
float_series = float_series.copy()
319+
float_series.iloc[np.random.choice(len(float_series), 100)] = np.nan
320+
fig.add_trace(
321+
go.Scatter(x=float_series.index, y=float_series, name="float_series"),
322+
row=1,
323+
col=1,
324+
hf_text="text",
325+
hf_hovertext="hovertext",
326+
check_nans=False
327+
)
328+
# Check the undesired behavior
329+
assert len(fig.hf_data[0]["y"]) == len(float_series)
330+
assert pd.isna(fig.hf_data[0]["y"]).any()
331+
332+
298333
def test_hf_text():
299334
y = np.arange(10_000)
300335

@@ -795,6 +830,7 @@ def test_hf_data_subplots_non_shared_xaxes_row_col_none():
795830
assert 40_000 <= x_1[0] <= 40_000 + (20_000 / 1000)
796831
assert (60_000 - 20_000 / 1_000) <= x_1[-1] <= 60_000
797832

833+
798834
def test_updates_two_traces():
799835
n = 1_000_000
800836
X = np.arange(n)

0 commit comments

Comments
 (0)