From 6158533b1ecc7abb8addead22399ff4f3d0b184a Mon Sep 17 00:00:00 2001 From: jonasvdd Date: Tue, 30 May 2023 15:13:12 +0200 Subject: [PATCH 1/3] :sparkles: first implementation --- examples/requirements.txt | 3 +- plotly_resampler/__init__.py | 2 +- .../aggregation/plotly_aggregator_parser.py | 94 ++++++++++++------- tests/test_figure_resampler.py | 23 +++++ 4 files changed, 85 insertions(+), 37 deletions(-) diff --git a/examples/requirements.txt b/examples/requirements.txt index d3a2e49e..e72e430a 100644 --- a/examples/requirements.txt +++ b/examples/requirements.txt @@ -5,4 +5,5 @@ ipywidgets>=7.7.0 memory-profiler>=0.60.0 line-profiler>=3.5.1 pyarrow>=6.0.0 -kaleido>=0.2.1 \ No newline at end of file +kaleido>=0.2.1 +flask-cors>=3.0.10 \ No newline at end of file diff --git a/plotly_resampler/__init__.py b/plotly_resampler/__init__.py index e069fa6a..e0a5d03e 100644 --- a/plotly_resampler/__init__.py +++ b/plotly_resampler/__init__.py @@ -8,7 +8,7 @@ __docformat__ = "numpy" __author__ = "Jonas Van Der Donckt, Jeroen Van Der Donckt, Emiel Deprost" -__version__ = "0.9.0rc0" +__version__ = "0.9.0rc3" __all__ = [ "__version__", diff --git a/plotly_resampler/aggregation/plotly_aggregator_parser.py b/plotly_resampler/aggregation/plotly_aggregator_parser.py index 9641a0f9..25e5bb13 100644 --- a/plotly_resampler/aggregation/plotly_aggregator_parser.py +++ b/plotly_resampler/aggregation/plotly_aggregator_parser.py @@ -87,6 +87,58 @@ def get_start_end_indices(hf_trace_data, axis_type, start, end) -> Tuple[int, in end_idx = bisect.bisect_right(hf_trace_data["x"], end) return start_idx, end_idx + @staticmethod + def _handle_gaps( + hf_trace_data: dict, + hf_x: np.ndarray, + agg_x: np.ndarray, + agg_y: np.ndarray, + indices: np.ndarray, + ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + """Handle the gaps in the aggregated data. + + Returns: + - agg_x: the aggregated x-values + - agg_y: the aggregated y-values + - indices: the indices of the hf_data data that were aggregated + + """ + gap_handler: AbstractGapHandler = hf_trace_data["gap_handler"] + downsampler = hf_trace_data["downsampler"] + + # TODO check for trace mode (markers, lines, etc.) and only perform the + # gap insertion methodology when the mode is lines. + # if trace.get("connectgaps") != True and + if ( + isinstance(gap_handler, NoGapHandler) + # rangeIndex | datetimeIndex with freq -> equally spaced x; so no gaps + or isinstance(hf_trace_data["x"], pd.RangeIndex) + or ( + isinstance(hf_trace_data["x"], pd.DatetimeIndex) + and hf_trace_data["x"].freq is not None + ) + ): + return agg_x, agg_y, indices + + # Interleave the gaps + # View the data as an int64 when we have a DatetimeIndex + # We only want to detect gaps, so we only want to compare values. + agg_x_parsed = PlotlyAggregatorParser.parse_hf_data(agg_x) + xdt = agg_x_parsed.dtype + if np.issubdtype(xdt, np.timedelta64) or np.issubdtype(xdt, np.datetime64): + agg_x_parsed = agg_x_parsed.view("int64") + + agg_y, indices = gap_handler.insert_fill_value_between_gaps( + agg_x_parsed, agg_y, indices + ) + if isinstance(downsampler, DataPointSelector): + agg_x = hf_x[indices] + elif isinstance(downsampler, DataAggregator): + # The indices are in this case a repeat + agg_x = agg_x[indices] + + return agg_x, agg_y, indices + @staticmethod def aggregate( hf_trace_data: dict, @@ -109,14 +161,15 @@ def aggregate( # No downsampling needed ; we show the raw data as is, no gap detection if (end_idx - start_idx) <= hf_trace_data["max_n_samples"]: - return hf_x, hf_y, np.arange(len(hf_y)) - - downsampler = hf_trace_data["downsampler"] - gap_handler: AbstractGapHandler = hf_trace_data["gap_handler"] + indices = np.arange(len(hf_y)) + return PlotlyAggregatorParser._handle_gaps( + hf_trace_data, hf_x=hf_x, agg_x=hf_x, agg_y=hf_y, indices=indices + ) hf_x_parsed = PlotlyAggregatorParser.parse_hf_data(hf_x) hf_y_parsed = PlotlyAggregatorParser.parse_hf_data(hf_y) + downsampler = hf_trace_data["downsampler"] if isinstance(downsampler, DataPointSelector): s_v = hf_y_parsed if isinstance(s_v, pd.Categorical): # pd.Categorical (has no .values) @@ -161,35 +214,6 @@ def aggregate( + f"DataAggregator or a DataPointSelector, got {type(downsampler)}" ) - # TODO check for trace mode (markers, lines, etc.) and only perform the - # gap insertion methodology when the mode is lines. - # if trace.get("connectgaps") != True and - if ( - isinstance(gap_handler, NoGapHandler) - # rangeIndex | datetimeIndex with freq -> equally spaced x; so no gaps - or isinstance(hf_trace_data["x"], pd.RangeIndex) - or ( - isinstance(hf_trace_data["x"], pd.DatetimeIndex) - and hf_trace_data["x"].freq is not None - ) - ): - return agg_x, agg_y, indices - - # Interleave the gaps - # View the data as an int64 when we have a DatetimeIndex - # We only want to detect gaps, so we only want to compare values. - agg_x_parsed = PlotlyAggregatorParser.parse_hf_data(agg_x) - xdt = agg_x_parsed.dtype - if np.issubdtype(xdt, np.timedelta64) or np.issubdtype(xdt, np.datetime64): - agg_x_parsed = agg_x_parsed.view("int64") - - agg_y, indices = gap_handler.insert_fill_value_between_gaps( - agg_x_parsed, agg_y, indices + return PlotlyAggregatorParser._handle_gaps( + hf_trace_data, hf_x=hf_x, agg_x=agg_x, agg_y=agg_y, indices=indices ) - if isinstance(downsampler, DataPointSelector): - agg_x = hf_x[indices] - elif isinstance(downsampler, DataAggregator): - # The indices are in this case a repeat - agg_x = agg_x[indices] - - return agg_x, agg_y, indices diff --git a/tests/test_figure_resampler.py b/tests/test_figure_resampler.py index 709c4b4f..6cf9bed7 100644 --- a/tests/test_figure_resampler.py +++ b/tests/test_figure_resampler.py @@ -105,6 +105,29 @@ def test_add_trace_not_resampling(float_series): ) +def test_add_trace_not_resampling_insert_gaps(): + # This test verifies whether gaps are inserted correctly when adding a trace that + # is not resampled (but `limit_to_view` is True) + idx = np.arange(500) + for i in np.random.randint(0, 500, 4): + idx[i:] += 100 + s = pd.Series(np.arange(500), index=idx) + + # limit_to_view=False -> no gaps inserted + fr = FigureResampler(default_n_shown_samples=1000) + fr.add_trace({}, hf_x=s.index, hf_y=s.values) + fr.add_trace(dict(x=s.index, y=s.values)) + assert np.isnan(fr.data[0]["y"]).sum() == 0 + assert np.isnan(fr.data[1]["y"]).sum() == 0 + + # limit_to_view=True -> gaps inserted + fr = FigureResampler(default_n_shown_samples=1000) + fr.add_trace({}, hf_x=s.index, hf_y=s.values, limit_to_view=True) + fr.add_trace(dict(x=s.index, y=s.values), limit_to_view=True) + assert np.isnan(fr.data[0]["y"]).sum() > 0 + assert np.isnan(fr.data[1]["y"]).sum() > 0 + + def test_various_dtypes(float_series): # List of dtypes supported by orjson >= 3.8 valid_dtype_list = [ From d60d004717c067316798268e7bd545dc9ddcdf0e Mon Sep 17 00:00:00 2001 From: jonasvdd Date: Tue, 30 May 2023 15:22:00 +0200 Subject: [PATCH 2/3] :pen: review --- plotly_resampler/aggregation/plotly_aggregator_parser.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/plotly_resampler/aggregation/plotly_aggregator_parser.py b/plotly_resampler/aggregation/plotly_aggregator_parser.py index 25e5bb13..c3b5026a 100644 --- a/plotly_resampler/aggregation/plotly_aggregator_parser.py +++ b/plotly_resampler/aggregation/plotly_aggregator_parser.py @@ -159,17 +159,18 @@ def aggregate( hf_x = hf_trace_data["x"][start_idx:end_idx] hf_y = hf_trace_data["y"][start_idx:end_idx] - # No downsampling needed ; we show the raw data as is, no gap detection + # No downsampling needed ; we show the raw data as is, but with gap-detection if (end_idx - start_idx) <= hf_trace_data["max_n_samples"]: indices = np.arange(len(hf_y)) return PlotlyAggregatorParser._handle_gaps( hf_trace_data, hf_x=hf_x, agg_x=hf_x, agg_y=hf_y, indices=indices ) + downsampler = hf_trace_data["downsampler"] + hf_x_parsed = PlotlyAggregatorParser.parse_hf_data(hf_x) hf_y_parsed = PlotlyAggregatorParser.parse_hf_data(hf_y) - downsampler = hf_trace_data["downsampler"] if isinstance(downsampler, DataPointSelector): s_v = hf_y_parsed if isinstance(s_v, pd.Categorical): # pd.Categorical (has no .values) From a905a108b978f019a5b7bbde9f694baef9557f51 Mon Sep 17 00:00:00 2001 From: Jeroen Van Der Donckt <18898740+jvdd@users.noreply.github.com> Date: Tue, 30 May 2023 15:26:59 +0200 Subject: [PATCH 3/3] :pen: code review --- plotly_resampler/aggregation/plotly_aggregator_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plotly_resampler/aggregation/plotly_aggregator_parser.py b/plotly_resampler/aggregation/plotly_aggregator_parser.py index c3b5026a..92206678 100644 --- a/plotly_resampler/aggregation/plotly_aggregator_parser.py +++ b/plotly_resampler/aggregation/plotly_aggregator_parser.py @@ -161,7 +161,7 @@ def aggregate( # No downsampling needed ; we show the raw data as is, but with gap-detection if (end_idx - start_idx) <= hf_trace_data["max_n_samples"]: - indices = np.arange(len(hf_y)) + indices = np.arange(len(hf_y)) # no downsampling - all values are selected return PlotlyAggregatorParser._handle_gaps( hf_trace_data, hf_x=hf_x, agg_x=hf_x, agg_y=hf_y, indices=indices )