From 717b1917c0633e7ba0109841a5df50afa9f5f2ab Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Fri, 7 Sep 2018 09:41:02 -0400 Subject: [PATCH 1/3] Fix error when setting compound or compound array property to `None` --- plotly/basedatatypes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plotly/basedatatypes.py b/plotly/basedatatypes.py index 16921089d5c..1c134f875f8 100644 --- a/plotly/basedatatypes.py +++ b/plotly/basedatatypes.py @@ -2981,7 +2981,7 @@ def _set_compound_prop(self, prop, val): # ------------------ if not self._in_batch_mode: if not new_dict_val: - if prop in self._props: + if self._props and prop in self._props: self._props.pop(prop) else: self._init_props() @@ -3055,7 +3055,7 @@ def _set_array_prop(self, prop, val): # ------------------ if not self._in_batch_mode: if not new_dict_vals: - if prop in self._props: + if self._props and prop in self._props: self._props.pop(prop) else: self._init_props() From ea0043d28ff95f09275cb35e17680c16e094a368 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Fri, 7 Sep 2018 09:45:53 -0400 Subject: [PATCH 2/3] Pandas datetime and numpy numeric array fixes 1) Preserve numeric numpy types as is in validator out, even if that numeric type is not supported as JavaScript TypedArray 2) Update widget serializer to check numeric numpy arrays for whether they are compatible with TypedArrays. If not, serialize as list. 3) Call to_pydatetime() on pandas datetime series/index values when passed to copy_to_readonly_numpy_array. 
This returns numpy array of datetimes (which we already know how to serialize) Fixes datetime issue in #1160 Fixes FigureWidget issue in #1155 --- _plotly_utils/basevalidators.py | 76 +++++--- .../validators/test_integer_validator.py | 2 +- .../validators/test_pandas_series_input.py | 179 ++++++++++++++++++ plotly/serializers.py | 11 +- 4 files changed, 243 insertions(+), 25 deletions(-) create mode 100644 _plotly_utils/tests/validators/test_pandas_series_input.py diff --git a/_plotly_utils/basevalidators.py b/_plotly_utils/basevalidators.py index c9d55a1a9a5..b69eb75c46b 100644 --- a/_plotly_utils/basevalidators.py +++ b/_plotly_utils/basevalidators.py @@ -52,7 +52,7 @@ def to_scalar_or_list(v): return v -def copy_to_readonly_numpy_array(v, dtype=None, force_numeric=False): +def copy_to_readonly_numpy_array(v, kind=None, force_numeric=False): """ Convert an array-like value into a read-only numpy array @@ -60,9 +60,10 @@ def copy_to_readonly_numpy_array(v, dtype=None, force_numeric=False): ---------- v : array like Array like value (list, tuple, numpy array, pandas series, etc.) - dtype : str - If specified, the numpy dtype that the array should be forced to - have. If not specified then let numpy infer the datatype + kind : str or tuple of str + If specified, the numpy dtype kind (or kinds) that the array should + have, or be converted to if possible. + If not specified then let numpy infer the datatype force_numeric : bool If true, raise an exception if the resulting numpy array does not have a numeric dtype (i.e. 
dtype.kind not in ['u', 'i', 'f']) @@ -81,23 +82,57 @@ def copy_to_readonly_numpy_array(v, dtype=None, force_numeric=False): # TODO: support datetime dtype here and in widget serialization # u: unsigned int, i: signed int, f: float - numeric_kinds = ['u', 'i', 'f'] + + # ### Process kind ### + if not kind: + kind = () + elif isinstance(kind, string_types): + kind = (kind,) + + first_kind = kind[0] if kind else None + + numeric_kinds = {'u', 'i', 'f'} + kind_default_dtypes = { + 'u': 'uint32', 'i': 'int32', 'f': 'float64', 'O': 'object'} # Unwrap data types that have a `values` property that might be a numpy # array. If this values property is a numeric numpy array then we # can take the fast path below + # + # Use date_series.to_pydatetime() + # if pd and isinstance(v, (pd.Series, pd.Index)): - v = v.values + if v.dtype.kind in numeric_kinds: + # Get the numeric numpy array so we use fast path below + v = v.values + elif v.dtype.kind == 'M': + # Convert datetime Series/Index to numpy array of datetime's + if isinstance(v, pd.Series): + v = v.dt.to_pydatetime() + else: + v = v.to_pydatetime() if not isinstance(v, np.ndarray): + # v is not homogenous array v_list = [to_scalar_or_list(e) for e in v] + + # Lookup dtype for requested kind, if any + dtype = kind_default_dtypes.get(first_kind, None) + + # construct new array from list new_v = np.array(v_list, order='C', dtype=dtype) elif v.dtype.kind in numeric_kinds: - if dtype: + # v is a homogenous numeric array + if kind and v.dtype.kind not in kind: + # Kind(s) were specified and this array doens't match + # Convert to the default dtype for the first kind + dtype = kind_default_dtypes.get(first_kind, None) new_v = np.ascontiguousarray(v.astype(dtype)) else: + # Either no kind was requested or requested kind is satisfied new_v = np.ascontiguousarray(v.copy()) else: + # v is a non-numeric homogenous array new_v = v.copy() # Handle force numeric param @@ -106,7 +141,7 @@ def copy_to_readonly_numpy_array(v, dtype=None, 
force_numeric=False): raise ValueError('Input value is not numeric and' 'force_numeric parameter set to True') - if dtype != 'unicode': + if 'U' not in kind: # Force non-numeric arrays to have object type # -------------------------------------------- # Here we make sure that non-numeric arrays have the object @@ -116,12 +151,6 @@ def copy_to_readonly_numpy_array(v, dtype=None, force_numeric=False): if new_v.dtype.kind not in ['u', 'i', 'f', 'O']: new_v = np.array(v, dtype='object') - # Convert int64 arrays to int32 - # ----------------------------- - # JavaScript doesn't support int64 typed arrays - if new_v.dtype == 'int64': - new_v = new_v.astype('int32') - # Set new array to be read-only # ----------------------------- new_v.flags['WRITEABLE'] = False @@ -749,10 +778,13 @@ def validate_coerce(self, v): # Pass None through pass elif self.array_ok and is_homogeneous_array(v): - if v.dtype.kind not in ['i', 'u']: - self.raise_invalid_val(v) - v_array = copy_to_readonly_numpy_array(v, dtype='int32') + v_array = copy_to_readonly_numpy_array(v, + kind=('i', 'u'), + force_numeric=True) + + if v_array.dtype.kind not in ['i', 'u']: + self.raise_invalid_val(v) # Check min/max if self.has_min_max: @@ -875,7 +907,7 @@ def validate_coerce(self, v): if is_homogeneous_array(v): # If not strict, let numpy cast elements to strings - v = copy_to_readonly_numpy_array(v, dtype='unicode') + v = copy_to_readonly_numpy_array(v, kind='U') # Check no_blank if self.no_blank: @@ -1057,10 +1089,10 @@ def validate_coerce(self, v, should_raise=True): # ### Check that elements have valid colors types ### elif self.numbers_allowed() or invalid_els: v = copy_to_readonly_numpy_array( - validated_v, dtype='object') + validated_v, kind='O') else: v = copy_to_readonly_numpy_array( - validated_v, dtype='unicode') + validated_v, kind='U') elif self.array_ok and is_simple_array(v): validated_v = [ self.validate_coerce(e, should_raise=False) @@ -1509,7 +1541,7 @@ def validate_coerce(self, v): 
self.raise_invalid_elements(invalid_els) if is_homogeneous_array(v): - v = copy_to_readonly_numpy_array(validated_v, dtype='unicode') + v = copy_to_readonly_numpy_array(validated_v, kind='U') else: v = to_scalar_or_list(v) else: @@ -1559,7 +1591,7 @@ def validate_coerce(self, v): # Pass None through pass elif self.array_ok and is_homogeneous_array(v): - v = copy_to_readonly_numpy_array(v, dtype='object') + v = copy_to_readonly_numpy_array(v, kind='O') elif self.array_ok and is_simple_array(v): v = to_scalar_or_list(v) return v diff --git a/_plotly_utils/tests/validators/test_integer_validator.py b/_plotly_utils/tests/validators/test_integer_validator.py index 2aefff0c246..603a7c9f89a 100644 --- a/_plotly_utils/tests/validators/test_integer_validator.py +++ b/_plotly_utils/tests/validators/test_integer_validator.py @@ -128,7 +128,7 @@ def test_acceptance_aok_list(val, validator_aok): def test_coercion_aok_list(val, expected, validator_aok): v = validator_aok.validate_coerce(val) if isinstance(val, (np.ndarray, pd.Series, pd.Index)): - assert v.dtype == np.int32 + assert v.dtype == val.dtype assert np.array_equal(validator_aok.present(v), np.array(expected, dtype=np.int32)) else: diff --git a/_plotly_utils/tests/validators/test_pandas_series_input.py b/_plotly_utils/tests/validators/test_pandas_series_input.py new file mode 100644 index 00000000000..b0c4e91dec0 --- /dev/null +++ b/_plotly_utils/tests/validators/test_pandas_series_input.py @@ -0,0 +1,179 @@ +import pytest +import numpy as np +import pandas as pd +from datetime import datetime +from _plotly_utils.basevalidators import (NumberValidator, + IntegerValidator, + DataArrayValidator, + ColorValidator) + + +@pytest.fixture +def data_array_validator(request): + return DataArrayValidator('prop', 'parent') + + +@pytest.fixture +def integer_validator(request): + return IntegerValidator('prop', 'parent', array_ok=True) + + +@pytest.fixture +def number_validator(request): + return NumberValidator('prop', 'parent', 
array_ok=True) + + +@pytest.fixture +def color_validator(request): + return ColorValidator('prop', 'parent', array_ok=True, colorscale_path='') + + +@pytest.fixture( + params=['int8', 'int16', 'int32', 'int64', + 'uint8', 'uint16', 'uint32', 'uint64', + 'float16', 'float32', 'float64']) +def numeric_dtype(request): + return request.param + + +@pytest.fixture( + params=[pd.Series, pd.Index]) +def pandas_type(request): + return request.param + + +@pytest.fixture +def numeric_pandas(request, pandas_type, numeric_dtype): + return pandas_type(np.arange(10), dtype=numeric_dtype) + + +@pytest.fixture +def color_object_pandas(request, pandas_type): + return pandas_type(['blue', 'green', 'red']*3, dtype='object') + + +@pytest.fixture +def color_categorical_pandas(request, pandas_type): + return pandas_type(pd.Categorical(['blue', 'green', 'red']*3)) + + +@pytest.fixture +def dates_array(request): + return np.array([ + datetime(year=2013, month=10, day=10), + datetime(year=2013, month=11, day=10), + datetime(year=2013, month=12, day=10), + datetime(year=2014, month=1, day=10), + datetime(year=2014, month=2, day=10) + ]) + + +@pytest.fixture +def datetime_pandas(request, pandas_type, dates_array): + return pandas_type(dates_array) + + +def test_numeric_validator_numeric_pandas(number_validator, numeric_pandas): + res = number_validator.validate_coerce(numeric_pandas) + + # Check type + assert isinstance(res, np.ndarray) + + # Check dtype + assert res.dtype == numeric_pandas.dtype + + # Check values + np.testing.assert_array_equal(res, numeric_pandas) + + +def test_integer_validator_numeric_pandas(integer_validator, numeric_pandas): + res = integer_validator.validate_coerce(numeric_pandas) + + # Check type + assert isinstance(res, np.ndarray) + + # Check dtype + if numeric_pandas.dtype.kind in ('u', 'i'): + # Integer and unsigned integer dtype unchanged + assert res.dtype == numeric_pandas.dtype + else: + # Float datatypes converted to default integer type of int32 + assert 
res.dtype == 'int32' + + # Check values + np.testing.assert_array_equal(res, numeric_pandas) + + +def test_data_array_validator(data_array_validator, + numeric_pandas): + res = data_array_validator.validate_coerce(numeric_pandas) + + # Check type + assert isinstance(res, np.ndarray) + + # Check dtype + assert res.dtype == numeric_pandas.dtype + + # Check values + np.testing.assert_array_equal(res, numeric_pandas) + + +def test_color_validator_numeric(color_validator, + numeric_pandas): + res = color_validator.validate_coerce(numeric_pandas) + + # Check type + assert isinstance(res, np.ndarray) + + # Check dtype + assert res.dtype == numeric_pandas.dtype + + # Check values + np.testing.assert_array_equal(res, numeric_pandas) + + +def test_color_validator_object(color_validator, + color_object_pandas): + + res = color_validator.validate_coerce(color_object_pandas) + + # Check type + assert isinstance(res, np.ndarray) + + # Check dtype + assert res.dtype == 'object' + + # Check values + np.testing.assert_array_equal(res, color_object_pandas) + + +def test_color_validator_categorical(color_validator, + color_categorical_pandas): + + res = color_validator.validate_coerce(color_categorical_pandas) + + # Check type + assert color_categorical_pandas.dtype == 'category' + assert isinstance(res, np.ndarray) + + # Check dtype + assert res.dtype == 'object' + + # Check values + np.testing.assert_array_equal(res, np.array(color_categorical_pandas)) + + +def test_data_array_validator_dates(data_array_validator, + datetime_pandas, + dates_array): + + res = data_array_validator.validate_coerce(datetime_pandas) + + # Check type + assert isinstance(res, np.ndarray) + + # Check dtype + assert res.dtype == 'object' + + # Check values + np.testing.assert_array_equal(res, dates_array) diff --git a/plotly/serializers.py b/plotly/serializers.py index 460dad92fce..27cdda23af0 100644 --- a/plotly/serializers.py +++ b/plotly/serializers.py @@ -2,6 +2,7 @@ from .optional_imports import 
get_module np = get_module('numpy') + + def _py_to_js(v, widget_manager): """ Python -> Javascript ipywidget serializer @@ -38,12 +39,18 @@ def _py_to_js(v, widget_manager): elif np is not None and isinstance(v, np.ndarray): # Convert 1D numpy arrays with numeric types to memoryviews with # datatype and shape metadata. - if v.ndim == 1 and v.dtype.kind in ['u', 'i', 'f']: + if (v.ndim == 1 and + v.dtype.kind in ['u', 'i', 'f'] and + v.dtype != 'int64' and + v.dtype != 'uint64'): + + # We have a numpy array that we can directly map to a JavaScript + # Typed array return {'buffer': memoryview(v), 'dtype': str(v.dtype), 'shape': v.shape} else: - # Convert all other numpy to lists + # Convert all other numpy arrays to lists return v.tolist() # Handle Undefined From c0076dc6505416f1ebc97734e8243d9cd7d57fff Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Sat, 8 Sep 2018 18:25:06 -0400 Subject: [PATCH 3/3] Review comments [skip ci] --- _plotly_utils/basevalidators.py | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/_plotly_utils/basevalidators.py b/_plotly_utils/basevalidators.py index b69eb75c46b..233a53670bf 100644 --- a/_plotly_utils/basevalidators.py +++ b/_plotly_utils/basevalidators.py @@ -75,14 +75,6 @@ def copy_to_readonly_numpy_array(v, kind=None, force_numeric=False): assert np is not None - # Copy to numpy array and handle dtype param - # ------------------------------------------ - # If dtype was not specified then it will be passed to the numpy array - # constructor as None and the data type will be inferred automatically - - # TODO: support datetime dtype here and in widget serialization - # u: unsigned int, i: signed int, f: float - # ### Process kind ### if not kind: kind = () @@ -91,25 +83,22 @@ def copy_to_readonly_numpy_array(v, kind=None, force_numeric=False): first_kind = kind[0] if kind else None + # u: unsigned int, i: signed int, f: float numeric_kinds = {'u', 'i', 'f'} kind_default_dtypes = { 'u': 'uint32', 'i': 
'int32', 'f': 'float64', 'O': 'object'} - # Unwrap data types that have a `values` property that might be a numpy - # array. If this values property is a numeric numpy array then we - # can take the fast path below - # - # Use date_series.to_pydatetime() - # + # Handle pandas Series and Index objects if pd and isinstance(v, (pd.Series, pd.Index)): if v.dtype.kind in numeric_kinds: # Get the numeric numpy array so we use fast path below v = v.values elif v.dtype.kind == 'M': - # Convert datetime Series/Index to numpy array of datetime's + # Convert datetime Series/Index to numpy array of datetimes if isinstance(v, pd.Series): v = v.dt.to_pydatetime() else: + # DatetimeIndex v = v.to_pydatetime() if not isinstance(v, np.ndarray): @@ -124,7 +113,7 @@ def copy_to_readonly_numpy_array(v, kind=None, force_numeric=False): elif v.dtype.kind in numeric_kinds: # v is a homogenous numeric array if kind and v.dtype.kind not in kind: - # Kind(s) were specified and this array doens't match + # Kind(s) were specified and this array doesn't match # Convert to the default dtype for the first kind dtype = kind_default_dtypes.get(first_kind, None) new_v = np.ascontiguousarray(v.astype(dtype))