diff --git a/doc/source/api.rst b/doc/source/api.rst index 017739adbc8b1..242ce9865dc9a 100644
--- a/doc/source/api.rst
+++ b/doc/source/api.rst
@@ -146,8 +146,8 @@ Top-level missing data
 isnull
 notnull

-Top-level dealing with datetimes
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Top-level dealing with datetimelike
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

 .. autosummary::
    :toctree: generated/

@@ -157,6 +157,7 @@ Top-level dealing with datetimes
    date_range
    bdate_range
    period_range
+   timedelta_range

 Top-level evaluation
 ~~~~~~~~~~~~~~~~~~~~

@@ -440,13 +441,16 @@ Time series-related

 Datetimelike Properties
 ~~~~~~~~~~~~~~~~~~~~~~~
+
 ``Series.dt`` can be used to access the values of the series as
 datetimelike and return several properties.
 Due to implementation details the methods show up here as methods of the
-``DatetimeProperties/PeriodProperties`` classes. These can be accessed like ``Series.dt.<property>``.
+``DatetimeProperties/PeriodProperties/TimedeltaProperties`` classes. These can be accessed like ``Series.dt.<property>``.

 .. currentmodule:: pandas.tseries.common

+**Datetime Properties**
+
 .. autosummary::
    :toctree: generated/

@@ -473,6 +477,37 @@ Due to implementation details the methods show up here as methods of the
    DatetimeProperties.is_year_start
    DatetimeProperties.is_year_end

+**Datetime Methods**
+
+.. autosummary::
+   :toctree: generated/
+
+   DatetimeProperties.to_period
+   DatetimeProperties.to_pydatetime
+   DatetimeProperties.tz_localize
+   DatetimeProperties.tz_convert
+
+**Timedelta Properties**
+
+.. autosummary::
+   :toctree: generated/
+
+   TimedeltaProperties.days
+   TimedeltaProperties.hours
+   TimedeltaProperties.minutes
+   TimedeltaProperties.seconds
+   TimedeltaProperties.milliseconds
+   TimedeltaProperties.microseconds
+   TimedeltaProperties.nanoseconds
+   TimedeltaProperties.components
+
+**Timedelta Methods**
+
+.. autosummary::
+   :toctree: generated/
+
+   TimedeltaProperties.to_pytimedelta
+
 String handling
 ~~~~~~~~~~~~~~~
 ``Series.str`` can be used to access the values of the series as
@@ -1289,6 +1324,37 @@ Conversion
    DatetimeIndex.to_pydatetime
    DatetimeIndex.to_series

+TimedeltaIndex
+--------------
+
+.. autosummary::
+   :toctree: generated/
+
+   TimedeltaIndex
+
+Components
+~~~~~~~~~~
+
+.. autosummary::
+   :toctree: generated/
+
+   TimedeltaIndex.days
+   TimedeltaIndex.hours
+   TimedeltaIndex.minutes
+   TimedeltaIndex.seconds
+   TimedeltaIndex.milliseconds
+   TimedeltaIndex.microseconds
+   TimedeltaIndex.nanoseconds
+   TimedeltaIndex.components
+
+Conversion
+~~~~~~~~~~
+.. autosummary::
+   :toctree: generated/
+
+   TimedeltaIndex.to_pytimedelta
+   TimedeltaIndex.to_series
+
 GroupBy
 -------
 .. currentmodule:: pandas.core.groupby
diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 884976b55d6d1..985cd22c03382 100644
--- a/doc/source/basics.rst
+++ b/doc/source/basics.rst
@@ -1122,6 +1122,16 @@ This enables nice expressions like this:

    s[s.dt.day==2]

+You can easily produce tz aware transformations:
+
+.. ipython:: python
+
+   stz = s.dt.tz_localize('US/Eastern')
+   stz
+   stz.dt.tz
+
+The ``.dt`` accessor works for period and timedelta dtypes.
+
 .. ipython:: python

    # period
    s = Series(period_range('20130101',periods=4,freq='D').asobject)
    s
    s.dt.year
    s.dt.day

+.. ipython:: python
+
+   # timedelta
+   s = Series(timedelta_range('1 day 00:00:05',periods=4,freq='s'))
+   s
+   s.dt.days
+   s.dt.seconds
+   s.dt.components
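Since the component fields are plain integer Series, they can be combined arithmetically; a minimal sketch (using the ``s`` from the block above, and ignoring sub-second components):

.. ipython:: python

   # total duration in seconds = whole days * 86400 + remaining seconds
   s.dt.days * 86400 + s.dt.seconds

..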
note:: ``Series.dt`` will raise a ``TypeError`` if you access it with non-datetimelike values
diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst index 243d1c02d1a65..a293e0a57fc0f 100644
--- a/doc/source/cookbook.rst
+++ b/doc/source/cookbook.rst
@@ -636,7 +636,7 @@ Computation
 Miscellaneous
 -------------

-The :ref:`Timedeltas <timeseries.timedeltas>` docs.
+The :ref:`Timedeltas <timedeltas.timedeltas>` docs.

 `Operating with timedeltas `__
diff --git a/doc/source/index.rst.template b/doc/source/index.rst.template index a845e31d95e90..1b692a317051d 100644
--- a/doc/source/index.rst.template
+++ b/doc/source/index.rst.template
@@ -131,6 +131,7 @@ See the package overview for more detail about what's in the library.
     merging
     reshaping
     timeseries
+    timedeltas
     categorical
     visualization
     rplot
diff --git a/doc/source/internals.rst b/doc/source/internals.rst index e5d2b001c18f8..9418ca5265f1a 100644
--- a/doc/source/internals.rst
+++ b/doc/source/internals.rst
@@ -36,7 +36,8 @@ containers for the axis labels:
   data, such as time stamps
- ``Float64Index``: a version of ``Index`` highly optimized for 64-bit float data
- ``MultiIndex``: the standard hierarchical index object
-- ``DatetimeIndex``: An Index object with ``Timestamp`` boxed elements (impl are the int64 values)
+- ``DatetimeIndex``: An Index object with ``Timestamp`` boxed elements (impl are the int64 values)
+- ``TimedeltaIndex``: An Index object with ``Timedelta`` boxed elements (impl are the int64 values)
- ``PeriodIndex``: An Index object with Period elements

These range generators make the creation of a regular index easy:
diff --git a/doc/source/timedeltas.rst b/doc/source/timedeltas.rst new file mode 100644 index 0000000000000..b847f02b40594
--- /dev/null
+++ b/doc/source/timedeltas.rst
@@ -0,0 +1,364 @@
+.. currentmodule:: pandas
+.. _timedeltas:
+
+.. ipython:: python
+   :suppress:
+
+   from datetime import datetime, timedelta
+   import numpy as np
+   np.random.seed(123456)
+   from pandas import *
+   randn = np.random.randn
+   randint = np.random.randint
+   np.set_printoptions(precision=4, suppress=True)
+   options.display.max_rows=15
+   import dateutil
+   import pytz
+   from dateutil.relativedelta import relativedelta
+   from pandas.tseries.api import *
+   from pandas.tseries.offsets import *
+
+.. _timedeltas.timedeltas:
+
+***********
+Time Deltas
+***********
+
+.. note::
+
+   Starting in v0.15.0, we introduce a new scalar type ``Timedelta``, which is a subclass of ``datetime.timedelta``, and behaves in a similar manner,
+   but allows compatibility with ``np.timedelta64`` types as well as a host of custom representation, parsing, and attributes.
+
+Timedeltas are differences in times, expressed in different units, e.g. days, hours, minutes, seconds.
+They can be both positive and negative.
+
+Parsing
+-------
+
+You can construct a ``Timedelta`` scalar through various arguments:
+
+.. ipython:: python
+
+   # strings
+   Timedelta('1 days')
+   Timedelta('1 days 00:00:00')
+   Timedelta('1 days 2 hours')
+   Timedelta('-1 days 2 min 3us')
+
+   # like datetime.timedelta
+   # note: these MUST be specified as keyword arguments
+   Timedelta(days=1,seconds=1)
+
+   # integers with a unit
+   Timedelta(1,unit='d')
+
+   # from a timedelta/np.timedelta64
+   Timedelta(timedelta(days=1,seconds=1))
+   Timedelta(np.timedelta64(1,'ms'))
+
+   # a NaT
+   Timedelta('nan')
+   Timedelta('nat')
+
+:ref:`DateOffsets <timeseries.offsets>` (``Day, Hour, Minute, Second, Milli, Micro, Nano``) can also be used in construction.
+
+.. ipython:: python
+
+   Timedelta(Second(2))
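Because ``Timedelta`` is a subclass of ``datetime.timedelta``, it can be used wherever a standard library timedelta is accepted; a minimal sketch:

.. ipython:: python

   # Timedelta passes isinstance checks for datetime.timedelta
   isinstance(Timedelta('1 days 2 hours'), timedelta)

   # and stdlib datetime arithmetic accepts it
   datetime(2014, 1, 1) + Timedelta('1 days 2 hours')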
Further, operations among the scalars yield another scalar ``Timedelta``

.. ipython:: python

   Timedelta(Day(2)) + Timedelta(Second(2)) + Timedelta('00:00:00.000123')

to_timedelta
~~~~~~~~~~~~

.. warning::

   Prior to 0.15.0 ``to_timedelta`` would return a ``Series`` for list-like/Series input, and a ``np.timedelta64`` for scalar input.
   It will now return a ``TimedeltaIndex`` for list-like input, ``Series`` for Series input, and ``Timedelta`` for scalar input.

   The arguments to ``pd.to_timedelta`` are now ``(arg,unit='ns',box=True)``, previously were ``(arg,box=True,unit='ns')`` as these are more logical.

Using the top-level ``pd.to_timedelta``, you can convert a scalar, array, list, or Series from a recognized timedelta format / value into a ``Timedelta`` type.
It will construct Series if the input is a Series, a scalar if the input is scalar-like, otherwise it will output a ``TimedeltaIndex``

.. ipython:: python

   to_timedelta('1 days 06:05:01.00003')
   to_timedelta('15.5us')
   to_timedelta(['1 days 06:05:01.00003','15.5us','nan'])
   to_timedelta(np.arange(5),unit='s')
   to_timedelta(np.arange(5),unit='d')

Operations
----------

You can operate on Series/DataFrames and construct ``timedelta64[ns]`` Series through
subtraction operations on ``datetime64[ns]`` Series, or ``Timestamps``.

.. ipython:: python

   s = Series(date_range('2012-1-1', periods=3, freq='D'))
   td = Series([ Timedelta(days=i) for i in range(3) ])
   df = DataFrame(dict(A = s, B = td))
   df
   df['C'] = df['A'] + df['B']
   df
   df.dtypes

   s - s.max()
   s - datetime(2011,1,1,3,5)
   s + timedelta(minutes=5)
   s + Minute(5)
   s + Minute(5) + Milli(5)

Operations with scalars from a ``timedelta64[ns]`` series

.. ipython:: python

   y = s - s[0]
   y

Series of timedeltas with ``NaT`` values are supported

.. ipython:: python

   y = s - s.shift()
   y

Elements can be set to ``NaT`` using ``np.nan`` analogously to datetimes

.. ipython:: python

   y[1] = np.nan
   y

Operands can also appear in a reversed order (a singular object operated with a Series)

.. ipython:: python

   s.max() - s
   datetime(2011,1,1,3,5) - s
   timedelta(minutes=5) + s

``min, max`` and the corresponding ``idxmin, idxmax`` operations are supported on frames

.. ipython:: python

   A = s - Timestamp('20120101') - Timedelta('00:05:05')
   B = s - Series(date_range('2012-1-2', periods=3, freq='D'))

   df = DataFrame(dict(A=A, B=B))
   df

   df.min()
   df.min(axis=1)

   df.idxmin()
   df.idxmax()

``min, max, idxmin, idxmax`` operations are supported on Series / DataFrames. A single result will be a ``Timedelta``.

.. ipython:: python

   df.min().max()
   df.min(axis=1).min()

   df.min().idxmax()
   df.min(axis=1).idxmin()

You can use ``fillna`` on timedeltas. Integers will be interpreted as seconds. You can
pass a timedelta to get a particular value.

.. ipython:: python

   y.fillna(0)
   y.fillna(10)
   y.fillna(Timedelta('-1 days, 00:00:05'))

You can also negate, multiply and use ``abs`` on ``Timedeltas``

.. ipython:: python

   td1 = Timedelta('-1 days 2 hours 3 seconds')
   -1 * td1
   - td1
   abs(td1)

.. _timedeltas.timedeltas_reductions:

Reductions
----------

Numeric reduction operations for ``timedelta64[ns]`` will return ``Timedelta`` objects.

.. ipython:: python

   y2 = y.fillna(timedelta(days=-1,seconds=5))
   y2
   y2.mean()
   y2.quantile(.1)
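``min`` and ``max`` reductions likewise return ``Timedelta`` scalars, so the spread of a timedelta Series can be computed directly; a sketch using the ``y2`` from above:

.. ipython:: python

   y2.min()
   y2.max()

   # the range (max - min) is itself a Timedelta
   y2.max() - y2.min()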
.. _timedeltas.timedeltas_convert:

Frequency Conversion
--------------------

.. versionadded:: 0.13

Timedelta Series, ``TimedeltaIndex``, and ``Timedelta`` scalars can be converted to other 'frequencies' by dividing by another timedelta,
or by astyping to a specific timedelta type. These operations yield Series and propagate ``NaT`` -> ``nan``.
Note that division by the numpy scalar is true division, while astyping is equivalent to floor division.

.. ipython:: python

   td = Series(date_range('20130101',periods=4)) - \
        Series(date_range('20121201',periods=4))
   td[2] += timedelta(minutes=5,seconds=3)
   td[3] = np.nan
   td

   # to days
   td / np.timedelta64(1,'D')
   td.astype('timedelta64[D]')

   # to seconds
   td / np.timedelta64(1,'s')
   td.astype('timedelta64[s]')

   # to months (these are constant months)
   td / np.timedelta64(1,'M')

Dividing or multiplying a ``timedelta64[ns]`` Series by an integer or integer Series
yields another ``timedelta64[ns]`` dtyped Series.

.. ipython:: python

   td * -1
   td * Series([1,2,3,4])

Attributes
----------

You can access various components of the ``Timedelta`` or ``TimedeltaIndex`` directly using the attributes ``days, hours, minutes, seconds, milliseconds, microseconds, nanoseconds``.
These operations can be directly accessed via the ``.dt`` property of the ``Series`` as well. These return an integer representing that interval (which is signed according to whether the ``Timedelta`` is signed).

For a ``Series``:

.. ipython:: python

   td.dt.days
   td.dt.seconds

You can access the component fields for a scalar ``Timedelta`` directly.

.. ipython:: python

   tds = Timedelta('31 days 5 min 3 sec')
   tds.days
   tds.seconds
   (-tds).seconds

You can use the ``.components`` property to access a reduced form of the timedelta. This returns a ``DataFrame`` indexed
similarly to the ``Series``.

.. ipython:: python

   td.dt.components

.. _timedeltas.attribues_warn:

.. warning::

   ``Timedelta`` scalars (and ``TimedeltaIndex``) component fields are *not the same* as the component fields on a ``datetime.timedelta`` object. For example, ``.seconds`` on a ``datetime.timedelta`` object returns the total number of seconds combined between ``hours``, ``minutes`` and ``seconds``. In contrast, the pandas ``Timedelta`` breaks out hours, minutes, microseconds and nanoseconds separately.

   .. ipython:: python

      # Timedelta accessor
      tds = Timedelta('31 days 5 min 3 sec')
      tds.minutes
      tds.seconds

      # datetime.timedelta accessor
      # this is 5 minutes * 60 + 3 seconds
      tds.to_pytimedelta().seconds


TimedeltaIndex
--------------

.. versionadded:: 0.15.0

To generate an index with time deltas, you can use either the ``TimedeltaIndex`` or
the ``timedelta_range`` constructor.

Using ``TimedeltaIndex`` you can pass string-like, ``Timedelta``, ``timedelta``,
or ``np.timedelta64`` objects. Passing ``np.nan/pd.NaT/nat`` will represent missing values.

.. ipython:: python

   TimedeltaIndex(['1 days','1 days, 00:00:05',
                   np.timedelta64(2,'D'),timedelta(days=2,seconds=2)])

Similarly to ``date_range``, you can construct regular ranges of a ``TimedeltaIndex``:

.. ipython:: python

   timedelta_range(start='1 days',periods=5,freq='D')
   timedelta_range(start='1 days',end='2 days',freq='30T')
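As with ``date_range``, sub-daily frequency aliases and combinations of ``start``, ``end``, and ``periods`` work as you would expect; a small sketch:

.. ipython:: python

   # every 6 hours between two timedeltas
   timedelta_range(start='1 days', end='2 days', freq='6H')

   # 4 periods at a 12 hour frequency
   timedelta_range(start='1 days', periods=4, freq='12H')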
Using the TimedeltaIndex
~~~~~~~~~~~~~~~~~~~~~~~~

Similarly to the other datetime-like indices, ``DatetimeIndex`` and ``PeriodIndex``, you can use
``TimedeltaIndex`` as the index of pandas objects.

.. ipython:: python

   s = Series(np.arange(100),
              index=timedelta_range('1 days',periods=100,freq='h'))
   s

Selections work similarly, with coercion on string-likes and slices:

.. ipython:: python

   s['1 day':'2 day']
   s['1 day 01:00:00']
   s[Timedelta('1 day 1h')]

Furthermore, you can use partial string selection and the range will be inferred:

.. ipython:: python

   s['1 day':'1 day 5 hours']

Finally, the combination of ``TimedeltaIndex`` with ``DatetimeIndex`` allows certain combination operations that are ``NaT`` preserving:

.. ipython:: python

   tdi = TimedeltaIndex(['1 days',pd.NaT,'2 days'])
   tdi.tolist()
   dti = date_range('20130101',periods=3)
   dti.tolist()
   (dti + tdi).tolist()
   (dti - tdi).tolist()

Similarly to frequency conversion on a ``Series`` above, you can convert these indices to yield another Index.

.. ipython:: python

   tdi / np.timedelta64(1,'s')
   tdi.astype('timedelta64[s]')

Scalar type ops work as well

.. ipython:: python

   tdi + Timestamp('20130101')
   tdi + Timedelta('10 days')
   (Timestamp('20130101') - tdi).tolist()
   tdi / tdi[0]
diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index a23d067cefa4f..963dcde0f1a1f 100644
--- a/doc/source/timeseries.rst
+++ b/doc/source/timeseries.rst
@@ -1491,8 +1491,8 @@ TimeSeries, aligning the data on the UTC timestamps:
     result
     result.index

-To remove timezone from tz-aware ``DatetimeIndex``, use ``tz_localize(None)`` or ``tz_convert(None)``.
-``tz_localize(None)`` will remove timezone holding local time representations.
+To remove timezone from tz-aware ``DatetimeIndex``, use ``tz_localize(None)`` or ``tz_convert(None)``.
+``tz_localize(None)`` will remove timezone holding local time representations.
 ``tz_convert(None)`` will remove timezone after converting to UTC time.

 .. ipython:: python
@@ -1511,7 +1511,7 @@ Ambiguous Times when Localizing
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

 In some cases, localize cannot determine the DST and non-DST hours when there are
-duplicates. This often happens when reading files or database records that simply
+duplicates. This often happens when reading files or database records that simply
 duplicate the hours. Passing ``ambiguous='infer'`` (``infer_dst`` argument in prior
 releases) into ``tz_localize`` will attempt to determine the right offset.
@@ -1526,186 +1526,23 @@ releases) into ``tz_localize`` will attempt to determine the right offset.
     rng_hourly_eastern.values

 In addition to 'infer', there are several other arguments supported. Passing
-an array-like of bools or 0s/1s where True represents a DST hour and False a
-non-DST hour, allows for distinguishing more than one DST
-transition (e.g., if you have multiple records in a database each with their
+an array-like of bools or 0s/1s where True represents a DST hour and False a
+non-DST hour, allows for distinguishing more than one DST
+transition (e.g., if you have multiple records in a database each with their
 own DST transition). Or passing 'NaT' will fill in transition times
 with not-a-time values. These methods are available in the ``DatetimeIndex``
 constructor as well as ``tz_localize``.

 .. ipython:: python
-
+
     rng_hourly_dst = np.array([1, 1, 0, 0, 0])
     rng_hourly.tz_localize('US/Eastern', ambiguous=rng_hourly_dst).values
     rng_hourly.tz_localize('US/Eastern', ambiguous='NaT').values

+    didx = DatetimeIndex(start='2014-08-01 09:00', freq='H', periods=10, tz='US/Eastern')
+    didx
+    didx.tz_localize(None)
+    didx.tz_convert(None)
-..
_timeseries.timedeltas: - -Time Deltas ------------ - -Timedeltas are differences in times, expressed in difference units, e.g. days,hours,minutes,seconds. -They can be both positive and negative. :ref:`DateOffsets` that are absolute in nature -(``Day, Hour, Minute, Second, Milli, Micro, Nano``) can be used as ``timedeltas``. - -.. ipython:: python - - from datetime import datetime, timedelta - s = Series(date_range('2012-1-1', periods=3, freq='D')) - td = Series([ timedelta(days=i) for i in range(3) ]) - df = DataFrame(dict(A = s, B = td)) - df - df['C'] = df['A'] + df['B'] - df - df.dtypes - - s - s.max() - s - datetime(2011,1,1,3,5) - s + timedelta(minutes=5) - s + Minute(5) - s + Minute(5) + Milli(5) - -Getting scalar results from a ``timedelta64[ns]`` series - -.. ipython:: python - - y = s - s[0] - y - -Series of timedeltas with ``NaT`` values are supported - -.. ipython:: python - - y = s - s.shift() - y - -Elements can be set to ``NaT`` using ``np.nan`` analogously to datetimes - -.. ipython:: python - - y[1] = np.nan - y - -Operands can also appear in a reversed order (a singular object operated with a Series) - -.. ipython:: python - - s.max() - s - datetime(2011,1,1,3,5) - s - timedelta(minutes=5) + s - -Some timedelta numeric like operations are supported. - -.. ipython:: python - - td - timedelta(minutes=5, seconds=5, microseconds=5) - -``min, max`` and the corresponding ``idxmin, idxmax`` operations are supported on frames - -.. ipython:: python - - A = s - Timestamp('20120101') - timedelta(minutes=5, seconds=5) - B = s - Series(date_range('2012-1-2', periods=3, freq='D')) - - df = DataFrame(dict(A=A, B=B)) - df - - df.min() - df.min(axis=1) - - df.idxmin() - df.idxmax() - -``min, max`` operations are supported on series; these return a single element -``timedelta64[ns]`` Series (this avoids having to deal with numpy timedelta64 -issues). ``idxmin, idxmax`` are supported as well. - -.. ipython:: python - - df.min().max() - df.min(axis=1).min() - - df.min().idxmax() - df.min(axis=1).idxmin() - -You can fillna on timedeltas. Integers will be interpreted as seconds. You can -pass a timedelta to get a particular value. - -.. ipython:: python - - y.fillna(0) - y.fillna(10) - y.fillna(timedelta(days=-1,seconds=5)) - -.. _timeseries.timedeltas_reductions: - -Time Deltas & Reductions ------------------------- - -.. warning:: - - A numeric reduction operation for ``timedelta64[ns]`` can return a single-element ``Series`` of - dtype ``timedelta64[ns]``. - -You can do numeric reduction operations on timedeltas. - -.. ipython:: python - - y2 = y.fillna(timedelta(days=-1,seconds=5)) - y2 - y2.mean() - y2.quantile(.1) - -.. _timeseries.timedeltas_convert: - -Time Deltas & Conversions -------------------------- - -.. versionadded:: 0.13 - -**string/integer conversion** - -Using the top-level ``to_timedelta``, you can convert a scalar or array from the standard -timedelta format (produced by ``to_csv``) into a timedelta type (``np.timedelta64`` in ``nanoseconds``). -It can also construct Series. - -.. warning:: - - This requires ``numpy >= 1.7`` - -.. ipython:: python - - to_timedelta('1 days 06:05:01.00003') - to_timedelta('15.5us') - to_timedelta(['1 days 06:05:01.00003','15.5us','nan']) - to_timedelta(np.arange(5),unit='s') - to_timedelta(np.arange(5),unit='d') - -**frequency conversion** - -Timedeltas can be converted to other 'frequencies' by dividing by another timedelta, -or by astyping to a specific timedelta type. These operations yield ``float64`` dtyped Series. - -.. 
ipython:: python
-
-   td = Series(date_range('20130101',periods=4))-Series(date_range('20121201',periods=4))
-   td[2] += np.timedelta64(timedelta(minutes=5,seconds=3))
-   td[3] = np.nan
-   td
-
-   # to days
-   td / np.timedelta64(1,'D')
-   td.astype('timedelta64[D]')
-
-   # to seconds
-   td / np.timedelta64(1,'s')
-   td.astype('timedelta64[s]')
-
-Dividing or multiplying a ``timedelta64[ns]`` Series by an integer or integer Series
-yields another ``timedelta64[ns]`` dtypes Series.
-
-.. ipython:: python
-
-   td * -1
-   td * Series([1,2,3,4])
+    # tz_convert(None) is identical to tz_convert('UTC').tz_localize(None)
+    didx.tz_convert('UTC').tz_localize(None)
diff --git a/doc/source/v0.13.0.txt b/doc/source/v0.13.0.txt index ac0a14f45b69e..78239eef1b98f 100644
--- a/doc/source/v0.13.0.txt
+++ b/doc/source/v0.13.0.txt
@@ -460,7 +460,7 @@ Enhancements
     get_dummies([1, 2, np.nan], dummy_na=True)

-- ``timedelta64[ns]`` operations. See :ref:`the docs <timeseries.timedeltas>`.
+- ``timedelta64[ns]`` operations. See :ref:`the docs <timedeltas.timedeltas>`.

   .. warning::

@@ -479,7 +479,7 @@ Enhancements
   A Series of dtype ``timedelta64[ns]`` can now be divided by another
   ``timedelta64[ns]`` object, or astyped to yield a ``float64`` dtyped Series. This
-  is frequency conversion. See :ref:`the docs <timeseries.timedeltas_convert>` for the docs.
+  is frequency conversion. See :ref:`the docs <timedeltas.timedeltas_convert>` for the docs.

   .. ipython:: python
diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index 49c431d8071e8..5a4f3b7da4843 100644
--- a/doc/source/v0.15.0.txt
+++ b/doc/source/v0.15.0.txt
@@ -16,11 +16,11 @@ users upgrade to this version.
- Highlights include:

  - The ``Categorical`` type was integrated as a first-class pandas type, see :ref:`here `
+ - New scalar type ``Timedelta``, and a new index type ``TimedeltaIndex``, see :ref:`here <whatsnew_0150.timedeltaindex>`
  - Internal refactoring of the ``Index`` class to no longer sub-class ``ndarray``, see :ref:`Internal Refactoring <whatsnew_0150.refactoring>`
  - New datetimelike properties accessor ``.dt`` for Series, see :ref:`Datetimelike Properties `
  - dropping support for ``PyTables`` less than version 3.0.0, and ``numexpr`` less than version 2.1 (:issue:`7990`)
  - API change in using Indexes in set operations, see :ref:`here `
- - API change in using Indexs set operations, see :ref:`here `
  - Split indexing documentation into :ref:`Indexing and Selecting Data ` and :ref:`MultiIndex / Advanced Indexing `
  - :ref:`Other Enhancements `

@@ -57,7 +57,7 @@ API changes

  .. ipython:: python

-     idx = pandas.MultiIndex.from_product([['a'], range(3), list("pqr")], names=['foo', 'bar', 'baz'])
+     idx = MultiIndex.from_product([['a'], range(3), list("pqr")], names=['foo', 'bar', 'baz'])
      idx.set_names('qux', level=0)
      idx.set_names(['qux','baz'], level=[0,1])
      idx.set_levels(['a','b','c'], level='bar')
@@ -384,6 +384,7 @@ This will return a Series, indexed like the existing Series. See the :ref:`docs
    s.dt.hour
    s.dt.second
    s.dt.day
+   s.dt.freq

 This enables nice expressions like this:

@@ -391,6 +392,16 @@

    s[s.dt.day==2]

+You can easily produce tz aware transformations:
+
+.. ipython:: python
+
+   stz = s.dt.tz_localize('US/Eastern')
+   stz
+   stz.dt.tz
+
+The ``.dt`` accessor works for period and timedelta dtypes.
+
 .. ipython:: python

    # period
    s = Series(period_range('20130101',periods=4,freq='D').asobject)
    s
    s.dt.year
    s.dt.day

@@ -399,6 +410,15 @@ This enables nice expressions like this:

+.. ipython:: python
+
+   # timedelta
+   s = Series(timedelta_range('1 day 00:00:05',periods=4,freq='s'))
+   s
+   s.dt.days
+   s.dt.seconds
+   s.dt.components
+
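The component fields are plain integer Series, so they compose with boolean indexing; for instance (a sketch using the ``s`` defined just above):

.. ipython:: python

   # select only the entries whose seconds component exceeds 6
   s[s.dt.seconds > 6]

..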
_whatsnew_0150.refactoring:

Internal Refactoring
@@ -455,6 +475,108 @@ For full docs, see the :ref:`Categorical introduction ` and the
 only. If you want to manipulate codes, please use one of the
 :ref:`API methods on Categoricals `.

+.. _whatsnew_0150.timedeltaindex:
+
+TimedeltaIndex/Scalar
+~~~~~~~~~~~~~~~~~~~~~
+
+We introduce a new scalar type ``Timedelta``, which is a subclass of ``datetime.timedelta``, and behaves in a similar manner,
+but allows compatibility with ``np.timedelta64`` types as well as a host of custom representation, parsing, and attributes.
+This type is very similar to how ``Timestamp`` works for ``datetimes``. It is a convenient API box for the type. See the :ref:`docs `.
+(:issue:`3009`, :issue:`4533`, :issue:`8209`, :issue:`8187`, :issue:`8190`, :issue:`7869`)
+
+.. warning::
+
+   ``Timedelta`` scalars (and ``TimedeltaIndex``) component fields are *not the same* as the component fields on a ``datetime.timedelta`` object. For example, ``.seconds`` on a ``datetime.timedelta`` object returns the total number of seconds combined between ``hours``, ``minutes`` and ``seconds``. In contrast, the pandas ``Timedelta`` breaks out hours, minutes, microseconds and nanoseconds separately.
+
+   .. ipython:: python
+
+      # Timedelta accessor
+      tds = Timedelta('31 days 5 min 3 sec')
+      tds.minutes
+      tds.seconds
+
+      # datetime.timedelta accessor
+      # this is 5 minutes * 60 + 3 seconds
+      tds.to_pytimedelta().seconds
+
+.. warning::
+
+   Prior to 0.15.0 ``to_timedelta`` would return a ``Series`` for list-like/Series input, and a ``np.timedelta64`` for scalar input.
+   It will now return a ``TimedeltaIndex`` for list-like input, ``Series`` for Series input, and ``Timedelta`` for scalar input.
+
+   The arguments to ``pd.to_timedelta`` are now ``(arg,unit='ns',box=True)``, previously were ``(arg,box=True,unit='ns')`` as these are more logical.
+
+Construct a scalar
+
+.. ipython:: python
+
+   Timedelta('1 days 06:05:01.00003')
+   Timedelta('15.5us')
+   Timedelta('1 hour 15.5us')
+
+   # a NaT
+   Timedelta('nan')
+
+Access fields for a Timedelta
+
+.. ipython:: python
+
+   td = Timedelta('1 hour 3m 15.5us')
+   td.hours
+   td.minutes
+   td.microseconds
+   td.nanoseconds
+
+Construct a ``TimedeltaIndex``
+
+.. ipython:: python
+   :suppress:
+
+   import datetime
+   from datetime import timedelta
+
+.. ipython:: python
+
+   TimedeltaIndex(['1 days','1 days, 00:00:05',
+                   np.timedelta64(2,'D'),timedelta(days=2,seconds=2)])
+
+Constructing a ``TimedeltaIndex`` with a regular range
+
+.. ipython:: python
+
+   timedelta_range('1 days',periods=5,freq='D')
+   timedelta_range(start='1 days',end='2 days',freq='30T')
+
+You can now use a ``TimedeltaIndex`` as the index of a pandas object
+
+.. ipython:: python
+
+   s = Series(np.arange(5),
+              index=timedelta_range('1 days',periods=5,freq='s'))
+   s
+
+You can select with partial string selections
+
+.. ipython:: python
+
+   s['1 day 00:00:02']
+   s['1 day':'1 day 00:00:02']
+
+Finally, the combination of ``TimedeltaIndex`` with ``DatetimeIndex`` allows certain combination operations that are NaT preserving:
+
+.. ipython:: python
+
+   tdi = TimedeltaIndex(['1 days',pd.NaT,'2 days'])
+   tdi.tolist()
+   dti = date_range('20130101',periods=3)
+   dti.tolist()
+
+   (dti + tdi).tolist()
+   (dti - tdi).tolist()
+
+- iteration of a ``timedelta64[ns]`` ``Series``, e.g. ``list(Series(...))``, would prior to v0.15.0 return ``np.timedelta64`` for each element. These will now be wrapped in ``Timedelta``.
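A minimal sketch of the new iteration behavior (the example values are illustrative):

.. ipython:: python

   # each element is now a Timedelta rather than a np.timedelta64
   list(Series(to_timedelta(['1 days', '2 days'])))

..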
_whatsnew_0150.prior_deprecations: Prior Version Deprecations/Changes diff --git a/pandas/computation/pytables.py b/pandas/computation/pytables.py index 9a1e61ad30386..25d6a7f293dac 100644 --- a/pandas/computation/pytables.py +++ b/pandas/computation/pytables.py @@ -180,7 +180,7 @@ def stringify(value): v = time.mktime(v.timetuple()) return TermValue(v, pd.Timestamp(v), kind) elif kind == u('timedelta64') or kind == u('timedelta'): - v = _coerce_scalar_to_timedelta_type(v, unit='s').item() + v = _coerce_scalar_to_timedelta_type(v, unit='s').value return TermValue(int(v), v, kind) elif kind == u('integer'): v = int(float(v)) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index ee9854f8dc5f9..8d1b1588552bf 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -124,7 +124,9 @@ def factorize(values, sort=False, order=None, na_sentinel=-1): from pandas.core.index import Index from pandas.core.series import Series vals = np.asarray(values) + is_datetime = com.is_datetime64_dtype(vals) + is_timedelta = com.is_timedelta64_dtype(vals) (hash_klass, vec_klass), vals = _get_data_algo(vals, _hashtables) table = hash_klass(len(vals)) @@ -161,6 +163,8 @@ def factorize(values, sort=False, order=None, na_sentinel=-1): if is_datetime: uniques = uniques.astype('M8[ns]') + elif is_timedelta: + uniques = uniques.astype('m8[ns]') if isinstance(values, Index): uniques = values._simple_new(uniques, None, freq=getattr(values, 'freq', None), tz=getattr(values, 'tz', None)) @@ -401,7 +405,8 @@ def _get_data_algo(values, func_map): if com.is_float_dtype(values): f = func_map['float64'] values = com._ensure_float64(values) - elif com.is_datetime64_dtype(values): + + elif com.needs_i8_conversion(values): # if we have NaT, punt to object dtype mask = com.isnull(values) diff --git a/pandas/core/base.py b/pandas/core/base.py index 4e8228f3d8631..36cf3d9c7407c 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -161,7 +161,9 @@ def f(self, *args, **kwargs): else: f = _create_delegator_method(name) - setattr(cls,name,f) + # don't overwrite existing methods/properties + if not hasattr(cls, name): + setattr(cls,name,f) class FrozenList(PandasObject, list): @@ -539,218 +541,3 @@ def duplicated(self, take_last=False): def _update_inplace(self, result): raise NotImplementedError - -class DatetimeIndexOpsMixin(object): - """ common ops mixin to support a unified inteface datetimelike Index """ - - def __iter__(self): - return (self._box_func(v) for v in self.asi8) - - @property - def _box_func(self): - """ - box function to get object from internal representation - """ - raise NotImplementedError - - def _box_values(self, values): - """ - apply box func to passed values - """ - return lib.map_infer(values, self._box_func) - - @cache_readonly - def hasnans(self): - """ return if I have any nans; enables various perf speedups """ - return (self.asi8 == tslib.iNaT).any() - - @property - def asobject(self): - from pandas.core.index import Index - return Index(self._box_values(self.asi8), name=self.name, dtype=object) - - def tolist(self): - """ - return a list of the underlying data - """ - return list(self.asobject) - - def min(self, axis=None): - """ - return the minimum value of the Index - - See also - -------- - numpy.ndarray.min - """ - try: - i8 = self.asi8 - - # quick check - if len(i8) and self.is_monotonic: - if i8[0] != tslib.iNaT: - return self._box_func(i8[0]) - - if self.hasnans: - mask = i8 == tslib.iNaT - min_stamp = self[~mask].asi8.min() - else: - min_stamp = 
i8.min() - return self._box_func(min_stamp) - except ValueError: - return self._na_value - - def argmin(self, axis=None): - """ - return a ndarray of the minimum argument indexer - - See also - -------- - numpy.ndarray.argmin - """ - - i8 = self.asi8 - if self.hasnans: - mask = i8 == tslib.iNaT - if mask.all(): - return -1 - i8 = i8.copy() - i8[mask] = np.iinfo('int64').max - return i8.argmin() - - def max(self, axis=None): - """ - return the maximum value of the Index - - See also - -------- - numpy.ndarray.max - """ - try: - i8 = self.asi8 - - # quick check - if len(i8) and self.is_monotonic: - if i8[-1] != tslib.iNaT: - return self._box_func(i8[-1]) - - if self.hasnans: - mask = i8 == tslib.iNaT - max_stamp = self[~mask].asi8.max() - else: - max_stamp = i8.max() - return self._box_func(max_stamp) - except ValueError: - return self._na_value - - def argmax(self, axis=None): - """ - return a ndarray of the maximum argument indexer - - See also - -------- - numpy.ndarray.argmax - """ - - i8 = self.asi8 - if self.hasnans: - mask = i8 == tslib.iNaT - if mask.all(): - return -1 - i8 = i8.copy() - i8[mask] = 0 - return i8.argmax() - - @property - def _formatter_func(self): - """ - Format function to convert value to representation - """ - return str - - def _format_footer(self): - tagline = 'Length: %d, Freq: %s, Timezone: %s' - return tagline % (len(self), self.freqstr, self.tz) - - def __unicode__(self): - formatter = self._formatter_func - summary = str(self.__class__) + '\n' - - n = len(self) - if n == 0: - pass - elif n == 1: - first = formatter(self[0]) - summary += '[%s]\n' % first - elif n == 2: - first = formatter(self[0]) - last = formatter(self[-1]) - summary += '[%s, %s]\n' % (first, last) - else: - first = formatter(self[0]) - last = formatter(self[-1]) - summary += '[%s, ..., %s]\n' % (first, last) - - summary += self._format_footer() - return summary - - @cache_readonly - def _resolution(self): - from pandas.tseries.frequencies import Resolution - return Resolution.get_reso_from_freq(self.freqstr) - - @cache_readonly - def resolution(self): - """ - Returns day, hour, minute, second, millisecond or microsecond - """ - from pandas.tseries.frequencies import get_reso_string - return get_reso_string(self._resolution) - - def __add__(self, other): - from pandas.core.index import Index - from pandas.tseries.offsets import DateOffset - if isinstance(other, Index): - warnings.warn("using '+' to provide set union with Indexes is deprecated, " - "use .union()",FutureWarning) - return self.union(other) - if isinstance(other, (DateOffset, datetime.timedelta, np.timedelta64)): - return self._add_delta(other) - elif com.is_integer(other): - return self.shift(other) - else: # pragma: no cover - return NotImplemented - - def __sub__(self, other): - from pandas.core.index import Index - from pandas.tseries.offsets import DateOffset - if isinstance(other, Index): - warnings.warn("using '-' to provide set differences with Indexes is deprecated, " - "use .difference()",FutureWarning) - return self.difference(other) - if isinstance(other, (DateOffset, datetime.timedelta, np.timedelta64)): - return self._add_delta(-other) - elif com.is_integer(other): - return self.shift(-other) - else: # pragma: no cover - return NotImplemented - - __iadd__ = __add__ - __isub__ = __sub__ - - def _add_delta(self, other): - return NotImplemented - - def unique(self): - """ - Index.unique with handling for DatetimeIndex/PeriodIndex metadata - - Returns - ------- - result : DatetimeIndex or PeriodIndex - """ - from 
pandas.core.index import Int64Index - result = Int64Index.unique(self) - return self._simple_new(result, name=self.name, freq=self.freq, - tz=getattr(self, 'tz', None)) - diff --git a/pandas/core/common.py b/pandas/core/common.py index ff9da5d401850..3695bc1898091 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -64,6 +64,13 @@ def _check(cls, inst): return meta(name, tuple(), dct) +ABCIndex = create_pandas_abc_type("ABCIndex", "_typ", ("index",)) +ABCInt64Index = create_pandas_abc_type("ABCInt64Index", "_typ", ("int64index",)) +ABCFloat64Index = create_pandas_abc_type("ABCFloat64Index", "_typ", ("float64index",)) +ABCMultiIndex = create_pandas_abc_type("ABCMultiIndex", "_typ", ("multiindex",)) +ABCDatetimeIndex = create_pandas_abc_type("ABCDatetimeIndex", "_typ", ("datetimeindex",)) +ABCTimedeltaIndex = create_pandas_abc_type("ABCTimedeltaIndex", "_typ", ("timedeltaindex",)) +ABCPeriodIndex = create_pandas_abc_type("ABCPeriodIndex", "_typ", ("periodindex",)) ABCSeries = create_pandas_abc_type("ABCSeries", "_typ", ("series",)) ABCDataFrame = create_pandas_abc_type("ABCDataFrame", "_typ", ("dataframe",)) ABCPanel = create_pandas_abc_type("ABCPanel", "_typ", ("panel",)) @@ -879,7 +886,6 @@ def func(arr, indexer, out, fill_value=np.nan): func(arr, indexer, out=out, fill_value=fill_value) return out - _diff_special = { 'float64': algos.diff_2d_float64, 'float32': algos.diff_2d_float32, @@ -889,24 +895,25 @@ def func(arr, indexer, out, fill_value=np.nan): 'int8': algos.diff_2d_int8, } - def diff(arr, n, axis=0): """ difference of n between self, analagoust to s-s.shift(n) """ n = int(n) - dtype = arr.dtype na = np.nan - - if is_timedelta64_dtype(arr) or is_datetime64_dtype(arr): - dtype = 'timedelta64[ns]' + dtype = arr.dtype + is_timedelta = False + if needs_i8_conversion(arr): + dtype = np.float64 arr = arr.view('i8') na = tslib.iNaT + is_timedelta = True elif issubclass(dtype.type, np.integer): dtype = np.float64 elif issubclass(dtype.type, np.bool_): dtype = np.object_ + dtype = np.dtype(dtype) out_arr = np.empty(arr.shape, dtype=dtype) na_indexer = [slice(None)] * arr.ndim @@ -927,7 +934,7 @@ def diff(arr, n, axis=0): # need to make sure that we account for na for datelike/timedelta # we don't actually want to subtract these i8 numbers - if dtype == 'timedelta64[ns]': + if is_timedelta: res = arr[res_indexer] lag = arr[lag_indexer] @@ -944,6 +951,9 @@ def diff(arr, n, axis=0): else: out_arr[res_indexer] = arr[res_indexer] - arr[lag_indexer] + if is_timedelta: + out_arr = lib.map_infer(out_arr.ravel(),lib.Timedelta).reshape(out_arr.shape) + return out_arr @@ -1780,7 +1790,7 @@ def _maybe_box_datetimelike(value): if isinstance(value, np.datetime64): value = tslib.Timestamp(value) elif isinstance(value, np.timedelta64): - pass + value = tslib.Timedelta(value) return value @@ -2335,6 +2345,14 @@ def is_period_arraylike(arr): return arr.dtype == object and lib.infer_dtype(arr) == 'period' return getattr(arr, 'inferred_type', None) == 'period' +def is_datetime_arraylike(arr): + """ return if we are datetime arraylike / DatetimeIndex """ + if isinstance(arr, pd.DatetimeIndex): + return True + elif isinstance(arr, (np.ndarray, ABCSeries)): + return arr.dtype == object and lib.infer_dtype(arr) == 'datetime' + return getattr(arr, 'inferred_type', None) == 'datetime' + def _coerce_to_dtype(dtype): """ coerce a string / np.dtype to a dtype """ if is_categorical_dtype(dtype): @@ -2406,6 +2424,13 @@ def _is_datetime_or_timedelta_dtype(arr_or_dtype): needs_i8_conversion = 
_is_datetime_or_timedelta_dtype +def i8_boxer(arr_or_dtype): + """ return the scalar boxer for the dtype """ + if is_datetime64_dtype(arr_or_dtype): + return lib.Timestamp + elif is_timedelta64_dtype(arr_or_dtype): + return lambda x: lib.Timedelta(x,unit='ns') + raise ValueError("cannot find a scalar boxer for {0}".format(arr_or_dtype)) def is_numeric_dtype(arr_or_dtype): tipo = _get_dtype_type(arr_or_dtype) @@ -2523,7 +2548,7 @@ def _astype_nansafe(arr, dtype, copy=True): if dtype == np.int64: return arr.view(dtype) elif dtype == object: - return arr.astype(object) + return tslib.ints_to_pytimedelta(arr.view(np.int64)) # in py3, timedelta64[ns] are int64 elif ((compat.PY3 and dtype not in [_INT64_DTYPE, _TD_DTYPE]) or @@ -2745,27 +2770,38 @@ def _concat_compat(to_concat, axis=0): # marginal given that it would still require shape & dtype calculation and # np.concatenate which has them both implemented is compiled. if nonempty: + is_datetime64 = [x.dtype == _NS_DTYPE for x in nonempty] + is_timedelta64 = [x.dtype == _TD_DTYPE for x in nonempty] + if all(is_datetime64): - # work around NumPy 1.6 bug new_values = np.concatenate([x.view(np.int64) for x in nonempty], axis=axis) return new_values.view(_NS_DTYPE) - elif any(is_datetime64): + elif all(is_timedelta64): + new_values = np.concatenate([x.view(np.int64) for x in nonempty], + axis=axis) + return new_values.view(_TD_DTYPE) + elif any(is_datetime64) or any(is_timedelta64): to_concat = [_to_pydatetime(x) for x in nonempty] return np.concatenate(to_concat, axis=axis) def _to_pydatetime(x): + # coerce to an object dtyped + if x.dtype == _NS_DTYPE: shape = x.shape x = tslib.ints_to_pydatetime(x.view(np.int64).ravel()) x = x.reshape(shape) + elif x.dtype == _TD_DTYPE: + shape = x.shape + x = tslib.ints_to_pytimedelta(x.view(np.int64).ravel()) + x = x.reshape(shape) return x - def _where_compat(mask, arr1, arr2): if arr1.dtype == _NS_DTYPE and arr2.dtype == _NS_DTYPE: new_vals = np.where(mask, arr1.view('i8'), arr2.view('i8')) diff --git a/pandas/core/format.py b/pandas/core/format.py index 2658410358000..190eb2dc3bbda 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -16,7 +16,7 @@ from pandas.core.config import get_option, set_option, reset_option import pandas.core.common as com import pandas.lib as lib -from pandas.tslib import iNaT +from pandas.tslib import iNaT, Timestamp, Timedelta import numpy as np @@ -1230,10 +1230,10 @@ def _helper_csv(self, writer, na_rep=None, cols=None, writer.writerow(encoded_cols) if date_format is None: - date_formatter = lambda x: lib.Timestamp(x)._repr_base + date_formatter = lambda x: Timestamp(x)._repr_base else: def strftime_with_nulls(x): - x = lib.Timestamp(x) + x = Timestamp(x) if notnull(x): return x.strftime(date_format) @@ -1273,7 +1273,7 @@ def strftime_with_nulls(x): if float_format is not None and com.is_float(val): val = float_format % val - elif isinstance(val, (np.datetime64, lib.Timestamp)): + elif isinstance(val, (np.datetime64, Timestamp)): val = date_formatter(val) row_fields.append(val) @@ -1922,8 +1922,8 @@ def _format_datetime64(x, tz=None, nat_rep='NaT'): if x is None or lib.checknull(x): return nat_rep - if tz is not None or not isinstance(x, lib.Timestamp): - x = lib.Timestamp(x, tz=tz) + if tz is not None or not isinstance(x, Timestamp): + x = Timestamp(x, tz=tz) return str(x) @@ -1932,8 +1932,8 @@ def _format_datetime64_dateonly(x, nat_rep='NaT', date_format=None): if x is None or lib.checknull(x): return nat_rep - if not isinstance(x, lib.Timestamp): - x = 
lib.Timestamp(x) + if not isinstance(x, Timestamp): + x = Timestamp(x) if date_format: return x.strftime(date_format) @@ -1944,7 +1944,7 @@ def _format_datetime64_dateonly(x, nat_rep='NaT', date_format=None): def _is_dates_only(values): for d in values: if isinstance(d, np.datetime64): - d = lib.Timestamp(d) + d = Timestamp(d) if d is not None and not lib.checknull(d) and d._has_time_component(): return False @@ -1972,15 +1972,24 @@ def _get_format_datetime64_from_values(values, class Timedelta64Formatter(GenericArrayFormatter): - def _format_strings(self): - formatter = self.formatter or _get_format_timedelta64(self.values) + def __init__(self, values, nat_rep='NaT', box=False, **kwargs): + super(Timedelta64Formatter, self).__init__(values, **kwargs) + self.nat_rep = nat_rep + self.box = box + def _format_strings(self): + formatter = self.formatter or _get_format_timedelta64(self.values, nat_rep=self.nat_rep, box=self.box) fmt_values = [formatter(x) for x in self.values] - return fmt_values -def _get_format_timedelta64(values): +def _get_format_timedelta64(values, nat_rep='NaT', box=False): + """ + return a formatter function for a range of timedeltas. These will all have the same format argument + + if box, then show the return in quotes + """ + values_int = values.astype(np.int64) consider_values = values_int != iNaT @@ -1989,19 +1998,25 @@ def _get_format_timedelta64(values): even_days = np.logical_and(consider_values, values_int % one_day_in_nanos != 0).sum() == 0 all_sub_day = np.logical_and(consider_values, np.abs(values_int) >= one_day_in_nanos).sum() == 0 - format_short = even_days or all_sub_day - format = "short" if format_short else "long" + if even_days: + format = 'even_day' + elif all_sub_day: + format = 'sub_day' + else: + format = 'long' - def impl(x): + def _formatter(x): if x is None or lib.checknull(x): - return 'NaT' - elif format_short and com.is_integer(x) and x.view('int64') == 0: - return "0 days" if even_days else "00:00:00" - else: - return lib.repr_timedelta64(x, format=format) + return nat_rep - return impl + if not isinstance(x, Timedelta): + x = Timedelta(x) + result = x._repr_base(format=format) + if box: + result = "'{0}'".format(result) + return result + return _formatter def _make_fixed_width(strings, justify='right', minimum=None): if len(strings) == 0 or justify == 'all': diff --git a/pandas/core/index.py b/pandas/core/index.py index 961e488026731..9140ef25019db 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -11,7 +11,7 @@ import pandas.lib as lib import pandas.algos as _algos import pandas.index as _index -from pandas.lib import Timestamp, is_datetime_array +from pandas.lib import Timestamp, Timedelta, is_datetime_array from pandas.core.base import PandasObject, FrozenList, FrozenNDArray, IndexOpsMixin, _shared_docs from pandas.util.decorators import Appender, cache_readonly, deprecate from pandas.core.common import isnull, array_equivalent @@ -136,7 +136,12 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, else: return result elif issubclass(data.dtype.type, np.timedelta64): - return Int64Index(data, copy=copy, name=name) + from pandas.tseries.tdi import TimedeltaIndex + result = TimedeltaIndex(data, copy=copy, name=name, **kwargs) + if dtype is not None and _o_dtype == dtype: + return Index(result.to_pytimedelta(), dtype=_o_dtype) + else: + return result if dtype is not None: try: @@ -196,6 +201,10 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, 
tslib.is_timestamp_array(subarr)): from pandas.tseries.index import DatetimeIndex return DatetimeIndex(subarr, copy=copy, name=name, **kwargs) + elif (inferred.startswith('timedelta') or + lib.is_timedelta_array(subarr)): + from pandas.tseries.tdi import TimedeltaIndex + return TimedeltaIndex(subarr, copy=copy, name=name, **kwargs) elif inferred == 'period': return PeriodIndex(subarr, name=name, **kwargs) @@ -398,27 +407,25 @@ def __unicode__(self): quote_strings=True) return "%s(%s, dtype='%s')" % (type(self).__name__, prepr, self.dtype) - def to_series(self, keep_tz=False): + def to_series(self, **kwargs): """ Create a Series with both index and values equal to the index keys useful with map for returning an indexer based on an index - Parameters - ---------- - keep_tz : optional, defaults False. - applies only to a DatetimeIndex - Returns ------- Series : dtype will be based on the type of the Index values. """ - import pandas as pd - values = self._to_embed(keep_tz) - return pd.Series(values, index=self, name=self.name) + from pandas import Series + return Series(self._to_embed(), index=self, name=self.name) def _to_embed(self, keep_tz=False): - """ return an array repr of this object, potentially casting to object """ + """ + return an array repr of this object, potentially casting to object + + This is for internal compat + """ return self.values def astype(self, dtype): @@ -931,8 +938,8 @@ def append(self, other): @staticmethod def _ensure_compat_concat(indexes): - from pandas.tseries.api import DatetimeIndex, PeriodIndex - klasses = DatetimeIndex, PeriodIndex + from pandas.tseries.api import DatetimeIndex, PeriodIndex, TimedeltaIndex + klasses = DatetimeIndex, PeriodIndex, TimedeltaIndex is_ts = [isinstance(idx, klasses) for idx in indexes] @@ -2043,6 +2050,13 @@ def drop_duplicates(self, take_last=False): def duplicated(self, take_last=False): return super(Index, self).duplicated(take_last=take_last) + + def _evaluate_with_timedelta_like(self, other, op, opstr): + raise TypeError("can only perform ops with timedelta like values") + + def _evaluate_with_datetime_like(self, other, op, opstr): + raise TypeError("can only perform ops with datetime like values") + @classmethod def _add_numeric_methods_disabled(cls): """ add in numeric methods to disable """ @@ -2054,11 +2068,15 @@ def _invalid_op(self, other): typ=type(self))) return _invalid_op - cls.__mul__ = cls.__rmul__ = _make_invalid_op('multiplication') - cls.__floordiv__ = cls.__rfloordiv__ = _make_invalid_op('floor division') - cls.__truediv__ = cls.__rtruediv__ = _make_invalid_op('true division') + cls.__mul__ = cls.__rmul__ = _make_invalid_op('__mul__') + cls.__floordiv__ = cls.__rfloordiv__ = _make_invalid_op('__floordiv__') + cls.__truediv__ = cls.__rtruediv__ = _make_invalid_op('__truediv__') if not compat.PY3: - cls.__div__ = cls.__rdiv__ = _make_invalid_op('division') + cls.__div__ = cls.__rdiv__ = _make_invalid_op('__div__') + cls.__neg__ = _make_invalid_op('__neg__') + cls.__pos__ = _make_invalid_op('__pos__') + cls.__abs__ = _make_invalid_op('__abs__') + cls.__inv__ = _make_invalid_op('__inv__') @classmethod def _add_numeric_methods(cls): @@ -2067,6 +2085,7 @@ def _add_numeric_methods(cls): def _make_evaluate_binop(op, opstr): def _evaluate_numeric_binop(self, other): + import pandas.tseries.offsets as offsets # if we are an inheritor of numeric, but not actually numeric (e.g. 
DatetimeIndex/PeriodInde) if not self._is_numeric_dtype: @@ -2086,6 +2105,10 @@ def _evaluate_numeric_binop(self, other): other = _values_from_object(other) if other.dtype.kind not in ['f','i']: raise TypeError("cannot evaluate a numeric op with a non-numeric dtype") + elif isinstance(other, (offsets.DateOffset, np.timedelta64, Timedelta, datetime.timedelta)): + return self._evaluate_with_timedelta_like(other, op, opstr) + elif isinstance(other, (Timestamp, np.datetime64)): + return self._evaluate_with_datetime_like(other, op, opstr) else: if not (com.is_float(other) or com.is_integer(other)): raise TypeError("can only perform ops with scalar values") @@ -2093,12 +2116,29 @@ def _evaluate_numeric_binop(self, other): return _evaluate_numeric_binop + def _make_evaluate_unary(op, opstr): + + def _evaluate_numeric_unary(self): + + # if we are an inheritor of numeric, but not actually numeric (e.g. DatetimeIndex/PeriodInde) + if not self._is_numeric_dtype: + raise TypeError("cannot evaluate a numeric op {opstr} for type: {typ}".format(opstr=opstr, + typ=type(self))) + + return self._shallow_copy(op(self.values)) - cls.__mul__ = cls.__rmul__ = _make_evaluate_binop(operator.mul,'multiplication') - cls.__floordiv__ = cls.__rfloordiv__ = _make_evaluate_binop(operator.floordiv,'floor division') - cls.__truediv__ = cls.__rtruediv__ = _make_evaluate_binop(operator.truediv,'true division') + return _evaluate_numeric_unary + + cls.__mul__ = cls.__rmul__ = _make_evaluate_binop(operator.mul,'__mul__') + cls.__floordiv__ = cls.__rfloordiv__ = _make_evaluate_binop(operator.floordiv,'__floordiv__') + cls.__truediv__ = cls.__rtruediv__ = _make_evaluate_binop(operator.truediv,'__truediv__') if not compat.PY3: - cls.__div__ = cls.__rdiv__ = _make_evaluate_binop(operator.div,'division') + cls.__div__ = cls.__rdiv__ = _make_evaluate_binop(operator.div,'__div__') + cls.__neg__ = _make_evaluate_unary(lambda x: -x,'__neg__') + cls.__pos__ = _make_evaluate_unary(lambda x: x,'__pos__') + cls.__abs__ = _make_evaluate_unary(lambda x: np.abs(x),'__abs__') + cls.__inv__ = _make_evaluate_unary(lambda x: -x,'__inv__') + Index._add_numeric_methods_disabled() class NumericIndex(Index): @@ -4490,8 +4530,8 @@ def _get_consensus_names(indexes): def _maybe_box(idx): - from pandas.tseries.api import DatetimeIndex, PeriodIndex - klasses = DatetimeIndex, PeriodIndex + from pandas.tseries.api import DatetimeIndex, PeriodIndex, TimedeltaIndex + klasses = DatetimeIndex, PeriodIndex, TimedeltaIndex if isinstance(idx, klasses): return idx.asobject diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 6672546fb4bad..95c82cc0233a4 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -24,7 +24,7 @@ import pandas.computation.expressions as expressions from pandas.util.decorators import cache_readonly -from pandas.tslib import Timestamp +from pandas.tslib import Timestamp, Timedelta from pandas import compat from pandas.compat import range, map, zip, u from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type @@ -357,6 +357,9 @@ def _astype(self, dtype, copy=False, raise_on_error=True, values=None, return self.copy() return self + if klass is None: + if dtype == np.object_: + klass = ObjectBlock try: # force the copy here if values is None: @@ -1232,6 +1235,8 @@ def _try_fill(self, value): """ if we are a NaT, return the actual fill value """ if isinstance(value, type(tslib.NaT)) or np.array(isnull(value)).all(): value = tslib.iNaT + elif isinstance(value, Timedelta): + value = value.value elif 
isinstance(value, np.timedelta64): pass elif com.is_integer(value): @@ -1257,8 +1262,8 @@ def masker(v): if _is_null_datelike_scalar(other): other = np.nan - elif isinstance(other, np.timedelta64): - other = _coerce_scalar_to_timedelta_type(other, unit='s').item() + elif isinstance(other, (np.timedelta64, Timedelta, timedelta)): + other = _coerce_scalar_to_timedelta_type(other, unit='s', box=False).item() if other == tslib.iNaT: other = np.nan else: @@ -1278,7 +1283,7 @@ def _try_coerce_result(self, result): result = result.astype('m8[ns]') result[mask] = tslib.iNaT elif isinstance(result, np.integer): - result = np.timedelta64(result) + result = lib.Timedelta(result) return result def should_store(self, value): @@ -1297,17 +1302,21 @@ def to_native_types(self, slicer=None, na_rep=None, **kwargs): na_rep = 'NaT' rvalues[mask] = na_rep imask = (~mask).ravel() - rvalues.flat[imask] = np.array([lib.repr_timedelta64(val) + + #### FIXME #### + # should use the core.format.Timedelta64Formatter here + # to figure what format to pass to the Timedelta + # e.g. to not show the decimals say + rvalues.flat[imask] = np.array([Timedelta(val)._repr_base(format='all') for val in values.ravel()[imask]], dtype=object) return rvalues.tolist() def get_values(self, dtype=None): - # return object dtypes as datetime.timedeltas + # return object dtypes as Timedelta if dtype == object: - return lib.map_infer(self.values.ravel(), - lambda x: timedelta(microseconds=x.item() / 1000) + return lib.map_infer(self.values.ravel(), lib.Timedelta ).reshape(self.values.shape) return self.values @@ -1816,16 +1825,6 @@ def to_native_types(self, slicer=None, na_rep=None, date_format=None, def should_store(self, value): return issubclass(value.dtype.type, np.datetime64) - def astype(self, dtype, copy=False, raise_on_error=True): - """ - handle convert to object as a special case - """ - klass = None - if np.dtype(dtype).type == np.object_: - klass = ObjectBlock - return self._astype(dtype, copy=copy, raise_on_error=raise_on_error, - klass=klass) - def set(self, locs, values, check=False): """ Modify Block in-place with new item value diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index aa6140383a27a..163ae0ee5a199 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -72,6 +72,10 @@ def f(values, axis=None, skipna=True, **kwds): try: if self.zero_value is not None and values.size == 0: if values.ndim == 1: + + # wrap the 0's if needed + if is_timedelta64_dtype(values): + return lib.Timedelta(0) return 0 else: result_shape = (values.shape[:axis] + @@ -222,17 +226,7 @@ def _wrap_results(result, dtype): result = result.view(dtype) elif is_timedelta64_dtype(dtype): if not isinstance(result, np.ndarray): - - # this is a scalar timedelta result! 
- # we have series convert then take the element (scalar) - # as series will do the right thing in py3 (and deal with numpy - # 1.6.2 bug in that it results dtype of timedelta64[us] - from pandas import Series - - # coerce float to results - if is_float(result): - result = int(result) - result = Series([result], dtype='timedelta64[ns]') + result = lib.Timedelta(result) else: result = result.view(dtype) @@ -314,7 +308,7 @@ def get_median(x): return ret # otherwise return a scalar value - return _wrap_results(get_median(values), dtype) if notempty else np.nan + return _wrap_results(get_median(values) if notempty else np.nan, dtype) def _get_counts_nanvar(mask, axis, ddof): @@ -709,6 +703,10 @@ def unique1d(values): table = _hash.Int64HashTable(len(values)) uniques = table.unique(_ensure_int64(values)) uniques = uniques.view('M8[ns]') + elif np.issubdtype(values.dtype, np.timedelta64): + table = _hash.Int64HashTable(len(values)) + uniques = table.unique(_ensure_int64(values)) + uniques = uniques.view('m8[ns]') elif np.issubdtype(values.dtype, np.integer): table = _hash.Int64HashTable(len(values)) uniques = table.unique(_ensure_int64(values)) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 7efcfb9898053..cad49aa68a250 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -575,7 +575,7 @@ def wrapper(self, other): values = self.get_values() other = _index.convert_scalar(values,_values_from_object(other)) - if issubclass(values.dtype.type, np.datetime64): + if issubclass(values.dtype.type, (np.datetime64, np.timedelta64)): values = values.view('i8') # scalars diff --git a/pandas/core/series.py b/pandas/core/series.py index 519e4c4457f04..4137b58885802 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -28,6 +28,7 @@ from pandas.core.internals import SingleBlockManager from pandas.core.categorical import Categorical from pandas.tseries.index import DatetimeIndex +from pandas.tseries.tdi import TimedeltaIndex from pandas.tseries.period import PeriodIndex, Period from pandas import compat from pandas.util.terminal import get_terminal_size @@ -248,9 +249,7 @@ def _set_axis(self, axis, labels, fastpath=False): is_all_dates = labels.is_all_dates if is_all_dates: - from pandas.tseries.index import DatetimeIndex - from pandas.tseries.period import PeriodIndex - if not isinstance(labels, (DatetimeIndex, PeriodIndex)): + if not isinstance(labels, (DatetimeIndex, PeriodIndex, TimedeltaIndex)): labels = DatetimeIndex(labels) # need to set here becuase we changed the index @@ -1003,6 +1002,8 @@ def __iter__(self): return iter(self.values) elif np.issubdtype(self.dtype, np.datetime64): return (lib.Timestamp(x) for x in self.values) + elif np.issubdtype(self.dtype, np.timedelta64): + return (lib.Timedelta(x) for x in self.values) else: return iter(self.values) @@ -1242,9 +1243,7 @@ def quantile(self, q=0.5): 0.75 3.25 dtype: float64 """ - valid_values = self.dropna().values - if len(valid_values) == 0: - return pa.NA + valid = self.dropna() def multi(values, qs): if com.is_list_like(qs): @@ -1253,17 +1252,7 @@ def multi(values, qs): else: return _quantile(values, qs*100) - if com.is_datetime64_dtype(self): - values = _values_from_object(self).view('i8') - result = multi(values, q) - if com.is_list_like(q): - result = result.map(lib.Timestamp) - else: - result = lib.Timestamp(result) - else: - result = multi(valid_values, q) - - return result + return self._maybe_box(lambda values: multi(values, q), dropna=True) def ptp(self, axis=None, out=None): return 
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 519e4c4457f04..4137b58885802 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -28,6 +28,7 @@
 from pandas.core.internals import SingleBlockManager
 from pandas.core.categorical import Categorical
 from pandas.tseries.index import DatetimeIndex
+from pandas.tseries.tdi import TimedeltaIndex
 from pandas.tseries.period import PeriodIndex, Period
 from pandas import compat
 from pandas.util.terminal import get_terminal_size
@@ -248,9 +249,7 @@ def _set_axis(self, axis, labels, fastpath=False):
         is_all_dates = labels.is_all_dates
         if is_all_dates:
-            from pandas.tseries.index import DatetimeIndex
-            from pandas.tseries.period import PeriodIndex
-            if not isinstance(labels, (DatetimeIndex, PeriodIndex)):
+            if not isinstance(labels, (DatetimeIndex, PeriodIndex, TimedeltaIndex)):
                 labels = DatetimeIndex(labels)
 
                 # need to set here becuase we changed the index
@@ -1003,6 +1002,8 @@ def __iter__(self):
             return iter(self.values)
         elif np.issubdtype(self.dtype, np.datetime64):
             return (lib.Timestamp(x) for x in self.values)
+        elif np.issubdtype(self.dtype, np.timedelta64):
+            return (lib.Timedelta(x) for x in self.values)
         else:
             return iter(self.values)
 
@@ -1242,9 +1243,7 @@ def quantile(self, q=0.5):
         0.75    3.25
         dtype: float64
         """
-        valid_values = self.dropna().values
-        if len(valid_values) == 0:
-            return pa.NA
+        valid = self.dropna()
 
         def multi(values, qs):
             if com.is_list_like(qs):
@@ -1253,17 +1252,7 @@ def multi(values, qs):
             else:
                 return _quantile(values, qs*100)
 
-        if com.is_datetime64_dtype(self):
-            values = _values_from_object(self).view('i8')
-            result = multi(values, q)
-            if com.is_list_like(q):
-                result = result.map(lib.Timestamp)
-            else:
-                result = lib.Timestamp(result)
-        else:
-            result = multi(valid_values, q)
-
-        return result
+        return self._maybe_box(lambda values: multi(values, q), dropna=True)
 
     def ptp(self, axis=None, out=None):
         return _values_from_object(self).ptp(axis, out)
@@ -2016,9 +2005,49 @@ def _reduce(self, op, axis=0, skipna=True, numeric_only=None,
         delegate = self.values
         if isinstance(delegate, np.ndarray):
             return op(delegate, skipna=skipna, **kwds)
+
         return delegate._reduce(op=op, axis=axis, skipna=skipna,
                                 numeric_only=numeric_only,
                                 filter_type=filter_type, name=name, **kwds)
 
+    def _maybe_box(self, func, dropna=False):
+        """
+        evaluate a function with possible input/output conversion if we are i8
+
+        Parameters
+        ----------
+        dropna : bool, default False
+           whether to drop values if necessary
+
+        """
+        if dropna:
+            values = self.dropna().values
+        else:
+            values = self.values
+
+        if com.needs_i8_conversion(self):
+            boxer = com.i8_boxer(self)
+
+            if len(values) == 0:
+                return boxer(iNaT)
+
+            values = values.view('i8')
+            result = func(values)
+
+            if com.is_list_like(result):
+                result = result.map(boxer)
+            else:
+                result = boxer(result)
+
+        else:
+
+            # let the function return nan if appropriate
+            if dropna:
+                if len(values) == 0:
+                    return np.nan
+            result = func(values)
+
+        return result
+
     def _reindex_indexer(self, new_index, indexer, copy):
         if indexer is None:
             if copy:
@@ -2446,6 +2475,11 @@ def _sanitize_index(data, index, copy=False):
         data = data._to_embed(keep_tz=True)
         if copy:
             data = data.copy()
+    elif isinstance(data, np.ndarray):
+
+        # coerce datetimelike types
+        if data.dtype.kind in ['M','m']:
+            data = _sanitize_array(data, index, copy=copy)
 
     return data
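``_maybe_box`` centralizes the view-as-``i8`` on the way in / box on the way out dance that ``quantile`` previously hand-rolled for datetimes; anything ``com.needs_i8_conversion`` covers (datetime64, and with this patch timedelta64) gets boxed through ``com.i8_boxer``. An illustrative sketch of the resulting behavior, under the assumption that timedelta series take the same path::

    import pandas as pd

    s = pd.Series(pd.date_range('20130101', periods=5))
    s.quantile(0.5)   # a Timestamp, boxed on the way out

    td = pd.Series(pd.to_timedelta(range(5), unit='h'))
    td.quantile(0.5)  # analogously a Timedelta via the new _maybe_box path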
diff --git a/pandas/index.pyx b/pandas/index.pyx
index 3dcdbf207fb3f..d6e358a96e904 100644
--- a/pandas/index.pyx
+++ b/pandas/index.pyx
@@ -1,7 +1,7 @@
 from numpy cimport ndarray
 
 from numpy cimport (float64_t, int32_t, int64_t, uint8_t,
-                    NPY_DATETIME)
+                    NPY_DATETIME, NPY_TIMEDELTA)
 cimport cython
 
 cimport numpy as cnp
@@ -16,7 +16,7 @@ import numpy as np
 cimport tslib
 from hashtable cimport *
 from pandas import algos, tslib, hashtable as _hash
-from pandas.tslib import Timestamp
+from pandas.tslib import Timestamp, Timedelta
 
 from datetime cimport (get_datetime64_value, _pydatetime_to_dts,
                        pandas_datetimestruct)
@@ -57,6 +57,8 @@ cdef inline is_definitely_invalid_key(object val):
 def get_value_at(ndarray arr, object loc):
     if arr.descr.type_num == NPY_DATETIME:
         return Timestamp(util.get_value_at(arr, loc))
+    elif arr.descr.type_num == NPY_TIMEDELTA:
+        return Timedelta(util.get_value_at(arr, loc))
     return util.get_value_at(arr, loc)
 
 def set_value_at(ndarray arr, object loc, object val):
@@ -108,6 +110,8 @@ cdef class IndexEngine:
         else:
             if arr.descr.type_num == NPY_DATETIME:
                 return Timestamp(util.get_value_at(arr, loc))
+            elif arr.descr.type_num == NPY_TIMEDELTA:
+                return Timedelta(util.get_value_at(arr, loc))
             return util.get_value_at(arr, loc)
 
     cpdef set_value(self, ndarray arr, object key, object value):
@@ -498,6 +502,9 @@ cdef class ObjectEngine(IndexEngine):
 
 cdef class DatetimeEngine(Int64Engine):
 
+    cdef _get_box_dtype(self):
+        return 'M8[ns]'
+
     def __contains__(self, object val):
         if self.over_size_threshold and self.is_monotonic:
             if not self.is_unique:
@@ -559,26 +566,31 @@ cdef class DatetimeEngine(Int64Engine):
 
     def get_indexer(self, values):
         self._ensure_mapping_populated()
-        if values.dtype != 'M8[ns]':
+        if values.dtype != self._get_box_dtype():
             return np.repeat(-1, len(values)).astype('i4')
         values = np.asarray(values).view('i8')
         return self.mapping.lookup(values)
 
     def get_pad_indexer(self, other, limit=None):
-        if other.dtype != 'M8[ns]':
+        if other.dtype != self._get_box_dtype():
             return np.repeat(-1, len(other)).astype('i4')
         other = np.asarray(other).view('i8')
         return algos.pad_int64(self._get_index_values(), other,
                                limit=limit)
 
     def get_backfill_indexer(self, other, limit=None):
-        if other.dtype != 'M8[ns]':
+        if other.dtype != self._get_box_dtype():
             return np.repeat(-1, len(other)).astype('i4')
         other = np.asarray(other).view('i8')
         return algos.backfill_int64(self._get_index_values(), other,
                                     limit=limit)
 
+cdef class TimedeltaEngine(DatetimeEngine):
+
+    cdef _get_box_dtype(self):
+        return 'm8[ns]'
+
 cpdef convert_scalar(ndarray arr, object value):
     if arr.descr.type_num == NPY_DATETIME:
         if isinstance(value,np.ndarray):
@@ -589,6 +601,15 @@ cpdef convert_scalar(ndarray arr, object value):
             return iNaT
         else:
             return Timestamp(value).value
+    elif arr.descr.type_num == NPY_TIMEDELTA:
+        if isinstance(value,np.ndarray):
+            pass
+        elif isinstance(value, Timedelta):
+            return value.value
+        elif value is None or value != value:
+            return iNaT
+        else:
+            return Timedelta(value).value
 
     if issubclass(arr.dtype.type, (np.integer, np.bool_)):
         if util.is_float_object(value) and value != value:
diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py
index 53ddd5c42a1d7..4a4b9da619b5f 100644
--- a/pandas/io/tests/test_sql.py
+++ b/pandas/io/tests/test_sql.py
@@ -325,7 +325,7 @@ def _execute_sql(self):
         tm.equalContents(row, [5.1, 3.5, 1.4, 0.2, 'Iris-setosa'])
 
     def _to_sql_save_index(self):
-        df = DataFrame.from_records([(1,2.1,'line1'), (2,1.5,'line2')], 
+        df = DataFrame.from_records([(1,2.1,'line1'), (2,1.5,'line2')],
                                     columns=['A','B','C'], index=['A'])
         self.pandasSQL.to_sql(df, 'test_to_sql_saves_index')
         ix_cols = self._get_index_columns('test_to_sql_saves_index')
@@ -523,6 +523,7 @@ def test_date_and_index(self):
                         "IntDateCol loaded with incorrect type")
 
     def test_timedelta(self):
+        # see #6921
         df = to_timedelta(Series(['00:00:01', '00:00:03'], name='foo')).to_frame()
         with tm.assert_produces_warning(UserWarning):
@@ -1067,7 +1068,7 @@ def _get_index_columns(self, tbl_name):
         ixs = insp.get_indexes(tbl_name)
         ixs = [i['column_names'] for i in ixs]
         return ixs
-    
+
     def test_to_sql_save_index(self):
         self._to_sql_save_index()
 
diff --git a/pandas/lib.pyx b/pandas/lib.pyx
index 07b1efcd834db..7a90072b2410e 100644
--- a/pandas/lib.pyx
+++ b/pandas/lib.pyx
@@ -46,7 +46,7 @@ from datetime cimport *
 from tslib cimport convert_to_tsobject, convert_to_timedelta64
 import tslib
-from tslib import NaT, Timestamp, repr_timedelta64
+from tslib import NaT, Timestamp, Timedelta
 
 cdef int64_t NPY_NAT = util.get_nat()
 
@@ -235,7 +235,7 @@ cpdef checknull_old(object val):
         return util._checknull(val)
 
 def isscalar(object val):
-    return np.isscalar(val) or val is None or PyDateTime_Check(val)
+    return np.isscalar(val) or val is None or PyDateTime_Check(val) or PyDelta_Check(val)
 
 
 @cython.wraparound(False)
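On the cython side ``TimedeltaEngine`` only overrides the box dtype, so ``m8[ns]`` lookups reuse the datetime machinery over the underlying int64 values, while ``convert_scalar`` and ``isscalar`` learn about timedeltas. Sketch (assuming the patch; illustrative only)::

    import datetime
    import pandas as pd

    pd.lib.isscalar(datetime.timedelta(days=1))   # now True

    s = pd.Series([1, 2, 3], index=pd.to_timedelta([1, 2, 3], unit='d'))
    s[pd.Timedelta('2 days')]                     # label lookup via the i8 mapping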
diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py
index dd7bc41c8d62c..f508b8915da1c 100644
--- a/pandas/tests/test_base.py
+++ b/pandas/tests/test_base.py
@@ -5,10 +5,11 @@
 import pandas.compat as compat
 import pandas as pd
 from pandas.compat import u, StringIO
-from pandas.core.base import FrozenList, FrozenNDArray, PandasDelegate, DatetimeIndexOpsMixin
+from pandas.core.base import FrozenList, FrozenNDArray, PandasDelegate
+from pandas.tseries.base import DatetimeIndexOpsMixin
 from pandas.util.testing import assertRaisesRegexp, assert_isinstance
 from pandas.tseries.common import is_datetimelike
-from pandas import Series, Index, Int64Index, DatetimeIndex, PeriodIndex
+from pandas import Series, Index, Int64Index, DatetimeIndex, TimedeltaIndex, PeriodIndex, Timedelta
 import pandas.tslib as tslib
 import nose
 
@@ -519,25 +520,21 @@ def test_value_counts_inferred(self):
             td = klass(td)
 
             result = td.value_counts()
-            expected_s = Series([6], index=[86400000000000])
-            self.assertEqual(result.index.dtype, 'int64')
+            expected_s = Series([6], index=[Timedelta('1day')])
             tm.assert_series_equal(result, expected_s)
 
-            # get nanoseconds to compare
-            expected = np.array([86400000000000])
-            self.assert_numpy_array_equal(td.unique(), expected)
-            self.assertEqual(td.nunique(), 1)
+            expected = TimedeltaIndex(['1 days'])
+            if isinstance(td, TimedeltaIndex):
+                self.assertTrue(td.unique().equals(expected))
+            else:
+                self.assert_numpy_array_equal(td.unique(), expected.values)
 
             td2 = timedelta(1) + (df.dt - df.dt)
             td2 = klass(td2)
             result2 = td2.value_counts()
-            self.assertEqual(result2.index.dtype, 'int64')
             tm.assert_series_equal(result2, expected_s)
 
-            self.assert_numpy_array_equal(td.unique(), expected)
-            self.assertEqual(td.nunique(), 1)
-
     def test_factorize(self):
         for o in self.objs:
             exp_arr = np.array(range(len(o)))
@@ -637,632 +634,6 @@ def test_duplicated_drop_duplicates(self):
             s.drop_duplicates(inplace=True)
             tm.assert_series_equal(s, original)
 
-
-class TestDatetimeIndexOps(Ops):
-    tz = [None, 'UTC', 'Asia/Tokyo', 'US/Eastern',
-          'dateutil/Asia/Singapore', 'dateutil/US/Pacific']
-
-    def setUp(self):
-        super(TestDatetimeIndexOps, self).setUp()
-        mask = lambda x: isinstance(x, DatetimeIndex) or isinstance(x, PeriodIndex) or is_datetimelike(x)
-        self.is_valid_objs = [ o for o in self.objs if mask(o) ]
-        self.not_valid_objs = [ o for o in self.objs if not mask(o) ]
-
-    def test_ops_properties(self):
-        self.check_ops_properties(['year','month','day','hour','minute','second','weekofyear','week','dayofweek','dayofyear','quarter'])
-        self.check_ops_properties(['date','time','microsecond','nanosecond', 'is_month_start', 'is_month_end', 'is_quarter_start',
-                                   'is_quarter_end', 'is_year_start', 'is_year_end'], lambda x: isinstance(x,DatetimeIndex))
-
-    def test_ops_properties_basic(self):
-
-        # sanity check that the behavior didn't change
-        # GH7206
-        for op in ['year','day','second','weekday']:
-            self.assertRaises(TypeError, lambda x: getattr(self.dt_series,op))
-
-        # attribute access should still work!
-        s = Series(dict(year=2000,month=1,day=10))
-        self.assertEquals(s.year,2000)
-        self.assertEquals(s.month,1)
-        self.assertEquals(s.day,10)
-        self.assertRaises(AttributeError, lambda : s.weekday)
-
-    def test_asobject_tolist(self):
-        idx = pd.date_range(start='2013-01-01', periods=4, freq='M', name='idx')
-        expected_list = [pd.Timestamp('2013-01-31'), pd.Timestamp('2013-02-28'),
-                         pd.Timestamp('2013-03-31'), pd.Timestamp('2013-04-30')]
-        expected = pd.Index(expected_list, dtype=object, name='idx')
-        result = idx.asobject
-        self.assertTrue(isinstance(result, Index))
-        self.assertEqual(result.dtype, object)
-        self.assertTrue(result.equals(expected))
-        self.assertEqual(result.name, expected.name)
-        self.assertEqual(idx.tolist(), expected_list)
-
-        idx = pd.date_range(start='2013-01-01', periods=4, freq='M', name='idx', tz='Asia/Tokyo')
-        expected_list = [pd.Timestamp('2013-01-31', tz='Asia/Tokyo'),
-                         pd.Timestamp('2013-02-28', tz='Asia/Tokyo'),
-                         pd.Timestamp('2013-03-31', tz='Asia/Tokyo'),
-                         pd.Timestamp('2013-04-30', tz='Asia/Tokyo')]
-        expected = pd.Index(expected_list, dtype=object, name='idx')
-        result = idx.asobject
-        self.assertTrue(isinstance(result, Index))
-        self.assertEqual(result.dtype, object)
-        self.assertTrue(result.equals(expected))
-        self.assertEqual(result.name, expected.name)
-        self.assertEqual(idx.tolist(), expected_list)
-
-        idx = DatetimeIndex([datetime(2013, 1, 1), datetime(2013, 1, 2),
-                             pd.NaT, datetime(2013, 1, 4)], name='idx')
-        expected_list = [pd.Timestamp('2013-01-01'), pd.Timestamp('2013-01-02'),
-                         pd.NaT, pd.Timestamp('2013-01-04')]
-        expected = pd.Index(expected_list, dtype=object, name='idx')
-        result = idx.asobject
-        self.assertTrue(isinstance(result, Index))
-        self.assertEqual(result.dtype, object)
-        self.assertTrue(result.equals(expected))
-        self.assertEqual(result.name, expected.name)
-        self.assertEqual(idx.tolist(), expected_list)
-
-    def test_minmax(self):
-        for tz in self.tz:
-            # monotonic
-            idx1 = pd.DatetimeIndex([pd.NaT, '2011-01-01', '2011-01-02',
-                                     '2011-01-03'], tz=tz)
-            self.assertTrue(idx1.is_monotonic)
-
-            # non-monotonic
-            idx2 = pd.DatetimeIndex(['2011-01-01', pd.NaT, '2011-01-03',
-                                     '2011-01-02', pd.NaT], tz=tz)
-            self.assertFalse(idx2.is_monotonic)
-
-            for idx in [idx1, idx2]:
-                self.assertEqual(idx.min(), pd.Timestamp('2011-01-01', tz=tz))
-                self.assertEqual(idx.max(), pd.Timestamp('2011-01-03', tz=tz))
-
-        for op in ['min', 'max']:
-            # Return NaT
-            obj = DatetimeIndex([])
-            self.assertTrue(pd.isnull(getattr(obj, op)()))
-
-            obj = DatetimeIndex([pd.NaT])
-            self.assertTrue(pd.isnull(getattr(obj, op)()))
-
-            obj = DatetimeIndex([pd.NaT, pd.NaT, pd.NaT])
-            self.assertTrue(pd.isnull(getattr(obj, op)()))
-
-    def test_representation(self):
-        idx1 = DatetimeIndex([], freq='D')
-        idx2 = DatetimeIndex(['2011-01-01'], freq='D')
-        idx3 = DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D')
-        idx4 = DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], freq='D')
-        idx5 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'],
-                             freq='H', tz='Asia/Tokyo')
-        idx6 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT],
-                             tz='US/Eastern')
-
-        exp1 = """<class 'pandas.tseries.index.DatetimeIndex'>
-Length: 0, Freq: D, Timezone: None"""
-        exp2 = """<class 'pandas.tseries.index.DatetimeIndex'>
-[2011-01-01]
-Length: 1, Freq: D, Timezone: None"""
-        exp3 = """<class 'pandas.tseries.index.DatetimeIndex'>
-[2011-01-01, 2011-01-02]
-Length: 2, Freq: D, Timezone: None"""
-        exp4 = """<class 'pandas.tseries.index.DatetimeIndex'>
-[2011-01-01, ..., 2011-01-03]
-Length: 3, Freq: D, Timezone: None"""
-        exp5 = """<class 'pandas.tseries.index.DatetimeIndex'>
-[2011-01-01 09:00:00+09:00, ..., 2011-01-01 11:00:00+09:00]
-Length: 3, Freq: H, Timezone: Asia/Tokyo"""
-        exp6 = """<class 'pandas.tseries.index.DatetimeIndex'>
-[2011-01-01 09:00:00-05:00, ..., NaT]
-Length: 3, Freq: None, Timezone: US/Eastern"""
-
-        for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6],
-                                 [exp1, exp2, exp3, exp4, exp5, exp6]):
-            for func in ['__repr__', '__unicode__', '__str__']:
-                result = getattr(idx, func)()
-                self.assertEqual(result, expected)
-
-    def test_resolution(self):
-        for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', 'T', 'S', 'L', 'U'],
-                                  ['day', 'day', 'day', 'day',
-                                   'hour', 'minute', 'second', 'millisecond', 'microsecond']):
-            for tz in [None, 'Asia/Tokyo', 'US/Eastern']:
-                idx = pd.date_range(start='2013-04-01', periods=30, freq=freq, tz=tz)
-                self.assertEqual(idx.resolution, expected)
-
-    def test_add_iadd(self):
-        for tz in self.tz:
-            # union
-            rng1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz)
-            other1 = pd.date_range('1/6/2000', freq='D', periods=5, tz=tz)
-            expected1 = pd.date_range('1/1/2000', freq='D', periods=10, tz=tz)
-
-            rng2 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz)
-            other2 = pd.date_range('1/4/2000', freq='D', periods=5, tz=tz)
-            expected2 = pd.date_range('1/1/2000', freq='D', periods=8, tz=tz)
-
-            rng3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz)
-            other3 = pd.DatetimeIndex([], tz=tz)
-            expected3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz)
-
-            for rng, other, expected in [(rng1, other1, expected1), (rng2, other2, expected2),
-                                         (rng3, other3, expected3)]:
-                result_union = rng.union(other)
-                tm.assert_index_equal(result_union, expected)
-
-            # offset
-            offsets = [pd.offsets.Hour(2), timedelta(hours=2), np.timedelta64(2, 'h')]
-
-            for delta in offsets:
-                rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz)
-                result = rng + delta
-                expected = pd.date_range('2000-01-01 02:00', '2000-02-01 02:00', tz=tz)
-                tm.assert_index_equal(result, expected)
-                rng += delta
-                tm.assert_index_equal(rng, expected)
-
-            # int
-            rng = pd.date_range('2000-01-01 09:00', freq='H', periods=10, tz=tz)
-            result = rng + 1
-            expected = pd.date_range('2000-01-01 10:00', freq='H', periods=10, tz=tz)
-            tm.assert_index_equal(result, expected)
-            rng += 1
-            tm.assert_index_equal(rng, expected)
-
-    def test_sub_isub(self):
-        for tz in self.tz:
-            # diff
-            rng1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz)
-            other1 = pd.date_range('1/6/2000', freq='D', periods=5, tz=tz)
-            expected1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz)
-
-            rng2 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz)
-            other2 = pd.date_range('1/4/2000', freq='D', periods=5, tz=tz)
-            expected2 = pd.date_range('1/1/2000', freq='D', periods=3, tz=tz)
-
-            rng3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz)
-            other3 = pd.DatetimeIndex([], tz=tz)
-            expected3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz)
-
-            for rng, other, expected in [(rng1, other1, expected1), (rng2, other2, expected2),
-                                         (rng3, other3, expected3)]:
-                result_union = rng.difference(other)
-                tm.assert_index_equal(result_union, expected)
-
-            # offset
-            offsets = [pd.offsets.Hour(2), timedelta(hours=2), np.timedelta64(2, 'h')]
-
-            for delta in offsets:
-                rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz)
-                result = rng - delta
-                expected = pd.date_range('1999-12-31 22:00', '2000-01-31 22:00', tz=tz)
-                tm.assert_index_equal(result, expected)
-                rng -= delta
-                tm.assert_index_equal(rng, expected)
-
-            # int
-            rng = pd.date_range('2000-01-01 09:00', freq='H', periods=10, tz=tz)
-            result = rng - 1
-            expected = pd.date_range('2000-01-01 08:00', freq='H', periods=10, tz=tz)
-            tm.assert_index_equal(result, expected)
-            rng -= 1
-            tm.assert_index_equal(rng, expected)
-
-    def test_value_counts_unique(self):
-        # GH 7735
-        for tz in [None, 'UTC', 'Asia/Tokyo', 'US/Eastern']:
-            idx = pd.date_range('2011-01-01 09:00', freq='H', periods=10)
-            # create repeated values, 'n'th element is repeated by n+1 times
-            idx = DatetimeIndex(np.repeat(idx.values, range(1, len(idx) + 1)), tz=tz)
-
-            exp_idx = pd.date_range('2011-01-01 18:00', freq='-1H', periods=10, tz=tz)
-            expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64')
-            tm.assert_series_equal(idx.value_counts(), expected)
-
-            expected = pd.date_range('2011-01-01 09:00', freq='H', periods=10, tz=tz)
-            tm.assert_index_equal(idx.unique(), expected)
-
-            idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 09:00', '2013-01-01 09:00',
-                                 '2013-01-01 08:00', '2013-01-01 08:00', pd.NaT], tz=tz)
-
-            exp_idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 08:00'], tz=tz)
-            expected = Series([3, 2], index=exp_idx)
-            tm.assert_series_equal(idx.value_counts(), expected)
-
-            exp_idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 08:00', pd.NaT], tz=tz)
-            expected = Series([3, 2, 1], index=exp_idx)
-            tm.assert_series_equal(idx.value_counts(dropna=False), expected)
-
-            tm.assert_index_equal(idx.unique(), exp_idx)
-
-
-class TestPeriodIndexOps(Ops):
-
-    def setUp(self):
-        super(TestPeriodIndexOps, self).setUp()
-        mask = lambda x: isinstance(x, DatetimeIndex) or isinstance(x, PeriodIndex) or is_datetimelike(x)
-        self.is_valid_objs = [ o for o in self.objs if mask(o) ]
-        self.not_valid_objs = [ o for o in self.objs if not mask(o) ]
-
-    def test_ops_properties(self):
-        self.check_ops_properties(['year','month','day','hour','minute','second','weekofyear','week','dayofweek','dayofyear','quarter'])
-        self.check_ops_properties(['qyear'], lambda x: isinstance(x,PeriodIndex))
-
-    def test_asobject_tolist(self):
-        idx = pd.period_range(start='2013-01-01', periods=4, freq='M', name='idx')
-        expected_list = [pd.Period('2013-01-31', freq='M'), pd.Period('2013-02-28', freq='M'),
-                         pd.Period('2013-03-31', freq='M'), pd.Period('2013-04-30', freq='M')]
-        expected = pd.Index(expected_list, dtype=object, name='idx')
-        result = idx.asobject
-        self.assertTrue(isinstance(result, Index))
-        self.assertEqual(result.dtype, object)
-        self.assertTrue(result.equals(expected))
-        self.assertEqual(result.name, expected.name)
-        self.assertEqual(idx.tolist(), expected_list)
-
-        idx = PeriodIndex(['2013-01-01', '2013-01-02', 'NaT', '2013-01-04'], freq='D', name='idx')
-        expected_list = [pd.Period('2013-01-01', freq='D'), pd.Period('2013-01-02', freq='D'),
-                         pd.Period('NaT', freq='D'), pd.Period('2013-01-04', freq='D')]
-        expected = pd.Index(expected_list, dtype=object, name='idx')
-        result = idx.asobject
-        self.assertTrue(isinstance(result, Index))
-        self.assertEqual(result.dtype, object)
-        for i in [0, 1, 3]:
-            self.assertTrue(result[i], expected[i])
-        self.assertTrue(result[2].ordinal, pd.tslib.iNaT)
-        self.assertTrue(result[2].freq, 'D')
-        self.assertEqual(result.name, expected.name)
-
-        result_list = idx.tolist()
-        for i in [0, 1, 3]:
-            self.assertTrue(result_list[i], expected_list[i])
-        self.assertTrue(result_list[2].ordinal, pd.tslib.iNaT)
-        self.assertTrue(result_list[2].freq, 'D')
-
-    def test_minmax(self):
-
-        # monotonic
-        idx1 = pd.PeriodIndex([pd.NaT, '2011-01-01', '2011-01-02',
-                               '2011-01-03'], freq='D')
-        self.assertTrue(idx1.is_monotonic)
-
-        # non-monotonic
-        idx2 = pd.PeriodIndex(['2011-01-01', pd.NaT, '2011-01-03',
-                               '2011-01-02', pd.NaT], freq='D')
-        self.assertFalse(idx2.is_monotonic)
-
-        for idx in [idx1, idx2]:
-            self.assertEqual(idx.min(), pd.Period('2011-01-01', freq='D'))
-            self.assertEqual(idx.max(), pd.Period('2011-01-03', freq='D'))
-
-        for op in ['min', 'max']:
-            # Return NaT
-            obj = PeriodIndex([], freq='M')
-            result = getattr(obj, op)()
-            self.assertEqual(result.ordinal, tslib.iNaT)
-            self.assertEqual(result.freq, 'M')
-
-            obj = PeriodIndex([pd.NaT], freq='M')
-            result = getattr(obj, op)()
-            self.assertEqual(result.ordinal, tslib.iNaT)
-            self.assertEqual(result.freq, 'M')
-
-            obj = PeriodIndex([pd.NaT, pd.NaT, pd.NaT], freq='M')
-            result = getattr(obj, op)()
-            self.assertEqual(result.ordinal, tslib.iNaT)
-            self.assertEqual(result.freq, 'M')
-
-    def test_representation(self):
-        # GH 7601
-        idx1 = PeriodIndex([], freq='D')
-        idx2 = PeriodIndex(['2011-01-01'], freq='D')
-        idx3 = PeriodIndex(['2011-01-01', '2011-01-02'], freq='D')
-        idx4 = PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03'], freq='D')
-        idx5 = PeriodIndex(['2011', '2012', '2013'], freq='A')
-        idx6 = PeriodIndex(['2011-01-01 09:00', '2012-02-01 10:00', 'NaT'], freq='H')
-
-        idx7 = pd.period_range('2013Q1', periods=1, freq="Q")
-        idx8 = pd.period_range('2013Q1', periods=2, freq="Q")
-        idx9 = pd.period_range('2013Q1', periods=3, freq="Q")
-
-        exp1 = """<class 'pandas.tseries.period.PeriodIndex'>
-Length: 0, Freq: D"""
-        exp2 = """<class 'pandas.tseries.period.PeriodIndex'>
-[2011-01-01]
-Length: 1, Freq: D"""
-        exp3 = """<class 'pandas.tseries.period.PeriodIndex'>
-[2011-01-01, 2011-01-02]
-Length: 2, Freq: D"""
-        exp4 = """<class 'pandas.tseries.period.PeriodIndex'>
-[2011-01-01, ..., 2011-01-03]
-Length: 3, Freq: D"""
-        exp5 = """<class 'pandas.tseries.period.PeriodIndex'>
-[2011, ..., 2013]
-Length: 3, Freq: A-DEC"""
-        exp6 = """<class 'pandas.tseries.period.PeriodIndex'>
-[2011-01-01 09:00, ..., NaT]
-Length: 3, Freq: H"""
-        exp7 = """<class 'pandas.tseries.period.PeriodIndex'>
-[2013Q1]
-Length: 1, Freq: Q-DEC"""
-        exp8 = """<class 'pandas.tseries.period.PeriodIndex'>
-[2013Q1, 2013Q2]
-Length: 2, Freq: Q-DEC"""
-        exp9 = """<class 'pandas.tseries.period.PeriodIndex'>
-[2013Q1, ..., 2013Q3]
-Length: 3, Freq: Q-DEC"""
-
-        for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6, idx7, idx8, idx9],
-                                 [exp1, exp2, exp3, exp4, exp5, exp6, exp7, exp8, exp9]):
-            for func in ['__repr__', '__unicode__', '__str__']:
-                result = getattr(idx, func)()
-                self.assertEqual(result, expected)
-
-    def test_resolution(self):
-        for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', 'T', 'S', 'L', 'U'],
-                                  ['day', 'day', 'day', 'day',
-                                   'hour', 'minute', 'second', 'millisecond', 'microsecond']):
-
-            idx = pd.period_range(start='2013-04-01', periods=30, freq=freq)
-            self.assertEqual(idx.resolution, expected)
-
-    def test_add_iadd(self):
-        # union
-        rng1 = pd.period_range('1/1/2000', freq='D', periods=5)
-        other1 = pd.period_range('1/6/2000', freq='D', periods=5)
-        expected1 = pd.period_range('1/1/2000', freq='D', periods=10)
-
-        rng2 = pd.period_range('1/1/2000', freq='D', periods=5)
-        other2 = pd.period_range('1/4/2000', freq='D', periods=5)
-        expected2 = pd.period_range('1/1/2000', freq='D', periods=8)
-
-        rng3 = pd.period_range('1/1/2000', freq='D', periods=5)
-        other3 = pd.PeriodIndex([], freq='D')
-        expected3 = pd.period_range('1/1/2000', freq='D', periods=5)
-
-        rng4 = pd.period_range('2000-01-01 09:00', freq='H', periods=5)
-        other4 = pd.period_range('2000-01-02 09:00', freq='H', periods=5)
-        expected4 = pd.PeriodIndex(['2000-01-01 09:00', '2000-01-01 10:00',
-                                    '2000-01-01 11:00', '2000-01-01 12:00',
-                                    '2000-01-01 13:00', '2000-01-02 09:00',
-                                    '2000-01-02 10:00', '2000-01-02 11:00',
-                                    '2000-01-02 12:00', '2000-01-02 13:00'],
-                                   freq='H')
-
-        rng5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03',
-                               '2000-01-01 09:05'], freq='T')
-        other5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:05'
-                                 '2000-01-01 09:08'], freq='T')
-        expected5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03',
-                                    '2000-01-01 09:05', '2000-01-01 09:08'],
-                                   freq='T')
-
-        rng6 = pd.period_range('2000-01-01', freq='M', periods=7)
-        other6 = pd.period_range('2000-04-01', freq='M', periods=7)
-        expected6 = pd.period_range('2000-01-01', freq='M', periods=10)
-
-        rng7 = pd.period_range('2003-01-01', freq='A', periods=5)
-        other7 = pd.period_range('1998-01-01', freq='A', periods=8)
-        expected7 = pd.period_range('1998-01-01', freq='A', periods=10)
-
-        for rng, other, expected in [(rng1, other1, expected1), (rng2, other2, expected2),
-                                     (rng3, other3, expected3), (rng4, other4, expected4),
-                                     (rng5, other5, expected5), (rng6, other6, expected6),
-                                     (rng7, other7, expected7)]:
-
-            result_union = rng.union(other)
-            tm.assert_index_equal(result_union, expected)
-
-        # offset
-        # DateOffset
-        rng = pd.period_range('2014', '2024', freq='A')
-        result = rng + pd.offsets.YearEnd(5)
-        expected = pd.period_range('2019', '2029', freq='A')
-        tm.assert_index_equal(result, expected)
-        rng += pd.offsets.YearEnd(5)
-        tm.assert_index_equal(rng, expected)
-
-        for o in [pd.offsets.YearBegin(2), pd.offsets.MonthBegin(1), pd.offsets.Minute(),
-                  np.timedelta64(365, 'D'), timedelta(365)]:
-            with tm.assertRaisesRegexp(ValueError, 'Input has different freq from Period'):
-                rng + o
-
-        rng = pd.period_range('2014-01', '2016-12', freq='M')
-        result = rng + pd.offsets.MonthEnd(5)
-        expected = pd.period_range('2014-06', '2017-05', freq='M')
-        tm.assert_index_equal(result, expected)
-        rng += pd.offsets.MonthEnd(5)
-        tm.assert_index_equal(rng, expected)
-
-        for o in [pd.offsets.YearBegin(2), pd.offsets.MonthBegin(1), pd.offsets.Minute(),
-                  np.timedelta64(365, 'D'), timedelta(365)]:
-            rng = pd.period_range('2014-01', '2016-12', freq='M')
-            with tm.assertRaisesRegexp(ValueError, 'Input has different freq from Period'):
-                rng + o
-
-        # Tick
-        offsets = [pd.offsets.Day(3), timedelta(days=3), np.timedelta64(3, 'D'),
-                   pd.offsets.Hour(72), timedelta(minutes=60*24*3), np.timedelta64(72, 'h')]
-        for delta in offsets:
-            rng = pd.period_range('2014-05-01', '2014-05-15', freq='D')
-            result = rng + delta
-            expected = pd.period_range('2014-05-04', '2014-05-18', freq='D')
-            tm.assert_index_equal(result, expected)
-            rng += delta
-            tm.assert_index_equal(rng, expected)
-
-        for o in [pd.offsets.YearBegin(2), pd.offsets.MonthBegin(1), pd.offsets.Minute(),
-                  np.timedelta64(4, 'h'), timedelta(hours=23)]:
-            rng = pd.period_range('2014-05-01', '2014-05-15', freq='D')
-            with tm.assertRaisesRegexp(ValueError, 'Input has different freq from Period'):
-                rng + o
-
-        offsets = [pd.offsets.Hour(2), timedelta(hours=2), np.timedelta64(2, 'h'),
-                   pd.offsets.Minute(120), timedelta(minutes=120), np.timedelta64(120, 'm')]
-        for delta in offsets:
-            rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', freq='H')
-            result = rng + delta
-            expected = pd.period_range('2014-01-01 12:00', '2014-01-05 12:00', freq='H')
-            tm.assert_index_equal(result, expected)
-            rng += delta
-            tm.assert_index_equal(rng, expected)
-
-        for delta in [pd.offsets.YearBegin(2), timedelta(minutes=30), np.timedelta64(30, 's')]:
-            rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', freq='H')
-            with tm.assertRaisesRegexp(ValueError, 'Input has different freq from Period'):
-                result = rng + delta
-            with tm.assertRaisesRegexp(ValueError, 'Input has different freq from Period'):
-                rng += delta
-
-        # int
-        rng = pd.period_range('2000-01-01 09:00', freq='H', periods=10)
-        result = rng + 1
-        expected = pd.period_range('2000-01-01 10:00', freq='H', periods=10)
-        tm.assert_index_equal(result, expected)
-        rng += 1
-        tm.assert_index_equal(rng, expected)
-
-    def test_sub_isub(self):
-        # diff
-        rng1 = pd.period_range('1/1/2000', freq='D', periods=5)
-        other1 = pd.period_range('1/6/2000', freq='D', periods=5)
-        expected1 = pd.period_range('1/1/2000', freq='D', periods=5)
-
-        rng2 = pd.period_range('1/1/2000', freq='D', periods=5)
-        other2 = pd.period_range('1/4/2000', freq='D', periods=5)
-        expected2 = pd.period_range('1/1/2000', freq='D', periods=3)
-
-        rng3 = pd.period_range('1/1/2000', freq='D', periods=5)
-        other3 = pd.PeriodIndex([], freq='D')
-        expected3 = pd.period_range('1/1/2000', freq='D', periods=5)
-
-        rng4 = pd.period_range('2000-01-01 09:00', freq='H', periods=5)
-        other4 = pd.period_range('2000-01-02 09:00', freq='H', periods=5)
-        expected4 = rng4
-
-        rng5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03',
-                               '2000-01-01 09:05'], freq='T')
-        other5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:05'], freq='T')
-        expected5 = pd.PeriodIndex(['2000-01-01 09:03'], freq='T')
-
-        rng6 = pd.period_range('2000-01-01', freq='M', periods=7)
-        other6 = pd.period_range('2000-04-01', freq='M', periods=7)
-        expected6 = pd.period_range('2000-01-01', freq='M', periods=3)
-
-        rng7 = pd.period_range('2003-01-01', freq='A', periods=5)
-        other7 = pd.period_range('1998-01-01', freq='A', periods=8)
-        expected7 = pd.period_range('2006-01-01', freq='A', periods=2)
-
-        for rng, other, expected in [(rng1, other1, expected1), (rng2, other2, expected2),
-                                     (rng3, other3, expected3), (rng4, other4, expected4),
-                                     (rng5, other5, expected5), (rng6, other6, expected6),
-                                     (rng7, other7, expected7),]:
-            result_union = rng.difference(other)
-            tm.assert_index_equal(result_union, expected)
-
-        # offset
-        # DateOffset
-        rng = pd.period_range('2014', '2024', freq='A')
-        result = rng - pd.offsets.YearEnd(5)
-        expected = pd.period_range('2009', '2019', freq='A')
-        tm.assert_index_equal(result, expected)
-        rng -= pd.offsets.YearEnd(5)
-        tm.assert_index_equal(rng, expected)
-
-        for o in [pd.offsets.YearBegin(2), pd.offsets.MonthBegin(1), pd.offsets.Minute(),
-                  np.timedelta64(365, 'D'), timedelta(365)]:
-            rng = pd.period_range('2014', '2024', freq='A')
-            with tm.assertRaisesRegexp(ValueError, 'Input has different freq from Period'):
-                rng - o
-
-        rng = pd.period_range('2014-01', '2016-12', freq='M')
-        result = rng - pd.offsets.MonthEnd(5)
-        expected = pd.period_range('2013-08', '2016-07', freq='M')
-        tm.assert_index_equal(result, expected)
-        rng -= pd.offsets.MonthEnd(5)
-        tm.assert_index_equal(rng, expected)
-
-        for o in [pd.offsets.YearBegin(2), pd.offsets.MonthBegin(1), pd.offsets.Minute(),
-                  np.timedelta64(365, 'D'), timedelta(365)]:
-            rng = pd.period_range('2014-01', '2016-12', freq='M')
-            with tm.assertRaisesRegexp(ValueError, 'Input has different freq from Period'):
-                rng - o
-
-        # Tick
-        offsets = [pd.offsets.Day(3), timedelta(days=3), np.timedelta64(3, 'D'),
-                   pd.offsets.Hour(72), timedelta(minutes=60*24*3), np.timedelta64(72, 'h')]
-        for delta in offsets:
-            rng = pd.period_range('2014-05-01', '2014-05-15', freq='D')
-            result = rng - delta
-            expected = pd.period_range('2014-04-28', '2014-05-12', freq='D')
-            tm.assert_index_equal(result, expected)
-            rng -= delta
-            tm.assert_index_equal(rng, expected)
-
-        for o in [pd.offsets.YearBegin(2), pd.offsets.MonthBegin(1), pd.offsets.Minute(),
-                  np.timedelta64(4, 'h'), timedelta(hours=23)]:
-            rng = pd.period_range('2014-05-01', '2014-05-15', freq='D')
-            with tm.assertRaisesRegexp(ValueError, 'Input has different freq from Period'):
-                rng - o
-
-        offsets = [pd.offsets.Hour(2), timedelta(hours=2), np.timedelta64(2, 'h'),
-                   pd.offsets.Minute(120), timedelta(minutes=120), np.timedelta64(120, 'm')]
-        for delta in offsets:
-            rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', freq='H')
-            result = rng - delta
-            expected = pd.period_range('2014-01-01 08:00', '2014-01-05 08:00', freq='H')
-            tm.assert_index_equal(result, expected)
-            rng -= delta
-            tm.assert_index_equal(rng, expected)
-
-        for delta in [pd.offsets.YearBegin(2), timedelta(minutes=30), np.timedelta64(30, 's')]:
-            rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', freq='H')
-            with tm.assertRaisesRegexp(ValueError, 'Input has different freq from Period'):
-                result = rng + delta
-            with tm.assertRaisesRegexp(ValueError, 'Input has different freq from Period'):
-                rng += delta
-
-        # int
-        rng = pd.period_range('2000-01-01 09:00', freq='H', periods=10)
-        result = rng - 1
-        expected = pd.period_range('2000-01-01 08:00', freq='H', periods=10)
-        tm.assert_index_equal(result, expected)
-        rng -= 1
-        tm.assert_index_equal(rng, expected)
-
-    def test_value_counts_unique(self):
-        # GH 7735
-        idx = pd.period_range('2011-01-01 09:00', freq='H', periods=10)
-        # create repeated values, 'n'th element is repeated by n+1 times
-        idx = PeriodIndex(np.repeat(idx.values, range(1, len(idx) + 1)), freq='H')
-
-        exp_idx = PeriodIndex(['2011-01-01 18:00', '2011-01-01 17:00', '2011-01-01 16:00',
-                               '2011-01-01 15:00', '2011-01-01 14:00', '2011-01-01 13:00',
-                               '2011-01-01 12:00', '2011-01-01 11:00', '2011-01-01 10:00',
-                               '2011-01-01 09:00'], freq='H')
-        expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64')
-        tm.assert_series_equal(idx.value_counts(), expected)
-
-        expected = pd.period_range('2011-01-01 09:00', freq='H', periods=10)
-        tm.assert_index_equal(idx.unique(), expected)
-
-        idx = PeriodIndex(['2013-01-01 09:00', '2013-01-01 09:00', '2013-01-01 09:00',
-                           '2013-01-01 08:00', '2013-01-01 08:00', pd.NaT], freq='H')
-
-        exp_idx = PeriodIndex(['2013-01-01 09:00', '2013-01-01 08:00'], freq='H')
-        expected = Series([3, 2], index=exp_idx)
-        tm.assert_series_equal(idx.value_counts(), expected)
-
-        exp_idx = PeriodIndex(['2013-01-01 09:00', '2013-01-01 08:00', pd.NaT], freq='H')
-        expected = Series([3, 2, 1], index=exp_idx)
-        tm.assert_series_equal(idx.value_counts(dropna=False), expected)
-
-        tm.assert_index_equal(idx.unique(), exp_idx)
-
-
 if __name__ == '__main__':
     import nose
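The ``test_value_counts_inferred`` changes at the top of this file pin down that ``value_counts``/``unique`` on timedelta data are now keyed by ``Timedelta`` rather than raw int64 nanoseconds. Illustrative sketch (not part of the diff)::

    import pandas as pd

    td = pd.Series(pd.to_timedelta(['1 days'] * 6))
    td.value_counts().index[0]  # Timedelta('1 days'), not 86400000000000
    td.unique()                 # m8[ns] values matching TimedeltaIndex(['1 days'])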
diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py
index d6f734be56c32..51d767a291694 100644
--- a/pandas/tests/test_format.py
+++ b/pandas/tests/test_format.py
@@ -2460,33 +2460,33 @@ def test_timedelta64(self):
 
         o = Series([datetime(2012,1,1,microsecond=150)]*3)
         y = s-o
         result = y.to_string()
-        self.assertTrue('-0 days, 00:00:00.000150' in result)
+        self.assertTrue('-1 days +23:59:59.999850' in result)
 
         # rounding?
         o = Series([datetime(2012,1,1,1)]*3)
         y = s-o
         result = y.to_string()
-        self.assertTrue('-0 days, 01:00:00' in result)
-        self.assertTrue('1 days, 23:00:00' in result)
+        self.assertTrue('-1 days +23:00:00' in result)
+        self.assertTrue('1 days 23:00:00' in result)
 
         o = Series([datetime(2012,1,1,1,1)]*3)
         y = s-o
         result = y.to_string()
-        self.assertTrue('-0 days, 01:01:00' in result)
-        self.assertTrue('1 days, 22:59:00' in result)
+        self.assertTrue('-1 days +22:59:00' in result)
+        self.assertTrue('1 days 22:59:00' in result)
 
         o = Series([datetime(2012,1,1,1,1,microsecond=150)]*3)
         y = s-o
         result = y.to_string()
-        self.assertTrue('-0 days, 01:01:00.000150' in result)
-        self.assertTrue('1 days, 22:58:59.999850' in result)
+        self.assertTrue('-1 days +22:58:59.999850' in result)
+        self.assertTrue('0 days 22:58:59.999850' in result)
 
         # neg time
         td = timedelta(minutes=5,seconds=3)
         s2 = Series(date_range('2012-1-1', periods=3, freq='D')) + td
         y = s - s2
         result = y.to_string()
-        self.assertTrue('-00:05:03' in result)
+        self.assertTrue('-1 days +23:54:57' in result)
 
         td = timedelta(microseconds=550)
         s2 = Series(date_range('2012-1-1', periods=3, freq='D')) + td
@@ -2494,6 +2494,11 @@ def test_timedelta64(self):
         result = y.to_string()
         self.assertTrue('2012-01-01 23:59:59.999450' in result)
 
+        # no boxing of the actual elements
+        td = Series(pd.timedelta_range('1 days',periods=3))
+        result = td.to_string()
+        self.assertEqual(result,u("0   1 days\n1   2 days\n2   3 days"))
+
     def test_mixed_datetime64(self):
         df = DataFrame({'A': [1, 2],
                         'B': ['2012-01-01', '2012-01-02']})
@@ -2759,33 +2764,51 @@ def test_format(self):
 
 class TestRepr_timedelta64(tm.TestCase):
 
-    def test_legacy(self):
+
+    def test_none(self):
         delta_1d = pd.to_timedelta(1, unit='D')
         delta_0d = pd.to_timedelta(0, unit='D')
         delta_1s = pd.to_timedelta(1, unit='s')
         delta_500ms = pd.to_timedelta(500, unit='ms')
 
-        self.assertEqual(tslib.repr_timedelta64(delta_1d), "1 days, 00:00:00")
-        self.assertEqual(tslib.repr_timedelta64(-delta_1d), "-1 days, 00:00:00")
-        self.assertEqual(tslib.repr_timedelta64(delta_0d), "00:00:00")
-        self.assertEqual(tslib.repr_timedelta64(delta_1s), "00:00:01")
-        self.assertEqual(tslib.repr_timedelta64(delta_500ms), "00:00:00.500000")
-        self.assertEqual(tslib.repr_timedelta64(delta_1d + delta_1s), "1 days, 00:00:01")
-        self.assertEqual(tslib.repr_timedelta64(delta_1d + delta_500ms), "1 days, 00:00:00.500000")
+        drepr = lambda x: x._repr_base()
+        self.assertEqual(drepr(delta_1d), "1 days")
+        self.assertEqual(drepr(-delta_1d), "-1 days")
+        self.assertEqual(drepr(delta_0d), "0 days")
+        self.assertEqual(drepr(delta_1s), "0 days 00:00:01")
+        self.assertEqual(drepr(delta_500ms), "0 days 00:00:00.500000")
+        self.assertEqual(drepr(delta_1d + delta_1s), "1 days 00:00:01")
+        self.assertEqual(drepr(delta_1d + delta_500ms), "1 days 00:00:00.500000")
 
-    def test_short(self):
+    def test_even_day(self):
         delta_1d = pd.to_timedelta(1, unit='D')
         delta_0d = pd.to_timedelta(0, unit='D')
         delta_1s = pd.to_timedelta(1, unit='s')
         delta_500ms = pd.to_timedelta(500, unit='ms')
 
-        self.assertEqual(tslib.repr_timedelta64(delta_1d, format='short'), "1 days")
-        self.assertEqual(tslib.repr_timedelta64(-delta_1d, format='short'), "-1 days")
-        self.assertEqual(tslib.repr_timedelta64(delta_0d, format='short'), "00:00:00")
-        self.assertEqual(tslib.repr_timedelta64(delta_1s, format='short'), "00:00:01")
-        self.assertEqual(tslib.repr_timedelta64(delta_500ms, format='short'), "00:00:00.500000")
-        self.assertEqual(tslib.repr_timedelta64(delta_1d + delta_1s, format='short'), "1 days, 00:00:01")
-        self.assertEqual(tslib.repr_timedelta64(delta_1d + delta_500ms, format='short'), "1 days, 00:00:00.500000")
+        drepr = lambda x: x._repr_base(format='even_day')
+        self.assertEqual(drepr(delta_1d), "1 days")
+        self.assertEqual(drepr(-delta_1d), "-1 days")
+        self.assertEqual(drepr(delta_0d), "0 days")
+        self.assertEqual(drepr(delta_1s), "0 days 00:00:01")
+        self.assertEqual(drepr(delta_500ms), "0 days 00:00:00.500000")
+        self.assertEqual(drepr(delta_1d + delta_1s), "1 days 00:00:01")
+        self.assertEqual(drepr(delta_1d + delta_500ms), "1 days 00:00:00.500000")
+
+    def test_sub_day(self):
+        delta_1d = pd.to_timedelta(1, unit='D')
+        delta_0d = pd.to_timedelta(0, unit='D')
+        delta_1s = pd.to_timedelta(1, unit='s')
+        delta_500ms = pd.to_timedelta(500, unit='ms')
+
+        drepr = lambda x: x._repr_base(format='sub_day')
+        self.assertEqual(drepr(delta_1d), "1 days")
+        self.assertEqual(drepr(-delta_1d), "-1 days")
+        self.assertEqual(drepr(delta_0d), "00:00:00")
+        self.assertEqual(drepr(delta_1s), "00:00:01")
+        self.assertEqual(drepr(delta_500ms), "00:00:00.500000")
+        self.assertEqual(drepr(delta_1d + delta_1s), "1 days 00:00:01")
+        self.assertEqual(drepr(delta_1d + delta_500ms), "1 days 00:00:00.500000")
 
     def test_long(self):
         delta_1d = pd.to_timedelta(1, unit='D')
@@ -2793,65 +2816,69 @@ def test_long(self):
         delta_1s = pd.to_timedelta(1, unit='s')
         delta_500ms = pd.to_timedelta(500, unit='ms')
 
-        self.assertEqual(tslib.repr_timedelta64(delta_1d, format='long'), "1 days, 00:00:00")
-        self.assertEqual(tslib.repr_timedelta64(-delta_1d, format='long'), "-1 days, 00:00:00")
-        self.assertEqual(tslib.repr_timedelta64(delta_0d, format='long'), "0 days, 00:00:00")
-        self.assertEqual(tslib.repr_timedelta64(delta_1s, format='long'), "0 days, 00:00:01")
-        self.assertEqual(tslib.repr_timedelta64(delta_500ms, format='long'), "0 days, 00:00:00.500000")
-        self.assertEqual(tslib.repr_timedelta64(delta_1d + delta_1s, format='long'), "1 days, 00:00:01")
-        self.assertEqual(tslib.repr_timedelta64(delta_1d + delta_500ms, format='long'), "1 days, 00:00:00.500000")
+        drepr = lambda x: x._repr_base(format='long')
+        self.assertEqual(drepr(delta_1d), "1 days 00:00:00")
+        self.assertEqual(drepr(-delta_1d), "-1 days +00:00:00")
+        self.assertEqual(drepr(delta_0d), "0 days 00:00:00")
+        self.assertEqual(drepr(delta_1s), "0 days 00:00:01")
+        self.assertEqual(drepr(delta_500ms), "0 days 00:00:00.500000")
+        self.assertEqual(drepr(delta_1d + delta_1s), "1 days 00:00:01")
+        self.assertEqual(drepr(delta_1d + delta_500ms), "1 days 00:00:00.500000")
 
+    def test_all(self):
+        delta_1d = pd.to_timedelta(1, unit='D')
+        delta_0d = pd.to_timedelta(0, unit='D')
+        delta_1ns = pd.to_timedelta(1, unit='ns')
 
-class TestTimedelta64Formatter(tm.TestCase):
-    def test_mixed(self):
-        x = pd.to_timedelta(list(range(5)) + [pd.NaT], unit='D')
-        y = pd.to_timedelta(list(range(5)) + [pd.NaT], unit='s')
-        result = fmt.Timedelta64Formatter(x + y).get_result()
-        self.assertEqual(result[0].strip(), "0 days, 00:00:00")
-        self.assertEqual(result[1].strip(), "1 days, 00:00:01")
+        drepr = lambda x: x._repr_base(format='all')
+        self.assertEqual(drepr(delta_1d), "1 days 00:00:00.000000000")
+        self.assertEqual(drepr(delta_0d), "0 days 00:00:00.000000000")
+        self.assertEqual(drepr(delta_1ns), "0 days 00:00:00.000000001")
 
-    def test_mixed_neg(self):
-        x = pd.to_timedelta(list(range(5)) + [pd.NaT], unit='D')
-        y = pd.to_timedelta(list(range(5)) + [pd.NaT], unit='s')
-        result = fmt.Timedelta64Formatter(-(x + y)).get_result()
-        self.assertEqual(result[0].strip(), "0 days, 00:00:00")
-        self.assertEqual(result[1].strip(), "-1 days, 00:00:01")
 
+class TestTimedelta64Formatter(tm.TestCase):
 
     def test_days(self):
         x = pd.to_timedelta(list(range(5)) + [pd.NaT], unit='D')
-        result = fmt.Timedelta64Formatter(x).get_result()
+        result = fmt.Timedelta64Formatter(x,box=True).get_result()
+        self.assertEqual(result[0].strip(), "'0 days'")
+        self.assertEqual(result[1].strip(), "'1 days'")
+
+        result = fmt.Timedelta64Formatter(x[1:2],box=True).get_result()
+        self.assertEqual(result[0].strip(), "'1 days'")
+
+        result = fmt.Timedelta64Formatter(x,box=False).get_result()
         self.assertEqual(result[0].strip(), "0 days")
         self.assertEqual(result[1].strip(), "1 days")
 
-        result = fmt.Timedelta64Formatter(x[1:2]).get_result()
+        result = fmt.Timedelta64Formatter(x[1:2],box=False).get_result()
         self.assertEqual(result[0].strip(), "1 days")
 
     def test_days_neg(self):
         x = pd.to_timedelta(list(range(5)) + [pd.NaT], unit='D')
-        result = fmt.Timedelta64Formatter(-x).get_result()
-        self.assertEqual(result[0].strip(), "0 days")
-        self.assertEqual(result[1].strip(), "-1 days")
+        result = fmt.Timedelta64Formatter(-x,box=True).get_result()
+        self.assertEqual(result[0].strip(), "'0 days'")
+        self.assertEqual(result[1].strip(), "'-1 days'")
 
     def test_subdays(self):
         y = pd.to_timedelta(list(range(5)) + [pd.NaT], unit='s')
-        result = fmt.Timedelta64Formatter(y).get_result()
-        self.assertEqual(result[0].strip(), "00:00:00")
-        self.assertEqual(result[1].strip(), "00:00:01")
+        result = fmt.Timedelta64Formatter(y,box=True).get_result()
+        self.assertEqual(result[0].strip(), "'00:00:00'")
+        self.assertEqual(result[1].strip(), "'00:00:01'")
 
     def test_subdays_neg(self):
         y = pd.to_timedelta(list(range(5)) + [pd.NaT], unit='s')
-        result = fmt.Timedelta64Formatter(-y).get_result()
-        self.assertEqual(result[0].strip(), "00:00:00")
-        self.assertEqual(result[1].strip(), "-00:00:01")
+        result = fmt.Timedelta64Formatter(-y,box=True).get_result()
+        self.assertEqual(result[0].strip(), "'00:00:00'")
+        self.assertEqual(result[1].strip(), "'-1 days +23:59:59'")
 
     def test_zero(self):
         x = pd.to_timedelta(list(range(1)) + [pd.NaT], unit='D')
-        result = fmt.Timedelta64Formatter(x).get_result()
-        self.assertEqual(result[0].strip(), "0 days")
+        result = fmt.Timedelta64Formatter(x,box=True).get_result()
+        self.assertEqual(result[0].strip(), "'0 days'")
 
         x = pd.to_timedelta(list(range(1)), unit='D')
-        result = fmt.Timedelta64Formatter(x).get_result()
-        self.assertEqual(result[0].strip(), "0 days")
+        result = fmt.Timedelta64Formatter(x,box=True).get_result()
+        self.assertEqual(result[0].strip(), "'0 days'")
 
 
 class TestDatetime64Formatter(tm.TestCase):
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index a7de624842b2b..8245d1bd0759c 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -32,7 +32,8 @@
 import pandas.core.format as fmt
 import pandas.core.datetools as datetools
 from pandas import (DataFrame, Index, Series, notnull, isnull,
-                    MultiIndex, DatetimeIndex, Timestamp, date_range, read_csv,
+                    MultiIndex, DatetimeIndex, Timestamp, date_range,
+                    read_csv, timedelta_range, Timedelta,
                     option_context)
 import pandas as pd
 from pandas.parser import CParserError
@@ -9515,6 +9516,18 @@ def test_diff(self):
         assert_series_equal(the_diff['A'],
                             tf['A'] - tf['A'].shift(1))
 
+    def test_diff_timedelta(self):
+        # GH 4533
+        df = DataFrame(dict(time=[Timestamp('20130101 9:01'),
+                                  Timestamp('20130101 9:02')],
+                            value=[1.0,2.0]))
+
+        res = df.diff()
+        exp = DataFrame([[pd.NaT, np.nan],
+                         [Timedelta('00:01:00'), 1]],
+                        columns=['time', 'value'])
+        assert_frame_equal(res, exp)
+
     def test_diff_mixed_dtype(self):
         df = DataFrame(np.random.randn(5, 3))
         df['A'] = np.array([1, 2, 3, 4, 5], dtype=object)
@@ -12175,6 +12188,42 @@ def test_construction_with_mixed(self):
                                 'timedelta64[ns]' : 1}).order()
         assert_series_equal(result,expected)
 
+    def test_construction_with_conversions(self):
+
+        # convert from a numpy array of non-ns timedelta64
+        arr = np.array([1,2,3],dtype='timedelta64[s]')
+        s = Series(arr)
+        expected = Series(timedelta_range('00:00:01',periods=3,freq='s'))
+        assert_series_equal(s,expected)
+
+        df = DataFrame(index=range(3))
+        df['A'] = arr
+        expected = DataFrame({'A' : timedelta_range('00:00:01',periods=3,freq='s')},
+                             index=range(3))
+        assert_frame_equal(df,expected)
+
+        # convert from a numpy array of non-ns datetime64
+        #### note that creating a numpy datetime64 is in LOCAL time!!!!
+        #### seems to work for M8[D], but not for M8[s]
+
+        s = Series(np.array(['2013-01-01','2013-01-02','2013-01-03'],dtype='datetime64[D]'))
+        assert_series_equal(s,Series(date_range('20130101',periods=3,freq='D')))
+        #s = Series(np.array(['2013-01-01 00:00:01','2013-01-01 00:00:02','2013-01-01 00:00:03'],dtype='datetime64[s]'))
+        #assert_series_equal(s,date_range('20130101 00:00:01',period=3,freq='s'))
+
+        expected = DataFrame({
+            'dt1' : Timestamp('20130101'),
+            'dt2' : date_range('20130101',periods=3),
+            #'dt3' : date_range('20130101 00:00:01',periods=3,freq='s'),
+            },index=range(3))
+
+
+        df = DataFrame(index=range(3))
+        df['dt1'] = np.datetime64('2013-01-01')
+        df['dt2'] = np.array(['2013-01-01','2013-01-02','2013-01-03'],dtype='datetime64[D]')
+        #df['dt3'] = np.array(['2013-01-01 00:00:01','2013-01-01 00:00:02','2013-01-01 00:00:03'],dtype='datetime64[s]')
+        assert_frame_equal(df, expected)
+
     def test_constructor_frame_copy(self):
         cop = DataFrame(self.frame, copy=True)
         cop['A'] = 5
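``DataFrame.diff`` on a datetime column now yields a ``timedelta64[ns]`` column (GH 4533), and the constructors upcast non-nanosecond datetime/timedelta arrays, per the tests above. Sketch (illustrative, not part of the diff)::

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'time': pd.to_datetime(['20130101 09:01', '20130101 09:02'])})
    df.diff()['time']   # NaT, then Timedelta('0 days 00:01:00')

    pd.Series(np.array([1, 2, 3], dtype='timedelta64[s]')).dtype  # timedelta64[ns]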
diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py
index 8d0b54f2ef0b4..295af483289e5 100644
--- a/pandas/tests/test_index.py
+++ b/pandas/tests/test_index.py
@@ -16,6 +16,7 @@
 from pandas.core.index import (Index, Float64Index, Int64Index, MultiIndex,
                                InvalidIndexError, NumericIndex)
 from pandas.tseries.index import DatetimeIndex
+from pandas.tseries.tdi import TimedeltaIndex
 from pandas.tseries.period import PeriodIndex
 from pandas.core.series import Series
 from pandas.util.testing import (assert_almost_equal, assertRaisesRegexp,
@@ -53,13 +54,13 @@ def test_numeric_compat(self):
 
         idx = self.create_index()
         tm.assertRaisesRegexp(TypeError,
-                              "cannot perform multiplication",
+                              "cannot perform __mul__",
                               lambda : idx * 1)
         tm.assertRaisesRegexp(TypeError,
-                              "cannot perform multiplication",
+                              "cannot perform __mul__",
                               lambda : 1 * idx)
 
-        div_err = "cannot perform true division" if compat.PY3 else "cannot perform division"
+        div_err = "cannot perform __truediv__" if compat.PY3 else "cannot perform __div__"
         tm.assertRaisesRegexp(TypeError,
                               div_err,
                               lambda : idx / 1)
@@ -67,10 +68,10 @@ def test_numeric_compat(self):
                               div_err,
                               lambda : 1 / idx)
         tm.assertRaisesRegexp(TypeError,
-                              "cannot perform floor division",
+                              "cannot perform __floordiv__",
                               lambda : idx // 1)
         tm.assertRaisesRegexp(TypeError,
-                              "cannot perform floor division",
+                              "cannot perform __floordiv__",
                               lambda : 1 // idx)
 
     def test_boolean_context_compat(self):
@@ -1654,6 +1655,52 @@ def create_index(self):
     def test_pickle_compat_construction(self):
         pass
 
+class TestTimedeltaIndex(Base, tm.TestCase):
+    _holder = TimedeltaIndex
+    _multiprocess_can_split_ = True
+
+    def create_index(self):
+        return pd.to_timedelta(range(5),unit='d') + pd.offsets.Hour(1)
+
+    def test_numeric_compat(self):
+
+        idx = self._holder(np.arange(5,dtype='int64'))
+        didx = self._holder(np.arange(5,dtype='int64')**2
+                            )
+        result = idx * 1
+        tm.assert_index_equal(result, idx)
+
+        result = 1 * idx
+        tm.assert_index_equal(result, idx)
+
+        result = idx / 1
+        tm.assert_index_equal(result, idx)
+
+        result = idx // 1
+        tm.assert_index_equal(result, idx)
+
+        result = idx * np.array(5,dtype='int64')
+        tm.assert_index_equal(result, self._holder(np.arange(5,dtype='int64')*5))
+
+        result = idx * np.arange(5,dtype='int64')
+        tm.assert_index_equal(result, didx)
+
+        result = idx * Series(np.arange(5,dtype='int64'))
+        tm.assert_index_equal(result, didx)
+
+        result = idx * Series(np.arange(5,dtype='float64')+0.1)
+        tm.assert_index_equal(result,
+                              Float64Index(np.arange(5,dtype='float64')*(np.arange(5,dtype='float64')+0.1)))
+
+
+        # invalid
+        self.assertRaises(TypeError, lambda : idx * idx)
+        self.assertRaises(ValueError, lambda : idx * self._holder(np.arange(3)))
+        self.assertRaises(ValueError, lambda : idx * np.array([1,2]))
+
+    def test_pickle_compat_construction(self):
+        pass
+
 class TestMultiIndex(Base, tm.TestCase):
     _holder = MultiIndex
     _multiprocess_can_split_ = True
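``TestTimedeltaIndex`` fixes the numeric-ops surface: integer/array scaling of a ``TimedeltaIndex`` is allowed, ``TimedeltaIndex * TimedeltaIndex`` raises, and the error messages are now phrased in terms of the dunder name. Sketch (illustrative)::

    import numpy as np
    import pandas as pd

    tdi = pd.to_timedelta(np.arange(5), unit='d')
    tdi * 2       # element-wise scaling is fine
    # tdi * tdi   # raises TypeError: cannot perform __mul__ ...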
diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py
index 3e8a5fecbb579..509ef4925bb66 100644
--- a/pandas/tests/test_nanops.py
+++ b/pandas/tests/test_nanops.py
@@ -118,11 +118,32 @@ def setUp(self):
     def check_results(self, targ, res, axis):
         res = getattr(res, 'asm8', res)
         res = getattr(res, 'values', res)
-        if axis != 0 and hasattr(targ, 'shape') and targ.ndim:
-            res = np.split(res, [targ.shape[0]], axis=0)[0]
+
+        # timedeltas are a beast here
+        def _coerce_tds(targ, res):
+            if targ.dtype == 'm8[ns]':
+                if len(targ) == 1:
+                    targ = targ[0].item()
+                    res = res.item()
+                else:
+                    targ = targ.view('i8')
+            return targ, res
+
+        try:
+            if axis != 0 and hasattr(targ, 'shape') and targ.ndim:
+                res = np.split(res, [targ.shape[0]], axis=0)[0]
+        except:
+            targ, res = _coerce_tds(targ, res)
+
         try:
             tm.assert_almost_equal(targ, res)
         except:
+
+            if targ.dtype == 'm8[ns]':
+                targ, res = _coerce_tds(targ, res)
+                tm.assert_almost_equal(targ, res)
+                return
+
             # There are sometimes rounding errors with
             # complex and object dtypes.
             # If it isn't one of those, re-raise the error.
@@ -208,7 +229,7 @@ def check_fun(self, testfunc, targfunc,
 
     def check_funs(self, testfunc, targfunc,
                    allow_complex=True, allow_all_nan=True, allow_str=True,
-                   allow_date=True, allow_obj=True,
+                   allow_date=True, allow_tdelta=True, allow_obj=True,
                    **kwargs):
         self.check_fun(testfunc, targfunc, 'arr_float', **kwargs)
         self.check_fun(testfunc, targfunc, 'arr_float_nan', 'arr_float',
@@ -244,6 +265,8 @@ def check_funs(self, testfunc, targfunc,
         else:
             self.check_fun(testfunc, targfunc, 'arr_date', **kwargs)
             objs += [self.arr_date.astype('O')]
+
+        if allow_tdelta:
             try:
                 targfunc(self.arr_tdelta)
             except TypeError:
@@ -264,12 +287,12 @@ def check_funs(self, testfunc, targfunc,
 
     def check_funs_ddof(self, testfunc, targfunc,
                         allow_complex=True, allow_all_nan=True, allow_str=True,
-                        allow_date=True, allow_obj=True,):
+                        allow_date=False, allow_tdelta=False, allow_obj=True,):
         for ddof in range(3):
             try:
                 self.check_funs(self, testfunc, targfunc,
                                 allow_complex, allow_all_nan, allow_str,
-                                allow_date, allow_obj,
+                                allow_date, allow_tdelta, allow_obj,
                                 ddof=ddof)
             except BaseException as exc:
                 exc.args += ('ddof %s' % ddof,)
@@ -284,34 +307,35 @@ def _badobj_wrap(self, value, func, allow_complex=True, **kwargs):
 
     def test_nanany(self):
         self.check_funs(nanops.nanany, np.any,
-                        allow_all_nan=False, allow_str=False, allow_date=False)
+                        allow_all_nan=False, allow_str=False, allow_date=False, allow_tdelta=False)
 
     def test_nanall(self):
         self.check_funs(nanops.nanall, np.all,
-                        allow_all_nan=False, allow_str=False, allow_date=False)
+                        allow_all_nan=False, allow_str=False, allow_date=False, allow_tdelta=False)
 
     def test_nansum(self):
         self.check_funs(nanops.nansum, np.sum,
-                        allow_str=False, allow_date=False)
+                        allow_str=False, allow_date=False, allow_tdelta=True)
 
     def test_nanmean(self):
         self.check_funs(nanops.nanmean, np.mean,
                         allow_complex=False, allow_obj=False,
-                        allow_str=False, allow_date=False)
+                        allow_str=False, allow_date=False, allow_tdelta=True)
 
     def test_nanmedian(self):
         self.check_funs(nanops.nanmedian, np.median,
                         allow_complex=False, allow_str=False, allow_date=False,
+                        allow_tdelta=True,
                         allow_obj='convert')
 
     def test_nanvar(self):
         self.check_funs_ddof(nanops.nanvar, np.var,
-                             allow_complex=False, allow_date=False)
+                             allow_complex=False, allow_date=False, allow_tdelta=False)
 
     def test_nansem(self):
         tm.skip_if_no_package('scipy.stats')
         self.check_funs_ddof(nanops.nansem, np.var,
-                             allow_complex=False, allow_date=False)
+                             allow_complex=False, allow_date=False, allow_tdelta=False)
 
     def _minmax_wrap(self, value, axis=None, func=None):
         res = func(value, axis)
@@ -343,13 +367,16 @@ def _argminmax_wrap(self, value, axis=None, func=None):
     def test_nanargmax(self):
         func = partial(self._argminmax_wrap, func=np.argmax)
         self.check_funs(nanops.nanargmax, func,
-                        allow_str=False, allow_obj=False)
+                        allow_str=False, allow_obj=False,
+                        allow_date=True,
+                        allow_tdelta=True)
 
     def test_nanargmin(self):
         func = partial(self._argminmax_wrap, func=np.argmin)
         if tm.sys.version_info[0:2] == (2, 6):
             self.check_funs(nanops.nanargmin, func,
-                            allow_date=False,
+                            allow_date=True,
+                            allow_tdelta=True,
                             allow_str=False, allow_obj=False)
         else:
             self.check_funs(nanops.nanargmin, func,
@@ -372,7 +399,7 @@ def test_nanskew(self):
         from scipy.stats import skew
         func = partial(self._skew_kurt_wrap, func=skew)
         self.check_funs(nanops.nanskew, func,
-                        allow_complex=False, allow_str=False, allow_date=False)
+                        allow_complex=False, allow_str=False, allow_date=False, allow_tdelta=False)
 
     def test_nankurt(self):
         tm.skip_if_no_package('scipy.stats')
@@ -380,11 +407,11 @@ def test_nankurt(self):
         func1 = partial(kurtosis, fisher=True)
         func = partial(self._skew_kurt_wrap, func=func1)
         self.check_funs(nanops.nankurt, func,
-                        allow_complex=False, allow_str=False, allow_date=False)
+                        allow_complex=False, allow_str=False, allow_date=False, allow_tdelta=False)
 
     def test_nanprod(self):
         self.check_funs(nanops.nanprod, np.prod,
-                        allow_str=False, allow_date=False)
+                        allow_str=False, allow_date=False, allow_tdelta=False)
 
     def check_nancorr_nancov_2d(self, checkfun, targ0, targ1, **kwargs):
         res00 = checkfun(self.arr_float_2d, self.arr_float1_2d,
diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py
index 02a8f79e5a8c1..0b76d6247060d 100644
--- a/pandas/tests/test_series.py
+++ b/pandas/tests/test_series.py
@@ -16,10 +16,12 @@
 import pandas as pd
 
 from pandas import (Index, Series, DataFrame, isnull, notnull, bdate_range,
-                    date_range, period_range)
+                    date_range, period_range, timedelta_range)
 from pandas.core.index import MultiIndex
 from pandas.core.indexing import IndexingError
+from pandas.tseries.period import PeriodIndex
 from pandas.tseries.index import Timestamp, DatetimeIndex
+from pandas.tseries.tdi import Timedelta, TimedeltaIndex
 import pandas.core.common as com
 import pandas.core.config as cf
 import pandas.lib as lib
@@ -76,18 +78,31 @@ def test_dt_namespace_accessor(self):
 
         # GH 7207
         # test .dt namespace accessor
 
-        ok_for_base = ['year','month','day','hour','minute','second','weekofyear','week','dayofweek','weekday','dayofyear','quarter']
+        ok_for_base = ['year','month','day','hour','minute','second','weekofyear','week','dayofweek','weekday','dayofyear','quarter','freq']
         ok_for_period = ok_for_base + ['qyear']
         ok_for_dt = ok_for_base + ['date','time','microsecond','nanosecond',
                                    'is_month_start', 'is_month_end', 'is_quarter_start',
-                                   'is_quarter_end', 'is_year_start', 'is_year_end']
+                                   'is_quarter_end', 'is_year_start', 'is_year_end', 'tz']
+        ok_for_dt_methods = ['to_period','to_pydatetime','tz_localize','tz_convert']
+        ok_for_td = ['days','hours','minutes','seconds','milliseconds','microseconds','nanoseconds']
+        ok_for_td_methods = ['components','to_pytimedelta']
 
         def get_expected(s, name):
             result = getattr(Index(s.values),prop)
             if isinstance(result, np.ndarray):
                 if com.is_integer_dtype(result):
                     result = result.astype('int64')
+            elif not com.is_list_like(result):
+                return result
             return Series(result,index=s.index)
 
+        def compare(s, name):
+            a = getattr(s.dt,prop)
+            b = get_expected(s,prop)
+            if not (com.is_list_like(a) and com.is_list_like(b)):
+                self.assertEqual(a,b)
+            else:
+                tm.assert_series_equal(a,b)
+
         # invalids
         for s in [Series(np.arange(5)),
                   Series(list('abcde')),
@@ -98,9 +113,51 @@ def get_expected(s, name):
         for s in [Series(date_range('20130101',periods=5)),
                   Series(date_range('20130101',periods=5,freq='s')),
                   Series(date_range('20130101 00:00:00',periods=5,freq='ms'))]:
             for prop in ok_for_dt:
-                tm.assert_series_equal(getattr(s.dt,prop),get_expected(s,prop))
+
+                # we test freq below
+                if prop != 'freq':
+                    compare(s, prop)
+
+            for prop in ok_for_dt_methods:
+                getattr(s.dt,prop)
+
+            result = s.dt.to_pydatetime()
+            self.assertIsInstance(result,np.ndarray)
+            self.assertTrue(result.dtype == object)
+
+            result = s.dt.tz_localize('US/Eastern')
+            expected = Series(DatetimeIndex(s.values).tz_localize('US/Eastern'),index=s.index)
+            tm.assert_series_equal(result, expected)
+
+            tz_result = result.dt.tz
+            self.assertEqual(str(tz_result), 'US/Eastern')
+            freq_result = s.dt.freq
+            self.assertEqual(freq_result, DatetimeIndex(s.values, freq='infer').freq)
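The ``.dt`` accessor tests add ``freq`` to the delegated properties (and ``tz`` plus the ``tz_localize``/``tz_convert``/``to_period``/``to_pydatetime`` methods on the datetime side). A small sketch of the new ``freq`` delegation (illustrative)::

    import pandas as pd

    s = pd.Series(pd.timedelta_range('1 day', periods=5))
    s.dt.freq   # the inferred frequency of the underlying TimedeltaIndex (daily here)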
freq='infer').freq) + + # timedeltaindex + for s in [Series(timedelta_range('1 day',periods=5)), + Series(timedelta_range('1 day 01:23:45',periods=5,freq='s')), + Series(timedelta_range('2 days 01:23:45.012345',periods=5,freq='ms'))]: + for prop in ok_for_td: + + # we test freq below + if prop != 'freq': + compare(s, prop) + + for prop in ok_for_td_methods: + getattr(s.dt,prop) + + result = s.dt.components + self.assertIsInstance(result,DataFrame) + tm.assert_index_equal(result.index,s.index) + + result = s.dt.to_pytimedelta() + self.assertIsInstance(result,np.ndarray) + self.assertTrue(result.dtype == object) + + freq_result = s.dt.freq + self.assertEqual(freq_result, TimedeltaIndex(s.values, freq='infer').freq) # both index = date_range('20130101',periods=3,freq='D') @@ -113,7 +170,13 @@ def get_expected(s, name): for s in [Series(period_range('20130101',periods=5,freq='D'))]: for prop in ok_for_period: - tm.assert_series_equal(getattr(s.dt,prop),get_expected(s,prop)) + + # we test freq below + if prop != 'freq': + compare(s, prop) + + freq_result = s.dt.freq + self.assertEqual(freq_result, PeriodIndex(s.values).freq) # test limited display api def get_dir(s): @@ -122,7 +185,7 @@ def get_dir(s): s = Series(date_range('20130101',periods=5,freq='D')) results = get_dir(s) - tm.assert_almost_equal(results,list(sorted(set(ok_for_dt)))) + tm.assert_almost_equal(results,list(sorted(set(ok_for_dt + ok_for_dt_methods)))) s = Series(period_range('20130101',periods=5,freq='D').asobject) results = get_dir(s) @@ -2216,7 +2279,7 @@ def testit(): self.series[5:15] = np.NaN # idxmax, idxmin, min, and max are valid for dates - if not ('max' in name or 'min' in name): + if name not in ['max','min']: ds = Series(date_range('1/1/2001', periods=10)) self.assertRaises(TypeError, f, ds) @@ -2820,6 +2883,16 @@ def test_timedelta64_conversions(self): expected = s1.apply(lambda x: np.timedelta64(m,unit) / x) result = np.timedelta64(m,unit) / s1 + # astype + s = Series(date_range('20130101',periods=3)) + result = s.astype(object) + self.assertIsInstance(result.iloc[0],datetime) + self.assertTrue(result.dtype == np.object_) + + result = s1.astype(object) + self.assertIsInstance(result.iloc[0],timedelta) + self.assertTrue(result.dtype == np.object_) + def test_timedelta64_equal_timedelta_supported_ops(self): ser = Series([Timestamp('20130301'), Timestamp('20130228 23:00:00'), Timestamp('20130228 22:00:00'), @@ -2846,6 +2919,22 @@ def timedelta64(*args): raise AssertionError( "invalid comparsion [op->{0},d->{1},h->{2},m->{3},s->{4},us->{5}]\n{6}\n{7}\n".format(op, d, h, m, s, us, lhs, rhs)) + def test_timedelta_assignment(self): + # GH 8209 + s = Series([]) + s.loc['B'] = timedelta(1) + tm.assert_series_equal(s,Series(Timedelta('1 days'),index=['B'])) + + s = s.reindex(s.index.insert(0, 'A')) + tm.assert_series_equal(s,Series([np.nan,Timedelta('1 days')],index=['A','B'])) + + result = s.fillna(timedelta(1)) + expected = Series(Timedelta('1 days'),index=['A','B']) + tm.assert_series_equal(result, expected) + + s.loc['A'] = timedelta(1) + tm.assert_series_equal(s, expected) + def test_operators_datetimelike(self): def run_ops(ops, get_ser, test_ser): @@ -2955,12 +3044,37 @@ def test_timedelta64_functions(self): # max/min result = td.max() - expected = Series([timedelta(2)], dtype='timedelta64[ns]') - assert_series_equal(result, expected) + expected = Timedelta('2 days') + self.assertEqual(result, expected) result = td.min() - expected = Series([timedelta(1)], dtype='timedelta64[ns]') - assert_series_equal(result, 
expected) + expected = Timedelta('1 days') + self.assertEqual(result, expected) + + def test_ops_consistency_on_empty(self): + + # GH 7869 + # consistency on empty + + # float + result = Series(dtype=float).sum() + self.assertEqual(result,0) + + result = Series(dtype=float).mean() + self.assertTrue(isnull(result)) + + result = Series(dtype=float).median() + self.assertTrue(isnull(result)) + + # timedelta64[ns] + result = Series(dtype='m8[ns]').sum() + self.assertEqual(result, Timedelta(0)) + + result = Series(dtype='m8[ns]').mean() + self.assertTrue(result is pd.NaT) + + result = Series(dtype='m8[ns]').median() + self.assertTrue(result is pd.NaT) def test_timedelta_fillna(self): #GH 3371 @@ -3212,19 +3326,19 @@ def test_timedelta64_nan(self): td1 = td.copy() td1[0] = np.nan self.assertTrue(isnull(td1[0])) - self.assertEqual(td1[0].view('i8'), tslib.iNaT) + self.assertEqual(td1[0].value, tslib.iNaT) td1[0] = td[0] self.assertFalse(isnull(td1[0])) td1[1] = tslib.iNaT self.assertTrue(isnull(td1[1])) - self.assertEqual(td1[1].view('i8'), tslib.iNaT) + self.assertEqual(td1[1].value, tslib.iNaT) td1[1] = td[1] self.assertFalse(isnull(td1[1])) td1[2] = tslib.NaT self.assertTrue(isnull(td1[2])) - self.assertEqual(td1[2].view('i8'), tslib.iNaT) + self.assertEqual(td1[2].value, tslib.iNaT) td1[2] = td[2] self.assertFalse(isnull(td1[2])) diff --git a/pandas/tseries/api.py b/pandas/tseries/api.py index c2cc3723802fc..7c47bd9a232a9 100644 --- a/pandas/tseries/api.py +++ b/pandas/tseries/api.py @@ -5,6 +5,7 @@ from pandas.tseries.index import DatetimeIndex, date_range, bdate_range from pandas.tseries.frequencies import infer_freq +from pandas.tseries.tdi import Timedelta, TimedeltaIndex, timedelta_range from pandas.tseries.period import Period, PeriodIndex, period_range, pnow from pandas.tseries.resample import TimeGrouper from pandas.tseries.timedeltas import to_timedelta diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py new file mode 100644 index 0000000000000..1d9a062c624f7 --- /dev/null +++ b/pandas/tseries/base.py @@ -0,0 +1,469 @@ +""" +Base and utility classes for tseries type pandas objects. 
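+Shared by DatetimeIndex, TimedeltaIndex and PeriodIndex through the DatetimeIndexOpsMixin below.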
+""" + + +from datetime import datetime, time, timedelta + +from pandas import compat +import numpy as np +from pandas.core import common as com +import pandas.tslib as tslib +import pandas.lib as lib +from pandas.core.index import Index +from pandas.util.decorators import Appender, cache_readonly +from pandas.tseries.frequencies import ( + infer_freq, to_offset, get_period_alias, + Resolution) +import pandas.algos as _algos + +class DatetimeIndexOpsMixin(object): + """ common ops mixin to support a unified inteface datetimelike Index """ + + def __iter__(self): + return (self._box_func(v) for v in self.asi8) + + @staticmethod + def _join_i8_wrapper(joinf, dtype, with_indexers=True): + """ create the join wrapper methods """ + + @staticmethod + def wrapper(left, right): + if isinstance(left, (np.ndarray, com.ABCIndex, com.ABCSeries)): + left = left.view('i8') + if isinstance(right, (np.ndarray, com.ABCIndex, com.ABCSeries)): + right = right.view('i8') + results = joinf(left, right) + if with_indexers: + join_index, left_indexer, right_indexer = results + join_index = join_index.view(dtype) + return join_index, left_indexer, right_indexer + return results + + return wrapper + + @property + def _box_func(self): + """ + box function to get object from internal representation + """ + raise NotImplementedError + + def _box_values(self, values): + """ + apply box func to passed values + """ + return lib.map_infer(values, self._box_func) + + def groupby(self, f): + objs = self.asobject.values + return _algos.groupby_object(objs, f) + + def _format_with_header(self, header, **kwargs): + return header + self._format_native_types(**kwargs) + + def __contains__(self, key): + try: + res = self.get_loc(key) + return np.isscalar(res) or type(res) == slice + except (KeyError, TypeError): + return False + + @cache_readonly + def inferred_freq(self): + try: + return infer_freq(self) + except ValueError: + return None + + # Try to run function on index first, and then on elements of index + # Especially important for group-by functionality + def map(self, f): + try: + result = f(self) + if not isinstance(result, (np.ndarray, Index)): + raise TypeError + return result + except Exception: + return _algos.arrmap_object(self.asobject.values, f) + + def order(self, return_indexer=False, ascending=True): + """ + Return sorted copy of Index + """ + if return_indexer: + _as = self.argsort() + if not ascending: + _as = _as[::-1] + sorted_index = self.take(_as) + return sorted_index, _as + else: + sorted_values = np.sort(self.values) + if not ascending: + sorted_values = sorted_values[::-1] + attribs = self._get_attributes_dict() + attribs['freq'] = None + return self._simple_new(sorted_values, **attribs) + + def take(self, indices, axis=0): + """ + Analogous to ndarray.take + """ + maybe_slice = lib.maybe_indices_to_slice(com._ensure_int64(indices)) + if isinstance(maybe_slice, slice): + return self[maybe_slice] + return super(DatetimeIndexOpsMixin, self).take(indices, axis) + + def slice_locs(self, start=None, end=None): + """ + Index.slice_locs, customized to handle partial ISO-8601 string slicing + """ + if isinstance(start, compat.string_types) or isinstance(end, compat.string_types): + + if self.is_monotonic: + try: + if start: + start_loc = self._get_string_slice(start).start + else: + start_loc = 0 + + if end: + end_loc = self._get_string_slice(end).stop + else: + end_loc = len(self) + + return start_loc, end_loc + except KeyError: + pass + + else: + # can't use a slice indexer because we are not sorted! 
+ # so create an indexer directly + try: + if start: + start_loc = self._get_string_slice(start, + use_rhs=False) + else: + start_loc = np.arange(len(self)) + + if end: + end_loc = self._get_string_slice(end, use_lhs=False) + else: + end_loc = np.arange(len(self)) + + return start_loc, end_loc + except KeyError: + pass + + if isinstance(start, time) or isinstance(end, time): + raise KeyError('Cannot use slice_locs with time slice keys') + + return Index.slice_locs(self, start, end) + + def get_duplicates(self): + values = Index.get_duplicates(self) + return self._simple_new(values) + + @cache_readonly + def hasnans(self): + """ return if I have any nans; enables various perf speedups """ + return (self.asi8 == tslib.iNaT).any() + + @property + def asobject(self): + from pandas.core.index import Index + return Index(self._box_values(self.asi8), name=self.name, dtype=object) + + def tolist(self): + """ + return a list of the underlying data + """ + return list(self.asobject) + + def min(self, axis=None): + """ + return the minimum value of the Index + + See also + -------- + numpy.ndarray.min + """ + try: + i8 = self.asi8 + + # quick check + if len(i8) and self.is_monotonic: + if i8[0] != tslib.iNaT: + return self._box_func(i8[0]) + + if self.hasnans: + mask = i8 == tslib.iNaT + min_stamp = self[~mask].asi8.min() + else: + min_stamp = i8.min() + return self._box_func(min_stamp) + except ValueError: + return self._na_value + + def argmin(self, axis=None): + """ + return a ndarray of the minimum argument indexer + + See also + -------- + numpy.ndarray.argmin + """ + + i8 = self.asi8 + if self.hasnans: + mask = i8 == tslib.iNaT + if mask.all(): + return -1 + i8 = i8.copy() + i8[mask] = np.iinfo('int64').max + return i8.argmin() + + def max(self, axis=None): + """ + return the maximum value of the Index + + See also + -------- + numpy.ndarray.max + """ + try: + i8 = self.asi8 + + # quick check + if len(i8) and self.is_monotonic: + if i8[-1] != tslib.iNaT: + return self._box_func(i8[-1]) + + if self.hasnans: + mask = i8 == tslib.iNaT + max_stamp = self[~mask].asi8.max() + else: + max_stamp = i8.max() + return self._box_func(max_stamp) + except ValueError: + return self._na_value + + def argmax(self, axis=None): + """ + return a ndarray of the maximum argument indexer + + See also + -------- + numpy.ndarray.argmax + """ + + i8 = self.asi8 + if self.hasnans: + mask = i8 == tslib.iNaT + if mask.all(): + return -1 + i8 = i8.copy() + i8[mask] = 0 + return i8.argmax() + + @property + def _formatter_func(self): + """ + Format function to convert value to representation + """ + return str + + def _format_footer(self): + raise NotImplementedError + + def __unicode__(self): + formatter = self._formatter_func + summary = str(self.__class__) + '\n' + + n = len(self) + if n == 0: + pass + elif n == 1: + first = formatter(self[0]) + summary += '[%s]\n' % first + elif n == 2: + first = formatter(self[0]) + last = formatter(self[-1]) + summary += '[%s, %s]\n' % (first, last) + else: + first = formatter(self[0]) + last = formatter(self[-1]) + summary += '[%s, ..., %s]\n' % (first, last) + + summary += self._format_footer() + return summary + + @cache_readonly + def _resolution(self): + from pandas.tseries.frequencies import Resolution + return Resolution.get_reso_from_freq(self.freqstr) + + @cache_readonly + def resolution(self): + """ + Returns day, hour, minute, second, millisecond or microsecond + """ + from pandas.tseries.frequencies import get_reso_string + return get_reso_string(self._resolution) + + def 
_add_datelike(self, other): + return NotImplemented + + def _sub_datelike(self, other): + return NotImplemented + + def __add__(self, other): + from pandas.core.index import Index + from pandas.tseries.tdi import TimedeltaIndex + from pandas.tseries.offsets import DateOffset + if isinstance(other, TimedeltaIndex): + return self._add_delta(other) + elif isinstance(self, TimedeltaIndex) and isinstance(other, Index): + if hasattr(other,'_add_delta'): + return other._add_delta(self) + raise TypeError("cannot perform a numeric operation with a TimedeltaIndex and {typ}".format(typ=type(other))) + elif isinstance(other, Index): + return self.union(other) + elif isinstance(other, (DateOffset, timedelta, np.timedelta64, tslib.Timedelta)): + return self._add_delta(other) + elif com.is_integer(other): + return self.shift(other) + elif isinstance(other, (tslib.Timestamp, datetime)): + return self._add_datelike(other) + else: # pragma: no cover + return NotImplemented + + def __sub__(self, other): + from pandas.core.index import Index + from pandas.tseries.tdi import TimedeltaIndex + from pandas.tseries.offsets import DateOffset + if isinstance(other, TimedeltaIndex): + return self._add_delta(-other) + elif isinstance(other, Index): + return self.diff(other) + elif isinstance(other, (DateOffset, timedelta, np.timedelta64, tslib.Timedelta)): + return self._add_delta(-other) + elif com.is_integer(other): + return self.shift(-other) + elif isinstance(other, (tslib.Timestamp, datetime)): + return self._sub_datelike(other) + else: # pragma: no cover + return NotImplemented + + __iadd__ = __add__ + __isub__ = __sub__ + + def _add_delta(self, other): + return NotImplemented + + def _add_delta_td(self, other): + # add a delta of a timedeltalike + # return the i8 result view + + inc = tslib._delta_to_nanoseconds(other) + mask = self.asi8 == tslib.iNaT + new_values = (self.asi8 + inc).view(self.dtype) + new_values[mask] = tslib.iNaT + return new_values.view(self.dtype) + + def _add_delta_tdi(self, other): + # add a delta of a TimedeltaIndex + # return the i8 result view + + # delta operation + if not len(self) == len(other): + raise ValueError("cannot add indices of unequal length") + + self_i8 = self.asi8 + other_i8 = other.asi8 + mask = (self_i8 == tslib.iNaT) | (other_i8 == tslib.iNaT) + new_values = self_i8 + other_i8 + new_values[mask] = tslib.iNaT + return new_values.view(self.dtype) + + def isin(self, values): + """ + Compute boolean array of whether each index value is found in the + passed set of values + + Parameters + ---------- + values : set or sequence of values + + Returns + ------- + is_contained : ndarray (boolean dtype) + """ + if not isinstance(values, type(self)): + try: + values = type(self)(values) + except ValueError: + return self.asobject.isin(values) + + value_set = set(values.asi8) + return lib.ismember(self.asi8, value_set) + + def shift(self, n, freq=None): + """ + Specialized shift which produces a DatetimeIndex + + Parameters + ---------- + n : int + Periods to shift by + freq : DateOffset or timedelta-like, optional + + Returns + ------- + shifted : DatetimeIndex + """ + if freq is not None and freq != self.freq: + if isinstance(freq, compat.string_types): + freq = to_offset(freq) + result = Index.shift(self, n, freq) + + if hasattr(self,'tz'): + result.tz = self.tz + + return result + + if n == 0: + # immutable so OK + return self + + if self.freq is None: + raise ValueError("Cannot shift with no freq") + + start = self[0] + n * self.freq + end = self[-1] + n * self.freq + 
attribs = self._get_attributes_dict() + attribs['start'] = start + attribs['end'] = end + return type(self)(**attribs) + + def unique(self): + """ + Index.unique with handling for DatetimeIndex/PeriodIndex metadata + + Returns + ------- + result : DatetimeIndex or PeriodIndex + """ + from pandas.core.index import Int64Index + result = Int64Index.unique(self) + return self._simple_new(result, name=self.name, freq=self.freq, + tz=getattr(self, 'tz', None)) + + def repeat(self, repeats, axis=None): + """ + Analogous to ndarray.repeat + """ + return self._simple_new(self.values.repeat(repeats), + name=self.name) + + diff --git a/pandas/tseries/common.py b/pandas/tseries/common.py index d3b86d73dca3a..227af42f07411 100644 --- a/pandas/tseries/common.py +++ b/pandas/tseries/common.py @@ -3,7 +3,7 @@ import numpy as np from pandas.core.base import PandasDelegate from pandas.core import common as com -from pandas import Series, DatetimeIndex, PeriodIndex +from pandas import Series, DatetimeIndex, PeriodIndex, TimedeltaIndex from pandas import lib, tslib def is_datetimelike(data): @@ -17,7 +17,8 @@ def is_datetimelike(data): def maybe_to_datetimelike(data, copy=False): """ - return a DelegatedClass of a Series that is datetimelike (e.g. datetime64[ns] dtype or a Series of Periods) + return a DelegatedClass of a Series that is datetimelike + (e.g. datetime64[ns],timedelta64[ns] dtype or a Series of Periods) raise TypeError if this is not possible. Parameters @@ -37,10 +38,14 @@ def maybe_to_datetimelike(data, copy=False): index = data.index if issubclass(data.dtype.type, np.datetime64): - return DatetimeProperties(DatetimeIndex(data, copy=copy), index) + return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer'), index) + elif issubclass(data.dtype.type, np.timedelta64): + return TimedeltaProperties(TimedeltaIndex(data, copy=copy, freq='infer'), index) else: if com.is_period_arraylike(data): return PeriodProperties(PeriodIndex(data, copy=copy), index) + if com.is_datetime_arraylike(data): + return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer'), index) raise TypeError("cannot convert an object of type {0} to a datetimelike index".format(type(data))) @@ -57,6 +62,8 @@ def _delegate_property_get(self, name): if isinstance(result, np.ndarray): if com.is_integer_dtype(result): result = result.astype('int64') + elif not com.is_list_like(result): + return result # return the result as a Series, which is by definition a copy result = Series(result, index=self.index) @@ -71,6 +78,21 @@ def _delegate_property_set(self, name, value, *args, **kwargs): raise ValueError("modifications to a property of a datetimelike object are not " "supported. Change values on the original.") + def _delegate_method(self, name, *args, **kwargs): + method = getattr(self.values, name) + result = method(*args, **kwargs) + + if not com.is_list_like(result): + return result + + result = Series(result, index=self.index) + + # setting this object will show a SettingWithCopyWarning/Error + result.is_copy = ("modifications to a method of a datetimelike object are not " + "supported and are discarded. Change values on the original.") + + return result + class DatetimeProperties(Properties): """ @@ -86,9 +108,42 @@ class DatetimeProperties(Properties): Raises TypeError if the Series does not contain datetimelike values. 
""" + def to_pydatetime(self): + return self.values.to_pydatetime() + DatetimeProperties._add_delegate_accessors(delegate=DatetimeIndex, accessors=DatetimeIndex._datetimelike_ops, typ='property') +DatetimeProperties._add_delegate_accessors(delegate=DatetimeIndex, + accessors=["to_period","tz_localize","tz_convert"], + typ='method') + +class TimedeltaProperties(Properties): + """ + Accessor object for datetimelike properties of the Series values. + + Examples + -------- + >>> s.dt.hours + >>> s.dt.seconds + + Returns a Series indexed like the original Series. + Raises TypeError if the Series does not contain datetimelike values. + """ + + def to_pytimedelta(self): + return self.values.to_pytimedelta() + + @property + def components(self): + return self.values.components + +TimedeltaProperties._add_delegate_accessors(delegate=TimedeltaIndex, + accessors=TimedeltaIndex._datetimelike_ops, + typ='property') +TimedeltaProperties._add_delegate_accessors(delegate=TimedeltaIndex, + accessors=["to_pytimedelta"], + typ='method') class PeriodProperties(Properties): """ diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 873d24530d1d9..7cd286129e936 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -661,13 +661,17 @@ def infer_freq(index, warn=True): if isinstance(index, com.ABCSeries): values = index.values - if not (com.is_datetime64_dtype(index.values) or values.dtype == object): + if not (com.is_datetime64_dtype(index.values) or com.is_timedelta64_dtype(index.values) or values.dtype == object): raise TypeError("cannot infer freq from a non-convertible dtype on a Series of {0}".format(index.dtype)) index = values if com.is_period_arraylike(index): raise TypeError("PeriodIndex given. Check the `freq` attribute " "instead of using infer_freq.") + elif isinstance(index, pd.TimedeltaIndex): + inferer = _TimedeltaFrequencyInferer(index, warn=warn) + return inferer.get_freq() + if isinstance(index, pd.Index) and not isinstance(index, pd.DatetimeIndex): if isinstance(index, (pd.Int64Index, pd.Float64Index)): raise TypeError("cannot infer freq from a non-convertible index type {0}".format(type(index))) @@ -694,8 +698,9 @@ def __init__(self, index, warn=True): self.index = index self.values = np.asarray(index).view('i8') - if index.tz is not None: - self.values = tslib.tz_convert(self.values, 'UTC', index.tz) + if hasattr(index,'tz'): + if index.tz is not None: + self.values = tslib.tz_convert(self.values, 'UTC', index.tz) self.warn = warn @@ -892,6 +897,18 @@ def _get_wom_rule(self): import pandas.core.algorithms as algos +class _TimedeltaFrequencyInferer(_FrequencyInferer): + + def _infer_daily_rule(self): + if self.is_unique: + days = self.deltas[0] / _ONE_DAY + if days % 7 == 0: + # Weekly + alias = _weekday_rule_aliases[self.rep_stamp.weekday()] + return _maybe_add_count('W-%s' % alias, days / 7) + else: + return _maybe_add_count('D', days) + def _maybe_add_count(base, count): if count > 1: diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index e2cb8216bb270..45e851afb49e0 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -15,9 +15,9 @@ import pandas.compat as compat from pandas.compat import u from pandas.tseries.frequencies import ( - infer_freq, to_offset, get_period_alias, + to_offset, get_period_alias, Resolution) -from pandas.core.base import DatetimeIndexOpsMixin +from pandas.tseries.base import DatetimeIndexOpsMixin from pandas.tseries.offsets import DateOffset, generate_range, Tick, CDay from 
pandas.tseries.tools import parse_time_string, normalize_date from pandas.util.decorators import cache_readonly, deprecate_kwarg @@ -58,22 +58,6 @@ def f(self): return property(f) -def _join_i8_wrapper(joinf, with_indexers=True): - @staticmethod - def wrapper(left, right): - if isinstance(left, (np.ndarray, Index, ABCSeries)): - left = left.view('i8') - if isinstance(right, (np.ndarray, Index, ABCSeries)): - right = right.view('i8') - results = joinf(left, right) - if with_indexers: - join_index, left_indexer, right_indexer = results - join_index = join_index.view('M8[ns]') - return join_index, left_indexer, right_indexer - return results - return wrapper - - def _dt_index_cmp(opname, nat_result=False): """ Wrap comparison operations to convert datetime-like to datetime64 @@ -162,6 +146,10 @@ class DatetimeIndex(DatetimeIndexOpsMixin, Int64Index): _typ = 'datetimeindex' _join_precedence = 10 + + def _join_i8_wrapper(joinf, **kwargs): + return DatetimeIndexOpsMixin._join_i8_wrapper(joinf, dtype='M8[ns]', **kwargs) + _inner_indexer = _join_i8_wrapper(_algos.inner_join_indexer_int64) _outer_indexer = _join_i8_wrapper(_algos.outer_join_indexer_int64) _left_indexer = _join_i8_wrapper(_algos.left_join_indexer_int64) @@ -176,9 +164,6 @@ class DatetimeIndex(DatetimeIndexOpsMixin, Int64Index): __le__ = _dt_index_cmp('__le__') __ge__ = _dt_index_cmp('__ge__') - # structured array cache for datetime fields - _sarr_cache = None - _engine_type = _index.DatetimeEngine tz = None @@ -188,7 +173,7 @@ class DatetimeIndex(DatetimeIndexOpsMixin, Int64Index): _datetimelike_ops = ['year','month','day','hour','minute','second', 'weekofyear','week','dayofweek','weekday','dayofyear','quarter', 'date','time','microsecond','nanosecond','is_month_start','is_month_end', - 'is_quarter_start','is_quarter_end','is_year_start','is_year_end'] + 'is_quarter_start','is_quarter_end','is_year_start','is_year_end','tz','freq'] _is_numeric_dtype = False @@ -301,6 +286,8 @@ def __new__(cls, data=None, # make sure that we have a index/ndarray like (and not a Series) if isinstance(subarr, ABCSeries): subarr = subarr.values + if subarr.dtype == np.object_: + subarr = tools.to_datetime(subarr, box=False) except ValueError: # tz aware @@ -492,7 +479,7 @@ def _local_timestamps(self): return result.take(reverse) @classmethod - def _simple_new(cls, values, name=None, freq=None, tz=None): + def _simple_new(cls, values, name=None, freq=None, tz=None, **kwargs): if not getattr(values,'dtype',None): values = np.array(values,copy=False) if values.dtype != _NS_DTYPE: @@ -628,15 +615,24 @@ def __setstate__(self, state): raise Exception("invalid pickle state") _unpickle_compat = __setstate__ + def _sub_datelike(self, other): + # subtract a datetime from myself, yielding a TimedeltaIndex + + from pandas import TimedeltaIndex + other = Timestamp(other) + i8 = self.asi8 + result = i8 - other.value + if self.hasnans: + mask = i8 == tslib.iNaT + result[mask] = tslib.iNaT + return TimedeltaIndex(result,name=self.name,copy=False) + def _add_delta(self, delta): - if isinstance(delta, (Tick, timedelta)): - inc = offsets._delta_to_nanoseconds(delta) - mask = self.asi8 == tslib.iNaT - new_values = (self.asi8 + inc).view(_NS_DTYPE) - new_values[mask] = tslib.iNaT - new_values = new_values.view(_NS_DTYPE) - elif isinstance(delta, np.timedelta64): - new_values = self.to_series() + delta + from pandas import TimedeltaIndex + if isinstance(delta, (Tick, timedelta, np.timedelta64)): + new_values = self._add_delta_td(delta) + elif isinstance(delta, 
TimedeltaIndex): + new_values = self._add_delta_tdi(delta) else: new_values = self.astype('O') + delta tz = 'UTC' if self.tz is not None else None @@ -646,16 +642,6 @@ def _add_delta(self, delta): result = result.tz_convert(self.tz) return result - def __contains__(self, key): - try: - res = self.get_loc(key) - return np.isscalar(res) or type(res) == slice - except (KeyError, TypeError): - return False - - def _format_with_header(self, header, **kwargs): - return header + self._format_native_types(**kwargs) - def _format_native_types(self, na_rep=u('NaT'), date_format=None, **kwargs): data = self.asobject @@ -665,35 +651,9 @@ def _format_native_types(self, na_rep=u('NaT'), date_format=date_format, justify='all').get_result() - def isin(self, values): - """ - Compute boolean array of whether each index value is found in the - passed set of values - - Parameters - ---------- - values : set or sequence of values - - Returns - ------- - is_contained : ndarray (boolean dtype) - """ - if not isinstance(values, DatetimeIndex): - try: - values = DatetimeIndex(values) - except ValueError: - return self.asobject.isin(values) - - value_set = set(values.asi8) - return lib.ismember(self.asi8, value_set) - def to_datetime(self, dayfirst=False): return self.copy() - def groupby(self, f): - objs = self.asobject.values - return _algos.groupby_object(objs, f) - def summary(self, name=None): if len(self) > 0: index_summary = ', %s to %s' % (com.pprint_thing(self[0]), @@ -710,9 +670,9 @@ def summary(self, name=None): return result - def get_duplicates(self): - values = Index.get_duplicates(self) - return DatetimeIndex(values) + def _format_footer(self): + tagline = 'Length: %d, Freq: %s, Timezone: %s' + return tagline % (len(self), self.freqstr, self.tz) def astype(self, dtype): dtype = np.dtype(dtype) @@ -755,10 +715,15 @@ def to_series(self, keep_tz=False): ------- Series """ - return super(DatetimeIndex, self).to_series(keep_tz=keep_tz) + from pandas import Series + return Series(self._to_embed(keep_tz), index=self, name=self.name) def _to_embed(self, keep_tz=False): - """ return an array repr of this object, potentially casting to object """ + """ + return an array repr of this object, potentially casting to object + + This is for internal compat + """ if keep_tz and self.tz is not None and str(self.tz) != 'UTC': return self.asobject.values return self.values @@ -790,23 +755,6 @@ def to_period(self, freq=None): return PeriodIndex(self.values, name=self.name, freq=freq, tz=self.tz) - def order(self, return_indexer=False, ascending=True): - """ - Return sorted copy of Index - """ - if return_indexer: - _as = self.argsort() - if not ascending: - _as = _as[::-1] - sorted_index = self.take(_as) - return sorted_index, _as - else: - sorted_values = np.sort(self.values) - if not ascending: - sorted_values = sorted_values[::-1] - return self._simple_new(sorted_values, self.name, None, - self.tz) - def snap(self, freq='S'): """ Snap time stamps to nearest occurring frequency @@ -831,56 +779,6 @@ def snap(self, freq='S'): # we know it conforms; skip check return DatetimeIndex(snapped, freq=freq, verify_integrity=False) - def shift(self, n, freq=None): - """ - Specialized shift which produces a DatetimeIndex - - Parameters - ---------- - n : int - Periods to shift by - freq : DateOffset or timedelta-like, optional - - Returns - ------- - shifted : DatetimeIndex - """ - if freq is not None and freq != self.offset: - if isinstance(freq, compat.string_types): - freq = to_offset(freq) - result = Index.shift(self, n, 
freq) - result.tz = self.tz - - return result - - if n == 0: - # immutable so OK - return self - - if self.offset is None: - raise ValueError("Cannot shift with no offset") - - start = self[0] + n * self.offset - end = self[-1] + n * self.offset - return DatetimeIndex(start=start, end=end, freq=self.offset, - name=self.name, tz=self.tz) - - def repeat(self, repeats, axis=None): - """ - Analogous to ndarray.repeat - """ - return DatetimeIndex(self.values.repeat(repeats), - name=self.name) - - def take(self, indices, axis=0): - """ - Analogous to ndarray.take - """ - maybe_slice = lib.maybe_indices_to_slice(com._ensure_int64(indices)) - if isinstance(maybe_slice, slice): - return self[maybe_slice] - return super(DatetimeIndex, self).take(indices, axis) - def union(self, other): """ Specialized union for DatetimeIndex objects. If combine @@ -1339,52 +1237,6 @@ def slice_indexer(self, start=None, end=None, step=None): return Index.slice_indexer(self, start, end, step) - def slice_locs(self, start=None, end=None): - """ - Index.slice_locs, customized to handle partial ISO-8601 string slicing - """ - if isinstance(start, compat.string_types) or isinstance(end, compat.string_types): - - if self.is_monotonic: - try: - if start: - start_loc = self._get_string_slice(start).start - else: - start_loc = 0 - - if end: - end_loc = self._get_string_slice(end).stop - else: - end_loc = len(self) - - return start_loc, end_loc - except KeyError: - pass - - else: - # can't use a slice indexer because we are not sorted! - # so create an indexer directly - try: - if start: - start_loc = self._get_string_slice(start, - use_rhs=False) - else: - start_loc = np.arange(len(self)) - - if end: - end_loc = self._get_string_slice(end, use_lhs=False) - else: - end_loc = np.arange(len(self)) - - return start_loc, end_loc - except KeyError: - pass - - if isinstance(start, time) or isinstance(end, time): - raise KeyError('Cannot use slice_locs with time slice keys') - - return Index.slice_locs(self, start, end) - def __getitem__(self, key): getitem = self._data.__getitem__ if np.isscalar(key): @@ -1411,17 +1263,6 @@ def __getitem__(self, key): return self._simple_new(result, self.name, new_offset, self.tz) - # Try to run function on index first, and then on elements of index - # Especially important for group-by functionality - def map(self, f): - try: - result = f(self) - if not isinstance(result, (np.ndarray, Index)): - raise TypeError - return result - except Exception: - return _algos.arrmap_object(self.asobject.values, f) - # alias to offset def _get_freq(self): return self.offset @@ -1430,13 +1271,6 @@ def _set_freq(self, value): self.offset = value freq = property(fget=_get_freq, fset=_set_freq, doc="get/set the frequncy of the Index") - @cache_readonly - def inferred_freq(self): - try: - return infer_freq(self) - except ValueError: - return None - @property def freqstr(self): """ return the frequency object as a string if its set, otherwise None """ @@ -1692,8 +1526,8 @@ def tz_localize(self, tz, ambiguous='raise'): else: tz = tslib.maybe_get_tz(tz) # Convert to UTC - - new_dates = tslib.tz_localize_to_utc(self.asi8, tz, + + new_dates = tslib.tz_localize_to_utc(self.asi8, tz, ambiguous=ambiguous) new_dates = new_dates.view(_NS_DTYPE) return self._shallow_copy(new_dates, tz=tz) diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 942a2f445fd48..b4d8a6547950d 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -8,7 +8,7 @@ import pandas.tseries.frequencies as frequencies from 
pandas.tseries.frequencies import get_freq_code as _gfc from pandas.tseries.index import DatetimeIndex, Int64Index, Index -from pandas.core.base import DatetimeIndexOpsMixin +from pandas.tseries.base import DatetimeIndexOpsMixin from pandas.tseries.tools import parse_time_string import pandas.tseries.offsets as offsets @@ -16,7 +16,7 @@ from pandas.core.common import (isnull, _INT64_DTYPE, _maybe_box, _values_from_object, ABCSeries) from pandas import compat -from pandas.lib import Timestamp +from pandas.lib import Timestamp, Timedelta import pandas.lib as lib import pandas.tslib as tslib import pandas.algos as _algos @@ -61,7 +61,6 @@ class Period(PandasObject): minute : int, default 0 second : int, default 0 """ - _typ = 'periodindex' __slots__ = ['freq', 'ordinal'] _comparables = ['name','freqstr'] @@ -171,7 +170,7 @@ def __hash__(self): return hash((self.ordinal, self.freq)) def _add_delta(self, other): - if isinstance(other, (timedelta, np.timedelta64, offsets.Tick)): + if isinstance(other, (timedelta, np.timedelta64, offsets.Tick, Timedelta)): offset = frequencies.to_offset(self.freq) if isinstance(offset, offsets.Tick): nanos = tslib._delta_to_nanoseconds(other) @@ -198,7 +197,7 @@ def _add_delta(self, other): def __add__(self, other): if isinstance(other, (timedelta, np.timedelta64, - offsets.Tick, offsets.DateOffset)): + offsets.Tick, offsets.DateOffset, Timedelta)): return self._add_delta(other) elif com.is_integer(other): if self.ordinal == tslib.iNaT: @@ -211,7 +210,7 @@ def __add__(self, other): def __sub__(self, other): if isinstance(other, (timedelta, np.timedelta64, - offsets.Tick, offsets.DateOffset)): + offsets.Tick, offsets.DateOffset, Timedelta)): neg_other = -other return self + neg_other elif com.is_integer(other): @@ -606,10 +605,12 @@ class PeriodIndex(DatetimeIndexOpsMixin, Int64Index): >>> idx2 = PeriodIndex(start='2000', end='2010', freq='A') """ _box_scalars = True + _typ = 'periodindex' _attributes = ['name','freq'] _datetimelike_ops = ['year','month','day','hour','minute','second', - 'weekofyear','week','dayofweek','weekday','dayofyear','quarter', 'qyear'] + 'weekofyear','week','dayofweek','weekday','dayofyear','quarter', 'qyear', 'freq'] _is_numeric_dtype = False + freq = None __eq__ = _period_index_cmp('__eq__') __ne__ = _period_index_cmp('__ne__', nat_result=True) @@ -839,17 +840,6 @@ def to_datetime(self, dayfirst=False): quarter = _field_accessor('quarter', 2, "The quarter of the date") qyear = _field_accessor('qyear', 1) - # Try to run function on index first, and then on elements of index - # Especially important for group-by functionality - def map(self, f): - try: - result = f(self) - if not isinstance(result, (np.ndarray, Index)): - raise TypeError - return result - except Exception: - return _algos.arrmap_object(self.asobject.values, f) - def _get_object_array(self): freq = self.freq return np.array([ Period._from_ordinal(ordinal=x, freq=freq) for x in self.values], copy=False) @@ -902,7 +892,7 @@ def to_timestamp(self, freq=None, how='start'): return DatetimeIndex(new_data, freq='infer', name=self.name) def _add_delta(self, other): - if isinstance(other, (timedelta, np.timedelta64, offsets.Tick)): + if isinstance(other, (timedelta, np.timedelta64, offsets.Tick, Timedelta)): offset = frequencies.to_offset(self.freq) if isinstance(offset, offsets.Tick): nanos = tslib._delta_to_nanoseconds(other) @@ -1129,9 +1119,6 @@ def __getitem__(self, key): return PeriodIndex(result, name=self.name, freq=self.freq) - def _format_with_header(self, header, 
**kwargs):
-        return header + self._format_native_types(**kwargs)
-
     def _format_native_types(self, na_rep=u('NaT'), **kwargs):
         values = np.array(list(self), dtype=object)
diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py
new file mode 100644
index 0000000000000..01a89a1965549
--- /dev/null
+++ b/pandas/tseries/tdi.py
@@ -0,0 +1,979 @@
+""" implement the TimedeltaIndex """
+
+import operator
+import datetime
+from datetime import timedelta
+import numpy as np
+
+from pandas.core.common import (ABCSeries, _TD_DTYPE, _INT64_DTYPE,
+                                is_timedelta64_dtype, _maybe_box,
+                                _values_from_object, isnull)
+from pandas.core.index import Index, Int64Index
+import pandas.compat as compat
+from pandas.compat import u
+from pandas.core.base import PandasObject
+from pandas.util.decorators import cache_readonly
+from pandas.tseries.frequencies import to_offset
+import pandas.core.common as com
+from pandas.tseries import timedeltas
+from pandas.tseries.base import DatetimeIndexOpsMixin
+from pandas.tseries.timedeltas import to_timedelta, _coerce_scalar_to_timedelta_type
+import pandas.tseries.offsets as offsets
+from pandas.tseries.offsets import Tick, DateOffset
+
+import pandas.lib as lib
+import pandas.tslib as tslib
+import pandas.algos as _algos
+import pandas.index as _index
+
+Timedelta = tslib.Timedelta
+
+_resolution_map = {
+    'ns' : offsets.Nano,
+    'us' : offsets.Micro,
+    'ms' : offsets.Milli,
+    's'  : offsets.Second,
+    'm'  : offsets.Minute,
+    'h'  : offsets.Hour,
+    'D'  : offsets.Day,
+    }
+
+def _td_index_cmp(opname, nat_result=False):
+    """
+    Wrap comparison operations to convert timedelta-like to timedelta64
+    """
+    def wrapper(self, other):
+        func = getattr(super(TimedeltaIndex, self), opname)
+        if _is_convertible_to_td(other):
+            other = _to_m8(other)
+            result = func(other)
+            if com.isnull(other):
+                result.fill(nat_result)
+        else:
+            if not com.is_list_like(other):
+                raise TypeError("cannot compare a TimedeltaIndex with type {0}".format(type(other)))
+
+            other = TimedeltaIndex(other).values
+            result = func(other)
+            result = _values_from_object(result)
+
+            if isinstance(other, Index):
+                o_mask = other.values.view('i8') == tslib.iNaT
+            else:
+                o_mask = other.view('i8') == tslib.iNaT
+
+            if o_mask.any():
+                result[o_mask] = nat_result
+
+        mask = self.asi8 == tslib.iNaT
+        if mask.any():
+            result[mask] = nat_result
+
+        # support of bool dtype indexers
+        if com.is_bool_dtype(result):
+            return result
+        return Index(result)
+
+    return wrapper
+
+class TimedeltaIndex(DatetimeIndexOpsMixin, Int64Index):
+    """
+    Immutable ndarray of timedelta64 data, represented internally as int64, and
+    which can be boxed to timedelta objects
+
+    Parameters
+    ----------
+    data : array-like (1-dimensional), optional
+        Optional timedelta-like data to construct index with
+    unit : string, optional
+        Denotes the unit of integer/float ``data`` arguments;
+        one of D, h, m, s, ms, us, ns
+    freq : string or DateOffset, optional
+        A frequency for the index
+    copy : bool
+        Make a copy of input ndarray
+    start : starting value, timedelta-like, optional
+        If data is None, start is used as the start point in generating regular
+        timedelta data.
+    periods : int, optional, > 0
+        Number of periods to generate, if generating index. 
Takes precedence + over end argument + end : end time, timedelta-like, optional + If periods is none, generated index will extend to first conforming + time on or just past end argument + closed : string or None, default None + Make the interval closed with respect to the given frequency to + the 'left', 'right', or both sides (None) + name : object + Name to be stored in the index + """ + + _typ = 'timedeltaindex' + _join_precedence = 10 + def _join_i8_wrapper(joinf, **kwargs): + return DatetimeIndexOpsMixin._join_i8_wrapper(joinf, dtype='m8[ns]', **kwargs) + + _inner_indexer = _join_i8_wrapper(_algos.inner_join_indexer_int64) + _outer_indexer = _join_i8_wrapper(_algos.outer_join_indexer_int64) + _left_indexer = _join_i8_wrapper(_algos.left_join_indexer_int64) + _left_indexer_unique = _join_i8_wrapper( + _algos.left_join_indexer_unique_int64, with_indexers=False) + _arrmap = None + _datetimelike_ops = ['days','hours','minutes','seconds','milliseconds','microseconds', + 'nanoseconds','freq','components'] + + __eq__ = _td_index_cmp('__eq__') + __ne__ = _td_index_cmp('__ne__', nat_result=True) + __lt__ = _td_index_cmp('__lt__') + __gt__ = _td_index_cmp('__gt__') + __le__ = _td_index_cmp('__le__') + __ge__ = _td_index_cmp('__ge__') + + _engine_type = _index.TimedeltaEngine + + _comparables = ['name','freq'] + _attributes = ['name','freq'] + _is_numeric_dtype = True + freq = None + + def __new__(cls, data=None, unit=None, + freq=None, start=None, end=None, periods=None, + copy=False, name=None, + closed=None, verify_integrity=True, **kwargs): + + if isinstance(data, TimedeltaIndex) and freq is None: + if copy: + data = data.copy() + return data + + freq_infer = False + if not isinstance(freq, DateOffset): + + # if a passed freq is None, don't infer automatically + if freq != 'infer': + freq = to_offset(freq) + else: + freq_infer = True + freq = None + + if periods is not None: + if com.is_float(periods): + periods = int(periods) + elif not com.is_integer(periods): + raise ValueError('Periods must be a number, got %s' % + str(periods)) + + if data is None and freq is None: + raise ValueError("Must provide freq argument if no data is " + "supplied") + + if data is None: + return cls._generate(start, end, periods, name, freq, + closed=closed) + + if unit is not None: + data = to_timedelta(data, unit=unit, box=False) + + if not isinstance(data, (np.ndarray, Index, ABCSeries)): + if np.isscalar(data): + raise ValueError('TimedeltaIndex() must be called with a ' + 'collection of some kind, %s was passed' + % repr(data)) + + # convert if not already + if getattr(data,'dtype',None) != _TD_DTYPE: + data = to_timedelta(data,unit=unit,box=False) + elif copy: + data = np.array(data,copy=True) + + # check that we are matching freqs + if verify_integrity and len(data) > 0: + if freq is not None and not freq_infer: + index = cls._simple_new(data, name=name) + inferred = index.inferred_freq + if inferred != freq.freqstr: + on_freq = cls._generate(index[0], None, len(index), name, freq) + if not np.array_equal(index.asi8, on_freq.asi8): + raise ValueError('Inferred frequency {0} from passed timedeltas does not ' + 'conform to passed frequency {1}'.format(inferred, freq.freqstr)) + index.freq = freq + return index + + if freq_infer: + index = cls._simple_new(data, name=name) + inferred = index.inferred_freq + if inferred: + index.freq = to_offset(inferred) + return index + + return cls._simple_new(data, name=name, freq=freq) + + @classmethod + def _generate(cls, start, end, periods, name, offset, closed=None): 
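+        # exactly two of start, end and periods must be specified; the third is derived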
+        if com._count_not_none(start, end, periods) != 2:
+            raise ValueError('Must specify two of start, end, or periods')
+
+        if start is not None:
+            start = Timedelta(start)
+
+        if end is not None:
+            end = Timedelta(end)
+
+        left_closed = False
+        right_closed = False
+
+        if start is None and end is None:
+            if closed is not None:
+                raise ValueError("Closed has to be None if not both of start "
+                                 "and end are defined")
+
+        if closed is None:
+            left_closed = True
+            right_closed = True
+        elif closed == "left":
+            left_closed = True
+        elif closed == "right":
+            right_closed = True
+        else:
+            raise ValueError("Closed has to be either 'left', 'right' or None")
+
+        index = _generate_regular_range(start, end, periods, offset)
+        index = cls._simple_new(index, name=name, freq=offset)
+
+        if not left_closed:
+            index = index[1:]
+        if not right_closed:
+            index = index[:-1]
+
+        return index
+
+    @property
+    def _box_func(self):
+        return lambda x: Timedelta(x,unit='ns')
+
+    @classmethod
+    def _simple_new(cls, values, name=None, freq=None, **kwargs):
+        if not getattr(values,'dtype',None):
+            values = np.array(values,copy=False)
+            if values.dtype == np.object_:
+                values = tslib.array_to_timedelta64(values)
+            if values.dtype != _TD_DTYPE:
+                values = com._ensure_int64(values).view(_TD_DTYPE)
+
+        result = object.__new__(cls)
+        result._data = values
+        result.name = name
+        result.freq = freq
+        result._reset_identity()
+        return result
+
+    _na_value = tslib.NaT
+    """The expected NA value to use with this index."""
+
+    @property
+    def _formatter_func(self):
+        from pandas.core.format import _get_format_timedelta64
+        return _get_format_timedelta64(self, box=True)
+
+    def _format_footer(self):
+        tagline = 'Length: %d, Freq: %s'
+        return tagline % (len(self), self.freqstr)
+
+    def __setstate__(self, state):
+        """Necessary for making this object picklable"""
+        if isinstance(state, dict):
+            super(TimedeltaIndex, self).__setstate__(state)
+        else:
+            raise Exception("invalid pickle state")
+    _unpickle_compat = __setstate__
+
+    def _add_delta(self, delta):
+        if isinstance(delta, (Tick, timedelta, np.timedelta64)):
+            new_values = self._add_delta_td(delta)
+        elif isinstance(delta, TimedeltaIndex):
+            new_values = self._add_delta_tdi(delta)
+        else:
+            raise ValueError("cannot add the type {0} to a TimedeltaIndex".format(type(delta)))
+
+        result = TimedeltaIndex(new_values, freq='infer')
+        return result
+
+    def _evaluate_with_timedelta_like(self, other, op, opstr):
+
+        # allow division by a timedelta
+        if opstr in ['__div__','__truediv__']:
+            if _is_convertible_to_td(other):
+                other = Timedelta(other)
+                if isnull(other):
+                    raise NotImplementedError("division by pd.NaT not implemented")
+
+                i8 = self.asi8
+                result = i8/float(other.value)
+                if self.hasnans:
+                    mask = i8 == tslib.iNaT
+                    result = result.astype('float64')
+                    result[mask] = np.nan
+                return Index(result,name=self.name,copy=False)
+
+        raise TypeError("can only perform ops with timedelta like values")
+
+    def _add_datelike(self, other):
+
+        # adding a timedeltaindex to a datetimelike
+        from pandas import Timestamp, DatetimeIndex
+        other = Timestamp(other)
+        i8 = self.asi8
+        result = i8 + other.value
+        if self.hasnans:
+            mask = i8 == tslib.iNaT
+            result[mask] = tslib.iNaT
+        return DatetimeIndex(result,name=self.name,copy=False)
+
+    def _format_native_types(self, na_rep=u('NaT'),
+                             date_format=None, **kwargs):
+        from pandas.core.format import Timedelta64Formatter
+        return Timedelta64Formatter(values=self,
+                                    nat_rep=na_rep,
+                                    justify='all').get_result()
+
+    def _get_field(self, m):
+
+        
values = self.asi8 + hasnans = self.hasnans + if hasnans: + result = np.empty(len(self), dtype='float64') + mask = values == tslib.iNaT + imask = ~mask + result.flat[imask] = np.array([ getattr(Timedelta(val),m) for val in values[imask] ]) + result[mask] = np.nan + else: + result = np.array([ getattr(Timedelta(val),m) for val in values ],dtype='int64') + return result + + @property + def days(self): + """ The number of integer days for each element """ + return self._get_field('days') + + @property + def hours(self): + """ The number of integer hours for each element """ + return self._get_field('hours') + + @property + def minutes(self): + """ The number of integer minutes for each element """ + return self._get_field('minutes') + + @property + def seconds(self): + """ The number of integer seconds for each element """ + return self._get_field('seconds') + + @property + def milliseconds(self): + """ The number of integer milliseconds for each element """ + return self._get_field('milliseconds') + + @property + def microseconds(self): + """ The number of integer microseconds for each element """ + return self._get_field('microseconds') + + @property + def nanoseconds(self): + """ The number of integer nanoseconds for each element """ + return self._get_field('nanoseconds') + + @property + def components(self): + """ + Return a dataframe of the components of the Timedeltas + + Returns + ------- + a DataFrame + """ + from pandas import DataFrame + + columns = ['days','hours','minutes','seconds','milliseconds','microseconds','nanoseconds'] + hasnans = self.hasnans + if hasnans: + def f(x): + if isnull(x): + return [np.nan]*len(columns) + return x.components + else: + def f(x): + return x.components + + result = DataFrame([ f(x) for x in self ]) + result.columns = columns + if not hasnans: + result = result.astype('int64') + return result + + def summary(self, name=None): + formatter = self._formatter_func + if len(self) > 0: + index_summary = ', %s to %s' % (formatter(self[0]), + formatter(self[-1])) + else: + index_summary = '' + + if name is None: + name = type(self).__name__ + result = '%s: %s entries%s' % (com.pprint_thing(name), + len(self), index_summary) + if self.freq: + result += '\nFreq: %s' % self.freqstr + + return result + + def to_pytimedelta(self): + """ + Return TimedeltaIndex as object ndarray of datetime.timedelta objects + + Returns + ------- + datetimes : ndarray + """ + return tslib.ints_to_pytimedelta(self.asi8) + + def astype(self, dtype): + dtype = np.dtype(dtype) + + if dtype == np.object_: + return self.asobject + elif dtype == _INT64_DTYPE: + return self.asi8.copy() + elif dtype == _TD_DTYPE: + return self + elif dtype.kind == 'm': + + # return an index (essentially this is division) + result = self.values.astype(dtype) + if self.hasnans: + result = result.astype('float64') + result[self.asi8 == tslib.iNaT] = np.nan + return Index(result,name=self.name) + + return Index(result.astype('i8'),name=self.name) + + else: # pragma: no cover + raise ValueError('Cannot cast TimedeltaIndex to dtype %s' % dtype) + + def union(self, other): + """ + Specialized union for TimedeltaIndex objects. 
If combine + overlapping ranges with the same DateOffset, will be much + faster than Index.union + + Parameters + ---------- + other : TimedeltaIndex or array-like + + Returns + ------- + y : Index or TimedeltaIndex + """ + if _is_convertible_to_index(other): + try: + other = TimedeltaIndex(other) + except TypeError: + pass + + this, other = self, other + + if this._can_fast_union(other): + return this._fast_union(other) + else: + result = Index.union(this, other) + if isinstance(result, TimedeltaIndex): + if result.freq is None: + result.freq = to_offset(result.inferred_freq) + return result + + def append(self, other): + """ + Append a collection of Index options together + + Parameters + ---------- + other : Index or list/tuple of indices + + Returns + ------- + appended : Index + """ + name = self.name + to_concat = [self] + + if isinstance(other, (list, tuple)): + to_concat = to_concat + list(other) + else: + to_concat.append(other) + + for obj in to_concat: + if isinstance(obj, Index) and obj.name != name: + name = None + break + + to_concat = self._ensure_compat_concat(to_concat) + return Index(com._concat_compat(to_concat), name=name) + + def join(self, other, how='left', level=None, return_indexers=False): + """ + See Index.join + """ + if _is_convertible_to_index(other): + try: + other = TimedeltaIndex(other) + except (TypeError, ValueError): + pass + + return Index.join(self, other, how=how, level=level, + return_indexers=return_indexers) + + def _wrap_joined_index(self, joined, other): + name = self.name if self.name == other.name else None + if (isinstance(other, TimedeltaIndex) and self.freq == other.freq + and self._can_fast_union(other)): + joined = self._shallow_copy(joined) + joined.name = name + return joined + else: + return self._simple_new(joined, name) + + def _can_fast_union(self, other): + if not isinstance(other, TimedeltaIndex): + return False + + freq = self.freq + + if freq is None or freq != other.freq: + return False + + if not self.is_monotonic or not other.is_monotonic: + return False + + if len(self) == 0 or len(other) == 0: + return True + + # to make our life easier, "sort" the two ranges + if self[0] <= other[0]: + left, right = self, other + else: + left, right = other, self + + right_start = right[0] + left_end = left[-1] + + # Only need to "adjoin", not overlap + return (right_start == left_end + freq) or right_start in left + + def _fast_union(self, other): + if len(other) == 0: + return self.view(type(self)) + + if len(self) == 0: + return other.view(type(self)) + + # to make our life easier, "sort" the two ranges + if self[0] <= other[0]: + left, right = self, other + else: + left, right = other, self + + left_start, left_end = left[0], left[-1] + right_end = right[-1] + + # concatenate + if left_end < right_end: + loc = right.searchsorted(left_end, side='right') + right_chunk = right.values[loc:] + dates = com._concat_compat((left.values, right_chunk)) + return self._shallow_copy(dates) + else: + return left + + def __array_finalize__(self, obj): + if self.ndim == 0: # pragma: no cover + return self.item() + + self.name = getattr(obj, 'name', None) + self.freq = getattr(obj, 'freq', None) + self._reset_identity() + + def _wrap_union_result(self, other, result): + name = self.name if self.name == other.name else None + return self._simple_new(result, name=name, freq=None) + + def intersection(self, other): + """ + Specialized intersection for TimedeltaIndex objects. 
May be much faster + than Index.intersection + + Parameters + ---------- + other : TimedeltaIndex or array-like + + Returns + ------- + y : Index or TimedeltaIndex + """ + if not isinstance(other, TimedeltaIndex): + try: + other = TimedeltaIndex(other) + except (TypeError, ValueError): + pass + result = Index.intersection(self, other) + return result + + if len(self) == 0: + return self + if len(other) == 0: + return other + # to make our life easier, "sort" the two ranges + if self[0] <= other[0]: + left, right = self, other + else: + left, right = other, self + + end = min(left[-1], right[-1]) + start = right[0] + + if end < start: + return type(self)(data=[]) + else: + lslice = slice(*left.slice_locs(start, end)) + left_chunk = left.values[lslice] + return self._shallow_copy(left_chunk) + + def _possibly_promote(self, other): + if other.inferred_type == 'timedelta': + other = TimedeltaIndex(other) + return self, other + + def get_value(self, series, key): + """ + Fast lookup of value from 1-dimensional ndarray. Only use this if you + know what you're doing + """ + + if _is_convertible_to_td(key): + key = Timedelta(key) + return self.get_value_maybe_box(series, key) + + try: + return _maybe_box(self, Index.get_value(self, series, key), series, key) + except KeyError: + try: + loc = self._get_string_slice(key) + return series[loc] + except (TypeError, ValueError, KeyError): + pass + + try: + return self.get_value_maybe_box(series, key) + except (TypeError, ValueError, KeyError): + raise KeyError(key) + + def get_value_maybe_box(self, series, key): + if not isinstance(key, Timedelta): + key = Timedelta(key) + values = self._engine.get_value(_values_from_object(series), key) + return _maybe_box(self, values, series, key) + + def get_loc(self, key): + """ + Get integer location for requested label + + Returns + ------- + loc : int + """ + if _is_convertible_to_td(key): + key = Timedelta(key) + return self._engine.get_loc(key) + + try: + return Index.get_loc(self, key) + except (KeyError, ValueError): + try: + return self._get_string_slice(key) + except (TypeError, KeyError, ValueError): + pass + + try: + stamp = Timedelta(key) + return self._engine.get_loc(stamp) + except (KeyError, ValueError): + raise KeyError(key) + + def _get_string_slice(self, key, use_lhs=True, use_rhs=True): + freq = getattr(self, 'freqstr', + getattr(self, 'inferred_freq', None)) + + loc = self._partial_td_slice(key, freq, use_lhs=use_lhs, + use_rhs=use_rhs) + return loc + + def _partial_td_slice(self, key, freq, use_lhs=True, use_rhs=True): + + # given a key, try to figure out a location for a partial slice + if not isinstance(key, compat.string_types): + return key + + parsed = _coerce_scalar_to_timedelta_type(key, box=True) + + is_monotonic = self.is_monotonic + + # figure out the resolution of the passed td + # and round to it + reso = parsed.resolution + t1 = parsed.round(reso) + t2 = t1 + _resolution_map[reso]() - Timedelta(1,'ns') + + stamps = self.asi8 + + if is_monotonic: + + # we are out of range + if len(stamps) and ( + (use_lhs and t1.value < stamps[0] and t2.value < stamps[0]) or ( + (use_rhs and t1.value > stamps[-1] and t2.value > stamps[-1]))): + raise KeyError + + # a monotonic (sorted) series can be sliced + left = stamps.searchsorted(t1.value, side='left') if use_lhs else None + right = stamps.searchsorted(t2.value, side='right') if use_rhs else None + + return slice(left, right) + + lhs_mask = (stamps >= t1.value) if use_lhs else True + rhs_mask = (stamps <= t2.value) if use_rhs else True + + # 
try to find the dates + return (lhs_mask & rhs_mask).nonzero()[0] + + def __getitem__(self, key): + getitem = self._data.__getitem__ + if np.isscalar(key): + val = getitem(key) + return Timedelta(val) + else: + if com._is_bool_indexer(key): + key = np.asarray(key) + if key.all(): + key = slice(0,None,None) + else: + key = lib.maybe_booleans_to_slice(key.view(np.uint8)) + + result = getitem(key) + if result.ndim > 1: + return result + + return self._simple_new(result, self.name) + + @property + def freqstr(self): + """ return the frequency object as a string if it is set, otherwise None """ + if self.freq is None: + return None + return self.freq + + def searchsorted(self, key, side='left'): + if isinstance(key, (np.ndarray, Index)): + key = np.array(key, dtype=_TD_DTYPE, copy=False) + else: + key = _to_m8(key) + + return self.values.searchsorted(key, side=side) + + def is_type_compatible(self, typ): + return typ == self.inferred_type or typ == 'timedelta' + + @property + def inferred_type(self): + return 'timedelta64' + + @property + def dtype(self): + return _TD_DTYPE + + @property + def is_all_dates(self): + return True + + def equals(self, other): + """ + Determines if two Index objects contain the same elements. + """ + if self.is_(other): + return True + + if (not hasattr(other, 'inferred_type') or + other.inferred_type != 'timedelta64'): + try: + other = TimedeltaIndex(other) + except: + return False + + return np.array_equal(self.asi8, other.asi8) + + def insert(self, loc, item): + """ + Make new Index inserting new item at location + + Parameters + ---------- + loc : int + item : object + if not either a Python timedelta or a numpy timedelta-like, the + returned Index dtype will be object rather than timedelta64. + + Returns + ------- + new_index : Index + """ + + # try to convert if possible + if _is_convertible_to_td(item): + try: + item = Timedelta(item) + except: + pass + + freq = None + if isinstance(item, Timedelta): + + # check freq can be preserved on edge cases + if self.freq is not None: + if (loc == 0 or loc == -len(self)) and item + self.freq == self[0]: + freq = self.freq + elif (loc == len(self)) and item - self.freq == self[-1]: + freq = self.freq + item = _to_m8(item) + + try: + new_tds = np.concatenate((self[:loc].asi8, [item.view(np.int64)], + self[loc:].asi8)) + return TimedeltaIndex(new_tds, name=self.name, freq=freq) + + except (AttributeError, TypeError): + + # fall back to object index + if isinstance(item,compat.string_types): + return self.asobject.insert(loc, item) + raise TypeError("cannot insert TimedeltaIndex with incompatible label") + + def delete(self, loc): + """ + Make a new TimedeltaIndex with passed location(s) deleted. + + Parameters + ---------- + loc: int, slice or array of ints + Indicate which sub-arrays to remove.
+ + Returns + ------- + new_index : TimedeltaIndex + """ + new_tds = np.delete(self.asi8, loc) + + freq = 'infer' + if lib.is_integer(loc): + if loc in (0, -len(self), -1, len(self) - 1): + freq = self.freq + else: + if com.is_list_like(loc): + loc = lib.maybe_indices_to_slice(com._ensure_int64(np.array(loc))) + if isinstance(loc, slice) and loc.step in (1, None): + if (loc.start in (0, None) or loc.stop in (len(self), None)): + freq = self.freq + + return TimedeltaIndex(new_tds, name=self.name, freq=freq) + +TimedeltaIndex._add_numeric_methods() + +def _is_convertible_to_index(other): + """ return a boolean indicating whether the passed object can be converted to a TimedeltaIndex """ + if isinstance(other, TimedeltaIndex): + return True + elif (len(other) > 0 and + other.inferred_type not in ('floating', 'mixed-integer','integer', + 'mixed-integer-float', 'mixed')): + return True + return False + + +def _is_convertible_to_td(key): + return isinstance(key, (DateOffset, timedelta, Timedelta, np.timedelta64, compat.string_types)) + +def _to_m8(key): + ''' + Timedelta-like => m8[ns] + ''' + if not isinstance(key, Timedelta): + # this also converts strings + key = Timedelta(key) + + # return a type that can be compared + return np.int64(key.value).view(_TD_DTYPE) + +def _generate_regular_range(start, end, periods, offset): + stride = offset.nanos + if periods is None: + b = Timedelta(start).value + e = Timedelta(end).value + e += stride - e % stride + elif start is not None: + b = Timedelta(start).value + e = b + periods * stride + elif end is not None: + e = Timedelta(end).value + stride + b = e - periods * stride + else: + raise NotImplementedError + + data = np.arange(b, e, stride, dtype=np.int64) + data = TimedeltaIndex._simple_new(data, None) + + return data + + +def timedelta_range(start=None, end=None, periods=None, freq='D', + name=None, closed=None): + """ + Return a fixed frequency timedelta index, with day as the default + frequency + + Parameters + ---------- + start : string or timedelta-like, default None + Left bound for generating timedeltas + end : string or timedelta-like, default None + Right bound for generating timedeltas + periods : integer or None, default None + If None, must specify start and end + freq : string or DateOffset, default 'D' (calendar daily) + Frequency strings can have multiples, e.g.
'5H' + name : str, default None + Name of the resulting index + closed : string or None, default None + Make the interval closed with respect to the given frequency to + the 'left', 'right', or both sides (None) + + Notes + ----- + 2 of start, end, or periods must be specified + + Returns + ------- + rng : TimedeltaIndex + """ + return TimedeltaIndex(start=start, end=end, periods=periods, + freq=freq, name=name, + closed=closed) + + diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py new file mode 100644 index 0000000000000..58b126a7efab8 --- /dev/null +++ b/pandas/tseries/tests/test_base.py @@ -0,0 +1,973 @@ +from __future__ import print_function +import re +from datetime import datetime, timedelta +import numpy as np +import pandas as pd +from pandas.tseries.base import DatetimeIndexOpsMixin +from pandas.util.testing import assertRaisesRegexp, assert_isinstance +from pandas.tseries.common import is_datetimelike +from pandas import (Series, Index, Int64Index, Timestamp, DatetimeIndex, PeriodIndex, + TimedeltaIndex, Timedelta, timedelta_range, date_range, Float64Index) +import pandas.tslib as tslib +import nose + +import pandas.util.testing as tm + +from pandas.tests.test_base import Ops + +class TestDatetimeIndexOps(Ops): + tz = [None, 'UTC', 'Asia/Tokyo', 'US/Eastern', + 'dateutil/Asia/Singapore', 'dateutil/US/Pacific'] + + def setUp(self): + super(TestDatetimeIndexOps, self).setUp() + mask = lambda x: isinstance(x, DatetimeIndex) or isinstance(x, PeriodIndex) + self.is_valid_objs = [ o for o in self.objs if mask(o) ] + self.not_valid_objs = [ o for o in self.objs if not mask(o) ] + + def test_ops_properties(self): + self.check_ops_properties(['year','month','day','hour','minute','second','weekofyear','week','dayofweek','dayofyear','quarter']) + self.check_ops_properties(['date','time','microsecond','nanosecond', 'is_month_start', 'is_month_end', 'is_quarter_start', + 'is_quarter_end', 'is_year_start', 'is_year_end'], lambda x: isinstance(x,DatetimeIndex)) + + def test_ops_properties_basic(self): + + # sanity check that the behavior didn't change + # GH7206 + for op in ['year','day','second','weekday']: + self.assertRaises(TypeError, lambda x: getattr(self.dt_series,op)) + + # attribute access should still work! 
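+ # (a plain Series whose index holds the labels 'year'/'month'/'day' exposes + # them via attribute access, so ``s.year`` works below even though ``s`` is + # not datetimelike, while ``s.weekday`` raises AttributeError because that + # label is absent from the index)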
+ s = Series(dict(year=2000,month=1,day=10)) + self.assertEquals(s.year,2000) + self.assertEquals(s.month,1) + self.assertEquals(s.day,10) + self.assertRaises(AttributeError, lambda : s.weekday) + + def test_asobject_tolist(self): + idx = pd.date_range(start='2013-01-01', periods=4, freq='M', name='idx') + expected_list = [pd.Timestamp('2013-01-31'), pd.Timestamp('2013-02-28'), + pd.Timestamp('2013-03-31'), pd.Timestamp('2013-04-30')] + expected = pd.Index(expected_list, dtype=object, name='idx') + result = idx.asobject + self.assertTrue(isinstance(result, Index)) + + self.assertEqual(result.dtype, object) + self.assertTrue(result.equals(expected)) + self.assertEqual(result.name, expected.name) + self.assertEqual(idx.tolist(), expected_list) + + idx = pd.date_range(start='2013-01-01', periods=4, freq='M', name='idx', tz='Asia/Tokyo') + expected_list = [pd.Timestamp('2013-01-31', tz='Asia/Tokyo'), + pd.Timestamp('2013-02-28', tz='Asia/Tokyo'), + pd.Timestamp('2013-03-31', tz='Asia/Tokyo'), + pd.Timestamp('2013-04-30', tz='Asia/Tokyo')] + expected = pd.Index(expected_list, dtype=object, name='idx') + result = idx.asobject + self.assertTrue(isinstance(result, Index)) + self.assertEqual(result.dtype, object) + self.assertTrue(result.equals(expected)) + self.assertEqual(result.name, expected.name) + self.assertEqual(idx.tolist(), expected_list) + + idx = DatetimeIndex([datetime(2013, 1, 1), datetime(2013, 1, 2), + pd.NaT, datetime(2013, 1, 4)], name='idx') + expected_list = [pd.Timestamp('2013-01-01'), pd.Timestamp('2013-01-02'), + pd.NaT, pd.Timestamp('2013-01-04')] + expected = pd.Index(expected_list, dtype=object, name='idx') + result = idx.asobject + self.assertTrue(isinstance(result, Index)) + self.assertEqual(result.dtype, object) + self.assertTrue(result.equals(expected)) + self.assertEqual(result.name, expected.name) + self.assertEqual(idx.tolist(), expected_list) + + def test_minmax(self): + for tz in self.tz: + # monotonic + idx1 = pd.DatetimeIndex([pd.NaT, '2011-01-01', '2011-01-02', + '2011-01-03'], tz=tz) + self.assertTrue(idx1.is_monotonic) + + # non-monotonic + idx2 = pd.DatetimeIndex(['2011-01-01', pd.NaT, '2011-01-03', + '2011-01-02', pd.NaT], tz=tz) + self.assertFalse(idx2.is_monotonic) + + for idx in [idx1, idx2]: + self.assertEqual(idx.min(), pd.Timestamp('2011-01-01', tz=tz)) + self.assertEqual(idx.max(), pd.Timestamp('2011-01-03', tz=tz)) + + for op in ['min', 'max']: + # Return NaT + obj = DatetimeIndex([]) + self.assertTrue(pd.isnull(getattr(obj, op)())) + + obj = DatetimeIndex([pd.NaT]) + self.assertTrue(pd.isnull(getattr(obj, op)())) + + obj = DatetimeIndex([pd.NaT, pd.NaT, pd.NaT]) + self.assertTrue(pd.isnull(getattr(obj, op)())) + + def test_representation(self): + idx1 = DatetimeIndex([], freq='D') + idx2 = DatetimeIndex(['2011-01-01'], freq='D') + idx3 = DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D') + idx4 = DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], freq='D') + idx5 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'], + freq='H', tz='Asia/Tokyo') + idx6 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], + tz='US/Eastern') + + exp1 = """ +Length: 0, Freq: D, Timezone: None""" + exp2 = """ +[2011-01-01] +Length: 1, Freq: D, Timezone: None""" + exp3 = """ +[2011-01-01, 2011-01-02] +Length: 2, Freq: D, Timezone: None""" + exp4 = """ +[2011-01-01, ..., 2011-01-03] +Length: 3, Freq: D, Timezone: None""" + exp5 = """ +[2011-01-01 09:00:00+09:00, ..., 2011-01-01 11:00:00+09:00] +Length: 3, Freq: H, Timezone: 
Asia/Tokyo""" + exp6 = """ +[2011-01-01 09:00:00-05:00, ..., NaT] +Length: 3, Freq: None, Timezone: US/Eastern""" + + for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6], + [exp1, exp2, exp3, exp4, exp5, exp6]): + for func in ['__repr__', '__unicode__', '__str__']: + result = getattr(idx, func)() + self.assertEqual(result, expected) + + def test_resolution(self): + for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', 'T', 'S', 'L', 'U'], + ['day', 'day', 'day', 'day', + 'hour', 'minute', 'second', 'millisecond', 'microsecond']): + for tz in [None, 'Asia/Tokyo', 'US/Eastern']: + idx = pd.date_range(start='2013-04-01', periods=30, freq=freq, tz=tz) + self.assertEqual(idx.resolution, expected) + + def test_add_iadd(self): + for tz in self.tz: + # union + rng1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + other1 = pd.date_range('1/6/2000', freq='D', periods=5, tz=tz) + expected1 = pd.date_range('1/1/2000', freq='D', periods=10, tz=tz) + + rng2 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + other2 = pd.date_range('1/4/2000', freq='D', periods=5, tz=tz) + expected2 = pd.date_range('1/1/2000', freq='D', periods=8, tz=tz) + + rng3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + other3 = pd.DatetimeIndex([], tz=tz) + expected3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + + for rng, other, expected in [(rng1, other1, expected1), (rng2, other2, expected2), + (rng3, other3, expected3)]: + result_add = rng + other + result_union = rng.union(other) + + tm.assert_index_equal(result_add, expected) + tm.assert_index_equal(result_union, expected) + rng += other + tm.assert_index_equal(rng, expected) + + # offset + offsets = [pd.offsets.Hour(2), timedelta(hours=2), + np.timedelta64(2, 'h'), Timedelta(hours=2)] + + for delta in offsets: + rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz) + result = rng + delta + expected = pd.date_range('2000-01-01 02:00', '2000-02-01 02:00', tz=tz) + tm.assert_index_equal(result, expected) + rng += delta + tm.assert_index_equal(rng, expected) + + # int + rng = pd.date_range('2000-01-01 09:00', freq='H', periods=10, tz=tz) + result = rng + 1 + expected = pd.date_range('2000-01-01 10:00', freq='H', periods=10, tz=tz) + tm.assert_index_equal(result, expected) + rng += 1 + tm.assert_index_equal(rng, expected) + + def test_sub_isub(self): + for tz in self.tz: + # diff + rng1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + other1 = pd.date_range('1/6/2000', freq='D', periods=5, tz=tz) + expected1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + + rng2 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + other2 = pd.date_range('1/4/2000', freq='D', periods=5, tz=tz) + expected2 = pd.date_range('1/1/2000', freq='D', periods=3, tz=tz) + + rng3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + other3 = pd.DatetimeIndex([], tz=tz) + expected3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + + for rng, other, expected in [(rng1, other1, expected1), (rng2, other2, expected2), + (rng3, other3, expected3)]: + result_add = rng - other + result_union = rng.diff(other) + + tm.assert_index_equal(result_add, expected) + tm.assert_index_equal(result_union, expected) + rng -= other + tm.assert_index_equal(rng, expected) + + # offset + offsets = [pd.offsets.Hour(2), timedelta(hours=2), np.timedelta64(2, 'h'), + Timedelta(hours=2)] + + for delta in offsets: + rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz) + result = rng - delta + expected = pd.date_range('1999-12-31 22:00', '2000-01-31 
22:00', tz=tz) + tm.assert_index_equal(result, expected) + rng -= delta + tm.assert_index_equal(rng, expected) + + # int + rng = pd.date_range('2000-01-01 09:00', freq='H', periods=10, tz=tz) + result = rng - 1 + expected = pd.date_range('2000-01-01 08:00', freq='H', periods=10, tz=tz) + tm.assert_index_equal(result, expected) + rng -= 1 + tm.assert_index_equal(rng, expected) + + def test_value_counts_unique(self): + # GH 7735 + for tz in [None, 'UTC', 'Asia/Tokyo', 'US/Eastern']: + idx = pd.date_range('2011-01-01 09:00', freq='H', periods=10) + # create repeated values, 'n'th element is repeated by n+1 times + idx = DatetimeIndex(np.repeat(idx.values, range(1, len(idx) + 1)), tz=tz) + + exp_idx = pd.date_range('2011-01-01 18:00', freq='-1H', periods=10, tz=tz) + expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64') + tm.assert_series_equal(idx.value_counts(), expected) + + expected = pd.date_range('2011-01-01 09:00', freq='H', periods=10, tz=tz) + tm.assert_index_equal(idx.unique(), expected) + + idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 09:00', '2013-01-01 09:00', + '2013-01-01 08:00', '2013-01-01 08:00', pd.NaT], tz=tz) + + exp_idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 08:00'], tz=tz) + expected = Series([3, 2], index=exp_idx) + tm.assert_series_equal(idx.value_counts(), expected) + + exp_idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 08:00', pd.NaT], tz=tz) + expected = Series([3, 2, 1], index=exp_idx) + tm.assert_series_equal(idx.value_counts(dropna=False), expected) + + tm.assert_index_equal(idx.unique(), exp_idx) + + +class TestTimedeltaIndexOps(Ops): + + def setUp(self): + super(TestTimedeltaIndexOps, self).setUp() + mask = lambda x: isinstance(x, TimedeltaIndex) + self.is_valid_objs = [ o for o in self.objs if mask(o) ] + self.not_valid_objs = [ ] + + def test_ops_properties(self): + self.check_ops_properties(['days','hours','minutes','seconds','milliseconds']) + self.check_ops_properties(['microseconds','nanoseconds']) + + def test_asobject_tolist(self): + idx = timedelta_range(start='1 days', periods=4, freq='D', name='idx') + expected_list = [Timedelta('1 days'),Timedelta('2 days'),Timedelta('3 days'), + Timedelta('4 days')] + expected = pd.Index(expected_list, dtype=object, name='idx') + result = idx.asobject + self.assertTrue(isinstance(result, Index)) + + self.assertEqual(result.dtype, object) + self.assertTrue(result.equals(expected)) + self.assertEqual(result.name, expected.name) + self.assertEqual(idx.tolist(), expected_list) + + idx = TimedeltaIndex([timedelta(days=1),timedelta(days=2),pd.NaT, + timedelta(days=4)], name='idx') + expected_list = [Timedelta('1 days'),Timedelta('2 days'),pd.NaT, + Timedelta('4 days')] + expected = pd.Index(expected_list, dtype=object, name='idx') + result = idx.asobject + self.assertTrue(isinstance(result, Index)) + self.assertEqual(result.dtype, object) + self.assertTrue(result.equals(expected)) + self.assertEqual(result.name, expected.name) + self.assertEqual(idx.tolist(), expected_list) + + def test_minmax(self): + + # monotonic + idx1 = TimedeltaIndex(['nat', '1 days', '2 days', '3 days']) + self.assertTrue(idx1.is_monotonic) + + # non-monotonic + idx2 = TimedeltaIndex(['1 days', np.nan, '3 days', 'NaT']) + self.assertFalse(idx2.is_monotonic) + + for idx in [idx1, idx2]: + self.assertEqual(idx.min(), Timedelta('1 days')), + self.assertEqual(idx.max(), Timedelta('3 days')), + + for op in ['min', 'max']: + # Return NaT + obj = TimedeltaIndex([]) + self.assertTrue(pd.isnull(getattr(obj, op)())) + + 
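+ # A minimal standalone sketch (not part of the test; assumes only the + # public API added in this PR) of the reduction behavior asserted here: + # NaT entries are skipped, and an empty or all-NaT index reduces to NaT. + # + # >>> import pandas as pd + # >>> idx = pd.TimedeltaIndex(['1 days', pd.NaT, '3 days']) + # >>> idx.min(), idx.max() + # (Timedelta('1 days 00:00:00'), Timedelta('3 days 00:00:00')) + # >>> pd.isnull(pd.TimedeltaIndex([]).min()) + # True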
obj = TimedeltaIndex([pd.NaT]) + self.assertTrue(pd.isnull(getattr(obj, op)())) + + obj = TimedeltaIndex([pd.NaT, pd.NaT, pd.NaT]) + self.assertTrue(pd.isnull(getattr(obj, op)())) + + def test_representation(self): + idx1 = TimedeltaIndex([], freq='D') + idx2 = TimedeltaIndex(['1 days'], freq='D') + idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D') + idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D') + idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']) + + + exp1 = """ +Length: 0, Freq: """ + exp2 = """ +['1 days'] +Length: 1, Freq: """ + exp3 = """ +['1 days', '2 days'] +Length: 2, Freq: """ + exp4 = """ +['1 days', ..., '3 days'] +Length: 3, Freq: """ + exp5 = """ +['1 days 00:00:01', ..., '3 days 00:00:00'] +Length: 3, Freq: None""" + + for idx, expected in zip([idx1, idx2, idx3, idx4, idx5], + [exp1, exp2, exp3, exp4, exp5]): + for func in ['__repr__', '__unicode__', '__str__']: + result = getattr(idx, func)() + self.assertEqual(result, expected) + + def test_add_iadd(self): + + # only test adding/sub offsets as + is now numeric + + # offset + offsets = [pd.offsets.Hour(2), timedelta(hours=2), + np.timedelta64(2, 'h'), Timedelta(hours=2)] + + for delta in offsets: + rng = timedelta_range('1 days','10 days') + result = rng + delta + expected = timedelta_range('1 days 02:00:00','10 days 02:00:00',freq='D') + tm.assert_index_equal(result, expected) + rng += delta + tm.assert_index_equal(rng, expected) + + # int + rng = timedelta_range('1 days 09:00:00', freq='H', periods=10) + result = rng + 1 + expected = timedelta_range('1 days 10:00:00', freq='H', periods=10) + tm.assert_index_equal(result, expected) + rng += 1 + tm.assert_index_equal(rng, expected) + + def test_sub_isub(self): + + # only test adding/sub offsets as - is now numeric + + # offset + offsets = [pd.offsets.Hour(2), timedelta(hours=2), np.timedelta64(2, 'h'), + Timedelta(hours=2)] + + for delta in offsets: + rng = timedelta_range('1 days','10 days') + result = rng - delta + expected = timedelta_range('0 days 22:00:00', '9 days 22:00:00') + tm.assert_index_equal(result, expected) + rng -= delta + tm.assert_index_equal(rng, expected) + + # int + rng = timedelta_range('1 days 09:00:00', freq='H', periods=10) + result = rng - 1 + expected = timedelta_range('1 days 08:00:00', freq='H', periods=10) + tm.assert_index_equal(result, expected) + rng -= 1 + tm.assert_index_equal(rng, expected) + + def test_ops_compat(self): + + offsets = [pd.offsets.Hour(2), timedelta(hours=2), np.timedelta64(2, 'h'), + Timedelta(hours=2)] + + rng = timedelta_range('1 days','10 days',name='foo') + + # multiply + for offset in offsets: + self.assertRaises(TypeError, lambda : rng * offset) + + # divide + expected = Int64Index((np.arange(10)+1)*12,name='foo') + for offset in offsets: + result = rng / offset + tm.assert_index_equal(result,expected) + + # divide with nats + rng = TimedeltaIndex(['1 days',pd.NaT,'2 days'],name='foo') + expected = Float64Index([12,np.nan,24]) + for offset in offsets: + result = rng / offset + tm.assert_index_equal(result,expected) + + # don't allow division by NaT (make could in the future) + self.assertRaises(TypeError, lambda : rng / pd.NaT) + + def test_subtraction_ops(self): + + # with datetimes/timedelta and tdi/dti + tdi = TimedeltaIndex(['1 days',pd.NaT,'2 days'],name='foo') + dti = date_range('20130101',periods=3) + td = Timedelta('1 days') + dt = Timestamp('20130101') + + self.assertRaises(TypeError, lambda : tdi - dt) + self.assertRaises(TypeError, lambda : tdi - dti) + + result = dt-dti 
+ expected = TimedeltaIndex(['0 days','-1 days','-2 days']) + tm.assert_index_equal(result,expected) + + result = dti-dt + expected = TimedeltaIndex(['0 days','1 days','2 days']) + tm.assert_index_equal(result,expected) + + result = tdi-td + expected = TimedeltaIndex(['0 days',pd.NaT,'1 days']) + tm.assert_index_equal(result,expected) + + result = td-tdi + expected = TimedeltaIndex(['0 days',pd.NaT,'-1 days']) + tm.assert_index_equal(result,expected) + + result = dti-td + expected = DatetimeIndex(['20121231','20130101','20130102']) + tm.assert_index_equal(result,expected) + + result = dt-tdi + expected = DatetimeIndex(['20121231',pd.NaT,'20121230']) + tm.assert_index_equal(result,expected) + + def test_dti_tdi_numeric_ops(self): + + # These are normally union/diff set-like ops + tdi = TimedeltaIndex(['1 days',pd.NaT,'2 days'],name='foo') + dti = date_range('20130101',periods=3) + td = Timedelta('1 days') + dt = Timestamp('20130101') + + result = tdi-tdi + expected = TimedeltaIndex(['0 days',pd.NaT,'0 days']) + tm.assert_index_equal(result,expected) + + result = tdi+tdi + expected = TimedeltaIndex(['2 days',pd.NaT,'4 days']) + tm.assert_index_equal(result,expected) + + result = dti-tdi + expected = DatetimeIndex(['20121231',pd.NaT,'20130101']) + tm.assert_index_equal(result,expected) + + def test_addition_ops(self): + + # with datetimes/timedelta and tdi/dti + tdi = TimedeltaIndex(['1 days',pd.NaT,'2 days'],name='foo') + dti = date_range('20130101',periods=3) + td = Timedelta('1 days') + dt = Timestamp('20130101') + + result = tdi + dt + expected = DatetimeIndex(['20130102',pd.NaT,'20130103']) + tm.assert_index_equal(result,expected) + + result = dt + tdi + expected = DatetimeIndex(['20130102',pd.NaT,'20130103']) + tm.assert_index_equal(result,expected) + + result = td + tdi + expected = TimedeltaIndex(['2 days',pd.NaT,'3 days']) + tm.assert_index_equal(result,expected) + + result = tdi + td + expected = TimedeltaIndex(['2 days',pd.NaT,'3 days']) + tm.assert_index_equal(result,expected) + + # unequal length + self.assertRaises(ValueError, lambda : tdi + dti[0:1]) + self.assertRaises(ValueError, lambda : tdi[0:1] + dti) + + # random indexes + self.assertRaises(TypeError, lambda : tdi + Int64Index([1,2,3])) + + # this is a union! 
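+ # (historically ``Index.__add__`` performs a set union, so adding an + # Int64Index to a TimedeltaIndex falls back to union semantics rather than + # raising; that is why the reversed form below remains commented out)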
+ #self.assertRaises(TypeError, lambda : Int64Index([1,2,3]) + tdi) + + result = tdi + dti + expected = DatetimeIndex(['20130102',pd.NaT,'20130105']) + tm.assert_index_equal(result,expected) + + result = dti + tdi + expected = DatetimeIndex(['20130102',pd.NaT,'20130105']) + tm.assert_index_equal(result,expected) + + result = dt + td + expected = Timestamp('20130102') + self.assertEqual(result,expected) + + result = td + dt + expected = Timestamp('20130102') + self.assertEqual(result,expected) + + def test_value_counts_unique(self): + # GH 7735 + + idx = timedelta_range('1 days 09:00:00', freq='H', periods=10) + # create repeated values, 'n'th element is repeated by n+1 times + idx = TimedeltaIndex(np.repeat(idx.values, range(1, len(idx) + 1))) + + exp_idx = timedelta_range('1 days 18:00:00', freq='-1H', periods=10) + expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64') + tm.assert_series_equal(idx.value_counts(), expected) + + expected = timedelta_range('1 days 09:00:00', freq='H', periods=10) + tm.assert_index_equal(idx.unique(), expected) + + idx = TimedeltaIndex(['1 days 09:00:00', '1 days 09:00:00', '1 days 09:00:00', + '1 days 08:00:00', '1 days 08:00:00', pd.NaT]) + + exp_idx = TimedeltaIndex(['1 days 09:00:00', '1 days 08:00:00']) + expected = Series([3, 2], index=exp_idx) + tm.assert_series_equal(idx.value_counts(), expected) + + exp_idx = TimedeltaIndex(['1 days 09:00:00', '1 days 08:00:00', pd.NaT]) + expected = Series([3, 2, 1], index=exp_idx) + tm.assert_series_equal(idx.value_counts(dropna=False), expected) + + tm.assert_index_equal(idx.unique(), exp_idx) + +class TestPeriodIndexOps(Ops): + + def setUp(self): + super(TestPeriodIndexOps, self).setUp() + mask = lambda x: isinstance(x, DatetimeIndex) or isinstance(x, PeriodIndex) + self.is_valid_objs = [ o for o in self.objs if mask(o) ] + self.not_valid_objs = [ o for o in self.objs if not mask(o) ] + + def test_ops_properties(self): + self.check_ops_properties(['year','month','day','hour','minute','second','weekofyear','week','dayofweek','dayofyear','quarter']) + self.check_ops_properties(['qyear'], lambda x: isinstance(x,PeriodIndex)) + + def test_asobject_tolist(self): + idx = pd.period_range(start='2013-01-01', periods=4, freq='M', name='idx') + expected_list = [pd.Period('2013-01-31', freq='M'), pd.Period('2013-02-28', freq='M'), + pd.Period('2013-03-31', freq='M'), pd.Period('2013-04-30', freq='M')] + expected = pd.Index(expected_list, dtype=object, name='idx') + result = idx.asobject + self.assertTrue(isinstance(result, Index)) + self.assertEqual(result.dtype, object) + self.assertTrue(result.equals(expected)) + self.assertEqual(result.name, expected.name) + self.assertEqual(idx.tolist(), expected_list) + + idx = PeriodIndex(['2013-01-01', '2013-01-02', 'NaT', '2013-01-04'], freq='D', name='idx') + expected_list = [pd.Period('2013-01-01', freq='D'), pd.Period('2013-01-02', freq='D'), + pd.Period('NaT', freq='D'), pd.Period('2013-01-04', freq='D')] + expected = pd.Index(expected_list, dtype=object, name='idx') + result = idx.asobject + self.assertTrue(isinstance(result, Index)) + self.assertEqual(result.dtype, object) + for i in [0, 1, 3]: + self.assertTrue(result[i], expected[i]) + self.assertTrue(result[2].ordinal, pd.tslib.iNaT) + self.assertTrue(result[2].freq, 'D') + self.assertEqual(result.name, expected.name) + + result_list = idx.tolist() + for i in [0, 1, 3]: + self.assertTrue(result_list[i], expected_list[i]) + self.assertTrue(result_list[2].ordinal, pd.tslib.iNaT) + 
self.assertTrue(result_list[2].freq, 'D') + + def test_minmax(self): + + # monotonic + idx1 = pd.PeriodIndex([pd.NaT, '2011-01-01', '2011-01-02', + '2011-01-03'], freq='D') + self.assertTrue(idx1.is_monotonic) + + # non-monotonic + idx2 = pd.PeriodIndex(['2011-01-01', pd.NaT, '2011-01-03', + '2011-01-02', pd.NaT], freq='D') + self.assertFalse(idx2.is_monotonic) + + for idx in [idx1, idx2]: + self.assertEqual(idx.min(), pd.Period('2011-01-01', freq='D')) + self.assertEqual(idx.max(), pd.Period('2011-01-03', freq='D')) + + for op in ['min', 'max']: + # Return NaT + obj = PeriodIndex([], freq='M') + result = getattr(obj, op)() + self.assertEqual(result.ordinal, tslib.iNaT) + self.assertEqual(result.freq, 'M') + + obj = PeriodIndex([pd.NaT], freq='M') + result = getattr(obj, op)() + self.assertEqual(result.ordinal, tslib.iNaT) + self.assertEqual(result.freq, 'M') + + obj = PeriodIndex([pd.NaT, pd.NaT, pd.NaT], freq='M') + result = getattr(obj, op)() + self.assertEqual(result.ordinal, tslib.iNaT) + self.assertEqual(result.freq, 'M') + + def test_representation(self): + # GH 7601 + idx1 = PeriodIndex([], freq='D') + idx2 = PeriodIndex(['2011-01-01'], freq='D') + idx3 = PeriodIndex(['2011-01-01', '2011-01-02'], freq='D') + idx4 = PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03'], freq='D') + idx5 = PeriodIndex(['2011', '2012', '2013'], freq='A') + idx6 = PeriodIndex(['2011-01-01 09:00', '2012-02-01 10:00', 'NaT'], freq='H') + + idx7 = pd.period_range('2013Q1', periods=1, freq="Q") + idx8 = pd.period_range('2013Q1', periods=2, freq="Q") + idx9 = pd.period_range('2013Q1', periods=3, freq="Q") + + exp1 = """ +Length: 0, Freq: D""" + exp2 = """ +[2011-01-01] +Length: 1, Freq: D""" + exp3 = """ +[2011-01-01, 2011-01-02] +Length: 2, Freq: D""" + exp4 = """ +[2011-01-01, ..., 2011-01-03] +Length: 3, Freq: D""" + exp5 = """ +[2011, ..., 2013] +Length: 3, Freq: A-DEC""" + exp6 = """ +[2011-01-01 09:00, ..., NaT] +Length: 3, Freq: H""" + exp7 = """ +[2013Q1] +Length: 1, Freq: Q-DEC""" + exp8 = """ +[2013Q1, 2013Q2] +Length: 2, Freq: Q-DEC""" + exp9 = """ +[2013Q1, ..., 2013Q3] +Length: 3, Freq: Q-DEC""" + + for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6, idx7, idx8, idx9], + [exp1, exp2, exp3, exp4, exp5, exp6, exp7, exp8, exp9]): + for func in ['__repr__', '__unicode__', '__str__']: + result = getattr(idx, func)() + self.assertEqual(result, expected) + + def test_resolution(self): + for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', 'T', 'S', 'L', 'U'], + ['day', 'day', 'day', 'day', + 'hour', 'minute', 'second', 'millisecond', 'microsecond']): + + idx = pd.period_range(start='2013-04-01', periods=30, freq=freq) + self.assertEqual(idx.resolution, expected) + + def test_add_iadd(self): + # union + rng1 = pd.period_range('1/1/2000', freq='D', periods=5) + other1 = pd.period_range('1/6/2000', freq='D', periods=5) + expected1 = pd.period_range('1/1/2000', freq='D', periods=10) + + rng2 = pd.period_range('1/1/2000', freq='D', periods=5) + other2 = pd.period_range('1/4/2000', freq='D', periods=5) + expected2 = pd.period_range('1/1/2000', freq='D', periods=8) + + rng3 = pd.period_range('1/1/2000', freq='D', periods=5) + other3 = pd.PeriodIndex([], freq='D') + expected3 = pd.period_range('1/1/2000', freq='D', periods=5) + + rng4 = pd.period_range('2000-01-01 09:00', freq='H', periods=5) + other4 = pd.period_range('2000-01-02 09:00', freq='H', periods=5) + expected4 = pd.PeriodIndex(['2000-01-01 09:00', '2000-01-01 10:00', + '2000-01-01 11:00', '2000-01-01 12:00', + '2000-01-01 13:00', 
'2000-01-02 09:00', + '2000-01-02 10:00', '2000-01-02 11:00', + '2000-01-02 12:00', '2000-01-02 13:00'], + freq='H') + + rng5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03', + '2000-01-01 09:05'], freq='T') + other5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:05' + '2000-01-01 09:08'], freq='T') + expected5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03', + '2000-01-01 09:05', '2000-01-01 09:08'], + freq='T') + + rng6 = pd.period_range('2000-01-01', freq='M', periods=7) + other6 = pd.period_range('2000-04-01', freq='M', periods=7) + expected6 = pd.period_range('2000-01-01', freq='M', periods=10) + + rng7 = pd.period_range('2003-01-01', freq='A', periods=5) + other7 = pd.period_range('1998-01-01', freq='A', periods=8) + expected7 = pd.period_range('1998-01-01', freq='A', periods=10) + + for rng, other, expected in [(rng1, other1, expected1), (rng2, other2, expected2), + (rng3, other3, expected3), (rng4, other4, expected4), + (rng5, other5, expected5), (rng6, other6, expected6), + (rng7, other7, expected7)]: + + result_add = rng + other + result_union = rng.union(other) + + tm.assert_index_equal(result_add, expected) + tm.assert_index_equal(result_union, expected) + # GH 6527 + rng += other + tm.assert_index_equal(rng, expected) + + # offset + # DateOffset + rng = pd.period_range('2014', '2024', freq='A') + result = rng + pd.offsets.YearEnd(5) + expected = pd.period_range('2019', '2029', freq='A') + tm.assert_index_equal(result, expected) + rng += pd.offsets.YearEnd(5) + tm.assert_index_equal(rng, expected) + + for o in [pd.offsets.YearBegin(2), pd.offsets.MonthBegin(1), pd.offsets.Minute(), + np.timedelta64(365, 'D'), timedelta(365), Timedelta(days=365)]: + with tm.assertRaisesRegexp(ValueError, 'Input has different freq from Period'): + rng + o + + rng = pd.period_range('2014-01', '2016-12', freq='M') + result = rng + pd.offsets.MonthEnd(5) + expected = pd.period_range('2014-06', '2017-05', freq='M') + tm.assert_index_equal(result, expected) + rng += pd.offsets.MonthEnd(5) + tm.assert_index_equal(rng, expected) + + for o in [pd.offsets.YearBegin(2), pd.offsets.MonthBegin(1), pd.offsets.Minute(), + np.timedelta64(365, 'D'), timedelta(365), Timedelta(days=365)]: + rng = pd.period_range('2014-01', '2016-12', freq='M') + with tm.assertRaisesRegexp(ValueError, 'Input has different freq from Period'): + rng + o + + # Tick + offsets = [pd.offsets.Day(3), timedelta(days=3), np.timedelta64(3, 'D'), + pd.offsets.Hour(72), timedelta(minutes=60*24*3), + np.timedelta64(72, 'h'), Timedelta('72:00:00')] + for delta in offsets: + rng = pd.period_range('2014-05-01', '2014-05-15', freq='D') + result = rng + delta + expected = pd.period_range('2014-05-04', '2014-05-18', freq='D') + tm.assert_index_equal(result, expected) + rng += delta + tm.assert_index_equal(rng, expected) + + for o in [pd.offsets.YearBegin(2), pd.offsets.MonthBegin(1), pd.offsets.Minute(), + np.timedelta64(4, 'h'), timedelta(hours=23), Timedelta('23:00:00')]: + rng = pd.period_range('2014-05-01', '2014-05-15', freq='D') + with tm.assertRaisesRegexp(ValueError, 'Input has different freq from Period'): + rng + o + + offsets = [pd.offsets.Hour(2), timedelta(hours=2), np.timedelta64(2, 'h'), + pd.offsets.Minute(120), timedelta(minutes=120), + np.timedelta64(120, 'm'), Timedelta(minutes=120)] + for delta in offsets: + rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', freq='H') + result = rng + delta + expected = pd.period_range('2014-01-01 12:00', '2014-01-05 12:00', freq='H') + 
tm.assert_index_equal(result, expected) + rng += delta + tm.assert_index_equal(rng, expected) + + for delta in [pd.offsets.YearBegin(2), timedelta(minutes=30), + np.timedelta64(30, 's'), Timedelta(seconds=30)]: + rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', freq='H') + with tm.assertRaisesRegexp(ValueError, 'Input has different freq from Period'): + result = rng + delta + with tm.assertRaisesRegexp(ValueError, 'Input has different freq from Period'): + rng += delta + + # int + rng = pd.period_range('2000-01-01 09:00', freq='H', periods=10) + result = rng + 1 + expected = pd.period_range('2000-01-01 10:00', freq='H', periods=10) + tm.assert_index_equal(result, expected) + rng += 1 + tm.assert_index_equal(rng, expected) + + def test_sub_isub(self): + # diff + rng1 = pd.period_range('1/1/2000', freq='D', periods=5) + other1 = pd.period_range('1/6/2000', freq='D', periods=5) + expected1 = pd.period_range('1/1/2000', freq='D', periods=5) + + rng2 = pd.period_range('1/1/2000', freq='D', periods=5) + other2 = pd.period_range('1/4/2000', freq='D', periods=5) + expected2 = pd.period_range('1/1/2000', freq='D', periods=3) + + rng3 = pd.period_range('1/1/2000', freq='D', periods=5) + other3 = pd.PeriodIndex([], freq='D') + expected3 = pd.period_range('1/1/2000', freq='D', periods=5) + + rng4 = pd.period_range('2000-01-01 09:00', freq='H', periods=5) + other4 = pd.period_range('2000-01-02 09:00', freq='H', periods=5) + expected4 = rng4 + + rng5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03', + '2000-01-01 09:05'], freq='T') + other5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:05'], freq='T') + expected5 = pd.PeriodIndex(['2000-01-01 09:03'], freq='T') + + rng6 = pd.period_range('2000-01-01', freq='M', periods=7) + other6 = pd.period_range('2000-04-01', freq='M', periods=7) + expected6 = pd.period_range('2000-01-01', freq='M', periods=3) + + rng7 = pd.period_range('2003-01-01', freq='A', periods=5) + other7 = pd.period_range('1998-01-01', freq='A', periods=8) + expected7 = pd.period_range('2006-01-01', freq='A', periods=2) + + for rng, other, expected in [(rng1, other1, expected1), (rng2, other2, expected2), + (rng3, other3, expected3), (rng4, other4, expected4), + (rng5, other5, expected5), (rng6, other6, expected6), + (rng7, other7, expected7),]: + result_add = rng - other + result_union = rng.diff(other) + + tm.assert_index_equal(result_add, expected) + tm.assert_index_equal(result_union, expected) + rng -= other + tm.assert_index_equal(rng, expected) + + # offset + # DateOffset + rng = pd.period_range('2014', '2024', freq='A') + result = rng - pd.offsets.YearEnd(5) + expected = pd.period_range('2009', '2019', freq='A') + tm.assert_index_equal(result, expected) + rng -= pd.offsets.YearEnd(5) + tm.assert_index_equal(rng, expected) + + for o in [pd.offsets.YearBegin(2), pd.offsets.MonthBegin(1), pd.offsets.Minute(), + np.timedelta64(365, 'D'), timedelta(365)]: + rng = pd.period_range('2014', '2024', freq='A') + with tm.assertRaisesRegexp(ValueError, 'Input has different freq from Period'): + rng - o + + rng = pd.period_range('2014-01', '2016-12', freq='M') + result = rng - pd.offsets.MonthEnd(5) + expected = pd.period_range('2013-08', '2016-07', freq='M') + tm.assert_index_equal(result, expected) + rng -= pd.offsets.MonthEnd(5) + tm.assert_index_equal(rng, expected) + + for o in [pd.offsets.YearBegin(2), pd.offsets.MonthBegin(1), pd.offsets.Minute(), + np.timedelta64(365, 'D'), timedelta(365)]: + rng = pd.period_range('2014-01', '2016-12', freq='M') + with 
tm.assertRaisesRegexp(ValueError, 'Input has different freq from Period'): + rng - o + + # Tick + offsets = [pd.offsets.Day(3), timedelta(days=3), np.timedelta64(3, 'D'), + pd.offsets.Hour(72), timedelta(minutes=60*24*3), np.timedelta64(72, 'h')] + for delta in offsets: + rng = pd.period_range('2014-05-01', '2014-05-15', freq='D') + result = rng - delta + expected = pd.period_range('2014-04-28', '2014-05-12', freq='D') + tm.assert_index_equal(result, expected) + rng -= delta + tm.assert_index_equal(rng, expected) + + for o in [pd.offsets.YearBegin(2), pd.offsets.MonthBegin(1), pd.offsets.Minute(), + np.timedelta64(4, 'h'), timedelta(hours=23)]: + rng = pd.period_range('2014-05-01', '2014-05-15', freq='D') + with tm.assertRaisesRegexp(ValueError, 'Input has different freq from Period'): + rng - o + + offsets = [pd.offsets.Hour(2), timedelta(hours=2), np.timedelta64(2, 'h'), + pd.offsets.Minute(120), timedelta(minutes=120), np.timedelta64(120, 'm')] + for delta in offsets: + rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', freq='H') + result = rng - delta + expected = pd.period_range('2014-01-01 08:00', '2014-01-05 08:00', freq='H') + tm.assert_index_equal(result, expected) + rng -= delta + tm.assert_index_equal(rng, expected) + + for delta in [pd.offsets.YearBegin(2), timedelta(minutes=30), np.timedelta64(30, 's')]: + rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', freq='H') + with tm.assertRaisesRegexp(ValueError, 'Input has different freq from Period'): + result = rng + delta + with tm.assertRaisesRegexp(ValueError, 'Input has different freq from Period'): + rng += delta + + # int + rng = pd.period_range('2000-01-01 09:00', freq='H', periods=10) + result = rng - 1 + expected = pd.period_range('2000-01-01 08:00', freq='H', periods=10) + tm.assert_index_equal(result, expected) + rng -= 1 + tm.assert_index_equal(rng, expected) + + def test_value_counts_unique(self): + # GH 7735 + idx = pd.period_range('2011-01-01 09:00', freq='H', periods=10) + # create repeated values, 'n'th element is repeated by n+1 times + idx = PeriodIndex(np.repeat(idx.values, range(1, len(idx) + 1)), freq='H') + + exp_idx = PeriodIndex(['2011-01-01 18:00', '2011-01-01 17:00', '2011-01-01 16:00', + '2011-01-01 15:00', '2011-01-01 14:00', '2011-01-01 13:00', + '2011-01-01 12:00', '2011-01-01 11:00', '2011-01-01 10:00', + '2011-01-01 09:00'], freq='H') + expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64') + tm.assert_series_equal(idx.value_counts(), expected) + + expected = pd.period_range('2011-01-01 09:00', freq='H', periods=10) + tm.assert_index_equal(idx.unique(), expected) + + idx = PeriodIndex(['2013-01-01 09:00', '2013-01-01 09:00', '2013-01-01 09:00', + '2013-01-01 08:00', '2013-01-01 08:00', pd.NaT], freq='H') + + exp_idx = PeriodIndex(['2013-01-01 09:00', '2013-01-01 08:00'], freq='H') + expected = Series([3, 2], index=exp_idx) + tm.assert_series_equal(idx.value_counts(), expected) + + exp_idx = PeriodIndex(['2013-01-01 09:00', '2013-01-01 08:00', pd.NaT], freq='H') + expected = Series([3, 2, 1], index=exp_idx) + tm.assert_series_equal(idx.value_counts(dropna=False), expected) + + tm.assert_index_equal(idx.unique(), exp_idx) + + +if __name__ == '__main__': + import nose + + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + # '--with-coverage', '--cover-package=pandas.core'], + exit=False) diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py index 769062f293cf9..48d3f3a551055 100644 --- 
a/pandas/tseries/tests/test_timedeltas.py +++ b/pandas/tseries/tests/test_timedeltas.py @@ -1,22 +1,29 @@ # pylint: disable-msg=E1101,W0612 +from __future__ import division from datetime import datetime, timedelta import nose import numpy as np import pandas as pd -from pandas import (Index, Series, DataFrame, Timestamp, isnull, notnull, - bdate_range, date_range) +from pandas import (Index, Series, DataFrame, Timestamp, Timedelta, TimedeltaIndex, isnull, notnull, + bdate_range, date_range, timedelta_range, Int64Index) import pandas.core.common as com -from pandas.compat import StringIO, lrange, range, zip, u, OrderedDict, long +from pandas.compat import StringIO, lrange, range, zip, u, OrderedDict, long, PY3_2 from pandas import compat, to_timedelta, tslib from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type as ct from pandas.util.testing import (assert_series_equal, assert_frame_equal, assert_almost_equal, + assert_index_equal, ensure_clean) +from pandas.tseries.offsets import Day, Second, Hour import pandas.util.testing as tm +from numpy.random import rand, randn +from pandas import _np_version_under1p8 + +iNaT = tslib.iNaT class TestTimedeltas(tm.TestCase): _multiprocess_can_split_ = True @@ -24,6 +31,240 @@ class TestTimedeltas(tm.TestCase): def setUp(self): pass + def test_construction(self): + + expected = np.timedelta64(10,'D').astype('m8[ns]').view('i8') + self.assertEqual(Timedelta(10,unit='d').value, expected) + self.assertEqual(Timedelta(10.0,unit='d').value, expected) + self.assertEqual(Timedelta('10 days').value, expected) + self.assertEqual(Timedelta(days=10).value, expected) + + expected += np.timedelta64(10,'s').astype('m8[ns]').view('i8') + self.assertEqual(Timedelta('10 days 00:00:10').value, expected) + self.assertEqual(Timedelta(days=10,seconds=10).value, expected) + self.assertEqual(Timedelta(days=10,milliseconds=10*1000).value, expected) + self.assertEqual(Timedelta(days=10,microseconds=10*1000*1000).value, expected) + + # rounding cases + self.assertEqual(Timedelta(82739999850000).value, 82739999850000) + self.assertTrue('0 days 22:58:59.999850' in str(Timedelta(82739999850000))) + self.assertEqual(Timedelta(123072001000000).value, 123072001000000) + self.assertTrue('1 days 10:11:12.001' in str(Timedelta(123072001000000))) + + # more strings + # GH 8190 + self.assertEqual(Timedelta('1 h'), timedelta(hours=1)) + self.assertEqual(Timedelta('1 hour'), timedelta(hours=1)) + self.assertEqual(Timedelta('1 hours'), timedelta(hours=1)) + self.assertEqual(Timedelta('-1 hours'), -timedelta(hours=1)) + self.assertEqual(Timedelta('1 m'), timedelta(minutes=1)) + self.assertEqual(Timedelta('1.5 m'), timedelta(seconds=90)) + self.assertEqual(Timedelta('1 minute'), timedelta(minutes=1)) + self.assertEqual(Timedelta('1 minutes'), timedelta(minutes=1)) + self.assertEqual(Timedelta('1 s'), timedelta(seconds=1)) + self.assertEqual(Timedelta('1 second'), timedelta(seconds=1)) + self.assertEqual(Timedelta('1 seconds'), timedelta(seconds=1)) + self.assertEqual(Timedelta('1 ms'), timedelta(milliseconds=1)) + self.assertEqual(Timedelta('1 milli'), timedelta(milliseconds=1)) + self.assertEqual(Timedelta('1 millisecond'), timedelta(milliseconds=1)) + self.assertEqual(Timedelta('1 us'), timedelta(microseconds=1)) + self.assertEqual(Timedelta('1 micros'), timedelta(microseconds=1)) + self.assertEqual(Timedelta('1 microsecond'), timedelta(microseconds=1)) + self.assertEqual(Timedelta('1.5 microsecond'), Timedelta('00:00:00.000001500')) + self.assertEqual(Timedelta('1 ns'), 
Timedelta('00:00:00.000000001')) + self.assertEqual(Timedelta('1 nano'), Timedelta('00:00:00.000000001')) + self.assertEqual(Timedelta('1 nanosecond'), Timedelta('00:00:00.000000001')) + + # combos + self.assertEqual(Timedelta('10 days 1 hour'), timedelta(days=10,hours=1)) + self.assertEqual(Timedelta('10 days 1 h'), timedelta(days=10,hours=1)) + self.assertEqual(Timedelta('10 days 1 h 1m 1s'), timedelta(days=10,hours=1,minutes=1,seconds=1)) + self.assertEqual(Timedelta('-10 days 1 h 1m 1s'), -timedelta(days=10,hours=1,minutes=1,seconds=1)) + self.assertEqual(Timedelta('-10 days 1 h 1m 1s'), -timedelta(days=10,hours=1,minutes=1,seconds=1)) + self.assertEqual(Timedelta('-10 days 1 h 1m 1s 3us'), -timedelta(days=10,hours=1,minutes=1,seconds=1,microseconds=3)) + self.assertEqual(Timedelta('-10 days 1 h 1.5m 1s 3us'), -timedelta(days=10,hours=1,minutes=1,seconds=31,microseconds=3)) + + # currently invalid as it has a - on the hhmmdd part (only allowed on the days) + self.assertRaises(ValueError, lambda : Timedelta('-10 days -1 h 1.5m 1s 3us')) + + # roundtripping both for string and value + for v in ['1s', + '-1s', + '1us', + '-1us', + '1 day', + '-1 day', + '-23:59:59.999999', + '-1 days +23:59:59.999999', + '-1ns', + '1ns', + '-23:59:59.999999999']: + + td = Timedelta(v) + self.assertEqual(Timedelta(td.value),td) + + # str does not normally display nanos + if not td.nanoseconds: + self.assertEqual(Timedelta(str(td)),td) + self.assertEqual(Timedelta(td._repr_base(format='all')),td) + + # floats + expected = np.timedelta64(10,'s').astype('m8[ns]').view('i8') + np.timedelta64(500,'ms').astype('m8[ns]').view('i8') + self.assertEqual(Timedelta(10.5,unit='s').value, expected) + + # nat + self.assertEqual(Timedelta('').value,iNaT) + self.assertEqual(Timedelta('nat').value,iNaT) + self.assertEqual(Timedelta('NAT').value,iNaT) + self.assertTrue(isnull(Timestamp('nat'))) + self.assertTrue(isnull(Timedelta('nat'))) + + # offset + self.assertEqual(to_timedelta(pd.offsets.Hour(2)),Timedelta('0 days, 02:00:00')) + self.assertEqual(Timedelta(pd.offsets.Hour(2)),Timedelta('0 days, 02:00:00')) + self.assertEqual(Timedelta(pd.offsets.Second(2)),Timedelta('0 days, 00:00:02')) + + # invalid + tm.assertRaisesRegexp(ValueError, + "cannot construct a TimeDelta", + lambda : Timedelta()) + tm.assertRaisesRegexp(ValueError, + "cannot create timedelta string convert", + lambda : Timedelta('foo')) + tm.assertRaisesRegexp(ValueError, + "cannot construct a TimeDelta from the passed arguments, allowed keywords are ", + lambda : Timedelta(day=10)) + + def test_repr(self): + + self.assertEqual(repr(Timedelta(10,unit='d')),"Timedelta('10 days 00:00:00')") + self.assertEqual(repr(Timedelta(10,unit='s')),"Timedelta('0 days 00:00:10')") + self.assertEqual(repr(Timedelta(10,unit='ms')),"Timedelta('0 days 00:00:00.010000')") + self.assertEqual(repr(Timedelta(-10,unit='ms')),"Timedelta('-1 days +23:59:59.990000')") + + def test_identity(self): + + td = Timedelta(10,unit='d') + self.assertTrue(isinstance(td, Timedelta)) + self.assertTrue(isinstance(td, timedelta)) + + def test_conversion(self): + + for td in [ Timedelta(10,unit='d'), Timedelta('1 days, 10:11:12.012345') ]: + self.assertTrue(td == Timedelta(td.to_pytimedelta())) + self.assertEqual(td,td.to_pytimedelta()) + self.assertEqual(td,np.timedelta64(td.value,'ns')) + + # this is NOT equal and cannot be roundtriped (because of the nanos) + td = Timedelta('1 days, 10:11:12.012345678') + self.assertTrue(td != td.to_pytimedelta()) + + def test_ops(self): + + td = 
Timedelta(10,unit='d') + self.assertEqual(-td,Timedelta(-10,unit='d')) + self.assertEqual(+td,Timedelta(10,unit='d')) + self.assertEqual(td - td, Timedelta(0,unit='ns')) + self.assertTrue((td - pd.NaT) is pd.NaT) + self.assertEqual(td + td, Timedelta(20,unit='d')) + self.assertTrue((td + pd.NaT) is pd.NaT) + self.assertEqual(td * 2, Timedelta(20,unit='d')) + self.assertTrue((td * pd.NaT) is pd.NaT) + self.assertEqual(td / 2, Timedelta(5,unit='d')) + self.assertEqual(abs(td), td) + self.assertEqual(abs(-td), td) + self.assertEqual(td / td, 1) + self.assertTrue((td / pd.NaT) is pd.NaT) + + # invert + self.assertEqual(-td,Timedelta('-10d')) + self.assertEqual(td * -1,Timedelta('-10d')) + self.assertEqual(-1 * td,Timedelta('-10d')) + self.assertEqual(abs(-td),Timedelta('10d')) + + # invalid + self.assertRaises(TypeError, lambda : Timedelta(11,unit='d') // 2) + + # invalid multiply with another timedelta + self.assertRaises(TypeError, lambda : td * td) + + # can't operate with integers + self.assertRaises(TypeError, lambda : td + 2) + self.assertRaises(TypeError, lambda : td - 2) + + def test_freq_conversion(self): + + td = Timedelta('1 days 2 hours 3 ns') + result = td / np.timedelta64(1,'D') + self.assertEquals(result, td.value/float(86400*1e9)) + result = td / np.timedelta64(1,'s') + self.assertEquals(result, td.value/float(1e9)) + result = td / np.timedelta64(1,'ns') + self.assertEquals(result, td.value) + + def test_fields(self): + rng = to_timedelta('1 days, 10:11:12') + self.assertEqual(rng.days,1) + self.assertEqual(rng.hours,10) + self.assertEqual(rng.minutes,11) + self.assertEqual(rng.seconds,12) + self.assertEqual(rng.milliseconds,0) + self.assertEqual(rng.microseconds,0) + self.assertEqual(rng.nanoseconds,0) + + td = Timedelta('-1 days, 10:11:12') + self.assertEqual(abs(td),Timedelta('13:48:48')) + self.assertTrue(str(td) == "-1 days +10:11:12") + self.assertEqual(-td,Timedelta('0 days 13:48:48')) + self.assertEqual(-Timedelta('-1 days, 10:11:12').value,49728000000000) + self.assertEqual(Timedelta('-1 days, 10:11:12').value,-49728000000000) + + rng = to_timedelta('-1 days, 10:11:12') + self.assertEqual(rng.days,-1) + self.assertEqual(rng.hours,10) + self.assertEqual(rng.minutes,11) + self.assertEqual(rng.seconds,12) + self.assertEqual(rng.milliseconds,0) + self.assertEqual(rng.microseconds,0) + self.assertEqual(rng.nanoseconds,0) + + # components + tup = pd.to_timedelta(-1, 'us').components + self.assertEqual(tup.days,-1) + self.assertEqual(tup.hours,23) + self.assertEqual(tup.minutes,59) + self.assertEqual(tup.seconds,59) + self.assertEqual(tup.milliseconds,999) + self.assertEqual(tup.microseconds,999) + self.assertEqual(tup.nanoseconds,0) + + tup = Timedelta('-1 days 1 us').components + self.assertEqual(tup.days,-2) + self.assertEqual(tup.hours,23) + self.assertEqual(tup.minutes,59) + self.assertEqual(tup.seconds,59) + self.assertEqual(tup.milliseconds,999) + self.assertEqual(tup.microseconds,999) + self.assertEqual(tup.nanoseconds,0) + + def test_timedelta_range(self): + + expected = to_timedelta(np.arange(5),unit='D') + result = timedelta_range('0 days',periods=5,freq='D') + tm.assert_index_equal(result, expected) + + expected = to_timedelta(np.arange(11),unit='D') + result = timedelta_range('0 days','10 days',freq='D') + tm.assert_index_equal(result, expected) + + expected = to_timedelta(np.arange(5),unit='D') + Second(2) + Day() + result = timedelta_range('1 days, 00:00:02','5 days, 00:00:02',freq='D') + tm.assert_index_equal(result, expected) + + expected = 
to_timedelta([1,3,5,7,9],unit='D') + Second(2) + result = timedelta_range('1 days, 00:00:02',periods=5,freq='2D') + tm.assert_index_equal(result, expected) + def test_numeric_conversions(self): self.assertEqual(ct(0), np.timedelta64(0,'ns')) self.assertEqual(ct(10), np.timedelta64(10,'ns')) @@ -99,7 +340,7 @@ def conv(v): self.assertEqual(ct('06:00:01.0'), conv(np.timedelta64(6*3600+1,'s'))) self.assertEqual(ct('06:00:01.01'), conv(np.timedelta64(1000*(6*3600+1)+10,'ms'))) - self.assertEqual(ct('- 1days, 00:00:01'), -conv(d1+np.timedelta64(1,'s'))) + self.assertEqual(ct('- 1days, 00:00:01'), conv(-d1+np.timedelta64(1,'s'))) self.assertEqual(ct('1days, 06:00:01'), conv(d1+np.timedelta64(6*3600+1,'s'))) self.assertEqual(ct('1days, 06:00:01.01'), conv(d1+np.timedelta64(1000*(6*3600+1)+10,'ms'))) @@ -141,9 +382,9 @@ def conv(v): tm.assert_series_equal(result, expected) # with units - result = Series([ np.timedelta64(0,'ns'), np.timedelta64(10,'s').astype('m8[ns]') ],dtype='m8[ns]') + result = TimedeltaIndex([ np.timedelta64(0,'ns'), np.timedelta64(10,'s').astype('m8[ns]') ]) expected = to_timedelta([0,10],unit='s') - tm.assert_series_equal(result, expected) + tm.assert_index_equal(result, expected) # single element conversion v = timedelta(seconds=1) @@ -159,40 +400,40 @@ def conv(v): # arrays of various dtypes arr = np.array([1]*5,dtype='int64') result = to_timedelta(arr,unit='s') - expected = Series([ np.timedelta64(1,'s') ]*5) - tm.assert_series_equal(result, expected) + expected = TimedeltaIndex([ np.timedelta64(1,'s') ]*5) + tm.assert_index_equal(result, expected) arr = np.array([1]*5,dtype='int64') result = to_timedelta(arr,unit='m') - expected = Series([ np.timedelta64(1,'m') ]*5) - tm.assert_series_equal(result, expected) + expected = TimedeltaIndex([ np.timedelta64(1,'m') ]*5) + tm.assert_index_equal(result, expected) arr = np.array([1]*5,dtype='int64') result = to_timedelta(arr,unit='h') - expected = Series([ np.timedelta64(1,'h') ]*5) - tm.assert_series_equal(result, expected) + expected = TimedeltaIndex([ np.timedelta64(1,'h') ]*5) + tm.assert_index_equal(result, expected) arr = np.array([1]*5,dtype='timedelta64[s]') result = to_timedelta(arr) - expected = Series([ np.timedelta64(1,'s') ]*5) - tm.assert_series_equal(result, expected) + expected = TimedeltaIndex([ np.timedelta64(1,'s') ]*5) + tm.assert_index_equal(result, expected) arr = np.array([1]*5,dtype='timedelta64[D]') result = to_timedelta(arr) - expected = Series([ np.timedelta64(1,'D') ]*5) - tm.assert_series_equal(result, expected) + expected = TimedeltaIndex([ np.timedelta64(1,'D') ]*5) + tm.assert_index_equal(result, expected) def testit(unit, transform): # array result = to_timedelta(np.arange(5),unit=unit) - expected = Series([ np.timedelta64(i,transform(unit)) for i in np.arange(5).tolist() ]) - tm.assert_series_equal(result, expected) + expected = TimedeltaIndex([ np.timedelta64(i,transform(unit)) for i in np.arange(5).tolist() ]) + tm.assert_index_equal(result, expected) # scalar result = to_timedelta(2,unit=unit) - expected = np.timedelta64(2,transform(unit)).astype('timedelta64[ns]') - self.assert_numpy_array_equal(result,expected) + expected = Timedelta(np.timedelta64(2,transform(unit)).astype('timedelta64[ns]')) + self.assertEqual(result, expected) # validate all units # GH 6855 @@ -212,8 +453,6 @@ def testit(unit, transform): testit('L',lambda x: 'ms') # these will error - self.assertRaises(ValueError, lambda : to_timedelta(['1h'])) - self.assertRaises(ValueError, lambda : to_timedelta(['1m'])) 
self.assertRaises(ValueError, lambda : to_timedelta([1,2],unit='foo')) self.assertRaises(ValueError, lambda : to_timedelta(1,unit='foo')) @@ -228,30 +467,28 @@ def test_to_timedelta_via_apply(self): def test_timedelta_ops(self): # GH4984 - # make sure ops return timedeltas + # make sure ops return Timedelta s = Series([Timestamp('20130101') + timedelta(seconds=i*i) for i in range(10) ]) td = s.diff() - result = td.mean()[0] - # TODO This should have returned a scalar to begin with. Hack for now. + result = td.mean() expected = to_timedelta(timedelta(seconds=9)) - tm.assert_almost_equal(result, expected) + self.assertEqual(result, expected) result = td.quantile(.1) - # This properly returned a scalar. - expected = np.timedelta64(2599999999,'ns') - tm.assert_almost_equal(result, expected) + expected = Timedelta(np.timedelta64(2600,'ms')) + self.assertEqual(result, expected) - result = td.median()[0] - # TODO This should have returned a scalar to begin with. Hack for now. + result = td.median() expected = to_timedelta('00:00:08') - tm.assert_almost_equal(result, expected) + self.assertEqual(result, expected) # GH 6462 # consistency in returned values for sum - result = td.sum()[0] + result = td.sum() expected = to_timedelta('00:01:21') tm.assert_almost_equal(result, expected) + self.assertEqual(result, expected) def test_timedelta_ops_scalar(self): # GH 6808 @@ -297,10 +534,10 @@ def test_to_timedelta_on_missing_values(self): assert_series_equal(actual, expected) actual = pd.to_timedelta(np.nan) - self.assertEqual(actual.astype('int64'), timedelta_NaT.astype('int64')) + self.assertEqual(actual.value, timedelta_NaT.astype('int64')) actual = pd.to_timedelta(pd.NaT) - self.assertEqual(actual.astype('int64'), timedelta_NaT.astype('int64')) + self.assertEqual(actual.value, timedelta_NaT.astype('int64')) def test_timedelta_ops_with_missing_values(self): # setup @@ -394,6 +631,567 @@ def test_apply_to_timedelta(self): # Can't compare until apply on a Series gives the correct dtype # assert_series_equal(a, b) + def test_pickle(self): + + v = Timedelta('1 days 10:11:12.0123456') + v_p = self.round_trip_pickle(v) + self.assertEqual(v,v_p) + +class TestTimedeltaIndex(tm.TestCase): + _multiprocess_can_split_ = True + + def test_pass_TimedeltaIndex_to_index(self): + + rng = timedelta_range('1 days','10 days') + idx = Index(rng, dtype=object) + + expected = Index(rng.to_pytimedelta(), dtype=object) + + self.assert_numpy_array_equal(idx.values, expected.values) + + def test_pickle(self): + + rng = timedelta_range('1 days', periods=10) + rng_p = self.round_trip_pickle(rng) + tm.assert_index_equal(rng,rng_p) + + def test_hash_error(self): + index = timedelta_range('1 days', periods=10) + with tm.assertRaisesRegexp(TypeError, + "unhashable type: %r" % + type(index).__name__): + hash(index) + + def test_append_join_nondatetimeindex(self): + rng = timedelta_range('1 days', periods=10) + idx = Index(['a', 'b', 'c', 'd']) + + result = rng.append(idx) + tm.assert_isinstance(result[0], Timedelta) + + # it works + rng.join(idx, how='outer') + + def test_append_numpy_bug_1681(self): + + td = timedelta_range('1 days','10 days',freq='2D') + a = DataFrame() + c = DataFrame({'A': 'foo', 'B': td}, index=td) + str(c) + + result = a.append(c) + self.assertTrue((result['B'] == td).all()) + + def test_astype(self): + rng = timedelta_range('1 days', periods=10) + + result = rng.astype('i8') + self.assert_numpy_array_equal(result, rng.asi8) + + def test_fields(self): + rng = timedelta_range('1 days, 10:11:12', periods=2, 
+        self.assert_numpy_array_equal(rng.days, np.array([1,1],dtype='int64'))
+        self.assert_numpy_array_equal(rng.hours, np.array([10,10],dtype='int64'))
+        self.assert_numpy_array_equal(rng.minutes, np.array([11,11],dtype='int64'))
+        self.assert_numpy_array_equal(rng.seconds, np.array([12,13],dtype='int64'))
+        self.assert_numpy_array_equal(rng.milliseconds, np.array([0,0],dtype='int64'))
+        self.assert_numpy_array_equal(rng.microseconds, np.array([0,0],dtype='int64'))
+        self.assert_numpy_array_equal(rng.nanoseconds, np.array([0,0],dtype='int64'))
+
+        # with nat
+        s = Series(rng)
+        s[1] = np.nan
+
+        tm.assert_series_equal(s.dt.days,Series([1,np.nan],index=[0,1]))
+        tm.assert_series_equal(s.dt.hours,Series([10,np.nan],index=[0,1]))
+        tm.assert_series_equal(s.dt.milliseconds,Series([0,np.nan],index=[0,1]))
+
+    def test_components(self):
+        rng = timedelta_range('1 days, 10:11:12', periods=2, freq='s')
+        rng.components
+
+        # with nat
+        s = Series(rng)
+        s[1] = np.nan
+
+        result = s.dt.components
+        self.assertFalse(result.iloc[0].isnull().all())
+        self.assertTrue(result.iloc[1].isnull().all())
+
+    def test_constructor(self):
+        expected = TimedeltaIndex(['1 days','1 days 00:00:05',
+                                   '2 days','2 days 00:00:02','0 days 00:00:03'])
+        result = TimedeltaIndex(['1 days','1 days, 00:00:05',
+                                 np.timedelta64(2,'D'),
+                                 timedelta(days=2,seconds=2),
+                                 pd.offsets.Second(3)])
+        tm.assert_index_equal(result,expected)
+
+    def test_constructor_coverage(self):
+        rng = timedelta_range('1 days', periods=10.5)
+        exp = timedelta_range('1 days', periods=10)
+        self.assertTrue(rng.equals(exp))
+
+        self.assertRaises(ValueError, TimedeltaIndex, start='1 days',
+                          periods='foo', freq='D')
+
+        self.assertRaises(ValueError, TimedeltaIndex, start='1 days',
+                          end='10 days')
+
+        self.assertRaises(ValueError, TimedeltaIndex, '1 days')
+
+        # generator expression
+        gen = (timedelta(i) for i in range(10))
+        result = TimedeltaIndex(gen)
+        expected = TimedeltaIndex([timedelta(i) for i in range(10)])
+        self.assertTrue(result.equals(expected))
+
+        # NumPy string array
+        strings = np.array(['1 days', '2 days', '3 days'])
+        result = TimedeltaIndex(strings)
+        expected = to_timedelta([1,2,3],unit='d')
+        self.assertTrue(result.equals(expected))
+
+        from_ints = TimedeltaIndex(expected.asi8)
+        self.assertTrue(from_ints.equals(expected))
+
+        # non-conforming freq
+        self.assertRaises(ValueError, TimedeltaIndex,
+                          ['1 days', '2 days', '4 days'],
+                          freq='D')
+
+        self.assertRaises(ValueError, TimedeltaIndex, periods=10, freq='D')
+
+    def test_constructor_name(self):
+        idx = TimedeltaIndex(start='1 days', periods=1, freq='D',
+                             name='TEST')
+        self.assertEqual(idx.name, 'TEST')
+
+    def test_freq_conversion(self):
+
+        # doc example
+
+        # series
+        td = Series(date_range('20130101',periods=4)) - \
+             Series(date_range('20121201',periods=4))
+        td[2] += timedelta(minutes=5,seconds=3)
+        td[3] = np.nan
+
+        result = td / np.timedelta64(1,'D')
+        expected = Series([31,31,(31*86400+5*60+3)/86400.0,np.nan])
+        assert_series_equal(result,expected)
+
+        result = td.astype('timedelta64[D]')
+        expected = Series([31,31,31,np.nan])
+        assert_series_equal(result,expected)
+
+        result = td / np.timedelta64(1,'s')
+        expected = Series([31*86400,31*86400,31*86400+5*60+3,np.nan])
+        assert_series_equal(result,expected)
+
+        result = td.astype('timedelta64[s]')
+        assert_series_equal(result,expected)
+
+        # tdi
+        td = TimedeltaIndex(td)
+
+        result = td / np.timedelta64(1,'D')
+        expected = Index([31,31,(31*86400+5*60+3)/86400.0,np.nan])
+        assert_index_equal(result,expected)
+
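+        # In sketch form (values as asserted above and below):
+        #   td / np.timedelta64(1, 'D')    -> floats, fractional days kept
+        #   td.astype('timedelta64[D]')    -> whole days, fraction truncated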
+        result = td.astype('timedelta64[D]')
+        expected = Index([31,31,31,np.nan])
+        assert_index_equal(result,expected)
+
+        result = td / np.timedelta64(1,'s')
+        expected = Index([31*86400,31*86400,31*86400+5*60+3,np.nan])
+        assert_index_equal(result,expected)
+
+        result = td.astype('timedelta64[s]')
+        assert_index_equal(result,expected)
+
+    def test_comparisons_coverage(self):
+        rng = timedelta_range('1 days', periods=10)
+
+        result = rng < rng[3]
+        exp = np.array([True, True, True]+[False]*7)
+        self.assert_numpy_array_equal(result, exp)
+
+        # raise TypeError for now
+        self.assertRaises(TypeError, rng.__lt__, rng[3].value)
+
+        result = rng == list(rng)
+        exp = rng == rng
+        self.assert_numpy_array_equal(result, exp)
+
+    def test_comparisons_nat(self):
+        if PY3_2:
+            raise nose.SkipTest('nat comparisons on 3.2 broken')
+
+        tdidx1 = pd.TimedeltaIndex(['1 day', pd.NaT, '1 day 00:00:01', pd.NaT,
+                                    '1 day 00:00:01', '5 day 00:00:03'])
+        tdidx2 = pd.TimedeltaIndex(['2 day', '2 day', pd.NaT, pd.NaT,
+                                    '1 day 00:00:02', '5 days 00:00:03'])
+        tdarr = np.array([np.timedelta64(2,'D'),
+                          np.timedelta64(2,'D'),
+                          np.timedelta64('nat'), np.timedelta64('nat'),
+                          np.timedelta64(1,'D') + np.timedelta64(2,'s'),
+                          np.timedelta64(5,'D') + np.timedelta64(3,'s')])
+
+        if _np_version_under1p8:
+            # cannot test the array case because np.datetime64('nat') returns
+            # today's date
+            cases = [(tdidx1, tdidx2)]
+        else:
+            cases = [(tdidx1, tdidx2), (tdidx1, tdarr)]
+
+        # Check pd.NaT is handled the same as np.nan
+        for idx1, idx2 in cases:
+
+            result = idx1 < idx2
+            expected = np.array([True, False, False, False, True, False])
+            self.assert_numpy_array_equal(result, expected)
+
+            result = idx2 > idx1
+            expected = np.array([True, False, False, False, True, False])
+            self.assert_numpy_array_equal(result, expected)
+
+            result = idx1 <= idx2
+            expected = np.array([True, False, False, False, True, True])
+            self.assert_numpy_array_equal(result, expected)
+
+            result = idx2 >= idx1
+            expected = np.array([True, False, False, False, True, True])
+            self.assert_numpy_array_equal(result, expected)
+
+            result = idx1 == idx2
+            expected = np.array([False, False, False, False, False, True])
+            self.assert_numpy_array_equal(result, expected)
+
+            result = idx1 != idx2
+            expected = np.array([True, True, True, True, True, False])
+            self.assert_numpy_array_equal(result, expected)
+
+    def test_map(self):
+
+        rng = timedelta_range('1 day', periods=10)
+
+        f = lambda x: x.days
+        result = rng.map(f)
+        exp = [f(x) for x in rng]
+        self.assert_numpy_array_equal(result, exp)
+
+    def test_misc_coverage(self):
+
+        rng = timedelta_range('1 day', periods=5)
+        result = rng.groupby(rng.days)
+        tm.assert_isinstance(list(result.values())[0][0], Timedelta)
+
+        idx = TimedeltaIndex(['3d','1d','2d'])
+        self.assertTrue(idx.equals(list(idx)))
+
+        non_td = Index(list('abc'))
+        self.assertFalse(idx.equals(list(non_td)))
+
+    def test_union(self):
+
+        i1 = timedelta_range('1day',periods=5)
+        i2 = timedelta_range('3day',periods=5)
+        result = i1.union(i2)
+        expected = timedelta_range('1day',periods=7)
+        self.assert_numpy_array_equal(result, expected)
+
+        i1 = Int64Index(np.arange(0, 20, 2))
+        i2 = TimedeltaIndex(start='1 day', periods=10, freq='D')
+        i1.union(i2)  # Works
+        i2.union(i1)  # Fails with "AttributeError: can't set attribute"
+
+    def test_union_coverage(self):
+
+        idx = TimedeltaIndex(['3d','1d','2d'])
+        ordered = TimedeltaIndex(idx.order(), freq='infer')
+        result = ordered.union(idx)
+        self.assertTrue(result.equals(ordered))
+
+        result = ordered[:0].union(ordered)
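+        # Sketch of the invariant asserted below (example values hypothetical):
+        #   TimedeltaIndex([]).union(timedelta_range('1 day', periods=3))
+        # should hand back the non-empty operand intact, freq included.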
+        self.assertTrue(result.equals(ordered))
+        self.assertEqual(result.freq, ordered.freq)
+
+    def test_union_bug_1730(self):
+
+        rng_a = timedelta_range('1 day', periods=4, freq='3H')
+        rng_b = timedelta_range('1 day', periods=4, freq='4H')
+
+        result = rng_a.union(rng_b)
+        exp = TimedeltaIndex(sorted(set(list(rng_a)) | set(list(rng_b))))
+        self.assertTrue(result.equals(exp))
+
+    def test_union_bug_1745(self):
+
+        left = TimedeltaIndex(['1 day 15:19:49.695000'])
+        right = TimedeltaIndex(['2 day 13:04:21.322000',
+                                '1 day 15:27:24.873000',
+                                '1 day 15:31:05.350000'])
+
+        result = left.union(right)
+        exp = TimedeltaIndex(sorted(set(list(left)) | set(list(right))))
+        self.assertTrue(result.equals(exp))
+
+    def test_union_bug_4564(self):
+
+        left = timedelta_range("1 day","30d")
+        right = left + pd.offsets.Minute(15)
+
+        result = left.union(right)
+        exp = TimedeltaIndex(sorted(set(list(left)) | set(list(right))))
+        self.assertTrue(result.equals(exp))
+
+    def test_intersection_bug_1708(self):
+        index_1 = timedelta_range('1 day', periods=4, freq='h')
+        index_2 = index_1 + pd.offsets.Hour(5)
+
+        result = index_1 & index_2
+        self.assertEqual(len(result), 0)
+
+        index_1 = timedelta_range('1 day', periods=4, freq='h')
+        index_2 = index_1 + pd.offsets.Hour(1)
+
+        result = index_1 & index_2
+        expected = timedelta_range('1 day 01:00:00',periods=3,freq='h')
+        tm.assert_index_equal(result,expected)
+
+    def test_get_duplicates(self):
+        idx = TimedeltaIndex(['1 day','2 day','2 day','3 day','3day', '4day'])
+
+        result = idx.get_duplicates()
+        ex = TimedeltaIndex(['2 day','3day'])
+        self.assertTrue(result.equals(ex))
+
+    def test_argmin_argmax(self):
+        idx = TimedeltaIndex(['1 day 00:00:05','1 day 00:00:01','1 day 00:00:02'])
+        self.assertEqual(idx.argmin(), 1)
+        self.assertEqual(idx.argmax(), 0)
+
+    def test_order(self):
+
+        idx = TimedeltaIndex(['4d','1d','2d'])
+
+        ordered = idx.order()
+        self.assertTrue(ordered.is_monotonic)
+
+        ordered = idx.order(ascending=False)
+        self.assertTrue(ordered[::-1].is_monotonic)
+
+        ordered, dexer = idx.order(return_indexer=True)
+        self.assertTrue(ordered.is_monotonic)
+        self.assert_numpy_array_equal(dexer, [1, 2, 0])
+
+        ordered, dexer = idx.order(return_indexer=True, ascending=False)
+        self.assertTrue(ordered[::-1].is_monotonic)
+        self.assert_numpy_array_equal(dexer, [0, 2, 1])
+
+    def test_insert(self):
+
+        idx = TimedeltaIndex(['4day','1day','2day'], name='idx')
+
+        result = idx.insert(2, timedelta(days=5))
+        exp = TimedeltaIndex(['4day','1day','5day','2day'],name='idx')
+        self.assertTrue(result.equals(exp))
+
+        # insertion of a non-timedelta should coerce to an object index
+        result = idx.insert(1, 'inserted')
+        expected = Index([Timedelta('4day'), 'inserted', Timedelta('1day'),
+                          Timedelta('2day')], name='idx')
+        self.assertNotIsInstance(result, TimedeltaIndex)
+        tm.assert_index_equal(result, expected)
+        self.assertEqual(result.name, expected.name)
+
+        idx = timedelta_range('1day 00:00:01', periods=3, freq='s', name='idx')
+
+        # preserve freq
+        expected_0 = TimedeltaIndex(['1day','1day 00:00:01','1day 00:00:02','1day 00:00:03'],
+                                    name='idx', freq='s')
+        expected_3 = TimedeltaIndex(['1day 00:00:01','1day 00:00:02','1day 00:00:03','1day 00:00:04'],
+                                    name='idx', freq='s')
+
+        # reset freq to None
+        expected_1_nofreq = TimedeltaIndex(['1day 00:00:01','1day 00:00:01','1day 00:00:02','1day 00:00:03'],
+                                           name='idx', freq=None)
+        expected_3_nofreq = TimedeltaIndex(['1day 00:00:01','1day 00:00:02','1day 00:00:03','1day 00:00:05'],
+                                           name='idx', freq=None)
+
+        cases = [(0, Timedelta('1day'), expected_0),
+                 (-3, Timedelta('1day'), expected_0),
+                 (3, Timedelta('1day 00:00:04'), expected_3),
+                 (1, Timedelta('1day 00:00:01'), expected_1_nofreq),
+                 (3, Timedelta('1day 00:00:05'), expected_3_nofreq)]
+
+        for n, d, expected in cases:
+            result = idx.insert(n, d)
+            self.assertTrue(result.equals(expected))
+            self.assertEqual(result.name, expected.name)
+            self.assertEqual(result.freq, expected.freq)
+
+    def test_delete(self):
+        idx = timedelta_range(start='1 Days', periods=5, freq='D', name='idx')
+
+        # preserve freq
+        expected_0 = timedelta_range(start='2 Days', periods=4, freq='D', name='idx')
+        expected_4 = timedelta_range(start='1 Days', periods=4, freq='D', name='idx')
+
+        # reset freq to None
+        expected_1 = TimedeltaIndex(['1 day','3 day','4 day', '5 day'],freq=None,name='idx')
+
+        cases ={0: expected_0, -5: expected_0,
+                -1: expected_4, 4: expected_4,
+                1: expected_1}
+        for n, expected in compat.iteritems(cases):
+            result = idx.delete(n)
+            self.assertTrue(result.equals(expected))
+            self.assertEqual(result.name, expected.name)
+            self.assertEqual(result.freq, expected.freq)
+
+        with tm.assertRaises((IndexError, ValueError)):
+            # either, depending on the numpy version
+            result = idx.delete(5)
+
+    def test_delete_slice(self):
+        idx = timedelta_range(start='1 days', periods=10, freq='D', name='idx')
+
+        # preserve freq
+        expected_0_2 = timedelta_range(start='4 days', periods=7, freq='D', name='idx')
+        expected_7_9 = timedelta_range(start='1 days', periods=7, freq='D', name='idx')
+
+        # reset freq to None
+        expected_3_5 = TimedeltaIndex(['1 d','2 d','3 d',
+                                       '7 d','8 d','9 d','10d'], freq=None, name='idx')
+
+        cases ={(0, 1, 2): expected_0_2,
+                (7, 8, 9): expected_7_9,
+                (3, 4, 5): expected_3_5}
+        for n, expected in compat.iteritems(cases):
+            result = idx.delete(n)
+            self.assertTrue(result.equals(expected))
+            self.assertEqual(result.name, expected.name)
+            self.assertEqual(result.freq, expected.freq)
+
+            result = idx.delete(slice(n[0], n[-1] + 1))
+            self.assertTrue(result.equals(expected))
+            self.assertEqual(result.name, expected.name)
+            self.assertEqual(result.freq, expected.freq)
+
+    def test_take(self):
+
+        tds = ['1day 02:00:00','1 day 04:00:00','1 day 10:00:00']
+        idx = TimedeltaIndex(start='1d',end='2d',freq='H',name='idx')
+        expected = TimedeltaIndex(tds, freq=None, name='idx')
+
+        taken1 = idx.take([2, 4, 10])
+        taken2 = idx[[2,4,10]]
+
+        for taken in [taken1, taken2]:
+            self.assertTrue(taken.equals(expected))
+            tm.assert_isinstance(taken, TimedeltaIndex)
+            self.assertIsNone(taken.freq)
+            self.assertEqual(taken.name, expected.name)
+
+    def test_isin(self):
+
+        index = tm.makeTimedeltaIndex(4)
+        result = index.isin(index)
+        self.assertTrue(result.all())
+
+        result = index.isin(list(index))
+        self.assertTrue(result.all())
+
+        assert_almost_equal(index.isin([index[2], 5]),
+                            [False, False, True, False])
+
+    def test_does_not_convert_mixed_integer(self):
+        df = tm.makeCustomDataframe(10, 10, data_gen_f=lambda *args, **kwargs:
+                                    randn(), r_idx_type='i', c_idx_type='td')
+        str(df)
+
+        cols = df.columns.join(df.index, how='outer')
+        joined = cols.join(df.columns)
+        self.assertEqual(cols.dtype, np.dtype('O'))
+        self.assertEqual(cols.dtype, joined.dtype)
+        tm.assert_index_equal(cols, joined)
+
+    def test_slice_keeps_name(self):
+
+        # GH4226
+        dr = pd.timedelta_range('1d','5d', freq='H', name='timebucket')
+        self.assertEqual(dr[1:].name, dr.name)
+
+    def test_join_self(self):
+
+        index = timedelta_range('1 day', periods=10)
+        kinds = 'outer', 'inner', 'left', 'right'
+        for kind in kinds:
+            joined = index.join(index, how=kind)
+            self.assertIs(index, joined)
+
+    def test_factorize(self):
+        idx1 = TimedeltaIndex(['1 day','1 day','2 day',
+                               '2 day','3 day','3 day'])
+
+        exp_arr = np.array([0, 0, 1, 1, 2, 2])
+        exp_idx = TimedeltaIndex(['1 day','2 day','3 day'])
+
+        arr, idx = idx1.factorize()
+        self.assert_numpy_array_equal(arr, exp_arr)
+        self.assertTrue(idx.equals(exp_idx))
+
+        arr, idx = idx1.factorize(sort=True)
+        self.assert_numpy_array_equal(arr, exp_arr)
+        self.assertTrue(idx.equals(exp_idx))
+
+        # freq must be preserved
+        idx3 = timedelta_range('1 day', periods=4, freq='s')
+        exp_arr = np.array([0, 1, 2, 3])
+        arr, idx = idx3.factorize()
+        self.assert_numpy_array_equal(arr, exp_arr)
+        self.assertTrue(idx.equals(idx3))
+
+class TestSlicing(tm.TestCase):
+
+    def test_partial_slice(self):
+        rng = timedelta_range('1 day 10:11:12', freq='h',periods=500)
+        s = Series(np.arange(len(rng)), index=rng)
+
+        result = s['5 day':'6 day']
+        expected = s.iloc[86:134]
+        assert_series_equal(result, expected)
+
+        result = s['5 day':]
+        expected = s.iloc[86:]
+        assert_series_equal(result, expected)
+
+        result = s[:'6 day']
+        expected = s.iloc[:134]
+        assert_series_equal(result, expected)
+
+        result = s['6 days, 23:11:12']
+        self.assertEqual(result, s.irow(133))
+
+        self.assertRaises(KeyError, s.__getitem__, '50 days')
+
+    def test_partial_slice_high_reso(self):
+
+        # higher reso
+        rng = timedelta_range('1 day 10:11:12', freq='us',periods=2000)
+        s = Series(np.arange(len(rng)), index=rng)
+
+        result = s['1 day 10:11:12':]
+        expected = s.iloc[0:]
+        assert_series_equal(result, expected)
+
+        result = s['1 day 10:11:12.001':]
+        expected = s.iloc[1000:]
+        assert_series_equal(result, expected)
+
+        result = s['1 days, 10:11:12.001001']
+        self.assertEqual(result, s.irow(1001))

 if __name__ == '__main__':
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py
index 828c2a554b02d..1980924483bfb 100644
--- a/pandas/tseries/tests/test_timeseries.py
+++ b/pandas/tseries/tests/test_timeseries.py
@@ -332,7 +332,6 @@ def test_dti_slicing(self):

     def test_pass_datetimeindex_to_index(self):
         # Bugs in #1396
-
         rng = date_range('1/1/2000', '3/1/2000')
         idx = Index(rng, dtype=object)

diff --git a/pandas/tseries/timedeltas.py b/pandas/tseries/timedeltas.py
index e762ebe9d85cf..ad8c2c0f09ea1 100644
--- a/pandas/tseries/timedeltas.py
+++ b/pandas/tseries/timedeltas.py
@@ -12,19 +12,17 @@
     is_timedelta64_dtype, _values_from_object,
     is_list_like, isnull, _ensure_object)

-repr_timedelta = tslib.repr_timedelta64
-repr_timedelta64 = tslib.repr_timedelta64
-
-def to_timedelta(arg, box=True, unit='ns'):
+def to_timedelta(arg, unit='ns', box=True):
     """
     Convert argument to timedelta

     Parameters
     ----------
     arg : string, timedelta, array of strings (with possible NAs)
-    box : boolean, default True
-        If True returns a Series of the results, if False returns ndarray of values
     unit : unit of the arg (D,h,m,s,ms,us,ns); denotes the unit when arg is
         an integer/float number
+    box : boolean, default True
+        If True returns a Timedelta/TimedeltaIndex of the results;
+        if False returns a np.timedelta64 or ndarray of values of dtype timedelta64[ns]

     Returns
     -------
@@ -34,8 +32,8 @@ def _convert_listlike(arg, box, unit):

-        if isinstance(arg, (list,tuple)):
-            arg = np.array(arg, dtype='O')
+        if isinstance(arg, (list,tuple)) or ((hasattr(arg,'__iter__') and not hasattr(arg,'dtype'))):
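+            # (sketch, assuming the conversion path shown here: any non-ndarray
+            #  iterable -- list, tuple, generator -- is first materialized as an
+            #  object array, e.g. to_timedelta('%d days' % i for i in range(3))
+            #  sees np.array(['0 days','1 days','2 days'], dtype='O'))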
+            arg = np.array(list(arg), dtype='O')

         if is_timedelta64_dtype(arg):
             value = arg.astype('timedelta64[ns]')
@@ -47,11 +45,16 @@ def _convert_listlike(arg, box, unit):
             try:
                 value = tslib.array_to_timedelta64(_ensure_object(arg), unit=unit)
             except:
-                value = np.array([ _coerce_scalar_to_timedelta_type(r, unit=unit) for r in arg ])
+
+                # try to process strings fast; may need to fall back
+                try:
+                    value = np.array([ _get_string_converter(r, unit=unit)() for r in arg ],dtype='m8[ns]')
+                except:
+                    value = np.array([ _coerce_scalar_to_timedelta_type(r, unit=unit) for r in arg ])

         if box:
-            from pandas import Series
-            value = Series(value,dtype='m8[ns]')
+            from pandas import TimedeltaIndex
+            value = TimedeltaIndex(value,unit='ns')

         return value

     if arg is None:
@@ -64,7 +67,7 @@ def _convert_listlike(arg, box, unit):
         return _convert_listlike(arg, box=box, unit=unit)

     # ...so it must be a scalar value. Return scalar.
-    return _coerce_scalar_to_timedelta_type(arg, unit=unit)
+    return _coerce_scalar_to_timedelta_type(arg, unit=unit, box=box)

 _unit_map = {
     'Y' : 'Y',
@@ -92,24 +95,48 @@ def _convert_listlike(arg, box, unit):
     'NS' : 'ns',
     'ns' : 'ns',
     }
+_unit_scale = {
+    'd' : 86400*1e9,
+    'h' : 3600*1e9,
+    'm' : 60*1e9,
+    's' : 1e9,
+    'ms' : 1e6,
+    'us' : 1e3,
+    'ns' : 1,
+    }

 def _validate_timedelta_unit(arg):
     """ provide validation / translation for timedelta short units """
     try:
         return _unit_map[arg]
     except:
+        if arg is None:
+            return 'ns'
         raise ValueError("invalid timedelta unit {0} provided".format(arg))

 _short_search = re.compile(
     "^\s*(?P<neg>-?)\s*(?P<value>\d*\.?\d*)\s*(?P<unit>d|s|ms|us|ns)?\s*$",re.IGNORECASE)
 _full_search = re.compile(
-    "^\s*(?P<neg>-?)\s*(?P<days>\d+)?\s*(days|d|day)?,?\s*(?P
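
The two patterns split the scalar string grammar: ``_short_search`` covers a bare number with an optional short unit, while ``_full_search`` (its replacement line is truncated above) covers day-plus-``hh:mm:ss`` forms; the group names shown (``neg``, ``value``, ``unit``, ``days``) follow the conventional spellings in the surrounding pandas source and should be read as a best-effort reconstruction. A minimal sketch of the resulting behavior, with values matching the expectations in ``test_numeric_conversions`` above::

    from pandas import to_timedelta

    # full form: optional day component plus hh:mm:ss with an optional fraction
    to_timedelta('06:00:01.01')          # 6 hours, 1 second, 10 milliseconds
    to_timedelta('1days, 06:00:01')      # 1 day, 6 hours, 1 second

    # per the corrected expectation above, a leading sign negates only the
    # day component: '- 1days, 00:00:01' == -1 day + 1 second
    to_timedelta('- 1days, 00:00:01')    # Timedelta('-1 days +00:00:01')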