diff --git a/doc/source/release.rst b/doc/source/release.rst index 121cfb92b0eb2..e16a12ce0bc91 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -312,6 +312,11 @@ Improvements to existing features in item handling (:issue:`6745`, :issue:`6988`). - Improve performance in certain reindexing operations by optimizing ``take_2d`` (:issue:`6749`) - Arrays of strings can be wrapped to a specified width (``str.wrap``) (:issue:`6999`) +- Constructor for ``Period`` now takes full set of possible ``Offset`` objects for ``freq`` + parameter. (:issue:`4878`) +- Extends the number of ``Period``s supported by allowing for Python defined ``Period``s (:issue:`5148`) +- Added ``inferred_freq_offset`` as property on ``DatetimeIndex`` to provide the actual + Offset object rather than the string representation (:issue:`5082`). .. _release.bug_fixes-0.14.0: @@ -459,6 +464,7 @@ Bug Fixes - Bug in timeseries-with-frequency plot cursor display (:issue:`5453`) - Bug surfaced in groupby.plot when using a ``Float64Index`` (:issue:`7025`) - Stopped tests from failing if options data isn't able to be downloaded from Yahoo (:issue:`7034`) +- Bug in not correctly treating 'QS', 'BQS', 'BQ', 'Y' as frequency aliases (:issue:`5028`). pandas 0.13.1 ------------- diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt index f19c1210b6a37..fc561a1f99387 100644 --- a/doc/source/v0.14.0.txt +++ b/doc/source/v0.14.0.txt @@ -234,7 +234,7 @@ API changes covs[df.index[-1]] - ``Series.iteritems()`` is now lazy (returns an iterator rather than a list). This was the documented behavior prior to 0.14. (:issue:`6760`) - +- ``pd.infer_freq`` and ``DatetimeIndex.inferred_freq`` now return a DateOffset subclass rather than a string. (:issue:`5082`) - Added ``nunique`` and ``value_counts`` functions to ``Index`` for counting unique elements. 
(:issue:`6734`) - ``stack`` and ``unstack`` now raise a ``ValueError`` when the ``level`` keyword refers to a non-unique item in the ``Index`` (previously raised a ``KeyError``). @@ -554,6 +554,9 @@ Enhancements values='Quantity', aggfunc=np.sum) - str.wrap implemented (:issue:`6999`) +- Constructor for ``Period`` now takes full set of possible ``Offset`` objects for ``freq`` + parameter. (:issue:`4878`) +- Extends the number of ``Period``s supported by allowing for Python defined ``Period``s (:issue:`5148`) .. _whatsnew_0140.performance: diff --git a/pandas/core/series.py b/pandas/core/series.py index 74f038b2bad23..d3f8b4c4ed831 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2361,7 +2361,7 @@ def to_period(self, freq=None, copy=True): new_values = new_values.copy() if freq is None: - freq = self.index.freqstr or self.index.inferred_freq + freq = self.index.freq or self.index.inferred_freq new_index = self.index.to_period(freq=freq) return self._constructor(new_values, index=new_index).__finalize__(self) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index e3c933e116987..12a8ac4844552 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -71,7 +71,7 @@ def get_freq(freq): return freq -def get_freq_code(freqstr): +def get_freq_code(freqstr, as_periodstr=False): """ Parameters @@ -81,7 +81,13 @@ def get_freq_code(freqstr): ------- """ if isinstance(freqstr, DateOffset): - freqstr = (get_offset_name(freqstr), freqstr.n) + freqstr_raw = get_offset_name(freqstr) + + #if we can, convert to canonical period str + if as_periodstr: + freqstr_raw = get_period_alias(freqstr_raw) + + freqstr = (freqstr_raw, freqstr.n) if isinstance(freqstr, tuple): if (com.is_integer(freqstr[0]) and @@ -113,7 +119,7 @@ def _get_freq_str(base, mult=1): code = _reverse_period_code_map.get(base) if mult == 1: return code - return str(mult) + code + return "%s%s" % (mult, code) 
#---------------------------------------------------------------------- @@ -157,6 +163,7 @@ def _get_freq_str(base, mult=1): 'H': 'H', 'Q': 'Q', 'A': 'A', + 'Y': 'A', 'W': 'W', 'M': 'M' } @@ -202,6 +209,9 @@ def get_period_alias(offset_str): 'Q@FEB': 'BQ-FEB', 'Q@MAR': 'BQ-MAR', 'Q': 'Q-DEC', + 'QS': 'QS-JAN', + 'BQ': 'BQ-DEC', + 'BQS': 'BQS-JAN', 'A': 'A-DEC', # YearEnd(month=12), 'AS': 'AS-JAN', # YearBegin(month=1), @@ -387,19 +397,44 @@ def get_legacy_offset_name(offset): name = offset.name return _legacy_reverse_map.get(name, name) -def get_standard_freq(freq): +def get_standard_freq(freq, as_periodstr=False): """ - Return the standardized frequency string + Return the standardized frequency string. + as_periodstr=True returns the string representing the period rather than + the frequency. An example when these may differ is MonthBegin. + MonthBegin and MonthEnd are two different frequencies but they define the + same period. + + >>> get_standard_freq(pandas.tseries.offsets.MonthBegin(), as_periodstr=False) + 'L' + >>> get_standard_freq(pandas.tseries.offsets.MonthEnd(), as_periodstr=False) + 'M' + >>> get_standard_freq(pandas.tseries.offsets.MonthBegin(), as_periodstr=True) + 'M' + >>> get_standard_freq(pandas.tseries.offsets.MonthEnd(), as_periodstr=True) + 'M' """ if freq is None: return None - if isinstance(freq, DateOffset): - return get_offset_name(freq) + code, stride = get_freq_code(freq, as_periodstr=as_periodstr) - code, stride = get_freq_code(freq) return _get_freq_str(code, stride) +def _get_standard_period_freq_impl(freq): + return get_standard_freq(freq, as_periodstr=True) + +def get_standard_period_freq(freq): + if isinstance(freq, DateOffset): + return freq.periodstr + + return _get_standard_period_freq_impl(freq) + +def _assert_mult_1(mult): + if mult != 1: + # TODO: Better error message - this is slightly confusing + raise ValueError('Only mult == 1 supported') + #---------------------------------------------------------------------- # Period 
codes @@ -629,7 +664,7 @@ def infer_freq(index, warn=True): Returns ------- - freq : string or None + freq : DateOffset object or None None if no discernible frequency TypeError if the index is not datetime-like """ @@ -650,7 +685,28 @@ def infer_freq(index, warn=True): index = pd.DatetimeIndex(index) inferer = _FrequencyInferer(index, warn=warn) - return inferer.get_freq() + return to_offset(inferer.get_freq()) + + +def infer_freqstr(index, warn=True): + """ + Infer the most likely frequency given the input index. If the frequency is + uncertain, a warning will be printed + + Parameters + ---------- + index : DatetimeIndex + if passed a Series will use the values of the series (NOT THE INDEX) + warn : boolean, default True + + Returns + ------- + freq : string or None + None if no discernible frequency + TypeError if the index is not datetime-like + """ + return infer_freq(index, warn).freqstr + _ONE_MICRO = long(1000) _ONE_MILLI = _ONE_MICRO * 1000 @@ -887,9 +943,11 @@ def is_subperiod(source, target): ------- is_subperiod : boolean """ + source_raw = source if isinstance(source, offsets.DateOffset): source = source.rule_code + target_raw = target if isinstance(target, offsets.DateOffset): target = target.rule_code @@ -918,6 +976,12 @@ def is_subperiod(source, target): return source in ['T', 'S'] elif target == 'S': return source in ['S'] + elif isinstance(source_raw, offsets._NonCythonPeriod): + return source_raw.is_subperiod(target_raw) + elif isinstance(target_raw, offsets._NonCythonPeriod): + return target_raw.is_superperiod(source_raw) + else: + return False def is_superperiod(source, target): @@ -936,9 +1000,11 @@ def is_superperiod(source, target): ------- is_superperiod : boolean """ + source_raw = source if isinstance(source, offsets.DateOffset): source = source.rule_code + target_raw = target if isinstance(target, offsets.DateOffset): target = target.rule_code @@ -971,6 +1037,12 @@ def is_superperiod(source, target): return target in ['T', 'S'] elif 
source == 'S': return target in ['S'] + elif isinstance(source_raw, offsets._NonCythonPeriod): + return source_raw.is_superperiod(target_raw) + elif isinstance(target_raw, offsets._NonCythonPeriod): + return target_raw.is_subperiod(source_raw) + else: + return False def _get_rule_month(source, default='DEC'): diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index a2e01c8110261..5345fc6f8abcf 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -14,7 +14,7 @@ from pandas.compat import u from pandas.tseries.frequencies import ( infer_freq, to_offset, get_period_alias, - Resolution, get_reso_string, get_offset) + Resolution, get_reso_string, get_offset, infer_freqstr) from pandas.tseries.offsets import DateOffset, generate_range, Tick, CDay from pandas.tseries.tools import parse_time_string, normalize_date from pandas.util.decorators import cache_readonly @@ -792,8 +792,8 @@ def to_period(self, freq=None): msg = "You must pass a freq argument as current index has none." 
raise ValueError(msg) - if freq is None: - freq = get_period_alias(self.freqstr) + if freq is None: # No reason to convert to str; keep whatever freq is + freq = self.freq return PeriodIndex(self.values, freq=freq, tz=self.tz) @@ -1427,6 +1427,13 @@ def inferred_freq(self): except ValueError: return None + @cache_readonly + def inferred_freqstr(self): + try: + return infer_freqstr(self) + except ValueError: + return None + @property def freqstr(self): """ return the frequency object as a string if its set, otherwise None """ diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 1b8b82235cf08..67950587b9026 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -3,7 +3,7 @@ from pandas import compat import numpy as np -from pandas.tseries.tools import to_datetime +from pandas.tseries.tools import to_datetime, _try_parse_qtr_time_string # import after tools, dateutil check from dateutil.relativedelta import relativedelta, weekday @@ -60,6 +60,54 @@ class CacheableOffset(object): _cacheable = True +class _NonCythonPeriod(object): + """ + This class represents the base class for Offsets for which Period logic is + not implemented in Cython. This allows fully Python defined Offsets with + Period support. + All subclasses are expected to implement get_start_dt, get_end_dt, + period_format, get_period_ordinal, is_superperiod and is_subperiod. 
+ """ + + def get_start_dt(self, ordinal): + raise NotImplementedError("get_start_dt") + + def get_end_dt(self, ordinal): + raise NotImplementedError("get_end_dt") + + def period_format(self, ordinal, fmt=None): + raise NotImplementedError("period_format") + + def get_period_ordinal(self, dt): + raise NotImplementedError("get_period_ordinal") + + def dt64arr_to_periodarr(self, data, tz): + f = np.vectorize(lambda x: self.get_period_ordinal(Timestamp(x))) + return f(data.view('i8')) + + def period_asfreq_arr(self, values, freq, end): + from pandas.tseries.period import Period + f = np.vectorize(lambda x: + Period(value=self.period_asfreq_value(x, end), freq=freq).ordinal) + return f(values.view('i8')) + + def period_fromfreq_arr(self, values, freq_int_from, end): + from pandas.tseries.period import _change_period_freq + offset = 0 if end else 1 + f = np.vectorize(lambda x: + _change_period_freq(x, freq_int_from, self).ordinal - offset) + return f(values.view('i8')) + + def period_asfreq_value(self, ordinal, end): + return self.get_end_dt(ordinal) if end else self.get_start_dt(ordinal) + + def is_superperiod(self, target): + raise NotImplementedError("is_superperiod") + + def is_subperiod(self, target): + raise NotImplementedError("is_subperiod") + + class DateOffset(object): """ Standard kind of date increment used for a date range. @@ -295,6 +343,19 @@ def freqstr(self): return fstr + @property + def periodstr(self): + """ + The string representation for the Period defined by this offset. + This may differ from freqstr which defines a freq. For example Month vs. + start of Month. 
+ """ + from pandas.tseries.frequencies import _get_standard_period_freq_impl + return _get_standard_period_freq_impl(self) + + def parse_time_string(self, arg): + return None + class SingleConstructorOffset(DateOffset): @classmethod @@ -1654,14 +1715,14 @@ def get_rule_code_suffix(self): _int_to_weekday[self.weekday]) @classmethod - def _parse_suffix(cls, varion_code, startingMonth_code, weekday_code): - if varion_code == "N": + def _parse_suffix(cls, variation_code, startingMonth_code, weekday_code): + if variation_code == "N": variation = "nearest" - elif varion_code == "L": + elif variation_code == "L": variation = "last" else: raise ValueError( - "Unable to parse varion_code: %s" % (varion_code,)) + "Unable to parse variation_code: %s" % (variation_code,)) startingMonth = _month_to_int[startingMonth_code] weekday = _weekday_to_int[weekday_code] @@ -1677,7 +1738,7 @@ def _from_name(cls, *args): return cls(**cls._parse_suffix(*args)) -class FY5253Quarter(DateOffset): +class FY5253Quarter(_NonCythonPeriod, DateOffset): """ DateOffset increments between business quarter dates for 52-53 week fiscal year (also known as a 4-4-5 calendar). 
@@ -1828,6 +1889,85 @@ def rule_code(self): def _from_name(cls, *args): return cls(**dict(FY5253._parse_suffix(*args[:-1]), qtr_with_extra_week=int(args[-1]))) + + def _get_ordinal_from_y_q(self, fy, fq): + """Take zero indexed fq""" + return fy * 4 + fq + + def get_period_ordinal(self, dt): + year_end = self._offset.get_year_end(dt) + year_end_year = year_end.year + + if dt <= year_end: + if year_end.month < self._offset.startingMonth: + year_end_year -= 1 + fy = year_end_year + else: + fy = year_end_year + 1 + year_end = year_end + self._offset + + fq = 4 + while dt <= year_end: + year_end = year_end - self + fq -= 1 + + return self._get_ordinal_from_y_q(fy, fq) + + @property + def periodstr(self): + return self.rule_code + + def period_format(self, ordinal, fmt=None): + fy = ordinal // 4 + fq = (ordinal % 4) + 1 + + return "%dQ%d" % (fy, fq) + + def parse_time_string(self, arg): + qtr_parsed = _try_parse_qtr_time_string(arg) + if qtr_parsed is None: + return None + else: + fy, fq = qtr_parsed + return self.get_end_dt(self._get_ordinal_from_y_q(fy, fq - 1)) + + def get_start_dt(self, ordinal): + fy = ordinal // 4 + fq = (ordinal % 4) + 1 + + year_end = self._offset.get_year_end(datetime(fy, 1, 1)) + countdown = 4-fq+1 + while countdown: + countdown -= 1 + year_end = year_end-self + + return year_end + relativedelta(days=1) + + def get_end_dt(self, ordinal): + fy = ordinal // 4 + fq = (ordinal % 4) + 1 + + year_end = self._offset.get_year_end(datetime(fy, 1, 1)) + countdown = 4-fq + while countdown: + countdown -= 1 + year_end = year_end-self + + return year_end + + def is_superperiod(self, target): + if not isinstance(target, DateOffset): + from pandas.tseries.frequencies import get_offset + target = get_offset(target) + + if type(target) == Week: + return target.weekday == self._offset.weekday + elif type(target) == Day: + return True + + def is_subperiod(self, target): + #TODO Return True for FY5253 after FY5253 handles periods methods + return False class 
Easter(DateOffset): ''' diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 6d9e32433cd1e..3411303188a7d 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -6,7 +6,8 @@ from pandas.core.base import PandasObject from pandas.tseries.frequencies import (get_freq_code as _gfc, - _month_numbers, FreqGroup) + _month_numbers, FreqGroup, + _assert_mult_1) from pandas.tseries.index import DatetimeIndex, Int64Index, Index from pandas.tseries.tools import parse_time_string import pandas.tseries.frequencies as _freq_mod @@ -20,6 +21,8 @@ import pandas.tslib as tslib import pandas.algos as _algos from pandas.compat import map, zip, u +from pandas.tseries.offsets import DateOffset, _NonCythonPeriod +from pandas.util.decorators import cache_readonly #--------------- @@ -27,7 +30,7 @@ def _period_field_accessor(name, alias): def f(self): - base, mult = _gfc(self.freq) + base, _ = _gfc(self.freq) return tslib.get_period_field(alias, self.ordinal, base) f.__name__ = name return property(f) @@ -35,11 +38,21 @@ def f(self): def _field_accessor(name, alias): def f(self): - base, mult = _gfc(self.freq) + base, _ = _gfc(self.freq) return tslib.get_period_field_arr(alias, self.values, base) f.__name__ = name return property(f) +def _check_freq_mult(freq): + if isinstance(freq, DateOffset): + mult = freq.n + else: + _, mult = _gfc(freq, as_periodstr=True) + + _assert_mult_1(mult) + +def _change_period_freq(ordinal_from, freq_int_from, freq_to): + return Period(Timestamp(tslib.period_ordinal_to_dt64(ordinal_from, freq=freq_int_from)), freq=freq_to) class Period(PandasObject): """ @@ -70,8 +83,6 @@ def __init__(self, value=None, freq=None, ordinal=None, # periods such as A, Q, etc. 
Every five minutes would be, e.g., # ('T', 5) but may be passed in as a string like '5T' - self.freq = None - # ordinal is the period offset from the gregorian proleptic epoch self.ordinal = None @@ -94,7 +105,9 @@ def __init__(self, value=None, freq=None, ordinal=None, elif isinstance(value, Period): other = value - if freq is None or _gfc(freq) == _gfc(other.freq): + if freq is None \ + or freq == other.freq \ + or _gfc(freq, as_periodstr=True) == _gfc(other.freq, as_periodstr=True):#TODO: use freqstr? self.ordinal = other.ordinal freq = other.freq else: @@ -118,22 +131,35 @@ def __init__(self, value=None, freq=None, ordinal=None, else: msg = "Value must be Period, string, integer, or datetime" raise ValueError(msg) + + _check_freq_mult(freq) + + #TODO: Fix this + if not isinstance(freq, DateOffset): + freq = _freq_mod._get_freq_str(_gfc(freq)[0]) - base, mult = _gfc(freq) - if mult != 1: - # TODO: Better error message - this is slightly confusing - raise ValueError('Only mult == 1 supported') + self.freq = freq if self.ordinal is None: - self.ordinal = tslib.period_ordinal(dt.year, dt.month, dt.day, - dt.hour, dt.minute, dt.second, dt.microsecond, 0, - base) + if isinstance(freq, _NonCythonPeriod): + self.ordinal = freq.get_period_ordinal(dt) + else: + base, _ = _gfc(freq, as_periodstr=True) - self.freq = _freq_mod._get_freq_str(base) + self.ordinal = tslib.period_ordinal(dt.year, dt.month, dt.day, + dt.hour, dt.minute, dt.second, dt.microsecond, 0, + base) + + @cache_readonly + def freqstr(self): + return _freq_mod.get_standard_period_freq(self.freq) + + def _same_freq(self, other): + return other.freq == self.freq or other.freqstr == self.freqstr def __eq__(self, other): if isinstance(other, Period): - if other.freq != self.freq: + if not self._same_freq(other): raise ValueError("Cannot compare non-conforming periods") return (self.ordinal == other.ordinal and _gfc(self.freq) == _gfc(other.freq)) @@ -197,16 +223,23 @@ def asfreq(self, freq, how='E'): resampled 
: Period """ how = _validate_end_alias(how) - base1, mult1 = _gfc(self.freq) - base2, mult2 = _gfc(freq) + _check_freq_mult(freq) + end = how == 'E' - if mult2 != 1: - raise ValueError('Only mult == 1 supported') + if isinstance(self.freq, _NonCythonPeriod): + value = self.freq.period_asfreq_value(self.ordinal, end) + return Period(value=value, freq=freq) + elif isinstance(freq, _NonCythonPeriod): + freq_int, _ = _gfc(self.freq) + return _change_period_freq(ordinal_from=self.ordinal, freq_int_from=freq_int, freq_to=freq) + else: - end = how == 'E' - new_ordinal = tslib.period_asfreq(self.ordinal, base1, base2, end) + base1, _ = _gfc(self.freq) + base2, _ = _gfc(freq) + + new_ordinal = tslib.period_asfreq(self.ordinal, base1, base2, end) - return Period(ordinal=new_ordinal, freq=base2) + return Period(ordinal=new_ordinal, freq=base2) @property def start_time(self): @@ -264,17 +297,22 @@ def to_timestamp(self, freq=None, how='start', tz=None): @classmethod def now(cls, freq=None): return Period(datetime.now(), freq=freq) + + def __get_formatted(self, fmt=None): + if isinstance(self.freq, _NonCythonPeriod): + return self.freq.period_format(self.ordinal, fmt=fmt) + + base, mult = _gfc(self.freq, as_periodstr=True) + return tslib.period_format(self.ordinal, base, fmt=fmt) def __repr__(self): - base, mult = _gfc(self.freq) - formatted = tslib.period_format(self.ordinal, base) - freqstr = _freq_mod._reverse_period_code_map[base] - + formatted = self.__get_formatted() + if not compat.PY3: encoding = com.get_option("display.encoding") formatted = formatted.encode(encoding) - return "Period('%s', '%s')" % (formatted, freqstr) + return "Period('%s', '%s')" % (formatted, self.freqstr) def __unicode__(self): """ @@ -283,9 +321,9 @@ def __unicode__(self): Invoked by unicode(df) in py2 only. Yields a Unicode String in both py2/py3. 
""" - base, mult = _gfc(self.freq) - formatted = tslib.period_format(self.ordinal, base) - value = ("%s" % formatted) + + formatted = self.__get_formatted() + value = str(formatted) return value def strftime(self, fmt): @@ -425,8 +463,7 @@ def strftime(self, fmt): >>> a.strftime('%b. %d, %Y was a %A') 'Jan. 01, 2001 was a Monday' """ - base, mult = _gfc(self.freq) - return tslib.period_format(self.ordinal, base, fmt) + return self.__get_formatted(fmt) def _get_date_and_freq(value, freq): @@ -471,11 +508,15 @@ def dt64arr_to_periodarr(data, freq, tz): if data.dtype != np.dtype('M8[ns]'): raise ValueError('Wrong dtype: %s' % data.dtype) - base, mult = _gfc(freq) - return tslib.dt64arr_to_periodarr(data.view('i8'), base, tz) + if isinstance(freq, _NonCythonPeriod): + return freq.dt64arr_to_periodarr(data, tz) + else: + base, _ = _gfc(freq, as_periodstr=True) + return tslib.dt64arr_to_periodarr(data.view('i8'), base, tz) # --- Period index sketch + def _period_index_cmp(opname): """ Wrap comparison operations to convert datetime-like to datetime64 @@ -483,12 +524,12 @@ def _period_index_cmp(opname): def wrapper(self, other): if isinstance(other, Period): func = getattr(self.values, opname) - if other.freq != self.freq: + if not other._same_freq(self): raise AssertionError("Frequencies must be equal") result = func(other.ordinal) elif isinstance(other, PeriodIndex): - if other.freq != self.freq: + if not other._same_freq(self): raise AssertionError("Frequencies must be equal") return getattr(self.values, opname)(other.values) else: @@ -523,7 +564,7 @@ class PeriodIndex(Int64Index): dtype : NumPy dtype (default: i8) copy : bool Make a copy of input ndarray - freq : string or period object, optional + freq : string or DateOffset object, optional One of pandas period strings or corresponding objects start : starting value, period-like, optional If data is None, used as the start point in generating regular @@ -565,7 +606,7 @@ def __new__(cls, data=None, ordinal=None, 
freq=None, start=None, end=None, quarter=None, day=None, hour=None, minute=None, second=None, tz=None): - freq = _freq_mod.get_standard_freq(freq) + freq_orig = freq if periods is not None: if com.is_float(periods): @@ -580,17 +621,26 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, else: fields = [year, month, quarter, day, hour, minute, second] data, freq = cls._generate_range(start, end, periods, - freq, fields) + freq_orig, fields) else: - ordinal, freq = cls._from_arraylike(data, freq, tz) + ordinal, freq = cls._from_arraylike(data, freq_orig, tz) data = np.array(ordinal, dtype=np.int64, copy=False) subarr = data.view(cls) subarr.name = name - subarr.freq = freq + + # If freq_orig was initially none, fall back to freq + subarr.freq = freq_orig if freq_orig is not None else freq return subarr + @cache_readonly + def freqstr(self): + return _freq_mod.get_standard_period_freq(self.freq) + + def _same_freq(self, other): + return other.freq == self.freq or other.freqstr == self.freqstr + @classmethod def _generate_range(cls, start, end, periods, freq, fields): field_count = com._count_not_none(*fields) @@ -681,7 +731,8 @@ def __contains__(self, key): return key.ordinal in self._engine def _box_values(self, values): - f = lambda x: Period(ordinal=x, freq=self.freq) + freq = self.freq + f = lambda x: Period(ordinal=x, freq=freq) return lib.map_infer(values, f) def asof_locs(self, where, mask): @@ -748,27 +799,33 @@ def factorize(self): uniques = PeriodIndex(ordinal=uniques, freq=self.freq) return labels, uniques - @property - def freqstr(self): - return self.freq - def asfreq(self, freq=None, how='E'): how = _validate_end_alias(how) + _check_freq_mult(freq) - freq = _freq_mod.get_standard_freq(freq) + freq_orig = freq - base1, mult1 = _gfc(self.freq) - base2, mult2 = _gfc(freq) + end = how == 'E' - if mult2 != 1: - raise ValueError('Only mult == 1 supported') + if isinstance(self.freq, _NonCythonPeriod): + new_data = 
self.freq.period_asfreq_arr( + self.values, freq_orig, end) + freq = _freq_mod.get_standard_freq(freq) + elif isinstance(freq_orig, _NonCythonPeriod): + freq = freq_orig.periodstr + freq_int_from, _ = _gfc(self.freq) + new_data = freq_orig.period_fromfreq_arr( + self.values, freq_int_from, end) + else: + freq = _freq_mod.get_standard_freq(freq) + base1, _ = _gfc(self.freq) + base2, _ = _gfc(freq) - end = how == 'E' - new_data = tslib.period_asfreq_arr(self.values, base1, base2, end) + new_data = tslib.period_asfreq_arr(self.values, base1, base2, end) result = new_data.view(PeriodIndex) result.name = self.name - result.freq = freq + result.freq = freq_orig return result def to_datetime(self, dayfirst=False): @@ -1079,7 +1136,7 @@ def __array_finalize__(self, obj): def __repr__(self): output = com.pprint_thing(self.__class__) + '\n' - output += 'freq: %s\n' % self.freq + output += 'freq: %s\n' % self.freqstr n = len(self) if n == 1: output += '[%s]\n' % (self[0]) @@ -1096,7 +1153,7 @@ def __unicode__(self): prefix = '' if compat.PY3 else 'u' mapper = "{0}'{{0}}'".format(prefix) output += '[{0}]'.format(', '.join(map(mapper.format, self))) - output += ", freq='{0}'".format(self.freq) + output += ", freq='{0}'".format(self.freqstr) output += ')' return output diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index dd72a5245e7b2..5ac2f4308ed46 100644 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -295,6 +295,7 @@ def _resample_timestamps(self): def _resample_periods(self): # assumes set_grouper(obj) already called axlabels = self.ax + source_freq = axlabels.freq obj = self.obj if len(axlabels) == 0: @@ -309,7 +310,7 @@ def _resample_periods(self): # Start vs. 
end of period memb = axlabels.asfreq(self.freq, how=self.convention) - if is_subperiod(axlabels.freq, self.freq) or self.how is not None: + if is_subperiod(source_freq, self.freq) or self.how is not None: # Downsampling rng = np.arange(memb.values[0], memb.values[-1] + 1) bins = memb.searchsorted(rng, side='right') @@ -317,7 +318,7 @@ def _resample_periods(self): grouped = obj.groupby(grouper, axis=self.axis) return grouped.aggregate(self._agg_method) - elif is_superperiod(axlabels.freq, self.freq): + elif is_superperiod(source_freq, self.freq): # Get the fill indexer indexer = memb.get_indexer(new_index, method=self.fill_method, limit=self.limit) @@ -325,7 +326,7 @@ def _resample_periods(self): else: raise ValueError('Frequency %s cannot be resampled to %s' - % (axlabels.freq, self.freq)) + % (source_freq, self.freq)) def _take_new_index(obj, indexer, new_index, axis=0): diff --git a/pandas/tseries/tests/test_frequencies.py b/pandas/tseries/tests/test_frequencies.py index 896f469f934c6..40ae8f7dc7a11 100644 --- a/pandas/tseries/tests/test_frequencies.py +++ b/pandas/tseries/tests/test_frequencies.py @@ -327,6 +327,11 @@ def test_is_superperiod_subperiod(): assert(fmod.is_superperiod(offsets.Hour(), offsets.Minute())) assert(fmod.is_subperiod(offsets.Minute(), offsets.Hour())) + +def test_get_period_alias_yearly(): + assert fmod.get_period_alias('Y') == fmod.get_period_alias('A') + + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/tseries/tests/test_offsets.py b/pandas/tseries/tests/test_offsets.py index 86635271eb9c1..ecafbfdf3cf22 100644 --- a/pandas/tseries/tests/test_offsets.py +++ b/pandas/tseries/tests/test_offsets.py @@ -17,9 +17,9 @@ WeekOfMonth, format, ole2datetime, QuarterEnd, to_datetime, normalize_date, get_offset, get_offset_name, get_standard_freq) -from pandas.tseries.frequencies import _offset_map +from pandas.tseries.frequencies import _offset_map, cday from 
pandas.tseries.index import _to_m8, DatetimeIndex, _daterange_cache -from pandas.tseries.tools import parse_time_string +from pandas.tseries.tools import parse_time_string, DateParseError import pandas.tseries.offsets as offsets from pandas.tslib import monthrange, OutOfBoundsDatetime, NaT @@ -1650,6 +1650,7 @@ def test_onOffset(self): offset_n = FY5253(weekday=WeekDay.TUE, startingMonth=12, variation="nearest") + tests = [ # From Wikipedia (see: http://en.wikipedia.org/wiki/4%E2%80%934%E2%80%935_calendar#Saturday_nearest_the_end_of_month) # 2006-09-02 2006 September 2 @@ -1700,6 +1701,7 @@ def test_onOffset(self): (offset_n, datetime(2012, 12, 31), False), (offset_n, datetime(2013, 1, 1), True), (offset_n, datetime(2013, 1, 2), False), + ] for offset, date, expected in tests: @@ -1716,6 +1718,7 @@ def test_apply(self): datetime(2011, 1, 2), datetime(2012, 1, 1), datetime(2012, 12, 30)] + DEC_SAT = FY5253(n=-1, startingMonth=12, weekday=5, variation="nearest") tests = [ @@ -1932,6 +1935,7 @@ def test_onOffset(self): (offset_n, datetime(2012, 12, 31), False), (offset_n, datetime(2013, 1, 1), True), (offset_n, datetime(2013, 1, 2), False) + ] for offset, date, expected in tests: @@ -2626,6 +2630,7 @@ def test_get_offset_name(self): self.assertEqual(get_offset_name(makeFY5253LastOfMonthQuarter(weekday=1, startingMonth=3, qtr_with_extra_week=4)),"REQ-L-MAR-TUE-4") self.assertEqual(get_offset_name(makeFY5253NearestEndMonthQuarter(weekday=1, startingMonth=3, qtr_with_extra_week=3)), "REQ-N-MAR-TUE-3") + def test_get_offset(): assertRaisesRegexp(ValueError, "rule.*GIBBERISH", get_offset, 'gibberish') assertRaisesRegexp(ValueError, "rule.*QS-JAN-B", get_offset, 'QS-JAN-B') @@ -2648,12 +2653,18 @@ def test_get_offset(): (name, expected, offset)) -def test_parse_time_string(): - (date, parsed, reso) = parse_time_string('4Q1984') - (date_lower, parsed_lower, reso_lower) = parse_time_string('4q1984') - assert date == date_lower - assert parsed == parsed_lower - assert reso == 
reso_lower +class TestParseTimeString(tm.TestCase): + def test_case_sensitivity(self): + (date, parsed, reso) = parse_time_string('4Q1984') + (date_lower, parsed_lower, reso_lower) = parse_time_string('4q1984') + + self.assertEqual(date, date_lower) + self.assertEqual(parsed, parsed_lower) + self.assertEqual(reso, reso_lower) + + def test_invalid_string(self): + self.assertRaises(DateParseError, + parse_time_string, '2013Q1', freq="INVLD-L-DEC-SAT") def test_get_standard_freq(): @@ -2714,6 +2725,37 @@ def test_rule_code(self): self.assertEqual(alias, get_offset(alias).rule_code) self.assertEqual(alias, (get_offset(alias) * 5).rule_code) + def test_offset_map(self): + #GH5028 + for name, offset in compat.iteritems(_offset_map): + if name == 'C' and cday is None: + continue + self.assertEqual(name, None if offset is None else offset.rule_code) + + def test_many_to_one_mapping(self): + #GH5028 + offsets = [ + QuarterBegin(startingMonth=1), + BQuarterBegin(startingMonth=1), + BQuarterEnd(startingMonth=12), + ] + + for offset in offsets: + self.assertEqual(get_offset_name(offset), offset.rule_code) + + def test_aliased_offset_equality(self): + self.assertEqual(get_offset("Q"), get_offset("Q")) + self.assertEqual(get_offset("Q"), get_offset("Q-DEC")) + self.assertEqual(get_offset("QS"), get_offset("QS-JAN")) + self.assertEqual(get_offset("BQ"), get_offset("BQ-DEC")) + self.assertEqual(get_offset("BQS"), get_offset("BQS-JAN")) + + def test_aliased_offset_repr_equality(self): + self.assertEqual(repr(get_offset("Q")), repr(get_offset("Q"))) + self.assertEqual(repr(get_offset("Q")), repr(get_offset("Q-DEC"))) + self.assertEqual(repr(get_offset("QS")), repr(get_offset("QS-JAN"))) + self.assertEqual(repr(get_offset("BQ")), repr(get_offset("BQ-DEC"))) + self.assertEqual(repr(get_offset("BQS")), repr(get_offset("BQS-JAN"))) def test_apply_ticks(): result = offsets.Hour(3).apply(offsets.Hour(4)) @@ -2814,7 +2856,7 @@ def test_str_for_named_is_name(self): names += ['WOM-' + week + 
day for week in ('1', '2', '3', '4') for day in days] #singletons - names += ['S', 'T', 'U', 'BM', 'BMS', 'BQ', 'QS'] # No 'Q' + names += ['S', 'T', 'U', 'BM', 'BMS', ] # No 'Q', 'BQ', 'QS', 'BQS', _offset_map.clear() for name in names: offset = get_offset(name) diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index a6326794c1b12..cb6d75ffe0d70 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -14,7 +14,7 @@ from pandas.tseries.frequencies import MONTHS, DAYS, _period_code_map from pandas.tseries.period import Period, PeriodIndex, period_range from pandas.tseries.index import DatetimeIndex, date_range, Index -from pandas.tseries.tools import to_datetime +from pandas.tseries.tools import to_datetime, _try_parse_qtr_time_string import pandas.tseries.period as pmod import pandas.core.datetools as datetools @@ -29,6 +29,7 @@ import pandas.util.testing as tm from pandas import compat from numpy.testing import assert_array_equal +from pandas.tseries.offsets import FY5253Quarter, WeekDay, Week, Day class TestPeriodProperties(tm.TestCase): @@ -1698,6 +1699,11 @@ def test_ts_repr(self): expected = "\nfreq: Q-DEC\n[2013Q1, ..., 2013Q3]\nlength: 3" assert_equal(repr(val), expected) + def test_period_weeklies(self): + p1 = Period('2006-12-31', 'W') + p2 = Period('2006-12-31', '1w') + assert_equal(p1.freq, p2.freq) + def test_period_index_unicode(self): pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') assert_equal(len(pi), 9) @@ -1861,21 +1867,21 @@ def test_to_period_quarterlyish(self): for off in offsets: rng = date_range('01-Jan-2012', periods=8, freq=off) prng = rng.to_period() - self.assertEqual(prng.freq, 'Q-DEC') + self.assertEqual(prng.freqstr, 'Q-DEC') def test_to_period_annualish(self): offsets = ['BA', 'AS', 'BAS'] for off in offsets: rng = date_range('01-Jan-2012', periods=8, freq=off) prng = rng.to_period() - self.assertEqual(prng.freq, 'A-DEC') + 
self.assertEqual(prng.freqstr, 'A-DEC') def test_to_period_monthish(self): offsets = ['MS', 'EOM', 'BM'] for off in offsets: rng = date_range('01-Jan-2012', periods=8, freq=off) prng = rng.to_period() - self.assertEqual(prng.freq, 'M') + self.assertEqual(prng.freqstr, 'M') def test_no_multiples(self): self.assertRaises(ValueError, period_range, '1989Q3', periods=10, @@ -2169,12 +2175,45 @@ def test_pickle_freq(self): import pickle prng = period_range('1/1/2011', '1/1/2012', freq='M') new_prng = pickle.loads(pickle.dumps(prng)) - self.assertEqual(new_prng.freq,'M') + self.assertEqual(new_prng.freq, 'M') def test_slice_keep_name(self): idx = period_range('20010101', periods=10, freq='D', name='bob') self.assertEqual(idx.name, idx[1:].name) + def test_period_range_alias(self): + self.assertTrue( + pd.date_range('1/1/2012', periods=4, + freq=pd.offsets.MonthEnd()).to_period().identical( + pd.period_range('1/1/2012', periods=4, + freq=pd.offsets.MonthEnd()))) + + # GH 4878 + self.assertTrue( + pd.date_range('1/1/2012', periods=4, + freq=pd.offsets.BusinessMonthEnd()).to_period().identical( + pd.period_range('1/1/2012', periods=4, + freq=pd.offsets.BusinessMonthEnd()))) + + def test_period_range_alias2(self): + self.assertTrue( + pd.Series(range(4), + index=pd.date_range('1/1/2012', periods=4, + freq=pd.offsets.MonthEnd())).to_period().index.identical( + pd.Series(range(4), + index=pd.date_range('1/1/2012', periods=4, + freq=pd.offsets.MonthEnd()).to_period()).index)) + + # GH 4878 + self.assertTrue( + pd.Series(range(4), + index=pd.date_range('1/1/2012', periods=4, + freq=pd.offsets.BusinessMonthEnd()) + ).to_period().index.identical( + pd.Series(range(4), + index=pd.date_range('1/1/2012', periods=4, + freq=pd.offsets.BusinessMonthEnd()).to_period()).index)) + def _permute(obj): return obj.take(np.random.permutation(len(obj))) @@ -2313,6 +2352,294 @@ def test_sort(self): self.assertEqual(sorted(periods), correctPeriods) +class TestFY5253QuarterPeriods(tm.TestCase): + 
def test_get_period_ordinal(self): + offset = FY5253Quarter(weekday=WeekDay.SAT, startingMonth=12, + variation="last", qtr_with_extra_week=4) + + self.assertEqual(offset.get_period_ordinal( + datetime(2013, 10, 27)), 2013 * 4 + 3) + self.assertEqual(offset.get_period_ordinal( + datetime(2013, 12, 28)), 2013 * 4 + 3) + self.assertEqual(offset.get_period_ordinal( + datetime(2013, 12, 29)), 2014 * 4 + 0) + + offset_n = FY5253Quarter(weekday=WeekDay.TUE, startingMonth=12, + variation="nearest", qtr_with_extra_week=4) + + self.assertEqual(offset_n.get_period_ordinal(datetime(2013, 1, 2)), + offset_n.get_period_ordinal(datetime(2013, 1, 30))) + + self.assertEqual(offset_n.get_period_ordinal(datetime(2013, 1, 1)) + 1, + offset_n.get_period_ordinal(datetime(2013, 1, 2))) + + def test_period_format(self): + offset = FY5253Quarter(weekday=WeekDay.SAT, startingMonth=12, + variation="last", qtr_with_extra_week=4) + + self.assertEqual(offset.period_format(2013 * 4 + 3), "2013Q4") + self.assertEqual(offset.period_format(2014 * 4 + 0), "2014Q1") + + def test_get_end_dt(self): + offset = FY5253Quarter(weekday=WeekDay.SAT, startingMonth=12, + variation="last", qtr_with_extra_week=4) + + self.assertEqual(offset.get_end_dt( + offset.get_period_ordinal(datetime(2013, 12, 27))), + datetime(2013, 12, 28)) + self.assertEqual(offset.get_end_dt( + offset.get_period_ordinal(datetime(2013, 12, 28))), + datetime(2013, 12, 28)) + self.assertEqual(offset.get_end_dt( + offset.get_period_ordinal(datetime(2013, 12, 28))), + datetime(2013, 12, 28)) + self.assertEqual(offset.get_end_dt( + offset.get_period_ordinal(datetime(2013, 12, 29))), + datetime(2014, 3, 29)) + + def test_get_start_dt(self): + offset = FY5253Quarter(weekday=WeekDay.SAT, startingMonth=12, + variation="last", qtr_with_extra_week=4) + + self.assertEqual(offset.get_start_dt( + offset.get_period_ordinal(datetime(2013, 9, 29))), + datetime(2013, 9, 29)) + self.assertEqual(offset.get_start_dt( + 
offset.get_period_ordinal(datetime(2013, 12, 27))), + datetime(2013, 9, 29)) + self.assertEqual(offset.get_start_dt( + offset.get_period_ordinal(datetime(2013, 12, 28))), + datetime(2013, 9, 29)) + self.assertEqual(offset.get_start_dt( + offset.get_period_ordinal(datetime(2013, 12, 28))), + datetime(2013, 9, 29)) + self.assertEqual(offset.get_start_dt( + offset.get_period_ordinal(datetime(2013, 12, 29))), + datetime(2013, 12, 29)) + + def test_period_str(self): + offset = FY5253Quarter(weekday=WeekDay.SAT, startingMonth=12, + variation="last", qtr_with_extra_week=4) + + self.assertEqual(str(Period("2013-12-27", freq=offset)), "2013Q4") + self.assertEqual(str(Period("2013-12-28", freq=offset)), "2013Q4") + self.assertEqual(str(Period("2013-12-29", freq=offset)), "2014Q1") + self.assertEqual(str(Period("2013-9-29", freq=offset)), "2013Q4") + self.assertEqual(str(Period("2013-9-28", freq=offset)), "2013Q3") + + offset_n = FY5253Quarter(weekday=WeekDay.TUE, startingMonth=12, + variation="nearest", qtr_with_extra_week=4) + self.assertEqual(str(Period("2013-01-01", freq=offset_n)), "2012Q4") + self.assertEqual(str(Period("2013-01-03", freq=offset_n)), "2013Q1") + self.assertEqual(str(Period("2013-01-02", freq=offset_n)), "2013Q1") + + offset_sun = FY5253Quarter(weekday=WeekDay.SUN, startingMonth=12, + variation="nearest", qtr_with_extra_week=4) + self.assertEqual(str(Period("2011-1-2", freq=offset_sun)), "2010Q4") + self.assertEqual(str(Period("2011-1-3", freq=offset_sun)), "2011Q1") + self.assertEqual(str(Period("2011-4-3", freq=offset_sun)), "2011Q1") + self.assertEqual(str(Period("2011-4-4", freq=offset_sun)), "2011Q2") + self.assertEqual(str(Period("2011-7-3", freq=offset_sun)), "2011Q2") + self.assertEqual(str(Period("2011-7-4", freq=offset_sun)), "2011Q3") + self.assertEqual(str(Period("2003-9-28", freq=offset_sun)), "2003Q3") + self.assertEqual(str(Period("2003-9-29", freq=offset_sun)), "2003Q4") + self.assertEqual(str(Period("2004-9-26", freq=offset_sun)), 
"2004Q3") + self.assertEqual(str(Period("2004-9-27", freq=offset_sun)), "2004Q4") + self.assertEqual(str(Period("2005-1-2", freq=offset_sun)), "2004Q4") + self.assertEqual(str(Period("2005-1-3", freq=offset_sun)), "2005Q1") + + def test_period_str_parsing(self): + offset = FY5253Quarter(weekday=WeekDay.SAT, startingMonth=12, + variation="last", qtr_with_extra_week=4) + + self.assertEqual(_try_parse_qtr_time_string("2013Q4"), (2013, 4)) + self.assertEqual(_try_parse_qtr_time_string("2013q4"), (2013, 4)) + self.assertEqual(_try_parse_qtr_time_string("13Q4"), (2013, 4)) + self.assertEqual(_try_parse_qtr_time_string("1Q14"), (2014, 1)) + + self.assertEqual( + str(Period(offset.parse_time_string("2013Q4"), + freq=offset)), "2013Q4") + + self.assertEqual(offset.get_period_ordinal( + offset.parse_time_string("2013Q4")), 2013 * 4 + 3) + + self.assertEqual(offset.period_format( + offset.get_period_ordinal( + offset.parse_time_string("2013Q4"))), "2013Q4") + + def test_period_asfreq1(self): + offset = FY5253Quarter(weekday=WeekDay.SAT, startingMonth=12, + variation="last", qtr_with_extra_week=4) + period = Period("2013-12-27", freq=offset) + + week_offset = Week(weekday=WeekDay.SAT) + self.assertEqual(str(period.asfreq(freq=week_offset, how="E")), + "2013-12-22/2013-12-28") + self.assertEqual(str(period.asfreq(freq=week_offset, how="S")), + "2013-09-29/2013-10-05") + + day = Day() + self.assertEqual(str(period.asfreq(freq=day, how="E")), + "2013-12-28") + + self.assertEqual(str(period.asfreq(freq=day, how="S")), + "2013-09-29") + + def test_period_asfreq2(self): + qtr_offset = FY5253Quarter(weekday=WeekDay.SAT, startingMonth=12, + variation="last", qtr_with_extra_week=4) + week_offset = Week(weekday=WeekDay.SAT) + + period = Period("2013-12-22/2013-12-28", freq=week_offset) + + self.assertEqual(str(period.asfreq(freq=qtr_offset, how="E")), + "2013Q4") + self.assertEqual(str(period.asfreq(freq=qtr_offset, how="S")), + "2013Q4") + + def test_period_range(self): + offset = 
FY5253Quarter(weekday=WeekDay.SAT, startingMonth=12, + variation="last", qtr_with_extra_week=4) + +# prange = period_range('2013Q1', periods=2, freq=offset) + prange = period_range(datetime(2013, 1, 15), periods=2, freq=offset) + + self.assertEqual(len(prange), 2) + self.assertEqual(prange.freq, offset.periodstr) + self.assertEqual(str(prange[0]), '2013Q1') + self.assertEqual(str(prange[1]), '2013Q2') + + def test_period_range_from_ts(self): + offset = FY5253Quarter(weekday=WeekDay.SAT, startingMonth=12, + variation="last", qtr_with_extra_week=4) + + drange = date_range(datetime(2013, 1, 15), periods=2, freq=offset) + prange = drange.to_period() + + self.assertEqual(len(prange), 2) + self.assertEqual(prange.freq, offset.periodstr) + self.assertEqual(str(prange[0]), '2013Q1') + self.assertEqual(str(prange[1]), '2013Q2') + + def test_periodindex_asfreq(self): + offset = FY5253Quarter(weekday=WeekDay.SAT, startingMonth=12, + variation="last", qtr_with_extra_week=4) + + prange = period_range(datetime(2013, 1, 15), periods=2, freq=offset) + + week_offset = Week(weekday=WeekDay.SAT) + + week_end = prange.asfreq(freq=week_offset, how="E") + self.assertEqual(len(week_end), 2) + self.assertEqual(week_end.freq, week_offset.periodstr) + self.assertEqual(str(week_end[0]), '2013-03-24/2013-03-30') + self.assertEqual(str(week_end[1]), '2013-06-23/2013-06-29') + + week_start = prange.asfreq(freq=week_offset, how="S") + self.assertEqual(len(week_start), 2) + self.assertEqual(week_start.freq, week_offset.periodstr) + self.assertEqual(str(week_start[0]), '2012-12-30/2013-01-05') + self.assertEqual(str(week_start[1]), '2013-03-31/2013-04-06') + + day = Day() + day_end = prange.asfreq(freq=day, how="E") + self.assertEqual(len(day_end), 2) + self.assertEqual(day_end.freq, day.periodstr) + self.assertEqual(str(day_end[0]), '2013-03-30') + self.assertEqual(str(day_end[1]), '2013-06-29') + + day_start = prange.asfreq(freq=day, how="S") + self.assertEqual(len(day_start), 2) + 
self.assertEqual(day_start.freq, day.periodstr) + self.assertEqual(str(day_start[0]), '2012-12-30') + self.assertEqual(str(day_start[1]), '2013-03-31') + + def test_resample_to_weekly(self): + offset = FY5253Quarter(weekday=WeekDay.SAT, startingMonth=12, + variation="last", qtr_with_extra_week=4) + + prange = period_range(datetime(2013, 1, 15), periods=2, freq=offset) + + df = DataFrame({"A": [1, 2]}, index=prange) + resampled = df.resample(Week(weekday=WeekDay.SAT), fill_method="ffill") + self.assertEquals(len(resampled), 2 * 13) + self.assertEquals(str(resampled.index[0]), '2012-12-30/2013-01-05') + self.assertEquals(str(resampled.index[-1]), '2013-06-23/2013-06-29') + + tm.assert_frame_equal(resampled, + df.resample("W-SAT", fill_method="ffill")) + + assertRaisesRegexp(ValueError, + "cannot be resampled to", + df.resample, + "W-MON", fill_method="ffill") + + def test_resample_to_daily(self): + offset = FY5253Quarter(weekday=WeekDay.SAT, startingMonth=12, + variation="last", qtr_with_extra_week=4) + + prange = period_range(datetime(2013, 1, 15), periods=2, freq=offset) + + df = DataFrame({"A": [1, 2]}, index=prange) + resampled = df.resample(Day(), fill_method="ffill") + self.assertEquals(len(resampled), 2 * 7 * 13) + self.assertEquals(str(resampled.index[0]), '2012-12-30') + self.assertEquals(str(resampled.index[-1]), '2013-06-29') + + tm.assert_frame_equal(resampled, + df.resample("D", fill_method="ffill")) + + def test_resample_from_weekly(self): + offset_fyq = FY5253Quarter(weekday=WeekDay.SAT, startingMonth=12, + variation="last", qtr_with_extra_week=4) + freq_week = Week(weekday=WeekDay.SAT) + + prange = period_range(datetime(2013, 1, 5), + periods=2 * 13, + freq=freq_week) + + df = DataFrame({"A": [1] * 13 + [2] * 13}, index=prange) + resampled = df.resample(offset_fyq, fill_method="mean") + + self.assertEquals(len(resampled), 2) + self.assertEquals(str(resampled.index[0]), '2013Q1') + self.assertEquals(str(resampled.index[-1]), '2013Q2') + 
self.assertEquals(resampled["A"][0], 1) + self.assertEquals(resampled["A"]["2013Q1"], 1) + self.assertEquals(resampled["A"][1], 2) + self.assertEquals(resampled["A"]["2013Q2"], 2) + + offset_fyq2 = FY5253Quarter(weekday=WeekDay.MON, startingMonth=12, + variation="last", qtr_with_extra_week=4) + + assertRaisesRegexp(ValueError, + "cannot be resampled to", + df.resample, offset_fyq2, fill_method="ffill") + + def test_resample_from_daily(self): + offset_fyq = FY5253Quarter(weekday=WeekDay.SAT, startingMonth=12, + variation="last", qtr_with_extra_week=4) + + prange = period_range(datetime(2012, 12, 30), + periods=2 * 7 * 13, + freq=Day()) + + df = DataFrame({"A": [1] * 13 * 7 + [2] * 13 * 7}, index=prange) + resampled = df.resample(offset_fyq, fill_method="mean") + + self.assertEquals(len(resampled), 2) + self.assertEquals(str(resampled.index[0]), '2013Q1') + self.assertEquals(str(resampled.index[-1]), '2013Q2') + self.assertEquals(resampled["A"][0], 1) + self.assertEquals(resampled["A"][1], 2) + + def test_freq_to_period(self): + r = pd.date_range('01-Jan-2012', periods=8, freq='QS') + x = r.to_period() + self.assert_("freq='Q-DEC'" in str(x)) + self.assert_("freq: Q-DEC" in repr(x)) + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/tseries/tests/test_plotting.py b/pandas/tseries/tests/test_plotting.py index 5d1e4b67041f7..d07af679c0d47 100644 --- a/pandas/tseries/tests/test_plotting.py +++ b/pandas/tseries/tests/test_plotting.py @@ -13,6 +13,7 @@ from pandas.tseries.offsets import DateOffset from pandas.tseries.period import period_range, Period, PeriodIndex from pandas.tseries.resample import DatetimeIndex +from pandas.tseries.frequencies import get_period_alias from pandas.util.testing import assert_series_equal, ensure_clean import pandas.util.testing as tm @@ -97,7 +98,7 @@ def test_tsplot(self): f = lambda *args, **kwds: tsplot(s, plt.Axes.plot, *args, **kwds) for s in 
self.period_ser: - _check_plot_works(f, s.index.freq, ax=ax, series=s) + _check_plot_works(f, s.index.freq, ax=ax, series=s, is_period=True) for s in self.datetime_ser: _check_plot_works(f, s.index.freq.rule_code, ax=ax, series=s) @@ -149,7 +150,7 @@ def check_format_of_first_point(ax, expected_string): @slow def test_line_plot_period_series(self): for s in self.period_ser: - _check_plot_works(s.plot, s.index.freq) + _check_plot_works(s.plot, s.index.freq, is_period=True) @slow def test_line_plot_datetime_series(self): @@ -159,7 +160,7 @@ def test_line_plot_datetime_series(self): @slow def test_line_plot_period_frame(self): for df in self.period_df: - _check_plot_works(df.plot, df.index.freq) + _check_plot_works(df.plot, df.index.freq, is_period=True) @slow def test_line_plot_datetime_frame(self): @@ -676,7 +677,7 @@ def test_mixed_freq_lf_first(self): low.plot() ax = high.plot() for l in ax.get_lines(): - self.assertEqual(PeriodIndex(data=l.get_xdata()).freq, 'T') + self.assertEqual(PeriodIndex(data=l.get_xdata()).freqstr, 'T') def test_mixed_freq_irreg_period(self): ts = tm.makeTimeSeries() @@ -695,7 +696,7 @@ def test_to_weekly_resampling(self): high.plot() ax = low.plot() for l in ax.get_lines(): - self.assert_(PeriodIndex(data=l.get_xdata()).freq.startswith('W')) + self.assert_(PeriodIndex(data=l.get_xdata()).freqstr.startswith('W')) @slow def test_from_weekly_resampling(self): @@ -706,7 +707,7 @@ def test_from_weekly_resampling(self): low.plot() ax = high.plot() for l in ax.get_lines(): - self.assert_(PeriodIndex(data=l.get_xdata()).freq.startswith('W')) + self.assert_(PeriodIndex(data=l.get_xdata()).freqstr.startswith('W')) @slow def test_irreg_dtypes(self): @@ -924,7 +925,7 @@ def test_mpl_nopandas(self): line2.get_xydata()[:, 0]) -def _check_plot_works(f, freq=None, series=None, *args, **kwargs): +def _check_plot_works(f, freq=None, series=None, is_period=False, *args, **kwargs): import matplotlib.pyplot as plt fig = plt.gcf() @@ -944,10 +945,16 @@ def 
_check_plot_works(f, freq=None, series=None, *args, **kwargs): if isinstance(dfreq, DateOffset): dfreq = dfreq.rule_code if orig_axfreq is None: - assert ax.freq == dfreq + if is_period: + assert get_period_alias(ax.freq) == get_period_alias(dfreq) + else: + assert ax.freq == dfreq if freq is not None and orig_axfreq is None: - assert ax.freq == freq + if is_period: + assert get_period_alias(ax.freq) == get_period_alias(freq) + else: + assert ax.freq == freq ax = fig.add_subplot(212) try: diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index d01ad56165880..28c9d80e1c6d4 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -319,9 +319,11 @@ def _convert_listlike(arg, box, format): return _convert_listlike(np.array([ arg ]), box, format)[0] + class DateParseError(ValueError): pass + def _attempt_YYYYMMDD(arg): """ try to parse the YYYYMMDD/%Y%m%d format, try to deal with NaT-like, arg is a passed in as an object dtype, but could really be ints/strings with nan-like/or floats (e.g. 
with nan) """ @@ -369,6 +371,34 @@ def calc_with_mask(carg,mask): has_time = re.compile('(.+)([\s]|T)+(.+)') +def _try_parse_qtr_time_string(arg): + arg = arg.upper() + + add_century = False + if len(arg) == 4: + add_century = True + qpats = [(qpat1, 1), (qpat2, 0)] + else: + qpats = [(qpat1full, 1), (qpat2full, 0)] + + for pat, yfirst in qpats: + qparse = pat.match(arg) + if qparse is not None: + if yfirst: + yi, qi = 1, 2 + else: + yi, qi = 2, 1 + q = int(qparse.group(yi)) + y_str = qparse.group(qi) + y = int(y_str) + if add_century: + y += 2000 + + return y, q + + return None + + def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): """ Try hard to parse datetime string, leveraging dateutil plus some extra @@ -389,15 +419,19 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): datetime, datetime/dateutil.parser._result, str """ from pandas.core.config import get_option + from pandas.tseries.frequencies import (_get_rule_month, _month_numbers) from pandas.tseries.offsets import DateOffset - from pandas.tseries.frequencies import (_get_rule_month, _month_numbers, - _get_freq_str) if not isinstance(arg, compat.string_types): return arg arg = arg.upper() + if isinstance(freq, DateOffset): + parsed_dt = freq.parse_time_string(arg) + if parsed_dt is not None: + return parsed_dt, parsed_dt, freq.name + default = datetime(1, 1, 1).replace(hour=0, minute=0, second=0, microsecond=0) @@ -408,37 +442,26 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): ret = default.replace(year=int(m.group(1))) return ret, ret, 'year' - add_century = False - if len(arg) == 4: - add_century = True - qpats = [(qpat1, 1), (qpat2, 0)] - else: - qpats = [(qpat1full, 1), (qpat2full, 0)] - - for pat, yfirst in qpats: - qparse = pat.match(arg) - if qparse is not None: - if yfirst: - yi, qi = 1, 2 - else: - yi, qi = 2, 1 - q = int(qparse.group(yi)) - y_str = qparse.group(qi) - y = int(y_str) - if add_century: - y += 2000 - - if freq is not 
None: - # hack attack, #1228 - mnum = _month_numbers[_get_rule_month(freq)] + 1 - month = (mnum + (q - 1) * 3) % 12 + 1 - if month > mnum: - y -= 1 - else: - month = (q - 1) * 3 + 1 - - ret = default.replace(year=y, month=month) - return ret, ret, 'quarter' + qtr_parsed = _try_parse_qtr_time_string(arg) + if qtr_parsed is not None: + y, q = qtr_parsed + + if freq is not None: + # hack attack, #1228 + month_name = _get_rule_month(freq) + try: + mnum = _month_numbers[month_name] + 1 + except KeyError: + raise DateParseError( + "Do not understand freq: %s" % freq) + month = (mnum + (q - 1) * 3) % 12 + 1 + if month > mnum: + y -= 1 + else: + month = (q - 1) * 3 + 1 + + ret = default.replace(year=y, month=month) + return ret, ret, 'quarter' is_mo_str = freq is not None and freq == 'M' is_mo_off = getattr(freq, 'rule_code', None) == 'M'