Skip to content

Commit

Permalink
Merge pull request pandas-dev#11205 from chris-b1/faster-offsets
Browse files Browse the repository at this point in the history
PERF: vectorized DateOffset with months
  • Loading branch information
jreback committed Oct 2, 2015
2 parents 6ab626f + e58f18c commit 9fc9201
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 16 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.17.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1059,6 +1059,7 @@ Performance Improvements
- 2x improvement of ``Series.value_counts`` for float dtype (:issue:`10821`)
- Enable ``infer_datetime_format`` in ``to_datetime`` when date components do not have 0 padding (:issue:`11142`)
- Regression from 0.16.1 in constructing ``DataFrame`` from nested dictionary (:issue:`11084`)
- Performance improvements in addition/subtraction operations for ``DateOffset`` with ``Series`` or ``DatetimeIndex`` (:issue:`10744`, :issue:`11205`)

.. _whatsnew_0170.bug_fixes:

Expand Down
18 changes: 9 additions & 9 deletions pandas/tseries/offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,16 +261,12 @@ def apply_index(self, i):
# relativedelta/_offset path only valid for base DateOffset
if (self._use_relativedelta and
set(self.kwds).issubset(relativedelta_fast)):

months = ((self.kwds.get('years', 0) * 12
+ self.kwds.get('months', 0)) * self.n)
if months:
base = (i.to_period('M') + months).to_timestamp()
time = i.to_perioddelta('D')
days = i.to_perioddelta('M') - time
# minimum prevents month-end from wrapping
day_offset = np.minimum(days,
to_timedelta(base.days_in_month - 1, unit='D'))
i = base + day_offset + time
shifted = tslib.shift_months(i.asi8, months)
i = i._shallow_copy(shifted)

weeks = (self.kwds.get('weeks', 0)) * self.n
if weeks:
Expand Down Expand Up @@ -1081,7 +1077,9 @@ def apply(self, other):

@apply_index_wraps
def apply_index(self, i):
    """
    Vectorized month-end shift of every element of ``i``.

    Parameters
    ----------
    i : index-like exposing ``asi8`` and ``_shallow_copy``
        (presumably a DatetimeIndex -- confirm against apply_index_wraps)

    Returns
    -------
    Copy of ``i`` built from the shifted int64 values.
    """
    # tslib.shift_months(..., 'end') always lands on the last day of the
    # target month, and for months >= 0 moves one extra month forward when
    # the input already is a month end. Rolling to the end of the current
    # month therefore counts as the first of the ``n`` forward steps,
    # hence ``n - 1``.
    # NOTE(review): for n == 0 this gives months == -1, i.e. the previous
    # month end -- confirm this matches the scalar ``apply`` path.
    months = self.n - 1 if self.n >= 0 else self.n
    shifted = tslib.shift_months(i.asi8, months, 'end')
    return i._shallow_copy(shifted)

def onOffset(self, dt):
if self.normalize and not _is_normalized(dt):
Expand All @@ -1106,7 +1104,9 @@ def apply(self, other):

@apply_index_wraps
def apply_index(self, i):
    """
    Vectorized month-begin shift of every element of ``i``.

    Parameters
    ----------
    i : index-like exposing ``asi8`` and ``_shallow_copy``
        (presumably a DatetimeIndex -- confirm against apply_index_wraps)

    Returns
    -------
    Copy of ``i`` built from the shifted int64 values.
    """
    # tslib.shift_months(..., 'start') snaps to the first day of the target
    # month, and for months <= 0 moves one extra month back when the input
    # already is the 1st. Snapping to the start of the current month
    # therefore counts as the first of the ``|n|`` backward steps,
    # hence ``n + 1``.
    # NOTE(review): for n == 0 this gives months == 0, which sends an
    # input on the 1st back one month -- confirm this matches the scalar
    # ``apply`` path.
    months = self.n + 1 if self.n < 0 else self.n
    shifted = tslib.shift_months(i.asi8, months, 'start')
    return i._shallow_copy(shifted)

def onOffset(self, dt):
if self.normalize and not _is_normalized(dt):
Expand Down
14 changes: 7 additions & 7 deletions pandas/tseries/tests/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -2565,32 +2565,32 @@ def test_datetime64_with_DateOffset(self):
for klass, assert_func in zip([Series, DatetimeIndex],
[self.assert_series_equal,
tm.assert_index_equal]):
s = klass(date_range('2000-01-01', '2000-01-31'))
s = klass(date_range('2000-01-01', '2000-01-31'), name='a')
result = s + pd.DateOffset(years=1)
result2 = pd.DateOffset(years=1) + s
exp = klass(date_range('2001-01-01', '2001-01-31'))
exp = klass(date_range('2001-01-01', '2001-01-31'), name='a')
assert_func(result, exp)
assert_func(result2, exp)

result = s - pd.DateOffset(years=1)
exp = klass(date_range('1999-01-01', '1999-01-31'))
exp = klass(date_range('1999-01-01', '1999-01-31'), name='a')
assert_func(result, exp)

s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'),
pd.Timestamp('2000-02-15', tz='US/Central')])
pd.Timestamp('2000-02-15', tz='US/Central')], name='a')
result = s + pd.offsets.Day()
result2 = pd.offsets.Day() + s
exp = klass([Timestamp('2000-01-16 00:15:00', tz='US/Central'),
Timestamp('2000-02-16', tz='US/Central')])
Timestamp('2000-02-16', tz='US/Central')], name='a')
assert_func(result, exp)
assert_func(result2, exp)

s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'),
pd.Timestamp('2000-02-15', tz='US/Central')])
pd.Timestamp('2000-02-15', tz='US/Central')], name='a')
result = s + pd.offsets.MonthEnd()
result2 = pd.offsets.MonthEnd() + s
exp = klass([Timestamp('2000-01-31 00:15:00', tz='US/Central'),
Timestamp('2000-02-29', tz='US/Central')])
Timestamp('2000-02-29', tz='US/Central')], name='a')
assert_func(result, exp)
assert_func(result2, exp)

Expand Down
11 changes: 11 additions & 0 deletions pandas/tseries/tests/test_tslib.py
Original file line number Diff line number Diff line change
Expand Up @@ -949,6 +949,17 @@ def compare_local_to_utc(tz_didx, utc_didx):
tslib.maybe_get_tz('Asia/Tokyo'))
self.assert_numpy_array_equal(result, np.array([tslib.iNaT], dtype=np.int64))

def test_shift_months(self):
    # tslib.shift_months with the default ``day`` option must agree with
    # adding the equivalent DateOffset to each element one at a time
    stamps = DatetimeIndex([Timestamp('2000-01-05 00:15:00'),
                            Timestamp('2000-01-31 00:23:00'),
                            Timestamp('2000-01-01'),
                            Timestamp('2000-02-29'),
                            Timestamp('2000-12-31')])
    for years in [-1, 0, 1]:
        for months in [-2, 0, 2]:
            total_months = years * 12 + months
            offset = offsets.DateOffset(years=years, months=months)

            shifted = tslib.shift_months(stamps.asi8, total_months)
            actual = DatetimeIndex(shifted)
            expected = DatetimeIndex([ts + offset for ts in stamps])
            tm.assert_index_equal(actual, expected)



class TestTimestampOps(tm.TestCase):
def test_timestamp_and_datetime(self):
self.assertEqual((Timestamp(datetime.datetime(2013, 10, 13)) - datetime.datetime(2013, 10, 12)).days, 1)
Expand Down
70 changes: 70 additions & 0 deletions pandas/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -3847,6 +3847,7 @@ def get_time_micros(ndarray[int64_t] dtindex):

return micros


@cython.wraparound(False)
def get_date_field(ndarray[int64_t] dtindex, object field):
'''
Expand Down Expand Up @@ -4386,6 +4387,75 @@ cpdef normalize_date(object dt):
raise TypeError('Unrecognized type: %s' % type(dt))


cdef inline int _year_add_months(pandas_datetimestruct dts,
                                 int months):
    '''
    Return the year number reached after shifting the date held in
    ``dts`` by ``months`` months (``months`` may be negative).
    '''
    # The month is moved to a 0-based count so that whole multiples of 12
    # carry into the year. Floor division is spelled explicitly: with a
    # bare `/` the result would become true division (then truncate toward
    # zero on return) under `from __future__ import division` or
    # language_level=3, giving the wrong year for backward shifts.
    return dts.year + (dts.month + months - 1) // 12

cdef inline int _month_add_months(pandas_datetimestruct dts,
                                  int months):
    '''
    Return the month number reached after shifting the date held in
    ``dts`` by ``months`` months.
    '''
    cdef int wrapped = (dts.month + months) % 12
    # a remainder of 0 stands for December rather than "month 0"
    if wrapped == 0:
        return 12
    return wrapped

@cython.wraparound(False)
def shift_months(int64_t[:] dtindex, int months, object day=None):
    '''
    Given an int64-based datetime index, shift all elements
    specified number of months using DateOffset semantics

    Only the year, month and day fields of each element are changed;
    NPY_NAT entries are copied through untouched.

    Parameters
    ----------
    dtindex : int64_t[:]
        datetimes, converted with PANDAS_FR_ns
    months : int
        number of months to shift by, may be negative
    day : {None, 'start', 'end'}
        * None: keep the day of month, clipped to the length of the
          target month
        * 'start': 1st day of month; if `months` <= 0 and the input is
          already on the 1st, shift back one additional month
        * 'end': last day of month; if `months` >= 0 and the input is
          already on the last day of its month, shift forward one
          additional month

    Returns
    -------
    ndarray of int64, same length as `dtindex`

    Raises
    ------
    ValueError
        if `day` is not one of the documented options
    '''
    cdef:
        Py_ssize_t i
        # Py_ssize_t rather than int so very long inputs cannot overflow
        Py_ssize_t count = len(dtindex)
        int days_in_month
        pandas_datetimestruct dts
        int64_t[:] out = np.empty(count, dtype='int64')
        bint to_start = False
        bint to_end = False

    # Resolve the anchor option once, outside the loop, so the per-element
    # work involves no Python object comparisons; an unknown option is an
    # error instead of silently returning the input unshifted.
    if day is None:
        pass
    elif day == 'start':
        to_start = True
    elif day == 'end':
        to_end = True
    else:
        raise ValueError("day must be None, 'start' or 'end'")

    for i in range(count):
        if dtindex[i] == NPY_NAT:
            out[i] = NPY_NAT
            continue

        pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)

        # NB: throughout, the year must be updated before the month because
        # both helpers read the not-yet-shifted dts.month.
        if to_start:
            dts.year = _year_add_months(dts, months)
            dts.month = _month_add_months(dts, months)

            # offset semantics - when subtracting if at the start anchor
            # point, shift back by one more month
            if months <= 0 and dts.day == 1:
                dts.year = _year_add_months(dts, -1)
                dts.month = _month_add_months(dts, -1)
            else:
                dts.day = 1
        elif to_end:
            # length of the *original* month, needed to tell whether the
            # input already sits on a month end
            days_in_month = days_per_month_table[is_leapyear(dts.year)][dts.month-1]
            dts.year = _year_add_months(dts, months)
            dts.month = _month_add_months(dts, months)

            # similar semantics - when adding shift forward by one
            # month if already at an end of month
            if months >= 0 and dts.day == days_in_month:
                dts.year = _year_add_months(dts, 1)
                dts.month = _month_add_months(dts, 1)

            # length of the *target* month
            days_in_month = days_per_month_table[is_leapyear(dts.year)][dts.month-1]
            dts.day = days_in_month
        else:
            dts.year = _year_add_months(dts, months)
            dts.month = _month_add_months(dts, months)
            # prevent day from wrapping around month end
            days_in_month = days_per_month_table[is_leapyear(dts.year)][dts.month-1]
            dts.day = min(dts.day, days_in_month)

        out[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
    return np.asarray(out)

#----------------------------------------------------------------------
# Don't even ask

Expand Down

0 comments on commit 9fc9201

Please sign in to comment.