Given a date range how can we break it up into N contiguous sub-intervals?
You should use `datetime` instead of `date`:
from datetime import date, datetime, timedelta
# Inclusive endpoints of the range to split, written as 'YYYYMMDD' strings.
begin, end = '20150101', '20150228'
def get_yyyy_mm_dd(yyyymmdd):
    """Split a 'YYYYMMDD' string into its numeric components.

    Args:
        yyyymmdd: A string of exactly eight digits, e.g. '20150228'.

    Returns:
        A tuple ``(year, month, day)`` of ints.

    Raises:
        ValueError: If the string is not exactly eight digits.
    """
    # Reject anything that is not eight digits up front; otherwise a value
    # such as '201501011' would be sliced into a plausible but wrong date.
    if len(yyyymmdd) != 8 or not yyyymmdd.isdigit():
        raise ValueError(
            "expected a date string in 'YYYYMMDD' form, got %r" % (yyyymmdd,)
        )
    return int(yyyymmdd[0:4]), int(yyyymmdd[4:6]), int(yyyymmdd[6:8])
# Parse both endpoints into their numeric parts, then build datetime objects.
# d1 and d2 first hold the day numbers and are then rebound to the datetimes.
(y1, m1, d1), (y2, m2, d2) = get_yyyy_mm_dd(begin), get_yyyy_mm_dd(end)
d1, d2 = datetime(y1, m1, d1), datetime(y2, m2, d2)
def remove_tack(dates_list):
    """Strip the two date dashes from the string form of each item.

    Each item is converted with ``str()`` and is expected to start with
    'YYYY-MM-DD'. The characters at positions 4 and 7 (the dashes) are
    dropped; everything after the day is kept unchanged.

    Args:
        dates_list: Iterable of objects whose ``str()`` starts with
            'YYYY-MM-DD', such as ``date`` or ``datetime`` instances.

    Returns:
        A list of strings: 'YYYYMMDD' for ``date`` inputs, and
        'YYYYMMDD HH:MM:SS' for ``datetime`` inputs, because the time part
        of the string is preserved.
    """
    texts = (str(d) for d in dates_list)
    return [s[0:4] + s[5:7] + s[8:] for s in texts]
def divide_date(date1, date2, intervals):
    """Split the range from date1 to date2 into equal contiguous sub-intervals.

    Args:
        date1: Start of the range (a ``datetime``).
        date2: End of the range (a ``datetime``).
        intervals: Number of sub-intervals; a positive integer.

    Returns:
        A list of ``intervals + 1`` boundary strings, from date1 through
        date2 inclusive, formatted by ``remove_tack``.

    Raises:
        ValueError: If ``intervals`` is less than 1.
    """
    if intervals < 1:
        raise ValueError("intervals must be a positive integer")
    span = date2 - date1
    # Divide by the requested count (the step used to be hard-coded to a
    # quarter of the span). Each boundary is derived from the exact span, so
    # nothing accumulates and the last boundary is exactly date2.
    dates = [date1 + (span * i) / intervals for i in range(intervals + 1)]
    return remove_tack(dates)
# Split the example range into four sub-intervals and show the boundaries.
listdates = divide_date(d1, d2, 4)
# The call form of print works on both Python 2 and Python 3.
print(listdates)
result:
['20150101 00:00:00', '20150115 12:00:00', '20150130 00:00:00', '20150213 12:00:00', '20150228 00:00:00']
Using `DatetimeIndex` and `Period` objects from pandas, together with a dictionary comprehension:
import pandas as pd
import datetime as dt  # `dt` is used below but was never imported

begin = '20100101'
end = '20101231'
start = dt.datetime.strptime(begin, '%Y%m%d')
finish = dt.datetime.strptime(end, '%Y%m%d')
# pd.date_range builds the daily DatetimeIndex; passing start/end/freq to the
# DatetimeIndex constructor itself is not accepted by current pandas.
dates = pd.date_range(start=start, end=finish, freq='D').tolist()
# Label every day with the calendar quarter it falls in.
quarters = [d.to_period('Q') for d in dates]
df = pd.DataFrame({'Quarter': quarters, 'Date': dates},
                  columns=['Quarter', 'Date'])
# Group once per quarter rather than re-filtering the frame for every day.
quarterly_dates = {
    str(quarter): [ts.strftime('%Y%m%d') for ts in group['Date']]
    for quarter, group in df.groupby('Quarter')
}
>>> quarterly_dates
{'2010Q1': ['20100101',
'20100102',
'20100103',
'20100104',
'20100105',
...
'20101227',
'20101228',
'20101229',
'20101230',
'20101231']}
>>> quarterly_dates.keys()
['2010Q1', '2010Q2', '2010Q3', '2010Q4']
Could you use the datetime.date objects instead?
If you do:
import datetime
begin = datetime.date(2001, 1, 1)
end = datetime.date(2010, 12, 31)
intervals = 4
# Nominal length of one sub-interval. Divide by `intervals`, not a literal 4,
# so that changing the count above actually changes the split.
delta = (end - begin) / intervals
# Derive each end date from the exact span. Multiplying the rounded `delta`
# can fall a few microseconds short of the span, which would make the final
# entry land one day before `end`.
date_list = [
    (begin + (end - begin) * i / intervals).strftime('%Y%m%d')
    for i in range(1, intervals + 1)
]
and date_list should have the end dates for each interval.
I would actually follow a different approach and rely on timedelta and date addition to determine the non-overlapping ranges
Implementation
def date_range(start, end, intv):
    """Yield the boundaries that split start..end into intv equal parts.

    Args:
        start: First date as a 'YYYYMMDD' string.
        end: Last date as a 'YYYYMMDD' string.
        intv: Number of sub-intervals; a positive integer.

    Yields:
        ``intv + 1`` strings in 'YYYYMMDD' form: the start, each interior
        boundary, and finally the end date itself.

    Raises:
        ValueError: If ``intv`` is less than 1, or if either string does not
            match '%Y%m%d'. Because this is a generator, the error surfaces
            when iteration begins rather than at call time.
    """
    from datetime import datetime

    if intv < 1:
        raise ValueError("intv must be a positive integer")
    first = datetime.strptime(start, "%Y%m%d")
    last = datetime.strptime(end, "%Y%m%d")
    span = last - first
    for i in range(intv):
        # Scale the exact span instead of multiplying a rounded step.
        yield (first + span * i / intv).strftime("%Y%m%d")
    # Emit the end date explicitly so the final boundary is always exact.
    yield last.strftime("%Y%m%d")
Execution
>>> begin = '20150101'
>>> end = '20150228'
>>> list(date_range(begin, end, 4))
['20150101', '20150115', '20150130', '20150213', '20150228']