How do I find missing dates in a list of sorted dates?
>>> from datetime import datetime, timedelta
>>> date_list = [datetime(2010, 2, 23),datetime(2010, 2, 24),datetime(2010, 2, 25),datetime(2010, 2, 26),datetime(2010, 3, 1),datetime(2010, 3, 2)]
>>>
>>> date_set=set(date_list) # for faster membership tests than list
>>> one_day = timedelta(days=1)
>>>
>>> test_date = date_list[0]
>>> missing_dates=[]
>>> while test_date < date_list[-1]:
...     if test_date not in date_set:
...         missing_dates.append(test_date)
...     test_date += one_day
...
>>> print missing_dates
[datetime.datetime(2010, 2, 27, 0, 0), datetime.datetime(2010, 2, 28, 0, 0)]
This also works for datetime.date objects, but the OP says the list contains datetime.datetime objects.
Sort the list of dates and iterate over it, remembering the previous entry. If the difference between the previous and current entry is more than one day, you have missing days.
Here's one way to implement it:
from datetime import date, timedelta
from itertools import tee, izip
def pairwise(iterable):
    """Yield overlapping pairs: s -> (s0, s1), (s1, s2), (s2, s3), ...

    The pairs are produced lazily. An input with fewer than two items
    produces no pairs at all.
    """
    items = iter(iterable)
    # next() with a default lets an empty input end quietly instead of
    # raising StopIteration; a private sentinel keeps None a legal item.
    nothing = object()
    prev = next(items, nothing)
    if prev is nothing:
        return
    for curr in items:
        yield prev, curr
        prev = curr
def missing_dates(dates):
for prev, curr in pairwise(sorted(dates)):
i = prev
while i + timedelta(1) < curr:
i += timedelta(1)
yield i
# Sample input, deliberately unsorted: missing_dates() sorts before scanning.
dates = [
    date(2010, 1, 8),
    date(2010, 1, 2),
    date(2010, 1, 5),
    date(2010, 1, 1),
    date(2010, 1, 7),
]
for missing in missing_dates(dates):
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(missing)
Output:
2010-01-03
2010-01-04
2010-01-06
Performance: sorting costs O(k*log(k)) for the k dates in the input, and the scan is O(n), where n is the number of days in the span. As your list is already sorted, the sort itself is linear too, so the whole thing runs in O(n).
using sets
>>> from datetime import date, timedelta
>>> d = [date(2010, 2, 23), date(2010, 2, 24), date(2010, 2, 25),
...      date(2010, 2, 26), date(2010, 3, 1), date(2010, 3, 2)]
>>> date_set = set(d[0] + timedelta(x) for x in range((d[-1] - d[0]).days))
>>> missing = sorted(date_set - set(d))
>>> missing
[datetime.date(2010, 2, 27), datetime.date(2010, 2, 28)]
>>>
USING A FOR LOOP
The imports you'll need:
import datetime
from datetime import date, timedelta
Let's say you have a sorted list called dates
with several missing dates in it.
First select the first and last date:
# dates is sorted, so its two ends are the earliest and latest dates.
start_date = dates[0]
end_date = dates[-1]  # negative index reaches the last element directly
Then count the number of days between these two dates:
# Subtracting the two dates gives a difference whose .days is the day count.
numdays = (end_date - start_date).days
Then create a new list with all dates between start_date and end_date:
all_dates = []
# numdays + 1 so that the last offset lands on end_date itself.
for x in range(numdays + 1):
    all_dates.append(start_date + datetime.timedelta(days=x))
Then check which dates are in all_dates but not in dates by using a for loop and adding these dates to dates_missing:
dates_missing = []
# A set gives constant-time membership tests instead of rescanning the list.
known_dates = set(dates)
for day in all_dates:
    if day not in known_dates:
        dates_missing.append(day)
Now you'll have a list called dates_missing
with all the missing dates.