Get distinct consecutive date ranges from overlapping date ranges
The logic is:
- at the beginning of an interval add its value to a cumulative sum
- at the end of an interval substract its value from this sum
- but in order to sweep the dateline, we'll have to collect al the (unique) date/time stamps, either start or stop.
So the point is: convert the data from a series of intervals to a series of (start/stop) events, and aggregate over these.
-- \i tmp.sql
create table coinsonperiod(
id serial,
startdate date,
enddate date,
coins integer
);
insert into coinsonperiod (startdate, enddate, coins) values
('2018-01-01','2018-01-31', 80)
, ('2018-01-07','2018-01-10', 10)
, ('2018-01-07','2018-01-31', 10)
, ('2018-01-11','2018-01-31', 5)
, ('2018-01-25','2018-01-27', 5)
, ('2018-02-02','2018-02-23', 100)
;
WITH changes AS (
SELECT startdate AS tickdate , coins
, 1 AS cover
FROM coinsonperiod
UNION ALL
-- add 1 day to convert to half-open intervals
SELECT 1+enddate AS tickdate, -1* coins
, -1 AS cover
FROM coinsonperiod
)
, sumchanges AS (
SELECT tickdate, SUM(coins) AS change, SUM(cover) AS cover
FROM changes
GROUP BY tickdate
)
, aggregated AS (
SELECT
tickdate AS startdate
, lead(tickdate) over www AS enddate
, sum(change) OVER www AS cash
-- number of covered intervals
, sum(cover) OVER www AS cover
FROM sumchanges
WINDOW www AS (ORDER BY tickdate)
)
-- substract one day from enddate to correct back to closed intervals
SELECT startdate, enddate-1 AS enddate, cash, cover
FROM aggregated
WHERE cover > 0
ORDER BY startdate
;
Looks like I found an ugly one that works
select t1.dt, t1.enddt, sum(coins)
from (
select distinct cp1.dt, min(cp2.dt) enddt
from ( select startdate as dt from coinsonperiod union all select enddate as dt from coinsonperiod ) cp1,
( select startdate as dt from coinsonperiod union all select enddate as dt from coinsonperiod ) cp2
where cp2.dt > cp1.dt
group by cp1.dt
order by cp1.dt ) t1, coinsonperiod t2
where t1.dt between t2.startdate and t2.enddate
and t1.enddt between t2.startdate and t2.enddate
group by t1.dt, t1.enddt
Output:
dt |enddt |sum |
-----------|-----------|----|
2018-01-01 |2018-01-07 |80 |
2018-01-07 |2018-01-10 |100 |
2018-01-10 |2018-01-11 |90 |
2018-01-11 |2018-01-25 |95 |
2018-01-25 |2018-01-27 |100 |
2018-01-27 |2018-01-31 |95 |
2018-02-02 |2018-02-23 |100 |
Only difference with your output is that I suppose you forgot the interval between 01/10 and 01/11