Grouping every n minutes with dplyr
lubridate-dplyr
-esque solution.
library(lubridate)
library(dplyr)
d2 <- data.frame(interval = seq(ymd_hms('2010-05-21 00:00:00'), by = '3 min',length.out=(1440/3)))
d3 <- d1 %>%
mutate(interval = floor_date(date, unit="hour")+minutes(floor(minute(date)/3)*3)) %>%
group_by(interval) %>%
mutate(sumvalue=sum(value)) %>%
select(interval,sumvalue)
d4 <- merge(d2,d3, all=TRUE) # better if left_join is used
tail(d4)
# interval sumvalue
#475 2010-05-21 23:42:00 NA
#476 2010-05-21 23:45:00 NA
#477 2010-05-21 23:48:00 NA
#478 2010-05-21 23:51:00 NA
#479 2010-05-21 23:54:00 NA
#480 2010-05-21 23:57:00 NA
d4[450,]
# interval sumvalue
#450 2010-05-21 22:27:00 643426
If you are comfortable working with Date
(I am not), you can dispense with lubridate
, and replace the final merge with left_join
.
If you need to group data into n
minute bins, the floor_date
function can allow multiple units to be specified within the unit
argument of the function. For example:
library(lubridate)
x <- ymd_hms("2009-08-03 12:25:59.23")
floor_date(x, unit = "3minutes")
"2009-08-03 12:24:00 UTC"
Using your example:
library(lubridate)
library(tidyverse)
# make complete time sequence
d2 <- data.frame(timePeriod = seq(as.POSIXct("2010-05-21 00:00:00"),
by="3 min", length.out=(1440/3)))
d1 %>%
mutate(timePeriod = floor_date(date, "3minutes")) %>%
group_by(timePeriod) %>%
summarise(sum = sum(value)) %>%
right_join(d2)