Collapse continuous integer runs to strings of ranges
Here is an attempt using diff
and tapply
returning a character vector
runs <- lapply(z, function(x) {
z <- which(diff(x)!=1);
results <- x[sort(unique(c(1,length(x), z,z+1)))]
lr <- length(results)
collapse <- rep(seq_len(ceiling(lr/2)),each = 2, length.out = lr)
as.vector(tapply(results, collapse, paste, collapse = ':'))
})
runs
$greg
[1] "7:11" "20:24" "30:33" "49"
$researcher
[1] "42:48"
$sally
[1] "25:29" "37:41"
$sam
[1] "1:6" "16:19" "34:36"
$teacher
[1] "12:15"
I think diff
is the solution. You might need some additional fiddling to deal with the singletons, but:
lapply(z, function(x) {
diffs <- c(1, diff(x))
start_indexes <- c(1, which(diffs > 1))
end_indexes <- c(start_indexes - 1, length(x))
coloned <- paste(x[start_indexes], x[end_indexes], sep=":")
paste0(coloned, collapse=", ")
})
$greg
[1] "7:11, 20:24, 30:33, 49:49"
$researcher
[1] "42:48"
$sally
[1] "25:29, 37:41"
$sam
[1] "1:6, 16:19, 34:36"
$teacher
[1] "12:15"
I have a fairly similar solution to Marius, his works as well as mine but the mechanisms are slightly different so I thought I may as well post it:
findIntRuns <- function(run){
rundiff <- c(1, diff(run))
difflist <- split(run, cumsum(rundiff!=1))
unname(sapply(difflist, function(x){
if(length(x) == 1) as.character(x) else paste0(x[1], ":", x[length(x)])
}))
}
lapply(z, findIntRuns)
Which produces:
$greg
[1] "7:11" "20:24" "30:33" "49"
$researcher
[1] "42:48"
$sally
[1] "25:29" "37:41"
$sam
[1] "1:6" "16:19" "34:36"
$teacher
[1] "12:15"
Using IRanges
:
require(IRanges)
lapply(z, function(x) {
t <- as.data.frame(reduce(IRanges(x,x)))[,1:2]
apply(t, 1, function(x) paste(unique(x), collapse=":"))
})
# $greg
# [1] "7:11" "20:24" "30:33" "49"
#
# $researcher
# [1] "42:48"
#
# $sally
# [1] "25:29" "37:41"
#
# $sam
# [1] "1:6" "16:19" "34:36"
#
# $teacher
# [1] "12:15"