Count Trailing and Leading NA for each vector
1) Cumsum - An option would be to create a logical vector with cumsum
on the presence of non-NA elements and get the sum
(base R
- No packages used)
f1 <- function(vec, trail = FALSE) {
if(trail) {
vec <- rev(vec)
}
sum(!cumsum(!is.na(vec)))
}
f1(v1)
#[1] 3
f1(v1, TRUE)
#[1] 0
sapply(mget(paste0("v", 1:3)), f1)
# v1 v2 v3
# 3 2 0
sapply(mget(paste0("v", 1:3)), f1, TRUE)
# v1 v2 v3
# 0 2 5
2 rle - Another base R
option is rle
(No packages are used)
with(rle(is.na(v2)), lengths[values & seq_along(values) %in% c(1, length(values))])
Wrapper over which.max
:
leading.nas <- function(x) {
if (length(x) == 0) {
0L
}
else {
which.min(!is.na(x)) - 1
}
}
This turns out to be similar to @Bulat's solution
count_nas <- function(x) {
nas <- is.na(x)
if (sum(nas) == length(x)) {
warning('all elements were NA')
return(c(start_na = NA_integer_, end_na = NA_integer_))
}
c(start_na = which.min(nas) - 1,
end_na = which.min(rev(nas)) - 1)
}
count_nas(v1)
#start_na end_na
# 3 0
sapply(list(v1,v2,v3), count_nas)
# [,1] [,2] [,3]
#start_na 3 2 0
#end_na 0 2 5
As far as performance, this is the fastest method with @akrun's methods being in the ballpark.
v_test3 <- sample(10000)
v_test3[c(1:3, 9998:10000)] <- NA_integer_
Unit: microseconds
expr min lq mean median uq max neval
akrun_cumsum 175.7 182.15 193.580 186.55 200.80 354.7 100
akrun_rle 168.6 199.25 210.635 209.25 221.00 289.3 100
g_grothen_zoo 1848.5 1904.45 2008.994 1941.40 2001.35 4799.6 100
g_grothen_reduce 12467.3 12888.10 14174.157 13445.15 15054.35 28241.6 100
www_rleid 5357.2 5439.40 5741.471 5517.15 5947.15 8470.4 100
bulat_and_cole 63.5 66.45 73.681 71.25 75.75 96.9 100
Code for reproducibility:
library(microbenchmark)
library(zoo)
library(data.table)
v_test3 <- sample(10000)
v_test3[c(1:3, 9998:10000)] <- NA_integer_
count_nas <- function(x) {
nas <- is.na(x)
if (sum(nas) == length(x)) {
warning('all elements were NA')
return(c(start_na = NA_integer_, end_na = NA_integer_))
}
c(start_na = which.min(nas) - 1,
end_na = which.min(rev(nas)) - 1)
}
countNA <- function(x) {
len <- function(fromLast = FALSE) length(na.locf(x, fromLast = fromLast))
if (all(is.na(x))) c(left = NA, right = NA)
else length(x) - c(left = len(), right = len(TRUE))
}
f1 <- function(vec, trail = FALSE) {
if(trail) {
vec <- rev(vec)
}
sum(!cumsum(!is.na(vec)))
}
count_fun <- function(x){
y <- rleid(x)
z <- split(x, y)[c(1, length(unique(y)))]
ans <- sapply(z, function(x) sum(is.na(x)))
return(unname(ans))
}
countNA2 <- function(x) {
f <- function(x) sum(Reduce(all, is.na(x), acc = TRUE))
if (all(is.na(x))) c(left = NA, right = NA)
else c(left = f(x), right = f(rev(x)))
}
microbenchmark(
akrun_cumsum = {
f1(v_test3, TRUE)
f1(v_test3, FALSE)
}
,
akrun_rle = {
with(rle(is.na(v_test3)), lengths[values & seq_along(values) %in% c(1, length(values))])
}
,
g_grothen_zoo = {
countNA(v_test3)
}
,
g_grothen_reduce = {
countNA2(v_test3)
}
,
www_rleid = {
count_fun(v_test3)
}
,
bulat_and_cole = {
count_nas(v_test3)
}
)