R - fill na values sequentially from last nonzero value
One option:
library(dplyr)
df %>%
group_by(idx = cumsum(!(is.na(a) | a == 0)), is.na(a)) %>%
mutate(rn = row_number()) %>%
group_by(idx) %>%
mutate(a = coalesce(a, first(a) + rn)) %>%
ungroup() %>%
select(a)
Output:
# A tibble: 12 x 1
a
<int>
1 11127
2 0
3 0
4 11128
5 0
6 0
7 11129
8 0
9 11580
10 0
11 11581
12 0
If speed is an issue, perhaps the data.table
equivalent will be slightly faster:
library(data.table)
setDT(df)[, rn := rowid(a), .(cumsum(!(is.na(a) | a == 0)), is.na(a))][
, a := fcoalesce(a, first(a) + rn), by = cumsum(!(is.na(a) | a == 0))][
, rn := NULL]
EDIT
IMO grouping and then getting the row index for NA
s is not really elegant; it's far better what you can see in other solutions (e.g. using cumsum
).
Using fcoalesce
, the issue could then be solved in a single data.table
step:
library(data.table)
setDT(df)[, a := fcoalesce(a, first(a) + cumsum(is.na(a))), by = cumsum(!(is.na(a) | a == 0))]
One solution utilizing dplyr
could be:
df %>%
group_by(id = cumsum(!is.na(a) & a != 0)) %>%
mutate(a = ifelse(is.na(a), first(a) + cumsum(is.na(a)), a))
a id
<int> <int>
1 11127 1
2 0 1
3 0 1
4 11128 1
5 0 1
6 0 1
7 11129 1
8 0 1
9 11580 2
10 0 2
11 11581 2
12 0 2
A base R approach with cumsum(logical)
and ave
.
nze <- df1$a != 0 & !is.na(df1$a)
ave(df1$a, cumsum(nze), FUN = function(x){
na <- is.na(x)
x[na] <- x[!na][1] + seq_along(which(na))
x
})
# [1] 11127 0 0 11128 0 0 11129 0 11580 0 11581 0
Then assign this result.
df1$a <- ave(df1$a, cumsum(nze), FUN = function(x){
na <- is.na(x)
x[na] <- x[!na][1] + seq_along(which(na))
x
})