Fill NAs in a time series, but only up to a limited number of consecutive values

This does not use na.locf. The idea is to split your xts into groups that each start at a non-missing value, and then, within each group, replace only the first 3 values after the non-missing one with that value. It is still a loop, but since it runs once per group rather than once per element, it should be faster than a simple loop over all the values.

# Forward-fill at most 3 NAs after each non-missing value of `x`.
# `x` must already exist; coredata(), index() and zoo() are presumably the
# zoo/xts helpers (no library() call is visible in this snippet -- confirm).
#
# cumsum(!is.na(x)) increases by one at every non-missing value, so each group
# is one observation followed by its run of NAs. A leading run of NAs forms
# its own group and stays NA because its first element is NA.
#
# lapply() is used instead of sapply(): when every group happens to have the
# same length, sapply() simplifies the result to a matrix, unlist() leaves that
# matrix untouched, and zoo() would then receive the wrong shape.
zz <- unlist(lapply(
  split(coredata(x), cumsum(!is.na(x))),
  function(sx) {
    # Overwrite the observation itself plus up to 3 following values; shorter
    # groups are filled completely, longer ones keep their remaining NAs.
    n_fill <- min(length(sx), 4L)
    sx[seq_len(n_fill)] <- sx[1]
    sx
  }
))
## The fill above works on the bare values only, so rebuild the zoo object
## on the original index.
zoo(zz, index(x))

2014-09-20 2014-09-21 2014-09-22 2014-09-23 2014-09-24 2014-09-25 2014-09-26 2014-09-27 2014-09-28 2014-09-29 2014-09-30 2014-10-01 2014-10-02 
         1          1          1          1          5          5          5          5         NA         NA         11         12         12 
2014-10-03 2014-10-04 2014-10-05 2014-10-06 2014-10-07 2014-10-08 2014-10-09 
        12         12         NA         NA         NA         19         20 

And another idea that, unless I've missed something, seems valid:

#' Carry the last observation forward over at most `n` consecutive NAs
#'
#' @param x A vector (or single-column series) that may contain `NA`s.
#' @param n Maximum number of `NA`s to fill after each non-missing value.
#' @return A plain vector with the same length as `x`. Each non-missing value
#'   is repeated over up to `n` following `NA`s; `NA`s beyond that, and any
#'   leading `NA`s, are kept. Empty or all-`NA` input is returned unchanged
#'   (as a plain vector).
na_locf_until <- function(x, n = 3) {
  vals <- as.vector(x)
  len <- length(vals)
  wnn <- which(!is.na(vals))

  # Nothing to carry forward; also avoids rep(NA, NA) failing below.
  if (length(wnn) == 0L) {
    return(vals)
  }

  # First position after each observation that has to fall back to NA.
  cutoff <- wnn + n + 1
  # A cutoff only matters if it comes strictly before the next observation.
  # The last observation is bounded by len + 1 (not len) so that a cutoff
  # landing exactly on the final element is kept instead of being filled.
  keep <- cutoff < c(wnn[-1], len + 1)
  inds <- sort(c(wnn, cutoff[keep]))

  # Every index in `inds` is repeated until the next one starts: observation
  # positions yield their value, cutoff positions yield the NA stored there.
  run_len <- c(diff(inds), len - inds[length(inds)] + 1)
  c(rep(NA, wnn[1] - 1), vals[rep(inds, run_len)])
}
# Example call with the default n = 3. `x` is not defined in this snippet
# (presumably the sample series from the question -- confirm).
na_locf_until(x)
#[1]  1  1  1  1  5  5  5  5 NA NA 11 12 12 12 12 NA NA NA 19 20

Here's another way:

# Run id per element: increases by one at each non-missing value, so leading
# NAs get id 0 and every later element carries the id of the observation
# before it.
l <- cumsum(!is.na(x))
# Lookup table with NA in slot 1 and the k-th observed value in slot k + 1.
# Multiplying the run id by the "within the first 4 positions of its run" mask
# sends every later position to slot 1 (NA) and leaves the others on their
# own observation.
c(NA, x[!is.na(x)])[1 + l * (ave(l, l, FUN = seq_along) <= 4)]
# [1]  1  1  1  1  5  5  5  5 NA NA 11 12 12 12 12 NA NA NA 19 20

edit: my previous answer required that x have no duplicates. The current answer does not.

benchmarks

# Benchmark input: recycle the existing `x` to 10,000 elements.
x <- rep(x, length.out=1e4)

#' Forward-fill at most `n` NAs after each observation (lookup-table approach)
#'
#' @param x A vector that may contain `NA`s.
#' @param n Maximum number of `NA`s to fill after each non-missing value.
#'   The default of 3 reproduces the previously hard-coded limit.
#' @return A vector the same length as `x` in which each non-missing value is
#'   repeated over up to `n` following `NA`s; all other `NA`s are kept.
plourde <- function(x, n = 3) {
  observed <- !is.na(x)
  # Run id: increases by one at each observation; leading NAs get id 0.
  run_id <- cumsum(observed)
  # 1-based position inside each run; position 1 is the observation itself
  # (or the first NA of a leading run).
  pos_in_run <- ave(run_id, run_id, FUN = seq_along)
  # Slot 1 holds NA, slot k + 1 holds the k-th observed value. Indexing by
  # run id rather than by value keeps duplicated values in `x` working.
  lookup <- c(NA, x[observed])
  # Positions past the observation and its n fills are redirected to slot 1.
  lookup[replace(run_id, pos_in_run > n + 1, 0) + 1]
}

#' Forward-fill at most `n` NAs after each observation (split/apply approach)
#'
#' @param x A series whose values are extracted with `coredata()`
#'   (presumably the zoo/xts helper -- it is not defined in this file).
#' @param n Maximum number of `NA`s to fill after each non-missing value.
#'   The default of 3 reproduces the previously hard-coded limit.
#' @return A vector of the filled values, named by `unlist()` after the
#'   group ids. The result is always a flat vector, never a matrix.
agstudy <- function(x, n = 3) {
  # One group per observation plus its trailing NAs; leading NAs form their
  # own group, whose first element is NA, so they stay NA.
  groups <- split(coredata(x), cumsum(!is.na(x)))
  # lapply() instead of sapply(): sapply() simplifies equal-length groups to a
  # matrix, which unlist() would return unchanged.
  filled <- lapply(groups, function(sx) {
    # Overwrite the observation plus up to n following values; shorter groups
    # are filled completely, longer ones keep their remaining NAs.
    n_fill <- min(length(sx), n + 1)
    sx[seq_len(n_fill)] <- sx[1]
    sx
  })
  unlist(filled)
}

# Time both implementations on the same input. microbenchmark() is presumably
# provided by the microbenchmark package (no library() call is visible here).
# The figures below are the output recorded with the original post.
microbenchmark(plourde(x), agstudy(x))
# Unit: milliseconds
#        expr   min     lq median     uq   max neval
#  plourde(x)  5.30  5.591  6.409  6.774 57.13   100
#  agstudy(x) 16.04 16.249 16.454 17.516 20.64   100