Shifting non-NA cells to the left
You can use the standard apply
function:
# Example input: three columns with NA values scattered across the rows.
df <- data.frame(
  x = c("l", "m", NA, NA, "p"),
  y = c(NA, "b", "c", NA, NA),
  z = c("u", NA, "w", "x", "y")
)

# For every row, put the non-NA values first and the NAs last.
# apply() over rows yields one column per input row, hence the transpose
# before converting the matrix back into a data frame.
df2 <- as.data.frame(t(apply(df, 1, function(row) {
  missing <- is.na(row)
  c(row[!missing], row[missing])
})))

# The matrix round-trip does not keep the original column names; restore them.
colnames(df2) <- colnames(df)
> df
x y z
1 l <NA> u
2 m b <NA>
3 <NA> c w
4 <NA> <NA> x
5 p <NA> y
> df2
x y z
1 l u <NA>
2 m b <NA>
3 c w <NA>
4 x <NA> <NA>
5 p y <NA>
Thanks to @Richard Scriven for the good observation.
A) With `is.na` and `order`, plus `lapply` and `rbind` for aggregation:
# Shift the non-NA cells of every row to the left while keeping their
# original left-to-right order.
# order() on the logical is.na() mask places FALSE (non-NA) before TRUE (NA)
# and leaves ties in their original order.
# seq_len() avoids iterating over c(1, 0) when `df` has no rows, and the
# column names are taken from `df` rather than hard-coded.
nosort.df <- do.call(rbind, lapply(seq_len(nrow(df)), function(i) {
  row <- df[i, ]
  shifted <- row[order(is.na(row))]
  # Reset the names so rbind() stacks the rows by position instead of
  # matching them on the reordered column names.
  colnames(shifted) <- colnames(df)
  shifted
}))
> nosort.df
x y z
1 l u <NA>
2 m b <NA>
3 c w <NA>
4 x <NA> <NA>
5 p y <NA>
B) If sorted rows are required, use `sort` together with `lapply` and `rbind`:
# Sort the values within every row, placing NAs last, and stack the rows
# back into one data frame.
# seq_len() avoids iterating over c(1, 0) when `df` has no rows, and the
# column names are taken from `df` rather than hard-coded.
# NOTE(review): sort() is applied to a one-row data frame here; confirm it
# behaves as intended (and without warnings) on the R version in use.
sort.df <- do.call(rbind, lapply(seq_len(nrow(df)), function(i) {
  sorted <- sort(df[i, ], na.last = TRUE)
  # Reset the names so rbind() stacks the rows by position instead of
  # matching them on the reordered column names.
  colnames(sorted) <- colnames(df)
  sorted
}))
> sort.df
x y z
1 l u <NA>
2 b m <NA>
3 c w <NA>
4 x <NA> <NA>
5 p y <NA>
I have included a function for this task in my package dedupewider
(available on CRAN). It allows moving `NA` values
to the right, to the left, or even to the top or bottom:
library(dedupewider)
# Example input: three columns with NA values scattered across the rows.
df <- data.frame(
  x = c("l", "m", NA, NA, "p"),
  y = c(NA, "b", "c", NA, NA),
  z = c("u", NA, "w", "x", "y")
)
# Shift the non-NA values of each row to the left, so the NAs end up on the
# right (see the printed result below).
na_move(df) # 'right' is the default direction
#> x y z
#> 1 l u NA
#> 2 m b NA
#> 3 c w NA
#> 4 x <NA> NA
#> 5 p y NA
It implements the reshaping approach (from wide format to long and back to wide) and internally uses data.table
functions. As a result, it is significantly faster than the standard solution using apply:
library(dedupewider)
library(microbenchmark)
# Base example: five rows with NA values scattered across three columns.
df <- data.frame(
  x = c("l", "m", NA, NA, "p"),
  y = c(NA, "b", "c", NA, NA),
  z = c("u", NA, "w", "x", "y")
)

# Stack 10,000 copies of the example to build the benchmark input.
df <- do.call(
  rbind,
  replicate(10000, df, simplify = FALSE)
)
# Baseline implementation used in the benchmark: for every row of `df`, move
# the non-NA values to the front and the NAs to the back.
# Returns a data frame built from the transposed apply() result.
apply_function <- function(df) {
  shift_left <- function(row) {
    missing <- is.na(row)
    c(row[!missing], row[missing])
  }
  # apply() over rows yields one column per input row, hence the transpose.
  shifted <- apply(df, 1, shift_left)
  as.data.frame(t(shifted))
}
# Time the apply()-based implementation against na_move() on the same input.
microbenchmark(apply_function(df), na_move(df))
#> Unit: milliseconds
#> expr min lq mean median uq max
#> apply_function(df) 289.2032 361.0178 475.65281 425.79355 545.6405 999.4086
#> na_move(df) 51.0419 58.1426 75.32407 65.01445 92.8706 216.6384