Find columns with all missing values
In dplyr
# Helper: positions of the columns of `df` that are not entirely NA.
#
# Args:
#   df: A data frame.
#
# Returns:
#   An unnamed integer vector of column positions holding at least one
#   non-missing value.
ColNums_NotAllMissing <- function(df) {
  # Number of NA cells in each column.
  na_per_column <- colSums(is.na(df))
  # A column is kept unless every one of its rows is NA.
  has_observed_value <- na_per_column != nrow(df)
  unname(which(has_observed_value))
}
# Keep only the columns of `df` that contain at least one non-missing value.
df %>%
  select(ColNums_NotAllMissing(.))
example:
# Column `x` is entirely NA, `y` has one NA and `z` has none,
# so the selection below keeps `y` and `z`.
x <- data.frame(
  x = c(NA, NA, NA),
  y = c(1, 2, NA),
  z = c(5, 6, 7)
)
x %>%
  select(ColNums_NotAllMissing(.))
or, the other way around
# Helper: positions of the columns of `df` in which every value is NA.
#
# Args:
#   df: A data frame.
#
# Returns:
#   An unnamed integer vector of column positions that are entirely missing.
Cols_AllMissing <- function(df) {
  # Number of NA cells in each column.
  na_per_column <- colSums(is.na(df))
  # A column is all-missing when its NA count equals the row count.
  is_entirely_na <- na_per_column == nrow(df)
  unname(which(is_entirely_na))
}
# Drop the columns of `x` whose values are all missing.
x %>%
  select(-Cols_AllMissing(.))
To find the columns with all values missing
# For each column of `dataset`, record whether every value is NA.
# NOTE(review): apply() converts a data frame to a matrix before iterating;
# `dataset` is not defined in this snippet -- confirm its type at the call site.
allmisscols <- apply(
  dataset,
  MARGIN = 2,
  FUN = function(column) all(is.na(column))
)
# Names of the columns flagged as entirely missing.
colswithallmiss <- names(which(allmisscols))
print("the columns with all values missing")
print(colswithallmiss)
This is easy enough to do with sapply
and a small anonymous function:
# One logical per column of test1: TRUE when the whole column is NA.
sapply(test1, function(column) all(is.na(column)))
X1 X2 X3
FALSE FALSE FALSE
# One logical per column of test2: TRUE when the whole column is NA.
sapply(test2, function(column) all(is.na(column)))
X1 X2 X3
FALSE TRUE FALSE
And inside a function:
# Stop with an error if any column of `x` consists entirely of NA values.
#
# Args:
#   x: A data frame (or list) whose elements are checked one by one.
#
# Returns:
#   NULL, invisibly, when no element is entirely NA.
#
# Raises:
#   An error listing the positions of the all-NA columns.
na.test <- function (x) {
  # vapply() always yields a logical vector, even for zero-length input,
  # whereas sapply() would return an empty list there.
  all_missing <- vapply(x, function(column) all(is.na(column)), logical(1))
  if (any(all_missing)) {
    stop(paste("All NA in columns", paste(which(all_missing), collapse = ", ")))
  }
}
# test1 has no all-NA column (see the sapply output above), so this call returns silently.
na.test(test1)
# test2's second column is entirely NA, so this call stops with the error shown below.
na.test(test2)
Error in na.test(test2) : All NA in columns 2