Converting Column Values into Their Own Binary Encoded Columns (Dummy Variables)
You need a `melt`/`dcast` combination here (which is called `recast`) in order to convert all columns into one column and avoid combinations:
library(reshape2)
# Melt every non-ID column of df1 into a single long `value` column, then cast
# back to wide form with one indicator column per distinct value.
# The aggregate yields 1L when an ID/value pair occurs at least once and 0L
# otherwise, so repeated rows for the same ID do not produce counts above 1.
recast(
  df1,
  ID ~ value,
  id.var = 1,
  fun.aggregate = function(x) as.integer(length(x) > 0)
)
# ID 19 23 42 61 anxiety asthma copd diabetes female male
# 1 1 0 0 1 0 1 1 0 0 0 1
# 2 2 1 0 0 0 0 1 0 0 0 1
# 3 3 0 1 0 0 0 0 0 1 1 0
# 4 4 0 0 0 1 0 0 1 1 1 0
As per your side note, you can add `variable` to the formula in order to get the original column names added too:
# Same reshape as the plain `ID ~ value` cast, but casting on
# `variable + value` prefixes each indicator column with the name of the
# column the value came from (e.g. age_19, gender_male).
recast(
  df1,
  ID ~ variable + value,
  id.var = 1,
  fun.aggregate = function(x) as.integer(length(x) > 0)
)
# ID gender_female gender_male age_19 age_23 age_42 age_61 diagnosis_anxiety diagnosis_asthma diagnosis_copd
# 1 1 0 1 0 0 1 0 1 1 0
# 2 2 0 1 1 0 0 0 0 1 0
# 3 3 1 0 0 1 0 0 0 0 0
# 4 4 1 0 0 0 0 1 0 0 1
# diagnosis_diabetes
# 1 0
# 2 0
# 3 1
# 4 1
There is a function in the `caret` package to "dummify" data:
library(caret)
library(dplyr)
# Convert every column to a factor first so that numeric columns such as `age`
# are encoded per level instead of being passed through as numbers.
# `across()` replaces the deprecated `mutate_each()` / `funs()` pair.
df_factors <- mutate(df1, across(everything(), as.factor))
# Build the encoder and apply it to the same factor-converted frame, so the
# column types seen by predict() match those the encoder was built from.
predict(dummyVars(~ ., data = df_factors), newdata = df_factors)
A base R option would be:
# Base R indicator matrix of ID by value.
#   stack(df1[-1])      stacks every column except the first into one long
#                       `values` column plus an `ind` column
#   [-2]                drops `ind`, keeping only `values`
#   cbind(df1[1], ...)  pairs each stacked value with the first column (ID)
#   table(...)          cross-tabulates ID against value (counts)
#   !!                  turns the counts into TRUE/FALSE (count is non-zero)
#   * 1L                converts TRUE/FALSE into integer 1/0
(!!table(cbind(df1[1],stack(df1[-1])[-2])))*1L
# values
#ID 19 23 42 61 anxiety asthma copd diabetes female male
# 1 0 0 1 0 1 1 0 0 0 1
# 2 1 0 0 0 0 1 0 0 0 1
# 3 0 1 0 0 0 0 0 1 1 0
# 4 0 0 0 1 0 0 1 1 1 0
If you need the original column names as a prefix as well:
# Same indicator matrix, but with "<column>_<value>" labels.
#   stack(df1[-1])[2:1]       reorders the stacked frame to (ind, values)
#   do.call(paste, c(..., sep="_"))
#                             pastes the two columns together row by row,
#                             giving labels such as age_19 or gender_male
#   cbind(df1[1], Val = ...)  pairs each label with the first column (ID)
#   (!!table(...)) * 1L       cross-tabulates, then maps non-zero counts to 1
#                             and zero counts to 0
(!!table(cbind(df1[1],Val=do.call(paste, c(stack(df1[-1])[2:1], sep="_")))))*1L
# Val
#ID age_19 age_23 age_42 age_61 diagnosis_anxiety diagnosis_asthma
#1 0 0 1 0 1 1
#2 1 0 0 0 0 1
#3 0 1 0 0 0 0
#4 0 0 0 1 0 0
# Val
#ID diagnosis_copd diagnosis_diabetes gender_female gender_male
#1 0 0 0 1
#2 0 0 0 1
#3 0 1 1 0
#4 1 1 1 0
data
# Example data: one row per (ID, diagnosis) record, so an ID can repeat.
# `stringsAsFactors = FALSE` keeps gender and diagnosis as character vectors.
df1 <- data.frame(
  ID = c(1L, 1L, 2L, 3L, 4L, 4L),
  gender = c("male", "male", "male", "female", "female", "female"),
  age = c(42L, 42L, 19L, 23L, 61L, 61L),
  diagnosis = c(
    "asthma", "anxiety", "asthma",
    "diabetes", "diabetes", "copd"
  ),
  stringsAsFactors = FALSE
)