Split string column to create new binary columns

Another option is to melt the list of split strings into long form and then use table:

library(reshape2)
# Split the first column of `mydata` on "/", melt the resulting list to long
# form, and cross-tabulate the melted columns taken in reversed order ([2:1]).
# The trailing [, -1] drops the first column of the table -- presumably the
# empty-string piece produced by the leading "/" in each value; confirm
# against your data.
as.data.frame.matrix(
  table(
    melt(strsplit(as.character(mydata[[1]]), split = "/"))[2:1]
  )
)[, -1]
#    ca eq2_off eq2_on fe.gr put sent_1 hi.on hi.ov sent_1fe.gr cni_at.p3x.4
#1  1       1      1     1   1      1     0     0           0            0
#2  1       1      1     1   1      1     1     1           0            0
#3  1       1      1     0   1      0     1     1           1            0
#4  1       1      1     1   1      1     0     0           0            1
#5  1       1      1     1   1      1     0     0           0            0
#  cbr_LBL
#1       0
#2       0
#3       0
#4       0
#5       1

Using mtabulate from the qdapTools package that I maintain:

library(qdapTools)
# Split the first column of `mydata` on "/" and tabulate the pieces of each
# row with mtabulate(). The data frame is referred to as `mydata`, matching
# the name used by the other snippets on this page.
mtabulate(strsplit(as.character(mydata[[1]]), "/"))

##   V1 ca cbr_LBL cni_at.p3x.4 eq2_off eq2_on fe.gr hi.on hi.ov put sent_1 sent_1fe.gr
## 1  1  1       0            0       1      1     1     0     0   1      1           0
## 2  1  1       0            0       1      1     1     1     1   1      1           0
## 3  1  1       0            0       1      1     0     1     1   1      0           1
## 4  1  1       0            1       1      1     1     0     0   1      1           0
## 5  1  1       1            0       1      1     1     0     0   1      1           0

You can use cSplit_e from my "splitstackshape" package:

library(splitstackshape)
# Expand the "/"-separated "ALL" column into one indicator column per
# distinct value, keeping the original column; absent values are filled
# with 0.
cSplit_e(
  mydata,
  split.col = "ALL",
  sep = "/",
  type = "character",
  fill = 0
)
#                                                ALL ALL_ca ALL_cbr_LBL
# 1              /ca/put/sent_1/fe.gr/eq2_on/eq2_off      1           0
# 2  /ca/put/sent_1/fe.gr/eq2_on/eq2_off/hi.on/hi.ov      1           0
# 3   /ca/put/sent_1fe.gr/eq2_on/eq2_off/hi.on/hi.ov      1           0
# 4 /ca/put/sent_1/fe.gr/eq2_on/eq2_off/cni_at.p3x.4      1           0
# 5      /ca/put/sent_1/fe.gr/eq2_on/eq2_off/cbr_LBL      1           1
#   ALL_cni_at.p3x.4 ALL_eq2_off ALL_eq2_on ALL_fe.gr ALL_hi.on ALL_hi.ov ALL_put
# 1                0           1          1         1         0         0       1
# 2                0           1          1         1         1         1       1
# 3                0           1          1         0         1         1       1
# 4                1           1          1         1         0         0       1
# 5                0           1          1         1         0         0       1
#   ALL_sent_1 ALL_sent_1fe.gr
# 1          1               0
# 2          1               0
# 3          0               1
# 4          1               0
# 5          1               0

(Note: I think there's a problem in row 3 of your dput, which is why it doesn't match your desired output. Notice that the third item in row 3 is "sent_1fe.gr", with no "/" between "sent_1" and "fe.gr".)

How about something like this:

# Split each value of mydata$ALL on the literal "/" (fixed = TRUE disables
# regular-expression matching). TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned.
spt <- strsplit(as.character(mydata$ALL), "/", fixed = TRUE)
# Convert every row's pieces to a factor over the same set of levels (all
# distinct pieces, in order of first appearance) so each per-row table() has
# identical columns, then stack the tables into one matrix.
do.call(
  rbind,
  lapply(lapply(spt, factor, levels = unique(unlist(spt))), table)
)

which returns

       ca put sent_1 fe.gr eq2_on eq2_off hi.on hi.ov sent_1fe.gr cni_at.p3x.4 cbr_LBL
[1,] 1  1   1      1     1      1       1     0     0           0            0       0
[2,] 1  1   1      1     1      1       1     1     1           0            0       0
[3,] 1  1   1      1     0      1       1     1     1           1            0       0
[4,] 1  1   1      1     1      1       1     0     0           0            1       0
[5,] 1  1   1      1     1      1       1     0     0           0            0       1

Split string column to create new binary columns

Tags:

Split

R

Vectorization

Related

Recent Posts