Split data frame string column into multiple columns
5 years later adding the obligatory data.table
solution
library(data.table) ## v 1.9.6+
setDT(before)[, paste0("type", 1:2) := tstrsplit(type, "_and_")]
before
# attr type type1 type2
# 1: 1 foo_and_bar foo bar
# 2: 30 foo_and_bar_2 foo bar_2
# 3: 4 foo_and_bar foo bar
# 4: 6 foo_and_bar_2 foo bar_2
We could also both make sure that the resulting columns will have correct types and improve performance by adding type.convert
and fixed
arguments (since "_and_"
isn't really a regex)
setDT(before)[, paste0("type", 1:2) := tstrsplit(type, "_and_", type.convert = TRUE, fixed = TRUE)]
Another option is to use the new tidyr package.
library(dplyr)
library(tidyr)
before <- data.frame(
attr = c(1, 30 ,4 ,6 ),
type = c('foo_and_bar', 'foo_and_bar_2')
)
before %>%
separate(type, c("foo", "bar"), "_and_")
## attr foo bar
## 1 1 foo bar
## 2 30 foo bar_2
## 3 4 foo bar
## 4 6 foo bar_2
Use stringr::str_split_fixed
library(stringr)
str_split_fixed(before$type, "_and_", 2)