Separate a shopping list into multiple columns
Possibly something like this, which should work with any item/quantity pair. It only assumes that each quantity directly follows its item.
Let's use a custom function that extracts each item and its quantity:
#' Split a compact shopping-list string into items and quantities
#'
#' Assumes every item name (a run of non-digits) is immediately followed by
#' its quantity (a run of digits), e.g. "apple2milk5".
#'
#' @param w A character vector of item/quantity strings. A list or data frame
#'   of such strings (e.g. the nested column handed over by `map()`) is
#'   flattened to a plain vector first.
#' @return A data frame with character columns `item` and `quantity`, one row
#'   per item/quantity pair found; zero rows when nothing matches.
my_fun <- function(w) {
  if (is.list(w)) {
    w <- unlist(w, use.names = FALSE)
  }
  w <- as.character(w)
  # Extract each "name + digits" token as a unit so that an item and its
  # quantity can never get out of step with each other.
  tokens <- unlist(regmatches(w, gregexpr("\\D+\\d+", w)))
  data.frame(
    item = sub("\\d+$", "", tokens),
    quantity = sub("^\\D+", "", tokens),
    stringsAsFactors = FALSE
  )
}
Example:
my_fun("apple2milk5")
# Returns a data frame with one row per item found in the string:
# item quantity
# 1 apple 2
# 2 milk 5
Now we can apply the function to each id, using `nest()` and `map()`:
library(dplyr)
library(tidyr) # nest() and unnest() come from tidyr, not dplyr

# Parse each id's item string into (item, quantity) rows, then flatten the
# per-id results back into one long table with columns id, item, quantity.
df_result <- df %>%
  nest(data = item) %>%
  mutate(res = purrr::map(data, ~ my_fun(.x$item))) %>%
  # Drop the raw nested strings so only the parsed columns remain.
  select(-data) %>%
  unnest(res)
df_result
# # A tibble: 9 x 3
# id item quantity
# <int> <chr> <chr>
# 1 1 apple 2
# 2 1 milk 5
# 3 2 milk 1
# 4 3 juice 3
# 5 3 apple 5
# 6 4 egg 10
# 7 4 juice 1
# 8 5 egg 8
# 9 5 milk 2
Now we can use `dcast()` to reshape to one column per item (`spread()` would probably work too):
# data.table::dcast() only has a method for data.tables, so convert the
# tibble first, then reshape to one column per item.
data.table::dcast(
  data.table::as.data.table(df_result),
  id ~ item,
  value.var = "quantity"
)
# id apple egg juice milk
# 1 1 2 <NA> <NA> 5
# 2 2 <NA> <NA> <NA> 1
# 3 3 5 <NA> 3 <NA>
# 4 4 <NA> 10 1 <NA>
# 5 5 <NA> 8 <NA> 2
Data:
df <- data.frame(
  id = 1:5,
  item = c("apple2milk5", "milk1", "juice3apple5", "egg10juice1", "egg8milk2"),
  stringsAsFactors = FALSE
)

# Base R alternative: split every string at each letter/digit boundary, which
# yields alternating name, quantity, name, quantity, ... pieces.
tmp <- lapply(
  strsplit(df$item, "(?<=\\d)(?=\\D)|(?<=\\D)(?=\\d)", perl = TRUE),
  function(x) {
    # Odd positions hold item names, even positions hold their quantities.
    item_names <- x[c(TRUE, FALSE)]
    qty <- as.numeric(x[c(FALSE, TRUE)])
    # Indexing past the end pads with NA when the last item has no quantity.
    setNames(qty[seq_along(item_names)], item_names)
  }
)
# All distinct item names, in order of first appearance; these become columns.
nm <- unique(unlist(lapply(tmp, names)))
# Align every row to the full set of names (absent items become NA) and
# attach the resulting matrix to the original data.
cbind(df, do.call(rbind, lapply(tmp, function(x) setNames(x[nm], nm))))
# id item apple milk juice egg
#1 1 apple2milk5 2 5 NA NA
#2 2 milk1 NA 1 NA NA
#3 3 juice3apple5 5 NA 3 NA
#4 4 egg10juice1 NA NA 1 10
#5 5 egg8milk2 NA 2 NA 8
I just came up with a tidyverse solution. Use `str_extract_all()` to extract the quantities and set their names to the product names. Then `reduce(bind_rows)` stacks the named vectors into the expected wide table.
library(tidyverse)

# For each item string, pull out the quantities and name them after the
# products they follow. Binding those named vectors row-wise yields one
# column per product, which is then converted to numeric and attached to df.
df$item %>%
  map(~ set_names(
    str_extract_all(.x, "\\d+")[[1]],
    str_extract_all(.x, "\\D+")[[1]]
  )) %>%
  reduce(bind_rows) %>%
  # across() replaces the superseded mutate_all().
  mutate(across(everything(), as.numeric)) %>%
  bind_cols(df, .)
# id item apple milk juice egg
# 1 1 apple2milk5 2 5 NA NA
# 2 2 milk1 NA 1 NA NA
# 3 3 juice3apple5 5 NA 3 NA
# 4 4 egg10juice1 NA NA 1 10
# 5 5 egg8milk2 NA 2 NA 8