Converting a deeply nested list to a dataframe
Another approach is to:
- Melt the nested list to a data.frame with `rrapply()` from the rrapply package (or, similarly, with `reshape2::melt()`).
- Reshape the data.frame to the required format using tidyr's `pivot_wider()` and `unnest()`.
library(rrapply)
library(tidyverse)
# Melt the nested list `ls` into a long data frame, then reshape it so that
# every distinct name in column L4 becomes its own column.
rrapply(ls, how = "melt") %>%
  # long -> wide: the new columns are list columns at this point
  pivot_wider(names_from = "L4", values_from = "value") %>%
  # expand the list columns into ordinary rows
  unnest(cols = c(Gmax.val, G2.val, Gmax.vec, G2.vec)) %>%
  # replace the generic level names with descriptive ones
  rename(time = L1, seed = L2, treatment = L3)
#> # A tibble: 64 x 7
#> time seed treatment Gmax.val G2.val Gmax.vec G2.vec
#> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 10 123 0.1 -0.626 0.184 -0.836 1.51
#> 2 10 123 0.1 -0.626 0.184 1.60 0.390
#> 3 10 123 0.1 -0.626 0.184 0.330 -0.621
#> 4 10 123 0.1 -0.626 0.184 -0.820 -2.21
#> 5 10 123 0.1 -0.626 0.184 0.487 1.12
#> 6 10 123 0.1 -0.626 0.184 0.738 -0.0449
#> 7 10 123 0.1 -0.626 0.184 0.576 -0.0162
#> 8 10 123 0.1 -0.626 0.184 -0.305 0.944
#> 9 10 123 0.2 0.821 0.594 0.919 -0.478
#> 10 10 123 0.2 0.821 0.594 0.782 0.418
#> # … with 54 more rows
Alternatively, use data.table's `dcast()` to reshape the long table into wide format:
library(data.table)
# Melt the nested list `ls` into a long data.table.
long_dt <- as.data.table(rrapply(ls, how = "melt"))

# Cast to wide format (one column per L4 name), then flatten the resulting
# list columns within each (L1, L2, L3) group.
wide_dt <- dcast(long_dt, L1 + L2 + L3 ~ L4)[
  , lapply(.SD, unlist),
  by = .(L1, L2, L3),
  .SDcols = c("Gmax.val", "G2.val", "Gmax.vec", "G2.vec")
]

# Rename the level columns by reference.
setnames(wide_dt, c("L1", "L2", "L3"), c("time", "seed", "treatment"))
Some benchmarks
# Time the tidyr and data.table variants against each other, 25 runs each.
microbenchmark::microbenchmark(
  # tidyr: melt -> pivot_wider -> unnest -> rename
  tidyr = {
    rrapply(ls, how = "melt") %>%
      pivot_wider(names_from = "L4", values_from = "value") %>%
      unnest(cols = c(Gmax.val, G2.val, Gmax.vec, G2.vec)) %>%
      rename(time = L1, seed = L2, treatment = L3)
  },
  # data.table: melt -> dcast -> per-group unlist -> setnames
  data.table = {
    wide_dt <- dcast(
      as.data.table(rrapply(ls, how = "melt")),
      L1 + L2 + L3 ~ L4
    )
    wide_dt <- wide_dt[
      , lapply(.SD, unlist),
      by = .(L1, L2, L3),
      .SDcols = c("Gmax.val", "G2.val", "Gmax.vec", "G2.vec")
    ]
    setnames(wide_dt, c("L1", "L2", "L3"), c("time", "seed", "treatment"))
    wide_dt
  },
  times = 25
)
#> Unit: milliseconds
#> expr min lq mean median uq max neval
#> tidyr 17.959197 20.072647 23.662698 21.278771 25.633581 40.593022 25
#> data.table 2.061861 2.655782 2.966581 2.784425 2.988044 5.032524 25
Here's a solution that uses some of the newer "rectangling" functions in tidyr. I'm posting this mainly as an exercise to gain and share some familiarity with these functions; my sense is that this approach could definitely be, well, tidied up a bit. Still, it's a nice way to practice swinging back and forth between wide and long formats while unpacking a list.
library(tidyverse)
# NOTE(review): nothing in the pipeline below draws random numbers, so this
# seed presumably only matters if `data` is simulated elsewhere - confirm.
set.seed(1L)

# Unpack the nested list `data` one level at a time, alternating between
# widening (names -> columns) and lengthening (columns -> rows).
tibble(time = names(data), data = data) %>%
  # level 2 names become columns, then rows labelled by `seed`
  unnest_wider(col = data) %>%
  pivot_longer(cols = -time, names_to = "seed", values_to = "treatment") %>%
  # level 3 names become columns, then rows labelled by `treatment`
  unnest_wider(col = treatment) %>%
  pivot_longer(
    cols = -c(time, seed),
    names_to = "treatment",
    values_to = "g_data"
  ) %>%
  # innermost level: Gmax.val, G2.val, Gmax.vec and G2.vec become columns
  unnest_wider(col = g_data) %>%
  # remember which row each vector element came from
  mutate(row_n = row_number()) %>%
  # stack the two vector columns and expand them to one element per row
  pivot_longer(
    cols = c(Gmax.vec, G2.vec),
    names_to = "g",
    values_to = "g_val"
  ) %>%
  unnest_longer(col = g_val) %>%
  # number the elements within each vector so they can be matched up again
  group_by(row_n, time, seed, treatment, Gmax.val, G2.val, g) %>%
  mutate(sub_n = row_number()) %>%
  # put Gmax.vec and G2.vec back side by side
  pivot_wider(names_from = g, values_from = g_val) %>%
  ungroup() %>%
  # drop the helper indices
  select(-c(row_n, sub_n))
# A tibble: 64 x 7
time seed treatment Gmax.val G2.val Gmax.vec G2.vec
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 10 123 0.1 -0.626 0.184 -0.836 1.51
2 10 123 0.1 -0.626 0.184 1.60 0.390
3 10 123 0.1 -0.626 0.184 0.330 -0.621
4 10 123 0.1 -0.626 0.184 -0.820 -2.21
5 10 123 0.1 -0.626 0.184 0.487 1.12
6 10 123 0.1 -0.626 0.184 0.738 -0.0449
7 10 123 0.1 -0.626 0.184 0.576 -0.0162
8 10 123 0.1 -0.626 0.184 -0.305 0.944
9 10 123 0.2 0.821 0.594 0.919 -0.478
10 10 123 0.2 0.821 0.594 0.782 0.418
# … with 54 more rows