Converting a deeply nested list to a dataframe

Another approach is to:

1. Melt the nested list into a long data.frame with rrapply(how = "melt") from the rrapply package (or, similarly, with reshape2::melt()).
2. Reshape the long data.frame into the required format using tidyr's pivot_wider() and unnest().

library(rrapply)
library(tidyverse)

# Melt the nested list `ls` into a long data.frame, spread the names stored in
# L4 into their own columns, expand the resulting list-columns into regular
# columns, and finally give the remaining level columns descriptive names.
rrapply(ls, how = "melt") %>%
  pivot_wider(names_from = L4, values_from = value) %>%
  unnest(cols = c(Gmax.val, G2.val, Gmax.vec, G2.vec)) %>%
  rename(time = L1, seed = L2, treatment = L3)

#> # A tibble: 64 x 7
#>    time  seed  treatment Gmax.val G2.val Gmax.vec  G2.vec
#>    <chr> <chr> <chr>        <dbl>  <dbl>    <dbl>   <dbl>
#>  1 10    123   0.1         -0.626  0.184   -0.836  1.51  
#>  2 10    123   0.1         -0.626  0.184    1.60   0.390 
#>  3 10    123   0.1         -0.626  0.184    0.330 -0.621 
#>  4 10    123   0.1         -0.626  0.184   -0.820 -2.21  
#>  5 10    123   0.1         -0.626  0.184    0.487  1.12  
#>  6 10    123   0.1         -0.626  0.184    0.738 -0.0449
#>  7 10    123   0.1         -0.626  0.184    0.576 -0.0162
#>  8 10    123   0.1         -0.626  0.184   -0.305  0.944 
#>  9 10    123   0.2          0.821  0.594    0.919 -0.478 
#> 10 10    123   0.2          0.821  0.594    0.782  0.418 
#> # … with 54 more rows

Alternatively, use data.table's dcast() to reshape the long table into wide format:

library(data.table)

# Melt the nested list `ls` into a long table and convert it to a data.table.
long_dt <- as.data.table(rrapply(ls, how = "melt"))

# Cast to wide format: one row per (L1, L2, L3) combination, one column per
# distinct L4 name, filled from the `value` column.
wide_dt <- dcast(long_dt, L1 + L2 + L3 ~ L4, value.var = "value")

# Flatten the list-columns within each (L1, L2, L3) group.
wide_dt <- wide_dt[
  ,
  lapply(.SD, unlist),
  by = .(L1, L2, L3),
  .SDcols = c("Gmax.val", "G2.val", "Gmax.vec", "G2.vec")
]

# Give the level columns descriptive names.
setnames(wide_dt, c("L1", "L2", "L3"), c("time", "seed", "treatment"))

Some benchmarks

# Benchmark the two reshaping approaches on the same nested list `ls`,
# evaluating each labelled expression 25 times.
microbenchmark::microbenchmark(
  # tidyr variant: melt, pivot the L4 names into columns, unnest the
  # list-columns, then rename the level columns.
  tidyr = {
    rrapply(ls, how = "melt") %>%                            
      pivot_wider(names_from = "L4") %>%                     
      unnest(c(Gmax.val, G2.val, Gmax.vec, G2.vec)) %>%      
      rename(time = L1, seed = L2, treatment = L3)
  },
  # data.table variant: melt, dcast to wide, unlist the list-columns per
  # (L1, L2, L3) group, then rename the level columns.
  data.table = {
    wide_dt <- dcast(as.data.table(rrapply(ls, how = "melt")), L1 + L2 + L3 ~ L4)
    wide_dt <- wide_dt[, lapply(.SD, unlist), by = list(L1, L2, L3), .SDcols = c("Gmax.val", "G2.val", "Gmax.vec", "G2.vec")]
    setnames(wide_dt, old = c("L1", "L2", "L3"), new = c("time", "seed", "treatment"))
    # End the expression with the finished table.
    wide_dt
  },
  times = 25
)
#> Unit: milliseconds
#>        expr       min        lq      mean    median        uq       max neval
#>       tidyr 17.959197 20.072647 23.662698 21.278771 25.633581 40.593022    25
#>  data.table  2.061861  2.655782  2.966581  2.784425  2.988044  5.032524    25

Here's a solution that uses some of the newer "rectangling" methods in tidyr. I'm posting this mainly as an exercise to gain and share some familiarity with these functions - my sense is that this approach could definitely be, well, tidied up a bit. Still, it's a nice way to play with swinging back and forth between wide/long list unpacking.

library(tidyverse)
# NOTE(review): nothing in the pipeline below draws random numbers; presumably
# this seed matches the code that generated `data` (not shown here) -- confirm.
set.seed(1L)

# Unpack the nested list `data` one level at a time, alternating between
# widening (one column per named element) and lengthening (names become values
# of a key column).
tibble(time = names(data), data = data) %>%
  # Level 1 -> 2: spread each top-level element into columns, then stack those
  # columns so their names land in `seed`.
  unnest_wider(data) %>%
  pivot_longer(-time, names_to = "seed", values_to = "treatment") %>%
  # Level 2 -> 3: same widen/lengthen step; the column names land in
  # `treatment` and the remaining nested content in `g_data`.
  unnest_wider(treatment) %>%
  pivot_longer(-c(time, seed), names_to = "treatment", values_to = "g_data") %>%
  # Innermost level: spread into the columns used below
  # (Gmax.val, G2.val, Gmax.vec, G2.vec).
  unnest_wider(g_data) %>%
  # Tag each row before expanding the vector columns so that rows originating
  # from the same record can be grouped together afterwards.
  mutate(row_n = row_number()) %>%
  # Stack the two vector columns, then expand each vector to one row per
  # element.
  pivot_longer(c(Gmax.vec, G2.vec), names_to = "g", values_to = "g_val") %>%
  unnest_longer(g_val) %>%
  # Number the elements within each vector; `sub_n` lets pivot_wider() pair up
  # elements of Gmax.vec and G2.vec that share the same position.
  group_by(row_n, time, seed, treatment, Gmax.val, G2.val, g) %>%
  mutate(sub_n = row_number()) %>%
  pivot_wider(names_from = g, values_from = g_val) %>%
  ungroup() %>%
  # Drop the helper index columns.
  select(-row_n, -sub_n) 

  # A tibble: 64 x 7
   time  seed  treatment Gmax.val G2.val Gmax.vec  G2.vec
   <chr> <chr> <chr>        <dbl>  <dbl>    <dbl>   <dbl>
 1 10    123   0.1         -0.626  0.184   -0.836  1.51  
 2 10    123   0.1         -0.626  0.184    1.60   0.390 
 3 10    123   0.1         -0.626  0.184    0.330 -0.621 
 4 10    123   0.1         -0.626  0.184   -0.820 -2.21  
 5 10    123   0.1         -0.626  0.184    0.487  1.12  
 6 10    123   0.1         -0.626  0.184    0.738 -0.0449
 7 10    123   0.1         -0.626  0.184    0.576 -0.0162
 8 10    123   0.1         -0.626  0.184   -0.305  0.944 
 9 10    123   0.2          0.821  0.594    0.919 -0.478 
10 10    123   0.2          0.821  0.594    0.782  0.418 
# … with 54 more rows

Converting a deeply nested list to a dataframe

Tags:

R

Nested Lists

Dataframe

Related

Recent Posts