dplyr summarize with subtotals
Also possible by simply joining the two group results:
cyl_carb <- mtcars %>% group_by(cyl,carb) %>% summarize(mean(disp))
cyl <- mtcars %>% group_by(cyl) %>% summarize(mean(disp))
joined <- full_join(cyl_carb, cyl)
result <- arrange(joined, cyl)
result
gives:
Source: local data frame [12 x 3]
Groups: cyl [3]
cyl carb mean(disp)
(dbl) (dbl) (dbl)
1 4 1 91.3800
2 4 2 116.6000
3 4 NA 105.1364
4 6 1 241.5000
5 6 4 163.8000
6 6 6 145.0000
7 6 NA 183.3143
8 8 2 345.5000
9 8 3 275.8000
10 8 4 405.5000
11 8 8 301.0000
12 8 NA 353.1000
or with an additional column:
cyl_carb <- mtcars %>% group_by(cyl,carb) %>% summarize(mean(disp))
cyl <- mtcars %>% group_by(cyl) %>% summarize(mean.cyl = mean(disp))
joined <- full_join(cyl_carb, cyl)
joined
gives:
Source: local data frame [9 x 4]
Groups: cyl [?]
cyl carb mean(disp) mean.cyl
(dbl) (dbl) (dbl) (dbl)
1 4 1 91.38 105.1364
2 4 2 116.60 105.1364
3 6 1 241.50 183.3143
4 6 4 163.80 183.3143
5 6 6 145.00 183.3143
6 8 2 345.50 353.1000
7 8 3 275.80 353.1000
8 8 4 405.50 353.1000
9 8 8 301.00 353.1000
data.table It's very clunky, but this is one way:
library(data.table)
DT <- data.table(mtcars)
rbind(
DT[,.(mean(disp)), by=.(cyl,carb)],
DT[,.(mean(disp), carb=NA), by=.(cyl) ],
DT[,.(mean(disp), cyl=NA), by=.(carb)]
)[order(cyl,carb)]
This gives
cyl carb V1
1: 4 1 91.3800
2: 4 2 116.6000
3: 4 NA 105.1364
4: 6 1 241.5000
5: 6 4 163.8000
6: 6 6 145.0000
7: 6 NA 183.3143
8: 8 2 345.5000
9: 8 3 275.8000
10: 8 4 405.5000
11: 8 8 301.0000
12: 8 NA 353.1000
13: NA 1 134.2714
14: NA 2 208.1600
15: NA 3 275.8000
16: NA 4 308.8200
17: NA 6 145.0000
18: NA 8 301.0000
I'd rather see results in something like an R table
, but don't know of any functions for that.
dplyr @akrun found this analogous code
bind_rows(
mtcars %>%
group_by(cyl, carb) %>%
summarise(Mean= mean(disp)),
mtcars %>%
group_by(cyl) %>%
summarise(carb=NA, Mean=mean(disp)),
mtcars %>%
group_by(carb) %>%
summarise(cyl=NA, Mean=mean(disp))
) %>% arrange(cyl, carb)
We could wrap the repeat operations in a function
library(lazyeval)
f1 <- function(df, grp, Var, func){
FUN <- match.fun(func)
df %>%
group_by_(.dots=grp) %>%
summarise_(interp(~FUN(v), v=as.name(Var)))
}
m1 <- f1(mtcars, c('carb', 'cyl'), 'disp', 'mean')
m2 <- f1(mtcars, 'carb', 'disp', 'mean')
m3 <- f1(mtcars, 'cyl', 'disp', 'mean')
bind_rows(list(m1, m2, m3)) %>%
arrange(cyl, carb) %>%
rename(Mean=`FUN(disp)`)
carb cyl Mean
1 1 4 91.3800
2 2 4 116.6000
3 NA 4 105.1364
4 1 6 241.5000
5 4 6 163.8000
6 6 6 145.0000
7 NA 6 183.3143
8 2 8 345.5000
9 3 8 275.8000
10 4 8 405.5000
11 8 8 301.0000
12 NA 8 353.1000
13 1 NA 134.2714
14 2 NA 208.1600
15 3 NA 275.8000
16 4 NA 308.8200
17 6 NA 145.0000
18 8 NA 301.0000
Either option can be made a little less ugly with data.table's rbindlist
with fill
:
rbindlist(list(
mtcars %>% group_by(cyl) %>% summarise(mean(disp)),
mtcars %>% group_by(carb) %>% summarise(mean(disp)),
mtcars %>% group_by(cyl,carb) %>% summarise(mean(disp))
),fill=TRUE) %>% arrange(cyl,carb)
rbindlist(list(
DT[,mean(disp),by=.(cyl,carb)],
DT[,mean(disp),by=.(cyl)],
DT[,mean(disp),by=.(carb)]
),fill=TRUE)[order(cyl,carb)]
Something similar to table
with addmargins
(although actually a data.frame
)
library(dplyr)
library(reshape2)
out <- bind_cols(
mtcars %>% group_by(cyl, carb) %>%
summarise(mu = mean(disp)) %>%
dcast(cyl ~ carb),
(mtcars %>% group_by(cyl) %>% summarise(Total=mean(disp)))[,2]
)
margin <- t((mtcars %>% group_by(carb) %>% summarise(Total=mean(disp)))[,2])
rbind(out, c(NA, margin, mean(mtcars$disp))) %>%
`rownames<-`(c(paste("cyl", c(4,6,8)), "Total")) # add some row names
# cyl 1 2 3 4 6 8 Total
# cyl 4 4 91.3800 116.60 NA NA NA NA 105.1364
# cyl 6 6 241.5000 NA NA 163.80 145 NA 183.3143
# cyl 8 8 NA 345.50 275.8 405.50 NA 301 353.1000
# Total NA 134.2714 208.16 275.8 308.82 145 301 230.7219
The bottom row is the column wise margins, columns named 1:8 are carbs, and Total is the rowwise margins.