Summarize different Columns with different Functions

We can use

 df %>%
    group_by(ID) %>%
    summarise(Cash = sum(Cash), Price = sum(Price), Weight = max(Weight))

If we have many columns, one way would be to do this separately and then join the output together.

 df1 <- df %>% 
          group_by(ID) %>% 
          summarise_each(funs(sum), Cash:Price)
 df2 <- df %>%
          group_by(ID) %>% 
          summarise_each(funs(max), Weight)
 inner_join(df1, df2, by = "ID")
 #      ID  Cash Price Weight
 #   (int) (dbl) (dbl)  (int)
 #1     1   0.6   4.2     82
 #2     2   0.3   1.0     70

Or do it w/o the double groups:

library(dplyr)

set.seed(1492)
df <- data.frame(id=rep(c(1,2), 3),
                 cash=rnorm(6, 0.5, 0.1),
                 price=rnorm(6, 0.5, 0.1)*6,
                 weight=sample(100, 6))

df

##   id      cash    price weight
## 1  1 0.4410152 2.484082     10
## 2  2 0.4101343 3.032529     93
## 3  1 0.3375889 2.305076     58
## 4  2 0.6047922 3.248851     55
## 5  1 0.4721711 3.209930     34
## 6  2 0.5362493 2.331530     99

custom_summarise <- function(do_df) {

  return(bind_cols(
    summarise_each(select(do_df, -weight), funs(sum)),
    summarise_each(select(do_df, weight), funs(max))
  ))

}

group_by(df, id) %>% do(custom_summarise(.))

## Source: local data frame [2 x 4]
## Groups: id [2]
## 
##      id     cash    price weight
##   (dbl)    (dbl)    (dbl)  (int)
## 1     3 1.250775 7.999089     58
## 2     6 1.551176 8.612910     99

library(data.table)

setDT(df)

df[,.(Cash = sum(Cash),Price = sum(Price),Weight = max(Weight)),by=ID]

One way of doing this for +90 columns can be:

max_col <- 'Weight'

sum_col <- setdiff(colnames(df),max_col)

query_1 <- paste0(sum_col,' = sum(',sum_col,')')

query_2 <- paste0(max_col,' = max(',max_col,')')

query_3 <- paste(query_1,collapse=',')

query_4 <- paste(query_2,collapse=',')

query_5 <- paste(query_3,query_4,sep=',')

final_query <- paste0('df[,.(',query_5,'),by = ID]')

eval(parse(text = final_query))

Summarize different Columns with different Functions

Tags:

Group By

R

Statistics

Dataframe

Dplyr

Related

Recent Posts