Searching for a straightforward way to do Stata's bysort tasks in R
# Example data: a grouping column `category` and an integer column `var1`,
# parsed from an inline whitespace-separated table whose first line is the
# header row.
dat <- read.table(
  text = "category var1
a 1
a 2
a 3
b 4
b 6
b 8
b 10
c 11
c 14
c 17",
  header = TRUE
)
# Add the bysort-style helper columns; the outer parentheses print the result.
#   var2: 1 on the first row of each category, otherwise 0
#   var3: 1 on the last row of each category, otherwise 0
#   var4: number of rows in the category
#   var5: change in var1 from the previous row, ignoring category
#   var6: change in var1 from the previous row within the category
(dat <- transform(
  dat,
  var2 = as.numeric(!duplicated(category)),
  var3 = as.numeric(!duplicated(category, fromLast = TRUE)),
  var4 = ave(var1, category, FUN = length),
  var5 = c(NA, diff(var1)),
  var6 = ave(var1, category, FUN = function(v) c(NA, diff(v)))
))
# category var1 var2 var3 var4 var5 var6
# 1 a 1 1 0 3 NA NA
# 2 a 2 0 0 3 1 1
# 3 a 3 0 1 3 1 1
# 4 b 4 1 0 4 1 NA
# 5 b 6 0 0 4 2 2
# 6 b 8 0 0 4 2 2
# 7 b 10 0 1 4 2 2
# 8 c 11 1 0 3 1 NA
# 9 c 14 0 0 3 3 3
# 10 c 17 0 1 3 3 3
An answer using dplyr
library(dplyr)
# dplyr version of the bysort-style columns: per-category first/last-row flags,
# group size, and first differences of var1.
dat <- dat %>%
  group_by(category) %>%
  mutate(
    # 1 on the first row of each category, otherwise 0.
    var2 = as.numeric(row_number() == 1),
    # 1 on the last row of each category, otherwise 0.
    var3 = as.numeric(row_number() == n()),
    # Number of rows in the category.
    var4 = n(),
    # Change from the previous row within the category (NA on the first row).
    # lag() alone would give the previous value, not the difference.
    var6 = var1 - lag(var1)
  ) %>%
  ungroup() %>%
  # Same difference computed over the whole table, ignoring category
  # boundaries, so this must run after ungroup().
  mutate(var5 = var1 - lag(var1))