Use R to Randomly Assign Participants to Treatments on a Daily Basis
Consider splitting the data frame by day and gender with `by`, then, within each subset, drawing a random sample in which the two treatments are balanced (for example, by running `replicate` 100 times and picking one of the several balanced draws):
Data
# Build the long table: every participant paired with every day of the year.
# NOTE(review): `p` (participant IDs) and `g` (genders) are assumed to be
# parallel vectors that already exist in the session; this snippet does not
# define them.
df <- merge(
  x = data.frame(participant = p, gender = g),
  y = data.frame(days = 1:365),
  by = NULL  # no key columns, so merge() returns the full cross join
)
Solution
# Assign treatments independently within every gender-by-day subset.
df_list <- by(df, list(df$gender, df$days), function(sub) {
  # Shuffling an alternating T1/T2 vector is balanced by construction, so
  # there is no need to draw unconstrained samples and hope that one of them
  # happens to be balanced (which fails outright when none is, e.g. for any
  # odd-sized subset). When the subset size is odd, "T1" receives the one
  # extra slot.
  balanced <- rep(c("T1", "T2"), length.out = nrow(sub))
  transform(sub, treatment = sample(balanced))  # ASSIGN RESULT TO NEW COLUMN
})

# BIND DATA FRAMES AND RESET ROW.NAMES
final_df <- data.frame(do.call(rbind.data.frame, df_list), row.names = NULL)
Output
Day 1
head(final_df, 16)
# participant gender days treatment
# 1 P09 F 1 T1
# 2 P10 F 1 T2
# 3 P11 F 1 T2
# 4 P12 F 1 T1
# 5 P13 F 1 T2
# 6 P14 F 1 T2
# 7 P15 F 1 T1
# 8 P16 F 1 T1
# 9 P01 M 1 T1
# 10 P02 M 1 T1
# 11 P03 M 1 T2
# 12 P04 M 1 T2
# 13 P05 M 1 T2
# 14 P06 M 1 T1
# 15 P07 M 1 T1
# 16 P08 M 1 T2
Day 365
tail(final_df, 16)
# participant gender days treatment
# 5825 P09 F 365 T2
# 5826 P10 F 365 T2
# 5827 P11 F 365 T1
# 5828 P12 F 365 T2
# 5829 P13 F 365 T1
# 5830 P14 F 365 T2
# 5831 P15 F 365 T1
# 5832 P16 F 365 T1
# 5833 P01 M 365 T1
# 5834 P02 M 365 T2
# 5835 P03 M 365 T1
# 5836 P04 M 365 T2
# 5837 P05 M 365 T2
# 5838 P06 M 365 T2
# 5839 P07 M 365 T1
# 5840 P08 M 365 T1
Ideally, for analytical purposes you should keep the data in long format (i.e., tidy data). But if you need wide format, consider `reshape` together with some helper objects and cleanup processing:
# HELPER OBJECTS
# Combined key such as "P01M"; each distinct value becomes one wide column.
final_df$participant_gender <- with(final_df, paste0(participant, gender))
# Desired column order of the wide table.
# NOTE(review): relies on `p` and `g` already existing in the session.
new_names <- paste0(p, g)

# RESHAPE WIDE
# One output row per day; row names are derived from the data instead of
# hard-coding the number of days.
wide_df <- reshape(final_df, v.names = "treatment", timevar = "participant_gender",
                   idvar = "days", drop = c("gender", "participant"),
                   new.row.names = seq_along(unique(final_df$days)),
                   direction = "wide")

# RENAME AND RE-ORDER COLUMNS
# Strip only the literal "treatment." prefix that reshape() adds: the pattern
# is anchored and the dot escaped so nothing else in a name can match.
names(wide_df) <- sub("^treatment\\.", "", names(wide_df))
wide_df <- wide_df[c("days", new_names)]

head(wide_df)
# days P01M P02M P03M P04M P05M P06M P07M P08M P09F P10F P11F P12F P13F P14F P15F P16F
# 1 1 T1 T1 T2 T2 T2 T1 T1 T2 T1 T2 T2 T1 T2 T2 T1 T1
# 2 2 T1 T1 T2 T1 T2 T1 T2 T2 T1 T2 T2 T1 T2 T2 T1 T1
# 3 3 T1 T1 T2 T1 T1 T2 T2 T2 T1 T2 T2 T2 T1 T2 T1 T1
# 4 4 T1 T1 T1 T2 T2 T2 T1 T2 T2 T1 T1 T2 T2 T1 T1 T2
# 5 5 T1 T1 T2 T1 T2 T2 T1 T2 T1 T1 T2 T1 T2 T2 T1 T2
# 6 6 T2 T1 T1 T1 T2 T2 T1 T2 T2 T2 T2 T1 T2 T1 T1 T1
Here is my approach. Surely it can be optimized, but I want to share my idea:
library(tidyverse)
# Sixteen participants, "P01" .. "P16": the first eight male, the last eight
# female.
p <- sprintf("P%02d", 1:16)
g <- rep(c("M", "F"), each = 8)
df <- data.frame(participant = p, sex = g)
First I create a data.frame covering 13 cycles of 28 days each, which gives us 13 * 28 = 364 days.
# Calendar of 13 cycles x 28 days: `day` runs 1..28 inside each cycle.
days <- data.frame(
  day = rep(1:28, times = 13),
  cycle = rep(1:13, each = 28)
)
# `df` and `days` share no column names, so merge() yields the cross join:
# one row per participant per calendar day.
df <- merge(x = df, y = days)
Now I build a function that creates a 0/1 vector covering one 28-day cycle for both groups (male/female), subject to the condition "each participant gets the value 1 on at least 10 days of the cycle":
#' Randomly assign a binary treatment for one cycle
#'
#' Participants are handled as two equally sized groups. On every day exactly
#' half of each group receives the value 1 and the other half 0, and the days
#' are re-drawn until every participant has the value 1 on at least
#' `min_days` days.
#'
#' @param n_participants Total number of participants across both groups;
#'   must be a positive multiple of 4 so each group can be split in half.
#' @param n_days Number of days in the cycle.
#' @param min_days Minimum number of days with value 1 per participant. It
#'   cannot exceed `n_days / 2`, because each participant averages exactly
#'   `n_days / 2` such days.
#' @return An integer vector of 0/1 values of length
#'   `n_participants * n_days`, ordered day by day with the participant
#'   varying fastest (first group first, then second group).
rand_assign <- function(n_participants = 16, n_days = 28, min_days = 10) {
  if (n_participants <= 0 || n_participants %% 4 != 0) {
    stop("`n_participants` must be a positive multiple of 4.", call. = FALSE)
  }
  if (min_days > n_days / 2) {
    # Without this guard the rejection loop below would never terminate.
    stop("`min_days` cannot exceed `n_days / 2`.", call. = FALSE)
  }

  group_size <- n_participants / 2
  treated_per_day <- group_size / 2

  # All 0/1 patterns for one group on one day in which exactly half of the
  # group receives the value 1.
  grid <- as.matrix(expand.grid(rep(list(0:1), group_size)))
  comb <- grid[rowSums(grid) == treated_per_day, , drop = FALSE]

  # Draw `n_days` daily patterns (days x group members) and retry until every
  # member reaches the minimum number of 1-days.
  draw_group <- function() {
    repeat {
      picked <- comb[sample.int(nrow(comb), n_days, replace = TRUE), ,
                     drop = FALSE]
      if (all(colSums(picked) >= min_days)) {
        return(picked)
      }
    }
  }

  first_group <- draw_group()
  second_group <- draw_group()

  # Transpose to participants x days so that flattening column by column
  # lists all participants for day 1, then all participants for day 2, ...
  as.vector(t(cbind(first_group, second_group)))
}
Last step is combining the vector with the given data.frame
# Draw a fresh assignment for every cycle, then spread the result to one
# column per participant.
df %>%
  group_by(cycle) %>%
  # Each cycle group holds 16 participants x 28 days = 448 rows, which is
  # exactly the length rand_assign() returns with its defaults.
  mutate(treat_1 = rand_assign()) %>%
  # Drop the cycle grouping before reshaping so later verbs act on whole rows.
  ungroup() %>%
  pivot_wider(names_from = c(sex, participant), values_from = treat_1) %>%
  # Replace the within-cycle day (1..28) with a running day number.
  mutate(day = row_number()) %>%
  dplyr::select(-cycle)
This yields
# A tibble: 364 x 17
day M_P01 M_P02 M_P03 M_P04 M_P05 M_P06 M_P07 M_P08 F_P09 F_P10 F_P11 F_P12 F_P13
<int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
1 1 1 1 0 1 0 1 0 0 0 0 1 1 1
2 2 1 0 0 0 1 0 1 1 0 0 0 1 1
3 3 0 1 0 1 0 1 1 0 0 1 0 1 0
4 4 0 1 1 1 0 0 1 0 0 1 1 0 1
5 5 0 1 1 0 1 0 0 1 1 0 0 1 1
6 6 0 1 1 1 1 0 0 0 1 0 0 0 1
7 7 0 0 0 1 1 1 0 1 0 0 1 0 0
8 8 1 0 1 0 0 1 0 1 0 0 1 0 1
9 9 0 1 0 1 1 0 1 0 1 0 1 1 0
10 10 1 1 0 0 1 1 0 0 1 1 0 0 0
with 1 and 0 corresponding to Treatment 1 and Treatment 2, respectively.
Nice first question. Thanks for posting.
My understanding of your constraints is that on any given day, four males must have one treatment and four males the other treatment. The same is true of the eight females: there must be four getting each treatment. Effectively, this means that on any given day, you only need a random sample applied to four individuals, since the rest of the individuals will effectively be constrained by the first four. Males 5 - 8 will be paired up with males 1 - 4, so that male 1 always gets the opposite treatment to male 5, male 2 gets the opposite treatment to male 6, etc. The same pattern is applied to the females, so that although the individual assignments are random, there are always four females getting treatment 1, four females getting treatment 2, four males getting treatment 1 and four males getting treatment 2 on any given day.
You want at least ten days where each person gets treatment 1 in a 28-day period. This further constrains randomization to the point where it probably makes as much sense to ensure that each 28 day period contains a total of 14 days of treatment 1 and 14 days of treatment 2.
That way, you can get your assignations like this:
n_days <- 365
block_len <- 28                            # length of one balancing window
n_blocks <- ceiling(n_days / block_len)    # enough windows to cover n_days

# Schedule for one participant: every 28-day window is a shuffle of 14 ones
# and 14 twos; the concatenated windows are truncated to n_days.
one_schedule <- function() {
  as.vector(replicate(n_blocks, sample(rep(1:2, block_len / 2))))[seq_len(n_days)]
}

# Schedules for the eight participants of one sex. Participants 5-8 mirror
# participants 1-4 (3 - x swaps 1 and 2), so each day has four of each
# treatment.
one_group <- function() {
  first_half <- replicate(4, one_schedule())
  cbind(first_half, 3 - first_half)
}

# Draw males and females separately. Reusing one matrix for both sexes would
# make every female column an exact copy of the matching male column.
male_cols <- one_group()
female_cols <- one_group()
sixteen_cols <- cbind(seq_len(n_days), male_cols, female_cols)
df <- setNames(as.data.frame(sixteen_cols), c("Day", paste0("M", 1:8), paste0("F", 1:8)))
Now `df` is a data frame with a layout like your table. The treatments are given as the numbers 1 or 2, and the participants are labelled M1 - M8 and F1 - F8:
df
#> Day M1 M2 M3 M4 M5 M6 M7 M8 F1 F2 F3 F4 F5 F6 F7 F8
#> 1 1 1 1 1 1 2 2 2 2 1 1 1 1 2 2 2 2
#> 2 2 2 2 2 2 1 1 1 1 2 2 2 2 1 1 1 1
#> 3 3 2 1 1 2 1 2 2 1 2 1 1 2 1 2 2 1
#> 4 4 2 2 2 1 1 1 1 2 2 2 2 1 1 1 1 2
#> 5 5 1 2 1 1 2 1 2 2 1 2 1 1 2 1 2 2
#> 6 6 2 2 2 2 1 1 1 1 2 2 2 2 1 1 1 1
#> 7 7 1 2 1 1 2 1 2 2 1 2 1 1 2 1 2 2
#> 8 8 1 1 2 2 2 2 1 1 1 1 2 2 2 2 1 1
#> 9 9 2 2 1 2 1 1 2 1 2 2 1 2 1 1 2 1
#> 10 10 2 1 2 2 1 2 1 1 2 1 2 2 1 2 1 1
#> 11 11 1 2 2 2 2 1 1 1 1 2 2 2 2 1 1 1
#> 12 12 2 1 2 1 1 2 1 2 2 1 2 1 1 2 1 2
#> 13 13 1 1 1 1 2 2 2 2 1 1 1 1 2 2 2 2
#> 14 14 2 1 1 1 1 2 2 2 2 1 1 1 1 2 2 2
#> 15 15 1 1 2 1 2 2 1 2 1 1 2 1 2 2 1 2
#> 16 16 1 2 1 1 2 1 2 2 1 2 1 1 2 1 2 2
#> 17 17 2 2 2 2 1 1 1 1 2 2 2 2 1 1 1 1
#> ...
#> 365 365 2 2 2 2 1 1 1 1 2 2 2 2 1 1 1 1