applying a function across columns by extracting similar column names
Why not just rbind
the thing?
with(do.call(rbind, my_list),
cbind(germany=(germany - germany_mean) / germany_sd,
france=(france - france_mean) / france_sd))
# germany france
# [1,] -0.1429988 -0.5009603
# [2,] -0.4095864 -0.4157005
# [3,] -0.4427196 -0.6492633
# [4,] -0.6087181 -0.7976550
# [5,] -0.5748642 -0.7546901
# [6,] -0.5745473 -0.8392283
Do you have to use map
?
Here I get your desired output using two for
loops instead of using map
Result_list = vector("list",length(my_list))
for(i in 1:length(my_list))
{
df = my_list[[i]]
# identifier number of countries
countries = colnames(df)[grep('mean',colnames(df))]
countries = gsub("_mean","",countries)
df_result = NULL
for(j in 1:length(countries))
{
country = countries[j]
value_country = df[1,match(country,colnames(df))]
mean_country = df[1,match(paste0(country,"_mean"),colnames(df))]
sd_country = df[1,match(paste0(country,"_sd"),colnames(df))]
result_country = (value_country - mean_country) / sd_country
Sentence = paste0(country,": ",round(result_country,5)," = (",value_country," - ",round(mean_country,5),") / ",round(sd_country,5))
df_result = c(df_result,Sentence)
}
Result_list[[i]] = df_result
}
And the output Result_list
looks like:
> Result_list
[[1]]
[1] "germany: -0.143 = (17 - 21.29429) / 30.03026"
[2] "france: -0.50096 = (25 - 48.57103) / 47.05169"
[[2]]
[1] "germany: -0.40959 = (9 - 21.29932) / 30.02862"
[2] "france: -0.4157 = (29 - 48.56053) / 47.05439"
[[3]]
[1] "germany: -0.44272 = (8 - 21.29475) / 30.02973"
[2] "france: -0.64926 = (18 - 48.55189) / 47.05624"
[[4]]
[1] "germany: -0.60872 = (3 - 21.27785) / 30.02679"
[2] "france: -0.79765 = (11 - 48.53825) / 47.06077"
[[5]]
[1] "germany: -0.57486 = (4 - 21.26142) / 30.02694"
[2] "france: -0.75469 = (13 - 48.52145) / 47.0676"
[[6]]
[1] "germany: -0.57455 = (4 - 21.2532) / 30.0292"
[2] "france: -0.83923 = (9 - 48.50559) / 47.07372"
Is it what you are looking for ?
EDIT: Extracting only results
For extracting only result values, you can do the following:
Df_result_value = NULL
for(i in 1:length(my_list))
{
df = my_list[[i]]
# identifier number of countries
countries = colnames(df)[grep('mean',colnames(df))]
countries = gsub("_mean","",countries)
for(j in 1:length(countries))
{
country = countries[j]
value_country = df[1,match(country,colnames(df))]
mean_country = df[1,match(paste0(country,"_mean"),colnames(df))]
sd_country = df[1,match(paste0(country,"_sd"),colnames(df))]
result_country = (value_country - mean_country) / sd_country
Df_result_value = rbind(Df_result_value,c(country,result_country))
}
}
Df_result_value = data.frame(Df_result_value)
colnames(Df_result_value) = c("Country","Result")
And get this output:
> Df_result_value
Country Result
1 germany -0.142998843835787
2 france -0.500960300483614
3 germany -0.409586436512588
4 france -0.415700488060442
5 germany -0.442719572974515
6 france -0.649263275639099
7 germany -0.608718121899195
8 france -0.797654950237258
9 germany -0.574864249939699
10 france -0.754690110335453
11 germany -0.574547256608035
12 france -0.839228262008441
The question is unclear on the exact form of output so we assume that what is wanted is a data frame with a column for date and a column for each country in which the country value is normalized. In this case it means we want 3 columns in the output.
1) pivot_longer/_wider Bind the my_list
list components together creating a data frame with a row from each component. Then for each bare country name among the columns append _root to it so that every column name except date
is of the form country_suffix. Then convert to long form, perform the normalization and convert back to wide form:
library(dplyr)
library(tidyr)
library(purrr)
my_list %>%
bind_rows %>%
set_names(names(.)[1], sub("^([^_]*)$", "\\1_root", names(.)[-1])) %>%
pivot_longer(-date, names_to = c("country", ".value"), names_sep = "_") %>%
mutate(root = (root - mean) / sd) %>%
pivot_wider(id_cols = "date", names_from = "country", values_from = "root")
giving:
# A tibble: 6 x 3
date germany france
<date> <dbl> <dbl>
1 2016-01-01 -0.143 -0.501
2 2016-01-02 -0.410 -0.416
3 2016-01-03 -0.443 -0.649
4 2016-01-04 -0.609 -0.798
5 2016-01-05 -0.575 -0.755
6 2016-01-06 -0.575 -0.839
2) Base R
After rbinding the list components together giving d
we pick out the country names, nms
, as those names not containing an underscore except for the first such (which is date
). Then perform the normalization and cbind
the date
column to that.
d <- do.call("rbind", my_list)
nms <- grep("_", names(d), invert = TRUE, value = TRUE)[-1]
cbind(d[1], (d[nms] - d[paste0(nms, "_mean")]) / d[paste0(nms, "_sd")])
giving:
date germany france
1 2016-01-01 -0.1429988 -0.5009603
2 2016-01-02 -0.4095864 -0.4157005
3 2016-01-03 -0.4427196 -0.6492633
4 2016-01-04 -0.6087181 -0.7976550
5 2016-01-05 -0.5748642 -0.7546901
6 2016-01-06 -0.5745473 -0.8392283