cbind 2 dataframes with different number of rows
I think you should use `merge` instead:
# Full outer join on "year": every year present in either data frame is kept,
# and columns with no match in the other frame are filled with NA.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
merge(df1, df2, by = "year", all = TRUE)
For your data:
# Yearly batting figures: seven seasons, 2010 through 2016.
df1 <- data.frame(
  year = 2010:2016,
  avg = c(.3, .29, .275, .280, .295, .33, .315),
  hr = c(31, 30, 14, 24, 18, 26, 40),
  sal = c(2000, 4000, 600, 800, 1000, 7000, 9000)
)

# Fielding figures: only five seasons (2012 and 2016 are absent).
# `stringsAsFactors = FALSE` keeps `pos` a character column on every R version.
df2 <- data.frame(
  year = c(2010, 2011, 2013, 2014, 2015),
  pos = c("A", "B", "C", "B", "D"),
  fld = c(.99, .995, .97, .98, .99),
  stringsAsFactors = FALSE
)
`cbind` is meant to column-bind two data frames that are compatible in every sense. What you are aiming for, however, is actually a merge: no elements from either data frame should be discarded, and missing values should be filled with `NA` instead.
Here is how you could do this with `tidyverse` tools:
library(tidyverse)

# `h` and `g` are one-element lists of tibbles; the element at position i
# of `h` is paired with the element at position i of `g`.
h <- list(
  tribble(
    ~year, ~avg, ~hr, ~sal,
    2010, 0.300, 31, 2000,
    2011, 0.290, 30, 4000,
    2012, 0.275, 14, 600,
    2013, 0.280, 24, 800,
    2014, 0.295, 18, 1000,
    2015, 0.330, 26, 7000,
    2016, 0.315, 40, 9000
  )
)
g <- list(
  tribble(
    ~year, ~pos, ~fld,
    2010, "A", 0.990,
    2011, "B", 0.995,
    2013, "C", 0.970,
    2014, "B", 0.980,
    2015, "D", 0.990
  )
)

# Left-join each pair on an explicit key. Naming `by` stops dplyr from
# guessing the key from every shared column name (and from printing a
# "Joining with ..." message). Years of `h` with no match in `g` get NA
# in `pos` and `fld`.
map2(h, g, function(x, y) left_join(x, y, by = "year"))
Which produces:
[[1]]
# A tibble: 7 x 6
year avg hr sal pos fld
<dbl> <dbl> <dbl> <dbl> <chr> <dbl>
1 2010 0.3 31 2000 A 0.99
2 2011 0.290 30 4000 B 0.995
3 2012 0.275 14 600 NA NA
4 2013 0.28 24 800 C 0.97
5 2014 0.295 18 1000 B 0.98
6 2015 0.33 26 7000 D 0.99
7 2016 0.315 40 9000 NA NA
We can use `Map` with `cbind.fill` (from the `rowr` package) to `cbind` the corresponding data frames from `h` and `g`.
library(rowr)
# Column-bind the i-th element of `h` with the i-th element of `g`,
# passing `fill = NA` to `cbind.fill` for every pair.
Map(function(left, right) cbind.fill(left, right, fill = NA), h, g)
Update
Based on the expected output shown, it seems the OP wanted a `merge` instead of a `cbind`:
# Full outer join of the supplied data frames on their shared "year" column.
# Everything in `...` is forwarded to `merge()`; rows without a partner are
# kept and padded with NA.
f1 <- function(...) {
  merge(..., by = "year", all = TRUE)
}
# Apply `f1` to each (h[[i]], g[[i]]) pair, always returning a list.
mapply(f1, h, g, SIMPLIFY = FALSE)
#[[1]]
# year avg hr sal pos fld
#1 2010 0.300 31 2000 A 0.990
#2 2011 0.290 30 4000 B 0.995
#3 2012 0.275 14 600 <NA> NA
#4 2013 0.280 24 800 C 0.970
#5 2014 0.295 18 1000 B 0.980
#6 2015 0.330 26 7000 D 0.990
#7 2016 0.315 40 9000 <NA> NA
Or as @Colonel Beauvel mentioned, this can be made compact
# Merge each (h[[i]], g[[i]]) pair with a full outer join on "year".
# The constant options go in `MoreArgs` rather than being vectorised over:
# this states that they are not iterated, and keeps the call valid when
# `h` and `g` are empty lists (mixing zero-length inputs with length-one
# vectorised arguments is an error in `mapply`).
Map(merge, h, g, MoreArgs = list(by = "year", all = TRUE))
data
# Reproducible input: each list holds a single data frame, so that the
# i-th element of `h` pairs with the i-th element of `g`.
h <- list(
  structure(
    list(
      year = 2010:2016,
      avg = c(0.3, 0.29, 0.275, 0.28, 0.295, 0.33, 0.315),
      hr = c(31L, 30L, 14L, 24L, 18L, 26L, 40L),
      sal = c(2000L, 4000L, 600L, 800L, 1000L, 7000L, 9000L)
    ),
    names = c("year", "avg", "hr", "sal"),
    class = "data.frame",
    row.names = as.character(1:7)
  )
)

g <- list(
  structure(
    list(
      year = c(2010L, 2011L, 2013L, 2014L, 2015L),
      pos = c("A", "B", "C", "B", "D"),
      fld = c(0.99, 0.995, 0.97, 0.98, 0.99)
    ),
    names = c("year", "pos", "fld"),
    class = "data.frame",
    row.names = as.character(1:5)
  )
)