cbind 2 dataframes with different number of rows

I think you should instead use merge:

merge(df1, df2, by="year", all = T)

For your data:

df1 = data.frame(matrix(0, 7, 4))
names(df1) = c("year", "avg", "hr", "sal")
df1$year = 2010:2016
df1$avg = c(.3, .29, .275, .280, .295, .33, .315)
df1$hr = c(31, 30, 14, 24, 18, 26, 40)
df1$sal = c(2000, 4000, 600, 800, 1000, 7000, 9000)
df2 = data.frame(matrix(0, 5, 3))
names(df2) = c("year", "pos", "fld")
df2$year = c(2010, 2011, 2013, 2014, 2015)
df2$pos = c('A', 'B', 'C', 'B', 'D')
df2$fld = c(.99,.995,.97,.98,.99)

cbind is meant to column-bind two dataframes that are in all sense compatible. But what you aim to do is actual merge, where you want the elements from the two data frames not be discarded, and for missing values you get NA instead.

Here is how you could do this with tidyverse tools:

library(tidyverse)

h <- list()
g <- list()

h[[1]] <- tribble(
  ~year,  ~avg, ~hr,  ~sal,
  2010,  0.300,  31,  2000,
  2011,  0.290,  30,  4000,
  2012,  0.275,  14,   600,
  2013,  0.280,  24,   800,
  2014,  0.295,  18,  1000,
  2015,  0.330,  26,  7000,
  2016,  0.315,  40,  9000
)

g[[1]] <- tribble(
  ~year,  ~pos,  ~fld,
   2010,   "A",   0.990,
   2011,   "B",   0.995,
   2013,   "C",   0.970,
   2014,   "B",   0.980,
   2015,   "D",   0.990
)

map2(h, g, left_join)

Which produces:

[[1]]
# A tibble: 7 x 6
   year   avg    hr   sal pos      fld
  <dbl> <dbl> <dbl> <dbl> <chr>  <dbl>
1  2010 0.3      31  2000 A      0.99 
2  2011 0.290    30  4000 B      0.995
3  2012 0.275    14   600 NA    NA    
4  2013 0.28     24   800 C      0.97 
5  2014 0.295    18  1000 B      0.98 
6  2015 0.33     26  7000 D      0.99 
7  2016 0.315    40  9000 NA    NA

We can use Map with cbind.fill (from rowr) to cbind the corresponding 'data.frame' from 'h' and 'g'.

library(rowr)
Map(cbind.fill, h, g, MoreArgs = list(fill=NA))

Update

Based on the expected output showed, it seems like the OP wanted a merge instead of cbind

f1 <- function(...) merge(..., all = TRUE, by = 'year')
Map(f1, h, g)
#[[1]]
#  year   avg hr  sal  pos   fld
#1 2010 0.300 31 2000    A 0.990
#2 2011 0.290 30 4000    B 0.995
#3 2012 0.275 14  600 <NA>    NA
#4 2013 0.280 24  800    C 0.970
#5 2014 0.295 18 1000    B 0.980
#6 2015 0.330 26 7000    D 0.990
#7 2016 0.315 40 9000 <NA>    NA

Or as @Colonel Beauvel mentioned, this can be made compact

Map(merge, h, g, by='year', all=TRUE)

data

h <- list(structure(list(year = 2010:2016, avg = c(0.3, 0.29, 0.275, 
0.28, 0.295, 0.33, 0.315), hr = c(31L, 30L, 14L, 24L, 18L, 26L, 
 40L), sal = c(2000L, 4000L, 600L, 800L, 1000L, 7000L, 9000L)), .Names = c("year", 
 "avg", "hr", "sal"), class = "data.frame", row.names = c("1", 
 "2", "3", "4", "5", "6", "7")))

g <- list(structure(list(year = c(2010L, 2011L, 2013L, 2014L, 2015L
), pos = c("A", "B", "C", "B", "D"), fld = c(0.99, 0.995, 0.97, 
0.98, 0.99)), .Names = c("year", "pos", "fld"), class = "data.frame",
row.names = c("1", 
"2", "3", "4", "5")))

cbind 2 dataframes with different number of rows

Update

data

Tags:

R

Cbind

Related

Recent Posts