Merging pandas DataFrames with respect to a function's output
# function to compare one row of df1 with every row of df2
# note the use of abs() here: the square root of a negative number is complex,
# so the result of the computation would be NaN. abs() avoids that
def compare(x, y, candidates=None):
    """Return the ``color`` of the candidate row closest to the point (x, y).

    Closeness is scored per candidate row as
    ``(|x - row.x| ** 0.5 + |y - row.y| ** 0.5) ** 0.5``; the row with the
    smallest score wins (the first such row when several tie).

    The score is kept in a local Series, so the candidate frame is never
    modified (no helper column is written into it).

    Args:
        x: x coordinate of the point to match.
        y: y coordinate of the point to match.
        candidates: DataFrame with ``x``, ``y`` and ``color`` columns to
            search. Defaults to the module-level ``df2``.

    Returns:
        The ``color`` value of the best-scoring candidate row.
    """
    if candidates is None:
        candidates = df2

    # abs() before the fractional power: a negative base raised to 0.5
    # would produce NaN instead of a usable score.
    score = (
        (x - candidates['x']).abs() ** 0.5
        + (y - candidates['y']).abs() ** 0.5
    ) ** 0.5

    # Single .loc lookup (row label, column) instead of chained indexing.
    return candidates.loc[score.idxmin(), 'color']
# Look up the closest colour for every row of df1 (one compare() call per row),
# then store the results as a new 'color' column and show the frame.
nearest_colors = df1.apply(
    lambda point: compare(point['x'], point['y']),
    axis=1,
)
df1['color'] = nearest_colors
print(df1)
x y size color
0 50 14 1 black
1 16 22 4 white
2 72 11 3 black
3 61 45 7 blue
4 95 58 6 blue
5 47 56 5 red
Alternatively, a vectorized approach using NumPy
broadcasting:
# Coordinates as plain arrays. The extra axis on the df2 side lets the
# subtraction broadcast to one (df2 row, df1 row, coordinate) entry per pair.
query_xy = df1[['x', 'y']].values
candidate_xy = df2[['x', 'y']].values[:, None]

# Square root of the absolute per-coordinate offset, summed over the
# coordinate axis -> one score per (df2 row, df1 row) pair.
pair_scores = np.sum(np.abs(query_xy - candidate_xy) ** 0.5, 2)

# For each df1 row, the position of the df2 row with the smallest score.
best_positions = np.argmin(pair_scores, 0)

# .values drops df2's index so the colours are assigned positionally.
df1['color'] = df2['color'].iloc[best_positions].values
df1
Out[79]:
x y size color
0 50 14 1 black
1 16 22 4 white
2 72 11 3 black
3 61 45 7 blue
4 95 58 6 blue
5 47 56 5 red