pandas merge multiple columns code example

Example 1: pd merge on multiple columns

new_df = pd.merge(A_df, B_df,  how='left', left_on=['A_c1','c2'], right_on = ['B_c1','c2'])

Example 2: python inner join based on two columns

df  = pd.merge(left=df1, right=df2, on=['var_1', 'var_2'], how='inner']

Example 3: Joins with another DataFrame

# Joins with another DataFrame

df.join(df2, df.name == df2.name, 'outer').select(
  df.name, df2.height).collect()
# [Row(name=None, height=80), Row(name=u'Bob', height=85), Row(
#   name=u'Alice', height=None)]

df.join(df2, 'name', 'outer').select('name', 'height').collect()
# [Row(name=u'Tom', height=80), Row(name=u'Bob', height=85), Row(
#   name=u'Alice', height=None)]

cond = [df.name == df3.name, df.age == df3.age]
df.join(df3, cond, 'outer').select(df.name, df3.age).collect()
# [Row(name=u'Alice', age=2), Row(name=u'Bob', age=5)]

df.join(df2, 'name').select(df.name, df2.height).collect()
# Row(name=u'Bob', height=85)]

df.join(df4, ['name', 'age']).select(df.name, df.age).collect()
# [Row(name=u'Bob', age=5)]

Example 4: merge two columns pandas

df["period"] = df["Year"] + df["quarter"]

Example 5: python add multiple columns to pandas dataframe

# Basic syntax:
df[['new_column_1_name', 'new_column_2_name']] = pd.DataFrame([[np.nan, 'word']], index=df.index)
# Where the columns you're adding have to be pandas dataframes

# Example usage:
# Define example dataframe:
import pandas as pd
import numpy as np
df = pd.DataFrame({
    'col_1': [0, 1, 2, 3],
    'col_2': [4, 5, 6, 7]
})

print(df)
   col_1  col_2
0      0      4
1      1      5
2      2      6
3      3      7

# Add several columns simultaneously:
df[['new_col_1', 'new_col_2', 'new_col_3']] = pd.DataFrame([[np.nan, 42, 'wow']], index=df.index)
print(df)
   col_1  col_2  new_col_1  new_col_2 new_col_3
0      0      4        NaN         42       wow
1      1      5        NaN         42       wow
2      2      6        NaN         42       wow
3      3      7        NaN         42       wow

# Note, this isn't much more efficient than simply doing three
#	separate assignments, e.g.:
df['new_col_1'] = np.nan
df['new_col_2'] = 42
df['new_col_3'] = 'wow'

Example 6: dataframe concatenate

# Pandas for Python

df['col1 & col2'] = df['col1']+df['col2']

#Output
#col1	col2	col1 & col2
#A1		A2		A1A2
#B1		B2		B1B2