Example 1: pd merge on multiple columns
# Left join of A_df with B_df on two key pairs: A_c1 is matched against
# B_c1, and the column c2 is matched against c2 on the other side.
new_df = pd.merge(
    left=A_df,
    right=B_df,
    how='left',
    left_on=['A_c1', 'c2'],
    right_on=['B_c1', 'c2'],
)
Example 2: python inner join based on two columns
# Inner join of df1 and df2 on the two columns both frames share
# ('var_1' and 'var_2'); only rows whose key pair appears in both are kept.
# The call must be closed with ')' -- a trailing ']' is a syntax error.
df = pd.merge(left=df1, right=df2, on=['var_1', 'var_2'], how='inner')
Example 3: Joins with another DataFrame (PySpark)
# Outer join on an explicit equality expression between the name columns.
(
    df.join(df2, on=df.name == df2.name, how='outer')
    .select(df.name, df2.height)
    .collect()
)

# Outer join keyed by a column name given as a string.
(
    df.join(df2, on='name', how='outer')
    .select('name', 'height')
    .collect()
)

# Outer join on a list of conditions (name and age must both match).
cond = [df.name == df3.name, df.age == df3.age]
(
    df.join(df3, on=cond, how='outer')
    .select(df.name, df3.age)
    .collect()
)

# No join type given: the library default applies (inner, per the PySpark docs).
(
    df.join(df2, on='name')
    .select(df.name, df2.height)
    .collect()
)

# Join keyed by a list of column names, again with the default join type.
(
    df.join(df4, on=['name', 'age'])
    .select(df.name, df.age)
    .collect()
)
Example 4: merge two columns pandas
# Store the element-wise `+` of the "Year" and "quarter" columns in a new
# "period" column.
# NOTE(review): `+` joins text only when both columns hold strings; with
# numeric columns it adds the values instead. If "Year" is numeric, cast it
# with .astype(str) first -- confirm the column dtypes.
df["period"] = df["Year"] + df["quarter"]
Example 5: python add multiple columns to pandas dataframe
# Add several constant-valued columns in one assignment.
# The right-hand frame is built from a dict of scalars, which pandas
# broadcasts over the supplied index. A single-row list of lists combined
# with a longer index fails a length check in current pandas versions, and
# would also force every new column to object dtype.
df[['new_column_1_name', 'new_column_2_name']] = pd.DataFrame(
    {'new_column_1_name': np.nan, 'new_column_2_name': 'word'},
    index=df.index,
)
import pandas as pd
import numpy as np

# Small two-column frame used to demonstrate adding several columns.
df = pd.DataFrame({
    'col_1': [0, 1, 2, 3],
    'col_2': [4, 5, 6, 7]
})
print(df)
# Output:
#    col_1  col_2
# 0      0      4
# 1      1      5
# 2      2      6
# 3      3      7

# Option A: add several constant columns in a single assignment.
# The right-hand frame is built from a dict of scalars, which pandas
# broadcasts over df.index. A single-row list of lists combined with a
# longer index fails a length check in current pandas versions.
df[['new_col_1', 'new_col_2', 'new_col_3']] = pd.DataFrame(
    {'new_col_1': np.nan, 'new_col_2': 42, 'new_col_3': 'wow'},
    index=df.index,
)
print(df)
# Output:
#    col_1  col_2  new_col_1  new_col_2 new_col_3
# 0      0      4        NaN         42       wow
# 1      1      5        NaN         42       wow
# 2      2      6        NaN         42       wow
# 3      3      7        NaN         42       wow

# Option B: assign each column separately; a scalar on the right-hand side
# is broadcast to every row.
df['new_col_1'] = np.nan
df['new_col_2'] = 42
df['new_col_3'] = 'wow'
Example 6: concatenate two DataFrame columns
# Store the element-wise `+` of 'col1' and 'col2' in a new column named
# 'col1 & col2'.
# NOTE(review): this concatenates text only if both columns hold strings;
# numeric columns would be summed -- confirm the dtypes.
df['col1 & col2'] = df['col1']+df['col2']