python conditionally create new column in pandas dataframe code example
Example 1: pandas create new column conditional on other columns
# Build a new column from several conditions with np.select.
# Each boolean mask is named so its pairing with a label is easy to follow.
matches_a_and_b = (df['Base Column 1'] == 'A') & (df['Base Column 2'] == 'B')
matches_c = df['Base Column 3'] == 'C'
conditions = [matches_a_and_b, matches_c]
# Labels, listed in the same order as the masks in `conditions`.
choices = ['Conditional Value 1', 'Conditional Value 2']
# For each row np.select takes the choice of the first condition that is True;
# rows where no condition is True receive `default`.
# NOTE(review): `default` is the same string as the first choice, so rows that
# match nothing look identical to rows matching the first condition - confirm
# this is intended.
df['New Column'] = np.select(conditions, choices, default='Conditional Value 1')
Example 2: python conditionally create new column in pandas dataframe
# If you only have one condition, use numpy.where().
# Example usage with np.where:
# (imports are included so the snippet runs on its own)
import numpy as np
import pandas as pd

df = pd.DataFrame({'Type': list('ABBC'), 'Set': list('ZZXY')})  # Define df
print(df)
# Output:
#   Type Set
# 0    A   Z
# 1    B   Z
# 2    B   X
# 3    C   Y

# Add new column based on a single condition: np.where(mask, a, b) yields `a`
# where the mask is True and `b` everywhere else.
df['color'] = np.where(df['Set'] == 'Z', 'green', 'red')
print(df)
# Output:
#   Type Set  color
# 0    A   Z  green
# 1    B   Z  green
# 2    B   X    red
# 3    C   Y    red
# If you have multiple conditions, use numpy.select().
# Example usage with np.select:
# (imports are included so the snippet runs on its own)
import numpy as np
import pandas as pd

df = pd.DataFrame({'Type': list('ABBC'), 'Set': list('ZZXY')})  # Define df
print(df)
# Output:
#   Type Set
# 0    A   Z
# 1    B   Z
# 2    B   X
# 3    C   Y

# Set the conditions for determining values in new column.
# Order matters: for each row np.select uses the first condition that is True,
# so row 1 (Set == 'Z', Type == 'B') is 'blue' even though it also satisfies
# the last, broader condition.
conditions = [
    (df['Set'] == 'Z') & (df['Type'] == 'A'),
    (df['Set'] == 'Z') & (df['Type'] == 'B'),
    (df['Type'] == 'B'),
]
# Set the new column values in order of the conditions they should
# correspond to:
choices = ['yellow', 'blue', 'purple']
# Add new column based on conditions and choices; rows matching no condition
# get the default value.
df['color'] = np.select(conditions, choices, default='black')
print(df)
# Returns (columns keep the order in which df was defined: Type, Set):
#   Type Set   color
# 0    A   Z  yellow
# 1    B   Z    blue
# 2    B   X  purple
# 3    C   Y   black
Example 3: if else python pandas dataframe
# Create a list of our conditions. np.select evaluates them in order and, for
# each row, uses the first one that is True.
conditions = [
    (df['likes_count'] <= 2),
    (df['likes_count'] > 2) & (df['likes_count'] <= 9),
    (df['likes_count'] > 9) & (df['likes_count'] <= 15),
    (df['likes_count'] > 15),
]
# Create a list of the values we want to assign for each condition
# (same order as `conditions`).
values = ['tier_4', 'tier_3', 'tier_2', 'tier_1']
# Create a new column and use np.select to assign values to it using our lists
# as arguments. An explicit string default is passed because np.select's
# implicit default is the integer 0, which does not share a dtype with the
# string labels: depending on the NumPy version it is either coerced to the
# string '0' or rejected with a TypeError. Rows that satisfy no condition
# (e.g. a missing likes_count, for which every comparison is False) are
# labelled 'unknown'.
df['tier'] = np.select(conditions, values, default='unknown')
# Display updated DataFrame (rendered automatically only in a notebook/REPL;
# wrap in print() when running as a script).
df.head()