pandas compare two columns code example
Example 1: comparing two dataframe columns
# Element-wise equality of the two columns; the mask is then materialised
# as a plain NumPy boolean array (True where the values match).
is_equal = df["col1"] == df["col2"]
comparison_column = np.where(is_equal, True, False)
Example 2: pandas compare two columns
# Syntax:
# C = np.where(condition, A, B)
# C equals A where the condition is True and B where it is False
import numpy as np
import pandas as pd
# Sample rows. Every cell is a *string*, so the comparisons below are
# lexicographic (character-by-character) rather than numeric.
a = [
    ['10', '1.2', '4.2'],
    ['15', '70', '0.03'],
    ['8', '5', '0'],
]
df = pd.DataFrame(a, columns=['one', 'two', 'three'])

# Single condition: keep df['one'] where two <= one <= three, NaN elsewhere.
df['que'] = np.where(
    (df['one'] >= df['two']) & (df['one'] <= df['three']),
    df['one'],
    np.nan,
)

# Several conditions: np.select takes the choice that belongs to the first
# condition that holds for each row. Here df['que'] additionally becomes
# df['two'] wherever df['one'] < df['two'].
conditions = [
    (df['one'] >= df['two']) & (df['one'] <= df['three']),
    df['one'] < df['two'],
]
choices = [df['one'], df['two']]
df['que'] = np.select(conditions, choices, default=np.nan)

# If "df['one'] < df['two'] is False" can be taken to imply
# df['one'] >= df['two'], the conditions and choices simplify to the
# following (the "<" test comes first, so the second condition only
# decides rows where it failed).
conditions = [
    df['one'] < df['two'],
    df['one'] <= df['three'],
]
choices = [df['two'], df['one']]

# Rebuilding the frame from the same rows gives a DataFrame of string
# values again (without the 'que' column). Because the values look
# numeric, converting them to floats is usually the better choice:
df = pd.DataFrame(a, columns=['one', 'two', 'three'])
df2 = df.astype(float)
# The conversion changes the comparison results, however: strings compare
# character-by-character, while floats are compared numerically.