apply aggregate function to only one column pandas code example
Example 1: dataframe groupby multiple columns
grouped_multiple = df.groupby(['Team', 'Pos']).agg({'Age': ['mean', 'min', 'max']})
grouped_multiple.columns = ['age_mean', 'age_min', 'age_max']
grouped_multiple = grouped_multiple.reset_index()
print(grouped_multiple)
Example 2: Aggregate on the entire DataFrame without group
# Aggregate on the entire DataFrame without group
df.agg({"age": "max"}).collect()
# [Row(max(age)=5)]
from pyspark.sql import functions as F
df.agg(F.min(df.age)).collect()
# [Row(min(age)=2)]