groupby where pandas code example

Example 1: filter groupby pandas

>>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
...                           'foo', 'bar'],
...                    'B' : [1, 2, 3, 4, 5, 6],
...                    'C' : [2.0, 5., 8., 1., 2., 9.]})
>>> grouped = df.groupby('A')
>>> grouped.filter(lambda x: x['B'].mean() > 3.)
     A  B    C
1  bar  2  5.0
3  bar  4  1.0
5  bar  6  9.0

Example 2: Groups the DataFrame using the specified columns

# Groups the DataFrame using the specified columns

df.groupBy().avg().collect()
# [Row(avg(age)=3.5)]
sorted(df.groupBy('name').agg({'age': 'mean'}).collect())
# [Row(name='Alice', avg(age)=2.0), Row(name='Bob', avg(age)=5.0)]
sorted(df.groupBy(df.name).avg().collect())
# [Row(name='Alice', avg(age)=2.0), Row(name='Bob', avg(age)=5.0)]
sorted(df.groupBy(['name', df.age]).count().collect())
# [Row(name='Alice', age=2, count=1), Row(name='Bob', age=5, count=1)]

Example 3: pandas groupby

>>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
...                               'Parrot', 'Parrot'],
...                    'Max Speed': [380., 370., 24., 26.]})
>>> df
   Animal  Max Speed
0  Falcon      380.0
1  Falcon      370.0
2  Parrot       24.0
3  Parrot       26.0
>>> df.groupby(['Animal']).mean()
        Max Speed
Animal
Falcon      375.0
Parrot       25.0