Group by multiple keys and summarize/average values of a list of dictionaries

To get the aggregated results

from itertools import groupby
from operator import itemgetter

grouper = itemgetter("dept", "sku")
result = []
for key, grp in groupby(sorted(input_data, key = grouper), grouper):
    temp_dict = dict(zip(["dept", "sku"], key))
    temp_dict["qty"] = sum(item["qty"] for item in grp)
    result.append(temp_dict)

from pprint import pprint
pprint(result)

Output

[{'dept': '001', 'qty': 200, 'sku': 'bar'},
 {'dept': '001', 'qty': 400, 'sku': 'foo'},
 {'dept': '002', 'qty': 900, 'sku': 'baz'},
 {'dept': '002', 'qty': 600, 'sku': 'qux'},
 {'dept': '003', 'qty': 700, 'sku': 'foo'}]

And to get the averages, you can simply change the contents inside the for loop, like this

temp_dict = dict(zip(["dept", "sku"], key))
temp_list = [item["qty"] for item in grp]
temp_dict["avg"] = sum(temp_list) / len(temp_list)
result.append(temp_dict)

Output

[{'avg': 200, 'dept': '001', 'sku': 'bar'},
 {'avg': 200, 'dept': '001', 'sku': 'foo'},
 {'avg': 450, 'dept': '002', 'sku': 'baz'},
 {'avg': 600, 'dept': '002', 'sku': 'qux'},
 {'avg': 700, 'dept': '003', 'sku': 'foo'}]

Suggestion: Anyway, I would have added both the qty and avg in the same dict like this

temp_dict = dict(zip(["dept", "sku"], key))
temp_list = [item["qty"] for item in grp]
temp_dict["qty"] = sum(temp_list)
temp_dict["avg"] = temp_dict["qty"] / len(temp_list)
result.append(temp_dict)

Output

[{'avg': 200, 'dept': '001', 'qty': 200, 'sku': 'bar'},
 {'avg': 200, 'dept': '001', 'qty': 400, 'sku': 'foo'},
 {'avg': 450, 'dept': '002', 'qty': 900, 'sku': 'baz'},
 {'avg': 600, 'dept': '002', 'qty': 600, 'sku': 'qux'},
 {'avg': 700, 'dept': '003', 'qty': 700, 'sku': 'foo'}]

Inspired by Eelco Hoogendoorn's answer. Here is another way to resolve this using Pandas package. The code is more readable.

import numpy as np
import pandas as pd

def sum_by_cusip_and_dept(data):
    df = pd.DataFrame(data)
    grouped = df.groupby(['sku', 'dept'])    
    sum = grouped.sum()
    return [{'sku': r[0], 'dept': r[1], 'qty': kv.to_dict().get('qty')} for r, kv in sum.iterrows()]