Sum values in a list of lists of dictionaries using common key-value pairs
With pandas:
>>> import pandas as pd
>>> [pd.DataFrame(dicts).groupby('user', as_index=False, sort=False).sum().to_dict(orient='records') for dicts in data]
[[{'user': 1, 'rating': 20},
{'user': 2, 'rating': 10},
{'user': 3, 'rating': 10}],
[{'user': 4, 'rating': 4},
{'user': 2, 'rating': 80},
{'user': 1, 'rating': 30}]]
You can try:
from itertools import groupby

# Build, for every inner list of `data`, one record per user holding the
# sum of that user's ratings.
result = []
for records in data:
    # groupby only merges *adjacent* items with equal keys, so the records
    # have to be ordered by user before grouping.
    by_user = sorted(records, key=lambda rec: rec['user'])
    totals = []
    for user, members in groupby(by_user, key=lambda rec: rec['user']):
        rating_sum = sum(rec['rating'] for rec in members)
        totals.append({'user': user, 'rating': rating_sum})
    result.append(totals)

# Finally order each inner list by ascending summed rating.
result = [sorted(totals, key=lambda rec: rec['rating']) for totals in result]
# %%timeit
# 7.54 µs ± 220 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
UPDATE (A more efficient solution):
# Single pass per inner list of `data`: accumulate each user's rating in a
# dict keyed by user, then emit the merged records ordered by ascending rating.
result = []
for lst in data:
    visited = {}  # user -> merged record for that user
    for d in lst:
        user = d['user']
        if user in visited:
            visited[user]['rating'] += d['rating']
        else:
            # Store a shallow copy, not `d` itself: the accumulation above
            # would otherwise write into the caller's dict and silently
            # change the ratings held in `data`.
            visited[user] = dict(d)
    result.append(sorted(visited.values(), key=lambda d: d['rating']))
# %% timeit
# 2.5 µs ± 54 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
Result:
# print(result)
[
[
{'user': 2, 'rating': 10},
{'user': 3, 'rating': 10},
{'user': 1, 'rating': 20}
],
[
{'user': 4, 'rating': 4},
{'user': 1, 'rating': 30},
{'user': 2, 'rating': 80}
]
]
# For every inner list of `data`, sum the ratings per user. Users come out
# in the order they were first seen (dict insertion order, Python 3.7+).
op = []
for records in data:
    totals = {}
    for entry in records:
        key, amount = entry['user'], entry['rating']
        # Unseen users start from 0 so the first rating is added like any other.
        totals.setdefault(key, 0)
        totals[key] += amount
    merged = []
    for key, total in totals.items():
        merged.append({'user': key, 'rating': total})
    op.append(merged)
N.B.: since Python 3.7, dictionaries officially preserve insertion order, so the users in each output list appear in the order in which they were first encountered.