Pandas convert Dataframe to Nested Json
It is not hard to write a function that builds the recursive dictionary from your DataFrame
object:
def fdrec(df):
    """Build a nested dictionary from the rows of a DataFrame.

    Every column except the last one contributes one level of keys, in
    column order; the last column supplies the leaf value stored under the
    innermost key.

    Args:
        df: DataFrame whose rows describe one path each. Only ``df.values``
            is read, so column names and the index are ignored.

    Returns:
        A dict of dicts, one nesting level per key column. A frame with
        fewer than two columns yields an empty dict because there is no key
        column left once the value column is set aside.

    Note:
        When the same key path occurs in more than one row, the value of the
        first row is kept and later rows are ignored.
    """
    # Materialize the underlying array once instead of on every access.
    rows = df.values
    # Position (within a row) of the innermost key column.
    leaf_level = rows.shape[1] - 2

    nested = {}
    for row in rows:
        node = nested
        for level, key in enumerate(row[:-1]):
            if level == leaf_level:
                # Innermost key: store the value unless the path already exists.
                node.setdefault(key, row[-1])
            else:
                # Intermediate key: create the sub-dict on demand and descend.
                node = node.setdefault(key, {})
    return nested
which will produce:
{'MS_AVERY':
{'UGRAD':
{'GENERAL STUDIES': {'PHYS': 1L,
'POLS': 53L,
'PHIL': 10L,
'HIST': 5L,
'MELC': 2L,
'GEST': 5079L}}}}
Here's a solution I came up with while working on this question:
def rollup_to_dict_core(x, values, columns, d_columns=None):
    """Recursively roll a DataFrame up into a nested dictionary.

    Args:
        x: DataFrame to roll up.
        values: List of column names holding the leaf values.
        columns: List of column names forming the hierarchy, outermost
            first. Must contain at least one name.
        d_columns: Optional list of extra column names that are grouped
            together with ``columns[0]`` and emitted next to the nested
            dictionary at the top level. They are not passed on to the
            recursive calls.

    Returns:
        With a single hierarchy column: a dict keyed by that column, mapping
        to the value itself (one entry in ``values``) or to a dict of
        ``{value_column: value}`` (several entries).
        With several hierarchy columns and no ``d_columns``: a dict keyed by
        ``columns[0]`` whose values are the roll-ups of the remaining columns.
        With ``d_columns``: a dict keyed by ``columns[0]`` whose values are
        dicts holding each descriptive column plus the nested roll-up stored
        under the key ``columns[1]``.
    """
    if d_columns is None:
        d_columns = []

    # Base case: one hierarchy column left, so index by it and dump the values.
    if len(columns) == 1:
        indexed = x.set_index(columns)
        if len(values) == 1:
            return indexed[values[0]].to_dict()
        return indexed[values].to_dict(orient='index')

    # Recursive case: one nested dict per group of the outermost column
    # (plus any descriptive columns, which become extra index levels).
    res = x.groupby([columns[0]] + d_columns).apply(
        lambda y: rollup_to_dict_core(y, values, columns[1:])
    )
    if len(d_columns) == 0:
        return res.to_dict()

    res.name = columns[1]
    # reset_index only treats a tuple or list as "several levels"; a bare
    # Python 3 range object would be looked up as one level label and fail.
    res = res.reset_index(level=list(range(1, len(d_columns) + 1)))
    return res.to_dict(orient='index')
def rollup_to_dict(x, values, d_columns=None):
    """Roll a DataFrame up into a nested dictionary.

    Every column of ``x`` that is listed in neither ``values`` nor
    ``d_columns`` is used as a hierarchy level, in the frame's column order;
    the actual roll-up is delegated to ``rollup_to_dict_core``.

    Args:
        x: DataFrame to roll up.
        values: Column names holding the leaf values.
        d_columns: Optional extra column names forwarded unchanged to
            ``rollup_to_dict_core``; defaults to an empty list.

    Returns:
        Whatever ``rollup_to_dict_core`` returns for the derived hierarchy.
    """
    extra = [] if d_columns is None else d_columns

    hierarchy = []
    for label in x.columns:
        # Value and descriptive columns never act as hierarchy levels.
        if label in values or label in extra:
            continue
        hierarchy.append(label)

    return rollup_to_dict_core(x, values, hierarchy, extra)
>>> pprint(rollup_to_dict(df, ['2013 Total']))
{'MS_AVERY': {'UGRAD': {'GENERAL STUDIES': {'GEST': 5079,
'HIST': 5,
'MELC': 2,
'PHIL': 10,
'PHYS': 1,
'POLS': 53}}}}