How to serialize a tree class object structure into json file format?
Here's an alternative answer, which is basically a Python 3 version of my answer to the question "Making object JSON serializable with regular encoder". It pickles any Python object that the regular json
encoder doesn't already handle.
There are a couple of differences. One is that it doesn't monkey-patch the json
module because that's not an essential part of the solution. Another is that although the TreeNode
class isn't derived from the dict
class this time, it has essentially the same functionality. This was done intentionally to prevent the stock JSONEncoder
from encoding it, and to cause the default()
method of the JSONEncoder
subclass to be used instead.
Other than that, it's a very generic approach and will be able to handle many other types of Python objects, including user defined classes, without modification.
import base64
import json
import pickle

try:
    # The ABC aliases were removed from ``collections`` in Python 3.10.
    from collections.abc import MutableMapping
except ImportError:  # very old Python 3 without ``collections.abc``
    from collections import MutableMapping
class PythonObjectEncoder(json.JSONEncoder):
    """JSON encoder that pickles whatever the stock encoder cannot handle."""

    def default(self, obj):
        """Return a JSON-friendly wrapper around the pickle of *obj*.

        The result is a one-item dict whose ``'_python_object'`` value is the
        base64 text of ``pickle.dumps(obj)``.
        """
        pickled = pickle.dumps(obj)
        # base64 yields bytes; JSON needs text, so decode it to a str.
        as_text = base64.b64encode(pickled).decode('utf-8')
        return {'_python_object': as_text}
def as_python_object(dct):
    """``object_hook`` for ``json.loads`` that undoes ``PythonObjectEncoder``.

    Any decoded JSON object containing a ``'_python_object'`` key is replaced
    by the Python object obtained from base64-decoding and unpickling that
    key's value. Every other dict is returned unchanged.

    WARNING: unpickling can execute arbitrary code. Only use this hook on
    JSON that comes from a trusted source.
    """
    if '_python_object' in dct:
        # SECURITY: pickle.loads() runs code embedded in the payload; never
        # feed this untrusted or user-supplied JSON.
        return pickle.loads(base64.b64decode(dct['_python_object']))
    return dct
# based on AttrDict -- https://code.activestate.com/recipes/576972-attrdict
class TreeNode(MutableMapping):
    """ dict-like object whose contents can be accessed as attributes.

    The mapping interface is backed by the instance ``__dict__``, so
    ``node['name']`` and ``node.name`` refer to the same value. The class is
    a ``MutableMapping`` rather than a ``dict`` subclass.
    """

    def __init__(self, name, children=None):
        """Create a node called *name* with a private copy of *children*."""
        self.name = name
        self.children = list(children) if children is not None else []

    def __getitem__(self, key):
        # Read the instance dict directly so a missing key raises KeyError.
        # The inherited ``in``, ``get()`` and ``pop()`` rely on KeyError and
        # would otherwise leak an AttributeError.
        return self.__dict__[key]

    def __setitem__(self, key, val):
        setattr(self, key, val)

    def __delitem__(self, key):
        # KeyError (not AttributeError) for a missing key, as for any mapping.
        del self.__dict__[key]

    def __iter__(self):
        return iter(self.__dict__)

    def __len__(self):
        return len(self.__dict__)
# Build the demo tree: Parent -> [Child 1, Child 2 -> Grand Kid -> Great Grand Kid].
child2 = TreeNode('Child 2', [TreeNode('Grand Kid', [TreeNode('Great Grand Kid')])])
tree = TreeNode('Parent', [TreeNode('Child 1'), child2])

# PythonObjectEncoder pickles and base64-wraps whatever json can't encode itself.
json_str = json.dumps(tree, cls=PythonObjectEncoder, indent=4)
print('json_str:', json_str)

# The object hook unpickles the wrapped payload while the JSON is loaded.
pyobj = json.loads(json_str, object_hook=as_python_object)
print(type(pyobj))
Output:
json_str: {
"_python_object": "gANjX19tYWluX18KVHJlZU5vZGUKcQApgXEBfXECKFgIAAAAY2hp"
"bGRyZW5xA11xBChoACmBcQV9cQYoaANdcQdYBAAAAG5hbWVxCFgH"
"AAAAQ2hpbGQgMXEJdWJoACmBcQp9cQsoaANdcQxoACmBcQ19cQ4o"
"aANdcQ9oACmBcRB9cREoaANdcRJoCFgPAAAAR3JlYXQgR3JhbmQg"
"S2lkcRN1YmFoCFgJAAAAR3JhbmQgS2lkcRR1YmFoCFgHAAAAQ2hp"
"bGQgMnEVdWJlaAhYBgAAAFBhcmVudHEWdWIu"
}
<class '__main__.TreeNode'>
Since you're dealing with a tree structure, it's natural to use nested dictionaries. The snippet of code below creates a subclass of dict
and uses itself as the underlying __dict__
of the instance — which is an interesting and useful trick I've run across in many different contexts:
Is it preferable to return an anonymous class or an object to use as a 'struct'? (stackoverflow)
How to use a dot “.” to access members of dictionary? (stackoverflow)
jsobject.py
(PyDoc.net)
Making Python Objects that act like Javascript Objects
(James Robert's blog)
AttrDict
(ActiveState recipe)
Dictionary with attribute-style access
(ActiveState recipe)
…so often in fact, that I consider it to be a (less well-known) Python idiom.
class TreeNode(dict):
    """Tree node stored as a dict that doubles as its own attribute namespace.

    Because the instance is its own ``__dict__``, ``node.name`` and
    ``node['name']`` read and write the same entry.
    """

    def __init__(self, name, children=None):
        """Create a node called *name* holding a fresh list of *children*."""
        super().__init__()
        # From here on, attribute assignment stores items in the dict itself.
        self.__dict__ = self
        self.name = name
        if children is None:
            self.children = []
        else:
            self.children = list(children)
This solves half the serialization battle, but when the data produced is read back in with json.loads()
it will be a regular dictionary object, not an instance of TreeNode.
This is because JSONEncoder
can encode dictionaries (and subclasses of them) itself.
One way to address that is to add an alternative constructor method to the TreeNode
class that can be called to reconstruct the data structure from the nested dictionary that json.loads()
returns.
Here's what I mean:
...
@classmethod
def from_dict(cls, dict_):
    """ Recursively (re)construct TreeNode-based tree from dictionary.

    *dict_* must have a ``'name'`` entry. Its ``'children'`` entry, if
    present, is an iterable of dictionaries of the same shape. A missing or
    null ``'children'`` entry means "no children", matching the
    constructor's default.

    As a classmethod, it builds nodes of whichever class it is called on, so
    subclasses get instances of their own type back.
    """
    children = [cls.from_dict(child) for child in dict_.get('children') or ()]
    return cls(dict_['name'], children)
if __name__ == '__main__':
    import json

    # Build the demo tree: Parent -> [Child 1, Child 2 -> Grand Kid -> Great Grand Kid].
    child2 = TreeNode('Child 2', [TreeNode('Grand Kid', [TreeNode('Great Grand Kid')])])
    tree = TreeNode('Parent', [TreeNode('Child 1'), child2])

    # TreeNode is a dict subclass, so the stock encoder serializes it directly.
    json_str = json.dumps(tree, indent=2)
    print(json_str, end='\n\n')

    # json.loads() returns plain dicts; from_dict() turns them back into TreeNodes.
    pyobj = TreeNode.from_dict(json.loads(json_str))  # reconstitute
    print('pyobj class: {}'.format(type(pyobj).__name__))  # -> TreeNode
    print(json.dumps(pyobj, indent=2))
Output:
{
"name": "Parent",
"children": [
{
"name": "Child 1",
"children": []
},
{
"name": "Child 2",
"children": [
{
"name": "Grand Kid",
"children": [
{
"name": "Great Grand Kid",
"children": []
}
]
}
]
}
]
}
pyobj class: TreeNode
{
"name": "Parent",
"children": [
{
"name": "Child 1",
"children": []
},
{
"name": "Child 2",
"children": [
{
"name": "Grand Kid",
"children": [
{
"name": "Great Grand Kid",
"children": []
}
]
}
]
}
]
}