How to limit the number of float digits JSONEncoder produces?
It is still possible to monkey-patch `json` in Python 3, but instead of `FLOAT_REPR` you need to replace `float` in the `json.encoder` namespace. Make sure to disable `c_make_encoder`, just like in Python 2.
import json
class RoundingFloat(float):
    """float subclass whose ``__repr__`` renders a value with two decimals."""

    @staticmethod
    def __repr__(x):
        # A staticmethod so that ``RoundingFloat.__repr__(value)`` can be
        # called with a plain float as its only argument.
        return format(x, '.2f')


# Switch off the C accelerator so the patched names below are actually used.
json.encoder.c_make_encoder = None

if not hasattr(json.encoder, 'FLOAT_REPR'):
    # Python 3
    json.encoder.float = RoundingFloat
else:
    # Python 2
    json.encoder.FLOAT_REPR = RoundingFloat.__repr__

print(json.dumps({'number': 1.0 / 81}))
Upsides: simplicity, can do other formatting (e.g. scientific notation, strip trailing zeroes etc). Downside: it looks more dangerous than it is.
Here is a python code snippet that shows how to quantize json output to the specified number of digits:
# Python example code; error handling not shown.


# Example quantization function.
def quant(n):
    """Round *n* to ``args.prec`` decimal places and return it as a float.

    Passed to ``json.loads`` as ``parse_float`` below, so *n* is whatever the
    decoder hands over for a float literal; ``float(n)`` converts it.
    """
    scale = 10 ** args.prec
    return round(float(n) * scale) / scale  # could use decimal.quantize


# Read the file input (this could be done in one step, but breaking it up
# allows more flexibility). The ``with`` block closes the handle afterwards.
with open(input_file_name) as fin:
    indata = fin.read()

# Process the data by parsing it and using the callback to quantize each
# float as it is parsed, then re-serialize without extra whitespace.
outdata = json.dumps(json.loads(indata, parse_float=quant), separators=(",", ":"))

# Write the output. The output file is opened only now, so it is not
# truncated when reading or parsing the input fails.
with open(output_file_name, "w+") as fout:
    fout.write(outdata)
The above is what the jsonvice command-line tool uses to quantize the floating-point json numbers to whatever precision is desired to save space.
https://pypi.org/project/jsonvice/
This can be installed with pip or pipx (see docs).
pip3 install jsonvice
Disclaimer: I wrote this when needing to test quantized machine learning model weights.
Here's something that you may be able to use that's based on my answer to the question:
Write two-dimensional list to JSON file.
I say "may" because it requires "wrapping" all the float values in the Python dictionary (or list) before JSON encoding it with `dump()`.
(Tested with Python 3.7.2.)
from _ctypes import PyObj_FromPtr
import json
import re
class FloatWrapper(object):
    """Wrap a float so that MyEncoder writes it with seven decimal places."""

    def __init__(self, value):
        self.value = value


class MyEncoder(json.JSONEncoder):
    """JSON encoder that writes FloatWrapper values as ``'%.7f'`` numbers.

    ``default()`` replaces each FloatWrapper with a marker string built from
    the wrapper's ``id()``; ``iterencode()`` then swaps every quoted marker in
    the encoded text for the formatted number.

    Limitation: an ordinary string whose entire content equals the marker of
    a wrapper encoded by this same encoder instance is replaced as well.
    """

    FORMAT_SPEC = '@@{}@@'
    regex = re.compile(FORMAT_SPEC.format(r'(\d+)'))  # regex: r'@@(\d+)@@'
    # The marker together with the JSON string quotes that surround it in
    # the encoded output: r'"@@(\d+)@@"'
    _quoted_regex = re.compile('"{}"'.format(FORMAT_SPEC.format(r'(\d+)')))

    def __init__(self, *args, **kwargs):
        super(MyEncoder, self).__init__(*args, **kwargs)
        # id -> FloatWrapper for every wrapper handed to default(). Holding
        # the reference keeps the object alive, so its id stays unique, and
        # lets iterencode() look it up without dereferencing a raw address.
        self._wrappers = {}

    def default(self, obj):
        """Return a marker string for FloatWrapper; defer everything else."""
        if isinstance(obj, FloatWrapper):
            self._wrappers[id(obj)] = obj
            return self.FORMAT_SPEC.format(id(obj))
        return super(MyEncoder, self).default(obj)

    def _format_match(self, match):
        """Return the number text for one quoted marker match.

        Marker-shaped text that does not belong to a recorded wrapper is
        returned unchanged.
        """
        wrapper = self._wrappers.get(int(match.group(1)))
        if wrapper is None:
            return match.group(0)
        return '%.7f' % wrapper.value  # Create alt repr.

    def iterencode(self, obj, **kwargs):
        """Yield encoded chunks with every float marker replaced."""
        for encoded in super(MyEncoder, self).iterencode(obj, **kwargs):
            # A chunk may hold several markers (the whole document arrives
            # as one chunk on the one-shot path), so substitute all of them.
            yield self._quoted_regex.sub(self._format_match, encoded)
# Sample data: float values must be wrapped by hand for MyEncoder to see them.
d = {
    'val': FloatWrapper(5.78686876876089075543),  # Must wrap float values.
    'name': 'kjbkjbkj',
}

# Serialize the sample to disk through the custom encoder.
with open('float_test.json', 'w') as file:
    json.dump(d, file, cls=MyEncoder, indent=4)
Contents of file created:
{
"val": 5.7868688,
"name": "kjbkjbkj"
}
Update:
As I mentioned, the above requires all the `float` values to be wrapped before calling `json.dump()`. Fortunately, doing that can be automated by adding and using the following (minimally tested) utility:
def wrap_type(obj, kind, wrapper):
    """Return a copy of *obj* with every instance of *kind* wrapped.

    Dictionaries, lists and tuples are rebuilt recursively (as a plain
    ``dict``, ``list`` and ``tuple`` respectively); the input is not
    modified. Any other value is replaced by ``wrapper(value)`` when it is an
    instance of *kind* and returned unchanged otherwise. This also applies
    when *obj* itself is such a value rather than a container.

    Dictionaries and lists are always recursed into, never wrapped, even if
    they match *kind*. A tuple that matches *kind* is wrapped as a whole.
    """
    if isinstance(obj, dict):
        return {key: wrap_type(value, kind, wrapper)
                for key, value in obj.items()}
    if isinstance(obj, list):
        return [wrap_type(value, kind, wrapper) for value in obj]
    if isinstance(obj, kind):
        return wrapper(obj)
    if isinstance(obj, tuple):
        # Tuples can hold values of *kind* just like lists do.
        return tuple(wrap_type(value, kind, wrapper) for value in obj)
    return obj
# Sample data with a plain, unwrapped float.
d = {
    'val': 5.78686876876089075543,
    'name': 'kjbkjbkj',
}

# wrap_type() wraps the float values on the fly, just before encoding.
with open('float_test.json', 'w') as file:
    wrapped = wrap_type(d, float, FloatWrapper)
    json.dump(wrapped, file, cls=MyEncoder, indent=4)
Option 1: Use regular expression matching to round.
You can dump your object to a string using `json.dumps` and then use the technique shown on this post to find and round your floating point numbers.
To test it out, I added some more complicated nested structures on top of the example you provided:
# Sample data: the original example plus some nested structures.
d = {
    'val': 5.78686876876089075543,
    'name': 'kjbkjbkj',
    "mylist": [1.23456789, 12, 1.23, {"foo": "a", "bar": 9.87654321}],
    "mydict": {"bar": "b", "foo": 1.92837465},
}

# dump the object to a string
d_string = json.dumps(d, indent=4)

# find numbers with 8 or more digits after the decimal point
pat = re.compile(r"\d+\.\d{8,}")


def mround(match):
    """Return the matched number text reformatted with seven decimals."""
    matched_text = match.group()
    return "{:.7f}".format(float(matched_text))


# write the modified string to a file
rounded = pat.sub(mround, d_string)
with open('test.json', 'w') as f:
    f.write(rounded)
The output `test.json` looks like:
{
"val": 5.7868688,
"name": "kjbkjbkj",
"mylist": [
1.2345679,
12,
1.23,
{
"foo": "a",
"bar": 9.8765432
}
],
"mydict": {
"bar": "b",
"foo": 1.9283747
}
}
One limitation of this method is that it will also match numbers that are within double quotes (floats represented as strings). You could come up with a more restrictive regex to handle this, depending on your needs.
Option 2: subclass json.JSONEncoder
Here is something that will work on your example and handle most of the edge cases you will encounter:
import json
class MyCustomEncoder(json.JSONEncoder):
    """JSON encoder that writes finite floats with seven decimal places.

    ``dict``, ``list`` and ``tuple`` containers are serialized by this class
    itself, always with ``", "`` between items and ``": "`` after keys;
    ``indent`` and ``separators`` are not applied to them. Every other value
    is delegated to ``json.JSONEncoder``.
    """

    def iterencode(self, obj, _one_shot=False):
        """Yield the JSON text for *obj* piece by piece.

        ``_one_shot`` is accepted and ignored: ``JSONEncoder.encode`` (and
        therefore ``json.dumps``) passes it as a keyword argument.
        """
        if isinstance(obj, float):
            if obj != obj or obj in (float('inf'), float('-inf')):
                # NaN and infinities have no fixed-point form; let the stock
                # encoder render them (it also enforces ``allow_nan``).
                for chunk in json.JSONEncoder.iterencode(self, obj):
                    yield chunk
            else:
                yield format(obj, '.7f')
        elif isinstance(obj, dict):
            items = sorted(obj.items()) if self.sort_keys else obj.items()
            yield '{'
            first = True
            for key, value in items:
                key_text = self._encode_key(key)
                if key_text is None:  # unsupported key dropped by skipkeys
                    continue
                if not first:
                    yield ", "
                first = False
                yield key_text + ': '
                for chunk in MyCustomEncoder.iterencode(self, value):
                    yield chunk
            yield '}'
        elif isinstance(obj, (list, tuple)):
            yield "["
            for i, o in enumerate(obj):
                if i:
                    yield ", "
                for chunk in MyCustomEncoder.iterencode(self, o):
                    yield chunk
            yield "]"
        else:
            for chunk in json.JSONEncoder.iterencode(self, obj):
                yield chunk

    def _encode_key(self, key):
        """Return *key* as a quoted, escaped JSON string.

        ``True``/``False``/``None`` become ``"true"``/``"false"``/``"null"``
        and numbers are converted to their encoded text first. For any other
        non-string key, returns ``None`` when ``skipkeys`` is set and raises
        ``TypeError`` otherwise.
        """
        if isinstance(key, str):
            text = key
        elif key is True:
            text = 'true'
        elif key is False:
            text = 'false'
        elif key is None:
            text = 'null'
        elif isinstance(key, (int, float)):
            text = ''.join(MyCustomEncoder.iterencode(self, key))
        elif self.skipkeys:
            return None
        else:
            raise TypeError(
                'keys must be str, int, float, bool or None, not {}'.format(
                    key.__class__.__name__))
        # Delegating the string to the base encoder escapes quotes and
        # backslashes and applies ``ensure_ascii``.
        return ''.join(json.JSONEncoder.iterencode(self, text))
Now write the file using the custom encoder.
# Serialize d to test.json; MyCustomEncoder controls how floats are written.
with open('test.json', 'w') as out_file:
    json.dump(d, out_file, cls=MyCustomEncoder)
The output file `test.json`:
{"val": 5.7868688, "name": "kjbkjbkj", "mylist": [1.2345679, 12, 1.2300000, {"foo": "a", "bar": 9.8765432}], "mydict": {"bar": "b", "foo": 1.9283747}}
In order to get other keyword arguments like `indent` to work, the easiest way would be to read in the file that was just written and write it back out using the default encoder:
# write d using custom encoder
with open('test.json', 'w') as f:
json.dump(d, f, cls = MyCustomEncoder)
# load output into new_d
with open('test.json', 'r') as f:
new_d = json.load(f)
# write new_d out using default encoder
with open('test.json', 'w') as f:
json.dump(new_d, f, indent=4)
Now the output file is the same as shown in option 1.