python ast.literal_eval and datetime

Following up on Ignacio Vazquez-Abrams' idea:

import ast
import datetime

def _find_disallowed_node(node):
    """Return the first AST node outside the permitted grammar, or None.

    Permitted: plain str/number constants, tuples and dicts of permitted
    nodes, and positional-only calls spelled exactly ``datetime.datetime(...)``.
    """
    if isinstance(node, ast.Constant):
        value = node.value
        # bool is a subclass of int; exclude it (and None/bytes/Ellipsis) so
        # only string and numeric literals are accepted.
        is_plain = (isinstance(value, (str, int, float, complex))
                    and not isinstance(value, bool))
        return None if is_plain else node
    if isinstance(node, ast.Tuple):
        children = node.elts
    elif isinstance(node, ast.Dict):
        # A None key marks ``**mapping`` unpacking inside a dict display.
        if any(key is None for key in node.keys):
            return node
        children = node.keys + node.values
    elif isinstance(node, ast.Call):
        func = node.func
        is_datetime_ctor = (isinstance(func, ast.Attribute)
                            and func.attr == 'datetime'
                            and isinstance(func.value, ast.Name)
                            and func.value.id == 'datetime')
        if not is_datetime_ctor or node.keywords:
            return node
        children = node.args
    else:
        return node
    for child in children:
        offender = _find_disallowed_node(child)
        if offender is not None:
            return offender
    return None


def parse_datetime_dict(astr, debug=False):
    """Evaluate a literal expression that may contain ``datetime.datetime(...)``.

    The string is parsed as a single expression and accepted only if it is
    built from string/number literals, tuples, dicts and positional calls to
    ``datetime.datetime``.

    Args:
        astr: Source text of the expression to evaluate.
        debug: When true, print the rejected AST node and its attributes
            before raising.

    Returns:
        The value of the expression.

    Raises:
        ValueError: If ``astr`` is not a valid single expression or contains
            a construct outside the permitted set. Errors raised by the
            ``datetime.datetime`` constructor itself propagate unchanged.
    """
    try:
        # mode='eval' rejects statements ("import os", "a; b") at parse time,
        # so they surface as ValueError instead of a SyntaxError from eval().
        tree = ast.parse(astr, mode='eval')
    except SyntaxError:
        raise ValueError(astr)

    offender = _find_disallowed_node(tree.body)
    if offender is not None:
        if debug:
            attrs = [attr for attr in dir(offender) if not attr.startswith('__')]
            print(offender)
            for attrname in attrs:
                print('    {k} ==> {v}'.format(k=attrname,
                                               v=getattr(offender, attrname)))
        raise ValueError(astr)

    # NOTE(security): eval() on external text is only acceptable here because
    # the tree was validated above. The restricted namespace is a second line
    # of defence, not a sandbox on its own.
    namespace = {'__builtins__': {}, 'datetime': datetime}
    return eval(compile(tree, '<datetime-dict>', 'eval'), namespace)

# Inputs that parse_datetime_dict must accept.
good_strings = ["{'the_datetime': datetime.datetime(2010, 11, 21, 0, 56, 58)}"]
# Inputs that parse_datetime_dict must refuse with ValueError.
bad_strings = [
    "__import__('os'); os.unlink",
    "import os; os.unlink",
    "import(os)",  # SyntaxError
]

for astr in good_strings:
    result = parse_datetime_dict(astr)
    print('{s} ... [PASSED]'.format(s=astr))

for astr in bad_strings:
    try:
        result = parse_datetime_dict(astr)
    except ValueError:
        print('{s} ... [REJECTED]'.format(s=astr))
    else:
        # SystemExit is the exception sys.exit() raises; raising it directly
        # exits with the same message without needing `sys`, for which no
        # import is visible in this script.
        raise SystemExit('ERROR: failed to catch {s!r}'.format(s=astr))

yields

{'the_datetime': datetime.datetime(2010, 11, 21, 0, 56, 58)} ... [PASSED]
__import__('os'); os.unlink ... [REJECTED]
import os; os.unlink ... [REJECTED]
import(os) ... [REJECTED]

You could extract the (2010, 11, 21, 0, 56, 58) characters from the string using a regex, pass that to ast.literal_eval() to get a tuple, and then pass that tuple to datetime.datetime(*a_tuple) to get the object. Sounds like a lot, but each of the steps is very simple (and secure). Here's what I'm talking about:

import ast
import datetime
import re

s = "{'datetime': datetime.datetime(2010, 11, 21, 0, 56, 58)}"
# Capture only the parenthesised, comma-separated integer list that follows
# the word "datetime"; group(1) is the text handed to literal_eval.
m = re.search(r"datetime(\(\d+(?:,\s*\d+)*\))", s)
if m:  # any matches?
    # literal_eval only accepts Python literals, so the captured text cannot
    # execute code.
    args = ast.literal_eval(m.group(1))
    print(datetime.datetime(*args))
    # 2010-11-21 00:56:58
    # 2010-11-21 00:56:58

This searches for the pattern "datetime(<comma-separated list of integers>)" in the string and passes just the list of literal integer values to ast.literal_eval() for conversion to a tuple -- which should always succeed and is code-injection resistant. I believe it's called "Context-Sensitive String Evaluation" or CSSE.


Instead of writing lots of code, you can skip ast entirely when you have to parse datetime objects and simply run eval(). Please note, however, that eval() executes whatever the string contains, so this is a security risk if the string can contain untrusted Python code.

Here is how it works:

>>> x="{'datetime': datetime.datetime(2010, 11, 21, 0, 56, 58)}"
>>> b=eval(x)
>>> b
{'datetime': datetime.datetime(2010, 11, 21, 0, 56, 58)}
>>> b["datetime"].year
2010

Enjoy! :D

Tags:

Python