Search for a value in a nested dictionary python

This is an iterative traversal of your nested dicts that additionally keeps track of all the keys leading up to a particular point. Therefore, as soon as you find the correct value inside your dicts, you already have the keys needed to get to that value.

The code below will run as-is if you put it in a .py file. The find_mime_type(...) function returns the sequence of keys that will get you from the original dictionary to the value you want. The demo() function shows how to use it.

# Sample nested mapping used by the demo: two levels of grouping keys, then
# extension-like keys ('.svg', ...) mapping to MIME type strings.
d = {
    'dict1': {
        'part1': {
            '.wbxml': 'application/vnd.wap.wbxml',
            '.rl': 'application/resource-lists+xml',
        },
        'part2': {
            '.wsdl': 'application/wsdl+xml',
            '.rs': 'application/rls-services+xml',
            '.xop': 'application/xop+xml',
            '.svg': 'image/svg+xml',
        },
    },
    'dict2': {
        'part1': {
            '.dotx': 'application/vnd.openxmlformats-..',
            '.zaz': 'application/vnd.zzazz.deck+xml',
            '.xer': 'application/patch-ops-error+xml',
        },
    },
}


def demo():
    """Look up a sample MIME type in ``d`` and print where it was found.

    Calls ``find_mime_type`` on the module-level ``d``, prints the returned
    key chain, then walks ``d`` along that chain and prints the value reached
    as a confirmation. If ``find_mime_type`` raises ``KeyError``, a
    "could not find" message is printed and the process exits via
    ``SystemExit`` (default exit status).
    """
    # Local import: this snippet has no import section of its own.
    import sys

    mime_type = 'image/svg+xml'
    try:
        key_chain = find_mime_type(d, mime_type)
    except KeyError:
        print('Could not find this mime type: {0}'.format(mime_type))
        # sys.exit() instead of the bare exit(): the latter is injected by the
        # ``site`` module for interactive use and is absent under ``python -S``
        # or in frozen applications.
        sys.exit()
    print('Found {0} mime type here: {1}'.format(mime_type, key_chain))

    # Follow the key chain from the root to confirm it reaches the value.
    nested = d
    for key in key_chain:
        nested = nested[key]
    print('Confirmation lookup: {0}'.format(nested))


def find_mime_type(d, mime_type):
    """Return the list of keys leading from ``d`` to a value equal to ``mime_type``.

    The search is iterative: a list is used as a stack of
    ``(key_chain, value)`` pairs, so no recursion is involved. Any value that
    exposes ``.items()`` is descended into; every other value is only compared
    against ``mime_type``.

    Args:
        d: The (possibly nested) mapping to search.
        mime_type: The value to look for, compared with ``==``.

    Returns:
        A list of keys such that indexing ``d`` with each key in turn yields
        the matching value. The list is empty if ``d`` itself equals
        ``mime_type``.

    Raises:
        KeyError: If no value equal to ``mime_type`` is found. The exception
            argument is ``mime_type`` so the failure is self-describing.
    """
    # Stack of (keys taken so far, value reached by those keys).
    pending = [([], d)]
    while pending:
        key_chain, value = pending.pop()
        if value == mime_type:
            return key_chain
        try:
            children = value.items()
        except AttributeError:
            # Not a mapping-like value, so there is nothing to descend into.
            continue
        for key, child in children:
            # Build a new list per child so sibling chains never share state.
            pending.append((key_chain + [key], child))
    # The stack is exhausted without a match.
    raise KeyError(mime_type)


# Run the demo only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    demo()

Output:

Found image/svg+xml mime type here: ['dict1', 'part2', '.svg']

Confirmation lookup: image/svg+xml

Here's a simple recursive version:

def getpath(nested_dict, value, prepath=()):
    """Return the first key path in ``nested_dict`` whose value equals ``value``.

    Entries are visited in iteration order; an entry is compared against
    ``value`` first and, if it does not match but exposes ``items``, searched
    recursively before moving on to the next entry.

    Args:
        nested_dict: Mapping to search.
        value: Value to look for, compared with ``==``.
        prepath: Tuple of keys already taken to reach ``nested_dict``; it is
            prepended to the returned path.

    Returns:
        A tuple of keys leading to the match, or ``None`` if there is none.
    """
    for key, child in nested_dict.items():
        here = prepath + (key,)
        if child == value:
            return here
        if not hasattr(child, 'items'):
            # Leaf that does not match: nothing to descend into.
            continue
        found = getpath(child, value, here)
        if found is not None:
            return found
    return None

Example:

# `d` is the sample nested dict defined at the top of this file.
print(getpath(d, 'image/svg+xml'))
# -> ('dict1', 'part2', '.svg')

To yield multiple paths (Python 3 only solution):

def find_paths(nested_dict, value, prepath=()):
    """Yield every key path in ``nested_dict`` whose value equals ``value``.

    Entries are visited in iteration order. An entry that equals ``value`` is
    yielded and not descended into; any other entry exposing ``items`` is
    searched recursively.

    Args:
        nested_dict: Mapping to search.
        value: Value to look for, compared with ``==``.
        prepath: Tuple of keys already taken to reach ``nested_dict``; it is
            prepended to every yielded path.

    Yields:
        Tuples of keys, one per matching value.
    """
    for key, child in nested_dict.items():
        here = prepath + (key,)
        if child == value:
            yield here
            continue
        if hasattr(child, 'items'):
            # Delegate to the sub-generator for the nested mapping.
            yield from find_paths(child, value, here)

# `d` is the sample nested dict defined at the top of this file; prints every
# matching path, separated by spaces.
print(*find_paths(d, 'image/svg+xml'))

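Here is a solution that works for a complex data structure of nested lists and dicts. It does a substring match against dict keys, list indices and leaf values, and returns each hit as a path string: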

import pprint

def search(d, search_pattern, prev_datapoint_path=''):
    """Find every place in nested dicts/lists whose text contains a pattern.

    The structure is walked recursively. ``search_pattern`` is tested as a
    substring of ``str(...)`` of each dict key, each list index and each leaf
    value (anything that is neither a dict nor a list). A matching key or
    index is reported and then still descended into, so the same path can
    appear twice when both the key and the value under it match.

    Args:
        d: The data to search: a dict, a list, or a leaf value.
        search_pattern: Substring to look for; must be a ``str``.
        prev_datapoint_path: Path prefix for the results, typically the name
            of the variable holding ``d``.

    Returns:
        A list of path strings such as ``"d['a'][0]['id']"``, in traversal
        order. Dict keys are rendered with ``repr`` so the subscript matches
        the key's type (``['x']`` for ``'x'``, ``[1]`` for ``1``). Empty path
        strings are dropped, which only happens when ``d`` itself is a
        matching leaf and ``prev_datapoint_path`` is empty.
    """
    matches = []
    if isinstance(d, dict):
        for key, value in d.items():
            # repr() keeps the subscript valid for non-string keys and for
            # string keys that contain quotes.
            child_path = prev_datapoint_path + "[" + repr(key) + "]"
            if search_pattern in str(key):
                matches.append(child_path)
            matches.extend(search(value, search_pattern, child_path))
    elif isinstance(d, list):
        for index, item in enumerate(d):
            child_path = prev_datapoint_path + "[" + str(index) + "]"
            if search_pattern in str(index):
                # Report the path of the element, not the bare index.
                matches.append(child_path)
            matches.extend(search(item, search_pattern, child_path))
    elif search_pattern in str(d):
        matches.append(prev_datapoint_path)
    return [path for path in matches if path]


if __name__ == "__main__":
    # Sample data 1: dicts nested three levels deep with string leaf values.
    d = {'dict1':
             {'part1':
                  {'.wbxml': 'application/vnd.wap.wbxml',
                   '.rl': 'application/resource-lists+xml'},
              'part2':
                  {'.wsdl': 'application/wsdl+xml',
                   '.rs': 'application/rls-services+xml',
                   '.xop': 'application/xop+xml',
                   '.svg': 'image/svg+xml'}},
         'dict2':
             {'part1':
                  {'.dotx': 'application/vnd.openxmlformats-..',
                   '.zaz': 'application/vnd.zzazz.deck+xml',
                   '.xer': 'application/patch-ops-error+xml'}}}

    # Sample data 2: dicts and lists mixed, with string and float leaf values.
    # NOTE(review): the bare `...` entry in the "item" list presumably stands
    # for further items omitted from the example; as written it is a literal
    # Ellipsis element of that list.
    d2 = {
        "items":
            {
                "item":
                    [
                        {
                            "id": "0001",
                            "type": "donut",
                            "name": "Cake",
                            "ppu": 0.55,
                            "batters":
                                {
                                    "batter":
                                        [
                                            {"id": "1001", "type": "Regular"},
                                            {"id": "1002", "type": "Chocolate"},
                                            {"id": "1003", "type": "Blueberry"},
                                            {"id": "1004", "type": "Devil's Food"}
                                        ]
                                },
                            "topping":
                                [
                                    {"id": "5001", "type": "None"},
                                    {"id": "5002", "type": "Glazed"},
                                    {"id": "5005", "type": "Sugar"},
                                    {"id": "5007", "type": "Powdered Sugar"},
                                    {"id": "5006", "type": "Chocolate with Sprinkles"},
                                    {"id": "5003", "type": "Chocolate"},
                                    {"id": "5004", "type": "Maple"}
                                ]
                        },

                        ...

                    ]
            }
    }

# Pretty-print the paths in `d` whose key, index or leaf text contains 'svg+xml'; paths are prefixed with 'd'.
pprint.pprint(search(d,'svg+xml','d'))
>> ["d['dict1']['part2']['.svg']"]

# Pretty-print the paths in `d2` whose key, index or leaf text contains '500'; paths are prefixed with 'd2'.
pprint.pprint(search(d2,'500','d2'))
>> ["d2['items']['item'][0]['topping'][0]['id']",
 "d2['items']['item'][0]['topping'][1]['id']",
 "d2['items']['item'][0]['topping'][2]['id']",
 "d2['items']['item'][0]['topping'][3]['id']",
 "d2['items']['item'][0]['topping'][4]['id']",
 "d2['items']['item'][0]['topping'][5]['id']",
 "d2['items']['item'][0]['topping'][6]['id']"]

Search for a value in a nested dictionary python

Tags:

Python

Python 3.X

Python 2.7

Related

Recent Posts