How to limit the size of a dictionary?
Here's a simple, no-LRU Python 2.6+ solution (in older Pythons you could do something similar with UserDict.DictMixin
, but in 2.6 and better that's not recommended, and the ABCs from collections
are preferable anyway...):
import collections
class MyDict(collections.MutableMapping):
def __init__(self, maxlen, *a, **k):
self.maxlen = maxlen
self.d = dict(*a, **k)
while len(self) > maxlen:
self.popitem()
def __iter__(self):
return iter(self.d)
def __len__(self):
return len(self.d)
def __getitem__(self, k):
return self.d[k]
def __delitem__(self, k):
del self.d[k]
def __setitem__(self, k, v):
if k not in self and len(self) == self.maxlen:
self.popitem()
self.d[k] = v
d = MyDict(5)
for i in range(10):
d[i] = i
print(sorted(d))
As other answers mentioned, you probably don't want to subclass dict -- the explicit delegation to self.d
is unfortunately boilerplatey but it does guarantee that every other method is properly supplied by collections.MutableMapping
.
cachetools will provide you nice implementation of Mapping Hashes that does this (and it works on python 2 and 3).
Excerpt of the documentation:
For the purpose of this module, a cache is a mutable mapping of a fixed maximum size. When the cache is full, i.e. by adding another item the cache would exceed its maximum size, the cache must choose which item(s) to discard based on a suitable cache algorithm.
Python 2.7 and 3.1 have OrderedDict and there are pure-Python implementations for earlier Pythons.
from collections import OrderedDict
class LimitedSizeDict(OrderedDict):
def __init__(self, *args, **kwds):
self.size_limit = kwds.pop("size_limit", None)
OrderedDict.__init__(self, *args, **kwds)
self._check_size_limit()
def __setitem__(self, key, value):
OrderedDict.__setitem__(self, key, value)
self._check_size_limit()
def _check_size_limit(self):
if self.size_limit is not None:
while len(self) > self.size_limit:
self.popitem(last=False)
You would also have to override other methods that can insert items, such as update
. The primary use of OrderedDict
is so you can control what gets popped easily, otherwise a normal dict
would work.
Here is a simple and efficient LRU cache written with dirt simple Python code that runs on any python version 1.5.2 or later:
class LRU_Cache:
def __init__(self, original_function, maxsize=1000):
self.original_function = original_function
self.maxsize = maxsize
self.mapping = {}
PREV, NEXT, KEY, VALUE = 0, 1, 2, 3 # link fields
self.head = [None, None, None, None] # oldest
self.tail = [self.head, None, None, None] # newest
self.head[NEXT] = self.tail
def __call__(self, *key):
PREV, NEXT = 0, 1
mapping, head, tail = self.mapping, self.head, self.tail
link = mapping.get(key, head)
if link is head:
value = self.original_function(*key)
if len(mapping) >= self.maxsize:
old_prev, old_next, old_key, old_value = head[NEXT]
head[NEXT] = old_next
old_next[PREV] = head
del mapping[old_key]
last = tail[PREV]
link = [last, tail, key, value]
mapping[key] = last[NEXT] = tail[PREV] = link
else:
link_prev, link_next, key, value = link
link_prev[NEXT] = link_next
link_next[PREV] = link_prev
last = tail[PREV]
last[NEXT] = tail[PREV] = link
link[PREV] = last
link[NEXT] = tail
return value
if __name__ == '__main__':
p = LRU_Cache(pow, maxsize=3)
for i in [1,2,3,4,5,3,1,5,1,1]:
print(i, p(i, 2))