Context Navigation

← Previous Change
Next Change →

Changeset 663 for branches

Timestamp:

05/26/13 15:14:33 (3 years ago)

Author:

mmckerns

Message:

migrated keymap functions to keymap classes; enable memoize to use keymaps

Location:

branches/decorate

Files:

: 4 edited
: 1 moved

cache.py (modified) (22 diffs)
keymaps.py (moved) (moved from branches/decorate/cache_helper.py) (1 diff)
memoize.py (modified) (7 diffs)
surrogate.py (modified) (1 diff)
test_memoize.py (modified) (5 diffs)

Legend:

: Unmodified
: Added
: Removed

branches/decorate/cache.py

-                      r662
+                      r663
+#
+import collections
+try:
+    from collections import namedtuple
+except ImportError:
+    from namedtuple import namedtuple
+from collections import deque
 from random import choice #XXX: biased?
 from heapq import nsmallest
 …
 from functools import update_wrapper
 from threading import RLock
+from cache_helper import _CacheInfo, Counter, hashmap as _keymap
+from keymaps import hashmap as _keymap
+_CacheInfo = namedtuple("CacheInfo", ['hits','misses','maxsize','currsize'])
+class Counter(dict):
+    'Mapping where default values are zero'
+    def __missing__(self, key):
+        return 0
 def no_cache(*arg, **kwd):
 …
     '''Infinitely-growing cache decorator.
-    If *typed* is True, arguments of different types will be cached separately.
-    For example, f(3.0) and f(3) will be treated as distinct calls with
-    distinct results.  Cache typing has a memory penalty, and may also may be
-    ignored by some 'keymaps'.
     If *keymap* is given, it will replace the hashing algorithm for generating
+    cache keys.  For example, see the other hashing algorithms available in
+    'cache_helper.py'. With the default keymap, arguments to the cached
+    function must be hashable.
+    cache keys.  Several hashing algorithms are available in 'keymaps.py'.  With
+    the default keymap, arguments to the cached function must be hashable.
+    If the keymap retains type information, then arguments of different types
+    will be cached separately.  For example, f(3.0) and f(3) will be treated
+    as distinct calls with distinct results.  Cache typing has a memory penalty,
+    and may also be ignored by some 'keymaps'.
     View the cache statistics named tuple (hits, misses, maxsize, currsize) with
 …
     '''
     maxsize = None
     make_key = kwd.get('keymap', _keymap)
     typed = kwd.get('typed', False)
+    make_key = kwd.get('keymap', None)
+    if make_key is None: make_key = _keymap()
     def decorating_function(user_function):
 …
         def wrapper(*args, **kwds):
             key = make_key(args, kwds, typed)
+            key = make_key(*args, **kwds)
             # get cache entry or compute if not found
 …
 def lfu_cache(maxsize=100, keymap=None, typed=False):
+def lfu_cache(maxsize=100, keymap=None):
     '''Least-frequently-used cache decorator.
 …
     will grow without bound.
-    If *typed* is True, arguments of different types will be cached separately.
-    For example, f(3.0) and f(3) will be treated as distinct calls with
-    distinct results.  Cache typing has a memory penalty, and may also may be
-    ignored by some 'keymaps'.
     If *keymap* is given, it will replace the hashing algorithm for generating
+    cache keys.  For example, see the other hashing algorithms available in
+    'cache_helper.py'. With the default keymap, arguments to the cached
+    function must be hashable.
+    cache keys.  Several hashing algorithms are available in 'keymaps.py'.  With
+    the default keymap, arguments to the cached function must be hashable.
+    If the keymap retains type information, then arguments of different types
+    will be cached separately.  For example, f(3.0) and f(3) will be treated
+    as distinct calls with distinct results.  Cache typing has a memory penalty,
+    and may also be ignored by some 'keymaps'.
     View the cache statistics named tuple (hits, misses, maxsize, currsize) with
 …
     '''
     if maxsize == 0: return no_cache()
     if keymap is None: make_key = _keymap
+    if keymap is None: make_key = _keymap()
     else: make_key = keymap
     if maxsize is None: return inf_cache(keymap=make_key, typed=typed)
+    if maxsize is None: return inf_cache(keymap=make_key)
     def decorating_function(user_function):
 …
         def wrapper(*args, **kwds):
             key = make_key(args, kwds, typed)
+            key = make_key(*args, **kwds)
             # get cache entry or compute if not found
 …
 def lru_cache(maxsize=100, keymap=None, typed=False):
+def lru_cache(maxsize=100, keymap=None):
     '''Least-recently-used cache decorator.
 …
     will grow without bound.
-    If *typed* is True, arguments of different types will be cached separately.
-    For example, f(3.0) and f(3) will be treated as distinct calls with
-    distinct results.  Cache typing has a memory penalty, and may also may be
-    ignored by some 'keymaps'.
     If *keymap* is given, it will replace the hashing algorithm for generating
+    cache keys.  For example, see the other hashing algorithms available in
+    'cache_helper.py'. With the default keymap, arguments to the cached
+    function must be hashable.
+    cache keys.  Several hashing algorithms are available in 'keymaps.py'.  With
+    the default keymap, arguments to the cached function must be hashable.
+    If the keymap retains type information, then arguments of different types
+    will be cached separately.  For example, f(3.0) and f(3) will be treated
+    as distinct calls with distinct results.  Cache typing has a memory penalty,
+    and may also be ignored by some 'keymaps'.
     View the cache statistics named tuple (hits, misses, maxsize, currsize) with
 …
     '''
     if maxsize == 0: return no_cache()
     if keymap is None: make_key = _keymap
+    if keymap is None: make_key = _keymap()
     else: make_key = keymap
     if maxsize is None: return inf_cache(keymap=make_key, typed=typed)
+    if maxsize is None: return inf_cache(keymap=make_key)
     maxqueue = maxsize * 10 #XXX: user settable? confirm this works as expected
     def decorating_function(user_function):
         cache = dict()                  # mapping of args to results
         queue = collections.deque()     # order that keys have been used
+        queue = deque()                 # order that keys have been used
         refcount = Counter()            # times each key is in the queue
         sentinel = object()             # marker for looping around the queue
 …
         def wrapper(*args, **kwds):
             key = make_key(args, kwds, typed)
+            key = make_key(*args, **kwds)
             # get cache entry or compute if not found
 …
 def mru_cache(maxsize=100, keymap=None, typed=False):
+def mru_cache(maxsize=100, keymap=None):
     '''Most-recently-used cache decorator.
 …
     will grow without bound.
-    If *typed* is True, arguments of different types will be cached separately.
-    For example, f(3.0) and f(3) will be treated as distinct calls with
-    distinct results.  Cache typing has a memory penalty, and may also may be
-    ignored by some 'keymaps'.
     If *keymap* is given, it will replace the hashing algorithm for generating
+    cache keys.  For example, see the other hashing algorithms available in
+    'cache_helper.py'. With the default keymap, arguments to the cached
+    function must be hashable.
+    cache keys.  Several hashing algorithms are available in 'keymaps.py'.  With
+    the default keymap, arguments to the cached function must be hashable.
+    If the keymap retains type information, then arguments of different types
+    will be cached separately.  For example, f(3.0) and f(3) will be treated
+    as distinct calls with distinct results.  Cache typing has a memory penalty,
+    and may also be ignored by some 'keymaps'.
     View the cache statistics named tuple (hits, misses, maxsize, currsize) with
 …
     '''
     if maxsize == 0: return no_cache()
     if keymap is None: make_key = _keymap
+    if keymap is None: make_key = _keymap()
     else: make_key = keymap
     if maxsize is None: return inf_cache(keymap=make_key, typed=typed)
+    if maxsize is None: return inf_cache(keymap=make_key)
     def decorating_function(user_function):
         cache = dict()                  # mapping of args to results
         queue = collections.deque()     # order that keys have been used
+        queue = deque()                 # order that keys have been used
         stats = [0, 0]                  # make statistics updateable non-locally
         HITS, MISSES = 0, 1             # names for the stats fields
 …
         def wrapper(*args, **kwds):
             key = make_key(args, kwds, typed)
+            key = make_key(*args, **kwds)
             # get cache entry or compute if not found
 …
 def rr_cache(maxsize=100, keymap=None, typed=False):
+def rr_cache(maxsize=100, keymap=None):
     '''Random-replacement cache decorator.
 …
     will grow without bound.
-    If *typed* is True, arguments of different types will be cached separately.
-    For example, f(3.0) and f(3) will be treated as distinct calls with
-    distinct results.  Cache typing has a memory penalty, and may also may be
-    ignored by some 'keymaps'.
     If *keymap* is given, it will replace the hashing algorithm for generating
+    cache keys.  For example, see the other hashing algorithms available in
+    'cache_helper.py'. With the default keymap, arguments to the cached
+    function must be hashable.
+    cache keys.  Several hashing algorithms are available in 'keymaps.py'.  With
+    the default keymap, arguments to the cached function must be hashable.
+    If the keymap retains type information, then arguments of different types
+    will be cached separately.  For example, f(3.0) and f(3) will be treated
+    as distinct calls with distinct results.  Cache typing has a memory penalty,
+    and may also be ignored by some 'keymaps'.
     View the cache statistics named tuple (hits, misses, maxsize, currsize) with
 …
     '''
     if maxsize == 0: return no_cache()
     if keymap is None: make_key = _keymap
+    if keymap is None: make_key = _keymap()
     else: make_key = keymap
     if maxsize is None: return inf_cache(keymap=make_key, typed=typed)
+    if maxsize is None: return inf_cache(keymap=make_key)
     def decorating_function(user_function):
 …
         def wrapper(*args, **kwds):
             key = make_key(args, kwds, typed)
+            key = make_key(*args, **kwds)
             # get cache entry or compute if not found
 …
 def _test_hits(algorithm, maxsize=20, typed=False, rangelimit=5, tries=1000):
     @algorithm(maxsize=maxsize, typed=typed)
+def _test_hits(algorithm, maxsize=20, keymap=None, rangelimit=5, tries=1000):
+    @algorithm(maxsize=maxsize, keymap=keymap)
     def f(x, y):
         return 3*x+y

branches/decorate/keymaps.py

-                      r662
+                      r663
+# helper functions for caching
+try:
+    from collections import namedtuple
+except ImportError:
+    from namedtuple import namedtuple
+#
+_CacheInfo = namedtuple("CacheInfo", ['hits','misses','maxsize','currsize'])
+class _Sentinel(object):
+    def __repr__(self):
+        return "<SENTINEL>"
+class _NoSentinel(object):
+    def __repr__(self):
+        return "<NOSENTINEL>"
+SENTINEL = _Sentinel()
+NOSENTINEL = _NoSentinel()
+# SENTINEL = object()
+# NOSENTINEL = (SENTINEL,)  #XXX: use to indicate "don't use a sentinel" ?
+#XXX: convert this into a 'keymap' class...
+def keymap(args, kwds, #XXX: should only take args kwds; rest are 'settings'
+           typed = False,
+           kwd_mark = (object(),), #XXX: 'nicer' kwd_mark = ("",) ?  None ?
+           flat = True, #XXX: if not flat, then key = (args, tuple)
+           fasttypes = set((int, str, frozenset, type(None))),
+           sorted=sorted, tuple=tuple, type=type, len=len):
+    'Make a cache key from optionally typed positional and keyword arguments'
+    if not flat:
+class keymap(object):
+    def __init__(self, typed=False, flat=True, sentinel=NOSENTINEL, **kwds):
+        '''initialize the key builder
+        typed: if True, include type information in the key
+        flat: if True, flatten the key to a sequence; if False, use (args, kwds)
+        sentinel: marker for separating args and kwds in flattened keys
+        '''
+        self.typed = typed
+        self.flat = flat
+        self.sentinel = sentinel
+        # some rare kwds that allow keymap customization
+        self._fasttypes = kwds.get('fasttypes', set((int,str,frozenset,type(None))))
+        self._sorted = kwds.get('sorted', sorted)
+        self._tuple = kwds.get('tuple', tuple)
+        self._type = kwds.get('type', type)
+        self._len = kwds.get('len', len)
+        return
+    def __get_sentinel(self):
+        if self._mark:
+            return self._mark[0]
+        return NOSENTINEL #XXX: or None?
+    def __sentinel(self, mark):
+        if mark != NOSENTINEL:
+            self._mark = (mark,)
+        else: self._mark = None
+    def __call__(self, *args, **kwds):
+        'Make cache key from optionally typed positional and keyword arguments'
+        if self.flat:
+            return self.encode(*args, **kwds)
+        return self.encrypt(*args, **kwds)
+    def encrypt(self, *args, **kwds):
+        """use a non-flat scheme for producing a key"""
         key = (args, kwds) #XXX: pickles larger, but is simpler to unpack
         if typed:
             sorted_items = sorted(kwds.items())
             key += (tuple(type(v) for v in args), \
                     tuple(type(v) for k, v in sorted_items))
+        if self.typed:
+            sorted_items = self._sorted(kwds.items())
+            key += (self._tuple(self._type(v) for v in args), \
+                    self._tuple(self._type(v) for (k,v) in sorted_items))
         return key
+    key = args
+    if kwds:
+        sorted_items = sorted(kwds.items())
+        key += kwd_mark
+        for item in sorted_items:
+            key += item
+    if typed: #XXX: 'kwd_mark' between each of the 4 parts, so easy to split
+        key += kwd_mark + tuple(type(v) for v in args)
+    def encode(self, *args, **kwds):
+        """use a flattened scheme for producing a key"""
+        key = args
         if kwds:
+            key += kwd_mark + tuple(type(v) for k, v in sorted_items)
+    elif len(key) == 1 and type(key[0]) in fasttypes:
+        return key[0]
+    return key
+#   return _HashedSeq(key)
+            sorted_items = self._sorted(kwds.items())
+            if self._mark: key += self._mark
+            for item in sorted_items:
+                key += item
+        if self.typed: #XXX: 'mark' between each part, so easy to split
+            if self._mark: key += self._mark
+            key += self._tuple(self._type(v) for v in args)
+            if kwds:
+                if self._mark: key += self._mark
+                key += self._tuple(self._type(v) for (k,v) in sorted_items)
+        elif self._len(key) == 1 and self._type(key[0]) in self._fasttypes:
+            return key[0]
+        return key
+'''
+class _HashedSeq(list):
+    __slots__ = 'hashvalue'
+    def decrypt(self, key):
+        raise NotImplementedError, "Key decryption is not implemented"
+    def __init__(self, tup, hash=hash):
+        self[:] = tup
+        self.hashvalue = hash(tup)
+    def decode(self, key):
+        raise NotImplementedError, "Key decoding is not implemented"
+    def __hash__(self):
+        return self.hashvalue
+    def dumps(self, obj):
+        """a more pickle-like interface for encoding a key"""
+        return self.encode(obj)
+    def loads(self, key):
+        """a more pickle-like interface for decoding a key"""
+        return self.decode(key)
+    # interface
+    sentinel = property(__get_sentinel, __sentinel)
+    pass
+def keymap(args, kwds, kwd_mark=object()):
+    """kwd_mark is a separator between args and kwds"""
+    key = args
+    if kwds:
+        key += (kwd_mark,) + tuple(sorted(kwds.items()))
+    return key
+'''
+class hashmap(keymap):
+    def encode(self, *args, **kwds):
+        return hash(keymap.encode(self, *args, **kwds))
+    def encrypt(self, *args, **kwds):
+        return hash(keymap.encrypt(self, *args, **kwds))
+def hashmap(*args, **kwds):
+    return hash(keymap(*args, **kwds))
+class stringmap(keymap):
+   #def __init__(self, *args, **kwds):
+   #    keymap.__init__(self, *args, **kwds)
+   #    self.typed = False  #XXX: is always typed, so set typed=False
+    def encode(self, *args, **kwds):
+        return str(keymap.encode(self, *args, **kwds))
+    def encrypt(self, *args, **kwds):
+        return str(keymap.encrypt(self, *args, **kwds))
 import dill as pickle
+def picklemap(*args, **kwds): #XXX: is always typed, so set typed=False
+    kwds['typed'] = kwds.get('typed', False)
+    return pickle.dumps(keymap(*args, **kwds))
+def stringmap(*args, **kwds): #XXX: is always typed, so set typed=False
+    kwds['typed'] = kwds.get('typed', False)
+    return str(keymap(*args, **kwds))
+class picklemap(keymap):
+   #def __init__(self, *args, **kwds):
+   #    keymap.__init__(self, *args, **kwds)
+   #    self.typed = False  #XXX: is always typed, so set typed=False
+    def encode(self, *args, **kwds):
+        return pickle.dumps(keymap.encode(self, *args, **kwds))
+    def encrypt(self, *args, **kwds):
+        return pickle.dumps(keymap.encrypt(self, *args, **kwds))
+class Counter(dict):
+    'Mapping where default values are zero'
+    def __missing__(self, key):
+        return 0
+# EOF

branches/decorate/memoize.py

-                      r662
+                      r663
 decorators that cache results to memory, to file, or to a database
 """
+from keymaps import stringmap
 __all__ = ['memoize','memoized','archive_dict','db_dict']
 …
 def memoized(memo=None, serializer=str, tol=None, deep=False, archived=False):
+def memoized(memo=None, keymap=None, tol=None, deep=False, archived=False):
     """Decorator that memoizes a function's return value each time it is called.
     If called later with the same arguments, the memoized value is returned, and
 …
     memo = storage hashmap (default is {})
     serializer = serializing function (e.g. pickle.dumps, but default is str)
+    keymap = cache key encoder (default is keymaps.stringmap(flat=False))
     tol = integer tolerance for rounding (default is None)
     deep = boolean for rounding depth (default is False, i.e. 'shallow')
     archived = boolean for archiving (default is False, i.e. "don't archive")
     """
+    if keymap is None: keymap = stringmap(flat=False)
     if memo is None: memo = archive_dict()
     elif type(memo) is dict: memo = archive_dict(memo)
 …
             try:
                 _args, _kwds = rounded_args(*args, **kwds)
                 argstr = serializer((_args, _kwds))
+                argstr = keymap(*_args, **_kwds)
                 if memo.has_key(argstr):
                     return memo[argstr]
 …
 #FIXME: use cache maxsize algorithms... where dump if maxsize
 #FIXME: can make trash_archive where archives to del
-#FIXME: can have serializer be 'hash' or lambda x:x
-#FIXME: should sort(kwds.items) in argstr; probably add an object to separate
 class memoize(object):
 …
     Can memoize a *method* on an object.
     """
     def __init__(self, memo=None, serializer=str, tol=None, deep=False):
+    def __init__(self, memo=None, keymap=None, tol=None, deep=False):
 #     self.func = func
+      if keymap is None: keymap = stringmap(flat=False)
       if memo is None: memo = archive_dict()
       elif type(memo) is dict: memo = archive_dict(memo)
       self.memo = memo
       self.__serializer = serializer
+      self.__keymap = keymap
       if deep: rounded = deep_round
 …
         try:
           _args, _kwds = self.__rounded_args(*args, **kwds)
           argstr = self.__serializer((_args, _kwds))
+          argstr = self.__keymap(*_args, **_kwds)
           if self.memo.has_key(argstr):
             return self.memo[argstr]

branches/decorate/surrogate.py

-                      r658
+                      r663
-import dill
 from memoize import memoized
+#@memoized(serializer=dill.dumps, tol=0, deep=True) # slower, but more robust
+from keymaps import picklemap
+dumps = picklemap(flat=False)
+#@memoized(keymap=dumps, tol=0, deep=True) # slower, but more robust
 #@memoized(tol=0, deep=True)
 #@memoized(serializer=dill.dumps, archived=True)    # slower, but more robust
+#@memoized(keymap=dumps, archived=True)    # slower, but more robust
 @memoized(archived=True)
 def marc_surr(x):

branches/decorate/test_memoize.py

-                      r658
+                      r663
 #from memoize import memoize
 from timer import timed
+import dill
+from keymaps import picklemap
+dumps = picklemap(flat=False)
 class Spam(object):
     """A simple class with a memoized method"""
     @memoized(serializer=dill.dumps)
+    @memoized(keymap=dumps)
     def eggs(self, *args, **kwds):
         print 'new:', args, kwds
 …
 # here caching saves time in a recursive function...
 @memoized(serializer=dill.dumps)
+@memoized(keymap=dumps)
 @timed()
 def fibonacci(n):
 …
 from numpy import sum, asarray
 @memoized(serializer=dill.dumps, tol=3)
+@memoized(keymap=dumps, tol=3)
 def add(*args):
     print 'new:', args
 …
     return sum(x**2 - y**2)
 cost1 = memoized(serializer=dill.dumps, tol=1)(cost)
 cost0 = memoized(serializer=dill.dumps, tol=0)(cost)
 costD = memoized(serializer=dill.dumps, tol=0, deep=True)(cost)
+cost1 = memoized(keymap=dumps, tol=1)(cost)
+cost0 = memoized(keymap=dumps, tol=0)(cost)
+costD = memoized(keymap=dumps, tol=0, deep=True)(cost)
 print "rounding to one decimals..."
 …
 print "re_dict_memo = %s" % add.memo
 @memoized(serializer=dill.dumps)
+@memoized(keymap=dumps)
 def add(x,y):
     return x+y

Note: See TracChangeset for help on using the changeset viewer.

Download in other formats: