From 18ac1a8e5aa83b276ea5b9702e5f38ecae4e57e7 Mon Sep 17 00:00:00 2001
From: Haridas Narayanaswamy <haridas.nss@gmail.com>
Date: Tue, 24 Mar 2015 08:28:56 +0530
Subject: [PATCH 1/4] Added New Client with Ketama based Consistent Hashing
 support.

TODO:
    - Add more documentation
    - Way to test the client with multiple memcache servers.
---
 memcache.py | 194 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 194 insertions(+)
diff --git a/memcache.py b/memcache.py
index 80770ed..f9f2175 100644
--- a/memcache.py
+++ b/memcache.py
@@ -1412,6 +1412,200 @@ def __str__(self):
             return "unix:%s%s" % (self.address, d)
 
 
+class KetamaClient(Client):
+    """ Memcach client with Consistent hashing support.
+
+    We are using the ketama algorithm to implement the consistent hashing.
+
+    The ideal use case for this client is when your caching servers are
+    going to gets added or removed on time and you don't want to hot load
+    most of the hash keys at each time. If you are using consistent hashing
+    based client when ever there is change in the number of caching servers,
+    only very few percentage of cache miss happens across all servers.
+    By adjusting the SERVER_WEIGHT for your environment you can get least miss
+    rate.
+
+    How Ketama Works:
+        Ketama algorithm uses very simple algorithm to achieve the consistent
+        hashing. What it does is  that,
+
+        1. Preset the total number of Keys that we are going to save on the
+           hash server. eg; Total of 2 ** 16 or so.
+        2. Logically we put all this hash keys on a ring in ascending order.
+        3. For each server we give ring slots ( Hash key, or a position on the
+           ring.) Also we place same server in multiple places of the ring
+           to get better key distribution on a server.
+        4. When we want to place a value on the hash server, we give a slot
+           for that vale on the ring, and then we find a next closest server
+           on the ring by searching clock-wise. And then we pick that server
+           to actually save key:value pair.
+        5. Reading time, the same process happens. We find the ring slot for
+           the given key, and find the next server on the ring by searching
+           clock-wise. We know that we placed the value for that key on that
+           server.
+        6. So when we add or remove one server, some server slots getting
+           removed from the ring or some new one gets added. After this update
+           there is chances that some keys will miss, since we stop searching
+           for another server on the ring once we get the first server by doing
+           the clock-wise lookup. But the miss rate will be #Keys / RING_SIZE.
+           In case of non-consistent hashing method, since the hash function
+           depends on the number of servers, the majority of the keys misses if
+           we add or remove server.
+
+    TODO: Improve the documentation, add test cases.
+    """
+    # For this Consistent hashing client, the weight of the server means number
+    # of times the same server is placed on the different slots of the ketama
+    # hash key ring. This will make sure the each server have well normalized
+    # key distribution.
+    DEFAULT_SERVER_WEIGHT = 200
+
+    # Total number of slots on the ring.
+    # If addition or deletion of a new server only causes 1 to 5 percentage
+    # cache miss on the current configuration. ie; K / RING_SIZE 
+    # where K means total  keys stored on the ring.
+    RING_SIZE = 2 ** 16
+
+    def __init__(self, *args, **kwargs):
+        # Mapping between ring slot -> server.
+        self._ketama_server_ring = {}
+
+        # Sorted server slots on top of the virtual ring.
+        self._ketama_server_slots = []
+
+        super(KetamaClient, self).__init__(*args, **kwargs)
+
+    def add_server(self, server):
+        """
+        Add new server to the client.
+
+        @param servers: server host in <IP>:<PORT> format.
+                        or in tuple of (<IP>:<PORT>, weight)
+        """
+        server_obj = memcache._Host(
+            server if isinstance(server, tuple) else (
+                server, self.DEFAULT_SERVER_WEIGHT),
+            self.debug, dead_retry=self.dead_retry,
+            socket_timeout=self.socket_timeout,
+            flush_on_reconnect=self.flush_on_reconnect)
+
+        self._place_server_on_ring(server_obj)
+
+    def _get_server(self, key):
+        """
+        Get the memcache server corresponding to the given key.
+
+        Here we find the first server on the ring by searching clock-wise
+        from the given ring slot corresponding to the key.
+
+        @param key: The input query.
+
+        @return A tuple with (server_obj, key).
+        """
+        # map the key on to the ring slot space.
+        h_key = self._generate_ring_slot(key)
+
+        for slot in self._ketama_server_slots:
+            if h_key <= slot:
+                server = self._ketama_server_ring[slot]
+                if server.connect():
+                    return (server, key)
+
+        # Even after allocating the server, if the h_key won't fit
+        # on any server, then pick the first server on the ring.
+        server = (self._ketama_server_ring[self._ketama_server_slots[0]]
+                  if self._ketama_server_slots else None)
+
+        server and server.connect()
+        return server, key
+
+    def set_servers(self, servers):
+        """
+        Add a pool of servers into the client.
+
+        @param servers: List of server hosts in <IP>:<PORT> format.
+                        or
+                        List of tuples with each tuple of the format
+                        (<IP>:<PORT>, weight)
+        """
+        # Set the default weight if weight isn't passed.
+        self.servers = [memcache._Host(
+            s if isinstance(s, tuple) else (s, self.DEFAULT_SERVER_WEIGHT),
+            self.debug, dead_retry=self.dead_retry,
+            socket_timeout=self.socket_timeout,
+            flush_on_reconnect=self.flush_on_reconnect) for s in servers]
+        # Rebuild the ring from scratch; loop explicitly (py3 map() is lazy).
+        self._ketama_server_ring, self._ketama_server_slots = {}, []
+        for server in self.servers:
+            self._place_server_on_ring(server)
+
+    def _place_server_on_ring(self, server):
+        """
+        Place given server on the ring.
+
+        Based on the weight of the server, we generate multiple slots for
+        one key. This will give better key distribution.
+
+        @param server: An instance of :class:~`memcache._Host`.
+        """
+        server_slots = self._get_server_slots_on_ring(server)
+        for slot in server_slots:
+            if slot not in self._ketama_server_ring:
+                self._ketama_server_ring[slot] = server
+                self._ketama_server_slots.append(slot)
+            else:
+                # There is a key collection(<<<1% chance).
+                # Discarding this scenario now.
+                # TODO: Handle it.
+                pass
+
+        # Sort the server slot keys to make it a ring.
+        self._ketama_server_slots.sort()
+
+    def _get_server_slots_on_ring(self, server):
+        """
+        Returns list of slot on the ring for given server.
+
+        This make sure that the slots won't collide with others server.
+
+        @param: server An object of :class:~`memcache._Host`.
+        @return: list of slots on the ring.
+        """
+        server_slots = []
+
+        for i in range(0, server.weight):
+            # TODO: Keep a UUID id for each servers to avoid key collision.
+            server_key = "{}_{}".format("{}:{}".format(server.ip,
+                                                       server.port), i)
+
+            server_slots.append(self._generate_ring_slot(server_key))
+
+        return server_slots
+
+    def _generate_ring_slot(self, key):
+        """
+        Hash function which give random slots on the ring. Hash functon make
+        sure that the key distribution is even as much as possible.
+
+        @param key: Key which need to be mapped to the hash space.
+        @type key: str
+
+        @return: hash key corresponding to the `key`
+        """
+        #TODO: Make it more general.
+
+        # Simple hash method using python's internal hash algorithm.
+        #h_key = hash(key) & 0xffff
+
+        # crc32 based hashing
+        #h_key = ((crc32(key) & 0xffffffff) >> 16) & 0xffff
+
+        # For better randomness
+        h_key = ((crc32(key) & 0xffffffff)) & 0xffff
+
+        return h_key
+
+
 def _doctest():
     import doctest
     import memcache

From ed5a2000f5c6cf0cd35f6cefee48653942f35b86 Mon Sep 17 00:00:00 2001
From: Sergio Martins <Sergio.Martins@hypert.com>
Date: Thu, 26 Mar 2015 12:43:23 -0400
Subject: [PATCH 2/4] Fixed bugs and improved comments.

    - Used description from Richard's blog (with his permission) where appropriate
    - Fixed usage of crc32 function
    - Fixed issue with crc32 in python3
    - Fixed _get_server() not handling (serverhash, key) tuple
    - Fixed usage of _Host class
    - Removed unused function add_server()
    - Ran tests using KetamaClient (not included in this commit)
    - Tested against multiple memcached servers
---
 memcache.py | 137 +++++++++++++++++-----------------------------------
 1 file changed, 43 insertions(+), 94 deletions(-)

diff --git a/memcache.py b/memcache.py
index f9f2175..014d681 100644
--- a/memcache.py
+++ b/memcache.py
@@ -1415,56 +1415,31 @@ def __str__(self):
 class KetamaClient(Client):
     """ Memcach client with Consistent hashing support.
 
-    We are using the ketama algorithm to implement the consistent hashing.
-
-    The ideal use case for this client is when your caching servers are
-    going to gets added or removed on time and you don't want to hot load
-    most of the hash keys at each time. If you are using consistent hashing
-    based client when ever there is change in the number of caching servers,
-    only very few percentage of cache miss happens across all servers.
-    By adjusting the SERVER_WEIGHT for your environment you can get least miss
-    rate.
+    Ketama is an implementation of a consistent hashing algorithm, meaning you
+    can add or remove servers from the memcached pool without causing a
+    complete remap of all keys. It was designed by Richard Jones.
 
     How Ketama Works:
-        Ketama algorithm uses very simple algorithm to achieve the consistent
-        hashing. What it does is  that,
-
-        1. Preset the total number of Keys that we are going to save on the
-           hash server. eg; Total of 2 ** 16 or so.
-        2. Logically we put all this hash keys on a ring in ascending order.
-        3. For each server we give ring slots ( Hash key, or a position on the
-           ring.) Also we place same server in multiple places of the ring
-           to get better key distribution on a server.
-        4. When we want to place a value on the hash server, we give a slot
-           for that vale on the ring, and then we find a next closest server
-           on the ring by searching clock-wise. And then we pick that server
-           to actually save key:value pair.
-        5. Reading time, the same process happens. We find the ring slot for
-           the given key, and find the next server on the ring by searching
-           clock-wise. We know that we placed the value for that key on that
-           server.
-        6. So when we add or remove one server, some server slots getting
-           removed from the ring or some new one gets added. After this update
-           there is chances that some keys will miss, since we stop searching
-           for another server on the ring once we get the first server by doing
-           the clock-wise lookup. But the miss rate will be #Keys / RING_SIZE.
-           In case of non-consistent hashing method, since the hash function
-           depends on the number of servers, the majority of the keys misses if
-           we add or remove server.
+        1. Hash each server to several unsigned integer values.
+        2. Conceptually, these numbers are placed on a ring.
+        3. Each number links to the server it was hashed from, so servers
+           appear at several points on the ring.
+        4. To map a key->server, hash the key to an unsigned integer and find
+           the next biggest number on the ring. That's your server. If
+           the number is too big, roll over to the first server in the ring
+        When a server is added or removed, only some keys will be remapped to
+        different servers. With the original modula algorithm, all keys
+        would have been remapped.
 
     TODO: Improve the documentation, add test cases.
     """
-    # For this Consistent hashing client, the weight of the server means number
-    # of times the same server is placed on the different slots of the ketama
-    # hash key ring. This will make sure the each server have well normalized
-    # key distribution.
+    # For this Consistent hashing client, the weight of the server is the
+    # number of entries it will have in the ring. This will make sure
+    # each server has well normalized key distribution.
     DEFAULT_SERVER_WEIGHT = 200
 
     # Total number of slots on the ring.
-    # If addition or deletion of a new server only causes 1 to 5 percentage
-    # cache miss on the current configuration. ie; K / RING_SIZE 
-    # where K means total  keys stored on the ring.
-    RING_SIZE = 2 ** 16
+    RING_SIZE = 2**16
 
     def __init__(self, *args, **kwargs):
         # Mapping between ring slot -> server.
@@ -1475,53 +1450,38 @@ def __init__(self, *args, **kwargs):
 
         super(KetamaClient, self).__init__(*args, **kwargs)
 
-    def add_server(self, server):
-        """
-        Add new server to the client.
-
-        @param servers: server host in <IP>:<PORT> format.
-                        or in tuple of (<IP>:<PORT>, weight)
-        """
-        server_obj = memcache._Host(
-            server if isinstance(server, tuple) else (
-                server, self.DEFAULT_SERVER_WEIGHT),
-            self.debug, dead_retry=self.dead_retry,
-            socket_timeout=self.socket_timeout,
-            flush_on_reconnect=self.flush_on_reconnect)
-
-        self._place_server_on_ring(server_obj)
-
     def _get_server(self, key):
         """
         Get the memcache server corresponding to the given key.
 
-        Here we find the first server on the ring by searching clock-wise
-        from the given ring slot corresponding to the key.
+        @param key: key, or (server_hash, key) tuple if you want to specify
+                    a hash to determine which server is selected
 
-        @param key: The input query.
-
-        @return A tuple with (server_obj, key).
+        @return A tuple with (server_obj, key), or (None, None) if no servers
+                were available.
         """
         # map the key on to the ring slot space.
         h_key = self._generate_ring_slot(key)
 
+        if isinstance(key, tuple):
+            serverhash, key = key
+
         for slot in self._ketama_server_slots:
             if h_key <= slot:
                 server = self._ketama_server_ring[slot]
                 if server.connect():
                     return (server, key)
 
-        # Even after allocating the server, if the h_key won't fit
-        # on any server, then pick the first server on the ring.
-        server = (self._ketama_server_ring[self._ketama_server_slots[0]]
-                  if self._ketama_server_slots else None)
+        # Roll over to the first available server
+        for server in self._ketama_server_ring.values():
+            if server and server.connect():
+                return (server, key)
 
-        server and server.connect()
-        return server, key
+        return (None, None)
 
     def set_servers(self, servers):
         """
-        Add a pool of servers into the client.
+        Set servers for this client.
 
         @param servers: List of server hosts in <IP>:<PORT> format.
                         or
@@ -1529,7 +1489,7 @@ def set_servers(self, servers):
                         (<IP>:<PORT>, weight)
         """
         # Set the default weight if weight isn't passed.
-        self.servers = [memcache._Host(
+        self.servers = [_Host(
             s if isinstance(s, tuple) else (s, self.DEFAULT_SERVER_WEIGHT),
             self.debug, dead_retry=self.dead_retry,
             socket_timeout=self.socket_timeout,
@@ -1541,10 +1501,9 @@ def set_servers(self, servers):
 
     def _place_server_on_ring(self, server):
         """
-        Place given server on the ring.
-
-        Based on the weight of the server, we generate multiple slots for
-        one key. This will give better key distribution.
+        Based on the weight of the server, generate multiple slots for
+        each server. This ensures when a server is added/remove keys won't all
+        remap to the same new server
 
         @param server: An instance of :class:~`memcache._Host`.
         """
@@ -1554,9 +1513,7 @@ def _place_server_on_ring(self, server):
                 self._ketama_server_ring[slot] = server
                 self._ketama_server_slots.append(slot)
             else:
-                # There is a key collection(<<<1% chance).
-                # Discarding this scenario now.
-                # TODO: Handle it.
+                # TODO: Handle collisions
                 pass
 
         # Sort the server slot keys to make it a ring.
@@ -1584,27 +1541,19 @@ def _get_server_slots_on_ring(self, server):
 
     def _generate_ring_slot(self, key):
         """
-        Hash function which give random slots on the ring. Hash functon make
-        sure that the key distribution is even as much as possible.
+        Returns a slot in the ring for the given key.
 
-        @param key: Key which need to be mapped to the hash space.
+        @param key: Key which needs to be mapped to the ring.
         @type key: str
 
-        @return: hash key corresponding to the `key`
+        @return: hash value corresponding to the `key`
         """
-        #TODO: Make it more general.
-
-        # Simple hash method using python's internal hash algorithm.
-        #h_key = hash(key) & 0xffff
-
-        # crc32 based hashing
-        #h_key = ((crc32(key) & 0xffffffff) >> 16) & 0xffff
-
-        # For better randomness
-        h_key = ((crc32(key) & 0xffffffff)) & 0xffff
-
-        return h_key
 
+        if isinstance(key, tuple):
+            serverhash, key = key
+        else:
+            serverhash = binascii.crc32(key.encode('ascii')) & 0xffffffff
+        return serverhash % self.RING_SIZE
 
 def _doctest():
     import doctest

From 51fdfc8a10361fed155af30692eac64fa3b6369d Mon Sep 17 00:00:00 2001
From: Sergio Martins <Sergio.Martins@hypert.com>
Date: Thu, 26 Mar 2015 15:16:21 -0400
Subject: [PATCH 3/4] Added testing for KetamaClient

---
 tests/test_memcache.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/tests/test_memcache.py b/tests/test_memcache.py
index c12c528..9e233a2 100644
--- a/tests/test_memcache.py
+++ b/tests/test_memcache.py
@@ -2,7 +2,7 @@
 
 from unittest import TestCase
 
-from memcache import Client, SERVER_MAX_KEY_LENGTH
+from memcache import Client, KetamaClient, SERVER_MAX_KEY_LENGTH
 
 try:
     _str_cls = basestring
@@ -31,10 +31,10 @@ def __eq__(self, other):
 
 
 class TestMemcache(TestCase):
-    def setUp(self):
+    def setUp(self, client_class=Client):
         # TODO: unix socket server stuff
         servers = ["127.0.0.1:11211"]
-        self.mc = Client(servers, debug=1)
+        self.mc = client_class(servers, debug=1)
         pass
 
     def check_setget(self, key, val, noreply=False):
@@ -119,6 +119,12 @@ def test_sending_key_too_long(self):
         self.mc.set('a' * SERVER_MAX_KEY_LENGTH, 1, noreply=True)
 
 
+class TestMemcacheKetama(TestMemcache):
+    def setUp(self):
+        # Run all the tests again using the KetamaClient
+        super(TestMemcacheKetama, self).setUp(KetamaClient)
+
+
 if __name__ == "__main__":
     # failures = 0
     # print("Testing docstrings...")

From 3465e73632b702566a80e903dc3b88f5adb3e80a Mon Sep 17 00:00:00 2001
From: Sergio Martins <Sergio.Martins@hypert.com>
Date: Thu, 26 Mar 2015 16:12:20 -0400
Subject: [PATCH 4/4] Fix format string issue in python2.6

---
 memcache.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/memcache.py b/memcache.py
index 014d681..068e4ab 100644
--- a/memcache.py
+++ b/memcache.py
@@ -1531,10 +1531,7 @@ def _get_server_slots_on_ring(self, server):
         server_slots = []
 
         for i in range(0, server.weight):
-            # TODO: Keep a UUID id for each servers to avoid key collision.
-            server_key = "{}_{}".format("{}:{}".format(server.ip,
-                                                       server.port), i)
-
+            server_key = "%s:%d_%d" % (server.ip, server.port, i)
             server_slots.append(self._generate_ring_slot(server_key))
 
         return server_slots