Commit a1af3811 authored by Tim Burke

sharding: Cache shard ranges for object writes

Previously, we issued a GET to the root container for every object PUT,
POST, and DELETE. This puts load on the container server, potentially
leading to timeouts, error limiting, and erroneous 404s (!).

Now, cache the complete set of 'updating' shards, and find the shard for
this particular update in the proxy. Add a new config option,
recheck_updating_shard_ranges, to control the cache time; it defaults to
one hour. Set to 0 to fall back to previous behavior.

Note that we should be able to tolerate stale shard data just fine; we
already have to worry about async pendings that got written down with
one shard but may not get processed until that shard has itself sharded
or shrunk into another shard.

Also note that memcache has a default value limit of 1MiB, which may be
exceeded if a container has thousands of shards. In that case, set()
will act like a delete(), causing increased memcache churn but otherwise
preserving existing behavior. In the future, we may want to add support
for gzipping the cached shard ranges as they should compress well.

Change-Id: Ic7a732146ea19a47669114ad5dbee0bacbe66919
Closes-Bug: 1781291
parent e62f07d9
......@@ -129,6 +129,13 @@ use = egg:swift#proxy
# log_handoffs = true
# recheck_account_existence = 60
# recheck_container_existence = 60
# How long the proxy should cache a set of shard ranges for a container.
# Note that stale shard range info should be fine; updates will still
# eventually make their way to the correct shard. As a result, you can
# usually set this much higher than the existence checks above.
# recheck_updating_shard_ranges = 3600
# object_chunk_size = 65536
# client_chunk_size = 65536
......@@ -45,7 +45,7 @@ from swift.common.wsgi import make_pre_authed_env, make_pre_authed_request
from swift.common.utils import Timestamp, config_true_value, \
public, split_path, list_from_csv, GreenthreadSafeIterator, \
GreenAsyncPile, quorum_size, parse_content_type, \
document_iters_to_http_response_body, ShardRange
document_iters_to_http_response_body, ShardRange, find_shard_range
from swift.common.bufferedhttp import http_connect
from swift.common import constraints
from swift.common.exceptions import ChunkReadTimeout, ChunkWriteTimeout, \
......@@ -67,6 +67,7 @@ from swift.common.storage_policy import POLICIES
def update_headers(response, headers):
......@@ -443,7 +444,7 @@ def get_account_info(env, app, swift_source=None):
return info
def get_cache_key(account, container=None, obj=None):
def get_cache_key(account, container=None, obj=None, shard=None):
Get the keys for both memcache and env['swift.infocache'] (cache_key)
where info about accounts, containers, and objects is cached
......@@ -451,6 +452,9 @@ def get_cache_key(account, container=None, obj=None):
:param account: The name of the account
:param container: The name of the container (or None if account)
:param obj: The name of the object (or None if account or container)
:param shard: Sharding state for the container query; typically 'updating'
or 'listing' (Requires account and container; cannot use
with obj)
:returns: a (native) string cache_key
if six.PY2:
......@@ -468,7 +472,13 @@ def get_cache_key(account, container=None, obj=None):
container = to_native(container)
obj = to_native(obj)
if obj:
if shard:
if not (account and container):
raise ValueError('Shard cache key requires account and container')
if obj:
raise ValueError('Shard cache key cannot have obj')
cache_key = 'shard-%s/%s/%s' % (shard, account, container)
elif obj:
if not (account and container):
raise ValueError('Object cache key requires account and container')
cache_key = 'object/%s/%s/%s' % (account, container, obj)
......@@ -2191,3 +2201,55 @@ class Controller(object):
"Failed to get shard ranges from %s: invalid data: %r",
req.path_qs, err)
return None
def _get_update_shard(self, req, account, container, obj):
Find the appropriate shard range for an object update.
Note that this fetches and caches (in both the per-request infocache
and memcache, if available) all shard ranges for the given root
container so we won't have to contact the container DB for every write.
:param req: original Request instance.
:param account: account from which shard ranges should be fetched.
:param container: container from which shard ranges should be fetched.
:param obj: object getting updated.
:return: an instance of :class:`swift.common.utils.ShardRange`,
or None if the update should go back to the root
if not
# caching is disabled; fall back to old behavior
shard_ranges = self._get_shard_ranges(
req, account, container, states='updating', includes=obj)
if not shard_ranges:
return None
return shard_ranges[0]
cache_key = get_cache_key(account, container, shard='updating')
infocache = req.environ.setdefault('swift.infocache', {})
memcache = getattr(, 'memcache', None) or req.environ.get(
cached_ranges = infocache.get(cache_key)
if cached_ranges is None and memcache:
cached_ranges = memcache.get(cache_key)
if cached_ranges:
shard_ranges = [
for shard_range in cached_ranges]
shard_ranges = self._get_shard_ranges(
req, account, container, states='updating')
if shard_ranges:
cached_ranges = [dict(sr) for sr in shard_ranges]
# went to disk; cache it
if memcache:
memcache.set(cache_key, cached_ranges,
if not shard_ranges:
return None
infocache[cache_key] = tuple(cached_ranges)
return find_shard_range(obj, shard_ranges)
......@@ -271,13 +271,12 @@ class BaseObjectController(Controller):
# find the sharded container to which we'll send the update
db_state = container_info.get('sharding_state', 'unsharded')
if db_state in ('sharded', 'sharding'):
shard_ranges = self._get_shard_ranges(
req, self.account_name, self.container_name,
includes=self.object_name, states='updating')
if shard_ranges:
shard_range = self._get_update_shard(
req, self.account_name, self.container_name, self.object_name)
if shard_range:
partition, nodes =
shard_ranges[0].account, shard_ranges[0].container)
return partition, nodes, shard_ranges[0].name
shard_range.account, shard_range.container)
return partition, nodes,
return container_info['partition'], container_info['nodes'], None
......@@ -40,7 +40,8 @@ from swift.common.constraints import check_utf8, valid_api_version
from swift.proxy.controllers import AccountController, ContainerController, \
ObjectControllerRouter, InfoController
from swift.proxy.controllers.base import get_container_info, NodeIter, \
from swift.common.swob import HTTPBadRequest, HTTPForbidden, \
HTTPMethodNotAllowed, HTTPNotFound, HTTPPreconditionFailed, \
HTTPServerError, HTTPException, Request, HTTPServiceUnavailable, \
......@@ -202,6 +203,9 @@ class Application(object):
self.recheck_container_existence = \
self.recheck_updating_shard_ranges = \
self.recheck_account_existence = \
......@@ -20,15 +20,16 @@ import uuid
from nose import SkipTest
from swift.common import direct_client
from swift.common import direct_client, utils
from swift.common.manager import Manager
from swift.common.memcached import MemcacheRing
from swift.common.direct_client import DirectClientException
from swift.common.utils import ShardRange, parse_db_filename, get_db_files, \
quorum_size, config_true_value, Timestamp
from swift.container.backend import ContainerBroker, UNSHARDED, SHARDING
from swift.common import utils
from swift.common.manager import Manager
from swiftclient import client, get_auth, ClientException
from swift.proxy.controllers.base import get_cache_key
from swift.proxy.controllers.obj import num_container_updates
from test import annotate_failure
from test.probe.brain import BrainSplitter
......@@ -116,6 +117,7 @@ class BaseTestContainerSharding(ReplProbeTest):
self.sharders = Manager(['container-sharder'])
self.internal_client = self.make_internal_client()
self.memcache = MemcacheRing([''])
def stop_container_servers(self, node_numbers=None):
if node_numbers:
......@@ -835,6 +837,9 @@ class TestContainerSharding(BaseTestContainerSharding):
self.assert_container_listing(more_obj_names + obj_names)
self.assert_container_object_count(len(more_obj_names + obj_names))
# Before writing, kill the cache
self.account, self.container_name, shard='updating'))
# add another object that lands in the first of the new sub-shards
......@@ -1217,6 +1222,10 @@ class TestContainerSharding(BaseTestContainerSharding):
# now look up the shard target for subsequent updates
# Before writing, kill the cache
self.account, self.container_name, shard='updating'))
# delete objects from first shard range
first_shard_objects = [obj_name for obj_name in obj_names
if obj_name <= orig_shard_ranges[0].upper]
......@@ -1243,6 +1252,11 @@ class TestContainerSharding(BaseTestContainerSharding):
# to a GET for a redirect target, the object update will default to
# being targeted at the root container
# Before writing, kill the cache
self.account, self.container_name, shard='updating'))
async_pendings = self.gather_async_pendings(
......@@ -1746,6 +1760,9 @@ class TestContainerSharding(BaseTestContainerSharding):
shard_part, shard_nodes = self.get_part_and_node_numbers(
# Before writing, kill the cache
self.account, self.container_name, shard='updating'))
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment