Commit cf18e1f4 authored by Zuul's avatar Zuul Committed by Gerrit Code Review

Merge "sharding: Cache shard ranges for object writes"

parents d147ab84 a1af3811
......@@ -129,6 +129,13 @@ use = egg:swift#proxy
# log_handoffs = true
# recheck_account_existence = 60
# recheck_container_existence = 60
#
# How long the proxy should cache a set of shard ranges for a container.
# Note that stale shard range info should be fine; updates will still
# eventually make their way to the correct shard. As a result, you can
# usually set this much higher than the existence checks above.
# recheck_updating_shard_ranges = 3600
#
# object_chunk_size = 65536
# client_chunk_size = 65536
#
......
......@@ -45,7 +45,7 @@ from swift.common.wsgi import make_pre_authed_env, make_pre_authed_request
from swift.common.utils import Timestamp, config_true_value, \
public, split_path, list_from_csv, GreenthreadSafeIterator, \
GreenAsyncPile, quorum_size, parse_content_type, \
document_iters_to_http_response_body, ShardRange
document_iters_to_http_response_body, ShardRange, find_shard_range
from swift.common.bufferedhttp import http_connect
from swift.common import constraints
from swift.common.exceptions import ChunkReadTimeout, ChunkWriteTimeout, \
......@@ -67,6 +67,7 @@ from swift.common.storage_policy import POLICIES
DEFAULT_RECHECK_ACCOUNT_EXISTENCE = 60 # seconds
DEFAULT_RECHECK_CONTAINER_EXISTENCE = 60 # seconds
DEFAULT_RECHECK_UPDATING_SHARD_RANGES = 3600 # seconds
def update_headers(response, headers):
......@@ -443,7 +444,7 @@ def get_account_info(env, app, swift_source=None):
return info
def get_cache_key(account, container=None, obj=None):
def get_cache_key(account, container=None, obj=None, shard=None):
"""
Get the keys for both memcache and env['swift.infocache'] (cache_key)
where info about accounts, containers, and objects is cached
......@@ -451,6 +452,9 @@ def get_cache_key(account, container=None, obj=None):
:param account: The name of the account
:param container: The name of the container (or None if account)
:param obj: The name of the object (or None if account or container)
:param shard: Sharding state for the container query; typically 'updating'
or 'listing' (Requires account and container; cannot use
with obj)
:returns: a (native) string cache_key
"""
if six.PY2:
......@@ -468,7 +472,13 @@ def get_cache_key(account, container=None, obj=None):
container = to_native(container)
obj = to_native(obj)
if obj:
if shard:
if not (account and container):
raise ValueError('Shard cache key requires account and container')
if obj:
raise ValueError('Shard cache key cannot have obj')
cache_key = 'shard-%s/%s/%s' % (shard, account, container)
elif obj:
if not (account and container):
raise ValueError('Object cache key requires account and container')
cache_key = 'object/%s/%s/%s' % (account, container, obj)
......@@ -2191,3 +2201,55 @@ class Controller(object):
"Failed to get shard ranges from %s: invalid data: %r",
req.path_qs, err)
return None
def _get_update_shard(self, req, account, container, obj):
"""
Find the appropriate shard range for an object update.
Note that this fetches and caches (in both the per-request infocache
and memcache, if available) all shard ranges for the given root
container so we won't have to contact the container DB for every write.
:param req: original Request instance.
:param account: account from which shard ranges should be fetched.
:param container: container from which shard ranges should be fetched.
:param obj: object getting updated.
:return: an instance of :class:`swift.common.utils.ShardRange`,
or None if the update should go back to the root
"""
if not self.app.recheck_updating_shard_ranges:
# caching is disabled; fall back to old behavior
shard_ranges = self._get_shard_ranges(
req, account, container, states='updating', includes=obj)
if not shard_ranges:
return None
return shard_ranges[0]
cache_key = get_cache_key(account, container, shard='updating')
infocache = req.environ.setdefault('swift.infocache', {})
memcache = getattr(self.app, 'memcache', None) or req.environ.get(
'swift.cache')
cached_ranges = infocache.get(cache_key)
if cached_ranges is None and memcache:
cached_ranges = memcache.get(cache_key)
if cached_ranges:
shard_ranges = [
ShardRange.from_dict(shard_range)
for shard_range in cached_ranges]
else:
shard_ranges = self._get_shard_ranges(
req, account, container, states='updating')
if shard_ranges:
cached_ranges = [dict(sr) for sr in shard_ranges]
# went to disk; cache it
if memcache:
memcache.set(cache_key, cached_ranges,
time=self.app.recheck_updating_shard_ranges)
if not shard_ranges:
return None
infocache[cache_key] = tuple(cached_ranges)
return find_shard_range(obj, shard_ranges)
......@@ -271,13 +271,12 @@ class BaseObjectController(Controller):
# find the sharded container to which we'll send the update
db_state = container_info.get('sharding_state', 'unsharded')
if db_state in ('sharded', 'sharding'):
shard_ranges = self._get_shard_ranges(
req, self.account_name, self.container_name,
includes=self.object_name, states='updating')
if shard_ranges:
shard_range = self._get_update_shard(
req, self.account_name, self.container_name, self.object_name)
if shard_range:
partition, nodes = self.app.container_ring.get_nodes(
shard_ranges[0].account, shard_ranges[0].container)
return partition, nodes, shard_ranges[0].name
shard_range.account, shard_range.container)
return partition, nodes, shard_range.name
return container_info['partition'], container_info['nodes'], None
......
......@@ -40,7 +40,8 @@ from swift.common.constraints import check_utf8, valid_api_version
from swift.proxy.controllers import AccountController, ContainerController, \
ObjectControllerRouter, InfoController
from swift.proxy.controllers.base import get_container_info, NodeIter, \
DEFAULT_RECHECK_CONTAINER_EXISTENCE, DEFAULT_RECHECK_ACCOUNT_EXISTENCE
DEFAULT_RECHECK_CONTAINER_EXISTENCE, DEFAULT_RECHECK_ACCOUNT_EXISTENCE, \
DEFAULT_RECHECK_UPDATING_SHARD_RANGES
from swift.common.swob import HTTPBadRequest, HTTPForbidden, \
HTTPMethodNotAllowed, HTTPNotFound, HTTPPreconditionFailed, \
HTTPServerError, HTTPException, Request, HTTPServiceUnavailable, \
......@@ -202,6 +203,9 @@ class Application(object):
self.recheck_container_existence = \
int(conf.get('recheck_container_existence',
DEFAULT_RECHECK_CONTAINER_EXISTENCE))
self.recheck_updating_shard_ranges = \
int(conf.get('recheck_updating_shard_ranges',
DEFAULT_RECHECK_UPDATING_SHARD_RANGES))
self.recheck_account_existence = \
int(conf.get('recheck_account_existence',
DEFAULT_RECHECK_ACCOUNT_EXISTENCE))
......
......@@ -20,15 +20,16 @@ import uuid
from nose import SkipTest
from swift.common import direct_client
from swift.common import direct_client, utils
from swift.common.manager import Manager
from swift.common.memcached import MemcacheRing
from swift.common.direct_client import DirectClientException
from swift.common.utils import ShardRange, parse_db_filename, get_db_files, \
quorum_size, config_true_value, Timestamp
from swift.container.backend import ContainerBroker, UNSHARDED, SHARDING
from swift.common import utils
from swift.common.manager import Manager
from swiftclient import client, get_auth, ClientException
from swift.proxy.controllers.base import get_cache_key
from swift.proxy.controllers.obj import num_container_updates
from test import annotate_failure
from test.probe.brain import BrainSplitter
......@@ -116,6 +117,7 @@ class BaseTestContainerSharding(ReplProbeTest):
self.brain.put_container(policy_index=int(self.policy))
self.sharders = Manager(['container-sharder'])
self.internal_client = self.make_internal_client()
self.memcache = MemcacheRing(['127.0.0.1:11211'])
def stop_container_servers(self, node_numbers=None):
if node_numbers:
......@@ -835,6 +837,9 @@ class TestContainerSharding(BaseTestContainerSharding):
self.assert_container_listing(more_obj_names + obj_names)
self.assert_container_object_count(len(more_obj_names + obj_names))
# Before writing, kill the cache
self.memcache.delete(get_cache_key(
self.account, self.container_name, shard='updating'))
# add another object that lands in the first of the new sub-shards
self.put_objects(['alpha'])
......@@ -1217,6 +1222,10 @@ class TestContainerSharding(BaseTestContainerSharding):
# now look up the shard target for subsequent updates
self.assert_container_listing(obj_names)
# Before writing, kill the cache
self.memcache.delete(get_cache_key(
self.account, self.container_name, shard='updating'))
# delete objects from first shard range
first_shard_objects = [obj_name for obj_name in obj_names
if obj_name <= orig_shard_ranges[0].upper]
......@@ -1243,6 +1252,11 @@ class TestContainerSharding(BaseTestContainerSharding):
# to a GET for a redirect target, the object update will default to
# being targeted at the root container
self.stop_container_servers()
# Before writing, kill the cache
self.memcache.delete(get_cache_key(
self.account, self.container_name, shard='updating'))
self.put_objects([beta])
self.brain.servers.start()
async_pendings = self.gather_async_pendings(
......@@ -1746,6 +1760,9 @@ class TestContainerSharding(BaseTestContainerSharding):
shard_part, shard_nodes = self.get_part_and_node_numbers(
shard_ranges[1])
self.brain.servers.stop(number=shard_nodes[2])
# Before writing, kill the cache
self.memcache.delete(get_cache_key(
self.account, self.container_name, shard='updating'))
self.delete_objects(['alpha'])
self.put_objects(['beta'])
self.assert_container_listing(['beta'])
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment