Commit 3182976d authored by Jonas Genannt

Imported Upstream version 0.9.13

parent 37b007fb
Metadata-Version: 1.0
Name: carbon
Version: 0.9.12
Version: 0.9.13
Summary: Backend data caching and persistence daemon for Graphite
Home-page: http://graphite-project.github.com
Author: Chris Davis
......
......@@ -109,7 +109,7 @@ class StdinMetricsReader(LineReceiver):
datapoint = (float(timestamp), float(value))
assert datapoint[1] == datapoint[1] # filter out NaNs
client_manager.sendDatapoint(metric, datapoint)
except:
except (ValueError, AssertionError):  # the NaN assert above raises AssertionError
log.err(None, 'Dropping invalid line: %s' % line)
def connectionLost(self, reason):
......
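The narrowed handler above only drops lines whose fields fail to parse. As a quick standalone illustration (the parse_datapoint helper below is hypothetical, not part of carbon):

def parse_datapoint(line):
    metric, value, timestamp = line.strip().split()
    datapoint = (float(timestamp), float(value))
    assert datapoint[1] == datapoint[1]  # NaN != NaN, so this rejects NaNs
    return metric, datapoint

for line in ["foo.bar 1.5 1300000000", "foo.bar nan 1300000000", "garbage"]:
    try:
        print(parse_datapoint(line))
    except (ValueError, AssertionError):
        print("Dropping invalid line: %s" % line)

Note that the NaN filter relies on an assert, so AssertionError has to be caught alongside ValueError for NaN lines to be dropped rather than crash the reader.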
......@@ -32,4 +32,12 @@
#
# <env>.applications.<app>.all.<app_metric> (60) = sum <env>.applications.<app>.*.<<app_metric>>
#
# It is also possible to use regular expressions. Following the example above,
# when using:
#
# <env>.applications.<app>.<domain>.requests (60) = sum <env>.applications.<app>.<domain>\d{2}.requests
#
# You will end up with 'prod.applications.apache.www.requests' instead of
# 'prod.applications.apache.all.requests'.
#
# Note that any time this file is modified, it will be re-read automatically.
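A rough sketch of how a rule like the one above can match: each <field> acts as a named capture, any extra regex (here \d{2}) constrains the match, and the output name is built from the captured fields. The translation below is simplified for illustration and is not carbon's actual rule parser:

import re

input_pattern = r'<env>\.applications\.<app>\.<domain>\d{2}\.requests'
# Turn each <field> into a named group matching one dot-delimited element.
regex = re.compile(re.sub(r'<(\w+)>', r'(?P<\1>[^.]+?)', input_pattern))

m = regex.match('prod.applications.apache.www01.requests')
if m:
    fields = m.groupdict()
    print('%(env)s.applications.%(app)s.%(domain)s.requests' % fields)
    # -> prod.applications.apache.www.requests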
......@@ -30,7 +30,8 @@
#
#LOCAL_DATA_DIR = /opt/graphite/storage/whisper/
# Enable daily log rotation. If disabled, a kill -HUP can be used after a manual rotate
# Enable daily log rotation. If disabled, carbon will automatically re-open
# the file if it's rotated out of place (e.g. by the logrotate daemon).
ENABLE_LOGROTATION = True
# Specify the user to drop privileges to
......@@ -56,7 +57,7 @@ MAX_UPDATES_PER_SECOND = 500
# If defined, this changes the MAX_UPDATES_PER_SECOND in Carbon when a
# stop/shutdown is initiated. This helps when MAX_UPDATES_PER_SECOND is
# relatively low and carbon has cached a lot of updates; it enables the carbon
# daemon to shut down more quickly.
# MAX_UPDATES_PER_SECOND_ON_SHUTDOWN = 1000
# Softly limits the number of whisper files that get created each minute.
......@@ -266,10 +267,19 @@ DESTINATIONS = 127.0.0.1:2004
MAX_DATAPOINTS_PER_MESSAGE = 500
MAX_QUEUE_SIZE = 10000
# This is the fraction of MAX_QUEUE_SIZE below which a send queue must drain
# before it accepts messages again. For a larger site with a very large queue
# it makes sense to tune this to allow for incoming stats. For example, with
# an average flow of 100k stats/minute and a MAX_QUEUE_SIZE of 3,000,000, it
# makes sense to let stats start flowing again once the queue has drained to
# 95%, since the remaining 5% leaves room for the next minute's worth of
# stats even before the relay incrementally clears more of the queue.
QUEUE_LOW_WATERMARK_PCT = 0.8
# Set this to False to drop datapoints when any send queue (sending datapoints
# to a downstream carbon daemon) hits MAX_QUEUE_SIZE. If this is True (the
# default) then sockets over which metrics are received will temporarily stop accepting
# data until the send queues fall below 80% MAX_QUEUE_SIZE.
# data until the send queues fall below QUEUE_LOW_WATERMARK_PCT * MAX_QUEUE_SIZE.
USE_FLOW_CONTROL = True
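A back-of-the-envelope check of the sizing advice above, using the numbers from the comment (not the shipped defaults):

MAX_QUEUE_SIZE = 3000000          # datapoints the relay may queue
QUEUE_LOW_WATERMARK_PCT = 0.95    # resume accepting input below this fill level
inflow_per_minute = 100000        # average stats/minute

headroom = MAX_QUEUE_SIZE * (1 - QUEUE_LOW_WATERMARK_PCT)
print("headroom: %d datapoints" % headroom)                         # 150000
print("covers next minute: %s" % (headroom >= inflow_per_minute))   # True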
# Set this to True to enable whitelisting and blacklisting of metrics in
......@@ -296,8 +306,14 @@ LOG_LISTENER_CONNECTIONS = True
# If set true, metric received will be forwarded to DESTINATIONS in addition to
# the output of the aggregation rules. If set false the carbon-aggregator will
# only ever send the output of aggregation.
FORWARD_ALL = True
# only ever send the output of aggregation. The default is False, so received metrics are not forwarded.
FORWARD_ALL = False
# Filenames of the configuration files to use for this instance of aggregator.
# Filenames are relative to CONF_DIR.
#
# AGGREGATION_RULES = aggregation-rules.conf
# REWRITE_RULES = rewrite-rules.conf
# This is a list of carbon daemons we will send any relayed or
# generated metrics to. The default provided would send to a single
......@@ -305,7 +321,7 @@ FORWARD_ALL = True
# use multiple carbon-cache instances then it would look like this:
#
# DESTINATIONS = 127.0.0.1:2004:a, 127.0.0.1:2104:b
#
# The format is comma-delimited IP:PORT:INSTANCE where the :INSTANCE part is
# optional and refers to the "None" instance if omitted.
#
......
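A minimal sketch of the IP:PORT:INSTANCE format described above (the parse_destinations helper is illustrative, ignores IPv6, and is not carbon.util's real parser):

def parse_destinations(value):
    destinations = []
    for entry in value.split(','):
        parts = entry.strip().split(':')
        host, port = parts[0], int(parts[1])
        instance = parts[2] if len(parts) > 2 else None  # the "None" instance
        destinations.append((host, port, instance))
    return destinations

print(parse_destinations('127.0.0.1:2004:a, 127.0.0.1:2104:b'))
# [('127.0.0.1', 2004, 'a'), ('127.0.0.1', 2104, 'b')]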
#!/bin/bash
# chkconfig: - 25 75
# description: carbon-aggregator
# processname: carbon-aggregator
# Source function library.
if [ -e /etc/rc.d/init.d/functions ]; then
. /etc/rc.d/init.d/functions;
fi;
CARBON_DAEMON="aggregator"
GRAPHITE_DIR="/opt/graphite"
export PYTHONPATH="${GRAPHITE_DIR}/lib:$PYTHONPATH"
INSTANCES=`grep "^\[${CARBON_DAEMON}" ${GRAPHITE_DIR}/conf/carbon.conf | cut -d \[ -f 2 | cut -d \] -f 1 | cut -d : -f 2`
function die {
echo $1
exit 1
}
start(){
cd $GRAPHITE_DIR;
for INSTANCE in ${INSTANCES}; do
if [ "${INSTANCE}" == "${CARBON_DAEMON}" ]; then
INSTANCE="a";
fi;
echo "Starting carbon-${CARBON_DAEMON}:${INSTANCE}..."
bin/carbon-${CARBON_DAEMON}.py --instance=${INSTANCE} start;
if [ $? -eq 0 ]; then
echo_success
else
echo_failure
fi;
echo ""
done;
}
stop(){
cd $GRAPHITE_DIR
for INSTANCE in ${INSTANCES}; do
if [ "${INSTANCE}" == "${CARBON_DAEMON}" ]; then
INSTANCE="a";
fi;
echo "Stopping carbon-${CARBON_DAEMON}:${INSTANCE}..."
bin/carbon-${CARBON_DAEMON}.py --instance=${INSTANCE} stop
if [ `sleep 3; /usr/bin/pgrep -f "carbon-${CARBON_DAEMON}.py --instance=${INSTANCE}" | /usr/bin/wc -l` -gt 0 ]; then
echo "Carbon did not stop yet. Sleeping longer, then force killing it...";
sleep 20;
/usr/bin/pkill -9 -f "carbon-${CARBON_DAEMON}.py --instance=${INSTANCE}";
fi;
if [ $? -eq 0 ]; then
echo_success
else
echo_failure
fi;
echo ""
done;
}
status(){
cd $GRAPHITE_DIR;
for INSTANCE in ${INSTANCES}; do
if [ "${INSTANCE}" == "${CARBON_DAEMON}" ]; then
INSTANCE="a";
fi;
bin/carbon-${CARBON_DAEMON}.py --instance=${INSTANCE} status;
if [ $? -eq 0 ]; then
echo_success
else
echo_failure
fi;
echo ""
done;
}
case "$1" in
start)
start
;;
stop)
stop
;;
status)
status
;;
restart|reload)
stop
start
;;
*)
echo $"Usage: $0 {start|stop|restart|status}"
exit 1
esac
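The INSTANCES pipeline above collects instance names from carbon.conf section headers such as [aggregator] or [aggregator:b]; the loop then maps the bare daemon name to instance "a". A Python sketch of the same extraction, on made-up config contents:

import re

sample_conf = """
[aggregator]
[aggregator:b]
[cache]
"""
daemon = 'aggregator'
instances = [m.group(1) or daemon
             for m in re.finditer(r'^\[%s(?::(\w+))?\]' % daemon,
                                  sample_conf, re.M)]
print(instances)  # ['aggregator', 'b']; the script treats 'aggregator' as 'a'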
#!/bin/bash
# chkconfig: - 25 75
# description: carbon-cache
# processname: carbon-cache
# Source function library.
if [ -e /etc/rc.d/init.d/functions ]; then
. /etc/rc.d/init.d/functions;
fi;
CARBON_DAEMON="cache"
GRAPHITE_DIR="/opt/graphite"
export PYTHONPATH="${GRAPHITE_DIR}/lib:$PYTHONPATH"
INSTANCES=`grep "^\[${CARBON_DAEMON}" ${GRAPHITE_DIR}/conf/carbon.conf | cut -d \[ -f 2 | cut -d \] -f 1 | cut -d : -f 2`
function die {
echo $1
exit 1
}
start(){
cd $GRAPHITE_DIR;
for INSTANCE in ${INSTANCES}; do
if [ "${INSTANCE}" == "${CARBON_DAEMON}" ]; then
INSTANCE="a";
fi;
echo "Starting carbon-${CARBON_DAEMON}:${INSTANCE}..."
bin/carbon-${CARBON_DAEMON}.py --instance=${INSTANCE} start;
if [ $? -eq 0 ]; then
echo_success
else
echo_failure
fi;
echo ""
done;
}
stop(){
cd $GRAPHITE_DIR
for INSTANCE in ${INSTANCES}; do
if [ "${INSTANCE}" == "${CARBON_DAEMON}" ]; then
INSTANCE="a";
fi;
echo "Stopping carbon-${CARBON_DAEMON}:${INSTANCE}..."
bin/carbon-${CARBON_DAEMON}.py --instance=${INSTANCE} stop
if [ `sleep 3; /usr/bin/pgrep -f "carbon-${CARBON_DAEMON}.py --instance=${INSTANCE}" | /usr/bin/wc -l` -gt 0 ]; then
echo "Carbon did not stop yet. Sleeping longer, then force killing it...";
sleep 20;
/usr/bin/pkill -9 -f "carbon-${CARBON_DAEMON}.py --instance=${INSTANCE}";
fi;
if [ $? -eq 0 ]; then
echo_success
else
echo_failure
fi;
echo ""
done;
}
status(){
cd $GRAPHITE_DIR;
for INSTANCE in ${INSTANCES}; do
if [ "${INSTANCE}" == "${CARBON_DAEMON}" ]; then
INSTANCE="a";
fi;
bin/carbon-${CARBON_DAEMON}.py --instance=${INSTANCE} status;
if [ $? -eq 0 ]; then
echo_success
else
echo_failure
fi;
echo ""
done;
}
case "$1" in
start)
start
;;
stop)
stop
;;
status)
status
;;
restart|reload)
stop
start
;;
*)
echo $"Usage: $0 {start|stop|restart|status}"
exit 1
esac
#!/bin/bash
# chkconfig: - 25 75
# description: carbon-relay
# processname: carbon-relay
# Source function library.
if [ -e /etc/rc.d/init.d/functions ]; then
. /etc/rc.d/init.d/functions;
fi;
CARBON_DAEMON="relay"
GRAPHITE_DIR="/opt/graphite"
export PYTHONPATH="${GRAPHITE_DIR}/lib:$PYTHONPATH"
INSTANCES=`grep "^\[${CARBON_DAEMON}" ${GRAPHITE_DIR}/conf/carbon.conf | cut -d \[ -f 2 | cut -d \] -f 1 | cut -d : -f 2`
function die {
echo $1
exit 1
}
start(){
cd $GRAPHITE_DIR;
for INSTANCE in ${INSTANCES}; do
if [ "${INSTANCE}" == "${CARBON_DAEMON}" ]; then
INSTANCE="a";
fi;
echo "Starting carbon-${CARBON_DAEMON}:${INSTANCE}..."
bin/carbon-${CARBON_DAEMON}.py --instance=${INSTANCE} start;
if [ $? -eq 0 ]; then
echo_success
else
echo_failure
fi;
echo ""
done;
}
stop(){
cd $GRAPHITE_DIR
for INSTANCE in ${INSTANCES}; do
if [ "${INSTANCE}" == "${CARBON_DAEMON}" ]; then
INSTANCE="a";
fi;
echo "Stopping carbon-${CARBON_DAEMON}:${INSTANCE}..."
bin/carbon-${CARBON_DAEMON}.py --instance=${INSTANCE} stop
if [ `sleep 3; /usr/bin/pgrep -f "carbon-${CARBON_DAEMON}.py --instance=${INSTANCE}" | /usr/bin/wc -l` -gt 0 ]; then
echo "Carbon did not stop yet. Sleeping longer, then force killing it...";
sleep 20;
/usr/bin/pkill -9 -f "carbon-${CARBON_DAEMON}.py --instance=${INSTANCE}";
fi;
if [ $? -eq 0 ]; then
echo_success
else
echo_failure
fi;
echo ""
done;
}
status(){
cd $GRAPHITE_DIR;
for INSTANCE in ${INSTANCES}; do
if [ "${INSTANCE}" == "${CARBON_DAEMON}" ]; then
INSTANCE="a";
fi;
bin/carbon-${CARBON_DAEMON}.py --instance=${INSTANCE} status;
if [ $? -eq 0 ]; then
echo_success
else
echo_failure
fi;
echo ""
done;
}
case "$1" in
start)
start
;;
stop)
stop
;;
status)
status
;;
restart|reload)
stop
start
;;
*)
echo $"Usage: $0 {start|stop|restart|status}"
exit 1
esac
......@@ -29,7 +29,7 @@ class RuleManager:
# Only read if the rules file has been modified
try:
mtime = getmtime(self.rules_file)
except:
except OSError:
log.err("Failed to get mtime of %s" % self.rules_file)
return
if mtime <= self.rules_last_read:
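The hunk above is the standard mtime-guarded reload pattern: re-read the rules file only when its modification time advances, and treat a missing file (OSError) as "keep the current rules". A generic standalone sketch, not RuleManager itself:

import os.path

class ReloadingFile(object):
    def __init__(self, path):
        self.path = path
        self.last_read = 0

    def read_if_modified(self):
        try:
            mtime = os.path.getmtime(self.path)
        except OSError:       # file missing or unreadable: keep old contents
            return None
        if mtime <= self.last_read:
            return None       # unchanged since the last read
        self.last_read = mtime
        with open(self.path) as f:
            return f.read()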
......@@ -59,7 +59,7 @@ class RuleManager:
frequency = int( frequency.lstrip('(').rstrip(')') )
return AggregationRule(input_pattern, output_pattern, method, frequency)
except:
except ValueError:
log.err("Failed to parse line: %s" % line)
raise
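For context, the line being parsed above has the shape "output_pattern (frequency) = method input_pattern", and the int() call is what raises the ValueError now being caught. A simplified sketch of that shape (not carbon's actual parser, which also validates the method):

def parse_rule(line):
    left, right = line.split('=', 1)
    output_pattern, frequency = left.split()
    method, input_pattern = right.split()
    frequency = int(frequency.lstrip('(').rstrip(')'))  # ValueError if malformed
    return output_pattern, frequency, method, input_pattern

print(parse_rule('<e>.apps.<a>.all.req (60) = sum <e>.apps.<a>.*.req'))
# ('<e>.apps.<a>.all.req', 60, 'sum', '<e>.apps.<a>.*.req')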
......@@ -90,7 +90,7 @@ class AggregationRule:
extracted_fields = match.groupdict()
try:
result = self.output_template % extracted_fields
except:
except (KeyError, TypeError):  # a field missing from the match raises KeyError
log.err("Failed to interpolate template %s with fields %s" % (self.output_template, extracted_fields))
self.cache[metric_path] = result
......
......@@ -45,7 +45,7 @@ import txamqp.spec
try:
import carbon
except:
except ImportError:
# this is being run directly, carbon is not installed
LIB_DIR = os.path.dirname(os.path.dirname(__file__))
sys.path.insert(0, LIB_DIR)
......
......@@ -116,7 +116,7 @@ def main():
else:
timestamp = time.time()
except:
except ValueError:
parser.print_usage()
raise SystemExit(1)
......
......@@ -17,12 +17,13 @@ from collections import deque
from carbon.conf import settings
try:
from collections import defaultdict
except:
except ImportError:
from util import defaultdict
class _MetricCache(defaultdict):
def __init__(self, defaultfactory=deque, method="sorted"):
self.size = 0
self.method = method
if self.method == "sorted":
self.queue = self.gen_queue()
......@@ -39,11 +40,8 @@ class _MetricCache(defaultdict):
while queue:
yield queue.pop()[0]
@property
def size(self):
return reduce(lambda x, y: x + len(y), self.values(), 0)
def store(self, metric, datapoint):
self.size += 1
self[metric].append(datapoint)
if self.isFull():
log.msg("MetricCache is full: self.size=%d" % self.size)
......@@ -59,11 +57,16 @@ class _MetricCache(defaultdict):
raise KeyError(metric)
elif not metric and self.method == "max":
metric = max(self.items(), key=lambda x: len(x[1]))[0]
datapoints = (metric, super(_MetricCache, self).pop(metric))
elif not metric and self.method == "naive":
return self.popitem()
datapoints = self.popitem()
elif not metric and self.method == "sorted":
metric = self.queue.next()
datapoints = (metric, super(_MetricCache, self).pop(metric))
# Save only last value for each timestamp
popped = super(_MetricCache, self).pop(metric)
ordered = sorted(dict(popped).items(), key=lambda x: x[0])
datapoints = (metric, deque(ordered))
self.size -= len(datapoints[1])
return datapoints
@property
......
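The new "sorted" pop above deduplicates by timestamp before returning datapoints: building a dict from (timestamp, value) pairs keeps only the last value written for each timestamp, and sorting restores time order. Standalone illustration:

from collections import deque

popped = deque([(100, 1.0), (110, 2.0), (100, 3.0)])
ordered = sorted(dict(popped).items(), key=lambda x: x[0])
print(deque(ordered))  # deque([(100, 3.0), (110, 2.0)])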
......@@ -7,8 +7,13 @@ from carbon.conf import settings
from carbon.util import pickle
from carbon import log, state, instrumentation
try:
import signal
except ImportError:
log.debug("Couldn't import signal module")
SEND_QUEUE_LOW_WATERMARK = settings.MAX_QUEUE_SIZE * 0.8
SEND_QUEUE_LOW_WATERMARK = settings.MAX_QUEUE_SIZE * settings.QUEUE_LOW_WATERMARK_PCT
class CarbonClientProtocol(Int32StringReceiver):
......@@ -102,6 +107,7 @@ class CarbonClientFactory(ReconnectingClientFactory):
self.attemptedRelays = 'destinations.%s.attemptedRelays' % self.destinationName
self.fullQueueDrops = 'destinations.%s.fullQueueDrops' % self.destinationName
self.queuedUntilConnected = 'destinations.%s.queuedUntilConnected' % self.destinationName
self.relayMaxQueueLength = 'destinations.%s.relayMaxQueueLength' % self.destinationName
def queueFullCallback(self, result):
state.events.cacheFull()
......@@ -153,6 +159,7 @@ class CarbonClientFactory(ReconnectingClientFactory):
def sendDatapoint(self, metric, datapoint):
instrumentation.increment(self.attemptedRelays)
instrumentation.max(self.relayMaxQueueLength, self.queueSize)
queueSize = self.queueSize
if queueSize >= settings.MAX_QUEUE_SIZE:
if not self.queueFull.called:
......@@ -205,6 +212,9 @@ class CarbonClientManager(Service):
self.client_factories = {} # { destination : CarbonClientFactory() }
def startService(self):
if 'signal' in globals():
log.debug("Installing SIG_IGN for SIGHUP")
signal.signal(signal.SIGHUP, signal.SIG_IGN)
Service.startService(self)
for factory in self.client_factories.values():
if not factory.started:
......
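The guarded import and the globals() check above exist because the signal module is unavailable on some platforms; when present, SIGHUP is ignored (its default action would terminate the daemon). An equivalent standalone sketch:

try:
    import signal
except ImportError:
    signal = None

if signal is not None and hasattr(signal, 'SIGHUP'):  # no SIGHUP on Windows
    signal.signal(signal.SIGHUP, signal.SIG_IGN)      # ignore hangups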
......@@ -53,6 +53,7 @@ defaults = dict(
MAX_AGGREGATION_INTERVALS=5,
FORWARD_ALL=False,
MAX_QUEUE_SIZE=1000,
QUEUE_LOW_WATERMARK_PCT=0.8,
ENABLE_AMQP=False,
AMQP_VERBOSE=False,
BIND_PATTERNS=['#'],
......@@ -73,6 +74,9 @@ defaults = dict(
WRITE_BACK_FREQUENCY=None,
ENABLE_LOGROTATION=True,
LOG_LISTENER_CONNECTIONS=True,
AGGREGATION_RULES='aggregation-rules.conf',
REWRITE_RULES='rewrite-rules.conf',
RELAY_RULES='relay-rules.conf',
)
......@@ -149,10 +153,10 @@ class Settings(dict):
# Attempt to figure out numeric types automatically
try:
value = int(value)
except:
except ValueError:
try:
value = float(value)
except:
except ValueError:
pass
self[key] = value
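Standalone sketch of the int-then-float coercion above; values that parse as neither remain strings:

def coerce(value):
    try:
        return int(value)
    except ValueError:
        try:
            return float(value)
        except ValueError:
            return value

print([coerce(v) for v in ('500', '0.8', 'graphite')])  # [500, 0.8, 'graphite']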
......@@ -250,6 +254,11 @@ class CarbonCacheOptions(usage.Options):
logdir = settings.LOG_DIR
if not isdir(logdir):
os.makedirs(logdir)
if settings.USER:
# We have not yet switched to the specified user,
# but that user must be able to create files in this
# directory.
os.chown(logdir, self.parent["uid"], self.parent["gid"])
log.logToDir(logdir)
if self["whitelist"] is None:
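The ordering above matters: the chown happens while the daemon is still running as root, handing the log directory to the user it will later setuid to. A hedged sketch of the idea (prepare_logdir is an illustrative helper, not carbon's):

import os
import pwd

def prepare_logdir(logdir, username):
    if not os.path.isdir(logdir):
        os.makedirs(logdir)
    pw = pwd.getpwnam(username)             # resolve the target uid/gid
    os.chown(logdir, pw.pw_uid, pw.pw_gid)  # must run before dropping root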
......@@ -287,7 +296,7 @@ class CarbonCacheOptions(usage.Options):
try:
pid = int(pf.read().strip())
pf.close()
except:
except (IOError, ValueError):  # int() raises ValueError on a corrupt pidfile
print "Could not read pidfile %s" % pidfile
raise SystemExit(1)
print "Sending kill signal to pid %d" % pid
......@@ -309,7 +318,7 @@ class CarbonCacheOptions(usage.Options):
try:
pid = int(pf.read().strip())
pf.close()
except:
except (IOError, ValueError):  # int() raises ValueError on a corrupt pidfile
print "Failed to read pid from %s" % pidfile
raise SystemExit(1)
......@@ -327,7 +336,7 @@ class CarbonCacheOptions(usage.Options):
try:
pid = int(pf.read().strip())
pf.close()
except:
except (IOError, ValueError):  # int() raises ValueError on a corrupt pidfile
print "Could not read pidfile %s" % pidfile
raise SystemExit(1)
if _process_alive(pid):
......@@ -338,7 +347,7 @@ class CarbonCacheOptions(usage.Options):
print "Removing stale pidfile %s" % pidfile
try:
os.unlink(pidfile)
except:
except OSError:  # os.unlink raises OSError, not IOError
print "Could not remove pidfile %s" % pidfile
print "Starting %s (instance %s)" % (program, instance)
......@@ -359,12 +368,12 @@ class CarbonAggregatorOptions(CarbonCacheOptions):
def postOptions(self):
CarbonCacheOptions.postOptions(self)
if self["rules"] is None:
self["rules"] = join(settings["CONF_DIR"], "aggregation-rules.conf")
self["rules"] = join(settings["CONF_DIR"], settings['AGGREGATION_RULES'])
settings["aggregation-rules"] = self["rules"]
if self["rewrite-rules"] is None:
self["rewrite-rules"] = join(settings["CONF_DIR"],
"rewrite-rules.conf")
settings['REWRITE_RULES'])
settings["rewrite-rules"] = self["rewrite-rules"]
......@@ -378,11 +387,11 @@ class CarbonRelayOptions(CarbonCacheOptions):
def postOptions(self):
CarbonCacheOptions.postOptions(self)
if self["rules"] is None:
self["rules"] = join(settings["CONF_DIR"], "relay-rules.conf")
self["rules"] = join(settings["CONF_DIR"], settings['RELAY_RULES'])
settings["relay-rules"] = self["rules"]
if self["aggregation-rules"] is None:
self["aggregation-rules"] = join(settings["CONF_DIR"], "aggregation-rules.conf")
self["aggregation-rules"] = join(settings["CONF_DIR"], settings['AGGREGATION_RULES'])
settings["aggregation-rules"] = self["aggregation-rules"]
if settings["RELAY_METHOD"] not in ("rules", "consistent-hashing", "aggregated-consistent-hashing"):
......@@ -398,6 +407,9 @@ def get_default_parser(usage="%prog [options] <start|stop|status>"):
parser.add_option(
"--debug", action="store_true",
help="Run in the foreground, log to stdout")
parser.add_option(
"--nodaemon", action="store_true",
help="Run in the foreground")
parser.add_option(
"--profile",
help="Record performance profile data to the given file")
......
......@@ -18,7 +18,7 @@ class Event:
for handler in self.handlers:
try:
handler(*args, **kwargs)
except:
except Exception:
log.err(None, "Exception in %s event handler: args=%s kwargs=%s" % (self.name, args, kwargs))
......
......@@ -29,6 +29,12 @@ def increment(stat, increase=1):
except KeyError:
stats[stat] = increase
def max(stat, newval):
try:
if stats[stat] < newval:
stats[stat] = newval
except KeyError:
stats[stat] = newval
def append(stat, value):
try:
......@@ -76,7 +82,9 @@ def recordMetrics():
creates = myStats.get('creates', 0)
errors = myStats.get('errors', 0)
cacheQueries = myStats.get('cacheQueries', 0)
cacheBulkQueries = myStats.get('cacheBulkQueries', 0)
cacheOverflow = myStats.get('cache.overflow', 0)
cacheBulkQuerySizes = myStats.get('cacheBulkQuerySize', [])
# Calculate cache-data-structure-derived metrics prior to storing anything
# in the cache itself -- which would otherwise affect said metrics.
......@@ -93,11 +101,16 @@ def recordMetrics():
pointsPerUpdate = float(committedPoints) / len(updateTimes)
record('pointsPerUpdate', pointsPerUpdate)
if cacheBulkQuerySizes:
avgBulkSize = sum(cacheBulkQuerySizes) / len(cacheBulkQuerySizes)
record('cache.bulk_queries_average_size', avgBulkSize)
record('updateOperations', len(updateTimes))
record('committedPoints', committedPoints)
record('creates', creates)
record('errors', errors)
record('cache.queries', cacheQueries)
record('cache.bulk_queries', cacheBulkQueries)
record('cache.overflow', cacheOverflow)
# aggregator metrics
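One caveat on the average computed above: under Python 2, sum()/len() on integers truncates, so a float() cast gives a truer cache.bulk_queries_average_size:

cacheBulkQuerySizes = [10, 15, 20]
avgBulkSize = float(sum(cacheBulkQuerySizes)) / len(cacheBulkQuerySizes)
print(avgBulkSize)  # 15.0 here either way, but [10, 15] would give 12, not 12.5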
......@@ -118,10 +131,12 @@ def recordMetrics():
# common metrics
record('metricsReceived', myStats.get('metricsReceived', 0))
record('blacklistMatches', myStats.get('blacklistMatches', 0))
record('whitelistRejects', myStats.get('whitelistRejects', 0))
record('cpuUsage', getCpuUsage())
try: # This only works on Linux
record('memUsage', getMemUsage())
except:
except Exception:
pass
......
......@@ -12,7 +12,7 @@ def getMetadata(metric, key):
try:
value = whisper.info(wsp_path)['aggregationMethod']
return dict(value=value)
except:
except Exception:
log.err()
return dict(error=traceback.format_exc())
......@@ -25,6 +25,6 @@ def setMetadata(metric, key, value):
try:
old_value = whisper.setAggregationMethod(wsp_path, value)
return dict(old_value=old_value, new_value=value)
except:
except Exception:
log.err()
return dict(error=traceback.format_exc())
......@@ -71,7 +71,7 @@ class MetricLineReceiver(MetricReceiver, LineOnlyReceiver):
try:
metric, value, timestamp = line.strip().split()
datapoint = (float(timestamp), float(value))
except:
except ValueError: