Skip to content
GitLab
Explore
Sign in
Register
Commits on Source (6)
check_raid/control: Fixing invalid control statement
· 0d80aa92
Jan Wagner
authored
Jan 29, 2019
0d80aa92
check_raid: Update to 4.0.9
· 1dfe25a1
Jan Wagner
authored
Jan 30, 2019
1dfe25a1
d/control: Auto update
· 23a69f57
Jan Wagner
authored
Jan 30, 2019
23a69f57
d/control.in: Using priority optional
· bc48c7bf
Jan Wagner
authored
Jan 30, 2019
bc48c7bf
d/control: Auto update
· 5bb19371
Jan Wagner
authored
Jan 30, 2019
5bb19371
check_mongo: Update to 46d27ab
· bc2cc30a
Jan Wagner
authored
Jan 31, 2019
bc2cc30a
Expand all
Show whitespace changes
Inline
Side-by-side
check_mongodb/check_mongodb.py
View file @
bc2cc30a
...
...
@@ -26,17 +26,19 @@
# See the README.md
#
from
__future__
import
print_function
from
__future__
import
division
import
sys
import
time
import
optparse
import
textwrap
import
re
import
os
import
numbers
try
:
import
pymongo
except
ImportError
,
e
:
print
e
except
ImportError
as
e
:
print
(
e
)
sys
.
exit
(
2
)
# As of pymongo v 1.9 the SON API is part of the BSON package, therefore attempt
...
...
@@ -80,37 +82,35 @@ def performance_data(perf_data, params):
def
numeric_type
(
param
):
if
((
type
(
param
)
==
float
or
type
(
param
)
==
int
or
type
(
param
)
==
long
or
param
==
None
)):
return
True
return
False
return
param
is
None
or
isinstance
(
param
,
numbers
.
Real
)
def
check_levels
(
param
,
warning
,
critical
,
message
,
ok
=
[]):
if
(
numeric_type
(
critical
)
and
numeric_type
(
warning
)):
if
param
>=
critical
:
print
"
CRITICAL -
"
+
message
print
(
"
CRITICAL -
"
+
message
)
sys
.
exit
(
2
)
elif
param
>=
warning
:
print
"
WARNING -
"
+
message
print
(
"
WARNING -
"
+
message
)
sys
.
exit
(
1
)
else
:
print
"
OK -
"
+
message
print
(
"
OK -
"
+
message
)
sys
.
exit
(
0
)
else
:
if
param
in
critical
:
print
"
CRITICAL -
"
+
message
print
(
"
CRITICAL -
"
+
message
)
sys
.
exit
(
2
)
if
param
in
warning
:
print
"
WARNING -
"
+
message
print
(
"
WARNING -
"
+
message
)
sys
.
exit
(
1
)
if
param
in
ok
:
print
"
OK -
"
+
message
print
(
"
OK -
"
+
message
)
sys
.
exit
(
0
)
# unexpected param value
print
"
CRITICAL - Unexpected value : %d
"
%
param
+
"
;
"
+
message
print
(
"
CRITICAL - Unexpected value : %d
"
%
param
+
"
;
"
+
message
)
return
2
...
...
@@ -206,7 +206,6 @@ def main(argv):
return
err
conn_time
=
time
.
time
()
-
start
conn_time
=
round
(
conn_time
,
0
)
if
action
==
"
connections
"
:
return
check_connections
(
con
,
warning
,
critical
,
perf_data
)
...
...
@@ -314,6 +313,10 @@ def mongo_connect(host=None, port=None, ssl=False, user=None, passwd=None, repli
else
:
con
=
pymongo
.
Connection
(
host
,
port
,
slave_okay
=
True
,
network_timeout
=
10
)
# we must authenticate the connection, otherwise we won't be able to perform certain operations
if
ssl_cert
and
ssl_ca_cert_file
and
user
:
con
.
the_database
.
authenticate
(
user
,
mechanism
=
'
MONGODB-X509
'
)
try
:
result
=
con
.
admin
.
command
(
"
ismaster
"
)
except
ConnectionFailure
:
...
...
@@ -321,7 +324,7 @@ def mongo_connect(host=None, port=None, ssl=False, user=None, passwd=None, repli
sys
.
exit
(
2
)
if
'
arbiterOnly
'
in
result
and
result
[
'
arbiterOnly
'
]
==
True
:
print
"
OK - State: 7 (Arbiter on port %s)
"
%
(
port
)
print
(
"
OK - State: 7 (Arbiter on port %s)
"
%
(
port
)
)
sys
.
exit
(
0
)
if
user
and
passwd
:
...
...
@@ -334,11 +337,11 @@ def mongo_connect(host=None, port=None, ssl=False, user=None, passwd=None, repli
# Ping to check that the server is responding.
con
.
admin
.
command
(
"
ping
"
)
except
Exception
,
e
:
except
Exception
as
e
:
if
isinstance
(
e
,
pymongo
.
errors
.
AutoReconnect
)
and
str
(
e
).
find
(
"
is an arbiter
"
)
!=
-
1
:
# We got a pymongo AutoReconnect exception that tells us we connected to an Arbiter Server
# This means: Arbiter is reachable and can answer requests/votes - this is all we need to know from an arbiter
print
"
OK - State: 7 (Arbiter)
"
print
(
"
OK - State: 7 (Arbiter)
"
)
sys
.
exit
(
0
)
return
exit_with_general_critical
(
e
),
None
return
0
,
con
...
...
@@ -348,7 +351,7 @@ def exit_with_general_warning(e):
if
isinstance
(
e
,
SystemExit
):
return
e
else
:
print
"
WARNING - General MongoDB warning:
"
,
e
print
(
"
WARNING - General MongoDB warning:
"
,
e
)
return
1
...
...
@@ -356,7 +359,7 @@ def exit_with_general_critical(e):
if
isinstance
(
e
,
SystemExit
):
return
e
else
:
print
"
CRITICAL - General MongoDB Error:
"
,
e
print
(
"
CRITICAL - General MongoDB Error:
"
,
e
)
return
2
...
...
@@ -369,14 +372,14 @@ def set_read_preference(db):
def
check_version
(
con
):
try
:
server_info
=
con
.
server_info
()
except
Exception
,
e
:
except
Exception
as
e
:
return
exit_with_general_critical
(
e
),
None
return
0
,
int
(
server_info
[
'
version
'
].
split
(
'
.
'
)[
0
].
strip
())
def
check_connect
(
host
,
port
,
warning
,
critical
,
perf_data
,
user
,
passwd
,
conn_time
):
warning
=
warning
or
3
critical
=
critical
or
6
message
=
"
Connection took %
i
seconds
"
%
conn_time
message
=
"
Connection took %
.3f
seconds
"
%
conn_time
message
+=
performance_data
(
perf_data
,
[(
conn_time
,
"
connection_time
"
,
warning
,
critical
)])
return
check_levels
(
conn_time
,
warning
,
critical
,
message
)
...
...
@@ -398,15 +401,15 @@ def check_connections(con, warning, critical, perf_data):
(
available
,
"
available_connections
"
)])
return
check_levels
(
used_percent
,
warning
,
critical
,
message
)
except
Exception
,
e
:
except
Exception
as
e
:
return
exit_with_general_critical
(
e
)
def
check_rep_lag
(
con
,
host
,
port
,
warning
,
critical
,
percent
,
perf_data
,
max_lag
,
user
,
passwd
,
ssl
=
None
,
insecure
=
None
,
ssl_ca_cert_file
=
None
,
cert_file
=
None
):
# Get mongo to tell us replica set member name when connecting locally
if
"
127.0.0.1
"
==
host
:
if
not
"
me
"
in
con
.
admin
.
command
(
"
ismaster
"
,
"
1
"
).
keys
():
print
"
UNKNOWN - This is not replicated MongoDB
"
if
not
"
me
"
in
list
(
con
.
admin
.
command
(
"
ismaster
"
,
"
1
"
).
keys
()
)
:
print
(
"
UNKNOWN - This is not replicated MongoDB
"
)
return
3
host
=
con
.
admin
.
command
(
"
ismaster
"
,
"
1
"
)[
"
me
"
].
split
(
'
:
'
)[
0
]
...
...
@@ -425,9 +428,9 @@ def check_rep_lag(con, host, port, warning, critical, percent, perf_data, max_la
# Get replica set status
try
:
rs_status
=
con
.
admin
.
command
(
"
replSetGetStatus
"
)
except
pymongo
.
errors
.
OperationFailure
,
e
:
except
pymongo
.
errors
.
OperationFailure
as
e
:
if
((
e
.
code
==
None
and
str
(
e
).
find
(
'
failed: not running with --replSet
"'
))
or
(
e
.
code
==
76
and
str
(
e
).
find
(
'
not running with --replSet
"'
))):
print
"
UNKNOWN - Not running with replSet
"
print
(
"
UNKNOWN - Not running with replSet
"
)
return
3
serverVersion
=
tuple
(
con
.
server_info
()[
'
version
'
].
split
(
'
.
'
))
...
...
@@ -454,19 +457,19 @@ def check_rep_lag(con, host, port, warning, critical, percent, perf_data, max_la
# Check if we're in the middle of an election and don't have a primary
if
primary_node
is
None
:
print
"
WARNING - No primary defined. In an election?
"
print
(
"
WARNING - No primary defined. In an election?
"
)
return
1
# Check if we failed to find the current host
# below should never happen
if
host_node
is
None
:
print
"
CRITICAL - Unable to find host
'"
+
host
+
"'
in replica set.
"
print
(
"
CRITICAL - Unable to find host
'"
+
host
+
"'
in replica set.
"
)
return
2
# Is the specified host the primary?
if
host_node
[
"
stateStr
"
]
==
"
PRIMARY
"
:
if
max_lag
==
False
:
print
"
OK - This is the primary.
"
print
(
"
OK - This is the primary.
"
)
return
0
else
:
#get the maximal replication lag
...
...
@@ -491,7 +494,7 @@ def check_rep_lag(con, host, port, warning, critical, percent, perf_data, max_la
message
+=
performance_data
(
perf_data
,
[(
maximal_lag
,
"
replication_lag
"
,
warning
,
critical
)])
return
check_levels
(
maximal_lag
,
warning
,
critical
,
message
)
elif
host_node
[
"
stateStr
"
]
==
"
ARBITER
"
:
print
"
UNKNOWN - This is an arbiter
"
print
(
"
UNKNOWN - This is an arbiter
"
)
return
3
# Find the difference in optime between current node and PRIMARY
...
...
@@ -543,12 +546,12 @@ def check_rep_lag(con, host, port, warning, critical, percent, perf_data, max_la
# Check if we're in the middle of an election and don't have a primary
if
primary_node
is
None
:
print
"
WARNING - No primary defined. In an election?
"
print
(
"
WARNING - No primary defined. In an election?
"
)
sys
.
exit
(
1
)
# Is the specified host the primary?
if
host_node
[
"
stateStr
"
]
==
"
PRIMARY
"
:
print
"
OK - This is the primary.
"
print
(
"
OK - This is the primary.
"
)
sys
.
exit
(
0
)
# Find the difference in optime between current node and PRIMARY
...
...
@@ -567,7 +570,7 @@ def check_rep_lag(con, host, port, warning, critical, percent, perf_data, max_la
message
+=
performance_data
(
perf_data
,
[(
lag
,
"
replication_lag
"
,
warning
,
critical
)])
return
check_levels
(
lag
,
warning
,
critical
,
message
)
except
Exception
,
e
:
except
Exception
as
e
:
return
exit_with_general_critical
(
e
)
#
...
...
@@ -602,7 +605,7 @@ def check_memory(con, warning, critical, perf_data, mapped_memory, host):
try
:
data
=
get_server_status
(
con
)
if
not
data
[
'
mem
'
][
'
supported
'
]
and
not
mapped_memory
:
print
"
OK - Platform not supported for memory info
"
print
(
"
OK - Platform not supported for memory info
"
)
return
0
#
# convert to gigs
...
...
@@ -639,7 +642,7 @@ def check_memory(con, warning, critical, perf_data, mapped_memory, host):
else
:
return
check_levels
(
mem_resident
,
warning
,
critical
,
message
)
except
Exception
,
e
:
except
Exception
as
e
:
return
exit_with_general_critical
(
e
)
...
...
@@ -652,7 +655,7 @@ def check_memory_mapped(con, warning, critical, perf_data):
try
:
data
=
get_server_status
(
con
)
if
not
data
[
'
mem
'
][
'
supported
'
]:
print
"
OK - Platform not supported for memory info
"
print
(
"
OK - Platform not supported for memory info
"
)
return
0
#
# convert to gigs
...
...
@@ -674,10 +677,10 @@ def check_memory_mapped(con, warning, critical, perf_data):
if
not
mem_mapped
==
-
1
:
return
check_levels
(
mem_mapped
,
warning
,
critical
,
message
)
else
:
print
"
OK - Server does not provide mem.mapped info
"
print
(
"
OK - Server does not provide mem.mapped info
"
)
return
0
except
Exception
,
e
:
except
Exception
as
e
:
return
exit_with_general_critical
(
e
)
...
...
@@ -702,11 +705,11 @@ def check_lock(con, warning, critical, perf_data, mongo_version):
message
=
"
Lock Percentage: %.2f%%
"
%
lock_percentage
message
+=
performance_data
(
perf_data
,
[(
"
%.2f
"
%
lock_percentage
,
"
lock_percentage
"
,
warning
,
critical
)])
return
check_levels
(
lock_percentage
,
warning
,
critical
,
message
)
except
Exception
,
e
:
print
"
Couldn
'
t get globalLock lockTime info from mongo, are you sure you
'
re not using version 3? See the -M option.
"
except
Exception
as
e
:
print
(
"
Couldn
'
t get globalLock lockTime info from mongo, are you sure you
'
re not using version 3? See the -M option.
"
)
return
exit_with_general_critical
(
e
)
else
:
print
"
OK - MongoDB version 3 doesn
'
t report on global locks
"
print
(
"
OK - MongoDB version 3 doesn
'
t report on global locks
"
)
return
0
...
...
@@ -733,10 +736,10 @@ def check_flushing(con, warning, critical, avg, perf_data):
return
check_levels
(
flush_time
,
warning
,
critical
,
message
)
except
Exception
:
print
"
OK - flushing stats not available for this storage engine
"
print
(
"
OK - flushing stats not available for this storage engine
"
)
return
0
except
Exception
,
e
:
except
Exception
as
e
:
return
exit_with_general_critical
(
e
)
...
...
@@ -757,14 +760,14 @@ def index_miss_ratio(con, warning, critical, perf_data):
not_supported_msg
=
"
not supported on this platform
"
try
:
data
[
'
indexCounters
'
]
if
data
[
'
indexCounters
'
]
.
has_key
(
'
note
'
)
:
print
"
OK - MongoDB says:
"
+
not_supported_msg
if
'
note
'
in
data
[
'
indexCounters
'
]:
print
(
"
OK - MongoDB says:
"
+
not_supported_msg
)
return
0
else
:
print
"
WARNING - Can
'
t get counter from MongoDB
"
print
(
"
WARNING - Can
'
t get counter from MongoDB
"
)
return
1
except
Exception
:
print
"
OK - MongoDB says:
"
+
not_supported_msg
print
(
"
OK - MongoDB says:
"
+
not_supported_msg
)
return
0
message
=
"
Miss Ratio: %.2f
"
%
miss_ratio
...
...
@@ -772,7 +775,7 @@ def index_miss_ratio(con, warning, critical, perf_data):
return
check_levels
(
miss_ratio
,
warning
,
critical
,
message
)
except
Exception
,
e
:
except
Exception
as
e
:
return
exit_with_general_critical
(
e
)
def
check_replset_quorum
(
con
,
perf_data
):
...
...
@@ -796,7 +799,7 @@ def check_replset_quorum(con, perf_data):
message
=
"
Cluster is not quorate and cannot operate
"
return
check_levels
(
state
,
warning
,
critical
,
message
)
except
Exception
,
e
:
except
Exception
as
e
:
return
exit_with_general_critical
(
e
)
...
...
@@ -811,7 +814,7 @@ def check_replset_state(con, perf_data, warning="", critical=""):
except
:
critical
=
[
8
,
4
,
-
1
]
ok
=
range
(
-
1
,
8
)
# should include the range of all possible values
ok
=
list
(
range
(
-
1
,
8
)
)
# should include the range of all possible values
try
:
worst_state
=
-
2
message
=
""
...
...
@@ -821,22 +824,22 @@ def check_replset_state(con, perf_data, warning="", critical=""):
data
=
con
.
admin
.
command
(
pymongo
.
son_manipulator
.
SON
([(
'
replSetGetStatus
'
,
1
)]))
except
:
data
=
con
.
admin
.
command
(
son
.
SON
([(
'
replSetGetStatus
'
,
1
)]))
members
=
data
[
'
members
'
]
;
members
=
data
[
'
members
'
]
my_state
=
int
(
data
[
'
myState
'
])
worst_state
=
my_state
for
member
in
members
:
their_state
=
int
(
member
[
'
state
'
])
message
+=
"
%s: %i (%s)
"
%
(
member
[
'
name
'
],
their_state
,
state_text
(
their_state
))
if
state_is_worse
(
their_state
,
worst_state
,
warning
,
critical
):
worst_state
=
their_state
;
worst_state
=
their_state
message
+=
performance_data
(
perf_data
,
[(
my_state
,
"
state
"
)])
except
pymongo
.
errors
.
OperationFailure
,
e
:
except
pymongo
.
errors
.
OperationFailure
as
e
:
if
((
e
.
code
==
None
and
str
(
e
).
find
(
'
failed: not running with --replSet
"'
))
or
(
e
.
code
==
76
and
str
(
e
).
find
(
'
not running with --replSet
"'
))):
worst_state
=
-
1
return
check_levels
(
worst_state
,
warning
,
critical
,
message
,
ok
)
except
Exception
,
e
:
except
Exception
as
e
:
return
exit_with_general_critical
(
e
)
def
state_is_worse
(
state
,
worst_state
,
warning
,
critical
):
...
...
@@ -881,7 +884,7 @@ def check_databases(con, warning, critical, perf_data=None):
message
=
"
Number of DBs: %.0f
"
%
count
message
+=
performance_data
(
perf_data
,
[(
count
,
"
databases
"
,
warning
,
critical
,
message
)])
return
check_levels
(
count
,
warning
,
critical
,
message
)
except
Exception
,
e
:
except
Exception
as
e
:
return
exit_with_general_critical
(
e
)
...
...
@@ -903,7 +906,7 @@ def check_collections(con, warning, critical, perf_data=None):
message
+=
performance_data
(
perf_data
,
[(
count
,
"
collections
"
,
warning
,
critical
,
message
)])
return
check_levels
(
count
,
warning
,
critical
,
message
)
except
Exception
,
e
:
except
Exception
as
e
:
return
exit_with_general_critical
(
e
)
...
...
@@ -940,21 +943,21 @@ def check_database_size(con, database, warning, critical, perf_data):
try
:
set_read_preference
(
con
.
admin
)
data
=
con
[
database
].
command
(
'
dbstats
'
)
storage_size
=
data
[
'
storageSize
'
]
/
1024
/
1024
storage_size
=
data
[
'
storageSize
'
]
/
/
1024
/
/
1024
if
perf_data
:
perfdata
+=
"
| database_size=%i;%i;%i
"
%
(
storage_size
,
warning
,
critical
)
#perfdata += " database=%s" %(database)
if
storage_size
>=
critical
:
print
"
CRITICAL - Database size: %.0f MB, Database: %s%s
"
%
(
storage_size
,
database
,
perfdata
)
print
(
"
CRITICAL - Database size: %.0f MB, Database: %s%s
"
%
(
storage_size
,
database
,
perfdata
)
)
return
2
elif
storage_size
>=
warning
:
print
"
WARNING - Database size: %.0f MB, Database: %s%s
"
%
(
storage_size
,
database
,
perfdata
)
print
(
"
WARNING - Database size: %.0f MB, Database: %s%s
"
%
(
storage_size
,
database
,
perfdata
)
)
return
1
else
:
print
"
OK - Database size: %.0f MB, Database: %s%s
"
%
(
storage_size
,
database
,
perfdata
)
print
(
"
OK - Database size: %.0f MB, Database: %s%s
"
%
(
storage_size
,
database
,
perfdata
)
)
return
0
except
Exception
,
e
:
except
Exception
as
e
:
return
exit_with_general_critical
(
e
)
...
...
@@ -968,20 +971,20 @@ def check_database_indexes(con, database, warning, critical, perf_data):
try
:
set_read_preference
(
con
.
admin
)
data
=
con
[
database
].
command
(
'
dbstats
'
)
index_size
=
data
[
'
indexSize
'
]
/
1024
/
1024
index_size
=
data
[
'
indexSize
'
]
/
1024
/
/
1024
if
perf_data
:
perfdata
+=
"
| database_indexes=%i;%i;%i
"
%
(
index_size
,
warning
,
critical
)
if
index_size
>=
critical
:
print
"
CRITICAL - %s indexSize: %.0f MB %s
"
%
(
database
,
index_size
,
perfdata
)
print
(
"
CRITICAL - %s indexSize: %.0f MB %s
"
%
(
database
,
index_size
,
perfdata
)
)
return
2
elif
index_size
>=
warning
:
print
"
WARNING - %s indexSize: %.0f MB %s
"
%
(
database
,
index_size
,
perfdata
)
print
(
"
WARNING - %s indexSize: %.0f MB %s
"
%
(
database
,
index_size
,
perfdata
)
)
return
1
else
:
print
"
OK - %s indexSize: %.0f MB %s
"
%
(
database
,
index_size
,
perfdata
)
print
(
"
OK - %s indexSize: %.0f MB %s
"
%
(
database
,
index_size
,
perfdata
)
)
return
0
except
Exception
,
e
:
except
Exception
as
e
:
return
exit_with_general_critical
(
e
)
...
...
@@ -995,15 +998,15 @@ def check_collection_documents(con, database, collection, warning, critical, per
perfdata
+=
"
| collection_documents=%i;%i;%i
"
%
(
documents
,
warning
,
critical
)
if
documents
>=
critical
:
print
"
CRITICAL - %s.%s documents: %s %s
"
%
(
database
,
collection
,
documents
,
perfdata
)
print
(
"
CRITICAL - %s.%s documents: %s %s
"
%
(
database
,
collection
,
documents
,
perfdata
)
)
return
2
elif
documents
>=
warning
:
print
"
WARNING - %s.%s documents: %s %s
"
%
(
database
,
collection
,
documents
,
perfdata
)
print
(
"
WARNING - %s.%s documents: %s %s
"
%
(
database
,
collection
,
documents
,
perfdata
)
)
return
1
else
:
print
"
OK - %s.%s documents: %s %s
"
%
(
database
,
collection
,
documents
,
perfdata
)
print
(
"
OK - %s.%s documents: %s %s
"
%
(
database
,
collection
,
documents
,
perfdata
)
)
return
0
except
Exception
,
e
:
except
Exception
as
e
:
return
exit_with_general_critical
(
e
)
...
...
@@ -1022,15 +1025,15 @@ def check_collection_indexes(con, database, collection, warning, critical, perf_
perfdata
+=
"
| collection_indexes=%i;%i;%i
"
%
(
total_index_size
,
warning
,
critical
)
if
total_index_size
>=
critical
:
print
"
CRITICAL - %s.%s totalIndexSize: %.0f MB %s
"
%
(
database
,
collection
,
total_index_size
,
perfdata
)
print
(
"
CRITICAL - %s.%s totalIndexSize: %.0f MB %s
"
%
(
database
,
collection
,
total_index_size
,
perfdata
)
)
return
2
elif
total_index_size
>=
warning
:
print
"
WARNING - %s.%s totalIndexSize: %.0f MB %s
"
%
(
database
,
collection
,
total_index_size
,
perfdata
)
print
(
"
WARNING - %s.%s totalIndexSize: %.0f MB %s
"
%
(
database
,
collection
,
total_index_size
,
perfdata
)
)
return
1
else
:
print
"
OK - %s.%s totalIndexSize: %.0f MB %s
"
%
(
database
,
collection
,
total_index_size
,
perfdata
)
print
(
"
OK - %s.%s totalIndexSize: %.0f MB %s
"
%
(
database
,
collection
,
total_index_size
,
perfdata
)
)
return
0
except
Exception
,
e
:
except
Exception
as
e
:
return
exit_with_general_critical
(
e
)
...
...
@@ -1047,7 +1050,7 @@ def check_queues(con, warning, critical, perf_data):
message
+=
performance_data
(
perf_data
,
[(
total_queues
,
"
total_queues
"
,
warning
,
critical
),
(
readers_queues
,
"
readers_queues
"
),
(
writers_queues
,
"
writers_queues
"
)])
return
check_levels
(
total_queues
,
warning
,
critical
,
message
)
except
Exception
,
e
:
except
Exception
as
e
:
return
exit_with_general_critical
(
e
)
def
check_collection_size
(
con
,
database
,
collection
,
warning
,
critical
,
perf_data
):
...
...
@@ -1062,15 +1065,15 @@ def check_collection_size(con, database, collection, warning, critical, perf_dat
perfdata
+=
"
| collection_size=%i;%i;%i
"
%
(
size
,
warning
,
critical
)
if
size
>=
critical
:
print
"
CRITICAL - %s.%s size: %.0f MB %s
"
%
(
database
,
collection
,
size
,
perfdata
)
print
(
"
CRITICAL - %s.%s size: %.0f MB %s
"
%
(
database
,
collection
,
size
,
perfdata
)
)
return
2
elif
size
>=
warning
:
print
"
WARNING - %s.%s size: %.0f MB %s
"
%
(
database
,
collection
,
size
,
perfdata
)
print
(
"
WARNING - %s.%s size: %.0f MB %s
"
%
(
database
,
collection
,
size
,
perfdata
)
)
return
1
else
:
print
"
OK - %s.%s size: %.0f MB %s
"
%
(
database
,
collection
,
size
,
perfdata
)
print
(
"
OK - %s.%s size: %.0f MB %s
"
%
(
database
,
collection
,
size
,
perfdata
)
)
return
0
except
Exception
,
e
:
except
Exception
as
e
:
return
exit_with_general_critical
(
e
)
...
...
@@ -1086,15 +1089,15 @@ def check_collection_storageSize(con, database, collection, warning, critical, p
perfdata
+=
"
| collection_storageSize=%i;%i;%i
"
%
(
storageSize
,
warning
,
critical
)
if
storageSize
>=
critical
:
print
"
CRITICAL - %s.%s storageSize: %.0f MB %s
"
%
(
database
,
collection
,
storageSize
,
perfdata
)
print
(
"
CRITICAL - %s.%s storageSize: %.0f MB %s
"
%
(
database
,
collection
,
storageSize
,
perfdata
)
)
return
2
elif
storageSize
>=
warning
:
print
"
WARNING - %s.%s storageSize: %.0f MB %s
"
%
(
database
,
collection
,
storageSize
,
perfdata
)
print
(
"
WARNING - %s.%s storageSize: %.0f MB %s
"
%
(
database
,
collection
,
storageSize
,
perfdata
)
)
return
1
else
:
print
"
OK - %s.%s storageSize: %.0f MB %s
"
%
(
database
,
collection
,
storageSize
,
perfdata
)
print
(
"
OK - %s.%s storageSize: %.0f MB %s
"
%
(
database
,
collection
,
storageSize
,
perfdata
)
)
return
0
except
Exception
,
e
:
except
Exception
as
e
:
return
exit_with_general_critical
(
e
)
...
...
@@ -1155,7 +1158,7 @@ def check_queries_per_second(con, query_type, warning, critical, perf_data, mong
return
check_levels
(
query_per_sec
,
warning
,
critical
,
message
)
except
Exception
,
e
:
except
Exception
as
e
:
return
exit_with_general_critical
(
e
)
...
...
@@ -1202,7 +1205,7 @@ def check_oplog(con, warning, critical, perf_data):
message
+=
performance_data
(
perf_data
,
[(
"
%.2f
"
%
hours_in_oplog
,
'
oplog_time
'
,
warning
,
critical
),
(
"
%.2f
"
%
approx_level
,
'
oplog_time_100_percent_used
'
)])
return
check_levels
(
-
approx_level
,
-
warning
,
-
critical
,
message
)
except
Exception
,
e
:
except
Exception
as
e
:
return
exit_with_general_critical
(
e
)
...
...
@@ -1220,7 +1223,7 @@ Under very high write situations it is normal for this value to be nonzero. """
message
+=
performance_data
(
perf_data
,
[(
j_commits_in_wl
,
"
j_commits_in_wl
"
,
warning
,
critical
)])
return
check_levels
(
j_commits_in_wl
,
warning
,
critical
,
message
)
except
Exception
,
e
:
except
Exception
as
e
:
return
exit_with_general_critical
(
e
)
...
...
@@ -1236,7 +1239,7 @@ def check_journaled(con, warning, critical, perf_data):
message
+=
performance_data
(
perf_data
,
[(
"
%.2f
"
%
journaled
,
"
journaled
"
,
warning
,
critical
)])
return
check_levels
(
journaled
,
warning
,
critical
,
message
)
except
Exception
,
e
:
except
Exception
as
e
:
return
exit_with_general_critical
(
e
)
...
...
@@ -1253,7 +1256,7 @@ than the amount physically written to disk."""
message
+=
performance_data
(
perf_data
,
[(
"
%.2f
"
%
writes
,
"
write_to_data_files
"
,
warning
,
critical
)])
return
check_levels
(
writes
,
warning
,
critical
,
message
)
except
Exception
,
e
:
except
Exception
as
e
:
return
exit_with_general_critical
(
e
)
...
...
@@ -1265,7 +1268,7 @@ def get_opcounters(data, opcounters_name, host, port):
delete
=
data
[
opcounters_name
][
'
delete
'
]
getmore
=
data
[
opcounters_name
][
'
getmore
'
]
command
=
data
[
opcounters_name
][
'
command
'
]
except
KeyError
,
e
:
except
KeyError
as
e
:
return
0
,
[
0
]
*
100
total_commands
=
insert
+
query
+
update
+
delete
+
getmore
+
command
new_vals
=
[
total_commands
,
insert
,
query
,
update
,
delete
,
getmore
,
command
]
...
...
@@ -1432,9 +1435,9 @@ def check_page_faults(con, sample_time, warning, critical, perf_data):
try
:
#on linux servers only
page_faults
=
(
int
(
data2
[
'
extra_info
'
][
'
page_faults
'
])
-
int
(
data1
[
'
extra_info
'
][
'
page_faults
'
]))
/
sample_time
page_faults
=
(
int
(
data2
[
'
extra_info
'
][
'
page_faults
'
])
-
int
(
data1
[
'
extra_info
'
][
'
page_faults
'
]))
/
/
sample_time
except
KeyError
:
print
"
WARNING - Can
'
t get extra_info.page_faults counter from MongoDB
"
print
(
"
WARNING - Can
'
t get extra_info.page_faults counter from MongoDB
"
)
sys
.
exit
(
1
)
message
=
"
Page Faults: %i
"
%
(
page_faults
)
...
...
@@ -1442,7 +1445,7 @@ def check_page_faults(con, sample_time, warning, critical, perf_data):
message
+=
performance_data
(
perf_data
,
[(
page_faults
,
"
page_faults
"
,
warning
,
critical
)])
check_levels
(
page_faults
,
warning
,
critical
,
message
)
except
Exception
,
e
:
except
Exception
as
e
:
exit_with_general_critical
(
e
)
...
...
@@ -1458,35 +1461,35 @@ def chunks_balance(con, database, collection, warning, critical):
shards
=
col
.
distinct
(
"
shard
"
)
except
:
print
"
WARNING - Can
'
t get chunks infos from MongoDB
"
print
(
"
WARNING - Can
'
t get chunks infos from MongoDB
"
)
sys
.
exit
(
1
)
if
nscount
==
0
:
print
"
WARNING - Namespace %s is not sharded
"
%
(
nsfilter
)
print
(
"
WARNING - Namespace %s is not sharded
"
%
(
nsfilter
)
)
sys
.
exit
(
1
)
avgchunksnb
=
nscount
/
len
(
shards
)
warningnb
=
avgchunksnb
*
warning
/
100
criticalnb
=
avgchunksnb
*
critical
/
100
avgchunksnb
=
nscount
/
/
len
(
shards
)
warningnb
=
avgchunksnb
*
warning
/
/
100
criticalnb
=
avgchunksnb
*
critical
/
/
100
for
shard
in
shards
:
delta
=
abs
(
avgchunksnb
-
col
.
find
({
"
ns
"
:
nsfilter
,
"
shard
"
:
shard
}).
count
())
message
=
"
Namespace: %s, Shard name: %s, Chunk delta: %i
"
%
(
nsfilter
,
shard
,
delta
)
if
delta
>=
criticalnb
and
delta
>
0
:
print
"
CRITICAL - Chunks not well balanced
"
+
message
print
(
"
CRITICAL - Chunks not well balanced
"
+
message
)
sys
.
exit
(
2
)
elif
delta
>=
warningnb
and
delta
>
0
:
print
"
WARNING - Chunks not well balanced
"
+
message
print
(
"
WARNING - Chunks not well balanced
"
+
message
)
sys
.
exit
(
1
)
print
"
OK - Chunks well balanced across shards
"
print
(
"
OK - Chunks well balanced across shards
"
)
sys
.
exit
(
0
)
except
Exception
,
e
:
except
Exception
as
e
:
exit_with_general_critical
(
e
)
print
"
OK - Chunks well balanced across shards
"
print
(
"
OK - Chunks well balanced across shards
"
)
sys
.
exit
(
0
)
...
...
@@ -1502,7 +1505,7 @@ def check_connect_primary(con, warning, critical, perf_data):
data
=
con
.
admin
.
command
(
son
.
SON
([(
'
isMaster
'
,
1
)]))
if
data
[
'
ismaster
'
]
==
True
:
print
"
OK - This server is primary
"
print
(
"
OK - This server is primary
"
)
return
0
phost
=
data
[
'
primary
'
].
split
(
'
:
'
)[
0
]
...
...
@@ -1520,17 +1523,17 @@ def check_connect_primary(con, warning, critical, perf_data):
return
check_levels
(
pconn_time
,
warning
,
critical
,
message
)
except
Exception
,
e
:
except
Exception
as
e
:
return
exit_with_general_critical
(
e
)
def
check_collection_state
(
con
,
database
,
collection
):
try
:
con
[
database
][
collection
].
find_one
()
print
"
OK - Collection %s.%s is reachable
"
%
(
database
,
collection
)
print
(
"
OK - Collection %s.%s is reachable
"
%
(
database
,
collection
)
)
return
0
except
Exception
,
e
:
except
Exception
as
e
:
return
exit_with_general_critical
(
e
)
...
...
@@ -1542,7 +1545,7 @@ def check_row_count(con, database, collection, warning, critical, perf_data):
return
check_levels
(
count
,
warning
,
critical
,
message
)
except
Exception
,
e
:
except
Exception
as
e
:
return
exit_with_general_critical
(
e
)
...
...
@@ -1566,7 +1569,7 @@ def write_values(file_name, string):
f
=
None
try
:
f
=
open
(
file_name
,
'
w
'
)
except
IOError
,
e
:
except
IOError
as
e
:
#try creating
if
(
e
.
errno
==
2
):
ensure_dir
(
file_name
)
...
...
@@ -1585,11 +1588,11 @@ def read_values(file_name):
data
=
f
.
read
()
f
.
close
()
return
0
,
data
except
IOError
,
e
:
except
IOError
as
e
:
if
(
e
.
errno
==
2
):
#no previous data
return
1
,
''
except
Exception
,
e
:
except
Exception
as
e
:
return
2
,
None
...
...
@@ -1627,8 +1630,8 @@ def replication_get_time_diff(con):
col
=
'
oplog.$main
'
firstc
=
local
[
col
].
find
().
sort
(
"
$natural
"
,
1
).
limit
(
1
)
lastc
=
local
[
col
].
find
().
sort
(
"
$natural
"
,
-
1
).
limit
(
1
)
first
=
firstc
.
next
(
)
last
=
lastc
.
next
()
first
=
next
(
firstc
)
last
=
next
(
lastc
)
tfirst
=
first
[
"
ts
"
]
tlast
=
last
[
"
ts
"
]
delta
=
tlast
.
time
-
tfirst
.
time
...
...
check_mongodb/control
View file @
bc2cc30a
Uploaders: Jan Wagner <waja@cyconet.org>
Recommends: python-pymongo
Version:
3805751
Version:
46d27ab
Homepage: https://github.com/mzupan/nagios-plugin-mongodb
Watch: https://github.com/mzupan/nagios-plugin-mongodb <a class="commit-tease-sha"[^>]*>\s+([0-9a-f]+)\s+</a>
Description: Plugin script to monitor your MongoDB server(s)
check_raid/check_raid
View file @
bc2cc30a
This diff is collapsed.
Click to expand it.
check_raid/control
View file @
bc2cc30a
Homepage: https://github.com/glensc/nagios-plugin-check_raid
Watch: https://github.com/glensc/nagios-plugin-check_raid "/glensc/nagios-plugin-check_raid/tree/([0-9.]+)"
Suggests: cciss-vol-status (>= 1.10), mpt-status
Version: 4.0.
8
Version: 4.0.
9
Uploaders: Bernd Zeimetz <bzed@debian.org>
Description: plugin to check sw/hw RAID status
The plugin looks for any known types of RAID configurations,
and checks them all.
.
Supports:
- Adaptec AAC RAID via aaccli or afacli or arcconf
- AIX software RAID via lsvg
...
...
debian/control
View file @
bc2cc30a
Source: nagios-plugins-contrib
Section: net
Priority:
extra
Priority:
optional
Maintainer: Debian Nagios Maintainer Group <pkg-nagios-devel@lists.alioth.debian.org>
Uploaders: Bernd Zeimetz <bzed@debian.org>, Jan Wagner <waja@cyconet.org>, Stefan Schoerghofer <amd1212@4md.gr>, Petter Reinholdtsen <pere@hungry.com>, Leo Antunes <leo@costela.net>
Build-Depends: debhelper (>= 8.0.0),
...
...
@@ -88,7 +88,7 @@ Description: Plugins for nagios compatible monitoring systems
outside its normal parameters.
* check_httpd_status (rev204): plugin checking Apache or Lighttpd
server-status page (using mod_status)
* check_ipmi_sensor (3.1
2
): IPMI Sensor Monitoring Plugin
* check_ipmi_sensor (3.1
3
): IPMI Sensor Monitoring Plugin
Plugin to monitor the hardware status (fan speed, temperatures,
voltages, power usage, ...) of a server using IPMI.
* check_libs (0.2015012901 ): plugin to report the usage of no longer existing
...
...
@@ -118,10 +118,9 @@ Description: Plugins for nagios compatible monitoring systems
* check_printer: plugin to check printer supply levels using SNMP
It outputs performance data for all supplies
found, for example toner and drum.
* check_raid (4.0.
8
): plugin to check sw/hw RAID status
* check_raid (4.0.
9
): plugin to check sw/hw RAID status
The plugin looks for any known types of RAID configurations,
and checks them all.
.
Supports:
- Adaptec AAC RAID via aaccli or afacli or arcconf
- AIX software RAID via lsvg
...
...
debian/control.in
View file @
bc2cc30a
Source: nagios-plugins-contrib
Section: net
Priority:
extra
Priority:
optional
Maintainer: Debian Nagios Maintainer Group <pkg-nagios-devel@lists.alioth.debian.org>
Uploaders: #AUTO_UPDATE_Uploaders#
Build-Depends: debhelper (>= 8.0.0),
...
...