Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 34 additions & 12 deletions checks.d/mongo.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ class MongoDb(AgentCheck):

# Service check
SERVICE_CHECK_NAME = 'mongodb.can_connect'
SERVICE_RS_STATUS_CHECK_NAME = 'mongodb.rs_status'

# Metrics
"""
Expand Down Expand Up @@ -388,18 +389,19 @@ class MongoDb(AgentCheck):
MongoDB replica set states, as documented at
https://docs.mongodb.org/manual/reference/replica-states/
"""
# the third value is weight of emergency when rs has node with this status
REPLSET_MEMBER_STATES = {
0: ('STARTUP', 'Starting Up'),
1: ('PRIMARY', 'Primary'),
2: ('SECONDARY', 'Secondary'),
3: ('RECOVERING', 'Recovering'),
4: ('Fatal', 'Fatal'), # MongoDB docs don't list this state
5: ('STARTUP2', 'Starting up (forking threads)'),
6: ('UNKNOWN', 'Unknown to this replset member'),
7: ('ARBITER', 'Arbiter'),
8: ('DOWN', 'Down'),
9: ('ROLLBACK', 'Rollback'),
10: ('REMOVED', 'Removed'),
0: ('STARTUP', 'Starting Up', AgentCheck.WARNING),
1: ('PRIMARY', 'Primary', AgentCheck.OK),
2: ('SECONDARY', 'Secondary', AgentCheck.OK),
3: ('RECOVERING', 'Recovering', AgentCheck.WARNING),
4: ('Fatal', 'Fatal', AgentCheck.CRITICAL), # MongoDB docs don't list this state
5: ('STARTUP2', 'Starting up (forking threads)', AgentCheck.WARNING),
6: ('UNKNOWN', 'Unknown to this replset member', AgentCheck.CRITICAL),
7: ('ARBITER', 'Arbiter', AgentCheck.OK),
8: ('DOWN', 'Down', AgentCheck.CRITICAL),
9: ('ROLLBACK', 'Rollback', AgentCheck.CRITICAL),
10: ('REMOVED', 'Removed', AgentCheck.CRITICAL),
}

def __init__(self, name, init_config, agentConfig, instances=None):
Expand Down Expand Up @@ -778,13 +780,33 @@ def total_seconds(td):
u"replset_state:{0}".format(replset_state),
])

rs_status = AgentCheck.OK
rs_status_details = []

# Find nodes: master and current node (ourself)
for member in replSet.get('members'):

member_state = int(member.get('state'))
member_state_info = self.REPLSET_MEMBER_STATES[member_state]

# checking all replset members
if member_state_info[2] > AgentCheck.OK:
rs_status_details.append("%s has state \"%s\"" % (member.get('name'),
member_state_info[1]))
if rs_status < member_state_info[2]:
rs_status = member_state_info[2]

if member.get('self'):
current = member
if int(member.get('state')) == 1:
if member_state == 1:
primary = member

self.service_check(
self.SERVICE_RS_STATUS_CHECK_NAME,
rs_status,
tags=service_check_tags,
message='\n'.join(rs_status_details))

# Compute a lag time
if current is not None and primary is not None:
if 'optimeDate' in primary and 'optimeDate' in current:
Expand Down