#!/bin/bash

# Send passive alerts to a Nagios server when the Puppet live directories
# get too far behind.
#
# The library loaded below is a bash library, so this script runs under
# bash explicitly rather than under whatever /bin/sh happens to be.

# Step 1. Load the Nagios passive send bash script library.  Stop right
# away if it cannot be read: the later steps call helpers (progress,
# send_passive_check) that this file does not define itself and that are
# presumably provided by this library.
NAGIOS_PASSIVE_LIB=/usr/lib/nagios-passive.sh
if [ ! -r "$NAGIOS_PASSIVE_LIB" ]; then
    echo "cannot read $NAGIOS_PASSIVE_LIB" >&2
    exit 1
fi
. "$NAGIOS_PASSIVE_LIB"

# Step 2. Calculate the number of seconds the live directories are behind.

# The number of seconds that the Puppetservice "live" directories are
# behind the pushed Git repositories is important: the more seconds
# behind, the longer a Puppet developer has to wait for their changes to
# take effect.  More than two minutes behind is a problem (WARNING);
# more than four minutes behind is CRITICAL.

# Thresholds in seconds, written as minutes * 60 for readability.
WARN_SECS=$(( 2 * 60 ))
CRIT_SECS=$(( 4 * 60 ))

progress "WARN_SECS is $WARN_SECS seconds"
progress "CRIT_SECS is $CRIT_SECS seconds"

# Get the max sync seconds behind.  Standard output is the value we want;
# standard error is captured separately (through a temporary file) into
# STDERR so that the alert logic further down can report a failing monitor.
STDERR=""
STDERR_FILE=$(mktemp 2>/dev/null) || STDERR_FILE=""
if [ -n "$STDERR_FILE" ]; then
    SECONDS_BEHIND=$(/usr/sbin/git-info --max-seconds-behind 2>"$STDERR_FILE")
    LAST_ERROR=$?
    STDERR=$(cat "$STDERR_FILE")
    rm -f -- "$STDERR_FILE"
else
    # No temporary file could be created: run the command without
    # capturing standard error rather than not running it at all.
    SECONDS_BEHIND=$(/usr/sbin/git-info --max-seconds-behind)
    LAST_ERROR=$?
fi

progress "LAST_ERROR is $LAST_ERROR"
progress "SECONDS_BEHIND is '$SECONDS_BEHIND'"
progress "STDERR is '$STDERR'"

# Decide the alert level and message.  The checks run in this order:
#   1. STDERR is non-empty                         -> UNKNOWN
#   2. git-info produced no value                  -> UNKNOWN
#   3. git-info produced something not an integer  -> UNKNOWN
#   4. more than CRIT_SECS behind                  -> CRITICAL
#   5. more than WARN_SECS behind                  -> WARNING
#   6. anything else                               -> OK
# Only POSIX test syntax is used, so the outcome does not depend on
# which shell ends up running this script.

# An integer here is an optional leading minus sign followed by digits
# only; anything else must not reach the numeric comparisons below.
case "${SECONDS_BEHIND#-}" in
    ''|*[!0-9]*) SECONDS_BEHIND_IS_INT=no ;;
    *)           SECONDS_BEHIND_IS_INT=yes ;;
esac

if [ -n "$STDERR" ]; then
    ALERT_LEVEL="UNKNOWN"
    ALERT_MSG="monitor failed: $STDERR"
elif [ -z "$SECONDS_BEHIND" ]; then
    ALERT_LEVEL="UNKNOWN"
    ALERT_MSG="git-info --max-seconds-behind returned an empty string"
elif [ "$SECONDS_BEHIND_IS_INT" != "yes" ]; then
    ALERT_LEVEL="UNKNOWN"
    ALERT_MSG="git-info --max-seconds-behind returned a non-numeric value: '$SECONDS_BEHIND'"
elif [ "$SECONDS_BEHIND" -gt "$CRIT_SECS" ]; then
    ALERT_LEVEL="CRITICAL"
    ALERT_MSG="live directory is behind $SECONDS_BEHIND seconds"
elif [ "$SECONDS_BEHIND" -gt "$WARN_SECS" ]; then
    ALERT_LEVEL="WARNING"
    ALERT_MSG="live directory is behind $SECONDS_BEHIND seconds"
else
    ALERT_LEVEL="OK"
    ALERT_MSG="live directory is synced within acceptable limits ($SECONDS_BEHIND seconds behind)"
fi

# Step 3. Report the outcome: hand the service name, the alert level and
# the alert message chosen above to send_passive_check.
NAGIOS_SERVICE="git-info-sync"
send_passive_check "$NAGIOS_SERVICE" "$ALERT_LEVEL" "$ALERT_MSG"

# Always finish with a zero status.  This exit also keeps the shell from
# ever reaching the documentation that follows.
exit 0

# Documentation.  Use a hack to hide this from the shell.  Because of the
# above exit line, this should never be executed.
DOCS=<<__END_OF_DOCS__

=head1 NAME

git-info-nagios - Passive monitor for Puppet live directory sync status

=head1 SYNOPSIS

B<git-info-nagios> [options]

=head1 DESCRIPTION

This command finds how far the Puppet "live" directory is behind
the Git repositories. If it is more than 4 minutes behind, a
CRITICAL passive Nagios alert is sent to the Nagios servers. If it
is more than 2 minutes but no more than 4 minutes behind, a WARNING
passive Nagios alert is sent.

If a C<hostname> argument is supplied that value is used as the hostname
for the passive alert. If no C<hostname> argument is provided, then the
C<hostname> command is run and that value is used.

=head1 AUTHOR

Adam Lewenberg <adamhl@stanford.edu>

=cut
