#!/usr/bin/perl

## no critic (CodeLayout::ProhibitParensWithBuiltins);
## no critic (InputOutput::RequireCheckedSyscalls);

use strict;
use warnings;
use autodie;

use Carp;
use Getopt::Long::Descriptive;
use IPC::Run;
use Readonly;
use Sys::Hostname;
use Time::Seconds;

# IP addresses of the Nagios servers that receive the passive check
# results; each address is handed to send_nsca via its -H option.
my @NAGIOS_SERVER_IPS = ('171.67.217.114', '171.67.217.115',);

# Debug flag: when true, progress() prints its messages. It is switched on
# by the --verbose option.
my $DEBUG = 0;

# Default for the --timeout option, in seconds.
Readonly my $DEFAULT_TIMEOUT_SECS => 10;

# For more information on these timeout exit codes see timeout man page at
# https://www.gnu.org/software/coreutils/manual/html_node/timeout-invocation.html#timeout-invocation
Readonly my $TIMEOUT_RC_TIMESOUT        => 124;    # the command timed out
Readonly my $TIMEOUT_RC_FAILS           => 125;    # timeout itself failed
Readonly my $TIMEOUT_RC_CMD_NOT_INVOKED => 126;    # found, cannot be invoked
Readonly my $TIMEOUT_RC_CMD_NOT_FOUND   => 127;    # command not found
Readonly my $TIMEOUT_RC_KILLED          => 137;    # a KILL signal was sent

# Path of the check script that is run once per distribution.
my $CHECK_SCRIPT = '/usr/sbin/check-repo-inrelease-valid';

# Run an external command and capture what it writes.
#
# Arguments: the command as a list (program name followed by its
# arguments).
#
# Returns the three-element list ($stdout, $stderr, $exit_value), where
# $exit_value is $? shifted right by eight bits once the command is done.
sub run_command {
    my (@command) = @_;

    progress(q{about to run command '} . join(q{ }, @command) . q{'});

    # (From Russ)
    my $captured_out;
    my $captured_err;
    IPC::Run::run(\@command, q{>}, \$captured_out, q{2>}, \$captured_err);

    ## no critic (Variables::ProhibitPunctuationVars);
    ## no critic (ValuesAndExpressions::ProhibitMagicNumbers)
    my $exit_value = $? >> 8;

    return ($captured_out, $captured_err, $exit_value);
}

# Print a diagnostic message, followed by a newline, to standard output.
# Nothing is printed unless $DEBUG is true.
sub progress {
    my ($msg) = @_;

    return if !$DEBUG;

    print "$msg\n";
    return;
}

# Process any options:

Readonly my $DEFAULT_WARN_DAYS => 4;
Readonly my $DEFAULT_CRIT_DAYS => 2;

#<<< Hey perltidy: I like it this way.
my ($opt, $usage) = describe_options(
    'monitor-distro-inrelease-valid-nagios %o',
    ['repository|r=s', 'query this repository',                       ],
    ['security|s',     'this is a security archive', { default  => 0 }],
    ['warning|w=i',    'number of seconds behind to trigger WARN',
                         { default  => 60 * 60 * 24 * $DEFAULT_WARN_DAYS}],
    ['critical|c=i',   'number of seconds behind to trigger CRIT',
                         { default  => 60 * 60 * 24 * $DEFAULT_CRIT_DAYS}],
    ['timeout|t=i',    'number of seconds to wait for check script to return',
                         { default  => $DEFAULT_TIMEOUT_SECS}],
    ['verbose|v',      'print extra stuff'],
    ['help',           'print usage message and exit'],
);
#>>>

if ($opt->help()) {
    print($usage->text);
    exit;
}

if ($opt->verbose()) {
    $DEBUG = 1;
}

# The repository to query: either the one given with --repository or a
# default built from the short name of the local host.
my $repository;
if ($opt->repository()) {
    $repository = $opt->repository();
} else {
    # hostname() can return a fully-qualified name on some systems; keep
    # only the first label so the domain is not appended twice.
    (my $short_hostname = hostname()) =~ s{[.].*\z}{}xms;
    $repository = "http://${short_hostname}.stanford.edu/debian";
}

# Normalize the --security flag to 1 or 0.
my $security = $opt->security() ? 1 : 0;

my $TIMEOUT_SECS = $opt->timeout() + 0;
progress("timeout is $TIMEOUT_SECS seconds");

# The host part of the repository URL is the host name reported to Nagios.
# Take everything between "//" (plus an optional "user@" part) and the
# next "/" or ":", so that hyphenated labels are kept whole and the match
# can never run on into the path.
my $HOSTNAME;
if ($repository =~ m{ // (?: [^/@]* @ )? ( [^/:]+ ) }xms) {
    $HOSTNAME = $1;
} else {
    croak("could not find hostname in '$repository'");
}

# Remove any trailing slash.
$repository =~ s{\/$}{}xsm;

progress("using repository '$repository'");

# Mirroring is considered critically behind if the InRelease file's
# expiration is less than $CRIT_SECS seconds from now, and is considered
# worryingly behind if its expiration is less than $WARN_SECS seconds from
# now.
my $WARN_SECS = $opt->warning();
my $CRIT_SECS = $opt->critical();
progress("WARN     seconds is $WARN_SECS");
progress("CRITICAL seconds is $CRIT_SECS");

# The critical threshold has to be the tighter one; otherwise the WARNING
# state could never be reported.
if ($CRIT_SECS > $WARN_SECS) {
    croak "error: critical seconds ($CRIT_SECS) "
      . "is more than warn seconds ($WARN_SECS)";
}

# Check a single distribution and report the outcome to every Nagios
# server as a passive check result.
#
# Argument: $distro, the distribution name handed to the check script.
#
# The check script is run under timeout(1). Its standard output is
# expected to be a number of seconds; per this script's documentation that
# is the time left until the InRelease file expires, so (despite the
# variable name) a SMALLER value is worse. The value is compared against
# $CRIT_SECS and $WARN_SECS. Any timeout problem, any output on standard
# error, and empty or non-numeric output are reported as UNKNOWN.
#
# Returns nothing. A failing send_nsca run produces a warning via carp.
sub process_distro {
    my ($distro) = @_;

    # Get the number of seconds since last update.
    my @check_script_cmd = ($CHECK_SCRIPT, $repository, $distro);
    if ($security) {
        push(@check_script_cmd, 'security');
    }

    my $check_script_fmted = join(q{ }, @check_script_cmd);
    progress("check_script is '$check_script_fmted'");

    # We want to time-out the check script; otherwise we run the risk
    # of multiple calls to the check script piling up on each other.
    my @cmd = ('timeout', $TIMEOUT_SECS, @check_script_cmd);

    my ($stdout, $stderr, $rc) = run_command(@cmd);
    $stdout = q{} if !defined $stdout;
    $stderr = q{} if !defined $stderr;
    progress("stdout:    $stdout");
    progress("stderr:    $stderr");
    progress("exit code: $rc");

    # Strip surrounding whitespace (typically a trailing newline) so that
    # whitespace-only output is recognized as empty.
    my $seconds_behind = $stdout;
    $seconds_behind =~ s{\A \s+}{}xms;
    $seconds_behind =~ s{\s+ \z}{}xms;

    # send_nsca reads one record per line, so fold the check script's
    # error output into a single line before embedding it in a record.
    my $stderr_line = $stderr;
    $stderr_line =~ s{\A \s+}{}xms;
    $stderr_line =~ s{\s+ \z}{}xms;
    $stderr_line =~ s{\s+}{ }gxms;

    # Translate the exit codes that timeout(1) reserves for itself.
    my $timeout_msg;
    ## no critic (ProhibitCascadingIfElse);
    if ($rc == $TIMEOUT_RC_TIMESOUT) {
        $timeout_msg = "check script timed-out after $TIMEOUT_SECS seconds";
    } elsif ($rc == $TIMEOUT_RC_FAILS) {
        $timeout_msg = 'timeout command failed';
    } elsif ($rc == $TIMEOUT_RC_CMD_NOT_INVOKED) {
        $timeout_msg = "'$check_script_fmted' found but cannot be invoked";
    } elsif ($rc == $TIMEOUT_RC_CMD_NOT_FOUND) {
        $timeout_msg = "'$check_script_fmted' not found";
    } elsif ($rc == $TIMEOUT_RC_KILLED) {
        $timeout_msg = "'$check_script_fmted' or timeout sent a KILL signal";
    } else {
        progress('check script did not time-out');
        progress("seconds_behind is $seconds_behind");
    }

    if (defined $timeout_msg) {
        progress("there was a problem with timeout: $timeout_msg");
    }

    # Build "<return code>;<plugin output>". The numeric comparison and
    # the human-readable duration are only attempted once the output is
    # known to be a plain number.
    my $msg;
    ## no critic (ProhibitCascadingIfElse);
    if (defined $timeout_msg) {
        $msg = "3;UNKNOWN - $timeout_msg";
    } elsif ($stderr_line ne q{}) {
        $msg = "3;UNKNOWN - monitor failed: $stderr_line";
    } elsif ($seconds_behind eq q{}) {
        $msg = '3;UNKNOWN - check script returned an empty string';
    } elsif ($seconds_behind !~ m{\A [-+]? \d+ (?: [.] \d+ )? \z}xms) {
        $msg = '3;UNKNOWN - check script returned non-numeric output';
    } elsif ($seconds_behind < $WARN_SECS) {
        my $days_behind = Time::Seconds->new($seconds_behind)->pretty;
        if ($seconds_behind < $CRIT_SECS) {
            $msg = "2;CRITICAL - $distro is behind $days_behind";
        } else {
            $msg = "1;WARNING - $distro is behind $days_behind";
        }
    } else {
        $msg = '0;OK - Valid_Until is within system limits';
    }

    # Send NSCA (passive check) to Nagios monitors.
    my $test_name = $security ? "m-security-${distro}" : "m-${distro}";

    foreach my $nagios_server_ip (@NAGIOS_SERVER_IPS) {
        # Built afresh for every server because the scalar is handed to
        # IPC::Run by reference as the command's standard input.
        my $psv_msg = "$HOSTNAME;$test_name;$msg";
        progress("passive message: $psv_msg");

        my $out;
        #<<<  perltidy please ignore this
        my @command = (
            'send_nsca',
            '-d', q{;},
            '-H', $nagios_server_ip,
            );
        #>>>
        ## no critic (Variables::ProhibitPunctuationVars);
        IPC::Run::run \@command, \$psv_msg, \$out or carp $?;
        progress("send_ncsa command output: $out");
    }

    return;
}

# Run process_distro() once for every distribution name in the array
# referenced by the single argument, in order. Returns nothing.
sub process {
    my ($distr_array_aref) = @_;

    # Work on a copy of the caller's list.
    my @names = @{$distr_array_aref};

    for my $name (@names) {
        process_distro($name);
    }

    return;
}

# Distributions checked for an ordinary (non-security) archive.
my @distro_array_ordinary = (
    'oldstable-proposed-updates',
    'oldstable-updates',
    'stable-proposed-updates',
    'stable-updates',
    'testing',
    'testing-proposed-updates',
    'testing-updates',
    'sid',
);

# Distributions checked for a debian-security archive.
my @debian_security_distro_array = ('oldstable', 'stable', 'testing');

# Choose the list that matches the kind of archive and check every entry.
my $distros_to_check_aref
  = $security ? \@debian_security_distro_array : \@distro_array_ordinary;
process($distros_to_check_aref);

__END__

=for stopwords distro inrelease nagios repo debian InRelease Nagios
=for stopwords Srinivas Puttagunta Lewenberg

=head1 NAME

monitor-distro-inrelease-valid-nagios - Debian repo passive monitoring

=head1 SYNOPSIS

B<monitor-distro-inrelease-valid-nagios> -r debian-mirror-endpoint

=head1 DESCRIPTION

Given a Debian endpoint this script iterates over the following
distributions looking for out-of-date InRelease files:

    oldstable-proposed-updates
    oldstable-updates
    stable-proposed-updates
    stable-updates
    testing
    testing-proposed-updates
    testing-updates
    sid

When B<--security> is given, the debian-security distributions
C<oldstable>, C<stable>, and C<testing> are checked instead.

For each of the above distributions the check-repo-inrelease-valid check
script is called which returns the number of seconds until the InRelease
file for this distribution expires. The expiration date is based on the
Valid-Until field in the InRelease file. By default, if the InRelease
file expires in less than four days a Nagios WARN alert is sent; if it
expires in less than two days a Nagios CRITICAL alert is sent. Both
thresholds can be changed with B<--warning> and B<--critical>.

The Nagios servers to which the alerts are sent are hard-coded in this
script.

=head1 OPTIONS

=over 4

=item B<-w|--warning> B<seconds>

If the number of seconds until expiration is less than B<seconds> generate a Nagios
WARN alert. Default is 4 days.

=item B<-c|--critical> B<seconds>

If the number of seconds until expiration is less than B<seconds> generate a Nagios
CRITICAL alert. Default is 2 days.

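=item B<-v|--verbose>

Show more information while running.

=item B<-s|--security>

Treat the repository as a debian-security archive. The debian-security
distributions are checked instead of the ordinary ones, the word
C<security> is passed to the check script as an extra argument, and the
Nagios service names are prefixed with C<m-security-> rather than C<m->.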

=item B<-r|--repository>

The repository to monitor. This should be the full URL to the repository,
for example, "http://debian-repo1.stanford.edu/debian". If omitted
defaults to "http://HOSTNAME.stanford.edu/debian" where HOSTNAME is the
short version of the host the script runs on.

=item B<-t|--timeout>

The number of seconds to wait for the check script
F<check-repo-inrelease-valid> to return. We have seen cases where
F<check-repo-inrelease-valid> never returns, so it is important to put
a limit to how long we are willing to wait. Default is 10 seconds.

=back

=head1 EXAMPLES

    monitor-distro-inrelease-valid-nagios -r http://debian.stanford.edu/debian
    monitor-distro-inrelease-valid-nagios -r http://debian.stanford.edu/debian-security

=head1 SEE ALSO

debmirror(1)

=head1 AUTHOR

Srinivas Puttagunta <psr123@stanford.edu>.
Adam H. Lewenberg <adamhl@stanford.edu>.

=cut
