#!/usr/bin/perl
#
# check_hardware_health -- Nagios plugin to perform hardware checks
#
# Written by Jason Bishop <jason.bishop@stanford.edu>
# Copyright 2012, 2013
#     The Board of Trustees of the Leland Stanford Junior University
#
# This program is free software; you may redistribute it and/or modify it
# under the same terms as Perl itself.

use 5.006;
use strict;
use warnings;

# Path to the binary used to get internal storage status on Dell systems.
# The Dell checks below run only when this file exists and is executable.
our $OMREPORT = '/opt/dell/srvadmin/bin/omreport';

# Path to the binary used to get internal storage status on HP systems.
# The HP checks below run only when this file exists and is executable.
our $HPACUCLI = '/opt/ss-scripting-toolkit/utilities/hpacucli/hpacucli';

# Report a configuration or runtime failure and terminate with the Nagios
# UNKNOWN exit status (3).  All arguments are concatenated into one message.
# The message goes to stdout in the form Nagios expects from a plugin, and is
# repeated on stderr, prefixed with the program name, for the benefit of
# anyone running the script by hand.  This function never returns.
sub error {
    my $message = join ('', @_);
    print "HARDWARE UNKNOWN - $message\n";
    warn "$0: $message\n";
    exit 3;
}

# Without at least one executable vendor utility there is nothing to check,
# so report UNKNOWN and stop before doing any other work.
unless (-x $OMREPORT || -x $HPACUCLI) {
    print "HARDWARE UNKNOWN - neither omreport or hpacucli utility installed\n";
    exit 3;
}

# Results accumulated by the vendor checks below and printed at the end.
my $output   = '';    # Problem descriptions, each one prefixed with a space.
my $perfdata = '';    # Per-disk detail text printed after the "|" separator.
my $status   = 0;     # Exit status; stays 0 unless a check raises it.

if (-x $OMREPORT) {
    # Dell systems: check the virtual disks first.  The parsing below treats
    # the omreport output as "Label : value" lines, with each virtual disk
    # introduced by its numeric ID line.
    #
    # These attributes are copied into the detail text verbatim.  They must
    # be matched against every line on their own: a line that carries the
    # State label can never also carry one of these labels.
    my @vdisk_attributes = (
        qr/Layout\s+:\s+(.*)/,
        qr/^Size\s+:\s+(.*)\(/,
        qr/^Device\sName\s+:\s+(.*)/,
        qr/Status\s+:\s+(.*)/,
    );

    # Default label so that a State line seen before any ID line is still
    # reported sensibly instead of interpolating an undefined value.
    my $vdiskid = 'unknown';
    open (my $vdisks, '-|', $OMREPORT, 'storage', 'vdisk')
        or error ("cannot run $OMREPORT: $!");
    while (my $line = <$vdisks>) {
        chomp $line;
        if ($line =~ /ID\s+:\s+(\d+)$/) {
            $vdiskid = $1;
            $perfdata .= "; " if $perfdata;
            $perfdata .= "VD$vdiskid = ";
        }
        if ($line =~ /State\s+:\s*(.*)/) {
            my $state = $1;
            $perfdata .= "$state ";
            if ($state =~ /Degraded/) {
                $output .= " $vdiskid (DEGRADED)";
                $status = 2;
            } elsif ($state !~ /Ready/) {
                # Neither Ready nor Degraded: report the raw state as
                # UNKNOWN, but never downgrade a problem already recorded.
                $output .= " $vdiskid ($state)";
                $status = 3 unless $status;
            }
        }
        foreach my $attribute (@vdisk_attributes) {
            $perfdata .= "$1 " if $line =~ $attribute;
        }
    }
    close $vdisks;

    # Physical disks are queried one controller at a time, so collect the
    # controller IDs first.
    my @controllers = ();
    open (my $controller_list, '-|', $OMREPORT, 'storage', 'controller')
        or error ("cannot run $OMREPORT: $!");
    while (my $line = <$controller_list>) {
        chomp $line;
        push @controllers, $1 if $line =~ /ID\s+:\s+(\d+)$/;
    }
    close $controller_list;

    foreach my $controller (@controllers) {
        my $pdiskid = '';
        open (my $pdisks, '-|', $OMREPORT, 'storage', 'pdisk', "controller=$controller")
            or error ("cannot run $OMREPORT: $!");
        while (my $line = <$pdisks>) {
            chomp $line;

            # Anchored like the Status match below so that only the disk's
            # own ID line is used; an unanchored match would also accept any
            # other label that merely ends in "ID".
            if ($line =~ /^ID\s+:\s+(.*)$/) {
                $pdiskid = $1;
            }
            if ($line =~ /Failure\sPredicted\s+:\s+(.*)/) {
                if ($1 eq "Yes") {
                    $output .= " $pdiskid (PHYSICAL DISK FAILURE PREDICTED)";
                    $status = 2;
                    $perfdata .= "(Failure Predicted) ";
                }
            }
            if ($line =~ /^Status\s+:\s+(.*)/) {
                $perfdata .= "; " if $perfdata;
                $perfdata .= "PD$pdiskid = $1 ";
            }
        }
        close $pdisks;
    }
}

if (-x $HPACUCLI) {
    # HP systems: find the slot number of every controller, then list the
    # logical drives on each one.
    my @controllers = ();
    open (my $controller_list, '-|', $HPACUCLI, 'controller', 'all', 'show')
        or error ("cannot run $HPACUCLI: $!");
    while (my $line = <$controller_list>) {
        chomp $line;
        push @controllers, $1 if $line =~ /^.*?\sin\sSlot\s(\d+)\s+/;
    }
    close $controller_list;

    foreach my $controller (@controllers) {
        open (my $drive_list, '-|', $HPACUCLI, 'controller', "slot=$controller", 'logicaldrive', 'all', 'show')
            or error ("cannot run $HPACUCLI: $!");
        while (my $line = <$drive_list>) {
            chomp $line;

            # Only lines of the form "logicaldrive N (a, b, c)" matter.  The
            # three comma-separated fields are copied into the detail text
            # as-is; the last one is the drive state, where anything other
            # than "OK" is reported as a problem.
            next
                unless $line =~ /^\s+logicaldrive\s+(\d+)\s+\((.*),(.*),\s+(.*)\)/;
            my ($drive, $first, $second, $state) = ($1, $2, $3, $4);

            $perfdata .= "; " if $perfdata;
            $perfdata .= "logicaldrive $drive $first $second $state";
            if ($state ne "OK") {
                $status = 2;
                $output .= " LD$drive (DEGRADED)";
            }
        }
        close $drive_list;
    }
}

# Output the results in Nagios format: a one-line summary, then "|" and the
# accumulated per-disk detail text.  The state word must agree with the exit
# status, which under the Nagios convention is 0 = OK, 1 = WARNING,
# 2 = CRITICAL and 3 = UNKNOWN.  The checks above only ever set 0, 2 or 3, so
# the final branch is a fallback that is not expected to be reached.
if ($status == 0) {
    print "HARDWARE OK | $perfdata\n";
} elsif ($status == 3) {
    print "HARDWARE UNKNOWN -$output | $perfdata\n";
} elsif ($status == 2) {
    print "HARDWARE CRITICAL -$output | $perfdata\n";
} else {
    print "HARDWARE WARNING -$output | $perfdata\n";
}
exit $status;

__END__

=for stopwords
Nagios util

=head1 NAME

check_hardware_health - Nagios plugin to perform hardware checks

=head1 SYNOPSIS

B<check_hardware_health>

=head1 DESCRIPTION

This Nagios check is intended to encapsulate various checks for local
hardware health that require additional vendor-specific software to be
installed.  Currently, it checks the RAID virtual disk and physical disk
status on Dell systems (using B<omreport>) and the logical drive status on
HP systems (using B<hpacucli>).

=head1 EXIT STATUS

B<check_hardware_health> follows the standard Nagios exit status
requirements.  This means that it will exit with status 0 if there are no
problems or with status 2 if there are hardware errors found.  For other
errors, such as invalid syntax, inability to parse command output, or
requisite software not installed, B<check_hardware_health> will exit with
status 3.

=head1 BUGS

The standard Nagios plugin command-line options are not supported.

=head1 CAVEATS

This script does not use the Nagios util library or any of the defaults
that it provides, which makes it somewhat deficient as a Nagios plugin.

=head1 AUTHOR

Jason Bishop <jason.bishop@stanford.edu>

=cut
