#!/usr/bin/perl
#
# horsewatcher-feed -- Feed parsed log data to the horsewatcher via syslog.
#
# Written by Huaqing Zheng <morpheus@stanford.edu>
# Documentation and updates by Russ Allbery <rra@stanford.edu>
# Copyright 2010, 2012, 2013
#     The Board of Trustees of the Leland Stanford Junior University

##############################################################################
# Modules and declarations
##############################################################################

use 5.006;
use strict;
use warnings;

use Date::Calc qw(Parse_Date This_Year);
use Date::Parse;
use File::Tail ();
use Getopt::Long qw(GetOptions);
use POSIX qw(strftime);
use Sys::Syslog qw(:DEFAULT setlogsock);
use Sys::Hostname qw(hostname);

# Work around a bug in Sys::Syslog where it sends an extra nul at the end
# of each message, which destroys rsyslog's ability to parse it.
package Sys::Syslog;
sub _syslog_send_socket {
    my ($buf) = @_;
    chop $buf;
    return syswrite(SYSLOG, $buf, length($buf));
}
package main;

# The name of the horsewatcher server.
our $LOGHOST = 'horsewatcher.stanford.edu';

# The location of the Cyrus IMAPD log file to tail.
our $LOG_CYRUS = '/var/log/mail';

# The location of the Kerberos log file to tail.
our $LOG_KERBEROS = '/var/log/auth';

# The location of the RADIUS log file to tail.
our $LOG_RADIUS = '/var/www/logs/radiusall.log';

# The location of the SMTP log file to tail.
our $LOG_SMTP = '/var/log/mail';

# The location of the SSLVPN log file to tail.
our $LOG_SSLVPN = '/var/www/logs/sslvpnpre.log';

# The location of the generic syslog file.
our $LOG_SYSLOG = '/var/log/messages';

# The location of the VPN log file to tail.
our $LOG_VPN = '/var/www/logs/su-vpn-post.log';

# The location of the WebLogin log file to tail.
our $LOG_WEBLOGIN = '/var/log/apache2/error.log';

# The location of the Zimbra log file to tail.
our $LOG_ZIMBRA = '/opt/zimbra/log/mailbox.log';

##############################################################################
# Service-specific log parsing
##############################################################################

# Each of the functions in this section read a particular type of log and
# send logins to the horsewatcher server via Sys::Syslog.  The format for the
# login lines should be:
# SERVICE;YYYY-MM-DD:HH:MM:SS;SUNETID;IP;COMMENT;COMMENT

sub feed_cyrus {
    my $log = File::Tail->new($LOG_CYRUS);

    my ($line, $re, @date, $timestr, @result);
    $re = qr{
        ^(\w+\ +\d+)\ (\S+).*?
        \ login:\ (?:\S+)\ \[(\S+)\]\ (\w+)\ (\S+)
    }x;
    while (defined($line = $log->read)) {
        if ($line =~ $re) {
            @date = Parse_Date("$1 " . This_Year());
            $timestr = sprintf("%04d-%02d-%02d:%s", @date, $2);
            my @result = ($timestr, $4, $3, 'imaps', $5);
            syslog('info|local0', join(';', 'cyrus', @result));
        }
    }
}

sub feed_kerberos {
    my $log = File::Tail->new($LOG_KERBEROS);

    my ($line, $re1, $re2, $re3, @date, $timestr, @result, $success);
    $re1 = qr{
        ^(\w+\ +\d+)\ (\S+).*?
        (AS-REQ)\ (\S+)\@stanford\.edu\ from\ IPv\d:(\S+)\ for\ (\S+)
    }x;
    $re2 = qr{
        ^(\w+\ +\d+)\ (\S+).*?AS-REQ\ authtime:.*?starttime:.*?endtime:
    }x;
    $re3 = qr{
        ^(\w+\ +\d+)\ (\S+).*?sending\ \d+\ bytes\ to\ IPv\d:
    }x;
    while (defined($line = $log->read)) {
        $success = 0;
        if ($line =~ $re1) {
            my ($date, $time, $user, $ip, $type, $requester)
              = ($1, $2, $4, $5, $3, $6);

            # Set aside anything that's not a user, root, or admin principal.
            next if $user !~ m{^\w+(/root|/admin)?$};

            @date = Parse_Date("$date " . This_Year());
            $timestr = sprintf("%04d-%02d-%02d:%s", @date, $time);
            @result = ($timestr, $user, $ip, $type, $requester);
            while (defined($line = $log->read)) {
                if ($line =~ $re2) {
                    $success = 1;
                } elsif ($line =~ $re3) {
                    syslog('info|local0', join(';', 'kerberos', @result))
                        if $success;
                    last;
                }
            }
        }
    }
}

sub feed_radius {
    my $log = File::Tail->new($LOG_RADIUS);

    my ($line, $re);
    $re = qr{
        ^(\S+)\s*?
        User:\ (\w+)\#.*?Client\ friendly\ name:\ ([^\#]+)\#.*?
        Client\ IP:\ ([^\#]+)\#.*?Policy\ Used:\ ([^\#]+)
    }x;
    while (defined($line = $log->read)) {
        if ($line =~ $re) {
            my $timestr = strftime ("%Y-%m-%d:%T",
                                    localtime (str2time ($1)));
            my @result = ($timestr, $2, $4, $3, $5);
            syslog ('info|local0', join (';', 'radius', @result));
        }
    }
}

sub feed_smtp {
    my $log = File::Tail->new($LOG_SMTP);

    my ($line, $re, @date, $timestr, @result);
    $re = qr{
        ^(\w+\ +\d+)\ (\S+).*?
        client=[^\[]+\[(\S+?)\],\ sasl_method=([^,]+),\ sasl_username=(\w+)
    }x;
    while (defined($line = $log->read)) {
        if ($line =~ $re) {
            @date = Parse_Date("$1 " . This_Year());
            $timestr = sprintf("%04d-%02d-%02d:%s", @date, $2);
            my @result = ($timestr, $5, $3, 'smtp', $4);
            syslog('info|local0', join(';', 'smtp', @result));
        }
    }
}

sub feed_syslog {
    my $log = File::Tail->new($LOG_SYSLOG);

    my ($line, $re, @date, $timestr, @result);
    $re = qr{
        ^(\w+\ +\d+)\ (\S+).*?
        Accepted\ (\S+)\ for\ (\S+)\ from\ (\S+)\ port
    }x;
    while (defined($line = $log->read)) {
        if ($line =~ $re) {
            @date = Parse_Date("$1 " . This_Year());
            $timestr = sprintf("%04d-%02d-%02d:%s", @date, $2);
            my @result = ($timestr, $4, $5, 'ssh', $3);
            syslog('info|local0', join(';', 'server', @result));
        }
    }
}

sub feed_vpn {
    my $log = File::Tail->new($LOG_VPN);

    my $re = qr{
        ^(\S+)\s.*?
        Group\ =\ ([^,]+),\ Username\ =\ ([^,]+),\ IP\ =\ ([^,]+),
        \ Assigned\ private\ IP\ address\ (\S+)\ to\ remote\ user
    }x;

    my $line;
    while (defined ($line = $log->read)) {
        if ($line =~ $re) {
            my $timestr = strftime ("%Y-%m-%d:%T",
                                    localtime (str2time ($1)));
            my @result = ($timestr, $3, $4, $2, $5);
            syslog ('info|local0', join (';', 'vpn', @result));
        }
    }
}

sub feed_sslvpn {
    my $log = File::Tail->new($LOG_SSLVPN);

    my ($line, $re, @date, $timestr, @result);
    $re = qr{
        ^(\w+\ +\d+|\S+)\ \S+.*?
        \ Juniper:\ id=firewall
        \ time=\"(\d\d\d\d-\d\d-\d\d)\ (\d\d:\d\d:\d\d)\".*?
        \ user=([^\ ]+).*?
        \ realm=(\"([^\"]+)\")?.*?
        \ src=([^\ ]+).*?
        \ msg=\"(AUT\d\d\d\d\d):
        \ (Primary\ authentication\ successful|Login\ succeeded)
    }x;
    while (defined($line = $log->read)) {
        if ($line =~ $re) {
            $timestr = "$2:$3";
            @result = ($timestr, $4, $7, $8, $5);
            syslog('info|local0', join(';', 'sslvpn', @result));
        }
    }
}

sub feed_weblogin {
    my $log = File::Tail->new($LOG_WEBLOGIN);

    my ($line, $re, @date, $timestr, @result);
    $re = qr{
        ^\[\w+\ (\w+\ \d+)\ (\S+)\ (\d+)\].*?
        clientIp=(\S+)\ server=krb5:webauth\/([^@]+)@\S+\ url=\S+\ user=(\S+)
        \ rtt=\S+\ sa=\S+(?:\ ro=\S+)?\ login=(\S+)\ .*lec=0$
    }x;
    while (defined($line = $log->read)) {
        if ($line =~ $re) {
            my ($date, $time, $year, $user, $ip, $login, $requester)
              = ($1, $2, $3, $6, $4, $7, $5);

            # Set aside anything that's not a user, root, or admin principal.
            next if $user !~ m{^\w+(/root|/admin)?$};

            @date = Parse_Date("$date $year");
            $timestr = sprintf("%04d-%02d-%02d:%s", @date, $time);
            @result = ($timestr, $user, $ip, $login, $requester);
            syslog('info|local0', join(';', 'weblogin', @result));
        }
    }
}

sub feed_zimbra {
    my $log = File::Tail->new($LOG_ZIMBRA);

    my ($line, $re1, $re2, $timestr, @result);
    $re1 = qr{^(\S+)\ ([^,]+) .*?
        \[name=(\w+)\@stanford\.edu;ip=([^;]+);[^\]]*\]\ (imap|pop)\ -
        \ user\ (?:\S+)\ authenticated,\ mechanism=(\S+)
    }x;
    $re2 = qr{^(\S+)\ (\S+) .*?
        name=([a-z0-9]+)\@stanford\.edu;
        (?:aname=([a-z0-9]+)\@stanford\.edu;)?
        (?:[^;]+;)*
        ip=([^;]+);
        ua=(?:ZimbraWebClient\ -\ )?([^;]+);\]
        \ (?:
             soap\ -\ (?:\(batch\)\ GetMailboxMetadataRequest|SyncRequest)
            |dav\ -\ DavServlet
        )
    }x;
    while (defined($line = $log->read)) {
        if ($line =~ $re1) {
            $timestr = "$1:$2";
            my @result = ($timestr, $3, $4, $5, $6);
            syslog('info|local0', join(';', 'zimbra', @result));
        } elsif ($line =~ $re2) {
            $timestr = "$1:$2";
            my @result = ($timestr, $4 || $3, $5, 'https', $6);
            syslog('info|local0', join(';', 'zimbra', @result));
        }
    }
}

##############################################################################
# Main routine
##############################################################################

# Always flush output.
$| = 1;

# Clean up the script name for error reporting.
my $fullpath = $0;
$0 =~ s%.*/%%;

# Parse the argument list.
Getopt::Long::config ('bundling', 'no_ignore_case');
my ($help, $method);
$method = 'inet';
GetOptions ('h|help'     => \$help,
            's|server=s' => \$LOGHOST,
            'm|method=s' => \$method);
if ($help) {
    print "Feeding myself to perldoc, please wait...\n";
    exec ('perldoc', '-t', $fullpath);
}

# Get the current short hostname.
my $host = hostname;
$host =~ s/\..*$//;

# Open the syslog pipe.
my $log = $method;
if ($method eq 'udp' or $method eq 'tcp') {
    setlogsock ($method, $LOGHOST);
    $log .= " to $LOGHOST";
} elsif ($method eq 'inet') {
    $Sys::Syslog::host = $LOGHOST;
    setlogsock ($method);
    $log .= " to $LOGHOST";
} else {
    setlogsock ($method);
}
openlog($host, 'ndelay')
    or die "$0: unable to open syslog method $log: $!\n";

my $command = shift;
if    ($command eq 'cyrus')    { feed_cyrus    }
elsif ($command eq 'kerberos') { feed_kerberos }
elsif ($command eq 'radius')   { feed_radius   }
elsif ($command eq 'smtp')     { feed_smtp     }
elsif ($command eq 'sslvpn')   { feed_sslvpn   }
elsif ($command eq 'syslog')   { feed_syslog   }
elsif ($command eq 'vpn')      { feed_vpn      }
elsif ($command eq 'weblogin') { feed_weblogin }
elsif ($command eq 'zimbra')   { feed_zimbra   }
else   { die "$0: unknown log type $command\n" }

closelog();

exit 0;

##############################################################################
# Documentation
##############################################################################

=for stopwords
Huaqing IMAP IP KDC TCP UDP WebLogin Zheng Zimbra cyrus horsewatcher
horsewatcher-feed hostname kerberos nul smtp sslvpn timestamp username vpn
weblogin zimbra Allbery

=head1 NAME

horsewatcher-feed - Feed parsed log data to the horsewatcher via syslog

=head1 SYNOPSIS

B<horsewatcher-feed> [B<-h>] [B<-m> I<method>] [B<-s> I<server>]
    (cyrus | kerberos | radius | smtp | sslvpn | syslog | vpn | weblogin
     | zimbra)

=head1 DESCRIPTION

B<horsewatcher-feed> follows (using File::Tail) the log file specified by
the provided option, watches for authentications using built-in parsing
regexes, and feeds the results to the horsewatcher server via the syslog
protocol.  By default, TCP is tried first, and then B<horsewatcher-feed>
falls back on UDP.  The syslog method can be overridden with the B<-m>
option.

The message logged will have the short hostname (without a trailing
domain) as the program, and then the following data in order separated by
semicolons: log type (matching the first non-option argument), timestamp
as YYYY-MM-DD:HH:MM:SS, username or Kerberos principal name without any
realm, IP address of the client authenticating, sub-protocol (such as IMAP
for Zimbra), and then any additional service-specific data about this
authentication.

=head1 OPTIONS

=over 4

=item B<-h>, B<--help>

Print out this documentation (which is done simply by feeding the script
to C<perldoc -t>).

=item B<-m> I<method>, B<--method>=I<method>

Use the syslog method I<method> instead of the default of C<inet>.  For a
list of the possible syslog methods, see L<Sys::Syslog>.  See L<BUGS>
below for some cautions.

=item B<-s> I<server>, B<--server>=I<server>

Send the log messages to I<server> instead of the default horsewatcher
server.

=back

=head1 EXAMPLES

Parse the WebLogin log and send data to the default server:

    horsewatcher-feed weblogin

Parse the Kerberos KDC log and send data to syslog.example.com, using TCP:

    horsewatcher-feed -m tcp -s syslog.example.com kerberos

=head1 BUGS

In order to work around a bug in Sys::Syslog, B<horsewatcher-feed> strips
a spurious trailing nul when sending syslog messages to a socket.  This
fixes problems with TCP and should be okay with UDP, but may (probably
will) break with UNIX domain sockets.

=head1 AUTHORS

Huaqing Zheng and Russ Allbery <rra@stanford.edu>.

=head1 SEE ALSO

L<Sys::Syslog>

=cut
