#!/usr/bin/perl
#
# cron-distribute - Find upcoming cron jobs and send to the cron servers.
#
# Written by Jon Robertson <jonrober@stanford.edu>
# Updated by Adam H. Lewenberg <adamhl@stanford.edu>
# Copyright 2008-2012, 2022 Board of Trustees, Leland Stanford Jr. University

## no critic (CodeLayout::ProhibitParensWithBuiltins);
## no critic (Modules::RequireNoMatchVarsWithUseEnglish);

#############################################################################
# Modules and declarations
#############################################################################

use strict;
use warnings;
use autodie;

use Data::Dumper;
use DBI;
use English;
use Getopt::Long::Descriptive;
use POSIX qw(strftime);
use Readonly;
use Schedule::Cron;

use Stanford::Orange::Util qw(run_command_improved);

# use Stanford::Infrared::General qw(get_db_setup);
use Stanford::WWWScheduler qw(
    call_remctl
    db_connect
    read_password
    read_config
);

###################################################################$$$

### CONFIGURATION
my %CONFIG;
my $CONFIG_FILE = '/etc/www-scheduler/www-scheduler-config.yaml';
my $VERBOSE     = 0;
my $DRYRUN      = 0;

# Logging destination. If $LOGFH is undefined send logging to standard
# output. $LOGFH is only used when logging to an actual file.
my $LOGFH;
my $LOGFILE = '/var/log/crondist.log';


my $MAXLOAD;
my @CRONSERVERS = ();
my $SLEEPTIME_MINUTES;

# key is in the hash %TESTCRONIDS iff key appears in the list
# cron-distribute:test-cron-ids in the configuration file.
my $TESTMODE    = 0;
my %TESTCRONIDS = ();

# Programs to run.
my $LBCDCLIENT = '/usr/bin/lbcdclient';

# File to contain the PID of the crondist job.
my $PIDFILE = '/var/run/crondist.pid';

# Keep track of how long this script has been running
my $START_TIME = time();

#############################################################################
# Misc routines
#############################################################################

sub config_setup {
    my ($config_file) = @_;

    progress("reading configuration file $config_file...");
    my %config = read_config($config_file);

    my $cron_distribute = $config{'cron-distribute'};

    if (!$cron_distribute) {
        my $msg = "no cron-distribute section in configuration file $config_file";
        exit_with_error($msg);
    }

    $MAXLOAD           = $cron_distribute->{'maxload'};
    @CRONSERVERS       = @{ $cron_distribute->{'cronservers'} };
    $SLEEPTIME_MINUTES = $cron_distribute->{'sleeptime-minutes'};

    if (exists($cron_distribute->{'test-mode'}) &&
        ($cron_distribute->{'test-mode'} =~ m{true}ixsm)
       ) {
        progress('cron-distribute is in test mode');
        $TESTMODE = 1;

        if (exists($cron_distribute->{'test-cron-ids'})) {
            my @test_cronids = @{ $cron_distribute->{'test-cron-ids'} };
            for my $cronid (@test_cronids) {
                $TESTCRONIDS{$cronid} = 1;
            }
            my $num_test_cron_ids = scalar(%TESTCRONIDS);
            progress("there are $num_test_cron_ids test cron ids");
        }
    } else {
        progress('cron-distribute is NOT in test mode');
    }

    if (!@CRONSERVERS) {
        my $msg = 'no CRONSERVERS defined in configuration file';
        exit_with_error($msg);
    }

    ## no critic (ValuesAndExpressions::RequireInterpolationOfMetachars);
    progress("\$MAXLOAD: $MAXLOAD");
    progress("\$SLEEPTIME_MINUTES: $SLEEPTIME_MINUTES");
    progress('\@CRONSERVERS: ' . join(q{ }, @CRONSERVERS));

    progress('finished reading configuration file');
    return %config;
}

# Given a server name, run lbcdclient against that server, parse the result,
# and return the current load for that machine.
#
# The code has changed in this section due to the changed output of
# lbcdclient in version 3.5.x of lbcd.
sub get_server_load {
    my ($server) = @_;

    my @cmd       = ($LBCDCLIENT, $server);
    my $cmd_fmted = join(q{ }, @cmd);

    log_message("running command '$cmd_fmted'");
    my ($stdout, $stderr, $rc) = run_command_improved(@cmd);

    # Because this function is called in a detached process the
    # $rc is not returned correctly. So, instead of using $rc as an
    # indicator of success we use $stderr.
    if ($stderr) {
        my $msg = "command '$cmd_fmted' failed: $stderr";
        log_message($msg);
        exit_with_error($msg);
    }

    # LBCDCLIENT output looks (in part) like this:
    #   PROTOCOL 3
    #
    #   MACHINE STATUS:
    #   l1           = 0.10
    #   l5           = 0.39
    #   l15          = 0.44
    #   current_time = 1649182388   (2022-04-05 11:13:08)
    #   boot_time    = 1647917931   (2022-03-21 19:58:51)
    #   ... more ...
    #
    # Get the l1, l5, and l15 values.
    my $rxms = 'MACHINE[ ]STATUS:';
    my $rx1  = '\s*l1\s*=\s*(\S+)';
    my $rx5  = '\s*l5\s*=\s*(\S+)';
    my $rx15 = '\s*l15\s*=\s*(\S+)';
    my $rx   = qr/$rxms $rx1 $rx5 $rx15/xsm;

    my ($one, $five, $fifteen) = ($stdout =~ m{$rx}xsm);

    # Convert to percentages:
    ## no critic (ValuesAndExpressions::ProhibitMagicNumbers);
    if ($one) {
        return $one * 100;
    } else {
        return $MAXLOAD * 100;
    }
}

# Find the next server to send a job to, by weighting the current load.
sub next_server {
    my @weights = ();

    ## no critic (ValuesAndExpressions::ProhibitMagicNumbers);
    foreach my $server (@CRONSERVERS) {
        my $load = get_server_load($server);
        progress("server load for $server: $load");
        my $weighting = $MAXLOAD * 100 - $load;
        next if $weighting <= 0;
        for my $i (0 .. $weighting - 1) {
            push(@weights, $server);
        }
    }

    my $server = $weights[rand @weights];
    if ($server) {
        return $server;
    } else {
        return q{};
    }
}

# Given the identifying key for a database entry, pass it off to a server
# for execution.
sub dispatch_job {
    my ($key) = @_;

    progress("($key) considering dispatching job");

    my $server = next_server();

    # If we are in test mode we skip dispatching this job if $key does not
    # appear in the hash %TESTCRONIDS.
    my $skip_dispatch = 0;
    if ($TESTMODE && (!exists($TESTCRONIDS{$key}))) {
        $skip_dispatch = 1;
    }

    if ($server) {
        if ($DRYRUN) {
            my $msg = "($key) dry-run: would have made remctl call "
              . "'cron runjob $key' to server $server";
            log_message($msg);
        } else {
            if ($skip_dispatch) {
                log_message("($key) skipping dispatching to $server as in testmode and " .
                         "$key not in cron-distribute:test-cron-ids");
            } else {
                log_message("($key) dispatching job to server $server");
                my ($stdout, $stderr, $child_error) = call_remctl($server, q{}, \%CONFIG, 'cron', 'runjob', $key);
                if ($stderr) {
                    my $msg = "($key) remctl call to $server failed: $stderr (rc: $child_error)";
                    log_message($msg);
                } else {
                    progress("($key) remctl finished without error");
                }
            }
        }
    } else {
        log_message("($key) no server found to send this job to; maybe they are all too loaded?");
    }

    return;
}

# Given references to a current array of jobs and an old array of job ids,
# check to see if there are any old jobs that are no longer in the mix.
# We don't care about new jobs -- those will be flagged just because they
# will have a newer modified time.
sub jobs_deleted {
    my ($newids, $oldids) = @_;
    my (@deleted, %jobids);

    progress('entering jobs_deleted');

    # If there are no old ids, then nothing could have deleted any of them.
    if (!@{$oldids}) {
        return ();
    }

    # Push current job ids into a hash and check old ids against them.
    foreach my $id (@{$newids}) {
        $jobids{$id} = 1;
    }
    foreach my $id (@{$oldids}) {
        if (!$jobids{$id}) {
            push(@deleted, $id);
        }
    }

    # No old ids that weren't in newids.
    return @deleted;
}

#############################################################################
# Database routines
#############################################################################

# Given an optional modified time, return all cronjobs, or all cronjobs
# modified since that time.  Return in an array ordered by the time of last
# modification with the job modified earliest first in the array.
sub get_cronjobs {
    my ($modified) = @_;

    progress('entering get_cronjobs');

    # Set up the database source.
    progress('setting up database connection...');
    my $dbh = db_connect(\%CONFIG);

    my ($sth, $query);
    if ($modified) {
        $query = q{SELECT * FROM cronjobs WHERE cr_active='Yes' AND }
          . 'cr_modified > ? ORDER BY cr_modified';
        $sth = $dbh->prepare($query);
        $sth->execute($modified);
    } else {
        $query = q{SELECT * FROM cronjobs WHERE cr_active='Yes' }
          . 'ORDER BY cr_modified';
        $sth = $dbh->prepare($query);
        $sth->execute();
    }

    my (@jobs);
    while (my $ref = $sth->fetchrow_hashref()) {
        push(@jobs, $ref);
    }

    my $number_jobs = scalar(@jobs);
    progress("found $number_jobs cronjobs");

    progress('closing database connection');
    $dbh->disconnect();

    return @jobs;
}

# Find and return a list of all job ids in the database.
sub get_jobids {
    progress('entering get_jobids');

    # Set up the database source.
    progress('setting up database connection...');
    my $dbh = db_connect(\%CONFIG);

    my ($sth, $query);
    $query = q{SELECT * FROM cronjobs WHERE cr_active='Yes'};
    $sth   = $dbh->prepare($query);
    $sth->execute();

    my (@jobs);
    while (my $ref = $sth->fetchrow_hashref()) {
        push(@jobs, $ref->{'cr_id'});
    }

    my $number_jobs = scalar(@jobs);
    progress("found $number_jobs job ids");

    progress('closing database connection');
    $dbh->disconnect();

    return @jobs;
}

#############################################################################
# Cron routines
#############################################################################

# Given a single row from the cronjob table, reformat it as a cron entry and
#  add it to the scheduler.
sub add_cron {
    my ($cron, %entry) = @_;

    my @crontab;
    foreach my $period (
        'cr_minutes', 'cr_hours', 'cr_days_month',
        'cr_months',  'cr_days_week'
      )
    {
        if ($entry{$period} eq q{}) {
            $entry{$period} = q{*};
        }

        # If a user has requested both * and a number (or more), fix the
        #  entry to */<num>.  If they've requested multiple numbers, assume
        #  the * was a mistake and remove it.  Long term, maybe want to
        #  change the db field from a set to a varchar and move this to the
        #  request script with greater validation there, but right now just
        #  want it to work.
        if (($entry{$period} =~ m{[*]}xsm) && ($entry{$period} =~ m{,}xsm)) {
            my @nums = split(m{,}xsm, $entry{$period});
            if (@nums > 2) {
                shift(@nums);
                $entry{$period} = join(q{,}, @nums);
            } else {
                $entry{$period} = join(q{/}, @nums);
            }
        }

        # Add it to the crontab entry.
        push(@crontab, $entry{$period});
    }

    my $crontab = join(q{ }, @crontab);
    my $id      = $entry{'cr_id'};
    if (my $index = $cron->check_entry($id)) {
        my $existing = $cron->get_entry($index);
        $existing->{'time'} = $crontab;
        $cron->update_entry($index, $existing);
    } else {
        $cron->add_entry($crontab, $id);
    }

    return;
}

sub add_prefix_to_lines {
    my ($msg, $prefix) = @_;

    my @lines = split(/\n/xsm, $msg);

    my @results = ();
    foreach my $line (@lines) {
        push(@results, $prefix . $line) ;
    }

    return join("\n", @results);
}


sub log_message {
    my ($msg) = @_;

    my $datetime = strftime '%Y-%m-%d %H:%M:%S', localtime;

    my $msg_formatted = add_prefix_to_lines($msg, "[$datetime] ");
    $msg_formatted   .= "\n";

    if ($LOGFH) {
        print {$LOGFH} $msg_formatted ;
    } else {
        print $msg_formatted;
    }

    return;
}

sub progress {
    my ($msg) = @_;
    if ($VERBOSE) {
        log_message("progress: $msg");
    }
    return;
}

sub exit_with_error {
    my ($msg) = @_;
    print "ERROR: $msg\n";
    exit(1);
}

# If there is a PIDFILE this means that there is an earlier version of the
# scheduler running. Kill that process.
sub kill_earlier_scheduler {
    # Kill any earlier run of the scheduler.
    if (-e $PIDFILE) {
        progress("killing earlier PID file $PIDFILE");
        open(my $PID, '<', $PIDFILE);
        my $pid = <$PID>;
        chomp($pid);
        close($PID);
        unlink($PIDFILE);
        kill('KILL', $pid);
    } else {
        progress("no PID file $PIDFILE so no cleanup needed");
    }

    return;
}

sub prepare_log_fh {
    my ($logfile) = @_ ;
    open(my $log_fh, '>>', $logfile);
    return $log_fh;
}

#############################################################################
# Main routine
#############################################################################

# Get errors and output in the same order.
local $OUTPUT_AUTOFLUSH = 0;

# Clean up the path name.
my $fullpath = $PROGRAM_NAME;
$PROGRAM_NAME =~ s{^.*/}{}xsm;

#######################################################################
# Parse command-line options.
my ($opt, $usage) = describe_options(
    'my-program %o <some-arg>',
    ['manual',        'print manual page and exit', { 'shortcircuit' => 1 }],
    ['dry-run|n',     'dry-run mode'],
    ['verbose|v',     'print extra stuff'],
    ['log-to-stdout', 'send log messages to standard output'],
    ['help|h',        'print usage message and exit', { 'shortcircuit' => 1 }],
);

if ($opt->manual) {
    pod2usage(-verbose => 2);
    exit 0;
}

if ($opt->help()) {
    print $usage->text;
    exit;
}

if ($opt->verbose()) {
    $VERBOSE = 1;
}

# Read configuration file and define some global variables.
%CONFIG = config_setup($CONFIG_FILE);

if (!$opt->log_to_stdout()) {
    $LOGFH = prepare_log_fh($LOGFILE);
    progress("LOGGING: $LOGFILE");

} else {
    progress('LOGGING: standard output');
}

if ($opt->dry_run()) {
    $DRYRUN = 1;
} else {
    $DRYRUN = 0;
}
progress("DRYRUN:  $DRYRUN");

# Run this code on script ending.
END {
    if ($LOGFH) {
        close($LOGFH);
    }
}

#######################################################################

my $mostrecent = 0;
my @oldids     = ();
my $cron       = Schedule::Cron->new(
    \&dispatch_job,
    processprefix => $PROGRAM_NAME
);

progress('about to enter infinite while loop');
while (1) {

    # Get the jobs that have been updated since the last known update, and
    # any jobs deleted since the last update.
    my @jobs    = get_cronjobs($mostrecent);
    my @newids  = get_jobids();
    my @deleted = jobs_deleted(\@newids, \@oldids);

    @oldids = @newids;

    if (@jobs || @deleted) {

        progress('found jobs to add or delete...');

        # Kill any earlier run of the scheduler.
        kill_earlier_scheduler();

        # Remove any jobs deleted from the database.
        foreach my $id (@deleted) {
            log_message('==================');
            log_message("Deleting job: $id");
            log_message('==================');

            my $index = $cron->check_entry($id);
            if (defined $index) {
                $cron->delete_entry($index);
            }
        }

        # Now go through and add the new cron entries. The function
        # add_cron will, if the job already exists in $cron, do an update
        # rather than add.
        #
        # Since @jobs is ordered so that the most recently modified jobs
        # are LAST in the array, at the end of this loop $mostrecent will
        # have the time of the most recently modified job.
        foreach my $job (@jobs) {
            log_message('>>>>>>>>>>>>>>>>>>');
            log_message('Altering job: ' . Dumper($job));
            log_message('<<<<<<<<<<<<<<<<<<');
            $mostrecent = $job->{'cr_modified'};
            add_cron($cron, %{$job});
        }

        # Run the job.
        log_message(Dumper($cron->list_entries()) . "\n" . localtime(time()));
        my $child_pid = $cron->run(detach => 1, pid_file => $PIDFILE);
        progress("cron running with PID $child_pid");
    } else {
        progress('no jobs found to add or delete...');
    }

    # Sleep before doing another update. The actual cron jobs continue
    # running in the detached process -- we just are pausing before
    # checking for any updates in the database.
    progress("going to sleep for $SLEEPTIME_MINUTES minutes...");
    sleep(60 * $SLEEPTIME_MINUTES);
    progress('waking up from sleep');
}

exit(0);

__END__

##############################################################################
# Documentation
##############################################################################

=head1 NAME

cron-distribute - Distribute Stanford cron service jobs to servers via remctl

=head1 SYNOPSIS

cron-distribute [B<--help>]

=head1 DESCRIPTION

This is a part of the Stanford cron service.  It checks a database of
cron events for user-requested jobs and their times.  They are added to
Schedule::Cron and when they come due, they are farmed out to one of a
pool of cron servers based on load.  The farming is done via remctl,
where a remctl command is sent to a server telling it to run a specific
job via database record id.

=head1 OPTIONS

B<--help>

Prints the perldoc information (this document) for the script.

=head1 AUTHORS

Jon Robertson <jonrober@stanford.edu>

=cut
