#!/usr/bin/perl
#
# okill -- Kill processes that exceed elapsed time, CPU time, or CPU usage
#          limits.
#
# Written by Susan Feng <sfeng@stanford.edu>
# Copyright 2005, 2006, 2013
#     The Board of Trustees of the Leland Stanford Junior University

##############################################################################
# Modules and declarations
##############################################################################

use 5.006;
use strict;
use warnings;
use vars qw($ID $PS $LIMIT);

use Getopt::Long qw(GetOptions);

# Command used to list processes.  These ps options are expected to be
# accepted on every SYSV platform.
$PS    = q{ps -e -o pid,ppid,user,etime,time,pcpu,comm};

# Age threshold of 24 (hours, going by the script's original purpose).
# NOTE(review): nothing in the visible script reads $LIMIT; the limits that
# are actually applied come from the command-line options.
$LIMIT = 24;

##############################################################################
# Helper routines
##############################################################################

# Convert a time value printed by ps into whole minutes.
#
# $time - String in the [[dd-]hh:]mm:ss layout used by the etime and time
#         columns of ps.
#
# Returns: The number of whole minutes as a plain number.  Seconds are
#          discarded rather than rounded.  Returns 0 if $time is undefined or
#          is not in the expected layout.
sub calc_minutes {
    my $time = shift;
    return 0 unless defined $time;

    # Take the fields from the match's return value instead of $1, $2, ...
    # After a failed match the capture variables still hold whatever the last
    # successful match left behind, which would produce a bogus age.
    my ($days, $hours, $mins)
        = $time =~ /^(?:(\d+)-)?(?:(\d+):)?(\d+):\d+$/
        or return 0;

    # The day and hour fields are optional and are undefined when absent.
    $days  ||= 0;
    $hours ||= 0;
    return ($days * 24 + $hours) * 60 + $mins;
}

##############################################################################
# Main routine
##############################################################################

# Always flush output.  Save the path we were invoked as before trimming $0
# down to the bare program name: messages read better with the short name,
# but perldoc needs the real path to find this file for --help.
$| = 1;
my $fullpath = $0;
$0 =~ s%^.*/%%;

# Define the default signal type (1, HUP) and limits.  A limit of 0 is met by
# every process, so a limit that is not given never spares a process.
my $signal = 1;
my ($etime_lim, $time_lim, $pcpu_lim) = (0, 0, 0);

# Parse command-line options.
Getopt::Long::Configure ('bundling');
my ($help, $dryrun, $verbose, $parent);
GetOptions ('h|help'     => \$help,
            'e|etime=i'  => \$etime_lim,
            't|time=i'   => \$time_lim,
            'c|pcpu=i'   => \$pcpu_lim,
            'n|dryrun'   => \$dryrun,
            'p|parent'   => \$parent,
            's|signal=i' => \$signal,
            'verbose'    => \$verbose) or exit 1;
if ($help) {
    print "Feeding myself to perldoc, please wait....\n";
    exec ('perldoc', '-t', $fullpath)
        or die "$0: unable to run perldoc: $!\n";
}

# The process name is the one required argument.
my $procname = shift @ARGV;
die "Usage: $0 [-np] [--verbose] [-c <percent>] [-e <minutes>]"
    . " [-t <minutes>] [-s <signal>] procname\n"
    unless defined $procname and length $procname;

# Always be verbose if we are doing a dry run.
$verbose = 1 if $dryrun;

# Compile the command match once rather than for every line of ps output.
# NOTE(review): the name is interpolated as a regular expression, so any
# metacharacters in it are pattern syntax rather than literal text.
my $wanted = qr/^(.*\/)?$procname$/;

# Open up the process list and start reaping the old ones.
open (my $procs, '-|', $PS) or die "$0: unable to run $PS: $!\n";
while (my $line = <$procs>) {
    $line =~ s/^\s+//;
    my ($pid, $ppid, $user, $etime, $time, $pcpu, $comm) = split /\s+/, $line;

    # Skip the header line and anything too short to be a process entry, so
    # that only real PIDs reach the numeric comparisons below.
    next unless defined $comm and $pid =~ /^\d+$/ and $ppid =~ /^\d+$/;
    next unless $comm =~ $wanted;

    # Never signal init or this script itself.  PID 0 is excluded as well,
    # since kill treats it as the caller's whole process group.
    next if $pid <= 1 or $pid == $$;

    # "|| 0" keeps an undefined result for an unparseable time from reaching
    # the comparisons.
    my $elapsed = calc_minutes ($etime) || 0;
    my $cpu     = calc_minutes ($time)  || 0;
    next unless $elapsed >= $etime_lim
        and $cpu >= $time_lim
        and $pcpu >= $pcpu_lim;

    # Only target a parent that is a real process other than init or this
    # script.  A parent PID of 0 must be excluded for the same process group
    # reason as above.
    my $kill_parent = $parent && $ppid > 1 && $ppid != $$;

    if ($verbose) {
        print "killing $pid age $elapsed minutes\n";
        print "killing parent $ppid age $elapsed minutes\n" if $kill_parent;
    }
    next if $dryrun;

    # Signal the process first and then, if requested, its parent.  Failures
    # are only reported in verbose mode so that the default stays quiet.
    for my $target ($pid, $kill_parent ? $ppid : ()) {
        next if kill ($signal, $target);
        warn "$0: unable to signal $target: $!\n" if $verbose;
    }
}

# Closing a pipe also reaps the command, so a failure here means ps itself
# did not run cleanly and the process list may have been incomplete.
close ($procs)
    or warn "$0: error running $PS: ", ($! || "wait status $?"), "\n";

##############################################################################
# Documentation
##############################################################################

=for stopwords
Feng HUP Huaqing Zheng cpu --etime etime init -np okill pcpu pkill

=head1 NAME

okill - Kill processes that have exceeded certain limits

=head1 SYNOPSIS

B<okill> [B<-h>]

B<okill> [B<-np>] [B<--verbose>] [B<-c> I<percentage>] [B<-e> I<minutes>]
    [B<-s> I<signal>] [B<-t> I<minutes>] I<process-name>

=head1 DESCRIPTION

B<okill> takes a process name on the command line, finds all the processes
with that name, and kills those that exceed certain limits.  This script
should run on all System V compliant operating systems.  The init process
will never be killed.  The currently supported limits are etime, time, and
pcpu.  See the OPTIONS section for what these limits mean.

If more than one limit is supplied, a process is killed only if it meets or
exceeds all of them, which avoids unnecessary killing.  If no limits are
supplied, then this script will kill all processes matching the command name
(like the pkill behavior).

=head1 OPTIONS

=over 4

=item B<-c> I<percentage>, B<--pcpu>=I<percentage>

Kill matching processes that exceed the supplied cpu utilization.  pcpu
stands for the cumulative CPU time divided by the elapsed time, expressed as
a percentage.

=item B<-e> I<minutes>, B<--etime>=I<minutes>

Kill matching processes that have been running longer than the supplied
number of minutes.  etime stands for elapsed time.

=item B<-h>, B<--help>

Print out this documentation (which is done simply by feeding the script to
C<perldoc -t>).

=item B<-n>, B<--dryrun>

Dry run only.  Print out what would be killed without sending any signals.
If B<-p> is given, also print the parent process id.  Implies B<--verbose>.

=item B<-p>, B<--parent>

Kill the parent process as well.  This is fairly dangerous.

=item B<-s> I<signal>, B<--signal>=I<signal>

By default, B<okill> sends signal 1 (HUP).  To specify a different signal,
use this flag.  The signal must be a number.  (15, TERM, is a common
choice.)

=item B<-t> I<minutes>, B<--time>=I<minutes>

Kill matching processes that exceed the supplied cumulative CPU time.

=item B<--verbose>

Verbose.  Print out each process (and, with B<-p>, its parent) as it is
killed.

=back

=head1 AUTHORS

Susan Feng <sfeng@stanford.edu> and
Huaqing Zheng <morpheus@stanford.edu>.

=cut
