#!/usr/bin/env perl
##
##----------------------------------------------------------------------------
## History:
## Date      OPR    Who         Reason
## --------  ------ ----------  --------------------------------------------
## 10/31/02  45017  Sherbert    Can't do subroutines in a csh script :(
## 11/01/02  45017  Sherbert    Discovered psgrep and added two parameters
##                              Disabled -user parameter however
## 11/02/02  45017  Sherbert    opusgrep allows specification of user
## 04/21/03  45017  Sherbert    clean up for code review
## 05/20/03  45017  Sherbert    "kill -9" is evil (SIGKILL) and so I rm it
## 09/08/03  45017  Sherbert    Required to add ".pl" to file names
## 09/09/03  45017  Sherbert    Added linesize param
## 12/23/03  49783  Sontag      Do not call perl scripts directly in Share
## 02/24/04  50465  Sontag      Change location of id
## 10/05/04  51890  Sherbert    Improve help about kill
## 11/11/04  51890  Sherbert    Verify -t ^[asp] otherwise gives opusgrep usage
##
##----------------------------------------------------------------------------
##
## pscluster:
## 
## - Provides the ability to list and/or kill servers (or all OPUS
## processes) the user has running on specified nodes.  This became
## necessary under Tru64 which would allow you to start processes under
## the cluster name, but then you had to be on the member nodes to a)
## see what was running, or to b) kill processes.
## - If used with "-kill", OPUS processes running on any specified 
## nodes will be killed (if user has permission to do so).
## - Wide terminals work best so more process status information can be seen.
##
## a) Use ssh or rsh, depending on opus_login.csh setting
##    # Set location of rsh or ssh and rcp/scp
##    setenv OPUS_REMOTE_SHELL /usr/bin/rsh  #<path to ssh or rsh>
##    setenv OPUS_REMOTE_COPY  /usr/bin/rcp  #<path to rcp or scp>
##
## b) Standardize the ps so it will work on any unix (I hope)
##    Looks like we'll have to switch/case on 'uname -s' result
##    Looks like this will have to happen with each rsh or ssh for 
##    maximum flexibility
##    $! perl command line  very important: must use /usr/local/bin/perl 
##       to be compatible between SunOS and OSF1... 
##       hope Linux setup the same...
##
## c) Generalize the cluster members, making an input list possible
##
## d) Generalize the user so it is specifiable but defaults to current user
##    i)  whoami is NOT on every system :(
##    ii) use 'id' command to determine current user:
##        A) $USER may be vague when 'su' is used
##        B) whoami not in every path
##
## e) Allow user to search for OPUS server processes, OPUS processes running
##    in any path, OPUS processes running in specified path OR a
##    combination of OPUS processes running in any path and OPUS servers.  
##
## Usage:
##  $0 [node1[,node2,...]] [-kill]
##  $0 [-node node1[,node2,...]] [-kill] [-type=a|s|p]
##
## Since I seem to be having a problem running ps as a backtick command, 
## I have discovered a psgrep command from the Perl Cookbook.  I modified 
## it to set up the ps command for Tru64, SunOS, and Linux and it seems 
## to work.  However, I am not sure what about our OS-es are STScI-specific,
## so users still may have to tweak scripts if their systems are set-up
## completely differently.
##
##----------------------------------------------------------------------------


if (exists ($ENV{"OPUS_REMOTE_SHELL"})) {
    $OPUS_REMOTE_SHELL = ($ENV{"OPUS_REMOTE_SHELL"});
} else {
    $OPUS_REMOTE_SHELL = "/usr/bin/rsh";
}

use Getopt::Long;
($opt_nodes, $opt_user, $opt_kill, $opt_type, $opt_path, $opt_linesize, 
 $opt_help, $opt_verbose) = (0) x 8;
GetOptions qw{
    nodes=s
    user=s
    kill
    type=s
    path=s
    linesize=i
    help
    verbose
    };
## apparently "-nodes string" is same as "-n string" 
## whereas   "--nodes string" would be different

if ( $opt_help ) {
    PrintUsage();
}
if ( $opt_verbose ) {
    $verbose = 1;
} else {
    $verbose = 0;
}

if ( $opt_user && $opt_kill ) {
    print STDERR "ERROR: cannot specify kill option AND name a user" ;
    PrintUsage();
}

## check for missing values as far as one is able to do so...
## "-nodes -user l" will generate a failure but
## "-nodes l -user" will default to current user
if ( $opt_nodes =~ /^-/ ||
     $opt_user  =~ /^-/ ||
     $opt_kill  =~ /^-/ ||
     $opt_type  =~ /^-/
    ) {
    print STDERR "ERROR: parameter name must not begin with'-'.  ";
    print STDERR "Is a value missing on the command line?\n";
    PrintUsage();
}


##
## Set up default values
##

## Default to long linesize for ps output
if ( ! $opt_linesize ) {
    $linesize = 115;
} else {
    $linesize = $opt_linesize;
}

## Default to current node
$thisNode = `/bin/uname -n | /bin/sed 's/\\..*//'` ;
chomp $thisNode;
if ( ! $opt_nodes ) { 
    $nodeList = $thisNode ;
} else {
    $nodeList = $opt_nodes ;
}

## Get rid of commas in nodeList, if any
@nodeList = split /,/, $nodeList ;

## Default to no killing
if ( ! $opt_kill ) {
        $killMode = 0        ## false
} else {
        $killMode = 1        ## true
}

## Determine user name...
## Default to current user (found between 1st set of parens)
if ( ! $opt_user ) {
    ## Hardocding absolute paths to command has pros and cons for multi-OS
    ## use, but we'll leave it so for now.
    ## id exists in /usr/bin on Solaris,RH8,RH9,Fedora,AIX
    $me = `/usr/bin/id|/bin/cut -d \\( -f 2|/bin/cut -d \\) -f 1` ;
} else {
    $me = $opt_user ;
}
chomp ($me);

## Determine name of path in which to search for processes, if any
my $pattern  = "";
if ( ! $opt_path ) {
    $pathname = "" ;
} else {
    $pathname = $opt_path ;
}

## Determine what types of processes to look for 
## Default to servers
if      ( ! $opt_type || $opt_type =~ /^s/i ) {        
    ## Discourage combining -type s and -p <name>
    if  ( $pathname ne "" ) {
        print STDERR "\n*** WARNING: servers are NOT path-specific: ";
        print STDERR "ignoring path specification $pathname.\n"
    }
    ## Look for servers
    $pattern = qq{"/opus_[a-z]*_serv/"} ;
} elsif ( $opt_type =~ /^p/i ) {        
    ## Look for processes in [optionally specified] path
    ## $pattern = qq{"/$pathname\.path/"} ; ## matches classpath :(
    $pattern = qq{"/${pathname}[.]path/"} ;
} elsif ( $opt_type =~ /^a/i ) {        
    ## Look for both processes in any path AND servers
    ## Discourage combining -type a and -p <name>
    if  ( $pathname ne "" ) {
        print STDERR "\n*** WARNING: servers are NOT path-specific: ";
        print STDERR "ignoring path specification $pathname.\n"
    }
    $pattern = qq{"/opus_[a-z]*_serv/ || /.*[.]path/"} ;  
    ## Note: 2nd '.' above is part of pattern...
} else {
    ##
    ## opt_type is not a|s|p
    ##
    print STDERR "\n", "*"x79, "\n";
    print STDERR "*** ERROR: type must be s, p, or a \n";
    print STDERR "***   (representing servers, path, or all ) ";
    print STDERR "\n", "*"x79, "\n";
    exit 1;
}


## trace statements
if ( $verbose ) {
    print STDOUT "" ;
    print STDOUT "nodeList: @nodeList killMode: $killMode user (me): $me. \n";
    print STDOUT "" ;
}


## opusgrep.pl command determines the OS-appropriate ps command
## which is then run on each node.  It requires specification of user.
## If -kill is specified, create a list of PIDs then run kill on each.
foreach $node ( @nodeList ) {
    chomp $node;

    # ## print banner
    # print "\n";
    # print STDOUT "-" x 3 ." ".$node." "."-" x (80 - 5 - length $node)."\n";

    ## trace statements
    if ( $verbose ) {
        print STDOUT "Checking: node is .$node. and thisNode is ";
        print STDOUT ".$thisNode. \n";
    }

    ## run the ps command on each node
    my $cmd;
    if ($node eq $thisNode) {
       ## print banner
        my $local = " (local)";
        &PrintBanner ($node,$local);
       ## Determine command to use
        $cmd = "perl `which opusgrep.pl` $pattern $me" ;
        if ( $verbose ) { 
            print STDOUT "Sending to local node $node: $cmd \n"; 
        }
    } else {
       ## print banner
        my $local = "";
        &PrintBanner ($node,$local);
       ## Determine command to use
        $psme = qq|'perl `which opusgrep.pl` $pattern $me'| ;
        $cmd = "$OPUS_REMOTE_SHELL $node $psme " ;
        if ( $verbose ) { 
            print "Sending to $node: $cmd \n";
        }
    }

    my @lines = `$cmd`  ;
    ## The number that is the first parameter below is the length of the line 
    ## of the output.  I like more information than less because there is then
    ## more chance of unique identifiers, but if user regularly operates in 
    ## smaller windows, they may want to twiddle the number below.  86 was ok.
    &PrintLines($linesize, \@lines);
    # print STDOUT @lines;

    if ($killMode) {
        ## if we specify -type a and -kill, then we *really*
        ## ought to kill all the PROCESSES first THEN the servers
        ## The way to kill processes off all specified nodes first and then
        ## kill servers is to run the command twice, first for processes
        ## then for servers.  Wrapper script can enforce this rule.

        ## Sometimes something restarts processes which then restart
        ## servers.  This script then has to be run again after user
        ## determines why processes are being restarted.

        my ($total_pids, $server_pids_ref, $process_pids_ref) = 
            &DiffPIDs(\@lines, $verbose);
        ## If 0 PIDs returned by DiffPIDs, 
        ## tell the user there is nothing to kill
        if ( ! $total_pids ) {
            print STDOUT "No PIDs to kill. \n";
        } else {
            ## Otherwise kill any PROCESSES first
            if ( $process_pids_ref ) {
                &KillUsNow($node,$process_pids_ref,$verbose);
            } 
            ## Then return to kill any SERVERS 
            if ( $server_pids_ref ) {
                &KillUsNow($node,$server_pids_ref,$verbose);
            } 
        }   ## end else total_PIDs
    }   ## end if killMode


}   ## end foreach NODE
## print end banner
print STDOUT "-" x 80 . "\n\n" ;

exit;

##############################################################################
##############################################################################


##############################################################################
## Figure out how to print PS output in limited amount of space...
## PrintLines():
##   Input: Number of characters per line of ps output
##          Reference to an array containing ps output
##  Output: STDOUT
##############################################################################
sub PrintLines () {
    my ($len, $ps_output_ref) = @_;

    ## Regenerate array passed by calling routine
    my @ps_output = @{$ps_output_ref};
    
    foreach my $line (@ps_output) {
        chomp ( $line );
        print STDOUT substr($line,0,$len), "\n";
    }
}


##############################################################################
## DiffPIDs:
##  Input:  Reference to an array containing output from ps command
##  Output: Total PIDs in ps output, 
##          Reference to the array of server PIDs to be killed
##          Reference to the array of process PIDs to be killed
##
## If user specifies -type a and -kill, then we *really*
## ought to kill all the PROCESSES first THEN the servers
##
## This subroutine generates two arrays of PIDs that are to be sent to the 
## kill command and a scalar with the total number of PIDs sent to the 
## subroutine.  It checks to see if the list of PIDs is empty and, if so, 
## informs the calling routine by sending 0 values that can be checked.
##
##############################################################################
sub DiffPIDs () {
    ## Differentiate between the server PIDs and process PIDs so they 
    ## can be stopped separately

    my ($ps_output_ref, $verbose) = @_;
    my @pids;

    ## Regenerate array passed by calling routine
    my @ps_output = @{$ps_output_ref};
    my $total_pids = 0; 
    my (@server_pids, @process_pids);

    # if ( $verbose ) {
    #   print "\nDiffPIDs()'s interpretation of ps output... \n"; 
    # }
    foreach my $line (@ps_output) {
        # if ( $verbose ) { print join '\t',  split / +/, $line; }
        ## Remove the leading white space which messes up fewer-digit PIDs
        $line =~ s/^\s+//;
        my ($pid, @the_rest) = split /\s+/, $line;

        ## The first line contains the labels.  Labels are chars not digits.
        ## PIDs are digits.  We want a good assurance that a PID is in pid

        if ( $pid =~ /[0-9]+/ ) {
            if ( $verbose ) { print "Found pid $pid\n"; }
            foreach my $rem_string ( @the_rest ) {
                if ( $rem_string =~ /opus_[a-z]*_serv/ ) {
                    if ( $verbose ) {
                        print "$rem_string matched server pattern\n";
                    }
                    ## this PID is for a server 
                    push @server_pids, $pid;
                } elsif ( $rem_string =~ /\.path/ ) {
                    if ( $verbose ) {
                        print "$rem_string matched process pattern\n";
                    }
                    ## this PID is for a process
                    push @process_pids, $pid;
                }
            }
        }
        $total_pids = scalar (@server_pids) + scalar (@process_pids);
    }
    if ( $verbose ) { print "Returning $total_pids total pids "; }
    if ( $total_pids > 0 ) {
        if ( $verbose ) { print " in one or two arrays\n"; }
        return ($total_pids,\@server_pids,\@process_pids);
    } else {
        if ( $verbose ) { print " and arrays, just 2 more zero values\n"; }
        return (0,0,0);
    }

}

##############################################################################
## KillUsNow()
##  Input: Reference to an array (of PIDs)
##         Verbosity setting (0 or 1)
## Output: 
##############################################################################
sub KillUsNow () {
    my ( $node, $pids_ref, $verbose ) = @_;
    my @pids;

    if ( $pids_ref ) {
        ## Regenerate array passed by calling routine
        @pids = @{$pids_ref};
        if ( $verbose ) {
            print "Have PIDs. Have reconstituted $pids_ref (pids_ref)  ";
            print "into array pids: @pids.\n";
        }
    } else {
        ## Somehow an array reference value of 0 was sent?
        die "\nERROR: No PIDs in array.  \n";
    }

    ## Count number of PIDs passed
    my $pids_cnt = scalar (@pids);

    ## Send kill the KILL signal (9) for PIDs passed in
    ## do NOT use "kill -9" (SIGKILL): it is evil and causes problems
    if ( scalar(@pids) ) {
        print "\n*** killing @pids \n";
        if ($node ne $thisNode) {
            $cmd = "$OPUS_REMOTE_SHELL $node /bin/kill @pids" ;
        } else {
            $cmd = "/bin/kill @pids" ;
        }
        #if ( $verbose ) { 
            print "Sending to $node: $cmd \n";
        #}
        my $kill_out = `$cmd`;
    }

}

##############################################################################
## Print Usage statement
##############################################################################
sub PrintUsage
{
    use File::Basename;
    
    my ($name, $unixpath, $ext) = fileparse($0, '.pl' );
    
    $name = $name . $ext;
    print STDERR<<EOF; 

$name 
    [-node node1[,node2,...]] 
    [-user user] 
    [-type s|p|a] 
    [-path path] 
    [-kill] 
    [-help]
    [-verbose]
All arguments are optional.  
    [-user user] and [-kill] are mutually exclusive.
    [-type server or all] and [-path pathname] are mutually exclusive.
 
    -node node1,...    
        List of nodes on which to look for processes
        Nodes should be separated by commas: no spaces.
        Default is current node.
    -user user 
        Name of user for whose processes to search.
        Defaults to current user.  
    -type s|p|a    
        s (servers)  Implies pattern opus_[a-z]*_serv.
        p (pathname) Implies processes running in any path 
                     unless [-path pathname] is used.
        a (all)      Implies processes in all paths and the servers.
        Defaults to servers.
    -path pathname 
        Look only for processes in specified path. Ext ".path" unnecessary.
        No default.
    -linesize N
        Limit the output of unix ps command to N-characters. 
        N < 80 is not useful.  More characters give more information.
        Defaults to 115 characters.  
    -kill      
        Indicates that processes found matching pattern should be stopped.
        Only works for processes owned by the current user. 
        Note that NO file cleanup occurs with pscluster and -kill option.
        (see instead -kill in "psopus.pl -help")
    -help
        Print this usage statement.
    -verbose
        Run script verbosely.
 
Examples: 
    $name
        List OPUS servers owned by current user on current node
        (Useful for making sure kill worked.)
    $name -kill  -t a
        Kill all OPUS servers AND OPUS processes owned by current user running 
        on current node  (see also -kill in "psopus.pl -help")
        Note that NO file cleanup occurs with pscluster and -kill option.
    $name -kill 
        Kill all OPUS servers owned by current user running on current node
        If there are OPUS PROCESSES running, this will NOT WORK WELL as the 
        processes will likely restart the servers.
        Note that NO file cleanup occurs with pscluster and -kill option.
    $name -kill -t p
        Kill all processes running in OPUS paths owned by current user running 
        on current node.  (Do not kill servers).
        Note that NO file cleanup occurs with pscluster and -kill option.
    $name -kill -t p -path trodden
        Kill all processes running in OPUS path named "trodden", owned by 
        current user, running on current node.  (Do not kill servers).
        (Do not kill processes in any other OPUS path.)
        Note that NO file cleanup occurs with pscluster and -kill option.
    $name -u polk -n fido,max,spot  
        List OPUS server processes owned by user polk on nodes fido, max, 
        and spot
    $name -n fido,max,spot -type a 
        List all OPUS processes owned by current user on nodes fido, max, 
        and spot 
    $name -t s -l 80
        Limit width of list of OPUS server information to 80-characters
    $name -l 80
        Same: Limit width of list of OPUS server information to 80-characters

EOF
    exit 1;
}

##############################################################################
## Print Banner to make logical separations
##############################################################################
sub PrintBanner {
    my ( $node, $local ) = @_;

    my $len1 = length $node;
    my $len2 = length $local;
    print STDOUT "\n" . 
                 "-" x 3      . " " .
                 $node.$local . " " .
                 "-" x (80 - 5 - $len1 - $len2 )."\n";
    return;
}


