#!/usr/bin/env perl
################################################################################
#
# Name: omgSpy.pl
#
# purpose:
#          Want to create a command-line script to format OSF file entries
#          the way I format copy/pastes from OMG GUI.
#
# description:
#     This gives a nice human-readable output more akin to the OMG.
#     And you can redirect the output to a file then sort and egrep out only what 
#     you want to look at.  Much better for cutting the information down to size.  
#     (Also, I suggest using egrep, because then you can include the column titles
#     in your output by using its 'OR' option, e.g.
#     egrep 'DATA|ipppssoot' blah.out)
#     
#     It also needs to be put someplace everyone can get to and I was hoping to 
#     port it to python.  And this one is still pumping out development trace 
#     statements.  It was based on read_osf_fm_cmdline.pl for which --help works, 
#     but for some reason omgSpy.pl --help is not working?  But here's how it works:
#     
#     This script will format a list of OSFs to show:
#         dataset_name data_id Column_stati command DCF and Date
#     
#     USAGE: omgSpy.pl [-dir <OPUS_path>|-path <OPUS_path>] [-stage <stage_file_name>]
#     
#     WHERE: OPUS_path is the name of the OPUS path file where the OSFs are
#            <stage_file_name> is the name of the file where the stages
#                 are defined
#     you have a choice of -dir or -path because you can use this tool with or 
#     without OPUS servers running.  If OPUS servers ARE running, then use -path, 
#     if servers are down, use -dir.
#     Again because I hate to type, the -stage <stage> parameter is optional.  
#     You do NOT need to use it IF your PATH name is the same as the shortcut for 
#     the pipeline stage file.  For example, I use science.path for my path 
#     controlled by the opus_science_pipeline.stage.  And "science" is the shortcut 
#     for "opus_science_pipeline.stage" (see below).
#     
#     NOTE: you MUST be in an OPUS environment to use this script because 
#           in the PATH it needs:
#                   osfile_stretch_file 
#                   osf_test
#                   convertTimeStamp 
#
# Example:
#     omgSpy.pl -dir $osfs/bismark -stage cdbs
#     omgSpy.pl -path ingest 
#     omgSpy.pl -dir science -stage opus_science_pipeline.stage
#     omgSpy.pl -path emma -stage science 
#
# Pipeline stage file names (shortcut):
#     ast_pipeline.stage             (ast)
#     edr_pipeline.stage             (edr)
#     fgs_pipeline.stage             (fgs)
#     fof_pipeline.stage             (fof)
#     g2f_pipeline.stage             (g2f)
#     ingest_pipeline.stage          (ingest) (could add reingest)
#     otfr_pipeline.stage            (otfr)
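#     otfr_science_pipeline.stage    (NOTE: any -stage value containing otfr_sci, otfsci
#                                     or idrsci -- this file name included -- is
#                                     currently sent to opus_science_pipeline.stage)
#     opus_science_pipeline.stage    (science OR otfr_sci OR otfsci OR idrsci)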
#     opus_tv_pipeline.stage         (tv)
#     pacor_pipeline.stage           (pacor)
#     pdr_pipeline.stage             (pdr)
#
# Input:   a) OPUS path name
#          b) stage name
#
# Returns: 
#   OSFs w/o dates formatted under stage names
#
# History:
# mm/dd/yy PR     who        what
# -------- ------ ---------- ----------------------------------------------------------
# 11/03/10 -      Sherbert   directory unreliable, switch to osf_test ** start servers **?
# 11/04/10 -      Sherbert   Add time w/convertTimeStamp 
# 09/28/12 72180  Sherbert   Put in tree so anyone can use
#
################################################################################

 ## set up external routines
 ## unshift prepends LIST to the front of the array
 my @more_dirs = split /:/, $ENV{PATH}; 

 unshift @INC, @more_dirs;
# print "# Arguments are: @ARGV \n" ;

#print "INC is @INC \n";        ## trace
 require 'gen_regex_fmfile.pl';
 require 'printmsg.pl';
 require 'print_utils.pl' ;
 use File::Basename;
 use Getopt::Long;
 ##
 ## Parse the command line.
 ##   -dir   <directory>        list the OSFs by reading a directory
 ##   -path  <OPUS_path>        list the OSFs by asking osf_test
 ##   -stage <stage_file_name>  stage file name or shortcut (optional)
 ##   -help                     print the usage text and exit
 ##
 ## GetOptions is handed a hash reference, so the values land in %Options
 ## rather than in $opt_* variables; they are copied out by hand below.
 ##
 my %Options ;
 my $ok = GetOptions( \%Options, "dir=s", "path=s", "stage=s", "help" ) ;

 # GetOptions has already reported on STDERR whatever it could not parse.
 if ( ! $ok ) {
    PrintUsage();
    exit 1 ;
 }
 if ( $Options{'help'} ) {
    PrintUsage();
    exit 0 ;
 }

 my $opt_path  = $Options{'path'} ;
 my $opt_dir   = $Options{'dir'} ;
 my $opt_stage = $Options{'stage'} ;

 # Must specify path or a directory
 if ( ! $opt_path && ! $opt_dir ) {
    print "## Did NOT find -path AND Did NOT find -dir on command line? \n" ;
    PrintUsage();
    exit 1 ;
 }

 # If -stage was NOT provided, derive it from the path name or from the last
 # component of the directory, so the user does not have to type it twice.
 if ( ! $opt_stage ) {
    if ( $opt_path ) {
        # Just in case user puts '.path' on -path: keep what precedes the
        # first dot.
        ( $opt_stage ) = split( /\./, $opt_path ) ;
    } else {
        # MUST be $opt_dir.  basename() ignores trailing slashes and also
        # copes with a directory given without any slash at all, both of
        # which the previous substr/regex code got wrong.
        $opt_stage = basename( $opt_dir ) ;
        print "# I figured out that stage should be $opt_stage \n" ;
    }
 }

 ## Input: location of OSFs (directory or OPUS path name)
 ## -dir takes precedence when both were given: it reads the (possibly stale)
 ## directory listing, whereas -path gets the list by running osf_test.
 my @in_osfs = $opt_dir  ? @{ get_osfs_fm_dir( "$opt_dir" ) }
             : $opt_path ? @{ get_osfs_fm_server( "$opt_path" ) }
             :             () ;

 ## Input: stage file name/location
 ## Should be able to use OPUS tools to find this
 ##
 ## Shortcuts that must match the -stage value exactly, and the stage file
 ## each one stands for.
 my %stage_file_for = (
    science => "opus_science_pipeline.stage",
    ingest  => "ingest_pipeline.stage",
    tv      => "opus_tv_pipeline.stage",
    otfr    => "otfr_pipeline.stage",
    idrdev  => "otfr_pipeline.stage",
    idrotf  => "otfr_pipeline.stage",
    ast     => "ast_pipeline.stage",
    edr     => "edr_pipeline.stage",
    fof     => "fof_pipeline.stage",
    fgs     => "fgs_pipeline.stage",
    pdr     => "pdr_pipeline.stage",
    cdbs    => "cdbs.stage",
    pacor   => "pacor_pipeline.stage",
    fuse    => "fuse_pipeline.stage",
    g2f     => "g2f_pipeline.stage",
 ) ;

 # Without a stage name there is nothing sensible to look up.
 if ( ! defined $opt_stage || $opt_stage eq '' ) {
    print STDERR "## Could not work out which stage file to use, please give -stage \n" ;
    exit 1 ;
 }

 if ( exists $stage_file_for{$opt_stage} ) {
    $opt_stage = $stage_file_for{$opt_stage} ;
 } elsif ( $opt_stage =~ /otfr_sci|otfsci|idrsci/ ) {
    # Substring match, as before: ANY value containing one of these strings
    # is sent to the science pipeline stage file.
    # NOTE(review): this also catches "otfr_science_pipeline.stage" itself --
    # confirm that is intended.
    $opt_stage = "opus_science_pipeline.stage" ;
 }

 # Add the extension when the user left it off (e.g. "ingest_pipeline").
 # The dot is escaped so that only a real ".stage" suffix counts.
 if ( $opt_stage !~ /\.stage\z/ ) {
    $opt_stage = $opt_stage . ".stage";
 }
 my $stage_file = resolve($opt_stage);
 my $stage_arr_ref = grab_stages($stage_file);
 my @stages = @$stage_arr_ref;

 my $num_stages = scalar @stages;                   ## Needed again when printing the OSFs

 ##
 ## Print Labels
 ##
 ## First line: the stage file's base name in upper case (eg. CDBS)
 my ($stage_label) = fileparse($stage_file, '.stage');
 print uc($stage_label), "\n";

 ## Second line: the column titles.  The widths used here have to stay in
 ## step with the ones used further down where the OSF fields are printed.
 printf ("%-40s %-7s ", "DATA_SET_NAME", "DATA_ID" );
 print join( " ", @stages );
 printf (" %-7s %-3s %-20s\n", "COMMAND", "DCF", "Date" );
 ##=================================================##
 
 ## Ask GenRegEx (from gen_regex_fmfile.pl) for the regular expression used
 ## to recognise, and later take apart, an OSF entry.
 my $osf_regex = GenRegEx("opus.env", "OSF");
 ## Safeguard against empty returns
 if ( ! defined $osf_regex || ! length($osf_regex) ) {
    PrintMsg ("E","No regular expression generated for OSF (regex is empty)");
    exit 2 ;
 }

 ## Keep only the entries that look like OSFs; anything else in the input
 ## list (other files in the directory, stray output lines) is dropped.
 my @osfs     = grep { $_ =~ /$osf_regex/ } @in_osfs;
 my $osfcount = scalar @osfs;
 ##
 ## Print out data_set_name and data_id, then stage values, then command,
 ## DCF number and date -- one line per OSF.
 ##
 ## convertTimeStamp is only needed when there is something to print.  It is
 ## looked up once, by walking PATH in Perl, instead of running
 ## `which ... >& /dev/null` for every OSF: that redirection is csh/bash
 ## syntax and is rejected by a strict POSIX /bin/sh.
 if ( @osfs ) {
    my $env_path = defined $ENV{PATH} ? $ENV{PATH} : '' ;
    my $have_converter = grep {
        my $dir = length( $_ ) ? $_ : '.' ;
        -f "$dir/convertTimeStamp" && -x _
    } split /:/, $env_path ;
    if ( ! $have_converter ) {
        print "Failed to find convertTimeStamp in PATH ! \n" ;
        exit 9 ;
    }
 }

 foreach my $osf ( @osfs ) {
    ## Entries coming from osf_test still carry their newline.
    my $entry = $osf ;
    chomp( $entry ) ;

    ## Fields are separated by '-' or '.'.  The capturing group makes split
    ## return the separators as well, so the fields sit at the even indices.
    my @chunks = map { defined $_ ? $_ : '' } ( split /(-|\.)/, $entry )[ 0 .. 10 ] ;

    ## $chunks[0]  is timestamp in hex format
    my $date = _osf_date_string( $chunks[0] ) ;

    ## $chunks[4]  is data_set_name w/ extra underscores: drop the trailing
    ## padding only.  (Cutting at the first "__" lost the last character of
    ## a name that had no double-underscore padding.)
    ## NOTE(review): assumes the padding is always trailing underscores.
    my $data_set_name = $chunks[4] ;
    $data_set_name =~ s/_+$// ;

    ## $chunks[6]  is data_id
    my $data_id = $chunks[6] ;

    ## These widths must match the column titles printed above.
    printf ("%-40s %-7s ", $data_set_name, $data_id );

    ## $chunks[2]  is status, one character per stage
    foreach my $byte ( split //, substr( $chunks[2], 0, $num_stages ) ) {
        printf ("%-2s ", $byte );
    }

    ## $chunks[10] is command (could be important...more so w/ PSFs)
    ## $chunks[8]  is dcfnum which is useful when ASNs do not collect
    printf ("%-8s", $chunks[10] ) ;  ## 8 because I need the space
    printf ("%-4s", $chunks[8] ) ;   ## 4 because I need the space
    printf ("%-20s", $date ) ;

    print "\n" ;
 }
 print "\nFound $osfcount OSFs \n";


 exit;

 ##
 ## _osf_date_string
 ##
 ## input:  the hex timestamp field of an OSF
 ## output: yyyy-mm-dd_HH:MM:SS built from what convertTimeStamp prints, or
 ##         the hex value unchanged when the tool cannot be run or does not
 ##         answer with 14 leading digits
 ##
 sub _osf_date_string {
    my ( $hex_time ) = @_ ;

    my $stamp = '' ;
    # List-form pipe open: the argument goes to the tool as is, no shell.
    if ( open( my $conv, '-|', 'convertTimeStamp', $hex_time ) ) {
        local $/ ;
        my $out = <$conv> ;
        $stamp = $out if defined $out ;
        close( $conv ) ;
    }

    if ( $stamp =~ /^\s*(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})/ ) {
        return "$1-$2-$3_$4:$5:$6" ;
    }
    return $hex_time ;
 }

##------------------------------------------------------------------------------
##
## get_osfs_fm_dir
##
## Reads a directory and returns every entry name it contains, unfiltered
## (whatever readdir reports, bare names without the directory prefix).
## Also prints a one-line trace of its argument on STDOUT.
##
## input:  dir of OSFs
## output: reference to an array of the entry names
## dies:   if the directory cannot be opened
##
##------------------------------------------------------------------------------
sub get_osfs_fm_dir {

    my ($input) = @_;
    print "## get_osfs_fm_dir' input is $input\n";  ## trace

    # Lexical handle: closed automatically on scope exit and cannot clash
    # with a DIR handle used anywhere else in the program.
    opendir (my $dir_handle, $input)
        or die "get_osfs_fm_dir: Cannot open ", $input,
               " directory for reading $!.\n";

    my @allfiles = readdir $dir_handle;
    closedir ($dir_handle);

    return \@allfiles;
}

##------------------------------------------------------------------------------
##
## get_osfs_fm_server
##
##  Try using osf_test instead of reading directory because directory is NOT
##  matching osf_test results after osf_update used to modify an OSF.  Why it
##  is not updating the directory in a reasonable amount of time is unknown :(
##
##  Runs "osf_test -p <path>" and returns its output line by line.  The path
##  is handed to osf_test as a single argument, without going through a shell.
##  MSG_REPORT_LEVEL is set to MSG_INFO in the environment first and is left
##  set afterwards.
##
## input:  OPUS path containing OSFs
## output: reference to an array of osf_test's output lines (newlines kept);
##         the array is empty, and a warning is issued, if osf_test cannot be
##         started
##
##------------------------------------------------------------------------------
sub get_osfs_fm_server {

    my ($input) = @_;
    $ENV{'MSG_REPORT_LEVEL'} = 'MSG_INFO' ;

    my @osfs;
    if ( open( my $osf_test, '-|', 'osf_test', '-p', $input ) ) {
        @osfs = <$osf_test>;
        close( $osf_test );
    } else {
        warn "get_osfs_fm_server: Cannot run osf_test on $input: $!\n";
    }

    return \@osfs;
}

# ##------------------------------------------------------------------------------
# ##
# ## get_osfs
# ## 
# ## input:  dir of OSFs
# ## output: list of OSF files
# ##
# ##------------------------------------------------------------------------------
# sub get_osfs {
# 
#     my $input = $_[0];
# #   print "## get_osfs' input is $input\n";  ## trace
#     opendir (DIR, $input)
#         or die "get_osfs: Cannot open ", $input,
#                " directory for reading $!.\n";
# 
#     my @allfiles = readdir DIR;
#     closedir (DIR);
# 
#     return \@allfiles;
# }

##------------------------------------------------------------------------------
##
## grab_stages
##
## Reads a stage file and collects the value of every line containing TITLE,
## e.g.
##     STAGE01.TITLE = CP
## The number that follows the last "E" of the key decides the position, so
## the lines may appear in any order (STAGE01 -> element 0).  Lines starting
## with "!" (OPUS comments) and blank lines are skipped.
##
## input:  stage file (surrounding whitespace, such as the newline left by a
##         backtick command, is ignored)
## output: reference to an array of stages, in expected order
## dies:   if the file cannot be opened, if a key has no "E", or if the
##         stage number is not a positive integer
##
##------------------------------------------------------------------------------
sub grab_stages {

    my ($stage_file) = @_;

    # Three-argument open takes the name literally, so do the whitespace
    # trimming that two-argument open used to do behind the scenes.
    $stage_file = '' unless defined $stage_file;
    $stage_file =~ s/^\s+//;
    $stage_file =~ s/\s+$//;

    open (my $sfile, '<', $stage_file)
        or die "grab_stages: Cannot open ", $stage_file,
               " for reading $!.\n";
## switch above to PrintMsg and open error check ala OTFR

    my @stage_array;
    while ( my $line = <$sfile> ) {
        next if $line =~ /^\s*\!/;      ## Skip special OPUS comments

        chomp $line;
        $line =~ s/^\s+//;              ## Remove leading spaces
        $line =~ s/\s+$//;              ## Remove trailing spaces
        next if $line eq '';            ## skip blank lines
        next unless $line =~ /TITLE/;

        ## STAGE01.TITLE = CP
        my ($stage_key, $stage) = split /=/, $line;
        $stage = '' unless defined $stage;
        $stage =~ s/^\s+//;
        $stage =~ s/\s+$//;

        ## Need the stage number in case they are out of order in stage file
        ($stage_key) = split /\./, ( defined $stage_key ? $stage_key : '' );
        $stage_key = '' unless defined $stage_key;

        ## Where is the "E" in STAGEnn?
        my $eloc = rindex($stage_key, "E");
        if ( $eloc < 0 ) {
            die "grab_stages: Could not find E in $stage_key \n";
        }
## switch above to PrintMsg and open error check ala OTFR

        my $arr_num = int( substr($stage_key, $eloc+1) ) - 1;
        if ( $arr_num < 0 ) {
            # A negative index would overwrite the last stage (or abort with
            # an obscure message on an empty array).
            die "grab_stages: Bad stage number in $stage_key \n";
        }

        $stage_array[$arr_num] = $stage;
    }

    close ($sfile);
    return \@stage_array;

}

##------------------------------------------------------------------------------
##
## resolve
##
## Turns a file name into the location printed by
##     osfile_stretch_file OPUS_DEFINITIONS_DIR:<file>
## The argument is passed to the tool without going through a shell, and the
## surrounding whitespace (the trailing newline in particular) is removed
## from the answer.
##
## If the tool prints nothing, or its answer still contains
## OPUS_DEFINITIONS_DIR (presumably meaning the name was not expanded --
## TODO confirm), a fatal message is reported through PrintMsg and the
## script exits with status 1.
##
## input:  name of a file expected in OPUS_DEFINITIONS_DIR (eg. cdbs.stage)
## output: what osfile_stretch_file printed for it, trimmed
##
##------------------------------------------------------------------------------
sub resolve {

    my ($input_file) = @_;
    my @cmd = ( 'osfile_stretch_file', "OPUS_DEFINITIONS_DIR:" . $input_file );

    my $file = '';
    if ( open( my $stretch, '-|', @cmd ) ) {
        local $/;                       # take the whole output in one read
        my $out = <$stretch>;
        $file = $out if defined $out;
        close( $stretch );
    }
    $file =~ s/^\s+//;
    $file =~ s/\s+$//;

    if ( $file eq '' || $file =~ /OPUS_DEFINITIONS_DIR/ ) {
        PrintMsg ("F", "@cmd failed ", "resolve");
        exit 1;
    }

    return $file;
}

##------------------------------------------------------------------------------
##
## Print Usage
##
## Writes the usage text to STDERR.  The script name shown is the base name
## of $0 with a trailing ".pl" removed.  Takes no arguments; the caller
## decides whether and how to exit.
##
##------------------------------------------------------------------------------
sub PrintUsage
{
    # Escaped dot: fileparse treats the suffix as a regular expression.
    my ($nm) = fileparse ($0, '\.pl');
    print STDERR <<"END_USAGE";

This script will format a list of OSFs to show:
    dataset_name data_id Column_stati command DCF and date

USAGE: $nm -dir  <directory> [-stage <stage_file_name>]
    or $nm -path <OPUS_path> [-stage <stage_file_name>]

WHERE: <directory> is the name of a directory containing OSF files
       <OPUS_path> is the name of the OPUS path file where the OSFs are
       <stage_file_name> is the name of the file where the stages
            are defined and is optional IF <stage_file_name> would 
            be the same as <OPUS_path> or <directory>
NOTE: you MUST be in an OPUS environment to use this script because 
      in the PATH it needs:
              osfile_stretch_file 
              osf_test
              convertTimeStamp

Example:
    $nm -dir cdbs -stage cdbs
    $nm -path cdbs 
    $nm -dir ingest -stage ingest_pipeline
    $nm -dir science -stage opus_science_pipeline.stage
    $nm -dir emma -stage science 

Other pipeline stage file names:
    ast or ast_pipeline.stage
    edr or edr_pipeline.stage
    fgs or fgs_pipeline.stage
    fof or fof_pipeline.stage
    g2f or g2f_pipeline.stage
    pdr or pdr_pipeline.stage*
    pacor    or pacor_pipeline.stage
    otfr     or otfr_pipeline.stage
    science  or otfr_sci or otfsci or opus_science_pipeline.stage

END_USAGE
}