#!/usr/bin/env perl 
#----------------------------------------------------------------------------
#
# Name: ingest_hst_cal_oms.pl
#
# This perl script is used interactively.  This tool is run from a default
# directory that contains either HST CAL or OMS datasets but not both together. 
#
# All the CAL datasets in the directory will be ingested if they have either a
# *trl.fits or a *.trl file.  All the OMS datasets in the directory will be
# ingested if they have an extension matching ".j[iw][tf]" or "_j[iw][tf].fits".
# The OMS directory must have all datasets using the same name convention.  The
# tool will rename the OMS files as required to run the genreq program with the
# fgsreq.resource file.
#
# The tool is designed to make it easy to ingest multiple datasets out of the
# same directory.  The user must specify on the command line whether the default
# directory contains CAL or OMS data.  
#
# The script will use the null.path to find the Ingest path name and it will
# assure that this is an Ingest path that has the keyword OVERRIDE_REQUIRED=Y
# value.  The script will use the Ingest path file to find the 
# INGEST_TRIGGER_ROOT value and the INGEST_SOURCE_DIR.  The script will use the
# genreq process to copy and possibly rename the dataset files and create the
# trigger file, for each dataset found in the default directory.  
#
# To handle associations properly, the script will process the associations
# first.  For every association, it will use the asn.fits file (if present),
# or for OMS data recreate a missing ASN file by using the OPUS_DB.asn_members
# table to find the datasets for the association.  Any dataset assigned to 
# an association will be eliminated from the list of datasets found in the 
# default directory.  After all associations have been processed, the datasets 
# remaining in the list will be ingested as singletons.    
# 
# Account requirements:
#
# The account using this program must have used the opus_login.csh setup
# to set the proper ENV variables: OPUS_DEFINITIONS_DIR, $OPUS_SERVER and $OPUS_DB.
# The $PATH variable must include the OPUS bin directory containing Perl modules.  
#
# Command Usage: (see $usage definition below)
#
#  Return values:
#     0 - success
#     1 - failure
#
# ENV variables:
#     OPUS_SERVER is the database server name
#     OPUS_DB is the OPUS database name
#    
# History:
# Date     OPR      Who         Reason
# -------- -------- ----------  ---------------------------------------------
# 04/07/05 53339    Baum        Initial code
# 02/22/10 59963    Sherbert    try again
# 02/26/10 59963    Sherbert    Add input parameter
# 09/18/12 72255    Sherbert    get rid of DSQUERY
#----------------------------------------------------------------------------
# set up external routines
# Every directory on $PATH is prepended to @INC so the require lines below
# can find the OPUS Perl modules (opus_login.csh puts the OPUS bin dir on PATH).
unshift @INC,(split /:/, $ENV{PATH});
require 'ingest_tools_pkg.pl';  # shared subroutines for this tool
require 'printmsg.pl' ;       # PrintMsg
use strict ;

my $nm = "ingest_hst_cal_oms.pl" ;  # script name, passed to PrintMsg for log context
######################################
#$ENV{STDB_REPORT_LEVEL} = "STDB_INFO" ;   ## rm when testing done
#$ENV{MSG_REPORT_LEVEL}  = "MSG_ALL"   ;   ## rm when testing done
#PrintMsg( "D", "## Attempted to set STDB And MSG reporting HIGH", $nm );
######################################
PrintMsg( "D", "Running $0 " );  ## $0 == which

# begin 
    # Exit status values returned to the calling shell (see header comment).
    my $EXIT_FAILURE =      1;   # exit status for errors
    my $EXIT_SUCCESS =      0;   # exit status for success

# define command usage
    my $numRequired = 2 ;   # mandatory [-flag value] pairs: -c and -o
    # NOTE: typo fixed below ("is the either" -> "is either").
    my $usage = <<"EOM";
Usage: 
>$nm -i <input_dir> -c <archive_class> -o <data_source>
   
   The order of [-flag value] pairs does not matter.  
   Tool now responds to MSG_REPORT_LEVEL settings*
   
   <input_dir>     is the location of the data to be archived.  
                   Only data from one type of archive_class should be here.
                   This is the optional parameter.  Defaults to current dir,
                   if omitted.
   
   <archive_class> is either CAL or OMS.  
   
   <data_source>   is the four character data source override value to be 
                   set in the regression pipeline.   
   
   * MSG_REPORT_LEVEL and STDB_REPORT_LEVEL settings are turned off before 
     calls to genreq. 
   
EOM

 # Database connection details come from the environment set up by
 # opus_login.csh (see header comment); both are required for queries.
 my $OPUS_SERVER = $ENV{OPUS_SERVER};
 my $OPUS_DB     = $ENV{OPUS_DB};

 # Abort immediately when either value is absent.
 unless ( defined $OPUS_SERVER && defined $OPUS_DB ) {
    PrintMsg( "E", "Missing ENV variables: OPUS_SERVER or OPUS_DB ", $nm ) ;
    exit( $EXIT_FAILURE );
 }

    # ------------------------------------------------------------------
    # Parse the command line: [-flag value] pairs in any order.
    #   -c <archive_class>  required, must be CAL or OMS
    #   -o <data_source>    required, data source override value
    #   -i <input_dir>      optional, defaults to the current directory
    # ------------------------------------------------------------------
    my $num_args = scalar @ARGV;
    PrintMsg( "D", "number of arguments is $num_args.  We fail if < 3. ", $nm ) ;

    if ($num_args < 3) {    
        PrintMsg( "E", "Too few arguments" ); ## Usage will provide nm
        print $usage;
        exit( $EXIT_FAILURE ) ;
    }
    my ( $option, $inDir, $archive_class, $data_source, $msg ) ;

    # verify the required option(s)
    my $requiredFound = 0 ;
    my $optionalFound = 0 ;
    # BUG FIX: each flag's value is now checked for definedness.  Previously
    # a flag given as the last argument shifted undef, so -c warned on
    # uc(undef) and -o/-i silently carried an undef value downstream.
    while (scalar @ARGV) {
      $option = shift @ARGV;
      if ($option eq "-c") {
         $requiredFound = $requiredFound + 1 ;
         my $value = shift @ARGV;
         if ( !defined $value ) {
            PrintMsg( "E", "Missing value for -c option" ) ; ## Usage will provide nm
            print $usage;
            exit( $EXIT_FAILURE ) ;
         }
         $archive_class = uc($value);
         if ($archive_class ne "CAL" && $archive_class ne "OMS") {
            $msg = "Invalid -c option: $archive_class. Must be CAL or OMS." ;
            PrintMsg( "E", $msg ) ; ## Usage will provide nm
            print $usage;
            exit( $EXIT_FAILURE ) ;
         }
      } elsif ($option eq "-o") {
         $requiredFound = $requiredFound + 1 ;
         my $value = shift @ARGV;
         if ( !defined $value ) {
            PrintMsg( "E", "Missing value for -o option" ) ; ## Usage will provide nm
            print $usage;
            exit( $EXIT_FAILURE ) ;
         }
         $data_source = uc($value);
      } elsif ($option eq "-i") {
         $optionalFound = $optionalFound + 1 ;
         my $value = shift @ARGV;
         if ( !defined $value ) {
            PrintMsg( "E", "Missing value for -i option" ) ; ## Usage will provide nm
            print $usage;
            exit( $EXIT_FAILURE ) ;
         }
         $inDir = $value ;
      } else {
         PrintMsg( "E", "Invalid option found: $option" ) ; ## Usage will provide nm
         print $usage;
         exit( $EXIT_FAILURE ) ;
      }
    }
    PrintMsg( "D", "requiredFound is $requiredFound.  I think it should be 2 out of 3.", $nm ) ;
    if ( $requiredFound < $numRequired ) {
        # Either -c or -o NOT found  (comment fixed: the flags are -c/-o, not -d)
        PrintMsg( "E",  "Too few arguments" ) ; ## Usage will provide nm
        print $usage;
        exit( $EXIT_FAILURE ) ;
    } elsif ( $optionalFound == 0 ) {
        # Default input dir to current dir
        $inDir = "./" ;
    }
########################################################
 ## Echo the effective command line back to the user.
 $msg = "Starting... $nm -i $inDir -c $archive_class -o $data_source" ;
 PrintMsg( "I", $msg ) ; ## msg contains nm
########################################################

    # Locate the regression Ingest path (via null.path) and verify the
    # pipeline is configured with OVERRIDE_REQUIRED=Y before proceeding.
    my $pathname ;
    eval { $pathname = get_regr_path() } ;
    if ( $@ ) {
        PrintMsg( "E", $@ ) ;  # no nm because die will mention this script
        exit( $EXIT_FAILURE );
    }
    PrintMsg( "D", "pathname     is $pathname", $nm ) ;
    eval { check_pipeline( $pathname ) } ;
    if ( $@ ) {
        PrintMsg( "E", $@ ) ;  # no nm because die will mention this script
        exit( $EXIT_FAILURE );
    }

    # ------------------------------------------------------------------
    # Convert $inDir to an absolute path and make it the working
    # directory; the dataset scans below assume the input dir is cwd.
    # ------------------------------------------------------------------
    my @pwdOut =`pwd` ;
    use Cwd ;
    my $cwd = getcwd() ;
    # getcwd() returns no trailing slash; append one for clean joins.
    if ( substr( $cwd, -1, 1 ) ne "/" ) {
        $cwd = $cwd . '/' ;
    }

    if ( substr($inDir, 0, 1) ne "/" ) {
        # Relative path: anchor it at the current working directory.
        $inDir = $cwd . $inDir ;
    }

    # BUG FIX: chdir was previously unchecked; a bad -i directory would
    # silently leave the script running (and ingesting) from the wrong place.
    if ( !chdir($inDir) ) {
        PrintMsg( "E", "Cannot chdir to $inDir: $!", $nm ) ;
        exit( $EXIT_FAILURE ) ;
    }
    PrintMsg( "I", "chdir'd to $inDir ", $nm ) ;
    @pwdOut =`pwd` ;   # retained for debugging parity with original code

    # Scan the input directory for candidate datasets of this archive class.
    my @datasets ;
    eval { @datasets = get_cal_oms_datasets($inDir, $archive_class) } ;
    if ( $@ ) {
        PrintMsg( "E", $@ ) ;  # no nm because die will mention this script
        exit( $EXIT_FAILURE );
    }
    $msg = sprintf( '@datasets are (%s)', join(", ", @datasets) );
    PrintMsg( "D", $msg, $nm );

    # Pull INGEST_TRIGGER_ROOT and INGEST_SOURCE_DIR out of the path file.
    my ($trig_dir, $source_dir) ;
    eval { ($trig_dir, $source_dir) = get_path_dirs( $pathname ) } ;
    if ( $@ ) {
        PrintMsg( "E", $@ ) ;  # no nm because die will mention this script
        exit( $EXIT_FAILURE );
    }

    # Record the data source override in the source directory.
    eval { set_data_source($source_dir,$data_source) } ;
    if ( $@ ) {
        PrintMsg( "E", $@ ) ; ## Usage will provide nm
        print $usage ;
        exit( $EXIT_FAILURE );
    }

    # get list of associations - make OMS *_asn.fits file if missing
    # (per the header comment, a missing OMS ASN file is rebuilt from the
    # OPUS_DB.asn_members table -- TODO confirm against get_asn_datasets)
    my @asn_datasets ;
    eval { @asn_datasets = get_asn_datasets($inDir, \@datasets, $archive_class) } ;
    if ( $@ ) {
        PrintMsg( "E", $@ ) ;  # no nm because die will mention this script
        exit( $EXIT_FAILURE );
    }
    $msg = '@asn_datasets are ' ;
    $msg = $msg . "(" . join(", ", @asn_datasets) . ")";
    PrintMsg( "D", $msg, $nm );
    # get list of all request datasets - eliminating association members
    # (datasets belonging to an association are dropped so the remainder
    # can be ingested as singletons)
    my @request_dsets ;
    eval { @request_dsets = get_request_datasets($inDir, \@datasets,\@asn_datasets, $archive_class) } ;
    if ( $@ ) {
        PrintMsg( "E", $@ ) ;  # no nm because die will mention this script
        exit( $EXIT_FAILURE );
    }
    $msg = '@request_dsets are ' ;
    $msg = $msg . "(" . join(", ", @request_dsets) . ")";
    PrintMsg( "D", $msg, $nm );

    # Partition the request datasets into sublists that share data_ids.
    my @ing_sublists = request_sublists(\@request_dsets, $archive_class);
    $msg = sprintf( '@ing_sublists are (%s)', join(", ", @ing_sublists) );
    PrintMsg( "D", $msg, $nm );

    # Trigger files for HST data go under the "hst/" subdirectory.
    my $hst_trig_dir = $trig_dir . "hst/";
    PrintMsg( "D", "hst_trig_dir is $hst_trig_dir", $nm ) ;

    # Open one database connection and reuse it for every sublist.
    my $opusConn ;
    eval { $opusConn = open_opus_db( $OPUS_SERVER, $OPUS_DB ) } ;
    if ( $@ ) {
        PrintMsg( "E", $@ ) ;  # no nm because die will mention this script
        exit( $EXIT_FAILURE );
    }

    # Ingest each sublist via the trigger file interface; stop on first failure.
    for my $sublist_ref (@ing_sublists) {
        eval { ingest_sublist( $opusConn, $sublist_ref, $archive_class, $hst_trig_dir ) } ;
        if ( $@ ) {
            PrintMsg( "E", $@ ) ;  # no nm because die will mention this script
            exit( $EXIT_FAILURE );
        }
    }
    close_opus_db( $opusConn );
########################################################
 ## Tell the user that we are done.
 $msg = "Completed...$nm -i $inDir -c $archive_class -o $data_source" ;
 PrintMsg( "I", $msg ) ; ## msg contains nm
########################################################
    exit( $EXIT_SUCCESS ); 

