#!/usr/bin/env perl 
#----------------------------------------------------------------------------
#
# Name: ingest_dads_logs.pl
#
# This perl script is used interactively.  This tool is run from a default
# directory that contains DADS log datasets to be ingested.  A dads log dataset
# is identified by a rootname of any length and one file with the extension
# ".log".  Any other file extension will be accepted but the file will not be 
# validated.  The default directory may contain many DADS log datasets but no
# other types of data should be included.  The tool is designed to 
# make it easy to ingest multiple datasets out of the same directory.  
# The user must know the OPUS data_id used to process the data.  The script will
# use the null.path to find the Ingest path name and it will assure that this is
# an Ingest path that has the keyword OVERRIDE_REQUIRED=Y value. The script will
# use the Ingest path file to find the INGEST_PATH_ROOT, INGEST_TRIGGER_ROOT and
# INGEST_SOURCE_DIR values.  The script will copy the dataset and create the
# trigger file, for each dataset found by using the "*.log" search in the
# default directory.
#   
# Account requirements:
#
# The account using this program must have used the opus_login.csh setup
# to set the proper ENV variables, OPUS_DEFINITIONS_DIR, $OPUS_SERVER and $OPUS_DB.
# The $PATH variable must include the OPUS bin directory containing Perl modules.
#
# Command Usage: (see $usage definition below)
#
#  Return values:
#     0 - success
#     1 - failure
#
# ENV variables:
#     OPUS_SERVER is the database server name
#     OPUS_DB is the OPUS database name
#    
# History:
# Date     OPR      Who         Reason
# -------- -------- ----------  ---------------------------------------------
# 11/16/04 52338    Baum        Initial code
# 03/01/10 59963    Sherbert    Changed input parameters 
# 09/18/12 72255    Sherbert    get rid of DSQUERY
#----------------------------------------------------------------------------
# set up external routines
unshift @INC,(split /:/, $ENV{PATH});
require 'ingest_tools_pkg.pl';  # subroutine package for tools
require 'printmsg.pl' ;       # PrintMsg
use strict ;

# Script name used to tag messages and shown in the usage text.
# NOTE(review): the file header names this script ingest_dads_logs.pl
# (plural) -- confirm which spelling is intended before changing it.
my $nm = "ingest_dads_log.pl" ;
######################################
#$ENV{STDB_REPORT_LEVEL} = "STDB_INFO" ;   ## rm when testing done
#$ENV{MSG_REPORT_LEVEL}  = "MSG_ALL"   ;   ## rm when testing done
#PrintMsg( "D", "## Attempted to set STDB And MSG reporting HIGH", $nm );
######################################
PrintMsg( "D", "Running $0 " );  ## $0 == which

# begin 
    #specify exit status values
    my $EXIT_FAILURE =      1;   # exit status for errors
    my $EXIT_SUCCESS =      0;   # exit status for success

# define command usage
    # Only -o is mandatory.  The argument handling further down falls back
    # to the current directory when -i is not given, so the usage text
    # shows -i as optional and points at <input_dir> rather than at a
    # "default directory".
    my $numRequired = 1 ;
    my $usage = <<"EOM";
Usage:
>${nm} [-i <input_dir>] -o <data_source>

   The order of [-flag value] pairs does not matter.
   Tool now responds to MSG_REPORT_LEVEL settings.

   <input_dir>     is the location of the data to be archived.
                   Only data from one type of archive_class should be here.
                   Optional; defaults to the current directory.

   <data_source>   is the four character data source override value to be
                   set in the regression pipeline.  Required.

   Note that the DADS log datasets must be in <input_dir>.  At least
   one file in each dataset must have the extension ".log".

EOM

 # Database connection settings come from the environment; both the server
 # name and the database name have to be present before anything else runs.
 my ( $OPUS_SERVER, $OPUS_DB ) = @ENV{ qw(OPUS_SERVER OPUS_DB) } ;

 ## Stop right away when either one is absent
 unless ( defined($OPUS_SERVER) && defined($OPUS_DB) ) {
    PrintMsg( "E", "Missing ENV variables: OPUS_SERVER or OPUS_DB ", $nm ) ;
    exit( $EXIT_FAILURE );
 }

    # start argument checks
    # Recognised flags: -o <data_source> (required) and -i <input_dir>
    # (optional).  Every flag must be followed by a value; the pairs may
    # appear in either order.
    my $num_args = scalar @ARGV;
    PrintMsg( "D", "number of arguments is $num_args.  We fail if < 2. ", $nm ) ;

    if ($num_args < 2) {
        PrintMsg( "E",  "Too few arguments" ) ; ## Usage will provide nm
        print $usage;
        exit ($EXIT_FAILURE);
    }
    my ( $option, $inDir, $data_source, $msg ) ;

    # verify the required option(s)
    my $requiredFound = 0 ;
    my $optionalFound = 0 ;
    while (scalar @ARGV) {
      $option = shift @ARGV;
      if ($option ne "-o" && $option ne "-i") {
         PrintMsg( "E", "Invalid option found: $option" ) ; ## Usage will provide nm
         print $usage;
         exit( $EXIT_FAILURE ) ;
      }

      # A flag that is the last word on the command line has no value.
      # Without this check "-o" would silently yield an empty data source
      # and still be counted as found.
      if ( !@ARGV ) {
         PrintMsg( "E", "Missing value for option: $option" ) ; ## Usage will provide nm
         print $usage;
         exit( $EXIT_FAILURE ) ;
      }
      my $value = shift @ARGV ;

      if ($option eq "-o") {
         $requiredFound = $requiredFound + 1 ;
         $data_source = uc($value);     # data source is always upper-cased
      } else {
         $optionalFound = $optionalFound + 1 ;
         $inDir = $value ;
      }
    }
    PrintMsg( "D", "requiredFound is $requiredFound.  I think it should be 1 out of 2.", $nm ) ;
    if ( $requiredFound < $numRequired ) {
        # The required -o option was NOT found
        PrintMsg( "E",  "Too few arguments" ) ; ## Usage will provide nm
        print $usage;
        exit( $EXIT_FAILURE ) ;
    } elsif ( $optionalFound == 0 ) {
        # Default input dir to current dir
        $inDir = "./" ;
    }
########################################################
 ## Tell user what the command line is...
 $msg = "Starting... " ;
 $msg = $msg . $nm . " -i " . $inDir . " -o " . $data_source ;
 PrintMsg( "I", $msg ) ; ## msg contains nm 
########################################################
########################################################

    # Ask the tools package for the regression path name.  A die raised by
    # the helper is trapped, reported as an error, and ends the run.
    my $pathname = eval { get_regr_path() } ;
    if ( $@ ) {
        PrintMsg( "E", $@ ) ;  # no nm because die will mention this script
        exit( $EXIT_FAILURE );
    }
    PrintMsg( "D", "pathname     is $pathname", $nm ) ;

    # Let check_pipeline vet that path; a die from it is fatal here too.
    eval { check_pipeline( $pathname ) } ;
    if ( $@ ) {
        PrintMsg( "E", $@ ) ;  # no nm because die will mention this script
        exit( $EXIT_FAILURE );
    }

    # need dir to be a full path name
    # A relative $inDir is anchored at the directory the tool was started
    # from, then the script moves into $inDir because the dataset checks
    # and copies below address the data as "./".
    use Cwd ;
    my $cwd = getcwd() ;
    if ( !defined $cwd ) {
        # getcwd() returns undef when the current directory cannot be
        # determined; a relative input directory cannot be resolved then.
        PrintMsg( "E", "Unable to determine the current directory: $!", $nm ) ;
        exit( $EXIT_FAILURE );
    }

    # Make sure the prefix ends in a slash before gluing $inDir onto it.
    if ( substr( $cwd, -1 ) ne "/" ) {
        $cwd = $cwd . '/' ;
    }

    if ( substr($inDir, 0, 1) ne "/" ) {
        # Not an absolute path: prefix it with the starting directory.
        $inDir = $cwd . $inDir ;
    }

    # Stop if the directory cannot be entered; carrying on would process
    # whatever happens to be in the directory the tool was started from.
    if ( !chdir $inDir ) {
        PrintMsg( "E", "Unable to chdir to $inDir: $!", $nm ) ;
        exit( $EXIT_FAILURE );
    }
    PrintMsg( "I", "chdir'd to $inDir ", $nm ) ;

    # Build the list of datasets found in the input directory.  The helper
    # is called in list context inside eval so that a die becomes a
    # reported error instead of an abort with a stack message.
    my @datasets = eval { get_datasets($inDir, 'dlg') } ;
    if ( $@ ) {
        PrintMsg( "E", $@ ) ;  # no nm because die will mention this script
        exit( $EXIT_FAILURE );
    }

    # Debug listing of what was found.
    $msg = '@datasets are ' . "(" . join(", ", @datasets) . ")" ;
    PrintMsg( "D", $msg, $nm );

    # Hand the list to check_datasets together with the current directory
    # and the ".log" extension; a die from it is fatal.
    eval { check_datasets(\@datasets, "./", ".log") } ;
    if ( $@ ) {
        PrintMsg( "E", $@ ) ;  # no nm because die will mention this script
        exit( $EXIT_FAILURE );
    }

    # Read the trigger and source directories from the path definition.
    my ( $trig_dir, $source_dir ) = eval { get_path_dirs( $pathname ) } ;
    if ( $@ ) {
        PrintMsg( "E", $@ ) ;  # no nm because die will mention this script
        exit( $EXIT_FAILURE );
    }
#   PrintMsg( "D", "trig_dir     is $trig_dir", $nm ) ;
#   PrintMsg( "D", "source_dir   is $source_dir", $nm ) ;
#   PrintMsg( "D", "data_source  is $data_source", $nm ) ;

    # Root directory of the path; the dataset destination hangs off it.
    my $root_dir = eval { get_root_dir( $pathname) } ;
    if ( $@ ) {
        PrintMsg( "E", $@ ) ;  # no nm because die will mention this script
        exit( $EXIT_FAILURE );
    }
    PrintMsg( "D", "regr_root_dir  is $root_dir", $nm ) ;

    # Destination for the data files ("dlg/" under the root) and for the
    # trigger files ("hst/" under the trigger directory).
    my $dlg_data_dir = "${root_dir}dlg/" ;
    PrintMsg( "D", "dlg_dir        is $dlg_data_dir", $nm ) ;
    my $dlg_trigger_dir = "${trig_dir}hst/" ;
    PrintMsg( "D", "dlg_trig_dir   is $dlg_trigger_dir", $nm ) ;

    # Apply the data source override.  This is the one step where a
    # failure also prints the usage text, ahead of the error message.
    eval { set_data_source($source_dir,$data_source) } ;
    if ( $@ ) {
        print $usage ;
        PrintMsg( "E", $@ ) ;
        exit( $EXIT_FAILURE ) ;
    }

    # Copy every dataset from the current directory to its destination.
    eval { copy_datasets(\@datasets, "Dlg", "./", $dlg_data_dir, $dlg_trigger_dir) } ;
    if ( $@ ) {
        PrintMsg( "E", $@ ) ;  # no nm because die will mention this script
        exit( $EXIT_FAILURE );
    }
########################################################
 ## Report completion, echoing the effective command line
 $msg = join( "", "Completed...", $nm, " -i ", $inDir, " -o ", $data_source ) ;
 PrintMsg( "I", $msg ) ;
########################################################
    # Everything above succeeded.
    exit( $EXIT_SUCCESS);





















