#!/usr/bin/env perl
#----------------------------------------------------------------------------
#
# Name: save_dataset_log.pl
#
# This perl script is used for the SAVLOG task of the Ingest pipeline. It is 
# designed to be run only as a command in an XPOLL process with no arguments.
# All parameters are passed as environmental variables. 
#
# Pipeline Usage:
#       command:save_data_set_log.pl 
#     
#       In the pipeline, the resource file must set the following ENV 
#       variables: INGEST_LOG_DIR, DADS_LOG_DIR, DADS_LOG_CLASS,
#       LOG_FILE_TYPE, OK_TO_UPDATE_DATABASE, ARCH_SERVER and ARCH_DB.
#
#       This script uses the following ENV variables that are set by the 
#       XPOLL process: OSF_DATASET and OSF_DATA_ID.
#
# Implementation note; the PrintMsg subroutine prints to both the standard
# output and to a FILEHANDLE reference named $trl
#
# When new daily logs are created this process must find the previous daily log
# and create the OSF to get it archived.
#
# History:
# Date     OPR      Who         Reason
# -------- -------- ----------  ---------------------------------------------
# 01/15/04 49559    Baum        Initial code
# 03/18/05 51433    MSwam       A tweak to deliver this after 49559 was closed
# 07/11/07 57369    MSwam       Parameterize osf_create status for multi-mission
# 09/09/09 59448    MSwam       add path name to make daily log filename unique
# 03/25/10 64273    MSwam       Replace ST_DBlib with DBI
# 06/28/10 65360    MSwam       use single quotes for SQLServer
# 09/18/12 72328    Sherbert    get rid of DSQUERY
#----------------------------------------------------------------------------
# set up external routines
unshift @INC,(split /:/, $ENV{PATH});
require 'printmsg.pl';       # prints a formatted status message
require 'do_dbi_pkg.pl';         # run query returning only record count
use File::Basename;          # for fileparse function


# subroutine prototypes
sub ErrorExit($);

#begin
#
# Main script.  Appends (or copies) the per-dataset Ingest log onto the
# daily DADS log, records the save in the archive database, removes the
# per-dataset log, and - when a brand new daily log was started - creates
# OSFs so that any older daily logs get archived.

    #specify exit status values

    $OSF_FAILURE =      7;   # exit status for XPOLL
    $OSF_SUCCESS =      9;   # exit status for XPOLL

    # other constants
    $true = 1;
    $false = 0;

    # Verify ENV variables set by XPOLL using pipeline
    $osf_root = $ENV{"OSF_DATASET"};
    $osf_data_id = $ENV{"OSF_DATA_ID"};
    $osf_path = $ENV{"PATH_FILE"};
    $path_baseroot = $ENV{"PATH_BASEROOT"};

    if (!defined($osf_root) || !defined($osf_data_id) || !defined($osf_path) ||
        !defined($path_baseroot))
    {
        PrintMsg("E","This script must be run as a pipeline command ...");
        ErrorExit("where OSF_DATASET, OSF_DATA_ID, PATH_FILE & PATH_BASEROOT are defined."); 
    }
    # get file_name, path_name and extension from $osf_path
    @parsed_name = fileparse($osf_path, '.path' );
 
    if (!defined( $parsed_name[0])) {
        ErrorExit("Invalid PATH_FILE name.");
    }
    $osf_path = $parsed_name[0].$parsed_name[2];  # concat name and extension

    # Verify ENV variables set by XPOLL using resource file
    $ingest_log_dir  = $ENV{"INGEST_LOG_DIR"};
    $dads_log_dir    = $ENV{"DADS_LOG_DIR"};
    $dads_log_class  = lc($ENV{"DADS_LOG_CLASS"});
    $log_file_type   = $ENV{"LOG_FILE_TYPE"};
    $ok_to_update_db = $ENV{"OK_TO_UPDATE_DATABASE"};
    $osf_create_status = $ENV{"OSF_CREATE_STATUS"};
    $ARCH_SERVER     = $ENV{"ARCH_SERVER"};
    $ARCH_DB         = $ENV{"ARCH_DB"};

    if (!defined($ingest_log_dir)  || !defined($dads_log_dir) || 
        !defined($dads_log_class)  || !defined($log_file_type) || 
        !defined($osf_create_status) ||
        !defined($ok_to_update_db) || !defined($ARCH_SERVER) || !defined($ARCH_DB)) 
    {
        PrintMsg("E","Missing at least one ENV variable in resource file.");
        PrintMsg("E",
          "Need: INGEST_LOG_DIR, DADS_LOG_DIR, DADS_LOG_CLASS, LOG_FILE_TYPE, ".
          "OK_TO_UPDATE_DATABASE, OSF_CREATE_STATUS.");
        ErrorExit(
          "Also need database locators: ARCH_SERVER and ARCH_DB");
    }
          
    # begin processing - first verify dataset log file existence 
    $ds_log_spec = $ingest_log_dir."/".$osf_root."_".$osf_data_id.".log";
    if (!(-e $ds_log_spec)) {
       # sign on - this message goes only to the process log
       PrintMsg ("I",
          "--- start --- Save Dataset Log for $osf_root $osf_data_id ---");
       PrintMsg("W",
          "Cannot find dataset log file at: $ds_log_spec");
    } else {
       # open dataset log file for appending
       # BUGFIX: the open was previously unchecked (and 2-arg); a failure
       # left PrintMsg writing to a dead handle.  Fail loudly instead.
       open (TRL, ">>", $ds_log_spec)
           or ErrorExit("Cannot open dataset log file for append: $ds_log_spec");

       # create reference to the TRL filehandle for use by PrintMsg
       # (PrintMsg echoes to both stdout and $trl when $trl is defined)
       $trl = \*TRL;
       
       # sign on - this message goes to dataset log
       PrintMsg ("I",
          "--- start --- Save Dataset Log for $osf_root $osf_data_id ---");

       # get the completion date and daily log name using local time
       ($comp_date,$daily_log,$log_datestamp) = 
                   get_time_based_names( $log_file_type, $path_baseroot);

       # build the full file spec
       $daily_log_spec = $dads_log_dir."/".$daily_log;
    
       # check if daily log file exists
       if (-e $daily_log_spec) {
          # append to existing file after closing dataset log
          PrintMsg("I", "Appending $ds_log_spec ...");
          PrintMsg("I", "       to $daily_log_spec");
          close TRL;
          undef $trl;
          $command = "/bin/cat $ds_log_spec >> $daily_log_spec";
          $sys_stat = system("$command");
          $new_daily_log = $false;
       } else {
          # copy to new file after closing dataset log
          PrintMsg("I", "Copying $ds_log_spec ..."); 
          PrintMsg("I", "     to $daily_log_spec");
          close TRL;
          undef $trl;
          $command = "/bin/cp $ds_log_spec $daily_log_spec";
          $sys_stat = system("$command");
          $new_daily_log = $true;
       }
       # from now on messages only go to process log file
       if ($sys_stat) { 
          ErrorExit("Cannot execute command:$command");
       }
       # open database for queries
       $db = DoDBIopen( $ARCH_SERVER, $ARCH_DB, $OSF_FAILURE);
    
       # update the database with either log name or the completion date
       update_database( $comp_date, $daily_log);

       # end of all queries
       DoDBIclose($db);
       undef $db;

       # delete log file that was saved
       PrintMsg("I","Deleting $ds_log_spec");
       unlink ($ds_log_spec);
              
       if ($new_daily_log) {
          # a new daily log was started: ingest any older daily logs
          # still sitting in the dads log directory
          ingest_old_daily_log($log_datestamp);
       }
    } # end log existence check
    PrintMsg ("I",
       "---  end  --- Save Dataset Log for $osf_root $osf_data_id ---");
    exit( $OSF_SUCCESS);  
#----------------------------------------------------------------------------
sub get_time_based_names {
    # Build three values from the current local time:
    #   - a completion date ("mm-dd-yy hh:mi:ss") for the DB update,
    #   - the daily log file name ("<type>_<path>_<yyyy_mm_dd>.log"),
    #   - the bare yyyy_mm_dd datestamp shared by both.
    my ($log_type, $pathname) = @_;

    my ($sec, $min, $hour, $mday, $mon, $yr) = localtime(time);
    $mon++;                                  # localtime months are 0-based
    my $year_full = $yr + 1900;              # localtime years count from 1900
    my $year_short = ($yr > 99) ? $yr - 100 : $yr;   # two-digit year

    # completion date in mm-dd-yy hh:mi:ss form for the query below
    my $completion = sprintf("%02d-%02d-%02d %02d:%02d:%02d",
                             $mon, $mday, $year_short, $hour, $min, $sec);

    # daily log name shares the same year, month and day as $completion
    my $datestamp = sprintf("%4d_%02d_%02d", $year_full, $mon, $mday);
    my $log_name  = sprintf("%s_%s_%s.log", $log_type, $pathname, $datestamp);

    return ($completion, $log_name, $datestamp);
}
#----------------------------------------------------------------------------
sub  update_database {
    # Record the saved daily log in the database.  First try stamping the
    # log file name on the ingest request row; when no row matches, fall
    # back to stamping the archive completion time of a successful group
    # request (if one exists for this OSF).
    my ($completion_time, $log_file_name) = @_;

    # non-zero update count means the log name was recorded - done
    return if update_log_file_name($log_file_name);

    # see if this OSF names a group request that completed successfully;
    # an undefined generation date means it did not
    my ($generation_date, $mission) = check_gen_date_mission();
    return unless defined($generation_date);

    update_completion_time($completion_time, $generation_date, $mission);
}
#----------------------------------------------------------------------------
sub  update_log_file_name {
    # Try to stamp the saved daily log name onto the ingest request row
    # for this OSF ($osf_root global); returns the update count so the
    # caller can tell whether the row existed.
    # NOTE(review): $log_file_name/$osf_root are interpolated straight
    # into the SQL - values come from the pipeline, not end users.
    my ($log_file_name) = @_;

    my $sql = <<"EOQ";
UPDATE ingest_data_set_info
SET ids_log_file_name = '$log_file_name'
WHERE ids_ins_request_id = '$osf_root'
EOQ

    # numeric compare on purpose: a "zero rows" count may still be true
    my $rows_updated = DoDBI( $db, $sql);
    if ($rows_updated == 0) {
        PrintMsg("I","No ids_log_file_name update for this OSF.");
    }
    else {
        PrintMsg("I","Updated ids_log_file_name to $log_file_name.");
    }
    return $rows_updated;
}
#------------------------------------------------------------------------
sub check_gen_date_mission {
    # See if the OSF is for a group request and get the generation date
    # and mission name of that request. Makes a join with the
    # archive_data_set_all table to verify the request was successful.
    # Returns (generation_date, mission); the list is empty/undefined when
    # either this OSF is not a group request or the group request was not
    # successful.
    #
    # globals: $db, $osf_root, $osf_data_id
    #
    # Changes: dropped the empty prototype (the sub is always called with
    # parens), removed the unused $err_msg local, made the return explicit.

    # CONVERT(...,109) renders the datetime in a SQLServer-style format
    my $query = <<"EOQ";
SELECT DISTINCT 
    CONVERT(varchar,ids_generation_date,109) generation_date, ids_mission
FROM ingest_data_set_info, archive_data_set_all
WHERE ids_group_name = '$osf_root' and ids_group_data_id = '$osf_data_id'
   and ids_receipt_date = 
      (SELECT max(ids_receipt_date) 
       FROM ingest_data_set_info 
       WHERE ids_group_name = '$osf_root' and 
             ids_group_data_id = '$osf_data_id')
   and ids_archive_class = ads_archive_class
   and ids_data_set_name = ads_data_set_name 
   and ids_generation_date = ads_generation_date
   and ids_mission = ads_mission
EOQ

    my @record = DoDBIselect( $db, $query);
    return @record;
}
#----------------------------------------------------------------------------
sub  update_completion_time {
    # Stamp ads_completion_time on every archive_data_set_all record of
    # the group request, located by joining with ingest_data_set_info
    # (SQLServer-style UPDATE ... FROM join).
    #
    # globals: $db, $osf_root, $osf_data_id
    my ($completion_time, $generation_date, $mission) = @_;

    my $sql = <<"EOQ";
UPDATE archive_data_set_all
SET ads_completion_time = '$completion_time'
FROM ingest_data_set_info, archive_data_set_all
WHERE ids_group_name = '$osf_root' 
  and ids_group_data_id = '$osf_data_id'
  and ids_generation_date = '$generation_date'
  and ids_mission = '$mission'
  and ads_data_set_name = ids_data_set_name
  and ads_archive_class = ids_archive_class
  and ads_generation_date = ids_generation_date
  and ads_mission = ids_mission
EOQ

    my $rows_updated = DoDBI( $db, $sql);
    # a verified group request must match at least one archive record
    if ($rows_updated == 0) {
        ErrorExit("Failed to update ids_completion_date values.");
    }
    PrintMsg("I","$rows_updated updates of ads_completion_time to $completion_time.");
}
#------------------------------------------------------------------------
sub ingest_old_daily_log {
    # Ingest old daily logs in the dads log directory that do not match
    # the date stamp of the current log.
    #
    # globals:
    #   $dads_log_dir, $log_file_type
    #    
    my ($log_datestamp) = @_;

    # get list of all daily log files in the dads log directory.
    # BUGFIX: the mask previously omitted the "/" between the directory
    # and the file-type prefix (compare the "/" inserted when the daily
    # log spec is built), so old logs could never be found.  glob() also
    # avoids spawning a shell the way the old `ls -1` backticks did.
    my @file_list = glob( $dads_log_dir."/".$log_file_type."*");

    # any logs not containing the datestamp of today's log will be
    # ingested (since they are "old")
    foreach my $file_spec (@file_list) {
        if (index( $file_spec, $log_datestamp) < 0) {
            # this file is not the current daily log - ingest it
            ingest_daily_log( $file_spec);
        }
    }
}
#------------------------------------------------------------------------
sub ingest_daily_log
{
   # Queue one old daily log for archiving by creating an ingest OSF
   # named after the log file's root name (directory stripped, trailing
   # ".log" dropped).
   #
   # globals: $osf_path, $dads_log_class, $osf_create_status
   my ($log_spec) = @_;

   # root name runs from just past the last "/" up to the last ".log"
   my $name_start = rindex( $log_spec, "/") + 1;
   my $name_len   = rindex( $log_spec, ".log") - $name_start;
   my $osf_name   = substr( $log_spec, $name_start, $name_len);

   PrintMsg("I","Trying to create Ingest OSF for log file $osf_name"); 
   my $create_cmd = "osf_create -p $osf_path -f $osf_name"
                  . " -t $dads_log_class -s $osf_create_status";
   if (system($create_cmd)) {
       # Failure to create this OSF is generally not a problem since
       # a duplicate OSF would do the work anyhow. If the failure was
       # caused by some other pipeline problem the repair is simple:
       # a daily log hanging around in the ing/dlg directory is easy to
       # notice, and the easy fix is to interactively run osf_create to
       # generate the ingest OSF with the data_id of "dlg".
       PrintMsg("W","Failed to create OSF - possible duplicate");
       PrintMsg("W","Failed cmd:".$create_cmd);
   }
}
#----------------------------------------------------------------------------
sub ErrorExit ($) {   
   # Fatal-error exit: close the $db database handle if one is open,
   # write the message to the log via PrintMsg, and terminate the script
   # with the XPOLL failure status.  (Prototype kept to match the
   # forward declaration at the top of the script.)
   my ($error_text) = @_;
   DoDBIclose($db) if defined($db);
   PrintMsg("E", $error_text);
   exit($OSF_FAILURE);
}
