#!/usr/bin/env perl
#----------------------------------------------------------------------------
#
# Name: query_dataset_status.pl
#
# This perl script is used for the IQUERY task of the Ingest pipeline or 
# interactively. In the pipeline, it is designed to be controlled by the XPOLL
# task where both pipeline and resource file variables are passed as ENV
# variables. There are two modes of operation for this script. First the process
# must check if the ENV variable OSF_DATASET is present, which  indicates that
# the process is being run in the pipeline as an OSF poller. In pipeline mode, 
# there are two cases:
#
#  1) The OSF_DATA_ID is lowercase -- this is a data_id for a group request that
#  matches the ids_group_data_id in the ingest_data_set_info table. Report to 
#  the dataset log only, on the status of all members of the request that also
#  has ids_group_name that matches the OSF_DATASET value. 
#
#  2) The OSF_DATA_ID in uppercase -- this is a data_id from an installion OSF
#  where the OSF_DATASET bvalues matched the ids_ins_request_id in the 
#  ingest_data_set_info table. Report to the dataset log only, on the status
#  of that request including information on all files of the request. 
#
#  In pipeline mode this task cannot be run after the SAVLOG task has deleted 
#  the dataset log file. If this is tried, the process goes to an error status
#  with a process log message that suggests using the intercative mode. 
#
# In interactive mode, there are two similar cases:
#
#  1) One argument -- the install request id(ids_ins_request_id) - generate the
#  same report as case one in the pipeline mode but send it to stdout instead of
#  the dataset log.
#
#  2) Two arguments -- the group name (ids_group_name) and the group data_id
#  ids_group_data_id) -- generate the same report as case two in the pipeline
#  mode but send it to stdout instead of the dataset log. 
#
# Pipeline Usage:
#       command:query_dataset_status.pl 
#     
#       In the pipeline, the resource file must set the following ENV 
#       variables: INGEST_LOG_DIR, DSQUERY and ARCH_DB.
#
#       This scripts uses the following ENV variables that are set by the 
#       XPOLL process: OSF_DATASET and OSF_DATA_ID.
#
# Interactive Usage:
#       command query_dataset_status.pl [ (<install_request_id> | 
#                                  <group_name> <group_data_id>) ]
#
#       The ENV variables DSQUERY amd ARCH_DB must be defined.
#
#
# Logging note: 
#    In pipeline mode, only sign on, sign off and error messages go to the 
#    process log. The dataset log gets all the process log messages and the 
#    reports. In interactive mode all messages and reports go to stdout.
#
# History:
# Date     OPR      Who         Reason
# -------- -------- ----------  ---------------------------------------------
# 02/06/04 49924    Baum        Initial code
# 03/18/05 53053    Baum        JBD project: remove NSA media from query
# 03/25/10 64274    MSwam       Replace ST_DBlib with DBI
# 06/30/10 64432    MSwam       Use single quotes for SQLServer
#----------------------------------------------------------------------------
# set up external routines
unshift @INC,(split /:/, $ENV{PATH});
require 'printmsg.pl';       # prints a formatted status message
require 'do_dbi_pkg.pl';         # run query returning only record count

    #specify exit status values

    $OSF_FAILURE =      7;   # exit status for XPOLL
    $OSF_SUCCESS =      9;   # exit status for XPOLL
    
    # other constants
    $true = 1;
    $false = 0;

    # check mode of operation
    $osf_dataset = $ENV{"OSF_DATASET"};
    $osf_data_id = $ENV{"OSF_DATA_ID"};
    $num_args = scalar @ARGV;
    
    if (!defined($osf_dataset)) {
       $interactive_mode = $true;
       if ($num_args == 1) {
	  $install_id = $ARGV[0];
          $process_mode = "DATASET";
       } elsif ($num_args == 2) {
          $process_mode = "GROUP";
	  $group_name = $ARGV[0];
	  $grp_data_id = $ARGV[1];
       } else {
           PrintMsg("E","Invalid number of arguments = $num_args");
	   PrintMsg("E","Usage: ");
	   PrintMsg("E"," One arg - request ID - for dataset report,");
	   ErrorExit(" Two args - group_name data_id - for group report"); 
       }
    } else {
       $interactive_mode = $false;
    
       if ($osf_data_id eq uc($osf_data_id)){
          $process_mode = "DATASET";
	  $install_id = $ENV{"OSF_DATASET"};
       } else {
          $process_mode = "GROUP";
	  $group_name = $osf_dataset;
	  $grp_data_id = $osf_data_id;
       }
    }
    $DSQUERY         = $ENV{"DSQUERY"};
    $ARCH_DB         = $ENV{"ARCH_DB"};
    $ingest_log_dir  = $ENV{"INGEST_LOG_DIR"};
           
    # Verify ENV variables used by both pipeline and interactive jobs
    if (!defined($DSQUERY) || !defined($ARCH_DB)) {
       ErrorExit("Missing ENV variables: DSQUERY or ARCH_DB.");
    }
    # Verify ENV variable used in pipeline    
    if (!$interactive_mode) {
       if (!defined($ingest_log_dir) ) {
	     ErrorExit("The ENV variable INGEST_LOG_DIR must be defined.");
       }
    } 
    # end of parameter validation - start real work
    
    if ($interactive_mode) {
       $rpt = \*STDOUT; # assign report pointer to standout output
    } else {
       # open dataset log file for appending
       $ds_log_spec = $ingest_log_dir."/".$osf_dataset."_".$osf_data_id.".log";
       
       if (!(-e $ds_log_spec)) { # check if dataset log exists
           # no dataset log - logging diagnostics to process log only
           PrintMsg ("I", "--- start --- Query Dataset Status for ".
              "$osf_dataset $osf_data_id ---");
           PrintMsg("I", "Missing dataset log file. ".
              "Try the following interactive command:");
           
           if ($process_mode eq "GROUP") { 
              PrintMsg("I",
              "Cmd: query_dataset_status.pl $group_name $grp_data_id");
           } else {
              PrintMsg("I",
              "Cmd: query_dataset_status.pl $install_id");
           }
           ErrorExit("Cannot find: $ds_log_spec");
       } 
       open (TRL, ">>".$ds_log_spec); 

       if (!(-w TRL)) {   # is it writeable
          ErrorExit( "Cannot open dataset log $ds_log_spec");
       }
       # create reference to the TRL filehandle for use by PrintMsg
       $trl = \*TRL;
       $rpt = $trl;   # assign report pointer to dataset log pointer
    }
     
    if (!$interactive_mode) {
       # write signon message to process and trailer logs   
       PrintMsg ("I", "--- start --- Query Dataset Status for ".
          "$osf_dataset $osf_data_id ---");
    }
    # open database for queries
    $db = DoDBIopen( $DSQUERY, $ARCH_DB, $OSF_FAILURE);
    
    if ($process_mode eq "DATASET") {
       PrintMsg("I","Dataset status report for install id $install_id.");
       write_dataset_report();
    } else {   # $process_mode must be GROUP
       PrintMsg("I","Group status report for all datasets of ".
	   "$group_name $grp_data_id");
       write_group_report();
    }
    # end of all queries
    DoDBIclose($db);

    if (!$interactive_mode) {
       # write signoff message to process and trailer logs   
       PrintMsg ("I", "---  end  --- Query Dataset Status for ".
          "$osf_dataset $osf_data_id ---");
    }
    exit( $OSF_SUCCESS);
#----------------------------------------------------------------------------
sub write_dataset_report {
    #  get the dataset data from ingest_data_set_all, then use that to get
    #  data on all the files.  
    # The data format 102 is "yyyy.mm.dd"
    # The data format 108 is "hh:mm:ss"
    # The data format 109 is "mon.dd.yyyy hh:mi:ss.mmmAM (or PM)" 

    my $query = <<"EOQ";       
SELECT ids_group_name, ids_group_data_id, ids_data_set_name, ids_archive_class,
   CONVERT(varchar,ids_generation_date,109) generation_date, ids_mission,
   (CONVERT(varchar,ids_receipt_date,102) + ' ' +
      CONVERT(varchar,ids_receipt_date,108)) receipt_date, 
   ids_path_name,
   ids_data_set_size, ids_file_count, ids_install_flag, 
   (CONVERT(varchar,ids_nsa_req_date,102) + ' ' +
      CONVERT(varchar,ids_nsa_req_date,108)) nsa_req_date,
   (CONVERT(varchar,ids_nsa_rsp_date,102) + ' ' +
      CONVERT(varchar,ids_nsa_req_date,108)) nsa_rsp_date,
   ids_log_file_name
FROM ingest_data_set_info
WHERE ids_ins_request_id = '$install_id' 
EOQ

    PrintMsg("I","Querying ingest_data_set_info for one request.");
    my $err_msg = "Cannot access specified ingest_data_set_info record";
    my @req_record = DoDBIselect( $db, $query);
    
    if (!defined($req_record[0])) {
        PrintMsg( "W","Failed to read ingest_data_set_info table");
    } else {
       report_files(\@req_record);
    }
}
#----------------------------------------------------------------------------
sub report_files {  # one arg - dataset record pointer
    # use data set description values to query the file data of each file
    # in the dataset. The file data is stored in an array of records. When the
    # query is done, the full report is written, including dataset descriptors 
    # and a description of the status of each file.
    my ($file_rec_ptr) = @_;
    local ($group_name, $grp_data_id, $ds_name, $archive_class,$gen_date, 
       $mission, $receipt_date, $path_name, $ds_size, $file_count, 
       $install_flag, $nsa_req_date, $nsa_rsp_date, $log_file_name) = 
       @$file_rec_ptr;
       
    if (!defined( $group_name)) {
       ErrorExit("Invalid record pointer in report_files routine.");
    }
    $query = <<"EOQ";       
SELECT 
   ifi_file_extension, ifi_file_name, ifi_file_type, 
   ifi_pre_compress_size, ifi_post_compress_size, 
   ifi_verify_status, ifi_checksum, ifi_checksum_status
FROM ingest_files
WHERE ifi_data_set_name = '$ds_name' and ifi_archive_class = '$archive_class'
   and ifi_generation_date = '$gen_date' and ifi_mission = '$mission' 
ORDER BY ifi_file_extension
EOQ

    PrintMsg("I","Querying ingest_files.");
    my $err_msg1 = "Cannot access first ingest_files record";
    my $err_msg2 = "Cannot access next ingest_files record";
    my @file_list;

    $sth = DoDBIexecute( $db, $query);
    while ( ( @file_record ) = DoDBIfetch( $db, $query, $sth) ) {
	push @file_list, [@file_record];
    }
    my $file_count = scalar @file_list;
    PrintMsg("I","Obtained $file_count records from ingest_files.");
    my $file_ptr;
        
    if ($file_count == 0) {     
       PrintMsg( "W","Failed to read the ingest_files table");
    } else {
       print $rpt ("\n      **** Dataset Status Report ****\n");
       write_group( $group_name, $grp_data_id, $mission, $gen_date);    
       write_dataset($install_id);
       
       foreach $file_ptr (@file_list) {
          write_file($file_ptr);
       }
       print $rpt ("\n End of Dataset\n\n");
    }
} 
#----------------------------------------------------------------------------
sub write_group {   # three args -- group_name, grp_data_id, mission, gen_date
    my ( $group_name, $grp_data_id, $mission, $gen_date) = @_;
    print $rpt (" Group: $group_name, Data ID: $grp_data_id, for $mission, at".
        " $gen_date\n");
}
#----------------------------------------------------------------------------
sub write_dataset {   # 1 args -  install_id
    # Using local and global variables write dataset info to report
    my ($install_id) = @_;    
    print $rpt ("\n Dataset: $ds_name, Class: $archive_class, ".
	   "Received: $receipt_date\n");
    printf $rpt ("   %d files, %.0f bytes, to be found in $path_name\n", 
	$file_count, $ds_size);
    print $rpt ("   Install ID: $install_id\n");
    print $rpt ("   Installation status: $install_flag ");
	
    if ($install_flag eq "N") {
       print $rpt ("(No, dataset not yet installed)\n");
    } elsif ($install_flag eq "Y") {
       print $rpt ("(Yes, dataset successfully installed into NSA)\n");
    } else {
       print $rpt ("(Error, no installation of this dataset)\n"); 
    }
    if (substr($nsa_rsp_date,0,4) ne "1900") {
       print $rpt 
	  ("   NSA request: $nsa_req_date, NSA response: $nsa_rsp_date\n");
       if ($install_flag ne "Y") {
          print $rpt ("   Install status not yet set by UPDATE task.");
       } 
    } elsif (substr($nsa_req_date,0,4) ne "1900") {
       print $rpt ("   NSA request: $nsa_req_date\n");
       if ($install_flag eq "E") {
          print $rpt ("   Install failure occurred after NSA request\n");
       } 
    } elsif ($install_flag eq "E") {
       print $rpt ("   Install failure occurred prior to NSA request\n");
    }
    if ($log_file_name =~m/^\s*$/) {  #match empty or blank string
       print $rpt ("   Dataset log file is still present.\n");
    } else {
       print $rpt ("   Dataset log file has been appended to $log_file_name\n");
    }   
}
#----------------------------------------------------------------------------
sub write_file {   # one args -- pointer to file description record
    my ($rec_ptr) = @_; 
    my ($file_extension, $file_name, $file_type, $pre_compress_size, 
       $post_compress_size, $verify_status, $checksum, $checksum_status) 
       = @$rec_ptr;

    print $rpt ("\n File extension: $file_extension, Name: $file_name\n");

    if ($file_type =~m/^\s*$/) {  # match empty or blank string
       print $rpt ("   File not yet validated.\n");
    } else {
       printf $rpt ("   File type: $file_type, Uncompressed size: %.0f, ".
          "Compressed: %.0f\n", $pre_compress_size, $post_compress_size); 
      
       printf $rpt ("   Verify status: $verify_status, Checksum: %d, ".
          "   Checksum status: $checksum_status\n", $checksum);
    }   
}
#----------------------------------------------------------------------------
sub write_group_report { 
    # Check if there is more than one generation date for the group and
    # if so report the number found and report on each request. Generally there
    # is only one group request in the database so when there is only one
    # do not generate any additional reporting.    
    # The data format 109 is "mon.dd.yyyy hh:mi:ss.mmmAM (or PM)" 

    my $query = <<"EOQ";       
SELECT DISTINCT
   CONVERT(varchar,ids_generation_date,109) generation_date, ids_mission
FROM ingest_data_set_info
WHERE ids_group_name = '$group_name' and
   ids_group_data_id = '$grp_data_id'  
ORDER BY generation_date
EOQ

    PrintMsg("I","Querying ingest_data_set_info for generation date.");
    my $err_msg1 = "Cannot access first ingest_data_set_info record";
    my $err_msg2 = "Cannot access next ingest_data_set_info record";
    my @req_list = ();

    $sth = DoDBIexecute( $db, $query);
    while ( ( @req_record ) = DoDBIfetch( $db, $query, $sth) ) {
	push @req_list, [@req_record];
    }
    my $req_count = scalar @req_list;
    
    if ($req_count == 0) {     
        ErrorExit( "Failed to read archive_data_set_all table");
    } elsif ($req_count > 1) {
       PrintMsg("I","Reporting on $req_count separate group requests");
    }    
    my $rec_ptr;
    foreach $rec_ptr (@req_list) {
       ($gen_date,$mission) = @$rec_ptr;
       group_report( $group_name, $grp_data_id, $gen_date, $mission);
    }      

    if ($req_count > 1) {
       PrintMsg("I","Completed reports on $req_count separate group requests");
    }    
}
#----------------------------------------------------------------------------
sub group_report { # four arguments: grp_nmae, grp_data_id, gen_date, mission
    # Query all ingest_data_set_info records of the specific group, then
    # generate report. 
    # The data format 102 is "yyyy.mm.dd"
    # The data format 108 is "hh:mm:ss"

    
    my ($group_name, $grp_data_id, $gen_date, $mission) = @_;
    my $query = <<"EOQ";       
SELECT 
   ids_data_set_name, ids_archive_class, ids_ins_request_id,
   (CONVERT(varchar,ids_receipt_date,102) + ' ' +
      CONVERT(varchar,ids_receipt_date,108)) receipt_date, 
   ids_path_name,
   ids_data_set_size, ids_file_count, ids_install_flag, 
   (CONVERT(varchar,ids_nsa_req_date,102) + ' ' +
      CONVERT(varchar,ids_nsa_req_date,108)) nsa_req_date,
   (CONVERT(varchar,ids_nsa_rsp_date,102) + ' ' +
      CONVERT(varchar,ids_nsa_req_date,108)) nsa_rsp_date,
   ids_log_file_name
FROM ingest_data_set_info
WHERE ids_mission = '$mission' and ids_group_name = '$group_name' and
   ids_group_data_id = '$grp_data_id' and ids_generation_date = '$gen_date' 
ORDER BY ids_data_set_name, ids_archive_class
EOQ

    PrintMsg("I","Querying ingest_data_set_info using generation date.");
    my $err_msg1 = "Cannot access the ingest_data_set_info record";
    my $err_msg2 = "Cannot access the later ingest_data_set_info records";

    my @dset_list = ();
    $sth = DoDBIexecute( $db, $query);
    while ( ( @dset_record ) = DoDBIfetch( $db, $query, $sth) ) {
	push @dset_list, [@dset_record];
    }
    $dset_count = scalar @dset_list;
    PrintMsg("I","Obtained $dset_count records from ingest_data_set_info.");
    
    if ($dset_count == 0) {     
        ErrorExit( "Failed to read the ingest_data_set_info table");
    } 
    print $rpt ("\n      ****  Group Status Report  ****\n\n");
    write_group( $group_name, $grp_data_id, $mission, $gen_date);    

    my $rec_ptr;
    foreach $rec_ptr (@dset_list) {
       local ($ds_name, $archive_class, $ins_request_id, $receipt_date, 
          $path_name, $ds_size, $file_count, $install_flag, $nsa_req_date, 
	  $nsa_rsp_date, $log_file_name) = @$rec_ptr;

        write_dataset($ins_request_id);
    }      
    print $rpt ("\n End of Group\n\n");
}
#----------------------------------------------------------------------------
sub ErrorExit {   # one argument - the error message
   # exit the script with the error condition after closing the $db database
   # objest and writing the error message to the log file.
   
   my ($msg) = @_;
   if (defined($db)) {
      DoDBIclose($db);
   }
   PrintMsg("E",$msg);
   exit ( $OSF_FAILURE);
}
