#!/usr/bin/env perl
#=======================================================================
#
#  NAME
#
#      	dms_asn_status.pl
#
#  DESCRIPTION
#
#      	Given a from date and to date list all dms associations created
#       during that time period.  The tool creates 2 main reports.  One
#       with a list of associations with no pdq differences in its' members
#       and the other one with pdq differences in its members.  It generates
#       a file with the no pdq difference associations and at the end of
#       the report is general statistics about all the dms associations
#       that were created.
#
#  OPTIONS
#
#	S - Database server name
#	D - DADS Database name
#	O - OPUS Database name
#	h - Print Usage
#
#  HISTORY
#
#	07/23/07  58512  Lisa Gardner    Initial Release
#	10/09/07  58336  Lisa Gardner    Add pdq_severity_code field
#	11/05/07  58893  Lisa Gardner    Strip dir path off script name
#
#=======================================================================

use strict;

use STScI::DBI;			# Used to automatically login
use File::Basename;             # Used to parse the script name
use Date::Manip;                # Slow but good for one or two date
                                # conversions - read documentation
require 'getopts.pl';		# contains Getopts

#-----------------------------------------------------------------------
#
# Get the DB options or if nothing supplied give them defaults.
#
#-----------------------------------------------------------------------
use vars ( '$opt_S', '$opt_D', '$opt_O', '$opt_h' );
&Getopts('S:D:O:h');

my $server    = $opt_S   || 'CATLOG';
my $dadsDb    = $opt_D   || 'dadsops';
my $opusDb    = $opt_O   || 'opus';

#-- The fromDate and toDate are optional.  If they are not supplied
#-- the fromDate will be todays date and the toDate will be tomorrows date.
#-- If only one date is supplied it will be considered the fromDate and
#-- toDate will default to tomorrow.

my $fromDate  = ( $ARGV[0] ) ? UnixDate(ParseDate($ARGV[0]),"%m/%d/%Y")
                             : UnixDate(ParseDate("today"),"%m/%d/%Y");
my $toDate    = ( $ARGV[1] ) ? UnixDate(ParseDate($ARGV[1]),"%m/%d/%Y")
                             : UnixDate(DateCalc( "today", "+1 day" ),
                                                                "%m/%d/%Y");

#-- Create the fileName for the list of valid associations to process.

my $timeStamp = UnixDate(ParseDate("today"),"%Y%m%d");
my ( $scriptName ) = fileparse($0);
$scriptName =~ s/\.pl//;
my $lisFileName = "${scriptName}.lis";
my $rptFileName = "${scriptName}.rpt";

my %instrConv = ('O','STIS','N','NICMOS','J','ACS','U','WFPC2');

#-----------------------------------------------------------------------
#
# Print the usage
#
#-----------------------------------------------------------------------
if ( $opt_h ) {
   print "Usage: $0 [-S<server> -D<DADS database> -O<OPUS database> ",
         "<from date> <to date>]\n";
   print "Ex: $0 -SCATLOG -Ddadsops -Oopus 'Jul 23 2007' 'Aug 20 2007'\n";
   exit;
}

#-----------------------------------------------------------------------
#
# Connect to database
#
#-----------------------------------------------------------------------
my $dbh = STScI::DBI->connect( "dbi:Sybase:server=$server" )
        or die "Can't connect to database: $DBI::errstr";

$dbh->do("use $opusDb") or die;

$dbh->func("LONG","_date_fmt");

#-----------------------------------------------------------------------
#
# Read in the pdq information from the dms_asn_orig_data.  For a short
# while the cleanup tool deleted member information from pdq so the  
# association had pdq but none of the members had pdq. This situation
# caused these types of association to show up in the pdq difference 
# report. By reading in the original pdq records we can use them in
# the comparison further down if the operational db has those entries
# missing.
#
#-----------------------------------------------------------------------
my %pdqOrig = ();

my $query = <<"EOQ";
select pdq_data_set_name, pdq_quality, pdq_severity_code,
       pdq_comment_1,  pdq_comment_2, pdq_comment_3
from  dms_asn_orig_data..pdq_summary
EOQ

my $sth = $dbh->prepare($query) or die "Can't prepare query:" . $dbh->errstr;
          $sth->execute         or die "Can't execute query:" . $dbh->errstr;

while ( my ($dsn, $qual, $sev, $com1, $com2, $com3) = $sth->fetchrow_array) {

   $dsn =~ s/[SQNPMTOR]$//;
   $pdqOrig{$dsn} = "*" . join("*",$qual, $sev, $com1, $com2, $com3) . "*";
}
#-----------------------------------------------------------------------
#
# Get the PDQ information.  Only create an association pdq if all
# the members have the same pdq values.  If they don't put them in 
# a skip list and process them separately.
#
#-----------------------------------------------------------------------
my %skip = ();
my %pdqVals = ();
my %asnList = ();
my %pdqInfo = ();

my $query = <<"EOQ";
select c.association_id, b.member_num, a.pdq_quality, a.pdq_severity_code,
       a.pdq_comment_1,  a.pdq_comment_2, a.pdq_comment_3
from $dadsDb..pdq_summary a, asn_members b, dms_asn_id c
where c.association_id = b.association_id
and   b.program_id    *= a.pdq_program_id
and   b.obset_id      *= a.pdq_obset_id
and   b.member_num    *= a.pdq_obsnum 
and   c.creation_date >= '$fromDate'
and   c.creation_date  < '$toDate'
EOQ

my $sth = $dbh->prepare($query) or die "Can't prepare query:" . $dbh->errstr;
          $sth->execute         or die "Can't execute query:" . $dbh->errstr;

while ( my ($asnId, $obsnum, $qual, $sev,
            $com1, $com2, $com3) = $sth->fetchrow_array ) {

   #-- Create the dataset name out of the association id and obsnum.
   my $dsn = substr($asnId,0,6) . $obsnum;

   #-- Save the member obsnum in an array based on the association id
   push ( @{$asnList{$asnId}}, $obsnum );

   #-- Create a string out of all the pdq information.  This will be used
   #-- in the compare.  If there is no pdq information substitute the 
   #-- pdq information from dms_asn_orig_data if it exists.

   my $val = "*" . join("*", $qual, $sev, $com1, $com2, $com3) . "*";
   $val = $pdqOrig{$dsn} if $val eq '******' and $pdqOrig{$dsn};

   #-- Save the pdq info for each member so if there is no match you
   #-- can print it out in the pdq difference report.

   $pdqInfo{$asnId}{$obsnum} = $val;

   #-- When running this tool in the middle of processing dms association 
   #-- there should be no entries in pdq for the products - so don't compare it.

   next if length $obsnum == 3 and !$qual;

   #-- Only compare when have at least two members to compare to.
   #-- If any differences are found put the association in the skip
   #-- list which will be processed after the valid list is processed.

   if ( $pdqVals{$asnId} and $pdqVals{$asnId} ne $val ) {
      $skip{$asnId} = 1;
   } else {
      $pdqVals{$asnId} = $val;
   }
}

#-----------------------------------------------------------------------
#
# Print out each dms association and its member's obsnums where
# the members do not have pdqs differences or don't have any pdq entries.
# (i.e. they are not in the skip list.)
# Write out the list of these associations to a file.
#
#-----------------------------------------------------------------------
open LIS, ">$lisFileName" or die "Can't open file: $!";
open RPT, ">$rptFileName" or die "Can't open file: $!";

print RPT "      DMS Report from $fromDate - $toDate\n";

#-- Only print header if there are associations to report.
my $asnCnt = keys %asnList;
my $skipCnt = keys %skip;

if ( $asnCnt and ($asnCnt-$skipCnt) > 0 ) {
   print RPT "\n          Associations without PDQ Differences \n\n";
   print RPT "Asn Id      ObsNums in Asn                                 \n";
   print RPT "---------   -----------------------------------------------\n";
}

foreach my $asnId ( sort keys %asnList ) {
   next if $skip{$asnId};
   print RPT "$asnId : @{$asnList{$asnId}}\n";
   print LIS "$asnId\n";
}

close LIS;

#-----------------------------------------------------------------------
#
# Print out the association's that have members with different pdq
# information.  It is difficult to read so separate each association with
# a line.
#
#-----------------------------------------------------------------------

#-- Only print header if there are associations to report.
if ( $skipCnt ) {
   print RPT "\n                    Associations with PDQ Differences \n\n";
   print RPT "Asn Id      Obsnum   PDQ Info                               \n";
   print RPT "---------   ------   ---------------------------------------------------------\n";
}

foreach my $asnId ( sort keys %skip ) {
   foreach my $obsnum ( sort keys %{$pdqInfo{$asnId}} ) {
      my $printLine = sprintf("%9s   %-6s   %s\n", $asnId, $obsnum, 
                             $pdqInfo{$asnId}{$obsnum});
      print RPT $printLine;
   }

   print RPT "================================================================================\n";
}

#-----------------------------------------------------------------------
#
# Print out total information about the run and all dms processing
# in general.  At the end list the name of the file containing
# the list of valid associations.  This file will be used as part
# of dms association processing.
#
#-----------------------------------------------------------------------
print RPT "\nNumber of associations processed in report time period       : $asnCnt\n";
print RPT "Number of associations with mixed PDQs in report time period : $skipCnt\n\n";

#-----------------------------------------------------------------------
#
# Get all the DMS association info.  
#
# The date being pulled back is in the form YYYY/MM/DD but I'm only 
# saving the first 7 characters so all that is returned is YYYY/MM 
# which is the month the association was created.  This is used to generate 
# the statistics for how many dms associations were processed each month.
#
# Note: By completed it means that the dms associations have been archived
#       in DADS.  It does not mean any cleanup was done or pdqs created.
#
#-----------------------------------------------------------------------
my $dmsCreated = 0;
my $dmsFinished = 0;
my %dmsFinishedByMonth = ();
my %dmsCreatedByInstr  = ();
my %dmsFinishedByInstr = ();

my $query = <<"EOQ";
select distinct a.association_id, convert(char(7),a.creation_date, 111), 
       b.asm_asn_id
from $opusDb..dms_asn_id a, $dadsDb..assoc_member b
where a.association_id *= b.asm_asn_id
EOQ

my $sth = $dbh->prepare($query) or die "Can't prepare query:" . $dbh->errstr;
          $sth->execute         or die "Can't execute query:" . $dbh->errstr;

while ( my ( $asnId, $createDate, $inDads ) = $sth->fetchrow_array ) {
   my $instr = substr($asnId,0,1);
   my $realInstr = $instrConv{$instr};
   $dmsCreated++;
   $dmsCreatedByInstr{$realInstr}++;
   if ( $inDads ) {
      $dmsFinished++;
      $dmsFinishedByMonth{$createDate}{$realInstr}++;
      $dmsFinishedByInstr{$realInstr}++;
   }
}

#-----------------------------------------------------------------------
#
# Print out completion totals by month.
#
#-----------------------------------------------------------------------
my $monthTot = 0;

print RPT "      Completed Associations by Month & Instrument\n\n";
print RPT "Date       Total   By Instrument                 \n";
print RPT "-------    -----   ------------------------------\n";
foreach my $date ( sort keys %dmsFinishedByMonth ) {
   my @compAsnList = ();
   foreach my $instr ( sort keys %{$dmsFinishedByMonth{$date}} ) {
      $monthTot += $dmsFinishedByMonth{$date}{$instr};
      push ( @compAsnList, "$instr($dmsFinishedByMonth{$date}{$instr})" );
   }
   my $compAsn = join( ", ", @compAsnList );
   my $printLine = sprintf("%-7s    %5s   %s\n", $date, $monthTot, $compAsn);
   print RPT $printLine;
   $monthTot = 0
}

#-----------------------------------------------------------------------
#
# Print out completion totals by instrument.
#
#-----------------------------------------------------------------------
print RPT "\nInstrument   Completed Asns   Created Asns\n";
print RPT "----------   --------------   ------------\n";
foreach my $instr ( sort keys %dmsFinishedByInstr ) {
   my $printLine = sprintf("%-6s       %14s   %12s\n", 
              $instr, $dmsFinishedByInstr{$instr}, $dmsCreatedByInstr{$instr});
   print RPT $printLine;
}
print RPT " ----------   --------------   ------------\n";
my $printLine = sprintf("%-6s       %14s   %12s\n", 
                'Total', $dmsFinished, $dmsCreated );
print RPT $printLine;

close RPT;

exit;
