#!/usr/local/bin/perl
#=======================================================================
#
#  NAME
#
#  logical_delete.pl
#
#  DESCRIPTION
#
#  This program will delete data from tables affected given a dataset
#  name, archive class, generation date and mission.  If there is a 
#  previous version of this dataset in the database then just print out
#  a message listing the datasets and the generation dates and exit.
#  If the override flag is set then delete out all previous versions 
#  from the database
#
#  OPTIONS
#
#   S - name of dads server
#   D - name of dads database
#   K - name of keyword server
#   k - name of keyword database
#   d - dataset name
#   a - archive class
#   m - mission
#   c - comment about the logical delete
#   o - delete multiple record datasets
#   v - verbose; print helper messages
#   h - print Usage
#   t - test mode - will roll back changes made during run
#
#  HISTORY
#
#   01/18/2004 49680 L Gardner Initial Release
#   03/29/2005 53314 L Gardner Fix oms association deletions
#   09/08/2006 56560 L Gardner Fix stis associations sij deletions
#   06/18/2008 60162 L Gardner Add test mode, and COS SEGEXP to an if stmnt
#   08/18/2010 65545 L Gardner Make mssql compliant
#   04/03/2012 70992 L Gardner Add csi and ssi cleanup processing
#   06/11/2012 71520 L Gardner Process oms class not ending in 0 but asn.
#                              Cleanup each product and exposure from  
#                              both product and exposure tables.
#                              Only print out delete stmnt if it deleted.
#
#=======================================================================

use strict;
use warnings; 

use Date::Manip;           # Time functions
use File::Basename;        # File processing routines
push (@INC, dirname($0));  # Add the directory with main tool to dir lookup

require 'getopts.pl';      # Option getting routines
require 'db_access.pl';    # db access routines
require 'ingest_info.pl';  # routines to access ingest tables in kywd db

#-----------------------------------------------------------------------
#
# Get input information or set defaults.  Parse the input and
# derive other necessary variables.
#
#-----------------------------------------------------------------------
use vars ( '$opt_S', '$opt_D', '$opt_K', '$opt_k', '$opt_d', '$opt_a', 
           '$opt_m', '$opt_c', '$opt_v', '$opt_h', '$opt_o', '$opt_t' );
&Getopts('S:D:K:k:d:a:m:c:vhot');

my $dadsServer     = $opt_S || 'CHICO';
my $dadsDatabase   = $opt_D || 'dadsopscopy';
my $kywdServer     = $opt_K || 'CHICO';
my $kywdDatabase   = $opt_k || 'keyworddev2';
my $datasetName    = uc $opt_d;
my $archClass      = uc $opt_a;
my $mission        = uc $opt_m;
my $comment        = $opt_c;
my $verbose        = $opt_v || '';
my $override       = $opt_o || '';
my $testMode       = $opt_t || '';

my $os = $ENV{OSTYPE};

# This is when not processing any tracking tables
#$comment = "junk" if $cleanTables;

#-----------------------------------------------------------------------
#
# Print the usage if requested
#
#-----------------------------------------------------------------------
if ( $opt_h  or !$datasetName or !$archClass or !$mission or !$comment ) {
   print "Usage: $0 -S<DADS server name>\n\t\t\t -D<DADS database name>\n",
                 "\t\t\t -K<keyword server name>\n",
                 "\t\t\t -k<keyword database name>\n",
                 "\t\t\t -d<dataset name>\n",
                 "\t\t\t -a<archive class>\n",
                 "\t\t\t -m<mission>\n",
                 "\t\t\t -c<comment for logical delete table>\n",
                 "\t\t\t [-o (override mode - always do cleanup)]\n",
                 "\t\t\t [-v (verbose mode)]\n",
                 "\t\t\t [-t (test mode)]\n";
   print "   Ex: $0 -SDMSOPSDB1 -Ddadsops -KDMSOPSDB1 -kkeyword -dJ8MT26DMQ \n",
                "\t-aCAL -mHST -c\"PR 44444 - Bad data\"\n";
   print "Exiting program...\n";
   exit;
}

#-----------------------------------------------------------------------
#
# Set some needed variables
#
#-----------------------------------------------------------------------
# The transmission source is the last letter of the dataset name for
# single exposures

my $transSource = "SQNPMTOR";

# For CAL and AST data this translates the first letter of the dataset
# name into an instrument

my %instrName = ( 'F', 'FGS', 'I','WFC3','J','ACS', 'L','COS','N', 'NICMOS', 
                  'O', 'STIS', 'U', 'WFPC2', 'V', 'HSP', 'W', 'WFPC', 
                  'X', 'FOC', 'Y', 'FOS', 'Z', 'HRS' );

# For certain archive classes the first character of the dataset name
# does not tell us the instrument but the archive class does.  This is 
# the mapping for those types of datasets.
 
my %classInstr = ( 'ACC', 'STIS', 'ACM', 'ACS', 'CSI','COS','FEN', 'FUSE', 
                   'FUS', 'FUSE', 'SMS', 'SMS', 'SSI','STIS',
                   'TVI', 'WFC3', 'TVL', 'COS' );
                    
# Need to determine the instrument and the ipso.  The ipso is made up
# of the instrument character, program id, obset id and obsnum fields.  The
# reason we need the ipso is that somebody came up with the idea of 
# tacking on a transmission source character to the dataset name which
# occasionally changes the dataset name.  We *always* want to ignore
# the transmission source.  In the case of associations there is no 
# transmission source and the obsnum field is 3 characters long.
# For the exposure ipsos we change the transmission source into % for
# database access purposes. 


my $dsnChar = substr($datasetName,0,1);
my $ipso    = $datasetName;
my $instr   = '';

# For certain archive classes we need the instrument value which can't be
# determined from the first character the the dataset name.  Those are listed
# in a hash above.  For CAL, AST and ASN use the first letter the dataset name.

# For OMS the association ts may be [0-9A-I] so we have to put all those
# possible letters since we don't know which is in ads.  This way we 
# do not get kicked out because we failed an ads check.  The asn query further 
# down should process the oms correctly for associations.

if ( $classInstr{$archClass} ) {
   $instr = $classInstr{$archClass};
} elsif ( $archClass eq 'CAL' or $archClass eq 'AST' or $archClass eq 'ASN' ) {
   $instr = $instrName{$dsnChar};
   $ipso =~ s/[$transSource]$/%/;
} elsif ( $archClass eq 'OMS' ) {
   $instr = 'OMS';
   $ipso =~ s/[0-9A-I]$/[0-9A-I]/;}

# The archive class should be the archive class passed in.  The only
# exception is when deleting an association.  In that case the archive
# class must be ASN but we still need to delete the CAL members so
# we change the archive class to include CAL.  Same with FUSE.

my $acWhere = "in ('$archClass')";
$acWhere = "in ('ASN','CAL')" if $archClass eq 'ASN';
$acWhere = "in ('FEN','FUS')" if $archClass eq 'FEN' or $archClass eq 'FUS';

print "===Running $0\n";
print "*************** Input:$datasetName,$archClass,$mission ",
      "***************\nDerived:$dsnChar,$instr,$ipso,$override,\n" if $verbose;

my $error       = '';
my $asnId       = '';
my $asnStat     = '';
my $orphanStmnt = '';
my %dsnList     = (); 

#-----------------------------------------------------------------------
#
# Connect to the dads database.  
#
#-----------------------------------------------------------------------
print "Processing using $dadsServer..$dadsDatabase\n" if $verbose;

my $dbh = DBConnect ( $dadsServer, $dadsDatabase );

#-----------------------------------------------------------------------
#
# Get the generation dates and dataset names for the ipso.  If there
# are no records that means there was no dataset in ADS and so should 
# exit with a message. If there was more than one record returned and 
# the override is not set exit with a message.  If the override is set 
# or one record was found continue.
#
#-----------------------------------------------------------------------
my $query = "select convert(char(10),ads_generation_date,111) + ' ' + " .
            "convert(char(8),ads_generation_date,108),ads_data_set_name from ".
            "archive_data_set_all where ads_data_set_name like '$ipso' and " .
            "ads_archive_class = '$archClass' and ads_mission = '$mission'";
my %adsDsnList = GetDbHash( $dbh, $query );

my $adsCnt = keys %adsDsnList;

if ( ! $adsCnt ) {
   print "Dataset '$datasetName' does not exist for archive class ",
         "'$archClass'.\n --- Exiting program\n";
   exit;
} elsif ( $adsCnt > 1 and ! $override ) {
   print "Multiple versions of this dataset listed below.",
            " --- Exiting program\n";
   foreach my $gd ( sort keys %adsDsnList ) {
      print "$adsDsnList{$gd}\t$archClass\t$gd\t$mission\n";
   }
   exit;
}

#-----------------------------------------------------------------------
#
#  Get the dataset list to process
#
#-----------------------------------------------------------------------
if ( $mission eq 'HST' ) {
   ( $error, $asnId, $asnStat, $orphanStmnt, %dsnList ) 
            = GetHstDatasets( $dbh, $datasetName, $ipso );
} elsif ( $mission eq 'FUSE' ) {
   ( $error, $asnId, %dsnList ) = GetFuseDatasets( $dbh, $datasetName );
} else {
   print "Mission '$mission' does not exist - Exiting program\n";
   exit;
}

if ( $error ) {
   print $error;
   exit;
}

#-----------------------------------------------------------------------
#
# This is the main processing.  First we do the tracking table cleanup.
# This includes moving the data from ADS and AFI to ADL and AFL.  Next
# if there are any other tables affected by that dataset clean them out.  

# In this section if any processing or database or perl errors occur a 
# message is written out and all database changes are undone.
#
#-----------------------------------------------------------------------
$dbh->{AutoCommit} = 0;  # enable transactions, if possible
$dbh->{RaiseError} = 1;

eval {
   ( $error, $adsCnt ) = RollTracking( $dbh, $acWhere, $asnId, $asnStat, 
                                       $comment, %dsnList );

   if ( !$error ) {
      $error = TableCleanup( $dbh, $kywdServer, $kywdDatabase, $mission, 
                             $instr, $asnId, $archClass, $orphanStmnt, 
                             %dsnList );
   }

   if ( $error ) {
      print "Error Messages:\n$error\n";
      $dbh->rollback; # undo the incomplete changes
   } elsif ( $testMode ) {
      print "Test Mode: Rolling back changes\n";
      $dbh->rollback; # undo the changes
   } else {
      $dbh->commit;   # commit the changes if we get this far
      print "Number of records successfully deleted from ",
            "archive_data_set_all = $adsCnt\n";
   }
   $dbh->disconnect if $os ne 'linux';
};
if ($@) {
   print "Transaction aborted because $@\n";
   $dbh->rollback; # undo the incomplete changes
   $dbh->disconnect if $os ne 'linux';
}

exit;

#-----------------------------------------------------------------------
#
# Subroutine GetFuseDatasets:
#   Get a list of datasets that will have to be cleaned up.  
#   FUSE only has associations so process all its memebers.
#   If deleting an association the archive class may be FEN or FUS but the
#   input dataset name must be the association id.  If it's not, 
#   a message will be sent and an error message sent back.  
#
#-----------------------------------------------------------------------
sub GetFuseDatasets 
{
   my ( $dbh, $datasetName ) = @_;

   my $error = '';

   my $query = <<"   EOQ";
   select distinct fem_asn_id from fuse_member
   where  fem_asn_id like '$datasetName'
   EOQ

   # Get the association id associated with this dataset
   my ( $asnId ) = GetDbScalar( $dbh, $query );
   
   print "Asn Info: $asnId,\n" if $verbose;
   
   my %dsnList = ();

   # Deleting an association so get association info
   if ( $asnId eq $datasetName ) {

      $query = "select fem_data_set_name, fem_member_type " .
               "from fuse_member where fem_asn_id = '$asnId'";
      %dsnList = GetDbHash( $dbh, $query );

      # Override the member type so other table cleanup will work
      $dsnList{$asnId} = 'PRODUCT';

   # Dealing with an association member so write out error message
   } else {
      $error = "Dataset '$datasetName' is not an association id.\n".
               "To delete a whole association supply the association id.\n" . 
               "The archive class can be 'FEN' or 'FUS'.\n" .
               " --  Exiting program\n";
   }
   
   my @prDsnList = map{$_,$dsnList{$_}} sort keys %dsnList;
   print "dsn List: @prDsnList\n" if $verbose;

   ( $error, $asnId, %dsnList ) 
}

#-----------------------------------------------------------------------
#
# Subroutine GetHstDatasets:
#   Get a list of datasets that will have to be cleaned up.  In the
#   case of a non association dataset it's just the dataset itself.
#   In the case of an association it's the dataset + all it's memebers.
#   If deleting an association the archive class MUST be ASN and the
#   input dataset name must be the association id.  If it's not, a message 
#   will be sent and the script terminated.  Finally when dealing with
#   non association exposures we determine whether it's an orphan.
#
#-----------------------------------------------------------------------
sub GetHstDatasets 
{
   my ( $dbh, $datasetName, $ipso ) = @_;

   my $error = '';

   my $query = <<"   EOQ";
   select distinct asm_asn_id, ast_asn_status 
   from   assoc_member
   left outer join assoc_status on asm_asn_id = ast_asn_id 
   where  asm_data_set_name like '$ipso' or asm_asn_id like '$ipso'
   EOQ

   # Get the association id and status associated with this dataset
   my ( $asnId, $asnStat ) = GetDbArray( $dbh, $query );
   $asnId   = '' if !$asnId;
   $asnStat = '' if !$asnStat;
   
   print "Asn Info: $asnId,$asnStat,\n" if $verbose;
   
   my %dsnList = ();
   my $orphanCleanup  = '';

   # Deleting an association so get association info
   if ( $asnId eq $datasetName ) {

      $query = "select asm_member_name, asm_member_type " .
               "from assoc_member where asm_asn_id = '$asnId'";
      %dsnList = GetDbHash( $dbh, $query );
      $dsnList{$datasetName} = 'ASNONLY' if !$dsnList{$datasetName};

   # Dealing with an association member or wrong ac so write out error message
   } elsif ( ( $asnId and $asnId ne $datasetName ) or 
             ( $asnId and $asnId eq $datasetName and $archClass ne 'ASN' ) ) {
   
      $error = "Dataset '$datasetName' is a member of association '$asnId'.\n".
               "To delete a whole association supply the association id\n" . 
               "and the archive class should be 'ASN'.\n" .
               " --  Exiting program\n";
   
   # Not an association member
   } else {
      $dsnList{$datasetName} = 'BOTH';
      $query = "select aso_orphan_type from assoc_orphan " .
               "where aso_data_set_name like '$ipso' ";
      if ( GetDbScalar( $dbh, $query ) ) {
         ( $orphanCleanup = $query ) =~ s/^select aso_orphan_type /delete /;
      }
   }

   my @prDsnList = %dsnList;
   print "dsn List: @prDsnList\n" if $verbose;

   ( $error, $asnId, $asnStat, $orphanCleanup, %dsnList ) 
}

#-----------------------------------------------------------------------
#
# Subroutine RollTracking:
#   - Move the records from ADS to ADL and delete from ADS
#   - Move the records from AFI to AFL and delete from AFI
#   Note: Only populate the ADL and AFL tables if a record for that 
#         dataset name, archive class and generation_date are already
#         there (as per Faith Abney's instructions).
#   Set error message if any of the row counts are 0 or if the 
#   deleted row counts are less than the inserted row counts.  The
#   inserted row counts may be bigger since the ignore_dup_row on ADL 
#   and AFL may cause rows to be ignored.
#
#-----------------------------------------------------------------------
sub RollTracking
{
   my ( $dbh, $acWhere, $asnId, $asnStat, $comment, %dsnList ) = @_;

   print "****** Tracking table cleanup ******\n" if $verbose;

   my $error    = '';
   my $afiWhere = '';
   my @dsnLike  = ();

   # Create a list of dataset names to use in the where clause
   foreach my $dsn ( sort keys %dsnList ) {
      $dsn =~ s/[$transSource]$/%/ if $acWhere =~ /'CAL'/ or 
                                      $acWhere =~ /'AST'/;
      $dsn =~ s/[$transSource]$/J/ if $acWhere =~ /'OMS'/;
      push( @dsnLike, "ads_data_set_name like '$dsn'" );
   }

   my $dsnStmnt = join ( " or ", @dsnLike );

   my $adsWhere = "where ads_archive_class $acWhere " .
                  "and ads_mission = '$mission' " .
                  "and ( $dsnStmnt )";
   
   # Either quote or use null for these extra fields in the ADL
   my $asnIdOut   = $asnId   ? "'$asnId'"   : 'NULL';
   my $asnStatOut = $asnStat ? "'$asnStat'" : 'NULL';
   
   my $query = "insert into archive_data_set_log_del select *, $asnIdOut, " .
               "$asnStatOut, getdate(), '$comment' from " .
               "archive_data_set_all $adsWhere";
   my $adlCnt = SetDbValues( $dbh, $query );
   print "$query\nADL records inserted = $adlCnt\n" if $verbose;
   $error .= "No records inserted into ADL\n" if $adlCnt == 0; 

   # Create a where clause for each table that needs one
   ( $afiWhere = $adsWhere ) =~ s/ads_/afi_/g;

   $query = "insert into archive_files_log_del " .
            "select * from archive_files $afiWhere";
   my $aflCnt = SetDbValues( $dbh, $query );
   print "$query\nAFL records inserted = $aflCnt\n" if $verbose;
   $error .= "No records inserted into AFL\n" if $aflCnt == 0; 

   $query = "delete from archive_data_set_all $adsWhere";
   my $adsCnt = SetDbValues( $dbh, $query );
   print "$query\nADS records deleted = $adsCnt\n" if $verbose;
   $error .= "No records deleted  from ADS\n" if $adsCnt == 0; 

   $query = "delete from archive_files $afiWhere";
   my $afiCnt = SetDbValues( $dbh, $query );
   print "$query\nAFI records deleted = $afiCnt\n" if $verbose;
   $error .= "No records deleted  from AFI\n" if $afiCnt == 0; 

   $error .= "ADL ($adlCnt) doesn't match ADS ($adsCnt) records processed\n"
                    if $adlCnt > $adsCnt;
   $error .= "AFL ($aflCnt) doesn't match AFI ($afiCnt) records processed\n"
                    if $aflCnt > $afiCnt;
 
   return ( $error, $adsCnt );
}

#-----------------------------------------------------------------------
#
# Subroutine TableCleanup:
#   Cleanup the non tracking tables that were filled by these datasets.
#   For each dataset in the list create a delete statement for the 
#   table this dataset COULD be in (the data may not be there and thats
#   ok).  Use %keyFields to generate the where clause for the delete.
#   ASO is deleted if a delete statement was created previously.
#
#-----------------------------------------------------------------------
sub TableCleanup
{
   my ( $dbh, $kywdServer, $kywdDatabase, $mission, $instr, $asnId, 
        $archClass, $orphanCleanup, %dsnList ) = @_;

   print "****** Other Table Cleanup ******\n" if $verbose;

   # Load the other table information and key fields from the keyword database
   print "Processing using $kywdServer..$kywdDatabase\n" if $verbose;
   my $dbh2 = DBConnect ( $kywdServer, $kywdDatabase );
   my %keyFields = GetKeyFields( $dbh2, $mission, $instr, $acWhere );
   $dbh2->disconnect;

   my $error = '';
   my %asnProcessed = ();

   foreach my $dsn ( sort keys %dsnList ) {

      foreach my $tableName ( sort keys %keyFields ) {

         # assoc_orphan processed below.
         next if $tableName eq 'assoc_orphan';
    
         # If processing an association that does not have a 0 product
         # as one of it's members don't want to do any table cleanup for
         # the non existent 0 product since it didn't  populate any tables.  
         # Ex: If there is no dither product for ACS then there will no 
         # entry in ADS for CAL class and there will be for ASN.  In this 
         # case the asn class did not fill any of the acs tables.

         next if $dsnList{$dsn} eq 'ASNONLY' and $tableName !~ /^assoc_/; 

         next if $asnProcessed{$tableName};

         my $whereClause = '';

         foreach my $fieldName ( sort keys %{$keyFields{$tableName}} ) {

            # SIJ is treated like member table so skip product keys
            next if $fieldName =~ /^sij_sdb/;

            if ( $fieldName =~ /program_id/ ) {
               my $progId  = substr($dsn,1,3);
               $whereClause .= " and $fieldName = '$progId'";
            } elsif ( $fieldName =~ /obset_id/ ) {
               my $obsetId = substr($dsn,4,2);
               $whereClause .= " and $fieldName = '$obsetId'";
            } elsif ( $fieldName =~ /obsnum/ ) {
               my $obsnum  = substr($dsn,6,3);
               # The transmission source is for CAL data only.
               # OMS needs to have it's J removed also from the obsnum.
               $obsnum  =~ s/[J$transSource]$//o;
               $whereClause .= " and $fieldName = '$obsnum'";
            } elsif ( $fieldName =~ /data_set_name/ ) {
               my $ipso = $dsn;

               # Stis associations have the asn_id for the dataset name
               $ipso = $asnId if $instr eq 'STIS' and $asnId;

               # Remove the trans source for all queries.
               $ipso =~ s/[$transSource]$/%/o if $archClass eq 'CAL' or
                                                 $archClass eq 'AST' or
                                                 $archClass eq 'ASN';

               # Only qualify on dataset name in the where clause for
               # OMS stis asns.  This is because it's the only instrument
               # that can have the same exposure in different associations.
               # Note: For oms the instr is OMS and not stis so have to
               #       use the first letter of asnId.

               if ( $archClass eq 'OMS' and $asnId ) {
                  next if $asnId !~ /^O/;
                  $ipso = $asnId;
               }

               $whereClause .= " and $fieldName like '$ipso'";
            } elsif ( $fieldName =~ /asn_id/ ) {
               $whereClause .= " and $fieldName = '$asnId'" if $asnId;
               $asnProcessed{$tableName} = 'Yep';
               last;
            } elsif ( $fieldName =~ /archive_class/ ) {
               $whereClause .= " and $fieldName = '$archClass'";
            } else {
               #print "Unknown field type:$tableName..$fieldName\n" if $verbose;
            }
         }

         # For sij we have to add the idb_data_set_name since it is was 
         # taken out of ingest_keyfields for other reasons (PR 55318).

         if ( $tableName eq 'sci_inst_db_join' and $instr eq 'STIS' 
              and $asnId ) {
            $whereClause .= " and sij_idb_data_set_name = '$asnId'";
         }

         # If there is no where clause then there shouldn't be a delete-skip it
         # Otherwise run the delete statement and print it out if any
         # records were deleted and it's in verbose mode.

         if ( $whereClause ) {
            # The first 'and' should be replaced with a 'where'
            $whereClause =~ s/^ and /where /;
            my $stmnt = "delete from $tableName $whereClause";
            my $recCnt = SetDbValues( $dbh, $stmnt );
            if ( $verbose and $recCnt != 0 ) {
               print "--$stmnt\n$tableName records deleted = $recCnt\n";
            }
         }
   
      }
   }

   if ( $orphanCleanup ) {
      my $recCnt = SetDbValues( $dbh, $orphanCleanup );
      $error .= "No records deleted from assoc_orphan:$orphanCleanup\n" if $recCnt == 0; 
      print "--$orphanCleanup\nassoc_orphan records deleted = $recCnt\n" if $verbose;
   }

   $error;
}
