#----------------------------------------------------------------------------
#
# Name: ingest_tools_pkg.pl
#
# This is a perl script package of subroutines used by interactive ingest and
# catalog tools.  It was created to avoid code duplication.  
# 
# Module usage:
# The main routine must require the runsql.pl _select_pkg.pl modules.  
#   
# History:
# Date     OPR      Who         Reason
# -------- ------ ----------  ---------------------------------------------
# 08/25/04 51848  Baum        Initial code
# 11/16/04 52338  Baum        Added 3 routines for DADS logs: get_root_dir
#                               check_datasets and copy_datasets.
# 04/07/05 53339  Baum        Added 9 routines for OMS and CAL datasets and
#                               print genreq output on genreq failure.
# 09/15/06 56505  MSwam       Use prefixed DB field names
# 04/24/08 59760  MSwam       Alter some regexs to allow >3char suffixes
# 06/04/08 59760  MSwam       Removed file_tag_type=3 in get_datasets,
#                                 since file_tag_type=6 covers those cases
# 02/22/10 59963  Sherbert    Use MSG_REPORT_LEVEL instead of Main param
# 
#----------------------------------------------------------------------------

# Compile-time setup: extend the library search path and load the helper
# libraries whose routines (PrintMsg, DoDBI*) are called by the subs below.
BEGIN {

  # Prepend every directory on the shell PATH to @INC so the require calls
  # below can locate the .pl libraries there.
  unshift @INC,(split /:/, $ENV{PATH});

  # set up external routines
  # NOTE(review): the DoDBIopen/DoDBIclose/DoDBIselect/DoDBIexecute/DoDBIfetch
  # routines used in this file are not defined in the visible part of it and
  # presumably come from do_dbi_pkg.pl -- confirm.
  require 'do_dbi_pkg.pl';          # run query returning only record count
  require 'printmsg.pl' ;       # PrintMsg
  # NOTE(review): "use strict" is lexically scoped, so inside this BEGIN
  # block it only covers the block itself; the subroutines below are NOT
  # compiled under strict.  Moving it to file scope would require declaring
  # the package globals those subroutines currently rely on.
  use strict ;

}

#----------------------------------------------------------------------------
# Name: open_opus_db 
#       This is generic enough to change the name
#
#       Opens a database connection through DoDBIopen and returns it.
#
# Input: server  -- name of the database server
#        OPUS_DB -- name of the database
# Return: dbConn -- whatever DoDBIopen returned for the connection
# Dies:   when DoDBIopen hands back the failure marker (666)
#----------------------------------------------------------------------------
sub open_opus_db {
    # No prototype: the sub takes two arguments, and an empty "()" prototype
    # would reject any call with arguments that is compiled after this
    # definition is visible.
    my $nm = "open_opus_db" ;

    my ( $opusServer, $opusDB ) = @_ ;
    PrintMsg( "D", "Opening connection to $opusServer $opusDB ", $nm ) ;

    # 666 is handed to DoDBIopen and is also the value tested for below.
    # NOTE(review): presumably DoDBIopen returns its third argument when the
    # connection cannot be made -- confirm against do_dbi_pkg.pl.
    # NOTE(review): $dbConn is deliberately left as a package variable, as in
    # the original, in case code outside this sub still reads it.
    $dbConn = DoDBIopen( $opusServer, $opusDB, 666 );
    my $errMsg = $nm . "-Failed to establish connection to $opusDB-Very rare." ;
    die( $errMsg ) if $dbConn == 666;   # very rarely dies - catch error message 
    return $dbConn ;
}   # end open_opus_db

#----------------------------------------------------------------------------
# Name: close_opus_db 
#       This is generic enough to change the name
#
#       Closes a database connection through DoDBIclose.  An undefined
#       connection is accepted and simply ignored.
#
# Input: dbConn -- connection as returned by open_opus_db (may be undef)
# Return: none
#----------------------------------------------------------------------------
sub close_opus_db {
    # No prototype: the sub takes one argument (see open_opus_db).
    my $nm = "close_opus_db" ;

    my ( $dbConn ) = @_ ;

    # Substitute an empty string for undef so the message text is unchanged
    # but no "uninitialized value" warning is raised.
    my $shown = defined( $dbConn ) ? $dbConn : "" ;
    PrintMsg( "D", "Closing dbConn $shown", $nm ) ;

    ## maybe if-defined needs to move to Main?
    if ( defined( $dbConn ) ) {
        # done with database
        DoDBIclose( $dbConn );
        # $dbConn is only this sub's private copy, so there is nothing to
        # undef here; the caller's own variable is out of reach.
    }
    return ;
}   # end close_opus_db

#----------------------------------------------------------------------------
# Name:  check_hst_data_id 
#     Validate an HST data_id against the archive_class_map table.
#     The data_id is rejected when
#       - it has no row in archive_class_map, or
#       - its archive class is FUS or FUR, or
#       - its archive class is CAL or OMS and cal_oms_flag is false.
#     Before dying, the list of usable data_ids is printed through
#     report_valid_ids.  Use open_opus_db() first to obtain the connection.
#
# Input:  db -- connection to OPUS db
#         data_id      -- the lowercase data_id to validate
#         cal_oms_flag -- (1 or 0) whether CAL and OMS classes are accepted
# Return: nothing meaningful; dies if data_id is invalid
#
# History:
# mm/dd/yy PR_num Name      Description
# -------- ------ ---------- ---------------------------------------------------
# 02/23/10 59963  Sherbert   Add db connection as input parameter
#-------------------------------------------------------------------------------
sub check_hst_data_id {   
    my $nm = "check_hst_data_id";
    PrintMsg( "D", "-----subBeg----- ", $nm );
   
    my ( $db, $data_id, $cal_oms_flag ) = @_;
    
    # NOTE(review): data_id is interpolated straight into the SQL text; this
    # is only safe while callers pass trusted values.
    my $query=<<"EOQ";
SELECT acm_archive_class
FROM archive_class_map
WHERE acm_data_id='$data_id'
EOQ

    my (@arch_class) = DoDBIselect( $db, $query ) ;
    
    if ( ! @arch_class ) {
        report_valid_ids( $db, $cal_oms_flag ) ;
        die( $nm . "-The data_id $data_id is not found in archive_class_map." ) ;
    } 

    # Only the first returned value is examined.
    my $class = $arch_class[0] ;
    my $is_fuse    = ( $class eq "FUS" || $class eq "FUR" ) ;
    my $is_cal_oms = ( $class eq "CAL" || $class eq "OMS" ) ;

    if ( $is_fuse || ( $is_cal_oms && !$cal_oms_flag ) ) {
        report_valid_ids( $db, $cal_oms_flag ) ;
        # Report the class actually found (the original interpolated an
        # unrelated, empty scalar here).
        die( $nm . "-The data_id $data_id is invalid class $class." ) ;
    }
    PrintMsg( "D", "-----subEnd----- ", $nm );
}   # end check_hst_data_id

#----------------------------------------------------------------------------
# Name: report_valid_ids 
#     Print to the selected output handle the data_ids (and their archive
#     class) from the archive_class_map table that may be used with the
#     calling tool.  Only rows whose data_id is all lowercase are listed.
# 
# Input:  db -- connection to OPUS db
#         cal_oms_flag (0,1) - when true only the CAL and OMS classes are
#                      listed; when false every class except CAL, OMS, FUS
#                      and FUR is listed
# 
# Return: none 
# 
# History:
# mm/dd/yy PR_num Name      Description
# -------- ------ ---------- ---------------------------------------------------
# 02/23/10 59963  Sherbert   Add db connection as input parameter
#----------------------------------------------------------------------------
sub report_valid_ids {   
    my $nm = "report_valid_ids";
    PrintMsg( "D", "-----subBeg----- ", $nm );

    my ( $db, $cal_oms_flag ) = @_; 

    my $in_test = $cal_oms_flag
                ? "IN ('CAL','OMS')"
                : "NOT IN ('CAL','OMS','FUS','FUR')" ;

    my $query = <<"EOQ";
SELECT acm_data_id, acm_archive_class
FROM archive_class_map
WHERE acm_data_id=lower(acm_data_id) and acm_archive_class $in_test 
EOQ
    PrintMsg( "D", "query:\n$query", $nm ); 

    print "\nThe following data_id values may be used with this tool.\n";

    # The statement handle and the fetched columns are private to this sub,
    # so that package variables of the same names are left untouched.
    my $sth = DoDBIexecute( $db, $query);
    # The list assignment yields the number of fetched values, so the loop
    # stops as soon as DoDBIfetch returns an empty list.
    while ( my ( $data_id, $arch_class ) = DoDBIfetch( $db, $query, $sth) ) {
        print "$data_id for archive class $arch_class\n";
    }
    return ;
}   # end report_valid_ids


#----------------------------------------------------------------------------
# Name: get_datasets 
#    cd to the given directory, glob it with a fixed series of file masks and
#    return the sorted, de-duplicated list of dataset rootnames parsed from
#    the matching file names.
#
#    Each mask has its own parsing rule for extracting the dataset rootname.
#    The masks are applied in the order below and EVERY mask is applied
#    (a directory may hold several kinds of file names), so a file matching
#    more than one mask contributes one rootname per mask:
#     *.???                  (3char extension; root = text before it)
#     l????????_[0-9]*.fits  (COS TV product; root = <name>_<13 digits>)
#     l????????_*.fits       (COS data product; root = text before 1st "_")
#     *_*.fits               (FITS file, any suffix length; root = text
#                             before the LAST "_")
#     *asnf.fit              (FUSE association file; root = the 11 chars
#                             before "asnf.fit")
#     *.fit_*                (FUSE reference file; root = text before ".fit")
#
#    Side effects: the process is left cd'ed into src_dir.  When data_id
#    contains "sms", each *_*.fits file is converted with the external
#    listtra command into <root>.<suffix> in that directory.
#
# Input: src_dir   -- directory in which to search for files
#        data_id   -- just so we know when to convert SMS fits to ASCII
#                     This is going to be weird: we need ASCII only for 
#                     cataloging, but we want to do the conversion the 
#                     first time through the list of datasets 
#                     BEFORE GENREQ converts any already ASCII to FITS
#
# Return: array of datasets
# Dies:   if src_dir cannot be entered, if a globbed name cannot be parsed,
#         if an SMS conversion fails, or if no mask matched any file
#
# History:
# mm/dd/yy PR_num Name       Description
# -------- ------ ---------- ---------------------------------------------------
# 02/23/10 59963  Sherbert   Add db connection as input parameter
# 08/16/10 59963  Sherbert   pick up diff types of file names in same dir
# 08/24/10 63986  Sherbert   Convert SMS FITS to ascii for cataloging
#-------------------------------------------------------------------------------
sub get_datasets {   
    my $nm = "get_datasets";
    PrintMsg( "D", "-----subBeg----- ", $nm );

    my ( $src_dir, $data_id ) = @_; 
    PrintMsg( "D", "in:       src_dir is $src_dir ", $nm ) ;
    PrintMsg( "D", "in:       data_id is $data_id ", $nm ) ;

    ## cd to src_dir 
    ## because MUST have local files when get here.       
    $src_dir =~ s#/{2,}#/#g ;  ## remove multiple slashes
    PrintMsg( "D", "src_dir is $src_dir now \n", $nm ) ;
    if ( ! chdir( $src_dir ) ) {
        die( $nm . "-Cannot cd to $src_dir" ) ;
    }

    # One entry per file mask, in the order they are tried.
    #   type    -- label used in the parse-failure and "Found" messages
    #   counted -- text appended to the label in the "Found" message only
    #   mask    -- glob pattern, relative to src_dir
    #   parse   -- regex whose first capture is the dataset rootname
    #   sms     -- true for the mask whose files may need SMS conversion
    # (file_tag_type 3 was removed on purpose; type 6 covers those cases.)
    my @scans = (
        {   # file_tag_type 2
            type    => "HST dataset.ext",
            counted => " file(s)",
            mask    => "*.???",
            parse   => qr/(\S+?)\.\S{3}$/,
        },
        {   # file_tag_type 4: root keeps the 13-digit field after the name
            type    => "COS TV product file and suffix w/underscores",
            counted => " file(s)",
            mask    => "l????????_[0-9]*.fits",
            parse   => qr/(\S+?_\d{13})_(\S+?)\.fits$/,
        },
        {   # file_tag_type 5: non-greedy, root ends at the 1st underscore
            type    => "COS product file and suffix w/underscores",
            counted => " file(s)",
            mask    => "l????????_*.fits",
            parse   => qr/(\S+?)_(\S+?)\.fits$/,
        },
        {   # file_tag_type 6: greedy, root ends at the LAST underscore
            type    => "FITS files (any size suffix, no underscores)",
            counted => "",
            mask    => "*_*.fits",
            parse   => qr/(\S+)_(\S+?)\.fits$/,
            sms     => 1,
        },
        {   # file_tag_type 1: 11 character rootname before "asnf.fit"
            type    => "FUSE association file(s)",
            counted => "",
            mask    => "*asnf.fit",
            parse   => qr/(\S{11})asnf\.fit$/,
        },
        {   # file_tag_type 7: glob results carry no directory part, so the
            # rootname starts at the beginning of the name
            type    => "FUSE reference file(s)",
            counted => "",
            mask    => "*.fit_*",
            parse   => qr/^(\S+?)\.fit/,
        },
    );

    my @dataset_names = ();   ## Collect as we go
    my @file_list     = ();   ## Everything any mask matched in src_dir

    foreach my $scan ( @scans ) {
        my $file_type = $scan->{type} ;
        my @found     = glob( $scan->{mask} ) ;

        foreach my $file ( @found ) {
            # In list context a failed match gives an empty list.
            my ( $thisRoot, $thisExt ) = ( $file =~ $scan->{parse} ) ;
            if ( !defined( $thisRoot ) ) {
                die( $nm . "-Cannot parse $file_type filename $file." ) ;
            }
            if ( $scan->{sms} ) {
                my $msg = "thisRoot is " . $thisRoot . "; thisExt is " . $thisExt ;
                PrintMsg( "D", $msg, $nm );
            }
            push( @dataset_names, $thisRoot ) ;

            # If there are any SMS FITS files w/o the ASCII equivalent, we 
            # want to convert them to ASCII.
            # And MUST rename converted FROM rootname_pod.tlis to rootname.pod
            if ( $scan->{sms} && $data_id =~ /sms/ ) {
                PrintMsg( "I", "SMS FITS file $file must convert to ASCII", $nm ) ;
                my $outputName = $thisRoot . "." . $thisExt ;
                PrintMsg( "D", "outputName is $outputName", $nm ) ;
                # NOTE(review): the file name is passed through the shell
                # unquoted, as before; names with shell metacharacters are
                # not supported.
                my $cmd = "listtra $file -f $outputName " ;
                system( "$cmd > /dev/null 2>&1 " ) ;   # just need a return status
                if ( $? != 0 ) { 
                    die( $nm . "-Cannot convert SMS file $file to ASCII" ) ;
                }
            }
        }

        my $msg = "Found " . ( scalar @found ) ;
        $msg = $msg . " $file_type" . $scan->{counted} . " in $src_dir." ;
        PrintMsg( "D", $msg, $nm ) ;
        push( @file_list, @found ) ;
    }

    if ( ( scalar @file_list ) == 0 ) {
        die( $nm . "-No files found in $src_dir directory." ) ;
    }

    # Sort the list, then keep only the first occurrence of each name
    # (Perl Cookbook 4.6 Extracting Uniq Elements from a list).
    my %seen ;
    my @datasets = grep { ! $seen{$_} ++ } sort @dataset_names ;
    foreach my $dataset ( @datasets ) {
        PrintMsg( "I", "Adding $dataset to dataset list.", $nm ) ;
    }

    my $msg = "Returning datasets: "  ;
    $msg = $msg . "(" . join(", ", @datasets) . ")";
    PrintMsg( "D", $msg, $nm );
    return @datasets;    
}   # end get_datasets


#----------------------------------------------------------------------------
# Name: get_cat_resource    
#     build the resource file name from the data_id and check if it exists.
#     If that resource file does not exist check all cat* resource files for
#     trigger data_ids that are not in the SKIP_CLASSES.
#
#     Alas, cat_cos_copies.resource can trip us up, so grep hits from it
#     are ignored.
#
#     Once a resource file is known, its TASK line is read and the task
#     name (the word following "TASK = <") is extracted.
#
# Input: envVar  -- name of the env Var, eg. OPUS_DEFINITIONS_DIR, containing
#                   the list of directories in which to search for files
#        data_id -- data_id whose catalog resource is wanted
#
# Return: resource file name and catalog task name 
#         return values may be undefined if no catalog req'd for data_id;
#         an empty return when a SKIP_CLASSES record names the data_id
# Dies:   if a resource file was found but its TASK line is missing or
#         cannot be parsed
#
# History:
# mm/dd/yy PR_num Name       Description
# -------- ------ ---------- ---------------------------------------------------
# 03/30/10 59963  Sherbert   why not return the full-path name?
#----------------------------------------------------------------------------
sub get_cat_resource {   
    my $nm = "get_cat_resource";
    PrintMsg( "D", "-----subBeg----- ", $nm );
   
    my ($envVar, $data_id) = @_;
    my $task ;
    my $msg ;

    ## DIRSPEC:file 
    my $res_name = $envVar.":cat_".$data_id.".resource";
        
    # resource file may be as easy as cat_<data_id>.resource
    # NOTE(review): resolve() and expand() are not defined in the visible
    # part of this file; resolve() is presumed to die when the file cannot
    # be found -- confirm.
    my $res_file ;
    eval { $res_file = resolve( $res_name ) } ;
    if ( $@ ) {
        # Or we may have to search for DATA_ID-specific triggers 
        # in cat_*.resource.  Use expand because we no longer feed in the dirs. 
        my @def_dirs = expand( $envVar ) ;

        # Literal dot before DATA_ID: the backslash is doubled so that it
        # survives double-quote interpolation and reaches egrep.
        my $pattern = "^OSF_TRIGGER.+\\.DATA_ID.+$data_id|^SKIP_CLASSES.+$data_id" ;

        # NOTE(review): no "last" here, so a hit in a later directory
        # overrides one from an earlier directory -- confirm that is wanted.
        foreach my $def_dir ( @def_dirs ) {
            # add trailing slash if missing
            if ($def_dir !~m#/$#) {
                $def_dir .= "/";
            }
            $msg = "# check inside each cat_* resource file to test for OSF_TRIGGER" ;
            PrintMsg( "D", $msg, $nm ) ;
            # check inside each cat_* resource file to test for OSF_TRIGGER 
            # using the data_id but 
            # return NULL if SKIP_CLASSES record for data_id is found.   
            # /dev/null is added as an extra file so that grep always
            # prefixes each hit with its file name, even when cat_* expands
            # to a single file.
            my $command = "egrep '" . $pattern . "' $def_dir" . "cat_*"
                        . " /dev/null 2> /dev/null " ;
            my @grep_out = `$command`;
     
            # Look through the grep output for the resource file 
            foreach my $line (@grep_out) {
                chomp ( $line ) ;
                PrintMsg( "D", "Line from grep is $line", $nm ) ;     ## keep around in case need in future
                if ( $line =~ m/cat_cos_copies/ ) {
                    # This is not the resource file you are looking for
                    next ;
                }
                if ($line =~m/SKIP_CLASSES/) {
                    ## return NULL 
                    $msg = "data_id $data_id SKIPS cataloging " ;
                    PrintMsg( "I", $msg, $nm ) ; 
                    return;  # this data_id is skipped
                }
                # grep output will consist of a <filename>:<line-in-file>
                # We want the file name, i.e. the first field.
                ( $res_file ) = split( ":", $line, 2 ) ; 
                PrintMsg( "D", "res_file        is $res_file ", $nm ) ; 
                PrintMsg( "D", "remember we want to return full path name...", $nm ) ;
            }
        }
    }

    # Determine TASK line value from found resource file.  
    # An undefined res_file is legal and implies an undefined task.
    # However,  if res_file IS defined, then task must NOT be undefined.
    if ( $res_file ) {
        # grep the task line in the resource file 
        my $command = 'grep "^TASK *="'." $res_file ";
        my $task_line = `$command`;
        $msg = "## task_line is $task_line " ;
        PrintMsg( "D", $msg, $nm ) ;
        if ($task_line eq "") {
            $msg = "Failed command: $command" ;
            PrintMsg( "E", $msg, $nm ) ;
            die( $msg ) ;
        }
        # parse out the task name from line "TASK = <taskname "
        # $1 is only trusted after the match is known to have succeeded.
        if ( $task_line =~ m/^TASK\s*=\s*<(\S+)\s/ ) {
            $task = $1 ;
            $msg = "match is 1 but task is " . $task ;
            PrintMsg( "D", $msg, $nm ) ;
        }
        else {
            $msg = "Cannot parse task name from: $task_line" ;
            PrintMsg( "E", $msg, $nm ) ;
            die( $msg ) ;
        }
    }

    # For debugs: avoid missing values and be clear in the chatter
    my $res_file_string = $res_file ? $res_file : "<undefined>" ;
    my $task_string     = $task     ? $task     : "<undefined>" ;
    $msg = "Returning resource file: $res_file_string "  ;
    PrintMsg( "D", $msg, $nm );
    $msg = "Returning task     name: $task_string "  ;
    PrintMsg( "D", $msg, $nm );
    PrintMsg( "D", "-----subEnd----- ", $nm );
    return ( $res_file, $task ) ;  # return values may be undefined
}   # end get_cat_resource

#----------------------------------------------------------------------------
# Name: msgVerbosity
#     Report the message verbosity currently requested via the environment.
# 
# Input: none
# 
# Return: value of the MSG_REPORT_LEVEL environment variable, or the string
#         MSG_INFO when that variable is not defined
# 
# History:
# Date     OPR      Who         Reason
# -------- -------- ----------  ---------------------------------------------
# 08/13/10 59963    Sherbert    Turn off verbosity for genreq/hkp
#----------------------------------------------------------------------------
sub msgVerbosity {
    my $fromEnv = $ENV{'MSG_REPORT_LEVEL'} ;

    # A defined value (even an empty one) wins over the default.
    return $fromEnv if defined $fromEnv ;
    return 'MSG_INFO' ;
}

#----------------------------------------------------------------------------
# Name: dbVerbosity
#     Report the database verbosity currently requested via the environment.
# 
# Input: none
# 
# Return: value of the STDB_REPORT_LEVEL environment variable, or the empty
#         string when that variable is not defined
# 
# History:
# Date     OPR      Who         Reason
# -------- -------- ----------  ---------------------------------------------
# 08/13/10 59963    Sherbert    Turn off verbosity for genreq/hkp
#----------------------------------------------------------------------------
sub dbVerbosity {
    my $fromEnv = $ENV{'STDB_REPORT_LEVEL'} ;

    # Undefined means "not set": fall back to the empty default.
    return '' unless defined $fromEnv ;
    return $fromEnv ;
}

#----------------------------------------------------------------------------
# Name: catalog_datasets 
#       Run task in interactive mode. First check for valid task name, then
#       choose command syntax and success criteria based on task name. 
#
#       For every dataset the task is run through the shell with stderr
#       folded into stdout.  Its output is echoed (minus some noise lines)
#       and searched for the task's success string; a dataset whose output
#       never contains that string is fatal.
#
#       MSG_REPORT_LEVEL is forced to MSG_INFO only while the task runs.
#
# Input: dataset_list_reference -- reference to the list of dataset names
#        task     -- "catalog" or "catalog_ascii_main.py"
#        resource -- resource file handed to the task with -r
#        data_id  -- data_id of the datasets
#
# Return: none because it runs the catalog task
# Dies:   on an unexpected task name, or when the success string is not
#         found in the task output for a dataset
#
# History:
# Date     OPR      Who         Reason
# -------- -------- ----------  ---------------------------------------------
# 08/10/03 55454    Sherbert    Make it work when msg rptg is set high
#----------------------------------------------------------------------------
sub catalog_datasets {   
    my $nm = "catalog_datasets";
    PrintMsg( "D", "-----subBeg----- ", $nm );
    
    my ($ds_ref, $task, $resource, $data_id) = @_;

    # Text that must appear somewhere in the task output for success.
    my $success_string;
    if ($task eq "catalog") {
        $success_string = "CATALOG Ending";
    } elsif ($task eq "catalog_ascii_main.py") {
        $success_string = "attempting INSERT";
    } else {
        die( $nm . "-Unexpected task name: $task." ) ;
    }
      
    PrintMsg( "I", "Processing with task $task and resource $resource.", $nm ) ;
    foreach my $dataset (@$ds_ref) {
        PrintMsg( "I", "Starting $task processing for dataset $dataset.", $nm ) ;

        # use Bourne shell redirection 2>&1 to mix stderr with stdout
        # NOTE(review): the arguments are interpolated into a shell command
        # line unquoted; callers must pass trusted values.
        my $command;
        if ($task eq "catalog") {
            $command = "$task -r $resource $dataset.$data_id  2>&1";
        } else {
            $command = "$task -r $resource -d $dataset -a $data_id  2>&1";
        }
        PrintMsg( "I", "Using command: $command ", $nm ) ;

        # Decrease the amount of messaging from the task: run it at the
        # lowest level.  "local" puts the variable back exactly as it was
        # (including "not set at all") when the block is left.
        my @task_out;
        {
            local $ENV{"MSG_REPORT_LEVEL"} = 'MSG_INFO' ; 
            @task_out = `$command`;
        }
      
        # check for success statement
        my $failed = 1;
        foreach my $line (@task_out) {
            ## print line to STDOUT IF line does NOT contain Opus_env,
            ## and it is not a line that ends at INFO, and it is not 
            ## a line containing just the thread count, e.g. (1).
            ## Take this opp to get rid of incessant 'more rows available'
            print $line if ( $line !~ /Opus_env::/        &&
                             $line !~ /-I-INFO(\s*)$/     &&
                             $line !~ /^(\s*)\([0-9]*\)$/ &&
                             $line !~ /more rows available/  ) ;
            ## index < 0 means not found, >= 0 means found
            if (index( $line, $success_string ) > -1) {
                PrintMsg( "I", "Completed $task processing for dataset $dataset.", $nm ) ;
                ## We succeeded, do not print failed message
                $failed = 0;
            } 
        }
        if ( $failed ) {
            ## success string never found in catalog output
            die( $nm . "-$task failed for dataset $dataset." ) ;
        }
    }
    PrintMsg( "D", "-----subEnd----- ", $nm );
}   # end catalog_datasets

#----------------------------------------------------------------------------
# Name: get_hkp_resource {   
#
#     append the file_mask, "hkp*.resource", to each definitions_dir in turn
#     and grep those resource files for a QUERY line mentioning the given
#     data_id, to find the one resource file for this data_id.  The first
#     directory holding exactly one matching file wins.
#
#    Made this look exactly like the sub to find resource for GENREQ, but 
#         egrep is different
#         data_id is lc here 
#         OK if there is no resource for data_id
#
# Input: the definitions_dirs (array reference, highest priority first)
#        and the data_id
#
# Return: appropriate resource file name for running HKP (update_db_tool):
#         the file's base name up to its first "."; undef if no directory
#         holds a matching file
# Dies:   if more than one file in a single directory matches
#
# History:
# mm/dd/yy PR       Who         Reason
# -------- -------- ----------  ------------------------------------------------
# 03/31/10 59963    Sherbert    2005 query fmt change broke this, no one noticed
# 08/26/10 64600    Sherbert    fix override issue
# 
#----------------------------------------------------------------------------
sub get_hkp_resource {   
    my $nm = "get_hkp_resource";
    PrintMsg( "D", "-----subBeg----- ", $nm );
   
    my ($def_dir_ref, $data_id) = @_;
    # dereference into a private copy: the trailing-slash fix-up below then
    # never touches the caller's list
    my @def_dirs = @$def_dir_ref ;
    my $best_file ;

    my $msg = 'In: data_id =  ' . $data_id . '; @def_dirs = ';
    $msg = $msg . "(" . join(", ", @def_dirs) . ")";
    PrintMsg( "D", $msg, $nm) ;

    # NOTE(review): the lowercased data_id is only used in the debug message;
    # the grep itself uses data_id exactly as passed in.  The header says
    # "data_id is lc here" -- confirm callers already pass it in lowercase.
    my $lc_data_id = lc($data_id);

    foreach my $def_dir ( @def_dirs ) {
        # add trailing slash if missing
        if ($def_dir !~m#/$#) {
            $def_dir .= "/";
        }

        # Grep for data_id from whatever files may match template
        # If NO files match the template, go on
        # Else the grep finds what we are looking for in one step
        my $hkp_mask = $def_dir."hkp*.resource";
        PrintMsg( "I", "The grep file mask is $hkp_mask.", $nm ) ;
        # -l lists only the names of matching files, one per line.
        my $egrep_cmd = "egrep -l '" . "^QUERY.+$data_id" . "' " . $hkp_mask . " 2> /dev/null " ;
        PrintMsg( "D", "egrep_cmd is $egrep_cmd", $nm ) ;
        my @grep_out = `$egrep_cmd`;
        my $num_files = scalar @grep_out;
        ## The following message can be confusing because it repeats for 
        ## each iteration through the loop.
        PrintMsg( "D", "Number of hkp*.resource files containing $lc_data_id is $num_files ", $nm ) ;

        # This is not the resource file directory you want.  Try again.
        next if $num_files == 0 ;

        if ( $num_files > 1 ) {
            # Ambiguous: refuse to guess which file is meant.
            die( $nm . "-Too many files matched in $egrep_cmd " ) ;
        } 

        # found 1 match, save it
        # because we fed in prioritized directories, this is the best file
        chomp( $grep_out[0] );
        $best_file = $grep_out[0] ;
        last ;
    }

    if ( !defined( $best_file ) ) {
        ## Hmm, no files found for data_id.  This is acceptable.
        PrintMsg( "D", "No HKP resource to return", $nm );
        # Explicit undef (not a bare return) is kept so that callers using
        # the result inside an argument list still get exactly one value.
        return undef ;
    }

    ## So, my only question now is, did we want full path name?  
    ## I am thinking not, so just return file name, no dirs
    use File::Spec ;
    my ( undef, undef, $file_name ) = File::Spec->splitpath( $best_file ) ;
    # We do not need the '.resource' either
    my ( $resource ) = split( /\./, $file_name, 2 ) ;
    $msg = "Returning resource: $resource "  ;
    PrintMsg( "D", $msg, $nm );
    PrintMsg( "D", "-----subEnd----- ", $nm );
    return $resource ;
}   # end get_hkp_resource  


#----------------------------------------------------------------------------
# Name: hkp_datasets 
#       Run update_db_tool in interactive mode. Look for -F-INFO (or
#       STDB_DEFAULT_ERROR_HANDLER) in its output to find an error condition.
#
#     NOTE: this subroutine FORCES MSG and STDB reporting to be QUIET 
#           while hkp runs because I figured if MSG and STDB 
#           reporting are up for this script, we are debugging the 
#           script, NOT hkp.  We can change this if we want.
#
# Input: dataset_list_reference, resource, and  data_id
#
# Return: none because it runs the update_db_tool 
# Dies:   when update_db_tool produces no output at all, or when its output
#         contains one of the failure markers
#
# 08/02/10 59963  Sherbert   update_db_tool already shows its queries
#----------------------------------------------------------------------------
sub hkp_datasets {   
    my $nm = "hkp_datasets";
    PrintMsg( "D", "-----subBeg----- ", $nm );
    
    my ($ds_ref, $resource, $data_id) = @_;

    my $msg = "Processing with data_id $data_id with resource $resource." ;
    PrintMsg( "I", $msg, $nm ) ;
    foreach my $dataset (@$ds_ref) {
        PrintMsg( "I", "Starting housekeeping data update for dataset $dataset. ", $nm ) ;

        # use Bourne shell redirection to mix stderr with stdout
        # NOTE(review): the arguments are interpolated into a shell command
        # line unquoted; callers must pass trusted values.
        my $command = "update_db_tool -r $resource -t $data_id $dataset 2>&1";
        PrintMsg( "I", "Using command: $command", $nm ) ;

        # update_db_tool is nicely verbose w/ no external help
        # external help actually makes the output harder to read so turn it
        # off.  "local" puts both variables back exactly as they were
        # (including "not set at all") when the block is left.
        my @update_out;
        {
            local $ENV{"MSG_REPORT_LEVEL"}  = "MSG_INFO" ; 
            local $ENV{"STDB_REPORT_LEVEL"} = "" ;   ## not necessary for update_db_tool
            @update_out = `$command`;
        }

        # No output at all is treated as "the command never ran".
        if ( !( scalar @update_out ) ) {
            die( $nm . "-failed to execute command: $command." ) ;
        }

        my $failed = 0;
        foreach my $line (@update_out) {
            ## print line to STDOUT IF line does NOT contain Opus_env,
            ## and it is not a line that ends at INFO, and it is not 
            ## a line containing just the thread count, e.g. (1).
            print $line if ( $line !~ /Opus_env::/        &&
                             $line !~ /-I-INFO(\s*)$/     &&
                             $line !~ /^(\s*)\([0-9]*\)$/  ) ;

            # check for failure statement
            if ( $line =~ m/-F-INFO/ || $line =~ m/STDB_DEFAULT_ERROR_HANDLER/ ) {
                $failed = 1;
            }
        }
        if ($failed) {
            die( $nm . "-update_db_tool failed for dataset $dataset." ) ;
        }
        PrintMsg( "I", "Completed housekeeping data update for dataset $dataset.", $nm ) ;
    }
    PrintMsg( "D", "-----subEnd----- ", $nm );
}   # end hkp_datasets

#----------------------------------------------------------------------------
# Name: get_regr_path 
# 
# Input: none
# 
# Return: the path file associated with INGEST_PATH_NAME in null.path
#         in the user's OPUS environment 
# 
#----------------------------------------------------------------------------
sub get_regr_path {
    # no arguments
    #
    # Read the null path file and return the value of its INGEST_PATH_NAME
    # keyword with a trailing ".path" extension removed.  Dies when the null
    # path file cannot be located or opened, or the keyword has no value.
    my $nm = "get_regr_path";
    PrintMsg( "D", "-----subBeg----- ", $nm );

    my $nullpath = `osfile_stretch_file "OPUS_DEFINITIONS_DIR:null.path"`;
    # Trim surrounding whitespace (including the trailing newline) so the
    # three-argument open below sees exactly the file name.
    $nullpath =~ s/^\s+|\s+$//g ;
    $nullpath =~ s#/{2,}#/#g ;  ## remove multiple slashes
    PrintMsg( "D", "null path is $nullpath", $nm );

    # An answer that still starts with the logical name is treated as
    # "could not be resolved".
    if ( $nullpath =~ m/^OPUS_DEFINITIONS_DIR/ ) {
        my $errMsg = $nm . "-Cannot find null path: $nullpath" ;
        die( $errMsg ) ;
    }
    PrintMsg( "I", "Obtained null path name: $nullpath", $nm ) ;

    # Lexical handle + 3-arg open: the file name can never be taken as a mode.
    my $path_fh;
    if ( !open( $path_fh, '<', $nullpath ) ) {
        my $errMsg = $nm . "-Cannot open $nullpath" ;
        die( $errMsg ) ;
    }

    # find regression path file name: first line of the form
    #   INGEST_PATH_NAME = value
    # Test and capture in ONE match so $1 is only read after a success, and
    # use a lexical record variable so the caller's $_ is left alone.
    my $regr_path;
    while ( my $record = <$path_fh> ) {
        if ( $record =~ m/^INGEST_PATH_NAME\s*=\s*(\S+)/ ) {
            $regr_path = $1;
            last;
        }
    }
    close $path_fh;

    if ( ! $regr_path ) {
        my $errMsg = $nm . "-Could not find INGEST_PATH_NAME in null.path." ;
        die( $errMsg ) ;
    }
    # remove extension ".path" if present -- only at the very end of the
    # value, so a ".path" embedded in the middle of a name is left intact
    $regr_path =~ s/(\S)\.path\z/$1/;
    PrintMsg( "I", "Found INGEST_PATH_NAME: $regr_path", $nm ) ;
    my $msg = "Returning regr_path: $regr_path "  ;
    PrintMsg( "D", $msg, $nm );
    PrintMsg( "D", "-----subEnd----- ", $nm );
    return $regr_path;
}   # end get_regr_path

#----------------------------------------------------------------------------
# Name: get_path_dirs 
#       read path file keywords to check the value of OVERRIDE_REQUIRED and get
#       the values of INGEST_TRIGGER_ROOT and INGEST_SOURCE_DIR.
#
# Input: the path name, likely from get_regr_path
#
# Returns: INGEST_TRIGGER_ROOT
#          INGEST_SOURCE_DIR
#
#----------------------------------------------------------------------------
sub get_path_dirs {
    # Input:   $path_name - path name without the ".path" extension
    # Returns: ( INGEST_TRIGGER_ROOT value, INGEST_SOURCE_DIR value )
    # Dies when the path file cannot be located or opened, when any of the
    # three keywords is missing, or when OVERRIDE_REQUIRED is not "Y".
    my $nm = "get_path_dirs";
    PrintMsg( "D", "-----subBeg----- ", $nm );

    my ($path_name) = @_;
    my $pathfilename = $path_name.".path";      ## short version
    my $pathfile = `osfile_stretch_file "OPUS_DEFINITIONS_DIR:$pathfilename"`;
    # Trim surrounding whitespace (including the trailing newline) so the
    # three-argument open below sees exactly the file name.
    $pathfile =~ s/^\s+|\s+$//g ;
    $pathfile =~ s#/{2,}#/#g ;  ## remove multiple slashes

    # An answer that still starts with the logical name is treated as
    # "could not be resolved".
    if ( $pathfile =~ m/^OPUS_DEFINITIONS_DIR/ ) {
        my $errMsg = $nm . "-Invalid path name: $pathfilename" ;
        die( $errMsg ) ;
    }
    PrintMsg( "I", "Obtained path file name: $pathfile", $nm ) ;

    # Lexical handle + 3-arg open: the file name can never be taken as a mode.
    my $path_fh;
    if ( !open( $path_fh, '<', $pathfile ) ) {
        my $errMsg = $nm . "-Cannot open $pathfile" ;
        die( $errMsg ) ;
    }

    # check path file keywords
    my $override;
    my $trig_dir;
    my $source_dir;

    # Each keyword is tested and captured in ONE match, so a line that merely
    # starts with a keyword (longer keyword name, or no "= value") can no
    # longer overwrite a value that was already found.  A lexical record
    # variable keeps the caller's $_ untouched.
    while ( my $record = <$path_fh> ) {
        if ( $record =~ m/^OVERRIDE_REQUIRED\s*=\s*(\S+)/ ) {
            $override = $1;
        }
        elsif ( $record =~ m/^INGEST_TRIGGER_ROOT\s*=\s*(\S+)/ ) {
            $trig_dir = $1;
        }
        elsif ( $record =~ m/^INGEST_SOURCE_DIR\s*=\s*(\S+)/ ) {
            $source_dir = $1;
        }
        # stop reading as soon as all three keywords have a value
        last if ( $override && $trig_dir && $source_dir );
    }
    close $path_fh;

    if ( ! $override ) {
        my $errMsg = $nm . "-Could not find OVERRIDE_REQUIRED in $pathfilename." ;
        die( $errMsg ) ;
    }
    if ( ! $trig_dir ) {
        my $errMsg = $nm . "-Could not find INGEST_TRIGGER_ROOT in path file." ;
        die( $errMsg ) ;
    }
    if ( ! $source_dir ) {
        my $errMsg = $nm . "-Could not find INGEST_SOURCE_DIR in path file." ;
        die( $errMsg ) ;
    }

    PrintMsg( "I", "Found INGEST_TRIGGER_ROOT: $trig_dir", $nm ) ;
    PrintMsg( "I", "Found INGEST_SOURCE_DIR: $source_dir", $nm ) ;

    # The value is only validated after all keywords were reported above.
    if ($override ne "Y") {
        my $errMsg = $nm . "-OVERRIDE_REQUIRED must be Y in $pathfilename." ;
        die( $errMsg ) ;
    }
    my $msg = "Returning trig_dir, source_dir: ($trig_dir, $source_dir) "  ;
    PrintMsg( "D", $msg, $nm );
    PrintMsg( "D", "-----subEnd----- ", $nm );
    return ($trig_dir, $source_dir);
}   # end get_path_dirs

#----------------------------------------------------------------------------
# Name: set_data_source 
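#       verify the data source length (must be 4 characters), delete any old
#       source_override.* files from the source directory, and create an
#       empty source_override.<data source in lower case> file there
#
# Input:  the source directory and the data source value
#
# Return: none; it only removes old override files and creates the new one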
# 
#----------------------------------------------------------------------------
sub set_data_source {
    # Input:  $override_dir - directory for the override file; it is used as
    #                         a plain prefix, so it must already end in "/"
    #         $data_source  - data source value, exactly 4 characters
    # Dies when the length is wrong or the override file cannot be created.
    my $nm = "set_data_source";
    PrintMsg( "D", "-----subBeg----- ", $nm );

    my ($override_dir,$data_source) = @_;
    my $override_name = "source_override";

    if (length( $data_source) != 4) {
        my $errMsg = $nm . "-Data source value must be 4 characters." ;
        die( $errMsg ) ;
    }

    # delete any old files (source_override.<anything>)
    my $unlink_count = unlink glob( $override_dir.$override_name.".*");
    if ($unlink_count) {
        PrintMsg( "I", "Deleted old file from $override_dir.", $nm ) ;
    }
    else {
        PrintMsg( "I", "No old files found in $override_dir.", $nm ) ;
    }

    # create source override file; only its name matters, it stays empty
    my $override = $override_dir.$override_name.".".lc($data_source);

    # Lexical handle + 3-arg open: characters in the directory or data source
    # can never be interpreted as part of the open mode.
    my $override_fh;
    if ( !open( $override_fh, '>', $override ) ) {
        my $errMsg = $nm . "-Error - cannot create new file - protection issue?" ;
        die( $errMsg ) ;
    }
    close $override_fh;
    PrintMsg( "I", "Created $override", $nm ) ;
    PrintMsg( "D", "-----subEnd----- ", $nm );
}   # end set_data_source

#----------------------------------------------------------------------------
# Name: get_req_resource
#
#     append the file_mask, "*req.resource", to the definitions_dir 
#     and grep all the resource files using the given data_id to find the
#     one resource file for this data_id.
#
#    Made this look exactly like the sub to find resource for HKP, but 
#         egrep is different
#         data_id is uc here 
#         NOT OK if there is no resource for data_id
#
# Input: the definitions_dirs and the data_id
#
# Return: appropriate resource file name for running GENREQ
#
# History:
# mm/dd/yy PR       Who         Reason
# -------- -------- ----------  ------------------------------------------------
# 08/26/10 64600    Sherbert    do not return .resource, 'tis unnecessary
# 
#----------------------------------------------------------------------------
sub get_req_resource {
    # Input:  $def_dir_ref - reference to the list of definitions directories,
    #                        searched in the order given
    #         $data_id     - data id; upper-cased before it is searched for
    # Return: name of the matching *req.resource file without directory and
    #         without anything after the first "."
    # Dies when a directory holds more than one matching file, or when no
    # directory holds any.
    my $nm = "get_req_resource";
    PrintMsg( "D", "-----subBeg----- ", $nm );

    my ($def_dir_ref, $data_id) = @_;
    # dereference into a copy so the caller's list is not modified when the
    # trailing slash is appended below
    my @def_dirs = @$def_dir_ref ;
    my $best_file ;

    my $uc_data_id = uc($data_id);

    foreach my $def_dir ( @def_dirs ) {
        # add trailing slash if missing
        if ($def_dir !~m#/$#) {
            $def_dir .= "/";
        }
        my $req_mask = $def_dir."*req.resource";
        # The pattern is single-quoted for the shell and the backslash is
        # doubled for Perl, so egrep really receives "\." (a literal dot).
        # The file mask stays unquoted so the shell expands it.
        my $command = "egrep -l '$uc_data_id\\.DATASET_DIR' $req_mask 2> /dev/null " ;
        PrintMsg( "D", "command is $command", $nm ) ;
        my @grep_out = `$command`;
        my $num_files = scalar @grep_out;

        # This is not the resource file directory you want.  Try again.
        next if ( $num_files == 0 );

        # More than one candidate in the same directory is ambiguous.
        if ( $num_files > 1 ) {
            my $errMsg = $nm . "-Too many files matched in $command " ;
            die( $errMsg ) ;
        }

        # found 1 match, save it
        # because we fed in prioritized directories, this is the best file
        chomp( $grep_out[0] );
        $best_file = $grep_out[0] ;
        last ;
    }
    if ( !defined $best_file ) {
        ## Hmm, no files found for data_id.  This is BAD.
        my $errMsg = $nm . "-NO *req.resource files contain $uc_data_id " ;
        die( $errMsg ) ;
    }
    ## Return just the file name, no dirs
    use File::Spec ;
    my ( undef, undef, $resource ) = File::Spec->splitpath( $best_file ) ;
    # We do not need the '.resource' either
    ( $resource, undef ) = split /\./, $resource ;
    my $msg = "Returning resource: $resource "  ;
    PrintMsg( "D", $msg, $nm );
    PrintMsg( "D", "-----subEnd----- ", $nm );
    return $resource ;
}   # end get_req_resource  

#----------------------------------------------------------------------------
# Name: request_datasets 
#     Run genreq in interactive mode and then create trigger file in 
#     the Ingest pipeline 
#
#     NOTE: this subroutine FORCES MSG and STDB reporting to be QUIET 
#           while genreq runs because I figured if MSG and STDB 
#           reporting are up for this script, we are debugging the 
#           script, NOT GENREQ.  We can change this if we want.
#
# Input: dataset_list_reference, data_id, resource, and trigger_dir
#
# Return: none because it runs GENREQ
#
# 07/23/10 59963  Sherbert   cd into data dir here?
#----------------------------------------------------------------------------
sub request_datasets {
    # Input:  $ds_ref   - reference to the array of dataset names
    #         $data_id  - data id handed to genreq with -t; also used as the
    #                     extension of the trigger file
    #         $resource - resource name handed to genreq with -r
    #         $trig_dir - trigger directory; used as a plain prefix, so it
    #                     must already end in "/"
    # Dies when genreq does not report success for a dataset or the trigger
    # file cannot be created.
    my $nm = "request_datasets" ;
    PrintMsg( "D", "-----subBeg----- ", $nm );

    my ($ds_ref, $data_id, $resource, $trig_dir) = @_;

    my $msg = "Processing with data_id $data_id with resource $resource.";
    PrintMsg( "I", $msg, $nm ) ;
    PrintMsg( "I", "Using trigger directory $trig_dir.", $nm ) ;

    foreach my $dataset (@$ds_ref) {
        PrintMsg( "I", "Processing dataset $dataset.", $nm ) ;
        # use Bourne shell redirection to mix stderr with stdout
        my $command = "genreq -r $resource -t $data_id $dataset 2>&1";
        PrintMsg( "D", $command, $nm ) ;

        ## Decrease the amount of messaging from genreq while it runs, then
        ## set the levels to what msgVerbosity() / dbVerbosity() reported.
        my $msgLevel = msgVerbosity() ;
        my $sdbLevel = dbVerbosity() ;
        $ENV{"MSG_REPORT_LEVEL"}  = "MSG_INFO" ;
        $ENV{"STDB_REPORT_LEVEL"} = "" ;
        my @genreq_out = `$command`;
        $ENV{"MSG_REPORT_LEVEL"}  = $msgLevel  ;
        $ENV{"STDB_REPORT_LEVEL"} = $sdbLevel  ;

        my $successful = 0;
        foreach my $line (@genreq_out) {
            ## print line to STDOUT IF line does NOT contain Opus_env,
            ## and it is not a line that ends at INFO, and it is not
            ## a line containing just the thread count, e.g. (1).
            print $line if ( $line !~ /Opus_env::/        &&
                             $line !~ /-I-INFO(\s*)$/     &&
                             $line !~ /^(\s*)\([0-9]*\)$/  ) ;
            # check for success statement
            if (rindex( $line, "collected successfully") > -1) {
                $successful = 1;
                $msg = "Dataset $dataset successfully processed by genreq.";
                PrintMsg( "I", $msg, $nm );
            }
        }
        if (!$successful) {
            # show the complete, unfiltered genreq output on failure
            print @genreq_out;
            my $errMsg = $nm . "-genreq failed for dataset $dataset." ;
            die( $errMsg ) ;
        }

        # create an empty trigger file named <trig_dir><dataset>.<data_id>
        # Lexical handle + 3-arg open: the name can never be taken as a mode.
        my $trigger_file = "$trig_dir$dataset.$data_id";
        my $trigger_fh;
        if ( !open( $trigger_fh, '>', $trigger_file ) ) {
            my $errMsg = $nm . "-Cannot open $trigger_file" ;
            die( $errMsg ) ;
        }
        $msg = "Created $trigger_file.";
        PrintMsg( "I", $msg, $nm ) ;
        close $trigger_fh;
    }
    PrintMsg( "D", "-----subEnd----- ", $nm );
}   # end request_datasets

#----------------------------------------------------------------------------
# Name: check_pipeline
#     Use osf_test to report on any OSF datasets in path. Exit with error
#     message if any OSF datasets were found.
#
# Input: the path name for the regression Ingest pipeline
#
# Return: none
#
#----------------------------------------------------------------------------
sub check_pipeline {
    # Input: $pathname - path name handed to "osf_test -p"
    # Runs osf_test and dies if the command reports a non-zero status or if
    # it prints anything at all (any output is taken as a list of OSFs still
    # present in the pipeline).
    my ($pathname) = @_;
    my $nm = "check_pipeline";
    PrintMsg( "D", "-----subBeg----- ", $nm );
    PrintMsg( "D", "input is $pathname", $nm );

    my $command = "osf_test -p $pathname -pr dataset";
    PrintMsg( "I", "Pipeline test: $command", $nm ) ;
    my $osf_datasets = `$command`;
    # $? must be inspected right after the backticks, before anything else
    # has a chance to run another command.
    die( $nm . "-Cannot execute command: $command" ) if ($?);

    if ( $osf_datasets ne "" ) {
        # report what was found, one error message per entry, then give up
        my @busy_report = (
            "The osf_test tool for path $pathname has found pipeline OSFs.",
            "The following OSF datasets were found in pipeline $pathname",
            $osf_datasets,
            "This tool must run only after all OSFs are gone.",
        );
        foreach my $text (@busy_report) {
            PrintMsg( "E", $text, $nm ) ;
        }
        die( $nm . "-Pipeline busy." ) ;
    }

    PrintMsg( "I", "$pathname is idle.", $nm ) ;
    PrintMsg( "D", "-----subEnd----- ", $nm );
}   # end check_pipeline

#----------------------------------------------------------------------------
# Name: rename_extension (Only called in ingest_fuse (no longer needed)).
#----------------------------------------------------------------------------
sub rename_extension {
    # two arguments - the old extension (without the dot) and the new extension
    #
    # The files must be in the default directory.
    # Every file matching *.<old_ext> is renamed to the same base name with
    # .<new_ext>.  Dies on the first rename that fails.
    my $nm = "rename_extension";
    PrintMsg( "D", "-----subBeg----- ", $nm );

    my ($old_ext,$new_ext) = @_;
    my @file_list = glob("*.$old_ext");
    my $file_count = scalar @file_list;

    if ($file_count) {
        foreach my $old_file (@file_list) {
            my $new_file = $old_file;
            # Replace only the extension at the END of the name (\z), and
            # treat the old extension as literal text (\Q..\E), so an earlier
            # ".<old_ext>" inside the base name is never the one rewritten.
            $new_file =~ s/\.\Q$old_ext\E\z/.$new_ext/;
            if (!rename ($old_file,$new_file)) {
                my $errMsg = $nm . "-Failed to rename $old_file to $new_file" ;
                die( $errMsg ) ;
            }
        }
        my $msg =  "$file_count file extensions renamed from " ;
        $msg = $msg . "$old_ext to $new_ext.";
        PrintMsg( "I", $msg, $nm ) ;
    }
    PrintMsg( "D", "-----subEnd----- ", $nm );
}   # end rename_extension

#----------------------------------------------------------------------------
# Name: clean_request_members 
# Input:  db -- connection to OPUS db for call to delete_request_members_group
#         db_ref -- reference to an array of group names; each element is
#                   passed on as the group_name of delete_request_members_group
#         data_id
# Return: none -- wrapper to delete_request_members_group
#
# History:
# mm/dd/yy PR_num Name      Description
# -------- ------ ---------- ---------------------------------------------------
# 02/23/10 59963  Sherbert   Add db connection as input parameter
#----------------------------------------------------------------------------
sub clean_request_members {
    # Input: $db      - database connection, handed through unchanged
    #        $db_ref  - reference to an array; every element is used as a
    #                   group name
    #        $data_id - data id, handed through unchanged
    # Calls delete_request_members_group once per element of @$db_ref.
    my ( $db, $db_ref, $data_id ) = @_;
    my $nm = "clean_request_members" ;
    PrintMsg( "D", "-----subBeg----- ", $nm );

    # trace the incoming arguments
    PrintMsg( "D", "in:  dbConn is $db ", $nm ) ;
    PrintMsg( "D", "in:  db_ref is $db_ref ", $nm ) ;
    PrintMsg( "D", "in: data_id is $data_id ", $nm ) ;

    for my $group ( @{$db_ref} ) {
        # log the call about to be made, then make it
        PrintMsg( "D", "delete_request_members_group( $db, $group, $data_id )", $nm ) ;
        delete_request_members_group( $db, $group, $data_id ) ;
    }
    PrintMsg( "D", "-----subEnd----- ", $nm );
}   # end clean_request_members

#----------------------------------------------------------------------------
# Name: delete_request_members_group 
# Input:  db -- connection to OPUS db
#         group_name
#         data_id
# Return: none -- deletes the request_members rows in the OPUS db that match
#         the given group_name and data_id
#
# History:
# mm/dd/yy PR_num Name      Description
# -------- ------ ---------- ---------------------------------------------------
# 02/23/10 59963  Sherbert   Add db connection as input parameter
#----------------------------------------------------------------------------
sub delete_request_members_group {
    # Input: $db         - database connection handed to DoDBI
    #        $group_name - value matched against rqm_group_name
    #        $data_id    - value matched against rqm_data_id
    # Builds one DELETE statement, runs it through DoDBI and reports the
    # count DoDBI returns when that count is true.
    my ( $db, $group_name, $data_id ) = @_;
    my $nm = "delete_request_members_group" ;
    PrintMsg( "D", "-----subBeg----- ", $nm );

    # NOTE(review): both values are interpolated straight into the SQL text;
    # callers must only pass trusted group names and data ids.
    my $query = "DELETE request_members \n"
              . "WHERE rqm_group_name = '$group_name' and rqm_data_id = '$data_id'\n";
    PrintMsg( "D", "query:\n$query", $nm );

    my $count = DoDBI($db, $query);
    PrintMsg( "I", "Deleted $count records from request_members for $group_name.", $nm )
        if ($count);

    PrintMsg( "D", "-----subEnd----- ", $nm );
}   # end delete_request_members_group

#----------------------------------------------------------------------------
# Name: get_root_dir
#     read path file to get the value of INGEST_PATH_ROOT.
#
# Input: the path name
#
# Return: INGEST_PATH_ROOT 
#
#----------------------------------------------------------------------------
sub get_root_dir {
    # Input:  $path_name - path name without the ".path" extension
    # Return: value of the INGEST_PATH_ROOT keyword in that path file
    # Dies when the path file cannot be located or opened, or when the
    # keyword has no value.
    my $nm = "get_root_dir" ;
    PrintMsg( "D", "-----subBeg----- ", $nm );

    my ($path_name) = @_;
    my $pathfilename = $path_name.".path";
    my $pathfile = `osfile_stretch_file "OPUS_DEFINITIONS_DIR:$pathfilename"`;
    # The command output ends in a newline.  Trim surrounding whitespace
    # explicitly so the three-argument open below sees exactly the file name.
    $pathfile =~ s/^\s+|\s+$//g ;

    # An answer that still starts with the logical name is treated as
    # "could not be resolved".
    if ( $pathfile =~ m/^OPUS_DEFINITIONS_DIR/ ) {
        my $errMsg = $nm . "-Invalid path name: $pathfilename" ;
        die( $errMsg ) ;
    }

    # Lexical handle + 3-arg open: the file name can never be taken as a mode.
    my $path_fh;
    if ( !open( $path_fh, '<', $pathfile ) ) {
        my $errMsg = $nm . "-Cannot open $pathfile" ;
        die( $errMsg ) ;
    }

    # check path file keywords: first line of the form
    #   INGEST_PATH_ROOT = value
    # Test and capture in ONE match so $1 is only read after a success, and
    # use a lexical record variable so the caller's $_ is left alone.
    my $root_dir;
    while ( my $record = <$path_fh> ) {
        if ( $record =~ m/^INGEST_PATH_ROOT\s*=\s*(\S+)/ ) {
            $root_dir = $1;
            last;
        }
    }
    close $path_fh;

    if ( ! $root_dir ) {
        my $errMsg = $nm . "-Could not find INGEST_PATH_ROOT in $pathfilename." ;
        die( $errMsg ) ;
    }
    PrintMsg( "I", "Found INGEST_PATH_ROOT: $root_dir", $nm ) ;
    return $root_dir;
}   # end get_root_dir

#----------------------------------------------------------------------------
# Name: check_datasets 
#     Verify file with required extension exists for each dataset
#
# Input: dataset_list_reference, directory and required_ext
#
# Return: none
#
#----------------------------------------------------------------------------
sub check_datasets {
    # Input: $ds_ref  - reference to the array of dataset names
    #        $datadir - directory prefix, joined to the name as-is
    #        $req_ext - required extension, joined to the name as-is
    # Dies at the first dataset for which <datadir><dataset><req_ext> does
    # not exist.
    my ($ds_ref, $datadir, $req_ext) = @_;
    my $nm = "check_datasets" ;
    PrintMsg( "D", "-----subBeg----- ", $nm );

    PrintMsg( "I",
              "Checking that every dataset has a file with extension $req_ext.",
              $nm ) ;

    for my $ds ( @{$ds_ref} ) {
        my $wanted = $datadir . $ds . $req_ext;
        unless ( -e $wanted ) {
            die( $nm . "-Missing required file $wanted." ) ;
        }
    }
}   # end check_datasets

#----------------------------------------------------------------------------
# Name: copy_datasets 
#     For each dataset, copy dataset files from input to output and
#     create trigger file.
#
# Input: dataset_list_reference, data_id, input_dir, output_dir,
#                 and trigger_dir
#
# Return: none because it does a copy 
#
#----------------------------------------------------------------------------
sub copy_datasets {
    # Input: $ds_ref   - reference to the array of dataset names
    #        $data_id  - used as the extension of each trigger file
    #        $in_dir   - directory the dataset files are copied from; a
    #                    trailing "/" is added when missing
    #        $out_dir  - destination handed to cp
    #        $trig_dir - trigger directory; used as a plain prefix, so it
    #                    must already end in "/"
    # Dies when a cp command fails or a trigger file cannot be created.
    my $nm = "copy_datasets" ;
    PrintMsg( "D", "-----subBeg----- ", $nm );

    my ($ds_ref, $data_id, $in_dir, $out_dir, $trig_dir) = @_;
    # add trailing slash if missing
    if ($in_dir !~m#/$#) {
        $in_dir .= "/";
    }

    my $msg = "Copying with data_id $data_id from $in_dir to $out_dir.";
    PrintMsg( "I", $msg, $nm ) ;
    PrintMsg( "I", "Using trigger directory $trig_dir.", $nm ) ;
    foreach my $dataset (@$ds_ref) {
        PrintMsg( "I", "Copying dataset $dataset.", $nm ) ;

        # the shell expands the "<dataset>.*" wildcard, so this has to stay
        # a single command string
        my $command = "cp $in_dir$dataset.* $out_dir";
        if (system( $command)) {
            my $errMsg = $nm . "-failed command: $command." ;
            die( $errMsg ) ;
        }

        # create an empty trigger file named <trig_dir><dataset>.<data_id>
        # Lexical handle + 3-arg open: the name can never be taken as a mode.
        my $trigger_file = "$trig_dir$dataset.$data_id";
        my $trigger_fh;
        if ( !open( $trigger_fh, '>', $trigger_file ) ) {
            my $errMsg = $nm . "-Cannot open $trigger_file" ;
            die( $errMsg ) ;
        }
        PrintMsg( "I",  "Created $trigger_file.", $nm ) ;
        close $trigger_fh;
    }
}   # end copy_datasets

#----------------------------------------------------------------------------
# Name: get_cal_oms_datasets 
#      get list of files in given directory and return list of dataset names
#      using in order "*_*.fits" and "*.???". If the second pattern is found
#      it must be for OMS data. If the first pattern is for OMS the
#      files must be renamed to use the second pattern.
#
# Input: src_dir       -- location of data
#        archive_class -- type of data
# 
#
# Note: all data in src_dir must be for same archive_class
#
# Return: array of dataset names
#
# Question: need that array contain $src_dir pre-pended?
# 
# History:
# 01/30/06 53339 MSwam   corrected typo in regex
# 05/08/08 59760 MSwam   need to handle COS cal files with possible
#                          embedded underscores in file suffix
# 08/18/10 59963 Sherbert Be more flexible with file types
#
#----------------------------------------------------------------------------
sub get_cal_oms_datasets {
    # Input:  $src_dir       - directory holding the data; the process
    #                          changes into it and stays there
    #         $archive_class - "CAL", anything else is handled as OMS
    # Return: list of dataset names, each name once, in the order in which
    #         it is first seen
    # Side effect: for archive_class "OMS", files whose name contains an
    #         underscore are renamed to <dataset>.<suffix>.
    # Dies when the directory cannot be entered, when jitter files are found
    # in a CAL request, when no files match, or when a rename fails.
    my $nm = "get_cal_oms_datasets" ;
    PrintMsg( "D", "subBeg... ", $nm );

    my ($src_dir, $archive_class) = @_;
    PrintMsg( "D", "in:       src_dir is $src_dir ", $nm ) ;
    PrintMsg( "D", "in: archive_class is $archive_class ", $nm ) ;
    my @file_list;
    my $msg ;

    ## cd to src_dir to make globs easier
    if ( ! chdir( $src_dir ) ) {
        my $errMsg = $nm . "-Cannot cd to $src_dir" ;
        die( $errMsg ) ;
    }

    PrintMsg( "D", "archive_class is $archive_class", $nm ) ;
    if ($archive_class eq "CAL") {
        # verify there are no jitter files in directory
        my @oms_files = glob( "*.j[iw][ft]");
        if (scalar @oms_files > 0) {
            $msg =  "Found ".(scalar @oms_files) ;
            $msg = $msg . " OMS files in $src_dir." ;
            PrintMsg( "E", $msg, $nm ) ;
            my $errMsg = $nm . "-Cannot process OMS file in CAL class request." ;
            die( $errMsg ) ;
        }

        # CAL files look like dataset_suffix.fits
        @file_list = glob( "*_*.fits");
        if ((scalar @file_list) == 0) {
            my $errMsg = $nm . "-No CAL files found in $src_dir directory." ;
            die( $errMsg ) ;
        }
    }
    else {
        # every archive_class other than "CAL" takes the OMS route:
        # collect both OMS naming styles, dataset.ext first ...
        push( @file_list, glob( "*.j[iw][ft]" ) ) ;
        # ... then converted OMS dataset_suffix.fits
        push( @file_list, glob( "*_j[iw][ft].fits" ) ) ;

        if ((scalar @file_list) == 0) {
            my $errMsg = $nm . "-No OMS files found in $src_dir directory." ;
            die( $errMsg ) ;
        }
    }

    my @datasets = ();
    # Names already returned.  The OMS list is two glob results glued
    # together, so the same dataset can show up in non-adjacent positions;
    # comparing with the previous entry only would let it in twice.
    my %seen;

    foreach my $file (@file_list) {
        # COS science products can have suffixes that are
        #   - greater than 3chars
        #   - include embedded underscores
        # Extract the rootname that is the minimal number of characters
        # before the 1st underscore and the suffix which is between the
        # 1st underscore and the 1st period.
        my $pIdx = index( $file, "." ) ;
        my $uIdx = index( $file, "_" ) ;
        my $dataset_name;
        my $suffix;

        if ( $uIdx > -1 ) {
            ## name like rootname_suffix.fits
            $dataset_name = substr( $file, 0, $uIdx ) ;
            ## Be sure to skip the '_' and the "."
            $suffix       = substr( $file, $uIdx + 1, $pIdx-$uIdx-1 ) ;
        }
        else {
            ## name like rootname.ext; nothing to rename
            $dataset_name = substr( $file, 0, $pIdx ) ;
            $suffix       = substr( $file, $pIdx+1 ) ;
        }

        # Only OMS files that carry an underscore are renamed.
        if ( $uIdx > -1 && $archive_class eq 'OMS' ) {
            # rename OMS to dataset.ext for use by "genreq -r fgsreq".
            my $newname = $dataset_name.".".$suffix;
            if (!rename( $file, $newname)) {
                my $errMsg = $nm . "-Failed to rename $file to $newname." ;
                die( $errMsg ) ;
            }
            $msg = "Renamed OMS to $newname " ;
            PrintMsg( "D", $msg, $nm ) ;
        }

        # add each dataset name once, keeping first-seen order
        if ( !$seen{$dataset_name}++ ) {
            PrintMsg( "I", "Adding $dataset_name to dataset list.", $nm ) ;
            push @datasets,$dataset_name;
        }
    }
    $msg = "Returning datasets: "  ;
    $msg = $msg . "(" . join(", ", @datasets) . ")";
    PrintMsg( "D", $msg, $nm );
    PrintMsg( "D", "subEnd... ", $nm );
    return @datasets;
}   # end get_cal_oms_datasets

#----------------------------------------------------------------------------
# Name: get_asn_datasets 
#     For each dataset, check if ninth character matches [0..9,a..i],
#     and if so, then check if the related asn name matches the last asn in the
#     list. If no match is found, check that the new asn is in the input
#     directory. If asn not present and this is OMS data, create a new
#     asn file. If asn_not_present and this is CAL data, then error exit.
#     Return the list of association datasets (all ending in "0").
#
# Input: input directory, dataset_list_reference, and archive_class
#
# Return: list of ASN rootnames
#
# 07/23/10 62692 Sherbert   catch ASN error
#----------------------------------------------------------------------------
sub get_asn_datasets {
    # three arguments: input directory, dataset_list_reference, archive_class
    #
    # Collect one association id (first 8 dataset characters plus "0") for
    # every run of datasets whose name ends in 0-9 or a-i.  Each new id is
    # checked for an <id>_asn.fits file in the input directory; a missing
    # file is fatal for CAL, otherwise asn_table is run to rebuild it.
    # Return the list of association ids; dies on any failure.
    my $nm = "get_asn_datasets" ;
    PrintMsg( "D", "-----subBeg----- ", $nm );

    my ($in_dir, $ds_ref, $archive_class) = @_;
    my @asn_list;
    my $previous_asn = " ";   # blank means no association seen yet

    DATASET:
    foreach my $dataset (@$ds_ref) {
        # only names ending in 0-9 or a-i belong to an association
        next DATASET unless $dataset =~ /[0-9a-i]$/;

        my $asn_id = substr( $dataset, 0, 8 ) . "0";

        # only adjacent duplicates are collapsed
        next DATASET if $asn_id eq $previous_asn;
        push @asn_list, $asn_id;
        $previous_asn = $asn_id;

        # build the asn fits file spec and squeeze repeated slashes
        my $asn_file_spec = $in_dir . "/" . $asn_id . "_asn.fits";
        $asn_file_spec =~ s{//+}{/}g ;
        next DATASET if -e $asn_file_spec;

        # a missing asn file cannot be rebuilt for CAL data
        if ($archive_class eq "CAL") {
            die( $nm . "-Missing $asn_file_spec" ) ;
        }

        # otherwise try to rebuild it with the asn_table program;
        # Bourne shell redirection mixes stderr into the captured stdout
        PrintMsg( "I", "Regenerating $asn_file_spec", $nm ) ;
        my $cmd = "asn_table -r bldasn " . $asn_id . ".fas 2>&1";
        my @asn_tab_out = `$cmd`;
        my $successful = 0;

        LINE:
        foreach my $line (@asn_tab_out) {
            # PR 62692: any ERROR line overrides an earlier success line
            if (index( $line, "ERROR" ) >= 0) {
                $successful = 0;
                PrintMsg( "E", "ASN file ERROR detected.", $nm ) ;
                last LINE ;
            }
            if (index( $line, "Table Completed" ) >= 0) {
                $successful = 1;
                PrintMsg( "I", "ASN file created.", $nm ) ;
            }
        }
        die( $nm . "-asn_table failed to regenerate ASN file." )
            unless $successful;
    }
    PrintMsg( "D", "-----subEnd----- ", $nm );
    return @asn_list;
}   # end get_asn_datasets

#----------------------------------------------------------------------------
# Name: get_request_datasets 
#     Make copy of dataset_list. Copy all association datasets to request_list.
#     For each association dataset, get listasn data from ASN file and 
#     for all datasets marked present remove them from the dataset_list.
#     Copy remaining datasets to the request list. Return the request list. 
#
# Input: input directory, dataset_list_reference, association_list_reference, 
#                 archive_class
#
# Return:  list of rootnames
#
#----------------------------------------------------------------------------
sub get_request_datasets {
    # four arguments: input directory, dataset_list_reference,
    #                 association_list_reference, archive_class
    #
    # Every association id goes on the request list first.  The members
    # reported by "listasn <asn file> -s" are then marked so they are not
    # requested a second time, and every dataset still unmarked is appended
    # in its original order.
    # Return the request list; dies if listasn fails or prints nothing.
    my $nm = "get_request_datasets" ;
    PrintMsg( "D", "-----subBeg----- ", $nm );

    my ($in_dir, $ds_ref, $asn_ref, $archive_class) = @_;
    my @request_list = ();
    my %ds_hash;    # dataset => "T" (request it) or "F" (already covered)

    foreach my $dataset (@$ds_ref) {
        $ds_hash{$dataset} = "T";  # default hash value is T (true)
    }
    foreach my $asn_id (@$asn_ref) {
        push @request_list, $asn_id;

        # both "T" and "F" are true strings, so this is an existence test
        if ( $ds_hash{$asn_id} ) {
            $ds_hash{$asn_id} = "F";  # don't add to request list
        }
        # get all members of association from ASN file
        my $asn_filespec = $in_dir."/".$asn_id."_asn.fits";
        $asn_filespec =~ s#/{2,}#/#g ;  ## remove multiple slashes
        my @table = `listasn $asn_filespec -s`;

        if ($?) {
            my $errMsg = $nm . "-Error status returned from listasn for $asn_filespec." ;
            die( $errMsg ) ;
        }
        if (!@table) {
            my $errMsg = $nm . "-No output lines from listasn for $asn_filespec." ;
            die( $errMsg ) ;
        }
        # check each line in the table for present datasets
        # (lexical loop variable: no package globals, $_ left untouched)
        foreach my $line (@table) {
            chomp $line;   # remove newline
            # extract $asn_dataset from first 9 characters, extract $member_type
            # from middle field separated by single whitespace characters, and
            # extract $present from either the value T or F at end of line.
            my ($asn_dataset,$member_type,$present) =
                                 ($line=~/^(\S{9})\s(\S+)\s([TF])$/);

            # NOTE(review): "F" is a true string in Perl, so this branch is
            # taken for every line that matches the pattern, whether the flag
            # is T or F.  If only members flagged T should be excluded, this
            # needs to be ($present eq "T") -- confirm the intent before
            # changing it.
            if ( $present ) {
                my $mem_dataset;
                if ($archive_class eq "OMS" && ($member_type !~ m/^PROD/)) {
                    # last dataset character of OMS exposures is always "j".
                    $mem_dataset = substr(lc($asn_dataset),0,8)."j";
                } else {
                    $mem_dataset = lc($asn_dataset);
                }
                if ($mem_dataset ne $asn_id) {
                    if ( $ds_hash{$mem_dataset} ) {
                        $ds_hash{$mem_dataset} = "F";  # don't add to request list
                    }
                }
            }
        }
    }
    foreach my $dataset (@$ds_ref) {
        if ($ds_hash{$dataset} eq "T") {
            push @request_list, $dataset; # add non-asn dataset to request
        }
    }
    my $msg = "Returning request_list: "  ;
    $msg = $msg . "(" . join(", ", @request_list) . ")";
    PrintMsg( "D", $msg, $nm );
    PrintMsg( "D", "-----subEnd----- ", $nm );
    return @request_list;  # contains either asn_ids or non-asn datasets
}   # end get_request_datasets

#----------------------------------------------------------------------------
# Name:  request_sublists 
#
# purpose: Split a request list into sublists of consecutive like datasets
#
# Input:   request_list_reference, archive_class
#
# Returns: list of sublists
#          
# modification history:
#
#   date    opr     who     reason
# -------- -----  --------  --------------------------------------
# 06/17/10 62962  Sherbert  add consistent comment block
#
#----------------------------------------------------------------------------
sub request_sublists {
    # two arguments: request_list_reference, archive_class
    #
    # Walk the request list in order and start a new sublist whenever the
    # association flag of a dataset (ninth character eq "0") differs from
    # that of the previous dataset.
    #
    # For any archive class other than OMS (i.e. CAL), a change of
    # instrument code (first dataset char) starts a new sublist as well.
    #
    # NOTE: the array behind request_list_reference is consumed; it is
    # shifted until empty, so the caller's list is empty on return.
    #
    # Return list of sublists (one array reference per sublist)
    my $nm = "request_sublists" ;
    PrintMsg( "D", "-----subBeg----- ", $nm );

    my ($req_dsets_ref,$archive_class) = @_;
    my @sublist_list;
    my $list_index = 0;
    my $last_dset = " ";   # blank means there is no previous dataset yet

    while (scalar @$req_dsets_ref) {
        my $new_dset = shift @$req_dsets_ref;

        if ($last_dset ne " ") {
            my $last_is_asn = substr($last_dset,8,1) eq "0";
            my $new_is_asn  = substr($new_dset,8,1) eq "0";

            # a switch between association and singleton ends the sublist
            my $new_sublist = ($new_is_asn xor $last_is_asn);

            # non-OMS data is also divided by instrument code
            if (!$new_sublist && $archive_class ne "OMS") {
                $new_sublist = substr($new_dset,0,1) ne substr($last_dset,0,1);
            }
            if ($new_sublist) {
                # end of current sublist
                my $msg = "Sublist $list_index  size is " .
                        (scalar @{$sublist_list[$list_index]});
                PrintMsg( "D", $msg, $nm ) ;
                $list_index += 1;
            }
        }
        PrintMsg( "D", "Assigning $new_dset to sublist $list_index.", $nm ) ;
        push @{$sublist_list[$list_index]}, $new_dset;
        $last_dset = $new_dset;

        if (!scalar @$req_dsets_ref) {
            # at end of requests, sublist is done
            my $msg = "Sublist $list_index  size is " .
                   (scalar @{$sublist_list[$list_index]});
            PrintMsg( "D", $msg, $nm ) ;
        }
    }
    # show the members of each sublist; joining the array references
    # themselves would only print ARRAY(0x...) addresses
    my @sublist_text;
    foreach my $sublist (@sublist_list) {
        push @sublist_text, "[" . join(" ", @$sublist) . "]";
    }
    my $msg = "Returning sublist_list: "  ;
    $msg = $msg . "(" . join(", ", @sublist_text) . ")";
    PrintMsg( "D", $msg, $nm );
    PrintMsg( "D", "-----subEnd----- ", $nm );
    return @sublist_list;
}   # end request_sublists

#----------------------------------------------------------------------------
# Name:  hst_cal_oms_data_id 
#
# purpose: To determine the data_id for data being processed
# 
# Input:   dataset and archive_class
#          
#
# Returns:  data_id
#          
#
# modification history:
#
#   date    opr     who     reason
# -------- -----  --------  --------------------------------------
# 06/17/10 62962  Sherbert  add consistent comment block
# 
#----------------------------------------------------------------------------
sub hst_cal_oms_data_id {
    # two arguments: $dataset and archive_class
    #
    # Build a two-character lookup code: an instrument letter followed by
    # "a" (ninth dataset character is "0", an association) or "n" (anything
    # else).  The instrument letter is "f" for OMS and the first dataset
    # character otherwise.
    # Return the data_id mapped to that code; dies when there is no mapping.
    my $nm = "hst_cal_oms_data_id" ;
    PrintMsg( "D", "-----subBeg----- ", $nm );

    # PR ???? Why cannot this be read from DB?  It is wrong.  Fixing for WF3.
    my ($dataset, $archive_class) = @_;

    # lookup code => data_id (there is no "ua" entry)
    my %data_id_hash = (
        fa => "Fas",  fn => "Fgs",
        ia => "W3a",  in => "Wf3",
        ja => "Aca",  jn => "Acs",
        la => "Coa",  ln => "Cos",
        na => "Nas",  nn => "Nic",
        oa => "Sas",  on => "Sti",
                      un => "Wf2",
    );

    my $asn_code = ( substr($dataset,8,1) eq "0" ) ? "a" : "n";
    my $is_oms   = ( $archive_class eq "OMS" );
    my $did_code = ( $is_oms ? "f" : substr( $dataset,0,1) ) . $asn_code;

    # NOTE(review): $resource is not declared in this routine, so this
    # assignment sets a package variable; kept because code outside this
    # routine may read it -- confirm before removing.
    $resource = $is_oms ? "fgsreq" : "genreq";

    my $data_id = $data_id_hash{$did_code};  # look it up
    die( $nm . "-Invalid or unsupported dataset name: $dataset." )
        unless $data_id;

    PrintMsg( "D", "Returning data_id: $data_id ", $nm );
    PrintMsg( "D", "-----subEnd----- ", $nm );
    return $data_id;
}   # end hst_cal_oms_data_id   


#----------------------------------------------------------------------------
# Name: ingrsp_datasets 
#
# purpose: Run InteractiveIngResponse for every dataset in list.
#
# Input:  db -- connection to OPUS db for call to delete_request_members_group
#         ds_ref -- a reference to an array of ???
#         data_id
#
# Return: none because it runs InteractiveIngResponse
#
# History:
# mm/dd/yy PR_num Name      Description
# -------- ------ ---------- ---------------------------------------------------
# 02/23/10 59963  Sherbert   Add db connection as input parameter
#
#----------------------------------------------------------------------------
sub ingrsp_datasets {
    # three arguments: db connection, dataset_list_reference and data_id
    #
    # Run InteractiveIngResponse.csh for every dataset in the list and echo
    # its output, leaving out any line that contains "Opus_env::".  The run
    # counts as successful only when the LAST output line contains the
    # success string.  For association datasets (ninth character "0"),
    # delete_request_members_group is called afterwards.
    # Dies on the first dataset whose command gives no output or does not
    # report success.
    my $nm = "ingrsp_datasets" ;
    PrintMsg( "D", "-----subBeg----- ", $nm );

    my ( $db, $ds_ref, $data_id ) = @_;
    my $success_string = "Response Processing completed";

    foreach my $dataset (@$ds_ref) {
        PrintMsg( "I", "InteractiveIngResponse: processing dataset $dataset.", $nm );
        # 2>&1 mixes stderr into the captured output
        my $cmd = "InteractiveIngResponse.csh $dataset $data_id './' 2>&1";
        PrintMsg( "I", "Using command: $cmd", $nm ) ;
        my @task_out = `$cmd`;

        if (!@task_out) {
            my $errMsg = $nm . "-failed to execute command: $cmd" ;
            die( $errMsg ) ;
        }
        foreach my $line (@task_out) {
            if (index( $line, "Opus_env::") < 0) {
                print $line ;
            }
        }
        # check for success statement on the last line of output
        if (index( $task_out[-1], $success_string) > -1)
        {
            PrintMsg( "I",  "Virtual files copied for dataset $dataset.", $nm ) ;
        } else {
            my $errMsg = $nm . "-InteractiveIngResponse failed for dataset $dataset." ;
            die( $errMsg ) ;
        }
        # delete from request members tables
        if (substr( $dataset, 8, 1) eq "0") {
            # this is an association that needs a cleanup
            delete_request_members_group( $db, $dataset, $data_id ) ;
        }
    }
    PrintMsg( "D", "-----subEnd----- ", $nm );
}   # end ingrsp_datasets

#----------------------------------------------------------------------------
# Name:  ingest_sublist 
#
# purpose: Wrapper around call to request_datasets 
#
# Input:  db -- connection to OPUS db for call to clean_request_members
#         sublist_ref -- reference to an array of "primary" rootnames?
#         archive_class
#         trig_dir      -- hst trigger dir
#
# Return: none -- ultimately calls genreq or equiv.
#
# History:
# mm/dd/yy PR_num Name      Description
# -------- ------ ---------- ---------------------------------------------------
# 02/23/10 59963  Sherbert   Add db connection as input parameter
#
#----------------------------------------------------------------------------
sub ingest_sublist {
    # four arguments: db connection, sublist_reference, archive_class
    #                 and trigger directory
    #
    # The data_id comes from the first dataset of the sublist and the
    # archive class; the resource is "fgsreq" for OMS and "genreq"
    # otherwise.  When the first dataset is an association (ninth character
    # "0"), clean_request_members is called before request_datasets.
    # Errors from the called routines are re-thrown to the caller.
    my $nm = "ingest_sublist" ;
    PrintMsg( "D", "-----subBeg----- ", $nm );

    my ( $db, $sublist_ref, $archive_class, $trig_dir ) = @_;
    PrintMsg( "D", "in:   sublist_ref is $sublist_ref" , $nm );
    PrintMsg( "D", "in: archive_class is $archive_class" , $nm );
    PrintMsg( "D", "in:      trig_dir is $trig_dir" , $nm );

    my @members = @$sublist_ref;
    PrintMsg( "D",
              'in: de-reffed @sublist is  ' . "(" . join(", ", @members) . ")",
              $nm );

    my $data_id = eval { hst_cal_oms_data_id( $members[0], $archive_class) } ;
    die $@ if $@ ;
    PrintMsg( "D", "data_id is $data_id", $nm );

    PrintMsg( "D", "archive_class is $archive_class", $nm );
    my $resource = ( $archive_class eq "OMS" ) ? "fgsreq" : "genreq";
    PrintMsg( "D", "resource      is $resource    ", $nm );

    my $first_is_asn = substr( $members[0], 8, 1) eq "0";
    if ($first_is_asn) {
        # this is an association sublist that may need a cleanup
        PrintMsg( "D",
                  "clean_request_members( $db, $sublist_ref, $data_id)",
                  $nm ) ;
        clean_request_members( $db, $sublist_ref, $data_id);
    }

    PrintMsg( "D",
              "request_datasets( $sublist_ref, $data_id, $resource, $trig_dir )",
              $nm ) ;
    eval { request_datasets( $sublist_ref, $data_id, $resource, $trig_dir ) };
    if ( $@ ) {
        die $@ ;
    }
    PrintMsg( "D", "-----subEnd----- ", $nm );
}   # end ingest_sublist

#----------------------------------------------------------------------------
# Name:  ingrsp_sublist 
#
# purpose: Wrapper around call to ingrsp_datasets 
#          Get data_id and use ingrsp_datasets.
#
# Input:  db -- connection to OPUS db for call to ingrsp_datasets
#         sublist_ref -- a reference to an array of ???
#
# Return: none because it runs INGRSP
#
# History:
# mm/dd/yy PR_num Name       Description
# -------- ------ ---------- ---------------------------------------------------
# 02/23/10 59963  Sherbert   Add db connection as input parameter
#
#----------------------------------------------------------------------------
sub ingrsp_sublist {
    # two arguments: db connection and sublist_reference
    #
    # Look up the data_id for the first dataset of the sublist (always
    # using archive class "CAL") and hand the whole sublist to
    # ingrsp_datasets.  Errors from either call are re-thrown.
    my $nm = "ingrsp_sublist" ;
    PrintMsg( "D", "-----subBeg----- ", $nm );

    my ( $db, $sublist_ref ) = @_;
    my @members = @$sublist_ref;

    my $data_id = eval { hst_cal_oms_data_id( $members[0], "CAL") } ;
    if ( $@ ) {
        die $@ ;
    }

    eval { ingrsp_datasets( $db, $sublist_ref, $data_id ) } ;
    if ( $@ ) {
        die $@ ;
    }
    PrintMsg( "D", "-----subEnd----- ", $nm );
}   # end ingrsp_sublist

#----------------------------------------------------------------------------
# Name:  copy_cos_sublist 
#
# purpose: Wrapper around call to cat_cos_copies.py
#
# Input:  inDir -- location of files, we should already be there
#         sublist_ref -- a reference to an array of ???
#
#         cat_cos_copies.py takes a LONG list of input arguments, UNLESS 
#         those arguments are already defined in the environment...
#         KW_SERVER KW_DB OSF_DATASET OSF_DATA_ID SKIP_CLASSES OUTPATH
#         These could also be read in from the resource file, but since a 
#         couple have to do with OSFs, it did not make sense to try.
#         The catalog tools must be run in an OPUS environment, therefore 
#         the db info should be defined already.  SKIP_CLASSES is hard-coded 
#         because turns out the normal data_id values for it
#         would not be sent through catalog_hst_cal_oms.pl, the only tool 
#         that needs to call this subroutine.  OSF_DATA_ID is set to N/A 
#         because its exact value turns out to also be unimportant in this 
#         usage, as long as it does not match something in SKIP_CLASSES; 
#         handy since this catalog tool works from archive_class instead 
#         of data_id.  cat_cos_copies.py will check the input rootname 
#         to be sure it is COS, so sending all CAL data through here should
#         be safe, though maybe slow.
# 
#
#
# Return: none because it runs cat_cos_copies.py
#
# History:
# mm/dd/yy PR_num Name       Description
# -------- ------ ---------- ---------------------------------------------------
# 09/07/10 59963  Sherbert   Need the copies to catalog COS correctly
#
#----------------------------------------------------------------------------
sub copy_cos_sublist {
    # two arguments: directory passed as the last argument to
    #                cat_cos_copies.py, and sublist_reference
    #
    # Run cat_cos_copies.py once per rootname and print its output whether
    # it succeeded or not.  Exit code 0 is success and exit code 1 means
    # "not a COS dataset"; both are accepted.  Any other exit code, a
    # command that could not be started, or a command ended by a signal
    # is fatal.
    my $nm = "copy_cos_sublist" ;
    PrintMsg( "D", "-----subBeg----- ", $nm );

    my ( $caldir, $sublist_ref ) = @_;
    my @sublist = @$sublist_ref;    ## de-ref

    my $KW_SERVER   = $ENV{"KW_SERVER"} ;
    my $KW_DB       = $ENV{"KW_DB"} ;
    my $data_id     = "n/a" ;       ## Only needed for SKIP_CLASSES check
    my $skip_these  = "cdm,edl" ;   ## n/a will never equal one of these

    my $script  = 'cat_cos_copies.py' ;

    foreach my $rootname ( @sublist ) {
        my $command = "$script $KW_SERVER $KW_DB  $rootname $data_id $skip_these $caldir 2>&1";
        my @copy_out = `$command`;

        # Save $? straight away, before anything else can change it.
        my $raw_status    = $? ;
        my $pay_attention = $raw_status >> 8 ;   # exit code of the command
        my $signal        = $raw_status & 127 ;  # nonzero: ended by a signal
                                                 # (or $? was -1: not started)

        # I want output printed for success or failure.
        print @copy_out ;
        PrintMsg( "D", "FYI, unshifted return code was $raw_status ", $nm );

        # A signal leaves the exit-code bits at zero, so it has to be
        # tested separately or it would be mistaken for success.
        if ( $signal || ( $pay_attention && $pay_attention != 1 ) ) {
            my $msg = "pay_attention is $pay_attention\n" ;
            PrintMsg( "D", $msg, $nm );
            my $errMsg = $nm . "-Cannot execute command: $command" ;
            die( $errMsg ) ;
        }
    }

    PrintMsg( "D", "-----subEnd----- ", $nm );
}   # end copy_cos_sublist


#----------------------------------------------------------------------------
# Name:  catalog_sublist 
#
# purpose: Wrapper around call to catalog_datasets 
# 
# Input:   inDir - for COS copies, if needed
#          sublist_reference 
#          archive_class
#          
#
# Returns:  none; will die if call to catalog_datasets fails
#          
#
# modification history:
#
#   date    opr     who     reason
# -------- -----  --------  --------------------------------------
# 06/17/10 62962  Sherbert  add consistent comment block
# 09/23/10 59663  Sherbert  move COS copies to here and limit to COS 
# 
#----------------------------------------------------------------------------
sub catalog_sublist {
    # three arguments: inDir, sublist_reference and archive_class
    #
    # For OMS use archive class to determine data_id
    # For CAL use archive class and first dataset char to determine data_id.
    # The catalog resource is looked up from the instrument letter ("f" for
    # OMS, otherwise the first character of the first dataset).
    # For the cat_cos resource, copy_cos_sublist is run first; if it fails
    # the error is printed and the process exits.
    # Use catalog_datasets for sublist with lowercase data_id; its errors,
    # and those of the data_id lookup, are re-thrown.
    my $nm = "catalog_sublist" ;
    PrintMsg( "D", "-----subBeg----- ", $nm );

    my ($inDir, $sublist_ref, $archive_class) = @_;
    my %resource_hash = ("f"=>"cat_fgs",
                         "i"=>"cat_wf3",
                         "j"=>"cat_acs",
                         "l"=>"cat_cos",
                         "n"=>"cat_nic",
                         "o"=>"cat_sti",
                         "u"=>"cat_wf2");
    my @sublist = @$sublist_ref;
    my $data_id ;
    eval { $data_id = hst_cal_oms_data_id( $sublist[0], $archive_class) } ;
    die $@ if ( $@ ) ;
    $data_id = lc( $data_id ) ; # maybe too confusing for eval. better here.
    my $si_code;

    if ($archive_class eq "OMS") {
        $si_code = "f";
    } else {
        $si_code = substr( $sublist[0], 0, 1);
    }
    my $resource = $resource_hash{$si_code};  # look it up
    if ( $resource eq "cat_cos" ) {
          # necessary to create COS files copies
          # The cat_cos_copies.py script will weed out non-COS data
          # but I hate seeing it called on non-COS data
          eval { copy_cos_sublist( $inDir, $sublist_ref ) } ;
          if ( $@ ) {
              PrintMsg( "E", $@ ) ;  # no nm because die will mention this script
              # $EXIT_FAILURE is not set in this routine.  If it is
              # undefined here, exit() would receive undef and report
              # status 0 (success), so fall back to a nonzero status.
              exit( defined $EXIT_FAILURE ? $EXIT_FAILURE : 1 );
          }
    }
    eval { catalog_datasets( $sublist_ref, "catalog", $resource, $data_id) } ;
    die $@ if ( $@ ) ;
    PrintMsg( "D", "-----subEnd----- ", $nm );
}   # end catalog_sublist

#----------------------------------------------------------------------------
# Name:  hkp_sublist 
#
# purpose: Wrapper around call to hkp_datasets 
# 
# Input:   sublist_reference and archive_class
#          
#
# Returns:  none; will die if call to hkp_datasets fails
#          
#
# modification history:
#
#   date    opr     who     reason
# -------- -----  --------  --------------------------------------
# 06/17/10 62962  Sherbert  add consistent comment block
# 
#----------------------------------------------------------------------------
sub hkp_sublist {
    # two arguments: sublist_reference and archive_class
    #
    # Look up the data_id for the first dataset of the sublist, lowercase
    # it, pick the resource ("hkp_fgs" for OMS, "hkp_sci" otherwise) and
    # hand the sublist to hkp_datasets.  Errors from either call are
    # re-thrown to the caller.
    my $nm = "hkp_sublist" ;
    PrintMsg( "D", "-----subBeg----- ", $nm );

    my ($sublist_ref, $archive_class) = @_;
    my @members = @$sublist_ref;

    my $data_id = eval { hst_cal_oms_data_id( $members[0], $archive_class) } ;
    if ( $@ ) {
        die $@ ;
    }
    my $lc_data_id = lc( $data_id ) ;

    my $resource = ( $archive_class eq "OMS" ) ? "hkp_fgs" : "hkp_sci";

    eval { hkp_datasets( $sublist_ref, $resource, $lc_data_id) } ;
    if ( $@ ) {
        die $@ ;
    }
    PrintMsg( "D", "-----subEnd----- ", $nm );
}   # end hkp_sublist

#----------------------------------------------------------------------------
#
# Name: resolve
#
# purpose: Run osfile_stretch_file on an input
# 
# Input:   DIRSPEC:file to resolve w/ osfile_stretch_file
#          e.g. OPUS_DEFINITIONS_DIR:fgsreq.resource
#
# Returns: fullpath_filespec - resolved filespec 
#          e.g. /store/smalls/opus20093-091126/definitions/uniq/fgsreq.resource
#
# modification history:
#
#   date    opr     who     reason
# -------- -----  --------  --------------------------------------
# 07/15/09 ?      Sherbert  based on resolve.pl for ENV vars
# 12/07/09        Sherbert  catch errors
# 
#----------------------------------------------------------------------------
sub resolve
{
    # one argument: a DIRVAR:file spec, e.g. OPUS_DEFINITIONS_DIR:fgsreq.resource
    #
    # Run osfile_stretch_file on the spec and return its output with the
    # trailing newline removed and repeated slashes collapsed.
    # Dies when the input has no colon, when the command gives no output,
    # or when the command just echoes the input back (nothing resolved).
    my $nm = "resolve" ;
    PrintMsg( "D", "-----subBeg----- ", $nm );
    my ( $inVar ) = @_;

    # Check to make sure input contains VAR:file format
    if ( $inVar !~ /:/ ) {
        my $errMsg = $nm . "-Incorrect input format for osfile_stretch_file: " ;
        $errMsg = $errMsg . $inVar . " does NOT match DIRVAR:file format. " ;
        die( $errMsg );
    }
    # lexicals, so no package variables are overwritten by this call
    my $command = "osfile_stretch_file $inVar" ;
    my $fullpath_filespec = `$command`;

    # backticks give undef when the command could not be run at all
    $fullpath_filespec = '' unless defined $fullpath_filespec ;
    chomp ($fullpath_filespec);

    # Output equal to the input means osfile_stretch_file could not resolve
    # it.  Empty output is not a usable filespec either, so both are
    # reported as a failure instead of being returned to the caller.
    if ( $fullpath_filespec eq '' || $fullpath_filespec eq $inVar ) {
        my $errMsg = $nm . "-osfile_stretch_file FAILED" ;
        $errMsg = $errMsg . "( $command ) " ;
        die( $errMsg ) ;
    }
    ## Remove multiple slashes
    $fullpath_filespec =~ s/\/{2,}/\//g ;
    PrintMsg( "D", "fullpath_filespec is $fullpath_filespec ", $nm ) ;
    PrintMsg( "D", "-----subEnd----- ", $nm );
    return $fullpath_filespec;
}   # end resolve

#----------------------------------------------------------------------------
#
# Name: expand 
#
# purpose: This subroutine takes an environment variable containing multiple 
#          directories, like OPUS_DEFINITIONS_DIR, and returns a list of 
#          resolved (full path name) directories. I.e., it determines which 
#          pieces of the value of the environment variable need to be resolved 
#          by osfile_stretch_file so that the final list is fully resolved.
# 
# Input:   name of an ENVIRONMENT variable of directory specs separated by
#          spaces, like OPUS_DEFINITIONS_DIR
#
# Returns: @resolveDirs - an array of resolved filespecs 
#
# modification history:
#
#   date    opr     who     reason
# -------- -----  --------  --------------------------------------
# 07/15/09 ?      Sherbert  based on resolve.pl for ENV vars
# 03/01/10 59963  Sherbert  remove multiple /'s  (decrease debugs but keep if need in future)
#
#----------------------------------------------------------------------------
sub expand
{
    # one argument: the NAME of an environment variable whose value is a
    #               blank-separated list of directory specs
    #
    # Entries containing a colon are passed through resolve(); all others
    # are used as they are.  Repeated slashes are collapsed in every entry.
    # Return the list of resolved directories (an empty list when the
    # variable is unset or blank); errors from resolve() are re-thrown.
    my ($inVar) = @_;                           # ARGUMENTS from @_
    my @resolvedDirs ;
    my $nm = "expand" ;
    PrintMsg( "D", "-----subBeg----- ", $nm );

    # obtain the environment variable value; unset counts as empty
    my $envVal = $ENV{"$inVar"} ;
    $envVal = '' unless defined $envVal ;

    # split ' ' skips leading whitespace and treats a run of blanks as one
    # separator, so no empty directory entries are produced
    my @pathvals = split ' ', $envVal;

    foreach my $dir ( @pathvals ) {
        my $resolved ;
        ## pass the value through the stretch-resolver command
        ## only when it is in DIRVAR:file form
        if ( $dir =~ /:/ ) {
            eval { $resolved = resolve( $dir ) } ;
            die $@ if ( $@ ) ;
        } else {
            $resolved = $dir ;
        }
        ## Remove multiple slashes
        $resolved =~ s/\/{2,}/\//g ;
        push @resolvedDirs, $resolved;
    }

    my $msg = "Returning resolvedDirs: "  ;
    $msg = $msg . "(" . join(", ", @resolvedDirs) . ")";
    PrintMsg( "D", $msg, $nm );
    PrintMsg( "D", "-----subEnd----- ", $nm );
    # return the resolved filespec
    return @resolvedDirs;
}   # end expand
1;


## Complete list if needed in future.
##     my %si_char_to_root_char = (
##             ACS    => "j"    ## ACS
##           , COS    => "l"    ## COS
##           , FGS    => "f"    ## FGS 
##           , FOC    => "x"    ## FOC 
##           , FOS    => "y"    ## FOS 
##           , GHRS   => "z"    ## GHRS
##           , HRS    => "z"    ## GHRS
##           , HSP    => "v"    ## HSP
##           , NICMOS => "n"    ## NICMOS
##           , NIC    => "n"    ## NICMOS
##           , STI    => "o"    ## STIS
##           , STIS   => "o"    ## STIS
##           , WFPC   => "w"    ## WFPC1
##           , WFPC2  => "u"    ## WFPC2
##           , WF2    => "u"    ## WFPC2
##           , WFC3   => "i"    ## WFC3
##           , WF3    => "i"    ## WFC3   
##           ) ;