#!/usr/bin/env perl
#
################################################################################
#
# routine: gen_regex_fmfile.pl
#
# purpose:
#          Want to create a subroutine callable from perl programs
#          Subroutine will return a regular expression that can be used
#          to search a directory of filenames for a matching pattern
#          indicating if you have found the type of file you seek.
#
# Input:   a) Input file, formatted like opus.env and found in 
#             OPUS_DEFINITIONS_DIR (in case you want to test an unofficial 
#             override or a file similarly formatted )
#          b) type of file you seek (e.g. PSTAT or OSF)
#
# Returns: a string containing a regular expression
#
# Example call: my $regex = GenRegEx("opus.env", "PSTAT");
#
# modification history:
#
#   date   opr    who       reason
# -------- ------ --------- --------------------------------------
# 10/04/04 49405  Sherbert  Third version yet first version
#
#########1#########2#########3#########4#########5#########6#########7#########8
sub GenRegEx
{

 my ($opus_env_file, $type) = @_;

#
# include these modules
#
 require 'printmsg.pl';       # prints a formatted status message

## Input file referred to as opus_env_file, but it could be any file similarly 
## formatted and found in OPUS_DEFINITIONS_DIR
 my $infile = "OPUS_DEFINITIONS_DIR:" . $opus_env_file ;
 my $command = "osfile_stretch_file $infile ";
 $opus_env_file = `$command`;
 chomp($opus_env_file);

#print "###Command returns $opus_env_file for opus.env\n";   ## trace

 if (!open(OENV, "$opus_env_file")) {
    PrintMsg("E","ERROR: failed to open OPUS ENV file: $opus_env_file");
    exit $quit_this_dataset;
 }

 my %fld_hash;  ## Hash to keep track of field sizes
 my (@delims, $num_delims, @locations, @sorted_locs, @template_fields );

 ## Define the strings to search for.  $type is likely either PSTAT or OSF
 ## $type_tmplt ends in the regular expression "\b" to distinguish it
 ## from $type_tmplt_delims: former is substring of latter  
 my $type_tmplt_delims = $type . ".TEMPLATE_DELIMS" ;
 my $type_tmplt        = $type . ".TEMPLATE\\b"      ;

 ########1#########2#########3#########4#########5#########6#########7#########8
 ## Read input file
 while (<OENV>) {
    ## Search file for Template Delimiters of correct Type
    if ( /$type_tmplt_delims/ ) { 
        my ($key, $val) = split_key_val ($_);
        @delims = split //, $val;
        ## How many delimiters are there?
        $num_delims = scalar (@delims);
    }   ## end reading (if) template_delims

    ## Search file for Template of correct Type
    if ( /$type_tmplt/ ) { 
        my ($key, $template) = split_key_val ($_);

        ## Look for the delimiters in the TEMPLATE string
        ## Record where they are found in locations array
        for ( my $i = 0; $i < $num_delims; $i++ ) {
            my $pos = -1;
            while (($pos = index($template, $delims[$i], $pos)) > -1) {
                push @locations, $pos;
                $pos++;
            }
        }

        ## Sort locations array so we can read what is between the delimiters
        @sorted_locs = (sort { $a <=> $b } @locations) ;

        ## Loop through sorted delimiters and collect the information in between
        ## Store that information in the array template_fields
        ## That info can be used to find sizes and construct the reg. expression
        for ( my $i=0; $i < (scalar(@sorted_locs) - 1 ); $i++ ) {
            ## Want to avoid uninitialed value (one element past the end)
            ## therefore limit for loop to one before the end
            my $left  = $sorted_locs[$i]   + 1;
            my $lngth = $sorted_locs[$i+1] - $left;
            $field = substr($template,$left,$lngth); 
            push ( @template_fields, $field);
        }

    }   ## end reading (if) template

    ## After collecting the TEMPLATE and TEMPLATE_DELIMITERS, get the SIZEs
    ## Put sizes into a hash keyed on field names.  
    foreach $fld ( @template_fields ) {
        if ( $fld =~ /[A-Z]/ ) {
            ## We are only interested in the SIZE.  Who knows, maybe
            ## someday there'll be $fld.TYPE or somesuch.
            if ( /^$fld.SIZE/ ) {
                my ($hash_key, $hash_val) = split_key_val ($_);
                $fld_hash{$hash_key} = $hash_val;
            }   ## end if find fld
        }   ## end if fld is an uppercase letter
    }      ## end foreach fld in template_fields

 }  ## end while oenv
 close (OENV);
#print "###fld_hash contains: \n";  ## trace
#&way_foreach(%fld_hash);        ## trace

 ########1#########2#########3#########4#########5#########6#########7#########8
 ## Build regular expression string

 ## It's all there in the PSTAT.TEMPLATE and PSTAT.TEMPLATE_DELIMITERS 
 ## and fld_hash for sizes; combn that w/ template_fields array

 ## Build a string: where template_fields contains ^[A-Z], substitute with 
 ## "\w{$fld_hash($template_fields)}" otherwise use $template_fields
 my $reg_exp_str = "";
 my $w = "\\w{";            ## repeated part of regular expression
 foreach $fld ( @template_fields ) {
    if ( $fld =~ /[A-Z]/ ) {
        my $fld_sz = $fld . ".SIZE" ;
        $reg_exp_str = $reg_exp_str . $w . $fld_hash{$fld_sz} . "}";
    } else {
        $reg_exp_str = $reg_exp_str . $fld ;
    }   ## end if fld begins with uppercase letter
 }  ## end foreach $fld in @template_fields
#print "###My regular expression string is $reg_exp_str \n";    ## trace
#print "###Expect to see something like... \\w{8}-\\w{9}-\\w{15}.\\w{8}-\\w{9}-\\w{20}-\\w{4} \n"; ## trace for PSTATs

 ## Return the regular expression
 $reg_exp_str;

}   ## end sub GenRegEx
1;

################################################################################
## Subroutines for the subroutine
################################################################################

 ##
 ## Print hash values for trace purposes
 ##
 sub way_foreach {
   %hash = @_;
   foreach $key (sort keys %hash) {             ## trace
       printf ("*** %-s %-s\n",    $key, $hash{$key} );    ## trace
   }
 }

 ##
 ## get Keyword = Value pairs; return value
 ##
 sub split_key_val {
    my $line = $_;
    my ($key, $val) = split /=/, $line;
 ## Remove leading spaces
    $key =~ s/^\s+//;              # Remove leading spaces
    $val =~ s/^\s+//;              # Remove leading spaces
 ## Remove trailing spaces
    $key =~ s/\s+$//;              # Remove trailing spaces
    $val =~ s/\s+$//;              # Remove trailing spaces
    return ($key, $val);
 }


