# Orca::Utils: Small utility subroutines.
#
# Copyright (C) 1998-1999 Blair Zajac and Yahoo!, Inc.
# Copyright (C) 1999-2002 Blair Zajac.

package Orca::Utils;

use strict;
use Carp;
use Exporter;
use Digest::MD5         qw(md5_base64);
use Orca::Constants     qw($INCORRECT_NUMBER_OF_ARGS);
use Orca::Config        qw(%config_global);
use Orca::SourceFileIDs qw(new_fids);
use vars qw(@EXPORT_OK @ISA $VERSION);

@EXPORT_OK = qw(email_message
                gcd name_to_fsname
                perl_glob
                recursive_mkdir
                unique);
@ISA       = qw(Exporter);
$VERSION   = substr q$Revision: 0.01 $, 10;

# Email the list of people a message.
sub email_message {
  my ($people, $subject) = @_;

  return unless $people;

  if (open(SENDMAIL, "|/usr/lib/sendmail -oi -t")) {
    print SENDMAIL <<"EOF";
To: $people
Subject: Orca: $subject

Orca: $subject
EOF
    close(SENDMAIL) or
      warn "$0: warning: sendmail did not close: $!\n";
  } else {
    warn "$0: warning: cannot fork for sendmail: $!\n";
  }
}

# Return the greatest common divisor.
sub gcd {
  unless (@_ == 2) {
    confess "$0: Orca::Utils::gcd $INCORRECT_NUMBER_OF_ARGS";
  }
  my ($m, $n) = @_;
  if ($n > $m) {
    my $tmp = $n;
    $n = $m;
    $m = $tmp;
  }
  while (my $r = $m % $n) {
    $m = $n;
    $n = $r;
  }
  $n;
}

# Replace special characters from names, remove redundant characters,
# and shorten the names so the maximum path name is not exceeded.  If
# the name is still too long such that the maximum filename path
# length ($config_global{max_filename_length}) may be exceeded by
# appending -daily.html or other names to the name, then compute a MD5
# hash of the name, trim the name the name to max_filename_length
# minus at least 23 plus the postfix length characters to leave space
# for a 22 byte base64 MD5 digest, plus a separating '-', and plus the
# postfix.
sub name_to_fsname {
  unless (@_ == 2) {
    confess "$0: Orca::Utils::name_to_fsname $INCORRECT_NUMBER_OF_ARGS";
  }

  my ($name, $postfix_length) = @_;

  $name =~ s/:/_/g;
  $name =~ s:/:_per_:g;
  $name =~ s:\s+:_:g;
  $name =~ s:%:_pct_:g;
  $name =~ s:#:_num_:g;
  $name =~ s:\*:_X_:g;

  # Trim anything containing orcallator and orca to o.
  $name =~ s:orcallator:o:g;
  $name =~ s:orca:o:g;

  # Remove trailing _'s.
  $name =~ s:_+$::;
  $name =~ s:_+,:,:g;

  # Replace multiple _'s with one _, except when they follow a , which
  # happens when the same group and subgroup appear for a new data
  # source.
  $name =~ s:,_{2,}:\200:g;
  $name =~ s:_{2,}:_:g;
  $name =~ s:\200:,__:g;

  my $max_filename_length = $config_global{max_filename_length};
  if (length($name)+$postfix_length > $max_filename_length) {

    my $md5         = md5_base64($name);
    my $trim_length = $max_filename_length - 23 - $postfix_length;
    $name           = substr($name, 0, $trim_length) . "-$md5";

    # Be careful to convert any /, \ or + characters to _.  The /
    # character definitely needs to be modified since / is a valid
    # base64 character and can't be used since we don't want a
    # directory.
    $name =~ s:[/\\\+]:_:g;
  }

  $name;
}

sub old_name_to_fsname {
  my $name = shift;
  $name =~ s/:/_/g;
  $name =~ s:/:_per_:g;
  $name =~ s:\s+:_:g;
  $name =~ s:%:_percent_:g;
  $name =~ s:#:_number_:g;
  $name =~ s:\*:_X_:g;
  $name =~ s:([_,]){2,}:$1:g;

  # Remove trailing _'s.
  $name =~ s:_+$::;
  $name =~ s:_+,:,:g;
  $name;
}

# Find all files matching a particular Perl regular expression and
# return file ids.
sub perl_glob {
  my $regexp = shift;

  # perl_glob gets called recursively.  To tell if we're being called by
  # perl_glob, look for the existence of two arguments, where the second
  # one if the current directory to open for matching.
  my $current_dir = @_ ? $_[0] : '.';

  # Remove all multiple /'s, since they will confuse perl_glob.
  $regexp =~ s:/{2,}:/:g;

  # If the regular expression begins with a /, then remove it from the
  # regular expression and set the current directory to /.
  $current_dir = '/' if $regexp =~ s:^/::;

  # Get the first file path element from the regular expression to
  # match.
  my @regexp_elements = split(m:/:, $regexp);
  my $first_regexp    = shift(@regexp_elements);

  # Find all of the files in the current directory that match the
  # first regular expression.
  unless (opendir(GLOB_DIR, "$current_dir")) {
    warn "$0: error: cannot opendir `$current_dir': $!\n";
    return ();
  }

  my @matches = grep { /^$first_regexp$/ } readdir(GLOB_DIR);

  closedir(GLOB_DIR) or
    warn "$0: warning: cannot closedir `$current_dir': $!\n";

  # If the last path element is being used as the regular expression,
  # then just return the list of matching files with the current
  # directory prepended.
  unless (@regexp_elements) {
    @matches = grep { -f $_ and -r _ } map { "$current_dir/$_" } @matches;
    return @_ ? @matches : new_fids(@matches);
  }

  # Otherwise we need to look into the directories below the current
  # directory.  Also create the next regular expression to use that is
  # made up of the remaining file path elements.  Make sure not to
  # process any directories named `..'.
  my @results;
  my $new_regexp = join('/', @regexp_elements);
  foreach my $new_dir (grep { $_ ne '..' and -d "$current_dir/$_" } @matches) {
    my $new_current = "$current_dir/$new_dir";
    $new_current =~ s:/{2,}:/:g;
    push(@results, perl_glob($new_regexp, $new_current));
  }

  return @_ ? @results : new_fids(@results);
}

# Given a directory name, attempt to make all necessary directories.
sub recursive_mkdir {
  my $dir = shift;

  # Remove extra /'s.
  $dir =~ s:/{2,}:/:g;

  my $path;
  if ($dir =~ m:^/:) {
    $path = '/';
  } else {
    $path = './';
  }

  my @elements = split(/\//, $dir);
  foreach my $element (@elements) {
    $path = "$path/$element";
    next if -d $path;
    unless (mkdir($path, 0755)) {
      die "$0: error: unable to create `$path': $!\n";
    }
  }
}

# Return a list of the unique elements of a list in the same order as
# they appear in the input list.
sub unique {
  my %a;
  my @unique;
  foreach my $element (@_) {
    unless (exists $a{$element}) {
      push(@unique, $element);
      $a{$element} = 1;
    }
  }
  @unique;
}

1;