# Orca::SourceFile: Manage the watching and loading of source data files.
#
# Copyright (C) 1998-1999 Blair Zajac and Yahoo!, Inc.
# Copyright (C) 1999-2002 Blair Zajac.

package Orca::SourceFile;

use strict;
use Carp;
use Digest::MD5         qw(md5);
use Storable            qw(dclone);
use Orca::Constants     qw($opt_verbose
                           die_when_called
                           $INCORRECT_NUMBER_OF_ARGS);
use Orca::Config        qw(%config_global
                           @config_groups
                           @config_groups_names
                           @config_plots
                           get_color);
use Orca::OldState      qw($orca_old_state);
use Orca::DataFile      qw(ORCA_DATAFILE_LAST_INDEX);
use Orca::OpenFileHash  qw($open_file_cache);
use Orca::SourceFileIDs qw(@sfile_fids);
use Orca::ImageFile;
use Orca::RRDFile;
use Orca::Utils         qw(email_message);
use vars                qw(@ISA $VERSION);

# This class extends Orca::DataFile; the version number is extracted
# from the revision keyword string.
@ISA     = qw(Orca::DataFile);
$VERSION = substr q$Revision: 0.01 $, 10;

# This is a static variable that lists all of the column names for a
# particular group.  It is an array indexed by group index where each
# element is a hash keyed by column name; add_groups fills it in and
# add_plots reads it.
my @group_column_names;

# This caches the reference to the array holding the column
# descriptions for files that have their column descriptions in the
# first line of the file.  The key is the MD5 digest of the column
# names joined together, so files with identical first lines share a
# single array.
my %first_line_cache;

# These are caches for the different objects that are used to add a
# plot.  In add_plots they are keyed by the MD5 digest of the group
# name, subgroup name and column names.  Note that add_plots also
# stores in %choose_data_sub_cache the 0/1 result of test compiling
# each data expression, keyed by the MD5 digest of that expression.
my %all_rrds_cache;
my %my_rrd_list_cache;
my %choose_data_sub_cache;

# Use a blessed reference to an array as the storage for this class.
# Since this class is a subclass of Orca::DataFile, append to the
# end of the Orca::DataFile array the values needed by this class
# using the ORCA_DATAFILE_LAST_INDEX index.  Define these constant
# subroutines as indexes into the array.  If the order of these
# indexes changes, make sure to rearrange the constructor in new.
#
# The first seven slots are copied from the group's configuration in
# new, the next four are filled in by add_groups and add_plots, and
# the remainder hold per-file state.  I_IS_CURRENT_DAY must remain
# the largest index since new assigns to it first to preexpand the
# array.
sub I_GROUP_INDEX        () { ORCA_DATAFILE_LAST_INDEX +  1 }
sub I_INTERVAL           () { ORCA_DATAFILE_LAST_INDEX +  2 }
sub I_LATE_INTERVAL      () { ORCA_DATAFILE_LAST_INDEX +  3 }
sub I_READ_INTERVAL      () { ORCA_DATAFILE_LAST_INDEX +  4 }
sub I_REOPEN             () { ORCA_DATAFILE_LAST_INDEX +  5 }
sub I_DATE_SOURCE        () { ORCA_DATAFILE_LAST_INDEX +  6 }
sub I_DATE_PARSE         () { ORCA_DATAFILE_LAST_INDEX +  7 }
sub I_MY_RRD_LIST        () { ORCA_DATAFILE_LAST_INDEX +  8 }
sub I_ALL_RRD_REF        () { ORCA_DATAFILE_LAST_INDEX +  9 }
sub I_GROUP_KEYS         () { ORCA_DATAFILE_LAST_INDEX + 10 }
sub I_CHOOSE_DATA_SUB    () { ORCA_DATAFILE_LAST_INDEX + 11 }
sub I_COLUMN_DESCRIPTION () { ORCA_DATAFILE_LAST_INDEX + 12 }
sub I_LAST_DATA_TIME     () { ORCA_DATAFILE_LAST_INDEX + 13 }
sub I_LAST_READ_TIME     () { ORCA_DATAFILE_LAST_INDEX + 14 }
sub I_FIRST_LINE         () { ORCA_DATAFILE_LAST_INDEX + 15 }
sub I_DATE_COLUMN_INDEX  () { ORCA_DATAFILE_LAST_INDEX + 16 }
sub I_IS_CURRENT         () { ORCA_DATAFILE_LAST_INDEX + 17 }
sub I_IS_CURRENT_DAY     () { ORCA_DATAFILE_LAST_INDEX + 18 }

# Construct a source file object.
#
# Arguments (after the class name):
#   $group_index  index into @config_groups of the group that this
#                 file belongs to; the group's settings are copied
#                 into the object.
#   $fid          file id; $sfile_fids[$fid] is the name of the file
#                 and the id is handed to the Orca::DataFile
#                 constructor.
#
# Returns the new object.  Returns nothing if the column names are to
# be read from the first line of the file and the file cannot be
# opened or has no first line yet, or if get_date_column cannot find
# the date column.  Confesses if the wrong number of arguments is
# passed.
sub new {
  unless (@_ == 3) {
    confess "$0: Orca::SourceFile::new passed $INCORRECT_NUMBER_OF_ARGS";
  }

  my ($class, $group_index, $fid) = @_;

  my $self = $class->SUPER::new($fid);

  my $config_group = $config_groups[$group_index];

  # Set the last value to preexpand the array.
  $self->[I_IS_CURRENT_DAY]     = undef;
  $self->[I_GROUP_INDEX]        = $group_index;
  $self->[I_INTERVAL]           = $config_group->{interval};
  $self->[I_LATE_INTERVAL]      = $config_group->{late_interval};
  $self->[I_READ_INTERVAL]      = $config_group->{read_interval};
  $self->[I_REOPEN]             = $config_group->{reopen};
  $self->[I_DATE_SOURCE]        = $config_group->{date_source};
  $self->[I_DATE_PARSE]         = $config_group->{date_parse};
  $self->[I_MY_RRD_LIST]        = [];
  $self->[I_ALL_RRD_REF]        = undef;
  $self->[I_GROUP_KEYS]         = {};
  $self->[I_CHOOSE_DATA_SUB]    = undef;

  $self->[I_COLUMN_DESCRIPTION] = $config_group->{column_description};
  $self->[I_LAST_DATA_TIME]     = -1;
  $self->[I_LAST_READ_TIME]     = -1;
  $self->[I_FIRST_LINE]         =  0;
  $self->[I_DATE_COLUMN_INDEX]  = undef;

  # Load in any state information for this file.  The saved state is
  # an array holding the last data time, the last read time, the
  # file's device, inode, size and mtime and finally a reference to
  # the saved column description.  The entry is removed from the old
  # state once it has been consumed.
  my $filename = $sfile_fids[$fid];
  my @column_description;
  if (defined (my $ref = delete $orca_old_state->{$filename})) {
    @$self[I_LAST_DATA_TIME,
           I_LAST_READ_TIME,
           &Orca::DataFile::I_FILE_DEV,
           &Orca::DataFile::I_FILE_INO,
           &Orca::DataFile::I_FILE_SIZE,
           &Orca::DataFile::I_FILE_MTIME] = splice(@$ref, 0, 6);
    @column_description = @{$ref->[0]} if $ref->[0];
  }

  # XXX An unfinished idea that used to be sketched here in commented
  # out code: only trust the saved state when the mtime stored in it
  # agrees with the file's current mtime, and fall back to a default
  # state when they differ or when the file does not exist.

  # Now do a stat of the file.
  my $stat_status = $self->status;

  # Load the column names if the column names are supposed to be loaded
  # from the file.  Use the cached names if the file has not changed.
  if ($self->[I_COLUMN_DESCRIPTION][0] eq 'first_line') {
    # The first line of this file holds the column names no matter
    # whether the names are read now or were restored from the saved
    # state.  Note this in both cases so that load_new_data skips the
    # first line when it has to open the file itself and checks the
    # number of columns in each row.
    $self->[I_FIRST_LINE] = 1;

    if ($stat_status or !@column_description) {
      my $fd = $open_file_cache->open($fid, $self->file_mtime);
      return unless $fd;
      my $line = <$fd>;

      # An empty file yields an undefined line.
      chomp($line) if defined $line;
      if ($line) {
        @column_description = split(' ', $line);
      } else {
        warn "$0: warning: no first_line for `$filename' yet.\n";
        $open_file_cache->close($fid) or
          warn "$0: warning: cannot close `$filename' for reading: $!\n";
        return;
      }
    }

    # Share a single array between all of the files that have the
    # same column names.
    my $cache_key = md5(join("\200", @column_description));
    unless (defined $first_line_cache{$cache_key}) {
      $first_line_cache{$cache_key} = \@column_description;
    }
    $self->[I_COLUMN_DESCRIPTION] = $first_line_cache{$cache_key};
  }

  # Test if the file has been updated in the last _interval number of
  # seconds.  If so, then note it so we can see when the file is no
  # longer being updated.
  $self->[I_IS_CURRENT] = $self->is_current;

  return unless $self->get_date_column;

  $self;
}

# Record that this file belongs to each of the given group indexes
# and add every column name of this file to the static per-group set
# of column names.  The arguments are the group indexes.
sub add_groups {
  my ($self, @group_indexes) = @_;

  for my $group_index (@group_indexes) {
    $self->[I_GROUP_KEYS]{$group_index} = 1;
    $group_column_names[$group_index]{$_} = 1
      for @{$self->[I_COLUMN_DESCRIPTION]};
  }
}

# Return true if the source data file is current and false otherwise.
# The file is current when the time of the last stat is no later than
# the file's modification time plus the late interval.  As a side
# effect remember the day of the month on which this test was made,
# which lets the code ignore files that are no longer current only
# because a new file was generated for the next day.
sub is_current {
  my ($self) = @_;

  my @now = localtime;
  $self->[I_IS_CURRENT_DAY] = $now[3];

  my $stat_time = $self->last_stat_time;
  my $deadline  = $self->file_mtime + $self->[I_LATE_INTERVAL];

  return $stat_time <= $deadline;
}

# This returns the time when the file should be next read.  If the
# file is current, that is, the last stat of the file happened no
# later than the late interval after the file's modification time,
# then the next read is one read interval after the modification
# time.  Otherwise the file is considered old and the next read is
# one late interval after the last stat, so that less time is spent
# watching old files.
sub next_load_time {
  my ($self) = @_;

  my $stat_time     = $self->last_stat_time;
  my $mtime         = $self->file_mtime;
  my $late_interval = $self->[I_LATE_INTERVAL];

  return $stat_time <= $mtime + $late_interval
       ? $mtime     + $self->[I_READ_INTERVAL]
       : $stat_time + $late_interval;
}

# Find which column holds the measurement time.  Nothing needs to be
# done when the date source is the file's modification time.
# Otherwise look for the column named by the date source in the
# column description and remember the index of the first match.
# Returns the object on success.  If the column cannot be found,
# warns, also printing the column names, and returns nothing.
sub get_date_column {
  my ($self) = @_;

  if ($self->[I_DATE_SOURCE][0] eq 'file_mtime') {
    return $self;
  }

  my $fid              = $self->fid;
  my $date_column_name = $self->[I_DATE_SOURCE][1];
  my @column_names     = @{$self->[I_COLUMN_DESCRIPTION]};

  my $date_index;
  foreach my $i (0 .. $#column_names) {
    next unless $column_names[$i] eq $date_column_name;
    $date_index = $i;
    last;
  }

  unless (defined $date_index) {
    warn "$0: warning: cannot find date `$date_column_name' in `$sfile_fids[$fid]'.\n";
    warn "@{$self->[I_COLUMN_DESCRIPTION]}\n";
    return;
  }

  $self->[I_DATE_COLUMN_INDEX] = $date_index;

  return $self;
}

# Create the RRD and image objects that the configured plots call for
# given the columns of this source file and build the subroutine that
# converts one row of the file into the values for those RRDs.
#
# Arguments (after the object):
#   $group_index         index of the group to create plots for; only
#                        plots whose source_index equals it are used.
#   $subgroup_name       name of the subgroup, used in the RRD and
#                        image names.
#   $rrd_data_files_ref  reference to a hash keyed by RRD name holding
#                        Orca::RRDFile objects; new RRDs are added.
#   $image_files_ref     reference to a hash with the keys `hash' and
#                        `list' holding the Orca::ImageFile objects;
#                        new images are added to both.
#
# The results are stored in the object and cached by group name,
# subgroup name and column names, so that another file with the same
# three reuses them.  Returns 1.  Confesses if add_groups has not been
# called or if the wrong number of arguments is passed and dies if
# generated code fails to evaluate.
sub add_plots {
  # Make sure that the user has called the add_groups method and
  # inserted at least one key.
  unless (@group_column_names) {
    confess "$0: Orca::SourceFile::add_groups must be called before ",
            "add_plots.\n";
  }

  unless (@_ == 5) {
    confess "$0: Orca::SourceFile::add_plots $INCORRECT_NUMBER_OF_ARGS";
  }

  my ($self,
      $group_index,
      $subgroup_name,
      $rrd_data_files_ref,
      $image_files_ref) = @_;

  my $group_name = $config_groups_names[$group_index];

  # See if we have already done all the work for a plot with this group_name,
  # subgroup_name, and column description.  Use an MD5 hash instead of a very
  # long key.  Store into a hash the column names found in this file for this
  # group.  Finally, create a hash keyed by column name with a value of the
  # index into the column description array.
  my @column_description = @{$self->[I_COLUMN_DESCRIPTION]};
  my %column_description;
  for (my $i=0; $i<@column_description; ++$i) {
    $column_description{$column_description[$i]} = $i;
  }
  my $plot_key  = join("\200", $group_name,
                               $subgroup_name,
                               @column_description);
  my $cache_key = md5($plot_key);
  if (defined $all_rrds_cache{$cache_key}) {
    $self->[I_ALL_RRD_REF]     = $all_rrds_cache{$cache_key};
    $self->[I_MY_RRD_LIST]     = $my_rrd_list_cache{$cache_key};
    $self->[I_CHOOSE_DATA_SUB] = $choose_data_sub_cache{$cache_key};
    return 1;
  }

  # Use this hash to keep a list of RRDs that this file uses.
  my %my_rrd_list;

  # This is the source for an anonymous subroutine that given a row
  # from a source data file returns a hash keyed by RRD name with the
  # values calculated from the row.
  my $choose_data_expr = "sub {\n  (\n";

  # Go through each plot to create and process it for this file.
  # @regexp_pos holds for each plot the index of the next column to
  # try to match against the plot's regular expression.
  my @regexp_pos          = (0) x @config_plots;
  my $oldest_regexp_index = 0;
  my $handle_regexps      = 0;
  my $i                   = 0;
  my $old_i               = 0;

  # This is the main loop where we keep looking for plots to create
  # until all of the column descriptions have been compared against.
  while ($handle_regexps or $i < @config_plots) {
    # If we've reached an index value greater than the largest index
    # in the plots, then reset the index to the oldest regexp that
    # still needs to be completed.
    if ($handle_regexps and $i >= @config_plots) {
      $i = $oldest_regexp_index;
    }

    my $plot = $config_plots[$i];

    # Skip this plot if the source group indexes does not match.
    # Increment the index of the next plot to handle.
    if ($plot->{source_index} != $group_index) {
      if ($oldest_regexp_index == $i) {
        $handle_regexps = 0;
        ++$oldest_regexp_index;
      }
      ++$i;
      next;
    }

    # There are three cases to handle:
    # 1) Regular expression match in the first data with no additional datas.
    # 2) Regular expression match in the first data with additional datas.
    # 3) All others.
    # The first is a single data source that has a regular expression.  In
    # this case, all of the columns are searched to match the regular
    # expression.  This generates a single plot with all of the different
    # data sources plotted on it.  The second case is two or more data
    # sources and where the first data source has a regular expression
    # match.  This may generate more than one plot, for each set of columns
    # that match the regular expression.  The final case to handle is when
    # the previous two cases are not true.  The last column matched on is
    # stored in @regexp_pos.
    my $number_datas         = @{$plot->{data}};
    my $regexp_element_index = -1;
    for (my $j=0; $j<@{$plot->{data}[0]}; ++$j) {
      if ($plot->{data}[0][$j] =~ m:\(.+\):) {
        $regexp_element_index = $j;
        last;
      }
    }

    # 1) Regular expression match in the first data with no additional datas.
    if ($number_datas == 1 and $regexp_element_index != -1) {

      # If we've gone up to the last column to match, then go on.
      if ($regexp_pos[$i] >= @column_description) {
        if ($oldest_regexp_index == $i) {
          $handle_regexps = 0;
          ++$oldest_regexp_index;
        }
        $i = $plot->{flush_regexps} ? $oldest_regexp_index : $i + 1;
        next;
      }
      $regexp_pos[$i] = @column_description;

      # Start by making a deep copy of the plot.  Be careful not to make
      # a deep copy of the `creates' reference, since it can cause
      # recursion.  Replace the regular expression in the first data
      # with the name of the column that caused the match.
      my $creates          = delete $plot->{creates};
      my $new_plot         = dclone($plot);
      $plot->{creates}     = $creates;
      $new_plot->{creates} = $creates;
      $plot                = $new_plot;

      # At this point we have a copy of plot.  Now go through looking
      # for all the columns that match and create an additional data
      # source for each match.
      my @data_with_regexp = @{$plot->{data}[0]};
      my $regexp           = $data_with_regexp[$regexp_element_index];
      my $new_data_index   = 0;
      my $original_legend  = $plot->{legend}[0];
      foreach my $column_name (@column_description) {
        my @matches = $column_name =~ /$regexp/;
        next unless @matches;

        # Replace the regular expression match with the matched column
        # name.
        $data_with_regexp[$regexp_element_index] = $column_name;
        $plot->{data}[$new_data_index] = [ @data_with_regexp ];

        # Copy any items over that haven't been created for this new
        # data source.  Make sure that any new elements added to
        # pcl_plot_append_elements show up here.  The first data_min,
        # data_max, data_type, and summary_format are always set and
        # if any later ones are not set, then use the previously set
        # one.
        unless (defined $plot->{data_min}[$new_data_index]) {
          $plot->{data_min}[$new_data_index] =
            $plot->{data_min}[$new_data_index-1];
        }
        unless (defined $plot->{data_max}[$new_data_index]) {
          $plot->{data_max}[$new_data_index] =
            $plot->{data_max}[$new_data_index-1];
        }
        unless (defined $plot->{data_type}[$new_data_index]) {
          $plot->{data_type}[$new_data_index] =
            $plot->{data_type}[$new_data_index-1];
        }
        unless (defined $plot->{color}[$new_data_index]) {
          $plot->{color}[$new_data_index] = get_color($new_data_index);
        }
        unless (defined $plot->{legend}[$new_data_index]) {
          $plot->{legend}[$new_data_index] = $original_legend;
        }
        unless (defined $plot->{line_type}[$new_data_index]) {
          $plot->{line_type}[$new_data_index] = $plot->{line_type}[0];
        }
        unless (defined $plot->{summary_format}[$new_data_index]) {
          $plot->{summary_format}[$new_data_index] =
            $plot->{summary_format}[$new_data_index-1];
        }

        # Replace the regular expression in any legend elements.
        my $legend = $plot->{legend}[$new_data_index];
        my $count  = 1;
        foreach my $match (@matches) {
          $legend =~ s/\$$count/$match/ge;
          $legend =~ s/\(.+\)/$match/ge;
          ++$count;
        }
        $plot->{legend}[$new_data_index] = $legend;

        ++$new_data_index;
      }

      if ($oldest_regexp_index == $i) {
        $handle_regexps = 0;
        ++$oldest_regexp_index;
      }
      $old_i = $i;
      $i = $plot->{flush_regexps} ? $oldest_regexp_index : $i + 1;
      next unless $new_data_index;
    }

    # 2) Regular expression match in the first data with additional datas.
    elsif ($number_datas > 1 and $regexp_element_index != -1) {
      $handle_regexps = 1;

      # If we've gone up to the last column to match, then go on.  If
      # this is the oldest regexp, then increment oldest_regexp_index.
      if ($regexp_pos[$i] >= @column_description) {
        if ($oldest_regexp_index == $i) {
          $handle_regexps = 0;
          ++$oldest_regexp_index;
        }
        $i = $plot->{flush_regexps} ? $oldest_regexp_index : $i + 1;
        next;
      }

      # Go through all of the columns and stop at the first match.
      my @data_with_regexp = @{$plot->{data}[0]};
      my $regexp           = $data_with_regexp[$regexp_element_index];
      my $column_description;
      my @matches;
      for (;$regexp_pos[$i]<@column_description; ++$regexp_pos[$i]) {
        @matches = $column_description[$regexp_pos[$i]] =~ /$regexp/;
        if (@matches) {
          $column_description = $column_description[$regexp_pos[$i]];
          last;
        }
      }
      unless (@matches) {
        if ($oldest_regexp_index == $i) {
          ++$oldest_regexp_index;
          $handle_regexps = 0;
        }
        ++$i;
        next;
      }
      ++$regexp_pos[$i];

      # Start by making a deep copy of the plot.  Be careful not to make
      # a deep copy of the `creates' reference, since it can cause
      # recursion.  Replace the regular expression in the first data
      # with the name of the column that caused the match.  Then create
      # string form of the plot object using Data::Dumper and replace
      # all of the $1, $2, ... with what was matched in the first data
      # source.
      my $creates          = delete $plot->{creates};
      my $new_plot         = dclone($plot);
      $plot->{creates}     = $creates;
      $plot                = $new_plot;
      $plot->{data}[0][$regexp_element_index] = $column_description;

      # This file does not otherwise load Data::Dumper, so make sure
      # that it is available before calling it.
      require Data::Dumper;
      my $d                = Data::Dumper->Dump([$plot], [qw(plot)]);
      my $count            = 1;
      foreach my $match (@matches) {
        $d =~ s/\$$count/$match/mge;
        $d =~ s/\(.+\)/$match/mge;
        ++$count;
      }

      # The eval assigns a brand new structure to $plot.
      # NOTE(review): the evaluated string contains text matched from
      # the column names, which can come from the first line of the
      # data file, so a column name containing a quote character will
      # break or alter the evaluated code.
      {
        local $SIG{__DIE__}  = 'DEFAULT';
        local $SIG{__WARN__} = \&die_when_called;
        eval $d;
      }
      die "$0: internal error: eval on\n   $d\nOutput: $@\n" if $@;

      # Put `creates' back only now, since the eval replaced the
      # structure that $plot referred to before it.
      $plot->{creates}     = $creates;

      # Either increment the index or reset it to the oldest regexp
      # index.
      $old_i = $i;
      $i = $plot->{flush_regexps} ? $oldest_regexp_index : $i + 1;
    }

    # 3) All others.
    else {
      $old_i = $i++;
      ++$oldest_regexp_index unless $handle_regexps;
    }

    # Make a copy of the data's so that if we change anything, we're
    # not changing the original plot structure.  Look through each
    # element of each data and look for names appearing in the column
    # description array.  If there is a match for this file, then
    # convert the element to index the @_ array where the data will be
    # pulled from.  If there is not a match, then see if the element
    # matches a name from one of the other column names from the same
    # group.  In this case the data argument for this file will not be
    # used.

    # To allow data gathering program to send unknown values to Orca,
    # check if any of the substituted values equals 'U' and return
    # immediately the value 'U' to pass to RRDtool.  Keep track of the
    # substituted values.
    my %substituted_values;

    my @datas;
    foreach my $one_data (@{$plot->{data}}) {
      push(@datas, [@$one_data]);
    }
    my $required = $plot->{required};
    for (my $j=0; $j<@datas; ++$j) {
      my $match_one_data = 0;
      for (my $k=0; $k<@{$datas[$j]}; ++$k) {
        my $element = $datas[$j][$k];
        my $pos;
        if (defined ($pos = $column_description{$element})) {
          $match_one_data = 1;
          $datas[$j][$k]  = "\$_[$pos]";
          $substituted_values{"\$_[$pos]"} = 1;
        } elsif (defined $group_column_names[$group_index]{$element}) {
          my $m = $old_i + 1;
          if ($required) {
            warn "$0: $element in `data @{$plot->{data}[$j]}' in plot #$m ",
                 "not replaced since it is not in file `",
                 $self->filename, "'.\n";
          }
          $datas[$j] = undef;
          last;
        }
      }
      # If there were no substitutions and verbose is on, then warn about it.
      if (!$match_one_data and $opt_verbose > 1) {
        my $m = $old_i + 1;
        warn "$0: warning: no substitutions performed for ",
             "`data @{$plot->{data}[$j]}' in plot #$m in `",
             $self->filename, "'.\n";
      }
    }

    # Because users may place code into the data statements that do not
    # have any substitutions, then the only way to check for the validity
    # is to create valid anonymous subroutines and try them.  Invalid
    # ones will either return undef or fail to compile.  If the plot is
    # required, then replace invalid subroutines with one that returns
    # 'U', which is RRD's way of declaring undefined data.
    # Here the results of eval'ing a test subroutine on a data is kept.
    # The cached result is either a 1 or a 0 and is stored in
    # %choose_data_sub_cache under the MD5 of the data expression.  To
    # test the subroutine, pass the newly created subroutine a fake
    # array of numbers, where the array has as many elements as there
    # are in one line from the file.
    my @fake_numbers = 1 .. @column_description;
    my @substituted_data_expressions;
    my $one_ok_data = 0;
    for (my $j=0; $j<@datas; ++$j) {
      my $data_expression;
      if (defined $datas[$j]) {
        my $sub_expr = "sub {\n";
        foreach my $s (sort keys %substituted_values) {
          $sub_expr .= "  if (!defined($s) || $s eq 'U') {    return 'U';\n  }\n";
        }
        $data_expression  = "@{$datas[$j]}";
        $sub_expr        .= "  $data_expression;\n}";
        my $sub_expr_md5  = md5($data_expression);
        my $eval_result   = $choose_data_sub_cache{$sub_expr_md5};
        unless (defined $eval_result) {
          $eval_result = 1;
          my $test_value;
          my $message;
          {
            local $SIG{__DIE__}  = 'DEFAULT';
            local $SIG{__WARN__} = \&die_when_called;
            if (my $sub = eval $sub_expr) {
              eval { $test_value = &$sub(@fake_numbers) };
            }
          }
          if ($@) {
            $eval_result = 0;
            $@ =~ s/\s+$//g;
            my $m = $old_i + 1;
            $message = "$0: warning: cannot compile `$sub_expr' for plot #$m `data @{$plot->{data}[$j]}': $@\n";
          } elsif (!defined $test_value) {
            $eval_result = 0;
            my $m = $old_i + 1;
            $message = "$0: warning: testing of `$sub_expr' for plot #$m `data @{$plot->{data}[$j]}' yielded an undefined value.\n";
          }
          if ($message and ($required or $opt_verbose > 1)) {
            warn $message;
          }
          $choose_data_sub_cache{$sub_expr_md5} = $eval_result;
        }
        $data_expression = undef unless $eval_result;
      }
      # If the data_expression did not work, but the plot is required, then
      # have the expression return 'U'.  Test with defined instead of for
      # truth so that a valid expression consisting only of `0' is kept.
      if (!defined $data_expression and $required) {
        $data_expression = "'U'";
      }
      $one_ok_data = 1 if defined $data_expression;
      push(@substituted_data_expressions, $data_expression);
    }

    # If none of the data expressions compiled, then go on to the next
    # unless the plot is required.
    next if (!$one_ok_data and !$required);

    # At this point we have a plot to create.

    # For each valid data source in this plot, place the substituted
    # code in a large anonymous subroutine that takes a single row of
    # data from an input source file and returns a hash keyed by the
    # name used for a RRD and the value calculated using the input row
    # of data.  Also create an unique Orca data file name for this plot
    # and a name for this plot that does not include the subgroup name.
    my @my_rrds;
    my @my_short_rrds;
    my @name_with_subgroup;
    my @name_without_subgroup;
    my $previous_data_type     = '';
    my $previous_group_index   = -1;
    my $previous_subgroup_name = '';
    for (my $j=0; $j<@substituted_data_expressions; ++$j) {

      # Include in the original data expression the data_type that RRD
      # will apply to the input data.
      my $data_type                   = lc($plot->{data_type}[$j]);
      my $original_data_expression    = join('_', @{$plot->{data}[$j]});
      my $substituted_data_expression = $substituted_data_expressions[$j];

      my $name_with_subgroup = "${group_name}_${subgroup_name}_${data_type}_${original_data_expression}";
      push(@name_with_subgroup,    $name_with_subgroup);
      push(@name_without_subgroup, "${group_name}_${data_type}_${original_data_expression}");

      # If the current data expression is very similar to the previous
      # one, then do not include the group, subgroup and data_type.
      my $short_name_with_subgroup;
      if ($group_index == $previous_group_index) {
        $short_name_with_subgroup  = '_';
      } else {
        $short_name_with_subgroup  = "${group_name}_";
        $previous_group_index      = $group_index;
      }
      if ($subgroup_name eq $previous_subgroup_name) {
        $short_name_with_subgroup .= '_';
      } else {
        $short_name_with_subgroup .= "${subgroup_name}_";
        $previous_subgroup_name    = $subgroup_name;
      }
      if ($data_type eq $previous_data_type) {
        $short_name_with_subgroup .= '_';
      } else {
        $short_name_with_subgroup .= "${data_type}_";
        $previous_data_type        = $data_type;
      }
      $short_name_with_subgroup   .= $original_data_expression;

      # Create a new RRD only if it doesn't already exist and if a
      # valid get data subroutine is created.  Keep the
      # choose_data_sub for this file.
      if (defined $substituted_data_expression) {
        $choose_data_expr .= "    '$name_with_subgroup', $substituted_data_expression,\n";
        unless (defined $rrd_data_files_ref->{$name_with_subgroup}) {
          my $rrd_file = Orca::RRDFile->new($group_index,
                                            $subgroup_name,
                                            "${data_type}_${original_data_expression}",
                                            $plot,
                                            $j);
          $rrd_data_files_ref->{$name_with_subgroup} = $rrd_file;
        }
        $self->[I_ALL_RRD_REF]            = $rrd_data_files_ref;
        $my_rrd_list{$name_with_subgroup} = 1;
        push(@my_rrds, $name_with_subgroup);
        push(@my_short_rrds, $short_name_with_subgroup);
      }
    }

    # Generate a new plot for these data.  If an image already exists
    # for exactly this set of names, then only add the RRDs to it.
    my $image;
    my $all_names_with_subgroup = join(',', @name_with_subgroup);
    if (defined ($image = $image_files_ref->{hash}{$all_names_with_subgroup})){
      $image->add_rrds(@my_rrds);
    } else {
      $image = Orca::ImageFile->new($group_index,
                                    $subgroup_name,
                                    join(',', @my_short_rrds),
                                    join(',', @name_without_subgroup),
                                    $plot,
                                    $rrd_data_files_ref,
                                    \@my_rrds);
      $image_files_ref->{hash}{$all_names_with_subgroup} = $image;
      push(@{$image_files_ref->{list}}, $image);
      push(@{$config_plots[$old_i]{creates}}, $image);
    }

    # Put into each RRD the images that are generated from it.
    foreach my $rrd_key (@my_rrds) {
      $rrd_data_files_ref->{$rrd_key}->add_image($image);
    }
  }

  # Compile the subroutine that calculates the values for all of the
  # RRDs of this file from one row of data.
  $choose_data_expr .= "  );\n}\n";
  {
    local $SIG{__DIE__}        = 'DEFAULT';
    local $SIG{__WARN__}       = \&die_when_called;
    $self->[I_CHOOSE_DATA_SUB] = eval $choose_data_expr;
  }
  if ($@) {
    my $m = $old_i + 1;
    die "$0: warning: bad evaluation of command for plot #$m:\n$choose_data_expr\nOutput: $@\n";
  }

  # Remember the results for any other file with the same group name,
  # subgroup name and column names.
  $all_rrds_cache{$cache_key}        = $self->[I_ALL_RRD_REF];
  $choose_data_sub_cache{$cache_key} = $self->[I_CHOOSE_DATA_SUB];
  my $tmp                            = [sort keys %my_rrd_list];
  $my_rrd_list_cache{$cache_key}     = $tmp;
  $self->[I_MY_RRD_LIST]             = $tmp;

  1;
}

sub load_new_data {
  my $self = shift;

  my $fid = $self->fid;

  # Test to see if we should read the file.  If the file has changed
  # in any way, then read it.  If the file is now gone and we have an
  # open file descriptor for it, then read to the end of it and then
  # close it.
  my $file_status = $self->status;
  my $fd          = $open_file_cache->get_fd($fid);
  my $load_data   = $file_status != 0;
  if ($file_status == -1) {
    my $message = "file `$sfile_fids[$fid]' did exist and is now gone.";
    email_message($config_global{warn_email}, $message);
    warn "$0: warning: $message\n";
    unless ($fd) {
      $self->[I_LAST_READ_TIME] = -1;
      return 0;
    }
  }

  # Test if the file was up to date and now is not.  If so, then send
  # a message.  Do not send a message if the file was current in the
  # previous day is now is not current today.
  my $old_is_current     = $self->[I_IS_CURRENT];
  my $old_is_current_day = $self->[I_IS_CURRENT_DAY];
  my $current_day        = (localtime($self->last_stat_time))[3];
  $self->[I_IS_CURRENT]  = $self->is_current;
  if ($old_is_current and
      !$self->[I_IS_CURRENT] and
      ($old_is_current_day == $current_day)) {
    my $message = "file `$sfile_fids[$fid]' was current and now is not.";
    warn "$0: warning: $message\n";
    email_message($config_global{warn_email}, $message);
  }

  # If we don't have to load the data from this file yet, then test to
  # see if the data needs to be loaded if the file modification time
  # is greater than the time at which it was last read.
  my $file_mtime = $self->file_mtime;
  unless ($load_data) {
    $load_data = $file_mtime > $self->[I_LAST_READ_TIME];
  }

  # If the file still does not have to be loaded, now test to see if
  # the timestamp of the last data point is larger than the last time
  # of any RRD files that depend on this source file.
  my $last_data_time = $self->[I_LAST_DATA_TIME];
  unless ($load_data) {
    foreach my $rrd_key (@{$self->[I_MY_RRD_LIST]}) {
      if ($self->[I_ALL_RRD_REF]{$rrd_key}->rrd_update_time < $last_data_time) {
        $load_data = 1;
        last;
      }
    }
  }

  return 0 unless $load_data;

  # Try to get a file descriptor to open the file.  Skip the first
  # line if the first line is used for column descriptions.

  my $opened_new_fd = 0;
  unless ($fd) {
    unless ($fd = $open_file_cache->open($fid, $file_mtime)) {
      warn "$0: warning: cannot open `$sfile_fids[$fid]' for reading: $!\n";
      return 0;
    }
    <$fd> if $self->[I_FIRST_LINE];
    $opened_new_fd = 1;
  }

  my $date_column_index = $self->[I_DATE_COLUMN_INDEX];
  my $use_file_mtime    = $self->[I_DATE_SOURCE][0] eq 'file_mtime';
  my $number_added      = 0;
  my $close_once_done   = 0;
  my $number_columns    = @{$self->[I_COLUMN_DESCRIPTION]};

  # Get the filename if the measurement time is loaded from the file
  # instead of from the last modified time and the time should be
  # parsed using the date_parse subroutine.
  my $date_parse = $self->[I_DATE_PARSE];
  my $filename;
  if (!$use_file_mtime and $date_parse) {
    $filename = $sfile_fids[$self->fid];
  }

  # Load in all of the data possible and send it to each plot.
  while (defined(my $line = <$fd>)) {
    # Skip the line if the word timestamp appears in it.  This is a
    # temporary fix for orcallator.se to place a new information line
    # in the output file when it starts up.
    next if $line =~ /timestamp/;

    my @line = split(' ', $line);

    # Skip this input line if 1) the file uses the first line to
    # define the column names, 2) the number of columns loaded is not
    # equal to the number of columns in the column description.
    if ($self->[I_FIRST_LINE] and @line != $number_columns) {
      warn "$0: number of columns in line $. of `$sfile_fids[$fid]' does not ",
           "match column description.\n";
      next;
    }

    my $time;
    if ($use_file_mtime) {
      $time = $self->file_mtime;
    } elsif ($filename) {
      $time = &$date_parse($filename, $line[$date_column_index]);
    } else {
      $time = $line[$date_column_index];
    }
    $last_data_time = $time if $time > $last_data_time;

    # If the file status from the source data file is greater than
    # zero, then it means the file has changed in some way, so we need
    # to do updates for all plots.  Load the available data, calculate
    # the value that needs to go to each RRD and push the value to the
    # RRD.
    my $add    = 0;
    my %values = &{$self->[I_CHOOSE_DATA_SUB]}(@line);
    foreach my $rrd_key (@{$self->[I_MY_RRD_LIST]}) {
      my $value = $values{$rrd_key};
      if (defined $value) {
        if ($self->[I_ALL_RRD_REF]{$rrd_key}->queue_data($time, $value)) {
          if ($opt_verbose > 2 and !$add) {
            print "  Loaded `@line' at ", scalar localtime($time), " ($time).\n";
          }
          $add = 1;
        }
      } else {
        $close_once_done = 1;
        warn "$0: internal error: expecting RRD name `$rrd_key' but no data ",
             "loaded from `", $self->filename, "' at time ",
             scalar localtime($time), " ($time).\n";
      }
    }
    ++$number_added if $add;
  }

  # Update the time when the file was last read.
  $self->[I_LAST_READ_TIME] = time;
  $self->[I_LAST_DATA_TIME] = $last_data_time;

  $open_file_cache->change_weight($fid, $file_mtime);

  # Now two special cases to handle.  First, if the file was removed
  # and we had an open file descriptor to it, then close the file
  # descriptor.  Second, if the file has a new device number or inode
  # and we had an already opened file descriptor to the file, then
  # close the descriptor, reopen it and read all the rest of the data.
  # If neither of these cases is true, then close the file if the file
  # should be reopened next time.
  if ($file_status == 2 and !$opened_new_fd) {
    $open_file_cache->close($fid) or
      warn "$0: warning: cannot close `$sfile_fids[$fid]' for reading: $!\n";
    # Setting the last_read_time to -1 will force load_new_data to
    # read it.
    $self->[I_LAST_READ_TIME] = -1;
    $number_added += $self->load_new_data;
  } elsif ($file_status == -1 or
           $close_once_done   or
           $self->[I_REOPEN]  or
           $open_file_cache->is_pipe($fid)) {
    $open_file_cache->close($fid) or
      warn "$0: warning: cannot close `$sfile_fids[$fid]' for reading: $!\n";
  }

  $number_added;
}

# Return the RRD keys stored in this object's I_MY_RRD_LIST slot.  In
# list context the keys themselves are returned; in scalar context the
# number of keys is returned.
sub rrds {
  my $self = shift;

  return @{$self->[I_MY_RRD_LIST]};
}

# Return the name of this source file, found by using the object's
# file ID (fid) as an index into @sfile_fids.
sub filename {
  my $self = shift;
  my $fid  = $self->fid;

  return $sfile_fids[$fid];
}

1;