#$Id: Cache.pm 46153 2007-10-19 00:26:07Z wsnyder $
######################################################################
#
# This program is Copyright 2002-2007 by Wilson Snyder.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of either the GNU General Public License or the
# Perl Artistic License.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#                                                                           
######################################################################

package Make::Cache;
use IO::File;
use IO::Pipe;
use IO::Dir;
use Data::Dumper;
use File::Copy;
use File::Path;
use Storable;
use Digest::MD5;
use Sys::Hostname;
use Cwd;
use Data::Dumper; $Data::Dumper::Indent=1;

use Make::Cache::Hash;
use Make::Cache::Runtime;
use Carp;
use strict;

our $Debug;

our $VERSION = '1.043';

######################################################################
#### Creators

sub new {
    my $class = shift;
    my @inherit = %{$class} if ref $class;
    my $self = {
	# Global Defaults
	#dir => "path_to_cache_dir",
	read=>1,	# Allow cache to have read hits
	write=>1,	# Allow cache to write updated status
	clean_delay=>(24*60*60),	# After this many seconds, clean may delete the file
	pwd=>Cwd::getcwd(),
	file_env => [ 	# List of global mnemonic and local value for global_filename funcs (not hash-can be duplicates)
			PWD => Cwd::getcwd(),
			],
	# Per-file elements
	tgts_lcl => [],
	deps_lcl => [],
	cmds_lcl => [],
	flags_lcl => [],
	@inherit,
	@_,
    };
    $self->{dir} or croak "%Error: Need to specify repository dir,";
    ($self->{dir}=~/^\//) or croak "%Error: Repository dir must be absolute path,";
    bless $self, ref($class)||$class;
}

sub clear_hash_cache {
    Make::Cache::Hash::clear_cache();
}

#######################################################################
# Accessors

sub cmds_lcl {
    my $self = shift;
    # List of commands to be executed
    push @{$self->{cmds_lcl}}, @_ if defined $_[0];
    return @{$self->{cmds_lcl}};
}
sub flags_lcl {
    my $self = shift;
    # List of commands to be hashed, thus is generally cmds_lcl minus -D/-U/-I switches
    push @{$self->{flags_lcl}}, @_ if defined $_[0];
    return @{$self->{flags_lcl}};
}
sub deps_lcl {
    my $self = shift;
    # List of filename dependencies used to create the targets
    push @{$self->{deps_lcl}}, @_ if defined $_[0];
    return @{$self->{deps_lcl}};
}
sub tgts_lcl {
    my $self = shift;
    # List of filename targets to be cached
    push @{$self->{tgts_lcl}}, @_ if defined $_[0];
    return @{$self->{tgts_lcl}};
}

sub cmds_gbl {
    my $self = shift;
    return map {$self->global_filename($_);} $self->cmds_lcl;
}
sub deps_gbl {
    my $self = shift;
    return nodup(map {$self->global_filename($_);} $self->deps_lcl);
}
sub flags_gbl {
    my $self = shift;
    return nodup(map {$self->global_filename($_);} $self->flags_lcl);
}
sub tgts_gbl {
    my $self = shift;
    return nodup(map {$self->global_filename($_);} $self->tgts_lcl);
}

sub tgts_name_digest {
    my $self = shift;
    # Internal; create digest based on target names
    $self->{tgts_name_digest} ||= Make::Cache::Hash::hash (text=>[ $self->tgts_gbl ]);
    return $self->{tgts_name_digest};
}

sub runtime_key_digest {
    my $self = shift;
    # Internal; create digest based on target names and executable
    $self->{runtime_key_digest} ||= Make::Cache::Hash::hash (text=>[ $self->tgts_gbl, $self->{cmds_exec} ]);
    return $self->{runtime_key_digest};
}

sub deps_data_digest {
    my $self = shift;
    my $hash = $self->deps_fixed_digest;
    # Internal; create digest based on target contents
    if (!$hash) {
	my @dep_to_do = nodup($self->deps_lcl);
	@dep_to_do = sort(@dep_to_do);
	$hash = Make::Cache::Hash::hash (filenames=>[ @dep_to_do ],
				  text=>[ $self->flags_gbl ],);
	#print "\tHash $hash\n\tof ",(join ' ',@dep_to_do),"\n\tCmd=",(join ' ',$self->flags_gbl), "\n" if $Debug;
    }
    return $hash;
}

sub deps_fixed_digest {
    # If defined, this returns a digest which is known to contain all relevant
    # source hash information, and thus avoids having to do read dependencies
    # as each cache entry is compared.
    return undef;
}

sub nodup {
    my @o;
    my %o;
    foreach (@_) {
	next if $o{$_};
	$o{$_} = 1;
	push @o, $_;
    }
    return @o;
}

#######################################################################
# Methods, may be overloaded

sub global_filename {
    my $self = shift;
    my $filename = shift;
    for (my $i=0; $i<=$#{$self->{file_env}}; $i+=2) {
	my ($key, $val) = ($self->{file_env}[$i], $self->{file_env}[$i+1]);
	$filename =~ s/^$val/\$\$${key}\$\$/;
    }
    return $filename;
}

sub local_filename {
    my $self = shift;
    my $filename = shift;
    for (my $i=0; $i<=$#{$self->{file_env}}; $i+=2) {
	my ($key, $val) = ($self->{file_env}[$i], $self->{file_env}[$i+1]);
	$filename =~ s/^\$\$${key}\$\$/$val/;
    }
    return $filename;
}

#######################################################################
# Digest writing

sub write {
    my $self = shift;

    # We don't clear the cache, this means we almost certainly have the hashes of
    # the source code when we started the process.  This is safer, as it allows for
    # the user to change the code while running a build, and not caching the wrong
    # results.

    # What should we put into the file?
    my $store = {
	deps_gbl => [$self->deps_gbl],	# Dependencies expressed in local filesystem filenames
	flags_gbl => [$self->flags_gbl],# Command to be hashed (cmds_gbl minus flags that don't matter)
	tgts_gbl => [$self->tgts_gbl],	# Targets expressed in local filesystem filenames
	deps_data_digest => $self->deps_data_digest,
	tgts_name_digest => $self->tgts_name_digest,
	time => time(),
	pwd  => $self->{pwd},
    };

    #print Dumper($store) if $Debug;

    # Write it out
    if (!-w $self->{dir}) {
	mkpath $self->{dir}, 0777;
	if (!-w $self->{dir}) {
	    warn ("%Warning: Cache.pm: Cannot write dir '$self->{dir}' to cache '$store->{tgts_gbl}[0]'.\n");
	    return;
	}
    }

    if (my $tgt = $self->tgts_missing) {
	warn "%Warning: Cache.pm: Expected target file wasn't generated: $tgt\n";
	return;
    }

    (my $digPrefix = $store->{tgts_name_digest}) =~ s/^(..).*$/$1/;
    my $pathtgt = "$self->{dir}/$digPrefix/$store->{tgts_name_digest}";
    mkpath ($pathtgt, 0, 0777);
    my $pathfile = "$pathtgt/$store->{deps_data_digest}";
    Storable::nstore ($store, "$pathfile.digest.new$$");
    chmod (0666, "$pathfile.digest.new$$");
    for (my $n=0; $n<=$#{$store->{tgts_gbl}}; $n++) {
	my $tgt = $self->{tgts_lcl}[$n];
	my $to = "$pathfile.t${n}.new$$";
	print "    cp $tgt $to\n" if $Debug;
	copy ($tgt, "$pathfile.t${n}.new$$");
    }
    # Do the copy as one atomic op to reduce race case with another reader
    for (my $n=0; $n<=$#{$store->{tgts_gbl}}; $n++) {
	move ("$pathfile.t${n}.new$$", "$pathfile.t${n}");
    }
    move ("$pathfile.digest.new$$", "$pathfile.digest");
}

########################################################################
## Digest reading

sub restore {
    my $self = shift or return;
    # Call this with the results of a find_hit
    # Restores the cached files.

    my $pathfile = $self->{hit_filename} or die "%Error: Not called on a hit object,";

    # Update the digest date, so cleaning won't consider it unsed
    touch("${pathfile}.digest");

    my $missing;
    for (my $n=0; $n<=$#{$self->{tgts_lcl}}; $n++) {
	my $tgt = $self->{tgts_lcl}[$n];
	my $from = "$pathfile.t${n}";
	if (!-r $from) { $missing=$from; last; }
	# Can't hard link, as might be on different file system
	unlink $tgt;
	if ($self->{link}) {
	    print "    ln -s $from $tgt\n" if $Debug;
	    symlink $from, $tgt or die "objcache: %Error: Can't ln -s $from $tgt: $!\n";
	} else {
	    print "    cp $from $tgt\n" if $Debug;
	    copy $from, $tgt or die "objcache: %Error: Can't cp $from $tgt: $!\n";
	}
	touch($tgt);
    }

    if ($missing) {
	# Recover nicely by blowing away the cache entry
	warn "-Info: Cache.pm: Object cache files missing for $missing\n";
	unlink ("$pathfile.digest");
	for (my $n=0; $n<=$#{$self->{tgts_lcl}}; $n++) {
	    my $from = "$pathfile.t${n}";
	    unlink $from;
	}
	return undef;
    }

    return $self;
}

sub find_hit {
    my $self = shift;
    # Return new Make::Cache reference to the cache hit on ANY ENTRY for this file
    # Or undef if we're unlucky

    (my $digPrefix = $self->tgts_name_digest) =~ s/^(..).*$/$1/;
    my $pathtgt = "$self->{dir}/$digPrefix/".$self->tgts_name_digest;

    my $dfixed = $self->deps_fixed_digest;
    if ($dfixed) {
	my $file = "$pathtgt/$dfixed";
	print "Digests_read_fixed $file\n" if $Debug;
	if (-r "$file.digest") {
	    return $self->_find_hit_ent ($file);
	}
    }
    else {
	print "Digests_read_dir $pathtgt\n" if $Debug;    
	my $dir = new IO::Dir $pathtgt or return undef;
	while (defined(my $basefile = $dir->read)) {
	    my $file = "$pathtgt/$basefile";
	    if ($file =~ s/\.digest$//) {
		if (my $hitref = $self->_find_hit_ent ($file)) {
		    return $hitref;
		}
	    }
	}
    }
    return undef;
}

sub _find_hit_ent {
    my $self = shift;
    my $pathfile = shift;
    # Return new object if there is a cache hit for THIS ENTRY

    print "     Try $pathfile\n" if $Debug;
    my $digestref = Storable::retrieve("${pathfile}.digest");

    my @dl = map {$self->local_filename($_);} @{$digestref->{deps_gbl}};
    my @tl = map {$self->local_filename($_);} @{$digestref->{tgts_gbl}};

    my $hitref = $self->new(deps_lcl => [@dl],
			    tgts_lcl => [@tl],
			    #flags_lcl => [],	# Same as what we searched for
			    hit_filename => $pathfile,
			    );
    #print Dumper($hitref) if $Debug;

    my $hithash = $hitref->deps_data_digest;
    if (!$hithash) {
	print "\tMiss, dep doesn't exist\n" if $Debug;
	return undef;
    }
    if ($hithash ne $digestref->{deps_data_digest}) {
	print "\tMiss, digest mismatch ",$hithash," incache ",$digestref->{deps_data_digest},"\n" if $Debug;
	return undef;
    }
    print "\tDepend MATCH ${pathfile}\n" if $Debug;
    return $hitref;
}

#######################################################################
# Utilities

sub tgts_missing {
    my $self = shift || die;
    # Return filename if all target files don't exist
    foreach my $tgt ($self->tgts_lcl) {
	return $tgt if (!-r $tgt);
    }
    return undef;
}

sub tgts_unlink {
    my $self = shift || die;
    # Unlink any targets that exist
    foreach my $tgt ($self->tgts_lcl) {
	unlink $tgt;
    }
}

sub touch {
    my $file = shift;
    # There is a bug in redhat 7.2 where utime doesn't work
    #my $now = time; utime $now, $now, $file or carp "%Warn: Can't touch $file,";
    my $buf;
    my $fh = IO::File->new($file,"r+");
    $fh->sysread($buf,1);
    $fh->sysseek(0,0);
    $fh->syswrite($buf,1);
    $fh->close();
}

#######################################################################
# Cleaning

sub clean {
    my $self = shift || die;
    # Clean all old files in the repository
    my $purgetime = time() - $self->{clean_delay};
    $self->_clean_dir($purgetime, $self->{dir});
}

sub _clean_dir {
    my $self = shift;
    my $purgetime = shift;
    my $path = shift;
    # Clean one dir, return true if files exist
    my $dir = new IO::Dir $path or return;
    my $keepdir = 0;
    while (defined(my $basefile = $dir->read)) {
	my $file = "$path/$basefile";
	next if ($basefile eq "." || $basefile eq "..");
	if (-d $file) {
	    if (_clean_dir ($self, $purgetime, $file)) {
		$keepdir = 1;
	    } else {
		print "  Removing old dir $file\n" if $Debug;
		eval { rmtree $file,0,1; }   # Quietly...
	    }
	} elsif ($file =~ /\.digest$/) {
	    # We look at modtime, as it's faster and prevents death if it's malformed
	    my $mtime = (stat($file))[9];
	    if ($mtime && $mtime > $purgetime) {
		$keepdir = 1;
	    } else {
		print "  Removing old file $file\n" if $Debug;
		unlink $file;
		for (my $i=0; $i<10; $i++) {
		    (my $tfile = $file) =~ s/\.digest$/.t${i}/;
		    print "  Removing old file $tfile\n" if $Debug;
		    unlink $tfile;
		}
	    }
	}
    }
    #print "Depend_clean_dir $keepdir $path\n" if $Debug;
    return $keepdir;
}

#######################################################################
# Dump

sub dump {
    my $self = shift;

    my $infor = {
	dumpone => 1,
	rm => 0,	# If set, show the command to clean it up
	@_,
    };

    _dump_recurse($self,$infor, $self->{dir});

    printf +("\t%-11s %-11s %3s %3s %-32s %-5s %-20s %s\n",
	     "CREATED",
	     "LAST_HIT",
	     "Tgt",
	     "Dep",
	     "Hash",
	     "Cmd",
	     "Tgt[0]",
	     ($infor->{rm}?"RM":"Pwd"),
	     );
    foreach (sort(keys %{$infor->{lines}})) { print $infor->{lines}{$_}; }
}

sub _dump_recurse {
    my $self = shift;
    my $infor = shift;
    my $path = shift;

    my $dir = new IO::Dir $path or return;
    my $n=0;
    while (defined(my $basefile = $dir->read)) {
	my $file = "$path/$basefile";
	if ($basefile ne "." && $basefile ne ".." && -d $file) {
	    _dump_recurse($self,$infor,$file);
	}
	elsif ($file =~ /\.digest$/) {
	    my $persistref = Storable::retrieve($file);
	    print "$file\n" if $Debug; 
	    if ($infor->{dumpone}) {
		#print Dumper($persistref);
		$infor->{dumpone} = 0;
	    }
	    my $cdate; my $mdate;
	    {my $date = (stat($file))[9];
	     my ($a,$b,$c,$day,$month,$year,$d,$e,$f) = localtime($date);
	     $mdate = sprintf("%04d/%02d/%02d", $year+1900,$month+1,$day);}
	    {my ($a,$b,$c,$day,$month,$year,$d,$e,$f) = localtime($persistref->{time});
	     $cdate = sprintf("%04d/%02d/%02d", $year+1900,$month+1,$day);}

	    (my $showfile = $basefile) =~ s/\.digest//;
	    (my $key = $persistref->{tgts_gbl}[0]) =~ s/.*\///;
	    (my $cc  = $persistref->{flags_gbl}[0]) =~ s/.*\///;

	    # Convert old cache repository to new form
	    $persistref->{tgts_gbl} ||= [$persistref->{tgtfile}];
	    $persistref->{deps_gbl} ||= $persistref->{depends};

	    (my $rm = "/bin/rm $file") =~ s/\.digest$/*/;

	    $infor->{lines}{$key."_".$mdate.$n++}
	    = sprintf ("\t%-11s %-11s %3d %3d %-32s %-5s %-20s %s\n",
		       $cdate,
		       $mdate,
		       $#{$persistref->{tgts_gbl}}+1,
		       $#{$persistref->{deps_gbl}}+1,
		       $showfile,
		       $cc,
		       $key,
		       ($infor->{rm} ? $rm : $persistref->{pwd}),
		       );
	}
    }
}

#######################################################################
1;
__END__

=pod

=head1 NAME

Make::Cache - Caching of object and test run information

=head1 SYNOPSIS

  my $oc = Make::Cache->new (...)
  $oc->write
  if (my $hit = $oc->find_hit) {
    $hit->restore;
  }
  $oc->dump

=head1 DESCRIPTION

Make::Cache is used to accelerate the generation of makefile targets.
When a target is to be created, it is looked up in the cache.  On a miss,
the hash of the source files, and all the generated targets are stored.

Next time the target is needed, the cache will hit, and the target files
may be retrieved from the cache instead of being regenerated.

The Make::Cache class is generally used as a base class for more specific
classes.

=head1 LOCAL TO GLOBAL

Make::Cache converts local filenames to global filenames.  This gets around
the problem of compile lines similar to

    gcc /user/a/home_dir/foo.c

resulting in cache misses, because someone else compiled the exact same source
in a different directory

    gcc /user/b/home_dir/foo.c

To avoid this, all filenames are converted to global format, by using a
file_env list passed in the new constructor.  By default, this converts the
current working directory (cwd) to $CWD.  Using the examples above, the gcc
command line would be hashed as if the user typed:

    gcc $$CWD$$/foo.c

Which is identical for both users, and thus will result in cache hits.

=head1 FUNCTIONS

=over 4

=item new

Create a new Cache::Hash object.  Named parameters may be specified:

=over 4

=item clean_delay

Number of seconds a object must be in age before clean() may delete it.
Default is one day (24*60*60).

=item file_env

List of environment variables for substitution, see LOCAL TO GLOBAL
section.

=item read

Defaults true to enable finding hits in the cache.

=item write

Defaults true to enable writing updates to the cache.

=back

=item clear_hash_cache

If a source file changes, this function must be called to clear out a
temporary internal cache.

=item clean

Clean the cache.  See the clean_delay variable.

=item cmds_gbl

Return list of commands, relative to all users.  This is used in hashing;
see LOCAL TO GLOBAL section.

=item cmds_lcl

Return list of commands, relative to the local user, that should be
executed.  With parameter, add the specified command.

=item deps_gbl

Return list of source dependency filenames, relative to all users.  This is
used in hashing; see LOCAL TO GLOBAL section.

=item deps_lcl

Return list of source dependency filenames, relative to the local user,
that should be executed.  With parameter, add the specified filename.

=item

For debugging, dump the current contents of the compile cache.

=item flags_gbl

Return list of compile flags, relative to all users.  This is used in
hashing; see LOCAL TO GLOBAL section.

=item flags_lcl

Return list of compile flags, relative to the local user, that should be
executed.  With parameter, add the specified flag.

=item global_filename

Given a local filename, return the filename in global format.  See LOCAL TO
GLOBAL section.

=item local_filename

Given a global filename, return the filename in local format.  See LOCAL TO
GLOBAL section.

=item nodup

Given a input list, return the list with any duplicates removed.

=item tgts_gbl

Return list of target filenames, relative to all users.  This is used in
hashing; see LOCAL TO GLOBAL section.

=item tgts_lcl

Return list of target filenames, relative to the local user, that should be
executed.  With parameter, add the specified filename.

=item tgts_missing

Return the filename of any local targets that are not currently on disk.
Return undef if all targets exist.

=item tgts_unlink

Remove all local target files.

=item write

Write the cache digest.

=item restore (object_from_find_hit)

Given a object returned from find_hit, restore any target files to the
local compile area.

=back

=head1 DISTRIBUTION

The latest version is available from CPAN and from L<http://www.veripool.com/>.

Copyright 2000-2007 by Wilson Snyder.  This package is free software; you
can redistribute it and/or modify it under the terms of either the GNU
Lesser General Public License or the Perl Artistic License.

=head1 AUTHORS

Wilson Snyder <wsnyder@wsnyder.org>

=head1 SEE ALSO

L<objcache>, L<Make::Cache::Runtime>, L<Make::Cache::Hash>,
L<Make::Cache::Obj>, L<Make::Cache::Gcc>

=cut

######################################################################


syntax highlighted by Code2HTML, v. 0.9.1