#$Id: Cache.pm 46153 2007-10-19 00:26:07Z wsnyder $
######################################################################
#
# This program is Copyright 2002-2007 by Wilson Snyder.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of either the GNU General Public License or the
# Perl Artistic License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
######################################################################
package Make::Cache;
use IO::File;
use IO::Pipe;
use IO::Dir;
use Data::Dumper;
use File::Copy;
use File::Path;
use Storable;
use Digest::MD5;
use Sys::Hostname;
use Cwd;
use Data::Dumper; $Data::Dumper::Indent=1;
use Make::Cache::Hash;
use Make::Cache::Runtime;
use Carp;
use strict;
our $Debug;
our $VERSION = '1.043';
######################################################################
#### Creators
sub new {
my $class = shift;
my @inherit = %{$class} if ref $class;
my $self = {
# Global Defaults
#dir => "path_to_cache_dir",
read=>1, # Allow cache to have read hits
write=>1, # Allow cache to write updated status
clean_delay=>(24*60*60), # After this many seconds, clean may delete the file
pwd=>Cwd::getcwd(),
file_env => [ # List of global mnemonic and local value for global_filename funcs (not hash-can be duplicates)
PWD => Cwd::getcwd(),
],
# Per-file elements
tgts_lcl => [],
deps_lcl => [],
cmds_lcl => [],
flags_lcl => [],
@inherit,
@_,
};
$self->{dir} or croak "%Error: Need to specify repository dir,";
($self->{dir}=~/^\//) or croak "%Error: Repository dir must be absolute path,";
bless $self, ref($class)||$class;
}
sub clear_hash_cache {
Make::Cache::Hash::clear_cache();
}
#######################################################################
# Accessors
sub cmds_lcl {
my $self = shift;
# List of commands to be executed
push @{$self->{cmds_lcl}}, @_ if defined $_[0];
return @{$self->{cmds_lcl}};
}
sub flags_lcl {
my $self = shift;
# List of commands to be hashed, thus is generally cmds_lcl minus -D/-U/-I switches
push @{$self->{flags_lcl}}, @_ if defined $_[0];
return @{$self->{flags_lcl}};
}
sub deps_lcl {
my $self = shift;
# List of filename dependencies used to create the targets
push @{$self->{deps_lcl}}, @_ if defined $_[0];
return @{$self->{deps_lcl}};
}
sub tgts_lcl {
my $self = shift;
# List of filename targets to be cached
push @{$self->{tgts_lcl}}, @_ if defined $_[0];
return @{$self->{tgts_lcl}};
}
sub cmds_gbl {
my $self = shift;
return map {$self->global_filename($_);} $self->cmds_lcl;
}
sub deps_gbl {
my $self = shift;
return nodup(map {$self->global_filename($_);} $self->deps_lcl);
}
sub flags_gbl {
my $self = shift;
return nodup(map {$self->global_filename($_);} $self->flags_lcl);
}
sub tgts_gbl {
my $self = shift;
return nodup(map {$self->global_filename($_);} $self->tgts_lcl);
}
sub tgts_name_digest {
my $self = shift;
# Internal; create digest based on target names
$self->{tgts_name_digest} ||= Make::Cache::Hash::hash (text=>[ $self->tgts_gbl ]);
return $self->{tgts_name_digest};
}
sub runtime_key_digest {
my $self = shift;
# Internal; create digest based on target names and executable
$self->{runtime_key_digest} ||= Make::Cache::Hash::hash (text=>[ $self->tgts_gbl, $self->{cmds_exec} ]);
return $self->{runtime_key_digest};
}
sub deps_data_digest {
my $self = shift;
my $hash = $self->deps_fixed_digest;
# Internal; create digest based on target contents
if (!$hash) {
my @dep_to_do = nodup($self->deps_lcl);
@dep_to_do = sort(@dep_to_do);
$hash = Make::Cache::Hash::hash (filenames=>[ @dep_to_do ],
text=>[ $self->flags_gbl ],);
#print "\tHash $hash\n\tof ",(join ' ',@dep_to_do),"\n\tCmd=",(join ' ',$self->flags_gbl), "\n" if $Debug;
}
return $hash;
}
sub deps_fixed_digest {
# If defined, this returns a digest which is known to contain all relevant
# source hash information, and thus avoids having to do read dependencies
# as each cache entry is compared.
return undef;
}
sub nodup {
my @o;
my %o;
foreach (@_) {
next if $o{$_};
$o{$_} = 1;
push @o, $_;
}
return @o;
}
#######################################################################
# Methods, may be overloaded
sub global_filename {
my $self = shift;
my $filename = shift;
for (my $i=0; $i<=$#{$self->{file_env}}; $i+=2) {
my ($key, $val) = ($self->{file_env}[$i], $self->{file_env}[$i+1]);
$filename =~ s/^$val/\$\$${key}\$\$/;
}
return $filename;
}
sub local_filename {
my $self = shift;
my $filename = shift;
for (my $i=0; $i<=$#{$self->{file_env}}; $i+=2) {
my ($key, $val) = ($self->{file_env}[$i], $self->{file_env}[$i+1]);
$filename =~ s/^\$\$${key}\$\$/$val/;
}
return $filename;
}
#######################################################################
# Digest writing
sub write {
my $self = shift;
# We don't clear the cache, this means we almost certainly have the hashes of
# the source code when we started the process. This is safer, as it allows for
# the user to change the code while running a build, and not caching the wrong
# results.
# What should we put into the file?
my $store = {
deps_gbl => [$self->deps_gbl], # Dependencies expressed in local filesystem filenames
flags_gbl => [$self->flags_gbl],# Command to be hashed (cmds_gbl minus flags that don't matter)
tgts_gbl => [$self->tgts_gbl], # Targets expressed in local filesystem filenames
deps_data_digest => $self->deps_data_digest,
tgts_name_digest => $self->tgts_name_digest,
time => time(),
pwd => $self->{pwd},
};
#print Dumper($store) if $Debug;
# Write it out
if (!-w $self->{dir}) {
mkpath $self->{dir}, 0777;
if (!-w $self->{dir}) {
warn ("%Warning: Cache.pm: Cannot write dir '$self->{dir}' to cache '$store->{tgts_gbl}[0]'.\n");
return;
}
}
if (my $tgt = $self->tgts_missing) {
warn "%Warning: Cache.pm: Expected target file wasn't generated: $tgt\n";
return;
}
(my $digPrefix = $store->{tgts_name_digest}) =~ s/^(..).*$/$1/;
my $pathtgt = "$self->{dir}/$digPrefix/$store->{tgts_name_digest}";
mkpath ($pathtgt, 0, 0777);
my $pathfile = "$pathtgt/$store->{deps_data_digest}";
Storable::nstore ($store, "$pathfile.digest.new$$");
chmod (0666, "$pathfile.digest.new$$");
for (my $n=0; $n<=$#{$store->{tgts_gbl}}; $n++) {
my $tgt = $self->{tgts_lcl}[$n];
my $to = "$pathfile.t${n}.new$$";
print " cp $tgt $to\n" if $Debug;
copy ($tgt, "$pathfile.t${n}.new$$");
}
# Do the copy as one atomic op to reduce race case with another reader
for (my $n=0; $n<=$#{$store->{tgts_gbl}}; $n++) {
move ("$pathfile.t${n}.new$$", "$pathfile.t${n}");
}
move ("$pathfile.digest.new$$", "$pathfile.digest");
}
########################################################################
## Digest reading
sub restore {
my $self = shift or return;
# Call this with the results of a find_hit
# Restores the cached files.
my $pathfile = $self->{hit_filename} or die "%Error: Not called on a hit object,";
# Update the digest date, so cleaning won't consider it unsed
touch("${pathfile}.digest");
my $missing;
for (my $n=0; $n<=$#{$self->{tgts_lcl}}; $n++) {
my $tgt = $self->{tgts_lcl}[$n];
my $from = "$pathfile.t${n}";
if (!-r $from) { $missing=$from; last; }
# Can't hard link, as might be on different file system
unlink $tgt;
if ($self->{link}) {
print " ln -s $from $tgt\n" if $Debug;
symlink $from, $tgt or die "objcache: %Error: Can't ln -s $from $tgt: $!\n";
} else {
print " cp $from $tgt\n" if $Debug;
copy $from, $tgt or die "objcache: %Error: Can't cp $from $tgt: $!\n";
}
touch($tgt);
}
if ($missing) {
# Recover nicely by blowing away the cache entry
warn "-Info: Cache.pm: Object cache files missing for $missing\n";
unlink ("$pathfile.digest");
for (my $n=0; $n<=$#{$self->{tgts_lcl}}; $n++) {
my $from = "$pathfile.t${n}";
unlink $from;
}
return undef;
}
return $self;
}
sub find_hit {
my $self = shift;
# Return new Make::Cache reference to the cache hit on ANY ENTRY for this file
# Or undef if we're unlucky
(my $digPrefix = $self->tgts_name_digest) =~ s/^(..).*$/$1/;
my $pathtgt = "$self->{dir}/$digPrefix/".$self->tgts_name_digest;
my $dfixed = $self->deps_fixed_digest;
if ($dfixed) {
my $file = "$pathtgt/$dfixed";
print "Digests_read_fixed $file\n" if $Debug;
if (-r "$file.digest") {
return $self->_find_hit_ent ($file);
}
}
else {
print "Digests_read_dir $pathtgt\n" if $Debug;
my $dir = new IO::Dir $pathtgt or return undef;
while (defined(my $basefile = $dir->read)) {
my $file = "$pathtgt/$basefile";
if ($file =~ s/\.digest$//) {
if (my $hitref = $self->_find_hit_ent ($file)) {
return $hitref;
}
}
}
}
return undef;
}
sub _find_hit_ent {
my $self = shift;
my $pathfile = shift;
# Return new object if there is a cache hit for THIS ENTRY
print " Try $pathfile\n" if $Debug;
my $digestref = Storable::retrieve("${pathfile}.digest");
my @dl = map {$self->local_filename($_);} @{$digestref->{deps_gbl}};
my @tl = map {$self->local_filename($_);} @{$digestref->{tgts_gbl}};
my $hitref = $self->new(deps_lcl => [@dl],
tgts_lcl => [@tl],
#flags_lcl => [], # Same as what we searched for
hit_filename => $pathfile,
);
#print Dumper($hitref) if $Debug;
my $hithash = $hitref->deps_data_digest;
if (!$hithash) {
print "\tMiss, dep doesn't exist\n" if $Debug;
return undef;
}
if ($hithash ne $digestref->{deps_data_digest}) {
print "\tMiss, digest mismatch ",$hithash," incache ",$digestref->{deps_data_digest},"\n" if $Debug;
return undef;
}
print "\tDepend MATCH ${pathfile}\n" if $Debug;
return $hitref;
}
#######################################################################
# Utilities
sub tgts_missing {
my $self = shift || die;
# Return filename if all target files don't exist
foreach my $tgt ($self->tgts_lcl) {
return $tgt if (!-r $tgt);
}
return undef;
}
sub tgts_unlink {
my $self = shift || die;
# Unlink any targets that exist
foreach my $tgt ($self->tgts_lcl) {
unlink $tgt;
}
}
sub touch {
my $file = shift;
# There is a bug in redhat 7.2 where utime doesn't work
#my $now = time; utime $now, $now, $file or carp "%Warn: Can't touch $file,";
my $buf;
my $fh = IO::File->new($file,"r+");
$fh->sysread($buf,1);
$fh->sysseek(0,0);
$fh->syswrite($buf,1);
$fh->close();
}
#######################################################################
# Cleaning
sub clean {
my $self = shift || die;
# Clean all old files in the repository
my $purgetime = time() - $self->{clean_delay};
$self->_clean_dir($purgetime, $self->{dir});
}
sub _clean_dir {
my $self = shift;
my $purgetime = shift;
my $path = shift;
# Clean one dir, return true if files exist
my $dir = new IO::Dir $path or return;
my $keepdir = 0;
while (defined(my $basefile = $dir->read)) {
my $file = "$path/$basefile";
next if ($basefile eq "." || $basefile eq "..");
if (-d $file) {
if (_clean_dir ($self, $purgetime, $file)) {
$keepdir = 1;
} else {
print " Removing old dir $file\n" if $Debug;
eval { rmtree $file,0,1; } # Quietly...
}
} elsif ($file =~ /\.digest$/) {
# We look at modtime, as it's faster and prevents death if it's malformed
my $mtime = (stat($file))[9];
if ($mtime && $mtime > $purgetime) {
$keepdir = 1;
} else {
print " Removing old file $file\n" if $Debug;
unlink $file;
for (my $i=0; $i<10; $i++) {
(my $tfile = $file) =~ s/\.digest$/.t${i}/;
print " Removing old file $tfile\n" if $Debug;
unlink $tfile;
}
}
}
}
#print "Depend_clean_dir $keepdir $path\n" if $Debug;
return $keepdir;
}
#######################################################################
# Dump
sub dump {
my $self = shift;
my $infor = {
dumpone => 1,
rm => 0, # If set, show the command to clean it up
@_,
};
_dump_recurse($self,$infor, $self->{dir});
printf +("\t%-11s %-11s %3s %3s %-32s %-5s %-20s %s\n",
"CREATED",
"LAST_HIT",
"Tgt",
"Dep",
"Hash",
"Cmd",
"Tgt[0]",
($infor->{rm}?"RM":"Pwd"),
);
foreach (sort(keys %{$infor->{lines}})) { print $infor->{lines}{$_}; }
}
sub _dump_recurse {
my $self = shift;
my $infor = shift;
my $path = shift;
my $dir = new IO::Dir $path or return;
my $n=0;
while (defined(my $basefile = $dir->read)) {
my $file = "$path/$basefile";
if ($basefile ne "." && $basefile ne ".." && -d $file) {
_dump_recurse($self,$infor,$file);
}
elsif ($file =~ /\.digest$/) {
my $persistref = Storable::retrieve($file);
print "$file\n" if $Debug;
if ($infor->{dumpone}) {
#print Dumper($persistref);
$infor->{dumpone} = 0;
}
my $cdate; my $mdate;
{my $date = (stat($file))[9];
my ($a,$b,$c,$day,$month,$year,$d,$e,$f) = localtime($date);
$mdate = sprintf("%04d/%02d/%02d", $year+1900,$month+1,$day);}
{my ($a,$b,$c,$day,$month,$year,$d,$e,$f) = localtime($persistref->{time});
$cdate = sprintf("%04d/%02d/%02d", $year+1900,$month+1,$day);}
(my $showfile = $basefile) =~ s/\.digest//;
(my $key = $persistref->{tgts_gbl}[0]) =~ s/.*\///;
(my $cc = $persistref->{flags_gbl}[0]) =~ s/.*\///;
# Convert old cache repository to new form
$persistref->{tgts_gbl} ||= [$persistref->{tgtfile}];
$persistref->{deps_gbl} ||= $persistref->{depends};
(my $rm = "/bin/rm $file") =~ s/\.digest$/*/;
$infor->{lines}{$key."_".$mdate.$n++}
= sprintf ("\t%-11s %-11s %3d %3d %-32s %-5s %-20s %s\n",
$cdate,
$mdate,
$#{$persistref->{tgts_gbl}}+1,
$#{$persistref->{deps_gbl}}+1,
$showfile,
$cc,
$key,
($infor->{rm} ? $rm : $persistref->{pwd}),
);
}
}
}
#######################################################################
1;
__END__
=pod
=head1 NAME
Make::Cache - Caching of object and test run information
=head1 SYNOPSIS
my $oc = Make::Cache->new (...)
$oc->write
if (my $hit = $oc->find_hit) {
$hit->restore;
}
$oc->dump
=head1 DESCRIPTION
Make::Cache is used to accelerate the generation of makefile targets.
When a target is to be created, it is looked up in the cache. On a miss,
the hash of the source files, and all the generated targets are stored.
Next time the target is needed, the cache will hit, and the target files
may be retrieved from the cache instead of being regenerated.
The Make::Cache class is generally used as a base class for more specific
classes.
=head1 LOCAL TO GLOBAL
Make::Cache converts local filenames to global filenames. This gets around
the problem of compile lines similar to
gcc /user/a/home_dir/foo.c
resulting in cache misses, because someone else compiled the exact same source
in a different directory
gcc /user/b/home_dir/foo.c
To avoid this, all filenames are converted to global format, by using a
file_env list passed in the new constructor. By default, this converts the
current working directory (cwd) to $CWD. Using the examples above, the gcc
command line would be hashed as if the user typed:
gcc $$CWD$$/foo.c
Which is identical for both users, and thus will result in cache hits.
=head1 FUNCTIONS
=over 4
=item new
Create a new Cache::Hash object. Named parameters may be specified:
=over 4
=item clean_delay
Number of seconds a object must be in age before clean() may delete it.
Default is one day (24*60*60).
=item file_env
List of environment variables for substitution, see LOCAL TO GLOBAL
section.
=item read
Defaults true to enable finding hits in the cache.
=item write
Defaults true to enable writing updates to the cache.
=back
=item clear_hash_cache
If a source file changes, this function must be called to clear out a
temporary internal cache.
=item clean
Clean the cache. See the clean_delay variable.
=item cmds_gbl
Return list of commands, relative to all users. This is used in hashing;
see LOCAL TO GLOBAL section.
=item cmds_lcl
Return list of commands, relative to the local user, that should be
executed. With parameter, add the specified command.
=item deps_gbl
Return list of source dependency filenames, relative to all users. This is
used in hashing; see LOCAL TO GLOBAL section.
=item deps_lcl
Return list of source dependency filenames, relative to the local user,
that should be executed. With parameter, add the specified filename.
=item
For debugging, dump the current contents of the compile cache.
=item flags_gbl
Return list of compile flags, relative to all users. This is used in
hashing; see LOCAL TO GLOBAL section.
=item flags_lcl
Return list of compile flags, relative to the local user, that should be
executed. With parameter, add the specified flag.
=item global_filename
Given a local filename, return the filename in global format. See LOCAL TO
GLOBAL section.
=item local_filename
Given a global filename, return the filename in local format. See LOCAL TO
GLOBAL section.
=item nodup
Given a input list, return the list with any duplicates removed.
=item tgts_gbl
Return list of target filenames, relative to all users. This is used in
hashing; see LOCAL TO GLOBAL section.
=item tgts_lcl
Return list of target filenames, relative to the local user, that should be
executed. With parameter, add the specified filename.
=item tgts_missing
Return the filename of any local targets that are not currently on disk.
Return undef if all targets exist.
=item tgts_unlink
Remove all local target files.
=item write
Write the cache digest.
=item restore (object_from_find_hit)
Given a object returned from find_hit, restore any target files to the
local compile area.
=back
=head1 DISTRIBUTION
The latest version is available from CPAN and from L.
Copyright 2000-2007 by Wilson Snyder. This package is free software; you
can redistribute it and/or modify it under the terms of either the GNU
Lesser General Public License or the Perl Artistic License.
=head1 AUTHORS
Wilson Snyder
=head1 SEE ALSO
L, L, L,
L, L
=cut
######################################################################