package File::Finder::Steps; our $VERSION = 0.53; use strict; use Carp qw(croak); =head1 NAME File::Finder::Steps - steps for File::Finder =head1 SYNOPSIS ## See File::Finder for normal use of steps ## subclassing example: BEGIN { package My::File::Finder; use base File::Finder; sub _steps_class { "My::File::Finder::Steps" } } BEGIN { package My::File::Finder::Steps; use base File::Finder::Steps; sub bigger_than { # true if bigger than N bytes my $self = shift; my $bytes = shift; return sub { -s > $bytes; } } } my $over_1k = My::File::Finder->bigger_than(1024); print "Temp files over 1k:\n"; $over_1k->ls->in("/tmp"); =head1 DESCRIPTION C provide the predicates being tested for C. =head2 STEPS METHODS These methods are called on a class or instance to add a "step". Each step adds itself to a list of steps, returning the new object. This allows you to chain steps together to form a formula. As in I, the default operator is "and", and short-circuiting is performed. =over =item or Like I's C. =cut sub or { return "or" } =item left Like a left parenthesis. Used in nesting pairs with C. =cut sub left { return "left" } BEGIN { *begin = \&left; } =item right Like a right parenthesis. Used in nesting pairs with C. For example: my $big_or_old = File::Finder ->type('f') ->left ->size("+100")->or->mtime("+90") ->right; find($big_or_old->ls, "/tmp"); You need parens because the "or" operator is lower precedence than the implied "and", for the same reason you need them here: find /tmp -type f '(' -size +100 -o -mtime +90 ')' -print Without the parens, the -type would bind to -size, and not to the choice of -size or -mtime. Mismatched parens will not be found until the formula is used, causing a fatal error. =cut sub right { return "right" } BEGIN { *end = \&right; } =item begin Alias for C. =item end Alias for C. =item not Like I's C. Prefix operator, can be placed in front of individual terms or open parens. Can be nested, but what's the point? # list all non-files in /tmp File::Finder->not->type('f')->ls->in("/tmp"); =cut sub not { return "not" } =item true Always returns true. Useful when a subexpression might fail, but you don't want the overall code to fail: ... ->left-> ...[might return false]... ->or->true->right-> ... Of course, this is the I command's idiom of: find .... '(' .... -o -true ')' ... =cut sub true { return sub { 1 } } =item false Always returns false. =cut sub false { return sub { 0 } } =item comma Like GNU I's ",". The result of the expression (or subexpression if in parens) up to this point is discarded, and execution continues afresh. Useful when a part of the expression is needed for its side effects, but shouldn't affect the rest of the "and"-ed chain. # list all files and dirs, but don't descend into CVS dir contents: File::Finder->type('d')->name('CVS')->prune->comma->ls->in('.'); =cut sub comma { return "comma" } # gnu extension =item follow Enables symlink following, and returns true. =cut sub follow { my $self = shift; $self->{options}{follow} = 1; return sub { 1 }; } =item name(NAME) True if basename matches NAME, which can be given as a glob pattern or a regular expression object: my $pm_files = File::Finder->name('*.pm')->in('.'); my $pm_files_too = File::Finder->name(qr/pm$/)->in('.'); =cut sub name { my $self = shift; my $name = shift; unless (UNIVERSAL::isa($name, "Regexp")) { require Text::Glob; $name = Text::Glob::glob_to_regex($name); } return sub { /$name/; }; } =item perm(PERMISSION) Like I's C<-perm>. Leading "-" means "all of these bits". Leading "+" means "any of these bits". Value is de-octalized if a leading 0 is present, which is likely only if it's being passed as a string. my $files = File::Finder->type('f'); # find files that are exactly mode 644 my $files_644 = $files->perm(0644); # find files that are at least world executable: my $files_world_exec = $files->perm("-1"); # find files that have some executable bit set: my $files_exec = $files->perm("+0111"); =cut sub perm { my $self = shift; my $perm = shift; $perm =~ /^(\+|-)?\d+\z/ or croak "bad permissions $perm"; if ($perm =~ s/^-//) { $perm = oct($perm) if $perm =~ /^0/; return sub { ((stat _)[2] & $perm) == $perm; }; } elsif ($perm =~ s/^\+//) { # gnu extension $perm = oct($perm) if $perm =~ /^0/; return sub { ((stat _)[2] & $perm); }; } else { $perm = oct($perm) if $perm =~ /^0/; return sub { ((stat _)[2] & 0777) == $perm; }; } } =item type(TYPE) Like I's C<-type>. All native Perl types are supported. Note that C is a socket, mapping to Perl's C<-S>, to be consistent with I. Returns true or false, as appropriate. =cut BEGIN { my %typecast; sub type { my $self = shift; my $type = shift; $type =~ /^[a-z]\z/i or croak "bad type $type"; $type =~ s/s/S/; $typecast{$type} ||= eval "sub { -$type _ }"; } } =item print Prints the fullname to C, followed by a newline. Returns true. =cut sub print { return sub { print $File::Find::name, "\n"; 1; }; } =item print0 Prints the fullname to C, followed by a NUL. Returns true. =cut sub print0 { return sub { print $File::Find::name, "\0"; 1; }; } =item fstype Not implemented yet. =item user(USERNAME|UID) True if the owner is USERNAME or UID. =cut sub user { my $self = shift; my $user = shift; my $uid = ($user =~ /^\d+\z/) ? $user : _user_to_uid($user); die "bad user $user" unless defined $uid; return sub { (stat _)[4] == $uid; }; } =item group(GROUPNAME|GID) True if the group is GROUPNAME or GID. =cut sub group { my $self = shift; my $group = shift; my $gid = ($group =~ /^\d+\z/) ? $group : _group_to_gid($group); die "bad group $gid" unless defined $gid; return sub { (stat _)[5] == $gid; }; } =item nouser True if the entry doesn't belong to any known user. =cut sub nouser { return sub { CORE::not defined _uid_to_user((stat _)[4]); } } =item nogroup True if the entry doesn't belong to any known group. =cut sub nogroup { return sub { CORE::not defined _gid_to_group((stat _)[5]); } } =item links( +/- N ) Like I's C<-links N>. Leading plus means "more than", minus means "less than". =cut sub links { my $self = shift; my ($prefix, $n) = shift =~ /^(\+|-|)(.*)/; return sub { _n($prefix, $n, (stat(_))[3]); }; } =item inum( +/- N ) True if the inode number meets the qualification. =cut sub inum { my $self = shift; my ($prefix, $n) = shift =~ /^(\+|-|)(.*)/; return sub { _n($prefix, $n, (stat(_))[1]); }; } =item size( +/- N [c/k]) True if the file size meets the qualification. By default, N is in half-K blocks. Append a trailing "k" to the number to indicate 1K blocks, or "c" to indicate characters (bytes). =cut sub size { my $self = shift; my ($prefix, $n) = shift =~ /^(\+|-|)(.*)/; if ($n =~ s/c\z//) { return sub { _n($prefix, $n, int(-s _)); }; } if ($n =~ s/k\z//) { return sub { _n($prefix, $n, int(((-s _)+1023) / 1024)); }; } return sub { _n($prefix, $n, int(((-s _)+511) / 512)); }; } =item atime( +/- N ) True if access time (in days) meets the qualification. =cut sub atime { my $self = shift; my ($prefix, $n) = shift =~ /^(\+|-|)(.*)/; return sub { _n($prefix, $n, int(-A _)); }; } =item mtime( +/- N ) True if modification time (in days) meets the qualification. =cut sub mtime { my $self = shift; my ($prefix, $n) = shift =~ /^(\+|-|)(.*)/; return sub { _n($prefix, $n, int(-M _)); }; } =item ctime( +/- N ) True if inode change time (in days) meets the qualification. =cut sub ctime { my $self = shift; my ($prefix, $n) = shift =~ /^(\+|-|)(.*)/; return sub { _n($prefix, $n, int(-C _)); }; } =item exec(@COMMAND) Forks the child process via C. Any appearance of C<{}> in any argument is replaced by the current filename. Returns true if the child exit status is 0. The list is passed directly to C, so if it's a single arg, it can contain C syntax. Otherwise, it's a pre-parsed command that must be found on the PATH. Note that I couldn't figure out how to horse around with the current directory very well, so I'm using C<$_> here instead of the more traditional C. It still works, because we're still chdir'ed down into the directory, but it looks weird on a trace. Trigger C in C if you want a traditional I full path. my $f = File::Finder->exec('ls', '-ldg', '{}'); find({ no_chdir => 1, wanted => $f }, @starting_dirs); Yeah, it'd be trivial for me to add a no_chdir method. Soon. =cut sub exec { my $self = shift; my @command = @_; return sub { my @mapped = @command; for my $one (@mapped) { $one =~ s/{}/$_/g; } system @mapped; return !$?; }; } =item ok(@COMMAND) Like C, but displays the command line first, and waits for a response. If the response begins with C or C, runs the command. If the command fails, or the response wasn't yes, returns false, otherwise true. =cut sub ok { my $self = shift; my @command = @_; return sub { my @mapped = @command; for my $one (@mapped) { $one =~ s/{}/$_/g; } my $old = select(STDOUT); $|++; print "@mapped? "; select $old; return 0 unless =~ /^y/i; system @mapped; return !$?; }; } =item prune Sets C<$File::Find::prune>, and returns true. =cut sub prune { return sub { $File::Find::prune = 1 }; } =item xdev Not yet implemented. =item newer Not yet implemented. =item eval(CODEREF) Ah yes, the master escape, with extra benefits. Give it a coderef, and it evaluates that code at the proper time. The return value is noted for true/false and used accordingly. my $blaster = File::Finder->atime("+30")->eval(sub { unlink }); But wait, there's more. If the parameter is an object that responds to C, that method is automatically called, hoping for a coderef return. This neat feature allows subroutines to be created and nested: my $old = File::Finder->atime("+30"); my $big = File::Finder->size("+100"); my $old_or_big = File::Finder->eval($old)->or->eval($big); my $killer = File::Finder->eval(sub { unlink }); my $kill_old_or_big = File::Finder->eval($old_or_big)->ls->eval($killer); $kill_old_or_big->in('/tmp'); Almost too cool for words. =cut sub eval { my $self = shift; my $eval = shift; ## if this is another File::Finder object... then cheat: $eval = $eval->as_wanted if UNIVERSAL::can($eval, "as_wanted"); return $eval; # just reuse the coderef } =item depth Like I's C<-depth>. Sets a flag for C, and returns true. =cut sub depth { my $self = shift; $self->{options}{bydepth} = 1; return sub { 1 }; } =item ls Like I's C<-ls>. Performs a C on the entry to C (without forking), and returns true. =cut sub ls { return \&_ls; } =item tar Not yet implemented. =item [n]cpio Not yet implemented. =item ffr($ffr_object) Incorporate a C object as a step. Note that this must be a rule object, and not a result, so don't call or pass C. For example, using C to define a predicate for image files that are bigger than a megapixel in my friends folder, I get: require File::Finder; require File::Find::Rule; require File::Find::Rule::ImageSize; my $ffr = File::Find::Rule->file->image_x('>1000')->image_y('>1000'); my @big_friends = File::Finder->ffr($ffr) ->in("/Users/merlyn/Pictures/Sorted/Friends"); =cut sub ffr { my $self = shift; my $ffr_object = shift; my $their_wanted; no warnings; local *File::Find::find = sub { my ($options) = @_; for (my ($k, $v) = each %$options) { if ($k eq "wanted") { $their_wanted = $v; } else { $self->{options}->{$k} = $v; } } }; $ffr_object->in("/DUMMY"); # boom! croak "no wanted defined" unless defined $their_wanted; return $their_wanted; } =item contains(pattern) True if the file contains C (either a literal string treated as a regex, or a true regex object). my $plugh_files = File::Finder->type('f')->contains(qr/plugh/); Searching is performed on a line-by-line basis, respecting the current value of C<$/>. =cut sub contains { my $self = shift; my $pat = shift; return sub { open my $f, "<$_" or return 0; while (<$f>) { return 1 if /$pat/; } return 0; }; } =back =head2 EXTENDING A step consists of a compile-time and a run-time component. During the creation of a C object, step methods are called as if they were methods against the slowly-growing C instance, including any additional parameters as in a normal method call. The step is expected to return a coderef (possibly a closure) to be executed at run-time. When a C object is being evaluated as the C C routine, the collected coderefs are evaluated in sequence, again as method calls against the C object. No additional parameters are passed. However, the normal C values are available, such as C<$_>, C<$File::Find::name>, and so on. The C<_> pseudo-handle has been set properly, so you can safely use C<-X> filetests and C against the pseudo-handle. The routine is expected to return a true/false value, which becomes the value of the step. Although a C object is passed both to the compile-time invocation and the resulting run-time invocation, only the C self-hash element is properly duplicated through the cloning process. Do not be tempted to add additional self-hash elements without overriding C's C<_clone>. Instead, pass values from the compile-time phase to the run-time phase using closure variables, as shown in the synopsis. For simplicity, you can also just mix-in your methods to the existing C class, rather than subclassing both classes as shown above. However, this may result in conflicting implementations of a given step name, so beware. =head1 SEE ALSO L =head1 BUGS None known yet. =head1 AUTHOR Randal L. Schwartz, Emerlyn@stonehenge.comE =head1 COPYRIGHT AND LICENSE Copyright (C) 2003,2004 by Randal L. Schwartz, Stonehenge Consulting Services, Inc. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.8.2 or, at your option, any later version of Perl 5 you may have available. =cut ## utility subroutines sub _n { my ($prefix, $arg, $value) = @_; if ($prefix eq "+") { $value > $arg; } elsif ($prefix eq "-") { $value < $arg; } else { $value == $arg; } } BEGIN { my %user_to_uid; my %uid_to_user; my $initialize = sub { while (my ($user, $pw, $uid) = getpwent) { $user_to_uid{$user} = $uid; $uid_to_user{$uid} = $user; } }; sub _user_to_uid { my $user = shift; %user_to_uid or $initialize->(); $user_to_uid{$user}; } sub _uid_to_user { my $uid = shift; %uid_to_user or $initialize->(); $uid_to_user{$uid}; } } BEGIN { my %group_to_gid; my %gid_to_group; my $initialize = sub { while (my ($group, $pw, $gid) = getgrent) { $group_to_gid{$group} = $gid; $gid_to_group{$gid} = $group; } }; sub _group_to_gid { my $group = shift; %group_to_gid or $initialize->(); $group_to_gid{$group}; } sub _gid_to_group { my $gid = shift; %gid_to_group or $initialize->(); $gid_to_group{$gid}; } } BEGIN { ## from find2perl my @rwx = qw(--- --x -w- -wx r-- r-x rw- rwx); my @moname = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec); sub _sizemm { my $rdev = shift; sprintf("%3d, %3d", ($rdev >> 8) & 0xff, $rdev & 0xff); } sub _ls { my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size, $atime,$mtime,$ctime,$blksize,$blocks) = stat(_); my $pname = $File::Find::name; $blocks or $blocks = int(($size + 1023) / 1024); my $perms = $rwx[$mode & 7]; $mode >>= 3; $perms = $rwx[$mode & 7] . $perms; $mode >>= 3; $perms = $rwx[$mode & 7] . $perms; substr($perms, 2, 1) =~ tr/-x/Ss/ if -u _; substr($perms, 5, 1) =~ tr/-x/Ss/ if -g _; substr($perms, 8, 1) =~ tr/-x/Tt/ if -k _; if (-f _) { $perms = '-' . $perms; } elsif (-d _) { $perms = 'd' . $perms; } elsif (-l _) { $perms = 'l' . $perms; $pname .= ' -> ' . readlink($_); } elsif (-c _) { $perms = 'c' . $perms; $size = _sizemm($rdev); } elsif (-b _) { $perms = 'b' . $perms; $size = _sizemm($rdev); } elsif (-p _) { $perms = 'p' . $perms; } elsif (-S _) { $perms = 's' . $perms; } else { $perms = '?' . $perms; } my $user = _uid_to_user($uid) || $uid; my $group = _gid_to_group($gid) || $gid; my ($sec,$min,$hour,$mday,$mon,$timeyear) = localtime($mtime); if (-M _ > 365.25 / 2) { $timeyear += 1900; } else { $timeyear = sprintf("%02d:%02d", $hour, $min); } printf "%5lu %4ld %-10s %3d %-8s %-8s %8s %s %2d %5s %s\n", $ino, $blocks, $perms, $nlink, $user, $group, $size, $moname[$mon], $mday, $timeyear, $pname; 1; } } 1; __END__