# forked version of B::Utils; needs to merge it ASAP
package B::Utils;

use 5.006;
use warnings;
use vars '$DEBUG';

# Names that may be requested on the "use B::Utils qw(...)" line.
our @EXPORT_OK = qw(all_starts all_roots anon_subs
                    walkoptree_simple walkoptree_filtered
                    walkallops_simple walkallops_filtered
                    carp croak
                    opgrep
                   );

# Minimal hand-rolled exporter.
#
# Installs every requested name into the calling package.  Croaks (via
# Carp, loaded on demand) as soon as a name that is not listed in
# @EXPORT_OK is requested.  Nothing is exported by default.
sub import {
    my $pack = __PACKAGE__;
    shift;    # discard the class name "use" passes in
    my @exports = @_;
    my $caller  = caller;
    my %EOK     = map { $_ => 1 } @EXPORT_OK;
    for my $name (@exports) {
        unless ( $EOK{$name} ) {
            require Carp;
            Carp::croak(qq{"$name" is not exported by the $pack module});
        }
        no strict 'refs';
        *{"$caller\::$name"} = \&{"$pack\::$name"};
    }
}

our $VERSION = '0.04_02';    # 0.04 with some Schwern patches

use B qw(main_start main_root walksymtable class OPf_KIDS);

# Filled in lazily by _init(): start ops and root ops keyed by sub name,
# plus one { root => ..., start => ... } record per anonymous sub found.
my ( %starts, %roots, @anon_subs );

# Stashes that _init() refuses to descend into.
our @bad_stashes = qw(B Carp DB Exporter warnings Cwd Config CORE blib strict DynaLoader vars XSLoader AutoLoader base);

# True when $op is a B::NULL object, i.e. wraps a null pointer.
sub null {
    my $op = shift;
    class($op) eq 'NULL';
}

{
    my $_subsdone = 0;

    # Populate %starts, %roots and @anon_subs exactly once.  This is done
    # on first use rather than at load time so that the program being
    # examined has been compiled by then.
    sub _init {    # To ensure runtimeness.
        return if $_subsdone;
        %starts = ( '__MAIN__' => main_start() );
        %roots  = ( '__MAIN__' => main_root() );
        walksymtable(
            \%main::,
            '_push_starts',
            sub {
                # Do not eat our own children!
                return if scalar grep { $_[0] eq $_ . "::" } @bad_stashes;
                1;
            },
            ''
        );

        # Anonymous subs compiled in the main program show up as CV
        # entries in the main CV's pad.
        push @anon_subs, { root => $_->ROOT, start => $_->START }
          for grep { class($_) eq "CV" } B::main_cv()->PADLIST->ARRAY->ARRAY;
        $_subsdone = 1;
    }
}

=head1 NAME

B::Utils - Helper functions for op tree manipulation

=head1 SYNOPSIS

  use B::Utils;

=head1 DESCRIPTION

These functions make it easier to manipulate the op tree.

=head1 FUNCTIONS

=over 3

=item C<all_starts>

=item C<all_roots>

Returns a hash of all of the starting ops or root ops of optrees, keyed
to subroutine name; the optree for main program is simply keyed to
C<__MAIN__>.

B<Note>: Certain "dangerous" stashes are not scanned for subroutines:
the list of such stashes can be found in C<@B::Utils::bad_stashes>. Feel
free to examine and/or modify this to suit your needs. The intention is
that a simple program which uses no modules other than C<B> and
C<B::Utils> would show no additional symbols.

This does B<not> return the details of ops in anonymous subroutines
compiled at compile time. For instance, given

    $a = sub { ... };

the subroutine will not appear in the hash. This is just as well, since
they're anonymous... If you want to get at them, use...

=item C<anon_subs()>

This returns an array of hash references. Each element has the keys
"start" and "root". These are the starting and root ops of all of
the anonymous subroutines in the program.

=cut

sub all_starts { _init(); return %starts; }
sub all_roots  { _init(); return %roots; }
sub anon_subs  { _init(); return @anon_subs }

# Callback invoked by walksymtable() on every glob it visits.  Records
# the start and root op of the glob's sub (if it has one with an op
# tree) and collects any anonymous subs found in that sub's pad.
sub B::GV::_push_starts {
    my ($gv) = @_;
    my $name = $gv->STASH->NAME . "::" . $gv->SAFENAME;
    return unless ${ $gv->CV };
    my $cv = $gv->CV;

    # Not every CV has a pad list that can be walked, so probe first.
    if (    $cv->PADLIST->can("ARRAY")
        and $cv->PADLIST->ARRAY
        and $cv->PADLIST->ARRAY->can("ARRAY") )
    {
        push @anon_subs, { root => $_->ROOT, start => $_->START }
          for grep { class($_) eq "CV" } $cv->PADLIST->ARRAY->ARRAY;
    }

    # Skip subs without an op tree.
    return unless ${ $cv->START } and ${ $cv->ROOT };
    $starts{$name} = $cv->START;
    $roots{$name}  = $cv->ROOT;
}

# Special objects have nothing to record.
sub B::SPECIAL::_push_starts { }

=item C<< $op->oldname >>

Returns the name of the op, even if it is currently optimized to null.
This helps you understand the structure of the op tree.

=cut

sub B::OP::oldname {
    # A nulled op keeps its former op type in targ; ppname() yields
    # "pp_xxx", so drop the three-character prefix.
    return substr( B::ppname( $_[0]->targ ), 3 )
      if $_[0]->name eq "null" and $_[0]->targ;
    return $_[0]->name;
}

=item C<< $op->kids >>

Returns an array of all this op's non-null children, in order.

=cut
# Returns this op's non-null children, in order and without duplicates.
#
# The children are the op's first kid and every sibling that follows it.
# A child that has been optimized to a "null" op is replaced by its own
# children.  The ops reachable through last() and other() are appended
# when they were not already seen, so nothing that earlier versions
# reported goes missing.  Ops are identified by address ($$op), because
# B hands out a fresh wrapper object on every accessor call.
sub B::OP::kids {
    my $op = shift;

    # Walk the sibling chain starting at the first child.
    my @candidates;
    my $kid = $op->first;
    while ( $kid && class($kid) ne 'NULL' ) {
        push @candidates, $kid;
        $kid = $kid->sibling;
    }

    # last() is normally already part of the chain; other() usually is
    # not.  The %seen filter below removes any overlap.
    foreach my $type (qw(last other)) {
        my $extra = $op->$type();
        push @candidates, $extra if $extra && class($extra) ne 'NULL';
    }

    my ( @kids, %seen );
    foreach my $candidate (@candidates) {
        my @real = $candidate->name eq 'null' ? $candidate->kids : ($candidate);
        foreach my $real (@real) {
            next if $seen{$$real}++;
            push @kids, $real;
        }
    }
    return @kids;
}

=item C<< $op->first >>

=item C<< $op->last >>

=item C<< $op->other >>

Normally if you call first, last or other on anything which is not an
UNOP, BINOP or LOGOP respectively it will die.  This leads to lots of
code like:

    $op->first if $op->can('first');

B::Utils provides every op with first, last and other methods which
will simply return nothing if it isn't relevant.

=cut

# Install do-nothing fallbacks on the B::OP base class.  Op classes that
# really have the accessor define it themselves and are found first by
# method resolution, so a call only lands here when the accessor does
# not apply -- and then the answer is "nothing".
foreach my $type (qw(first last other)) {
    no strict 'refs';
    *{ 'B::OP::' . $type } = sub { return };
}

=item C<< $op->parent >>

Returns the parent node in the op tree, if possible. Currently "possible" means
"if the tree has already been optimized"; that is, if we're during a C<CHECK>
block. (and hence, if we have valid C<next> pointers.)

In the future, it may be possible to search for the parent before we
have the C<next> pointers in place, but it'll take me a while to
figure out how to do that.

=cut

# Depth-first search of the subtree rooted at $node for the op that has
# $target among its kids().  %$visited is keyed by op address and
# remembers every subtree already entered, so no subtree is searched
# twice.  Returns the parent op, or nothing.
sub _find_parent_below {
    my ( $node, $target, $visited ) = @_;
    return unless $node && $$node;
    printf( "Searching from %s %s=(0x%07x)\n",
        class($node) || '?', $node->oldname, $$node )
      if $DEBUG;
    return if $visited->{$$node}++;

    # Test the immediate children
    my @kids = $node->kids;
    return $node if grep { $$_ == $$target } @kids;

    # Recurse
    foreach my $kid (@kids) {
        my $found = _find_parent_below( $kid, $target, $visited );
        return $found if $found;
    }

    # Not in this subtree.
    return;
}

# Finds the parent of the invocant by following its next() chain and
# searching the subtree of every op reached.  Returns the parent op, or
# nothing when the chain ends (or starts repeating) without a match.
# Dies when the op reports a zero seq(), i.e. has not been sequenced
# yet; perls whose B::OP has no seq() method skip that check.
sub B::OP::parent {
    my $target = shift;
    printf( "parent %s %s=(0x%07x)\n",
        B::class($target), $target->oldname, $$target )
      if $DEBUG;

    die "I'm not sure how to do this yet. I'm sure there is a way. If you know, please email me."
      if $target->can('seq') && !$target->seq;

    my ( %visited, %stepped );
    for ( my $start = $target->next ; $start && $$start ; $start = $start->next ) {
        last if $stepped{$$start}++;    # the next() chain has looped
        my $found = _find_parent_below( $start, $target, \%visited );
        return $found if $found;
    }
    return;
}

=item C<< $op->previous >>

Like C<< $op->next >>, but not quite.

=cut

# Depth-first search of the subtree rooted at $node for an op whose
# next() is $target.  The node itself is tested first, then all of its
# kids, and only then the deeper levels.  %$visited (keyed by op
# address) keeps each subtree from being searched more than once.
sub _find_previous_below {
    my ( $node, $target, $visited ) = @_;
    return unless $node && $$node;
    return if $visited->{$$node}++;
    return $node if ${ $node->next } == $$target;

    my @kids = $node->kids;
    foreach my $kid (@kids) {
        return $kid if ${ $kid->next } == $$target;
    }

    # Recurse
    foreach my $kid (@kids) {
        my $found = _find_previous_below( $kid, $target, $visited );
        return $found if $found;
    }

    # Not in this subtree.
    return;
}

# Finds an op whose next() is the invocant, searching the subtrees of
# the ops that follow the invocant in next() order.  Returns nothing
# when no such op is found.
sub B::OP::previous {
    my $target = shift;

    my ( %visited, %stepped );
    for ( my $start = $target->next ; $start && $$start ; $start = $start->next ) {
        last if $stepped{$$start}++;    # the next() chain has looped
        my $found = _find_previous_below( $start, $target, \%visited );
        return $found if $found;
    }
    return;
}

=item walkoptree_simple($op, \&callback, [$data])

The C<B> module provides various functions to walk the op tree, but
they're all rather difficult to use, requiring you to inject methods
into the C<B::OP> class. This is a very simple op tree walker with
more expected semantics.

The &callback is called at each op with the op itself passed in as the
first argument and any additional $data as the second.

All the C<walk> functions set C<$B::Utils::file> and C<$B::Utils::line>
to the appropriate values of file and line number in the program
being examined.  Since only COPs contain this information it may be
unavailable in the first few callback calls.

=cut
# File name and line number of the most recent COP seen by a walker.
our ( $file, $line );

# Public entry point: walk the tree below $op, calling
# $callback->($op, $data) for every op.
# Make sure we reset $file and $line between runs.
sub walkoptree_simple {
    ( $file, $line ) = ( '__none__', 0 );
    _walkoptree_simple(@_);
}

# Recursive worker for walkoptree_simple().  Visits $op first and then
# each of its children, following first() and the sibling() chain.
sub _walkoptree_simple {
    my ( $op, $callback, $data ) = @_;

    # Only COPs know where they came from; remember the latest one.
    ( $file, $line ) = ( $op->file, $op->line ) if $op->isa("B::COP");
    $callback->( $op, $data );
    if ( $$op && ( $op->flags & OPf_KIDS ) ) {
        my $kid;
        for ( $kid = $op->first ; $$kid ; $kid = $kid->sibling ) {
            _walkoptree_simple( $kid, $callback, $data );
        }
    }
}

=item walkoptree_filtered($op, \&filter, \&callback, [$data])

This is much the same as C<walkoptree_simple>, but will only call the
callback if the C<filter> returns true. The C<filter> is passed the
op in question as a parameter; the C<opgrep> function is fantastic
for building your own filters.

=cut

# Public entry point: like walkoptree_simple(), but $callback is only
# called for ops on which $filter->($op) returns true.
sub walkoptree_filtered {
    ( $file, $line ) = ( '__none__', 0 );
    _walkoptree_filtered(@_);
}

# Recursive worker for walkoptree_filtered().  Children are always
# descended into, whether or not their parent passed the filter.
sub _walkoptree_filtered {
    my ( $op, $filter, $callback, $data ) = @_;
    ( $file, $line ) = ( $op->file, $op->line ) if $op->isa("B::COP");
    $callback->( $op, $data ) if $filter->($op);
    if ( $$op && ( $op->flags & OPf_KIDS ) ) {
        my $kid;
        for ( $kid = $op->first ; $$kid ; $kid = $kid->sibling ) {
            _walkoptree_filtered( $kid, $filter, $callback, $data );
        }
    }
}

=item walkallops_simple(\&callback, [$data])

This combines C<walkoptree_simple> with C<all_roots> and C<anon_subs>
to examine every op in the program. C<$B::Utils::sub> is set to the
subroutine name if you're in a subroutine, C<__MAIN__> if you're in
the main program and C<__ANON__> if you're in an anonymous subroutine.

=cut

# Name of the sub whose op tree is currently being walked.
our $sub;

# Walk every known op tree: the main program and all named subs first,
# then every anonymous sub.
sub walkallops_simple {
    my ( $callback, $data ) = @_;
    _init();
    for $sub ( keys %roots ) {
        walkoptree_simple( $roots{$sub}, $callback, $data );
    }
    $sub = "__ANON__";
    for (@anon_subs) {
        walkoptree_simple( $_->{root}, $callback, $data );
    }
}

=item walkallops_filtered(\&filter, \&callback, [$data])

Same as above, but filtered.

=cut
# Walk every known op tree (main program and named subs first, then
# anonymous subs), calling $callback only for ops accepted by $filter.
sub walkallops_filtered {
    my ( $filter, $callback, $data ) = @_;
    _init();
    for $sub ( keys %roots ) {
        walkoptree_filtered( $roots{$sub}, $filter, $callback, $data );
    }
    $sub = "__ANON__";
    for (@anon_subs) {
        walkoptree_filtered( $_->{root}, $filter, $callback, $data );
    }
}

=item carp(@args)

=item croak(@args)

Warn and die, respectively, from the perspective of the position of the op in
the program. Sounds complicated, but it's exactly the kind of error reporting
you expect when you're grovelling through an op tree.

=cut

# Build the message for carp() and croak(): the arguments joined
# together, with " at FILE line LINE.\n" appended unless the text
# already ends in a newline.  The message is returned explicitly;
# falling off the end used to hand back the value of the newline test
# instead of the message whenever nothing had to be appended.
sub _preparewarn {
    my $args = join '', @_;
    $args = "Something's wrong " unless length $args;
    $args .= " at $file line $line.\n"
      unless substr( $args, length($args) - 1 ) eq "\n";
    return $args;
}

sub carp (@)  { CORE::warn( _preparewarn(@_) ) }
sub croak (@) { CORE::die( _preparewarn(@_) ) }

=item opgrep(\%conditions, @ops)

Returns the ops which meet the given conditions. The conditions should
be specified like this:

    @barewords = opgrep(
                        { name => "const", private => OPpCONST_BARE },
                        @ops
                       );

You can specify alternation by giving an arrayref of values:

    @svs = opgrep ( { name => ["padsv", "gvsv"] }, @ops)

And you can specify inversion by making the first element of the
arrayref a "!". (Hint: if you want to say "anything", say "not
nothing": C<["!"]>)

You may also specify the conditions to be matched in nearby ops.

    walkallops_filtered(
        sub { opgrep( {name => "exec",
                       next => {
                                 name    => "nextstate",
                                 sibling => { name => [qw(! exit warn die)] }
                               }
                      }, @_)},
        sub {
              carp("Statement unlikely to be reached");
              carp("\t(Maybe you meant system() when you said exec()?)\n");
        }
    )

Get that?

Here are the things that can be tested:

        name targ type seq flags private pmflags pmpermflags
        first other last sibling next pmreplroot pmreplstart pmnext

=cut

# Returns those of @ops that satisfy every condition in %$cref.
#
# A condition value is either a single wanted value, an arrayref of
# alternatives, or an arrayref whose first element is "!" followed by
# values that must NOT match.  Values are compared as strings.  For the
# neighbour keys (first, other, last, sibling, next, pmreplroot,
# pmreplstart, pmnext) the value is a nested condition hash that the
# neighbouring op has to satisfy.  Undefined entries in @ops are
# ignored.
sub opgrep {
    my ( $cref, @ops ) = @_;
    my %conds = %$cref;
    my @rv    = ();

  OPLOOP: for my $o ( grep defined, @ops ) {

        # First, let's skim off ops of the wrong type.
        for my $type (qw(first other last pmreplroot pmreplstart pmnext pmflags pmpermflags)) {
            next OPLOOP if exists $conds{$type} and !$o->can($type);
        }

        for my $test (qw(name targ type seq flags private pmflags pmpermflags)) {
            next unless exists $conds{$test};
            next OPLOOP unless $o->can($test);

            my @wanted = ref $conds{$test} ? @{ $conds{$test} } : $conds{$test};
            my $have   = $o->$test;    # ask the op once, not per candidate
            if ( @wanted && $wanted[0] eq "!" ) {
                shift @wanted;
                next OPLOOP if grep { $have eq $_ } @wanted;
            }
            else {
                next OPLOOP unless grep { $have eq $_ } @wanted;
            }
        }

        for my $neighbour (qw(first other last sibling next pmreplroot pmreplstart pmnext)) {
            next unless exists $conds{$neighbour};

            # sibling and next are not covered by the skim above, and a
            # null op (B::NULL) offers neither, so check before calling.
            next OPLOOP unless $o->can($neighbour);

            # Recurse, recurse!
            next OPLOOP unless opgrep( $conds{$neighbour}, $o->$neighbour );
        }

        push @rv, $o;
    }
    return @rv;
}

{
    # Alias package: B::BUtils inherits everything from B::Utils.
    package B::BUtils;
    our @ISA = qw(B::Utils);
}

1;

=back

=head2 EXPORT

None by default.

=head1 AUTHOR

Simon Cozens, C<simon@cpan.org>

=head1 TODO

I need to add more Fun Things, and possibly clean up some parts where
the (previous/parent) algorithm has catastrophic cases, but it's more
important to get this out right now than get it right.

=head1 SEE ALSO

L<B>, L<B::Generate>.

=cut