package Text::Highlight::HTML; use strict; #TODO: # 1) DONE - Convert some tag list into a hashref syntax tree for sub syntax # 2) More distant goal: rewrite this not using HTML::SyntaxHighlighter # as I'm not much of a fan of its highlighting method. I'd prefer # html tags one color, attrib names another, and attrib vals as # strings. This wouldn't highlight the around the tags. Maybe # this is just how whatever text editors I've used have done it, # but it feels more "right". # # At the very least, I'd keep an option for using it as-is. This # means creating some kind of optional "options" hashref that gets # passed to individual T::H::foo methods. Maybe have one all should # implement that uses the method builtin to T::H itself. Got some # other shit to iron out first. # 3) Looks like this method is html-escaping all text, which may not # be the desired escape method. Something else to look into when # poking here again. #array of types, allows for span tag nesting my @types = (undef); #HTML::SyntaxHighlighter to Text::Highlight class translations my %classes = ( 'D' => 'key4', #DTD 'H' => 'key3', #html/head/body 'B' => 'key1', #block 'I' => 'key2', #inline 'A' => 'string', #attribute values 'T' => undef, #text 'S' => 'key5', #script/style 'C' => 'comment', ); sub highlight { shift; #class method's class name my $obj = shift; my $code = shift; eval { require HTML::SyntaxHighlighter; require HTML::Parser; }; if ( $@ ) { $obj->{_active} = __PACKAGE__->syntax; $obj->_highlight($code); return; } my $out; my $p = HTML::SyntaxHighlighter->new(br => "", out_func => \$out); $p->parse($code); #HTML::Entity's decode_entities method doesn't seem to convert nbsp's into spaces # (and some lazy html omits the semi-colon) $out =~ s/ ?/ /g; HTML::Parser->new( api_version => 3, handlers => { start => [\&start, 'tagname,attr'], end => [\&end, 'tagname'], #colorize the decoded text as the type on top of the @types stack text => [ sub { $obj->_colorize($types[-1], shift) }, 'dtext'], }, )->parse($out); } #if it's a span tag, look up the tag's class for its Highlighter type and push it on the stack sub start { return if shift ne 'span'; my $attr = shift; push @types, exists $classes{$attr->{class}} ? $classes{$attr->{class}} : undef; } #if it's a span tag, pop the top type off the stack sub end { pop @types if shift eq 'span'; } sub syntax { return { 'name' => 'HTML', 'blockCommentOn' => [ '' ], 'escape' => '\\', 'continueQuote' => 0 }; } 1; __END__