package Text::Highlight::HTML;
use strict;
#TODO:
# 1) DONE - Convert some tag list into a hashref syntax tree for sub syntax
# 2) More distant goal: rewrite this not using HTML::SyntaxHighlighter
# as I'm not much of a fan of its highlighting method. I'd prefer
# html tags one color, attrib names another, and attrib vals as
# strings. This wouldn't highlight the > around the tags. Maybe
# this is just how whatever text editors I've used have done it,
# but it feels more "right".
#
# At the very least, I'd keep an option for using it as-is. This
# means creating some kind of optional "options" hashref that gets
# passed to individual T::H::foo methods. Maybe have one all should
# implement that uses the method builtin to T::H itself. Got some
# other shit to iron out first.
# 3) Looks like this method is html-escaping all text, which may not
# be the desired escape method. Something else to look into when
# poking here again.
#array of types, allows for span tag nesting
my @types = (undef);
#HTML::SyntaxHighlighter to Text::Highlight class translations
my %classes = (
'D' => 'key4', #DTD
'H' => 'key3', #html/head/body
'B' => 'key1', #block
'I' => 'key2', #inline
'A' => 'string', #attribute values
'T' => undef, #text
'S' => 'key5', #script/style
'C' => 'comment',
);
sub highlight
{
shift; #class method's class name
my $obj = shift;
my $code = shift;
eval {
require HTML::SyntaxHighlighter;
require HTML::Parser;
};
if ( $@ ) {
$obj->{_active} = __PACKAGE__->syntax;
$obj->_highlight($code);
return;
}
my $out;
my $p = HTML::SyntaxHighlighter->new(br => "", out_func => \$out);
$p->parse($code);
#HTML::Entity's decode_entities method doesn't seem to convert nbsp's into spaces
# (and some lazy html omits the semi-colon)
$out =~ s/ ?/ /g;
HTML::Parser->new(
api_version => 3,
handlers => {
start => [\&start, 'tagname,attr'],
end => [\&end, 'tagname'],
#colorize the decoded text as the type on top of the @types stack
text => [ sub { $obj->_colorize($types[-1], shift) }, 'dtext'],
},
)->parse($out);
}
#if it's a span tag, look up the tag's class for its Highlighter type and push it on the stack
sub start
{
return if shift ne 'span';
my $attr = shift;
push @types, exists $classes{$attr->{class}} ? $classes{$attr->{class}} : undef;
}
#if it's a span tag, pop the top type off the stack
sub end
{
pop @types if shift eq 'span';
}
sub syntax {
return {
'name' => 'HTML',
'blockCommentOn' => [
''
],
'escape' => '\\',
'continueQuote' => 0
};
}
1;
__END__