package Search::OpenFTS::Parser;

use strict;
use Carp;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK $AUTOLOAD);

@ISA     = qw(Exporter DynaLoader);
$VERSION = '0.39';

require Exporter;
require DynaLoader;
require AutoLoader;

# AUTOLOAD - resolve an otherwise undefined sub name as a named constant.
#
# The unqualified name of the missing sub is passed to constant(), which is
# not defined in this file (presumably supplied by the compiled extension --
# confirm).  constant() reports failure through $!:
#
#   * $! == 0         : the value is cached as a constant sub under the
#                       requested name and that sub is invoked;
#   * $! is EINVAL    : the name is not a constant at all, so the request is
#                       handed over to AutoLoader;
#   * any other errno : the name is known but has no value; croak.
#
# The first call argument (or 0 when there is none) is forwarded to constant().
sub AUTOLOAD {
    my $constname;
    ( $constname = $AUTOLOAD ) =~ s/.*:://;

    # If constant() itself is what is missing, calling it below would just
    # re-enter AUTOLOAD; fail with a message that names the function.
    croak "&Search::OpenFTS::Parser::constant not defined"
        if $constname eq 'constant';

    my $val = constant( $constname, @_ ? $_[0] : 0 );
    if ( $! != 0 ) {

        # The text of $! depends on the locale, so also test the errno
        # symbolically through %! instead of relying on the message alone.
        if ( $! =~ /Invalid/ || $!{EINVAL} ) {
            $AutoLoader::AUTOLOAD = $AUTOLOAD;
            goto &AutoLoader::AUTOLOAD;
        }
        else {
            croak "Your vendor has not defined Search::OpenFTS::Parser macro $constname";
        }
    }

    # Install the value under the requested name so later calls bypass
    # AUTOLOAD, then jump to the freshly installed sub.
    no strict 'refs';
    *$AUTOLOAD = sub () { $val };
    goto &$AUTOLOAD;
}

=head1 NAME

Search::OpenFTS::Parser - Provides functions for parsing

=head1 SYNOPSIS

    my $parser = Search::OpenFTS::Parser->new;
    my @types  = $parser->alltypes;

    $parser->start_parser( \$text );
    $parser->start_parser( \*STDIN );
    $parser->start_parser( \*STDIN, $limit );

    # NOTE(review): the method that fetches the next lexeme is not defined
    # in this file; the name get_word is assumed -- confirm.
    while ( ( ( $type, $word ) = $parser->get_word ) && $type ) {
        print $parser->type_description($type), "\t$word\n";
    }
    $parser->end_parser;

The parser will convert the text in the variable $text into a stream
of lexemes together with their type ID.

To get all types of lexemes that the parser supports use:

    my @types = $parser->alltypes;
    print "$_ => $types[$_]\n" for 1 .. $#types;

=head1 DESCRIPTION

The package Search::OpenFTS::Parser is a wrapper around the parser
functions. The particular functions that a parser must provide in order
to work with OpenFTS are discussed in section VII (Customizations) of
the OpenFTS primer.

=cut
=head2 Lexeme types

The parser recognizes the following types of lexemes:

     1 => Latin word
     2 => Cyrillic word
     3 => Word
     4 => Email
     5 => URL
     6 => Host
     7 => Scientific notation
     8 => VERSION
     9 => Part of hyphenated word
    10 => Cyrillic part of hyphenated word
    11 => Latin part of hyphenated word
    12 => Space symbols
    13 => Char in tag
    14 => HTTP head
    15 => Hyphenated word
    16 => Latin hyphenated word
    17 => Cyrillic hyphenated word
    18 => URI
    19 => File or path name
    20 => Decimal notation
    21 => Signed integer
    22 => Unsigned integer

=cut

# new - construct a parser object.
#
# May be invoked on the class name or on an existing instance; the new
# object is an empty hash blessed into the same class.
sub new {
    my $invocant = shift;
    my $package  = ref($invocant) || $invocant;
    return bless( {}, $package );
}

# start_parser - hand the parser its input.
#
#   $rtxt  - a reference to a scalar holding the text, or anything else,
#            which is passed on to start_parse_fh() as the input handle
#   $limit - optional; only used for the handle form, where a false or
#            missing value is passed on as 0
#
# Returns whatever the underlying start_parse_str() / start_parse_fh()
# call returns (both are defined outside this file).
sub start_parser {
    my ( $self, $rtxt, $limit ) = @_;

    return start_parse_str( ${$rtxt} ) if ref($rtxt) eq 'SCALAR';

    my $max = $limit || 0;
    return start_parse_fh( $rtxt, $max );
}

# type_description - return the description of lexeme type $id.
#
# getdescript() (defined outside this file) is given $id and a scalar that
# it fills in; that scalar is returned.
sub type_description {
    my ( $self, $id ) = @_;
    my $description;
    getdescript( $id, $description );
    return $description;
}

# alltypes - return the descriptions of all lexeme types as a list.
#
# Type IDs are probed from 1 upwards and probing stops at the first ID whose
# description has no length.  The list is indexed by type ID, so element 0
# is never assigned.
sub alltypes {
    my @types;
    my $description;

    for ( my $id = 1 ; ; $id++ ) {
        getdescript( $id, $description );
        last unless length $description;
        $types[$id] = $description;
    }

    return @types;
}

# Load the compiled part of the module.
Search::OpenFTS::Parser->bootstrap($VERSION);

=head1 SEE ALSO

    The OpenFTS Primer ( see doc/ subdirectory )
    The Crash-course to OpenFTS ( in examples/ subdirectory )
    perldoc Search::OpenFTS::Search
    perldoc Search::OpenFTS::Index
    perldoc Search::OpenFTS::Dict::PorterEng
    perldoc Search::OpenFTS::Dict::Snowball
    perldoc Search::OpenFTS::Dict::UnknownDict
    perldoc Search::OpenFTS::Morph::ISpell

=cut

1;
__END__