package Search::OpenFTS::Dict::Snowball;

# OpenFTS dictionary backed by the Snowball stemmers.  The class inherits
# from Lingua::Stem::Snowball and adds the two methods this module defines
# for OpenFTS use: lemms() and is_stoplexem().

use strict;
use warnings;
use Carp;
use Lingua::Stem::Snowball;

our @ISA     = qw(Lingua::Stem::Snowball);
our $VERSION = '0.01';

# lemms() is a plain alias for the inherited stem() method, so it accepts
# and returns exactly what Lingua::Stem::Snowball::stem does.
{
    # The alias is only ever reached through method calls, which would
    # otherwise trigger a "used only once" warning for the glob.
    no warnings 'once';
    *lemms = \&Lingua::Stem::Snowball::stem;
}

# new( %opt )
#
# Builds the stemmer through the parent constructor (all options are passed
# through unchanged) and, when $opt{stop_file} is given, loads the stop-word
# list from that file.
#
#   %opt        options for Lingua::Stem::Snowball->new, plus:
#   stop_file   optional path to a file of whitespace-separated, unstemmed
#               stop words
#
# Returns the new object.  Dies if stop_file is given but cannot be opened.
sub new {
    my ( $class, %opt ) = @_;
    $class = ref($class) || $class;

    my $self = $class->SUPER::new(%opt);
    $self->{STOP} = {};

    if ( defined $opt{stop_file} ) {

        # Three-argument open with a lexical handle: the file name can never
        # be interpreted as an open mode, and the handle is private to this
        # call instead of being a package-global bareword.
        open( my $stop_fh, '<', $opt{stop_file} )
            or croak "Can't open $opt{stop_file}: $!";

        while ( my $line = <$stop_fh> ) {
            while ( $line =~ /(\S+)/g ) {

                # Copy the capture before calling stem(), so that a regex
                # inside the stemmer cannot clobber it.
                my $word = $1;

                # Stop words are stored in stemmed form, which is why
                # is_stoplexem() expects an already-stemmed lexeme.
                $self->{STOP}{ $self->stem($word) } = 1;
            }
        }

        close $stop_fh;
    }

    # Make sure the object ends up in the requested class.
    bless( $self, $class );
    return $self;
}

# is_stoplexem( $lexeme )
#
# Returns 1 if $lexeme is a key of the stop-word table built by new(),
# 0 otherwise.  The table holds stemmed words, so pass a stemmed lexeme.
sub is_stoplexem {
    my ( $self, $s ) = @_;
    return exists $self->{STOP}{$s} ? 1 : 0;
}

1;
__END__

=head1 NAME

Search::OpenFTS::Dict::Snowball - Wrapper for Lingua::Stem::Snowball, the Perl interface to Snowball stemmers.

=head1 PREREQUISITES

Snowball - http://snowball.tartarus.org/

Perl module Lingua::Stem::Snowball

=head1 SYNOPSIS

  use Search::OpenFTS::Dict::Snowball;

  my $lang = 'english';

  my $dict = Search::OpenFTS::Dict::Snowball->new( lang=>$lang );

  # or, with a stop-word file:
  my $dict = Search::OpenFTS::Dict::Snowball->new( lang=>$lang, stop_file=>'/path/to/stop_file' );

  my @lexems = $dict->lemms( $word );
  my $stop   = $dict->is_stoplexem( $lexem );

stop_file should contain space separated unstemmed stop words, for example:

  the this when
  while

=head1 DESCRIPTION

This class inherits from Lingua::Stem::Snowball and adds two methods.

=over 4

=item lemms

An alias for the inherited C<stem> method.

=item is_stoplexem( $lexem )

Returns 1 if C<$lexem> is a stop word, 0 otherwise. The words read from
C<stop_file> are stemmed before they are stored, so C<$lexem> should be an
already stemmed lexeme (for example, one returned by C<lemms>).

=back

The constructor accepts the same options as C<< Lingua::Stem::Snowball->new >>
plus the optional C<stop_file>. It dies if C<stop_file> cannot be opened.

=head1 AUTHOR

Teodor Sigaev, teodor@sigaev.ru

=head1 SEE ALSO

Snowball web site http://snowball.tartarus.org/

Lingua::Stem::Snowball

http://openfts.sourceforge.net/contributions.shtml

The OpenFTS Primer ( see doc/ subdirectory )

The Crash-course to OpenFTS ( in examples/ subdirectory )

perldoc Search::OpenFTS::Search

perldoc Search::OpenFTS::Index

perldoc Search::OpenFTS::Parser

perldoc Search::OpenFTS::Dict::PorterEng

perldoc Search::OpenFTS::Dict::UnknownDict

perldoc Search::OpenFTS::Morph::ISpell

=cut