package Search::OpenFTS::Morph::ISpell;

use strict;
use warnings;
use Carp;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK $AUTOLOAD);

require Exporter;
require DynaLoader;
require AutoLoader;

# lc() on dictionary words and stop words must follow the active LC_CTYPE.
use locale;
use POSIX qw(locale_h);

@ISA     = qw(Exporter DynaLoader);
$VERSION = '0.02';

# AUTOLOAD
#
# Resolves otherwise-undefined sub names through the XS function constant().
# On success the value is installed as a constant sub under the requested
# name and control jumps to it.  If constant() leaves an errno whose message
# matches /Invalid/, the call is handed over to AutoLoader's AUTOLOAD; any
# other non-zero errno croaks.
#
# NOTE(review): the /Invalid/ test matches the textual form of $!, which may
# be localized - confirm whether an errno comparison would be safer here.
sub AUTOLOAD {
    my $constname;
    ( $constname = $AUTOLOAD ) =~ s/.*:://;
    croak "& not defined" if $constname eq 'constant';

    my $val = constant( $constname, @_ ? $_[0] : 0 );
    if ( $! != 0 ) {
        if ( $! =~ /Invalid/ ) {
            $AutoLoader::AUTOLOAD = $AUTOLOAD;
            goto &AutoLoader::AUTOLOAD;
        }
        else {
            croak "Your vendor has not defined Search::OpenFTS::Morph::ISpell macro $constname";
        }
    }

    # Cache the value as a constant sub so AUTOLOAD is hit only once per name.
    no strict 'refs';
    *$AUTOLOAD = sub () { $val };
    goto &$AUTOLOAD;
}

# _in_locale( $locale, $code )
#
# Private helper.  Runs $code with LC_CTYPE switched to $locale and restores
# the previous LC_CTYPE afterwards, even when $code dies (the exception is
# re-thrown after the restore).  When $locale is undefined, $code simply runs
# under the current locale.  Dies with "Can't set locale ..." if the locale
# cannot be selected.  The return value of $code is discarded; callers pass
# results out through closed-over variables.
sub _in_locale {
    my ( $locale, $code ) = @_;

    if ( !defined $locale ) {
        $code->();
        return;
    }

    my $old_locale = setlocale(LC_CTYPE);
    my $res        = setlocale( LC_CTYPE, $locale );
    die "Can't set locale $locale" if !defined $res;

    my $ok  = eval { $code->(); 1 };
    my $err = $@;

    # LC_CTYPE is process-wide state: always put it back before propagating.
    setlocale( LC_CTYPE, $old_locale );
    die $err if !$ok;

    return;
}

# new( %opt )
#
# Constructor.  Recognised options:
#   dict_file - path to the ispell dictionary (required, must be non-empty)
#   aff_file  - path to the ispell affix file (required, must be non-empty)
#   stop_file - optional file of whitespace-separated stop words
#   locale    - optional LC_CTYPE locale used while loading the dictionary,
#               lowercasing stop words and, later, normalizing words
#
# Dies if a required file is missing or empty, if the locale cannot be set,
# if InitIspell() does not return an IspellDictPtr, or if the stop file
# cannot be opened.
sub new {
    my ( $class, %opt ) = @_;
    $class = ref($class) || $class;

    die "Dictionary file does not exist"
        if !( defined $opt{dict_file} && -s $opt{dict_file} );
    die "Affix file does not exist"
        if !( defined $opt{aff_file} && -s $opt{aff_file} );

    my $dict;
    my %stop;

    _in_locale(
        $opt{locale},
        sub {
            $dict = Search::OpenFTS::Morph::ISpell::InitIspell( $opt{aff_file},
                $opt{dict_file} );
            die "InitIspell failed"
                if !( $dict && ref($dict) eq 'IspellDictPtr' );

            return if !( defined $opt{stop_file} && -s $opt{stop_file} );

            open my $stop_fh, '<', $opt{stop_file}
                or die "Can't open $opt{stop_file}: $!";
            while ( my $line = <$stop_fh> ) {

                # Every whitespace-delimited token is one stop word; it is
                # lowercased under the dictionary locale set above.
                while ( $line =~ /(\S+)/g ) {
                    $stop{ lc $1 } = 1;
                }
            }
            close $stop_fh;
        }
    );

    my $self = {
        LOCALE => $opt{locale},
        OBJ    => $dict,
        STOP   => \%stop,
    };

    bless( $self, $class );
    return $self;
}

# DESTROY
#
# Hands the dictionary handle to the XS function DestroyIspell().  Skipped
# when no handle was ever stored.
sub DESTROY {
    my ($self) = @_;
    DestroyIspell( $self->{OBJ} ) if defined $self->{OBJ};
    return;
}

# lemms( $word )
#
# Lowercases $word (under the object's locale, if one was given) and passes
# it to the XS function Normalize().  Returns the distinct normal forms that
# Normalize() produced, in no particular order, or an empty list when
# Normalize() reports nothing.  Dies if the locale cannot be set.
sub lemms {
    my ( $self, $s ) = @_;

    my @norm;
    my $len;

    _in_locale(
        $self->{LOCALE},
        sub {
            $len = Normalize( $self->{OBJ}, lc $s, \@norm );
        }
    );

    return () if !$len;

    # Collapse duplicate forms.
    my %uniq;
    @uniq{@norm} = ();
    return keys %uniq;
}

# is_stoplexem( $lexem )
#
# Returns 1 if $lexem is in the stop-word table loaded by new(), 0 otherwise.
# No locale switch is needed: the lexem is expected to be lowercased already.
sub is_stoplexem {
    my ( $self, $s ) = @_;

    return 0 if !defined $s;
    return ( exists $self->{STOP}{$s} ) ? 1 : 0;
}

__PACKAGE__->bootstrap($VERSION);

1;
__END__

=head1 NAME

Search::OpenFTS::Morph::ISpell - Perl interface to ISpell's dictionaries.

Thanks to the mnoGoSearch (http://www.mnogosearch.org) developers team,
who kindly allowed us to use their stemming code.

=head1 PREREQUISITES

ISpell's dictionary and affixes ( *.dict and *.aff ) should be installed.

=head1 SYNOPSIS

  use Search::OpenFTS::Morph::ISpell;

  my $dict=Search::OpenFTS::Morph::ISpell->new(
      aff_file=>'..',
      dict_file=>'..'
  );

  my $dict=Search::OpenFTS::Morph::ISpell->new(
      aff_file=>'..',
      dict_file=>'..',
      stop_file=>'..'
  );

  my $dict=Search::OpenFTS::Morph::ISpell->new(
      aff_file=>'..',
      dict_file=>'..',
      locale=>'..'
  );

  my $dict=Search::OpenFTS::Morph::ISpell->new(
      aff_file=>'..',
      dict_file=>'..',
      stop_file=>'..',
      locale=>'..'
  );

  my @norms = $dict->lemms($word);
  my $is_stopword = $dict->is_stoplexem( $norm );

=head1 DESCRIPTION

This module returns an array of infinitives for a given word, and an empty
array if the word is not found in the dictionary.
Input words must be in the dictionary encoding.


=head1 AUTHOR

Teodor Sigaev, teodor@sigaev.ru

=head1 SEE ALSO

Ispell site http://fmg-www.cs.ucla.edu/geoff/ispell.html

mnoGoSearch site http://www.mnogosearch.ru

The OpenFTS Primer ( see doc/ subdirectory )

The Crash-course to OpenFTS ( in examples/ subdirectory )

perldoc Search::OpenFTS::Search

perldoc Search::OpenFTS::Index

perldoc Search::OpenFTS::Parser

perldoc Search::OpenFTS::Dict::PorterEng

perldoc Search::OpenFTS::Dict::Snowball

perldoc Search::OpenFTS::Dict::UnknownDict

=head1 NOTICES

We recommend the ispell dictionary for the Russian language maintained by
Alexander Lebedev, available from ftp://mch5.chem.msu.su/pub/russian/ispell/

=cut