package Search::OpenFTS::Dict::PorterEng;

# Perl front end for an XS implementation of the Porter stemmer for English.
# The stemming itself (Lexem, LexemID, constant, destroy) lives in the XS
# code loaded by the bootstrap call at the bottom of this file; this module
# adds stop-word handling on top of it.

use strict;
use Carp;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK $AUTOLOAD);

require Exporter;
require DynaLoader;
require AutoLoader;

@ISA     = qw(Exporter DynaLoader);
$VERSION = '0.02';

# Built-in stop-word table.
#   key   - numeric id that is_stoplemm() looks up (presumably the value
#           LexemID() yields for the word -- TODO confirm against the XS code)
#   value - stemmed form of the word; new() uses these values as the default
#           stop-lexeme set
# The trailing comment on each entry is the unstemmed word.
my %STOPWORD = (
    -390611389  => 'a',               #a
    -1242291485 => 'about',           #about
    -1299254443 => 'abov',            #above
    -480534920  => 'across',          #across
    -1992012223 => 'after',           #after
    973876162   => 'also',            #also
    238835196   => 'am',              #am
    -1758133178 => 'an',              #an
    133536621   => 'and',             #and
    -1769605466 => 'are',             #are
    -197957473  => 'as',              #as
    1784151356  => 'at',              #at
    734694925   => 'be',              #be
    1291270882  => 'been',            #been
    2048989667  => 'bill',            #bill
    -1322058038 => 'but',             #but
    1070322242  => 'by',              #by
    1664924370  => 'can',             #can
    -1219680515 => 'cannot',          #cannot
    -304818710  => 'cant',            #cant
    -771467694  => 'co',              #co
    -37775524   => 'con',             #con
    -1437059452 => 'could',           #could
    2107278205  => 'couldnt',         #couldnt
    2106599819  => 'de',              #de
    -1656373099 => 'do',              #do
    271442091   => 'done',            #done
    -1970963994 => 'eg',              #eg
    -1323707148 => 'els',             #else
    -1756123262 => 'enough',          #enough
    783637705   => 'etc',             #etc
    -817881230  => 'even',            #even
    # NOTE(review): 'fify' looks like a typo for 'fifty'; the key cannot be
    # recomputed from this file, so the entry is left untouched.
    2143720113  => 'fifi',            #fify
    -277542408  => 'for',             #for
    -1189437072 => 'from',            #from
    -1883456467 => 'had',             #had
    -699882850  => 'ha',              #has
    375095026   => 'hasnt',           #hasnt
    -763434432  => 'have',            #have
    -786078073  => 'he',              #he
    520408184   => 'her',             #her
    -1323595880 => 'here',            #here
    -1542383931 => 'herself',         #herself
    1052348033  => 'him',             #him
    828626669   => 'himself',         #himself
    -661443924  => 'hi',              #his
    -1786654595 => 'how',             #how
    -59303430   => 'howev',           #however
    -429115791  => 'i',               #i
    -935438394  => 'ie',              #ie
    1362560636  => 'if',              #if
    1609338446  => 'in',              #in
    -1753063562 => 'inc',             #inc
    -1283636860 => 'into',            #into
    1022026391  => 'is',              #is
    -1567721676 => 'it',              #it
    -932946111  => 'itself',          #itself
    -1090456859 => 'ltd',             #ltd
    -361688545  => 'may',             #may
    -1403874622 => 'me',              #me
    1450865991  => 'might',           #might
    574753205   => 'mill',            #mill
    1772671891  => 'mine',            #mine
    -1957498244 => 'more',            #more
    -281135240  => 'move',            #move
    -1215591561 => 'much',            #much
    -1202467187 => 'my',              #my
    -1419388442 => 'myself',          #myself
    438673936   => 'never',           #never
    -951898581  => 'nevertheless',    #nevertheless
    1739204639  => 'no',              #no
    2131094495  => 'nobodi',          #nobody
    2140143823  => 'none',            #none
    -245766138  => 'noon',            #noone
    -513386432  => 'nor',             #nor
    134610293   => 'not',             #not
    -1324579159 => 'noth',            #nothing
    124625402   => 'of',              #of
    733764931   => 'off',             #off
    162933192   => 'on',              #on
    -1827724348 => 'onc',             #once
    2053932785  => 'one',             #one
    -43113676   => 'onli',            #only
    -1776935336 => 'onto',            #onto
    498562439   => 'or',              #or
    -648530656  => 'other',           #other
    738200294   => 'otherwis',        #otherwise
    1351207084  => 'our',             #our
    2144811700  => 'ourselv',         #ourselves
    -1175818855 => 'out',             #out
    -1258511527 => 'over',            #over
    1992226401  => 'own',             #own
    221331120   => 'per',             #per
    1627954268  => 're',              #re
    956988259   => 'she',             #she
    668872260   => 'should',          #should
    -1731836925 => 'so',              #so
    122530779   => 'such',            #such
    1429300470  => 'than',            #than
    -1470915188 => 'that',            #that
    1011183078  => 'the',             #the
    1892329419  => 'their',           #their
    -1470828472 => 'them',            #them
    -1958569585 => 'themselv',        #themselves
    828219890   => 'then',            #then
    1429934590  => 'thenc',           #thence
    1513089430  => 'there',           #there
    258519269   => 'thereaft',        #thereafter
    -592318647  => 'therebi',         #thereby
    847689756   => 'therefor',        #therefore
    -1591631071 => 'therein',         #therein
    -964622075  => 'thereupon',       #thereupon
    1127536855  => 'these',           #these
    -1299307467 => 'they',            #they
    905126349   => 'thi',             #this
    1319329025  => 'those',           #those
    -795243748  => 'though',          #though
    -1094980472 => 'thru',            #thru
    569539970   => 'thu',             #thus
    -678964540  => 'to',              #to
    -1180970221 => 'un',              #un
    -977865572  => 'under',           #under
    -1893063293 => 'until',           #until
    1133833840  => 'up',              #up
    1631313109  => 'upon',            #upon
    -627195958  => 'us',              #us
    454208512   => 'wa',              #was
    478111769   => 'we',              #we
    352077610   => 'well',            #well
    -1184776111 => 'were',            #were
    -1159318942 => 'what',            #what
    2133441367  => 'whatev',          #whatever
    602410524   => 'when',            #when
    312175406   => 'whenc',           #whence
    -503706970  => 'whenev',          #whenever
    495948614   => 'where',           #where
    -2115520762 => 'whereaft',        #whereafter
    1997919605  => 'wherea',          #whereas
    -312871468  => 'wherebi',         #whereby
    -1865827908 => 'wherein',         #wherein
    -15977024   => 'whereupon',       #whereupon
    -188329806  => 'wherev',          #wherever
    97412720    => 'whether',         #whether
    972895631   => 'which',           #which
    -1059826179 => 'while',           #while
    -1069642895 => 'whither',         #whither
    -556385631  => 'who',             #who
    -1318862767 => 'whoever',         #whoever
    -1000789681 => 'whole',           #whole
    1074706220  => 'whom',            #whom
    151201745   => 'whose',           #whose
    934649748   => 'whi',             #why
    501661262   => 'will',            #will
    -1735335410 => 'with',            #with
    -1888765948 => 'within',          #within
    791888417   => 'without',         #without
    1060636614  => 'would',           #would
    -343695350  => 'yet',             #yet
    1718319126  => 'you',             #you
    -1732566604 => 'your',            #your
    691118204   => 'yourself',        #yourself
    881242136   => 'yourselv',        #yourselves
);

sub AUTOLOAD {

    # This AUTOLOAD is used to 'autoload' constants from the constant()
    # XS function.  If a constant is not found then control is passed
    # to the AUTOLOAD in AutoLoader.

    my $constname;
    ( $constname = $AUTOLOAD ) =~ s/.*:://;
    croak "& not defined" if $constname eq 'constant';

    # constant() reports failure through $!; a leading all-digit argument is
    # forwarded to it, anything else is replaced by 0.
    my $val = constant( $constname,
        ( @_ and $_[0] =~ /^\d+$/ ) ? $_[0] : 0 );
    if ( $! != 0 ) {
        if ( $! =~ /Invalid/ ) {

            # Not a known constant: let AutoLoader try to resolve the name.
            $AutoLoader::AUTOLOAD = $AUTOLOAD;
            goto &AutoLoader::AUTOLOAD;
        }
        else {
            croak
"Your vendor has not defined Search::OpenFTS::Dict::PorterEng macro $constname";
        }
    }

    # Install a constant sub under the requested name so later calls bypass
    # AUTOLOAD, then jump to it.
    no strict 'refs';
    *$AUTOLOAD = sub () { $val };
    goto &$AUTOLOAD;
}

# new( [ stop_file => $path ] )
#
# Constructor.  Builds the per-object stop-lexeme set in $self->{STOPLEX}:
#   * with stop_file: every whitespace-separated token of that file is passed
#     through lemms() and the first returned element is registered;
#   * without it: the values of the built-in %STOPWORD table are registered.
# Dies if stop_file is given but cannot be opened.
sub new {
    my ( $class, %opt ) = @_;
    $class = ref($class) || $class;

    my $self = {};
    bless( $self, $class );

    $self->{STOPLEX} = {};

    if ( defined $opt{stop_file} ) {

        # Three-argument open with a lexical handle: the file name can no
        # longer smuggle in an open mode, and no global handle is clobbered.
        open( my $stop_fh, '<', $opt{stop_file} )
          or die "Can't open $opt{stop_file}: $!";

        # Read the file line by line (the loop previously had an empty
        # condition, which never read anything and never terminated).
        while ( my $line = <$stop_fh> ) {
            while ( $line =~ /(\S+)/g ) {
                $self->{STOPLEX}{ ( $self->lemms($1) )[0] } = 1;
            }
        }
        close $stop_fh;
    }
    else {
        for my $stem ( values %STOPWORD ) {
            $self->{STOPLEX}{$stem} = 1;
        }
    }

    return $self;
}

# Object destructor: delegates to the XS-level destroy() routine.
sub DESTROY {
    destroy();
}

# lemms( $word )
# Returns, in list form, whatever the XS function Lexem() yields for the
# lower-cased word.
sub lemms {
    my ( $self, $word ) = @_;
    return ( Lexem( lc $word ) );
}

# lemmsid( $word )
# Returns, in list form, whatever the XS function LexemID() yields for the
# lower-cased word.
sub lemmsid {
    my ( $self, $word ) = @_;
    return ( LexemID( lc $word ) );
}

# is_stoplemm( $lemm_id )
# Returns 1 if the integer part of $lemm_id is a key of the built-in
# %STOPWORD table, 0 otherwise.  A stop_file passed to new() has no effect
# on this check.
sub is_stoplemm {
    my ( $self, $lemm_id ) = @_;
    return ( exists $STOPWORD{ int $lemm_id } ) ? 1 : 0;
}

# is_stoplexem( $lexem )
# Returns 1 if the lower-cased $lexem is in this object's stop-lexeme set
# (built by new()), 0 otherwise.
sub is_stoplexem {
    my ( $self, $lexem ) = @_;
    return ( exists $self->{STOPLEX}{ lc($lexem) } ) ? 1 : 0;
}

bootstrap Search::OpenFTS::Dict::PorterEng $VERSION;

1;
__END__

=head1 NAME

Search::OpenFTS::Dict::PorterEng - Porter Stemming Algorithm for English language.
( Xapian (former Omseek) ).

=head1 SYNOPSIS

  use Search::OpenFTS::Dict::PorterEng;
  my $dict=Search::OpenFTS::Dict::PorterEng->new;

  my @list_of_lemm_id = $dict->lemmsid ($word);
  my $stop = $dict->is_stoplemm( $lemmid );

  my @lemm = $dict->lemms( $word );
  my $stop = $dict->is_stoplexem( $lemm );

=head1 AUTHOR

Teodor Sigaev, teodor@sigaev.ru

=head1 SEE ALSO

Snowball web site http://snowball.sourceforge.net/

The OpenFTS Primer ( see doc/ subdirectory )

The Crash-course to OpenFTS ( in examples/ subdirectory )

perldoc Search::OpenFTS::Search

perldoc Search::OpenFTS::Index

perldoc Search::OpenFTS::Parser

perldoc Search::OpenFTS::Dict::Snowball

perldoc Search::OpenFTS::Dict::UnknownDict

perldoc Search::OpenFTS::Morph::ISpell

=cut