#!/usr/bin/perl
# test OpenFTS parser and dictionaries
# Oleg Bartunov, oleg@sai.msu.su
#
# Reads text from STDIN, splits it into lexemes with
# Search::OpenFTS::Parser and prints each lexeme with its type and type
# description.  Unless -p is given, every lexeme that is not a space symbol
# is also run through each dictionary listed in $dict, and the resulting
# lemmas are printed together with their stop-word status.
#
# Options:
#   -p        don't load or use any dictionaries
#   -s types  comma-separated lexeme types whose "lexeme:" line is not
#             printed; an empty list means the space-symbol type (12)
#   -v        print the name of every dictionary module being loaded
#   -h        print usage and exit
use strict;
use warnings;
use Getopt::Std;
use lib qw (.);
use Search::OpenFTS::Parser;

# Lexeme type the parser reports for space symbols.
my $SPACE_TYPE = 12;

# add dictionary here
#
# Each entry is a one-key hash: label => Perl expression (as a string) that
# constructs the dictionary object.  The part before "->new" is taken as the
# module name to load.  An array of hashes is used so the order is preserved.
my $dict = [
    { 'Integer-Short' => 'Dict::IntegerDict->new(MAXLEN=>6, REJECTLONG=>0)' },    # shortened
    { 'Integer-Stop'  => 'Dict::IntegerDict->new(MAXLEN=>6, REJECTLONG=>1)' },    # is_stoplexem works
    { 'Decimal-Short' => 'Dict::DecimalDict->new(MAXLENFRAC=>2, REJECTLONG=>0)' },    # shortened
    { 'Decimal-Stop'  => 'Dict::DecimalDict->new(MAXLENFRAC=>2, REJECTLONG=>1)' },    # is_stoplexem works
    { 'PorterEng'      => 'Search::OpenFTS::Dict::PorterEng->new()' },    # standard stemmer
    { 'PorterEng-Dict' => 'Dict::EngStem->new()' },                       # strict
    { 'Snowball-Eng' => 'Search::OpenFTS::Dict::Snowball->new(lang=>"english",stop_file=>"/u/megera/app/fts/test-suite/Dict/english.stop" )' },
    { 'Snowball-EngDict' => 'Dict::StemSnowball->new(lang=>"english",stop_file=>"/u/megera/app/fts/test-suite/Dict/english.stop" )' },
    { 'Snowball-Rus' => 'Search::OpenFTS::Dict::Snowball->new(lang=>"russian",stop_file=>"/u/megera/app/fts/test-suite/Dict/russian.stop" )' },
    { 'Snowball-RusDict' => 'Dict::StemSnowball->new(lang=>"russian",stop_file=>"/u/megera/app/fts/test-suite/Dict/russian.stop" )' },
    { 'ISpell-Eng' => 'Search::OpenFTS::Morph::ISpell->new(aff_file=>"/usr/local/lib/english.aff",dict_file=>"/usr/local/lib/english.dict",stop_file=>"/u/megera/app/fts/test-suite/Dict/english.stop" )' },
    { 'ISpell-Rus' => 'Search::OpenFTS::Morph::ISpell->new(aff_file=>"/usr/local/lib/russian.aff",dict_file=>"/usr/local/lib/russian.dict",stop_file=>"/u/megera/app/fts/test-suite/Dict/russian.stop" )' },
];

my @dclass       = ();    # list of [ dictionary object, label ] pairs
my %opts         = ();
my %ignore_types = ();    # lexeme type => 1 for types that are not printed

getopts( 'vphs:', \%opts );

if ( $opts{h} ) {
    print "Usage: $0 [-v] [-p] [-h] [-s types]\n"
      . "\t -p - don't use dictionaries\n"
      . "\t -s - don't print lexemes of the given comma-separated types\n"
      . "\t      (space symbols, type $SPACE_TYPE, if no types are given)\n"
      . "\t -v - show dictionaries\n"
      . "\t -h - help, this text\n";
    exit 0;
}

# ignore space symbols by default: getopts stores undef for a trailing "-s"
# without a value, so treat both undef and '' as "use the default type".
if ( exists $opts{s} && ( !defined $opts{s} || $opts{s} eq '' ) ) {
    $opts{s} = $SPACE_TYPE;
}
if ( defined $opts{s} ) {
    $ignore_types{$_} = 1 for split /,/, $opts{s};
}

my $pa = Search::OpenFTS::Parser->new;

if ( !$opts{p} ) {    # -p, don't need any dictionaries
    foreach my $entry (@$dict) {
        foreach my $label ( keys %$entry ) {
            my $spec = $entry->{$label};

            # The module to load is whatever precedes "->new" in the spec.
            my ($mod) = $spec =~ /^([\w:]+)->new/
              or die "Can't find module name for dictionary '$label' in: $spec\n";
            print "Requested module: $mod\n" if $opts{v};

            # String eval is required here: the module name is only known
            # at run time.  The specs are hard-coded above, not user input.
            eval "use $mod; 1" or die "Can't use module $mod: $@";

            my $odict = eval $spec;
            die "Can't create dictionary '$label': $@" if $@;
            die "Dictionary '$label' constructor returned nothing: $spec\n"
              unless defined $odict;

            push @dclass, [ $odict, $label ];
        }
    }
}

my ( $type, $word );
$pa->start_parser( \*STDIN );

# The list assignment yields the number of values returned by get_word, so
# the loop ends when the parser returns nothing or a false type.
while ( ( ( $type, $word ) = $pa->get_word ) && $type ) {
    print "lexeme:$word:$type:", $pa->type_description($type), "\n"
      if !$ignore_types{$type};

    next if $type eq $SPACE_TYPE || $opts{p};    # space symbol or no dictionaries

    foreach my $pair (@dclass) {
        my ( $dictionary, $label ) = @$pair;

        # Look the word up once and reuse the lemmas for both columns
        # (assumes lemms() has no side effects).
        my @lemmas = $dictionary->lemms($word);
        my @status = map {
            my $is_stop = $dictionary->is_stoplexem($_);
            ( defined $is_stop && $is_stop == 1 ) ? "stop" : " ok "
        } @lemmas;

        print $label, ":\t", "$word -> ", join( ',', @lemmas ), ":\t",
          join( ',', @status ), "\n";
    }
}
$pa->end_parser;

print "-----------------------\n" if ( !$opts{p} );