package Search::OpenFTS::Base;
use strict;
use warnings;

=head1 NAME

Search::OpenFTS::Base - Base class for searching and indexing

=head1 SYNOPSIS

    my $s = Search::OpenFTS::Base->new( $dbi, index => [ 0 | 1 ], prefix => $prefix );

=head1 DESCRIPTION

The constructor reads the C<fts_conf> table (C<< <prefix>_fts_conf >> when a
prefix is given) through the supplied database handle and configures the
object from its rows:

=over 4

=item txttid

C<mod> is stored as C<TXTID> and split on the first dot into C<TABLE> and
C<IDNAME>.

=item map

The evaluated C<param> is stored as C<MAP>. C<_get_lexems> uses it to look up,
per lexeme type, the list of dictionary indexes to consult.

=item tsvector_field

C<mod> is stored as C<TSVECTOR_FIELD>.

=item ignore_headline, ignore_id_index

C<mod> is split on whitespace; the resulting ids become the keys of
C<IGNOREIDHEADLINE> and C<IGNOREID> respectively.

=item parser

The module named in C<mod> is loaded and instantiated into C<PRS>.

=item dict

The module named in C<mod> is loaded and instantiated into
C<< DICT->[did] >>. The constructor additionally receives C<DBI> (the
database handle) and C<index> (the C<index> option).

=back

=head1 METHODS

=head2 new( $dbi, %opt )

C<$dbi> must be a reference (a database handle providing C<prepare>).
Recognised options:

=over 4

=item index

Passed to every dictionary constructor; defaults to 0.

=item prefix

Only its first character is used and it must be a lowercase letter C<a>-C<z>.
It selects the C<< <letter>_fts_conf >> table and sets C<FPREFIX> to the
1-based position of the letter in the alphabet.

=back

Returns the new object, or C<undef> when C<$dbi> is not a reference or the
prefix is invalid. Dies when the configuration cannot be read, a C<param>
value fails to evaluate, a module cannot be loaded or instantiated, a row
carries an unknown keyword, or the configured tsvector field is empty.

=cut

sub new {
    my ( $class, $dbi, %opt ) = @_;
    $class = ref($class) || $class;

    # "return undef" (rather than a bare return) is kept on purpose so that
    # existing callers see the same value in list context.
    return undef if !ref $dbi;

    my $self = { DBI => $dbi };
    $opt{index}  ||= 0;
    $opt{prefix} ||= '';

    if ( length $opt{prefix} ) {

        # Keep only the first character; it must be a lowercase letter.
        $opt{prefix} =~ s/^([a-z]).*$/$1/ or return undef;
        $self->{PREFIX}  = $opt{prefix} . '_';
        $self->{FPREFIX} = 1 + ord( $opt{prefix} ) - ord('a');
    }
    else {
        $self->{PREFIX}  = '';
        $self->{FPREFIX} = 0;
    }

    # PREFIX is either empty or "<a-z>_" at this point, so interpolating it
    # into the statement is safe.
    my $sth = $self->{DBI}
      ->prepare("select * from $self->{PREFIX}fts_conf order by name,did asc;");
    $sth          or die "Can't DBI prepare ($DBI::errstr)";
    $sth->execute or die "Can't DBI execute ($DBI::errstr)";

    # Default used when no tsvector_field row exists. Because length(0) is 1,
    # the emptiness check after the loop only fires for a row whose mod is
    # empty or NULL.
    $self->{TSVECTOR_FIELD} = 0;

    while ( my $dict = $sth->fetchrow_hashref ) {
        my $name = defined $dict->{name} ? $dict->{name} : '';

        my $param;
        if ( defined $dict->{param} && length $dict->{param} ) {

            # NOTE(review): the param column is evaluated as Perl source, so
            # anyone able to write to fts_conf can execute code here. Kept
            # because the configuration format depends on it.
            $param = eval $dict->{param};
            die "Can't eval param with did=$dict->{did}: $@" if $@;
        }

        # Rows that only carry settings; no module is loaded for them.
        if ( $name eq 'txttid' ) {
            $self->{TXTID} = $dict->{mod};
            ( $self->{TABLE}, $self->{IDNAME} ) =
              split( /\./, $self->{TXTID}, 2 );
            next;
        }
        elsif ( $name eq 'map' ) {    # schema
            $self->{MAP} = $param;
            next;
        }
        elsif ( $name eq 'tsvector_field' ) {
            $self->{TSVECTOR_FIELD} = $dict->{mod};
            next;
        }
        elsif ( $name eq 'ignore_headline' ) {
            $self->{IGNOREIDHEADLINE} =
              { map { $_ => 1 } split( /\s+/, $dict->{mod} ) };
            next;
        }
        elsif ( $name eq 'ignore_id_index' ) {
            $self->{IGNOREID} =
              { map { $_ => 1 } split( /\s+/, $dict->{mod} ) };
            next;
        }

        # Reject unknown keywords before any module gets loaded for them.
        die "Unknown keyword: $name"
          if $name ne 'parser' && $name ne 'dict';

        # The module name is spliced into a string eval below, so it has to
        # be a plain package name and nothing else.
        my $module = defined $dict->{mod} ? $dict->{mod} : '';
        die "Can't use module $module: invalid module name"
          if $module !~ /\A[A-Za-z_][A-Za-z0-9_]*(?:::[A-Za-z0-9_]+)*\z/;

        eval( 'use ' . $module . ';' );
        die "Can't use module $dict->{mod}: $@" if ($@);

        $param ||= {};
        if ( $name eq 'parser' ) {    # parser
            eval { $self->{PRS} = $module->new(%$param); };
            die "Can't call method new in parser: $@"
              if ( $@ || !ref $self->{PRS} );
        }
        else {                        # dictionary
            $param->{DBI}   = $dbi;
            $param->{index} = $opt{index};
            eval { $self->{DICT}[ $dict->{did} ] = $module->new(%$param); };
            die "Can't call method new in dict $dict->{mod}: $@"
              if ( $@ || !$self->{DICT}[ $dict->{did} ] );
        }
    }

    $self->{MAP} ||= {};
    die "tsvector_field are void" if ( !length $self->{TSVECTOR_FIELD} );

    return bless( $self, $class );
}

# _get_lexems( $type, $word, $do )
#
# Asks the dictionaries, in order, for the lexemes of $word and returns the
# answer of the first dictionary whose lemms() yields a non-empty list.
#
#   $type - key into $self->{MAP}; when present, its value lists the indexes
#           of $self->{DICT} to consult, otherwise every index is tried.
#   $word - the word handed to each dictionary's lemms().
#   $do   - optional hash reference, flattened into extra lemms() arguments.
#
# Returns undef when no dictionary produced anything, otherwise an array
# reference. If the answering dictionary implements is_stoplexem(), lexemes
# it flags are removed (the resulting array may then be empty).
sub _get_lexems {
    my ( $self, $type, $word, $do ) = @_;
    local $_;
    $do ||= {};

    my $dicts = $self->{DICT} || [];
    my @order =
      exists $self->{MAP}{$type}
      ? @{ $self->{MAP}{$type} || [] }
      : 0 .. $#{$dicts};

    my $dict;
    my @lexems;
    foreach my $i (@order) {
        my $candidate = $dicts->[$i];

        # DICT is indexed by did and may therefore have holes; a MAP entry
        # may also name an index that was never loaded. Skip those instead
        # of calling a method on undef.
        next if !defined $candidate;

        @lexems = $candidate->lemms( $word, %$do );
        if (@lexems) {
            $dict = $candidate;
            last;
        }
    }

    return undef    if !@lexems;
    return \@lexems if !$dict->can('is_stoplexem');
    return [ grep { !$dict->is_stoplexem($_) } @lexems ];
}

1;