/*
 * Copyright (c) 2006 Andrei V. Shethin
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the Andrei V. Shethin Team nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      LemClient.hpp
 *
 * $lem$
 */
#ifndef _LEM_CLIENT_HPP__
#define _LEM_CLIENT_HPP__ 1

// C++ Includes
#include <string>
#include <vector>
// NOTE(review): the header names of the include directives in this file
// (three active, one commented out) were not legible. <string> and <vector>
// are the ones the declarations below require -- confirm nothing else is
// expected to be pulled in transitively by users of this header.

// FWD
class CLemmatizer;
class CAgramtab;

namespace lem
{

// Language selection flags. Each value occupies a distinct bit.
// NOTE(review): presumably OR-ed together and passed as the iLanguages
// argument of Lemmatizer -- confirm against the implementation.
// These are preprocessor macros, so they are visible outside namespace lem.
#define C_RUS_GRAM_TAB  0x00000001
#define C_GER_GRAM_TAB  0x00000002
#define C_ENG_GRAM_TAB  0x00000004

// Capacity (excluding one extra trailing byte) of Lemmatizer::szBuffer.
#define C_BUFFER_SIZE   2048

/**
  @struct LemmResultQuick LemClient.hpp <LemClient.hpp>
  @brief Word lemmatization result, only first form
*/
struct LemmResultQuick
{
	/** Vocabulary or not */
	bool          vocabulary;
	/** First word form */
	std::string   first_form;
	/** Word form */
	int           word_form;
};

/**
  @struct LemmResult LemClient.hpp <LemClient.hpp>
  @brief Word lemmatization result with all word forms

  Extends LemmResultQuick with the complete list of word forms.
*/
struct LemmResult:
  public LemmResultQuick
{
	/** All possible word forms */
	std::vector<std::string> all_forms;
};

/**
  @class Lemmatizer LemClient.hpp <LemClient.hpp>
  @brief Russian, German and English lemmatization system

  NOTE(review): the class stores raw pointers and declares a destructor but
  no copy constructor or copy assignment operator. If the destructor releases
  these pointers, copying a Lemmatizer would be unsafe -- confirm in the
  implementation file.
*/
class Lemmatizer
{
public:
	/**
	  @brief Constructor
	  @param sRML - directory with lemmatizer database
	  @param iLanguages - languages to load
	*/
	Lemmatizer(const std::string sRML, int iLanguages);

	/**
	  @brief A destructor
	*/
	~Lemmatizer() throw();

	/**
	  @brief Get supported languages
	  @return languages value; NOTE(review): presumably the C_*_GRAM_TAB
	          flags that were loaded -- confirm against the implementation
	*/
	int GetLanguages();

	/**
	  @brief Lemmatize word and get ONLY array of first forms
	  @param sWord - word to lemmatize (taken by non-const reference;
	                 whether it is modified is not visible in this header)
	  @param bUseParadigms - defaults to true; NOTE(review): exact effect is
	                         defined by the implementation
	  @return vector of lemmatized words
	*/
	std::vector<std::string> FirstFormsQuick(std::string & sWord, bool bUseParadigms = true);

	/**
	  @brief Lemmatize word and get first forms and word form
	  @param sWord - word to lemmatize (taken by non-const reference;
	                 whether it is modified is not visible in this header)
	  @param bUseParadigms - defaults to true; NOTE(review): exact effect is
	                         defined by the implementation
	  @return result of lemmatization
	*/
	std::vector<LemmResultQuick> LemmatizeQuick(std::string & sWord, bool bUseParadigms = true);

	/**
	  @brief Lemmatize word and get also all word forms
	  @param sWord - word to lemmatize (taken by non-const reference;
	                 whether it is modified is not visible in this header)
	  @param bUseParadigms - defaults to true; NOTE(review): exact effect is
	                         defined by the implementation
	  @return vector of lemmatized words with word forms
	*/
	std::vector<LemmResult> Lemmatize(std::string & sWord, bool bUseParadigms = true);

private:
	/** Languages value (see constructor parameter iLanguages) */
	int            iLanguages;

	/** Per-language lemmatizer objects (Russian, German, English by name) */
	CLemmatizer  * pRusLem;
	CLemmatizer  * pGerLem;
	CLemmatizer  * pEngLem;

	/** Per-language grammar tables (Russian, German, English by name) */
	CAgramtab    * pRusGramTab;
	CAgramtab    * pGerGramTab;
	CAgramtab    * pEngGramTab;

	/** Internal character buffer of C_BUFFER_SIZE bytes plus one extra byte */
	char           szBuffer[C_BUFFER_SIZE + 1];
};

} // namespace lem
#endif // _LEM_CLIENT_HPP__
// End.