// ========== This file is under LGPL, the GNU Lesser General Public Licence // ========== Dialing Lemmatizer (www.aot.ru) // ========== Copyright by Alexey Sokirko #ifndef __RUSGRAMTAB_H_ #define __RUSGRAMTAB_H_ #include "agramtab_.h" #include "rus_consts.h" ///////////////////////////////////////////////////////////////////////////// const unsigned int StartUp = 0xC0E0; //Аа  const unsigned int EndUp = 0x10000; //яя const unsigned int MaxGrmCount = EndUp -StartUp; // // 16159 (16 Кб) const BYTE rPartOfSpeechCount = 22; // не должно быть пробелов между словами const char rPartOfSpeeches[rPartOfSpeechCount][20] = { "С", // 0 "П", // 1 "Г", // 2 "МС", // 3 "МС-П", // 4 "МС-ПРЕДК", // 5 "ЧИСЛ", // 6 "ЧИСЛ-П", // 7 "Н", // 8 "ПРЕДК", //9 "ПРЕДЛ", // 10 "ПОСЛ", // 11 "СОЮЗ", // 12 "МЕЖД", // 13 "ВВОДН",// 14 "ФРАЗ", // 15 "ЧАСТ", // 16 "КР_ПРИЛ", // 17 "ПРИЧАСТИЕ", //18 "ДЕЕПРИЧАСТИЕ", //19 "КР_ПРИЧАСТИЕ", // 20 "ИНФИНИТИВ" //21 }; const short GrammemsCount = 52; const char Grammems[GrammemsCount][10] = { // 0..1 "мн","ед", // 2..8 "им","рд","дт","вн","тв","пр","зв", // род 9-12 "мр","жр","ср","мр-жр", // 13 "кр", // 14..16 "нст","буд","прш", // 17..19 "1л","2л","3л", // 20 "пвл", // 21..22 "од","но", // 23 "сравн", // 24..25 "св","нс", // 26..27 "нп","пе", // 28..29 "дст","стр", // 30-32 "0", "аббр", "отч", // 33-34 "лок", "орг", // 35-36 "кач", "дфст", // 37-38 (наречия) "вопр", "указат", // 39..40 "имя","фам", // 41 "безл", // 42,43 "жарг", "опч", // 44,45,46 "разг", "притяж", "арх", // для второго родительного и второго предложного "2", "поэт", "проф", "прев", "полож" }; const int rClauseTypesCount = 12; const char rClauseTypes [rClauseTypesCount][30] = { "ГЛ_ЛИЧН", "ДПР", "КР_ПРЧ", "КР_ПРИЛ", "ПРЕДК", "ПРЧ", "ИНФ", "ВВОД", "ТИРЕ", "НСО", "СРАВН", "КОПУЛ" }; class CRusGramTab : public CAgramtab{ public: CAgramtabLine* Lines[MaxGrmCount]; CRusGramTab(); ~CRusGramTab(); BYTE GetPartOfSpeechesCount () const; const char* GetPartOfSpeechStr(BYTE i) const; size_t GetGrammemsCount() const; const char* GetGrammemStr(size_t i) const; bool IsAdditionalGrammem (const char* s) const; size_t GetMaxGrmCount() const; CAgramtabLine*& GetLine(size_t LineNo); const CAgramtabLine* GetLine(size_t LineNo) const; size_t s2i(const char * s ) const; string i2s(WORD i) const; bool ProcessPOSAndGrammems (const char* tab_str, BYTE& PartOfSpeech, QWORD& grammems) const; const char* GetRegistryString() const; bool GleicheCase(const char* gram_code_noun, const char* gram_code_adj) const; bool GleicheCaseNumber(const char* gram_code1, const char* gram_code2) const; QWORD GleicheGenderNumberCase(const char* common_gram_code_noun, const char* gram_code_noun, const char* gram_code_adj) const; bool GleicheGenderNumber(const char* gram_code1, const char* gram_code2) const; bool GleicheSubjectPredicate(const char* gram_code1, const char* gram_code2) const; long GetClauseTypeByName(const char* TypeName) const; const char* GetClauseNameByType(long type) const; const size_t GetClauseTypesCount() const; bool IsStrongClauseRoot(const DWORD Poses) const; bool is_month (const char* lemma) const; bool is_small_number (const char* lemma) const; bool IsMorphNoun (int Poses) const; bool is_morph_adj (int Poses) const; bool is_morph_participle (int Poses) const; bool is_morph_pronoun (int Poses) const; bool is_morph_pronoun_adjective(int Poses) const; bool is_left_noun_modifier (int Poses, QWORD grammems) const; bool is_numeral (int poses) const; bool is_verb_form (int poses) const; bool is_infinitive(int poses) const; bool is_morph_predk(int poses) const; bool is_morph_adv(int poses) const; bool is_morph_article(int poses) const; bool is_morph_personal_pronoun (int poses, QWORD grammems) const; bool IsParticle(const char* lemma, int poses) const; bool IsSynNoun(int Poses, const char* Lemma) const; bool IsStandardParamAbbr (const char* WordStrUpper) const; BYTE GetTagId(const char* gram_code) const; }; #endif //__RUSGRAMTAB_H_