// ========== This file is under LGPL, the GNU Lesser General Public Licence // ========== Dialing Graphematical Module (www.aot.ru) // ========== Copyright by Alexey Sokirko (1996-2001) #include "StdGraph.h" #include "../common/Graspace.h" #include "graline.h" #include "GraphmatFile.h" #include "GraphanDicts.h" const int NumberOfGraphematicalDescriptors = 48; const int ODesLen = 9; /* the length of unit's descriptor */ const char DesArray[NumberOfGraphematicalDescriptors][ODesLen] = { "BEG","RLE", "LLE","DEL","PUN",//the first position : //0-4 "DC", "DSC","EA","_UNK_", // 5-8 //the second position : "SPC","HYP","EOLN","EOP", //9-12 "PLP","AA", "aa", "Aa", // 13-16 "NAM?", "OPN","CLS", // 17-19 "EMSYM", "BUL", "INDENT", // 20-22 "PASS", "DPUN", "PAR_SYM", // 23-25 // macrosyntax "CS?","CS","QUA","CS_PRNT","HDNG","CS_AUX","DOC", // 26-32 // pair descriptors : // if d is the first descriptor, then it should be always even, and d+1 should be the corresponding second descriptor "EXPR1","EXPR2", // 33-34 "FAM1","FAM2", // 35-36 "RE1", "RE2", // 37-38 "FILE1", "FILE2", // 39-40 "ABB1", "ABB2", // 41-42 "KEY1", "KEY2", // 43-44 "GDC1", "GDC2", // 45-46 // German divided compounds "SENT_END" // 47 }; bool IsFirstMemberOfPairDesciptor(Descriptors d) { return (d%2 == 0) && (d >= (int)OEXPR1) && (d < NumberOfGraphematicalDescriptors); }; bool IsSecondMemberOfPairDesciptor(Descriptors d) { return (d%2 != 0) && (d >= (int)OEXPR2) && (d < NumberOfGraphematicalDescriptors); }; Descriptors GetSecondMemberByTheFirst(Descriptors d) { assert (IsFirstMemberOfPairDesciptor(d)); return (Descriptors)(d+1); }; Descriptors GetFirstMemberByTheSecond(Descriptors d) { assert (IsSecondMemberOfPairDesciptor(d)); return (Descriptors)(d-1); }; bool GetDescriptorStr(int DescriptorNo, string& Result) { if (DescriptorNo >= NumberOfGraphematicalDescriptors) return false; Result = DesArray[DescriptorNo]; return true; }; const char* GetDescriptorStr(int DescriptorNo) { assert (DescriptorNo < NumberOfGraphematicalDescriptors); return DesArray[DescriptorNo]; }; /* all descriptors which end a text period like a sentence or a paragraph */ bool IsEndTextPeriodDescriptor (Descriptors d) { return (d == CS_Undef) || (d == CS_Simple) || (d == CS_Parent) || (d == CS_Quasi) || (d == CS_Heading) || (d == CS_Explan) || (d == OSentEnd); }; CGraLine::CGraLine() { m_Status = 0; unit = NULL; m_Descriptors = 0; ulen = slen = 0; m_InputOffset = 0; }; bool CGraLine :: HasMacroSyntaxDelimiter () const { return ( m_Descriptors & ( _QM(CS_Undef ) | _QM( CS_Simple ) | _QM( CS_Parent ) | _QM( CS_Heading ) | _QM( CS_Explan ) ) ) > 0; }; bool CGraLine :: IsWordOrNumberOrAbbr() const { return ( m_Descriptors & ( _QM ( ORLE ) | _QM ( OLLE ) | _QM ( ODC ) | _QM ( ODSC ) | _QM ( OAbbr1 ) ) ) != 0; } int get_descriptor_len ( const char * s) { for (int i = 0; i< NumberOfGraphematicalDescriptors; i++) { size_t len = strlen (DesArray[i]); if (strncmp(s,DesArray[i],len) == 0) return strlen(DesArray[i]); } return 0; } bool CGraLine :: IsSingleSpaceToDelete() const { return (GetTokenLength() == 1) && (GetToken()[0] == ' ') // we delete only space, leaving alone tabulations && (m_Status == stSpace); // this line means that there is no other meaning for this // space, for example, it prevents to consider a converted "
(unit), ' ', SpacesLength); }; int CGraLine :: ToInt () const { char s[100]; strncpy (s,unit,ulen); s[ulen] = 0; return atoi (s); }; bool CGraLine::IsNotPrint () const { return (m_Status & stNotPrint) != 0; }; bool CGraLine::IsEnglishName () const { return (m_Status & stEnglishName) != 0; }; bool CGraLine::IsIdent () const { return (m_Status & stIdent) != 0; };; bool CGraLine::IsGrouped () const { return (m_Status & stGrouped) != 0; }; bool CGraLine::IsAbbreviation () const { return (m_Status & stAbbreviation) != 0; }; bool CGraLine::IsParagraphChar () const { return (m_Status & stParagraphChar) != 0; }; bool CGraLine::IsPageBreak() const { return (m_Status & stPageBreak) != 0; }; bool CGraLine::IsTextAreaEnd() const { return (m_Status & stTextAreaEnd) != 0; }; bool CGraLine::IsParagraphTag () const { return (m_Status & stParagraphTag) != 0; }; bool CGraLine::IsKeyModifier() const { return (m_Status & stKeyModifier) != 0; }; bool CGraLine::IsElectronicAddress() const { return (m_Status & stElectronicAddress) != 0; }; bool CGraLine::IsChar (int c) const { return (ulen == 1) && (unit[0] == c); }; bool CGraLine::IsAsterisk () const { return IsChar((unsigned char)'*') || IsChar(149); }; bool CGraLine::HasSingleSpaceAfter() const { return (m_Status & stSingleSpaceAfter) > 0;}; bool CGraLine::IsString (const char* s) const { return (s[ulen] == 0) && (!strncmp(unit,s,ulen)); }; void CGraLine::SetSpace () { m_Status |= stSpace; }; void CGraLine::SetEOLN () { m_Status |= stEOLN; }; void CGraLine::SetNotPrint() { m_Status |= stNotPrint; }; void CGraLine::SetPunct () { m_Status |= stPunct; }; void CGraLine::SetParagraphChar() { m_Status |= stParagraphChar; }; void CGraLine::SetParagraphTag() { m_Status |= stParagraphTag; }; void CGraLine::SetKeyModifier() { m_Status |= stKeyModifier; }; void CGraLine::SetElectronicAddress() { m_Status |= stElectronicAddress; }; void CGraLine::SetSingleSpaceAfter() { m_Status |= stSingleSpaceAfter; }; void CGraLine::SetIdent() { m_Status |= stIdent; }; void CGraLine::SetPageBreak() { m_Status |= stPageBreak; }; void CGraLine::SetTextAreaEnd() { m_Status |= stTextAreaEnd; }; void CGraLine::SetEnglishName () { m_Status |= stEnglishName; }; void CGraLine::DelDes(Descriptors d) { m_Descriptors &= ~( _QM(d) ); }; void CGraLine::SetDes(Descriptors d) { m_Descriptors |= _QM(d); }; void CGraLine::AddStatus(WORD add_state) { m_Status |= add_state; }; void CGraLine::AddLength(const CGraLine& L) { ulen += L.GetTokenLength(); slen += L.GetScreenLength(); }; void CGraLine::SetToken(const char* s) { unit = s; }; size_t GetInternetAddressStarter (const char *s) { if (!strncmp(s, "http://",strlen("http://"))) return strlen("http://"); if (!strncmp(s, "HTTP://",strlen("http://"))) return strlen("http://"); if (!strncmp(s, "ftp://",strlen("ftp://"))) return strlen("ftp://"); if (!strncmp(s, "FTP://",strlen("ftp://"))) return strlen("ftp://"); if (!strncmp(s, "ftp.",strlen("ftp."))) return strlen("ftp."); if (!strncmp(s, "FTP.",strlen("ftp."))) return strlen("ftp."); if (!strncmp(s, "www.",strlen("www."))) return strlen("www."); if (!strncmp(s, "WWW.",strlen("www."))) return strlen("www."); if (!strncmp(s, "www2.",strlen("www2."))) return strlen("www2."); if (!strncmp(s, "WWW2.",strlen("www2."))) return strlen("www2."); return 0; } size_t CGraLine::LengthUntilDelimiters (const char *s, const CGraphmatFile* G) { bool bElectronicAddress = GetInternetAddressStarter(s) > 0; int i = 0; for (i=0; i