// This file is part of The New Aspell // Copyright (C) 2001 by Kevin Atkinson under the GNU LGPL license // version 2.0 or 2.1. You should have received a copy of the LGPL // license along with this library if you did not you can find // it at http://www.gnu.org/. #include "tokenizer.hpp" #include "convert.hpp" #include "speller.hpp" namespace acommon { class TokenizerBasic : public Tokenizer { public: bool advance(); }; bool TokenizerBasic::advance() { word_begin = word_end; begin_pos = end_pos; FilterChar * cur = word_begin; unsigned int cur_pos = begin_pos; word.clear(); // skip spaces (non-word characters) while (*cur != 0 && !(is_word(*cur) || (is_begin(*cur) && is_word(cur[1])))) { cur_pos += cur->width; ++cur; } if (*cur == 0) return false; word_begin = cur; begin_pos = cur_pos; if (is_begin(*cur) && is_word(cur[1])) { cur_pos += cur->width; ++cur; } while (is_word(*cur) || (is_middle(*cur) && cur > word_begin && is_word(cur[-1]) && is_word(cur[1]) )) { word.append(*cur); cur_pos += cur->width; ++cur; } if (is_end(*cur)) { word.append(*cur); cur_pos += cur->width; ++cur; } word.append('\0'); word_end = cur; end_pos = cur_pos; return true; } #undef increment__ PosibErr new_tokenizer(Speller * speller) { Tokenizer * tok = new TokenizerBasic(); speller->setup_tokenizer(tok); return tok; } }