diff options
Diffstat (limited to 'streamingvisitors/src/vespa/vsm/searcher/tokenizereader.cpp')
-rw-r--r-- | streamingvisitors/src/vespa/vsm/searcher/tokenizereader.cpp | 33 |
1 file changed, 33 insertions, 0 deletions
diff --git a/streamingvisitors/src/vespa/vsm/searcher/tokenizereader.cpp b/streamingvisitors/src/vespa/vsm/searcher/tokenizereader.cpp
index d8a6091fe11..5988bdd912f 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/tokenizereader.cpp
+++ b/streamingvisitors/src/vespa/vsm/searcher/tokenizereader.cpp
@@ -4,6 +4,19 @@
 
 namespace vsm {
 
+namespace { // file-local helpers
+
+template <bool exact_match> inline bool is_word_char(ucs4_t c); // declared only; the two specializations below supply the definitions
+
+template <>
+inline bool is_word_char<false>(ucs4_t c) { return Fast_UnicodeUtil::IsWordChar(c); } // non-exact match: defer to Fast_UnicodeUtil::IsWordChar
+
+// All characters are treated as word characters for exact match
+template <>
+inline constexpr bool is_word_char<true>(ucs4_t) { return true; }
+
+}
+
 void
 TokenizeReader::fold(ucs4_t c) {
     const char *repl = Fast_NormalizeWordFolder::ReplacementString(c);
@@ -18,4 +31,24 @@ TokenizeReader::fold(ucs4_t c) {
     }
 }
 
+template <bool exact_match> // exact_match selects which is_word_char specialization classifies characters
+size_t
+TokenizeReader::tokenize_helper(Normalizing norm_mode)
+{
+    ucs4_t c(0);
+    while (hasNext()) { // skip input until the first word character is found
+        if (is_word_char<exact_match>(c = next())) {
+            normalize(c, norm_mode);
+            while (hasNext() && is_word_char<exact_match>(c = next())) { // take the rest of the consecutive word characters
+                normalize(c, norm_mode);
+            }
+            break; // one run per call; a non-word character that ended the run was fetched via next() but is not normalized
+        }
+    }
+    return complete(); // also reached when the input runs out without any word character
+}
+
+template size_t TokenizeReader::tokenize_helper<false>(Normalizing); // explicit instantiations for both exact_match values
+template size_t TokenizeReader::tokenize_helper<true>(Normalizing);
+
 }
|