diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-12-21 10:56:49 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2023-12-21 10:56:49 +0000 |
commit | 543366294b1e8ae8a01186c25b74e36ed4c3ae35 (patch) | |
tree | f62fb82c65a6152fabe944caa4e719051f4ab032 /searchsummary/src/vespa/juniper/tokenizer.cpp | |
parent | ef3db955e75e6df68a2a358feb5b95e44979377f (diff) |
- Separate methods for lowercasing, and lowercasing and folding.
- Hide implementations and use accessors.
- Minor code cleanup.
Diffstat (limited to 'searchsummary/src/vespa/juniper/tokenizer.cpp')
-rw-r--r-- | searchsummary/src/vespa/juniper/tokenizer.cpp | 15 |
1 files changed, 7 insertions, 8 deletions
```diff
diff --git a/searchsummary/src/vespa/juniper/tokenizer.cpp b/searchsummary/src/vespa/juniper/tokenizer.cpp
index cd3c9c410ce..211ffe7054a 100644
--- a/searchsummary/src/vespa/juniper/tokenizer.cpp
+++ b/searchsummary/src/vespa/juniper/tokenizer.cpp
@@ -8,11 +8,10 @@
 #include <vespa/log/log.h>
 LOG_SETUP(".juniper.tokenizer");
-JuniperTokenizer::JuniperTokenizer(const Fast_WordFolder* wordfolder,
-                                   const char* text, size_t len, ITokenProcessor* successor,
-                                   const juniper::SpecialTokenRegistry * registry) :
+JuniperTokenizer::JuniperTokenizer(const Fast_WordFolder* wordfolder, const char* text, size_t len,
+                                   ITokenProcessor* successor, const juniper::SpecialTokenRegistry * registry) :
     _wordfolder(wordfolder), _text(text), _len(len), _successor(successor), _registry(registry),
-    _charpos(0), _wordpos(0)
+    _charpos(0), _wordpos(0), _buffer()
 {
 }
@@ -32,19 +31,19 @@ void JuniperTokenizer::scan()
     const char* src = _text;
     const char* src_end = _text + _len;
-    const char* startpos = NULL;
+    const char* startpos = nullptr;
     ucs4_t* dst = _buffer;
     ucs4_t* dst_end = dst + TOKEN_DSTLEN;
     size_t result_len;
     while (src < src_end) {
-        if (_registry == NULL) {
+        if (_registry == nullptr) {
             // explicit prefetching seems to have negative effect with many threads
             src = _wordfolder->UCS4Tokenize(src, src_end, dst, dst_end, startpos, result_len);
         } else {
             const char * tmpSrc = _registry->tokenize(src, src_end, dst, dst_end, startpos, result_len);
-            if (tmpSrc == NULL) {
+            if (tmpSrc == nullptr) {
                 src = _wordfolder->UCS4Tokenize(src, src_end, dst, dst_end, startpos, result_len);
             } else {
                 src = tmpSrc;
@@ -63,6 +62,6 @@ void JuniperTokenizer::scan()
     }
     token.bytepos = _len;
     token.bytelen = 0;
-    token.token = NULL;
+    token.token = nullptr;
     _successor->handle_end(token);
 }
```