diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-12-21 10:56:49 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2023-12-21 10:56:49 +0000 |
commit | 543366294b1e8ae8a01186c25b74e36ed4c3ae35 (patch) | |
tree | f62fb82c65a6152fabe944caa4e719051f4ab032 /searchsummary | |
parent | ef3db955e75e6df68a2a358feb5b95e44979377f (diff) |
- Separate methods for lowercasing only, and for lowercasing combined with folding.
- Hide implementations and use accessors.
- Minor code cleanup.
Diffstat (limited to 'searchsummary')
-rw-r--r-- | searchsummary/src/vespa/juniper/tokenizer.cpp | 15 |
-rw-r--r-- | searchsummary/src/vespa/juniper/tokenizer.h | 16 |
2 files changed, 15 insertions, 16 deletions
diff --git a/searchsummary/src/vespa/juniper/tokenizer.cpp b/searchsummary/src/vespa/juniper/tokenizer.cpp index cd3c9c410ce..211ffe7054a 100644 --- a/searchsummary/src/vespa/juniper/tokenizer.cpp +++ b/searchsummary/src/vespa/juniper/tokenizer.cpp @@ -8,11 +8,10 @@ #include <vespa/log/log.h> LOG_SETUP(".juniper.tokenizer"); -JuniperTokenizer::JuniperTokenizer(const Fast_WordFolder* wordfolder, - const char* text, size_t len, ITokenProcessor* successor, - const juniper::SpecialTokenRegistry * registry) : +JuniperTokenizer::JuniperTokenizer(const Fast_WordFolder* wordfolder, const char* text, size_t len, + ITokenProcessor* successor, const juniper::SpecialTokenRegistry * registry) : _wordfolder(wordfolder), _text(text), _len(len), _successor(successor), _registry(registry), - _charpos(0), _wordpos(0) + _charpos(0), _wordpos(0), _buffer() { } @@ -32,19 +31,19 @@ void JuniperTokenizer::scan() const char* src = _text; const char* src_end = _text + _len; - const char* startpos = NULL; + const char* startpos = nullptr; ucs4_t* dst = _buffer; ucs4_t* dst_end = dst + TOKEN_DSTLEN; size_t result_len; while (src < src_end) { - if (_registry == NULL) { + if (_registry == nullptr) { // explicit prefetching seems to have negative effect with many threads src = _wordfolder->UCS4Tokenize(src, src_end, dst, dst_end, startpos, result_len); } else { const char * tmpSrc = _registry->tokenize(src, src_end, dst, dst_end, startpos, result_len); - if (tmpSrc == NULL) { + if (tmpSrc == nullptr) { src = _wordfolder->UCS4Tokenize(src, src_end, dst, dst_end, startpos, result_len); } else { src = tmpSrc; @@ -63,6 +62,6 @@ void JuniperTokenizer::scan() } token.bytepos = _len; token.bytelen = 0; - token.token = NULL; + token.token = nullptr; _successor->handle_end(token); } diff --git a/searchsummary/src/vespa/juniper/tokenizer.h b/searchsummary/src/vespa/juniper/tokenizer.h index 68ef8118f5d..910da3f67ef 100644 --- a/searchsummary/src/vespa/juniper/tokenizer.h +++ 
b/searchsummary/src/vespa/juniper/tokenizer.h @@ -12,8 +12,8 @@ class JuniperTokenizer { public: JuniperTokenizer(const Fast_WordFolder* wordfolder, - const char* text, size_t len, ITokenProcessor* = NULL, - const juniper::SpecialTokenRegistry * registry = NULL); + const char* text, size_t len, ITokenProcessor* = nullptr, + const juniper::SpecialTokenRegistry * registry = nullptr); inline void SetSuccessor(ITokenProcessor* successor) { _successor = successor; } void setRegistry(const juniper::SpecialTokenRegistry * registry) { _registry = registry; } @@ -23,13 +23,13 @@ public: void scan(); private: const Fast_WordFolder* _wordfolder; - const char* _text; // The current input text - size_t _len; // Length of the text input - ITokenProcessor* _successor; + const char* _text; // The current input text + size_t _len; // Length of the text input + ITokenProcessor* _successor; const juniper::SpecialTokenRegistry * _registry; - off_t _charpos; // Last utf8 character position - off_t _wordpos; // Offset in numbering of words compared to input (as result of splits) - ucs4_t _buffer[TOKEN_DSTLEN]; // Temp. buffer to store folding result + off_t _charpos; // Last utf8 character position + off_t _wordpos; // Offset in numbering of words compared to input (as result of splits) + ucs4_t _buffer[TOKEN_DSTLEN]; // Temp. buffer to store folding result private: JuniperTokenizer(const JuniperTokenizer&); JuniperTokenizer& operator=(const JuniperTokenizer&); |