summary | refs | log | tree | commit | diff | stats
path: root/searchsummary
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2023-12-21 10:56:49 +0000
committer Henning Baldersheim <balder@yahoo-inc.com> 2023-12-21 10:56:49 +0000
commit 543366294b1e8ae8a01186c25b74e36ed4c3ae35 (patch)
tree f62fb82c65a6152fabe944caa4e719051f4ab032 /searchsummary
parent ef3db955e75e6df68a2a358feb5b95e44979377f (diff)
- Separate methods for lowercasing, and lowercasing and folding.
- Hide implementations and use accessors.
- Minor code cleanup.
Diffstat (limited to 'searchsummary')
-rw-r--r-- searchsummary/src/vespa/juniper/tokenizer.cpp 15
-rw-r--r-- searchsummary/src/vespa/juniper/tokenizer.h 16
2 files changed, 15 insertions, 16 deletions
diff --git a/searchsummary/src/vespa/juniper/tokenizer.cpp b/searchsummary/src/vespa/juniper/tokenizer.cpp
index cd3c9c410ce..211ffe7054a 100644
--- a/searchsummary/src/vespa/juniper/tokenizer.cpp
+++ b/searchsummary/src/vespa/juniper/tokenizer.cpp
@@ -8,11 +8,10 @@
#include <vespa/log/log.h>
LOG_SETUP(".juniper.tokenizer");
-JuniperTokenizer::JuniperTokenizer(const Fast_WordFolder* wordfolder,
- const char* text, size_t len, ITokenProcessor* successor,
- const juniper::SpecialTokenRegistry * registry) :
+JuniperTokenizer::JuniperTokenizer(const Fast_WordFolder* wordfolder, const char* text, size_t len,
+ ITokenProcessor* successor, const juniper::SpecialTokenRegistry * registry) :
_wordfolder(wordfolder), _text(text), _len(len), _successor(successor), _registry(registry),
- _charpos(0), _wordpos(0)
+ _charpos(0), _wordpos(0), _buffer()
{ }
@@ -32,19 +31,19 @@ void JuniperTokenizer::scan()
const char* src = _text;
const char* src_end = _text + _len;
- const char* startpos = NULL;
+ const char* startpos = nullptr;
ucs4_t* dst = _buffer;
ucs4_t* dst_end = dst + TOKEN_DSTLEN;
size_t result_len;
while (src < src_end)
{
- if (_registry == NULL) {
+ if (_registry == nullptr) {
// explicit prefetching seems to have negative effect with many threads
src = _wordfolder->UCS4Tokenize(src, src_end, dst, dst_end, startpos, result_len);
} else {
const char * tmpSrc = _registry->tokenize(src, src_end, dst, dst_end, startpos, result_len);
- if (tmpSrc == NULL) {
+ if (tmpSrc == nullptr) {
src = _wordfolder->UCS4Tokenize(src, src_end, dst, dst_end, startpos, result_len);
} else {
src = tmpSrc;
@@ -63,6 +62,6 @@ void JuniperTokenizer::scan()
}
token.bytepos = _len;
token.bytelen = 0;
- token.token = NULL;
+ token.token = nullptr;
_successor->handle_end(token);
}
diff --git a/searchsummary/src/vespa/juniper/tokenizer.h b/searchsummary/src/vespa/juniper/tokenizer.h
index 68ef8118f5d..910da3f67ef 100644
--- a/searchsummary/src/vespa/juniper/tokenizer.h
+++ b/searchsummary/src/vespa/juniper/tokenizer.h
@@ -12,8 +12,8 @@ class JuniperTokenizer
{
public:
JuniperTokenizer(const Fast_WordFolder* wordfolder,
- const char* text, size_t len, ITokenProcessor* = NULL,
- const juniper::SpecialTokenRegistry * registry = NULL);
+ const char* text, size_t len, ITokenProcessor* = nullptr,
+ const juniper::SpecialTokenRegistry * registry = nullptr);
inline void SetSuccessor(ITokenProcessor* successor) { _successor = successor; }
void setRegistry(const juniper::SpecialTokenRegistry * registry) { _registry = registry; }
@@ -23,13 +23,13 @@ public:
void scan();
private:
const Fast_WordFolder* _wordfolder;
- const char* _text; // The current input text
- size_t _len; // Length of the text input
- ITokenProcessor* _successor;
+ const char* _text; // The current input text
+ size_t _len; // Length of the text input
+ ITokenProcessor* _successor;
const juniper::SpecialTokenRegistry * _registry;
- off_t _charpos; // Last utf8 character position
- off_t _wordpos; // Offset in numbering of words compared to input (as result of splits)
- ucs4_t _buffer[TOKEN_DSTLEN]; // Temp. buffer to store folding result
+ off_t _charpos; // Last utf8 character position
+ off_t _wordpos; // Offset in numbering of words compared to input (as result of splits)
+ ucs4_t _buffer[TOKEN_DSTLEN]; // Temp. buffer to store folding result
private:
JuniperTokenizer(const JuniperTokenizer&);
JuniperTokenizer& operator=(const JuniperTokenizer&);