diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2024-01-10 10:59:27 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2024-01-10 10:59:27 +0000 |
commit | 5bdad953f6d91cb26139ef6506c3748531dc708a (patch) | |
tree | 3e268a4f3e98ee62a9ed15e3ab3ffe0b38c9579d /streamingvisitors/src/vespa/vsm/searcher | |
parent | 3f7017773ce147a2d65a9835acdfd682dfafd54a (diff) |
Use the normalize_mode config.
Diffstat (limited to 'streamingvisitors/src/vespa/vsm/searcher')
5 files changed, 18 insertions, 26 deletions
diff --git a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp index b9e1fe8f83c..5e06ae41a03 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp +++ b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp @@ -51,6 +51,7 @@ FieldSearcher::FieldSearcher(FieldIdT fId, bool defaultPrefix) noexcept : FieldSearcherBase(), _field(fId), _matchType(defaultPrefix ? PREFIX : REGULAR), + _normalize_mode(Normalizing::LOWERCASE_AND_FOLD), _maxFieldLength(0x100000), _currentElementId(0), _currentElementWeight(1), @@ -69,7 +70,7 @@ FieldSearcher::search(const StorageDocument & doc) fInfo.setHitOffset(qt->getHitList().size()); } onSearch(doc); - for(auto qt : _qtl) { + for (auto qt : _qtl) { QueryTerm::FieldInfo & fInfo = qt->getFieldInfo(field()); fInfo.setHitCount(qt->getHitList().size() - fInfo.getHitOffset()); fInfo.setFieldLength(_words); diff --git a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h index 75ace16328b..c5bca6f3899 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h +++ b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h @@ -34,13 +34,13 @@ protected: class FieldSearcher : public FieldSearcherBase { public: + using Normalizing = search::streaming::Normalizing; enum MatchType { REGULAR, PREFIX, SUBSTRING, SUFFIX, EXACT, - CASED }; explicit FieldSearcher(FieldIdT fId) noexcept : FieldSearcher(fId, false) {} @@ -51,21 +51,22 @@ public: virtual void prepare(search::streaming::QueryTermList& qtl, const SharedSearcherBuf& buf, const vsm::FieldPathMapT& field_paths, search::fef::IQueryEnvironment& query_env); - FieldIdT field() const { return _field; } - void field(FieldIdT v) { _field = v; prepareFieldId(); } - bool prefix() const { return _matchType == PREFIX; } - bool substring() const { return _matchType == SUBSTRING; } - bool suffix() const { return _matchType == SUFFIX; } - bool exact() const { return _matchType == EXACT; } - bool cased() const { return _matchType == CASED; } - void setMatchType(MatchType mt) { _matchType = mt; } - MatchType match_type() const noexcept { return _matchType; } + FieldIdT field() const noexcept { return _field; } + bool prefix() const noexcept { return _matchType == PREFIX; } + bool substring() const noexcept { return _matchType == SUBSTRING; } + bool suffix() const noexcept { return _matchType == SUFFIX; } + bool exact() const noexcept { return _matchType == EXACT; } + Normalizing normalize_mode() const noexcept { return _normalize_mode; } + MatchType match_type() const noexcept { return _matchType; } + void match_type(MatchType mt) noexcept { _matchType = mt; } + void normalize_mode(Normalizing mode) noexcept { _normalize_mode = mode; } + void field(FieldIdT v) noexcept { _field = v; prepareFieldId(); } static void init(); static search::byte fold(search::byte c) { return _foldLowCase[c]; } static search::byte iswordchar(search::byte c) { return _wordChar[c]; } static search::byte isspace(search::byte c) { return ! iswordchar(c); } static size_t countWords(const FieldRef & f); - int32_t getCurrentWeight() const { return _currentElementWeight; } + int32_t currentWeight() const { return _currentElementWeight; } FieldSearcher & maxFieldLength(uint32_t maxFieldLength_) { _maxFieldLength = maxFieldLength_; return *this; } size_t maxFieldLength() const { return _maxFieldLength; } @@ -91,6 +92,7 @@ private: virtual void onStructValue(const document::StructFieldValue &) { } FieldIdT _field; MatchType _matchType; + Normalizing _normalize_mode; unsigned _maxFieldLength; uint32_t _currentElementId; int32_t _currentElementWeight; // Contains the weight of the current item being evaluated. @@ -104,7 +106,7 @@ protected: * For each call to onValue() a batch of words are processed, and the position is local to this batch. **/ void addHit(search::streaming::QueryTerm & qt, uint32_t pos) const { - qt.add(_words + pos, field(), _currentElementId, getCurrentWeight()); + qt.add(_words + pos, field(), _currentElementId, _currentElementWeight); } public: static search::byte _foldLowCase[256]; diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.h index a01a9cd088d..aaf8b940dc8 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.h +++ b/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.h @@ -20,7 +20,7 @@ public: explicit UTF8ExactStringFieldSearcher(FieldIdT fId) : UTF8StringFieldSearcherBase(fId) { - setMatchType(EXACT); + match_type(EXACT); } }; diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.h b/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.h index ed76fb79f4e..115cddce619 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.h +++ b/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.h @@ -62,7 +62,6 @@ protected: SharedSearcherBuf _buf; using byte = search::byte; - using Normalizing = search::streaming::Normalizing; class TokenizeReader { public: @@ -121,15 +120,6 @@ protected: template<typename Reader> void tokenize(Reader & reader); - Normalizing normalize_mode() const noexcept { - switch (match_type()) { - case EXACT: return Normalizing::LOWERCASE; - case CASED: return Normalizing::NONE; - default: return Normalizing::LOWERCASE_AND_FOLD; - } - return Normalizing::LOWERCASE_AND_FOLD; - } - /** * Matches the given query term against the words in the given field reference * using exact or prefix match strategy. diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8suffixstringfieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/utf8suffixstringfieldsearcher.h index c20710e63ab..dc3bc214b49 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/utf8suffixstringfieldsearcher.h +++ b/streamingvisitors/src/vespa/vsm/searcher/utf8suffixstringfieldsearcher.h @@ -3,8 +3,7 @@ #include "utf8stringfieldsearcherbase.h" -namespace vsm -{ +namespace vsm { /** * This class does suffix utf8 searches. |