about | summary | refs | log | tree | commit | diff | stats
path: root/streamingvisitors/src/vespa/vsm/searcher
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2024-01-10 10:59:27 +0000
committer Henning Baldersheim <balder@yahoo-inc.com> 2024-01-10 10:59:27 +0000
commit 5bdad953f6d91cb26139ef6506c3748531dc708a (patch)
tree 3e268a4f3e98ee62a9ed15e3ab3ffe0b38c9579d /streamingvisitors/src/vespa/vsm/searcher
parent 3f7017773ce147a2d65a9835acdfd682dfafd54a (diff)
Use the normalize_mode config.
Diffstat (limited to 'streamingvisitors/src/vespa/vsm/searcher')
-rw-r--r-- streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp 3
-rw-r--r-- streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h 26
-rw-r--r-- streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.h 2
-rw-r--r-- streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.h 10
-rw-r--r-- streamingvisitors/src/vespa/vsm/searcher/utf8suffixstringfieldsearcher.h 3
5 files changed, 18 insertions, 26 deletions
diff --git a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp
index b9e1fe8f83c..5e06ae41a03 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp
+++ b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp
@@ -51,6 +51,7 @@ FieldSearcher::FieldSearcher(FieldIdT fId, bool defaultPrefix) noexcept
: FieldSearcherBase(),
_field(fId),
_matchType(defaultPrefix ? PREFIX : REGULAR),
+ _normalize_mode(Normalizing::LOWERCASE_AND_FOLD),
_maxFieldLength(0x100000),
_currentElementId(0),
_currentElementWeight(1),
@@ -69,7 +70,7 @@ FieldSearcher::search(const StorageDocument & doc)
fInfo.setHitOffset(qt->getHitList().size());
}
onSearch(doc);
- for(auto qt : _qtl) {
+ for (auto qt : _qtl) {
QueryTerm::FieldInfo & fInfo = qt->getFieldInfo(field());
fInfo.setHitCount(qt->getHitList().size() - fInfo.getHitOffset());
fInfo.setFieldLength(_words);
diff --git a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h
index 75ace16328b..c5bca6f3899 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h
+++ b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h
@@ -34,13 +34,13 @@ protected:
class FieldSearcher : public FieldSearcherBase
{
public:
+ using Normalizing = search::streaming::Normalizing;
enum MatchType {
REGULAR,
PREFIX,
SUBSTRING,
SUFFIX,
EXACT,
- CASED
};
explicit FieldSearcher(FieldIdT fId) noexcept : FieldSearcher(fId, false) {}
@@ -51,21 +51,22 @@ public:
virtual void prepare(search::streaming::QueryTermList& qtl, const SharedSearcherBuf& buf,
const vsm::FieldPathMapT& field_paths, search::fef::IQueryEnvironment& query_env);
- FieldIdT field() const { return _field; }
- void field(FieldIdT v) { _field = v; prepareFieldId(); }
- bool prefix() const { return _matchType == PREFIX; }
- bool substring() const { return _matchType == SUBSTRING; }
- bool suffix() const { return _matchType == SUFFIX; }
- bool exact() const { return _matchType == EXACT; }
- bool cased() const { return _matchType == CASED; }
- void setMatchType(MatchType mt) { _matchType = mt; }
- MatchType match_type() const noexcept { return _matchType; }
+ FieldIdT field() const noexcept { return _field; }
+ bool prefix() const noexcept { return _matchType == PREFIX; }
+ bool substring() const noexcept { return _matchType == SUBSTRING; }
+ bool suffix() const noexcept { return _matchType == SUFFIX; }
+ bool exact() const noexcept { return _matchType == EXACT; }
+ Normalizing normalize_mode() const noexcept { return _normalize_mode; }
+ MatchType match_type() const noexcept { return _matchType; }
+ void match_type(MatchType mt) noexcept { _matchType = mt; }
+ void normalize_mode(Normalizing mode) noexcept { _normalize_mode = mode; }
+ void field(FieldIdT v) noexcept { _field = v; prepareFieldId(); }
static void init();
static search::byte fold(search::byte c) { return _foldLowCase[c]; }
static search::byte iswordchar(search::byte c) { return _wordChar[c]; }
static search::byte isspace(search::byte c) { return ! iswordchar(c); }
static size_t countWords(const FieldRef & f);
- int32_t getCurrentWeight() const { return _currentElementWeight; }
+ int32_t currentWeight() const { return _currentElementWeight; }
FieldSearcher & maxFieldLength(uint32_t maxFieldLength_) { _maxFieldLength = maxFieldLength_; return *this; }
size_t maxFieldLength() const { return _maxFieldLength; }
@@ -91,6 +92,7 @@ private:
virtual void onStructValue(const document::StructFieldValue &) { }
FieldIdT _field;
MatchType _matchType;
+ Normalizing _normalize_mode;
unsigned _maxFieldLength;
uint32_t _currentElementId;
int32_t _currentElementWeight; // Contains the weight of the current item being evaluated.
@@ -104,7 +106,7 @@ protected:
* For each call to onValue() a batch of words are processed, and the position is local to this batch.
**/
void addHit(search::streaming::QueryTerm & qt, uint32_t pos) const {
- qt.add(_words + pos, field(), _currentElementId, getCurrentWeight());
+ qt.add(_words + pos, field(), _currentElementId, _currentElementWeight);
}
public:
static search::byte _foldLowCase[256];
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.h
index a01a9cd088d..aaf8b940dc8 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.h
+++ b/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.h
@@ -20,7 +20,7 @@ public:
explicit UTF8ExactStringFieldSearcher(FieldIdT fId)
: UTF8StringFieldSearcherBase(fId)
{
- setMatchType(EXACT);
+ match_type(EXACT);
}
};
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.h b/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.h
index ed76fb79f4e..115cddce619 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.h
+++ b/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.h
@@ -62,7 +62,6 @@ protected:
SharedSearcherBuf _buf;
using byte = search::byte;
- using Normalizing = search::streaming::Normalizing;
class TokenizeReader {
public:
@@ -121,15 +120,6 @@ protected:
template<typename Reader>
void tokenize(Reader & reader);
- Normalizing normalize_mode() const noexcept {
- switch (match_type()) {
- case EXACT: return Normalizing::LOWERCASE;
- case CASED: return Normalizing::NONE;
- default: return Normalizing::LOWERCASE_AND_FOLD;
- }
- return Normalizing::LOWERCASE_AND_FOLD;
- }
-
/**
* Matches the given query term against the words in the given field reference
* using exact or prefix match strategy.
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8suffixstringfieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/utf8suffixstringfieldsearcher.h
index c20710e63ab..dc3bc214b49 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/utf8suffixstringfieldsearcher.h
+++ b/streamingvisitors/src/vespa/vsm/searcher/utf8suffixstringfieldsearcher.h
@@ -3,8 +3,7 @@
#include "utf8stringfieldsearcherbase.h"
-namespace vsm
-{
+namespace vsm {
/**
* This class does suffix utf8 searches.