summary | refs | log | tree | commit | diff | stats
path: root/streamingvisitors
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2024-01-10 10:59:27 +0000
committer Henning Baldersheim <balder@yahoo-inc.com> 2024-01-10 10:59:27 +0000
commit 5bdad953f6d91cb26139ef6506c3748531dc708a (patch)
tree 3e268a4f3e98ee62a9ed15e3ab3ffe0b38c9579d /streamingvisitors
parent 3f7017773ce147a2d65a9835acdfd682dfafd54a (diff)
Use the normalize_mode config.
Diffstat (limited to 'streamingvisitors')
-rw-r--r-- streamingvisitors/src/tests/searcher/searcher_test.cpp 26
-rw-r--r-- streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp 13
-rw-r--r-- streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp 3
-rw-r--r-- streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h 26
-rw-r--r-- streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.h 2
-rw-r--r-- streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.h 10
-rw-r--r-- streamingvisitors/src/vespa/vsm/searcher/utf8suffixstringfieldsearcher.h 3
-rw-r--r-- streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp 21
8 files changed, 45 insertions, 59 deletions
diff --git a/streamingvisitors/src/tests/searcher/searcher_test.cpp b/streamingvisitors/src/tests/searcher/searcher_test.cpp
index a691d7671f9..74d8fdc4bf3 100644
--- a/streamingvisitors/src/tests/searcher/searcher_test.cpp
+++ b/streamingvisitors/src/tests/searcher/searcher_test.cpp
@@ -441,11 +441,11 @@ testStrChrFieldSearcher(StrChrFieldSearcher & fs)
assertString(fs, StringList().add("oper").add("tor"), field, HitsList().add(Hits()).add(Hits()));
assertString(fs, StringList().add("and").add("overloading"), field, HitsList().add(Hits().add(1)).add(Hits().add(3)));
- fs.setMatchType(FieldSearcher::PREFIX);
+ fs.match_type(FieldSearcher::PREFIX);
assertString(fs, "oper", field, Hits().add(0).add(2));
assertString(fs, StringList().add("oper").add("tor"), field, HitsList().add(Hits().add(0).add(2)).add(Hits()));
- fs.setMatchType(FieldSearcher::REGULAR);
+ fs.match_type(FieldSearcher::REGULAR);
if (!EXPECT_TRUE(testStringFieldInfo(fs))) return false;
{ // test handling of several underscores
@@ -554,12 +554,12 @@ TEST("utf8 substring search with empty term")
TEST("utf8 suffix search") {
UTF8SuffixStringFieldSearcher fs(0);
std::string field = "operators and operator overloading";
- assertString(fs, "rsand", field, Hits());
- assertString(fs, "tor", field, Hits().add(2));
- assertString(fs, "tors", field, Hits().add(0));
+ TEST_DO(assertString(fs, "rsand", field, Hits()));
+ TEST_DO(assertString(fs, "tor", field, Hits().add(2)));
+ TEST_DO(assertString(fs, "tors", field, Hits().add(0)));
- assertString(fs, StringList().add("an").add("din"), field, HitsList().add(Hits()).add(Hits()));
- assertString(fs, StringList().add("nd").add("g"), field, HitsList().add(Hits().add(1)).add(Hits().add(3)));
+ TEST_DO(assertString(fs, StringList().add("an").add("din"), field, HitsList().add(Hits()).add(Hits())));
+ TEST_DO(assertString(fs, StringList().add("nd").add("g"), field, HitsList().add(Hits().add(1)).add(Hits().add(3))));
EXPECT_TRUE(testStringFieldInfo(fs));
}
@@ -591,22 +591,22 @@ TEST("utf8 flexible searcher"){
// prefix
assertString(fs, "vesp*", "vespa", Hits().add(0));
- fs.setMatchType(FieldSearcher::PREFIX);
+ fs.match_type(FieldSearcher::PREFIX);
assertString(fs, "vesp", "vespa", Hits().add(0));
// substring
- fs.setMatchType(FieldSearcher::REGULAR);
+ fs.match_type(FieldSearcher::REGULAR);
assertString(fs, "*esp*", "vespa", Hits().add(0));
- fs.setMatchType(FieldSearcher::SUBSTRING);
+ fs.match_type(FieldSearcher::SUBSTRING);
assertString(fs, "esp", "vespa", Hits().add(0));
// suffix
- fs.setMatchType(FieldSearcher::REGULAR);
+ fs.match_type(FieldSearcher::REGULAR);
assertString(fs, "*espa", "vespa", Hits().add(0));
- fs.setMatchType(FieldSearcher::SUFFIX);
+ fs.match_type(FieldSearcher::SUFFIX);
assertString(fs, "espa", "vespa", Hits().add(0));
- fs.setMatchType(FieldSearcher::REGULAR);
+ fs.match_type(FieldSearcher::REGULAR);
EXPECT_TRUE(testStringFieldInfo(fs));
}
diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
index 88556778481..cdd1a018d84 100644
--- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
+++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
@@ -42,6 +42,7 @@ using search::aggregation::HitsAggregationResult;
using search::attribute::IAttributeVector;
using search::expression::ConfigureStaticParams;
using search::streaming::Query;
+using search::streaming::Normalizing;
using search::streaming::QueryTermList;
using storage::StorageComponent;
using storage::VisitorEnvironment;
@@ -329,11 +330,11 @@ SearchVisitor::is_text_matching(vespalib::stringref index) const noexcept {
namespace {
uint32_t
-count_exact(const vsm::FieldSearchSpecMapT & specMap, const StringFieldIdTMap & fieldIdMap) {
+count_normalize_lowercase(const vsm::FieldSearchSpecMapT & specMap, const StringFieldIdTMap & fieldIdMap) {
size_t count = 0;
for (const auto & fieldId : fieldIdMap.map()) {
auto found = specMap.find(fieldId.second);
- if ((found != specMap.end()) && found->second.searcher().exact()) {
+ if ((found != specMap.end()) && found->second.searcher().normalize_mode() == Normalizing::LOWERCASE) {
count++;
}
}
@@ -341,11 +342,11 @@ count_exact(const vsm::FieldSearchSpecMapT & specMap, const StringFieldIdTMap &
}
uint32_t
-count_cased(const vsm::FieldSearchSpecMapT & specMap, const StringFieldIdTMap & fieldIdMap) {
+count_normalize_none(const vsm::FieldSearchSpecMapT & specMap, const StringFieldIdTMap & fieldIdMap) {
size_t count = 0;
for (const auto & fieldId : fieldIdMap.map()) {
auto found = specMap.find(fieldId.second);
- if ((found != specMap.end()) && found->second.searcher().cased()) {
+ if ((found != specMap.end()) && found->second.searcher().normalize_mode() == Normalizing::NONE) {
count++;
}
}
@@ -358,8 +359,8 @@ SearchMethodInfo::Normalizing
SearchVisitor::normalizing_mode(vespalib::stringref index) const noexcept {
StringFieldIdTMap fieldIdMap;
_fieldSearchSpecMap.addFieldsFromIndex(index, fieldIdMap);
- if (count_cased(_fieldSearchSpecMap.specMap(), fieldIdMap) == fieldIdMap.map().size()) return Normalizing::NONE;
- if (count_exact(_fieldSearchSpecMap.specMap(), fieldIdMap) == fieldIdMap.map().size()) return Normalizing::LOWERCASE;
+ if (count_normalize_none(_fieldSearchSpecMap.specMap(), fieldIdMap) == fieldIdMap.map().size()) return Normalizing::NONE;
+ if (count_normalize_lowercase(_fieldSearchSpecMap.specMap(), fieldIdMap) == fieldIdMap.map().size()) return Normalizing::LOWERCASE;
return Normalizing::LOWERCASE_AND_FOLD;
}
diff --git a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp
index b9e1fe8f83c..5e06ae41a03 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp
+++ b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp
@@ -51,6 +51,7 @@ FieldSearcher::FieldSearcher(FieldIdT fId, bool defaultPrefix) noexcept
: FieldSearcherBase(),
_field(fId),
_matchType(defaultPrefix ? PREFIX : REGULAR),
+ _normalize_mode(Normalizing::LOWERCASE_AND_FOLD),
_maxFieldLength(0x100000),
_currentElementId(0),
_currentElementWeight(1),
@@ -69,7 +70,7 @@ FieldSearcher::search(const StorageDocument & doc)
fInfo.setHitOffset(qt->getHitList().size());
}
onSearch(doc);
- for(auto qt : _qtl) {
+ for (auto qt : _qtl) {
QueryTerm::FieldInfo & fInfo = qt->getFieldInfo(field());
fInfo.setHitCount(qt->getHitList().size() - fInfo.getHitOffset());
fInfo.setFieldLength(_words);
diff --git a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h
index 75ace16328b..c5bca6f3899 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h
+++ b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h
@@ -34,13 +34,13 @@ protected:
class FieldSearcher : public FieldSearcherBase
{
public:
+ using Normalizing = search::streaming::Normalizing;
enum MatchType {
REGULAR,
PREFIX,
SUBSTRING,
SUFFIX,
EXACT,
- CASED
};
explicit FieldSearcher(FieldIdT fId) noexcept : FieldSearcher(fId, false) {}
@@ -51,21 +51,22 @@ public:
virtual void prepare(search::streaming::QueryTermList& qtl, const SharedSearcherBuf& buf,
const vsm::FieldPathMapT& field_paths, search::fef::IQueryEnvironment& query_env);
- FieldIdT field() const { return _field; }
- void field(FieldIdT v) { _field = v; prepareFieldId(); }
- bool prefix() const { return _matchType == PREFIX; }
- bool substring() const { return _matchType == SUBSTRING; }
- bool suffix() const { return _matchType == SUFFIX; }
- bool exact() const { return _matchType == EXACT; }
- bool cased() const { return _matchType == CASED; }
- void setMatchType(MatchType mt) { _matchType = mt; }
- MatchType match_type() const noexcept { return _matchType; }
+ FieldIdT field() const noexcept { return _field; }
+ bool prefix() const noexcept { return _matchType == PREFIX; }
+ bool substring() const noexcept { return _matchType == SUBSTRING; }
+ bool suffix() const noexcept { return _matchType == SUFFIX; }
+ bool exact() const noexcept { return _matchType == EXACT; }
+ Normalizing normalize_mode() const noexcept { return _normalize_mode; }
+ MatchType match_type() const noexcept { return _matchType; }
+ void match_type(MatchType mt) noexcept { _matchType = mt; }
+ void normalize_mode(Normalizing mode) noexcept { _normalize_mode = mode; }
+ void field(FieldIdT v) noexcept { _field = v; prepareFieldId(); }
static void init();
static search::byte fold(search::byte c) { return _foldLowCase[c]; }
static search::byte iswordchar(search::byte c) { return _wordChar[c]; }
static search::byte isspace(search::byte c) { return ! iswordchar(c); }
static size_t countWords(const FieldRef & f);
- int32_t getCurrentWeight() const { return _currentElementWeight; }
+ int32_t currentWeight() const { return _currentElementWeight; }
FieldSearcher & maxFieldLength(uint32_t maxFieldLength_) { _maxFieldLength = maxFieldLength_; return *this; }
size_t maxFieldLength() const { return _maxFieldLength; }
@@ -91,6 +92,7 @@ private:
virtual void onStructValue(const document::StructFieldValue &) { }
FieldIdT _field;
MatchType _matchType;
+ Normalizing _normalize_mode;
unsigned _maxFieldLength;
uint32_t _currentElementId;
int32_t _currentElementWeight; // Contains the weight of the current item being evaluated.
@@ -104,7 +106,7 @@ protected:
* For each call to onValue() a batch of words are processed, and the position is local to this batch.
**/
void addHit(search::streaming::QueryTerm & qt, uint32_t pos) const {
- qt.add(_words + pos, field(), _currentElementId, getCurrentWeight());
+ qt.add(_words + pos, field(), _currentElementId, _currentElementWeight);
}
public:
static search::byte _foldLowCase[256];
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.h
index a01a9cd088d..aaf8b940dc8 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.h
+++ b/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.h
@@ -20,7 +20,7 @@ public:
explicit UTF8ExactStringFieldSearcher(FieldIdT fId)
: UTF8StringFieldSearcherBase(fId)
{
- setMatchType(EXACT);
+ match_type(EXACT);
}
};
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.h b/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.h
index ed76fb79f4e..115cddce619 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.h
+++ b/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.h
@@ -62,7 +62,6 @@ protected:
SharedSearcherBuf _buf;
using byte = search::byte;
- using Normalizing = search::streaming::Normalizing;
class TokenizeReader {
public:
@@ -121,15 +120,6 @@ protected:
template<typename Reader>
void tokenize(Reader & reader);
- Normalizing normalize_mode() const noexcept {
- switch (match_type()) {
- case EXACT: return Normalizing::LOWERCASE;
- case CASED: return Normalizing::NONE;
- default: return Normalizing::LOWERCASE_AND_FOLD;
- }
- return Normalizing::LOWERCASE_AND_FOLD;
- }
-
/**
* Matches the given query term against the words in the given field reference
* using exact or prefix match strategy.
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8suffixstringfieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/utf8suffixstringfieldsearcher.h
index c20710e63ab..dc3bc214b49 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/utf8suffixstringfieldsearcher.h
+++ b/streamingvisitors/src/vespa/vsm/searcher/utf8suffixstringfieldsearcher.h
@@ -3,8 +3,7 @@
#include "utf8stringfieldsearcherbase.h"
-namespace vsm
-{
+namespace vsm {
/**
* This class does suffix utf8 searches.
diff --git a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp
index 22934ba74d2..715c19a0bb7 100644
--- a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp
+++ b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp
@@ -31,17 +31,13 @@ namespace {
void
setMatchType(FieldSearcherContainer & searcher, vespalib::stringref arg1) {
if (arg1 == "prefix") {
- searcher->setMatchType(FieldSearcher::PREFIX);
+ searcher->match_type(FieldSearcher::PREFIX);
} else if (arg1 == "substring") {
- searcher->setMatchType(FieldSearcher::SUBSTRING);
+ searcher->match_type(FieldSearcher::SUBSTRING);
} else if (arg1 == "suffix") {
- searcher->setMatchType(FieldSearcher::SUFFIX);
- } else if (arg1 == "exact") {
- searcher->setMatchType(FieldSearcher::EXACT);
- } else if (arg1 == "word") {
- searcher->setMatchType(FieldSearcher::EXACT);
- } else if (arg1 == "cased") {
- searcher->setMatchType(FieldSearcher::CASED);
+ searcher->match_type(FieldSearcher::SUFFIX);
+ } else if ((arg1 == "exact") || (arg1 == "word")) {
+ searcher->match_type(FieldSearcher::EXACT);
}
}
@@ -86,12 +82,8 @@ FieldSearchSpec::FieldSearchSpec(const FieldIdT & fid, const vespalib::string &
_searcher = std::make_unique<UTF8SubStringFieldSearcher>(fid);
} else if (_arg1 == "suffix") {
_searcher = std::make_unique<UTF8SuffixStringFieldSearcher>(fid);
- } else if (_arg1 == "exact") {
+ } else if ((_arg1 == "exact") || (_arg1 == "word")) {
_searcher = std::make_unique<UTF8ExactStringFieldSearcher>(fid);
- } else if (_arg1 == "word") {
- _searcher = std::make_unique<UTF8ExactStringFieldSearcher>(fid);
- } else if (_arg1 == "cased") {
- _searcher = std::make_unique<UTF8StrChrFieldSearcher>(fid);
} else if (searchDef == VsmfieldsConfig::Fieldspec::Searchmethod::UTF8) {
_searcher = std::make_unique<UTF8StrChrFieldSearcher>(fid);
} else {
@@ -124,6 +116,7 @@ FieldSearchSpec::FieldSearchSpec(const FieldIdT & fid, const vespalib::string &
if (_searcher) {
setMatchType(_searcher, _arg1);
_searcher->maxFieldLength(maxLength());
+ _searcher->normalize_mode(_normalize_mode);
}
}