summaryrefslogtreecommitdiffstats
path: root/streamingvisitors
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2024-01-03 10:03:12 +0000
committerHenning Baldersheim <balder@yahoo-inc.com>2024-01-05 08:29:15 +0000
commit192af4443cb572791c8f11520e8ebec4ee4e5a8e (patch)
tree755a603c0fe1b28116a24749f4f919ffee756c84 /streamingvisitors
parentd8b50e4eaea708fed984c7c6ccdd06ac48b358bf (diff)
- Fold query for streaming search based on either the query item type or the field definition.
- This ensures that query processing and document processing are symmetric for streaming search. We no longer rely on the Java query processing being symmetric with the backend C++ variant.
- Indexed search does no normalization in the backend and uses the query as is.
Diffstat (limited to 'streamingvisitors')
-rw-r--r--streamingvisitors/src/tests/searcher/searcher_test.cpp19
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/querytermdata.h8
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp16
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/searchvisitor.h1
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h2
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.h11
6 files changed, 43 insertions, 14 deletions
diff --git a/streamingvisitors/src/tests/searcher/searcher_test.cpp b/streamingvisitors/src/tests/searcher/searcher_test.cpp
index 1ce285c2103..83b84fffa11 100644
--- a/streamingvisitors/src/tests/searcher/searcher_test.cpp
+++ b/streamingvisitors/src/tests/searcher/searcher_test.cpp
@@ -21,6 +21,7 @@ using namespace document;
using search::streaming::HitList;
using search::streaming::QueryNodeResultFactory;
using search::streaming::QueryTerm;
+using search::streaming::Normalizing;
using search::streaming::QueryTermList;
using TermType = QueryTerm::Type;
using namespace vsm;
@@ -56,11 +57,11 @@ public:
class Query
{
private:
- void setupQuery(const StringList & terms) {
+ void setupQuery(const StringList & terms, Normalizing normalizing) {
for (const auto & term : terms) {
ParsedQueryTerm pqt = parseQueryTerm(term);
ParsedTerm pt = parseTerm(pqt.second);
- qtv.push_back(std::make_unique<QueryTerm>(eqnr.create(), pt.first, pqt.first.empty() ? "index" : pqt.first, pt.second));
+ qtv.push_back(std::make_unique<QueryTerm>(eqnr.create(), pt.first, pqt.first.empty() ? "index" : pqt.first, pt.second, normalizing));
}
for (const auto & i : qtv) {
qtl.push_back(i.get());
@@ -72,7 +73,9 @@ public:
QueryNodeResultFactory eqnr;
std::vector<QueryTerm::UP> qtv;
QueryTermList qtl;
- explicit Query(const StringList & terms);
+
+ explicit Query(const StringList & terms) : Query(terms, Normalizing::LOWERCASE_AND_FOLD) {}
+ Query(const StringList & terms, Normalizing normalizing);
~Query();
static ParsedQueryTerm parseQueryTerm(const std::string & queryTerm) {
size_t i = queryTerm.find(':');
@@ -94,8 +97,8 @@ public:
}
};
-Query::Query(const StringList & terms) : eqnr(), qtv(), qtl() {
- setupQuery(terms);
+Query::Query(const StringList & terms, Normalizing normalizing) : eqnr(), qtv(), qtl() {
+ setupQuery(terms, normalizing);
}
Query::~Query() = default;
@@ -286,8 +289,8 @@ bool
assertMatchTermSuffix(const std::string & term, const std::string & word)
{
QueryNodeResultFactory eqnr;
- QueryTerm qa(eqnr.create(), term, "index", TermType::WORD);
- QueryTerm qb(eqnr.create(), word, "index", TermType::WORD);
+ QueryTerm qa(eqnr.create(), term, "index", TermType::WORD, Normalizing::LOWERCASE_AND_FOLD);
+ QueryTerm qb(eqnr.create(), word, "index", TermType::WORD, Normalizing::LOWERCASE_AND_FOLD);
const ucs4_t * a;
size_t alen = qa.term(a);
const ucs4_t * b;
@@ -308,7 +311,7 @@ assertNumeric(FieldSearcher & fs, const StringList & query, const FieldValue & f
std::vector<QueryTerm::UP>
performSearch(FieldSearcher & fs, const StringList & query, const FieldValue & fv)
{
- Query q(query);
+ Query q(query, fs.exact() ? Normalizing::LOWERCASE : Normalizing::LOWERCASE_AND_FOLD);
// prepare field searcher
test::MockFieldSearcherEnv env;
diff --git a/streamingvisitors/src/vespa/searchvisitor/querytermdata.h b/streamingvisitors/src/vespa/searchvisitor/querytermdata.h
index 36176f70d1d..38d0e942fbc 100644
--- a/streamingvisitors/src/vespa/searchvisitor/querytermdata.h
+++ b/streamingvisitors/src/vespa/searchvisitor/querytermdata.h
@@ -22,17 +22,23 @@ public:
class SearchMethodInfo {
public:
+ using Normalizing = search::streaming::Normalizing;
virtual ~SearchMethodInfo() = default;
virtual bool is_text_matching(vespalib::stringref index) const noexcept = 0;
+ virtual Normalizing normalizing_mode(vespalib::stringref index) const noexcept = 0;
};
class QueryTermDataFactory final : public search::streaming::QueryNodeResultFactory {
public:
+ using Normalizing = search::streaming::Normalizing;
QueryTermDataFactory(const SearchMethodInfo * searchMethodInfo) noexcept : _searchMethodInfo(searchMethodInfo) {}
std::unique_ptr<search::streaming::QueryNodeResultBase> create() const override {
return std::make_unique<QueryTermData>();
}
- bool getRewriteFloatTerms(vespalib::stringref index ) const noexcept override {
+ Normalizing normalizing_mode(vespalib::stringref index) const noexcept override {
+ return _searchMethodInfo ? _searchMethodInfo->normalizing_mode(index) : Normalizing::LOWERCASE_AND_FOLD;
+ }
+ bool allow_float_terms_rewrite(vespalib::stringref index ) const noexcept override {
return _searchMethodInfo && _searchMethodInfo->is_text_matching(index);
}
private:
diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
index 49604135afc..4161adaf21f 100644
--- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
+++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
@@ -326,6 +326,22 @@ SearchVisitor::is_text_matching(vespalib::stringref index) const noexcept {
return false;
}
+SearchMethodInfo::Normalizing
+SearchVisitor::normalizing_mode(vespalib::stringref index) const noexcept {
+ StringFieldIdTMap fieldIdMap;
+ _fieldSearchSpecMap.addFieldsFromIndex(index, fieldIdMap);
+ size_t num_exact = 0;
+ for (const auto & fieldId : fieldIdMap.map()) {
+ auto found = _fieldSearchSpecMap.specMap().find(fieldId.second);
+ if ((found != _fieldSearchSpecMap.specMap().end()) && found->second.searcher().exact()) {
+ num_exact++;
+ }
+ }
+ return ((num_exact == 0) || (num_exact != fieldIdMap.map().size()))
+ ? Normalizing::LOWERCASE_AND_FOLD
+ : Normalizing::LOWERCASE;
+}
+
void
SearchVisitor::init(const Parameters & params)
{
diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h
index 709564bcf02..ce40b5ba742 100644
--- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h
+++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h
@@ -487,6 +487,7 @@ private:
void setupAttributeVector(const vsm::FieldPath &fieldPath);
bool is_text_matching(vespalib::stringref index) const noexcept override;
+ Normalizing normalizing_mode(vespalib::stringref index) const noexcept override;
};
class SearchVisitorFactory : public storage::VisitorFactory {
diff --git a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h
index 43443bd9cf4..e64c41f814f 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h
+++ b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h
@@ -77,7 +77,7 @@ private:
void onStructStart(const Content & c) override;
public:
- explicit IteratorHandler(FieldSearcher & searcher) : _searcher(searcher) {}
+ explicit IteratorHandler(FieldSearcher & searcher) noexcept : _searcher(searcher) {}
};
friend class IteratorHandler; // to allow calls to onValue();
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.h
index 997bed74787..dd6f31581a0 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.h
+++ b/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.h
@@ -12,13 +12,16 @@ namespace vsm
class UTF8ExactStringFieldSearcher : public UTF8StringFieldSearcherBase
{
protected:
- virtual size_t matchTerm(const FieldRef & f, search::streaming::QueryTerm & qt) override;
- virtual size_t matchTerms(const FieldRef & f, const size_t shortestTerm) override;
+ size_t matchTerm(const FieldRef & f, search::streaming::QueryTerm & qt) override;
+ size_t matchTerms(const FieldRef & f, size_t shortestTerm) override;
public:
std::unique_ptr<FieldSearcher> duplicate() const override;
- UTF8ExactStringFieldSearcher() : UTF8StringFieldSearcherBase() { }
- UTF8ExactStringFieldSearcher(FieldIdT fId) : UTF8StringFieldSearcherBase(fId) { }
+ UTF8ExactStringFieldSearcher(FieldIdT fId)
+ : UTF8StringFieldSearcherBase(fId)
+ {
+ setMatchType(EXACT);
+ }
};
}