aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2024-01-03 00:10:24 +0100
committerGitHub <noreply@github.com>2024-01-03 00:10:24 +0100
commit71b2c35d7904a9cec4357f43ce1d06af3ee6127d (patch)
tree351f15cc3bfd386dea483624aeb4a2e9ca71f5fd
parent23045290247f1ea1654430c28e81e1e64310563e (diff)
parent241b5245ecbb5ca8b44cfe96e9b06ff63ea342e6 (diff)
Merge pull request #29776 from vespa-engine/revert-29775-balder/only-rewrite-numeric-terms-for-text-fieldsv8.281.22
Revert "Balder/only rewrite numeric terms for text fields"
-rw-r--r--searchlib/src/tests/query/streaming_query_test.cpp33
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/querynode.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.h3
-rw-r--r--streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp2
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/querytermdata.h15
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp36
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/searchvisitor.h5
-rw-r--r--streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h5
8 files changed, 32 insertions, 69 deletions
diff --git a/searchlib/src/tests/query/streaming_query_test.cpp b/searchlib/src/tests/query/streaming_query_test.cpp
index 0a751e96222..08705fa837b 100644
--- a/searchlib/src/tests/query/streaming_query_test.cpp
+++ b/searchlib/src/tests/query/streaming_query_test.cpp
@@ -287,10 +287,7 @@ TEST(StreamingQueryTest, test_query_language)
class AllowRewrite : public QueryNodeResultFactory
{
public:
- explicit AllowRewrite(vespalib::stringref index) noexcept : _allowedIndex(index) {}
- bool getRewriteFloatTerms(vespalib::stringref index) const noexcept override { return index == _allowedIndex; }
-private:
- vespalib::string _allowedIndex;
+ virtual bool getRewriteFloatTerms() const override { return true; }
};
const char TERM_UNIQ = static_cast<char>(ParseItem::ITEM_TERM) | static_cast<char>(ParseItem::IF_UNIQUEID);
@@ -300,12 +297,12 @@ TEST(StreamingQueryTest, e_is_not_rewritten_even_if_allowed)
const char term[6] = {TERM_UNIQ, 3, 1, 'c', 1, 'e'};
vespalib::stringref stackDump(term, sizeof(term));
EXPECT_EQ(6u, stackDump.size());
- AllowRewrite allowRewrite("c");
+ AllowRewrite allowRewrite;
const Query q(allowRewrite, stackDump);
EXPECT_TRUE(q.valid());
const QueryNode & root = q.getRoot();
EXPECT_TRUE(dynamic_cast<const QueryTerm *>(&root) != nullptr);
- const auto & qt = static_cast<const QueryTerm &>(root);
+ const QueryTerm & qt = static_cast<const QueryTerm &>(root);
EXPECT_EQ("c", qt.index());
EXPECT_EQ(vespalib::stringref("e"), qt.getTerm());
EXPECT_EQ(3u, qt.uniqueId());
@@ -316,12 +313,12 @@ TEST(StreamingQueryTest, onedot0e_is_not_rewritten_by_default)
const char term[9] = {TERM_UNIQ, 3, 1, 'c', 4, '1', '.', '0', 'e'};
vespalib::stringref stackDump(term, sizeof(term));
EXPECT_EQ(9u, stackDump.size());
- AllowRewrite empty("nix");
+ QueryNodeResultFactory empty;
const Query q(empty, stackDump);
EXPECT_TRUE(q.valid());
const QueryNode & root = q.getRoot();
EXPECT_TRUE(dynamic_cast<const QueryTerm *>(&root) != nullptr);
- const auto & qt = static_cast<const QueryTerm &>(root);
+ const QueryTerm & qt = static_cast<const QueryTerm &>(root);
EXPECT_EQ("c", qt.index());
EXPECT_EQ(vespalib::stringref("1.0e"), qt.getTerm());
EXPECT_EQ(3u, qt.uniqueId());
@@ -332,34 +329,34 @@ TEST(StreamingQueryTest, onedot0e_is_rewritten_if_allowed_too)
const char term[9] = {TERM_UNIQ, 3, 1, 'c', 4, '1', '.', '0', 'e'};
vespalib::stringref stackDump(term, sizeof(term));
EXPECT_EQ(9u, stackDump.size());
- AllowRewrite empty("c");
+ AllowRewrite empty;
const Query q(empty, stackDump);
EXPECT_TRUE(q.valid());
const QueryNode & root = q.getRoot();
EXPECT_TRUE(dynamic_cast<const EquivQueryNode *>(&root) != nullptr);
- const auto & equiv = static_cast<const EquivQueryNode &>(root);
+ const EquivQueryNode & equiv = static_cast<const EquivQueryNode &>(root);
EXPECT_EQ(2u, equiv.size());
EXPECT_TRUE(dynamic_cast<const QueryTerm *>(equiv[0].get()) != nullptr);
{
- const auto & qt = static_cast<const QueryTerm &>(*equiv[0]);
+ const QueryTerm & qt = static_cast<const QueryTerm &>(*equiv[0]);
EXPECT_EQ("c", qt.index());
EXPECT_EQ(vespalib::stringref("1.0e"), qt.getTerm());
EXPECT_EQ(3u, qt.uniqueId());
}
EXPECT_TRUE(dynamic_cast<const PhraseQueryNode *>(equiv[1].get()) != nullptr);
{
- const auto & phrase = static_cast<const PhraseQueryNode &>(*equiv[1]);
+ const PhraseQueryNode & phrase = static_cast<const PhraseQueryNode &>(*equiv[1]);
EXPECT_EQ(2u, phrase.size());
EXPECT_TRUE(dynamic_cast<const QueryTerm *>(phrase[0].get()) != nullptr);
{
- const auto & qt = static_cast<const QueryTerm &>(*phrase[0]);
+ const QueryTerm & qt = static_cast<const QueryTerm &>(*phrase[0]);
EXPECT_EQ("c", qt.index());
EXPECT_EQ(vespalib::stringref("1"), qt.getTerm());
EXPECT_EQ(0u, qt.uniqueId());
}
EXPECT_TRUE(dynamic_cast<const QueryTerm *>(phrase[1].get()) != nullptr);
{
- const auto & qt = static_cast<const QueryTerm &>(*phrase[1]);
+ const QueryTerm & qt = static_cast<const QueryTerm &>(*phrase[1]);
EXPECT_EQ("c", qt.index());
EXPECT_EQ(vespalib::stringref("0e"), qt.getTerm());
EXPECT_EQ(0u, qt.uniqueId());
@@ -463,7 +460,7 @@ TEST(StreamingQueryTest, test_phrase_evaluate)
terms[1]->add(1, 5, 0, 1);
terms[2]->add(0, 5, 0, 1);
HitList hits;
- auto * p = static_cast<PhraseQueryNode *>(phrases[0]);
+ PhraseQueryNode * p = static_cast<PhraseQueryNode *>(phrases[0]);
p->evaluateHits(hits);
ASSERT_EQ(3u, hits.size());
EXPECT_EQ(hits[0].wordpos(), 2u);
@@ -753,7 +750,7 @@ TEST(StreamingQueryTest, require_that_incorrectly_specified_diversity_can_be_par
TEST(StreamingQueryTest, require_that_we_do_not_break_the_stack_on_bad_query)
{
- QueryTermSimple term(R"(<form><iframe+&#09;&#10;&#11;+src=\"javascript&#58;alert(1)\"&#11;&#10;&#09;;>)", TermType::WORD);
+ QueryTermSimple term("<form><iframe+&#09;&#10;&#11;+src=\\\"javascript&#58;alert(1)\\\"&#11;&#10;&#09;;>", TermType::WORD);
EXPECT_FALSE(term.isValid());
}
@@ -762,7 +759,7 @@ TEST(StreamingQueryTest, a_unhandled_sameElement_stack)
const char * stack = "\022\002\026xyz_abcdefghij_xyzxyzxQ\001\vxxxxxx_name\034xxxxxx_xxxx_xxxxxxx_xxxxxxxxE\002\005delta\b<0.00393";
vespalib::stringref stackDump(stack);
EXPECT_EQ(85u, stackDump.size());
- AllowRewrite empty("");
+ AllowRewrite empty;
const Query q(empty, stackDump);
EXPECT_TRUE(q.valid());
const QueryNode & root = q.getRoot();
@@ -796,7 +793,7 @@ TEST(StreamingQueryTest, test_same_element_evaluate)
vespalib::string stackDump = StackDumpCreator::create(*node);
QueryNodeResultFactory empty;
Query q(empty, stackDump);
- auto * sameElem = dynamic_cast<SameElementQueryNode *>(&q.getRoot());
+ SameElementQueryNode * sameElem = dynamic_cast<SameElementQueryNode *>(&q.getRoot());
EXPECT_TRUE(sameElem != nullptr);
EXPECT_EQ("field", sameElem->getIndex());
EXPECT_EQ(3u, sameElem->size());
diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp
index 6a17d95ebf2..6b3aa7a2fd0 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp
@@ -156,7 +156,7 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor
qt->setFuzzyMaxEditDistance(queryRep.getFuzzyMaxEditDistance());
qt->setFuzzyPrefixLength(queryRep.getFuzzyPrefixLength());
}
- if (possibleFloat(*qt, ssTerm) && factory.getRewriteFloatTerms(ssIndex) && allowRewrite) {
+ if (possibleFloat(*qt, ssTerm) && factory.getRewriteFloatTerms() && allowRewrite) {
auto phrase = std::make_unique<PhraseQueryNode>();
auto dotPos = ssTerm.find('.');
phrase->addChild(std::make_unique<QueryTerm>(factory.create(), ssTerm.substr(0, dotPos), ssIndex, TermType::WORD));
diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.h b/searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.h
index d7704fb60e1..10f3b7cbf21 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.h
+++ b/searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.h
@@ -1,7 +1,6 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once
-#include <vespa/vespalib/stllike/string.h>
#include <memory>
namespace search::streaming {
@@ -21,7 +20,7 @@ public:
class QueryNodeResultFactory {
public:
virtual ~QueryNodeResultFactory() = default;
- virtual bool getRewriteFloatTerms(vespalib::stringref index) const noexcept { (void) index; return false; }
+ virtual bool getRewriteFloatTerms() const { return false; }
virtual std::unique_ptr<QueryNodeResultBase> create() const { return {}; }
};
}
diff --git a/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp b/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp
index 93e35e4c6d2..2d138d1d336 100644
--- a/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp
+++ b/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp
@@ -40,7 +40,7 @@ protected:
RankProcessorTest::RankProcessorTest()
: testing::Test(),
- _factory(nullptr),
+ _factory(),
_query(),
_query_wrapper()
{
diff --git a/streamingvisitors/src/vespa/searchvisitor/querytermdata.h b/streamingvisitors/src/vespa/searchvisitor/querytermdata.h
index 36176f70d1d..8c1c3771917 100644
--- a/streamingvisitors/src/vespa/searchvisitor/querytermdata.h
+++ b/streamingvisitors/src/vespa/searchvisitor/querytermdata.h
@@ -17,26 +17,15 @@ private:
search::fef::SimpleTermData _termData;
public:
QueryTermData * clone() const override { return new QueryTermData(); }
- search::fef::SimpleTermData &getTermData() noexcept { return _termData; }
-};
-
-class SearchMethodInfo {
-public:
- virtual ~SearchMethodInfo() = default;
- virtual bool is_text_matching(vespalib::stringref index) const noexcept = 0;
+ search::fef::SimpleTermData &getTermData() { return _termData; }
};
class QueryTermDataFactory final : public search::streaming::QueryNodeResultFactory {
public:
- QueryTermDataFactory(const SearchMethodInfo * searchMethodInfo) noexcept : _searchMethodInfo(searchMethodInfo) {}
std::unique_ptr<search::streaming::QueryNodeResultBase> create() const override {
return std::make_unique<QueryTermData>();
}
- bool getRewriteFloatTerms(vespalib::stringref index ) const noexcept override {
- return _searchMethodInfo && _searchMethodInfo->is_text_matching(index);
- }
-private:
- const SearchMethodInfo * _searchMethodInfo;
+ bool getRewriteFloatTerms() const override { return true; }
};
diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
index bd22ba65816..4d31c71c0a0 100644
--- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
+++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
@@ -238,16 +238,14 @@ SearchVisitor::SummaryGenerator::fillSummary(AttributeVector::DocId lid, const H
return {};
}
-void
-SearchVisitor::HitsResultPreparator::execute(vespalib::Identifiable & obj)
+void SearchVisitor::HitsResultPreparator::execute(vespalib::Identifiable & obj)
{
auto & hitsAggr(static_cast<HitsAggregationResult &>(obj));
hitsAggr.setSummaryGenerator(_summaryGenerator);
_numHitsAggregators++;
}
-bool
-SearchVisitor::HitsResultPreparator::check(const vespalib::Identifiable & obj) const
+bool SearchVisitor::HitsResultPreparator::check(const vespalib::Identifiable & obj) const
{
return obj.getClass().inherits(HitsAggregationResult::classId);
}
@@ -261,8 +259,7 @@ SearchVisitor::GroupingEntry::GroupingEntry(Grouping * grouping) :
SearchVisitor::GroupingEntry::~GroupingEntry() = default;
-void
-SearchVisitor::GroupingEntry::aggregate(const document::Document & doc, search::HitRank rank)
+void SearchVisitor::GroupingEntry::aggregate(const document::Document & doc, search::HitRank rank)
{
if (_count < _limit) {
_grouping->aggregate(doc, rank);
@@ -313,15 +310,7 @@ SearchVisitor::SearchVisitor(StorageComponent& component,
LOG(debug, "Created SearchVisitor");
}
-bool
-SearchVisitor::is_text_matching(vespalib::stringref index) const noexcept {
- vsm::FieldIdT fId = _fieldSearchSpecMap.nameIdMap().fieldNo(index);
- auto found = _fieldSearchSpecMap.specMap().find(fId);
- return (found != _fieldSearchSpecMap.specMap().end()) && found->second.uses_string_search_method();
-}
-
-void
-SearchVisitor::init(const Parameters & params)
+void SearchVisitor::init(const Parameters & params)
{
VISITOR_TRACE(6, "About to lazily init VSM adapter");
_attrMan.add(_documentIdAttributeBacking);
@@ -408,12 +397,7 @@ SearchVisitor::init(const Parameters & params)
if ( params.lookup("query", queryBlob) ) {
LOG(spam, "Received query blob of %zu bytes", queryBlob.size());
VISITOR_TRACE(9, vespalib::make_string("Setting up for query blob of %zu bytes", queryBlob.size()));
-
- // Create mapping from field name to field id, from field id to search spec,
- // and from index name to list of field ids
- _fieldSearchSpecMap.buildFromConfig(_env->get_vsm_fields_config());
-
- QueryTermDataFactory addOnFactory(this);
+ QueryTermDataFactory addOnFactory;
_query = Query(addOnFactory, vespalib::stringref(queryBlob.data(), queryBlob.size()));
_searchBuffer->reserve(0x10000);
@@ -430,6 +414,7 @@ SearchVisitor::init(const Parameters & params)
StringFieldIdTMap fieldsInQuery;
setupFieldSearchers(additionalFields, fieldsInQuery);
+
setupScratchDocument(fieldsInQuery);
_syntheticFieldsController.setup(_fieldSearchSpecMap.nameIdMap(), fieldsInQuery);
@@ -769,6 +754,9 @@ void
SearchVisitor::setupFieldSearchers(const std::vector<vespalib::string> & additionalFields,
StringFieldIdTMap & fieldsInQuery)
{
+ // Create mapping from field name to field id, from field id to search spec,
+ // and from index name to list of field ids
+ _fieldSearchSpecMap.buildFromConfig(_env->get_vsm_fields_config());
// Add extra elements to mapping from field name to field id
_fieldSearchSpecMap.buildFromConfig(additionalFields);
@@ -1157,8 +1145,7 @@ SearchVisitor::fillSortBuffer()
return pos;
}
-void
-SearchVisitor::completedBucket(const document::BucketId&, HitCounter&)
+void SearchVisitor::completedBucket(const document::BucketId&, HitCounter&)
{
LOG(debug, "Completed bucket");
}
@@ -1170,8 +1157,7 @@ SearchVisitor::generate_query_result(HitCounter& counter)
return std::move(_queryResult);
}
-void
-SearchVisitor::completedVisitingInternal(HitCounter& hitCounter)
+void SearchVisitor::completedVisitingInternal(HitCounter& hitCounter)
{
if (!_init_called) {
init(_params);
diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h
index 76b2016e2e2..ef7a41f23a5 100644
--- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h
+++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h
@@ -8,7 +8,6 @@
#include "rankmanager.h"
#include "rankprocessor.h"
#include "searchenvironment.h"
-#include "querytermdata.h"
#include <vespa/vsm/common/docsum.h>
#include <vespa/vsm/common/documenttypemapping.h>
#include <vespa/vsm/common/storagedocument.h>
@@ -43,8 +42,7 @@ class SearchEnvironmentSnapshot;
* @brief Visitor that applies a search query to visitor data and
* converts them to a QueryResultCommand.
**/
-class SearchVisitor : public storage::Visitor,
- public SearchMethodInfo {
+class SearchVisitor : public storage::Visitor {
public:
SearchVisitor(storage::StorageComponent&, storage::VisitorEnvironment& vEnv,
const vdslib::Parameters & params);
@@ -490,7 +488,6 @@ private:
vsm::StringFieldIdTMapT _fieldsUnion;
void setupAttributeVector(const vsm::FieldPath &fieldPath);
- bool is_text_matching(vespalib::stringref index) const noexcept override;
};
class SearchVisitorFactory : public storage::VisitorFactory {
diff --git a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h
index f7ca07b4dc5..b0154a82dae 100644
--- a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h
+++ b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h
@@ -23,11 +23,6 @@ public:
bool valid() const { return static_cast<bool>(_searcher); }
size_t maxLength() const { return _maxLength; }
bool uses_nearest_neighbor_search_method() const noexcept { return _searchMethod == VsmfieldsConfig::Fieldspec::Searchmethod::NEAREST_NEIGHBOR; }
- bool uses_string_search_method() const noexcept {
- return (_searchMethod == VsmfieldsConfig::Fieldspec::Searchmethod::UTF8) ||
- (_searchMethod == VsmfieldsConfig::Fieldspec::Searchmethod::AUTOUTF8) ||
- (_searchMethod == VsmfieldsConfig::Fieldspec::Searchmethod::SSE2UTF8);
- }
const vespalib::string& get_arg1() const noexcept { return _arg1; }
/**