diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2024-01-02 14:30:46 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2024-01-02 19:04:10 +0000 |
commit | d82f792a47954247f3ca105ed2611eed80e86fe2 (patch) | |
tree | f05502e56b8db4e5eab7518f9127564830d545de | |
parent | bdf3c9b825e02605b642d3f9fcd3e4cb81c6e287 (diff) |
Only rewrite numeric terms when searching text fields.
8 files changed, 62 insertions, 28 deletions
diff --git a/searchlib/src/tests/query/streaming_query_test.cpp b/searchlib/src/tests/query/streaming_query_test.cpp index 08705fa837b..24bd94d2831 100644 --- a/searchlib/src/tests/query/streaming_query_test.cpp +++ b/searchlib/src/tests/query/streaming_query_test.cpp @@ -287,7 +287,7 @@ TEST(StreamingQueryTest, test_query_language) class AllowRewrite : public QueryNodeResultFactory { public: - virtual bool getRewriteFloatTerms() const override { return true; } + bool getRewriteFloatTerms(vespalib::stringref index) const noexcept override { (void) index; return true; } }; const char TERM_UNIQ = static_cast<char>(ParseItem::ITEM_TERM) | static_cast<char>(ParseItem::IF_UNIQUEID); @@ -302,7 +302,7 @@ TEST(StreamingQueryTest, e_is_not_rewritten_even_if_allowed) EXPECT_TRUE(q.valid()); const QueryNode & root = q.getRoot(); EXPECT_TRUE(dynamic_cast<const QueryTerm *>(&root) != nullptr); - const QueryTerm & qt = static_cast<const QueryTerm &>(root); + const auto & qt = static_cast<const QueryTerm &>(root); EXPECT_EQ("c", qt.index()); EXPECT_EQ(vespalib::stringref("e"), qt.getTerm()); EXPECT_EQ(3u, qt.uniqueId()); @@ -318,7 +318,7 @@ TEST(StreamingQueryTest, onedot0e_is_not_rewritten_by_default) EXPECT_TRUE(q.valid()); const QueryNode & root = q.getRoot(); EXPECT_TRUE(dynamic_cast<const QueryTerm *>(&root) != nullptr); - const QueryTerm & qt = static_cast<const QueryTerm &>(root); + const auto & qt = static_cast<const QueryTerm &>(root); EXPECT_EQ("c", qt.index()); EXPECT_EQ(vespalib::stringref("1.0e"), qt.getTerm()); EXPECT_EQ(3u, qt.uniqueId()); @@ -334,29 +334,29 @@ TEST(StreamingQueryTest, onedot0e_is_rewritten_if_allowed_too) EXPECT_TRUE(q.valid()); const QueryNode & root = q.getRoot(); EXPECT_TRUE(dynamic_cast<const EquivQueryNode *>(&root) != nullptr); - const EquivQueryNode & equiv = static_cast<const EquivQueryNode &>(root); + const auto & equiv = static_cast<const EquivQueryNode &>(root); EXPECT_EQ(2u, equiv.size()); EXPECT_TRUE(dynamic_cast<const QueryTerm *>(equiv[0].get()) != nullptr); { - const QueryTerm & qt = static_cast<const QueryTerm &>(*equiv[0]); + const auto & qt = static_cast<const QueryTerm &>(*equiv[0]); EXPECT_EQ("c", qt.index()); EXPECT_EQ(vespalib::stringref("1.0e"), qt.getTerm()); EXPECT_EQ(3u, qt.uniqueId()); } EXPECT_TRUE(dynamic_cast<const PhraseQueryNode *>(equiv[1].get()) != nullptr); { - const PhraseQueryNode & phrase = static_cast<const PhraseQueryNode &>(*equiv[1]); + const auto & phrase = static_cast<const PhraseQueryNode &>(*equiv[1]); EXPECT_EQ(2u, phrase.size()); EXPECT_TRUE(dynamic_cast<const QueryTerm *>(phrase[0].get()) != nullptr); { - const QueryTerm & qt = static_cast<const QueryTerm &>(*phrase[0]); + const auto & qt = static_cast<const QueryTerm &>(*phrase[0]); EXPECT_EQ("c", qt.index()); EXPECT_EQ(vespalib::stringref("1"), qt.getTerm()); EXPECT_EQ(0u, qt.uniqueId()); } EXPECT_TRUE(dynamic_cast<const QueryTerm *>(phrase[1].get()) != nullptr); { - const QueryTerm & qt = static_cast<const QueryTerm &>(*phrase[1]); + const auto & qt = static_cast<const QueryTerm &>(*phrase[1]); EXPECT_EQ("c", qt.index()); EXPECT_EQ(vespalib::stringref("0e"), qt.getTerm()); EXPECT_EQ(0u, qt.uniqueId()); @@ -460,7 +460,7 @@ TEST(StreamingQueryTest, test_phrase_evaluate) terms[1]->add(1, 5, 0, 1); terms[2]->add(0, 5, 0, 1); HitList hits; - PhraseQueryNode * p = static_cast<PhraseQueryNode *>(phrases[0]); + auto * p = static_cast<PhraseQueryNode *>(phrases[0]); p->evaluateHits(hits); ASSERT_EQ(3u, hits.size()); EXPECT_EQ(hits[0].wordpos(), 2u); @@ -750,7 +750,7 @@ TEST(StreamingQueryTest, require_that_incorrectly_specified_diversity_can_be_par TEST(StreamingQueryTest, require_that_we_do_not_break_the_stack_on_bad_query) { - QueryTermSimple term("<form><iframe+	 +src=\\\"javascript:alert(1)\\\" 	;>", TermType::WORD); + QueryTermSimple term(R"(<form><iframe+	 +src=\"javascript:alert(1)\" 	;>)", TermType::WORD); EXPECT_FALSE(term.isValid()); } @@ -793,7 +793,7 @@ TEST(StreamingQueryTest, test_same_element_evaluate) vespalib::string stackDump = StackDumpCreator::create(*node); QueryNodeResultFactory empty; Query q(empty, stackDump); - SameElementQueryNode * sameElem = dynamic_cast<SameElementQueryNode *>(&q.getRoot()); + auto * sameElem = dynamic_cast<SameElementQueryNode *>(&q.getRoot()); EXPECT_TRUE(sameElem != nullptr); EXPECT_EQ("field", sameElem->getIndex()); EXPECT_EQ(3u, sameElem->size()); diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp index 6b3aa7a2fd0..6a17d95ebf2 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp @@ -156,7 +156,7 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor qt->setFuzzyMaxEditDistance(queryRep.getFuzzyMaxEditDistance()); qt->setFuzzyPrefixLength(queryRep.getFuzzyPrefixLength()); } - if (possibleFloat(*qt, ssTerm) && factory.getRewriteFloatTerms() && allowRewrite) { + if (possibleFloat(*qt, ssTerm) && factory.getRewriteFloatTerms(ssIndex) && allowRewrite) { auto phrase = std::make_unique<PhraseQueryNode>(); auto dotPos = ssTerm.find('.'); phrase->addChild(std::make_unique<QueryTerm>(factory.create(), ssTerm.substr(0, dotPos), ssIndex, TermType::WORD)); diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.h b/searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.h index 10f3b7cbf21..d7704fb60e1 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.h +++ b/searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.h @@ -1,6 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once +#include <vespa/vespalib/stllike/string.h> #include <memory> namespace search::streaming { @@ -20,7 +21,7 @@ public: class QueryNodeResultFactory { public: virtual ~QueryNodeResultFactory() = default; - virtual bool getRewriteFloatTerms() const { return false; } + virtual bool getRewriteFloatTerms(vespalib::stringref index) const noexcept { (void) index; return false; } virtual std::unique_ptr<QueryNodeResultBase> create() const { return {}; } }; } diff --git a/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp b/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp index 2d138d1d336..93e35e4c6d2 100644 --- a/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp +++ b/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp @@ -40,7 +40,7 @@ protected: RankProcessorTest::RankProcessorTest() : testing::Test(), - _factory(), + _factory(nullptr), _query(), _query_wrapper() { diff --git a/streamingvisitors/src/vespa/searchvisitor/querytermdata.h b/streamingvisitors/src/vespa/searchvisitor/querytermdata.h index 8c1c3771917..36176f70d1d 100644 --- a/streamingvisitors/src/vespa/searchvisitor/querytermdata.h +++ b/streamingvisitors/src/vespa/searchvisitor/querytermdata.h @@ -17,15 +17,26 @@ private: search::fef::SimpleTermData _termData; public: QueryTermData * clone() const override { return new QueryTermData(); } - search::fef::SimpleTermData &getTermData() { return _termData; } + search::fef::SimpleTermData &getTermData() noexcept { return _termData; } +}; + +class SearchMethodInfo { +public: + virtual ~SearchMethodInfo() = default; + virtual bool is_text_matching(vespalib::stringref index) const noexcept = 0; }; class QueryTermDataFactory final : public search::streaming::QueryNodeResultFactory { public: + QueryTermDataFactory(const SearchMethodInfo * searchMethodInfo) noexcept : _searchMethodInfo(searchMethodInfo) {} std::unique_ptr<search::streaming::QueryNodeResultBase> create() const override { return std::make_unique<QueryTermData>(); } - bool getRewriteFloatTerms() const override { return true; } + bool getRewriteFloatTerms(vespalib::stringref index ) const noexcept override { + return _searchMethodInfo && _searchMethodInfo->is_text_matching(index); + } +private: + const SearchMethodInfo * _searchMethodInfo; }; diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp index 4d31c71c0a0..bd22ba65816 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp @@ -238,14 +238,16 @@ SearchVisitor::SummaryGenerator::fillSummary(AttributeVector::DocId lid, const H return {}; } -void SearchVisitor::HitsResultPreparator::execute(vespalib::Identifiable & obj) +void +SearchVisitor::HitsResultPreparator::execute(vespalib::Identifiable & obj) { auto & hitsAggr(static_cast<HitsAggregationResult &>(obj)); hitsAggr.setSummaryGenerator(_summaryGenerator); _numHitsAggregators++; } -bool SearchVisitor::HitsResultPreparator::check(const vespalib::Identifiable & obj) const +bool +SearchVisitor::HitsResultPreparator::check(const vespalib::Identifiable & obj) const { return obj.getClass().inherits(HitsAggregationResult::classId); } @@ -259,7 +261,8 @@ SearchVisitor::GroupingEntry::GroupingEntry(Grouping * grouping) : SearchVisitor::GroupingEntry::~GroupingEntry() = default; -void SearchVisitor::GroupingEntry::aggregate(const document::Document & doc, search::HitRank rank) +void +SearchVisitor::GroupingEntry::aggregate(const document::Document & doc, search::HitRank rank) { if (_count < _limit) { _grouping->aggregate(doc, rank); @@ -310,7 +313,15 @@ SearchVisitor::SearchVisitor(StorageComponent& component, LOG(debug, "Created SearchVisitor"); } -void SearchVisitor::init(const Parameters & params) +bool +SearchVisitor::is_text_matching(vespalib::stringref index) const noexcept { + vsm::FieldIdT fId = _fieldSearchSpecMap.nameIdMap().fieldNo(index); + auto found = _fieldSearchSpecMap.specMap().find(fId); + return (found != _fieldSearchSpecMap.specMap().end()) && found->second.uses_string_search_method(); +} + +void +SearchVisitor::init(const Parameters & params) { VISITOR_TRACE(6, "About to lazily init VSM adapter"); _attrMan.add(_documentIdAttributeBacking); @@ -397,7 +408,12 @@ void SearchVisitor::init(const Parameters & params) if ( params.lookup("query", queryBlob) ) { LOG(spam, "Received query blob of %zu bytes", queryBlob.size()); VISITOR_TRACE(9, vespalib::make_string("Setting up for query blob of %zu bytes", queryBlob.size())); - QueryTermDataFactory addOnFactory; + + // Create mapping from field name to field id, from field id to search spec, + // and from index name to list of field ids + _fieldSearchSpecMap.buildFromConfig(_env->get_vsm_fields_config()); + + QueryTermDataFactory addOnFactory(this); _query = Query(addOnFactory, vespalib::stringref(queryBlob.data(), queryBlob.size())); _searchBuffer->reserve(0x10000); @@ -414,7 +430,6 @@ void SearchVisitor::init(const Parameters & params) StringFieldIdTMap fieldsInQuery; setupFieldSearchers(additionalFields, fieldsInQuery); - setupScratchDocument(fieldsInQuery); _syntheticFieldsController.setup(_fieldSearchSpecMap.nameIdMap(), fieldsInQuery); @@ -754,9 +769,6 @@ void SearchVisitor::setupFieldSearchers(const std::vector<vespalib::string> & additionalFields, StringFieldIdTMap & fieldsInQuery) { - // Create mapping from field name to field id, from field id to search spec, - // and from index name to list of field ids - _fieldSearchSpecMap.buildFromConfig(_env->get_vsm_fields_config()); // Add extra elements to mapping from field name to field id _fieldSearchSpecMap.buildFromConfig(additionalFields); @@ -1145,7 +1157,8 @@ SearchVisitor::fillSortBuffer() return pos; } -void SearchVisitor::completedBucket(const document::BucketId&, HitCounter&) +void +SearchVisitor::completedBucket(const document::BucketId&, HitCounter&) { LOG(debug, "Completed bucket"); } @@ -1157,7 +1170,8 @@ SearchVisitor::generate_query_result(HitCounter& counter) return std::move(_queryResult); } -void SearchVisitor::completedVisitingInternal(HitCounter& hitCounter) +void +SearchVisitor::completedVisitingInternal(HitCounter& hitCounter) { if (!_init_called) { init(_params); diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h index ef7a41f23a5..76b2016e2e2 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h +++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h @@ -8,6 +8,7 @@ #include "rankmanager.h" #include "rankprocessor.h" #include "searchenvironment.h" +#include "querytermdata.h" #include <vespa/vsm/common/docsum.h> #include <vespa/vsm/common/documenttypemapping.h> #include <vespa/vsm/common/storagedocument.h> @@ -42,7 +43,8 @@ class SearchEnvironmentSnapshot; * @brief Visitor that applies a search query to visitor data and * converts them to a QueryResultCommand. **/ -class SearchVisitor : public storage::Visitor { +class SearchVisitor : public storage::Visitor, + public SearchMethodInfo { public: SearchVisitor(storage::StorageComponent&, storage::VisitorEnvironment& vEnv, const vdslib::Parameters & params); @@ -488,6 +490,7 @@ private: vsm::StringFieldIdTMapT _fieldsUnion; void setupAttributeVector(const vsm::FieldPath &fieldPath); + bool is_text_matching(vespalib::stringref index) const noexcept override; }; class SearchVisitorFactory : public storage::VisitorFactory { diff --git a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h index b0154a82dae..f7ca07b4dc5 100644 --- a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h +++ b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h @@ -23,6 +23,11 @@ public: bool valid() const { return static_cast<bool>(_searcher); } size_t maxLength() const { return _maxLength; } bool uses_nearest_neighbor_search_method() const noexcept { return _searchMethod == VsmfieldsConfig::Fieldspec::Searchmethod::NEAREST_NEIGHBOR; } + bool uses_string_search_method() const noexcept { + return (_searchMethod == VsmfieldsConfig::Fieldspec::Searchmethod::UTF8) || + (_searchMethod == VsmfieldsConfig::Fieldspec::Searchmethod::AUTOUTF8) || + (_searchMethod == VsmfieldsConfig::Fieldspec::Searchmethod::SSE2UTF8); + } const vespalib::string& get_arg1() const noexcept { return _arg1; } /** |