From 241b5245ecbb5ca8b44cfe96e9b06ff63ea342e6 Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Wed, 3 Jan 2024 00:09:21 +0100 Subject: Revert "Balder/only rewrite numeric terms for text fields" --- searchlib/src/tests/query/streaming_query_test.cpp | 33 +++++++++----------- .../vespa/searchlib/query/streaming/querynode.cpp | 2 +- .../query/streaming/querynoderesultbase.h | 3 +- .../tests/rank_processor/rank_processor_test.cpp | 2 +- .../src/vespa/searchvisitor/querytermdata.h | 15 ++------- .../src/vespa/searchvisitor/searchvisitor.cpp | 36 +++++++--------------- .../src/vespa/searchvisitor/searchvisitor.h | 5 +-- .../src/vespa/vsm/vsm/fieldsearchspec.h | 5 --- 8 files changed, 32 insertions(+), 69 deletions(-) diff --git a/searchlib/src/tests/query/streaming_query_test.cpp b/searchlib/src/tests/query/streaming_query_test.cpp index 0a751e96222..08705fa837b 100644 --- a/searchlib/src/tests/query/streaming_query_test.cpp +++ b/searchlib/src/tests/query/streaming_query_test.cpp @@ -287,10 +287,7 @@ TEST(StreamingQueryTest, test_query_language) class AllowRewrite : public QueryNodeResultFactory { public: - explicit AllowRewrite(vespalib::stringref index) noexcept : _allowedIndex(index) {} - bool getRewriteFloatTerms(vespalib::stringref index) const noexcept override { return index == _allowedIndex; } -private: - vespalib::string _allowedIndex; + virtual bool getRewriteFloatTerms() const override { return true; } }; const char TERM_UNIQ = static_cast(ParseItem::ITEM_TERM) | static_cast(ParseItem::IF_UNIQUEID); @@ -300,12 +297,12 @@ TEST(StreamingQueryTest, e_is_not_rewritten_even_if_allowed) const char term[6] = {TERM_UNIQ, 3, 1, 'c', 1, 'e'}; vespalib::stringref stackDump(term, sizeof(term)); EXPECT_EQ(6u, stackDump.size()); - AllowRewrite allowRewrite("c"); + AllowRewrite allowRewrite; const Query q(allowRewrite, stackDump); EXPECT_TRUE(q.valid()); const QueryNode & root = q.getRoot(); EXPECT_TRUE(dynamic_cast(&root) != nullptr); - const auto & qt = static_cast(root); + const QueryTerm & qt = static_cast(root); EXPECT_EQ("c", qt.index()); EXPECT_EQ(vespalib::stringref("e"), qt.getTerm()); EXPECT_EQ(3u, qt.uniqueId()); @@ -316,12 +313,12 @@ TEST(StreamingQueryTest, onedot0e_is_not_rewritten_by_default) const char term[9] = {TERM_UNIQ, 3, 1, 'c', 4, '1', '.', '0', 'e'}; vespalib::stringref stackDump(term, sizeof(term)); EXPECT_EQ(9u, stackDump.size()); - AllowRewrite empty("nix"); + QueryNodeResultFactory empty; const Query q(empty, stackDump); EXPECT_TRUE(q.valid()); const QueryNode & root = q.getRoot(); EXPECT_TRUE(dynamic_cast(&root) != nullptr); - const auto & qt = static_cast(root); + const QueryTerm & qt = static_cast(root); EXPECT_EQ("c", qt.index()); EXPECT_EQ(vespalib::stringref("1.0e"), qt.getTerm()); EXPECT_EQ(3u, qt.uniqueId()); @@ -332,34 +329,34 @@ TEST(StreamingQueryTest, onedot0e_is_rewritten_if_allowed_too) const char term[9] = {TERM_UNIQ, 3, 1, 'c', 4, '1', '.', '0', 'e'}; vespalib::stringref stackDump(term, sizeof(term)); EXPECT_EQ(9u, stackDump.size()); - AllowRewrite empty("c"); + AllowRewrite empty; const Query q(empty, stackDump); EXPECT_TRUE(q.valid()); const QueryNode & root = q.getRoot(); EXPECT_TRUE(dynamic_cast(&root) != nullptr); - const auto & equiv = static_cast(root); + const EquivQueryNode & equiv = static_cast(root); EXPECT_EQ(2u, equiv.size()); EXPECT_TRUE(dynamic_cast(equiv[0].get()) != nullptr); { - const auto & qt = static_cast(*equiv[0]); + const QueryTerm & qt = static_cast(*equiv[0]); EXPECT_EQ("c", qt.index()); EXPECT_EQ(vespalib::stringref("1.0e"), qt.getTerm()); EXPECT_EQ(3u, qt.uniqueId()); } EXPECT_TRUE(dynamic_cast(equiv[1].get()) != nullptr); { - const auto & phrase = static_cast(*equiv[1]); + const PhraseQueryNode & phrase = static_cast(*equiv[1]); EXPECT_EQ(2u, phrase.size()); EXPECT_TRUE(dynamic_cast(phrase[0].get()) != nullptr); { - const auto & qt = static_cast(*phrase[0]); + const QueryTerm & qt = static_cast(*phrase[0]); EXPECT_EQ("c", qt.index()); EXPECT_EQ(vespalib::stringref("1"), qt.getTerm()); EXPECT_EQ(0u, qt.uniqueId()); } EXPECT_TRUE(dynamic_cast(phrase[1].get()) != nullptr); { - const auto & qt = static_cast(*phrase[1]); + const QueryTerm & qt = static_cast(*phrase[1]); EXPECT_EQ("c", qt.index()); EXPECT_EQ(vespalib::stringref("0e"), qt.getTerm()); EXPECT_EQ(0u, qt.uniqueId()); @@ -463,7 +460,7 @@ TEST(StreamingQueryTest, test_phrase_evaluate) terms[1]->add(1, 5, 0, 1); terms[2]->add(0, 5, 0, 1); HitList hits; - auto * p = static_cast(phrases[0]); + PhraseQueryNode * p = static_cast(phrases[0]); p->evaluateHits(hits); ASSERT_EQ(3u, hits.size()); EXPECT_EQ(hits[0].wordpos(), 2u); @@ -753,7 +750,7 @@ TEST(StreamingQueryTest, require_that_incorrectly_specified_diversity_can_be_par TEST(StreamingQueryTest, require_that_we_do_not_break_the_stack_on_bad_query) { - QueryTermSimple term(R"(
)", TermType::WORD); + QueryTermSimple term("", TermType::WORD); EXPECT_FALSE(term.isValid()); } @@ -762,7 +759,7 @@ TEST(StreamingQueryTest, a_unhandled_sameElement_stack) const char * stack = "\022\002\026xyz_abcdefghij_xyzxyzxQ\001\vxxxxxx_name\034xxxxxx_xxxx_xxxxxxx_xxxxxxxxE\002\005delta\b<0.00393"; vespalib::stringref stackDump(stack); EXPECT_EQ(85u, stackDump.size()); - AllowRewrite empty(""); + AllowRewrite empty; const Query q(empty, stackDump); EXPECT_TRUE(q.valid()); const QueryNode & root = q.getRoot(); @@ -796,7 +793,7 @@ TEST(StreamingQueryTest, test_same_element_evaluate) vespalib::string stackDump = StackDumpCreator::create(*node); QueryNodeResultFactory empty; Query q(empty, stackDump); - auto * sameElem = dynamic_cast(&q.getRoot()); + SameElementQueryNode * sameElem = dynamic_cast(&q.getRoot()); EXPECT_TRUE(sameElem != nullptr); EXPECT_EQ("field", sameElem->getIndex()); EXPECT_EQ(3u, sameElem->size()); diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp index 6a17d95ebf2..6b3aa7a2fd0 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp @@ -156,7 +156,7 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor qt->setFuzzyMaxEditDistance(queryRep.getFuzzyMaxEditDistance()); qt->setFuzzyPrefixLength(queryRep.getFuzzyPrefixLength()); } - if (possibleFloat(*qt, ssTerm) && factory.getRewriteFloatTerms(ssIndex) && allowRewrite) { + if (possibleFloat(*qt, ssTerm) && factory.getRewriteFloatTerms() && allowRewrite) { auto phrase = std::make_unique(); auto dotPos = ssTerm.find('.'); phrase->addChild(std::make_unique(factory.create(), ssTerm.substr(0, dotPos), ssIndex, TermType::WORD)); diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.h b/searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.h index d7704fb60e1..10f3b7cbf21 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.h +++ b/searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.h @@ -1,7 +1,6 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once -#include #include namespace search::streaming { @@ -21,7 +20,7 @@ public: class QueryNodeResultFactory { public: virtual ~QueryNodeResultFactory() = default; - virtual bool getRewriteFloatTerms(vespalib::stringref index) const noexcept { (void) index; return false; } + virtual bool getRewriteFloatTerms() const { return false; } virtual std::unique_ptr create() const { return {}; } }; } diff --git a/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp b/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp index 93e35e4c6d2..2d138d1d336 100644 --- a/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp +++ b/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp @@ -40,7 +40,7 @@ protected: RankProcessorTest::RankProcessorTest() : testing::Test(), - _factory(nullptr), + _factory(), _query(), _query_wrapper() { diff --git a/streamingvisitors/src/vespa/searchvisitor/querytermdata.h b/streamingvisitors/src/vespa/searchvisitor/querytermdata.h index 36176f70d1d..8c1c3771917 100644 --- a/streamingvisitors/src/vespa/searchvisitor/querytermdata.h +++ b/streamingvisitors/src/vespa/searchvisitor/querytermdata.h @@ -17,26 +17,15 @@ private: search::fef::SimpleTermData _termData; public: QueryTermData * clone() const override { return new QueryTermData(); } - search::fef::SimpleTermData &getTermData() noexcept { return _termData; } -}; - -class SearchMethodInfo { -public: - virtual ~SearchMethodInfo() = default; - virtual bool is_text_matching(vespalib::stringref index) const noexcept = 0; + search::fef::SimpleTermData &getTermData() { return _termData; } }; class QueryTermDataFactory final : public search::streaming::QueryNodeResultFactory { public: - QueryTermDataFactory(const SearchMethodInfo * searchMethodInfo) noexcept : _searchMethodInfo(searchMethodInfo) {} std::unique_ptr create() const override { return std::make_unique(); } - bool getRewriteFloatTerms(vespalib::stringref index ) const noexcept override { - return _searchMethodInfo && _searchMethodInfo->is_text_matching(index); - } -private: - const SearchMethodInfo * _searchMethodInfo; + bool getRewriteFloatTerms() const override { return true; } }; diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp index bd22ba65816..4d31c71c0a0 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp @@ -238,16 +238,14 @@ SearchVisitor::SummaryGenerator::fillSummary(AttributeVector::DocId lid, const H return {}; } -void -SearchVisitor::HitsResultPreparator::execute(vespalib::Identifiable & obj) +void SearchVisitor::HitsResultPreparator::execute(vespalib::Identifiable & obj) { auto & hitsAggr(static_cast(obj)); hitsAggr.setSummaryGenerator(_summaryGenerator); _numHitsAggregators++; } -bool -SearchVisitor::HitsResultPreparator::check(const vespalib::Identifiable & obj) const +bool SearchVisitor::HitsResultPreparator::check(const vespalib::Identifiable & obj) const { return obj.getClass().inherits(HitsAggregationResult::classId); } @@ -261,8 +259,7 @@ SearchVisitor::GroupingEntry::GroupingEntry(Grouping * grouping) : SearchVisitor::GroupingEntry::~GroupingEntry() = default; -void -SearchVisitor::GroupingEntry::aggregate(const document::Document & doc, search::HitRank rank) +void SearchVisitor::GroupingEntry::aggregate(const document::Document & doc, search::HitRank rank) { if (_count < _limit) { _grouping->aggregate(doc, rank); @@ -313,15 +310,7 @@ SearchVisitor::SearchVisitor(StorageComponent& component, LOG(debug, "Created SearchVisitor"); } -bool -SearchVisitor::is_text_matching(vespalib::stringref index) const noexcept { - vsm::FieldIdT fId = _fieldSearchSpecMap.nameIdMap().fieldNo(index); - auto found = _fieldSearchSpecMap.specMap().find(fId); - return (found != _fieldSearchSpecMap.specMap().end()) && found->second.uses_string_search_method(); -} - -void -SearchVisitor::init(const Parameters & params) +void SearchVisitor::init(const Parameters & params) { VISITOR_TRACE(6, "About to lazily init VSM adapter"); _attrMan.add(_documentIdAttributeBacking); @@ -408,12 +397,7 @@ SearchVisitor::init(const Parameters & params) if ( params.lookup("query", queryBlob) ) { LOG(spam, "Received query blob of %zu bytes", queryBlob.size()); VISITOR_TRACE(9, vespalib::make_string("Setting up for query blob of %zu bytes", queryBlob.size())); - - // Create mapping from field name to field id, from field id to search spec, - // and from index name to list of field ids - _fieldSearchSpecMap.buildFromConfig(_env->get_vsm_fields_config()); - - QueryTermDataFactory addOnFactory(this); + QueryTermDataFactory addOnFactory; _query = Query(addOnFactory, vespalib::stringref(queryBlob.data(), queryBlob.size())); _searchBuffer->reserve(0x10000); @@ -430,6 +414,7 @@ SearchVisitor::init(const Parameters & params) StringFieldIdTMap fieldsInQuery; setupFieldSearchers(additionalFields, fieldsInQuery); + setupScratchDocument(fieldsInQuery); _syntheticFieldsController.setup(_fieldSearchSpecMap.nameIdMap(), fieldsInQuery); @@ -769,6 +754,9 @@ void SearchVisitor::setupFieldSearchers(const std::vector & additionalFields, StringFieldIdTMap & fieldsInQuery) { + // Create mapping from field name to field id, from field id to search spec, + // and from index name to list of field ids + _fieldSearchSpecMap.buildFromConfig(_env->get_vsm_fields_config()); // Add extra elements to mapping from field name to field id _fieldSearchSpecMap.buildFromConfig(additionalFields); @@ -1157,8 +1145,7 @@ SearchVisitor::fillSortBuffer() return pos; } -void -SearchVisitor::completedBucket(const document::BucketId&, HitCounter&) +void SearchVisitor::completedBucket(const document::BucketId&, HitCounter&) { LOG(debug, "Completed bucket"); } @@ -1170,8 +1157,7 @@ SearchVisitor::generate_query_result(HitCounter& counter) return std::move(_queryResult); } -void -SearchVisitor::completedVisitingInternal(HitCounter& hitCounter) +void SearchVisitor::completedVisitingInternal(HitCounter& hitCounter) { if (!_init_called) { init(_params); diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h index 76b2016e2e2..ef7a41f23a5 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h +++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h @@ -8,7 +8,6 @@ #include "rankmanager.h" #include "rankprocessor.h" #include "searchenvironment.h" -#include "querytermdata.h" #include #include #include @@ -43,8 +42,7 @@ class SearchEnvironmentSnapshot; * @brief Visitor that applies a search query to visitor data and * converts them to a QueryResultCommand. **/ -class SearchVisitor : public storage::Visitor, - public SearchMethodInfo { +class SearchVisitor : public storage::Visitor { public: SearchVisitor(storage::StorageComponent&, storage::VisitorEnvironment& vEnv, const vdslib::Parameters & params); @@ -490,7 +488,6 @@ private: vsm::StringFieldIdTMapT _fieldsUnion; void setupAttributeVector(const vsm::FieldPath &fieldPath); - bool is_text_matching(vespalib::stringref index) const noexcept override; }; class SearchVisitorFactory : public storage::VisitorFactory { diff --git a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h index f7ca07b4dc5..b0154a82dae 100644 --- a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h +++ b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h @@ -23,11 +23,6 @@ public: bool valid() const { return static_cast(_searcher); } size_t maxLength() const { return _maxLength; } bool uses_nearest_neighbor_search_method() const noexcept { return _searchMethod == VsmfieldsConfig::Fieldspec::Searchmethod::NEAREST_NEIGHBOR; } - bool uses_string_search_method() const noexcept { - return (_searchMethod == VsmfieldsConfig::Fieldspec::Searchmethod::UTF8) || - (_searchMethod == VsmfieldsConfig::Fieldspec::Searchmethod::AUTOUTF8) || - (_searchMethod == VsmfieldsConfig::Fieldspec::Searchmethod::SSE2UTF8); - } const vespalib::string& get_arg1() const noexcept { return _arg1; } /** -- cgit v1.2.3