From d82f792a47954247f3ca105ed2611eed80e86fe2 Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Tue, 2 Jan 2024 14:30:46 +0000 Subject: Only rewrite numeric terms when searching text fields. --- .../tests/rank_processor/rank_processor_test.cpp | 2 +- .../src/vespa/searchvisitor/querytermdata.h | 15 +++++++-- .../src/vespa/searchvisitor/searchvisitor.cpp | 36 +++++++++++++++------- .../src/vespa/searchvisitor/searchvisitor.h | 5 ++- .../src/vespa/vsm/vsm/fieldsearchspec.h | 5 +++ 5 files changed, 48 insertions(+), 15 deletions(-) (limited to 'streamingvisitors') diff --git a/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp b/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp index 2d138d1d336..93e35e4c6d2 100644 --- a/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp +++ b/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp @@ -40,7 +40,7 @@ protected: RankProcessorTest::RankProcessorTest() : testing::Test(), - _factory(), + _factory(nullptr), _query(), _query_wrapper() { diff --git a/streamingvisitors/src/vespa/searchvisitor/querytermdata.h b/streamingvisitors/src/vespa/searchvisitor/querytermdata.h index 8c1c3771917..36176f70d1d 100644 --- a/streamingvisitors/src/vespa/searchvisitor/querytermdata.h +++ b/streamingvisitors/src/vespa/searchvisitor/querytermdata.h @@ -17,15 +17,26 @@ private: search::fef::SimpleTermData _termData; public: QueryTermData * clone() const override { return new QueryTermData(); } - search::fef::SimpleTermData &getTermData() { return _termData; } + search::fef::SimpleTermData &getTermData() noexcept { return _termData; } +}; + +class SearchMethodInfo { +public: + virtual ~SearchMethodInfo() = default; + virtual bool is_text_matching(vespalib::stringref index) const noexcept = 0; }; class QueryTermDataFactory final : public search::streaming::QueryNodeResultFactory { public: + QueryTermDataFactory(const SearchMethodInfo * searchMethodInfo) noexcept : 
_searchMethodInfo(searchMethodInfo) {} std::unique_ptr create() const override { return std::make_unique<QueryTermData>(); } - bool getRewriteFloatTerms() const override { return true; } + bool getRewriteFloatTerms(vespalib::stringref index ) const noexcept override { + return _searchMethodInfo && _searchMethodInfo->is_text_matching(index); + } +private: + const SearchMethodInfo * _searchMethodInfo; }; diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp index 4d31c71c0a0..bd22ba65816 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp @@ -238,14 +238,16 @@ SearchVisitor::SummaryGenerator::fillSummary(AttributeVector::DocId lid, const H return {}; } -void SearchVisitor::HitsResultPreparator::execute(vespalib::Identifiable & obj) +void +SearchVisitor::HitsResultPreparator::execute(vespalib::Identifiable & obj) { auto & hitsAggr(static_cast<HitsAggregationResult &>(obj)); hitsAggr.setSummaryGenerator(_summaryGenerator); _numHitsAggregators++; } -bool SearchVisitor::HitsResultPreparator::check(const vespalib::Identifiable & obj) const +bool +SearchVisitor::HitsResultPreparator::check(const vespalib::Identifiable & obj) const { return obj.getClass().inherits(HitsAggregationResult::classId); } @@ -259,7 +261,8 @@ SearchVisitor::GroupingEntry::GroupingEntry(Grouping * grouping) : SearchVisitor::GroupingEntry::~GroupingEntry() = default; -void SearchVisitor::GroupingEntry::aggregate(const document::Document & doc, search::HitRank rank) +void +SearchVisitor::GroupingEntry::aggregate(const document::Document & doc, search::HitRank rank) { if (_count < _limit) { _grouping->aggregate(doc, rank); @@ -310,7 +313,15 @@ SearchVisitor::SearchVisitor(StorageComponent& component, LOG(debug, "Created SearchVisitor"); } -void SearchVisitor::init(const Parameters & params) +bool +SearchVisitor::is_text_matching(vespalib::stringref index) const noexcept { + 
vsm::FieldIdT fId = _fieldSearchSpecMap.nameIdMap().fieldNo(index); + auto found = _fieldSearchSpecMap.specMap().find(fId); + return (found != _fieldSearchSpecMap.specMap().end()) && found->second.uses_string_search_method(); +} + +void +SearchVisitor::init(const Parameters & params) { VISITOR_TRACE(6, "About to lazily init VSM adapter"); _attrMan.add(_documentIdAttributeBacking); @@ -397,7 +408,12 @@ void SearchVisitor::init(const Parameters & params) if ( params.lookup("query", queryBlob) ) { LOG(spam, "Received query blob of %zu bytes", queryBlob.size()); VISITOR_TRACE(9, vespalib::make_string("Setting up for query blob of %zu bytes", queryBlob.size())); - QueryTermDataFactory addOnFactory; + + // Create mapping from field name to field id, from field id to search spec, + // and from index name to list of field ids + _fieldSearchSpecMap.buildFromConfig(_env->get_vsm_fields_config()); + + QueryTermDataFactory addOnFactory(this); _query = Query(addOnFactory, vespalib::stringref(queryBlob.data(), queryBlob.size())); _searchBuffer->reserve(0x10000); @@ -414,7 +430,6 @@ void SearchVisitor::init(const Parameters & params) StringFieldIdTMap fieldsInQuery; setupFieldSearchers(additionalFields, fieldsInQuery); - setupScratchDocument(fieldsInQuery); _syntheticFieldsController.setup(_fieldSearchSpecMap.nameIdMap(), fieldsInQuery); @@ -754,9 +769,6 @@ void SearchVisitor::setupFieldSearchers(const std::vector & additionalFields, StringFieldIdTMap & fieldsInQuery) { - // Create mapping from field name to field id, from field id to search spec, - // and from index name to list of field ids - _fieldSearchSpecMap.buildFromConfig(_env->get_vsm_fields_config()); // Add extra elements to mapping from field name to field id _fieldSearchSpecMap.buildFromConfig(additionalFields); @@ -1145,7 +1157,8 @@ SearchVisitor::fillSortBuffer() return pos; } -void SearchVisitor::completedBucket(const document::BucketId&, HitCounter&) +void +SearchVisitor::completedBucket(const 
document::BucketId&, HitCounter&) { LOG(debug, "Completed bucket"); } @@ -1157,7 +1170,8 @@ SearchVisitor::generate_query_result(HitCounter& counter) return std::move(_queryResult); } -void SearchVisitor::completedVisitingInternal(HitCounter& hitCounter) +void +SearchVisitor::completedVisitingInternal(HitCounter& hitCounter) { if (!_init_called) { init(_params); diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h index ef7a41f23a5..76b2016e2e2 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h +++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h @@ -8,6 +8,7 @@ #include "rankmanager.h" #include "rankprocessor.h" #include "searchenvironment.h" +#include "querytermdata.h" #include #include #include @@ -42,7 +43,8 @@ class SearchEnvironmentSnapshot; * @brief Visitor that applies a search query to visitor data and * converts them to a QueryResultCommand. **/ -class SearchVisitor : public storage::Visitor { +class SearchVisitor : public storage::Visitor, + public SearchMethodInfo { public: SearchVisitor(storage::StorageComponent&, storage::VisitorEnvironment& vEnv, const vdslib::Parameters & params); @@ -488,6 +490,7 @@ private: vsm::StringFieldIdTMapT _fieldsUnion; void setupAttributeVector(const vsm::FieldPath &fieldPath); + bool is_text_matching(vespalib::stringref index) const noexcept override; }; class SearchVisitorFactory : public storage::VisitorFactory { diff --git a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h index b0154a82dae..f7ca07b4dc5 100644 --- a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h +++ b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h @@ -23,6 +23,11 @@ public: bool valid() const { return static_cast<bool>(_searcher); } size_t maxLength() const { return _maxLength; } bool uses_nearest_neighbor_search_method() const noexcept { return _searchMethod == 
VsmfieldsConfig::Fieldspec::Searchmethod::NEAREST_NEIGHBOR; } + bool uses_string_search_method() const noexcept { + return (_searchMethod == VsmfieldsConfig::Fieldspec::Searchmethod::UTF8) || + (_searchMethod == VsmfieldsConfig::Fieldspec::Searchmethod::AUTOUTF8) || + (_searchMethod == VsmfieldsConfig::Fieldspec::Searchmethod::SSE2UTF8); + } const vespalib::string& get_arg1() const noexcept { return _arg1; } /** -- cgit v1.2.3