diff options
author | Tor Egge <Tor.Egge@online.no> | 2023-05-12 14:02:28 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2023-05-12 14:02:28 +0200 |
commit | 9eec0597af1c91c7f292a2ff9b512371f1fc108f (patch) | |
tree | 7f4fa2d42ffd78ec607af1f825adcc2dd6df7216 | |
parent | f2334d2df98f96eb66bf097a1c4bbc0f89ef4b3e (diff) |
Add attribute access recorder for streaming search mode. Use it to
determine which attributes to populate during a streaming search.
10 files changed, 121 insertions, 37 deletions
diff --git a/streamingvisitors/src/vespa/searchvisitor/CMakeLists.txt b/streamingvisitors/src/vespa/searchvisitor/CMakeLists.txt index 06caf080923..4911b0693e4 100644 --- a/streamingvisitors/src/vespa/searchvisitor/CMakeLists.txt +++ b/streamingvisitors/src/vespa/searchvisitor/CMakeLists.txt @@ -1,6 +1,7 @@ # Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. vespa_add_library(streamingvisitors SOURCES + attribute_access_recorder.cpp hitcollector.cpp indexenvironment.cpp matching_elements_filler.cpp diff --git a/streamingvisitors/src/vespa/searchvisitor/attribute_access_recorder.cpp b/streamingvisitors/src/vespa/searchvisitor/attribute_access_recorder.cpp new file mode 100644 index 00000000000..9d520cde187 --- /dev/null +++ b/streamingvisitors/src/vespa/searchvisitor/attribute_access_recorder.cpp @@ -0,0 +1,67 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "attribute_access_recorder.h" +#include <vespa/vespalib/stllike/hash_set.hpp> + +using search::attribute::IAttributeVector; + +namespace streaming { + +AttributeAccessRecorder::AttributeAccessRecorder(std::unique_ptr<IAttributeContext> ctx) + : _ctx(std::move(ctx)), + _accessed_attributes() +{ +} + +AttributeAccessRecorder::~AttributeAccessRecorder() = default; + +void +AttributeAccessRecorder::asyncForAttribute(const vespalib::string& name, std::unique_ptr<search::attribute::IAttributeFunctor> func) const +{ + _ctx->asyncForAttribute(name, std::move(func)); +} + +const IAttributeVector* +AttributeAccessRecorder::getAttribute(const string& name) const +{ + auto ret = _ctx->getAttribute(name); + if (ret != nullptr) { + _accessed_attributes.insert(name); + } + return ret; +} + +const IAttributeVector* +AttributeAccessRecorder::getAttributeStableEnum(const string& name) const +{ + auto ret = _ctx->getAttributeStableEnum(name); + if (ret != nullptr) { + _accessed_attributes.insert(name); + } + return ret; +} + +void +AttributeAccessRecorder::getAttributeList(std::vector<const IAttributeVector*>& list) const +{ + _ctx->getAttributeList(list); +} + +void +AttributeAccessRecorder::releaseEnumGuards() +{ + _ctx->releaseEnumGuards(); +} + +std::vector<vespalib::string> +AttributeAccessRecorder::get_accessed_attributes() const +{ + std::vector<vespalib::string> result; + result.reserve(_accessed_attributes.size()); + for (auto& attr : _accessed_attributes) { + result.emplace_back(attr); + } + return result; +} + +} diff --git a/streamingvisitors/src/vespa/searchvisitor/attribute_access_recorder.h b/streamingvisitors/src/vespa/searchvisitor/attribute_access_recorder.h new file mode 100644 index 00000000000..233c507bda3 --- /dev/null +++ b/streamingvisitors/src/vespa/searchvisitor/attribute_access_recorder.h @@ -0,0 +1,30 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/searchcommon/attribute/iattributecontext.h> +#include <vespa/vespalib/stllike/hash_set.h> + +namespace streaming { + +/* + * This class wraps an IAttributeContext and records accesses to attribute + * vectors. + */ +class AttributeAccessRecorder : public search::attribute::IAttributeContext +{ + std::unique_ptr<search::attribute::IAttributeContext> _ctx; + mutable vespalib::hash_set<vespalib::string> _accessed_attributes; + +public: + AttributeAccessRecorder(std::unique_ptr<IAttributeContext> ctx); + ~AttributeAccessRecorder() override; + void asyncForAttribute(const vespalib::string& name, std::unique_ptr<search::attribute::IAttributeFunctor> func) const override; + const search::attribute::IAttributeVector* getAttribute(const string& name) const override; + const search::attribute::IAttributeVector * getAttributeStableEnum(const string& name) const override; + void getAttributeList(std::vector<const search::attribute::IAttributeVector *>& list) const override; + void releaseEnumGuards() override; + std::vector<vespalib::string> get_accessed_attributes() const; +}; + +} diff --git a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp index 1242195c9df..0e4c082dea3 100644 --- a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp @@ -13,8 +13,6 @@ IndexEnvironment::IndexEnvironment(const ITableManager & tableManager) : _fields(), _fieldNames(), _motivation(RANK), - _rankAttributes(), - _dumpAttributes(), _ranking_assets_repo() { } @@ -41,15 +39,7 @@ IndexEnvironment::addField(const vespalib::string& name, } void -IndexEnvironment::hintAttributeAccess(const string & name) const { - if (name.empty()) { - return; - } - if (_motivation == RANK) { - _rankAttributes.insert(name); - } else { - _dumpAttributes.insert(name); - } +IndexEnvironment::hintAttributeAccess(const string &) const { } void diff --git a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h index 88ea5a5ada8..f741af77e35 100644 --- a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h +++ b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h @@ -29,8 +29,6 @@ private: std::vector<search::fef::FieldInfo> _fields; StringInt32Map _fieldNames; mutable FeatureMotivation _motivation; - mutable std::set<vespalib::string> _rankAttributes; - mutable std::set<vespalib::string> _dumpAttributes; std::shared_ptr<const search::fef::IRankingAssetsRepo> _ranking_assets_repo; public: @@ -88,10 +86,6 @@ public: void set_ranking_assets_repo(std::shared_ptr<const search::fef::IRankingAssetsRepo> ranking_assets_repo); - const std::set<vespalib::string> & getHintedRankAttributes() const { return _rankAttributes; } - - const std::set<vespalib::string> & getHintedDumpAttributes() const { return _dumpAttributes; } - //TODO Wire in proper distribution key uint32_t getDistributionKey() const override { return 0; } diff --git a/streamingvisitors/src/vespa/searchvisitor/queryenvironment.cpp b/streamingvisitors/src/vespa/searchvisitor/queryenvironment.cpp index 0765074e315..306f7f5d655 100644 --- a/streamingvisitors/src/vespa/searchvisitor/queryenvironment.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/queryenvironment.cpp @@ -46,7 +46,7 @@ QueryEnvironment::QueryEnvironment(const string & location_str, const IAttributeManager * attrMgr) : _indexEnv(indexEnv), _properties(properties), - _attrCtx(attrMgr->createContext()), + _attrCtx(std::make_unique<AttributeAccessRecorder>(attrMgr->createContext())), _queryTerms(), _locations(parseLocation(location_str)) { diff --git a/streamingvisitors/src/vespa/searchvisitor/queryenvironment.h b/streamingvisitors/src/vespa/searchvisitor/queryenvironment.h index dd543a60244..c5dc442e424 100644 --- a/streamingvisitors/src/vespa/searchvisitor/queryenvironment.h +++ b/streamingvisitors/src/vespa/searchvisitor/queryenvironment.h @@ -2,12 +2,12 @@ #pragma once -#include <vespa/searchcommon/attribute/iattributecontext.h> +#include "attribute_access_recorder.h" +#include "indexenvironment.h" #include <vespa/searchlib/attribute/iattributemanager.h> #include <vespa/searchlib/fef/iindexenvironment.h> #include <vespa/searchlib/fef/iqueryenvironment.h> #include <vespa/searchlib/fef/properties.h> -#include "indexenvironment.h" namespace streaming { @@ -20,7 +20,7 @@ class QueryEnvironment : public search::fef::IQueryEnvironment private: const IndexEnvironment &_indexEnv; const search::fef::Properties &_properties; - search::attribute::IAttributeContext::UP _attrCtx; + std::unique_ptr<AttributeAccessRecorder> _attrCtx; std::vector<const search::fef::ITermData *> _queryTerms; std::vector<search::common::GeoLocationSpec> _locations; @@ -61,6 +61,8 @@ public: virtual const search::fef::IIndexEnvironment & getIndexEnvironment() const override { return _indexEnv; } void addTerm(const search::fef::ITermData *term) { _queryTerms.push_back(term); } + + std::vector<vespalib::string> get_accessed_attributes() const { return _attrCtx->get_accessed_attributes(); } }; } // namespace streaming diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h index 443a2626bf7..4bac204c8e1 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h +++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h @@ -78,6 +78,7 @@ public: HitCollector & getHitCollector() { return *_hitCollector; } uint32_t getDocId() const { return _docId; } search::fef::IQueryEnvironment& get_query_env() { return _queryEnv; } + QueryEnvironment& get_real_query_env() { return _queryEnv; } }; } // namespace streaming diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp index f9473167b07..c53dfae294a 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp @@ -548,11 +548,12 @@ SearchVisitor::PositionInserter::onStructStart(const Content & c) } void -SearchVisitor::RankController::processHintedAttributes(const IndexEnvironment & indexEnv, bool rank, - const search::IAttributeManager & attrMan, - std::vector<AttrInfo> & attributeFields) +SearchVisitor::RankController::processAccessedAttributes(const QueryEnvironment &queryEnv, bool rank, + const search::IAttributeManager & attrMan, + std::vector<AttrInfo> & attributeFields) { - const std::set<vespalib::string> & attributes = (rank ? indexEnv.getHintedRankAttributes() : indexEnv.getHintedDumpAttributes()); + auto attributes = queryEnv.get_accessed_attributes(); + auto& indexEnv = queryEnv.getIndexEnvironment(); for (const vespalib::string & name : attributes) { LOG(debug, "Process attribute access hint (%s): '%s'", rank ? "rank" : "dump", name.c_str()); const search::fef::FieldInfo * fieldInfo = indexEnv.getFieldByName(name); @@ -601,22 +602,18 @@ SearchVisitor::RankController::setupRankProcessors(Query & query, std::vector<AttrInfo> & attributeFields) { _rankSetup = &_rankManagerSnapshot->getRankSetup(_rankProfile); - - // register attribute vectors needed for ranking - const IndexEnvironment & indexEnv = _rankManagerSnapshot->getIndexEnvironment(_rankProfile); - processHintedAttributes(indexEnv, true, attrMan, attributeFields); - _rankProcessor = std::make_unique<RankProcessor>(_rankManagerSnapshot, _rankProfile, query, location, _queryProperties, &attrMan); LOG(debug, "Initialize rank processor"); _rankProcessor->initForRanking(wantedHitCount); + // register attribute vectors needed for ranking + processAccessedAttributes(_rankProcessor->get_real_query_env(), true, attrMan, attributeFields); if (_dumpFeatures) { - // register attribute vectors needed for dumping - processHintedAttributes(indexEnv, false, attrMan, attributeFields); - _dumpProcessor = std::make_unique<RankProcessor>(_rankManagerSnapshot, _rankProfile, query, location, _queryProperties, &attrMan); LOG(debug, "Initialize dump processor"); _dumpProcessor->initForDumping(wantedHitCount); + // register attribute vectors needed for dumping + processAccessedAttributes(_dumpProcessor->get_real_query_env(), false, attrMan, attributeFields); } _hasRanking = true; diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h index 72cd08ff781..515d032b21b 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h +++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h @@ -133,11 +133,13 @@ private: RankProcessor::UP _dumpProcessor; /** - * Process attribute hints and add needed attributes to the given list. + * Process attribute accessed and add needed attributes to the + * given list. **/ - static void processHintedAttributes(const IndexEnvironment & indexEnv, bool rank, - const search::IAttributeManager & attrMan, - std::vector<AttrInfo> & attributeFields); + static void processAccessedAttributes(const QueryEnvironment& queryEnv, + bool rank, + const search::IAttributeManager& attrMan, + std::vector<AttrInfo>& attributeFields); public: RankController(); |