aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTor Egge <Tor.Egge@online.no>2023-05-12 14:02:28 +0200
committerTor Egge <Tor.Egge@online.no>2023-05-12 14:02:28 +0200
commit9eec0597af1c91c7f292a2ff9b512371f1fc108f (patch)
tree7f4fa2d42ffd78ec607af1f825adcc2dd6df7216
parentf2334d2df98f96eb66bf097a1c4bbc0f89ef4b3e (diff)
Add attribute access recorder for streaming search mode. Use it to
determine which attributes to populate during a streaming search.
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/CMakeLists.txt1
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/attribute_access_recorder.cpp67
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/attribute_access_recorder.h30
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp12
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/indexenvironment.h6
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/queryenvironment.cpp2
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/queryenvironment.h8
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/rankprocessor.h1
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp21
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/searchvisitor.h10
10 files changed, 121 insertions, 37 deletions
diff --git a/streamingvisitors/src/vespa/searchvisitor/CMakeLists.txt b/streamingvisitors/src/vespa/searchvisitor/CMakeLists.txt
index 06caf080923..4911b0693e4 100644
--- a/streamingvisitors/src/vespa/searchvisitor/CMakeLists.txt
+++ b/streamingvisitors/src/vespa/searchvisitor/CMakeLists.txt
@@ -1,6 +1,7 @@
# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
vespa_add_library(streamingvisitors
SOURCES
+ attribute_access_recorder.cpp
hitcollector.cpp
indexenvironment.cpp
matching_elements_filler.cpp
diff --git a/streamingvisitors/src/vespa/searchvisitor/attribute_access_recorder.cpp b/streamingvisitors/src/vespa/searchvisitor/attribute_access_recorder.cpp
new file mode 100644
index 00000000000..9d520cde187
--- /dev/null
+++ b/streamingvisitors/src/vespa/searchvisitor/attribute_access_recorder.cpp
@@ -0,0 +1,67 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "attribute_access_recorder.h"
+#include <vespa/vespalib/stllike/hash_set.hpp>
+
+using search::attribute::IAttributeVector;
+
+namespace streaming {
+
+AttributeAccessRecorder::AttributeAccessRecorder(std::unique_ptr<IAttributeContext> ctx)
+ : _ctx(std::move(ctx)),
+ _accessed_attributes()
+{
+}
+
+AttributeAccessRecorder::~AttributeAccessRecorder() = default;
+
+void
+AttributeAccessRecorder::asyncForAttribute(const vespalib::string& name, std::unique_ptr<search::attribute::IAttributeFunctor> func) const
+{
+ _ctx->asyncForAttribute(name, std::move(func));
+}
+
+const IAttributeVector*
+AttributeAccessRecorder::getAttribute(const string& name) const
+{
+ auto ret = _ctx->getAttribute(name);
+ if (ret != nullptr) {
+ _accessed_attributes.insert(name);
+ }
+ return ret;
+}
+
+const IAttributeVector*
+AttributeAccessRecorder::getAttributeStableEnum(const string& name) const
+{
+ auto ret = _ctx->getAttributeStableEnum(name);
+ if (ret != nullptr) {
+ _accessed_attributes.insert(name);
+ }
+ return ret;
+}
+
+void
+AttributeAccessRecorder::getAttributeList(std::vector<const IAttributeVector*>& list) const
+{
+ _ctx->getAttributeList(list);
+}
+
+void
+AttributeAccessRecorder::releaseEnumGuards()
+{
+ _ctx->releaseEnumGuards();
+}
+
+std::vector<vespalib::string>
+AttributeAccessRecorder::get_accessed_attributes() const
+{
+ std::vector<vespalib::string> result;
+ result.reserve(_accessed_attributes.size());
+ for (auto& attr : _accessed_attributes) {
+ result.emplace_back(attr);
+ }
+ return result;
+}
+
+}
diff --git a/streamingvisitors/src/vespa/searchvisitor/attribute_access_recorder.h b/streamingvisitors/src/vespa/searchvisitor/attribute_access_recorder.h
new file mode 100644
index 00000000000..233c507bda3
--- /dev/null
+++ b/streamingvisitors/src/vespa/searchvisitor/attribute_access_recorder.h
@@ -0,0 +1,30 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchcommon/attribute/iattributecontext.h>
+#include <vespa/vespalib/stllike/hash_set.h>
+
+namespace streaming {
+
+/*
+ * Wraps an IAttributeContext and records the name of each attribute vector
+ * successfully looked up via getAttribute() or getAttributeStableEnum().
+ */
+class AttributeAccessRecorder : public search::attribute::IAttributeContext
+{
+ std::unique_ptr<search::attribute::IAttributeContext> _ctx;
+ mutable vespalib::hash_set<vespalib::string> _accessed_attributes;
+
+public:
+ AttributeAccessRecorder(std::unique_ptr<IAttributeContext> ctx);
+ ~AttributeAccessRecorder() override;
+ void asyncForAttribute(const vespalib::string& name, std::unique_ptr<search::attribute::IAttributeFunctor> func) const override;
+ const search::attribute::IAttributeVector* getAttribute(const string& name) const override;
+ const search::attribute::IAttributeVector * getAttributeStableEnum(const string& name) const override;
+ void getAttributeList(std::vector<const search::attribute::IAttributeVector *>& list) const override;
+ void releaseEnumGuards() override;
+ std::vector<vespalib::string> get_accessed_attributes() const;
+};
+
+}
diff --git a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp
index 1242195c9df..0e4c082dea3 100644
--- a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp
+++ b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp
@@ -13,8 +13,6 @@ IndexEnvironment::IndexEnvironment(const ITableManager & tableManager) :
_fields(),
_fieldNames(),
_motivation(RANK),
- _rankAttributes(),
- _dumpAttributes(),
_ranking_assets_repo()
{
}
@@ -41,15 +39,7 @@ IndexEnvironment::addField(const vespalib::string& name,
}
void
-IndexEnvironment::hintAttributeAccess(const string & name) const {
- if (name.empty()) {
- return;
- }
- if (_motivation == RANK) {
- _rankAttributes.insert(name);
- } else {
- _dumpAttributes.insert(name);
- }
+IndexEnvironment::hintAttributeAccess(const string &) const {
}
void
diff --git a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h
index 88ea5a5ada8..f741af77e35 100644
--- a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h
+++ b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h
@@ -29,8 +29,6 @@ private:
std::vector<search::fef::FieldInfo> _fields;
StringInt32Map _fieldNames;
mutable FeatureMotivation _motivation;
- mutable std::set<vespalib::string> _rankAttributes;
- mutable std::set<vespalib::string> _dumpAttributes;
std::shared_ptr<const search::fef::IRankingAssetsRepo> _ranking_assets_repo;
public:
@@ -88,10 +86,6 @@ public:
void set_ranking_assets_repo(std::shared_ptr<const search::fef::IRankingAssetsRepo> ranking_assets_repo);
- const std::set<vespalib::string> & getHintedRankAttributes() const { return _rankAttributes; }
-
- const std::set<vespalib::string> & getHintedDumpAttributes() const { return _dumpAttributes; }
-
//TODO Wire in proper distribution key
uint32_t getDistributionKey() const override { return 0; }
diff --git a/streamingvisitors/src/vespa/searchvisitor/queryenvironment.cpp b/streamingvisitors/src/vespa/searchvisitor/queryenvironment.cpp
index 0765074e315..306f7f5d655 100644
--- a/streamingvisitors/src/vespa/searchvisitor/queryenvironment.cpp
+++ b/streamingvisitors/src/vespa/searchvisitor/queryenvironment.cpp
@@ -46,7 +46,7 @@ QueryEnvironment::QueryEnvironment(const string & location_str,
const IAttributeManager * attrMgr) :
_indexEnv(indexEnv),
_properties(properties),
- _attrCtx(attrMgr->createContext()),
+ _attrCtx(std::make_unique<AttributeAccessRecorder>(attrMgr->createContext())),
_queryTerms(),
_locations(parseLocation(location_str))
{
diff --git a/streamingvisitors/src/vespa/searchvisitor/queryenvironment.h b/streamingvisitors/src/vespa/searchvisitor/queryenvironment.h
index dd543a60244..c5dc442e424 100644
--- a/streamingvisitors/src/vespa/searchvisitor/queryenvironment.h
+++ b/streamingvisitors/src/vespa/searchvisitor/queryenvironment.h
@@ -2,12 +2,12 @@
#pragma once
-#include <vespa/searchcommon/attribute/iattributecontext.h>
+#include "attribute_access_recorder.h"
+#include "indexenvironment.h"
#include <vespa/searchlib/attribute/iattributemanager.h>
#include <vespa/searchlib/fef/iindexenvironment.h>
#include <vespa/searchlib/fef/iqueryenvironment.h>
#include <vespa/searchlib/fef/properties.h>
-#include "indexenvironment.h"
namespace streaming {
@@ -20,7 +20,7 @@ class QueryEnvironment : public search::fef::IQueryEnvironment
private:
const IndexEnvironment &_indexEnv;
const search::fef::Properties &_properties;
- search::attribute::IAttributeContext::UP _attrCtx;
+ std::unique_ptr<AttributeAccessRecorder> _attrCtx;
std::vector<const search::fef::ITermData *> _queryTerms;
std::vector<search::common::GeoLocationSpec> _locations;
@@ -61,6 +61,8 @@ public:
virtual const search::fef::IIndexEnvironment & getIndexEnvironment() const override { return _indexEnv; }
void addTerm(const search::fef::ITermData *term) { _queryTerms.push_back(term); }
+
+ std::vector<vespalib::string> get_accessed_attributes() const { return _attrCtx->get_accessed_attributes(); }
};
} // namespace streaming
diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h
index 443a2626bf7..4bac204c8e1 100644
--- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h
+++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h
@@ -78,6 +78,7 @@ public:
HitCollector & getHitCollector() { return *_hitCollector; }
uint32_t getDocId() const { return _docId; }
search::fef::IQueryEnvironment& get_query_env() { return _queryEnv; }
+ QueryEnvironment& get_real_query_env() { return _queryEnv; }
};
} // namespace streaming
diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
index f9473167b07..c53dfae294a 100644
--- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
+++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
@@ -548,11 +548,12 @@ SearchVisitor::PositionInserter::onStructStart(const Content & c)
}
void
-SearchVisitor::RankController::processHintedAttributes(const IndexEnvironment & indexEnv, bool rank,
- const search::IAttributeManager & attrMan,
- std::vector<AttrInfo> & attributeFields)
+SearchVisitor::RankController::processAccessedAttributes(const QueryEnvironment &queryEnv, bool rank,
+ const search::IAttributeManager & attrMan,
+ std::vector<AttrInfo> & attributeFields)
{
- const std::set<vespalib::string> & attributes = (rank ? indexEnv.getHintedRankAttributes() : indexEnv.getHintedDumpAttributes());
+ auto attributes = queryEnv.get_accessed_attributes();
+ auto& indexEnv = queryEnv.getIndexEnvironment();
for (const vespalib::string & name : attributes) {
LOG(debug, "Process attribute access hint (%s): '%s'", rank ? "rank" : "dump", name.c_str());
const search::fef::FieldInfo * fieldInfo = indexEnv.getFieldByName(name);
@@ -601,22 +602,18 @@ SearchVisitor::RankController::setupRankProcessors(Query & query,
std::vector<AttrInfo> & attributeFields)
{
_rankSetup = &_rankManagerSnapshot->getRankSetup(_rankProfile);
-
- // register attribute vectors needed for ranking
- const IndexEnvironment & indexEnv = _rankManagerSnapshot->getIndexEnvironment(_rankProfile);
- processHintedAttributes(indexEnv, true, attrMan, attributeFields);
-
_rankProcessor = std::make_unique<RankProcessor>(_rankManagerSnapshot, _rankProfile, query, location, _queryProperties, &attrMan);
LOG(debug, "Initialize rank processor");
_rankProcessor->initForRanking(wantedHitCount);
+ // register attribute vectors needed for ranking
+ processAccessedAttributes(_rankProcessor->get_real_query_env(), true, attrMan, attributeFields);
if (_dumpFeatures) {
- // register attribute vectors needed for dumping
- processHintedAttributes(indexEnv, false, attrMan, attributeFields);
-
_dumpProcessor = std::make_unique<RankProcessor>(_rankManagerSnapshot, _rankProfile, query, location, _queryProperties, &attrMan);
LOG(debug, "Initialize dump processor");
_dumpProcessor->initForDumping(wantedHitCount);
+ // register attribute vectors needed for dumping
+ processAccessedAttributes(_dumpProcessor->get_real_query_env(), false, attrMan, attributeFields);
}
_hasRanking = true;
diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h
index 72cd08ff781..515d032b21b 100644
--- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h
+++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h
@@ -133,11 +133,13 @@ private:
RankProcessor::UP _dumpProcessor;
/**
- * Process attribute hints and add needed attributes to the given list.
+ * Process the accessed attributes and add the needed ones to the
+ * given list.
**/
- static void processHintedAttributes(const IndexEnvironment & indexEnv, bool rank,
- const search::IAttributeManager & attrMan,
- std::vector<AttrInfo> & attributeFields);
+ static void processAccessedAttributes(const QueryEnvironment& queryEnv,
+ bool rank,
+ const search::IAttributeManager& attrMan,
+ std::vector<AttrInfo>& attributeFields);
public:
RankController();