diff options
author | Geir Storli <geirst@yahooinc.com> | 2022-09-01 17:19:36 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-09-01 17:19:36 +0200 |
commit | 5c47a454b71c393c72fd74d508984e98ad92bd10 (patch) | |
tree | 4b0f067cf9cce8051d55b483753e8d204186d5f8 | |
parent | e1f06d33dee2b1935964a248919701ace1c6a5c1 (diff) | |
parent | 14252848337aa8234b8699505fc93e90e7b2ca91 (diff) |
Merge pull request #23895 from vespa-engine/toregge/allocate-a-separate-docsums-state-for-each-summary-class-in-streaming-search
Allocate a docsums state for each summary class in streaming search.
5 files changed, 75 insertions, 43 deletions
diff --git a/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp b/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp index d12223d5cf4..a845a39207f 100644 --- a/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp +++ b/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp @@ -54,7 +54,8 @@ struct DocsumFixture : IDocsumStore, GetDocsumsStateCallback { void getDocsum(Slime &slime) { Slime slimeOut; SlimeInserter inserter(slimeOut); - writer->WriteDocsum(1u, &state, this, inserter); + auto rci = writer->resolveClassInfo(state._args.getResultClassName()); + writer->insertDocsum(rci, 1u, &state, this, inserter); vespalib::SmartBuffer buf(4_Ki); BinaryFormat::encode(slimeOut, buf); EXPECT_GREATER(BinaryFormat::decode(buf.obtain(), slime), 0u); diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp index 39d4be1aa3b..18371970722 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp @@ -162,12 +162,4 @@ DynamicDocsumWriter::InitState(const IAttributeManager & attrMan, GetDocsumsStat } } - -void -DynamicDocsumWriter::WriteDocsum(uint32_t docid, GetDocsumsState *state, IDocsumStore *docinfos, Inserter& inserter) -{ - ResolveClassInfo rci = resolveClassInfo(state->_args.getResultClassName()); - insertDocsum(rci, docid, state, docinfos, inserter); -} - } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.h b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.h index 909be169006..f9c5a5ff7cf 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.h @@ -37,8 +37,6 @@ public: virtual ~IDocsumWriter() = default; virtual void InitState(const search::IAttributeManager & attrMan, GetDocsumsState *state) = 0; - virtual void WriteDocsum(uint32_t docid, GetDocsumsState *state, - IDocsumStore *docinfos, Inserter & target) = 0; virtual void insertDocsum(const ResolveClassInfo & rci, uint32_t docid, GetDocsumsState *state, IDocsumStore *docinfos, Inserter & target) = 0; virtual ResolveClassInfo resolveClassInfo(vespalib::stringref outputClassName) const = 0; @@ -67,9 +65,6 @@ public: bool Override(const char *fieldName, std::unique_ptr<DocsumFieldWriter> writer); void InitState(const search::IAttributeManager & attrMan, GetDocsumsState *state) override; - void WriteDocsum(uint32_t docid, GetDocsumsState *state, - IDocsumStore *docinfos, Inserter & inserter) override; - void insertDocsum(const ResolveClassInfo & outputClassInfo, uint32_t docid, GetDocsumsState *state, IDocsumStore *docinfos, Inserter & inserter) override; diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp index 4973db909e4..b04f09ff70a 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp @@ -16,6 +16,7 @@ #include <vespa/searchcommon/attribute/config.h> #include <vespa/vespalib/geo/zcurve.h> #include <vespa/vespalib/objects/nbostream.h> +#include <vespa/vespalib/stllike/hash_map.hpp> #include <vespa/vespalib/util/exceptions.h> #include <vespa/vespalib/util/size_literals.h> #include <vespa/vespalib/data/slime/slime.h> @@ -112,28 +113,63 @@ createAttribute(const vespalib::string & name, const document::FieldValue & fv) return {}; } -SearchVisitor::SummaryGenerator::SummaryGenerator() : - HitsAggregationResult::SummaryGenerator(), - _callback(), - _docsumState(_callback), - _docsumFilter(), - _docsumWriter(nullptr), - _buf(4_Ki) +SearchVisitor::StreamingDocsumsState::StreamingDocsumsState(search::docsummary::GetDocsumsStateCallback& callback, ResolveClassInfo& resolve_class_info) + : _state(callback), + _resolve_class_info(resolve_class_info) +{ +} + +SearchVisitor::StreamingDocsumsState::~StreamingDocsumsState() = default; + +SearchVisitor::SummaryGenerator::SummaryGenerator(const search::IAttributeManager& attr_manager) + : HitsAggregationResult::SummaryGenerator(), + _callback(), + _docsum_states(), + _docsumFilter(), + _docsumWriter(nullptr), + _buf(4_Ki), + _dump_features(), + _location(), + _stack_dump(), + _attr_manager(attr_manager) { } SearchVisitor::SummaryGenerator::~SummaryGenerator() = default; +SearchVisitor::StreamingDocsumsState& +SearchVisitor::SummaryGenerator::get_streaming_docsums_state(const vespalib::string& summary_class) +{ + auto itr = _docsum_states.find(summary_class); + if (itr != _docsum_states.end()) { + return *itr->second; + } + auto rci = _docsumWriter->resolveClassInfo(summary_class); + auto state = std::make_unique<StreamingDocsumsState>(_callback, rci); + auto &ds = state->get_state(); + ds._args.setResultClassName(summary_class); + if (_dump_features.has_value()) { + ds._args.dumpFeatures(_dump_features.value()); + } + if (_location.has_value()) { + ds._args.setLocation(_location.value()); + } + if (_stack_dump.has_value()) { + ds._args.SetStackDump(_stack_dump.value().size(), _stack_dump.value().data()); + } + _docsumWriter->InitState(_attr_manager, &ds); + auto insres = _docsum_states.insert(std::make_pair(summary_class, std::move(state))); + return *insres.first->second; +} vespalib::ConstBufferRef SearchVisitor::SummaryGenerator::fillSummary(AttributeVector::DocId lid, const HitsAggregationResult::SummaryClassType & summaryClass) { if (_docsumWriter != nullptr) { - _docsumState._args.setResultClassName(summaryClass); vespalib::Slime slime; vespalib::slime::SlimeInserter inserter(slime); - _docsumWriter->WriteDocsum(lid, &_docsumState, _docsumFilter.get(), inserter); - + auto& sds = get_streaming_docsums_state(summaryClass); + _docsumWriter->insertDocsum(sds.get_resolve_class_info(), lid, &sds.get_state(), _docsumFilter.get(), inserter); _buf.reset(); vespalib::WritableMemory magicId = _buf.reserve(4); memcpy(magicId.data, &search::docsummary::SLIME_MAGIC_ID, 4); @@ -197,10 +233,10 @@ SearchVisitor::SearchVisitor(StorageComponent& component, _docTypeMapping(), _fieldSearchSpecMap(), _snippetModifierManager(), - _summaryGenerator(), _summaryClass("default"), _attrMan(), _attrCtx(_attrMan.createContext()), + _summaryGenerator(_attrMan), _groupingList(), _attributeFields(), _sortList(), @@ -245,7 +281,7 @@ void SearchVisitor::init(const Parameters & params) int queryFlags = params.get("queryflags", 0); if (queryFlags) { bool dumpFeatures = (queryFlags & QFLAG_DUMP_FEATURES) != 0; - _summaryGenerator.getDocsumState()._args.dumpFeatures(dumpFeatures); + _summaryGenerator.set_dump_features(dumpFeatures); _rankController.setDumpFeatures(dumpFeatures); LOG(debug, "QFLAG_DUMP_FEATURES: %s", _rankController.getDumpFeatures() ? "true" : "false"); } @@ -281,7 +317,7 @@ void SearchVisitor::init(const Parameters & params) if (params.lookup("location", valueRef)) { location = vespalib::string(valueRef.data(), valueRef.size()); LOG(debug, "Location = '%s'", location.c_str()); - _summaryGenerator.getDocsumState()._args.setLocation(valueRef); + _summaryGenerator.set_location(valueRef); } Parameters::ValueRef searchClusterBlob; @@ -306,7 +342,7 @@ void SearchVisitor::init(const Parameters & params) int stackCount = 0; if (params.get("querystackcount", stackCount)) { - _summaryGenerator.getDocsumState()._args.SetStackDump(queryBlob.size(), (const char*)queryBlob.data()); + _summaryGenerator.set_stack_dump(std::vector<char>(queryBlob.begin(), queryBlob.end())); } else { LOG(warning, "Request without query stack count"); } @@ -697,17 +733,7 @@ SearchVisitor::setupDocsumObjects() docsumFilter->setSnippetModifiers(_snippetModifierManager.getModifiers()); _summaryGenerator.setFilter(std::move(docsumFilter)); if (_vsmAdapter->getDocsumTools().get()) { - GetDocsumsState * ds(&_summaryGenerator.getDocsumState()); - _vsmAdapter->getDocsumTools()->getDocsumWriter()->InitState(_attrMan, ds); - _summaryGenerator.setDocsumWriter(*_vsmAdapter->getDocsumTools()->getDocsumWriter()); - for (const IAttributeVector * v : ds->_attributes) { - if (v != nullptr) { - vespalib::string name(v->getName()); - auto msg = vespalib::make_string("Illegal config: Docsum field writer using attribute vector '%s' configured for streaming search", name.c_str()); - LOG(error, "%s", msg.c_str()); - throw vespalib::IllegalStateException(msg); - } - } + _summaryGenerator.setDocsumWriter(*_vsmAdapter->getDocsumTools()->getDocsumWriter()); } else { LOG(warning, "No docsum tools available"); } diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h index 98aa6b89c9c..37ec6352b5a 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h +++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h @@ -384,23 +384,41 @@ private: typedef std::vector< GroupingEntry > GroupingList; typedef std::vector<vsm::StorageDocument::UP> DocumentVector; + class StreamingDocsumsState { + using ResolveClassInfo = search::docsummary::IDocsumWriter::ResolveClassInfo; + GetDocsumsState _state; + ResolveClassInfo _resolve_class_info; + public: + StreamingDocsumsState(search::docsummary::GetDocsumsStateCallback& callback, ResolveClassInfo& resolve_class_info); + ~StreamingDocsumsState(); + GetDocsumsState& get_state() noexcept { return _state; } + const ResolveClassInfo& get_resolve_class_info() const noexcept { return _resolve_class_info; } + }; + class SummaryGenerator : public HitsAggregationResult::SummaryGenerator { public: - SummaryGenerator(); + SummaryGenerator(const search::IAttributeManager& attr_manager); ~SummaryGenerator() override; - GetDocsumsState & getDocsumState() { return _docsumState; } vsm::GetDocsumsStateCallback & getDocsumCallback() { return _callback; } void setFilter(std::unique_ptr<vsm::DocsumFilter> filter) { _docsumFilter = std::move(filter); } void setDocsumCache(const vsm::IDocSumCache & cache) { _docsumFilter->setDocSumStore(cache); } void setDocsumWriter(IDocsumWriter & docsumWriter) { _docsumWriter = & docsumWriter; } vespalib::ConstBufferRef fillSummary(search::AttributeVector::DocId lid, const HitsAggregationResult::SummaryClassType & summaryClass) override; + void set_dump_features(bool dump_features) { _dump_features = dump_features; } + void set_location(const vespalib::string& location) { _location = location; } + void set_stack_dump(std::vector<char> stack_dump) { _stack_dump = std::move(stack_dump); } private: + StreamingDocsumsState& get_streaming_docsums_state(const vespalib::string& summary_class); vsm::GetDocsumsStateCallback _callback; - GetDocsumsState _docsumState; + vespalib::hash_map<vespalib::string, std::unique_ptr<StreamingDocsumsState>> _docsum_states; std::unique_ptr<vsm::DocsumFilter> _docsumFilter; search::docsummary::IDocsumWriter * _docsumWriter; vespalib::SmartBuffer _buf; + std::optional<bool> _dump_features; + std::optional<vespalib::string> _location; + std::optional<std::vector<char>> _stack_dump; + const search::IAttributeManager& _attr_manager; }; class HitsResultPreparator : public vespalib::ObjectOperation, public vespalib::ObjectPredicate @@ -432,10 +450,10 @@ private: vsm::DocumentTypeMapping _docTypeMapping; vsm::FieldSearchSpecMap _fieldSearchSpecMap; vsm::SnippetModifierManager _snippetModifierManager; - SummaryGenerator _summaryGenerator; vespalib::string _summaryClass; search::AttributeManager _attrMan; search::attribute::IAttributeContext::UP _attrCtx; + SummaryGenerator _summaryGenerator; GroupingList _groupingList; std::vector<AttrInfo> _attributeFields; search::common::SortSpec _sortSpec; |