From b8d3ab8c0ad28c5cedbe414eb9c7c55a4e0bd6ae Mon Sep 17 00:00:00 2001 From: Geir Storli Date: Tue, 20 Sep 2022 13:53:54 +0000 Subject: Optimize resolving and usage of summary result class. If an explicit set of fields is specified in the docsum request, we avoid reading the document instance from disk if all those fields are generated on the fly. --- .../tests/docsummary/result_class/CMakeLists.txt | 9 ++++ .../docsummary/result_class/result_class_test.cpp | 43 +++++++++++++++++ .../slime_summary/slime_summary_test.cpp | 2 +- .../searchsummary/docsummary/docsumwriter.cpp | 56 +++++++++------------- .../vespa/searchsummary/docsummary/docsumwriter.h | 25 +++++----- .../searchsummary/docsummary/getdocsumargs.cpp | 7 +-- .../vespa/searchsummary/docsummary/getdocsumargs.h | 5 +- .../searchsummary/docsummary/res_config_entry.cpp | 14 ++++-- .../searchsummary/docsummary/res_config_entry.h | 13 +++-- .../vespa/searchsummary/docsummary/resultclass.cpp | 24 ++++++++-- .../vespa/searchsummary/docsummary/resultclass.h | 18 +++---- 11 files changed, 142 insertions(+), 74 deletions(-) create mode 100644 searchsummary/src/tests/docsummary/result_class/CMakeLists.txt create mode 100644 searchsummary/src/tests/docsummary/result_class/result_class_test.cpp (limited to 'searchsummary/src') diff --git a/searchsummary/src/tests/docsummary/result_class/CMakeLists.txt b/searchsummary/src/tests/docsummary/result_class/CMakeLists.txt new file mode 100644 index 00000000000..3b941aec244 --- /dev/null +++ b/searchsummary/src/tests/docsummary/result_class/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchsummary_result_class_test_app TEST + SOURCES + result_class_test.cpp + DEPENDS + searchsummary + GTest::GTest +) +vespa_add_test(NAME searchsummary_result_class_test_app COMMAND searchsummary_result_class_test_app) diff --git a/searchsummary/src/tests/docsummary/result_class/result_class_test.cpp b/searchsummary/src/tests/docsummary/result_class/result_class_test.cpp new file mode 100644 index 00000000000..a47786c2a44 --- /dev/null +++ b/searchsummary/src/tests/docsummary/result_class/result_class_test.cpp @@ -0,0 +1,43 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +#include +#include + +using namespace search::docsummary; + +class MockWriter : public DocsumFieldWriter { +private: + bool _generated; +public: + MockWriter(bool generated) : _generated(generated) {} + bool IsGenerated() const override { return _generated; } + virtual void insertField(uint32_t, const IDocsumStoreDocument*, GetDocsumsState&, vespalib::slime::Inserter &) const override {} +}; + +TEST(ResultClassTest, subset_of_fields_in_class_are_generated) +{ + ResultClass rc("test"); + rc.AddConfigEntry("from_disk"); + rc.AddConfigEntry("generated", std::make_unique(true)); + rc.AddConfigEntry("not_generated", std::make_unique(false)); + + EXPECT_FALSE(rc.all_fields_generated({})); + EXPECT_FALSE(rc.all_fields_generated({"from_disk", "generated", "not_generated"})); + EXPECT_FALSE(rc.all_fields_generated({"generated", "not_generated"})); + EXPECT_TRUE(rc.all_fields_generated({"generated"})); + EXPECT_FALSE(rc.all_fields_generated({"not_generated"})); +} + +TEST(ResultClassTest, all_fields_in_class_are_generated) +{ + ResultClass rc("test"); + rc.AddConfigEntry("generated_1", std::make_unique(true)); + rc.AddConfigEntry("generated_2", std::make_unique(true)); + + EXPECT_TRUE(rc.all_fields_generated({})); + EXPECT_TRUE(rc.all_fields_generated({"generated_1"})); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp b/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp index 7b3e79ec38e..fda4923a826 100644 --- a/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp +++ b/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp @@ -55,7 +55,7 @@ struct SlimeSummaryTest : testing::Test, IDocsumStore, GetDocsumsStateCallback { void getDocsum(Slime &slime) { Slime slimeOut; SlimeInserter inserter(slimeOut); - auto rci = writer->resolveClassInfo(state._args.getResultClassName()); + auto rci = writer->resolveClassInfo(state._args.getResultClassName(), {}); writer->insertDocsum(rci, 1u, state, this, inserter); vespalib::SmartBuffer buf(4_Ki); BinaryFormat::encode(slimeOut, buf); diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp index b4b663718bd..62112af7e3e 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp @@ -20,30 +20,20 @@ using vespalib::slime::ObjectInserter; namespace search::docsummary { DynamicDocsumWriter::ResolveClassInfo -DynamicDocsumWriter::resolveClassInfo(vespalib::stringref outputClassName) const -{ - DynamicDocsumWriter::ResolveClassInfo rci = resolveOutputClass(outputClassName); - return rci; -} - -DynamicDocsumWriter::ResolveClassInfo -DynamicDocsumWriter::resolveOutputClass(vespalib::stringref summaryClass) const +DynamicDocsumWriter::resolveClassInfo(vespalib::stringref class_name, + const vespalib::hash_set& fields) const { DynamicDocsumWriter::ResolveClassInfo result; - auto id = _resultConfig->LookupResultClassId(summaryClass); + auto id = _resultConfig->LookupResultClassId(class_name); - const ResultClass *oC = (id != ResultConfig::NoClassID()) ? _resultConfig->LookupResultClass(id) : nullptr; - if (oC == nullptr) { + const auto* res_class = (id != ResultConfig::NoClassID()) ? _resultConfig->LookupResultClass(id) : nullptr; + if (res_class == nullptr) { Issue::report("Illegal docsum class requested: %s, using empty docsum for documents", - vespalib::string(summaryClass).c_str()); + vespalib::string(class_name).c_str()); } else { - const ResultClass::DynamicInfo &rcInfo = oC->getDynamicInfo(); - if (rcInfo._generateCnt == oC->GetNumEntries()) { - LOG_ASSERT(rcInfo._overrideCnt == rcInfo._generateCnt); - result.allGenerated = true; - } + result.all_fields_generated = res_class->all_fields_generated(fields); } - result.outputClass = oC; + result.res_class = res_class; return result; } @@ -51,18 +41,18 @@ void DynamicDocsumWriter::insertDocsum(const ResolveClassInfo & rci, uint32_t docid, GetDocsumsState& state, IDocsumStore *docinfos, Inserter& topInserter) { - if (rci.outputClass == nullptr) { + if (rci.res_class == nullptr) { // Use empty docsum when illegal docsum class has been requested return; } - if (rci.allGenerated) { + if (rci.all_fields_generated) { // generate docsum entry on-the-fly vespalib::slime::Cursor & docsum = topInserter.insertObject(); - for (uint32_t i = 0; i < rci.outputClass->GetNumEntries(); ++i) { - const ResConfigEntry *resCfg = rci.outputClass->GetEntry(i); - const DocsumFieldWriter *writer = resCfg->_docsum_field_writer.get(); - if (state._args.needField(resCfg->_name) && ! writer->isDefaultValue(docid, state)) { - const Memory field_name(resCfg->_name.data(), resCfg->_name.size()); + for (uint32_t i = 0; i < rci.res_class->GetNumEntries(); ++i) { + const ResConfigEntry *resCfg = rci.res_class->GetEntry(i); + const DocsumFieldWriter *writer = resCfg->writer(); + if (state._args.need_field(resCfg->name()) && ! writer->isDefaultValue(docid, state)) { + const Memory field_name(resCfg->name().data(), resCfg->name().size()); ObjectInserter inserter(docsum, field_name); writer->insertField(docid, nullptr, state, inserter); } @@ -75,13 +65,13 @@ DynamicDocsumWriter::insertDocsum(const ResolveClassInfo & rci, uint32_t docid, } // insert docsum blob vespalib::slime::Cursor & docsum = topInserter.insertObject(); - for (uint32_t i = 0; i < rci.outputClass->GetNumEntries(); ++i) { - const ResConfigEntry *outCfg = rci.outputClass->GetEntry(i); - if (!state._args.needField(outCfg->_name)) { + for (uint32_t i = 0; i < rci.res_class->GetNumEntries(); ++i) { + const ResConfigEntry *outCfg = rci.res_class->GetEntry(i); + if (!state._args.need_field(outCfg->name())) { continue; } - const DocsumFieldWriter *writer = outCfg->_docsum_field_writer.get(); - const Memory field_name(outCfg->_name.data(), outCfg->_name.size()); + const DocsumFieldWriter *writer = outCfg->writer(); + const Memory field_name(outCfg->name().data(), outCfg->name().size()); ObjectInserter inserter(docsum, field_name); if (writer != nullptr) { if (! writer->isDefaultValue(docid, state)) { @@ -89,7 +79,7 @@ DynamicDocsumWriter::insertDocsum(const ResolveClassInfo & rci, uint32_t docid, } } else { if (doc) { - doc->insert_summary_field(outCfg->_name, inserter); + doc->insert_summary_field(outCfg->name(), inserter); } } } @@ -110,7 +100,7 @@ DynamicDocsumWriter::InitState(const IAttributeManager & attrMan, GetDocsumsStat { state._kwExtractor = _keywordExtractor.get(); state._attrCtx = attrMan.createContext(); - auto result_class = rci.outputClass; + auto result_class = rci.res_class; if (result_class == nullptr) { return; } @@ -118,7 +108,7 @@ DynamicDocsumWriter::InitState(const IAttributeManager & attrMan, GetDocsumsStat state._attributes.resize(num_entries); state._fieldWriterStates.resize(result_class->get_num_field_writer_states()); for (size_t i(0); i < num_entries; i++) { - const DocsumFieldWriter *fw = result_class->GetEntry(i)->_docsum_field_writer.get(); + const DocsumFieldWriter *fw = result_class->GetEntry(i)->writer(); if (fw) { const vespalib::string & attributeName = fw->getAttributeName(); if (!attributeName.empty()) { diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.h b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.h index 52ddeebb0d6..b3cc12cfd25 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.h @@ -2,17 +2,16 @@ #pragma once +#include "docsum_field_writer.h" +#include "docsumstore.h" #include "juniperproperties.h" #include "resultclass.h" #include "resultconfig.h" -#include "docsumstore.h" -#include "docsum_field_writer.h" #include #include +#include -namespace search { - class IAttributeManager; -} +namespace search { class IAttributeManager; } namespace vespalib { class Slime; } @@ -27,11 +26,11 @@ class IDocsumWriter public: using Inserter = vespalib::slime::Inserter; struct ResolveClassInfo { - bool allGenerated; - const ResultClass *outputClass; + bool all_fields_generated; + const ResultClass* res_class; ResolveClassInfo() - : allGenerated(false), - outputClass(nullptr) + : all_fields_generated(false), + res_class(nullptr) { } }; @@ -39,7 +38,8 @@ public: virtual void InitState(const search::IAttributeManager & attrMan, GetDocsumsState& state, const ResolveClassInfo& rci) = 0; virtual void insertDocsum(const ResolveClassInfo & rci, uint32_t docid, GetDocsumsState& state, IDocsumStore *docinfos, Inserter & target) = 0; - virtual ResolveClassInfo resolveClassInfo(vespalib::stringref outputClassName) const = 0; + virtual ResolveClassInfo resolveClassInfo(vespalib::stringref class_name, + const vespalib::hash_set& fields) const = 0; }; //-------------------------------------------------------------------------- @@ -50,8 +50,6 @@ private: std::unique_ptr _resultConfig; std::unique_ptr _keywordExtractor; - ResolveClassInfo resolveOutputClass(vespalib::stringref outputClassName) const; - public: DynamicDocsumWriter(std::unique_ptr config, std::unique_ptr extractor); DynamicDocsumWriter(const DynamicDocsumWriter &) = delete; @@ -64,7 +62,8 @@ public: void insertDocsum(const ResolveClassInfo & outputClassInfo, uint32_t docid, GetDocsumsState& state, IDocsumStore *docinfos, Inserter & inserter) override; - ResolveClassInfo resolveClassInfo(vespalib::stringref outputClassName) const override; + ResolveClassInfo resolveClassInfo(vespalib::stringref class_name, + const vespalib::hash_set& fields) const override; }; } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.cpp b/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.cpp index becaa6d1ab0..a21b3b597ba 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.cpp @@ -39,13 +39,8 @@ GetDocsumArgs::SetStackDump(uint32_t stackDumpLen, const char *stackDump) } bool -GetDocsumArgs::needField(vespalib::stringref field) const { +GetDocsumArgs::need_field(vespalib::stringref field) const { return _fields.empty() || _fields.contains(field); } -void -GetDocsumArgs::add_field(vespalib::stringref field) { - _fields.insert(field); -} - } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.h b/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.h index 89816977290..975ae5f7592 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.h @@ -46,8 +46,9 @@ public: bool dumpFeatures() const { return _dumpFeatures; } const fef::Properties &highlightTerms() const { return _highlightTerms; } - bool needField(vespalib::stringref field) const; - void add_field(vespalib::stringref field); + void set_fields(const FieldSet& fields_in) { _fields = fields_in; } + const FieldSet& get_fields() const { return _fields; } + bool need_field(vespalib::stringref field) const; }; } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/res_config_entry.cpp b/searchsummary/src/vespa/searchsummary/docsummary/res_config_entry.cpp index 12e87cbd899..0c07260c1c8 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/res_config_entry.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/res_config_entry.cpp @@ -5,9 +5,10 @@ namespace search::docsummary { -ResConfigEntry::ResConfigEntry() noexcept - : _name(), - _docsum_field_writer() +ResConfigEntry::ResConfigEntry(const vespalib::string& name_in) noexcept + : _name(name_in), + _writer(), + _generated(false) { } @@ -15,4 +16,11 @@ ResConfigEntry::~ResConfigEntry() = default; ResConfigEntry::ResConfigEntry(ResConfigEntry&&) noexcept = default; +void +ResConfigEntry::set_writer(std::unique_ptr writer_in) +{ + _writer = std::move(writer_in); + _generated = _writer ? _writer->IsGenerated() : false; +} + } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/res_config_entry.h b/searchsummary/src/vespa/searchsummary/docsummary/res_config_entry.h index 02090df8229..e8d6c9f4e73 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/res_config_entry.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/res_config_entry.h @@ -12,12 +12,19 @@ class DocsumFieldWriter; /** * This struct describes a single docsum field (name and type). **/ -struct ResConfigEntry { +class ResConfigEntry { +private: vespalib::string _name; - std::unique_ptr _docsum_field_writer; - ResConfigEntry() noexcept; + std::unique_ptr _writer; + bool _generated; +public: + ResConfigEntry(const vespalib::string& name_in) noexcept; ~ResConfigEntry(); ResConfigEntry(ResConfigEntry&&) noexcept; + void set_writer(std::unique_ptr writer); + const vespalib::string& name() const { return _name; } + DocsumFieldWriter* writer() const { return _writer.get(); } + bool is_generated() const { return _generated; } }; } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultclass.cpp b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.cpp index e6eb3064176..998ed9e2ccc 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/resultclass.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.cpp @@ -33,17 +33,16 @@ ResultClass::AddConfigEntry(const char *name, std::unique_ptr } _nameMap[name] = _entries.size(); - ResConfigEntry e; - e._name = name; + ResConfigEntry e(name); if (docsum_field_writer) { docsum_field_writer->setIndex(_entries.size()); bool generated = docsum_field_writer->IsGenerated(); - getDynamicInfo().update_override_counts(generated); + _dynInfo.update_override_counts(generated); if (docsum_field_writer->setFieldWriterStateIndex(_num_field_writer_states)) { ++_num_field_writer_states; } } - e._docsum_field_writer = std::move(docsum_field_writer); + e.set_writer(std::move(docsum_field_writer)); _entries.push_back(std::move(e)); return true; } @@ -54,4 +53,21 @@ ResultClass::AddConfigEntry(const char *name) return AddConfigEntry(name, {}); } +bool +ResultClass::all_fields_generated(const vespalib::hash_set& fields) const +{ + if (_dynInfo._generateCnt == GetNumEntries()) { + return true; + } + if (fields.empty()) { + return false; + } + for (const auto& entry : _entries) { + if (fields.contains(entry.name()) && !entry.is_generated()) { + return false; + } + } + return true; +} + } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h index 19f39eeb1f1..502ff879e3c 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h @@ -3,8 +3,9 @@ #pragma once #include "res_config_entry.h" -#include #include +#include +#include namespace search::docsummary { @@ -65,14 +66,6 @@ public: **/ ~ResultClass(); - /** - * Obtain reference to dynamic field data for this result class. - * - * @return reference to dynamic field data. - **/ - DynamicInfo& getDynamicInfo() noexcept { return _dynInfo; } - const DynamicInfo& getDynamicInfo() const noexcept { return _dynInfo; } - /** * Obtain the number of config entries (size of the * ResConfigEntry array) held by this result class. @@ -120,6 +113,13 @@ public: return (offset < _entries.size()) ? &_entries[offset] : nullptr; } + /** + * Returns whether the given fields are generated in this result class (do not require the document instance). + * + * If the given fields set is empty, check all fields defined in this result class. + */ + bool all_fields_generated(const vespalib::hash_set& fields) const; + void set_omit_summary_features(bool value) { _omit_summary_features = value; } -- cgit v1.2.3