diff options
author | Geir Storli <geirst@yahooinc.com> | 2022-09-21 10:35:32 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-09-21 10:35:32 +0200 |
commit | 79e303da0f7e7d2a4716601bd384745d504be98a (patch) | |
tree | f3d60e7389da653ed6b61e612ebdba7665df536d /searchsummary | |
parent | 222e46fbd5e0d69b3166a09d712ce3466d29d7b9 (diff) | |
parent | b8d3ab8c0ad28c5cedbe414eb9c7c55a4e0bd6ae (diff) |
Merge pull request #24142 from vespa-engine/geirst/optimize-summary-class-resolving
Optimize resolving and usage of summary result class.
Diffstat (limited to 'searchsummary')
12 files changed, 143 insertions, 74 deletions
diff --git a/searchsummary/CMakeLists.txt b/searchsummary/CMakeLists.txt index a5dc62da5c0..3aedae13e70 100644 --- a/searchsummary/CMakeLists.txt +++ b/searchsummary/CMakeLists.txt @@ -21,6 +21,7 @@ vespa_define_module( src/tests/docsummary/attributedfw src/tests/docsummary/document_id_dfw src/tests/docsummary/matched_elements_filter + src/tests/docsummary/result_class src/tests/docsummary/slime_filler src/tests/docsummary/slime_summary src/tests/juniper diff --git a/searchsummary/src/tests/docsummary/result_class/CMakeLists.txt b/searchsummary/src/tests/docsummary/result_class/CMakeLists.txt new file mode 100644 index 00000000000..3b941aec244 --- /dev/null +++ b/searchsummary/src/tests/docsummary/result_class/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchsummary_result_class_test_app TEST + SOURCES + result_class_test.cpp + DEPENDS + searchsummary + GTest::GTest +) +vespa_add_test(NAME searchsummary_result_class_test_app COMMAND searchsummary_result_class_test_app) diff --git a/searchsummary/src/tests/docsummary/result_class/result_class_test.cpp b/searchsummary/src/tests/docsummary/result_class/result_class_test.cpp new file mode 100644 index 00000000000..a47786c2a44 --- /dev/null +++ b/searchsummary/src/tests/docsummary/result_class/result_class_test.cpp @@ -0,0 +1,43 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/searchsummary/docsummary/docsum_field_writer.h> +#include <vespa/searchsummary/docsummary/resultclass.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <memory> + +using namespace search::docsummary; + +class MockWriter : public DocsumFieldWriter { +private: + bool _generated; +public: + MockWriter(bool generated) : _generated(generated) {} + bool IsGenerated() const override { return _generated; } + virtual void insertField(uint32_t, const IDocsumStoreDocument*, GetDocsumsState&, vespalib::slime::Inserter &) const override {} +}; + +TEST(ResultClassTest, subset_of_fields_in_class_are_generated) +{ + ResultClass rc("test"); + rc.AddConfigEntry("from_disk"); + rc.AddConfigEntry("generated", std::make_unique<MockWriter>(true)); + rc.AddConfigEntry("not_generated", std::make_unique<MockWriter>(false)); + + EXPECT_FALSE(rc.all_fields_generated({})); + EXPECT_FALSE(rc.all_fields_generated({"from_disk", "generated", "not_generated"})); + EXPECT_FALSE(rc.all_fields_generated({"generated", "not_generated"})); + EXPECT_TRUE(rc.all_fields_generated({"generated"})); + EXPECT_FALSE(rc.all_fields_generated({"not_generated"})); +} + +TEST(ResultClassTest, all_fields_in_class_are_generated) +{ + ResultClass rc("test"); + rc.AddConfigEntry("generated_1", std::make_unique<MockWriter>(true)); + rc.AddConfigEntry("generated_2", std::make_unique<MockWriter>(true)); + + EXPECT_TRUE(rc.all_fields_generated({})); + EXPECT_TRUE(rc.all_fields_generated({"generated_1"})); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp b/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp index 7b3e79ec38e..fda4923a826 100644 --- a/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp +++ b/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp @@ -55,7 +55,7 @@ struct SlimeSummaryTest : testing::Test, IDocsumStore, GetDocsumsStateCallback { void getDocsum(Slime &slime) { Slime slimeOut; SlimeInserter inserter(slimeOut); - auto rci = writer->resolveClassInfo(state._args.getResultClassName()); + auto rci = writer->resolveClassInfo(state._args.getResultClassName(), {}); writer->insertDocsum(rci, 1u, state, this, inserter); vespalib::SmartBuffer buf(4_Ki); BinaryFormat::encode(slimeOut, buf); diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp index b4b663718bd..62112af7e3e 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp @@ -20,30 +20,20 @@ using vespalib::slime::ObjectInserter; namespace search::docsummary { DynamicDocsumWriter::ResolveClassInfo -DynamicDocsumWriter::resolveClassInfo(vespalib::stringref outputClassName) const -{ - DynamicDocsumWriter::ResolveClassInfo rci = resolveOutputClass(outputClassName); - return rci; -} - -DynamicDocsumWriter::ResolveClassInfo -DynamicDocsumWriter::resolveOutputClass(vespalib::stringref summaryClass) const +DynamicDocsumWriter::resolveClassInfo(vespalib::stringref class_name, + const vespalib::hash_set<vespalib::string>& fields) const { DynamicDocsumWriter::ResolveClassInfo result; - auto id = _resultConfig->LookupResultClassId(summaryClass); + auto id = _resultConfig->LookupResultClassId(class_name); - const ResultClass *oC = (id != ResultConfig::NoClassID()) ? _resultConfig->LookupResultClass(id) : nullptr; - if (oC == nullptr) { + const auto* res_class = (id != ResultConfig::NoClassID()) ? _resultConfig->LookupResultClass(id) : nullptr; + if (res_class == nullptr) { Issue::report("Illegal docsum class requested: %s, using empty docsum for documents", - vespalib::string(summaryClass).c_str()); + vespalib::string(class_name).c_str()); } else { - const ResultClass::DynamicInfo &rcInfo = oC->getDynamicInfo(); - if (rcInfo._generateCnt == oC->GetNumEntries()) { - LOG_ASSERT(rcInfo._overrideCnt == rcInfo._generateCnt); - result.allGenerated = true; - } + result.all_fields_generated = res_class->all_fields_generated(fields); } - result.outputClass = oC; + result.res_class = res_class; return result; } @@ -51,18 +41,18 @@ void DynamicDocsumWriter::insertDocsum(const ResolveClassInfo & rci, uint32_t docid, GetDocsumsState& state, IDocsumStore *docinfos, Inserter& topInserter) { - if (rci.outputClass == nullptr) { + if (rci.res_class == nullptr) { // Use empty docsum when illegal docsum class has been requested return; } - if (rci.allGenerated) { + if (rci.all_fields_generated) { // generate docsum entry on-the-fly vespalib::slime::Cursor & docsum = topInserter.insertObject(); - for (uint32_t i = 0; i < rci.outputClass->GetNumEntries(); ++i) { - const ResConfigEntry *resCfg = rci.outputClass->GetEntry(i); - const DocsumFieldWriter *writer = resCfg->_docsum_field_writer.get(); - if (state._args.needField(resCfg->_name) && ! writer->isDefaultValue(docid, state)) { - const Memory field_name(resCfg->_name.data(), resCfg->_name.size()); + for (uint32_t i = 0; i < rci.res_class->GetNumEntries(); ++i) { + const ResConfigEntry *resCfg = rci.res_class->GetEntry(i); + const DocsumFieldWriter *writer = resCfg->writer(); + if (state._args.need_field(resCfg->name()) && ! writer->isDefaultValue(docid, state)) { + const Memory field_name(resCfg->name().data(), resCfg->name().size()); ObjectInserter inserter(docsum, field_name); writer->insertField(docid, nullptr, state, inserter); } @@ -75,13 +65,13 @@ DynamicDocsumWriter::insertDocsum(const ResolveClassInfo & rci, uint32_t docid, } // insert docsum blob vespalib::slime::Cursor & docsum = topInserter.insertObject(); - for (uint32_t i = 0; i < rci.outputClass->GetNumEntries(); ++i) { - const ResConfigEntry *outCfg = rci.outputClass->GetEntry(i); - if (!state._args.needField(outCfg->_name)) { + for (uint32_t i = 0; i < rci.res_class->GetNumEntries(); ++i) { + const ResConfigEntry *outCfg = rci.res_class->GetEntry(i); + if (!state._args.need_field(outCfg->name())) { continue; } - const DocsumFieldWriter *writer = outCfg->_docsum_field_writer.get(); - const Memory field_name(outCfg->_name.data(), outCfg->_name.size()); + const DocsumFieldWriter *writer = outCfg->writer(); + const Memory field_name(outCfg->name().data(), outCfg->name().size()); ObjectInserter inserter(docsum, field_name); if (writer != nullptr) { if (! writer->isDefaultValue(docid, state)) { @@ -89,7 +79,7 @@ DynamicDocsumWriter::insertDocsum(const ResolveClassInfo & rci, uint32_t docid, } } else { if (doc) { - doc->insert_summary_field(outCfg->_name, inserter); + doc->insert_summary_field(outCfg->name(), inserter); } } } @@ -110,7 +100,7 @@ DynamicDocsumWriter::InitState(const IAttributeManager & attrMan, GetDocsumsStat { state._kwExtractor = _keywordExtractor.get(); state._attrCtx = attrMan.createContext(); - auto result_class = rci.outputClass; + auto result_class = rci.res_class; if (result_class == nullptr) { return; } @@ -118,7 +108,7 @@ DynamicDocsumWriter::InitState(const IAttributeManager & attrMan, GetDocsumsStat state._attributes.resize(num_entries); state._fieldWriterStates.resize(result_class->get_num_field_writer_states()); for (size_t i(0); i < num_entries; i++) { - const DocsumFieldWriter *fw = result_class->GetEntry(i)->_docsum_field_writer.get(); + const DocsumFieldWriter *fw = result_class->GetEntry(i)->writer(); if (fw) { const vespalib::string & attributeName = fw->getAttributeName(); if (!attributeName.empty()) { diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.h b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.h index 52ddeebb0d6..b3cc12cfd25 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.h @@ -2,17 +2,16 @@ #pragma once +#include "docsum_field_writer.h" +#include "docsumstore.h" #include "juniperproperties.h" #include "resultclass.h" #include "resultconfig.h" -#include "docsumstore.h" -#include "docsum_field_writer.h" #include <vespa/fastlib/text/unicodeutil.h> #include <vespa/fastlib/text/wordfolder.h> +#include <vespa/vespalib/stllike/string.h> -namespace search { - class IAttributeManager; -} +namespace search { class IAttributeManager; } namespace vespalib { class Slime; } @@ -27,11 +26,11 @@ class IDocsumWriter public: using Inserter = vespalib::slime::Inserter; struct ResolveClassInfo { - bool allGenerated; - const ResultClass *outputClass; + bool all_fields_generated; + const ResultClass* res_class; ResolveClassInfo() - : allGenerated(false), - outputClass(nullptr) + : all_fields_generated(false), + res_class(nullptr) { } }; @@ -39,7 +38,8 @@ public: virtual void InitState(const search::IAttributeManager & attrMan, GetDocsumsState& state, const ResolveClassInfo& rci) = 0; virtual void insertDocsum(const ResolveClassInfo & rci, uint32_t docid, GetDocsumsState& state, IDocsumStore *docinfos, Inserter & target) = 0; - virtual ResolveClassInfo resolveClassInfo(vespalib::stringref outputClassName) const = 0; + virtual ResolveClassInfo resolveClassInfo(vespalib::stringref class_name, + const vespalib::hash_set<vespalib::string>& fields) const = 0; }; //-------------------------------------------------------------------------- @@ -50,8 +50,6 @@ private: std::unique_ptr<ResultConfig> _resultConfig; std::unique_ptr<KeywordExtractor> _keywordExtractor; - ResolveClassInfo resolveOutputClass(vespalib::stringref outputClassName) const; - public: DynamicDocsumWriter(std::unique_ptr<ResultConfig> config, std::unique_ptr<KeywordExtractor> extractor); DynamicDocsumWriter(const DynamicDocsumWriter &) = delete; @@ -64,7 +62,8 @@ public: void insertDocsum(const ResolveClassInfo & outputClassInfo, uint32_t docid, GetDocsumsState& state, IDocsumStore *docinfos, Inserter & inserter) override; - ResolveClassInfo resolveClassInfo(vespalib::stringref outputClassName) const override; + ResolveClassInfo resolveClassInfo(vespalib::stringref class_name, + const vespalib::hash_set<vespalib::string>& fields) const override; }; } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.cpp b/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.cpp index becaa6d1ab0..a21b3b597ba 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.cpp @@ -39,13 +39,8 @@ GetDocsumArgs::SetStackDump(uint32_t stackDumpLen, const char *stackDump) } bool -GetDocsumArgs::needField(vespalib::stringref field) const { +GetDocsumArgs::need_field(vespalib::stringref field) const { return _fields.empty() || _fields.contains(field); } -void -GetDocsumArgs::add_field(vespalib::stringref field) { - _fields.insert(field); -} - } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.h b/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.h index 89816977290..975ae5f7592 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.h @@ -46,8 +46,9 @@ public: bool dumpFeatures() const { return _dumpFeatures; } const fef::Properties &highlightTerms() const { return _highlightTerms; } - bool needField(vespalib::stringref field) const; - void add_field(vespalib::stringref field); + void set_fields(const FieldSet& fields_in) { _fields = fields_in; } + const FieldSet& get_fields() const { return _fields; } + bool need_field(vespalib::stringref field) const; }; } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/res_config_entry.cpp b/searchsummary/src/vespa/searchsummary/docsummary/res_config_entry.cpp index 12e87cbd899..0c07260c1c8 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/res_config_entry.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/res_config_entry.cpp @@ -5,9 +5,10 @@ namespace search::docsummary { -ResConfigEntry::ResConfigEntry() noexcept - : _name(), - _docsum_field_writer() +ResConfigEntry::ResConfigEntry(const vespalib::string& name_in) noexcept + : _name(name_in), + _writer(), + _generated(false) { } @@ -15,4 +16,11 @@ ResConfigEntry::~ResConfigEntry() = default; ResConfigEntry::ResConfigEntry(ResConfigEntry&&) noexcept = default; +void +ResConfigEntry::set_writer(std::unique_ptr<DocsumFieldWriter> writer_in) +{ + _writer = std::move(writer_in); + _generated = _writer ? _writer->IsGenerated() : false; +} + } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/res_config_entry.h b/searchsummary/src/vespa/searchsummary/docsummary/res_config_entry.h index 02090df8229..e8d6c9f4e73 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/res_config_entry.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/res_config_entry.h @@ -12,12 +12,19 @@ class DocsumFieldWriter; /** * This struct describes a single docsum field (name and type). **/ -struct ResConfigEntry { +class ResConfigEntry { +private: vespalib::string _name; - std::unique_ptr<DocsumFieldWriter> _docsum_field_writer; - ResConfigEntry() noexcept; + std::unique_ptr<DocsumFieldWriter> _writer; + bool _generated; +public: + ResConfigEntry(const vespalib::string& name_in) noexcept; ~ResConfigEntry(); ResConfigEntry(ResConfigEntry&&) noexcept; + void set_writer(std::unique_ptr<DocsumFieldWriter> writer); + const vespalib::string& name() const { return _name; } + DocsumFieldWriter* writer() const { return _writer.get(); } + bool is_generated() const { return _generated; } }; } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultclass.cpp b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.cpp index e6eb3064176..998ed9e2ccc 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/resultclass.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.cpp @@ -33,17 +33,16 @@ ResultClass::AddConfigEntry(const char *name, std::unique_ptr<DocsumFieldWriter> } _nameMap[name] = _entries.size(); - ResConfigEntry e; - e._name = name; + ResConfigEntry e(name); if (docsum_field_writer) { docsum_field_writer->setIndex(_entries.size()); bool generated = docsum_field_writer->IsGenerated(); - getDynamicInfo().update_override_counts(generated); + _dynInfo.update_override_counts(generated); if (docsum_field_writer->setFieldWriterStateIndex(_num_field_writer_states)) { ++_num_field_writer_states; } } - e._docsum_field_writer = std::move(docsum_field_writer); + e.set_writer(std::move(docsum_field_writer)); _entries.push_back(std::move(e)); return true; } @@ -54,4 +53,21 @@ ResultClass::AddConfigEntry(const char *name) return AddConfigEntry(name, {}); } +bool +ResultClass::all_fields_generated(const vespalib::hash_set<vespalib::string>& fields) const +{ + if (_dynInfo._generateCnt == GetNumEntries()) { + return true; + } + if (fields.empty()) { + return false; + } + for (const auto& entry : _entries) { + if (fields.contains(entry.name()) && !entry.is_generated()) { + return false; + } + } + return true; +} + } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h index 19f39eeb1f1..502ff879e3c 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h @@ -3,8 +3,9 @@ #pragma once #include "res_config_entry.h" -#include <vespa/vespalib/stllike/string.h> #include <vespa/vespalib/stllike/hash_map.h> +#include <vespa/vespalib/stllike/hash_set.h> +#include <vespa/vespalib/stllike/string.h> namespace search::docsummary { @@ -66,14 +67,6 @@ public: ~ResultClass(); /** - * Obtain reference to dynamic field data for this result class. - * - * @return reference to dynamic field data. - **/ - DynamicInfo& getDynamicInfo() noexcept { return _dynInfo; } - const DynamicInfo& getDynamicInfo() const noexcept { return _dynInfo; } - - /** * Obtain the number of config entries (size of the * ResConfigEntry array) held by this result class. * @@ -120,6 +113,13 @@ public: return (offset < _entries.size()) ? &_entries[offset] : nullptr; } + /** + * Returns whether the given fields are generated in this result class (do not require the document instance). + * + * If the given fields set is empty, check all fields defined in this result class. + */ + bool all_fields_generated(const vespalib::hash_set<vespalib::string>& fields) const; + void set_omit_summary_features(bool value) { _omit_summary_features = value; } |