summaryrefslogtreecommitdiffstats
path: root/searchsummary
diff options
context:
space:
mode:
authorGeir Storli <geirst@yahooinc.com>2022-09-20 13:53:54 +0000
committerGeir Storli <geirst@yahooinc.com>2022-09-21 08:34:20 +0000
commitb8d3ab8c0ad28c5cedbe414eb9c7c55a4e0bd6ae (patch)
tree2edb9e1024ff78cb1fd5980164702fae25cc9889 /searchsummary
parentb66a12b478e9fbda0900c36b1ec67d53dc811488 (diff)
Optimize resolving and usage of summary result class.
If an explicit set of fields is specified in the docsum request, we avoid reading the document instance from disk if all those fields are generated on the fly.
Diffstat (limited to 'searchsummary')
-rw-r--r--searchsummary/CMakeLists.txt1
-rw-r--r--searchsummary/src/tests/docsummary/result_class/CMakeLists.txt9
-rw-r--r--searchsummary/src/tests/docsummary/result_class/result_class_test.cpp43
-rw-r--r--searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp2
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp56
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.h25
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.cpp7
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.h5
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/res_config_entry.cpp14
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/res_config_entry.h13
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/resultclass.cpp24
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/resultclass.h18
12 files changed, 143 insertions, 74 deletions
diff --git a/searchsummary/CMakeLists.txt b/searchsummary/CMakeLists.txt
index a5dc62da5c0..3aedae13e70 100644
--- a/searchsummary/CMakeLists.txt
+++ b/searchsummary/CMakeLists.txt
@@ -21,6 +21,7 @@ vespa_define_module(
src/tests/docsummary/attributedfw
src/tests/docsummary/document_id_dfw
src/tests/docsummary/matched_elements_filter
+ src/tests/docsummary/result_class
src/tests/docsummary/slime_filler
src/tests/docsummary/slime_summary
src/tests/juniper
diff --git a/searchsummary/src/tests/docsummary/result_class/CMakeLists.txt b/searchsummary/src/tests/docsummary/result_class/CMakeLists.txt
new file mode 100644
index 00000000000..3b941aec244
--- /dev/null
+++ b/searchsummary/src/tests/docsummary/result_class/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchsummary_result_class_test_app TEST
+ SOURCES
+ result_class_test.cpp
+ DEPENDS
+ searchsummary
+ GTest::GTest
+)
+vespa_add_test(NAME searchsummary_result_class_test_app COMMAND searchsummary_result_class_test_app)
diff --git a/searchsummary/src/tests/docsummary/result_class/result_class_test.cpp b/searchsummary/src/tests/docsummary/result_class/result_class_test.cpp
new file mode 100644
index 00000000000..a47786c2a44
--- /dev/null
+++ b/searchsummary/src/tests/docsummary/result_class/result_class_test.cpp
@@ -0,0 +1,43 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchsummary/docsummary/docsum_field_writer.h>
+#include <vespa/searchsummary/docsummary/resultclass.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <memory>
+
+using namespace search::docsummary;
+
+class MockWriter : public DocsumFieldWriter {
+private:
+ bool _generated;
+public:
+ MockWriter(bool generated) : _generated(generated) {}
+ bool IsGenerated() const override { return _generated; }
+ virtual void insertField(uint32_t, const IDocsumStoreDocument*, GetDocsumsState&, vespalib::slime::Inserter &) const override {}
+};
+
+TEST(ResultClassTest, subset_of_fields_in_class_are_generated)
+{
+ ResultClass rc("test");
+ rc.AddConfigEntry("from_disk");
+ rc.AddConfigEntry("generated", std::make_unique<MockWriter>(true));
+ rc.AddConfigEntry("not_generated", std::make_unique<MockWriter>(false));
+
+ EXPECT_FALSE(rc.all_fields_generated({}));
+ EXPECT_FALSE(rc.all_fields_generated({"from_disk", "generated", "not_generated"}));
+ EXPECT_FALSE(rc.all_fields_generated({"generated", "not_generated"}));
+ EXPECT_TRUE(rc.all_fields_generated({"generated"}));
+ EXPECT_FALSE(rc.all_fields_generated({"not_generated"}));
+}
+
+TEST(ResultClassTest, all_fields_in_class_are_generated)
+{
+ ResultClass rc("test");
+ rc.AddConfigEntry("generated_1", std::make_unique<MockWriter>(true));
+ rc.AddConfigEntry("generated_2", std::make_unique<MockWriter>(true));
+
+ EXPECT_TRUE(rc.all_fields_generated({}));
+ EXPECT_TRUE(rc.all_fields_generated({"generated_1"}));
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp b/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp
index 7b3e79ec38e..fda4923a826 100644
--- a/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp
+++ b/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp
@@ -55,7 +55,7 @@ struct SlimeSummaryTest : testing::Test, IDocsumStore, GetDocsumsStateCallback {
void getDocsum(Slime &slime) {
Slime slimeOut;
SlimeInserter inserter(slimeOut);
- auto rci = writer->resolveClassInfo(state._args.getResultClassName());
+ auto rci = writer->resolveClassInfo(state._args.getResultClassName(), {});
writer->insertDocsum(rci, 1u, state, this, inserter);
vespalib::SmartBuffer buf(4_Ki);
BinaryFormat::encode(slimeOut, buf);
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp
index b4b663718bd..62112af7e3e 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp
+++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp
@@ -20,30 +20,20 @@ using vespalib::slime::ObjectInserter;
namespace search::docsummary {
DynamicDocsumWriter::ResolveClassInfo
-DynamicDocsumWriter::resolveClassInfo(vespalib::stringref outputClassName) const
-{
- DynamicDocsumWriter::ResolveClassInfo rci = resolveOutputClass(outputClassName);
- return rci;
-}
-
-DynamicDocsumWriter::ResolveClassInfo
-DynamicDocsumWriter::resolveOutputClass(vespalib::stringref summaryClass) const
+DynamicDocsumWriter::resolveClassInfo(vespalib::stringref class_name,
+ const vespalib::hash_set<vespalib::string>& fields) const
{
DynamicDocsumWriter::ResolveClassInfo result;
- auto id = _resultConfig->LookupResultClassId(summaryClass);
+ auto id = _resultConfig->LookupResultClassId(class_name);
- const ResultClass *oC = (id != ResultConfig::NoClassID()) ? _resultConfig->LookupResultClass(id) : nullptr;
- if (oC == nullptr) {
+ const auto* res_class = (id != ResultConfig::NoClassID()) ? _resultConfig->LookupResultClass(id) : nullptr;
+ if (res_class == nullptr) {
Issue::report("Illegal docsum class requested: %s, using empty docsum for documents",
- vespalib::string(summaryClass).c_str());
+ vespalib::string(class_name).c_str());
} else {
- const ResultClass::DynamicInfo &rcInfo = oC->getDynamicInfo();
- if (rcInfo._generateCnt == oC->GetNumEntries()) {
- LOG_ASSERT(rcInfo._overrideCnt == rcInfo._generateCnt);
- result.allGenerated = true;
- }
+ result.all_fields_generated = res_class->all_fields_generated(fields);
}
- result.outputClass = oC;
+ result.res_class = res_class;
return result;
}
@@ -51,18 +41,18 @@ void
DynamicDocsumWriter::insertDocsum(const ResolveClassInfo & rci, uint32_t docid, GetDocsumsState& state,
IDocsumStore *docinfos, Inserter& topInserter)
{
- if (rci.outputClass == nullptr) {
+ if (rci.res_class == nullptr) {
// Use empty docsum when illegal docsum class has been requested
return;
}
- if (rci.allGenerated) {
+ if (rci.all_fields_generated) {
// generate docsum entry on-the-fly
vespalib::slime::Cursor & docsum = topInserter.insertObject();
- for (uint32_t i = 0; i < rci.outputClass->GetNumEntries(); ++i) {
- const ResConfigEntry *resCfg = rci.outputClass->GetEntry(i);
- const DocsumFieldWriter *writer = resCfg->_docsum_field_writer.get();
- if (state._args.needField(resCfg->_name) && ! writer->isDefaultValue(docid, state)) {
- const Memory field_name(resCfg->_name.data(), resCfg->_name.size());
+ for (uint32_t i = 0; i < rci.res_class->GetNumEntries(); ++i) {
+ const ResConfigEntry *resCfg = rci.res_class->GetEntry(i);
+ const DocsumFieldWriter *writer = resCfg->writer();
+ if (state._args.need_field(resCfg->name()) && ! writer->isDefaultValue(docid, state)) {
+ const Memory field_name(resCfg->name().data(), resCfg->name().size());
ObjectInserter inserter(docsum, field_name);
writer->insertField(docid, nullptr, state, inserter);
}
@@ -75,13 +65,13 @@ DynamicDocsumWriter::insertDocsum(const ResolveClassInfo & rci, uint32_t docid,
}
// insert docsum blob
vespalib::slime::Cursor & docsum = topInserter.insertObject();
- for (uint32_t i = 0; i < rci.outputClass->GetNumEntries(); ++i) {
- const ResConfigEntry *outCfg = rci.outputClass->GetEntry(i);
- if (!state._args.needField(outCfg->_name)) {
+ for (uint32_t i = 0; i < rci.res_class->GetNumEntries(); ++i) {
+ const ResConfigEntry *outCfg = rci.res_class->GetEntry(i);
+ if (!state._args.need_field(outCfg->name())) {
continue;
}
- const DocsumFieldWriter *writer = outCfg->_docsum_field_writer.get();
- const Memory field_name(outCfg->_name.data(), outCfg->_name.size());
+ const DocsumFieldWriter *writer = outCfg->writer();
+ const Memory field_name(outCfg->name().data(), outCfg->name().size());
ObjectInserter inserter(docsum, field_name);
if (writer != nullptr) {
if (! writer->isDefaultValue(docid, state)) {
@@ -89,7 +79,7 @@ DynamicDocsumWriter::insertDocsum(const ResolveClassInfo & rci, uint32_t docid,
}
} else {
if (doc) {
- doc->insert_summary_field(outCfg->_name, inserter);
+ doc->insert_summary_field(outCfg->name(), inserter);
}
}
}
@@ -110,7 +100,7 @@ DynamicDocsumWriter::InitState(const IAttributeManager & attrMan, GetDocsumsStat
{
state._kwExtractor = _keywordExtractor.get();
state._attrCtx = attrMan.createContext();
- auto result_class = rci.outputClass;
+ auto result_class = rci.res_class;
if (result_class == nullptr) {
return;
}
@@ -118,7 +108,7 @@ DynamicDocsumWriter::InitState(const IAttributeManager & attrMan, GetDocsumsStat
state._attributes.resize(num_entries);
state._fieldWriterStates.resize(result_class->get_num_field_writer_states());
for (size_t i(0); i < num_entries; i++) {
- const DocsumFieldWriter *fw = result_class->GetEntry(i)->_docsum_field_writer.get();
+ const DocsumFieldWriter *fw = result_class->GetEntry(i)->writer();
if (fw) {
const vespalib::string & attributeName = fw->getAttributeName();
if (!attributeName.empty()) {
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.h b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.h
index 52ddeebb0d6..b3cc12cfd25 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.h
+++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.h
@@ -2,17 +2,16 @@
#pragma once
+#include "docsum_field_writer.h"
+#include "docsumstore.h"
#include "juniperproperties.h"
#include "resultclass.h"
#include "resultconfig.h"
-#include "docsumstore.h"
-#include "docsum_field_writer.h"
#include <vespa/fastlib/text/unicodeutil.h>
#include <vespa/fastlib/text/wordfolder.h>
+#include <vespa/vespalib/stllike/string.h>
-namespace search {
- class IAttributeManager;
-}
+namespace search { class IAttributeManager; }
namespace vespalib { class Slime; }
@@ -27,11 +26,11 @@ class IDocsumWriter
public:
using Inserter = vespalib::slime::Inserter;
struct ResolveClassInfo {
- bool allGenerated;
- const ResultClass *outputClass;
+ bool all_fields_generated;
+ const ResultClass* res_class;
ResolveClassInfo()
- : allGenerated(false),
- outputClass(nullptr)
+ : all_fields_generated(false),
+ res_class(nullptr)
{ }
};
@@ -39,7 +38,8 @@ public:
virtual void InitState(const search::IAttributeManager & attrMan, GetDocsumsState& state, const ResolveClassInfo& rci) = 0;
virtual void insertDocsum(const ResolveClassInfo & rci, uint32_t docid, GetDocsumsState& state,
IDocsumStore *docinfos, Inserter & target) = 0;
- virtual ResolveClassInfo resolveClassInfo(vespalib::stringref outputClassName) const = 0;
+ virtual ResolveClassInfo resolveClassInfo(vespalib::stringref class_name,
+ const vespalib::hash_set<vespalib::string>& fields) const = 0;
};
//--------------------------------------------------------------------------
@@ -50,8 +50,6 @@ private:
std::unique_ptr<ResultConfig> _resultConfig;
std::unique_ptr<KeywordExtractor> _keywordExtractor;
- ResolveClassInfo resolveOutputClass(vespalib::stringref outputClassName) const;
-
public:
DynamicDocsumWriter(std::unique_ptr<ResultConfig> config, std::unique_ptr<KeywordExtractor> extractor);
DynamicDocsumWriter(const DynamicDocsumWriter &) = delete;
@@ -64,7 +62,8 @@ public:
void insertDocsum(const ResolveClassInfo & outputClassInfo, uint32_t docid, GetDocsumsState& state,
IDocsumStore *docinfos, Inserter & inserter) override;
- ResolveClassInfo resolveClassInfo(vespalib::stringref outputClassName) const override;
+ ResolveClassInfo resolveClassInfo(vespalib::stringref class_name,
+ const vespalib::hash_set<vespalib::string>& fields) const override;
};
}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.cpp b/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.cpp
index becaa6d1ab0..a21b3b597ba 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.cpp
+++ b/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.cpp
@@ -39,13 +39,8 @@ GetDocsumArgs::SetStackDump(uint32_t stackDumpLen, const char *stackDump)
}
bool
-GetDocsumArgs::needField(vespalib::stringref field) const {
+GetDocsumArgs::need_field(vespalib::stringref field) const {
return _fields.empty() || _fields.contains(field);
}
-void
-GetDocsumArgs::add_field(vespalib::stringref field) {
- _fields.insert(field);
-}
-
}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.h b/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.h
index 89816977290..975ae5f7592 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.h
+++ b/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.h
@@ -46,8 +46,9 @@ public:
bool dumpFeatures() const { return _dumpFeatures; }
const fef::Properties &highlightTerms() const { return _highlightTerms; }
- bool needField(vespalib::stringref field) const;
- void add_field(vespalib::stringref field);
+ void set_fields(const FieldSet& fields_in) { _fields = fields_in; }
+ const FieldSet& get_fields() const { return _fields; }
+ bool need_field(vespalib::stringref field) const;
};
}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/res_config_entry.cpp b/searchsummary/src/vespa/searchsummary/docsummary/res_config_entry.cpp
index 12e87cbd899..0c07260c1c8 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/res_config_entry.cpp
+++ b/searchsummary/src/vespa/searchsummary/docsummary/res_config_entry.cpp
@@ -5,9 +5,10 @@
namespace search::docsummary {
-ResConfigEntry::ResConfigEntry() noexcept
- : _name(),
- _docsum_field_writer()
+ResConfigEntry::ResConfigEntry(const vespalib::string& name_in) noexcept
+ : _name(name_in),
+ _writer(),
+ _generated(false)
{
}
@@ -15,4 +16,11 @@ ResConfigEntry::~ResConfigEntry() = default;
ResConfigEntry::ResConfigEntry(ResConfigEntry&&) noexcept = default;
+void
+ResConfigEntry::set_writer(std::unique_ptr<DocsumFieldWriter> writer_in)
+{
+ _writer = std::move(writer_in);
+ _generated = _writer ? _writer->IsGenerated() : false;
+}
+
}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/res_config_entry.h b/searchsummary/src/vespa/searchsummary/docsummary/res_config_entry.h
index 02090df8229..e8d6c9f4e73 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/res_config_entry.h
+++ b/searchsummary/src/vespa/searchsummary/docsummary/res_config_entry.h
@@ -12,12 +12,19 @@ class DocsumFieldWriter;
/**
* This struct describes a single docsum field (name and type).
**/
-struct ResConfigEntry {
+class ResConfigEntry {
+private:
vespalib::string _name;
- std::unique_ptr<DocsumFieldWriter> _docsum_field_writer;
- ResConfigEntry() noexcept;
+ std::unique_ptr<DocsumFieldWriter> _writer;
+ bool _generated;
+public:
+ ResConfigEntry(const vespalib::string& name_in) noexcept;
~ResConfigEntry();
ResConfigEntry(ResConfigEntry&&) noexcept;
+ void set_writer(std::unique_ptr<DocsumFieldWriter> writer);
+ const vespalib::string& name() const { return _name; }
+ DocsumFieldWriter* writer() const { return _writer.get(); }
+ bool is_generated() const { return _generated; }
};
}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultclass.cpp b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.cpp
index e6eb3064176..998ed9e2ccc 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/resultclass.cpp
+++ b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.cpp
@@ -33,17 +33,16 @@ ResultClass::AddConfigEntry(const char *name, std::unique_ptr<DocsumFieldWriter>
}
_nameMap[name] = _entries.size();
- ResConfigEntry e;
- e._name = name;
+ ResConfigEntry e(name);
if (docsum_field_writer) {
docsum_field_writer->setIndex(_entries.size());
bool generated = docsum_field_writer->IsGenerated();
- getDynamicInfo().update_override_counts(generated);
+ _dynInfo.update_override_counts(generated);
if (docsum_field_writer->setFieldWriterStateIndex(_num_field_writer_states)) {
++_num_field_writer_states;
}
}
- e._docsum_field_writer = std::move(docsum_field_writer);
+ e.set_writer(std::move(docsum_field_writer));
_entries.push_back(std::move(e));
return true;
}
@@ -54,4 +53,21 @@ ResultClass::AddConfigEntry(const char *name)
return AddConfigEntry(name, {});
}
+bool
+ResultClass::all_fields_generated(const vespalib::hash_set<vespalib::string>& fields) const
+{
+ if (_dynInfo._generateCnt == GetNumEntries()) {
+ return true;
+ }
+ if (fields.empty()) {
+ return false;
+ }
+ for (const auto& entry : _entries) {
+ if (fields.contains(entry.name()) && !entry.is_generated()) {
+ return false;
+ }
+ }
+ return true;
+}
+
}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h
index 19f39eeb1f1..502ff879e3c 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h
+++ b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h
@@ -3,8 +3,9 @@
#pragma once
#include "res_config_entry.h"
-#include <vespa/vespalib/stllike/string.h>
#include <vespa/vespalib/stllike/hash_map.h>
+#include <vespa/vespalib/stllike/hash_set.h>
+#include <vespa/vespalib/stllike/string.h>
namespace search::docsummary {
@@ -66,14 +67,6 @@ public:
~ResultClass();
/**
- * Obtain reference to dynamic field data for this result class.
- *
- * @return reference to dynamic field data.
- **/
- DynamicInfo& getDynamicInfo() noexcept { return _dynInfo; }
- const DynamicInfo& getDynamicInfo() const noexcept { return _dynInfo; }
-
- /**
* Obtain the number of config entries (size of the
* ResConfigEntry array) held by this result class.
*
@@ -120,6 +113,13 @@ public:
return (offset < _entries.size()) ? &_entries[offset] : nullptr;
}
+ /**
+ * Returns whether the given fields are generated in this result class (do not require the document instance).
+ *
+ * If the given fields set is empty, check all fields defined in this result class.
+ */
+ bool all_fields_generated(const vespalib::hash_set<vespalib::string>& fields) const;
+
void set_omit_summary_features(bool value) {
_omit_summary_features = value;
}