summary refs log tree commit diff stats
path: root/searchsummary
diff options
context:
space:
mode:
author Tor Egge <Tor.Egge@online.no> 2022-07-02 18:35:02 +0200
committer Tor Egge <Tor.Egge@online.no> 2022-07-02 18:35:02 +0200
commit 7fe5ed56aeb183da725c7225a01f23d2d91d99de (patch)
tree 2b31b302c0aca379073f6bd193df93da32c16189 /searchsummary
parent 59a1f9116e9cc37d749caa4ea7b0bf05662eae29 (diff)
Add DocsumBlobEntryFilter to limit types in docsum blob.
Diffstat (limited to 'searchsummary')
-rw-r--r-- searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp 34
-rw-r--r-- searchsummary/src/vespa/searchsummary/docsummary/general_result.cpp 6
-rw-r--r-- searchsummary/src/vespa/searchsummary/docsummary/resultclass.cpp 12
-rw-r--r-- searchsummary/src/vespa/searchsummary/docsummary/resultclass.h 34
-rw-r--r-- searchsummary/src/vespa/searchsummary/docsummary/resultconfig.cpp 10
-rw-r--r-- searchsummary/src/vespa/searchsummary/docsummary/resultconfig.h 2
-rw-r--r-- searchsummary/src/vespa/searchsummary/docsummary/resultpacker.cpp 10
-rw-r--r-- searchsummary/src/vespa/searchsummary/docsummary/resultpacker.h 6
8 files changed, 102 insertions, 12 deletions
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp
index e7a9840c47f..de8bb36e98b 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp
+++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp
@@ -1,8 +1,11 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "docsumwriter.h"
+#include "check_undefined_value_visitor.h"
#include "docsumstate.h"
#include "docsum_field_writer_state.h"
+#include "summaryfieldconverter.h"
+#include <vespa/document/fieldvalue/fieldvalue.h>
#include <vespa/searchcommon/common/undefinedvalues.h>
#include <vespa/searchlib/util/slime_output_raw_buf_adapter.h>
#include <vespa/searchlib/attribute/iattributemanager.h>
@@ -17,6 +20,22 @@ using vespalib::Issue;
namespace search::docsummary {
+namespace {
+
+void insert_document_field(const vespalib::string& field_name, const GeneralResult& gres, Inserter &inserter)
+{
+ auto input_field_value = gres.get_field_value(field_name);
+ if (input_field_value) {
+ CheckUndefinedValueVisitor check_undefined;
+ input_field_value->accept(check_undefined);
+ if (!check_undefined.is_undefined()) {
+ SummaryFieldConverter::insert_summary_field(false, *input_field_value, inserter);
+ }
+ }
+}
+
+}
+
uint32_t
IDocsumWriter::slime2RawBuf(const Slime & slime, RawBuf & buf)
{
@@ -86,6 +105,7 @@ constexpr uint64_t default_64bits_int = search::attribute::getUndefined<int64_t>
static void convertEntry(const ResConfigEntry *resCfg,
const ResEntry *entry,
+ const GeneralResult& gres,
Inserter &inserter,
Slime &slime)
{
@@ -93,7 +113,13 @@ static void convertEntry(const ResConfigEntry *resCfg,
const char *ptr;
uint32_t len;
- LOG_ASSERT(resCfg != nullptr && entry != nullptr);
+ LOG_ASSERT(resCfg != nullptr);
+ if (entry == nullptr || entry->_not_present) {
+ // Entry is not present in docsum blob
+ insert_document_field(resCfg->_bindname, gres, inserter);
+ return;
+ }
+
switch (resCfg->_type) {
case RES_INT:
case RES_SHORT:
@@ -184,7 +210,7 @@ DynamicDocsumWriter::insertDocsum(const ResolveClassInfo & rci, uint32_t docid,
}
} else {
if (rci.inputClass == rci.outputClass) {
- convertEntry(outCfg, gres.GetEntry(i), inserter, slime);
+ convertEntry(outCfg, gres.GetEntry(i), gres, inserter, slime);
} else {
int inIdx = rci.inputClass->GetIndexFromEnumValue(outCfg->_enumValue);
const ResConfigEntry *inCfg = rci.inputClass->GetEntry(inIdx);
@@ -192,7 +218,9 @@ DynamicDocsumWriter::insertDocsum(const ResolveClassInfo & rci, uint32_t docid,
// copy field
const ResEntry *entry = gres.GetEntry(inIdx);
LOG_ASSERT(entry != nullptr);
- convertEntry(outCfg, entry, inserter, slime);
+ convertEntry(outCfg, entry, gres, inserter, slime);
+ } else {
+ insert_document_field(outCfg->_bindname, gres, inserter);
}
}
}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/general_result.cpp b/searchsummary/src/vespa/searchsummary/docsummary/general_result.cpp
index f17787ff4e5..781cce8f972 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/general_result.cpp
+++ b/searchsummary/src/vespa/searchsummary/docsummary/general_result.cpp
@@ -99,6 +99,12 @@ GeneralResult::unpack(const char *buf, const size_t buflen)
for (uint32_t i = 0; rc && i < _entrycnt; i++) {
const ResConfigEntry *entry = _resClass->GetEntry(i);
+ _entries[i]._not_present = entry->_not_present;
+ if (entry->_not_present) {
+ // Entry is not present in docsum blob
+ _entries[i]._type = entry->_type;
+ continue;
+ }
switch (entry->_type) {
case RES_INT: {
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultclass.cpp b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.cpp
index 37be8e0a1b2..ed1cd8b542b 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/resultclass.cpp
+++ b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.cpp
@@ -7,7 +7,7 @@
namespace search::docsummary {
-ResultClass::ResultClass(const char *name, uint32_t id, util::StringEnum & fieldEnum)
+ResultClass::ResultClass(const char *name, uint32_t id, util::StringEnum & fieldEnum, const DocsumBlobEntryFilter& docsum_blob_entry_filter)
: _name(name),
_classID(id),
_entries(),
@@ -15,7 +15,8 @@ ResultClass::ResultClass(const char *name, uint32_t id, util::StringEnum & field
_fieldEnum(fieldEnum),
_enumMap(),
_dynInfo(NULL),
- _omit_summary_features(false)
+ _omit_summary_features(false),
+ _docsum_blob_entry_filter(docsum_blob_entry_filter)
{ }
@@ -25,7 +26,11 @@ int
ResultClass::GetIndexFromName(const char* name) const
{
NameIdMap::const_iterator found(_nameMap.find(name));
- return (found != _nameMap.end()) ? found->second : -1;
+ if (found == _nameMap.end()) {
+ return -1;
+ }
+ int idx = found->second;
+ return _entries[idx]._not_present ? -1 : idx;
}
bool
@@ -37,6 +42,7 @@ ResultClass::AddConfigEntry(const char *name, ResType type)
_nameMap[name] = _entries.size();
ResConfigEntry e;
e._type = type;
+ e._not_present = _docsum_blob_entry_filter.skip(type);
e._bindname = name;
e._enumValue = _fieldEnum.Add(name);
assert(e._enumValue >= 0);
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h
index 68e5d6848b9..8865e28acc6 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h
+++ b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h
@@ -6,6 +6,7 @@
#include <vespa/vespalib/stllike/string.h>
#include <vespa/vespalib/stllike/hash_map.h>
#include <vespa/searchlib/util/stringenum.h>
+#include <bitset>
namespace search::docsummary {
@@ -32,6 +33,24 @@ enum ResType {
RES_FEATUREDATA
};
+/*
+ * Class containing the set of result types not stored in docsum blobs.
+ * This is used for gradual migration towards elimination of docsum blobs.
+ */
+class DocsumBlobEntryFilter {
+ std::bitset<14> _skip_types;
+
+public:
+ DocsumBlobEntryFilter()
+ : _skip_types()
+ {
+ }
+ bool skip(ResType type) const noexcept { return _skip_types.test(type); }
+ DocsumBlobEntryFilter &add_skip(ResType type) {
+ _skip_types.set(type);
+ return *this;
+ }
+};
/**
* This struct describes a single docsum field (name and type). A
@@ -41,6 +60,7 @@ enum ResType {
**/
struct ResConfigEntry {
ResType _type;
+ bool _not_present; // Entry not present in docsum blob when _not_present is set
vespalib::string _bindname;
int _enumValue;
};
@@ -57,6 +77,7 @@ struct ResConfigEntry {
struct ResEntry
{
ResType _type;
+ bool _not_present; // Entry not present in docsum blob when _not_present is set
union {
uint32_t _intval;
uint32_t _stringlen;
@@ -113,6 +134,7 @@ private:
// Whether or not summary features should be omitted when filling this summary class.
// As default, summary features are always included.
bool _omit_summary_features;
+ DocsumBlobEntryFilter _docsum_blob_entry_filter;
public:
typedef std::unique_ptr<ResultClass> UP;
@@ -125,7 +147,7 @@ public:
* @param id the numeric id of this result class.
* @param fieldEnum shared object used to enumerate field names.
**/
- ResultClass(const char *name, uint32_t id, util::StringEnum & fieldEnum);
+ ResultClass(const char *name, uint32_t id, util::StringEnum & fieldEnum, const DocsumBlobEntryFilter& docsum_blob_entry_filter);
/**
* Destructor. Delete internal structures.
@@ -216,7 +238,7 @@ public:
* GeneralResult::GetEntry(string) method; no need to call it
* directly.
*
- * @return field index or -1 if not found.
+ * @return field index or -1 if not found or _not_present is set.
**/
int GetIndexFromName(const char* name) const;
@@ -233,11 +255,15 @@ public:
* call it directly. NOTE3: You need to call the CreateEnumMap
* method before calling this one.
*
- * @return field index or -1 if not found.
+ * @return field index or -1 if not found or _not_present is set.
**/
int GetIndexFromEnumValue(uint32_t value) const
{
- return (value < _enumMap.size()) ? _enumMap[value] : -1;
+ if (value >= _enumMap.size()) {
+ return -1;
+ }
+ int idx = _enumMap[value];
+ return ((idx < 0) || _entries[idx]._not_present) ? -1 : idx;
}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.cpp b/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.cpp
index 02bb5d25ca4..4096e26a6e3 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.cpp
+++ b/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.cpp
@@ -25,9 +25,15 @@ ResultConfig::Init()
ResultConfig::ResultConfig()
+ : ResultConfig(DocsumBlobEntryFilter())
+{
+}
+
+ResultConfig::ResultConfig(const DocsumBlobEntryFilter& docsum_blob_entry_filter)
: _defaultSummaryId(-1),
_classLookup(),
- _nameLookup()
+ _nameLookup(),
+ _docsum_blob_entry_filter(docsum_blob_entry_filter)
{
Init();
}
@@ -77,7 +83,7 @@ ResultConfig::AddResultClass(const char *name, uint32_t id)
ResultClass *ret = nullptr;
if (id != NoClassID() && (_classLookup.find(id) == _classLookup.end())) {
- ResultClass::UP rc(new ResultClass(name, id, _fieldEnum));
+ ResultClass::UP rc(new ResultClass(name, id, _fieldEnum, _docsum_blob_entry_filter));
ret = rc.get();
_classLookup[id] = std::move(rc);
if (_nameLookup.find(name) != _nameLookup.end()) {
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.h b/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.h
index 8c5895a779d..1438aee73ce 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.h
+++ b/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.h
@@ -37,6 +37,7 @@ private:
search::util::StringEnum _fieldEnum;
IdMap _classLookup;
NameMap _nameLookup; // name -> class id
+ DocsumBlobEntryFilter _docsum_blob_entry_filter;
void Clean();
void Init();
@@ -79,6 +80,7 @@ public:
* NOTE: This method simply calls the Init method.
**/
ResultConfig();
+ ResultConfig(const DocsumBlobEntryFilter& docsum_blob_entry_filter);
/**
* Destructor. Delete all internal structures. NOTE: This method
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.cpp b/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.cpp
index 4ab06cbd41a..4cf36785f69 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.cpp
+++ b/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.cpp
@@ -19,6 +19,14 @@ ResultPacker::WarnType(ResType type) const
GetResTypeName(_cfgEntry->_type));
}
+void
+ResultPacker::skip_entries_not_present()
+{
+ while (_cfgEntry != nullptr && _cfgEntry->_not_present) {
+ _cfgEntry = _resClass->GetEntry(++_entryIdx);
+ }
+}
+
bool ResultPacker::CheckEntry(ResType type)
{
if (_error)
@@ -32,6 +40,7 @@ bool ResultPacker::CheckEntry(ResType type)
WarnType(type);
}
_cfgEntry = _resClass->GetEntry(++_entryIdx);
+ skip_entries_not_present();
} else {
SetFormatError(type);
}
@@ -88,6 +97,7 @@ ResultPacker::Init(uint32_t classID)
uint32_t id = _resClass->GetClassID();
_buf.append(&id, sizeof(id));
_cfgEntry = _resClass->GetEntry(_entryIdx);
+ skip_entries_not_present();
_error = false;
} else {
_cfgEntry = nullptr;
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.h b/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.h
index 4aa31b2df23..f2460f3d3c3 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.h
+++ b/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.h
@@ -36,6 +36,7 @@ private:
void WarnType(ResType type) const;
void SetFormatError(ResType type);
+ void skip_entries_not_present();
bool CheckEntry(ResType type);
public:
@@ -249,6 +250,11 @@ public:
**/
bool GetDocsumBlob(const char **buf, uint32_t *buflen);
+ /*
+ * Get index of next entry to add. Used by proton::DocumentStoreAdapter
+ * and vsm::DocsumFilter to track entries to skip.
+ */
+ uint32_t get_entry_idx() const noexcept { return _entryIdx; }
};
}