diff options
author | Tor Egge <Tor.Egge@online.no> | 2022-07-02 18:35:02 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2022-07-02 18:35:02 +0200 |
commit | 7fe5ed56aeb183da725c7225a01f23d2d91d99de (patch) | |
tree | 2b31b302c0aca379073f6bd193df93da32c16189 /searchsummary | |
parent | 59a1f9116e9cc37d749caa4ea7b0bf05662eae29 (diff) |
Add DocsumBlobEntryFilter to limit types in docsum blob.
Diffstat (limited to 'searchsummary')
8 files changed, 102 insertions, 12 deletions
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp index e7a9840c47f..de8bb36e98b 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp @@ -1,8 +1,11 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "docsumwriter.h" +#include "check_undefined_value_visitor.h" #include "docsumstate.h" #include "docsum_field_writer_state.h" +#include "summaryfieldconverter.h" +#include <vespa/document/fieldvalue/fieldvalue.h> #include <vespa/searchcommon/common/undefinedvalues.h> #include <vespa/searchlib/util/slime_output_raw_buf_adapter.h> #include <vespa/searchlib/attribute/iattributemanager.h> @@ -17,6 +20,22 @@ using vespalib::Issue; namespace search::docsummary { +namespace { + +void insert_document_field(const vespalib::string& field_name, const GeneralResult& gres, Inserter &inserter) +{ + auto input_field_value = gres.get_field_value(field_name); + if (input_field_value) { + CheckUndefinedValueVisitor check_undefined; + input_field_value->accept(check_undefined); + if (!check_undefined.is_undefined()) { + SummaryFieldConverter::insert_summary_field(false, *input_field_value, inserter); + } + } +} + +} + uint32_t IDocsumWriter::slime2RawBuf(const Slime & slime, RawBuf & buf) { @@ -86,6 +105,7 @@ constexpr uint64_t default_64bits_int = search::attribute::getUndefined<int64_t> static void convertEntry(const ResConfigEntry *resCfg, const ResEntry *entry, + const GeneralResult& gres, Inserter &inserter, Slime &slime) { @@ -93,7 +113,13 @@ static void convertEntry(const ResConfigEntry *resCfg, const char *ptr; uint32_t len; - LOG_ASSERT(resCfg != nullptr && entry != nullptr); + LOG_ASSERT(resCfg != nullptr); + if (entry == nullptr || entry->_not_present) { + // Entry is not present in docsum blob + insert_document_field(resCfg->_bindname, gres, inserter); + return; + } + switch (resCfg->_type) { case RES_INT: case RES_SHORT: @@ -184,7 +210,7 @@ DynamicDocsumWriter::insertDocsum(const ResolveClassInfo & rci, uint32_t docid, } } else { if (rci.inputClass == rci.outputClass) { - convertEntry(outCfg, gres.GetEntry(i), inserter, slime); + convertEntry(outCfg, gres.GetEntry(i), gres, inserter, slime); } else { int inIdx = rci.inputClass->GetIndexFromEnumValue(outCfg->_enumValue); const ResConfigEntry *inCfg = rci.inputClass->GetEntry(inIdx); @@ -192,7 +218,9 @@ DynamicDocsumWriter::insertDocsum(const ResolveClassInfo & rci, uint32_t docid, // copy field const ResEntry *entry = gres.GetEntry(inIdx); LOG_ASSERT(entry != nullptr); - convertEntry(outCfg, entry, inserter, slime); + convertEntry(outCfg, entry, gres, inserter, slime); + } else { + insert_document_field(outCfg->_bindname, gres, inserter); } } } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/general_result.cpp b/searchsummary/src/vespa/searchsummary/docsummary/general_result.cpp index f17787ff4e5..781cce8f972 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/general_result.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/general_result.cpp @@ -99,6 +99,12 @@ GeneralResult::unpack(const char *buf, const size_t buflen) for (uint32_t i = 0; rc && i < _entrycnt; i++) { const ResConfigEntry *entry = _resClass->GetEntry(i); + _entries[i]._not_present = entry->_not_present; + if (entry->_not_present) { + // Entry is not present in docsum blob + _entries[i]._type = entry->_type; + continue; + } switch (entry->_type) { case RES_INT: { diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultclass.cpp b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.cpp index 37be8e0a1b2..ed1cd8b542b 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/resultclass.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.cpp @@ -7,7 +7,7 @@ namespace search::docsummary { -ResultClass::ResultClass(const char *name, uint32_t id, util::StringEnum & fieldEnum) +ResultClass::ResultClass(const char *name, uint32_t id, util::StringEnum & fieldEnum, const DocsumBlobEntryFilter& docsum_blob_entry_filter) : _name(name), _classID(id), _entries(), @@ -15,7 +15,8 @@ ResultClass::ResultClass(const char *name, uint32_t id, util::StringEnum & field _fieldEnum(fieldEnum), _enumMap(), _dynInfo(NULL), - _omit_summary_features(false) + _omit_summary_features(false), + _docsum_blob_entry_filter(docsum_blob_entry_filter) { } @@ -25,7 +26,11 @@ int ResultClass::GetIndexFromName(const char* name) const { NameIdMap::const_iterator found(_nameMap.find(name)); - return (found != _nameMap.end()) ? found->second : -1; + if (found == _nameMap.end()) { + return -1; + } + int idx = found->second; + return _entries[idx]._not_present ? -1 : idx; } bool @@ -37,6 +42,7 @@ ResultClass::AddConfigEntry(const char *name, ResType type) _nameMap[name] = _entries.size(); ResConfigEntry e; e._type = type; + e._not_present = _docsum_blob_entry_filter.skip(type); e._bindname = name; e._enumValue = _fieldEnum.Add(name); assert(e._enumValue >= 0); diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h index 68e5d6848b9..8865e28acc6 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h @@ -6,6 +6,7 @@ #include <vespa/vespalib/stllike/string.h> #include <vespa/vespalib/stllike/hash_map.h> #include <vespa/searchlib/util/stringenum.h> +#include <bitset> namespace search::docsummary { @@ -32,6 +33,24 @@ enum ResType { RES_FEATUREDATA }; +/* + * Class containing the set of result types not stored in docsum blobs. + * This is used for gradual migration towards elimination of docsum blobs. + */ +class DocsumBlobEntryFilter { + std::bitset<14> _skip_types; + +public: + DocsumBlobEntryFilter() + : _skip_types() + { + } + bool skip(ResType type) const noexcept { return _skip_types.test(type); } + DocsumBlobEntryFilter &add_skip(ResType type) { + _skip_types.set(type); + return *this; + } +}; /** * This struct describes a single docsum field (name and type). A @@ -41,6 +60,7 @@ enum ResType { **/ struct ResConfigEntry { ResType _type; + bool _not_present; // Entry not present in docsum blob when _not_present is set vespalib::string _bindname; int _enumValue; }; @@ -57,6 +77,7 @@ struct ResConfigEntry { struct ResEntry { ResType _type; + bool _not_present; // Entry not present in docsum blob when _not_present is set union { uint32_t _intval; uint32_t _stringlen; @@ -113,6 +134,7 @@ private: // Whether or not summary features should be omitted when filling this summary class. // As default, summary features are always included. bool _omit_summary_features; + DocsumBlobEntryFilter _docsum_blob_entry_filter; public: typedef std::unique_ptr<ResultClass> UP; @@ -125,7 +147,7 @@ public: * @param id the numeric id of this result class. * @param fieldEnum shared object used to enumerate field names. **/ - ResultClass(const char *name, uint32_t id, util::StringEnum & fieldEnum); + ResultClass(const char *name, uint32_t id, util::StringEnum & fieldEnum, const DocsumBlobEntryFilter& docsum_blob_entry_filter); /** * Destructor. Delete internal structures. @@ -216,7 +238,7 @@ public: * GeneralResult::GetEntry(string) method; no need to call it * directly. * - * @return field index or -1 if not found. + * @return field index or -1 if not found or _not_present is set. **/ int GetIndexFromName(const char* name) const; @@ -233,11 +255,15 @@ public: * call it directly. NOTE3: You need to call the CreateEnumMap * method before calling this one. * - * @return field index or -1 if not found. + * @return field index or -1 if not found or _not_present is set. **/ int GetIndexFromEnumValue(uint32_t value) const { - return (value < _enumMap.size()) ? _enumMap[value] : -1; + if (value >= _enumMap.size()) { + return -1; + } + int idx = _enumMap[value]; + return ((idx < 0) || _entries[idx]._not_present) ? -1 : idx; } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.cpp b/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.cpp index 02bb5d25ca4..4096e26a6e3 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.cpp @@ -25,9 +25,15 @@ ResultConfig::Init() ResultConfig::ResultConfig() + : ResultConfig(DocsumBlobEntryFilter()) +{ +} + +ResultConfig::ResultConfig(const DocsumBlobEntryFilter& docsum_blob_entry_filter) : _defaultSummaryId(-1), _classLookup(), - _nameLookup() + _nameLookup(), + _docsum_blob_entry_filter(docsum_blob_entry_filter) { Init(); } @@ -77,7 +83,7 @@ ResultConfig::AddResultClass(const char *name, uint32_t id) ResultClass *ret = nullptr; if (id != NoClassID() && (_classLookup.find(id) == _classLookup.end())) { - ResultClass::UP rc(new ResultClass(name, id, _fieldEnum)); + ResultClass::UP rc(new ResultClass(name, id, _fieldEnum, _docsum_blob_entry_filter)); ret = rc.get(); _classLookup[id] = std::move(rc); if (_nameLookup.find(name) != _nameLookup.end()) { diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.h b/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.h index 8c5895a779d..1438aee73ce 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.h @@ -37,6 +37,7 @@ private: search::util::StringEnum _fieldEnum; IdMap _classLookup; NameMap _nameLookup; // name -> class id + DocsumBlobEntryFilter _docsum_blob_entry_filter; void Clean(); void Init(); @@ -79,6 +80,7 @@ public: * NOTE: This method simply calls the Init method. **/ ResultConfig(); + ResultConfig(const DocsumBlobEntryFilter& docsum_blob_entry_filter); /** * Destructor. Delete all internal structures. NOTE: This method diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.cpp b/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.cpp index 4ab06cbd41a..4cf36785f69 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.cpp @@ -19,6 +19,14 @@ ResultPacker::WarnType(ResType type) const GetResTypeName(_cfgEntry->_type)); } +void +ResultPacker::skip_entries_not_present() +{ + while (_cfgEntry != nullptr && _cfgEntry->_not_present) { + _cfgEntry = _resClass->GetEntry(++_entryIdx); + } +} + bool ResultPacker::CheckEntry(ResType type) { if (_error) @@ -32,6 +40,7 @@ bool ResultPacker::CheckEntry(ResType type) WarnType(type); } _cfgEntry = _resClass->GetEntry(++_entryIdx); + skip_entries_not_present(); } else { SetFormatError(type); } @@ -88,6 +97,7 @@ ResultPacker::Init(uint32_t classID) uint32_t id = _resClass->GetClassID(); _buf.append(&id, sizeof(id)); _cfgEntry = _resClass->GetEntry(_entryIdx); + skip_entries_not_present(); _error = false; } else { _cfgEntry = nullptr; diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.h b/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.h index 4aa31b2df23..f2460f3d3c3 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.h @@ -36,6 +36,7 @@ private: void WarnType(ResType type) const; void SetFormatError(ResType type); + void skip_entries_not_present(); bool CheckEntry(ResType type); public: @@ -249,6 +250,11 @@ public: **/ bool GetDocsumBlob(const char **buf, uint32_t *buflen); + /* + * Get index of next entry to add. Used by proton::DocumentStoreAdapter + * and vsm::DocsumFilter to track entries to skip. + */ + uint32_t get_entry_idx() const noexcept { return _entryIdx; } }; } |