diff options
author | Tor Brede Vekterli <vekterli@vespa.ai> | 2024-03-12 14:17:29 +0000 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@vespa.ai> | 2024-03-12 14:52:48 +0000 |
commit | b1c2f5e4548a0c468e280a32af6766b5c3710938 (patch) | |
tree | 2b3f09ea045fce6621dd21b260b6844309449c88 /searchcore/src/vespa | |
parent | 89e2adfc2b268b33b9c25ef18bc32bfed9d5e838 (diff) |
Use attributes when evaluating selection expression on full documents
This addresses an unintended shortcoming in our handling of imported
fields, as these are exposed _only_ through attributes.
Document selection evaluation is automatically optimized in the backend
by pre-filtering documents that can be fully evaluated by exclusively
looking at attribute values (this goes for both selection matching
and mismatching). This is done by cloning the selection AST and
replacing all applicable field value nodes with corresponding attribute
references.
However, if a document _cannot_ be evaluated from attributes alone,
we fall back to reading it fully from the doc store, after which the
original selection is evaluated on it. This is the crux of the problem,
and prior to this commit an expression using both an imported field
and a non-attribute field would fail to be evaluated since the full
document evaluation would not have any knowledge of the attribute.
This commit makes it so that also the full document evaluation will
use a "patched" AST with all possible field references replaced with
attribute lookups. Since we reuse an existing patched AST that was not
otherwise used in this code path, there is no added overhead with this
approach.
Diffstat (limited to 'searchcore/src/vespa')
5 files changed, 54 insertions, 31 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/common/attributefieldvaluenode.cpp b/searchcore/src/vespa/searchcore/proton/common/attributefieldvaluenode.cpp index d94b0a4c909..d22fed89ad1 100644 --- a/searchcore/src/vespa/searchcore/proton/common/attributefieldvaluenode.cpp +++ b/searchcore/src/vespa/searchcore/proton/common/attributefieldvaluenode.cpp @@ -40,7 +40,7 @@ std::unique_ptr<document::select::Value> AttributeFieldValueNode:: getValue(const Context &context) const { - const auto &sc(static_cast<const SelectContext &>(context)); + const auto &sc(dynamic_cast<const SelectContext &>(context)); uint32_t docId(sc._docId); assert(docId != 0u); const auto& v = sc.guarded_attribute_at_index(_attr_guard_index); diff --git a/searchcore/src/vespa/searchcore/proton/common/cachedselect.cpp b/searchcore/src/vespa/searchcore/proton/common/cachedselect.cpp index 293ed3bdcb9..9f890f5d480 100644 --- a/searchcore/src/vespa/searchcore/proton/common/cachedselect.cpp +++ b/searchcore/src/vespa/searchcore/proton/common/cachedselect.cpp @@ -35,13 +35,16 @@ public: uint32_t _mvAttrs; uint32_t _complexAttrs; - uint32_t getFieldNodes() const { return _fieldNodes; } + [[nodiscard]] uint32_t getFieldNodes() const { return _fieldNodes; } - static uint32_t invalidIdx() { + constexpr static uint32_t invalidIdx() noexcept { return std::numeric_limits<uint32_t>::max(); } AttrVisitor(const search::IAttributeManager &amgr, CachedSelect::AttributeVectors &attributes); + AttrVisitor(const search::IAttributeManager &amgr, + CachedSelect::AttributeVectors &attributes, + AttrMap existing_attr_map); ~AttrVisitor() override; /* @@ -62,6 +65,18 @@ AttrVisitor::AttrVisitor(const search::IAttributeManager &amgr, CachedSelect::At _complexAttrs(0u) {} +AttrVisitor::AttrVisitor(const search::IAttributeManager &amgr, + CachedSelect::AttributeVectors &attributes, + AttrMap existing_attr_map) + : CloningVisitor(), + _amap(std::move(existing_attr_map)), + _amgr(amgr), + _attributes(attributes), + _svAttrs(0u), + _mvAttrs(0u), + _complexAttrs(0u) +{} + AttrVisitor::~AttrVisitor() = default; bool isSingleValueThatWeHandle(BasicType type) { @@ -130,7 +145,7 @@ CachedSelect::Session::Session(std::unique_ptr<document::select::Node> docSelect } bool -CachedSelect::Session::contains(const SelectContext &context) const +CachedSelect::Session::contains_pre_doc(const SelectContext &context) const { if (_preDocSelect && (_preDocSelect->contains(context) == document::select::Result::False)) { return false; @@ -140,10 +155,10 @@ CachedSelect::Session::contains(const SelectContext &context) const } bool -CachedSelect::Session::contains(const document::Document &doc) const +CachedSelect::Session::contains_doc(const SelectContext &context) const { return (_preDocOnlySelect) || - (_docSelect && (_docSelect->contains(doc) == document::select::Result::True)); + (_docSelect && (_docSelect->contains(context) == document::select::Result::True)); } const document::select::Node & @@ -177,8 +192,12 @@ CachedSelect::setPreDocumentSelect(const search::IAttributeManager &attrMgr, if (_fieldNodes == _svAttrFieldNodes) { _preDocOnlySelect = std::move(allAttrVisitor.getNode()); } else if (_svAttrFieldNodes > 0) { - _attributes.clear(); - AttrVisitor someAttrVisitor(attrMgr, _attributes); + // Also let document-level selection use attribute wiring; otherwise imported fields + // would not resolve to anything, as these do not exist in the concrete document itself. + _docSelect = std::move(allAttrVisitor.getNode()); + [[maybe_unused]] size_t attrs_before = _attributes.size(); + AttrVisitor someAttrVisitor(attrMgr, _attributes, std::move(allAttrVisitor._amap)); + assert(_attributes.size() == attrs_before); noDocsPruner.getNode()->visit(someAttrVisitor); _preDocSelect = std::move(someAttrVisitor.getNode()); } diff --git a/searchcore/src/vespa/searchcore/proton/common/cachedselect.h b/searchcore/src/vespa/searchcore/proton/common/cachedselect.h index b0660399f6f..89f002bd939 100644 --- a/searchcore/src/vespa/searchcore/proton/common/cachedselect.h +++ b/searchcore/src/vespa/searchcore/proton/common/cachedselect.h @@ -42,9 +42,10 @@ public: Session(std::unique_ptr<document::select::Node> docSelect, std::unique_ptr<document::select::Node> preDocOnlySelect, std::unique_ptr<document::select::Node> preDocSelect); - bool contains(const SelectContext &context) const; - bool contains(const document::Document &doc) const; - const document::select::Node &selectNode() const; + [[nodiscard]] bool contains_pre_doc(const SelectContext &context) const; + // Precondition: context must have non-nullptr _doc + [[nodiscard]] bool contains_doc(const SelectContext &context) const; + [[nodiscard]] const document::select::Node &selectNode() const; }; using AttributeVectors = std::vector<std::shared_ptr<search::attribute::ReadableAttributeVector>>; diff --git a/searchcore/src/vespa/searchcore/proton/common/selectpruner.h b/searchcore/src/vespa/searchcore/proton/common/selectpruner.h index 3612914fc6f..3322a07ebd7 100644 --- a/searchcore/src/vespa/searchcore/proton/common/selectpruner.h +++ b/searchcore/src/vespa/searchcore/proton/common/selectpruner.h @@ -36,7 +36,7 @@ public: class SelectPruner : public document::select::CloningVisitor, - public SelectPrunerBase + public SelectPrunerBase { public: private: @@ -53,16 +53,16 @@ public: bool hasFields, bool hasDocuments); - SelectPruner(const SelectPruner *rhs); - virtual ~SelectPruner(); + explicit SelectPruner(const SelectPruner *rhs); + ~SelectPruner() override; - uint32_t getFieldNodes() const { return _fieldNodes; } - uint32_t getAttrFieldNodes() const { return _attrFieldNodes; } - const document::select::ResultSet & getResultSet() const { return _resultSet; } - bool isFalse() const; - bool isTrue() const; - bool isInvalid() const; - bool isConst() const; + [[nodiscard]] uint32_t getFieldNodes() const noexcept { return _fieldNodes; } + [[nodiscard]] uint32_t getAttrFieldNodes() const noexcept { return _attrFieldNodes; } + [[nodiscard]] const document::select::ResultSet & getResultSet() const noexcept { return _resultSet; } + [[nodiscard]] bool isFalse() const; + [[nodiscard]] bool isTrue() const; + [[nodiscard]] bool isInvalid() const; + [[nodiscard]] bool isConst() const; void trace(std::ostream &t); void process(const document::select::Node &node); private: @@ -83,8 +83,8 @@ private: void setTernaryConst(bool val); void set_null_value_node(); void resolveTernaryConst(bool wantInverted); - bool isInvalidVal() const; - bool isNullVal() const; + [[nodiscard]] bool isInvalidVal() const; + [[nodiscard]] bool isNullVal() const; void swap(SelectPruner &rhs); }; diff --git a/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.cpp b/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.cpp index e9d233ef6ec..85c0eb0c1f2 100644 --- a/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.cpp +++ b/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.cpp @@ -171,28 +171,31 @@ public: } } - bool willAlwaysFail() const { return _willAlwaysFail; } + [[nodiscard]] bool willAlwaysFail() const noexcept { return _willAlwaysFail; } - bool match(const search::DocumentMetaData & meta) const { + [[nodiscard]] bool match(const search::DocumentMetaData & meta) const { if (meta.lid >= _docidLimit) { return false; } if (_dscTrue || _metaOnly) { return true; } - if (_selectCxt) { - _selectCxt->_docId = meta.lid; - } if (!_gidFilter.gid_might_match_selection(meta.gid)) { return false; } - return _selectSession->contains(*_selectCxt); + assert(_selectCxt); + _selectCxt->_docId = meta.lid; + _selectCxt->_doc = nullptr; + return _selectSession->contains_pre_doc(*_selectCxt); } - bool match(const search::DocumentMetaData & meta, const Document * doc) const { + [[nodiscard]] bool match(const search::DocumentMetaData & meta, const Document * doc) const { if (_dscTrue || _metaOnly) { return true; } - return (doc && (doc->getId().getGlobalId() == meta.gid) && _selectSession->contains(*doc)); + assert(_selectCxt); + _selectCxt->_docId = meta.lid; + _selectCxt->_doc = doc; + return (doc && (doc->getId().getGlobalId() == meta.gid) && _selectSession->contains_doc(*_selectCxt)); } private: bool _dscTrue; |