summaryrefslogtreecommitdiffstats
path: root/searchcore/src/vespa
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@vespa.ai>2024-03-12 14:17:29 +0000
committerTor Brede Vekterli <vekterli@vespa.ai>2024-03-12 14:52:48 +0000
commitb1c2f5e4548a0c468e280a32af6766b5c3710938 (patch)
tree2b3f09ea045fce6621dd21b260b6844309449c88 /searchcore/src/vespa
parent89e2adfc2b268b33b9c25ef18bc32bfed9d5e838 (diff)
Use attributes when evaluating selection expression on full documents
This addresses an unintended shortcoming in our handling of imported fields, as these are exposed _only_ through attributes. Document selection evaluation is automatically optimized in the backend by pre-filtering documents that can be fully evaluated by exclusively looking at attribute values (this goes for both selection matching and mismatching). This is done by cloning the selection AST and replacing all applicable field value nodes with corresponding attribute references. However, if a document _cannot_ be evaluated from attributes alone, we fall back to reading it fully from the doc store, after which the original selection is evaluated on it. This is the crux of the problem, and prior to this commit an expression using both an imported field and a non-attribute field would fail to be evaluated since the full document evaluation would not have any knowledge of the attribute. This commit makes it so that also the full document evaluation will use a "patched" AST with all possible field references replaced with attribute lookups. Since we reuse an existing patched AST that was not otherwise used in this code path, there is no added overhead with this approach.
Diffstat (limited to 'searchcore/src/vespa')
-rw-r--r--searchcore/src/vespa/searchcore/proton/common/attributefieldvaluenode.cpp2
-rw-r--r--searchcore/src/vespa/searchcore/proton/common/cachedselect.cpp33
-rw-r--r--searchcore/src/vespa/searchcore/proton/common/cachedselect.h7
-rw-r--r--searchcore/src/vespa/searchcore/proton/common/selectpruner.h24
-rw-r--r--searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.cpp19
5 files changed, 54 insertions, 31 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/common/attributefieldvaluenode.cpp b/searchcore/src/vespa/searchcore/proton/common/attributefieldvaluenode.cpp
index d94b0a4c909..d22fed89ad1 100644
--- a/searchcore/src/vespa/searchcore/proton/common/attributefieldvaluenode.cpp
+++ b/searchcore/src/vespa/searchcore/proton/common/attributefieldvaluenode.cpp
@@ -40,7 +40,7 @@ std::unique_ptr<document::select::Value>
AttributeFieldValueNode::
getValue(const Context &context) const
{
- const auto &sc(static_cast<const SelectContext &>(context));
+ const auto &sc(dynamic_cast<const SelectContext &>(context));
uint32_t docId(sc._docId);
assert(docId != 0u);
const auto& v = sc.guarded_attribute_at_index(_attr_guard_index);
diff --git a/searchcore/src/vespa/searchcore/proton/common/cachedselect.cpp b/searchcore/src/vespa/searchcore/proton/common/cachedselect.cpp
index 293ed3bdcb9..9f890f5d480 100644
--- a/searchcore/src/vespa/searchcore/proton/common/cachedselect.cpp
+++ b/searchcore/src/vespa/searchcore/proton/common/cachedselect.cpp
@@ -35,13 +35,16 @@ public:
uint32_t _mvAttrs;
uint32_t _complexAttrs;
- uint32_t getFieldNodes() const { return _fieldNodes; }
+ [[nodiscard]] uint32_t getFieldNodes() const { return _fieldNodes; }
- static uint32_t invalidIdx() {
+ constexpr static uint32_t invalidIdx() noexcept {
return std::numeric_limits<uint32_t>::max();
}
AttrVisitor(const search::IAttributeManager &amgr, CachedSelect::AttributeVectors &attributes);
+ AttrVisitor(const search::IAttributeManager &amgr,
+ CachedSelect::AttributeVectors &attributes,
+ AttrMap existing_attr_map);
~AttrVisitor() override;
/*
@@ -62,6 +65,18 @@ AttrVisitor::AttrVisitor(const search::IAttributeManager &amgr, CachedSelect::At
_complexAttrs(0u)
{}
+AttrVisitor::AttrVisitor(const search::IAttributeManager &amgr,
+ CachedSelect::AttributeVectors &attributes,
+ AttrMap existing_attr_map)
+ : CloningVisitor(),
+ _amap(std::move(existing_attr_map)),
+ _amgr(amgr),
+ _attributes(attributes),
+ _svAttrs(0u),
+ _mvAttrs(0u),
+ _complexAttrs(0u)
+{}
+
AttrVisitor::~AttrVisitor() = default;
bool isSingleValueThatWeHandle(BasicType type) {
@@ -130,7 +145,7 @@ CachedSelect::Session::Session(std::unique_ptr<document::select::Node> docSelect
}
bool
-CachedSelect::Session::contains(const SelectContext &context) const
+CachedSelect::Session::contains_pre_doc(const SelectContext &context) const
{
if (_preDocSelect && (_preDocSelect->contains(context) == document::select::Result::False)) {
return false;
@@ -140,10 +155,10 @@ CachedSelect::Session::contains(const SelectContext &context) const
}
bool
-CachedSelect::Session::contains(const document::Document &doc) const
+CachedSelect::Session::contains_doc(const SelectContext &context) const
{
return (_preDocOnlySelect) ||
- (_docSelect && (_docSelect->contains(doc) == document::select::Result::True));
+ (_docSelect && (_docSelect->contains(context) == document::select::Result::True));
}
const document::select::Node &
@@ -177,8 +192,12 @@ CachedSelect::setPreDocumentSelect(const search::IAttributeManager &attrMgr,
if (_fieldNodes == _svAttrFieldNodes) {
_preDocOnlySelect = std::move(allAttrVisitor.getNode());
} else if (_svAttrFieldNodes > 0) {
- _attributes.clear();
- AttrVisitor someAttrVisitor(attrMgr, _attributes);
+ // Also let document-level selection use attribute wiring; otherwise imported fields
+ // would not resolve to anything, as these do not exist in the concrete document itself.
+ _docSelect = std::move(allAttrVisitor.getNode());
+ [[maybe_unused]] size_t attrs_before = _attributes.size();
+ AttrVisitor someAttrVisitor(attrMgr, _attributes, std::move(allAttrVisitor._amap));
+ assert(_attributes.size() == attrs_before);
noDocsPruner.getNode()->visit(someAttrVisitor);
_preDocSelect = std::move(someAttrVisitor.getNode());
}
diff --git a/searchcore/src/vespa/searchcore/proton/common/cachedselect.h b/searchcore/src/vespa/searchcore/proton/common/cachedselect.h
index b0660399f6f..89f002bd939 100644
--- a/searchcore/src/vespa/searchcore/proton/common/cachedselect.h
+++ b/searchcore/src/vespa/searchcore/proton/common/cachedselect.h
@@ -42,9 +42,10 @@ public:
Session(std::unique_ptr<document::select::Node> docSelect,
std::unique_ptr<document::select::Node> preDocOnlySelect,
std::unique_ptr<document::select::Node> preDocSelect);
- bool contains(const SelectContext &context) const;
- bool contains(const document::Document &doc) const;
- const document::select::Node &selectNode() const;
+ [[nodiscard]] bool contains_pre_doc(const SelectContext &context) const;
+ // Precondition: context must have non-nullptr _doc
+ [[nodiscard]] bool contains_doc(const SelectContext &context) const;
+ [[nodiscard]] const document::select::Node &selectNode() const;
};
using AttributeVectors = std::vector<std::shared_ptr<search::attribute::ReadableAttributeVector>>;
diff --git a/searchcore/src/vespa/searchcore/proton/common/selectpruner.h b/searchcore/src/vespa/searchcore/proton/common/selectpruner.h
index 3612914fc6f..3322a07ebd7 100644
--- a/searchcore/src/vespa/searchcore/proton/common/selectpruner.h
+++ b/searchcore/src/vespa/searchcore/proton/common/selectpruner.h
@@ -36,7 +36,7 @@ public:
class SelectPruner : public document::select::CloningVisitor,
- public SelectPrunerBase
+ public SelectPrunerBase
{
public:
private:
@@ -53,16 +53,16 @@ public:
bool hasFields,
bool hasDocuments);
- SelectPruner(const SelectPruner *rhs);
- virtual ~SelectPruner();
+ explicit SelectPruner(const SelectPruner *rhs);
+ ~SelectPruner() override;
- uint32_t getFieldNodes() const { return _fieldNodes; }
- uint32_t getAttrFieldNodes() const { return _attrFieldNodes; }
- const document::select::ResultSet & getResultSet() const { return _resultSet; }
- bool isFalse() const;
- bool isTrue() const;
- bool isInvalid() const;
- bool isConst() const;
+ [[nodiscard]] uint32_t getFieldNodes() const noexcept { return _fieldNodes; }
+ [[nodiscard]] uint32_t getAttrFieldNodes() const noexcept { return _attrFieldNodes; }
+ [[nodiscard]] const document::select::ResultSet & getResultSet() const noexcept { return _resultSet; }
+ [[nodiscard]] bool isFalse() const;
+ [[nodiscard]] bool isTrue() const;
+ [[nodiscard]] bool isInvalid() const;
+ [[nodiscard]] bool isConst() const;
void trace(std::ostream &t);
void process(const document::select::Node &node);
private:
@@ -83,8 +83,8 @@ private:
void setTernaryConst(bool val);
void set_null_value_node();
void resolveTernaryConst(bool wantInverted);
- bool isInvalidVal() const;
- bool isNullVal() const;
+ [[nodiscard]] bool isInvalidVal() const;
+ [[nodiscard]] bool isNullVal() const;
void swap(SelectPruner &rhs);
};
diff --git a/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.cpp b/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.cpp
index e9d233ef6ec..85c0eb0c1f2 100644
--- a/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.cpp
+++ b/searchcore/src/vespa/searchcore/proton/persistenceengine/document_iterator.cpp
@@ -171,28 +171,31 @@ public:
}
}
- bool willAlwaysFail() const { return _willAlwaysFail; }
+ [[nodiscard]] bool willAlwaysFail() const noexcept { return _willAlwaysFail; }
- bool match(const search::DocumentMetaData & meta) const {
+ [[nodiscard]] bool match(const search::DocumentMetaData & meta) const {
if (meta.lid >= _docidLimit) {
return false;
}
if (_dscTrue || _metaOnly) {
return true;
}
- if (_selectCxt) {
- _selectCxt->_docId = meta.lid;
- }
if (!_gidFilter.gid_might_match_selection(meta.gid)) {
return false;
}
- return _selectSession->contains(*_selectCxt);
+ assert(_selectCxt);
+ _selectCxt->_docId = meta.lid;
+ _selectCxt->_doc = nullptr;
+ return _selectSession->contains_pre_doc(*_selectCxt);
}
- bool match(const search::DocumentMetaData & meta, const Document * doc) const {
+ [[nodiscard]] bool match(const search::DocumentMetaData & meta, const Document * doc) const {
if (_dscTrue || _metaOnly) {
return true;
}
- return (doc && (doc->getId().getGlobalId() == meta.gid) && _selectSession->contains(*doc));
+ assert(_selectCxt);
+ _selectCxt->_docId = meta.lid;
+ _selectCxt->_doc = doc;
+ return (doc && (doc->getId().getGlobalId() == meta.gid) && _selectSession->contains_doc(*_selectCxt));
}
private:
bool _dscTrue;