aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTor Egge <Tor.Egge@online.no>2024-02-09 13:12:45 +0100
committerTor Egge <Tor.Egge@online.no>2024-02-09 13:12:45 +0100
commit332bdd44a075c16418b49ddfe66965e5a46e2e8c (patch)
treed1a848f04b0deda0937687a041898faf55f199f8
parent6e03787d79b327915dff98815db777d879986396 (diff)
Handle search::streaming::EquivQueryNode as a leaf in the query tree.
-rw-r--r--searchlib/src/tests/query/streaming/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/query/streaming/equiv_query_node_test.cpp181
-rw-r--r--searchlib/src/tests/query/streaming_query_test.cpp11
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt2
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.cpp169
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.h25
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/hit.cpp17
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/hit.h30
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/query.cpp7
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/query.h14
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/querynode.cpp40
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/querynode.h1
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp12
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/queryterm.h3
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp64
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/rankprocessor.h4
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp57
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h2
-rw-r--r--streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp10
19 files changed, 590 insertions, 68 deletions
diff --git a/searchlib/src/tests/query/streaming/CMakeLists.txt b/searchlib/src/tests/query/streaming/CMakeLists.txt
index 7568e45d00a..5ed450ecbc8 100644
--- a/searchlib/src/tests/query/streaming/CMakeLists.txt
+++ b/searchlib/src/tests/query/streaming/CMakeLists.txt
@@ -1,5 +1,14 @@
# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+# GoogleTest executable built from equiv_query_node_test.cpp, registered as a test under the same name.
+vespa_add_executable(searchlib_query_streaming_equiv_query_node_test_app TEST
+ SOURCES
+ equiv_query_node_test.cpp
+ DEPENDS
+ searchlib
+ GTest::gtest
+)
+vespa_add_test(NAME searchlib_query_streaming_equiv_query_node_test_app COMMAND searchlib_query_streaming_equiv_query_node_test_app)
+
vespa_add_executable(searchlib_query_streaming_hit_iterator_test_app TEST
SOURCES
hit_iterator_test.cpp
diff --git a/searchlib/src/tests/query/streaming/equiv_query_node_test.cpp b/searchlib/src/tests/query/streaming/equiv_query_node_test.cpp
new file mode 100644
index 00000000000..cc880dd8d69
--- /dev/null
+++ b/searchlib/src/tests/query/streaming/equiv_query_node_test.cpp
@@ -0,0 +1,181 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchlib/query/streaming/equiv_query_node.h>
+#include <vespa/searchlib/fef/matchdata.h>
+#include <vespa/searchlib/fef/simpletermdata.h>
+#include <vespa/searchlib/query/streaming/phrase_query_node.h>
+#include <vespa/searchlib/query/streaming/query.h>
+#include <vespa/searchlib/query/streaming/queryterm.h>
+#include <vespa/searchlib/query/tree/querybuilder.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <vespa/searchlib/query/tree/stackdumpcreator.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+using search::fef::MatchData;
+using search::fef::SimpleTermData;
+using search::fef::TermFieldHandle;
+using search::fef::TermFieldMatchDataPosition;
+using search::query::QueryBuilder;
+using search::query::Node;
+using search::query::SimpleQueryNodeTypes;
+using search::query::StackDumpCreator;
+using search::query::Weight;
+using search::streaming::EquivQueryNode;
+using search::streaming::HitList;
+using search::streaming::PhraseQueryNode;
+using search::streaming::Query;
+using search::streaming::QueryNodeResultFactory;
+using search::streaming::QueryTerm;
+using search::streaming::QueryTermList;
+
+/**
+ * Result factory that answers "yes" to every float term rewrite request,
+ * regardless of the argument it is given.
+ */
+class AllowRewrite : public QueryNodeResultFactory
+{
+public:
+    bool allow_float_terms_rewrite(vespalib::stringref) const noexcept override {
+        return true;
+    }
+};
+
+/**
+ * Test fixture holding the helpers shared by the EquivQueryNode tests.
+ */
+class EquivQueryNodeTest : public ::testing::Test
+{
+public:
+    EquivQueryNodeTest();
+    ~EquivQueryNodeTest();
+
+    /**
+     * Compares every property of a match data position with its expected
+     * value. The label is attached to any failure via SCOPED_TRACE.
+     */
+    void assert_tfmd_pos(const vespalib::string& label,
+                         const TermFieldMatchDataPosition &tfmd_pos,
+                         uint32_t exp_element_id,
+                         uint32_t exp_position,
+                         int32_t exp_element_weight,
+                         uint32_t exp_element_length);
+    vespalib::string make_simple_equiv_stack_dump();
+};
+
+EquivQueryNodeTest::EquivQueryNodeTest()
+    : ::testing::Test()
+{
+}
+
+EquivQueryNodeTest::~EquivQueryNodeTest() = default;
+
+// The label is taken by const reference so the string is not copied on each call.
+void
+EquivQueryNodeTest::assert_tfmd_pos(const vespalib::string& label,
+                                    const TermFieldMatchDataPosition &tfmd_pos,
+                                    uint32_t exp_element_id,
+                                    uint32_t exp_position,
+                                    int32_t exp_element_weight,
+                                    uint32_t exp_element_length)
+{
+    SCOPED_TRACE(label);
+    EXPECT_EQ(exp_element_id, tfmd_pos.getElementId());
+    EXPECT_EQ(exp_position, tfmd_pos.getPosition());
+    EXPECT_EQ(exp_element_weight, tfmd_pos.getElementWeight());
+    EXPECT_EQ(exp_element_length, tfmd_pos.getElementLen());
+}
+
+// Builds the query equiv("2", "2.5", "3") and returns it serialized as a stack dump.
+vespalib::string
+EquivQueryNodeTest::make_simple_equiv_stack_dump()
+{
+    QueryBuilder<SimpleQueryNodeTypes> query_builder;
+    query_builder.addEquiv(3, 0, Weight(0));
+    for (const char* term : {"2", "2.5", "3"}) {
+        query_builder.addStringTerm(term, "", 0, Weight(0));
+    }
+    Node::UP root = query_builder.build();
+    return StackDumpCreator::create(*root);
+}
+
+// Feeds hits into the three children of equiv("2", "2.5", "3") and verifies the
+// merged hit list, the boolean evaluation and the unpacked match data.
+TEST_F(EquivQueryNodeTest, test_equiv_evaluate_and_unpack)
+{
+    auto stack_dump = make_simple_equiv_stack_dump();
+    QueryNodeResultFactory empty;
+    Query q(empty, stack_dump);
+    auto& eqn = dynamic_cast<EquivQueryNode&>(q.getRoot());
+    auto& terms = eqn.get_terms();
+    // Fatal assertion: terms[0..2] are indexed unconditionally below.
+    ASSERT_EQ(3u, terms.size());
+    for (auto& qt : terms) {
+        qt->resizeFieldId(1);
+    }
+
+    // The add() arguments appear to be (field, element, weight, position),
+    // judging by the expected hits and positions asserted further down.
+    // field 0
+    terms[0]->add(0, 0, 1, 0);
+    terms[1]->add(0, 0, 1, 1);
+    terms[2]->add(0, 1, 1, 0);
+    // field 1: two children hit the same position with different weights
+    terms[1]->add(1, 0, 1, 4);
+    terms[2]->add(1, 0, 2, 4);
+
+    terms[0]->set_element_length(0, 10);
+    terms[1]->set_element_length(0, 10);
+    terms[1]->set_element_length(1, 31);
+    terms[2]->set_element_length(0, 30);
+    terms[2]->set_element_length(1, 31);
+    HitList hits;
+    eqn.evaluateHits(hits);
+    // Only one hit per position is expected; for field 1 that is the weight 2 hit.
+    auto exp_hits = HitList{{0,0,1,0},{0,0,1,1},{0,1,1,0},{1,0,2,4}};
+    exp_hits[0].set_element_length(10);
+    exp_hits[1].set_element_length(10);
+    exp_hits[2].set_element_length(30);
+    exp_hits[3].set_element_length(31);
+    ASSERT_EQ(exp_hits, hits);
+    EXPECT_TRUE(eqn.evaluate());
+
+    SimpleTermData td;
+    constexpr TermFieldHandle handle0 = 27;
+    constexpr TermFieldHandle handle1 = 29;
+    constexpr TermFieldHandle handle_max = std::max(handle0, handle1);
+    td.addField(0).setHandle(handle0);
+    td.addField(1).setHandle(handle1);
+    terms[0]->resizeFieldId(0);
+    terms[0]->getFieldInfo(0).setFieldLength(100);
+    terms[1]->resizeFieldId(1);
+    terms[1]->getFieldInfo(0).setFieldLength(100);
+    terms[1]->getFieldInfo(1).setFieldLength(200);
+    terms[2]->resizeFieldId(1);
+    terms[2]->getFieldInfo(0).setFieldLength(100);
+    terms[2]->getFieldInfo(1).setFieldLength(200);
+    auto md = MatchData::makeTestInstance(handle_max + 1, handle_max + 1);
+    auto tfmd0 = md->resolveTermField(handle0);
+    auto tfmd1 = md->resolveTermField(handle1);
+    tfmd0->setNeedInterleavedFeatures(true);
+    tfmd1->setNeedInterleavedFeatures(true);
+    eqn.unpack_match_data(2, td, *md);
+    EXPECT_EQ(2, tfmd0->getDocId());
+    EXPECT_EQ(3, tfmd0->getNumOccs());
+    EXPECT_EQ(3, tfmd0->end() - tfmd0->begin());
+    auto itr = tfmd0->begin();
+    assert_tfmd_pos("tmfd0[0]", *itr, 0, 0, 1, 10);
+    ++itr;
+    assert_tfmd_pos("tmfd0[1]", *itr, 0, 1, 1, 10);
+    ++itr;
+    assert_tfmd_pos("tmfd0[2]", *itr, 1, 0, 1, 30);
+    EXPECT_EQ(100, tfmd0->getFieldLength());
+    EXPECT_EQ(2, tfmd1->getDocId());
+    EXPECT_EQ(1, tfmd1->getNumOccs());
+    EXPECT_EQ(1, tfmd1->end() - tfmd1->begin());
+    itr = tfmd1->begin();
+    assert_tfmd_pos("tmfd1[0]", *itr, 0, 4, 2, 31);
+    EXPECT_EQ(200, tfmd1->getFieldLength());
+}
+
+// With float term rewriting allowed, "2.5" becomes a nested equiv whose
+// children must end up directly below the outer equiv.
+TEST_F(EquivQueryNodeTest, test_equiv_flattening)
+{
+    auto stack_dump = make_simple_equiv_stack_dump();
+    AllowRewrite allow_rewrite;
+    Query q(allow_rewrite, stack_dump);
+    auto& eqn = dynamic_cast<EquivQueryNode&>(q.getRoot());
+    auto& terms = eqn.get_terms();
+    // Query is flattened to equiv("2", "2.5", phrase("2","5"), "3")
+    // Fatal assertion: terms[0..3] are indexed unconditionally below.
+    ASSERT_EQ(4u, terms.size());
+    EXPECT_EQ("2", terms[0]->getTermString());
+    EXPECT_EQ("2.5", terms[1]->getTermString());
+    auto phrase = dynamic_cast<PhraseQueryNode*>(terms[2].get());
+    // Fatal assertion: phrase is dereferenced on the following lines.
+    ASSERT_NE(phrase, nullptr);
+    EXPECT_EQ(2, phrase->get_terms().size());
+    EXPECT_EQ("2", phrase->get_terms()[0]->getTermString());
+    EXPECT_EQ("5", phrase->get_terms()[1]->getTermString());
+    EXPECT_EQ("3", terms[3]->getTermString());
+}
+
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/tests/query/streaming_query_test.cpp b/searchlib/src/tests/query/streaming_query_test.cpp
index 19a2a0876c6..5559e194c5e 100644
--- a/searchlib/src/tests/query/streaming_query_test.cpp
+++ b/searchlib/src/tests/query/streaming_query_test.cpp
@@ -3,6 +3,7 @@
#include <vespa/searchlib/fef/simpletermdata.h>
#include <vespa/searchlib/fef/matchdata.h>
#include <vespa/searchlib/query/streaming/dot_product_term.h>
+#include <vespa/searchlib/query/streaming/equiv_query_node.h>
#include <vespa/searchlib/query/streaming/in_term.h>
#include <vespa/searchlib/query/streaming/phrase_query_node.h>
#include <vespa/searchlib/query/streaming/query.h>
@@ -352,17 +353,17 @@ TEST(StreamingQueryTest, onedot0e_is_rewritten_if_allowed_too)
const QueryNode & root = q.getRoot();
EXPECT_TRUE(dynamic_cast<const EquivQueryNode *>(&root) != nullptr);
const auto & equiv = static_cast<const EquivQueryNode &>(root);
- EXPECT_EQ(2u, equiv.size());
- EXPECT_TRUE(dynamic_cast<const QueryTerm *>(equiv[0].get()) != nullptr);
+ EXPECT_EQ(2u, equiv.get_terms().size());
+ EXPECT_TRUE(dynamic_cast<const QueryTerm *>(equiv.get_terms()[0].get()) != nullptr);
{
- const auto & qt = static_cast<const QueryTerm &>(*equiv[0]);
+ const auto & qt = static_cast<const QueryTerm &>(*equiv.get_terms()[0]);
EXPECT_EQ("c", qt.index());
EXPECT_EQ(vespalib::stringref("1.0e"), qt.getTerm());
EXPECT_EQ(3u, qt.uniqueId());
}
- EXPECT_TRUE(dynamic_cast<const PhraseQueryNode *>(equiv[1].get()) != nullptr);
+ EXPECT_TRUE(dynamic_cast<const PhraseQueryNode *>(equiv.get_terms()[1].get()) != nullptr);
{
- const auto & phrase = static_cast<const PhraseQueryNode &>(*equiv[1]);
+ const auto & phrase = static_cast<const PhraseQueryNode &>(*equiv.get_terms()[1]);
EXPECT_EQ(2u, phrase.get_terms().size());
{
const auto & qt = *phrase.get_terms()[0];
diff --git a/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt b/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt
index 63d52cbdf9f..a2f0c8fd136 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt
@@ -2,7 +2,9 @@
vespa_add_library(searchlib_query_streaming OBJECT
SOURCES
dot_product_term.cpp
+ equiv_query_node.cpp
fuzzy_term.cpp
+ hit.cpp
hit_iterator_pack.cpp
in_term.cpp
multi_term.cpp
diff --git a/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.cpp b/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.cpp
new file mode 100644
index 00000000000..8a39830799b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.cpp
@@ -0,0 +1,169 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "equiv_query_node.h"
+#include "phrase_query_node.h"
+#include <vespa/searchlib/fef/itermdata.h>
+#include <vespa/searchlib/fef/matchdata.h>
+#include <algorithm>
+#include <cassert>
+#include <limits>
+#include <type_traits>
+
+using search::fef::TermFieldMatchData;
+using search::fef::TermFieldMatchDataPosition;
+using search::fef::ITermFieldData;
+
+namespace search::streaming {
+
+namespace {
+
+/**
+ * A Hit that additionally carries a field length, so the length can travel
+ * with the hit while hits from several child terms are merged.
+ */
+class HitWithFieldLength : public Hit
+{
+public:
+    HitWithFieldLength(const Hit& hit, uint32_t field_length) noexcept
+        : Hit(hit), _field_length(field_length) {}
+
+    uint32_t get_field_length() const noexcept { return _field_length; }
+
+private:
+    uint32_t _field_length;
+};
+
+// Saturates a 32-bit value to the largest value a uint16_t can hold.
+uint16_t
+cap_16_bits(uint32_t value)
+{
+    constexpr uint32_t limit = std::numeric_limits<uint16_t>::max();
+    return static_cast<uint16_t>((value < limit) ? value : limit);
+}
+
+// Returns the field length the term has recorded for field_id, or
+// UNKNOWN_LENGTH when the term has no field info entry for that field.
+uint32_t
+extract_field_length(const QueryTerm& term, uint32_t field_id)
+{
+    if (field_id >= term.getFieldInfoSize()) {
+        return search::fef::FieldPositionsIterator::UNKNOWN_LENGTH;
+    }
+    return term.getFieldInfo(field_id).getFieldLength();
+}
+
+// Stores field length and occurrence count in the match data, each
+// saturated to 16 bits first.
+void
+set_interleaved_features(TermFieldMatchData& tmd, uint32_t field_length, uint32_t num_occs)
+{
+    const uint16_t capped_field_length = cap_16_bits(field_length);
+    const uint16_t capped_num_occs = cap_16_bits(num_occs);
+    tmd.setFieldLength(capped_field_length);
+    tmd.setNumOccs(capped_num_occs);
+}
+
+/**
+ * Appends the hits of every child term of mt to hl, sorts hl and keeps a
+ * single hit per (field, element, position).
+ *
+ * When HitType is Hit the hits are copied as is. Otherwise each hit is
+ * constructed together with a field length taken from the child term, or
+ * from the first term of the phrase when the child is a phrase.
+ */
+template <typename HitType>
+void merge_hits_from_children(std::vector<HitType>& hl, const MultiTerm& mt)
+{
+    HitList child_hits_store;
+    for (auto& child : mt.get_terms()) {
+        auto *phrase = dynamic_cast<PhraseQueryNode*>(child.get());
+        const QueryTerm& length_source = (phrase == nullptr) ? *child : *phrase->get_terms().front();
+        const auto& child_hits = child->evaluateHits(child_hits_store);
+        for (const auto& hit : child_hits) {
+            if constexpr (std::is_same_v<Hit,HitType>) {
+                hl.emplace_back(hit);
+            } else {
+                hl.emplace_back(hit, extract_field_length(length_source, hit.field_id()));
+            }
+        }
+    }
+    std::sort(hl.begin(), hl.end());
+    // After sorting, hits at the same position are adjacent; keep the first.
+    auto same_pos = [](const auto& lhs, const auto& rhs) noexcept { return lhs.at_same_pos(rhs); };
+    hl.erase(std::unique(hl.begin(), hl.end(), same_pos), hl.end());
+}
+
+}
+
+// Forwards to MultiTerm, passing an empty string as its second argument.
+EquivQueryNode::EquivQueryNode(std::unique_ptr<QueryNodeResultBase> result_base,
+                               uint32_t num_terms)
+    : MultiTerm(std::move(result_base), "", num_terms)
+{
+}
+
+EquivQueryNode::~EquivQueryNode() = default;
+
+// True as soon as one child term evaluates to true; false when none does.
+bool
+EquivQueryNode::evaluate() const
+{
+    const auto& children = get_terms();
+    return std::any_of(children.begin(), children.end(),
+                       [](const auto& child) { return child->evaluate(); });
+}
+
+// Replaces the content of hl with the merged hits of all child terms and
+// returns a reference to hl.
+const HitList &
+EquivQueryNode::evaluateHits(HitList & hl) const
+{
+    hl.clear();
+    merge_hits_from_children<Hit>(hl, *this);
+    return hl;
+}
+
+/**
+ * Unpacks the merged hits of all child terms into the match data of the
+ * fields listed in td.
+ *
+ * The merged hits are sorted, so all hits of one field arrive in one run.
+ * For each run the term field match data is resolved (and reset when it
+ * belongs to another docid), every hit is appended as a position, and the
+ * interleaved features are written at the end of the run when requested.
+ * Hits in fields that td does not know are counted but not unpacked.
+ */
+void
+EquivQueryNode::unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data)
+{
+    std::vector<HitWithFieldLength> merged_hits;
+    merge_hits_from_children(merged_hits, *this);
+    if (merged_hits.empty()) {
+        return; // only unpack if we have a hit
+    }
+
+    // Sentinel meaning "no field seen yet".
+    uint32_t current_field_id = std::numeric_limits<uint32_t>::max();
+    uint32_t current_field_length = 0;
+    uint32_t num_occs = 0;
+    TermFieldMatchData *tmd = nullptr;
+
+    // Writes the interleaved features for the run of hits just completed.
+    auto finish_field = [&]() {
+        if ((tmd != nullptr) && tmd->needs_interleaved_features()) {
+            set_interleaved_features(*tmd, current_field_length, num_occs);
+        }
+    };
+
+    for (auto& hit : merged_hits) {
+        const uint32_t field_id = hit.field_id();
+        if (field_id != current_field_id) {
+            finish_field();
+            // reset to notfound/unknown values
+            tmd = nullptr;
+            num_occs = 0;
+
+            // setup for new field that had a hit
+            const ITermFieldData *tfd = td.lookupField(field_id);
+            if (tfd != nullptr) {
+                tmd = match_data.resolveTermField(tfd->getHandle());
+                tmd->setFieldId(field_id);
+                // reset field match data, but only once per docId
+                if (tmd->getDocId() != docid) {
+                    tmd->reset(docid);
+                }
+            }
+            current_field_id = field_id;
+            current_field_length = hit.get_field_length();
+        }
+        ++num_occs;
+        if (tmd != nullptr) {
+            TermFieldMatchDataPosition pos(hit.element_id(), hit.position(),
+                                           hit.element_weight(), hit.element_length());
+            tmd->appendPosition(pos);
+        }
+    }
+    finish_field();
+}
+
+// Type query without RTTI: an EquivQueryNode answers with itself.
+EquivQueryNode*
+EquivQueryNode::as_equiv_query_node() noexcept
+{
+    EquivQueryNode* self = this;
+    return self;
+}
+
+// Const overload of the type query above.
+const EquivQueryNode*
+EquivQueryNode::as_equiv_query_node() const noexcept
+{
+    const EquivQueryNode* self = this;
+    return self;
+}
+
+// Hands ownership of all child terms to the caller by moving them out of
+// this node.
+std::vector<std::unique_ptr<QueryTerm>>
+EquivQueryNode::steal_terms()
+{
+    std::vector<std::unique_ptr<QueryTerm>> stolen(std::move(_terms));
+    return stolen;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.h b/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.h
new file mode 100644
index 00000000000..b5cdb31274f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.h
@@ -0,0 +1,25 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "multi_term.h"
+
+namespace search::streaming {
+
+/**
+ N-ary "EQUIV" operator that merges terms from nodes below.
+
+ Implemented as a MultiTerm, i.e. it is a term in the query tree and keeps
+ its alternatives as child terms.
+
+ evaluate() is true when at least one child term evaluates to true.
+ evaluateHits() fills the given list with the hits of all child terms,
+ sorted and with a single hit per position.
+ unpack_match_data() unpacks those merged hits into the match data.
+ as_equiv_query_node() returns this node.
+ steal_terms() moves the child terms out of the node and returns them.
+*/
+class EquivQueryNode : public MultiTerm
+{
+public:
+ EquivQueryNode(std::unique_ptr<QueryNodeResultBase> result_base, uint32_t num_terms);
+ ~EquivQueryNode() override;
+ bool evaluate() const override;
+ const HitList & evaluateHits(HitList & hl) const override;
+ void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data) override;
+ EquivQueryNode* as_equiv_query_node() noexcept override;
+ const EquivQueryNode* as_equiv_query_node() const noexcept override;
+ std::vector<std::unique_ptr<QueryTerm>> steal_terms();
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/query/streaming/hit.cpp b/searchlib/src/vespa/searchlib/query/streaming/hit.cpp
new file mode 100644
index 00000000000..c05fda77476
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/streaming/hit.cpp
@@ -0,0 +1,17 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "hit.h"
+#include <ostream>
+
+namespace search::streaming {
+
+// Prints a hit as {field_id,element_id,element_weight,element_length,position}.
+std::ostream&
+operator<<(std::ostream& os, const Hit& hit)
+{
+    os << "{" << hit.field_id();
+    os << "," << hit.element_id();
+    os << "," << hit.element_weight();
+    os << "," << hit.element_length();
+    os << "," << hit.position() << "}";
+    return os;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/query/streaming/hit.h b/searchlib/src/vespa/searchlib/query/streaming/hit.h
index 168c09a91ec..fc24c21f722 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/hit.h
+++ b/searchlib/src/vespa/searchlib/query/streaming/hit.h
@@ -2,6 +2,7 @@
#pragma once
#include <cstdint>
+#include <iosfwd>
#include <vector>
namespace search::streaming {
@@ -27,8 +28,37 @@ public:
uint32_t element_length() const { return _element_length; }
uint32_t position() const { return _position; }
void set_element_length(uint32_t value) { _element_length = value; }
+    /**
+     * Orders hits by field id, element id and position (all ascending), then
+     * by element weight (descending) and finally element length (ascending).
+     */
+    bool operator<(const Hit& rhs) const noexcept {
+        return (_field_id != rhs._field_id)             ? (_field_id < rhs._field_id)
+             : (_element_id != rhs._element_id)         ? (_element_id < rhs._element_id)
+             : (_position != rhs._position)             ? (_position < rhs._position)
+             : (_element_weight != rhs._element_weight) ? (_element_weight > rhs._element_weight)
+             : (_element_length < rhs._element_length);
+    }
+    /** True when both hits share field id, element id and position. */
+    bool at_same_pos(const Hit& rhs) const noexcept {
+        if (_field_id != rhs._field_id) {
+            return false;
+        }
+        if (_element_id != rhs._element_id) {
+            return false;
+        }
+        return _position == rhs._position;
+    }
+    /** True when the hits are at the same position and also agree on element weight and length. */
+    bool operator==(const Hit& rhs) const noexcept {
+        return at_same_pos(rhs) &&
+               (_element_weight == rhs._element_weight) &&
+               (_element_length == rhs._element_length);
+    }
};
+std::ostream& operator<<(std::ostream& os, const Hit& hit);
+
using HitList = std::vector<Hit>;
}
diff --git a/searchlib/src/vespa/searchlib/query/streaming/query.cpp b/searchlib/src/vespa/searchlib/query/streaming/query.cpp
index 77424fb2d62..94d9acd02cd 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/query.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/query.cpp
@@ -94,7 +94,6 @@ QueryConnector::create(ParseItem::ItemType type)
case search::ParseItem::ITEM_AND: return std::make_unique<AndQueryNode>();
case search::ParseItem::ITEM_OR:
case search::ParseItem::ITEM_WEAK_AND: return std::make_unique<OrQueryNode>();
- case search::ParseItem::ITEM_EQUIV: return std::make_unique<EquivQueryNode>();
case search::ParseItem::ITEM_NOT: return std::make_unique<AndNotQueryNode>();
case search::ParseItem::ITEM_SAME_ELEMENT: return std::make_unique<SameElementQueryNode>();
case search::ParseItem::ITEM_NEAR: return std::make_unique<NearQueryNode>();
@@ -158,12 +157,6 @@ RankWithQueryNode::evaluate() const {
return firstOk;
}
-bool
-EquivQueryNode::evaluate() const
-{
- return OrQueryNode::evaluate();
-}
-
Query::Query() = default;
Query::Query(const QueryNodeResultFactory & factory, vespalib::stringref queryRep)
diff --git a/searchlib/src/vespa/searchlib/query/streaming/query.h b/searchlib/src/vespa/searchlib/query/streaming/query.h
index e91a2f91dc5..a993a9a8a8a 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/query.h
+++ b/searchlib/src/vespa/searchlib/query/streaming/query.h
@@ -103,20 +103,6 @@ public:
bool evaluate() const override;
};
-
-/**
- N-ary "EQUIV" operator that merges terms from nodes below.
-*/
-class EquivQueryNode : public OrQueryNode
-{
-public:
- EquivQueryNode() noexcept : OrQueryNode("EQUIV") { }
- bool evaluate() const override;
- bool isFlattenable(ParseItem::ItemType type) const override {
- return (type == ParseItem::ITEM_EQUIV);
- }
-};
-
/**
Query packages the query tree. The usage pattern is like this.
Construct the tree with the correct tree description.
diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp
index 0b277dbe221..dd3b1f84ad9 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp
@@ -9,6 +9,7 @@
#include "same_element_query_node.h"
#include <vespa/searchlib/parsequery/stackdumpiterator.h>
#include <vespa/searchlib/query/streaming/dot_product_term.h>
+#include <vespa/searchlib/query/streaming/equiv_query_node.h>
#include <vespa/searchlib/query/streaming/in_term.h>
#include <vespa/searchlib/query/streaming/wand_term.h>
#include <vespa/searchlib/query/streaming/weighted_set_term.h>
@@ -44,7 +45,6 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor
case ParseItem::ITEM_AND:
case ParseItem::ITEM_OR:
case ParseItem::ITEM_WEAK_AND:
- case ParseItem::ITEM_EQUIV:
case ParseItem::ITEM_NOT:
case ParseItem::ITEM_SAME_ELEMENT:
case ParseItem::ITEM_NEAR:
@@ -142,10 +142,10 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor
auto dotPos = ssTerm.find('.');
phrase->add_term(std::make_unique<QueryTerm>(factory.create(), ssTerm.substr(0, dotPos), ssIndex, TermType::WORD, normalize_mode));
phrase->add_term(std::make_unique<QueryTerm>(factory.create(), ssTerm.substr(dotPos + 1), ssIndex, TermType::WORD, normalize_mode));
- auto orqn = std::make_unique<EquivQueryNode>();
- orqn->addChild(std::move(qt));
- orqn->addChild(std::move(phrase));
- qn = std::move(orqn);
+ auto eqn = std::make_unique<EquivQueryNode>(factory.create(), 2);
+ eqn->add_term(std::move(qt));
+ eqn->add_term(std::move(phrase));
+ qn = std::move(eqn);
} else {
qn = std::move(qt);
}
@@ -171,6 +171,9 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor
case ParseItem::ITEM_PHRASE:
qn = build_phrase_term(factory, queryRep);
break;
+ case ParseItem::ITEM_EQUIV:
+ qn = build_equiv_term(factory, queryRep, allowRewrite);
+ break;
default:
skip_unknown(queryRep);
break;
@@ -282,6 +285,33 @@ QueryNode::build_phrase_term(const QueryNodeResultFactory& factory, SimpleQueryS
return phrase;
}
+/**
+ * Builds an EquivQueryNode from the EQUIV item the iterator is positioned at.
+ *
+ * Weight and unique id are copied from the item, then one child is built
+ * per arity. A child that is itself an equiv node (e.g. from the float term
+ * rewrite in Build) is flattened: its terms are moved into this node. All
+ * other children are expected to be QueryTerm instances.
+ *
+ * A child that is not a QueryTerm trips the assert in debug builds. In
+ * builds without asserts it is dropped rather than being leaked and stored
+ * as a null term.
+ */
+std::unique_ptr<QueryNode>
+QueryNode::build_equiv_term(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep, bool allow_rewrite)
+{
+    // Read the arity once, before the iterator is advanced to the children.
+    const auto arity = queryRep.getArity();
+    auto eqn = std::make_unique<EquivQueryNode>(factory.create(), arity);
+    eqn->setWeight(queryRep.GetWeight());
+    eqn->setUniqueId(queryRep.getUniqueId());
+    for (size_t i = 0; i < arity; ++i) {
+        queryRep.next();
+        auto qn = Build(eqn.get(), factory, queryRep, allow_rewrite);
+        auto nested_eqn = dynamic_cast<EquivQueryNode*>(qn.get());
+        if (nested_eqn != nullptr) {
+            auto stolen_terms = nested_eqn->steal_terms();
+            for (auto& term : stolen_terms) {
+                eqn->add_term(std::move(term));
+            }
+            continue;
+        }
+        auto qtp = dynamic_cast<QueryTerm*>(qn.get());
+        assert(qtp != nullptr);
+        if (qtp == nullptr) {
+            continue; // qn still owns the node (if any) and frees it
+        }
+        // Ownership moves from qn to the typed pointer.
+        static_cast<void>(qn.release());
+        eqn->add_term(std::unique_ptr<QueryTerm>(qtp));
+    }
+    return eqn;
+}
+
void
QueryNode::skip_unknown(SimpleQueryStackDumpIterator& queryRep)
{
diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.h b/searchlib/src/vespa/searchlib/query/streaming/querynode.h
index 4c7d9e88930..fff3bb15d10 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/querynode.h
+++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.h
@@ -34,6 +34,7 @@ class QueryNode
static std::unique_ptr<QueryNode> build_wand_term(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep);
static std::unique_ptr<QueryNode> build_weighted_set_term(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep);
static std::unique_ptr<QueryNode> build_phrase_term(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep);
+ static std::unique_ptr<QueryNode> build_equiv_term(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep, bool allow_rewrite);
static void skip_unknown(SimpleQueryStackDumpIterator& queryRep);
public:
using UP = std::unique_ptr<QueryNode>;
diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp
index f01f815e673..920d4bc59d7 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp
@@ -222,4 +222,16 @@ QueryTerm::as_fuzzy_term() noexcept
return nullptr;
}
+// Default answer to the type query: a plain QueryTerm is not an equiv node.
+EquivQueryNode*
+QueryTerm::as_equiv_query_node() noexcept
+{
+    EquivQueryNode* not_equiv = nullptr;
+    return not_equiv;
+}
+
+// Const overload of the type query above.
+const EquivQueryNode*
+QueryTerm::as_equiv_query_node() const noexcept
+{
+    const EquivQueryNode* not_equiv = nullptr;
+    return not_equiv;
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h
index 2eaecb86854..3f45a99e805 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h
+++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h
@@ -17,6 +17,7 @@ class MatchData;
}
namespace search::streaming {
+class EquivQueryNode;
class FuzzyTerm;
class NearestNeighborQueryNode;
class MultiTerm;
@@ -100,6 +101,8 @@ public:
virtual MultiTerm* as_multi_term() noexcept;
virtual RegexpTerm* as_regexp_term() noexcept;
virtual FuzzyTerm* as_fuzzy_term() noexcept;
+ virtual EquivQueryNode* as_equiv_query_node() noexcept;
+ virtual const EquivQueryNode* as_equiv_query_node() const noexcept;
virtual void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data);
protected:
void unpack_match_data_helper(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const QueryTerm& fl_term) const;
diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp
index 3449df57513..a54d2adee78 100644
--- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp
+++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp
@@ -4,7 +4,7 @@
#include "rankprocessor.h"
#include <vespa/searchlib/fef/handle.h>
#include <vespa/searchlib/fef/simpletermfielddata.h>
-#include <vespa/searchlib/query/streaming/multi_term.h>
+#include <vespa/searchlib/query/streaming/equiv_query_node.h>
#include <vespa/searchlib/query/streaming/nearest_neighbor_query_node.h>
#include <vespa/vsm/vsm/fieldsearchspec.h>
#include <algorithm>
@@ -56,6 +56,51 @@ getFeature(const RankProgram &rankProgram) {
}
+// Collects the field ids of the views of all child terms of mt and adds
+// each distinct id once, in ascending order, to the term data of qtd,
+// allocating a term field handle per id. A child whose index has no view
+// is reported with a warning and contributes no fields.
void
+RankProcessor::resolve_fields_from_children(QueryTermData& qtd, MultiTerm& mt)
+{
+    std::vector<uint32_t> field_ids;
+    for (auto& subterm : mt.get_terms()) {
+        vespalib::string expandedIndexName = vsm::FieldSearchSpecMap::stripNonFields(subterm->index());
+        const RankManager::View *view = _rankManagerSnapshot->getView(expandedIndexName);
+        if (view == nullptr) {
+            LOG(warning, "Could not find a view for index '%s'. Ranking no fields.",
+                getIndexName(subterm->index(), expandedIndexName).c_str());
+            continue;
+        }
+        for (auto field_id : *view) {
+            field_ids.emplace_back(field_id);
+        }
+    }
+    // Sort, then drop duplicates contributed by several children.
+    std::sort(field_ids.begin(), field_ids.end());
+    field_ids.erase(std::unique(field_ids.begin(), field_ids.end()), field_ids.end());
+    for (auto field_id : field_ids) {
+        qtd.getTermData().addField(field_id).setHandle(_mdLayout.allocTermField(field_id));
+    }
+}
+
+// Adds every field of the view of the term's index to the term data of
+// qtd, allocating a term field handle per field. A missing view is reported
+// with a warning and no fields are added.
+void
+RankProcessor::resolve_fields_from_term(QueryTermData& qtd, search::streaming::QueryTerm& term)
+{
+    vespalib::string expandedIndexName = vsm::FieldSearchSpecMap::stripNonFields(term.index());
+    const RankManager::View *view = _rankManagerSnapshot->getView(expandedIndexName);
+    if (view == nullptr) {
+        LOG(warning, "Could not find a view for index '%s'. Ranking no fields.",
+            getIndexName(term.index(), expandedIndexName).c_str());
+    } else {
+        auto& term_data = qtd.getTermData();
+        for (auto field_id : *view) {
+            term_data.addField(field_id).setHandle(_mdLayout.allocTermField(field_id));
+        }
+    }
+    LOG(debug, "Setup query term '%s:%s'",
+        getIndexName(term.index(), expandedIndexName).c_str(),
+        term.getTerm());
+}
+
+void
RankProcessor::initQueryEnvironment()
{
QueryWrapper::TermList & terms = _query.getTermList();
@@ -75,21 +120,12 @@ RankProcessor::initQueryEnvironment()
if (nn_term != nullptr) {
qtd.getTermData().set_query_tensor_name(nn_term->get_query_tensor_name());
}
-
- vespalib::string expandedIndexName = vsm::FieldSearchSpecMap::stripNonFields(term->index());
- const RankManager::View *view = _rankManagerSnapshot->getView(expandedIndexName);
- if (view != nullptr) {
- for (auto field_id : *view) {
- qtd.getTermData().addField(field_id).setHandle(_mdLayout.allocTermField(field_id));
- }
+ auto* eqn = term->as_equiv_query_node();
+ if (eqn != nullptr) {
+ resolve_fields_from_children(qtd, *eqn);
} else {
- LOG(warning, "Could not find a view for index '%s'. Ranking no fields.",
- getIndexName(term->index(), expandedIndexName).c_str());
+ resolve_fields_from_term(qtd, *term);
}
-
- LOG(debug, "Setup query term '%s:%s'",
- getIndexName(term->index(), expandedIndexName).c_str(),
- term->getTerm());
_queryEnv.addTerm(&qtd.getTermData());
}
_rankSetup.prepareSharedState(_queryEnv, _queryEnv.getObjectStore());
diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h
index 5651917ce7a..bec70beca77 100644
--- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h
+++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h
@@ -16,6 +16,8 @@
namespace streaming {
+class QueryTermData;
+
/**
* This class is associated with a query and a rank profile and
* is used to calculate rank and feature set for matched documents.
@@ -43,6 +45,8 @@ private:
HitCollector::UP _hitCollector;
std::unique_ptr<RankProgram> _match_features_program;
+ void resolve_fields_from_children(QueryTermData& qtd, search::streaming::MultiTerm& mt);
+ void resolve_fields_from_term(QueryTermData& qtd, search::streaming::QueryTerm& term);
void initQueryEnvironment();
void initHitCollector(size_t wantedHitCount);
void setupRankProgram(search::fef::RankProgram &program);
diff --git a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp
index c75ab7fccd3..72807bc6c34 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp
+++ b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp
@@ -3,7 +3,7 @@
#include <vespa/vsm/vsm/fieldsearchspec.h>
#include <vespa/document/fieldvalue/arrayfieldvalue.h>
#include <vespa/document/fieldvalue/weightedsetfieldvalue.h>
-#include <vespa/searchlib/query/streaming/multi_term.h>
+#include <vespa/searchlib/query/streaming/equiv_query_node.h>
#include <vespa/vespalib/stllike/hash_set.h>
#include <cassert>
@@ -190,6 +190,39 @@ FieldSearcher::init()
}
void
+FieldIdTSearcherMap::prepare_term(const DocumentTypeIndexFieldMapT& difm, QueryTerm* qt, FieldIdT fid, vespalib::hash_set<const void*>& seen, QueryTermList& onlyInIndex)
+{
+ auto equiv = qt->as_equiv_query_node();
+ if (equiv != nullptr) {
+ for (auto& subterm : equiv->get_terms()) {
+ prepare_term(difm, subterm.get(), fid, seen, onlyInIndex);
+ }
+ return;
+ }
+ for (const auto& doc_type_elem : difm) {
+ const IndexFieldMapT & fim = doc_type_elem.second;
+ auto found = fim.find(FieldSearchSpecMap::stripNonFields(qt->index()));
+ if (found != fim.end()) {
+ const FieldIdTList & index = found->second;
+ if ((find(index.begin(), index.end(), fid) != index.end()) && !seen.contains(qt)) {
+ seen.insert(qt);
+ auto multi_term = qt->as_multi_term();
+ if (multi_term != nullptr) {
+ for (auto& subterm : multi_term->get_terms()) {
+ onlyInIndex.emplace_back(subterm.get());
+ }
+ } else {
+ onlyInIndex.emplace_back(qt);
+ }
+ }
+ } else {
+ LOG(debug, "Could not find the requested index=%s in the index config map. Query does not fit search definition.",
+ qt->index().c_str());
+ }
+ }
+}
+
+void
FieldIdTSearcherMap::prepare(const DocumentTypeIndexFieldMapT& difm, const SharedSearcherBuf& searcherBuf,
Query& query, const vsm::FieldPathMapT& field_paths,
search::fef::IQueryEnvironment& query_env)
@@ -202,27 +235,7 @@ FieldIdTSearcherMap::prepare(const DocumentTypeIndexFieldMapT& difm, const Share
vespalib::hash_set<const void*> seen;
FieldIdT fid = searcher->field();
for (auto qt : qtl) {
- for (const auto& doc_type_elem : difm) {
- const IndexFieldMapT & fim = doc_type_elem.second;
- auto found = fim.find(FieldSearchSpecMap::stripNonFields(qt->index()));
- if (found != fim.end()) {
- const FieldIdTList & index = found->second;
- if ((find(index.begin(), index.end(), fid) != index.end()) && !seen.contains(qt)) {
- seen.insert(qt);
- auto multi_term = qt->as_multi_term();
- if (multi_term != nullptr) {
- for (auto& subterm : multi_term->get_terms()) {
- onlyInIndex.emplace_back(subterm.get());
- }
- } else {
- onlyInIndex.emplace_back(qt);
- }
- }
- } else {
- LOG(debug, "Could not find the requested index=%s in the index config map. Query does not fit search definition.",
- qt->index().c_str());
- }
- }
+ prepare_term(difm, qt, fid, seen, onlyInIndex);
}
/// Should perhaps do a unique on onlyInIndex
searcher->prepare(onlyInIndex, searcherBuf, field_paths, query_env);
diff --git a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h
index 6f3ec3e1e73..042e47ef164 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h
+++ b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h
@@ -5,6 +5,7 @@
#include <vespa/searchlib/query/streaming/query.h>
#include <vespa/vsm/common/document.h>
#include <vespa/vsm/common/storagedocument.h>
+#include <vespa/vespalib/stllike/hash_set.h>
#include <vespa/vespalib/util/array.h>
#include <utility>
@@ -122,6 +123,7 @@ using FieldIdTSearcherMapT = std::vector<FieldSearcherContainer>;
class FieldIdTSearcherMap : public FieldIdTSearcherMapT
{
+ void prepare_term(const DocumentTypeIndexFieldMapT& difm, search::streaming::QueryTerm* qt, FieldIdT fid, vespalib::hash_set<const void*>& seen, search::streaming::QueryTermList& onlyInIndex);
public:
void prepare(const DocumentTypeIndexFieldMapT& difm, const SharedSearcherBuf& searcherBuf,
search::streaming::Query& query, const vsm::FieldPathMapT& field_paths,
diff --git a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp
index 3ae4794e33f..c596b46a774 100644
--- a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp
+++ b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp
@@ -1,6 +1,7 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "fieldsearchspec.h"
+#include <vespa/searchlib/query/streaming/equiv_query_node.h>
#include <vespa/vespalib/stllike/asciistream.h>
#include <vespa/vsm/searcher/boolfieldsearcher.h>
#include <vespa/vsm/searcher/floatfieldsearcher.h>
@@ -222,7 +223,14 @@ FieldSearchSpecMap::buildFieldsInQuery(const Query & query) const
query.getLeaves(qtl);
for (const auto & term : qtl) {
- addFieldsFromIndex(term->index(), fieldsInQuery);
+ auto equiv = term->as_equiv_query_node();
+ if (equiv != nullptr) {
+ for (const auto& subterm : equiv->get_terms()) {
+ addFieldsFromIndex(subterm->index(), fieldsInQuery);
+ }
+ } else {
+ addFieldsFromIndex(term->index(), fieldsInQuery);
+ }
}
return fieldsInQuery;
}