aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2024-02-09 23:31:57 +0100
committer GitHub <noreply@github.com> 2024-02-09 23:31:57 +0100
commit a59d72339a96ead18d336b290c8b4e89c768bfa7 (patch)
tree b8b7df32d41024b422c24044191f2996e6f32ff4 /searchlib
parent fc1e1b3def4a26b15c27892fb1d696e453adf0fb (diff)
parent 1f24d13afaeb37fac4cd810b76c1b1b9b5a4dc51 (diff)
Merge pull request #30229 from vespa-engine/toregge/handle-equiv-query-node-as-a-leaf-in-streaming-query-tree
Handle search::streaming::EquivQueryNode as a leaf in the query tree.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/query/streaming/CMakeLists.txt 9
-rw-r--r-- searchlib/src/tests/query/streaming/equiv_query_node_test.cpp 209
-rw-r--r-- searchlib/src/tests/query/streaming_query_test.cpp 11
-rw-r--r-- searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt 2
-rw-r--r-- searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.cpp 102
-rw-r--r-- searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.h 25
-rw-r--r-- searchlib/src/vespa/searchlib/query/streaming/hit.cpp 17
-rw-r--r-- searchlib/src/vespa/searchlib/query/streaming/hit.h 30
-rw-r--r-- searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.cpp 4
-rw-r--r-- searchlib/src/vespa/searchlib/query/streaming/query.cpp 7
-rw-r--r-- searchlib/src/vespa/searchlib/query/streaming/query.h 14
-rw-r--r-- searchlib/src/vespa/searchlib/query/streaming/querynode.cpp 40
-rw-r--r-- searchlib/src/vespa/searchlib/query/streaming/querynode.h 1
-rw-r--r-- searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp 97
-rw-r--r-- searchlib/src/vespa/searchlib/query/streaming/queryterm.h 6
-rw-r--r-- searchlib/src/vespa/searchlib/query/streaming/queryterm.hpp 94
16 files changed, 554 insertions, 114 deletions
diff --git a/searchlib/src/tests/query/streaming/CMakeLists.txt b/searchlib/src/tests/query/streaming/CMakeLists.txt
index 7568e45d00a..5ed450ecbc8 100644
--- a/searchlib/src/tests/query/streaming/CMakeLists.txt
+++ b/searchlib/src/tests/query/streaming/CMakeLists.txt
@@ -1,5 +1,14 @@
# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_query_streaming_equiv_query_node_test_app TEST
+ SOURCES
+ equiv_query_node_test.cpp
+ DEPENDS
+ searchlib
+ GTest::gtest
+)
+vespa_add_test(NAME searchlib_query_streaming_equiv_query_node_test_app COMMAND searchlib_query_streaming_equiv_query_node_test_app)
+
vespa_add_executable(searchlib_query_streaming_hit_iterator_test_app TEST
SOURCES
hit_iterator_test.cpp
diff --git a/searchlib/src/tests/query/streaming/equiv_query_node_test.cpp b/searchlib/src/tests/query/streaming/equiv_query_node_test.cpp
new file mode 100644
index 00000000000..72378385c78
--- /dev/null
+++ b/searchlib/src/tests/query/streaming/equiv_query_node_test.cpp
@@ -0,0 +1,209 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchlib/query/streaming/equiv_query_node.h>
+#include <vespa/searchlib/fef/matchdata.h>
+#include <vespa/searchlib/fef/simpletermdata.h>
+#include <vespa/searchlib/query/streaming/phrase_query_node.h>
+#include <vespa/searchlib/query/streaming/query.h>
+#include <vespa/searchlib/query/streaming/queryterm.h>
+#include <vespa/searchlib/query/tree/querybuilder.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <vespa/searchlib/query/tree/stackdumpcreator.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+using search::fef::MatchData;
+using search::fef::SimpleTermData;
+using search::fef::TermFieldHandle;
+using search::fef::TermFieldMatchDataPosition;
+using search::query::QueryBuilder;
+using search::query::Node;
+using search::query::SimpleQueryNodeTypes;
+using search::query::StackDumpCreator;
+using search::query::Weight;
+using search::streaming::EquivQueryNode;
+using search::streaming::HitList;
+using search::streaming::PhraseQueryNode;
+using search::streaming::Query;
+using search::streaming::QueryNodeResultFactory;
+using search::streaming::QueryTerm;
+using search::streaming::QueryTermList;
+
+// Result factory that always permits rewriting of float-like terms, so that
+// a term such as "2.5" is expanded when the query tree is built.
+struct AllowRewrite : QueryNodeResultFactory
+{
+    bool allow_float_terms_rewrite(vespalib::stringref) const noexcept override {
+        return true;
+    }
+};
+
+/*
+ * Test fixture for EquivQueryNode. Provides a helper for checking a single
+ * unpacked match data position and a helper for building a simple equiv
+ * query stack dump.
+ */
+class EquivQueryNodeTest : public ::testing::Test
+{
+public:
+    EquivQueryNodeTest();
+    ~EquivQueryNodeTest();
+
+    // label is taken by reference; it is only used for the scoped trace.
+    void assert_tfmd_pos(const vespalib::string& label,
+                         const TermFieldMatchDataPosition &tfmd_pos,
+                         uint32_t exp_element_id,
+                         uint32_t exp_position,
+                         int32_t exp_element_weight,
+                         uint32_t exp_element_length);
+    vespalib::string make_simple_equiv_stack_dump();
+};
+
+EquivQueryNodeTest::EquivQueryNodeTest() = default;
+
+EquivQueryNodeTest::~EquivQueryNodeTest() = default;
+
+/*
+ * Checks that tfmd_pos carries the expected element id, position, element
+ * weight and element length. Failures are reported with label as trace.
+ */
+void
+EquivQueryNodeTest::assert_tfmd_pos(const vespalib::string& label,
+                                    const TermFieldMatchDataPosition &tfmd_pos,
+                                    uint32_t exp_element_id,
+                                    uint32_t exp_position,
+                                    int32_t exp_element_weight,
+                                    uint32_t exp_element_length)
+{
+    SCOPED_TRACE(label);
+    EXPECT_EQ(exp_element_id, tfmd_pos.getElementId());
+    EXPECT_EQ(exp_position, tfmd_pos.getPosition());
+    EXPECT_EQ(exp_element_weight, tfmd_pos.getElementWeight());
+    EXPECT_EQ(exp_element_length, tfmd_pos.getElementLen());
+}
+
+// Builds the stack dump of an equiv node with the three string term
+// children "2", "2.5" and "3".
+vespalib::string
+EquivQueryNodeTest::make_simple_equiv_stack_dump()
+{
+    QueryBuilder<SimpleQueryNodeTypes> builder;
+    builder.addEquiv(3, 0, Weight(0));
+    for (const char* term : {"2", "2.5", "3"}) {
+        builder.addStringTerm(term, "", 0, Weight(0));
+    }
+    Node::UP root = builder.build();
+    return StackDumpCreator::create(*root);
+}
+
+/*
+ * Checks that an equiv node reports the merged hits of its children from
+ * evaluateHits() and that unpack_match_data() fills in term field match
+ * data (positions, number of occurrences and field length) per field.
+ */
+TEST_F(EquivQueryNodeTest, test_equiv_evaluate_and_unpack)
+{
+    auto stack_dump = make_simple_equiv_stack_dump();
+    QueryNodeResultFactory empty;
+    Query q(empty, stack_dump);
+    auto& eqn = dynamic_cast<EquivQueryNode&>(q.getRoot());
+    auto& terms = eqn.get_terms();
+    // Fatal check: terms[0], terms[1] and terms[2] are indexed below.
+    ASSERT_EQ(3u, terms.size());
+    for (auto& qt : terms) {
+        qt->resizeFieldId(1);
+    }
+
+    /*
+     * Populate hit lists in query terms, emulating the result of
+     * having performed a streaming search.
+     */
+    constexpr uint32_t field0 = 0;
+    constexpr uint32_t field1 = 1;
+    constexpr uint32_t elem0 = 0;
+    constexpr uint32_t elem1 = 1;
+    constexpr int32_t weight1 = 1;
+    constexpr int32_t weight2 = 2;
+    constexpr uint32_t pos5 = 5;
+    constexpr uint32_t pos6 = 6;
+    constexpr uint32_t pos3 = 3;
+    constexpr uint32_t pos4 = 4;
+    constexpr uint32_t field0_len = 100;
+    constexpr uint32_t field1_len = 200;
+    constexpr uint32_t field0_element0_len = 10;
+    constexpr uint32_t field0_element1_len = 30;
+    constexpr uint32_t field1_element0_len = 31;
+    // field 0
+    terms[0]->add(field0, elem0, weight1, pos5);
+    terms[1]->add(field0, elem0, weight1, pos6);
+    terms[2]->add(field0, elem1, weight1, pos3);
+    // field 1: both terms hit the same position, with different weights
+    terms[1]->add(field1, elem0, weight1, pos4);
+    terms[2]->add(field1, elem0, weight2, pos4);
+
+    terms[0]->set_element_length(0, field0_element0_len);
+    terms[1]->set_element_length(0, field0_element0_len);
+    terms[1]->set_element_length(1, field1_element0_len);
+    terms[2]->set_element_length(0, field0_element1_len);
+    terms[2]->set_element_length(1, field1_element0_len);
+
+    /*
+     * evaluateHits() should get the union of the hits for each query term
+     * but without duplicates. For the duplicated position in field 1 the
+     * hit with the highest element weight (weight2) is the one expected.
+     */
+    HitList hits;
+    eqn.evaluateHits(hits);
+    auto exp_hits = HitList{{field0,elem0,weight1,pos5},{field0,elem0,weight1,pos6},{field0,elem1,weight1,pos3},{field1,elem0,weight2,pos4}};
+    exp_hits[0].set_element_length(field0_element0_len);
+    exp_hits[1].set_element_length(field0_element0_len);
+    exp_hits[2].set_element_length(field0_element1_len);
+    exp_hits[3].set_element_length(field1_element0_len);
+    ASSERT_EQ(exp_hits, hits);
+    EXPECT_TRUE(eqn.evaluate());
+
+    /*
+     * Verify that unpack_match_data() gives the expected term field
+     * match data information.
+     */
+    SimpleTermData td;
+    constexpr TermFieldHandle handle0 = 27;
+    constexpr TermFieldHandle handle1 = 29;
+    constexpr TermFieldHandle handle_max = std::max(handle0, handle1);
+    td.addField(0).setHandle(handle0);
+    td.addField(1).setHandle(handle1);
+    terms[0]->resizeFieldId(field0);
+    terms[0]->getFieldInfo(field0).setFieldLength(field0_len);
+    terms[1]->resizeFieldId(field1);
+    terms[1]->getFieldInfo(field0).setFieldLength(field0_len);
+    terms[1]->getFieldInfo(field1).setFieldLength(field1_len);
+    terms[2]->resizeFieldId(field1);
+    terms[2]->getFieldInfo(field0).setFieldLength(field0_len);
+    terms[2]->getFieldInfo(field1).setFieldLength(field1_len);
+    auto md = MatchData::makeTestInstance(handle_max + 1, handle_max + 1);
+    auto tfmd0 = md->resolveTermField(handle0);
+    auto tfmd1 = md->resolveTermField(handle1);
+    tfmd0->setNeedInterleavedFeatures(true);
+    tfmd1->setNeedInterleavedFeatures(true);
+    eqn.unpack_match_data(2, td, *md);
+    EXPECT_EQ(2, tfmd0->getDocId());
+    EXPECT_EQ(3, tfmd0->getNumOccs());
+    // Fatal check: the iterator is dereferenced three times below.
+    ASSERT_EQ(3, tfmd0->end() - tfmd0->begin());
+    auto itr = tfmd0->begin();
+    assert_tfmd_pos("tmfd0[0]", *itr, elem0, pos5, weight1, field0_element0_len);
+    ++itr;
+    assert_tfmd_pos("tmfd0[1]", *itr, elem0, pos6, weight1, field0_element0_len);
+    ++itr;
+    assert_tfmd_pos("tmfd0[2]", *itr, elem1, pos3, weight1, field0_element1_len);
+    EXPECT_EQ(field0_len, tfmd0->getFieldLength());
+    EXPECT_EQ(2, tfmd1->getDocId());
+    EXPECT_EQ(1, tfmd1->getNumOccs());
+    // Fatal check: the iterator is dereferenced below.
+    ASSERT_EQ(1, tfmd1->end() - tfmd1->begin());
+    itr = tfmd1->begin();
+    assert_tfmd_pos("tmfd1[0]", *itr, elem0, pos4, weight2, field1_element0_len);
+    EXPECT_EQ(field1_len, tfmd1->getFieldLength());
+}
+
+/*
+ * Checks that the equiv node created by rewriting the float-like term "2.5"
+ * is flattened into the enclosing equiv node instead of being nested.
+ */
+TEST_F(EquivQueryNodeTest, test_equiv_flattening)
+{
+    auto stack_dump = make_simple_equiv_stack_dump();
+    AllowRewrite allow_rewrite;
+    Query q(allow_rewrite, stack_dump);
+    auto& eqn = dynamic_cast<EquivQueryNode&>(q.getRoot());
+    auto& terms = eqn.get_terms();
+    // Query is flattened to equiv("2", "2.5", phrase("2","5"), "3")
+    // Fatal check: terms[0..3] are indexed below.
+    ASSERT_EQ(4u, terms.size());
+    EXPECT_EQ("2", terms[0]->getTermString());
+    EXPECT_EQ("2.5", terms[1]->getTermString());
+    auto phrase = dynamic_cast<PhraseQueryNode*>(terms[2].get());
+    // Fatal check: phrase is dereferenced below.
+    ASSERT_NE(phrase, nullptr);
+    ASSERT_EQ(2u, phrase->get_terms().size());
+    EXPECT_EQ("2", phrase->get_terms()[0]->getTermString());
+    EXPECT_EQ("5", phrase->get_terms()[1]->getTermString());
+    EXPECT_EQ("3", terms[3]->getTermString());
+}
+
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/tests/query/streaming_query_test.cpp b/searchlib/src/tests/query/streaming_query_test.cpp
index 19a2a0876c6..5559e194c5e 100644
--- a/searchlib/src/tests/query/streaming_query_test.cpp
+++ b/searchlib/src/tests/query/streaming_query_test.cpp
@@ -3,6 +3,7 @@
#include <vespa/searchlib/fef/simpletermdata.h>
#include <vespa/searchlib/fef/matchdata.h>
#include <vespa/searchlib/query/streaming/dot_product_term.h>
+#include <vespa/searchlib/query/streaming/equiv_query_node.h>
#include <vespa/searchlib/query/streaming/in_term.h>
#include <vespa/searchlib/query/streaming/phrase_query_node.h>
#include <vespa/searchlib/query/streaming/query.h>
@@ -352,17 +353,17 @@ TEST(StreamingQueryTest, onedot0e_is_rewritten_if_allowed_too)
const QueryNode & root = q.getRoot();
EXPECT_TRUE(dynamic_cast<const EquivQueryNode *>(&root) != nullptr);
const auto & equiv = static_cast<const EquivQueryNode &>(root);
- EXPECT_EQ(2u, equiv.size());
- EXPECT_TRUE(dynamic_cast<const QueryTerm *>(equiv[0].get()) != nullptr);
+ EXPECT_EQ(2u, equiv.get_terms().size());
+ EXPECT_TRUE(dynamic_cast<const QueryTerm *>(equiv.get_terms()[0].get()) != nullptr);
{
- const auto & qt = static_cast<const QueryTerm &>(*equiv[0]);
+ const auto & qt = static_cast<const QueryTerm &>(*equiv.get_terms()[0]);
EXPECT_EQ("c", qt.index());
EXPECT_EQ(vespalib::stringref("1.0e"), qt.getTerm());
EXPECT_EQ(3u, qt.uniqueId());
}
- EXPECT_TRUE(dynamic_cast<const PhraseQueryNode *>(equiv[1].get()) != nullptr);
+ EXPECT_TRUE(dynamic_cast<const PhraseQueryNode *>(equiv.get_terms()[1].get()) != nullptr);
{
- const auto & phrase = static_cast<const PhraseQueryNode &>(*equiv[1]);
+ const auto & phrase = static_cast<const PhraseQueryNode &>(*equiv.get_terms()[1]);
EXPECT_EQ(2u, phrase.get_terms().size());
{
const auto & qt = *phrase.get_terms()[0];
diff --git a/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt b/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt
index 63d52cbdf9f..a2f0c8fd136 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt
@@ -2,7 +2,9 @@
vespa_add_library(searchlib_query_streaming OBJECT
SOURCES
dot_product_term.cpp
+ equiv_query_node.cpp
fuzzy_term.cpp
+ hit.cpp
hit_iterator_pack.cpp
in_term.cpp
multi_term.cpp
diff --git a/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.cpp b/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.cpp
new file mode 100644
index 00000000000..939afec0463
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.cpp
@@ -0,0 +1,102 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "equiv_query_node.h"
+#include "phrase_query_node.h"
+#include "queryterm.hpp"
+
+using search::fef::TermFieldMatchData;
+using search::fef::TermFieldMatchDataPosition;
+using search::fef::ITermFieldData;
+
+namespace search::streaming {
+
+namespace {
+
+/*
+ * A Hit that also carries the length of the field it belongs to, so that
+ * the field length can be looked up per child term while merging and still
+ * be available when the merged hits are unpacked.
+ */
+class HitWithFieldLength : public Hit
+{
+    uint32_t _field_length;
+public:
+    HitWithFieldLength(const Hit& hit, uint32_t field_length) noexcept
+        : Hit(hit), _field_length(field_length) { }
+
+    uint32_t get_field_length() const noexcept {
+        return _field_length;
+    }
+};
+
+/*
+ * Appends the hits of all child terms of mt to hl, sorts them and removes
+ * hits that share field id, element id and position with an earlier hit.
+ *
+ * Hit::operator< orders hits at the same position by descending element
+ * weight, so the hit kept for a position is the one with highest weight.
+ *
+ * When HitType is Hit the hits are copied as is. Otherwise each hit is
+ * paired with the field length taken from the child term; for a phrase
+ * child the first term of the phrase supplies the field length.
+ */
+template <typename HitType>
+void merge_hits_from_children(std::vector<HitType>& hl, const MultiTerm& mt)
+{
+    HitList sub_hl_store;
+    for (const auto& subterm : mt.get_terms()) {
+        const auto& sub_hl = subterm->evaluateHits(sub_hl_store);
+        if constexpr (std::is_same_v<Hit,HitType>) {
+            hl.insert(hl.end(), sub_hl.begin(), sub_hl.end());
+        } else {
+            // Only this branch needs the field length, so the phrase lookup
+            // is done here and not for plain hit merging.
+            auto* phrase = dynamic_cast<PhraseQueryNode*>(subterm.get());
+            const QueryTerm& fl_term = (phrase == nullptr) ? *subterm : *phrase->get_terms().front();
+            for (const auto& h : sub_hl) {
+                hl.emplace_back(h, extract_field_length(fl_term, h.field_id()));
+            }
+        }
+    }
+    std::sort(hl.begin(), hl.end());
+    auto last = std::unique(hl.begin(), hl.end(),
+                            [](const auto& lhs, const auto& rhs) noexcept { return lhs.at_same_pos(rhs); });
+    hl.erase(last, hl.end());
+}
+
+}
+
+EquivQueryNode::EquivQueryNode(std::unique_ptr<QueryNodeResultBase> result_base, uint32_t num_terms)
+ : MultiTerm(std::move(result_base), "", num_terms) // an empty string is passed as the second MultiTerm argument
+{
+}
+
+EquivQueryNode::~EquivQueryNode() = default;
+
+// An equiv node matches when at least one of its child terms matches.
+bool
+EquivQueryNode::evaluate() const
+{
+    const auto& children = get_terms();
+    return std::any_of(children.begin(), children.end(),
+                       [](const auto& child) { return child->evaluate(); });
+}
+
+const HitList &
+EquivQueryNode::evaluateHits(HitList & hl) const
+{
+ hl.clear(); // start from an empty list; the merge below only appends
+ merge_hits_from_children(hl, *this); // sorted hits of all children, one hit per position
+ return hl;
+}
+
+// Unpacks the merged hits of all child terms into match_data. The hits
+// carry their own field length, taken from the child term they came from.
+void
+EquivQueryNode::unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data)
+{
+    std::vector<HitWithFieldLength> merged_hits;
+    merge_hits_from_children(merged_hits, *this);
+    unpack_match_data_helper(docid, td, match_data, merged_hits, *this);
+}
+
+EquivQueryNode*
+EquivQueryNode::as_equiv_query_node() noexcept
+{
+ return this; // this node is an equiv node
+}
+
+const EquivQueryNode*
+EquivQueryNode::as_equiv_query_node() const noexcept
+{
+ return this; // const variant of the accessor above
+}
+
+std::vector<std::unique_ptr<QueryTerm>>
+EquivQueryNode::steal_terms()
+{
+ return std::move(_terms); // ownership of the child terms goes to the caller; _terms is left moved-from
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.h b/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.h
new file mode 100644
index 00000000000..b5cdb31274f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.h
@@ -0,0 +1,25 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "multi_term.h"
+
+namespace search::streaming {
+
+/**
+ N-ary "EQUIV" operator that merges the hits of its child terms.
+
+ The node is a MultiTerm, i.e. it is handled as a leaf in the query tree
+ and owns its children as terms.
+
+ evaluate() is true when at least one child term evaluates to true.
+ evaluateHits() clears the given hit list and fills it with the sorted
+ hits of all children, keeping one hit per field, element and position.
+ unpack_match_data() unpacks the same merged hits into match data.
+ steal_terms() moves the child terms out of the node and returns them.
+*/
+class EquivQueryNode : public MultiTerm
+{
+public:
+ EquivQueryNode(std::unique_ptr<QueryNodeResultBase> result_base, uint32_t num_terms);
+ ~EquivQueryNode() override;
+ bool evaluate() const override;
+ const HitList & evaluateHits(HitList & hl) const override;
+ void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data) override;
+ EquivQueryNode* as_equiv_query_node() noexcept override;
+ const EquivQueryNode* as_equiv_query_node() const noexcept override;
+ std::vector<std::unique_ptr<QueryTerm>> steal_terms();
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/query/streaming/hit.cpp b/searchlib/src/vespa/searchlib/query/streaming/hit.cpp
new file mode 100644
index 00000000000..c05fda77476
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/streaming/hit.cpp
@@ -0,0 +1,17 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "hit.h"
+#include <ostream>
+
+namespace search::streaming {
+
+// Writes hit as {field_id,element_id,element_weight,element_length,position}.
+std::ostream&
+operator<<(std::ostream& os, const Hit& hit)
+{
+    os << "{" << hit.field_id();
+    os << "," << hit.element_id();
+    os << "," << hit.element_weight();
+    os << "," << hit.element_length();
+    os << "," << hit.position();
+    os << "}";
+    return os;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/query/streaming/hit.h b/searchlib/src/vespa/searchlib/query/streaming/hit.h
index 168c09a91ec..fc24c21f722 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/hit.h
+++ b/searchlib/src/vespa/searchlib/query/streaming/hit.h
@@ -2,6 +2,7 @@
#pragma once
#include <cstdint>
+#include <iosfwd>
#include <vector>
namespace search::streaming {
@@ -27,8 +28,37 @@ public:
uint32_t element_length() const { return _element_length; }
uint32_t position() const { return _position; }
void set_element_length(uint32_t value) { _element_length = value; }
+    // Orders hits by field id, element id and position (ascending), then by
+    // element weight (descending) and last by element length (ascending).
+    // The first differing member decides.
+    bool operator<(const Hit& rhs) const noexcept {
+        return (_field_id != rhs._field_id) ? (_field_id < rhs._field_id)
+             : (_element_id != rhs._element_id) ? (_element_id < rhs._element_id)
+             : (_position != rhs._position) ? (_position < rhs._position)
+             : (_element_weight != rhs._element_weight) ? (_element_weight > rhs._element_weight)
+             : (_element_length < rhs._element_length);
+    }
+ bool at_same_pos(const Hit& rhs) const noexcept { // element weight and element length are not compared
+ return (_field_id == rhs._field_id) &&
+ (_element_id == rhs._element_id) &&
+ (_position == rhs._position);
+ }
+    // Two hits are equal when they are at the same position and also agree
+    // on element weight and element length.
+    bool operator==(const Hit& rhs) const noexcept {
+        if (!at_same_pos(rhs)) {
+            return false;
+        }
+        return (_element_weight == rhs._element_weight) &&
+               (_element_length == rhs._element_length);
+    }
};
+std::ostream& operator<<(std::ostream& os, const Hit& hit);
+
using HitList = std::vector<Hit>;
}
diff --git a/searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.cpp b/searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.cpp
index 9cd8d41d33d..b090ca13225 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.cpp
@@ -69,7 +69,9 @@ PhraseQueryNode::evaluateHits(HitList & hl) const
void
PhraseQueryNode::unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data)
{
- unpack_match_data_helper(docid, td, match_data, *get_terms().front());
+ HitList list;
+ const HitList & hit_list = evaluateHits(list);
+ unpack_match_data_helper(docid, td, match_data, hit_list, *get_terms().front());
}
}
diff --git a/searchlib/src/vespa/searchlib/query/streaming/query.cpp b/searchlib/src/vespa/searchlib/query/streaming/query.cpp
index 77424fb2d62..94d9acd02cd 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/query.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/query.cpp
@@ -94,7 +94,6 @@ QueryConnector::create(ParseItem::ItemType type)
case search::ParseItem::ITEM_AND: return std::make_unique<AndQueryNode>();
case search::ParseItem::ITEM_OR:
case search::ParseItem::ITEM_WEAK_AND: return std::make_unique<OrQueryNode>();
- case search::ParseItem::ITEM_EQUIV: return std::make_unique<EquivQueryNode>();
case search::ParseItem::ITEM_NOT: return std::make_unique<AndNotQueryNode>();
case search::ParseItem::ITEM_SAME_ELEMENT: return std::make_unique<SameElementQueryNode>();
case search::ParseItem::ITEM_NEAR: return std::make_unique<NearQueryNode>();
@@ -158,12 +157,6 @@ RankWithQueryNode::evaluate() const {
return firstOk;
}
-bool
-EquivQueryNode::evaluate() const
-{
- return OrQueryNode::evaluate();
-}
-
Query::Query() = default;
Query::Query(const QueryNodeResultFactory & factory, vespalib::stringref queryRep)
diff --git a/searchlib/src/vespa/searchlib/query/streaming/query.h b/searchlib/src/vespa/searchlib/query/streaming/query.h
index e91a2f91dc5..a993a9a8a8a 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/query.h
+++ b/searchlib/src/vespa/searchlib/query/streaming/query.h
@@ -103,20 +103,6 @@ public:
bool evaluate() const override;
};
-
-/**
- N-ary "EQUIV" operator that merges terms from nodes below.
-*/
-class EquivQueryNode : public OrQueryNode
-{
-public:
- EquivQueryNode() noexcept : OrQueryNode("EQUIV") { }
- bool evaluate() const override;
- bool isFlattenable(ParseItem::ItemType type) const override {
- return (type == ParseItem::ITEM_EQUIV);
- }
-};
-
/**
Query packages the query tree. The usage pattern is like this.
Construct the tree with the correct tree description.
diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp
index 0b277dbe221..dd3b1f84ad9 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp
@@ -9,6 +9,7 @@
#include "same_element_query_node.h"
#include <vespa/searchlib/parsequery/stackdumpiterator.h>
#include <vespa/searchlib/query/streaming/dot_product_term.h>
+#include <vespa/searchlib/query/streaming/equiv_query_node.h>
#include <vespa/searchlib/query/streaming/in_term.h>
#include <vespa/searchlib/query/streaming/wand_term.h>
#include <vespa/searchlib/query/streaming/weighted_set_term.h>
@@ -44,7 +45,6 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor
case ParseItem::ITEM_AND:
case ParseItem::ITEM_OR:
case ParseItem::ITEM_WEAK_AND:
- case ParseItem::ITEM_EQUIV:
case ParseItem::ITEM_NOT:
case ParseItem::ITEM_SAME_ELEMENT:
case ParseItem::ITEM_NEAR:
@@ -142,10 +142,10 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor
auto dotPos = ssTerm.find('.');
phrase->add_term(std::make_unique<QueryTerm>(factory.create(), ssTerm.substr(0, dotPos), ssIndex, TermType::WORD, normalize_mode));
phrase->add_term(std::make_unique<QueryTerm>(factory.create(), ssTerm.substr(dotPos + 1), ssIndex, TermType::WORD, normalize_mode));
- auto orqn = std::make_unique<EquivQueryNode>();
- orqn->addChild(std::move(qt));
- orqn->addChild(std::move(phrase));
- qn = std::move(orqn);
+ auto eqn = std::make_unique<EquivQueryNode>(factory.create(), 2);
+ eqn->add_term(std::move(qt));
+ eqn->add_term(std::move(phrase));
+ qn = std::move(eqn);
} else {
qn = std::move(qt);
}
@@ -171,6 +171,9 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor
case ParseItem::ITEM_PHRASE:
qn = build_phrase_term(factory, queryRep);
break;
+ case ParseItem::ITEM_EQUIV:
+ qn = build_equiv_term(factory, queryRep, allowRewrite);
+ break;
default:
skip_unknown(queryRep);
break;
@@ -282,6 +285,33 @@ QueryNode::build_phrase_term(const QueryNodeResultFactory& factory, SimpleQueryS
return phrase;
}
+/*
+ * Builds an EquivQueryNode from the equiv item queryRep is positioned at.
+ *
+ * Weight and unique id are copied from the item, then one child is built
+ * per arity. A child that is itself an equiv node (e.g. the result of a
+ * float term rewrite) is flattened: its terms are moved into the node being
+ * built. Every other child must be a QueryTerm and is added as a term.
+ *
+ * A child that is not a QueryTerm (including a null child) triggers the
+ * assert. When asserts are compiled out such a child is dropped, so that
+ * no null term is stored and no node is leaked.
+ */
+std::unique_ptr<QueryNode>
+QueryNode::build_equiv_term(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep, bool allow_rewrite)
+{
+    const auto arity = queryRep.getArity();
+    auto eqn = std::make_unique<EquivQueryNode>(factory.create(), arity);
+    eqn->setWeight(queryRep.GetWeight());
+    eqn->setUniqueId(queryRep.getUniqueId());
+    for (size_t i = 0; i < arity; ++i) {
+        queryRep.next();
+        auto qn = Build(eqn.get(), factory, queryRep, allow_rewrite);
+        auto* qtp = dynamic_cast<QueryTerm*>(qn.get());
+        assert(qtp != nullptr);
+        if (qtp == nullptr) {
+            continue; // qn still owns the child (if any) and destroys it
+        }
+        auto* nested_eqn = qtp->as_equiv_query_node();
+        if (nested_eqn != nullptr) {
+            // Flatten: adopt the terms, the emptied nested node dies with qn.
+            auto stolen_terms = nested_eqn->steal_terms();
+            for (auto& term : stolen_terms) {
+                eqn->add_term(std::move(term));
+            }
+            continue;
+        }
+        // Ownership moves from qn to the term pointer only after the cast
+        // is known to have succeeded.
+        qn.release();
+        eqn->add_term(std::unique_ptr<QueryTerm>(qtp));
+    }
+    return eqn;
+}
+
void
QueryNode::skip_unknown(SimpleQueryStackDumpIterator& queryRep)
{
diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.h b/searchlib/src/vespa/searchlib/query/streaming/querynode.h
index 4c7d9e88930..fff3bb15d10 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/querynode.h
+++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.h
@@ -34,6 +34,7 @@ class QueryNode
static std::unique_ptr<QueryNode> build_wand_term(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep);
static std::unique_ptr<QueryNode> build_weighted_set_term(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep);
static std::unique_ptr<QueryNode> build_phrase_term(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep);
+ static std::unique_ptr<QueryNode> build_equiv_term(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep, bool allow_rewrite);
static void skip_unknown(SimpleQueryStackDumpIterator& queryRep);
public:
using UP = std::unique_ptr<QueryNode>;
diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp
index f01f815e673..0d0f5a7c4ad 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp
@@ -1,6 +1,6 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "queryterm.h"
+#include "queryterm.hpp"
#include <vespa/searchlib/fef/itermdata.h>
#include <vespa/searchlib/fef/matchdata.h>
#include <vespa/vespalib/objects/visit.h>
@@ -113,89 +113,12 @@ QueryTerm::set_element_length(uint32_t hitlist_idx, uint32_t element_length)
_hitList[hitlist_idx].set_element_length(element_length);
}
-namespace {
-
-uint16_t
-cap_16_bits(uint32_t value)
-{
- return std::min(value, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()));
-}
-
-uint32_t
-extract_field_length(const QueryTerm& term, uint32_t field_id)
-{
- return (field_id < term.getFieldInfoSize()) ? term.getFieldInfo(field_id).getFieldLength() : search::fef::FieldPositionsIterator::UNKNOWN_LENGTH;
-}
-
-void
-set_interleaved_features(TermFieldMatchData& tmd, uint32_t field_length, uint32_t num_occs)
-{
- tmd.setFieldLength(cap_16_bits(field_length));
- tmd.setNumOccs(cap_16_bits(num_occs));
-}
-
-}
-
-void
-QueryTerm::unpack_match_data_helper(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const QueryTerm& fl_term) const
-{
- HitList list;
- const HitList & hitList = evaluateHits(list);
-
- if (!hitList.empty()) { // only unpack if we have a hit
- LOG(debug, "Unpack match data for query term '%s:%s'",
- index().c_str(), getTerm());
-
- uint32_t lastFieldId = -1;
- TermFieldMatchData *tmd = nullptr;
- uint32_t num_occs = 0;
-
- // optimize for hitlist giving all hits for a single field in one chunk
- for (const Hit & hit : hitList) {
- uint32_t fieldId = hit.field_id();
- if (fieldId != lastFieldId) {
- if (tmd != nullptr) {
- if (tmd->needs_interleaved_features()) {
- set_interleaved_features(*tmd, extract_field_length(fl_term, lastFieldId), num_occs);
- }
- // reset to notfound/unknown values
- tmd = nullptr;
- }
- num_occs = 0;
-
- // setup for new field that had a hit
- const ITermFieldData *tfd = td.lookupField(fieldId);
- if (tfd != nullptr) {
- tmd = match_data.resolveTermField(tfd->getHandle());
- tmd->setFieldId(fieldId);
- // reset field match data, but only once per docId
- if (tmd->getDocId() != docid) {
- tmd->reset(docid);
- }
- }
- lastFieldId = fieldId;
- }
- ++num_occs;
- if (tmd != nullptr) {
- TermFieldMatchDataPosition pos(hit.element_id(), hit.position(),
- hit.element_weight(), hit.element_length());
- tmd->appendPosition(pos);
- LOG(debug, "Append elemId(%u),position(%u), weight(%d), tfmd.weight(%d)",
- pos.getElementId(), pos.getPosition(), pos.getElementWeight(), tmd->getWeight());
- }
- }
- if (tmd != nullptr) {
- if (tmd->needs_interleaved_features()) {
- set_interleaved_features(*tmd, extract_field_length(fl_term, lastFieldId), num_occs);
- }
- }
- }
-}
-
void
QueryTerm::unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data)
{
- unpack_match_data_helper(docid, td, match_data, *this);
+ HitList list;
+ const HitList & hit_list = evaluateHits(list);
+ unpack_match_data_helper(docid, td, match_data, hit_list, *this);
}
NearestNeighborQueryNode*
@@ -222,4 +145,16 @@ QueryTerm::as_fuzzy_term() noexcept
return nullptr;
}
+EquivQueryNode*
+QueryTerm::as_equiv_query_node() noexcept
+{
+ return nullptr; // default: not an equiv node; EquivQueryNode overrides this to return itself
+}
+
+const EquivQueryNode*
+QueryTerm::as_equiv_query_node() const noexcept
+{
+ return nullptr; // const variant of the default above
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h
index 2eaecb86854..2cb4f2d2ebb 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h
+++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h
@@ -17,6 +17,7 @@ class MatchData;
}
namespace search::streaming {
+class EquivQueryNode;
class FuzzyTerm;
class NearestNeighborQueryNode;
class MultiTerm;
@@ -100,9 +101,12 @@ public:
virtual MultiTerm* as_multi_term() noexcept;
virtual RegexpTerm* as_regexp_term() noexcept;
virtual FuzzyTerm* as_fuzzy_term() noexcept;
+ virtual EquivQueryNode* as_equiv_query_node() noexcept;
+ virtual const EquivQueryNode* as_equiv_query_node() const noexcept;
virtual void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data);
protected:
- void unpack_match_data_helper(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const QueryTerm& fl_term) const;
+ template <typename HitListType>
+ static void unpack_match_data_helper(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const HitListType& hit_list, const QueryTerm& fl_term);
using QueryNodeResultBaseContainer = std::unique_ptr<QueryNodeResultBase>;
string _index;
EncodingBitMap _encoding;
diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.hpp b/searchlib/src/vespa/searchlib/query/streaming/queryterm.hpp
new file mode 100644
index 00000000000..dd6eff1f22b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.hpp
@@ -0,0 +1,94 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "queryterm.h"
+#include <vespa/searchlib/fef/itermdata.h>
+#include <vespa/searchlib/fef/matchdata.h>
+#include <algorithm>
+#include <limits>
+
+
+namespace search::streaming {
+
+namespace {
+
+// Limits value to the largest value a uint16_t can hold.
+uint16_t
+cap_16_bits(uint32_t value)
+{
+    constexpr uint32_t limit = std::numeric_limits<uint16_t>::max();
+    return static_cast<uint16_t>((value < limit) ? value : limit);
+}
+
+// Returns the field length stored in term for field_id, or UNKNOWN_LENGTH
+// when term has no field info entry for that field id.
+uint32_t
+extract_field_length(const QueryTerm& term, uint32_t field_id)
+{
+    if (field_id >= term.getFieldInfoSize()) {
+        return search::fef::FieldPositionsIterator::UNKNOWN_LENGTH;
+    }
+    return term.getFieldInfo(field_id).getFieldLength();
+}
+
+// Stores field length and number of occurrences in tmd, both capped to
+// 16 bits.
+void
+set_interleaved_features(search::fef::TermFieldMatchData& tmd, uint32_t field_length, uint32_t num_occs)
+{
+    const uint16_t capped_field_length = cap_16_bits(field_length);
+    const uint16_t capped_num_occs = cap_16_bits(num_occs);
+    tmd.setFieldLength(capped_field_length);
+    tmd.setNumOccs(capped_num_occs);
+}
+
+}
+
+/*
+ * Unpacks hit_list into the term field match data of match_data for docid.
+ *
+ * Hits are handled field by field: each time the field id changes, the
+ * match data for the new field is looked up through td and reset if it was
+ * last used for another document. Every hit is appended as a position.
+ * When a field is finished, its field length and number of occurrences are
+ * stored if the match data asks for interleaved features.
+ *
+ * For a plain HitList the field length is taken from fl_term; for other hit
+ * list types each hit supplies it through get_field_length().
+ */
+template <typename HitListType>
+void
+QueryTerm::unpack_match_data_helper(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const HitListType& hit_list, const QueryTerm& fl_term)
+{
+    (void) fl_term; // only read in the HitList instantiation
+    if (hit_list.empty()) {
+        return; // only unpack if we have a hit
+    }
+
+    uint32_t prev_field_id = -1;
+    uint32_t prev_field_length = 0;
+    uint32_t occurrences = 0;
+    search::fef::TermFieldMatchData* field_md = nullptr;
+
+    // Stores the interleaved features of the field that was just finished.
+    auto finish_field = [&]() {
+        if ((field_md != nullptr) && field_md->needs_interleaved_features()) {
+            set_interleaved_features(*field_md, prev_field_length, occurrences);
+        }
+    };
+
+    // optimize for hitlist giving all hits for a single field in one chunk
+    for (const auto& hit : hit_list) {
+        const uint32_t field_id = hit.field_id();
+        if (field_id != prev_field_id) {
+            finish_field();
+            // reset to notfound/unknown values
+            field_md = nullptr;
+            occurrences = 0;
+
+            // setup for new field that had a hit
+            const search::fef::ITermFieldData* tfd = td.lookupField(field_id);
+            if (tfd != nullptr) {
+                field_md = match_data.resolveTermField(tfd->getHandle());
+                field_md->setFieldId(field_id);
+                // reset field match data, but only once per docId
+                if (field_md->getDocId() != docid) {
+                    field_md->reset(docid);
+                }
+            }
+            prev_field_id = field_id;
+            if constexpr (std::is_same_v<HitList, HitListType>) {
+                prev_field_length = extract_field_length(fl_term, field_id);
+            } else {
+                prev_field_length = hit.get_field_length();
+            }
+        }
+        ++occurrences;
+        if (field_md != nullptr) {
+            search::fef::TermFieldMatchDataPosition pos(hit.element_id(), hit.position(),
+                                                        hit.element_weight(), hit.element_length());
+            field_md->appendPosition(pos);
+        }
+    }
+    finish_field();
+}
+
+}