aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTor Egge <Tor.Egge@online.no>2024-01-22 16:39:49 +0100
committerTor Egge <Tor.Egge@online.no>2024-01-22 16:39:49 +0100
commitb3a2230c45de8f0698dcbb93bd7a46422ed16731 (patch)
treea6bedb35e878ea210abfaeac9ad0f7f66b01e17f
parentccda952db487445f3522eecbcbfee4a6f6a90c32 (diff)
Add hit iterator pack and use it for phrase search in streaming mode.
-rw-r--r--searchlib/CMakeLists.txt1
-rw-r--r--searchlib/src/tests/query/streaming/CMakeLists.txt19
-rw-r--r--searchlib/src/tests/query/streaming/hit_iterator_pack_test.cpp44
-rw-r--r--searchlib/src/tests/query/streaming/hit_iterator_test.cpp122
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt1
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/hit_iterator.h59
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/hit_iterator_pack.cpp57
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/hit_iterator_pack.h32
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/query.cpp82
9 files changed, 366 insertions, 51 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt
index 5628db99171..7bec70c00cb 100644
--- a/searchlib/CMakeLists.txt
+++ b/searchlib/CMakeLists.txt
@@ -190,6 +190,7 @@ vespa_define_module(
src/tests/postinglistbm
src/tests/predicate
src/tests/query
+ src/tests/query/streaming
src/tests/queryeval
src/tests/queryeval/blueprint
src/tests/queryeval/dot_product
diff --git a/searchlib/src/tests/query/streaming/CMakeLists.txt b/searchlib/src/tests/query/streaming/CMakeLists.txt
new file mode 100644
index 00000000000..d40250fbc3f
--- /dev/null
+++ b/searchlib/src/tests/query/streaming/CMakeLists.txt
@@ -0,0 +1,19 @@
+# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+vespa_add_executable(searchlib_query_streaming_hit_iterator_test_app TEST
+ SOURCES
+ hit_iterator_test.cpp
+ DEPENDS
+ searchlib
+ GTest::gtest
+)
+vespa_add_test(NAME searchlib_query_streaming_hit_iterator_test_app COMMAND searchlib_query_streaming_hit_iterator_test_app)
+
+vespa_add_executable(searchlib_query_streaming_hit_iterator_pack_test_app TEST
+ SOURCES
+ hit_iterator_pack_test.cpp
+ DEPENDS
+ searchlib
+ GTest::gtest
+)
+vespa_add_test(NAME searchlib_query_streaming_hit_iterator_pack_test_app COMMAND searchlib_query_streaming_hit_iterator_pack_test_app)
diff --git a/searchlib/src/tests/query/streaming/hit_iterator_pack_test.cpp b/searchlib/src/tests/query/streaming/hit_iterator_pack_test.cpp
new file mode 100644
index 00000000000..7d7d8307920
--- /dev/null
+++ b/searchlib/src/tests/query/streaming/hit_iterator_pack_test.cpp
@@ -0,0 +1,44 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchlib/query/streaming/hit_iterator_pack.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+using search::streaming::HitIterator;
+using search::streaming::HitIteratorPack;
+using search::streaming::QueryNodeList;
+using search::streaming::QueryTerm;
+using search::streaming::QueryNodeResultBase;
+
+using FieldElement = HitIterator::FieldElement;
+
+TEST(HitIteratorPackTest, seek_to_matching_field_element)
+{
+ QueryNodeList qnl;
+ auto qt = std::make_unique<QueryTerm>(std::unique_ptr<QueryNodeResultBase>(), "7", "", QueryTerm::Type::WORD);
+ qt->add(11, 0, 10, 0);
+ qt->add(11, 0, 10, 5);
+ qt->add(11, 1, 12, 0);
+ qt->add(11, 1, 12, 0);
+ qt->add(12, 1, 13, 0);
+ qt->add(12, 1, 13, 0);
+ qnl.emplace_back(std::move(qt));
+ qt = std::make_unique<QueryTerm>(std::unique_ptr<QueryNodeResultBase>(), "8", "", QueryTerm::Type::WORD);
+ qt->add(2, 0, 4, 0);
+ qt->add(11, 0, 10, 0);
+ qt->add(12, 1, 13, 0);
+ qt->add(12, 2, 14, 0);
+ qnl.emplace_back(std::move(qt));
+ HitIteratorPack itr_pack(qnl);
+ EXPECT_TRUE(itr_pack.all_valid());
+ EXPECT_TRUE(itr_pack.seek_to_matching_field_element());
+ EXPECT_EQ(FieldElement(11, 0), itr_pack.get_field_element_ref());
+ EXPECT_TRUE(itr_pack.seek_to_matching_field_element());
+ EXPECT_EQ(FieldElement(11, 0), itr_pack.get_field_element_ref());
+ ++itr_pack.get_field_element_ref().second;
+ EXPECT_TRUE(itr_pack.seek_to_matching_field_element());
+ EXPECT_EQ(FieldElement(12, 1), itr_pack.get_field_element_ref());
+ ++itr_pack.get_field_element_ref().second;
+ EXPECT_FALSE(itr_pack.seek_to_matching_field_element());
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/tests/query/streaming/hit_iterator_test.cpp b/searchlib/src/tests/query/streaming/hit_iterator_test.cpp
new file mode 100644
index 00000000000..a9588ea3d6c
--- /dev/null
+++ b/searchlib/src/tests/query/streaming/hit_iterator_test.cpp
@@ -0,0 +1,122 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchlib/query/streaming/hit_iterator.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+using search::streaming::Hit;
+using search::streaming::HitList;
+using search::streaming::HitIterator;
+
+using FieldElement = HitIterator::FieldElement;
+
+namespace {
+
+HitList
+make_hit_list()
+{
+ HitList hl;
+ hl.emplace_back(11, 0, 10, 0);
+ hl.emplace_back(11, 0, 10, 5);
+ hl.emplace_back(11, 1, 12, 0);
+ hl.emplace_back(11, 1, 12, 7);
+ hl.emplace_back(12, 1, 13, 0);
+ hl.emplace_back(12, 1, 13, 9);
+ return hl;
+}
+
+void
+check_seek_to_field_elem(HitIterator& it, const FieldElement& field_element, const Hit* exp_ptr, const vespalib::string& label)
+{
+ SCOPED_TRACE(label);
+ EXPECT_TRUE(it.seek_to_field_element(field_element));
+ EXPECT_TRUE(it.valid());
+ EXPECT_EQ(exp_ptr, &*it);
+}
+
+void
+check_seek_to_field_elem_failure(HitIterator& it, const FieldElement& field_element, const vespalib::string& label)
+{
+ SCOPED_TRACE(label);
+ EXPECT_FALSE(it.seek_to_field_element(field_element));
+ EXPECT_FALSE(it.valid());
+}
+
+void
+check_step_in_field_element(HitIterator& it, FieldElement& field_element, bool exp_success, const Hit* exp_ptr, const vespalib::string& label)
+{
+ SCOPED_TRACE(label);
+ EXPECT_EQ(exp_success, it.step_in_field_element(field_element));
+ if (exp_ptr) {
+ EXPECT_TRUE(it.valid());
+ EXPECT_EQ(it.get_field_element(), field_element);
+ EXPECT_EQ(exp_ptr, &*it);
+ } else {
+ EXPECT_FALSE(it.valid());
+ }
+}
+
+void
+check_seek_in_field_element(HitIterator& it, uint32_t position, FieldElement& field_element, bool exp_success, const Hit* exp_ptr, const vespalib::string& label)
+{
+ SCOPED_TRACE(label);
+ EXPECT_EQ(exp_success, it.seek_in_field_element(position, field_element));
+ if (exp_ptr) {
+ EXPECT_TRUE(it.valid());
+ EXPECT_EQ(it.get_field_element(), field_element);
+ EXPECT_EQ(exp_ptr, &*it);
+ } else {
+ EXPECT_FALSE(it.valid());
+ }
+}
+
+}
+
+TEST(HitITeratorTest, seek_to_field_element)
+{
+ auto hl = make_hit_list();
+ HitIterator it(hl);
+ EXPECT_TRUE(it.valid());
+ EXPECT_EQ(&hl[0], &*it);
+ check_seek_to_field_elem(it, FieldElement(0, 0), &hl[0], "(0, 0)");
+ check_seek_to_field_elem(it, FieldElement(11, 0), &hl[0], "(11, 0)");
+ check_seek_to_field_elem(it, FieldElement(11, 1), &hl[2], "(11, 1)");
+ check_seek_to_field_elem(it, FieldElement(11, 2), &hl[4], "(11, 2)");
+ check_seek_to_field_elem(it, FieldElement(12, 0), &hl[4], "(12, 0)");
+ check_seek_to_field_elem(it, FieldElement(12, 1), &hl[4], "(12, 1)");
+ check_seek_to_field_elem_failure(it, FieldElement(12, 2), "(12, 2)");
+ check_seek_to_field_elem_failure(it, FieldElement(13, 0), "(13, 0)");
+}
+
+TEST(HitIteratorTest, step_in_field_element)
+{
+ auto hl = make_hit_list();
+ HitIterator it(hl);
+ auto field_element = it.get_field_element();
+ check_step_in_field_element(it, field_element, true, &hl[1], "1");
+ check_step_in_field_element(it, field_element, false, &hl[2], "2");
+ check_step_in_field_element(it, field_element, true, &hl[3], "3");
+ check_step_in_field_element(it, field_element, false, &hl[4], "4");
+ check_step_in_field_element(it, field_element, true, &hl[5], "5");
+ check_step_in_field_element(it, field_element, false, nullptr, "end");
+}
+
+TEST(hitIteratorTest, seek_in_field_elem)
+{
+ auto hl = make_hit_list();
+ HitIterator it(hl);
+ auto field_element = it.get_field_element();
+ check_seek_in_field_element(it, 0, field_element, true, &hl[0], "0a");
+ check_seek_in_field_element(it, 2, field_element, true, &hl[1], "2");
+ check_seek_in_field_element(it, 5, field_element, true, &hl[1], "5");
+ check_seek_in_field_element(it, 6, field_element, false, &hl[2], "6");
+ check_seek_in_field_element(it, 0, field_element, true, &hl[2], "0b");
+ check_seek_in_field_element(it, 1, field_element, true, &hl[3], "1");
+ check_seek_in_field_element(it, 7, field_element, true, &hl[3], "7");
+ check_seek_in_field_element(it, 8, field_element, false, &hl[4], "8");
+ check_seek_in_field_element(it, 0, field_element, true, &hl[4], "0c");
+ check_seek_in_field_element(it, 3, field_element, true, &hl[5], "3");
+ check_seek_in_field_element(it, 9, field_element, true, &hl[5], "9");
+ check_seek_in_field_element(it, 10, field_element, false, nullptr, "end");
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt b/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt
index 76119a6d58f..91b8c429800 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt
@@ -3,6 +3,7 @@ vespa_add_library(searchlib_query_streaming OBJECT
SOURCES
dot_product_term.cpp
fuzzy_term.cpp
+ hit_iterator_pack.cpp
in_term.cpp
multi_term.cpp
nearest_neighbor_query_node.cpp
diff --git a/searchlib/src/vespa/searchlib/query/streaming/hit_iterator.h b/searchlib/src/vespa/searchlib/query/streaming/hit_iterator.h
new file mode 100644
index 00000000000..14fd19bba2a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/streaming/hit_iterator.h
@@ -0,0 +1,59 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "hit.h"
+
+namespace search::streaming {
+
+/*
+ * Iterator used over hit list for a term to support near, onear, phrase and
+ * same element query nodes.
+ */
+class HitIterator
+{
+ HitList::const_iterator _cur;
+ HitList::const_iterator _end;
+public:
+ using FieldElement = std::pair<uint32_t, uint32_t>;
+ HitIterator(const HitList& hl) noexcept
+ : _cur(hl.begin()),
+ _end(hl.end())
+ { }
+ bool valid() const noexcept { return _cur != _end; }
+ const Hit* operator->() const noexcept { return _cur.operator->(); }
+ const Hit& operator*() const noexcept { return _cur.operator*(); }
+ FieldElement get_field_element() const noexcept { return std::make_pair(_cur->field_id(), _cur->element_id()); }
+ bool seek_to_field_element(const FieldElement& field_element) noexcept {
+ while (valid()) {
+ if (!(get_field_element() < field_element)) {
+ return true;
+ }
+ ++_cur;
+ }
+ return false;
+ }
+ bool step_in_field_element(FieldElement& field_element) noexcept {
+ ++_cur;
+ if (!valid()) {
+ return false;
+ }
+ auto ife = get_field_element();
+ if (field_element < ife) {
+ field_element = ife;
+ return false;
+ }
+ return true;
+ }
+ bool seek_in_field_element(uint32_t word_pos, FieldElement& field_element) {
+ while (_cur->position() < word_pos) {
+ if (!step_in_field_element(field_element)) {
+ return false;
+ }
+ }
+ return true;
+ }
+ HitIterator& operator++() { ++_cur; return *this; }
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/query/streaming/hit_iterator_pack.cpp b/searchlib/src/vespa/searchlib/query/streaming/hit_iterator_pack.cpp
new file mode 100644
index 00000000000..fabd992c379
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/streaming/hit_iterator_pack.cpp
@@ -0,0 +1,57 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "hit_iterator_pack.h"
+
+namespace search::streaming {
+
+
+HitIteratorPack::HitIteratorPack(const QueryNodeList& children)
+ : _iterators(),
+ _field_element(std::make_pair(0, 0))
+{
+ auto num_children = children.size();
+ _iterators.reserve(num_children);
+ for (auto& child : children) {
+ auto& curr = dynamic_cast<const QueryTerm&>(*child);
+ _iterators.emplace_back(curr.getHitList());
+ }
+}
+
+HitIteratorPack::~HitIteratorPack() = default;
+
+bool
+HitIteratorPack::all_valid() const noexcept
+{
+ if (_iterators.empty()) {
+ return false;
+ }
+ for (auto& it : _iterators) {
+ if (!it.valid()) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool
+HitIteratorPack::seek_to_matching_field_element() noexcept
+{
+ bool retry = true;
+ while (retry) {
+ retry = false;
+ for (auto& it : _iterators) {
+ if (!it.seek_to_field_element(_field_element)) {
+ return false;
+ }
+ auto ife = it.get_field_element();
+ if (_field_element < ife) {
+ _field_element = ife;
+ retry = true;
+ break;
+ }
+ }
+ }
+ return true;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/query/streaming/hit_iterator_pack.h b/searchlib/src/vespa/searchlib/query/streaming/hit_iterator_pack.h
new file mode 100644
index 00000000000..200d579930a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/streaming/hit_iterator_pack.h
@@ -0,0 +1,32 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "hit_iterator.h"
+#include "queryterm.h"
+
+namespace search::streaming {
+
+/*
+ * Iterator pack used over hit list for a term to support near, onear,
+ * phrase and same element query nodes.
+ */
+class HitIteratorPack
+{
+ using iterator = typename std::vector<HitIterator>::iterator;
+ using FieldElement = HitIterator::FieldElement;
+ std::vector<HitIterator> _iterators;
+ FieldElement _field_element;
+public:
+ explicit HitIteratorPack(const QueryNodeList& children);
+ ~HitIteratorPack();
+ FieldElement& get_field_element_ref() noexcept { return _field_element; }
+ HitIterator& front() noexcept { return _iterators.front(); }
+ HitIterator& back() noexcept { return _iterators.back(); }
+ iterator begin() noexcept { return _iterators.begin(); }
+ iterator end() noexcept { return _iterators.end(); }
+ bool all_valid() const noexcept;
+ bool seek_to_matching_field_element() noexcept;
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/query/streaming/query.cpp b/searchlib/src/vespa/searchlib/query/streaming/query.cpp
index 196de23c236..c58e1e62e57 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/query.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/query.cpp
@@ -1,5 +1,6 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "query.h"
+#include "hit_iterator_pack.h"
#include <vespa/searchlib/parsequery/stackdumpiterator.h>
#include <vespa/vespalib/objects/visit.hpp>
#include <cassert>
@@ -238,66 +239,45 @@ PhraseQueryNode::addChild(QueryNode::UP child) {
AndQueryNode::addChild(std::move(child));
}
-namespace {
-
-// TODO: Remove when rewriting PhraseQueryNode::evaluateHits
-uint32_t legacy_pos(const Hit& hit) {
- return ((hit.position() & 0xffffff) | ((hit.field_id() & 0xff) << 24));
-}
-
-}
-
const HitList &
PhraseQueryNode::evaluateHits(HitList & hl) const
{
hl.clear();
_fieldInfo.clear();
- if ( ! AndQueryNode::evaluate()) return hl;
-
- HitList tmpHL;
- const auto & children = getChildren();
- unsigned int fullPhraseLen = children.size();
- unsigned int currPhraseLen = 0;
- std::vector<unsigned int> indexVector(fullPhraseLen, 0);
- auto curr = static_cast<const QueryTerm *> (children[currPhraseLen].get());
- bool exhausted( curr->evaluateHits(tmpHL).empty());
- for (; !exhausted; ) {
- auto next = static_cast<const QueryTerm *>(children[currPhraseLen+1].get());
- unsigned int & currIndex = indexVector[currPhraseLen];
- unsigned int & nextIndex = indexVector[currPhraseLen+1];
-
- const auto & currHit = curr->evaluateHits(tmpHL)[currIndex];
- size_t firstPosition = legacy_pos(currHit);
- uint32_t currElemId = currHit.element_id();
- uint32_t curr_field_id = currHit.field_id();
-
- const HitList & nextHL = next->evaluateHits(tmpHL);
-
- int diff(0);
- size_t nextIndexMax = nextHL.size();
- while ((nextIndex < nextIndexMax) &&
- ((nextHL[nextIndex].field_id() < curr_field_id) ||
- ((nextHL[nextIndex].field_id() == curr_field_id) && (nextHL[nextIndex].element_id() <= currElemId))) &&
- ((diff = legacy_pos(nextHL[nextIndex])-firstPosition) < 1))
- {
- nextIndex++;
- }
- if ((diff == 1) && (nextHL[nextIndex].field_id() == curr_field_id) && (nextHL[nextIndex].element_id() == currElemId)) {
- currPhraseLen++;
- if ((currPhraseLen+1) == fullPhraseLen) {
- Hit h = nextHL[indexVector[currPhraseLen]];
+ HitIteratorPack itr_pack(getChildren());
+ if (!itr_pack.all_valid()) {
+ return hl;
+ }
+ auto& last_child = dynamic_cast<const QueryTerm&>(*(*this)[size() - 1]);
+ while (itr_pack.seek_to_matching_field_element()) {
+ uint32_t first_position = itr_pack.front()->position();
+ bool retry_element = true;
+ while (retry_element) {
+ uint32_t position_offset = 0;
+ bool match = true;
+ for (auto& it : itr_pack) {
+ if (!it.seek_in_field_element(first_position + position_offset, itr_pack.get_field_element_ref())) {
+ retry_element = false;
+ match = false;
+ break;
+ }
+ if (it->position() > first_position + position_offset) {
+ first_position = it->position() - position_offset;
+ match = false;
+ break;
+ }
+ ++position_offset;
+ }
+ if (match) {
+ auto h = *itr_pack.back();
hl.push_back(h);
- const QueryTerm::FieldInfo & fi = next->getFieldInfo(h.field_id());
+ auto& fi = last_child.getFieldInfo(h.field_id());
updateFieldInfo(h.field_id(), hl.size() - 1, fi.getFieldLength());
- currPhraseLen = 0;
- indexVector[0]++;
+ if (!itr_pack.front().step_in_field_element(itr_pack.get_field_element_ref())) {
+ retry_element = false;
+ }
}
- } else {
- currPhraseLen = 0;
- indexVector[currPhraseLen]++;
}
- curr = static_cast<const QueryTerm *>(children[currPhraseLen].get());
- exhausted = (nextIndex >= nextIndexMax) || (indexVector[currPhraseLen] >= curr->evaluateHits(tmpHL).size());
}
return hl;
}