diff options
author | Tor Egge <Tor.Egge@online.no> | 2024-01-24 14:03:19 +0100 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2024-01-24 14:03:19 +0100 |
commit | 1750cc3a7dbe1958ecf0850204736a45ce06c1e9 (patch) | |
tree | 51d5fa45e9462c23b8decb52b62f73d3ea443db4 | |
parent | 2a43344d217ab065425e880cd35be54542423ce8 (diff) |
Implement search::streaming::NearQueryNode::evaluate() and
search::strewaming::ONearQueryNode::evaluate().
4 files changed, 258 insertions, 3 deletions
diff --git a/searchlib/src/tests/query/streaming/CMakeLists.txt b/searchlib/src/tests/query/streaming/CMakeLists.txt index 257ad24854d..7568e45d00a 100644 --- a/searchlib/src/tests/query/streaming/CMakeLists.txt +++ b/searchlib/src/tests/query/streaming/CMakeLists.txt @@ -18,6 +18,15 @@ vespa_add_executable(searchlib_query_streaming_hit_iterator_pack_test_app TEST ) vespa_add_test(NAME searchlib_query_streaming_hit_iterator_pack_test_app COMMAND searchlib_query_streaming_hit_iterator_pack_test_app) +vespa_add_executable(searchlib_query_streaming_near_test_app TEST + SOURCES + near_test.cpp + DEPENDS + searchlib + GTest::gtest +) +vespa_add_test(NAME searchlib_query_streaming_near_test_app COMMAND searchlib_query_streaming_near_test_app) + vespa_add_executable(searchlib_query_streaming_same_element_query_node_test_app TEST SOURCES same_element_query_node_test.cpp diff --git a/searchlib/src/tests/query/streaming/near_test.cpp b/searchlib/src/tests/query/streaming/near_test.cpp new file mode 100644 index 00000000000..ad30725db19 --- /dev/null +++ b/searchlib/src/tests/query/streaming/near_test.cpp @@ -0,0 +1,185 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/searchlib/query/streaming/near_query_node.h> +#include <vespa/searchlib/query/streaming/onear_query_node.h> +#include <vespa/searchlib/query/streaming/queryterm.h> +#include <vespa/searchlib/query/tree/querybuilder.h> +#include <vespa/searchlib/query/tree/simplequery.h> +#include <vespa/searchlib/query/tree/stackdumpcreator.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <vespa/vespalib/stllike/asciistream.h> +#include <ostream> +#include <tuple> + +using TestHit = std::tuple<uint32_t, uint32_t, int32_t, uint32_t>; + +using search::query::QueryBuilder; +using search::query::Node; +using search::query::SimpleQueryNodeTypes; +using search::query::StackDumpCreator; +using search::query::Weight; +using search::streaming::NearQueryNode; +using search::streaming::ONearQueryNode; +using search::streaming::Query; +using search::streaming::QueryNodeResultFactory; +using search::streaming::QueryTerm; +using search::streaming::QueryTermList; + +class TestParam { + bool _ordered; +public: + TestParam(bool ordered_in) + : _ordered(ordered_in) + { + } + bool ordered() const noexcept { return _ordered; } +}; + +std::ostream& operator<<(std::ostream& os, const TestParam& param) +{ + os << (param.ordered() ? "onear" : "near"); + return os; + +} +class NearTest : public ::testing::TestWithParam<TestParam> { +public: + NearTest(); + ~NearTest(); + bool evaluate_query(uint32_t distance, const std::vector<std::vector<TestHit>> &hitsvv); +}; + +NearTest::NearTest() + : ::testing::TestWithParam<TestParam>() +{ +} + +NearTest::~NearTest() = default; + +bool +NearTest::evaluate_query(uint32_t distance, const std::vector<std::vector<TestHit>> &hitsvv) +{ + QueryBuilder<SimpleQueryNodeTypes> builder; + if (GetParam().ordered()) { + builder.addONear(hitsvv.size(), distance); + } else { + builder.addNear(hitsvv.size(), distance); + } + for (uint32_t idx = 0; idx < hitsvv.size(); ++idx) { + vespalib::asciistream s; + s << "s" << idx; + builder.addStringTerm(s.str(), "field", idx, Weight(0)); + } + auto node = builder.build(); + vespalib::string stackDump = StackDumpCreator::create(*node); + QueryNodeResultFactory empty; + auto q = std::make_unique<Query>(empty, stackDump); + if (GetParam().ordered()) { + auto& top = dynamic_cast<ONearQueryNode&>(q->getRoot()); + EXPECT_EQ(hitsvv.size(), top.size()); + } else { + auto& top = dynamic_cast<NearQueryNode&>(q->getRoot()); + EXPECT_EQ(hitsvv.size(), top.size()); + } + QueryTermList terms; + q->getLeaves(terms); + EXPECT_EQ(hitsvv.size(), terms.size()); + for (QueryTerm * qt : terms) { + qt->resizeFieldId(1); + } + for (uint32_t idx = 0; idx < hitsvv.size(); ++idx) { + auto& hitsv = hitsvv[idx]; + auto& term = terms[idx]; + for (auto& hit : hitsv) { + term->add(std::get<0>(hit), std::get<1>(hit), std::get<2>(hit), std::get<3>(hit)); + } + } + return q->getRoot().evaluate(); +} + +TEST_P(NearTest, test_empty_near) +{ + EXPECT_FALSE(evaluate_query(4, { })); +} + +TEST_P(NearTest, test_near_success) +{ + EXPECT_TRUE(evaluate_query(4, { { { 0, 0, 10, 0} }, + { { 0, 0, 10, 2} }, + { { 0, 0, 10, 4} } })); +} + +TEST_P(NearTest, test_near_fail_distance_exceeded_first_term) +{ + EXPECT_FALSE(evaluate_query(4, { { { 0, 0, 10, 0} }, + { { 0, 0, 10, 2} }, + { { 0, 0, 10, 5} } })); +} + +TEST_P(NearTest, test_near_fail_distance_exceeded_second_term) +{ + EXPECT_FALSE(evaluate_query(4, { { { 0, 0, 10, 2} }, + { { 0, 0, 10, 0} }, + { { 0, 0, 10, 5} } })); +} + +TEST_P(NearTest, test_near_fail_element) +{ + EXPECT_FALSE(evaluate_query(4, { { { 0, 0, 10, 0} }, + { { 0, 0, 10, 2} }, + { { 0, 1, 10, 4} } })); +} + +TEST_P(NearTest, test_near_fail_field) +{ + EXPECT_FALSE(evaluate_query(4, { { { 0, 0, 10, 0} }, + { { 0, 0, 10, 2} }, + { { 1, 0, 10, 4} } })); +} + +TEST_P(NearTest, test_near_sucess_after_step_first_term) +{ + EXPECT_TRUE(evaluate_query(4, { { { 0, 0, 10, 0}, { 0, 0, 10, 2} }, + { { 0, 0, 10, 3} }, + { { 0, 0, 10, 5} } })); +} + +TEST_P(NearTest, test_near_sucess_after_step_second_term) +{ + EXPECT_TRUE(evaluate_query(4, { { { 0, 0, 10, 2} }, + { { 0, 0, 10, 0}, {0, 0, 10, 3} }, + { { 0, 0, 10, 5} } })); +} + +TEST_P(NearTest, test_near_sucess_in_second_element) +{ + EXPECT_TRUE(evaluate_query(4, { { { 0, 0, 10, 0}, { 0, 1, 10, 0} }, + { { 0, 0, 10, 2}, { 0, 1, 10, 2} }, + { { 0, 0, 10, 5}, { 0, 1, 10, 4} } })); +} + +TEST_P(NearTest, test_near_sucess_in_second_field) +{ + EXPECT_TRUE(evaluate_query(4, { { { 0, 0, 10, 0}, { 1, 0, 10, 0} }, + { { 0, 0, 10, 2}, { 1, 0, 10, 2} }, + { { 0, 0, 10, 5}, { 1, 0, 10, 4} } })); +} + +TEST_P(NearTest, test_order_might_matter) +{ + EXPECT_EQ(!GetParam().ordered(), evaluate_query(4, { { { 0, 0, 10, 2} }, + { { 0, 0, 10, 0} }, + { { 0, 0, 10, 4} } })); +} + +TEST_P(NearTest, test_overlap_might_matter) +{ + EXPECT_EQ(!GetParam().ordered(), evaluate_query(4, { { { 0, 0, 10, 0} }, + { { 0, 0, 10, 0} }, + { { 0, 0, 10, 4} } })); +} + +auto test_values = ::testing::Values(TestParam(false), TestParam(true)); + +INSTANTIATE_TEST_SUITE_P(NearTests, NearTest, test_values, testing::PrintToStringParamName()); + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/vespa/searchlib/query/streaming/near_query_node.cpp b/searchlib/src/vespa/searchlib/query/streaming/near_query_node.cpp index 126ed1ff69e..f1722e4924a 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/near_query_node.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/near_query_node.cpp @@ -1,6 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "near_query_node.h" +#include "hit_iterator_pack.h" #include <vespa/vespalib/objects/visit.hpp> namespace search::streaming { @@ -8,7 +9,36 @@ namespace search::streaming { bool NearQueryNode::evaluate() const { - return AndQueryNode::evaluate(); + HitIteratorPack itr_pack(getChildren()); + if (!itr_pack.all_valid()) { + return false; + } + while (itr_pack.seek_to_matching_field_element()) { + uint32_t min_position = 0; + if (itr_pack.front()->position() > min_position + distance()) { + min_position = itr_pack.front()->position() - distance(); + } + bool retry_element = true; + while (retry_element) { + bool match = true; + for (auto& it : itr_pack) { + if (!it.seek_in_field_element(min_position, itr_pack.get_field_element_ref())) { + retry_element = false; + match = false; + break; + } + if (it->position() > min_position + distance()) { + min_position = it->position() - distance(); + match = false; + break; + } + } + if (match) { + return true; + } + } + } + return false; } void diff --git a/searchlib/src/vespa/searchlib/query/streaming/onear_query_node.cpp b/searchlib/src/vespa/searchlib/query/streaming/onear_query_node.cpp index a06ff0f90e1..023cb6f95e3 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/onear_query_node.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/onear_query_node.cpp @@ -1,14 +1,45 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "onear_query_node.h" +#include "hit_iterator_pack.h" namespace search::streaming { bool ONearQueryNode::evaluate() const { - bool ok(NearQueryNode::evaluate()); - return ok; + HitIteratorPack itr_pack(getChildren()); + if (!itr_pack.all_valid()) { + return false; + } + while (itr_pack.seek_to_matching_field_element()) { + uint32_t min_position = 0; + if (itr_pack.front()->position() > min_position + distance()) { + min_position = itr_pack.front()->position() - distance(); + } + bool retry_element = true; + while (retry_element) { + bool match = true; + uint32_t min_next_position = min_position; + for (auto& it : itr_pack) { + if (!it.seek_in_field_element(min_next_position, itr_pack.get_field_element_ref())) { + retry_element = false; + match = false; + break; + } + if (it->position() > min_position + distance()) { + min_position = it->position() - distance(); + match = false; + break; + } + min_next_position = it->position() + 1; + } + if (match) { + return true; + } + } + } + return false; } } |