diff options
Diffstat (limited to 'searchlib/src/tests')
13 files changed, 590 insertions, 304 deletions
diff --git a/searchlib/src/tests/nearsearch/nearsearch_test.cpp b/searchlib/src/tests/nearsearch/nearsearch_test.cpp index 3751fc93cea..95701e59444 100644 --- a/searchlib/src/tests/nearsearch/nearsearch_test.cpp +++ b/searchlib/src/tests/nearsearch/nearsearch_test.cpp @@ -215,7 +215,7 @@ bool Test::testNearSearch(MyQuery &query, uint32_t matchId) { LOG(info, "testNearSearch(%d)", matchId); - search::queryeval::IntermediateBlueprint *near_b = 0; + search::queryeval::IntermediateBlueprint *near_b = nullptr; if (query.isOrdered()) { near_b = new search::queryeval::ONearBlueprint(query.getWindow()); } else { @@ -228,9 +228,10 @@ Test::testNearSearch(MyQuery &query, uint32_t matchId) layout.allocTermField(fieldId); near_b->addChild(query.getTerm(i).make_blueprint(fieldId, i)); } - search::fef::MatchData::UP md(layout.createMatchData()); - + bp->setDocIdLimit(1000); + bp = search::queryeval::Blueprint::optimize_and_sort(std::move(bp), true, true); bp->fetchPostings(search::queryeval::ExecuteInfo::TRUE); + search::fef::MatchData::UP md(layout.createMatchData()); search::queryeval::SearchIterator::UP near = bp->createSearch(*md, true); near->initFullRange(); bool foundMatch = false; diff --git a/searchlib/src/tests/query/streaming/CMakeLists.txt b/searchlib/src/tests/query/streaming/CMakeLists.txt new file mode 100644 index 00000000000..257ad24854d --- /dev/null +++ b/searchlib/src/tests/query/streaming/CMakeLists.txt @@ -0,0 +1,37 @@ +# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +vespa_add_executable(searchlib_query_streaming_hit_iterator_test_app TEST + SOURCES + hit_iterator_test.cpp + DEPENDS + searchlib + GTest::gtest +) +vespa_add_test(NAME searchlib_query_streaming_hit_iterator_test_app COMMAND searchlib_query_streaming_hit_iterator_test_app) + +vespa_add_executable(searchlib_query_streaming_hit_iterator_pack_test_app TEST + SOURCES + hit_iterator_pack_test.cpp + DEPENDS + searchlib + GTest::gtest +) +vespa_add_test(NAME searchlib_query_streaming_hit_iterator_pack_test_app COMMAND searchlib_query_streaming_hit_iterator_pack_test_app) + +vespa_add_executable(searchlib_query_streaming_same_element_query_node_test_app TEST + SOURCES + same_element_query_node_test.cpp + DEPENDS + searchlib + GTest::gtest +) +vespa_add_test(NAME searchlib_query_streaming_same_element_query_node_test_app COMMAND searchlib_query_streaming_same_element_query_node_test_app) + +vespa_add_executable(searchlib_query_streaming_phrase_query_node_test_app TEST + SOURCES + phrase_query_node_test.cpp + DEPENDS + searchlib + GTest::gtest +) +vespa_add_test(NAME searchlib_query_streaming_phrase_query_node_test_app COMMAND searchlib_query_streaming_phrase_query_node_test_app) diff --git a/searchlib/src/tests/query/streaming/hit_iterator_pack_test.cpp b/searchlib/src/tests/query/streaming/hit_iterator_pack_test.cpp new file mode 100644 index 00000000000..7d7d8307920 --- /dev/null +++ b/searchlib/src/tests/query/streaming/hit_iterator_pack_test.cpp @@ -0,0 +1,44 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/searchlib/query/streaming/hit_iterator_pack.h> +#include <vespa/vespalib/gtest/gtest.h> + +using search::streaming::HitIterator; +using search::streaming::HitIteratorPack; +using search::streaming::QueryNodeList; +using search::streaming::QueryTerm; +using search::streaming::QueryNodeResultBase; + +using FieldElement = HitIterator::FieldElement; + +TEST(HitIteratorPackTest, seek_to_matching_field_element) +{ + QueryNodeList qnl; + auto qt = std::make_unique<QueryTerm>(std::unique_ptr<QueryNodeResultBase>(), "7", "", QueryTerm::Type::WORD); + qt->add(11, 0, 10, 0); + qt->add(11, 0, 10, 5); + qt->add(11, 1, 12, 0); + qt->add(11, 1, 12, 0); + qt->add(12, 1, 13, 0); + qt->add(12, 1, 13, 0); + qnl.emplace_back(std::move(qt)); + qt = std::make_unique<QueryTerm>(std::unique_ptr<QueryNodeResultBase>(), "8", "", QueryTerm::Type::WORD); + qt->add(2, 0, 4, 0); + qt->add(11, 0, 10, 0); + qt->add(12, 1, 13, 0); + qt->add(12, 2, 14, 0); + qnl.emplace_back(std::move(qt)); + HitIteratorPack itr_pack(qnl); + EXPECT_TRUE(itr_pack.all_valid()); + EXPECT_TRUE(itr_pack.seek_to_matching_field_element()); + EXPECT_EQ(FieldElement(11, 0), itr_pack.get_field_element_ref()); + EXPECT_TRUE(itr_pack.seek_to_matching_field_element()); + EXPECT_EQ(FieldElement(11, 0), itr_pack.get_field_element_ref()); + ++itr_pack.get_field_element_ref().second; + EXPECT_TRUE(itr_pack.seek_to_matching_field_element()); + EXPECT_EQ(FieldElement(12, 1), itr_pack.get_field_element_ref()); + ++itr_pack.get_field_element_ref().second; + EXPECT_FALSE(itr_pack.seek_to_matching_field_element()); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/tests/query/streaming/hit_iterator_test.cpp b/searchlib/src/tests/query/streaming/hit_iterator_test.cpp new file mode 100644 index 00000000000..a9588ea3d6c --- /dev/null +++ b/searchlib/src/tests/query/streaming/hit_iterator_test.cpp @@ -0,0 +1,122 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/searchlib/query/streaming/hit_iterator.h> +#include <vespa/vespalib/gtest/gtest.h> + +using search::streaming::Hit; +using search::streaming::HitList; +using search::streaming::HitIterator; + +using FieldElement = HitIterator::FieldElement; + +namespace { + +HitList +make_hit_list() +{ + HitList hl; + hl.emplace_back(11, 0, 10, 0); + hl.emplace_back(11, 0, 10, 5); + hl.emplace_back(11, 1, 12, 0); + hl.emplace_back(11, 1, 12, 7); + hl.emplace_back(12, 1, 13, 0); + hl.emplace_back(12, 1, 13, 9); + return hl; +} + +void +check_seek_to_field_elem(HitIterator& it, const FieldElement& field_element, const Hit* exp_ptr, const vespalib::string& label) +{ + SCOPED_TRACE(label); + EXPECT_TRUE(it.seek_to_field_element(field_element)); + EXPECT_TRUE(it.valid()); + EXPECT_EQ(exp_ptr, &*it); +} + +void +check_seek_to_field_elem_failure(HitIterator& it, const FieldElement& field_element, const vespalib::string& label) +{ + SCOPED_TRACE(label); + EXPECT_FALSE(it.seek_to_field_element(field_element)); + EXPECT_FALSE(it.valid()); +} + +void +check_step_in_field_element(HitIterator& it, FieldElement& field_element, bool exp_success, const Hit* exp_ptr, const vespalib::string& label) +{ + SCOPED_TRACE(label); + EXPECT_EQ(exp_success, it.step_in_field_element(field_element)); + if (exp_ptr) { + EXPECT_TRUE(it.valid()); + EXPECT_EQ(it.get_field_element(), field_element); + EXPECT_EQ(exp_ptr, &*it); + } else { + EXPECT_FALSE(it.valid()); + } +} + +void +check_seek_in_field_element(HitIterator& it, uint32_t position, FieldElement& field_element, bool exp_success, const Hit* exp_ptr, const vespalib::string& label) +{ + SCOPED_TRACE(label); + EXPECT_EQ(exp_success, it.seek_in_field_element(position, field_element)); + if (exp_ptr) { + EXPECT_TRUE(it.valid()); + EXPECT_EQ(it.get_field_element(), field_element); + EXPECT_EQ(exp_ptr, &*it); + } else { + EXPECT_FALSE(it.valid()); + } +} + +} + +TEST(HitITeratorTest, seek_to_field_element) +{ + auto hl = make_hit_list(); + HitIterator it(hl); + EXPECT_TRUE(it.valid()); + EXPECT_EQ(&hl[0], &*it); + check_seek_to_field_elem(it, FieldElement(0, 0), &hl[0], "(0, 0)"); + check_seek_to_field_elem(it, FieldElement(11, 0), &hl[0], "(11, 0)"); + check_seek_to_field_elem(it, FieldElement(11, 1), &hl[2], "(11, 1)"); + check_seek_to_field_elem(it, FieldElement(11, 2), &hl[4], "(11, 2)"); + check_seek_to_field_elem(it, FieldElement(12, 0), &hl[4], "(12, 0)"); + check_seek_to_field_elem(it, FieldElement(12, 1), &hl[4], "(12, 1)"); + check_seek_to_field_elem_failure(it, FieldElement(12, 2), "(12, 2)"); + check_seek_to_field_elem_failure(it, FieldElement(13, 0), "(13, 0)"); +} + +TEST(HitIteratorTest, step_in_field_element) +{ + auto hl = make_hit_list(); + HitIterator it(hl); + auto field_element = it.get_field_element(); + check_step_in_field_element(it, field_element, true, &hl[1], "1"); + check_step_in_field_element(it, field_element, false, &hl[2], "2"); + check_step_in_field_element(it, field_element, true, &hl[3], "3"); + check_step_in_field_element(it, field_element, false, &hl[4], "4"); + check_step_in_field_element(it, field_element, true, &hl[5], "5"); + check_step_in_field_element(it, field_element, false, nullptr, "end"); +} + +TEST(hitIteratorTest, seek_in_field_elem) +{ + auto hl = make_hit_list(); + HitIterator it(hl); + auto field_element = it.get_field_element(); + check_seek_in_field_element(it, 0, field_element, true, &hl[0], "0a"); + check_seek_in_field_element(it, 2, field_element, true, &hl[1], "2"); + check_seek_in_field_element(it, 5, field_element, true, &hl[1], "5"); + check_seek_in_field_element(it, 6, field_element, false, &hl[2], "6"); + check_seek_in_field_element(it, 0, field_element, true, &hl[2], "0b"); + check_seek_in_field_element(it, 1, field_element, true, &hl[3], "1"); + check_seek_in_field_element(it, 7, field_element, true, &hl[3], "7"); + check_seek_in_field_element(it, 8, field_element, false, &hl[4], "8"); + check_seek_in_field_element(it, 0, field_element, true, &hl[4], "0c"); + check_seek_in_field_element(it, 3, field_element, true, &hl[5], "3"); + check_seek_in_field_element(it, 9, field_element, true, &hl[5], "9"); + check_seek_in_field_element(it, 10, field_element, false, nullptr, "end"); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/tests/query/streaming/phrase_query_node_test.cpp b/searchlib/src/tests/query/streaming/phrase_query_node_test.cpp new file mode 100644 index 00000000000..5caae8d6e97 --- /dev/null +++ b/searchlib/src/tests/query/streaming/phrase_query_node_test.cpp @@ -0,0 +1,95 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/searchlib/query/streaming/phrase_query_node.h> +#include <vespa/searchlib/query/streaming/queryterm.h> +#include <vespa/searchlib/query/tree/querybuilder.h> +#include <vespa/searchlib/query/tree/simplequery.h> +#include <vespa/searchlib/query/tree/stackdumpcreator.h> +#include <vespa/vespalib/gtest/gtest.h> + +using search::query::QueryBuilder; +using search::query::Node; +using search::query::SimpleQueryNodeTypes; +using search::query::StackDumpCreator; +using search::query::Weight; +using search::streaming::HitList; +using search::streaming::PhraseQueryNode; +using search::streaming::Query; +using search::streaming::QueryTerm; +using search::streaming::QueryNodeRefList; +using search::streaming::QueryNodeResultFactory; +using search::streaming::QueryTermList; + +TEST(PhraseQueryNodeTest, test_phrase_evaluate) +{ + QueryBuilder<SimpleQueryNodeTypes> builder; + builder.addPhrase(3, "", 0, Weight(0)); + { + builder.addStringTerm("a", "", 0, Weight(0)); + builder.addStringTerm("b", "", 0, Weight(0)); + builder.addStringTerm("c", "", 0, Weight(0)); + } + Node::UP node = builder.build(); + vespalib::string stackDump = StackDumpCreator::create(*node); + QueryNodeResultFactory empty; + Query q(empty, stackDump); + QueryNodeRefList phrases; + q.getPhrases(phrases); + QueryTermList terms; + q.getLeaves(terms); + for (QueryTerm * qt : terms) { + qt->resizeFieldId(1); + } + + // field 0 + terms[0]->add(0, 0, 1, 0); + terms[1]->add(0, 0, 1, 1); + terms[2]->add(0, 0, 1, 2); + terms[0]->add(0, 0, 1, 7); + terms[1]->add(0, 1, 1, 8); + terms[2]->add(0, 0, 1, 9); + // field 1 + terms[0]->add(1, 0, 1, 4); + terms[1]->add(1, 0, 1, 5); + terms[2]->add(1, 0, 1, 6); + // field 2 (not complete match) + terms[0]->add(2, 0, 1, 1); + terms[1]->add(2, 0, 1, 2); + terms[2]->add(2, 0, 1, 4); + // field 3 + terms[0]->add(3, 0, 1, 0); + terms[1]->add(3, 0, 1, 1); + terms[2]->add(3, 0, 1, 2); + // field 4 (not complete match) + terms[0]->add(4, 0, 1, 1); + terms[1]->add(4, 0, 1, 2); + // field 5 (not complete match) + terms[0]->add(5, 0, 1, 2); + terms[1]->add(5, 0, 1, 1); + terms[2]->add(5, 0, 1, 0); + HitList hits; + auto * p = static_cast<PhraseQueryNode *>(phrases[0]); + p->evaluateHits(hits); + ASSERT_EQ(3u, hits.size()); + EXPECT_EQ(0u, hits[0].field_id()); + EXPECT_EQ(0u, hits[0].element_id()); + EXPECT_EQ(2u, hits[0].position()); + EXPECT_EQ(1u, hits[1].field_id()); + EXPECT_EQ(0u, hits[1].element_id()); + EXPECT_EQ(6u, hits[1].position()); + EXPECT_EQ(3u, hits[2].field_id()); + EXPECT_EQ(0u, hits[2].element_id()); + EXPECT_EQ(2u, hits[2].position()); + ASSERT_EQ(4u, p->getFieldInfoSize()); + EXPECT_EQ(0u, p->getFieldInfo(0).getHitOffset()); + EXPECT_EQ(1u, p->getFieldInfo(0).getHitCount()); + EXPECT_EQ(1u, p->getFieldInfo(1).getHitOffset()); + EXPECT_EQ(1u, p->getFieldInfo(1).getHitCount()); + EXPECT_EQ(0u, p->getFieldInfo(2).getHitOffset()); // invalid, but will never be used + EXPECT_EQ(0u, p->getFieldInfo(2).getHitCount()); + EXPECT_EQ(2u, p->getFieldInfo(3).getHitOffset()); + EXPECT_EQ(1u, p->getFieldInfo(3).getHitCount()); + EXPECT_TRUE(p->evaluate()); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/tests/query/streaming/same_element_query_node_test.cpp b/searchlib/src/tests/query/streaming/same_element_query_node_test.cpp new file mode 100644 index 00000000000..ece6dc551b2 --- /dev/null +++ b/searchlib/src/tests/query/streaming/same_element_query_node_test.cpp @@ -0,0 +1,135 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/searchlib/query/streaming/same_element_query_node.h> +#include <vespa/searchlib/query/streaming/queryterm.h> +#include <vespa/searchlib/query/tree/querybuilder.h> +#include <vespa/searchlib/query/tree/simplequery.h> +#include <vespa/searchlib/query/tree/stackdumpcreator.h> +#include <vespa/vespalib/gtest/gtest.h> + +using search::query::QueryBuilder; +using search::query::Node; +using search::query::SimpleQueryNodeTypes; +using search::query::StackDumpCreator; +using search::query::Weight; +using search::streaming::HitList; +using search::streaming::Query; +using search::streaming::QueryNode; +using search::streaming::QueryNodeResultFactory; +using search::streaming::QueryTerm; +using search::streaming::QueryTermList; +using search::streaming::SameElementQueryNode; + +namespace { + +class AllowRewrite : public QueryNodeResultFactory +{ +public: + explicit AllowRewrite(vespalib::stringref index) noexcept : _allowedIndex(index) {} + bool allow_float_terms_rewrite(vespalib::stringref index) const noexcept override { return index == _allowedIndex; } +private: + vespalib::string _allowedIndex; +}; + +} + +TEST(SameElementQueryNodeTest, a_unhandled_sameElement_stack) +{ + const char * stack = "\022\002\026xyz_abcdefghij_xyzxyzxQ\001\vxxxxxx_name\034xxxxxx_xxxx_xxxxxxx_xxxxxxxxE\002\005delta\b<0.00393"; + vespalib::stringref stackDump(stack); + EXPECT_EQ(85u, stackDump.size()); + AllowRewrite empty(""); + const Query q(empty, stackDump); + EXPECT_TRUE(q.valid()); + const QueryNode & root = q.getRoot(); + auto sameElement = dynamic_cast<const SameElementQueryNode *>(&root); + EXPECT_TRUE(sameElement != nullptr); + EXPECT_EQ(2u, sameElement->size()); + EXPECT_EQ("xyz_abcdefghij_xyzxyzx", sameElement->getIndex()); + auto term0 = dynamic_cast<const QueryTerm *>((*sameElement)[0].get()); + EXPECT_TRUE(term0 != nullptr); + auto term1 = dynamic_cast<const QueryTerm *>((*sameElement)[1].get()); + EXPECT_TRUE(term1 != nullptr); +} + +namespace { + void verifyQueryTermNode(const vespalib::string & index, const QueryNode *node) { + EXPECT_TRUE(dynamic_cast<const QueryTerm *>(node) != nullptr); + EXPECT_EQ(index, node->getIndex()); + } +} + +TEST(SameElementQueryNodeTest, test_same_element_evaluate) +{ + QueryBuilder<SimpleQueryNodeTypes> builder; + builder.addSameElement(3, "field", 0, Weight(0)); + { + builder.addStringTerm("a", "f1", 0, Weight(0)); + builder.addStringTerm("b", "f2", 1, Weight(0)); + builder.addStringTerm("c", "f3", 2, Weight(0)); + } + Node::UP node = builder.build(); + vespalib::string stackDump = StackDumpCreator::create(*node); + QueryNodeResultFactory empty; + Query q(empty, stackDump); + auto * sameElem = dynamic_cast<SameElementQueryNode *>(&q.getRoot()); + EXPECT_TRUE(sameElem != nullptr); + EXPECT_EQ("field", sameElem->getIndex()); + EXPECT_EQ(3u, sameElem->size()); + verifyQueryTermNode("field.f1", (*sameElem)[0].get()); + verifyQueryTermNode("field.f2", (*sameElem)[1].get()); + verifyQueryTermNode("field.f3", (*sameElem)[2].get()); + + QueryTermList terms; + q.getLeaves(terms); + EXPECT_EQ(3u, terms.size()); + for (QueryTerm * qt : terms) { + qt->resizeFieldId(3); + } + + // field 0 + terms[0]->add(0, 0, 10, 1); + terms[0]->add(0, 1, 20, 2); + terms[0]->add(0, 2, 30, 3); + terms[0]->add(0, 3, 40, 4); + terms[0]->add(0, 4, 50, 5); + terms[0]->add(0, 5, 60, 6); + + terms[1]->add(1, 0, 70, 7); + terms[1]->add(1, 1, 80, 8); + terms[1]->add(1, 2, 90, 9); + terms[1]->add(1, 4, 100, 10); + terms[1]->add(1, 5, 110, 11); + terms[1]->add(1, 6, 120, 12); + + terms[2]->add(2, 0, 130, 13); + terms[2]->add(2, 2, 140, 14); + terms[2]->add(2, 4, 150, 15); + terms[2]->add(2, 5, 160, 16); + terms[2]->add(2, 6, 170, 17); + HitList hits; + sameElem->evaluateHits(hits); + EXPECT_EQ(4u, hits.size()); + EXPECT_EQ(2u, hits[0].field_id()); + EXPECT_EQ(0u, hits[0].element_id()); + EXPECT_EQ(130, hits[0].element_weight()); + EXPECT_EQ(0u, hits[0].position()); + + EXPECT_EQ(2u, hits[1].field_id()); + EXPECT_EQ(2u, hits[1].element_id()); + EXPECT_EQ(140, hits[1].element_weight()); + EXPECT_EQ(0u, hits[1].position()); + + EXPECT_EQ(2u, hits[2].field_id()); + EXPECT_EQ(4u, hits[2].element_id()); + EXPECT_EQ(150, hits[2].element_weight()); + EXPECT_EQ(0u, hits[2].position()); + + EXPECT_EQ(2u, hits[3].field_id()); + EXPECT_EQ(5u, hits[3].element_id()); + EXPECT_EQ(160, hits[3].element_weight()); + EXPECT_EQ(0u, hits[3].position()); + EXPECT_TRUE(sameElem->evaluate()); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/tests/query/streaming_query_test.cpp b/searchlib/src/tests/query/streaming_query_test.cpp index 97b3d88c25e..d2be1d453a2 100644 --- a/searchlib/src/tests/query/streaming_query_test.cpp +++ b/searchlib/src/tests/query/streaming_query_test.cpp @@ -4,6 +4,7 @@ #include <vespa/searchlib/fef/matchdata.h> #include <vespa/searchlib/query/streaming/dot_product_term.h> #include <vespa/searchlib/query/streaming/in_term.h> +#include <vespa/searchlib/query/streaming/phrase_query_node.h> #include <vespa/searchlib/query/streaming/query.h> #include <vespa/searchlib/query/streaming/nearest_neighbor_query_node.h> #include <vespa/searchlib/query/streaming/wand_term.h> @@ -23,10 +24,11 @@ using TermType = QueryTerm::Type; using search::fef::SimpleTermData; using search::fef::MatchData; -void assertHit(const Hit & h, size_t expWordpos, size_t exp_field_id, int32_t weight) { - EXPECT_EQ(h.wordpos(), expWordpos); +void assertHit(const Hit & h, uint32_t exp_field_id, uint32_t exp_element_id, int32_t exp_element_weight, size_t exp_position) { EXPECT_EQ(h.field_id(), exp_field_id); - EXPECT_EQ(h.weight(), weight); + EXPECT_EQ(h.element_id(), exp_element_id); + EXPECT_EQ(h.element_weight(), exp_element_weight); + EXPECT_EQ(h.position(), exp_position); } @@ -427,87 +429,19 @@ TEST(StreamingQueryTest, test_get_query_parts) } } -TEST(StreamingQueryTest, test_phrase_evaluate) +TEST(StreamingQueryTest, test_hit) { - QueryBuilder<SimpleQueryNodeTypes> builder; - builder.addPhrase(3, "", 0, Weight(0)); - { - builder.addStringTerm("a", "", 0, Weight(0)); - builder.addStringTerm("b", "", 0, Weight(0)); - builder.addStringTerm("c", "", 0, Weight(0)); - } - Node::UP node = builder.build(); - vespalib::string stackDump = StackDumpCreator::create(*node); - QueryNodeResultFactory empty; - Query q(empty, stackDump); - QueryNodeRefList phrases; - q.getPhrases(phrases); - QueryTermList terms; - q.getLeaves(terms); - for (QueryTerm * qt : terms) { - qt->resizeFieldId(1); - } + // field id + assertHit(Hit( 1, 0, 1, 0), 1, 0, 1, 0); + assertHit(Hit(255, 0, 1, 0), 255, 0, 1, 0); + assertHit(Hit(256, 0, 1, 0), 256, 0, 1, 0); - // field 0 - terms[0]->add(0, 0, 0, 1); - terms[1]->add(1, 0, 0, 1); - terms[2]->add(2, 0, 0, 1); - terms[0]->add(7, 0, 0, 1); - terms[1]->add(8, 0, 1, 1); - terms[2]->add(9, 0, 0, 1); - // field 1 - terms[0]->add(4, 1, 0, 1); - terms[1]->add(5, 1, 0, 1); - terms[2]->add(6, 1, 0, 1); - // field 2 (not complete match) - terms[0]->add(1, 2, 0, 1); - terms[1]->add(2, 2, 0, 1); - terms[2]->add(4, 2, 0, 1); - // field 3 - terms[0]->add(0, 3, 0, 1); - terms[1]->add(1, 3, 0, 1); - terms[2]->add(2, 3, 0, 1); - // field 4 (not complete match) - terms[0]->add(1, 4, 0, 1); - terms[1]->add(2, 4, 0, 1); - // field 5 (not complete match) - terms[0]->add(2, 5, 0, 1); - terms[1]->add(1, 5, 0, 1); - terms[2]->add(0, 5, 0, 1); - HitList hits; - auto * p = static_cast<PhraseQueryNode *>(phrases[0]); - p->evaluateHits(hits); - ASSERT_EQ(3u, hits.size()); - EXPECT_EQ(hits[0].wordpos(), 2u); - EXPECT_EQ(hits[0].field_id(), 0u); - EXPECT_EQ(hits[1].wordpos(), 6u); - EXPECT_EQ(hits[1].field_id(), 1u); - EXPECT_EQ(hits[2].wordpos(), 2u); - EXPECT_EQ(hits[2].field_id(), 3u); - ASSERT_EQ(4u, p->getFieldInfoSize()); - EXPECT_EQ(p->getFieldInfo(0).getHitOffset(), 0u); - EXPECT_EQ(p->getFieldInfo(0).getHitCount(), 1u); - EXPECT_EQ(p->getFieldInfo(1).getHitOffset(), 1u); - EXPECT_EQ(p->getFieldInfo(1).getHitCount(), 1u); - EXPECT_EQ(p->getFieldInfo(2).getHitOffset(), 0u); // invalid, but will never be used - EXPECT_EQ(p->getFieldInfo(2).getHitCount(), 0u); - EXPECT_EQ(p->getFieldInfo(3).getHitOffset(), 2u); - EXPECT_EQ(p->getFieldInfo(3).getHitCount(), 1u); - EXPECT_TRUE(p->evaluate()); -} + // positions + assertHit(Hit(0, 0, 0, 0), 0, 0, 0, 0); + assertHit(Hit(0, 0, 1, 256), 0, 0, 1, 256); + assertHit(Hit(0, 0, -1, 16777215), 0, 0, -1, 16777215); + assertHit(Hit(0, 0, 1, 16777216), 0, 0, 1, 16777216); -TEST(StreamingQueryTest, test_hit) -{ - // positions (0 - (2^24-1)) - assertHit(Hit(0, 0, 0, 0), 0, 0, 0); - assertHit(Hit(256, 0, 0, 1), 256, 0, 1); - assertHit(Hit(16777215, 0, 0, -1), 16777215, 0, -1); - assertHit(Hit(16777216, 0, 0, 1), 0, 1, 1); // overflow - - // contexts (0 - 255) - assertHit(Hit(0, 1, 0, 1), 0, 1, 1); - assertHit(Hit(0, 255, 0, 1), 0, 255, 1); - assertHit(Hit(0, 256, 0, 1), 0, 0, 1); // overflow } void assertInt8Range(const std::string &term, bool expAdjusted, int64_t expLow, int64_t expHigh) { @@ -769,105 +703,6 @@ TEST(StreamingQueryTest, require_that_we_do_not_break_the_stack_on_bad_query) EXPECT_FALSE(term.isValid()); } -TEST(StreamingQueryTest, a_unhandled_sameElement_stack) -{ - const char * stack = "\022\002\026xyz_abcdefghij_xyzxyzxQ\001\vxxxxxx_name\034xxxxxx_xxxx_xxxxxxx_xxxxxxxxE\002\005delta\b<0.00393"; - vespalib::stringref stackDump(stack); - EXPECT_EQ(85u, stackDump.size()); - AllowRewrite empty(""); - const Query q(empty, stackDump); - EXPECT_TRUE(q.valid()); - const QueryNode & root = q.getRoot(); - auto sameElement = dynamic_cast<const SameElementQueryNode *>(&root); - EXPECT_TRUE(sameElement != nullptr); - EXPECT_EQ(2u, sameElement->size()); - EXPECT_EQ("xyz_abcdefghij_xyzxyzx", sameElement->getIndex()); - auto term0 = dynamic_cast<const QueryTerm *>((*sameElement)[0].get()); - EXPECT_TRUE(term0 != nullptr); - auto term1 = dynamic_cast<const QueryTerm *>((*sameElement)[1].get()); - EXPECT_TRUE(term1 != nullptr); -} - -namespace { - void verifyQueryTermNode(const vespalib::string & index, const QueryNode *node) { - EXPECT_TRUE(dynamic_cast<const QueryTerm *>(node) != nullptr); - EXPECT_EQ(index, node->getIndex()); - } -} - -TEST(StreamingQueryTest, test_same_element_evaluate) -{ - QueryBuilder<SimpleQueryNodeTypes> builder; - builder.addSameElement(3, "field", 0, Weight(0)); - { - builder.addStringTerm("a", "f1", 0, Weight(0)); - builder.addStringTerm("b", "f2", 1, Weight(0)); - builder.addStringTerm("c", "f3", 2, Weight(0)); - } - Node::UP node = builder.build(); - vespalib::string stackDump = StackDumpCreator::create(*node); - QueryNodeResultFactory empty; - Query q(empty, stackDump); - auto * sameElem = dynamic_cast<SameElementQueryNode *>(&q.getRoot()); - EXPECT_TRUE(sameElem != nullptr); - EXPECT_EQ("field", sameElem->getIndex()); - EXPECT_EQ(3u, sameElem->size()); - verifyQueryTermNode("field.f1", (*sameElem)[0].get()); - verifyQueryTermNode("field.f2", (*sameElem)[1].get()); - verifyQueryTermNode("field.f3", (*sameElem)[2].get()); - - QueryTermList terms; - q.getLeaves(terms); - EXPECT_EQ(3u, terms.size()); - for (QueryTerm * qt : terms) { - qt->resizeFieldId(3); - } - - // field 0 - terms[0]->add(1, 0, 0, 10); - terms[0]->add(2, 0, 1, 20); - terms[0]->add(3, 0, 2, 30); - terms[0]->add(4, 0, 3, 40); - terms[0]->add(5, 0, 4, 50); - terms[0]->add(6, 0, 5, 60); - - terms[1]->add(7, 1, 0, 70); - terms[1]->add(8, 1, 1, 80); - terms[1]->add(9, 1, 2, 90); - terms[1]->add(10, 1, 4, 100); - terms[1]->add(11, 1, 5, 110); - terms[1]->add(12, 1, 6, 120); - - terms[2]->add(13, 2, 0, 130); - terms[2]->add(14, 2, 2, 140); - terms[2]->add(15, 2, 4, 150); - terms[2]->add(16, 2, 5, 160); - terms[2]->add(17, 2, 6, 170); - HitList hits; - sameElem->evaluateHits(hits); - EXPECT_EQ(4u, hits.size()); - EXPECT_EQ(0u, hits[0].wordpos()); - EXPECT_EQ(2u, hits[0].field_id()); - EXPECT_EQ(0u, hits[0].elemId()); - EXPECT_EQ(130, hits[0].weight()); - - EXPECT_EQ(0u, hits[1].wordpos()); - EXPECT_EQ(2u, hits[1].field_id()); - EXPECT_EQ(2u, hits[1].elemId()); - EXPECT_EQ(140, hits[1].weight()); - - EXPECT_EQ(0u, hits[2].wordpos()); - EXPECT_EQ(2u, hits[2].field_id()); - EXPECT_EQ(4u, hits[2].elemId()); - EXPECT_EQ(150, hits[2].weight()); - - EXPECT_EQ(0u, hits[3].wordpos()); - EXPECT_EQ(2u, hits[3].field_id()); - EXPECT_EQ(5u, hits[3].elemId()); - EXPECT_EQ(160, hits[3].weight()); - EXPECT_TRUE(sameElem->evaluate()); -} - TEST(StreamingQueryTest, test_nearest_neighbor_query_node) { QueryBuilder<SimpleQueryNodeTypes> builder; @@ -917,8 +752,8 @@ TEST(StreamingQueryTest, test_in_term) td.lookupField(12)->setHandle(1); EXPECT_FALSE(term.evaluate()); auto& q = *term.get_terms().front(); - q.add(0, 11, 0, 1); - q.add(0, 12, 0, 1); + q.add(11, 0, 1, 0); + q.add(12, 0, 1, 0); EXPECT_TRUE(term.evaluate()); MatchData md(MatchData::params().numTermFields(2)); term.unpack_match_data(23, td, md); @@ -944,11 +779,11 @@ TEST(StreamingQueryTest, dot_product_term) td.lookupField(12)->setHandle(1); EXPECT_FALSE(term.evaluate()); auto& q0 = *term.get_terms()[0]; - q0.add(0, 11, 0, -13); - q0.add(0, 12, 0, -17); + q0.add(11, 0, -13, 0); + q0.add(12, 0, -17, 0); auto& q1 = *term.get_terms()[1]; - q1.add(0, 11, 0, 4); - q1.add(0, 12, 0, 9); + q1.add(11, 0, 4, 0); + q1.add(12, 0, 9, 0); EXPECT_TRUE(term.evaluate()); MatchData md(MatchData::params().numTermFields(2)); term.unpack_match_data(23, td, md); @@ -989,11 +824,11 @@ check_wand_term(double limit, const vespalib::string& label) td.lookupField(12)->setHandle(1); EXPECT_FALSE(term.evaluate()); auto& q0 = *term.get_terms()[0]; - q0.add(0, 11, 0, 17); - q0.add(0, 12, 0, 13); + q0.add(11, 0, 17, 0); + q0.add(12, 0, 13, 0); auto& q1 = *term.get_terms()[1]; - q1.add(0, 11, 0, 9); - q1.add(0, 12, 0, 4); + q1.add(11, 0, 9, 0); + q1.add(12, 0, 4, 0); EXPECT_EQ(limit < exp_wand_score_field_11, term.evaluate()); MatchData md(MatchData::params().numTermFields(2)); term.unpack_match_data(23, td, md); @@ -1043,11 +878,11 @@ TEST(StreamingQueryTest, weighted_set_term) td.lookupField(12)->setHandle(1); EXPECT_FALSE(term.evaluate()); auto& q0 = *term.get_terms()[0]; - q0.add(0, 11, 0, 10); - q0.add(0, 12, 0, 10); + q0.add(11, 0, 10, 0); + q0.add(12, 0, 10, 0); auto& q1 = *term.get_terms()[1]; - q1.add(0, 11, 0, 10); - q1.add(0, 12, 0, 10); + q1.add(11, 0, 10, 0); + q1.add(12, 0, 10, 0); EXPECT_TRUE(term.evaluate()); MatchData md(MatchData::params().numTermFields(2)); term.unpack_match_data(23, td, md); diff --git a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp index bbd2744119a..90452f1d12b 100644 --- a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp @@ -23,12 +23,15 @@ class MyOr : public IntermediateBlueprint { private: public: - double calculate_cost() const final { - return OrFlow::cost_of(get_children()); - } double calculate_relative_estimate() const final { return OrFlow::estimate_of(get_children()); } + double calculate_cost() const final { + return OrFlow::cost_of(get_children(), false); + } + double calculate_strict_cost() const final { + return OrFlow::cost_of(get_children(), true); + } HitEstimate combine(const std::vector<HitEstimate> &data) const override { return max(data); } @@ -37,7 +40,7 @@ public: return mixChildrenFields(); } - void sort(Children &children, bool) const override { + void sort(Children &children, bool, bool) const override { std::sort(children.begin(), children.end(), TieredGreaterEstimate()); } @@ -446,7 +449,7 @@ TEST_F("testChildAndNotCollapsing", Fixture) ); TEST_DO(f.check_not_equal(*sorted, *unsorted)); unsorted->setDocIdLimit(1000); - unsorted = Blueprint::optimize(std::move(unsorted), true); + unsorted = Blueprint::optimize_and_sort(std::move(unsorted), true, true); TEST_DO(f.check_equal(*sorted, *unsorted)); } @@ -486,7 +489,7 @@ TEST_F("testChildAndCollapsing", Fixture) TEST_DO(f.check_not_equal(*sorted, *unsorted)); unsorted->setDocIdLimit(1000); - unsorted = Blueprint::optimize(std::move(unsorted), true); + unsorted = Blueprint::optimize_and_sort(std::move(unsorted), true, true); TEST_DO(f.check_equal(*sorted, *unsorted)); } @@ -525,7 +528,10 @@ TEST_F("testChildOrCollapsing", Fixture) ); TEST_DO(f.check_not_equal(*sorted, *unsorted)); unsorted->setDocIdLimit(1000); - unsorted = Blueprint::optimize(std::move(unsorted), true); + // we sort non-strict here since the default costs of 1/est for + // non-strict/strict leaf iterators makes the order of iterators + // under a strict OR irrelevant. + unsorted = Blueprint::optimize_and_sort(std::move(unsorted), false, true); TEST_DO(f.check_equal(*sorted, *unsorted)); } @@ -569,7 +575,7 @@ TEST_F("testChildSorting", Fixture) TEST_DO(f.check_not_equal(*sorted, *unsorted)); unsorted->setDocIdLimit(1000); - unsorted = Blueprint::optimize(std::move(unsorted), true); + unsorted = Blueprint::optimize_and_sort(std::move(unsorted), true, true); TEST_DO(f.check_equal(*sorted, *unsorted)); } @@ -650,12 +656,13 @@ getExpectedBlueprint() " estimate: HitEstimate {\n" " empty: false\n" " estHits: 9\n" - " relative_estimate: 0.5\n" " cost_tier: 1\n" " tree_size: 2\n" " allow_termwise_eval: false\n" " }\n" - " cost: 1\n" + " relative_estimate: 0\n" + " cost: 0\n" + " strict_cost: 0\n" " sourceId: 4294967295\n" " docid_limit: 0\n" " children: std::vector {\n" @@ -671,12 +678,13 @@ getExpectedBlueprint() " estimate: HitEstimate {\n" " empty: false\n" " estHits: 9\n" - " relative_estimate: 0.5\n" " cost_tier: 1\n" " tree_size: 1\n" " allow_termwise_eval: true\n" " }\n" - " cost: 1\n" + " relative_estimate: 0\n" + " cost: 0\n" + " strict_cost: 0\n" " sourceId: 4294967295\n" " docid_limit: 0\n" " }\n" @@ -702,12 +710,13 @@ getExpectedSlimeBlueprint() { " '[type]': 'HitEstimate'," " empty: false," " estHits: 9," - " relative_estimate: 0.5," " cost_tier: 1," " tree_size: 2," " allow_termwise_eval: false" " }," - " cost: 1.0," + " relative_estimate: 0.0," + " cost: 0.0," + " strict_cost: 0.0," " sourceId: 4294967295," " docid_limit: 0," " children: {" @@ -728,12 +737,13 @@ getExpectedSlimeBlueprint() { " '[type]': 'HitEstimate'," " empty: false," " estHits: 9," - " relative_estimate: 0.5," " cost_tier: 1," " tree_size: 1," " allow_termwise_eval: true" " }," - " cost: 1.0," + " relative_estimate: 0.0," + " cost: 0.0," + " strict_cost: 0.0," " sourceId: 4294967295," " docid_limit: 0" " }" @@ -786,9 +796,9 @@ TEST("requireThatDocIdLimitInjectionWorks") } TEST("Control object sizes") { - EXPECT_EQUAL(40u, sizeof(Blueprint::State)); - EXPECT_EQUAL(40u, sizeof(Blueprint)); - EXPECT_EQUAL(80u, sizeof(LeafBlueprint)); + EXPECT_EQUAL(32u, sizeof(Blueprint::State)); + EXPECT_EQUAL(56u, sizeof(Blueprint)); + EXPECT_EQUAL(96u, sizeof(LeafBlueprint)); } TEST_MAIN() { diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp index 856ac2391f8..2cf523b508b 100644 --- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp @@ -77,7 +77,7 @@ void check_sort_order(IntermediateBlueprint &self, BlueprintVector children, std unordered.push_back(child.get()); } // TODO: sort by cost (requires both setDocIdLimit and optimize to be called) - self.sort(children, false); + self.sort(children, true, false); for (size_t i = 0; i < children.size(); ++i) { EXPECT_EQUAL(children[i].get(), unordered[order[i]]); } @@ -130,8 +130,8 @@ TEST("test AndNot Blueprint") { } template <typename BP> -void optimize(std::unique_ptr<BP> &ref) { - auto optimized = Blueprint::optimize(std::move(ref), true); +void optimize(std::unique_ptr<BP> &ref, bool strict) { + auto optimized = Blueprint::optimize_and_sort(std::move(ref), strict, true); ref.reset(dynamic_cast<BP*>(optimized.get())); ASSERT_TRUE(ref); optimized.release(); @@ -144,7 +144,7 @@ TEST("test And propagates updated histestimate") { bp->addChild(ap(MyLeafSpec(200).create<RememberExecuteInfo>()->setSourceId(2))); bp->addChild(ap(MyLeafSpec(2000).create<RememberExecuteInfo>()->setSourceId(2))); bp->setDocIdLimit(5000); - optimize(bp); + optimize(bp, true); bp->fetchPostings(ExecuteInfo::TRUE); EXPECT_EQUAL(3u, bp->childCnt()); for (uint32_t i = 0; i < bp->childCnt(); i++) { @@ -164,7 +164,10 @@ TEST("test Or propagates updated histestimate") { bp->addChild(ap(MyLeafSpec(800).create<RememberExecuteInfo>()->setSourceId(2))); bp->addChild(ap(MyLeafSpec(20).create<RememberExecuteInfo>()->setSourceId(2))); bp->setDocIdLimit(5000); - optimize(bp); + // sort OR as non-strict to get expected order. With strict OR, + // the order would be irrelevant since we use the relative + // estimate as strict_cost for leafs. + optimize(bp, false); bp->fetchPostings(ExecuteInfo::TRUE); EXPECT_EQUAL(4u, bp->childCnt()); for (uint32_t i = 0; i < bp->childCnt(); i++) { @@ -519,16 +522,18 @@ void compare(const Blueprint &bp1, const Blueprint &bp2, bool expect_eq) { auto cmp_hook = [expect_eq](const auto &path, const auto &a, const auto &b) { if (!path.empty() && std::holds_alternative<vespalib::stringref>(path.back())) { vespalib::stringref field = std::get<vespalib::stringref>(path.back()); - if (field == "cost") { + // ignore these fields to enable comparing optimized with unoptimized trees + if (field == "relative_estimate" || field == "cost" || field == "strict_cost") { + auto check_value = [&](double value){ + if ((value > 0.0 && value < 1e-6) || (value > 0.0 && value < 1e-6)) { + fprintf(stderr, " small value at %s: %g\n", path_to_str(path).c_str(), + value); + } + }; + check_value(a.asDouble()); + check_value(b.asDouble()); return true; } - if (field == "relative_estimate") { - double a_val = a.asDouble(); - double b_val = b.asDouble(); - if (a_val != 0.0 && b_val != 0.0 && vespalib::approx_equal(a_val, b_val)) { - return true; - } - } } if (expect_eq) { fprintf(stderr, " mismatch at %s: %s vs %s\n", path_to_str(path).c_str(), @@ -548,13 +553,13 @@ void compare(const Blueprint &bp1, const Blueprint &bp2, bool expect_eq) { } void -optimize_and_compare(Blueprint::UP top, Blueprint::UP expect, bool sort_by_cost = true) { +optimize_and_compare(Blueprint::UP top, Blueprint::UP expect, bool strict = true, bool sort_by_cost = true) { top->setDocIdLimit(1000); expect->setDocIdLimit(1000); TEST_DO(compare(*top, *expect, false)); - top = Blueprint::optimize(std::move(top), sort_by_cost); + top = Blueprint::optimize_and_sort(std::move(top), strict, sort_by_cost); TEST_DO(compare(*top, *expect, true)); - expect = Blueprint::optimize(std::move(expect), sort_by_cost); + expect = Blueprint::optimize_and_sort(std::move(expect), strict, sort_by_cost); TEST_DO(compare(*expect, *top, true)); } @@ -614,7 +619,8 @@ TEST_F("test SourceBlender below OR partial optimization", SourceBlenderTestFixt expect->addChild(addLeafsWithSourceId(std::make_unique<SourceBlenderBlueprint>(f.selector_2), {{10, 1}, {20, 2}})); addLeafs(*expect, {3, 2, 1}); - optimize_and_compare(std::move(top), std::move(expect)); + // NOTE: use non-strict cost based sorting for expected order + optimize_and_compare(std::move(top), std::move(expect), false); } TEST_F("test OR replaced by source blender after full optimization", SourceBlenderTestFixture) { @@ -626,7 +632,8 @@ TEST_F("test OR replaced by source blender after full optimization", SourceBlend expect->addChild(addLeafsWithSourceId(2, std::make_unique<OrBlueprint>(), {{2000, 2}, {200, 2}, {20, 2}})); expect->addChild(addLeafsWithSourceId(1, std::make_unique<OrBlueprint>(), {{1000, 1}, {100, 1}, {10, 1}})); - optimize_and_compare(std::move(top), std::move(expect)); + // NOTE: use non-strict cost based sorting for expected order + optimize_and_compare(std::move(top), std::move(expect), false); } TEST_F("test SourceBlender below AND_NOT optimization", SourceBlenderTestFixture) { @@ -681,11 +688,11 @@ TEST("test empty root node optimization and safeness") { //------------------------------------------------------------------------- auto expect_up = std::make_unique<EmptyBlueprint>(); - EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top1), true)->asString()); - EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top2), true)->asString()); - EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top3), true)->asString()); - EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top4), true)->asString()); - EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top5), true)->asString()); + compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top1), true, true), true); + compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top2), true, true), true); + compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top3), true, true), true); + compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top4), true, true), true); + compare(*expect_up, *Blueprint::optimize_and_sort(std::move(top5), true, true), true); } TEST("and with one empty child is optimized away") { @@ -693,11 +700,11 @@ TEST("and with one empty child is optimized away") { Blueprint::UP top = ap((new SourceBlenderBlueprint(*selector))-> addChild(ap(MyLeafSpec(10).create())). addChild(addLeafs(std::make_unique<AndBlueprint>(), {{0, true}, 10, 20}))); - top = Blueprint::optimize(std::move(top), true); + top = Blueprint::optimize_and_sort(std::move(top), true, true); Blueprint::UP expect_up(ap((new SourceBlenderBlueprint(*selector))-> addChild(ap(MyLeafSpec(10).create())). addChild(std::make_unique<EmptyBlueprint>()))); - EXPECT_EQUAL(expect_up->asString(), top->asString()); + compare(*expect_up, *top, true); } struct make { @@ -731,7 +738,7 @@ struct make { return std::move(*this); } make &&leaf(uint32_t estimate) && { - std::unique_ptr<LeafBlueprint> bp(MyLeafSpec(estimate).create()); + std::unique_ptr<MyLeaf> bp(MyLeafSpec(estimate).create()); if (cost_tag != invalid_cost) { bp->set_cost(cost_tag); cost_tag = invalid_cost; @@ -763,7 +770,8 @@ TEST("AND AND collapsing") { TEST("OR OR collapsing") { Blueprint::UP top = make::OR().leafs({1,3,5}).add(make::OR().leafs({2,4})); Blueprint::UP expect = make::OR().leafs({5,4,3,2,1}); - optimize_and_compare(std::move(top), std::move(expect)); + // NOTE: use non-strict cost based sorting for expected order + optimize_and_compare(std::move(top), std::move(expect), false); } TEST("AND_NOT AND_NOT collapsing") { @@ -841,7 +849,8 @@ TEST("test single child optimization") { TEST("test empty OR child optimization") { Blueprint::UP top = addLeafs(std::make_unique<OrBlueprint>(), {{0, true}, 20, {0, true}, 10, {0, true}, 0, 30, {0, true}}); Blueprint::UP expect = addLeafs(std::make_unique<OrBlueprint>(), {30, 20, 10, 0}); - optimize_and_compare(std::move(top), std::move(expect)); + // NOTE: use non-strict cost based sorting for expected order + optimize_and_compare(std::move(top), std::move(expect), false); } TEST("test empty AND_NOT child optimization") { @@ -868,10 +877,10 @@ TEST("require that replaced blueprints retain source id") { addChild(ap(MyLeafSpec(30).create()->setSourceId(55))))); Blueprint::UP expect2_up(ap(MyLeafSpec(30).create()->setSourceId(42))); //------------------------------------------------------------------------- - top1_up = Blueprint::optimize(std::move(top1_up), true); - top2_up = Blueprint::optimize(std::move(top2_up), true); - EXPECT_EQUAL(expect1_up->asString(), top1_up->asString()); - EXPECT_EQUAL(expect2_up->asString(), top2_up->asString()); + top1_up = Blueprint::optimize_and_sort(std::move(top1_up), true, true); + top2_up = Blueprint::optimize_and_sort(std::move(top2_up), true, true); + compare(*expect1_up, *top1_up, true); + compare(*expect2_up, *top2_up, true); EXPECT_EQUAL(13u, top1_up->getSourceId()); EXPECT_EQUAL(42u, top2_up->getSourceId()); } @@ -1181,45 +1190,25 @@ TEST("require_that_unpack_optimization_is_not_overruled_by_equiv") { } } -TEST("require that children of near are not optimized") { - auto top_up = ap((new NearBlueprint(10))-> - addChild(addLeafs(std::make_unique<OrBlueprint>(), {20, {0, true}})). - addChild(addLeafs(std::make_unique<OrBlueprint>(), {{0, true}, 30}))); - auto expect_up = ap((new NearBlueprint(10))-> - addChild(addLeafs(std::make_unique<OrBlueprint>(), {20, {0, true}})). - addChild(addLeafs(std::make_unique<OrBlueprint>(), {{0, true}, 30}))); - top_up = Blueprint::optimize(std::move(top_up), true); - TEST_DO(compare(*top_up, *expect_up, true)); -} - -TEST("require that children of onear are not optimized") { - auto top_up = ap((new ONearBlueprint(10))-> - addChild(addLeafs(std::make_unique<OrBlueprint>(), {20, {0, true}})). - addChild(addLeafs(std::make_unique<OrBlueprint>(), {{0, true}, 30}))); - auto expect_up = ap((new ONearBlueprint(10))-> - addChild(addLeafs(std::make_unique<OrBlueprint>(), {20, {0, true}})). - addChild(addLeafs(std::make_unique<OrBlueprint>(), {{0, true}, 30}))); - top_up = Blueprint::optimize(std::move(top_up), true); - TEST_DO(compare(*top_up, *expect_up, true)); -} - TEST("require that ANDNOT without children is optimized to empty search") { Blueprint::UP top_up = std::make_unique<AndNotBlueprint>(); auto expect_up = std::make_unique<EmptyBlueprint>(); - top_up = Blueprint::optimize(std::move(top_up), true); - EXPECT_EQUAL(expect_up->asString(), top_up->asString()); + top_up = Blueprint::optimize_and_sort(std::move(top_up), true, true); + compare(*expect_up, *top_up, true); } TEST("require that highest cost tier sorts last for OR") { Blueprint::UP top = addLeafsWithCostTier(std::make_unique<OrBlueprint>(), {{50, 1}, {30, 3}, {20, 2}, {10, 1}}); Blueprint::UP expect = addLeafsWithCostTier(std::make_unique<OrBlueprint>(), {{50, 1}, {10, 1}, {20, 2}, {30, 3}}); - optimize_and_compare(std::move(top), std::move(expect), false); + // cost-based sorting would ignore cost tier + optimize_and_compare(std::move(top), std::move(expect), true, false); } TEST("require that highest cost tier sorts last for AND") { Blueprint::UP top = addLeafsWithCostTier(std::make_unique<AndBlueprint>(), {{10, 1}, {20, 3}, {30, 2}, {50, 1}}); Blueprint::UP expect = addLeafsWithCostTier(std::make_unique<AndBlueprint>(), {{10, 1}, {50, 1}, {30, 2}, {20, 3}}); - optimize_and_compare(std::move(top), std::move(expect), false); + // cost-based sorting would ignore cost tier + optimize_and_compare(std::move(top), std::move(expect), true, false); } template<typename BP> @@ -1292,6 +1281,7 @@ void verify_relative_estimate(make &&mk, double expect) { EXPECT_EQUAL(mk.making->estimate(), 0.0); Blueprint::UP bp = std::move(mk).leafs({200,300,950}); bp->setDocIdLimit(1000); + bp = Blueprint::optimize(std::move(bp)); EXPECT_EQUAL(bp->estimate(), expect); } @@ -1329,15 +1319,17 @@ TEST("relative estimate for WEAKAND") { verify_relative_estimate(make::WEAKAND(50), 0.05); } -void verify_cost(make &&mk, double expect) { - EXPECT_EQUAL(mk.making->cost(), 1.0); +void verify_cost(make &&mk, double expect, double expect_strict) { + EXPECT_EQUAL(mk.making->cost(), 0.0); + EXPECT_EQUAL(mk.making->strict_cost(), 0.0); Blueprint::UP bp = std::move(mk) - .cost(1.1).leaf(200) - .cost(1.2).leaf(300) - .cost(1.3).leaf(500); + .cost(1.1).leaf(200) // strict_cost: 0.2*1.1 + .cost(1.2).leaf(300) // strict_cost: 0.3*1.2 + .cost(1.3).leaf(950); // rel_est: 0.5, strict_cost: 1.3 bp->setDocIdLimit(1000); - bp = Blueprint::optimize(std::move(bp), true); + bp = Blueprint::optimize(std::move(bp)); EXPECT_EQUAL(bp->cost(), expect); + EXPECT_EQUAL(bp->strict_cost(), expect_strict); } double calc_cost(std::vector<std::pair<double,double>> list) { @@ -1351,36 +1343,48 @@ double calc_cost(std::vector<std::pair<double,double>> list) { } TEST("cost for OR") { - verify_cost(make::OR(), calc_cost({{1.3, 0.5},{1.2, 0.7},{1.1, 0.8}})); + verify_cost(make::OR(), + calc_cost({{1.3, 0.5},{1.2, 0.7},{1.1, 0.8}}), + calc_cost({{0.2*1.1, 0.8},{0.3*1.2, 0.7},{1.3, 0.5}})); } TEST("cost for AND") { - verify_cost(make::AND(), calc_cost({{1.1, 0.2},{1.2, 0.3},{1.3, 0.5}})); + verify_cost(make::AND(), + calc_cost({{1.1, 0.2},{1.2, 0.3},{1.3, 0.5}}), + calc_cost({{0.2*1.1, 0.2},{1.2, 0.3},{1.3, 0.5}})); } TEST("cost for RANK") { - verify_cost(make::RANK(), 1.1); // first + verify_cost(make::RANK(), 1.1, 0.2*1.1); // first } TEST("cost for ANDNOT") { - verify_cost(make::ANDNOT(), calc_cost({{1.1, 0.2},{1.3, 0.5},{1.2, 0.7}})); + verify_cost(make::ANDNOT(), + calc_cost({{1.1, 0.2},{1.3, 0.5},{1.2, 0.7}}), + calc_cost({{0.2*1.1, 0.2},{1.3, 0.5},{1.2, 0.7}})); } TEST("cost for SB") { InvalidSelector sel; - verify_cost(make::SB(sel), 1.3); // max + verify_cost(make::SB(sel), 1.3, 1.3); // max } TEST("cost for NEAR") { - verify_cost(make::NEAR(1), 3.0 + calc_cost({{1.1, 0.2},{1.2, 0.3},{1.3, 0.5}})); + verify_cost(make::NEAR(1), + 0.2*0.3*0.5 * 3 + calc_cost({{1.1, 0.2},{1.2, 0.3},{1.3, 0.5}}), + 0.2*0.3*0.5 * 3 + calc_cost({{0.2*1.1, 0.2},{1.2, 0.3},{1.3, 0.5}})); } TEST("cost for ONEAR") { - verify_cost(make::ONEAR(1), 3.0 + calc_cost({{1.1, 0.2},{1.2, 0.3},{1.3, 0.5}})); + verify_cost(make::ONEAR(1), + 0.2*0.3*0.5 * 3 + calc_cost({{1.1, 0.2},{1.2, 0.3},{1.3, 0.5}}), + 0.2*0.3*0.5 * 3 + calc_cost({{0.2*1.1, 0.2},{1.2, 0.3},{1.3, 0.5}})); } TEST("cost for WEAKAND") { - verify_cost(make::WEAKAND(1000), calc_cost({{1.3, 0.5},{1.2, 0.7},{1.1, 0.8}})); + verify_cost(make::WEAKAND(1000), + calc_cost({{1.3, 0.5},{1.2, 0.7},{1.1, 0.8}}), + calc_cost({{0.2*1.1, 0.8},{0.3*1.2, 0.7},{1.3, 0.5}})); } TEST_MAIN() { TEST_DEBUG("lhs.out", "rhs.out"); TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/queryeval/blueprint/mysearch.h b/searchlib/src/tests/queryeval/blueprint/mysearch.h index db7dd2adae6..6eb27364c2b 100644 --- a/searchlib/src/tests/queryeval/blueprint/mysearch.h +++ b/searchlib/src/tests/queryeval/blueprint/mysearch.h @@ -104,7 +104,8 @@ public: class MyLeaf : public SimpleLeafBlueprint { using TFMDA = search::fef::TermFieldMatchDataArray; - bool _got_global_filter; + bool _got_global_filter = false; + double _cost = 1.0; public: SearchIterator::UP @@ -113,18 +114,15 @@ public: return std::make_unique<MySearch>("leaf", tfmda, strict); } - MyLeaf() - : SimpleLeafBlueprint(), _got_global_filter(false) - {} - MyLeaf(FieldSpecBaseList fields) - : SimpleLeafBlueprint(std::move(fields)), _got_global_filter(false) - {} - + MyLeaf() : SimpleLeafBlueprint() {} + MyLeaf(FieldSpecBaseList fields) : SimpleLeafBlueprint(std::move(fields)) {} + void set_cost(double value) noexcept { _cost = value; } + double calculate_cost() const override { return _cost; } + MyLeaf &estimate(uint32_t hits, bool empty = false) { setEstimate(HitEstimate(hits, empty)); return *this; } - MyLeaf &cost_tier(uint32_t value) { set_cost_tier(value); return *this; @@ -153,7 +151,7 @@ private: public: explicit MyLeafSpec(uint32_t estHits, bool empty = false) - : _fields(), _estimate(estHits, empty), _cost_tier(0), _want_global_filter(false) {} + : _fields(), _estimate(estHits, empty), _cost_tier(0), _want_global_filter(false) {} MyLeafSpec &addField(uint32_t fieldId, uint32_t handle) { _fields.add(FieldSpecBase(fieldId, handle)); diff --git a/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp b/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp index 1180206279d..4fc8922b9a3 100644 --- a/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp +++ b/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp @@ -48,7 +48,10 @@ concept ChildCollector = requires(T a, std::unique_ptr<Blueprint> bp) { // inherit Blueprint to capture the default filter factory struct DefaultBlueprint : Blueprint { double calculate_relative_estimate() const override { abort(); } - void optimize(Blueprint* &, OptimizePass, bool) override { abort(); } + double calculate_cost() const override { abort(); } + double calculate_strict_cost() const override { abort(); } + void optimize(Blueprint* &, OptimizePass) override { abort(); } + void sort(bool, bool) override { abort(); } const State &getState() const override { abort(); } void fetchPostings(const ExecuteInfo &) override { abort(); } void freeze() override { abort(); } diff --git a/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp b/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp index a9f549a0bd9..7a7abb20cdf 100644 --- a/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp +++ b/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp @@ -626,12 +626,13 @@ TEST(ParallelWeakAndTest, require_that_asString_on_blueprint_works) " estimate: HitEstimate {\n" " empty: false\n" " estHits: 2\n" - " relative_estimate: 0.5\n" " cost_tier: 1\n" " tree_size: 2\n" " allow_termwise_eval: false\n" " }\n" - " cost: 1\n" + " relative_estimate: 0\n" + " cost: 0\n" + " strict_cost: 0\n" " sourceId: 4294967295\n" " docid_limit: 0\n" " _weights: std::vector {\n" @@ -650,12 +651,13 @@ TEST(ParallelWeakAndTest, require_that_asString_on_blueprint_works) " estimate: HitEstimate {\n" " empty: false\n" " estHits: 2\n" - " relative_estimate: 0.5\n" " cost_tier: 1\n" " tree_size: 1\n" " allow_termwise_eval: true\n" " }\n" - " cost: 1\n" + " relative_estimate: 0\n" + " cost: 0\n" + " strict_cost: 0\n" " sourceId: 4294967295\n" " docid_limit: 0\n" " }\n" diff --git a/searchlib/src/tests/queryeval/same_element/same_element_test.cpp b/searchlib/src/tests/queryeval/same_element/same_element_test.cpp index 7c535e5d3d5..c9fcb472b68 100644 --- a/searchlib/src/tests/queryeval/same_element/same_element_test.cpp +++ b/searchlib/src/tests/queryeval/same_element/same_element_test.cpp @@ -46,7 +46,7 @@ std::unique_ptr<SameElementBlueprint> make_blueprint(const std::vector<FakeResul } Blueprint::UP finalize(Blueprint::UP bp, bool strict) { - Blueprint::UP result = Blueprint::optimize(std::move(bp), true); + Blueprint::UP result = Blueprint::optimize_and_sort(std::move(bp), true, true); result->fetchPostings(ExecuteInfo::createForTest(strict)); result->freeze(); return result; |