diff options
Diffstat (limited to 'searchcore')
23 files changed, 902 insertions, 555 deletions
diff --git a/searchcore/src/apps/vespa-proton-cmd/vespa-proton-cmd.cpp b/searchcore/src/apps/vespa-proton-cmd/vespa-proton-cmd.cpp index 49c7d2d3867..38fba787bff 100644 --- a/searchcore/src/apps/vespa-proton-cmd/vespa-proton-cmd.cpp +++ b/searchcore/src/apps/vespa-proton-cmd/vespa-proton-cmd.cpp @@ -45,7 +45,6 @@ public: int usage(const char *self) { fprintf(stderr, "usage: %s <port|spec|--local|--id=name> <cmd> [args]\n", self); - fprintf(stderr, "die\n"); fprintf(stderr, "getProtonStatus\n"); fprintf(stderr, "getState\n"); fprintf(stderr, "triggerFlush\n"); @@ -331,8 +330,6 @@ public: if (! _req->IsError()) { printf("OK: prepareRestart enabled\n"); } - } else if (strcmp(argv[2], "die") == 0) { - _req->SetMethodName("pandora.rtc.die"); } else { finiRPC(); return usage(argv[0]); diff --git a/searchcore/src/tests/proton/matching/CMakeLists.txt b/searchcore/src/tests/proton/matching/CMakeLists.txt index c35e9498986..be9e10c45a0 100644 --- a/searchcore/src/tests/proton/matching/CMakeLists.txt +++ b/searchcore/src/tests/proton/matching/CMakeLists.txt @@ -13,7 +13,7 @@ vespa_add_executable(searchcore_matching_test_app TEST searchcore_grouping searchlib_test ) -vespa_add_test(NAME searchcore_matching_test_app COMMAND searchcore_matching_test_app) +vespa_add_test(NAME searchcore_matching_test_app COMMAND searchcore_matching_test_app COST 100) vespa_add_executable(searchcore_sessionmanager_test_app TEST SOURCES sessionmanager_test.cpp diff --git a/searchcore/src/tests/proton/matching/match_loop_communicator/CMakeLists.txt b/searchcore/src/tests/proton/matching/match_loop_communicator/CMakeLists.txt index b545023ce97..b5b71836581 100644 --- a/searchcore/src/tests/proton/matching/match_loop_communicator/CMakeLists.txt +++ b/searchcore/src/tests/proton/matching/match_loop_communicator/CMakeLists.txt @@ -4,5 +4,6 @@ vespa_add_executable(searchcore_match_loop_communicator_test_app TEST match_loop_communicator_test.cpp DEPENDS searchcore_matching + GTest::gtest ) vespa_add_test(NAME searchcore_match_loop_communicator_test_app COMMAND searchcore_match_loop_communicator_test_app) diff --git a/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp b/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp index d5ee88e1617..dc05471a1eb 100644 --- a/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp +++ b/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp @@ -1,7 +1,10 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/vespalib/testkit/test_kit.h> #include <vespa/searchcore/proton/matching/match_loop_communicator.h> +#include <vespa/searchlib/features/first_phase_rank_lookup.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <vespa/vespalib/test/nexus.h> #include <algorithm> +#include <atomic> using namespace proton::matching; @@ -12,10 +15,22 @@ using Hit = MatchLoopCommunicator::Hit; using Hits = MatchLoopCommunicator::Hits; using TaggedHit = MatchLoopCommunicator::TaggedHit; using TaggedHits = MatchLoopCommunicator::TaggedHits; +using search::features::FirstPhaseRankLookup; using search::queryeval::SortedHitSequence; +using vespalib::test::Nexus; + +namespace search::queryeval { + +void PrintTo(const Scores& scores, std::ostream* os) { + *os << "{" << scores.low << "," << scores.high << "}"; +} + +} std::vector<Hit> hit_vec(std::vector<Hit> list) { return list; } +auto do_nothing = []() noexcept {}; + Hits makeScores(size_t id) { switch (id) { case 0: return {{1, 5.4}, {2, 4.4}, {3, 3.4}, {4, 2.4}, {5, 1.4}}; @@ -27,6 +42,13 @@ Hits makeScores(size_t id) { return {}; } +Hits make_first_scores(size_t id, size_t size) { + auto result = makeScores(id); + EXPECT_LE(size, result.size()); + result.resize(size); + return result; +} + std::tuple<size_t,Hits,RangePair> second_phase(MatchLoopCommunicator &com, const Hits &hits, size_t thread_id, double delta = 0.0) { std::vector<uint32_t> refs; for (size_t i = 0; i < hits.size(); ++i) { @@ -60,25 +82,6 @@ size_t my_work_size(MatchLoopCommunicator &com, const Hits &hits, size_t thread_ return work_size; } -void equal(size_t count, const Hits & a, const Hits & b) { - EXPECT_EQUAL(count, b.size()); - for (size_t i(0); i < count; i++) { - EXPECT_EQUAL(a[i].first, b[i].first); - EXPECT_EQUAL(a[i].second , b[i].second); - } -} - -void equal_range(const Range &a, const Range &b) { - EXPECT_EQUAL(a.isValid(), b.isValid()); - EXPECT_EQUAL(a.low, b.low); - EXPECT_EQUAL(a.high, b.high); -} - -void equal_ranges(const RangePair &a, const RangePair &b) { - TEST_DO(equal_range(a.first, b.first)); - TEST_DO(equal_range(a.second, b.second)); -} - struct EveryOdd : public search::queryeval::IDiversifier { bool accepted(uint32_t docId) override { return docId & 0x01; @@ -89,122 +92,242 @@ struct None : public search::queryeval::IDiversifier { bool accepted(uint32_t) override { return false; } }; -TEST_F("require that selectBest gives appropriate results for single thread", MatchLoopCommunicator(num_threads, 3)) { - TEST_DO(equal(2u, hit_vec({{1, 5}, {2, 4}}), selectBest(f1, hit_vec({{1, 5}, {2, 4}}), thread_id))); - TEST_DO(equal(3u, hit_vec({{1, 5}, {2, 4}, {3, 3}}), selectBest(f1, hit_vec({{1, 5}, {2, 4}, {3, 3}}), thread_id))); - TEST_DO(equal(3u, hit_vec({{1, 5}, {2, 4}, {3, 3}}), selectBest(f1, hit_vec({{1, 5}, {2, 4}, {3, 3}, {4, 2}}), thread_id))); +TEST(MatchLoopCommunicatorTest, require_that_selectBest_gives_appropriate_results_for_single_thread) +{ + constexpr size_t num_threads = 1; + constexpr size_t thread_id = 0; + MatchLoopCommunicator f1(num_threads, 3); + EXPECT_EQ(hit_vec({{1, 5}, {2, 4}}), selectBest(f1, hit_vec({{1, 5}, {2, 4}}), thread_id)); + EXPECT_EQ(hit_vec({{1, 5}, {2, 4}, {3, 3}}), selectBest(f1, hit_vec({{1, 5}, {2, 4}, {3, 3}}), thread_id)); + EXPECT_EQ(hit_vec({{1, 5}, {2, 4}, {3, 3}}), selectBest(f1, hit_vec({{1, 5}, {2, 4}, {3, 3}, {4, 2}}), thread_id)); } -TEST_F("require that selectBest gives appropriate results for single thread with filter", - MatchLoopCommunicator(num_threads, 3, std::make_unique<EveryOdd>())) +TEST(MatchLoopCommunicatorTest, require_that_selectBest_gives_appropriate_results_for_single_thread_with_filter) { - TEST_DO(equal(1u, hit_vec({{1, 5}}), selectBest(f1, hit_vec({{1, 5}, {2, 4}}), thread_id))); - TEST_DO(equal(2u, hit_vec({{1, 5}, {3, 3}}), selectBest(f1, hit_vec({{1, 5}, {2, 4}, {3, 3}}), thread_id))); - TEST_DO(equal(3u, hit_vec({{1, 5}, {3, 3}, {5, 1}}), selectBest(f1, hit_vec({{1, 5}, {2, 4}, {3, 3}, {4, 2}, {5, 1}, {6, 0}}), thread_id))); + constexpr size_t num_threads = 1; + constexpr size_t thread_id = 0; + MatchLoopCommunicator f1(num_threads, 3, std::make_unique<EveryOdd>(), nullptr, do_nothing); + EXPECT_EQ(hit_vec({{1, 5}}), selectBest(f1, hit_vec({{1, 5}, {2, 4}}), thread_id)); + EXPECT_EQ(hit_vec({{1, 5}, {3, 3}}), selectBest(f1, hit_vec({{1, 5}, {2, 4}, {3, 3}}), thread_id)); + EXPECT_EQ(hit_vec({{1, 5}, {3, 3}, {5, 1}}), selectBest(f1, hit_vec({{1, 5}, {2, 4}, {3, 3}, {4, 2}, {5, 1}, {6, 0}}), thread_id)); } -TEST_MT_F("require that selectBest works with no hits", 10, MatchLoopCommunicator(num_threads, 10)) { - EXPECT_TRUE(selectBest(f1, hit_vec({}), thread_id).empty()); +TEST(MatchLoopCommunicatorTest, require_that_selectBest_works_with_no_hits) +{ + constexpr size_t num_threads = 10; + MatchLoopCommunicator f1(num_threads, 10); + auto task = [&f1](Nexus& ctx) { + EXPECT_TRUE(selectBest(f1, hit_vec({}), ctx.thread_id()).empty()); + }; + Nexus::run(num_threads, task); } -TEST_MT_F("require that selectBest works with too many hits from all threads", 5, MatchLoopCommunicator(num_threads, 13)) { - if (thread_id < 3) { - TEST_DO(equal(3u, makeScores(thread_id), selectBest(f1, makeScores(thread_id), thread_id))); - } else { - TEST_DO(equal(2u, makeScores(thread_id), selectBest(f1, makeScores(thread_id), thread_id))); - } +TEST(MatchLoopCommunicatorTest, require_that_selectBest_works_with_too_many_hits_from_all_threads) +{ + constexpr size_t num_threads = 5; + MatchLoopCommunicator f1(num_threads, 13); + auto task = [&f1](Nexus& ctx) { + auto thread_id = ctx.thread_id(); + if (thread_id < 3) { + EXPECT_EQ(make_first_scores(thread_id, 3), selectBest(f1, makeScores(thread_id), thread_id)); + } else { + EXPECT_EQ(make_first_scores(thread_id, 2), selectBest(f1, makeScores(thread_id), thread_id)); + } + }; + Nexus::run(num_threads, task); } -TEST_MT_F("require that selectBest works with some exhausted threads", 5, MatchLoopCommunicator(num_threads, 22)) { - if (thread_id < 2) { - TEST_DO(equal(5u, makeScores(thread_id), selectBest(f1, makeScores(thread_id), thread_id))); - } else { - TEST_DO(equal(4u, makeScores(thread_id), selectBest(f1, makeScores(thread_id), thread_id))); - } +TEST(MatchLoopCommunicatorTest, require_that_selectBest_works_with_some_exhausted_threads) +{ + constexpr size_t num_threads = 5; + MatchLoopCommunicator f1(num_threads, 22); + auto task = [&f1](Nexus& ctx) { + auto thread_id = ctx.thread_id(); + if (thread_id < 2) { + EXPECT_EQ(makeScores(thread_id), selectBest(f1, makeScores(thread_id), thread_id)); + } else { + EXPECT_EQ(make_first_scores(thread_id, 4), selectBest(f1, makeScores(thread_id), thread_id)); + } + }; + Nexus::run(num_threads, task); } -TEST_MT_F("require that selectBest can select all hits from all threads", 5, MatchLoopCommunicator(num_threads, 100)) { - EXPECT_EQUAL(5u, selectBest(f1, makeScores(thread_id), thread_id).size()); +TEST(MatchLoopCommunicatorTest, require_that_selectBest_can_select_all_hits_from_all_threads) +{ + constexpr size_t num_threads = 5; + MatchLoopCommunicator f1(num_threads, 100); + auto task = [&f1](Nexus& ctx) { + auto thread_id = ctx.thread_id(); + EXPECT_EQ(5u, selectBest(f1, makeScores(thread_id), thread_id).size()); + }; + Nexus::run(num_threads, task); } -TEST_MT_F("require that selectBest works with some empty threads", 10, MatchLoopCommunicator(num_threads, 7)) { - if (thread_id < 2) { - TEST_DO(equal(2u, makeScores(thread_id), selectBest(f1, makeScores(thread_id), thread_id))); - } else if (thread_id < 5) { - TEST_DO(equal(1u, makeScores(thread_id), selectBest(f1, makeScores(thread_id), thread_id))); - } else { - EXPECT_TRUE(selectBest(f1, makeScores(thread_id), thread_id).empty()); - } +TEST(MatchLoopCommunicatorTest, require_that_selectBest_works_with_some_empty_threads) +{ + constexpr size_t num_threads = 5; + MatchLoopCommunicator f1(num_threads, 7); + auto task = [&f1](Nexus& ctx) { + auto thread_id = ctx.thread_id(); + if (thread_id < 2) { + EXPECT_EQ(make_first_scores(thread_id, 2), selectBest(f1, makeScores(thread_id), thread_id)); + } else if (thread_id < 5) { + EXPECT_EQ(make_first_scores(thread_id, 1), selectBest(f1, makeScores(thread_id), thread_id)); + } else { + EXPECT_TRUE(selectBest(f1, makeScores(thread_id), thread_id).empty()); + } + }; + Nexus::run(num_threads, task); } -TEST_F("require that rangeCover works with a single thread", MatchLoopCommunicator(num_threads, 5)) { +TEST(MatchLoopCommunicatorTest, require_that_rangeCover_works_with_a_single_thread) +{ + constexpr size_t num_threads = 1; + constexpr size_t thread_id = 0; + MatchLoopCommunicator f1(num_threads, 5); RangePair res = rangeCover(f1, hit_vec({{1, 7.5}, {2, 1.5}}), thread_id, 10); - TEST_DO(equal_ranges(RangePair({1.5, 7.5}, {11.5, 17.5}), res)); + EXPECT_EQ(RangePair({1.5, 7.5}, {11.5, 17.5}), res); } -TEST_MT_F("require that rangeCover works with multiple threads", 5, MatchLoopCommunicator(num_threads, 10)) { - RangePair res = rangeCover(f1, hit_vec({{thread_id * 100 + 1, 100.0 + thread_id}, {thread_id * 100 + 2, 100.0 - thread_id}}), thread_id, 10); - TEST_DO(equal_ranges(RangePair({96.0, 104.0}, {106.0, 114.0}), res)); +TEST(MatchLoopCommunicatorTest, require_that_rangeCover_works_with_multiple_threads) +{ + constexpr size_t num_threads = 5; + MatchLoopCommunicator f1(num_threads, 10); + auto task = [&f1](Nexus& ctx) { + auto thread_id = ctx.thread_id(); + RangePair res = rangeCover(f1, hit_vec({{thread_id * 100 + 1, 100.0 + thread_id}, {thread_id * 100 + 2, 100.0 - thread_id}}), thread_id, 10); + EXPECT_EQ(RangePair({96.0, 104.0}, {106.0, 114.0}), res); + }; + Nexus::run(num_threads, task); } -TEST_MT_F("require that rangeCover works with no hits", 10, MatchLoopCommunicator(num_threads, 5)) { - RangePair res = rangeCover(f1, hit_vec({}), thread_id, 10); - TEST_DO(equal_ranges(RangePair({}, {}), res)); +TEST(MatchLoopCommunicatorTest, require_that_rangeCover_works_with_no_hits) +{ + constexpr size_t num_threads = 10; + MatchLoopCommunicator f1(num_threads, 5); + auto task = [&f1](Nexus& ctx) { + auto thread_id = ctx.thread_id(); + RangePair res = rangeCover(f1, hit_vec({}), thread_id, 10); + EXPECT_EQ(RangePair({}, {}), res); + }; + Nexus::run(num_threads, task); } -TEST_FFF("require that hits dropped due to lack of diversity affects range cover result", - MatchLoopCommunicator(num_threads, 3), - MatchLoopCommunicator(num_threads, 3, std::make_unique<EveryOdd>()), - MatchLoopCommunicator(num_threads, 3, std::make_unique<None>())) +TEST(MatchLoopCommunicatorTest, require_that_hits_dropped_due_to_lack_of_diversity_affects_range_cover_result) { + constexpr size_t num_threads = 1; + constexpr size_t thread_id = 0; + MatchLoopCommunicator f1(num_threads, 3); + MatchLoopCommunicator f2(num_threads, 3, std::make_unique<EveryOdd>(), nullptr, do_nothing); + MatchLoopCommunicator f3(num_threads, 3, std::make_unique<None>(), nullptr, do_nothing); auto hits_in = hit_vec({{1, 5}, {2, 4}, {3, 3}, {4, 2}, {5, 1}}); auto [my_work1, hits1, ranges1] = second_phase(f1, hits_in, thread_id, 10); auto [my_work2, hits2, ranges2] = second_phase(f2, hits_in, thread_id, 10); auto [my_work3, hits3, ranges3] = second_phase(f3, hits_in, thread_id, 10); - EXPECT_EQUAL(my_work1, 3u); - EXPECT_EQUAL(my_work2, 3u); - EXPECT_EQUAL(my_work3, 0u); + EXPECT_EQ(my_work1, 3u); + EXPECT_EQ(my_work2, 3u); + EXPECT_EQ(my_work3, 0u); - TEST_DO(equal(3u, hit_vec({{1, 15}, {2, 14}, {3, 13}}), hits1)); - TEST_DO(equal(3u, hit_vec({{1, 15}, {3, 13}, {5, 11}}), hits2)); - TEST_DO(equal(0u, hit_vec({}), hits3)); + EXPECT_EQ(hit_vec({{1, 15}, {2, 14}, {3, 13}}), hits1); + EXPECT_EQ(hit_vec({{1, 15}, {3, 13}, {5, 11}}), hits2); + EXPECT_EQ(hit_vec({}), hits3); - TEST_DO(equal_ranges(RangePair({3,5},{13,15}), ranges1)); - TEST_DO(equal_ranges(RangePair({4,5},{11,15}), ranges2)); // best dropped: 4 + EXPECT_EQ(RangePair({3,5},{13,15}), ranges1); + EXPECT_EQ(RangePair({4,5},{11,15}), ranges2); // best dropped: 4 // note that the 'drops all hits due to diversity' case will // trigger much of the same code path as dropping second phase // ranking due to hard doom. - TEST_DO(equal_ranges(RangePair({},{}), ranges3)); + EXPECT_EQ(RangePair({},{}), ranges3); } -TEST_MT_F("require that estimate_match_frequency will count hits and docs across threads", 4, MatchLoopCommunicator(num_threads, 5)) { - double freq = (0.0/10.0 + 1.0/11.0 + 2.0/12.0 + 3.0/13.0) / 4.0; - EXPECT_APPROX(freq, f1.estimate_match_frequency(Matches(thread_id, thread_id + 10)), 0.00001); +TEST(MatchLoopCommunicatorTest, require_that_estimate_match_frequency_will_count_hits_and_docs_across_threads) +{ + constexpr size_t num_threads = 4; + MatchLoopCommunicator f1(num_threads, 5); + auto task = [&f1](Nexus& ctx) { + auto thread_id = ctx.thread_id(); + double freq = (0.0/10.0 + 1.0/11.0 + 2.0/12.0 + 3.0/13.0) / 4.0; + EXPECT_NEAR(freq, f1.estimate_match_frequency(Matches(thread_id, thread_id + 10)), 0.00001); + }; + Nexus::run(num_threads, task); } -TEST_MT_F("require that second phase work is evenly distributed among search threads", 5, MatchLoopCommunicator(num_threads, 20)) { - size_t num_hits = thread_id * 5; - size_t docid = thread_id * 100; - double score = thread_id * 100.0; - Hits my_hits; - for(size_t i = 0; i < num_hits; ++i) { - my_hits.emplace_back(++docid, score); - score -= 1.0; - } - auto [my_work, best_hits, ranges] = second_phase(f1, my_hits, thread_id, 1000.0); - EXPECT_EQUAL(my_work, 4u); - TEST_DO(equal_ranges(RangePair({381,400},{1381,1400}), ranges)); - if (thread_id == 4) { - for (auto &hit: my_hits) { - hit.second += 1000.0; - } - TEST_DO(equal(num_hits, my_hits, best_hits)); - } else { - EXPECT_TRUE(best_hits.empty()); +TEST(MatchLoopCommunicatorTest, require_that_second_phase_work_is_evenly_distributed_among_search_threads) +{ + constexpr size_t num_threads = 5; + MatchLoopCommunicator f1(num_threads, 20); + auto task = [&f1](Nexus& ctx) { + auto thread_id = ctx.thread_id(); + size_t num_hits = thread_id * 5; + size_t docid = thread_id * 100; + double score = thread_id * 100.0; + Hits my_hits; + for(size_t i = 0; i < num_hits; ++i) { + my_hits.emplace_back(++docid, score); + score -= 1.0; + } + auto [my_work, best_hits, ranges] = second_phase(f1, my_hits, thread_id, 1000.0); + EXPECT_EQ(my_work, 4u); + EXPECT_EQ(RangePair({381,400},{1381,1400}), ranges); + if (thread_id == 4) { + for (auto &hit: my_hits) { + hit.second += 1000.0; + } + EXPECT_EQ(my_hits, best_hits); + } else { + EXPECT_TRUE(best_hits.empty()); + } + }; + Nexus::run(num_threads, task); +} + +namespace { + +std::vector<double> extract_ranks(const FirstPhaseRankLookup& l) { + std::vector<double> result; + for (uint32_t docid = 21; docid < 26; ++docid) { + result.emplace_back(l.lookup(docid)); } + return result; +} + +search::feature_t unranked = std::numeric_limits<search::feature_t>::max(); + +using FeatureVec = std::vector<search::feature_t>; + +} + +TEST(MatchLoopCommunicatorTest, require_that_first_phase_rank_lookup_is_populated) +{ + constexpr size_t num_threads = 1; + constexpr size_t thread_id = 0; + FirstPhaseRankLookup l1; + FirstPhaseRankLookup l2; + MatchLoopCommunicator f1(num_threads, 3, {}, &l1, do_nothing); + MatchLoopCommunicator f2(num_threads, 3, std::make_unique<EveryOdd>(), &l2, do_nothing); + auto hits_in = hit_vec({{21, 5}, {22, 4}, {23, 3}, {24, 2}, {25, 1}}); + auto res1 = second_phase(f1, hits_in, thread_id, 10); + auto res2 = second_phase(f2, hits_in, thread_id, 10); + EXPECT_EQ(FeatureVec({1, 2, 3, unranked, unranked}), extract_ranks(l1)); + EXPECT_EQ(FeatureVec({1, unranked, 3, unranked, 5}), extract_ranks(l2)); +} + +TEST(MatchLoopCommunicatorTest, require_that_before_second_phase_is_called_once) +{ + constexpr size_t num_threads = 5; + std::atomic<int> cnt(0); + auto before_second_phase = [&cnt]() noexcept { ++cnt; }; + MatchLoopCommunicator f1(num_threads, 3, {}, nullptr, before_second_phase); + auto task = [&f1](Nexus& ctx) { + auto thread_id = ctx.thread_id(); + auto hits_in = hit_vec({}); + (void) second_phase(f1, hits_in, thread_id, 1000.0); + }; + Nexus::run(num_threads, task); + EXPECT_EQ(1, cnt.load(std::memory_order_acquire)); } -TEST_MAIN() { TEST_RUN_ALL(); } +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchcore/src/tests/proton/matching/matching_test.cpp b/searchcore/src/tests/proton/matching/matching_test.cpp index 6dd8a93bcbd..fc0e5acdaa0 100644 --- a/searchcore/src/tests/proton/matching/matching_test.cpp +++ b/searchcore/src/tests/proton/matching/matching_test.cpp @@ -31,9 +31,10 @@ #include <vespa/eval/eval/simple_value.h> #include <vespa/eval/eval/tensor_spec.h> #include <vespa/eval/eval/value_codec.h> +#include <vespa/vespalib/gtest/gtest.h> #include <vespa/vespalib/objects/nbostream.h> -#include <vespa/vespalib/testkit/testapp.h> #include <vespa/vespalib/util/featureset.h> +#include <vespa/vespalib/util/limited_thread_bundle_wrapper.h> #include <vespa/vespalib/util/simple_thread_bundle.h> #include <vespa/vespalib/util/testclock.h> #include <vespa/vespalib/stllike/asciistream.h> @@ -65,6 +66,97 @@ using vespalib::eval::TensorSpec; using vespalib::FeatureSet; using vespalib::nbostream; +constexpr uint32_t NUM_DOCS = 1000; + +class MatchingTestSharedState { + std::unique_ptr<vespalib::SimpleThreadBundle> _thread_bundle; + std::unique_ptr<MockAttributeContext> _attribute_context; + std::unique_ptr<DocumentMetaStore> _meta_store; +public: + static constexpr size_t max_threads = 75; + MatchingTestSharedState(); + ~MatchingTestSharedState(); + vespalib::ThreadBundle& thread_bundle(); + IAttributeContext& attribute_context(); + const proton::IDocumentMetaStore& meta_store(); +}; + +MatchingTestSharedState::MatchingTestSharedState() + : _thread_bundle(), + _attribute_context(), + _meta_store() +{ +} + +MatchingTestSharedState::~MatchingTestSharedState() = default; + +vespalib::ThreadBundle& +MatchingTestSharedState::thread_bundle() +{ + if (!_thread_bundle) { + _thread_bundle = std::make_unique<vespalib::SimpleThreadBundle>(max_threads); + } + return *_thread_bundle; +} + +IAttributeContext& +MatchingTestSharedState::attribute_context() +{ + if (!_attribute_context) { + _attribute_context = std::make_unique<MockAttributeContext>(); + // attribute context + { + auto attr = std::make_shared<SingleInt32ExtAttribute>("a1"); + AttributeVector::DocId docid(0); + for (uint32_t i = 0; i < NUM_DOCS; ++i) { + attr->addDoc(docid); + attr->add(i, docid); // value = docid + } + assert(docid + 1 == NUM_DOCS); + _attribute_context->add(attr); + } + { + auto attr = std::make_shared<SingleInt32ExtAttribute>("a2"); + AttributeVector::DocId docid(0); + for (uint32_t i = 0; i < NUM_DOCS; ++i) { + attr->addDoc(docid); + attr->add(i * 2, docid); // value = docid * 2 + } + assert(docid + 1 == NUM_DOCS); + _attribute_context->add(attr); + } + { + auto attr = std::make_shared<SingleInt32ExtAttribute>("a3"); + AttributeVector::DocId docid(0); + for (uint32_t i = 0; i < NUM_DOCS; ++i) { + attr->addDoc(docid); + attr->add(i%10, docid); + } + assert(docid + 1 == NUM_DOCS); + _attribute_context->add(attr); + } + } + return *_attribute_context; +} + +const proton::IDocumentMetaStore& +MatchingTestSharedState::meta_store() +{ + if (!_meta_store) { + _meta_store = std::make_unique<DocumentMetaStore>(std::make_shared<bucketdb::BucketDBOwner>()); + // metaStore + for (uint32_t i = 0; i < NUM_DOCS; ++i) { + document::DocumentId docId(vespalib::make_string("id:ns:searchdocument::%u", i)); + const document::GlobalId &gid = docId.getGlobalId(); + document::BucketId bucketId(BucketFactory::getBucketId(docId)); + uint32_t docSize = 1; + _meta_store->put(gid, bucketId, Timestamp(0u), docSize, i, 0u); + _meta_store->setBucketState(bucketId, true); + } + } + return *_meta_store; +} + vespalib::ThreadBundle &ttb() { return vespalib::ThreadBundle::trivial(); } void inject_match_phase_limiting(Properties &setup, const vespalib::string &attribute, size_t max_hits, bool descending) @@ -106,8 +198,6 @@ vespalib::string make_same_element_stack_dump(const vespalib::string &a1_term, c //----------------------------------------------------------------------------- -const uint32_t NUM_DOCS = 1000; - struct EmptyRankingAssetsRepo : public search::fef::IRankingAssetsRepo { vespalib::eval::ConstantValue::UP getConstant(const vespalib::string &) const override { return {}; @@ -125,18 +215,19 @@ struct EmptyRankingAssetsRepo : public search::fef::IRankingAssetsRepo { //----------------------------------------------------------------------------- struct MyWorld { + MatchingTestSharedState& shared_state; Schema schema; Properties config; FakeSearchContext searchContext; - MockAttributeContext attributeContext; + IAttributeContext& attributeContext; std::shared_ptr<SessionManager> sessionManager; - DocumentMetaStore metaStore; + const proton::IDocumentMetaStore& metaStore; MatchingStats matchingStats; vespalib::TestClock clock; QueryLimiter queryLimiter; EmptyRankingAssetsRepo constantValueRepo; - MyWorld(); + MyWorld(MatchingTestSharedState& shared_state); ~MyWorld(); void basicSetup(size_t heapSize=10, size_t arraySize=100) { @@ -172,50 +263,9 @@ struct MyWorld { // odd -> 1 } - // attribute context - { - auto attr = std::make_shared<SingleInt32ExtAttribute>("a1"); - AttributeVector::DocId docid(0); - for (uint32_t i = 0; i < NUM_DOCS; ++i) { - attr->addDoc(docid); - attr->add(i, docid); // value = docid - } - assert(docid + 1 == NUM_DOCS); - attributeContext.add(attr); - } - { - auto attr = std::make_shared<SingleInt32ExtAttribute>("a2"); - AttributeVector::DocId docid(0); - for (uint32_t i = 0; i < NUM_DOCS; ++i) { - attr->addDoc(docid); - attr->add(i * 2, docid); // value = docid * 2 - } - assert(docid + 1 == NUM_DOCS); - attributeContext.add(attr); - } - { - auto attr = std::make_shared<SingleInt32ExtAttribute>("a3"); - AttributeVector::DocId docid(0); - for (uint32_t i = 0; i < NUM_DOCS; ++i) { - attr->addDoc(docid); - attr->add(i%10, docid); - } - assert(docid + 1 == NUM_DOCS); - attributeContext.add(attr); - } - // grouping sessionManager = std::make_shared<SessionManager>(100); - // metaStore - for (uint32_t i = 0; i < NUM_DOCS; ++i) { - document::DocumentId docId(vespalib::make_string("id:ns:searchdocument::%u", i)); - const document::GlobalId &gid = docId.getGlobalId(); - document::BucketId bucketId(BucketFactory::getBucketId(docId)); - uint32_t docSize = 1; - metaStore.put(gid, bucketId, Timestamp(0u), docSize, i, 0u); - metaStore.setBucketState(bucketId, true); - } } void set_property(const vespalib::string &name, const vespalib::string &value) { @@ -241,30 +291,30 @@ struct MyWorld { static void verify_match_features(SearchReply &reply, const vespalib::string &matched_field) { if (reply.hits.empty()) { - EXPECT_EQUAL(reply.match_features.names.size(), 0u); - EXPECT_EQUAL(reply.match_features.values.size(), 0u); + EXPECT_EQ(reply.match_features.names.size(), 0u); + EXPECT_EQ(reply.match_features.values.size(), 0u); } else { - ASSERT_EQUAL(reply.match_features.names.size(), 5u); - EXPECT_EQUAL(reply.match_features.names[0], "attribute(a1)"); - EXPECT_EQUAL(reply.match_features.names[1], "attribute(a2)"); - EXPECT_EQUAL(reply.match_features.names[2], "matches(a1)"); - EXPECT_EQUAL(reply.match_features.names[3], "matches(f1)"); - EXPECT_EQUAL(reply.match_features.names[4], "rankingExpression(\"tensor(x[3])(x)\")"); - ASSERT_EQUAL(reply.match_features.values.size(), 5 * reply.hits.size()); + ASSERT_EQ(reply.match_features.names.size(), 5u); + EXPECT_EQ(reply.match_features.names[0], "attribute(a1)"); + EXPECT_EQ(reply.match_features.names[1], "attribute(a2)"); + EXPECT_EQ(reply.match_features.names[2], "matches(a1)"); + EXPECT_EQ(reply.match_features.names[3], "matches(f1)"); + EXPECT_EQ(reply.match_features.names[4], "rankingExpression(\"tensor(x[3])(x)\")"); + ASSERT_EQ(reply.match_features.values.size(), 5 * reply.hits.size()); for (size_t i = 0; i < reply.hits.size(); ++i) { const auto *f = &reply.match_features.values[i * 5]; - EXPECT_GREATER(f[0].as_double(), 0.0); - EXPECT_GREATER(f[1].as_double(), 0.0); - EXPECT_EQUAL(f[0].as_double(), reply.hits[i].metric); - EXPECT_EQUAL(f[0].as_double() * 2, f[1].as_double()); - EXPECT_EQUAL(f[2].as_double(), double(matched_field == "a1")); - EXPECT_EQUAL(f[3].as_double(), double(matched_field == "f1")); + EXPECT_GT(f[0].as_double(), 0.0); + EXPECT_GT(f[1].as_double(), 0.0); + EXPECT_EQ(f[0].as_double(), reply.hits[i].metric); + EXPECT_EQ(f[0].as_double() * 2, f[1].as_double()); + EXPECT_EQ(f[2].as_double(), double(matched_field == "a1")); + EXPECT_EQ(f[3].as_double(), double(matched_field == "f1")); EXPECT_TRUE(f[4].is_data()); { nbostream buf(f[4].as_data().data, f[4].as_data().size); auto actual = spec_from_value(*SimpleValue::from_stream(buf)); auto expect = TensorSpec("tensor(x[3])").add({{"x", 0}}, 0).add({{"x", 1}}, 1).add({{"x", 2}}, 2); - EXPECT_EQUAL(actual, expect); + EXPECT_EQ(actual, expect); } } } @@ -272,16 +322,16 @@ struct MyWorld { static void verify_match_feature_renames(SearchReply &reply, const vespalib::string &matched_field) { if (reply.hits.empty()) { - EXPECT_EQUAL(reply.match_features.names.size(), 0u); - EXPECT_EQUAL(reply.match_features.values.size(), 0u); + EXPECT_EQ(reply.match_features.names.size(), 0u); + EXPECT_EQ(reply.match_features.values.size(), 0u); } else { - ASSERT_EQUAL(reply.match_features.names.size(), 5u); - EXPECT_EQUAL(reply.match_features.names[3], "foobar"); - EXPECT_EQUAL(reply.match_features.names[4], "tensor(x[3])(x)"); - ASSERT_EQUAL(reply.match_features.values.size(), 5 * reply.hits.size()); + ASSERT_EQ(reply.match_features.names.size(), 5u); + EXPECT_EQ(reply.match_features.names[3], "foobar"); + EXPECT_EQ(reply.match_features.names[4], "tensor(x[3])(x)"); + ASSERT_EQ(reply.match_features.values.size(), 5 * reply.hits.size()); for (size_t i = 0; i < reply.hits.size(); ++i) { const auto *f = &reply.match_features.values[i * 5]; - EXPECT_EQUAL(f[3].as_double(), double(matched_field == "f1")); + EXPECT_EQ(f[3].as_double(), double(matched_field == "f1")); EXPECT_TRUE(f[4].is_data()); } } @@ -378,7 +428,7 @@ struct MyWorld { auto mtf = matcher->create_match_tools_factory(req, searchContext, attributeContext, metaStore, overrides, ttb(), nullptr, searchContext.getDocIdLimit(), true); auto diversity = mtf->createDiversifier(HeapSize::lookup(config)); - EXPECT_EQUAL(expectDiverse, static_cast<bool>(diversity)); + EXPECT_EQ(expectDiverse, static_cast<bool>(diversity)); } double get_first_phase_termwise_limit() { @@ -397,7 +447,8 @@ struct MyWorld { SearchSession::OwnershipBundle owned_objects({std::make_unique<MockAttributeContext>(), std::make_unique<FakeSearchContext>()}, std::make_shared<MySearchHandler>(matcher)); - vespalib::SimpleThreadBundle threadBundle(threads); + assert(threads <= MatchingTestSharedState::max_threads); + vespalib::LimitedThreadBundleWrapper threadBundle(shared_state.thread_bundle(), threads); SearchReply::UP reply = matcher->match(req, threadBundle, searchContext, attributeContext, *sessionManager, metaStore, metaStore.getBucketDB(), std::move(owned_objects)); @@ -449,13 +500,14 @@ struct MyWorld { } }; -MyWorld::MyWorld() - : schema(), +MyWorld::MyWorld(MatchingTestSharedState& shared_state_in) + : shared_state(shared_state_in), + schema(), config(), searchContext(), - attributeContext(), + attributeContext(shared_state.attribute_context()), sessionManager(), - metaStore(std::make_shared<bucketdb::BucketDBOwner>()), + metaStore(shared_state.meta_store()), matchingStats(), clock(), queryLimiter() @@ -469,30 +521,66 @@ void verifyViewResolver(const ViewResolver &resolver) { std::vector<vespalib::string> fields; EXPECT_TRUE(resolver.resolve("foo", fields)); ASSERT_TRUE(fields.size() == 2u); - EXPECT_EQUAL("x", fields[0]); - EXPECT_EQUAL("y", fields[1]); + EXPECT_EQ("x", fields[0]); + EXPECT_EQ("y", fields[1]); } { std::vector<vespalib::string> fields; EXPECT_TRUE(resolver.resolve("bar", fields)); ASSERT_TRUE(fields.size() == 1u); - EXPECT_EQUAL("z", fields[0]); + EXPECT_EQ("z", fields[0]); } { std::vector<vespalib::string> fields; EXPECT_TRUE(!resolver.resolve("baz", fields)); ASSERT_TRUE(fields.size() == 1u); - EXPECT_EQUAL("baz", fields[0]); + EXPECT_EQ("baz", fields[0]); } } -TEST("require that view resolver can be set up directly") { +class MatchingTest : public ::testing::Test { + static std::unique_ptr<MatchingTestSharedState> _shared_state; +protected: + MatchingTest(); + ~MatchingTest() override; + static void SetUpTestSuite(); + static void TearDownTestSuite(); + static MatchingTestSharedState& shared_state(); +}; + +MatchingTest::MatchingTest() = default; + +MatchingTest::~MatchingTest() = default; + +void +MatchingTest::SetUpTestSuite() +{ + _shared_state = std::make_unique<MatchingTestSharedState>(); +} + +void +MatchingTest::TearDownTestSuite() +{ + _shared_state.reset(); +} + +MatchingTestSharedState& +MatchingTest::shared_state() +{ + return *_shared_state; +} + +std::unique_ptr<MatchingTestSharedState> MatchingTest::_shared_state; + +TEST_F(MatchingTest, require_that_view_resolver_can_be_set_up_directly) +{ ViewResolver resolver; resolver.add("foo", "x").add("foo", "y").add("bar", "z"); - TEST_DO(verifyViewResolver(resolver)); + verifyViewResolver(resolver); } -TEST("require that view resolver can be set up from schema") { +TEST_F(MatchingTest, require_that_view_resolver_can_be_set_up_from_schema) +{ Schema schema; Schema::FieldSet foo("foo"); foo.addField("x").addField("y"); @@ -501,124 +589,132 @@ TEST("require that view resolver can be set up from schema") { schema.addFieldSet(foo); schema.addFieldSet(bar); ViewResolver resolver = ViewResolver::createFromSchema(schema); - TEST_DO(verifyViewResolver(resolver)); + verifyViewResolver(resolver); } //----------------------------------------------------------------------------- -TEST("require that matching is performed (multi-threaded)") { +TEST_F(MatchingTest, require_that_matching_is_performed_with_multi_threaded_matcher) +{ for (size_t threads = 1; threads <= 16; ++threads) { - MyWorld world; + MyWorld world(shared_state()); world.basicSetup(); world.basicResults(); SearchRequest::SP request = MyWorld::createSimpleRequest("f1", "spread"); SearchReply::UP reply = world.performSearch(*request, threads); - EXPECT_EQUAL(9u, world.matchingStats.docsMatched()); - EXPECT_EQUAL(9u, reply->hits.size()); - EXPECT_GREATER(world.matchingStats.matchTimeAvg(), 0.0000001); + EXPECT_EQ(9u, world.matchingStats.docsMatched()); + EXPECT_EQ(9u, reply->hits.size()); + EXPECT_GT(world.matchingStats.matchTimeAvg(), 0.0000001); } } -TEST("require that match features are calculated (multi-threaded)") { +TEST_F(MatchingTest, require_that_match_features_are_calculated_with_multi_threaded_matcher) +{ for (size_t threads = 1; threads <= 16; ++threads) { - MyWorld world; + MyWorld world(shared_state()); world.basicSetup(); world.basicResults(); world.setup_match_features(); SearchRequest::SP request = MyWorld::createSimpleRequest("f1", "spread"); SearchReply::UP reply = world.performSearch(*request, threads); - EXPECT_GREATER(reply->hits.size(), 0u); + EXPECT_GT(reply->hits.size(), 0u); MyWorld::verify_match_features(*reply, "f1"); } } -TEST("require that match features can be renamed") { - MyWorld world; +TEST_F(MatchingTest, require_that_match_features_can_be_renamed) +{ + MyWorld world(shared_state()); world.basicSetup(); world.basicResults(); world.setup_match_features(); world.setup_feature_renames(); SearchRequest::SP request = MyWorld::createSimpleRequest("f1", "spread"); SearchReply::UP reply = world.performSearch(*request, 1); - EXPECT_GREATER(reply->hits.size(), 0u); + EXPECT_GT(reply->hits.size(), 0u); MyWorld::verify_match_feature_renames(*reply, "f1"); } -TEST("require that no hits gives no match feature names") { - MyWorld world; +TEST_F(MatchingTest, require_that_no_hits_gives_no_match_feature_names) + { + MyWorld world(shared_state()); world.basicSetup(); world.basicResults(); world.setup_match_features(); SearchRequest::SP request = MyWorld::createSimpleRequest("f1", "not_found"); SearchReply::UP reply = world.performSearch(*request, 1); - EXPECT_EQUAL(reply->hits.size(), 0u); + EXPECT_EQ(reply->hits.size(), 0u); MyWorld::verify_match_features(*reply, "f1"); } -TEST("require that matching also returns hits when only bitvector is used (multi-threaded)") { +TEST_F(MatchingTest, require_that_matching_also_returns_hits_when_only_bitvector_is_used_with_multi_threaded_matcher) + { for (size_t threads = 1; threads <= 16; ++threads) { - MyWorld world; + MyWorld world(shared_state()); world.basicSetup(0, 0); world.verbose_a1_result("all"); SearchRequest::SP request = MyWorld::createSimpleRequest("a1", "all"); SearchReply::UP reply = world.performSearch(*request, threads); - EXPECT_EQUAL(985u, world.matchingStats.docsMatched()); - EXPECT_EQUAL(10u, reply->hits.size()); - EXPECT_GREATER(world.matchingStats.matchTimeAvg(), 0.0000001); + EXPECT_EQ(985u, world.matchingStats.docsMatched()); + EXPECT_EQ(10u, reply->hits.size()); + EXPECT_GT(world.matchingStats.matchTimeAvg(), 0.0000001); } } -TEST("require that ranking is performed (multi-threaded)") { +TEST_F(MatchingTest, require_that_ranking_is_performed_with_multi_threaded_matcher) + { for (size_t threads = 1; threads <= 16; ++threads) { - MyWorld world; + MyWorld world(shared_state()); world.basicSetup(); world.basicResults(); SearchRequest::SP request = MyWorld::createSimpleRequest("f1", "spread"); SearchReply::UP reply = world.performSearch(*request, threads); - EXPECT_EQUAL(9u, world.matchingStats.docsMatched()); - EXPECT_EQUAL(9u, world.matchingStats.docsRanked()); - EXPECT_EQUAL(0u, world.matchingStats.docsReRanked()); + EXPECT_EQ(9u, world.matchingStats.docsMatched()); + EXPECT_EQ(9u, world.matchingStats.docsRanked()); + EXPECT_EQ(0u, world.matchingStats.docsReRanked()); ASSERT_TRUE(reply->hits.size() == 9u); - EXPECT_EQUAL(document::DocumentId("id:ns:searchdocument::900").getGlobalId(), reply->hits[0].gid); - EXPECT_EQUAL(900.0, reply->hits[0].metric); - EXPECT_EQUAL(document::DocumentId("id:ns:searchdocument::800").getGlobalId(), reply->hits[1].gid); - EXPECT_EQUAL(800.0, reply->hits[1].metric); - EXPECT_EQUAL(document::DocumentId("id:ns:searchdocument::700").getGlobalId(), reply->hits[2].gid); - EXPECT_EQUAL(700.0, reply->hits[2].metric); - EXPECT_GREATER(world.matchingStats.matchTimeAvg(), 0.0000001); - EXPECT_EQUAL(0.0, world.matchingStats.rerankTimeAvg()); + EXPECT_EQ(document::DocumentId("id:ns:searchdocument::900").getGlobalId(), reply->hits[0].gid); + EXPECT_EQ(900.0, reply->hits[0].metric); + EXPECT_EQ(document::DocumentId("id:ns:searchdocument::800").getGlobalId(), reply->hits[1].gid); + EXPECT_EQ(800.0, reply->hits[1].metric); + EXPECT_EQ(document::DocumentId("id:ns:searchdocument::700").getGlobalId(), reply->hits[2].gid); + EXPECT_EQ(700.0, reply->hits[2].metric); + EXPECT_GT(world.matchingStats.matchTimeAvg(), 0.0000001); + EXPECT_EQ(0.0, world.matchingStats.rerankTimeAvg()); } } -TEST("require that re-ranking is performed (multi-threaded)") { +TEST_F(MatchingTest, require_that_reranking_is_performed_with_multi_threaded_matcher) + { for (size_t threads = 1; threads <= 16; ++threads) { - MyWorld world; + MyWorld world(shared_state()); world.basicSetup(); world.setupSecondPhaseRanking(); world.basicResults(); SearchRequest::SP request = MyWorld::createSimpleRequest("f1", "spread"); SearchReply::UP reply = world.performSearch(*request, threads); - EXPECT_EQUAL(9u, world.matchingStats.docsMatched()); - EXPECT_EQUAL(9u, world.matchingStats.docsRanked()); - EXPECT_EQUAL(3u, world.matchingStats.docsReRanked()); + EXPECT_EQ(9u, world.matchingStats.docsMatched()); + EXPECT_EQ(9u, world.matchingStats.docsRanked()); + EXPECT_EQ(3u, world.matchingStats.docsReRanked()); ASSERT_TRUE(reply->hits.size() == 9u); - EXPECT_EQUAL(document::DocumentId("id:ns:searchdocument::900").getGlobalId(), reply->hits[0].gid); - EXPECT_EQUAL(1800.0, reply->hits[0].metric); - EXPECT_EQUAL(document::DocumentId("id:ns:searchdocument::800").getGlobalId(), reply->hits[1].gid); - EXPECT_EQUAL(1600.0, reply->hits[1].metric); - EXPECT_EQUAL(document::DocumentId("id:ns:searchdocument::700").getGlobalId(), reply->hits[2].gid); - EXPECT_EQUAL(1400.0, reply->hits[2].metric); - EXPECT_EQUAL(document::DocumentId("id:ns:searchdocument::600").getGlobalId(), reply->hits[3].gid); - EXPECT_EQUAL(600.0, reply->hits[3].metric); - EXPECT_EQUAL(document::DocumentId("id:ns:searchdocument::500").getGlobalId(), reply->hits[4].gid); - EXPECT_EQUAL(500.0, reply->hits[4].metric); - EXPECT_GREATER(world.matchingStats.matchTimeAvg(), 0.0000001); - EXPECT_GREATER(world.matchingStats.rerankTimeAvg(), 0.0000001); + EXPECT_EQ(document::DocumentId("id:ns:searchdocument::900").getGlobalId(), reply->hits[0].gid); + EXPECT_EQ(1800.0, reply->hits[0].metric); + EXPECT_EQ(document::DocumentId("id:ns:searchdocument::800").getGlobalId(), reply->hits[1].gid); + EXPECT_EQ(1600.0, reply->hits[1].metric); + EXPECT_EQ(document::DocumentId("id:ns:searchdocument::700").getGlobalId(), reply->hits[2].gid); + EXPECT_EQ(1400.0, reply->hits[2].metric); + EXPECT_EQ(document::DocumentId("id:ns:searchdocument::600").getGlobalId(), reply->hits[3].gid); + EXPECT_EQ(600.0, reply->hits[3].metric); + EXPECT_EQ(document::DocumentId("id:ns:searchdocument::500").getGlobalId(), reply->hits[4].gid); + EXPECT_EQ(500.0, reply->hits[4].metric); + EXPECT_GT(world.matchingStats.matchTimeAvg(), 0.0000001); + EXPECT_GT(world.matchingStats.rerankTimeAvg(), 0.0000001); } } -TEST("require that re-ranking is not diverse when not requested to be.") { - MyWorld world; +TEST_F(MatchingTest, require_that_reranking_is_not_diverse_when_not_requested_to_be) +{ + MyWorld world(shared_state()); world.basicSetup(); world.setupSecondPhaseRanking(); world.basicResults(); @@ -628,8 +724,9 @@ TEST("require that re-ranking is not diverse when not requested to be.") { using namespace search::fef::indexproperties::matchphase; -TEST("require that re-ranking is diverse when requested to be") { - MyWorld world; +TEST_F(MatchingTest, require_that_reranking_is_diverse_when_requested_to_be) +{ + MyWorld world(shared_state()); world.basicSetup(); world.setupSecondPhaseRanking(); world.basicResults(); @@ -641,8 +738,9 @@ TEST("require that re-ranking is diverse when requested to be") { world.verify_diversity_filter(*request, true); } -TEST("require that re-ranking is diverse with diversity = 1/1") { - MyWorld world; +TEST_F(MatchingTest, require_that_reranking_is_diverse_with_diversity_1_of_1) +{ + MyWorld world(shared_state()); world.basicSetup(); world.setupSecondPhaseRanking(); world.basicResults(); @@ -652,24 +750,25 @@ TEST("require that re-ranking is diverse with diversity = 1/1") { .add(DiversityMinGroups::NAME, "3") .add(DiversityCutoffStrategy::NAME, "strict"); SearchReply::UP reply = world.performSearch(*request, 1); - EXPECT_EQUAL(9u, world.matchingStats.docsMatched()); - EXPECT_EQUAL(9u, world.matchingStats.docsRanked()); - EXPECT_EQUAL(3u, world.matchingStats.docsReRanked()); + EXPECT_EQ(9u, world.matchingStats.docsMatched()); + EXPECT_EQ(9u, world.matchingStats.docsRanked()); + EXPECT_EQ(3u, world.matchingStats.docsReRanked()); ASSERT_TRUE(reply->hits.size() == 9u); - EXPECT_EQUAL(document::DocumentId("id:ns:searchdocument::900").getGlobalId(), reply->hits[0].gid); - EXPECT_EQUAL(1800.0, reply->hits[0].metric); - EXPECT_EQUAL(document::DocumentId("id:ns:searchdocument::800").getGlobalId(), reply->hits[1].gid); - EXPECT_EQUAL(1600.0, reply->hits[1].metric); - EXPECT_EQUAL(document::DocumentId("id:ns:searchdocument::700").getGlobalId(), reply->hits[2].gid); - EXPECT_EQUAL(1400.0, reply->hits[2].metric); - EXPECT_EQUAL(document::DocumentId("id:ns:searchdocument::600").getGlobalId(), reply->hits[3].gid); - EXPECT_EQUAL(600.0, reply->hits[3].metric); - EXPECT_EQUAL(document::DocumentId("id:ns:searchdocument::500").getGlobalId(), reply->hits[4].gid); - EXPECT_EQUAL(500.0, reply->hits[4].metric); + EXPECT_EQ(document::DocumentId("id:ns:searchdocument::900").getGlobalId(), reply->hits[0].gid); + EXPECT_EQ(1800.0, reply->hits[0].metric); + EXPECT_EQ(document::DocumentId("id:ns:searchdocument::800").getGlobalId(), reply->hits[1].gid); + EXPECT_EQ(1600.0, reply->hits[1].metric); + EXPECT_EQ(document::DocumentId("id:ns:searchdocument::700").getGlobalId(), reply->hits[2].gid); + EXPECT_EQ(1400.0, reply->hits[2].metric); + EXPECT_EQ(document::DocumentId("id:ns:searchdocument::600").getGlobalId(), reply->hits[3].gid); + EXPECT_EQ(600.0, reply->hits[3].metric); + EXPECT_EQ(document::DocumentId("id:ns:searchdocument::500").getGlobalId(), reply->hits[4].gid); + EXPECT_EQ(500.0, reply->hits[4].metric); } -TEST("require that re-ranking is diverse with diversity = 1/10") { - MyWorld world; +TEST_F(MatchingTest, require_that_reranking_is_diverse_with_diversity_1_of_10) + { + MyWorld world(shared_state()); world.basicSetup(); world.setupSecondPhaseRanking(); world.basicResults(); @@ -679,47 +778,50 @@ TEST("require that re-ranking is diverse with diversity = 1/10") { .add(DiversityMinGroups::NAME, "3") .add(DiversityCutoffStrategy::NAME, "strict"); SearchReply::UP reply = world.performSearch(*request, 1); - EXPECT_EQUAL(9u, world.matchingStats.docsMatched()); - EXPECT_EQUAL(9u, world.matchingStats.docsRanked()); - EXPECT_EQUAL(1u, world.matchingStats.docsReRanked()); + EXPECT_EQ(9u, world.matchingStats.docsMatched()); + EXPECT_EQ(9u, world.matchingStats.docsRanked()); + EXPECT_EQ(1u, world.matchingStats.docsReRanked()); ASSERT_TRUE(reply->hits.size() == 9u); - EXPECT_EQUAL(document::DocumentId("id:ns:searchdocument::900").getGlobalId(), reply->hits[0].gid); - EXPECT_EQUAL(1800.0, reply->hits[0].metric); + EXPECT_EQ(document::DocumentId("id:ns:searchdocument::900").getGlobalId(), reply->hits[0].gid); + EXPECT_EQ(1800.0, reply->hits[0].metric); //TODO This is of course incorrect until the selectBest method sees everything. - EXPECT_EQUAL(document::DocumentId("id:ns:searchdocument::800").getGlobalId(), reply->hits[1].gid); - EXPECT_EQUAL(800.0, reply->hits[1].metric); - EXPECT_EQUAL(document::DocumentId("id:ns:searchdocument::700").getGlobalId(), reply->hits[2].gid); - EXPECT_EQUAL(700.0, reply->hits[2].metric); - EXPECT_EQUAL(document::DocumentId("id:ns:searchdocument::600").getGlobalId(), reply->hits[3].gid); - EXPECT_EQUAL(600.0, reply->hits[3].metric); - EXPECT_EQUAL(document::DocumentId("id:ns:searchdocument::500").getGlobalId(), reply->hits[4].gid); - EXPECT_EQUAL(500.0, reply->hits[4].metric); + EXPECT_EQ(document::DocumentId("id:ns:searchdocument::800").getGlobalId(), reply->hits[1].gid); + EXPECT_EQ(800.0, reply->hits[1].metric); + EXPECT_EQ(document::DocumentId("id:ns:searchdocument::700").getGlobalId(), reply->hits[2].gid); + EXPECT_EQ(700.0, reply->hits[2].metric); + EXPECT_EQ(document::DocumentId("id:ns:searchdocument::600").getGlobalId(), reply->hits[3].gid); + EXPECT_EQ(600.0, reply->hits[3].metric); + EXPECT_EQ(document::DocumentId("id:ns:searchdocument::500").getGlobalId(), reply->hits[4].gid); + EXPECT_EQ(500.0, reply->hits[4].metric); } -TEST("require that sortspec can be used (multi-threaded)") { +TEST_F(MatchingTest, require_that_sortspec_can_be_used_with_multi_threaded_matcher) +{ for (size_t threads = 1; threads <= 16; ++threads) { - MyWorld world; + MyWorld world(shared_state()); world.basicSetup(); world.basicResults(); SearchRequest::SP request = MyWorld::createSimpleRequest("f1", "spread"); request->sortSpec = "+a1"; SearchReply::UP reply = world.performSearch(*request, threads); - ASSERT_EQUAL(9u, reply->hits.size()); - EXPECT_EQUAL(document::DocumentId("id:ns:searchdocument::100").getGlobalId(), reply->hits[0].gid); - EXPECT_EQUAL(zero_rank_value, reply->hits[0].metric); - EXPECT_EQUAL(document::DocumentId("id:ns:searchdocument::200").getGlobalId(), reply->hits[1].gid); - EXPECT_EQUAL(zero_rank_value, reply->hits[1].metric); - EXPECT_EQUAL(document::DocumentId("id:ns:searchdocument::300").getGlobalId(), reply->hits[2].gid); - EXPECT_EQUAL(zero_rank_value, reply->hits[2].metric); + ASSERT_EQ(9u, reply->hits.size()); + EXPECT_EQ(document::DocumentId("id:ns:searchdocument::100").getGlobalId(), reply->hits[0].gid); + EXPECT_EQ(zero_rank_value, reply->hits[0].metric); + EXPECT_EQ(document::DocumentId("id:ns:searchdocument::200").getGlobalId(), reply->hits[1].gid); + EXPECT_EQ(zero_rank_value, reply->hits[1].metric); + EXPECT_EQ(document::DocumentId("id:ns:searchdocument::300").getGlobalId(), reply->hits[2].gid); + EXPECT_EQ(zero_rank_value, reply->hits[2].metric); EXPECT_FALSE(reply->sortIndex.empty()); EXPECT_FALSE(reply->sortData.empty()); } } ExpressionNode::UP createAttr() { return std::make_unique<AttributeNode>("a1"); } -TEST("require that grouping is performed (multi-threaded)") { + +TEST_F(MatchingTest, require_that_grouping_is_performed_with_multi_threaded_matcher) + { for (size_t threads = 1; threads <= 16; ++threads) { - MyWorld world; + MyWorld world(shared_state()); world.basicSetup(); world.basicResults(); SearchRequest::SP request = MyWorld::createSimpleRequest("f1", "spread"); @@ -739,120 +841,125 @@ TEST("require that grouping is performed (multi-threaded)") { vespalib::NBOSerializer is(buf); uint32_t n; is >> n; - EXPECT_EQUAL(1u, n); + EXPECT_EQ(1u, n); Grouping gresult; gresult.deserialize(is); Grouping gexpect; gexpect.setRoot(Group().addResult(SumAggregationResult() .setExpression(createAttr()) .setResult(Int64ResultNode(4500)))); - EXPECT_EQUAL(gexpect.root().asString(), gresult.root().asString()); + EXPECT_EQ(gexpect.root().asString(), gresult.root().asString()); } - EXPECT_GREATER(world.matchingStats.groupingTimeAvg(), 0.0000001); + EXPECT_GT(world.matchingStats.groupingTimeAvg(), 0.0000001); } } -TEST("require that summary features are filled") { - MyWorld world; +TEST_F(MatchingTest, require_that_summary_features_are_filled) +{ + MyWorld world(shared_state()); world.basicSetup(); world.basicResults(); DocsumRequest::SP req = MyWorld::createSimpleDocsumRequest("f1", "foo"); FeatureSet::SP fs = world.getSummaryFeatures(*req); const FeatureSet::Value * f = nullptr; - EXPECT_EQUAL(5u, fs->numFeatures()); - EXPECT_EQUAL("attribute(a1)", fs->getNames()[0]); - EXPECT_EQUAL("matches(f1)", fs->getNames()[1]); - EXPECT_EQUAL("rankingExpression(\"reduce(tensor(x[3])(x),sum)\")", fs->getNames()[2]); - EXPECT_EQUAL("rankingExpression(\"tensor(x[3])(x)\")", fs->getNames()[3]); - EXPECT_EQUAL("value(100)", fs->getNames()[4]); - EXPECT_EQUAL(3u, fs->numDocs()); + EXPECT_EQ(5u, fs->numFeatures()); + EXPECT_EQ("attribute(a1)", fs->getNames()[0]); + EXPECT_EQ("matches(f1)", fs->getNames()[1]); + EXPECT_EQ("rankingExpression(\"reduce(tensor(x[3])(x),sum)\")", fs->getNames()[2]); + EXPECT_EQ("rankingExpression(\"tensor(x[3])(x)\")", fs->getNames()[3]); + EXPECT_EQ("value(100)", fs->getNames()[4]); + EXPECT_EQ(3u, fs->numDocs()); f = fs->getFeaturesByDocId(10); EXPECT_TRUE(f != nullptr); - EXPECT_EQUAL(10, f[0].as_double()); - EXPECT_EQUAL(1, f[1].as_double()); - EXPECT_EQUAL(100, f[4].as_double()); + EXPECT_EQ(10, f[0].as_double()); + EXPECT_EQ(1, f[1].as_double()); + EXPECT_EQ(100, f[4].as_double()); f = fs->getFeaturesByDocId(15); EXPECT_TRUE(f != nullptr); - EXPECT_EQUAL(15, f[0].as_double()); - EXPECT_EQUAL(0, f[1].as_double()); - EXPECT_EQUAL(100, f[4].as_double()); + EXPECT_EQ(15, f[0].as_double()); + EXPECT_EQ(0, f[1].as_double()); + EXPECT_EQ(100, f[4].as_double()); f = fs->getFeaturesByDocId(30); EXPECT_TRUE(f != nullptr); - EXPECT_EQUAL(30, f[0].as_double()); - EXPECT_EQUAL(1, f[1].as_double()); + EXPECT_EQ(30, f[0].as_double()); + EXPECT_EQ(1, f[1].as_double()); EXPECT_TRUE(f[2].is_double()); EXPECT_TRUE(!f[2].is_data()); - EXPECT_EQUAL(f[2].as_double(), 3.0); // 0 + 1 + 2 + EXPECT_EQ(f[2].as_double(), 3.0); // 0 + 1 + 2 EXPECT_TRUE(!f[3].is_double()); EXPECT_TRUE(f[3].is_data()); - EXPECT_EQUAL(100, f[4].as_double()); + EXPECT_EQ(100, f[4].as_double()); { nbostream buf(f[3].as_data().data, f[3].as_data().size); auto actual = spec_from_value(*SimpleValue::from_stream(buf)); auto expect = TensorSpec("tensor(x[3])").add({{"x", 0}}, 0).add({{"x", 1}}, 1).add({{"x", 2}}, 2); - EXPECT_EQUAL(actual, expect); + EXPECT_EQ(actual, expect); } } -TEST("require that rank features are filled") { - MyWorld world; +TEST_F(MatchingTest, require_that_rank_features_are_filled) +{ + MyWorld world(shared_state()); world.basicSetup(); world.basicResults(); DocsumRequest::SP req = MyWorld::createSimpleDocsumRequest("f1", "foo"); FeatureSet::SP fs = world.getRankFeatures(*req); const FeatureSet::Value * f = nullptr; - EXPECT_EQUAL(1u, fs->numFeatures()); - EXPECT_EQUAL("attribute(a2)", fs->getNames()[0]); - EXPECT_EQUAL(3u, fs->numDocs()); + EXPECT_EQ(1u, fs->numFeatures()); + EXPECT_EQ("attribute(a2)", fs->getNames()[0]); + EXPECT_EQ(3u, fs->numDocs()); f = fs->getFeaturesByDocId(10); EXPECT_TRUE(f != nullptr); - EXPECT_EQUAL(20, f[0].as_double()); + EXPECT_EQ(20, f[0].as_double()); f = fs->getFeaturesByDocId(15); EXPECT_TRUE(f != nullptr); - EXPECT_EQUAL(30, f[0].as_double()); + EXPECT_EQ(30, f[0].as_double()); f = fs->getFeaturesByDocId(30); EXPECT_TRUE(f != nullptr); - EXPECT_EQUAL(60, f[0].as_double()); + EXPECT_EQ(60, f[0].as_double()); } -TEST("require that search session can be cached") { - MyWorld world; +TEST_F(MatchingTest, require_that_search_session_can_be_cached) +{ + MyWorld world(shared_state()); world.basicSetup(); world.basicResults(); SearchRequest::SP request = MyWorld::createSimpleRequest("f1", "foo"); request->propertiesMap.lookupCreate(search::MapNames::CACHES).add("query", "true"); request->sessionId.push_back('a'); - EXPECT_EQUAL(0u, world.sessionManager->getSearchStats().numInsert); + EXPECT_EQ(0u, world.sessionManager->getSearchStats().numInsert); SearchReply::UP reply = world.performSearch(*request, 1); - EXPECT_EQUAL(1u, world.sessionManager->getSearchStats().numInsert); + EXPECT_EQ(1u, world.sessionManager->getSearchStats().numInsert); SearchSession::SP session = world.sessionManager->pickSearch("a"); ASSERT_TRUE(session.get()); - EXPECT_EQUAL(request->getTimeOfDoom(), session->getTimeOfDoom()); - EXPECT_EQUAL("a", session->getSessionId()); + EXPECT_EQ(request->getTimeOfDoom(), session->getTimeOfDoom()); + EXPECT_EQ("a", session->getSessionId()); } -TEST("require that summary features can be renamed") { - MyWorld world; +TEST_F(MatchingTest, require_that_summary_features_can_be_renamed) +{ + MyWorld world(shared_state()); world.basicSetup(); world.setup_feature_renames(); world.basicResults(); DocsumRequest::SP req = MyWorld::createSimpleDocsumRequest("f1", "foo"); FeatureSet::SP fs = world.getSummaryFeatures(*req); const FeatureSet::Value * f = nullptr; - EXPECT_EQUAL(5u, fs->numFeatures()); - EXPECT_EQUAL("attribute(a1)", fs->getNames()[0]); - EXPECT_EQUAL("foobar", fs->getNames()[1]); - EXPECT_EQUAL("rankingExpression(\"reduce(tensor(x[3])(x),sum)\")", fs->getNames()[2]); - EXPECT_EQUAL("tensor(x[3])(x)", fs->getNames()[3]); - EXPECT_EQUAL(3u, fs->numDocs()); + EXPECT_EQ(5u, fs->numFeatures()); + EXPECT_EQ("attribute(a1)", fs->getNames()[0]); + EXPECT_EQ("foobar", fs->getNames()[1]); + EXPECT_EQ("rankingExpression(\"reduce(tensor(x[3])(x),sum)\")", fs->getNames()[2]); + EXPECT_EQ("tensor(x[3])(x)", fs->getNames()[3]); + EXPECT_EQ(3u, fs->numDocs()); f = fs->getFeaturesByDocId(30); EXPECT_TRUE(f != nullptr); EXPECT_TRUE(f[2].is_double()); EXPECT_TRUE(f[3].is_data()); } -TEST("require that getSummaryFeatures can use cached query setup") { - MyWorld world; +TEST_F(MatchingTest, require_that_getSummaryFeatures_can_use_cached_query_setup) +{ + MyWorld world(shared_state()); world.basicSetup(); world.basicResults(); SearchRequest::SP request = MyWorld::createSimpleRequest("f1", "foo"); @@ -867,46 +974,46 @@ TEST("require that getSummaryFeatures can use cached query setup") { docsum_request->hits.back().docid = 30; FeatureSet::SP fs = world.getSummaryFeatures(*docsum_request); - ASSERT_EQUAL(5u, fs->numFeatures()); - EXPECT_EQUAL("attribute(a1)", fs->getNames()[0]); - EXPECT_EQUAL("matches(f1)", fs->getNames()[1]); - EXPECT_EQUAL("rankingExpression(\"reduce(tensor(x[3])(x),sum)\")", fs->getNames()[2]); - EXPECT_EQUAL("rankingExpression(\"tensor(x[3])(x)\")", fs->getNames()[3]); - EXPECT_EQUAL("value(100)", fs->getNames()[4]); - ASSERT_EQUAL(1u, fs->numDocs()); + ASSERT_EQ(5u, fs->numFeatures()); + EXPECT_EQ("attribute(a1)", fs->getNames()[0]); + EXPECT_EQ("matches(f1)", fs->getNames()[1]); + EXPECT_EQ("rankingExpression(\"reduce(tensor(x[3])(x),sum)\")", fs->getNames()[2]); + EXPECT_EQ("rankingExpression(\"tensor(x[3])(x)\")", fs->getNames()[3]); + EXPECT_EQ("value(100)", fs->getNames()[4]); + ASSERT_EQ(1u, fs->numDocs()); const auto *f = fs->getFeaturesByDocId(30); ASSERT_TRUE(f); - EXPECT_EQUAL(30, f[0].as_double()); - EXPECT_EQUAL(100, f[4].as_double()); + EXPECT_EQ(30, f[0].as_double()); + EXPECT_EQ(100, f[4].as_double()); // getSummaryFeatures can be called multiple times. fs = world.getSummaryFeatures(*docsum_request); - ASSERT_EQUAL(5u, fs->numFeatures()); - EXPECT_EQUAL("attribute(a1)", fs->getNames()[0]); - EXPECT_EQUAL("matches(f1)", fs->getNames()[1]); - EXPECT_EQUAL("rankingExpression(\"reduce(tensor(x[3])(x),sum)\")", fs->getNames()[2]); - EXPECT_EQUAL("rankingExpression(\"tensor(x[3])(x)\")", fs->getNames()[3]); - EXPECT_EQUAL("value(100)", fs->getNames()[4]); - ASSERT_EQUAL(1u, fs->numDocs()); + ASSERT_EQ(5u, fs->numFeatures()); + EXPECT_EQ("attribute(a1)", fs->getNames()[0]); + EXPECT_EQ("matches(f1)", fs->getNames()[1]); + EXPECT_EQ("rankingExpression(\"reduce(tensor(x[3])(x),sum)\")", fs->getNames()[2]); + EXPECT_EQ("rankingExpression(\"tensor(x[3])(x)\")", fs->getNames()[3]); + EXPECT_EQ("value(100)", fs->getNames()[4]); + ASSERT_EQ(1u, fs->numDocs()); f = fs->getFeaturesByDocId(30); ASSERT_TRUE(f); - EXPECT_EQUAL(30, f[0].as_double()); - EXPECT_EQUAL(100, f[4].as_double()); + EXPECT_EQ(30, f[0].as_double()); + EXPECT_EQ(100, f[4].as_double()); } -double count_f1_matches(FeatureSet &fs) { +void count_f1_matches(FeatureSet &fs, double& sum) { ASSERT_TRUE(fs.getNames().size() > 1); - ASSERT_EQUAL(fs.getNames()[1], "matches(f1)"); - double sum = 0.0; + ASSERT_EQ(fs.getNames()[1], "matches(f1)"); + sum = 0.0; for (size_t i = 0; i < fs.numDocs(); ++i) { auto *f = fs.getFeaturesByIndex(i); sum += f[1].as_double(); } - return sum; } -TEST("require that getSummaryFeatures prefers cached query setup") { - MyWorld world; +TEST_F(MatchingTest, require_that_getSummaryFeatures_prefers_cached_query_setup) +{ + MyWorld world(shared_state()); world.basicSetup(); world.basicResults(); SearchRequest::SP request = MyWorld::createSimpleRequest("f1", "spread"); @@ -918,94 +1025,106 @@ TEST("require that getSummaryFeatures prefers cached query setup") { req->sessionId = request->sessionId; req->propertiesMap.lookupCreate(search::MapNames::CACHES).add("query", "true"); FeatureSet::SP fs = world.getSummaryFeatures(*req); - EXPECT_EQUAL(5u, fs->numFeatures()); - EXPECT_EQUAL(3u, fs->numDocs()); - EXPECT_EQUAL(0.0, count_f1_matches(*fs)); // "spread" has no hits + EXPECT_EQ(5u, fs->numFeatures()); + EXPECT_EQ(3u, fs->numDocs()); + double sum = 0.0; + ASSERT_NO_FATAL_FAILURE(count_f1_matches(*fs, sum)); + EXPECT_EQ(0.0, sum); // "spread" has no hits // Empty cache auto pruneTime = vespalib::steady_clock::now() + 600s; world.sessionManager->pruneTimedOutSessions(pruneTime); fs = world.getSummaryFeatures(*req); - EXPECT_EQUAL(5u, fs->numFeatures()); - EXPECT_EQUAL(3u, fs->numDocs()); - EXPECT_EQUAL(2.0, count_f1_matches(*fs)); // "foo" has two hits + EXPECT_EQ(5u, fs->numFeatures()); + EXPECT_EQ(3u, fs->numDocs()); + ASSERT_NO_FATAL_FAILURE(count_f1_matches(*fs, sum)); + EXPECT_EQ(2.0, sum); // "foo" has two hits } -TEST("require that match params are set up straight with ranking on") { - MatchParams p(10, 2, 4, 0.7, 0, 1, true, true); - ASSERT_EQUAL(10u, p.numDocs); - ASSERT_EQUAL(2u, p.heapSize); - ASSERT_EQUAL(4u, p.arraySize); - ASSERT_EQUAL(0.7, p.rankDropLimit); - ASSERT_EQUAL(0u, p.offset); - ASSERT_EQUAL(1u, p.hits); - ASSERT_TRUE(p.has_rank_drop_limit()); +TEST_F(MatchingTest, require_that_match_params_are_set_up_straight_with_ranking_on) +{ + MatchParams p(10, 2, 4, 0.7, 0.75, 0, 1, true, true); + ASSERT_EQ(10u, p.numDocs); + ASSERT_EQ(2u, p.heapSize); + ASSERT_EQ(4u, p.arraySize); + ASSERT_EQ(0.7, p.first_phase_rank_score_drop_limit.value()); + ASSERT_EQ(0.75, p.second_phase_rank_score_drop_limit.value()); + ASSERT_EQ(0u, p.offset); + ASSERT_EQ(1u, p.hits); } -TEST("require that match params can turn off rank-drop-limit") { - MatchParams p(10, 2, 4, -std::numeric_limits<feature_t>::quiet_NaN(), 0, 1, true, true); - ASSERT_EQUAL(10u, p.numDocs); - ASSERT_EQUAL(2u, p.heapSize); - ASSERT_EQUAL(4u, p.arraySize); - ASSERT_TRUE(std::isnan(p.rankDropLimit)); - ASSERT_EQUAL(0u, p.offset); - ASSERT_EQUAL(1u, p.hits); - ASSERT_FALSE(p.has_rank_drop_limit()); +TEST_F(MatchingTest, require_that_match_params_can_turn_off_rank_score_drop_limits) +{ + MatchParams p(10, 2, 4, std::nullopt, std::nullopt, 0, 1, true, true); + ASSERT_EQ(10u, p.numDocs); + ASSERT_EQ(2u, p.heapSize); + ASSERT_EQ(4u, p.arraySize); + ASSERT_FALSE(p.first_phase_rank_score_drop_limit.has_value()); + ASSERT_FALSE(p.second_phase_rank_score_drop_limit.has_value()); + ASSERT_EQ(0u, p.offset); + ASSERT_EQ(1u, p.hits); } -TEST("require that match params are set up straight with ranking on arraySize is atleast the size of heapSize") { - MatchParams p(10, 6, 4, 0.7, 1, 1, true, true); - ASSERT_EQUAL(10u, p.numDocs); - ASSERT_EQUAL(6u, p.heapSize); - ASSERT_EQUAL(6u, p.arraySize); - ASSERT_EQUAL(0.7, p.rankDropLimit); - ASSERT_EQUAL(1u, p.offset); - ASSERT_EQUAL(1u, p.hits); +TEST_F(MatchingTest, require_that_match_params_are_set_up_straight_with_ranking_on_arraySize_is_atleast_the_size_of_heapSize) +{ + MatchParams p(10, 6, 4, 0.7, std::nullopt, 1, 1, true, true); + ASSERT_EQ(10u, p.numDocs); + ASSERT_EQ(6u, p.heapSize); + ASSERT_EQ(6u, p.arraySize); + ASSERT_EQ(0.7, p.first_phase_rank_score_drop_limit.value()); + ASSERT_FALSE(p.second_phase_rank_score_drop_limit.has_value()); + ASSERT_EQ(1u, p.offset); + ASSERT_EQ(1u, p.hits); } -TEST("require that match params are set up straight with ranking on arraySize is atleast the size of hits+offset") { - MatchParams p(10, 6, 4, 0.7, 4, 4, true, true); - ASSERT_EQUAL(10u, p.numDocs); - ASSERT_EQUAL(6u, p.heapSize); - ASSERT_EQUAL(8u, p.arraySize); - ASSERT_EQUAL(0.7, p.rankDropLimit); - ASSERT_EQUAL(4u, p.offset); - ASSERT_EQUAL(4u, p.hits); +TEST_F(MatchingTest, require_that_match_params_are_set_up_straight_with_ranking_on_arraySize_is_atleast_the_size_of_hits_plus_offset) +{ + MatchParams p(10, 6, 4, 0.7, std::nullopt, 4, 4, true, true); + ASSERT_EQ(10u, p.numDocs); + ASSERT_EQ(6u, p.heapSize); + ASSERT_EQ(8u, p.arraySize); + ASSERT_EQ(0.7, p.first_phase_rank_score_drop_limit.value()); + ASSERT_EQ(4u, p.offset); + ASSERT_EQ(4u, p.hits); } -TEST("require that match params are capped by numDocs") { - MatchParams p(1, 6, 4, 0.7, 4, 4, true, true); - ASSERT_EQUAL(1u, p.numDocs); - ASSERT_EQUAL(1u, p.heapSize); - ASSERT_EQUAL(1u, p.arraySize); - ASSERT_EQUAL(0.7, p.rankDropLimit); - ASSERT_EQUAL(1u, p.offset); - ASSERT_EQUAL(0u, p.hits); +TEST_F(MatchingTest, require_that_match_params_are_capped_by_numDocs) +{ + MatchParams p(1, 6, 4, 0.7, std::nullopt, 4, 4, true, true); + ASSERT_EQ(1u, p.numDocs); + ASSERT_EQ(1u, p.heapSize); + ASSERT_EQ(1u, p.arraySize); + ASSERT_EQ(0.7, p.first_phase_rank_score_drop_limit.value()); + ASSERT_EQ(1u, p.offset); + ASSERT_EQ(0u, p.hits); } -TEST("require that match params are capped by numDocs and hits adjusted down") { - MatchParams p(5, 6, 4, 0.7, 4, 4, true, true); - ASSERT_EQUAL(5u, p.numDocs); - ASSERT_EQUAL(5u, p.heapSize); - ASSERT_EQUAL(5u, p.arraySize); - ASSERT_EQUAL(0.7, p.rankDropLimit); - ASSERT_EQUAL(4u, p.offset); - ASSERT_EQUAL(1u, p.hits); +TEST_F(MatchingTest, require_that_match_params_are_capped_by_numDocs_and_hits_adjusted_down) +{ + MatchParams p(5, 6, 4, 0.7, std::nullopt, 4, 4, true, true); + ASSERT_EQ(5u, p.numDocs); + ASSERT_EQ(5u, p.heapSize); + ASSERT_EQ(5u, p.arraySize); + ASSERT_EQ(0.7, p.first_phase_rank_score_drop_limit.value()); + ASSERT_EQ(4u, p.offset); + ASSERT_EQ(1u, p.hits); } -TEST("require that match params are set up straight with ranking off array and heap size is 0") { - MatchParams p(10, 6, 4, 0.7, 4, 4, true, false); - ASSERT_EQUAL(10u, p.numDocs); - ASSERT_EQUAL(0u, p.heapSize); - ASSERT_EQUAL(0u, p.arraySize); - ASSERT_EQUAL(0.7, p.rankDropLimit); - ASSERT_EQUAL(4u, p.offset); - ASSERT_EQUAL(4u, p.hits); +TEST_F(MatchingTest, require_that_match_params_are_set_up_straight_with_ranking_off_array_and_heap_size_is_0) +{ + MatchParams p(10, 6, 4, 0.7, std::nullopt, 4, 4, true, false); + ASSERT_EQ(10u, p.numDocs); + ASSERT_EQ(0u, p.heapSize); + ASSERT_EQ(0u, p.arraySize); + ASSERT_EQ(0.7, p.first_phase_rank_score_drop_limit.value()); + ASSERT_EQ(4u, p.offset); + ASSERT_EQ(4u, p.hits); } -TEST("require that match phase limiting works") { +TEST_F(MatchingTest, require_that_match_phase_limiting_works) +{ for (int s = 0; s <= 1; ++s) { for (int i = 0; i <= 6; ++i) { bool enable = (i != 0); @@ -1014,7 +1133,7 @@ TEST("require that match phase limiting works") { bool descending = (i == 2) || (i == 4) || (i == 6); bool use_sorting = (s == 1); size_t want_threads = 75; - MyWorld world; + MyWorld world(shared_state()); world.basicSetup(); world.verbose_a1_result("all"); if (enable) { @@ -1036,51 +1155,54 @@ TEST("require that match phase limiting works") { request->sortSpec = "-a1"; } SearchReply::UP reply = world.performSearch(*request, want_threads); - ASSERT_EQUAL(10u, reply->hits.size()); + ASSERT_EQ(10u, reply->hits.size()); if (enable) { - EXPECT_EQUAL(79u, reply->totalHitCount); + EXPECT_EQ(79u, reply->totalHitCount); if (!use_sorting) { - EXPECT_EQUAL(997.0, reply->hits[0].metric); - EXPECT_EQUAL(994.0, reply->hits[1].metric); - EXPECT_EQUAL(991.0, reply->hits[2].metric); - EXPECT_EQUAL(987.0, reply->hits[3].metric); - EXPECT_EQUAL(974.0, reply->hits[4].metric); - EXPECT_EQUAL(963.0, reply->hits[5].metric); - EXPECT_EQUAL(961.0, reply->hits[6].metric); - EXPECT_EQUAL(951.0, reply->hits[7].metric); - EXPECT_EQUAL(948.0, reply->hits[8].metric); - EXPECT_EQUAL(935.0, reply->hits[9].metric); + EXPECT_EQ(997.0, reply->hits[0].metric); + EXPECT_EQ(994.0, reply->hits[1].metric); + EXPECT_EQ(991.0, reply->hits[2].metric); + EXPECT_EQ(987.0, reply->hits[3].metric); + EXPECT_EQ(974.0, reply->hits[4].metric); + EXPECT_EQ(963.0, reply->hits[5].metric); + EXPECT_EQ(961.0, reply->hits[6].metric); + EXPECT_EQ(951.0, reply->hits[7].metric); + EXPECT_EQ(948.0, reply->hits[8].metric); + EXPECT_EQ(935.0, reply->hits[9].metric); } } else { - EXPECT_EQUAL(985u, reply->totalHitCount); + EXPECT_EQ(985u, reply->totalHitCount); if (!use_sorting) { - EXPECT_EQUAL(999.0, reply->hits[0].metric); - EXPECT_EQUAL(998.0, reply->hits[1].metric); - EXPECT_EQUAL(997.0, reply->hits[2].metric); - EXPECT_EQUAL(996.0, reply->hits[3].metric); + EXPECT_EQ(999.0, reply->hits[0].metric); + EXPECT_EQ(998.0, reply->hits[1].metric); + EXPECT_EQ(997.0, reply->hits[2].metric); + EXPECT_EQ(996.0, reply->hits[3].metric); } } } } } -TEST("require that arithmetic used for rank drop limit works") { +TEST_F(MatchingTest, require_that_arithmetic_used_for_rank_drop_limit_works) +{ double small = -HUGE_VAL; double limit = -std::numeric_limits<feature_t>::quiet_NaN(); EXPECT_TRUE(!(small <= limit)); } -TEST("require that termwise limit is set correctly for first phase ranking program") { - MyWorld world; +TEST_F(MatchingTest, require_that_termwise_limit_is_set_correctly_for_first_phase_ranking_program) +{ + MyWorld world(shared_state()); world.basicSetup(); world.basicResults(); - EXPECT_EQUAL(1.0, world.get_first_phase_termwise_limit()); + EXPECT_EQ(1.0, world.get_first_phase_termwise_limit()); world.set_property(indexproperties::matching::TermwiseLimit::NAME, "0.02"); - EXPECT_EQUAL(0.02, world.get_first_phase_termwise_limit()); + EXPECT_EQ(0.02, world.get_first_phase_termwise_limit()); } -TEST("require that fields are tagged with data type") { - MyWorld world; +TEST_F(MatchingTest, require_that_fields_are_tagged_with_data_type) +{ + MyWorld world(shared_state()); world.basicSetup(); auto int32_field = world.get_field_info("a1"); auto string_field = world.get_field_info("f1"); @@ -1090,24 +1212,26 @@ TEST("require that fields are tagged with data type") { ASSERT_TRUE(bool(string_field)); ASSERT_TRUE(bool(tensor_field)); ASSERT_TRUE(bool(predicate_field)); - EXPECT_EQUAL(int32_field->get_data_type(), FieldInfo::DataType::INT32); - EXPECT_EQUAL(string_field->get_data_type(), FieldInfo::DataType::STRING); - EXPECT_EQUAL(tensor_field->get_data_type(), FieldInfo::DataType::TENSOR); - EXPECT_EQUAL(predicate_field->get_data_type(), FieldInfo::DataType::BOOLEANTREE); + EXPECT_EQ(int32_field->get_data_type(), FieldInfo::DataType::INT32); + EXPECT_EQ(string_field->get_data_type(), FieldInfo::DataType::STRING); + EXPECT_EQ(tensor_field->get_data_type(), FieldInfo::DataType::TENSOR); + EXPECT_EQ(predicate_field->get_data_type(), FieldInfo::DataType::BOOLEANTREE); } -TEST("require that same element search works") { - MyWorld world; +TEST_F(MatchingTest, require_that_same_element_search_works) +{ + MyWorld world(shared_state()); world.basicSetup(); world.add_same_element_results("foo", "bar"); SearchRequest::SP request = MyWorld::createSameElementRequest("foo", "bar"); SearchReply::UP reply = world.performSearch(*request, 1); - ASSERT_EQUAL(1u, reply->hits.size()); - EXPECT_EQUAL(document::DocumentId("id:ns:searchdocument::20").getGlobalId(), reply->hits[0].gid); + ASSERT_EQ(1u, reply->hits.size()); + EXPECT_EQ(document::DocumentId("id:ns:searchdocument::20").getGlobalId(), reply->hits[0].gid); } -TEST("require that docsum matcher can extract matching elements from same element blueprint") { - MyWorld world; +TEST_F(MatchingTest, require_that_docsum_matcher_can_extract_matching_elements_from_same_element_blueprint) +{ + MyWorld world(shared_state()); world.basicSetup(); world.add_same_element_results("foo", "bar"); auto request = MyWorld::create_docsum_request(make_same_element_stack_dump("foo", "bar"), {20}); @@ -1116,12 +1240,13 @@ TEST("require that docsum matcher can extract matching elements from same elemen fields.add_mapping("my", "my.f1"); auto result = world.get_matching_elements(*request, fields); const auto &list = result->get_matching_elements(20, "my"); - ASSERT_EQUAL(list.size(), 1u); - EXPECT_EQUAL(list[0], 2u); + ASSERT_EQ(list.size(), 1u); + EXPECT_EQ(list[0], 2u); } -TEST("require that docsum matcher can extract matching elements from single attribute term") { - MyWorld world; +TEST_F(MatchingTest, require_that_docsum_matcher_can_extract_matching_elements_from_single_attribute_term) +{ + MyWorld world(shared_state()); world.basicSetup(); world.add_same_element_results("foo", "bar"); auto request = MyWorld::create_docsum_request(make_simple_stack_dump("my.a1", "foo"), {20}); @@ -1130,9 +1255,9 @@ TEST("require that docsum matcher can extract matching elements from single attr fields.add_mapping("my", "my.f1"); auto result = world.get_matching_elements(*request, fields); const auto &list = result->get_matching_elements(20, "my"); - ASSERT_EQUAL(list.size(), 2u); - EXPECT_EQUAL(list[0], 2u); - EXPECT_EQUAL(list[1], 3u); + ASSERT_EQ(list.size(), 2u); + EXPECT_EQ(list[0], 2u); + EXPECT_EQ(list[1], 3u); } using FMA = vespalib::FuzzyMatchingAlgorithm; @@ -1162,35 +1287,41 @@ struct AttributeBlueprintParamsFixture { rank_properties.add(TargetHitsMaxAdjustmentFactor::NAME, target_hits_max_adjustment_factor); rank_properties.add(FuzzyAlgorithm::NAME, fuzzy_matching_algorithm); } + ~AttributeBlueprintParamsFixture(); AttributeBlueprintParams extract(uint32_t active_docids = 9, uint32_t docid_limit = 10) const { return MatchToolsFactory::extract_attribute_blueprint_params(rank_setup, rank_properties, active_docids, docid_limit); } }; -TEST_F("attribute blueprint params are extracted from rank profile", AttributeBlueprintParamsFixture(0.2, 0.8, 5.0, FMA::DfaTable)) +AttributeBlueprintParamsFixture::~AttributeBlueprintParamsFixture() = default; + +TEST_F(MatchingTest, attribute_blueprint_params_are_extracted_from_rank_profile) { + AttributeBlueprintParamsFixture f(0.2, 0.8, 5.0, FMA::DfaTable); auto params = f.extract(); - EXPECT_EQUAL(0.2, params.global_filter_lower_limit); - EXPECT_EQUAL(0.8, params.global_filter_upper_limit); - EXPECT_EQUAL(5.0, params.target_hits_max_adjustment_factor); - EXPECT_EQUAL(FMA::DfaTable, params.fuzzy_matching_algorithm); + EXPECT_EQ(0.2, params.global_filter_lower_limit); + EXPECT_EQ(0.8, params.global_filter_upper_limit); + EXPECT_EQ(5.0, params.target_hits_max_adjustment_factor); + EXPECT_EQ(FMA::DfaTable, params.fuzzy_matching_algorithm); } -TEST_F("attribute blueprint params are extracted from query", AttributeBlueprintParamsFixture(0.2, 0.8, 5.0, FMA::DfaTable)) +TEST_F(MatchingTest, attribute_blueprint_params_are_extracted_from_query) { + AttributeBlueprintParamsFixture f(0.2, 0.8, 5.0, FMA::DfaTable); f.set_query_properties("0.15", "0.75", "3.0", "dfa_explicit"); auto params = f.extract(); - EXPECT_EQUAL(0.15, params.global_filter_lower_limit); - EXPECT_EQUAL(0.75, params.global_filter_upper_limit); - EXPECT_EQUAL(3.0, params.target_hits_max_adjustment_factor); - EXPECT_EQUAL(FMA::DfaExplicit, params.fuzzy_matching_algorithm); + EXPECT_EQ(0.15, params.global_filter_lower_limit); + EXPECT_EQ(0.75, params.global_filter_upper_limit); + EXPECT_EQ(3.0, params.target_hits_max_adjustment_factor); + EXPECT_EQ(FMA::DfaExplicit, params.fuzzy_matching_algorithm); } -TEST_F("global filter params are scaled with active hit ratio", AttributeBlueprintParamsFixture(0.2, 0.8, 5.0, FMA::DfaTable)) +TEST_F(MatchingTest, global_filter_params_are_scaled_with_active_hit_ratio) { + AttributeBlueprintParamsFixture f(0.2, 0.8, 5.0, FMA::DfaTable); auto params = f.extract(5, 10); - EXPECT_EQUAL(0.12, params.global_filter_lower_limit); - EXPECT_EQUAL(0.48, params.global_filter_upper_limit); + EXPECT_EQ(0.12, params.global_filter_lower_limit); + EXPECT_EQ(0.48, params.global_filter_upper_limit); } -TEST_MAIN() { TEST_RUN_ALL(); } +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchcore/src/tests/proton/verify_ranksetup/CMakeLists.txt b/searchcore/src/tests/proton/verify_ranksetup/CMakeLists.txt index b27a7ee53c2..eb330be1446 100644 --- a/searchcore/src/tests/proton/verify_ranksetup/CMakeLists.txt +++ b/searchcore/src/tests/proton/verify_ranksetup/CMakeLists.txt @@ -5,4 +5,4 @@ vespa_add_executable(searchcore_verify_ranksetup_test_app TEST DEPENDS ) vespa_add_test(NAME searchcore_verify_ranksetup_test_app COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/verify_ranksetup_test.sh - DEPENDS searchcore_verify_ranksetup_test_app searchcore_verify_ranksetup_app) + DEPENDS searchcore_verify_ranksetup_test_app searchcore_verify_ranksetup_app COST 50) diff --git a/searchcore/src/vespa/searchcore/proton/matching/blueprintbuilder.cpp b/searchcore/src/vespa/searchcore/proton/matching/blueprintbuilder.cpp index 919309c5dae..f8b6666afc4 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/blueprintbuilder.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/blueprintbuilder.cpp @@ -62,12 +62,17 @@ private: Blueprint::UP _result; void buildChildren(IntermediateBlueprint &parent, const std::vector<Node *> &children); + bool is_search_multi_threaded() const noexcept { + return _requestContext.thread_bundle().size() > 1; + } template <typename NodeType> void buildIntermediate(IntermediateBlueprint *b, NodeType &n) __attribute__((noinline)); void buildWeakAnd(ProtonWeakAnd &n) { - auto *wand = new WeakAndBlueprint(n.getTargetNumHits(), _requestContext.get_attribute_blueprint_params().weakand_range); + auto *wand = new WeakAndBlueprint(n.getTargetNumHits(), + _requestContext.get_attribute_blueprint_params().weakand_range, + is_search_multi_threaded()); Blueprint::UP result(wand); for (auto node : n.getChildren()) { uint32_t weight = getWeightFromNode(*node).percent(); diff --git a/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.cpp b/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.cpp index 4d4b136aba5..c7f86903e05 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.cpp @@ -24,6 +24,7 @@ using search::queryeval::AndNotBlueprint; using search::queryeval::Blueprint; using search::queryeval::IntermediateBlueprint; using search::queryeval::MatchingElementsSearch; +using search::queryeval::MatchingPhase; using search::queryeval::SameElementBlueprint; using search::queryeval::SearchIterator; using vespalib::FeatureSet; @@ -41,8 +42,10 @@ get_feature_set(const MatchToolsFactory &mtf, { MatchTools::UP matchTools = mtf.createMatchTools(); if (summaryFeatures) { + mtf.query().set_matching_phase(MatchingPhase::SUMMARY_FEATURES); matchTools->setup_summary(); } else { + mtf.query().set_matching_phase(MatchingPhase::DUMP_FEATURES); matchTools->setup_dump(); } auto retval = ExtractFeatures::get_feature_set(matchTools->search(), matchTools->rank_program(), docs, diff --git a/searchcore/src/vespa/searchcore/proton/matching/extract_features.cpp b/searchcore/src/vespa/searchcore/proton/matching/extract_features.cpp index 87b2fa8c1cb..ee255ab41ba 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/extract_features.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/extract_features.cpp @@ -19,6 +19,7 @@ using vespalib::Runnable; using vespalib::ThreadBundle; using search::fef::FeatureResolver; using search::fef::RankProgram; +using search::queryeval::MatchingPhase; using search::queryeval::SearchIterator; namespace proton::matching { @@ -112,6 +113,7 @@ FeatureValues ExtractFeatures::get_match_features(const MatchToolsFactory &mtf, const OrderedDocs &docs, ThreadBundle &thread_bundle) { FeatureValues result; + mtf.query().set_matching_phase(MatchingPhase::MATCH_FEATURES); auto tools = mtf.createMatchTools(); tools->setup_match_features(); FeatureResolver resolver(tools->rank_program().get_seeds(false)); diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp index 01a9508220d..affa2bdc554 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp @@ -1,18 +1,21 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "match_loop_communicator.h" +#include <vespa/searchlib/features/first_phase_rank_lookup.h> #include <vespa/vespalib/util/priority_queue.h> +using search::features::FirstPhaseRankLookup; + namespace proton:: matching { MatchLoopCommunicator::MatchLoopCommunicator(size_t threads, size_t topN) - : MatchLoopCommunicator(threads, topN, std::unique_ptr<IDiversifier>()) + : MatchLoopCommunicator(threads, topN, {}, nullptr, []() noexcept {}) {} -MatchLoopCommunicator::MatchLoopCommunicator(size_t threads, size_t topN, std::unique_ptr<IDiversifier> diversifier) +MatchLoopCommunicator::MatchLoopCommunicator(size_t threads, size_t topN, std::unique_ptr<IDiversifier> diversifier, FirstPhaseRankLookup* first_phase_rank_lookup, std::function<void()> before_second_phase) : _best_scores(), _best_dropped(), _estimate_match_frequency(threads), - _get_second_phase_work(threads, topN, _best_scores, _best_dropped, std::move(diversifier)), + _get_second_phase_work(threads, topN, _best_scores, _best_dropped, std::move(diversifier), first_phase_rank_lookup, std::move(before_second_phase)), _complete_second_phase(threads, topN, _best_scores, _best_dropped) {} MatchLoopCommunicator::~MatchLoopCommunicator() = default; @@ -34,18 +37,44 @@ MatchLoopCommunicator::EstimateMatchFrequency::mingle() } } -MatchLoopCommunicator::GetSecondPhaseWork::GetSecondPhaseWork(size_t n, size_t topN_in, Range &best_scores_in, BestDropped &best_dropped_in, std::unique_ptr<IDiversifier> diversifier) +namespace { + +class NoRegisterFirstPhaseRank { +public: + static void pick(uint32_t) noexcept { }; + static void drop() noexcept { } +}; + +class RegisterFirstPhaseRank { + FirstPhaseRankLookup& _first_phase_rank_lookup; + uint32_t _rank; +public: + RegisterFirstPhaseRank(FirstPhaseRankLookup& first_phase_rank_lookup) + : _first_phase_rank_lookup(first_phase_rank_lookup), + _rank(0) + { + } + void pick(uint32_t docid) noexcept { _first_phase_rank_lookup.add(docid, ++_rank); } + void drop() noexcept { ++_rank; } +}; + +} + +MatchLoopCommunicator::GetSecondPhaseWork::GetSecondPhaseWork(size_t n, size_t topN_in, Range &best_scores_in, BestDropped &best_dropped_in, std::unique_ptr<IDiversifier> diversifier, FirstPhaseRankLookup* first_phase_rank_lookup, std::function<void()> before_second_phase) : vespalib::Rendezvous<SortedHitSequence, TaggedHits, true>(n), topN(topN_in), best_scores(best_scores_in), best_dropped(best_dropped_in), - _diversifier(std::move(diversifier)) + _diversifier(std::move(diversifier)), + _first_phase_rank_lookup(first_phase_rank_lookup), + _before_second_phase(std::move(before_second_phase)) {} + MatchLoopCommunicator::GetSecondPhaseWork::~GetSecondPhaseWork() = default; -template<typename Q, typename F> +template<typename Q, typename F, typename R> void -MatchLoopCommunicator::GetSecondPhaseWork::mingle(Q &queue, F &&accept) +MatchLoopCommunicator::GetSecondPhaseWork::mingle(Q &queue, F &&accept, R register_first_phase_rank) { size_t picked = 0; search::feature_t last_score = 0.0; @@ -53,14 +82,18 @@ MatchLoopCommunicator::GetSecondPhaseWork::mingle(Q &queue, F &&accept) uint32_t i = queue.front(); const Hit & hit = in(i).get(); if (accept(hit.first)) { + register_first_phase_rank.pick(hit.first); out(picked % size()).emplace_back(hit, i); last_score = hit.second; if (++picked == 1) { best_scores.high = hit.second; } - } else if (!best_dropped.valid) { - best_dropped.valid = true; - best_dropped.score = hit.second; + } else { + if (!best_dropped.valid) { + best_dropped.valid = true; + best_dropped.score = hit.second; + } + register_first_phase_rank.drop(); } in(i).next(); if (in(i).valid()) { @@ -74,9 +107,21 @@ MatchLoopCommunicator::GetSecondPhaseWork::mingle(Q &queue, F &&accept) } } +template<typename Q, typename R> +void +MatchLoopCommunicator::GetSecondPhaseWork::mingle(Q &queue, R register_first_phase_rank) +{ + if (_diversifier) { + mingle(queue, [diversifier=_diversifier.get()](uint32_t docId) { return diversifier->accepted(docId);}, register_first_phase_rank); + } else { + mingle(queue, [](uint32_t) { return true;}, register_first_phase_rank); + } +} + void MatchLoopCommunicator::GetSecondPhaseWork::mingle() { + _before_second_phase(); best_scores = Range(); best_dropped.valid = false; size_t est_out = (topN / size()) + 1; @@ -87,10 +132,10 @@ MatchLoopCommunicator::GetSecondPhaseWork::mingle() queue.push(i); } } - if (_diversifier) { - mingle(queue, [diversifier=_diversifier.get()](uint32_t docId) { return diversifier->accepted(docId);}); + if (_first_phase_rank_lookup != nullptr) { + mingle(queue, RegisterFirstPhaseRank(*_first_phase_rank_lookup)); } else { - mingle(queue, [](uint32_t) { return true;}); + mingle(queue, NoRegisterFirstPhaseRank()); } } diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h index eb93bdb68d5..a0a8376254b 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h +++ b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h @@ -5,6 +5,9 @@ #include "i_match_loop_communicator.h" #include <vespa/searchlib/queryeval/idiversifier.h> #include <vespa/vespalib/util/rendezvous.h> +#include <functional> + +namespace search::features { class FirstPhaseRankLookup; } namespace proton::matching { @@ -12,6 +15,7 @@ class MatchLoopCommunicator final : public IMatchLoopCommunicator { private: using IDiversifier = search::queryeval::IDiversifier; + using FirstPhaseRankLookup = search::features::FirstPhaseRankLookup; struct BestDropped { bool valid = false; search::feature_t score = 0.0; @@ -25,11 +29,15 @@ private: Range &best_scores; BestDropped &best_dropped; std::unique_ptr<IDiversifier> _diversifier; - GetSecondPhaseWork(size_t n, size_t topN_in, Range &best_scores_in, BestDropped &best_dropped_in, std::unique_ptr<IDiversifier>); + FirstPhaseRankLookup* _first_phase_rank_lookup; + std::function<void()> _before_second_phase; + GetSecondPhaseWork(size_t n, size_t topN_in, Range &best_scores_in, BestDropped &best_dropped_in, std::unique_ptr<IDiversifier> diversifier, FirstPhaseRankLookup* first_phase_rank_lookup, std::function<void()> before_second_phase); ~GetSecondPhaseWork() override; void mingle() override; - template<typename Q, typename F> - void mingle(Q &queue, F &&accept); + template<typename Q, typename R> + void mingle(Q &queue, R register_first_phase_rank); + template<typename Q, typename F, typename R> + void mingle(Q &queue, F &&accept, R register_first_phase_rank); bool cmp(uint32_t a, uint32_t b) { return (in(a).get().second > in(b).get().second); } @@ -59,7 +67,7 @@ private: public: MatchLoopCommunicator(size_t threads, size_t topN); - MatchLoopCommunicator(size_t threads, size_t topN, std::unique_ptr<IDiversifier>); + MatchLoopCommunicator(size_t threads, size_t topN, std::unique_ptr<IDiversifier>, FirstPhaseRankLookup* first_phase_rank_lookup, std::function<void()> before_second_phsae); ~MatchLoopCommunicator(); double estimate_match_frequency(const Matches &matches) override { diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp index 89cc97767bf..152ba978cd1 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp @@ -17,6 +17,7 @@ namespace proton::matching { using namespace search::fef; +using search::queryeval::MatchingPhase; using search::queryeval::SearchIterator; using vespalib::FeatureSet; using vespalib::ThreadBundle; @@ -85,7 +86,13 @@ MatchMaster::match(search::engine::Trace & trace, { vespalib::Timer query_latency_time; vespalib::DualMergeDirector mergeDirector(threadBundle.size()); - MatchLoopCommunicator communicator(threadBundle.size(), params.heapSize, mtf.createDiversifier(params.heapSize)); + /* + * We need a non-const first phase rank lookup since it will be populated + * later on when selecting documents for second phase ranking. + */ + MatchLoopCommunicator communicator(threadBundle.size(), params.heapSize, mtf.createDiversifier(params.heapSize), + mtf.get_first_phase_rank_lookup(), + [&mtf]() noexcept { mtf.query().set_matching_phase(MatchingPhase::SECOND_PHASE); }); TimedMatchLoopCommunicator timedCommunicator(communicator); DocidRangeScheduler::UP scheduler = createScheduler(threadBundle.size(), numSearchPartitions, params.numDocs); diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_params.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_params.cpp index 5cd97d314b5..316ef003a28 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_params.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_params.cpp @@ -19,7 +19,8 @@ computeArraySize(uint32_t hitsPlussOffset, uint32_t heapSize, uint32_t arraySize MatchParams::MatchParams(uint32_t numDocs_in, uint32_t heapSize_in, uint32_t arraySize_in, - search::feature_t rankDropLimit_in, + std::optional<search::feature_t> first_phase_rank_score_drop_limit_in, + std::optional<search::feature_t> second_phase_rank_score_drop_limit_in, uint32_t offset_in, uint32_t hits_in, bool hasFinalRank, @@ -31,12 +32,8 @@ MatchParams::MatchParams(uint32_t numDocs_in, : 0), offset(std::min(numDocs_in, offset_in)), hits(std::min(numDocs_in - offset, hits_in)), - rankDropLimit(rankDropLimit_in) + first_phase_rank_score_drop_limit(first_phase_rank_score_drop_limit_in), + second_phase_rank_score_drop_limit(second_phase_rank_score_drop_limit_in) { } -bool -MatchParams::has_rank_drop_limit() const { - return ! std::isnan(rankDropLimit); -} - } diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_params.h b/searchcore/src/vespa/searchcore/proton/matching/match_params.h index 5b58c11b7e1..19abcd8e449 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_params.h +++ b/searchcore/src/vespa/searchcore/proton/matching/match_params.h @@ -4,6 +4,7 @@ #include <vespa/searchlib/common/feature.h> #include <cstdint> +#include <optional> namespace proton::matching { @@ -17,18 +18,19 @@ struct MatchParams { const uint32_t arraySize; const uint32_t offset; const uint32_t hits; - const search::feature_t rankDropLimit; + const std::optional<search::feature_t> first_phase_rank_score_drop_limit; + const std::optional<search::feature_t> second_phase_rank_score_drop_limit; MatchParams(uint32_t numDocs_in, uint32_t heapSize_in, uint32_t arraySize_in, - search::feature_t rankDropLimit_in, + std::optional<search::feature_t> first_phase_rank_drop_limit_in, + std::optional<search::feature_t> second_phase_rank_score_drop_limit_in, uint32_t offset_in, uint32_t hits_in, bool hasFinalRank, - bool needRanking=true); - bool save_rank_scores() const { return ((heapSize + arraySize) != 0); } - bool has_rank_drop_limit() const; + bool needRanking); + bool save_rank_scores() const noexcept { return (arraySize != 0); } }; } diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp index 211e67f1e2b..7ccc120d047 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp @@ -15,6 +15,7 @@ #include <vespa/searchlib/queryeval/profiled_iterator.h> #include <vespa/vespalib/data/slime/cursor.h> #include <vespa/vespalib/data/slime/inserter.h> +#include <limits> #include <vespa/log/log.h> LOG_SETUP(".proton.matching.match_thread"); @@ -99,11 +100,11 @@ fillPartialResult(ResultProcessor::Context & context, size_t totalHits, size_t n //----------------------------------------------------------------------------- -MatchThread::Context::Context(double rankDropLimit, MatchTools &tools, HitCollector &hits, uint32_t num_threads) +MatchThread::Context::Context(std::optional<double> first_phase_rank_score_drop_limit, MatchTools &tools, HitCollector &hits, uint32_t num_threads) : matches(0), _matches_limit(tools.match_limiter().sample_hits_per_thread(num_threads)), _score_feature(get_score_feature(tools.rank_program())), - _rankDropLimit(rankDropLimit), + _first_phase_rank_score_drop_limit(first_phase_rank_score_drop_limit.value_or(0.0 /* ignored */)), _hits(hits), _doom(tools.getDoom()), dropped() @@ -119,7 +120,7 @@ MatchThread::Context::rankHit(uint32_t docId) { score = -HUGE_VAL; } if (use_rank_drop_limit != RankDropLimitE::no) { - if (__builtin_expect(score > _rankDropLimit, true)) { + if (__builtin_expect(score > _first_phase_rank_score_drop_limit, true)) { _hits.addHit(docId, score); } else if (use_rank_drop_limit == RankDropLimitE::track) { dropped.template emplace_back(docId); @@ -217,7 +218,7 @@ MatchThread::match_loop(MatchTools &tools, HitCollector &hits) bool softDoomed = false; uint32_t docsCovered = 0; vespalib::duration overtime(vespalib::duration::zero()); - Context context(matchParams.rankDropLimit, tools, hits, num_threads); + Context context(matchParams.first_phase_rank_score_drop_limit, tools, hits, num_threads); for (DocidRange docid_range = scheduler.first_range(thread_id); !docid_range.empty(); docid_range = scheduler.next_range(thread_id)) @@ -270,7 +271,7 @@ template <bool do_rank, bool do_limit, bool do_share> void MatchThread::match_loop_helper_rank_limit_share(MatchTools &tools, HitCollector &hits) { - if (matchParams.has_rank_drop_limit()) { + if (matchParams.first_phase_rank_score_drop_limit.has_value()) { if (matchToolsFactory.hasOnMatchTask()) { match_loop_helper_rank_limit_share_drop<do_rank, do_limit, do_share, RankDropLimitE::track>(tools, hits); } else { @@ -367,7 +368,7 @@ MatchThread::findMatches(MatchTools &tools) tools.give_back_search(ProfiledIterator::profile(*match_profiler, tools.borrow_search())); tools.tag_search_as_changed(); } - HitCollector hits(matchParams.numDocs, matchParams.arraySize); + HitCollector hits(matchParams.numDocs, match_with_ranking ? matchParams.arraySize : 0); trace->addEvent(4, "Start match and first phase rank"); /** * All, or none of the threads in the bundle must execute the match loop. @@ -380,7 +381,32 @@ MatchThread::findMatches(MatchTools &tools) secondPhase(tools, hits); } trace->addEvent(4, "Create result set"); - return hits.getResultSet(fallback_rank_value()); + if (tools.has_second_phase_rank() && matchParams.second_phase_rank_score_drop_limit.has_value()) { + return get_matches_after_second_phase_rank_score_drop(hits); + } else { + return hits.getResultSet(); + } +} + +std::unique_ptr<search::ResultSet> +MatchThread::get_matches_after_second_phase_rank_score_drop(HitCollector& hits) +{ + std::vector<uint32_t> dropped; + auto result = hits.get_result_set(matchParams.second_phase_rank_score_drop_limit, &dropped); + if (!dropped.empty()) { + /* + * Hits dropped due to second phase rank score drop limit are + * not present in the result. Schedule extra tasks to update + * mutable attributes for earlier match phases. + */ + if (auto task = matchToolsFactory.createOnMatchTask()) { + task->run(dropped); + } + if (auto task = matchToolsFactory.createOnFirstPhaseTask()) { + task->run(std::move(dropped)); + } + } + return result; } void diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_thread.h b/searchcore/src/vespa/searchcore/proton/matching/match_thread.h index c6b233f2fcd..e017dc53c5c 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_thread.h +++ b/searchcore/src/vespa/searchcore/proton/matching/match_thread.h @@ -73,7 +73,7 @@ private: class Context { public: - Context(double rankDropLimit, MatchTools &tools, HitCollector &hits, + Context(std::optional<double> first_phase_rank_score_drop_limit, MatchTools &tools, HitCollector &hits, uint32_t num_threads) __attribute__((noinline)); template <RankDropLimitE use_rank_drop_limit> void rankHit(uint32_t docId); @@ -86,7 +86,7 @@ private: private: uint32_t _matches_limit; LazyValue _score_feature; - double _rankDropLimit; + double _first_phase_rank_score_drop_limit; HitCollector &_hits; const Doom _doom; public: @@ -113,6 +113,7 @@ private: void match_loop_helper(MatchTools &tools, HitCollector &hits); search::ResultSet::UP findMatches(MatchTools &tools); + std::unique_ptr<search::ResultSet> get_matches_after_second_phase_rank_score_drop(HitCollector& hits); void secondPhase(MatchTools & tools, HitCollector & hits); void processResult(const Doom & doom, search::ResultSet::UP result, ResultProcessor::Context &context); diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp index 06290386a31..73a812f936f 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp @@ -9,6 +9,7 @@ #include <vespa/searchlib/attribute/diversity.h> #include <vespa/searchlib/queryeval/flow.h> #include <vespa/searchlib/engine/trace.h> +#include <vespa/searchlib/features/first_phase_rank_lookup.h> #include <vespa/searchlib/fef/indexproperties.h> #include <vespa/searchlib/fef/ranksetup.h> #include <vespa/vespalib/util/issue.h> @@ -190,7 +191,8 @@ MatchToolsFactory(QueryLimiter & queryLimiter, _rankSetup(rankSetup), _featureOverrides(featureOverrides), _diversityParams(), - _valid(false) + _valid(false), + _first_phase_rank_lookup(nullptr) { if (doom.soft_doom()) return; auto trace = root_trace.make_trace(); @@ -204,6 +206,9 @@ MatchToolsFactory(QueryLimiter & queryLimiter, _query.extractLocations(_queryEnv.locations()); trace.addEvent(5, "Build query execution plan"); _query.reserveHandles(_requestContext, searchContext, _mdl); + if (trace.getLevel() >= 6) { // will dump blueprint later + _query.enumerate_blueprint_nodes(); + } trace.addEvent(5, "Optimize query execution plan"); bool sort_by_cost = SortBlueprintsByCost::check(_queryEnv.getProperties(), rankSetup.sort_blueprints_by_cost()); double hitRate = std::min(1.0, double(maxNumHits)/double(searchContext.getDocIdLimit())); @@ -219,6 +224,7 @@ MatchToolsFactory(QueryLimiter & queryLimiter, _query.freeze(); trace.addEvent(5, "Prepare shared state for multi-threaded rank executors"); _rankSetup.prepareSharedState(_queryEnv, _queryEnv.getObjectStore()); + _first_phase_rank_lookup = FirstPhaseRankLookup::get_mutable_shared_state(_queryEnv.getObjectStore()); _diversityParams = extractDiversityParams(_rankSetup, rankProperties); vespalib::string attribute = DegradationAttribute::lookup(rankProperties, _rankSetup.getDegradationAttribute()); DegradationParams degradationParams = extractDegradationParams(_rankSetup, attribute, rankProperties); @@ -272,11 +278,13 @@ MatchToolsFactory::createTask(vespalib::stringref attribute, vespalib::stringref ? std::make_unique<AttributeOperationTask>(_requestContext, attribute, operation) : std::unique_ptr<AttributeOperationTask>(); } + std::unique_ptr<AttributeOperationTask> MatchToolsFactory::createOnMatchTask() const { const auto & op = _rankSetup.getMutateOnMatch(); return createTask(op._attribute, op._operation); } + std::unique_ptr<AttributeOperationTask> MatchToolsFactory::createOnFirstPhaseTask() const { const auto & op = _rankSetup.getMutateOnFirstPhase(); @@ -289,6 +297,7 @@ MatchToolsFactory::createOnFirstPhaseTask() const { return createTask(op._attribute, op._operation); } } + std::unique_ptr<AttributeOperationTask> MatchToolsFactory::createOnSecondPhaseTask() const { const auto & op = _rankSetup.getMutateOnSecondPhase(); @@ -299,6 +308,7 @@ MatchToolsFactory::createOnSecondPhaseTask() const { return createTask(op._attribute, op._operation); } } + std::unique_ptr<AttributeOperationTask> MatchToolsFactory::createOnSummaryTask() const { const auto & op = _rankSetup.getMutateOnSummary(); diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h index 759fe68eea2..da18a8b0a2f 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h @@ -21,6 +21,7 @@ namespace vespalib { class ExecutionProfiler; } namespace vespalib { struct ThreadBundle; } namespace search::engine { class Trace; } +namespace search::features { class FirstPhaseRankLookup; } namespace search::fef { class RankProgram; @@ -119,6 +120,7 @@ private: using RankSetup = search::fef::RankSetup; using IIndexEnvironment = search::fef::IIndexEnvironment; using IDiversifier = search::queryeval::IDiversifier; + using FirstPhaseRankLookup = search::features::FirstPhaseRankLookup; QueryLimiter & _queryLimiter; AttributeBlueprintParams _attribute_blueprint_params; Query _query; @@ -131,6 +133,7 @@ private: const Properties & _featureOverrides; DiversityParams _diversityParams; bool _valid; + FirstPhaseRankLookup* _first_phase_rank_lookup; std::unique_ptr<AttributeOperationTask> createTask(vespalib::stringref attribute, vespalib::stringref operation) const; @@ -186,6 +189,7 @@ public: static AttributeBlueprintParams extract_attribute_blueprint_params(const RankSetup& rank_setup, const Properties& rank_properties, uint32_t active_docids, uint32_t docid_limit); + FirstPhaseRankLookup* get_first_phase_rank_lookup() const noexcept { return _first_phase_rank_lookup; } }; } diff --git a/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp b/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp index 4a9156770f0..a5ace1676ef 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp @@ -18,6 +18,7 @@ #include <vespa/searchlib/fef/test/plugin/setup.h> #include <vespa/searchlib/common/allocatedbitvector.h> #include <vespa/vespalib/data/slime/inserter.h> +#include <vespa/vespalib/util/limited_thread_bundle_wrapper.h> #include <cinttypes> #include <vespa/log/log.h> @@ -38,7 +39,8 @@ using search::fef::MatchData; using search::fef::RankSetup; using search::fef::indexproperties::hitcollector::HeapSize; using search::fef::indexproperties::hitcollector::ArraySize; -using search::fef::indexproperties::hitcollector::RankScoreDropLimit; +using search::fef::indexproperties::hitcollector::FirstPhaseRankScoreDropLimit; +using search::fef::indexproperties::hitcollector::SecondPhaseRankScoreDropLimit; using search::queryeval::Blueprint; using search::queryeval::SearchIterator; using vespalib::Doom; @@ -76,30 +78,14 @@ numThreads(size_t hits, size_t minHits) { return static_cast<size_t>(std::ceil(double(hits) / double(minHits))); } -class LimitedThreadBundleWrapper final : public vespalib::ThreadBundle -{ -public: - LimitedThreadBundleWrapper(vespalib::ThreadBundle &threadBundle, uint32_t maxThreads) - : _threadBundle(threadBundle), - _maxThreads(std::min(maxThreads, static_cast<uint32_t>(threadBundle.size()))) - { } - size_t size() const override { return _maxThreads; } - void run(vespalib::Runnable* const* targets, size_t cnt) override { - _threadBundle.run(targets, cnt); - } -private: - vespalib::ThreadBundle &_threadBundle; - const uint32_t _maxThreads; -}; - bool willNeedRanking(const SearchRequest & request, const GroupingContext & groupingContext, - search::feature_t rank_score_drop_limit) + std::optional<search::feature_t> first_phase_rank_score_drop_limit) { return (groupingContext.needRanking() || (request.maxhits != 0)) && (request.sortSpec.empty() || (request.sortSpec.find("[rank]") != vespalib::string::npos) || - !std::isnan(rank_score_drop_limit)); + first_phase_rank_score_drop_limit.has_value()); } SearchReply::UP @@ -289,17 +275,19 @@ Matcher::match(const SearchRequest &request, vespalib::ThreadBundle &threadBundl const Properties & rankProperties = request.propertiesMap.rankProperties(); uint32_t heapSize = HeapSize::lookup(rankProperties, _rankSetup->getHeapSize()); uint32_t arraySize = ArraySize::lookup(rankProperties, _rankSetup->getArraySize()); - search::feature_t rank_score_drop_limit = RankScoreDropLimit::lookup(rankProperties, _rankSetup->getRankScoreDropLimit()); + auto first_phase_rank_score_drop_limit = FirstPhaseRankScoreDropLimit::lookup(rankProperties, _rankSetup->get_first_phase_rank_score_drop_limit()); + auto second_phase_rank_score_drop_limit = SecondPhaseRankScoreDropLimit::lookup(rankProperties, _rankSetup->get_second_phase_rank_score_drop_limit()); - MatchParams params(searchContext.getDocIdLimit(), heapSize, arraySize, rank_score_drop_limit, + MatchParams params(searchContext.getDocIdLimit(), heapSize, arraySize, first_phase_rank_score_drop_limit, + second_phase_rank_score_drop_limit, request.offset, request.maxhits, !_rankSetup->getSecondPhaseRank().empty(), - willNeedRanking(request, groupingContext, rank_score_drop_limit)); + willNeedRanking(request, groupingContext, first_phase_rank_score_drop_limit)); ResultProcessor rp(attrContext, metaStore, sessionMgr, groupingContext, sessionId, request.sortSpec, params.offset, params.hits); size_t numThreadsPerSearch = computeNumThreadsPerSearch(mtf->estimate(), rankProperties); - LimitedThreadBundleWrapper limitedThreadBundle(threadBundle, numThreadsPerSearch); + vespalib::LimitedThreadBundleWrapper limitedThreadBundle(threadBundle, numThreadsPerSearch); MatchMaster master; uint32_t numParts = NumSearchPartitions::lookup(rankProperties, _rankSetup->getNumSearchPartitions()); if (limitedThreadBundle.size() > 1) { diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.cpp b/searchcore/src/vespa/searchcore/proton/matching/query.cpp index 70f60ff1c2d..e0fe8f5beb9 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/query.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/query.cpp @@ -39,6 +39,7 @@ using search::queryeval::Blueprint; using search::queryeval::GlobalFilter; using search::queryeval::IRequestContext; using search::queryeval::IntermediateBlueprint; +using search::queryeval::MatchingPhase; using search::queryeval::RankBlueprint; using search::queryeval::SearchIterator; using vespalib::Issue; @@ -198,6 +199,12 @@ Query::reserveHandles(const IRequestContext & requestContext, ISearchContext &co } void +Query::enumerate_blueprint_nodes() noexcept +{ + _blueprint->enumerate(1); +} + +void Query::optimize(InFlow in_flow, bool sort_by_cost) { _in_flow = in_flow; @@ -280,6 +287,12 @@ Query::freeze() _blueprint->freeze(); } +void +Query::set_matching_phase(MatchingPhase matching_phase) const noexcept +{ + _blueprint->set_matching_phase(matching_phase); +} + Blueprint::HitEstimate Query::estimate() const { diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.h b/searchcore/src/vespa/searchcore/proton/matching/query.h index 03aea5a0d2d..32c7b2a91b6 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/query.h +++ b/searchcore/src/vespa/searchcore/proton/matching/query.h @@ -97,6 +97,8 @@ public: ISearchContext &context, search::fef::MatchDataLayout &mdl); + void enumerate_blueprint_nodes() noexcept; + /** * Optimize the query to be executed. This function should be * called after the reserveHandles function and before the @@ -130,6 +132,7 @@ public: vespalib::ThreadBundle &thread_bundle, search::engine::Trace* trace); void freeze(); + void set_matching_phase(search::queryeval::MatchingPhase matching_phase) const noexcept; /** * Create the actual search iterator tree used to find matches. diff --git a/searchcore/src/vespa/searchcore/proton/server/rpc_hooks.cpp b/searchcore/src/vespa/searchcore/proton/server/rpc_hooks.cpp index c72a4eaf352..785ffcd9663 100644 --- a/searchcore/src/vespa/searchcore/proton/server/rpc_hooks.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/rpc_hooks.cpp @@ -90,11 +90,6 @@ RPCHooksBase::initRPC() rb.ReturnDesc("message", "Array of status messages"); rb.RequestAccessFilter(make_proton_admin_api_capability_filter()); //------------------------------------------------------------------------- - rb.DefineMethod("pandora.rtc.die", "", "", - FRT_METHOD(RPCHooksBase::rpc_die), this); - rb.MethodDesc("Exit the rtc application without cleanup"); - rb.RequestAccessFilter(make_proton_admin_api_capability_filter()); - //------------------------------------------------------------------------- rb.DefineMethod("proton.triggerFlush", "", "b", FRT_METHOD(RPCHooksBase::rpc_triggerFlush), this); rb.MethodDesc("Tell the node to trigger flush ASAP"); @@ -242,19 +237,6 @@ RPCHooksBase::getProtonStatus(FRT_RPCRequest *req) } void -RPCHooksBase::rpc_die(FRT_RPCRequest * req) -{ - LOG(debug, "RPCHooksBase::rpc_die"); - req->Detach(); - letProtonDo(makeLambdaTask([req]() { - LOG(debug, "Nap for 10ms and then quickly exit."); - req->Return(); - std::this_thread::sleep_for(10ms); - std::_Exit(0); - })); -} - -void RPCHooksBase::rpc_triggerFlush(FRT_RPCRequest *req) { LOG(info, "RPCHooksBase::rpc_triggerFlush started"); diff --git a/searchcore/src/vespa/searchcore/proton/server/rpc_hooks.h b/searchcore/src/vespa/searchcore/proton/server/rpc_hooks.h index 0b9329551f5..7f863bc1fc3 100644 --- a/searchcore/src/vespa/searchcore/proton/server/rpc_hooks.h +++ b/searchcore/src/vespa/searchcore/proton/server/rpc_hooks.h @@ -55,7 +55,6 @@ public: void rpc_GetState(FRT_RPCRequest *req); void rpc_GetProtonStatus(FRT_RPCRequest *req); - void rpc_die(FRT_RPCRequest *req); void rpc_triggerFlush(FRT_RPCRequest *req); void rpc_prepareRestart(FRT_RPCRequest *req); protected: |