diff options
Diffstat (limited to 'searchlib/src/tests/queryeval')
20 files changed, 457 insertions, 270 deletions
diff --git a/searchlib/src/tests/queryeval/blueprint/CMakeLists.txt b/searchlib/src/tests/queryeval/blueprint/CMakeLists.txt index e46ad1085e3..ef8d974151a 100644 --- a/searchlib/src/tests/queryeval/blueprint/CMakeLists.txt +++ b/searchlib/src/tests/queryeval/blueprint/CMakeLists.txt @@ -11,6 +11,7 @@ vespa_add_executable(searchlib_leaf_blueprints_test_app TEST leaf_blueprints_test.cpp DEPENDS searchlib + GTest::gtest ) vespa_add_test(NAME searchlib_leaf_blueprints_test_app COMMAND searchlib_leaf_blueprints_test_app || diff -u lhs.out rhs.out) vespa_add_executable(searchlib_intermediate_blueprints_test_app TEST diff --git a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp index 485410e0eba..f7745da174c 100644 --- a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp @@ -13,7 +13,7 @@ LOG_SETUP("blueprint_test"); using namespace search::queryeval; -using namespace search::fef; +using MatchData = search::fef::MatchData; namespace { @@ -44,9 +44,7 @@ public: } SearchIterator::UP - createIntermediateSearch(MultiSearch::Children subSearches, - MatchData &md) const override - { + createIntermediateSearch(MultiSearch::Children subSearches, MatchData &md) const override { return std::make_unique<MySearch>("or", std::move(subSearches), &md, strict()); } SearchIteratorUP createFilterSearch(FilterConstraint constraint) const override { @@ -63,9 +61,7 @@ class OtherOr : public OrBlueprint private: public: SearchIterator::UP - createIntermediateSearch(MultiSearch::Children subSearches, - MatchData &md) const override - { + createIntermediateSearch(MultiSearch::Children subSearches, MatchData &md) const override { return std::make_unique<MySearch>("or", std::move(subSearches), &md, strict()); } @@ -89,9 +85,7 @@ public: } SearchIterator::UP - createIntermediateSearch(MultiSearch::Children subSearches, - MatchData &md) const override - { + createIntermediateSearch(MultiSearch::Children subSearches, MatchData &md) const override { return std::make_unique<MySearch>("and", std::move(subSearches), &md, strict()); } @@ -106,9 +100,7 @@ class OtherAnd : public AndBlueprint private: public: SearchIterator::UP - createIntermediateSearch(MultiSearch::Children subSearches, - MatchData &md) const override - { + createIntermediateSearch(MultiSearch::Children subSearches, MatchData &md) const override { return std::make_unique<MySearch>("and", std::move(subSearches), &md, strict()); } @@ -121,9 +113,7 @@ class OtherAndNot : public AndNotBlueprint { public: SearchIterator::UP - createIntermediateSearch(MultiSearch::Children subSearches, - MatchData &md) const override - { + createIntermediateSearch(MultiSearch::Children subSearches, MatchData &md) const override { return std::make_unique<MySearch>("andnot", std::move(subSearches), &md, strict()); } @@ -658,6 +648,7 @@ getExpectedBlueprint() " strict_cost: 0\n" " sourceId: 4294967295\n" " docid_limit: 0\n" + " id: 0\n" " strict: false\n" " children: std::vector {\n" " [0]: (anonymous namespace)::MyTerm {\n" @@ -681,6 +672,7 @@ getExpectedBlueprint() " strict_cost: 0\n" " sourceId: 4294967295\n" " docid_limit: 0\n" + " id: 0\n" " strict: false\n" " }\n" " }\n" @@ -714,6 +706,7 @@ getExpectedSlimeBlueprint() { " strict_cost: 0.0," " sourceId: 4294967295," " docid_limit: 0," + " id: 0," " strict: false," " children: {" " '[type]': 'std::vector'," @@ -742,6 +735,7 @@ getExpectedSlimeBlueprint() { " strict_cost: 0.0," " sourceId: 4294967295," " docid_limit: 0," + " id: 0," " strict: false" " }" " }" @@ -852,6 +846,30 @@ TEST("self strict resolving during sort") { } } +void check_ids(Blueprint &bp, const std::vector<uint32_t> &expect) { + std::vector<uint32_t> actual; + bp.each_node_post_order([&](auto &node){ actual.push_back(node.id()); }); + ASSERT_EQUAL(actual.size(), expect.size()); + for (size_t i = 0; i < actual.size(); ++i) { + EXPECT_EQUAL(actual[i], expect[i]); + } +} + +TEST("blueprint node enumeration") { + auto a = std::make_unique<AndBlueprint>(); + a->addChild(std::make_unique<MyLeaf>()); + a->addChild(std::make_unique<MyLeaf>()); + auto b = std::make_unique<AndBlueprint>(); + b->addChild(std::make_unique<MyLeaf>()); + b->addChild(std::make_unique<MyLeaf>()); + auto root = std::make_unique<OrBlueprint>(); + root->addChild(std::move(a)); + root->addChild(std::move(b)); + TEST_DO(check_ids(*root, {0,0,0,0,0,0,0})); + root->enumerate(1); + TEST_DO(check_ids(*root, {3,4,2,6,7,5,1})); +} + TEST_MAIN() { TEST_DEBUG("lhs.out", "rhs.out"); TEST_RUN_ALL(); diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp index bddc9f92111..490f221d1d8 100644 --- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp @@ -27,8 +27,9 @@ LOG_SETUP("blueprint_test"); using namespace search::queryeval; -using namespace search::fef; using namespace search::query; +using search::fef::MatchData; +using search::queryeval::Blueprint; using search::BitVector; using BlueprintVector = std::vector<std::unique_ptr<Blueprint>>; using vespalib::Slime; @@ -575,7 +576,9 @@ void compare(const Blueprint &bp1, const Blueprint &bp2, bool expect_eq) { bp1.asSlime(SlimeInserter(a)); bp2.asSlime(SlimeInserter(b)); if (expect_eq) { - EXPECT_TRUE(vespalib::slime::are_equal(a.get(), b.get(), cmp_hook)); + if(!EXPECT_TRUE(vespalib::slime::are_equal(a.get(), b.get(), cmp_hook))) { + fprintf(stderr, "a: %s\n\nb: %s\n\n", bp1.asString().c_str(), bp2.asString().c_str()); + } } else { EXPECT_FALSE(vespalib::slime::are_equal(a.get(), b.get(), cmp_hook)); } @@ -613,7 +616,6 @@ TEST_F("test SourceBlender below AND partial optimization", SourceBlenderTestFix auto expect = std::make_unique<AndBlueprint>(); addLeafs(*expect, {1,2,3}); - expect->addChild(addLeafsWithSourceId(std::make_unique<SourceBlenderBlueprint>(f.selector_2), {{10, 1}, {20, 2}})); auto blender = std::make_unique<SourceBlenderBlueprint>(f.selector_1); blender->addChild(addLeafsWithSourceId(3, std::make_unique<AndBlueprint>(), {{30, 3}, {300, 3}})); @@ -621,6 +623,8 @@ TEST_F("test SourceBlender below AND partial optimization", SourceBlenderTestFix blender->addChild(addLeafsWithSourceId(1, std::make_unique<AndBlueprint>(), {{10, 1}, {100, 1}, {1000, 1}})); expect->addChild(std::move(blender)); + expect->addChild(addLeafsWithSourceId(std::make_unique<SourceBlenderBlueprint>(f.selector_2), {{10, 1}, {20, 2}})); + optimize_and_compare(std::move(top), std::move(expect)); } @@ -1401,7 +1405,7 @@ TEST("cost for ANDNOT") { TEST("cost for SB") { InvalidSelector sel; - verify_cost(make::SB(sel), 1.3, 1.3); // max + verify_cost(make::SB(sel), 1.3+1.0, 1.3+(1.0-0.8*0.7*0.5)); // max, non_strict+1.0, strict+est } TEST("cost for NEAR") { diff --git a/searchlib/src/tests/queryeval/blueprint/leaf_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/leaf_blueprints_test.cpp index cb5473babbd..ea7f3d8fdc9 100644 --- a/searchlib/src/tests/queryeval/blueprint/leaf_blueprints_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/leaf_blueprints_test.cpp @@ -1,33 +1,20 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/vespalib/testkit/testapp.h> #include <vespa/searchlib/queryeval/blueprint.h> #include <vespa/searchlib/queryeval/leaf_blueprints.h> #include <vespa/searchlib/fef/matchdata.h> - -#include <vespa/log/log.h> -LOG_SETUP("blueprint_test"); +#include <vespa/vespalib/gtest/gtest.h> using namespace search::queryeval; using namespace search::fef; -class Test : public vespalib::TestApp -{ -public: - void testEmptyBlueprint(); - void testSimpleBlueprint(); - void testFakeBlueprint(); - int Main() override; -}; - -void -Test::testEmptyBlueprint() +TEST(LeafBlueprintsTest, empty_blueprint) { MatchData::UP md(MatchData::makeTestInstance(100, 10)); EmptyBlueprint empty(FieldSpecBase(1, 11)); ASSERT_TRUE(empty.getState().numFields() == 1u); - EXPECT_EQUAL(1u, empty.getState().field(0).getFieldId()); - EXPECT_EQUAL(11u, empty.getState().field(0).getHandle()); + EXPECT_EQ(1u, empty.getState().field(0).getFieldId()); + EXPECT_EQ(11u, empty.getState().field(0).getHandle()); empty.basic_plan(true, 100); empty.fetchPostings(ExecuteInfo::FULL); @@ -36,18 +23,17 @@ Test::testEmptyBlueprint() SimpleResult res; res.search(*search); SimpleResult expect; // empty - EXPECT_EQUAL(res, expect); + EXPECT_EQ(res, expect); } -void -Test::testSimpleBlueprint() +TEST(LeafBlueprintsTest, simple_blueprint) { MatchData::UP md(MatchData::makeTestInstance(100, 10)); SimpleResult a; a.addHit(3).addHit(5).addHit(7); SimpleBlueprint simple(a); simple.tag("tag"); - EXPECT_EQUAL("tag", simple.tag()); + EXPECT_EQ("tag", simple.tag()); simple.basic_plan(true, 100); simple.fetchPostings(ExecuteInfo::FULL); SearchIterator::UP search = simple.createSearch(*md); @@ -56,11 +42,10 @@ Test::testSimpleBlueprint() res.search(*search); SimpleResult expect; expect.addHit(3).addHit(5).addHit(7); - EXPECT_EQUAL(res, expect); + EXPECT_EQ(res, expect); } -void -Test::testFakeBlueprint() +TEST(LeafBlueprintsTest, fake_blueprint) { MatchData::UP md(MatchData::makeTestInstance(100, 10)); FakeResult fake; @@ -76,36 +61,36 @@ Test::testFakeBlueprint() SearchIterator::UP search = orig.createSearch(*md); search->initFullRange(); EXPECT_TRUE(!search->seek(1u)); - EXPECT_EQUAL(10u, search->getDocId()); + EXPECT_EQ(10u, search->getDocId()); { search->unpack(10u); TermFieldMatchData &data = *md->resolveTermField(handle); - EXPECT_EQUAL(fieldId, data.getFieldId()); - EXPECT_EQUAL(10u, data.getDocId()); - EXPECT_EQUAL(10u, data.getDocId()); + EXPECT_EQ(fieldId, data.getFieldId()); + EXPECT_EQ(10u, data.getDocId()); + EXPECT_EQ(10u, data.getDocId()); FieldPositionsIterator itr = data.getIterator(); - EXPECT_EQUAL(50u, itr.getFieldLength()); - EXPECT_EQUAL(2u, itr.size()); + EXPECT_EQ(50u, itr.getFieldLength()); + EXPECT_EQ(2u, itr.size()); ASSERT_TRUE(itr.valid()); - EXPECT_EQUAL(2u, itr.getPosition()); + EXPECT_EQ(2u, itr.getPosition()); itr.next(); ASSERT_TRUE(itr.valid()); - EXPECT_EQUAL(3u, itr.getPosition()); + EXPECT_EQ(3u, itr.getPosition()); itr.next(); EXPECT_TRUE(!itr.valid()); } EXPECT_TRUE(search->seek(25)); - EXPECT_EQUAL(25u, search->getDocId()); + EXPECT_EQ(25u, search->getDocId()); { search->unpack(25u); TermFieldMatchData &data = *md->resolveTermField(handle); - EXPECT_EQUAL(fieldId, data.getFieldId()); - EXPECT_EQUAL(25u, data.getDocId()); + EXPECT_EQ(fieldId, data.getFieldId()); + EXPECT_EQ(25u, data.getDocId()); FieldPositionsIterator itr = data.getIterator(); - EXPECT_EQUAL(10u, itr.getFieldLength()); - EXPECT_EQUAL(1u, itr.size()); + EXPECT_EQ(10u, itr.getFieldLength()); + EXPECT_EQ(1u, itr.size()); ASSERT_TRUE(itr.valid()); - EXPECT_EQUAL(5u, itr.getPosition()); + EXPECT_EQ(5u, itr.getPosition()); itr.next(); EXPECT_TRUE(!itr.valid()); } @@ -113,14 +98,4 @@ Test::testFakeBlueprint() EXPECT_TRUE(search->isAtEnd()); } -int -Test::Main() -{ - TEST_INIT("leaf_blueprints_test"); - testEmptyBlueprint(); - testSimpleBlueprint(); - testFakeBlueprint(); - TEST_DONE(); -} - -TEST_APPHOOK(Test); +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/tests/queryeval/exact_nearest_neighbor/CMakeLists.txt b/searchlib/src/tests/queryeval/exact_nearest_neighbor/CMakeLists.txt new file mode 100644 index 00000000000..538fc936056 --- /dev/null +++ b/searchlib/src/tests/queryeval/exact_nearest_neighbor/CMakeLists.txt @@ -0,0 +1,10 @@ +# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +vespa_add_executable(searchlib_exact_nearest_neighbor_test_app TEST + SOURCES + exact_nearest_neighbor_test.cpp + DEPENDS + searchlib + GTest::GTest +) +vespa_add_test(NAME searchlib_exact_nearest_neighbor_test_app COMMAND searchlib_exact_nearest_neighbor_test_app) diff --git a/searchlib/src/tests/queryeval/nearest_neighbor/nearest_neighbor_test.cpp b/searchlib/src/tests/queryeval/exact_nearest_neighbor/exact_nearest_neighbor_test.cpp index e4a8be121f5..404ed37eb59 100644 --- a/searchlib/src/tests/queryeval/nearest_neighbor/nearest_neighbor_test.cpp +++ b/searchlib/src/tests/queryeval/exact_nearest_neighbor/exact_nearest_neighbor_test.cpp @@ -7,7 +7,7 @@ #include <vespa/searchlib/common/feature.h> #include <vespa/searchlib/fef/matchdata.h> #include <vespa/searchlib/queryeval/global_filter.h> -#include <vespa/searchlib/queryeval/nearest_neighbor_iterator.h> +#include <vespa/searchlib/queryeval/exact_nearest_neighbor_iterator.h> #include <vespa/searchlib/queryeval/nns_index_iterator.h> #include <vespa/searchlib/queryeval/simpleresult.h> #include <vespa/searchlib/tensor/dense_tensor_attribute.h> @@ -15,12 +15,8 @@ #include <vespa/searchlib/tensor/distance_function_factory.h> #include <vespa/searchlib/tensor/serialized_fast_value_attribute.h> #include <vespa/vespalib/gtest/gtest.h> -#include <vespa/vespalib/test/insertion_operators.h> #include <vespa/vespalib/util/stringfmt.h> -#include <vespa/log/log.h> -LOG_SETUP("nearest_neighbor_test"); - #define EPS 1.0e-6 using search::AttributeVector; @@ -129,9 +125,9 @@ SimpleResult find_matches(Fixture &env, const Value &qtv, double threshold = std NearestNeighborDistanceHeap dh(2); dh.set_distance_threshold(threshold); const GlobalFilter &filter = *env._global_filter; - auto search = NearestNeighborIterator::create(strict, tfmd, - std::make_unique<DistanceCalculator>(attr, qtv), - dh, filter); + auto search = ExactNearestNeighborIterator::create(strict, tfmd, + std::make_unique<DistanceCalculator>(attr, qtv), + dh, filter); if (strict) { return SimpleResult().searchStrict(*search, attr.getNumDocs()); } else { @@ -201,17 +197,17 @@ std::ostream& operator<<(std::ostream& os, const TestParam& param) return os; } -struct NnsIndexIteratorParameterizedTest : public ::testing::TestWithParam<TestParam> {}; +struct ExactNearestNeighborIteratorParameterizedTest : public ::testing::TestWithParam<TestParam> {}; -INSTANTIATE_TEST_SUITE_P(NnsTestSuite, - NnsIndexIteratorParameterizedTest, +INSTANTIATE_TEST_SUITE_P(ExactNearestNeighborIteratorTestSuite, + ExactNearestNeighborIteratorParameterizedTest, ::testing::Values( TestParam(denseSpecDouble, denseSpecDouble), TestParam(denseSpecFloat, denseSpecFloat), TestParam(mixed_spec, denseSpecDouble) )); -TEST_P(NnsIndexIteratorParameterizedTest, require_that_iterator_returns_expected_results) { +TEST_P(ExactNearestNeighborIteratorParameterizedTest, require_that_iterator_returns_expected_results) { auto param = GetParam(); verify_iterator_returns_expected_results(param.attribute_tensor_type_spec, param.query_tensor_type_spec); } @@ -243,7 +239,7 @@ verify_iterator_returns_filtered_results(const vespalib::string& attribute_tenso EXPECT_EQ(result, farExpect); } -TEST_P(NnsIndexIteratorParameterizedTest, require_that_iterator_returns_filtered_results) { +TEST_P(ExactNearestNeighborIteratorParameterizedTest, require_that_iterator_returns_filtered_results) { auto param = GetParam(); verify_iterator_returns_filtered_results(param.attribute_tensor_type_spec, param.query_tensor_type_spec); } @@ -256,9 +252,9 @@ std::vector<feature_t> get_rawscores(Fixture &env, const Value &qtv) { auto dff = search::tensor::make_distance_function_factory(DistanceMetric::Euclidean, qtv.cells().type); NearestNeighborDistanceHeap dh(2); auto dummy_filter = GlobalFilter::create(); - auto search = NearestNeighborIterator::create(strict, tfmd, - std::make_unique<DistanceCalculator>(attr, qtv), - dh, *dummy_filter); + auto search = ExactNearestNeighborIterator::create(strict, tfmd, + std::make_unique<DistanceCalculator>(attr, qtv), + dh, *dummy_filter); uint32_t limit = attr.getNumDocs(); uint32_t docid = 1; search->initRange(docid, limit); @@ -299,7 +295,7 @@ verify_iterator_sets_expected_rawscore(const vespalib::string& attribute_tensor_ } } -TEST_P(NnsIndexIteratorParameterizedTest, require_that_iterator_sets_expected_rawscore) { +TEST_P(ExactNearestNeighborIteratorParameterizedTest, require_that_iterator_sets_expected_rawscore) { auto param = GetParam(); verify_iterator_sets_expected_rawscore(param.attribute_tensor_type_spec, param.query_tensor_type_spec); } diff --git a/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp b/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp index 16e78f77eec..9fdf1417a92 100644 --- a/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp +++ b/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp @@ -356,7 +356,7 @@ DotProductAdapter::~DotProductAdapter() = default; struct ParallelWeakAndAdapter { FieldSpec field; ParallelWeakAndBlueprint blueprint; - ParallelWeakAndAdapter() : field("foo", 3, 7), blueprint(field, 100, 0.0, 1.0) {} + ParallelWeakAndAdapter() : field("foo", 3, 7), blueprint(field, 100, 0.0, 1.0, true) {} void addChild(std::unique_ptr<Blueprint> child) { auto child_field = blueprint.getNextChildField(field); auto term = std::make_unique<LeafProxy>(child_field, std::move(child)); diff --git a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp index 57fddb0a819..d6008136d73 100644 --- a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp +++ b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp @@ -1,6 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include <vespa/searchlib/queryeval/flow.h> +#include <vespa/searchlib/queryeval/flow_tuning.h> #include <vespa/vespalib/gtest/gtest.h> #include <vector> #include <random> @@ -349,6 +350,35 @@ TEST(FlowTest, blender_flow_cost_accumulation_is_max) { } } +double my_non_strict_cost(double est, double adjust) { + return (1.0/adjust) * flow::forced_strict_cost(FlowStats(est, 0.0, est), adjust); +} + +TEST(FlowTest, non_strict_btree_cost) { + for (double est: {0.001, 0.01, 0.1, 0.2, 0.3, 0.5, 0.75, 1.0}) { + auto prev = FlowStats(est, 1.0, est); + auto base = FlowStats(est, flow::non_strict_cost_of_strict_iterator(est, est), est); + auto opt05 = FlowStats(est, my_non_strict_cost(est, 0.5), est); + auto opt02 = FlowStats(est, my_non_strict_cost(est, 0.2), est); + auto opt01 = FlowStats(est, my_non_strict_cost(est, 0.1), est); + auto opt005 = FlowStats(est, my_non_strict_cost(est, 0.05), est); + auto opt003 = FlowStats(est, my_non_strict_cost(est, 0.03), est); + EXPECT_NEAR(strict_crossover(opt05), 0.5, 1e-6); + EXPECT_NEAR(strict_crossover(opt02), 0.2, 1e-6); + EXPECT_NEAR(strict_crossover(opt01), 0.1, 1e-6); + EXPECT_NEAR(strict_crossover(opt005), 0.05, 1e-6); + EXPECT_NEAR(strict_crossover(opt003), 0.03, 1e-6); + fprintf(stderr, "est: %5.3f\n", est); + fprintf(stderr, " prev crossover: %6.4f (cost: %6.4f)\n", strict_crossover(prev), prev.cost); + fprintf(stderr, " base crossover: %6.4f (cost: %6.4f)\n", strict_crossover(base), base.cost); + fprintf(stderr, " 0.5 crossover: %6.4f (cost: %6.4f)\n", strict_crossover(opt05), opt05.cost); + fprintf(stderr, " 0.2 crossover: %6.4f (cost: %6.4f)\n", strict_crossover(opt02), opt02.cost); + fprintf(stderr, " 0.1 crossover: %6.4f (cost: %6.4f)\n", strict_crossover(opt01), opt01.cost); + fprintf(stderr, " 0.05 crossover: %6.4f (cost: %6.4f)\n", strict_crossover(opt005), opt005.cost); + fprintf(stderr, " 0.03 crossover: %6.4f (cost: %6.4f)\n", strict_crossover(opt003), opt003.cost); + } +} + TEST(FlowTest, optimal_and_flow) { for (size_t i = 0; i < loop_cnt; ++i) { for (bool strict: {false, true}) { diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.cpp index 8591ec1415d..51177850155 100644 --- a/searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.cpp +++ b/searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.cpp @@ -2,14 +2,14 @@ #include "intermediate_blueprint_factory.h" #include <vespa/searchlib/queryeval/intermediate_blueprints.h> +#include <vespa/searchlib/attribute/singlenumericattribute.h> #include <iomanip> #include <sstream> namespace search::queryeval::test { -template <typename BlueprintType> char -IntermediateBlueprintFactory<BlueprintType>::child_name(void* blueprint) const +IntermediateBlueprintFactory::child_name(void* blueprint) const { auto itr = _child_names.find(blueprint); if (itr != _child_names.end()) { @@ -18,35 +18,33 @@ IntermediateBlueprintFactory<BlueprintType>::child_name(void* blueprint) const return '?'; } -template <typename BlueprintType> -IntermediateBlueprintFactory<BlueprintType>::IntermediateBlueprintFactory(vespalib::stringref name) +IntermediateBlueprintFactory::IntermediateBlueprintFactory(vespalib::stringref name) : _name(name), _children(), _child_names() { } -template <typename BlueprintType> -IntermediateBlueprintFactory<BlueprintType>::~IntermediateBlueprintFactory() = default; +IntermediateBlueprintFactory::~IntermediateBlueprintFactory() = default; -template <typename BlueprintType> std::unique_ptr<Blueprint> -IntermediateBlueprintFactory<BlueprintType>::make_blueprint() +IntermediateBlueprintFactory::make_blueprint() { - auto res = std::make_unique<BlueprintType>(); + auto res = make_self(); _child_names.clear(); char name = 'A'; + uint32_t source = 1; for (const auto& factory : _children) { auto child = factory->make_blueprint(); _child_names[child.get()] = name++; + child->setSourceId(source++); // ignored by non-source-blender blueprints res->addChild(std::move(child)); } return res; } -template <typename BlueprintType> vespalib::string -IntermediateBlueprintFactory<BlueprintType>::get_name(Blueprint& blueprint) const +IntermediateBlueprintFactory::get_name(Blueprint& blueprint) const { auto* intermediate = blueprint.asIntermediate(); if (intermediate != nullptr) { @@ -69,11 +67,29 @@ IntermediateBlueprintFactory<BlueprintType>::get_name(Blueprint& blueprint) cons return get_class_name(blueprint); } -template class IntermediateBlueprintFactory<AndBlueprint>; +//----------------------------------------------------------------------------- AndBlueprintFactory::AndBlueprintFactory() - : IntermediateBlueprintFactory<AndBlueprint>("AND") + : IntermediateBlueprintFactory("AND") {} +std::unique_ptr<IntermediateBlueprint> +AndBlueprintFactory::make_self() const +{ + return std::make_unique<AndBlueprint>(); +} + +//----------------------------------------------------------------------------- + +SourceBlenderBlueprintFactory::SourceBlenderBlueprintFactory() + : IntermediateBlueprintFactory("SB"), + _selector(250, "my_source_blender", 1000) +{} + +std::unique_ptr<IntermediateBlueprint> +SourceBlenderBlueprintFactory::make_self() const +{ + return std::make_unique<SourceBlenderBlueprint>(_selector); } +} diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.h b/searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.h index 6f7fe4f9ee7..c791d866612 100644 --- a/searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.h +++ b/searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.h @@ -4,6 +4,7 @@ #include "benchmark_blueprint_factory.h" #include <vespa/searchlib/queryeval/intermediate_blueprints.h> +#include <vespa/searchlib/attribute/fixedsourceselector.h> #include <unordered_map> namespace search::queryeval::test { @@ -11,7 +12,6 @@ namespace search::queryeval::test { /** * Factory that creates an IntermediateBlueprint (of the given type) with children created by the given factories. */ -template <typename BlueprintType> class IntermediateBlueprintFactory : public BenchmarkBlueprintFactory { private: vespalib::string _name; @@ -19,7 +19,8 @@ private: std::unordered_map<void*, char> _child_names; char child_name(void* blueprint) const; - +protected: + virtual std::unique_ptr<IntermediateBlueprint> make_self() const = 0; public: IntermediateBlueprintFactory(vespalib::stringref name); ~IntermediateBlueprintFactory(); @@ -30,10 +31,26 @@ public: vespalib::string get_name(Blueprint& blueprint) const override; }; -class AndBlueprintFactory : public IntermediateBlueprintFactory<AndBlueprint> { +class AndBlueprintFactory : public IntermediateBlueprintFactory { +protected: + std::unique_ptr<IntermediateBlueprint> make_self() const override; public: AndBlueprintFactory(); }; -} +class SourceBlenderBlueprintFactory : public IntermediateBlueprintFactory +{ +private: + FixedSourceSelector _selector; +protected: + std::unique_ptr<IntermediateBlueprint> make_self() const override; +public: + SourceBlenderBlueprintFactory(); + void init_selector(auto f, uint32_t limit) { + for (uint32_t i = 0; i < limit; ++i) { + _selector.setSource(i, f(i)); + } + } +}; +} diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp index f4a1ade8a66..e74fefac70e 100644 --- a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp +++ b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp @@ -13,19 +13,31 @@ #include <vector> using namespace search::attribute; -using namespace search::fef; using namespace search::queryeval::test; using namespace search::queryeval; using namespace search; using namespace vespalib; using search::index::Schema; +using search::fef::MatchData; using vespalib::make_string_short::fmt; const vespalib::string field_name = "myfield"; double budget_sec = 1.0; +double estimate_actual_cost(Blueprint &bp, InFlow in_flow) { + if (in_flow.strict()) { + assert(bp.strict()); + return bp.strict_cost(); + } else if (bp.strict()) { + auto stats = FlowStats::from(flow::DefaultAdapter(), &bp); + return flow::forced_strict_cost(stats, in_flow.rate()); + } else { + return bp.cost() * in_flow.rate(); + } +} + enum class PlanningAlgo { Order, Estimate, @@ -236,7 +248,8 @@ strict_search(BenchmarkBlueprintFactory& factory, uint32_t docid_limit, Planning timer.after(); } FlowStats flow(ctx.blueprint->estimate(), ctx.blueprint->cost(), ctx.blueprint->strict_cost()); - return {timer.min_time() * 1000.0, hits + 1, hits, flow, flow.strict_cost, get_class_name(*ctx.iterator), factory.get_name(*ctx.blueprint)}; + double actual_cost = estimate_actual_cost(*ctx.blueprint, InFlow(true)); + return {timer.min_time() * 1000.0, hits + 1, hits, flow, actual_cost, get_class_name(*ctx.iterator), factory.get_name(*ctx.blueprint)}; } template <bool do_unpack> @@ -269,7 +282,7 @@ non_strict_search(BenchmarkBlueprintFactory& factory, uint32_t docid_limit, doub timer.after(); } FlowStats flow(ctx.blueprint->estimate(), ctx.blueprint->cost(), ctx.blueprint->strict_cost()); - double actual_cost = flow.cost * filter_hit_ratio; + double actual_cost = estimate_actual_cost(*ctx.blueprint, InFlow(filter_hit_ratio)); return {timer.min_time() * 1000.0, seeks, hits, flow, actual_cost, get_class_name(*ctx.iterator), factory.get_name(*ctx.blueprint)}; } @@ -291,10 +304,6 @@ benchmark_search(BenchmarkBlueprintFactory& factory, uint32_t docid_limit, bool } } - - - - //----------------------------------------------------------------------------- double est_forced_strict_cost(double estimate, double strict_cost, double rate) { @@ -317,26 +326,26 @@ struct Sample { } }; -double find_crossover(const char *type, const auto &calculate_at, double delta) { +double find_crossover(const char *type, const char *a, const char *b, const auto &calculate_at, double delta) { double min = delta; double max = 1.0; fprintf(stderr, "looking for %s crossover in the range [%g, %g]...\n", type, min, max); auto at_min = calculate_at(min); auto at_max = calculate_at(max); - fprintf(stderr, " before: [%s, %s], after: [%s, %s]\n", - at_min.first.str().c_str(), at_max.first.str().c_str(), - at_min.second.str().c_str(), at_max.second.str().c_str()); - auto best_before = [](auto values) { return (values.first < values.second); }; - if (best_before(at_min) == best_before(at_max)) { + fprintf(stderr, " %s: [%s, %s], %s: [%s, %s]\n", + a, at_min.first.str().c_str(), at_max.first.str().c_str(), + b, at_min.second.str().c_str(), at_max.second.str().c_str()); + auto a_best = [](auto values) { return (values.first < values.second); }; + if (a_best(at_min) == a_best(at_max)) { fprintf(stderr, " NO %s CROSSOVER FOUND\n", type); return 0.0; } while (max > (min + delta)) { double x = (min + max) / 2.0; auto at_x = calculate_at(x); - fprintf(stderr, " best@%g: %s (%s vs %s)\n", x, best_before(at_x) ? "before" : "after", + fprintf(stderr, " best@%g: %s (%s vs %s)\n", x, a_best(at_x) ? a : b, at_x.first.str().c_str(), at_x.second.str().c_str()); - if (best_before(at_min) == best_before(at_x)) { + if (a_best(at_min) == a_best(at_x)) { min = x; at_min = at_x; } else { @@ -409,11 +418,11 @@ void analyze_crossover(BenchmarkBlueprintFactory &fixed, std::function<std::uniq std::vector<double> results; std::vector<const char *> names; names.push_back("time crossover"); - results.push_back(find_crossover("TIME", combine(estimate_AND_time_ms), delta)); + results.push_back(find_crossover("TIME", "before", "after", combine(estimate_AND_time_ms), delta)); names.push_back("cost crossover"); - results.push_back(find_crossover("COST", combine(calculate_AND_cost), delta)); + results.push_back(find_crossover("COST", "before", "after", combine(calculate_AND_cost), delta)); names.push_back("abs_est crossover"); - results.push_back(find_crossover("ABS_EST", combine(first_abs_est), delta)); + results.push_back(find_crossover("ABS_EST", "before", "after", combine(first_abs_est), delta)); sample_at("COST", combine(calculate_AND_cost), results, names); sample_at("TIME", combine(estimate_AND_time_ms), results, names); } @@ -429,21 +438,37 @@ to_string(bool val) void print_result_header() { - std::cout << "| chn | f_ratio | o_ratio | a_ratio | f.est | f.cost | f.scost | hits | seeks | time_ms | act_cost | ns_per_seek | ms_per_act_cost | iterator | blueprint |" << std::endl; + std::cout << "| in_flow | chn | o_ratio | a_ratio | f.est | f.cost | f.act_cost | f.scost | f.act_scost | hits | seeks | time_ms | act_cost | ns_per_seek | ms_per_act_cost | iterator | blueprint |" << std::endl; +} + +std::ostream &operator<<(std::ostream &dst, InFlow in_flow) { + auto old_w = dst.width(); + auto old_p = dst.precision(); + dst << std::setw(7) << std::setprecision(5); + if (in_flow.strict()) { + dst << " STRICT"; + } else { + dst << in_flow.rate(); + } + dst << std::setw(old_w); + dst << std::setprecision(old_p); + return dst; } void -print_result(const BenchmarkResult& res, uint32_t children, double op_hit_ratio, double filter_hit_ratio, uint32_t num_docs) +print_result(const BenchmarkResult& res, uint32_t children, double op_hit_ratio, InFlow in_flow, uint32_t num_docs) { std::cout << std::fixed << std::setprecision(5) - << "| " << std::setw(5) << children - << " | " << std::setw(7) << filter_hit_ratio + << "| " << in_flow + << " | " << std::setw(5) << children << " | " << std::setw(7) << op_hit_ratio << " | " << std::setw(7) << ((double) res.hits / (double) num_docs) << " | " << std::setw(6) << res.flow.estimate << std::setprecision(4) << " | " << std::setw(9) << res.flow.cost + << " | " << std::setw(10) << (res.flow.cost * in_flow.rate()) << " | " << std::setw(7) << res.flow.strict_cost + << " | " << std::setw(11) << (in_flow.strict() ? res.flow.strict_cost : flow::forced_strict_cost(res.flow, in_flow.rate())) << " | " << std::setw(8) << res.hits << " | " << std::setw(8) << res.seeks << std::setprecision(3) @@ -640,7 +665,7 @@ run_benchmark_case(const BenchmarkCaseSetup& setup) if (filter_hit_ratio * setup.filter_crossover_factor <= op_hit_ratio) { auto res = benchmark_search(*factory, setup.num_docs + 1, setup.bcase.strict_context, setup.bcase.force_strict, setup.bcase.unpack_iterator, filter_hit_ratio, PlanningAlgo::Cost); - print_result(res, children, op_hit_ratio, filter_hit_ratio, setup.num_docs); + print_result(res, children, op_hit_ratio, InFlow(setup.bcase.strict_context, filter_hit_ratio), setup.num_docs); result.add(res); } } @@ -681,23 +706,25 @@ run_benchmarks(const BenchmarkSetup& setup) void print_intermediate_blueprint_result_header(size_t children) { + std::cout << "| in_flow"; // This matches the naming scheme in IntermediateBlueprintFactory. char name = 'A'; for (size_t i = 0; i < children; ++i) { - std::cout << "| " << name++ << ".ratio "; + std::cout << " | " << name++ << ".ratio"; } - std::cout << "| flow.cost | flow.scost | flow.est | ratio | hits | seeks | ms_per_cost | time_ms | algo | blueprint |" << std::endl; + std::cout << " | flow.cost | flow.scost | flow.est | ratio | hits | seeks | ms_per_cost | time_ms | algo | blueprint |" << std::endl; } void -print_intermediate_blueprint_result(const BenchmarkResult& res, const std::vector<double>& children_ratios, PlanningAlgo algo, uint32_t num_docs) +print_intermediate_blueprint_result(const BenchmarkResult& res, const std::vector<double>& children_ratios, PlanningAlgo algo, InFlow in_flow, uint32_t num_docs) { - std::cout << std::fixed << std::setprecision(5); + std::cout << std::fixed << std::setprecision(5) + << "| " << in_flow; for (auto ratio : children_ratios) { - std::cout << "| " << std::setw(7) << ratio << " "; + std::cout << " | " << std::setw(7) << ratio; } std::cout << std::setprecision(5) - << "| " << std::setw(10) << res.flow.cost + << " | " << std::setw(10) << res.flow.cost << " | " << std::setw(10) << res.flow.strict_cost << " | " << std::setw(8) << res.flow.estimate << " | " << std::setw(7) << ((double) res.hits / (double) num_docs) @@ -745,9 +772,8 @@ struct BlueprintFactorySetup { BlueprintFactorySetup::~BlueprintFactorySetup() = default; -template <typename IntermediateBlueprintFactoryType> void -run_intermediate_blueprint_benchmark(const BlueprintFactorySetup& a, const BlueprintFactorySetup& b, size_t num_docs) +run_intermediate_blueprint_benchmark(auto factory_factory, std::vector<InFlow> in_flows, const BlueprintFactorySetup& a, const BlueprintFactorySetup& b, size_t num_docs) { print_intermediate_blueprint_result_header(2); double max_speedup = 0.0; @@ -755,26 +781,28 @@ run_intermediate_blueprint_benchmark(const BlueprintFactorySetup& a, const Bluep for (double b_hit_ratio: b.op_hit_ratios) { auto b_factory = b.make_factory_shared(num_docs, b_hit_ratio); for (double a_hit_ratio : a.op_hit_ratios) { - IntermediateBlueprintFactoryType factory; - factory.add_child(a.make_factory(num_docs, a_hit_ratio)); - factory.add_child(b_factory); + auto factory = factory_factory(); + factory->add_child(a.make_factory(num_docs, a_hit_ratio)); + factory->add_child(b_factory); double time_ms_esti = 0.0; - for (auto algo: {PlanningAlgo::Order, PlanningAlgo::Estimate, PlanningAlgo::Cost, - PlanningAlgo::CostForceStrict}) { - auto res = benchmark_search(factory, num_docs + 1, true, false, false, 1.0, algo); - print_intermediate_blueprint_result(res, {a_hit_ratio, b_hit_ratio}, algo, num_docs); - if (algo == PlanningAlgo::Estimate) { - time_ms_esti = res.time_ms; - } - if (algo == PlanningAlgo::CostForceStrict) { - double speedup = time_ms_esti / res.time_ms; - if (speedup > max_speedup) { - max_speedup = speedup; + for (InFlow in_flow: in_flows) { + for (auto algo: {PlanningAlgo::Order, PlanningAlgo::Estimate, PlanningAlgo::Cost, + PlanningAlgo::CostForceStrict}) { + auto res = benchmark_search(*factory, num_docs + 1, in_flow.strict(), false, false, in_flow.rate(), algo); + print_intermediate_blueprint_result(res, {a_hit_ratio, b_hit_ratio}, algo, in_flow, num_docs); + if (algo == PlanningAlgo::Estimate) { + time_ms_esti = res.time_ms; } - if (speedup < min_speedup) { - min_speedup = speedup; + if (algo == PlanningAlgo::CostForceStrict) { + double speedup = time_ms_esti / res.time_ms; + if (speedup > max_speedup) { + max_speedup = speedup; + } + if (speedup < min_speedup) { + min_speedup = speedup; + } + std::cout << "speedup (esti/forc)=" << std::setprecision(4) << speedup << std::endl; } - std::cout << "speedup (esti/forc)=" << std::setprecision(4) << speedup << std::endl; } } } @@ -786,7 +814,19 @@ void run_and_benchmark(const BlueprintFactorySetup& a, const BlueprintFactorySetup& b, size_t num_docs) { std::cout << "AND[A={" << a.to_string() << "},B={" << b.to_string() << "}]" << std::endl; - run_intermediate_blueprint_benchmark<AndBlueprintFactory>(a, b, num_docs); + run_intermediate_blueprint_benchmark([](){ return std::make_unique<AndBlueprintFactory>(); }, {true}, a, b, num_docs); +} + +void +run_source_blender_benchmark(const BlueprintFactorySetup& a, const BlueprintFactorySetup& b, size_t num_docs) +{ + std::cout << "SB[A={" << a.to_string() << "},B={" << b.to_string() << "}]" << std::endl; + auto factory_factory = [&](){ + auto factory = std::make_unique<SourceBlenderBlueprintFactory>(); + factory->init_selector([](uint32_t i){ return (i%10 == 0) ? 1 : 2; }, num_docs + 1); + return factory; + }; + run_intermediate_blueprint_benchmark(factory_factory, {true, 0.75, 0.5, 0.25, 0.1, 0.01, 0.001}, a, b, num_docs); } //------------------------------------------------------------------------------------- @@ -970,16 +1010,40 @@ TEST(IteratorBenchmark, analyze_AND_bitvector_vs_IN) } } +TEST(IteratorBenchmark, analyze_strict_SOURCEBLENDER_memory_and_disk) +{ + for (double small_ratio: {0.001, 0.005, 0.01, 0.05}) { + run_source_blender_benchmark({str_fs, QueryOperator::Term, {small_ratio}}, + {str_index, QueryOperator::Term, {small_ratio * 10}}, + num_docs); + } +} + TEST(IteratorBenchmark, analyze_OR_non_strict_fs) { for (auto or_hit_ratio : {0.01, 0.1, 0.5}) { BenchmarkSetup setup(num_docs, {int32_fs}, {QueryOperator::Or}, {false}, {or_hit_ratio}, {2, 4, 6, 8, 10, 100, 1000}); + //setup.force_strict = true; setup.filter_hit_ratios = gen_ratios(or_hit_ratio, 10.0, 13); run_benchmarks(setup); } } +TEST(IteratorBenchmark, analyze_OR_non_strict_fs_child_est_adjust) +{ + for (auto or_hit_ratio : {0.01, 0.1, 0.5}) { + for (uint32_t children : {2, 4, 6, 8, 10, 100, 1000}) { + double child_est = or_hit_ratio / children; + BenchmarkSetup setup(num_docs, {int32_fs}, {QueryOperator::Or}, {false}, {or_hit_ratio}, + {children}); + //setup.force_strict = true; + setup.filter_hit_ratios = gen_ratios(child_est, 10.0, 13); + run_benchmarks(setup); + } + } +} + TEST(IteratorBenchmark, analyze_OR_non_strict_non_fs) { BenchmarkSetup setup(num_docs, {int32}, {QueryOperator::Or}, {false}, {0.1}, {2, 4, 6, 8, 10}); @@ -1008,6 +1072,22 @@ TEST(IteratorBenchmark, analyze_btree_vs_bitvector_iterators_strict) run_benchmarks(setup); } +TEST(IteratorBenchmark, btree_vs_array_nonstrict_crossover) { + for (double hit_ratio: { 0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009, + 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, + 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, + 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1.0}) + { + auto btree = make_blueprint_factory(int32_array_fs, QueryOperator::Term, num_docs, 0, hit_ratio, 1, false); + auto array = make_blueprint_factory( int32_array, QueryOperator::Term, num_docs, 0, hit_ratio, 1, false); + auto time_ms = [&](auto &bpf, double in_flow) { + return Sample(benchmark_search(bpf, num_docs + 1, false, false, false, in_flow, PlanningAlgo::Cost).time_ms); + }; + auto calculate_at = [&](double in_flow) { return std::make_pair(time_ms(*btree, in_flow), time_ms(*array, in_flow)); }; + fprintf(stderr, "btree/array crossover@%5.3f: %8.6f\n", hit_ratio, find_crossover("TIME", "btree", "array", calculate_at, 0.0001)); + } +} + int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); int res = RUN_ALL_TESTS(); diff --git a/searchlib/src/tests/queryeval/nearest_neighbor/CMakeLists.txt b/searchlib/src/tests/queryeval/nearest_neighbor/CMakeLists.txt deleted file mode 100644 index b68f7f93c18..00000000000 --- a/searchlib/src/tests/queryeval/nearest_neighbor/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -vespa_add_executable(searchlib_nearest_neighbor_test_app TEST - SOURCES - nearest_neighbor_test.cpp - DEPENDS - searchlib - GTest::GTest -) -vespa_add_test(NAME searchlib_nearest_neighbor_test_app COMMAND searchlib_nearest_neighbor_test_app) diff --git a/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp b/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp index 2bd560637d2..1cec376b01c 100644 --- a/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp +++ b/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp @@ -68,8 +68,8 @@ struct TestHeap : public WeakAndHeap { ScoresHistory history; - TestHeap(uint32_t scoresToTrack_) : WeakAndHeap(scoresToTrack_), history() {} - virtual void adjust(score_t *begin, score_t *end) override { + explicit TestHeap(uint32_t scoresToTrack_) : WeakAndHeap(scoresToTrack_), history() {} + void adjust(score_t *begin, score_t *end) override { Scores scores; for (score_t *itr = begin; itr != end; ++itr) { scores.add(*itr); @@ -87,8 +87,8 @@ struct WandTestSpec : public WandSpec TermFieldMatchData rootMatchData; MatchParams matchParams; - WandTestSpec(uint32_t scoresToTrack, uint32_t scoresAdjustFrequency = 1, - score_t scoreThreshold = 0, double thresholdBoostFactor = 1); + explicit WandTestSpec(uint32_t scoresToTrack, uint32_t scoresAdjustFrequency = 1, + score_t scoreThreshold = 0, double thresholdBoostFactor = 1); ~WandTestSpec(); SearchIterator::UP create() { MatchData::UP childrenMatchData = createMatchData(); @@ -114,7 +114,7 @@ WandTestSpec<HeapType>::WandTestSpec(uint32_t scoresToTrack, uint32_t scoresAdju {} template <typename HeapType> -WandTestSpec<HeapType>::~WandTestSpec() {} +WandTestSpec<HeapType>::~WandTestSpec() = default; using WandSpecWithTestHeap = WandTestSpec<TestHeap>; using WandSpecWithRealHeap = WandTestSpec<SharedWeakAndPriorityQueue>; @@ -137,8 +137,8 @@ SimpleResult asSimpleResult(const FakeResult &result) { SimpleResult retval; - for (size_t i = 0; i < result.inspect().size(); ++i) { - retval.addHit(result.inspect()[i].docId); + for (const auto & doc : result.inspect()) { + retval.addHit(doc.docId); } return retval; } @@ -152,26 +152,26 @@ struct WandBlueprintSpec FakeRequestContext requestContext; WandBlueprintSpec &add(const std::string &token, int32_t weight) { - tokens.push_back(std::make_pair(token, weight)); + tokens.emplace_back(token, weight); return *this; } Node::UP createNode(uint32_t scoresToTrack = 100, score_t scoreThreshold = 0, double thresholdBoostFactor = 1) const { - SimpleWandTerm *node = new SimpleWandTerm(tokens.size(), "view", 0, Weight(0), - scoresToTrack, scoreThreshold, thresholdBoostFactor); - for (size_t i = 0; i < tokens.size(); ++i) { - node->addTerm(tokens[i].first, Weight(tokens[i].second)); + auto node = std::make_unique<SimpleWandTerm>(tokens.size(), "view", 0, Weight(0), + scoresToTrack, scoreThreshold, thresholdBoostFactor); + for (const auto & token : tokens) { + node->addTerm(token.first, Weight(token.second)); } - return Node::UP(node); + return node; } Blueprint::UP blueprint(Searchable &searchable, const std::string &field, const search::query::Node &term) const { FieldSpecList fields; fields.add(FieldSpec(field, fieldId, handle)); Blueprint::UP bp = searchable.createBlueprint(requestContext, fields, term); - EXPECT_TRUE(dynamic_cast<ParallelWeakAndBlueprint*>(bp.get()) != 0); + EXPECT_TRUE(dynamic_cast<ParallelWeakAndBlueprint*>(bp.get()) != nullptr); return bp; } @@ -182,7 +182,7 @@ struct WandBlueprintSpec bp->basic_plan(true, docIdLimit); bp->fetchPostings(ExecuteInfo::FULL); SearchIterator::UP sb = bp->createSearch(*md); - EXPECT_TRUE(dynamic_cast<ParallelWeakAndSearch*>(sb.get()) != 0); + EXPECT_TRUE(dynamic_cast<ParallelWeakAndSearch*>(sb.get()) != nullptr); return sb; } @@ -197,7 +197,7 @@ struct WandBlueprintSpec bp->basic_plan(true, docIdLimit); bp->fetchPostings(ExecuteInfo::FULL); SearchIterator::UP sb = bp->createSearch(*md); - EXPECT_TRUE(dynamic_cast<ParallelWeakAndSearch*>(sb.get()) != 0); + EXPECT_TRUE(dynamic_cast<ParallelWeakAndSearch*>(sb.get()) != nullptr); return doSearch(*sb, *md->resolveTermField(handle)); } }; @@ -258,7 +258,7 @@ struct AlgoSameScoreFixture : public FixtureBase struct AlgoScoreThresholdFixture : public FixtureBase { - AlgoScoreThresholdFixture(score_t scoreThreshold) : FixtureBase(3, 1, scoreThreshold) { + explicit AlgoScoreThresholdFixture(score_t scoreThreshold) : FixtureBase(3, 1, scoreThreshold) { spec.leaf(LeafSpec("A", 1).doc(1, 10).doc(2, 30)); spec.leaf(LeafSpec("B", 2).doc(1, 20).doc(3, 40)); prepare(); @@ -267,7 +267,7 @@ struct AlgoScoreThresholdFixture : public FixtureBase struct AlgoLargeScoresFixture : public FixtureBase { - AlgoLargeScoresFixture(score_t scoreThreshold) : FixtureBase(3, 1, scoreThreshold) { + explicit AlgoLargeScoresFixture(score_t scoreThreshold) : FixtureBase(3, 1, scoreThreshold) { spec.leaf(LeafSpec("A", 60000).doc(1, 60000).doc(2, 70000)); spec.leaf(LeafSpec("B", 70000).doc(1, 80000).doc(3, 90000)); prepare(); @@ -276,7 +276,7 @@ struct AlgoLargeScoresFixture : public FixtureBase struct AlgoExhaustPastFixture : public FixtureBase { - AlgoExhaustPastFixture(score_t scoreThreshold) : FixtureBase(3, 1, scoreThreshold) { + explicit AlgoExhaustPastFixture(score_t scoreThreshold) : FixtureBase(3, 1, scoreThreshold) { spec.leaf(LeafSpec("A", 1).doc(1, 20).doc(3, 40).doc(5, 10)); spec.leaf(LeafSpec("B", 1).doc(5, 10)); spec.leaf(LeafSpec("C", 1).doc(5, 10)); @@ -449,11 +449,11 @@ struct BlueprintFixtureBase }; BlueprintFixtureBase::BlueprintFixtureBase() : spec(), searchable() {} -BlueprintFixtureBase::~BlueprintFixtureBase() {} +BlueprintFixtureBase::~BlueprintFixtureBase() = default; struct BlueprintHitsFixture : public BlueprintFixtureBase { - FakeResult createResult(size_t hits) { + static FakeResult createResult(size_t hits) { FakeResult result; for (size_t i = 0; i < hits; ++i) { result.doc(i + 1); @@ -479,7 +479,7 @@ struct BlueprintHitsFixture : public BlueprintFixtureBase struct ThresholdBoostFixture : public FixtureBase { FakeResult result; - ThresholdBoostFixture(double boost) : FixtureBase(1, 1, 800, boost) { + explicit ThresholdBoostFixture(double boost) : FixtureBase(1, 1, 800, boost) { spec.leaf(LeafSpec("A").doc(1, 10)); spec.leaf(LeafSpec("B").doc(2, 20)); spec.leaf(LeafSpec("C").doc(3, 30)); @@ -532,7 +532,7 @@ TEST(ParallelWeakAndTest, require_that_blueprint_picks_up_docid_limit) BlueprintFixture f; Node::UP term = f.spec.createNode(57, 67, 77.7); Blueprint::UP bp = f.blueprint(*term); - const ParallelWeakAndBlueprint * pbp = dynamic_cast<const ParallelWeakAndBlueprint *>(bp.get()); + const auto * pbp = dynamic_cast<const ParallelWeakAndBlueprint *>(bp.get()); EXPECT_EQ(0u, pbp->get_docid_limit()); bp->setDocIdLimit(1000); EXPECT_EQ(1000u, pbp->get_docid_limit()); @@ -543,7 +543,7 @@ TEST(ParallelWeakAndTest, require_that_scores_to_track_score_threshold_and_thres BlueprintFixture f; Node::UP term = f.spec.createNode(57, 67, 77.7); Blueprint::UP bp = f.blueprint(*term); - const ParallelWeakAndBlueprint * pbp = dynamic_cast<const ParallelWeakAndBlueprint *>(bp.get()); + const auto * pbp = dynamic_cast<const ParallelWeakAndBlueprint *>(bp.get()); EXPECT_EQ(57u, pbp->getScores().getScoresToTrack()); EXPECT_EQ(67u, pbp->getScoreThreshold()); EXPECT_EQ(77.7, pbp->getThresholdBoostFactor()); @@ -635,6 +635,7 @@ TEST(ParallelWeakAndTest, require_that_asString_on_blueprint_works) " strict_cost: 0\n" " sourceId: 4294967295\n" " docid_limit: 0\n" + " id: 0\n" " strict: false\n" " _weights: std::vector {\n" " [0]: 5\n" @@ -661,6 +662,7 @@ TEST(ParallelWeakAndTest, require_that_asString_on_blueprint_works) " strict_cost: 0\n" " sourceId: 4294967295\n" " docid_limit: 0\n" + " id: 0\n" " strict: false\n" " }\n" " }\n" @@ -708,7 +710,7 @@ SearchIterator::UP create_wand(bool use_dww, class Verifier : public search::test::DwwIteratorChildrenVerifier { public: - Verifier(bool use_dww) : _use_dww(use_dww) { } + explicit Verifier(bool use_dww) : _use_dww(use_dww) { } private: SearchIterator::UP create(bool strict) const override { MatchParams match_params(_dummy_heap, _dummy_heap.getMinScore(), 1.0, 1); diff --git a/searchlib/src/tests/queryeval/sourceblender/sourceblender_test.cpp b/searchlib/src/tests/queryeval/sourceblender/sourceblender_test.cpp index b84cb02a357..b2a1f6a645a 100644 --- a/searchlib/src/tests/queryeval/sourceblender/sourceblender_test.cpp +++ b/searchlib/src/tests/queryeval/sourceblender/sourceblender_test.cpp @@ -7,15 +7,14 @@ #include <vespa/searchlib/queryeval/leaf_blueprints.h> #define ENABLE_GTEST_MIGRATION #include <vespa/searchlib/test/searchiteratorverifier.h> -#include <vespa/searchlib/common/bitvectoriterator.h> #include <vespa/searchlib/attribute/fixedsourceselector.h> #include <vespa/searchlib/fef/matchdata.h> #include <vespa/vespalib/gtest/gtest.h> using namespace search::queryeval; -using namespace search::fef; using namespace search; using std::make_unique; +using search::fef::MatchData; /** * Proxy search used to verify unpack pattern @@ -27,24 +26,24 @@ private: SimpleResult _unpacked; protected: - virtual void doSeek(uint32_t docid) override { + void doSeek(uint32_t docid) override { _search->seek(docid); setDocId(_search->getDocId()); } - virtual void doUnpack(uint32_t docid) override { + void doUnpack(uint32_t docid) override { _unpacked.addHit(docid); _search->unpack(docid); } public: - UnpackChecker(SearchIterator *search) : _search(search), _unpacked() {} + explicit UnpackChecker(SearchIterator *search) : _search(search), _unpacked() {} const SimpleResult &getUnpacked() const { return _unpacked; } }; class MySelector : public search::FixedSourceSelector { public: - MySelector(int defaultSource) : search::FixedSourceSelector(defaultSource, "fs") { } + explicit MySelector(int defaultSource) : search::FixedSourceSelector(defaultSource, "fs") { } MySelector & set(Source s, uint32_t docId) { setSource(s, docId); return *this; @@ -65,12 +64,12 @@ TEST(SourceBlenderTest, test_strictness) a.addHit(2).addHit(5).addHit(6).addHit(8); b.addHit(3).addHit(5).addHit(6).addHit(7); - MySelector *sel = new MySelector(5); + auto *sel = new MySelector(5); sel->set(2, 1).set(3, 2).set(5, 2).set(7, 1); - SourceBlenderBlueprint *blend_b = new SourceBlenderBlueprint(*sel); - Blueprint::UP a_b(new SimpleBlueprint(a)); - Blueprint::UP b_b(new SimpleBlueprint(b)); + auto *blend_b = new SourceBlenderBlueprint(*sel); + auto a_b = std::make_unique<SimpleBlueprint>(a); + auto b_b = std::make_unique<SimpleBlueprint>(b); a_b->setSourceId(1); b_b->setSourceId(2); blend_b->addChild(std::move(a_b)); @@ -111,16 +110,16 @@ TEST(SourceBlenderTest, test_full_sourceblender_search) c.addHit(4).addHit(11).addHit(21).addHit(32); // these are all handed over to the blender - UnpackChecker *ua = new UnpackChecker(new SimpleSearch(a)); - UnpackChecker *ub = new UnpackChecker(new SimpleSearch(b)); - UnpackChecker *uc = new UnpackChecker(new SimpleSearch(c)); + auto *ua = new UnpackChecker(new SimpleSearch(a)); + auto *ub = new UnpackChecker(new SimpleSearch(b)); + auto *uc = new UnpackChecker(new SimpleSearch(c)); auto sel = make_unique<MySelector>(5); sel->set(2, 1).set(3, 2).set(11, 2).set(21, 3).set(34, 1); SourceBlenderSearch::Children abc; - abc.push_back(SourceBlenderSearch::Child(ua, 1)); - abc.push_back(SourceBlenderSearch::Child(ub, 2)); - abc.push_back(SourceBlenderSearch::Child(uc, 3)); + abc.emplace_back(ua, 1); + abc.emplace_back(ub, 2); + abc.emplace_back(uc, 3); SearchIterator::UP blend(SourceBlenderSearch::create(sel->createIterator(), abc, true)); SimpleResult result; @@ -149,7 +148,7 @@ using search::test::SearchIteratorVerifier; class Verifier : public SearchIteratorVerifier { public: Verifier(); - ~Verifier(); + ~Verifier() override; SearchIterator::UP create(bool strict) const override { return SearchIterator::UP(SourceBlenderSearch::create(_selector.createIterator(), createChildren(strict), @@ -178,7 +177,7 @@ Verifier::Verifier() : _indexes[indexId].push_back(docId); } } -Verifier::~Verifier() {} +Verifier::~Verifier() = default; TEST(SourceBlenderTest, test_that_source_blender_iterator_adheres_to_search_terator_requirements) { diff --git a/searchlib/src/tests/queryeval/sparse_vector_benchmark/sparse_vector_benchmark_test.cpp b/searchlib/src/tests/queryeval/sparse_vector_benchmark/sparse_vector_benchmark_test.cpp index 94ecd8fa539..a7516226daf 100644 --- a/searchlib/src/tests/queryeval/sparse_vector_benchmark/sparse_vector_benchmark_test.cpp +++ b/searchlib/src/tests/queryeval/sparse_vector_benchmark/sparse_vector_benchmark_test.cpp @@ -13,6 +13,7 @@ #include <vespa/searchlib/queryeval/simpleresult.h> #include <vespa/searchlib/queryeval/wand/weak_and_search.h> #include <vespa/searchlib/queryeval/weighted_set_term_search.h> +#include <vespa/searchlib/queryeval/wand/weak_and_heap.h> #include <vespa/vespalib/util/box.h> #include <vespa/vespalib/util/stringfmt.h> @@ -135,7 +136,7 @@ constexpr vespalib::duration max_time = 1000s; //----------------------------------------------------------------------------- struct ChildFactory { - ChildFactory() {} + ChildFactory() = default; virtual std::string name() const = 0; virtual SearchIterator::UP createChild(uint32_t idx, uint32_t limit) const = 0; virtual ~ChildFactory() = default; @@ -190,8 +191,9 @@ struct ModSearchFactory : ChildFactory { //----------------------------------------------------------------------------- struct VespaWandFactory : SparseVectorFactory { + mutable SharedWeakAndPriorityQueue _scores; uint32_t n; - explicit VespaWandFactory(uint32_t n_in) noexcept : n(n_in) {} + explicit VespaWandFactory(uint32_t n_in) : _scores(n_in), n(n_in) {} std::string name() const override { return vespalib::make_string("VespaWand(%u)", n); } @@ -200,7 +202,7 @@ struct VespaWandFactory : SparseVectorFactory { for (size_t i = 0; i < childCnt; ++i) { terms.emplace_back(childFactory.createChild(i, limit), default_weight, limit / (i + 1)); } - return WeakAndSearch::create(terms, n, true); + return WeakAndSearch::create(terms, wand::MatchParams(_scores), n, true); } }; diff --git a/searchlib/src/tests/queryeval/termwise_eval/termwise_eval_test.cpp b/searchlib/src/tests/queryeval/termwise_eval/termwise_eval_test.cpp index 310c6d628e3..4d84dabf834 100644 --- a/searchlib/src/tests/queryeval/termwise_eval/termwise_eval_test.cpp +++ b/searchlib/src/tests/queryeval/termwise_eval/termwise_eval_test.cpp @@ -465,7 +465,7 @@ TEST(TermwiseEvalTest, require_that_termwise_evaluation_can_be_multi_level_but_n child->addChild(UP(new MyBlueprint({3}, true, 3))); my_or.addChild(std::move(child)); for (bool strict: {true, false}) { - my_or.basic_plan(strict, 100); + my_or.null_plan(strict, 100); EXPECT_EQ(my_or.createSearch(*md)->asString(), make_termwise(OR({ TERM({1}, strict), ORz({ TERM({2}, strict), TERM({3}, strict) }, strict) }, diff --git a/searchlib/src/tests/queryeval/weak_and/wand_bench_setup.hpp b/searchlib/src/tests/queryeval/weak_and/wand_bench_setup.hpp index 5e056eb6c0e..457f7133dc1 100644 --- a/searchlib/src/tests/queryeval/weak_and/wand_bench_setup.hpp +++ b/searchlib/src/tests/queryeval/weak_and/wand_bench_setup.hpp @@ -29,20 +29,20 @@ struct Stats { size_t unpackCnt; size_t skippedDocs; size_t skippedHits; - Stats() : hitCnt(0), seekCnt(0), unpackCnt(0), + Stats() noexcept : hitCnt(0), seekCnt(0), unpackCnt(0), skippedDocs(0), skippedHits(0) {} - void hit() { + void hit() noexcept { ++hitCnt; } - void seek(size_t docs, size_t hits) { + void seek(size_t docs, size_t hits) noexcept { ++seekCnt; skippedDocs += docs; skippedHits += hits; } - void unpack() { + void unpack() noexcept { ++unpackCnt; } - void print() { + void print() const { fprintf(stderr, "Stats: hits=%zu, seeks=%zu, unpacks=%zu, skippedDocs=%zu, skippedHits=%zu\n", hitCnt, seekCnt, unpackCnt, skippedDocs, skippedHits); } @@ -77,7 +77,7 @@ struct ModSearch : SearchIterator { } } void doUnpack(uint32_t docid) override { - if (tfmd != NULL) { + if (tfmd != nullptr) { tfmd->reset(docid); search::fef::TermFieldMatchDataPosition pos; pos.setElementWeight(info.getMaxWeight()); @@ -96,40 +96,52 @@ ModSearch::~ModSearch() = default; struct WandFactory { virtual std::string name() const = 0; virtual SearchIterator::UP create(const wand::Terms &terms) = 0; - virtual ~WandFactory() {} + virtual ~WandFactory() = default; }; struct VespaWandFactory : WandFactory { + mutable SharedWeakAndPriorityQueue _scores; uint32_t n; - VespaWandFactory(uint32_t n_in) : n(n_in) {} + explicit VespaWandFactory(uint32_t n_in) noexcept + : _scores(n_in), + n(n_in) + {} ~VespaWandFactory() override; - virtual std::string name() const override { return make_string("VESPA WAND (n=%u)", n); } - virtual SearchIterator::UP create(const wand::Terms &terms) override { - return SearchIterator::UP(WeakAndSearch::create(terms, n, true)); + std::string name() const override { return make_string("VESPA WAND (n=%u)", n); } + SearchIterator::UP create(const wand::Terms &terms) override { + return WeakAndSearch::create(terms, wand::MatchParams(_scores, 1, 1), n, true); } }; VespaWandFactory::~VespaWandFactory() = default; struct VespaArrayWandFactory : WandFactory { + mutable SharedWeakAndPriorityQueue _scores; uint32_t n; - VespaArrayWandFactory(uint32_t n_in) : n(n_in) {} + explicit VespaArrayWandFactory(uint32_t n_in) + : _scores(n_in), + n(n_in) + {} ~VespaArrayWandFactory() override; - virtual std::string name() const override { return make_string("VESPA ARRAY WAND (n=%u)", n); } - virtual SearchIterator::UP create(const wand::Terms &terms) override { - return SearchIterator::UP(WeakAndSearch::createArrayWand(terms, n, true)); + std::string name() const override { return make_string("VESPA ARRAY WAND (n=%u)", n); } + SearchIterator::UP create(const wand::Terms &terms) override { + return WeakAndSearch::createArrayWand(terms, wand::MatchParams(_scores, 1, 1), wand::TermFrequencyScorer(), n, true); } }; VespaArrayWandFactory::~VespaArrayWandFactory() = default; struct VespaHeapWandFactory : WandFactory { + mutable SharedWeakAndPriorityQueue _scores; uint32_t n; - VespaHeapWandFactory(uint32_t n_in) : n(n_in) {} + explicit VespaHeapWandFactory(uint32_t n_in) + : _scores(n_in), + n(n_in) + {} ~VespaHeapWandFactory() override; - virtual std::string name() const override { return make_string("VESPA HEAP WAND (n=%u)", n); } - virtual SearchIterator::UP create(const wand::Terms &terms) override { - return SearchIterator::UP(WeakAndSearch::createHeapWand(terms, n, true)); + std::string name() const override { return make_string("VESPA HEAP WAND (n=%u)", n); } + SearchIterator::UP create(const wand::Terms &terms) override { + return WeakAndSearch::createHeapWand(terms, wand::MatchParams(_scores, 1, 1), wand::TermFrequencyScorer(), n, true); } }; @@ -138,39 +150,39 @@ VespaHeapWandFactory::~VespaHeapWandFactory() = default; struct VespaParallelWandFactory : public WandFactory { SharedWeakAndPriorityQueue scores; TermFieldMatchData rootMatchData; - VespaParallelWandFactory(uint32_t n) : scores(n), rootMatchData() {} + explicit VespaParallelWandFactory(uint32_t n) noexcept : scores(n), rootMatchData() {} ~VespaParallelWandFactory() override; - virtual std::string name() const override { return make_string("VESPA PWAND (n=%u)", scores.getScoresToTrack()); } - virtual SearchIterator::UP create(const wand::Terms &terms) override { - return SearchIterator::UP(ParallelWeakAndSearch::create(terms, + std::string name() const override { return make_string("VESPA PWAND (n=%u)", scores.getScoresToTrack()); } + SearchIterator::UP create(const wand::Terms &terms) override { + return ParallelWeakAndSearch::create(terms, PWMatchParams(scores, 0, 1, 1), - PWRankParams(rootMatchData, MatchData::UP()), true)); + PWRankParams(rootMatchData, {}), true); } }; VespaParallelWandFactory::~VespaParallelWandFactory() = default; struct VespaParallelArrayWandFactory : public VespaParallelWandFactory { - VespaParallelArrayWandFactory(uint32_t n) : VespaParallelWandFactory(n) {} + explicit VespaParallelArrayWandFactory(uint32_t n) noexcept : VespaParallelWandFactory(n) {} ~VespaParallelArrayWandFactory() override; - virtual std::string name() const override { return make_string("VESPA ARRAY PWAND (n=%u)", scores.getScoresToTrack()); } - virtual SearchIterator::UP create(const wand::Terms &terms) override { - return SearchIterator::UP(ParallelWeakAndSearch::createArrayWand(terms, + std::string name() const override { return make_string("VESPA ARRAY PWAND (n=%u)", scores.getScoresToTrack()); } + SearchIterator::UP create(const wand::Terms &terms) override { + return ParallelWeakAndSearch::createArrayWand(terms, PWMatchParams(scores, 0, 1, 1), - PWRankParams(rootMatchData, MatchData::UP()), true)); + PWRankParams(rootMatchData, {}), true); } }; VespaParallelArrayWandFactory::~VespaParallelArrayWandFactory() = default; struct VespaParallelHeapWandFactory : public VespaParallelWandFactory { - VespaParallelHeapWandFactory(uint32_t n) : VespaParallelWandFactory(n) {} + explicit VespaParallelHeapWandFactory(uint32_t n) noexcept : VespaParallelWandFactory(n) {} ~VespaParallelHeapWandFactory() override; - virtual std::string name() const override { return make_string("VESPA HEAP PWAND (n=%u)", scores.getScoresToTrack()); } - virtual SearchIterator::UP create(const wand::Terms &terms) override { - return SearchIterator::UP(ParallelWeakAndSearch::createHeapWand(terms, + std::string name() const override { return make_string("VESPA HEAP PWAND (n=%u)", scores.getScoresToTrack()); } + SearchIterator::UP create(const wand::Terms &terms) override { + return ParallelWeakAndSearch::createHeapWand(terms, PWMatchParams(scores, 0, 1, 1), - PWRankParams(rootMatchData, MatchData::UP()), true)); + PWRankParams(rootMatchData, {}), true); } }; @@ -178,11 +190,11 @@ VespaParallelHeapWandFactory::~VespaParallelHeapWandFactory() = default; struct TermFrequencyRiseWandFactory : WandFactory { uint32_t n; - TermFrequencyRiseWandFactory(uint32_t n_in) : n(n_in) {} + explicit TermFrequencyRiseWandFactory(uint32_t n_in) noexcept : n(n_in) {} ~TermFrequencyRiseWandFactory() override; - virtual std::string name() const override { return make_string("RISE WAND TF (n=%u)", n); } - virtual SearchIterator::UP create(const wand::Terms &terms) override { - return SearchIterator::UP(new rise::TermFrequencyRiseWand(terms, n)); + std::string name() const override { return make_string("RISE WAND TF (n=%u)", n); } + SearchIterator::UP create(const wand::Terms &terms) override { + return std::make_unique<rise::TermFrequencyRiseWand>(terms, n); } }; @@ -190,11 +202,11 @@ TermFrequencyRiseWandFactory::~TermFrequencyRiseWandFactory() = default; struct DotProductRiseWandFactory : WandFactory { uint32_t n; - DotProductRiseWandFactory(uint32_t n_in) : n(n_in) {} + explicit DotProductRiseWandFactory(uint32_t n_in) noexcept : n(n_in) {} ~DotProductRiseWandFactory() override; - virtual std::string name() const override { return make_string("RISE WAND DP (n=%u)", n); } - virtual SearchIterator::UP create(const wand::Terms &terms) override { - return SearchIterator::UP(new rise::DotProductRiseWand(terms, n)); + std::string name() const override { return make_string("RISE WAND DP (n=%u)", n); } + SearchIterator::UP create(const wand::Terms &terms) override { + return std::make_unique<rise::DotProductRiseWand>(terms, n); } }; @@ -204,13 +216,13 @@ struct FilterFactory : WandFactory { WandFactory &factory; Stats stats; uint32_t n; - FilterFactory(WandFactory &f, uint32_t n_in) : factory(f), n(n_in) {} + FilterFactory(WandFactory &f, uint32_t n_in) noexcept : factory(f), n(n_in) {} ~FilterFactory() override; - virtual std::string name() const override { return make_string("Filter (mod=%u) [%s]", n, factory.name().c_str()); } - virtual SearchIterator::UP create(const wand::Terms &terms) override { + std::string name() const override { return make_string("Filter (mod=%u) [%s]", n, factory.name().c_str()); } + SearchIterator::UP create(const wand::Terms &terms) override { AndNotSearch::Children children; children.push_back(factory.create(terms)); - children.emplace_back(new ModSearch(stats, n, search::endDocId, n, NULL)); + children.emplace_back(new ModSearch(stats, n, search::endDocId, n, nullptr)); return AndNotSearch::create(std::move(children), true); } }; @@ -220,8 +232,8 @@ FilterFactory::~FilterFactory() = default; struct Setup { Stats stats; vespalib::duration minTime; - Setup() : stats(), minTime(10000s) {} - virtual ~Setup() {} + Setup() noexcept : stats(), minTime(10000s) {} + virtual ~Setup() = default; virtual std::string name() const = 0; virtual SearchIterator::UP create() = 0; void perform() { @@ -256,10 +268,10 @@ struct WandSetup : Setup { MatchData::UP matchData; WandSetup(WandFactory &f, uint32_t c, uint32_t l) : Setup(), factory(f), childCnt(c), limit(l), weight(100), matchData() {} ~WandSetup() override; - virtual std::string name() const override { + std::string name() const override { return make_string("Wand Setup (terms=%u,docs=%u) [%s]", childCnt, limit, factory.name().c_str()); } - virtual SearchIterator::UP create() override { + SearchIterator::UP create() override { MatchDataLayout layout; std::vector<TermFieldHandle> handles; for (size_t i = 0; i < childCnt; ++i) { diff --git a/searchlib/src/tests/queryeval/weak_and/weak_and_test.cpp b/searchlib/src/tests/queryeval/weak_and/weak_and_test.cpp index 689f9f085d0..4aab66f3cc9 100644 --- a/searchlib/src/tests/queryeval/weak_and/weak_and_test.cpp +++ b/searchlib/src/tests/queryeval/weak_and/weak_and_test.cpp @@ -1,8 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/searchlib/queryeval/fake_search.h> #include <vespa/searchlib/queryeval/wand/weak_and_search.h> +#include <vespa/searchlib/queryeval/wand/weak_and_heap.h> #include <vespa/searchlib/queryeval/simpleresult.h> -#include <vespa/searchlib/queryeval/simplesearch.h> #include <vespa/searchlib/queryeval/test/eagerchild.h> #include <vespa/searchlib/queryeval/test/leafspec.h> #include <vespa/searchlib/queryeval/test/wandspec.h> @@ -20,11 +19,13 @@ namespace { struct MyWandSpec : public WandSpec { + SharedWeakAndPriorityQueue scores; uint32_t n; - MyWandSpec(uint32_t n_) : WandSpec(), n(n_) {} + explicit MyWandSpec(uint32_t n_in) : WandSpec(), scores(n_in), n(n_in) {} SearchIterator *create() { - return new TrackedSearch("WAND", getHistory(), WeakAndSearch::create(getTerms(), n, true)); + return new TrackedSearch("WAND", getHistory(), + WeakAndSearch::create(getTerms(), wand::MatchParams(scores, 1, 1), n, true)); } }; @@ -104,7 +105,8 @@ TEST(WeakAndTest, require_that_initial_docid_for_subsearches_are_taken_into_acco wand::Terms terms; terms.push_back(wand::Term(new TrackedSearch("foo", history, new EagerChild(search::endDocId)), 100, 1)); terms.push_back(wand::Term(new TrackedSearch("bar", history, new EagerChild(10)), 100, 2)); - SearchIterator::UP search(new TrackedSearch("WAND", history, WeakAndSearch::create(terms, 2, true))); + SharedWeakAndPriorityQueue scores(2); + auto search = std::make_unique<TrackedSearch>("WAND", history, WeakAndSearch::create(terms, wand::MatchParams(scores), 2, true)); SimpleResult hits; hits.search(*search); EXPECT_EQ(SimpleResult().addHit(10), hits); @@ -114,17 +116,26 @@ TEST(WeakAndTest, require_that_initial_docid_for_subsearches_are_taken_into_acco } class IteratorChildrenVerifier : public search::test::IteratorChildrenVerifier { +public: + IteratorChildrenVerifier(); + ~IteratorChildrenVerifier() override; private: + mutable std::vector<std::unique_ptr<SharedWeakAndPriorityQueue>> _scores; SearchIterator::UP create(bool strict) const override { wand::Terms terms; for (size_t i = 0; i < _num_children; ++i) { terms.emplace_back(createIterator(_split_lists[i], strict).release(), 100, _split_lists[i].size()); } - return SearchIterator::UP(WeakAndSearch::create(terms, -1, strict)); + static constexpr size_t LARGE_ENOUGH_HEAP_FOR_ALL = 10000; + _scores.push_back(std::make_unique<SharedWeakAndPriorityQueue>(LARGE_ENOUGH_HEAP_FOR_ALL)); + return WeakAndSearch::create(terms, wand::MatchParams(*_scores.back(), 1, 1), -1, strict); } }; +IteratorChildrenVerifier::IteratorChildrenVerifier() : _scores() {} +IteratorChildrenVerifier::~IteratorChildrenVerifier() = default; + TEST(WeakAndTest, verify_search_iterator_conformance) { IteratorChildrenVerifier verifier; diff --git a/searchlib/src/tests/queryeval/weak_and/weak_and_test_expensive.cpp b/searchlib/src/tests/queryeval/weak_and/weak_and_test_expensive.cpp index 54bf1e92037..0573404a3b4 100644 --- a/searchlib/src/tests/queryeval/weak_and/weak_and_test_expensive.cpp +++ b/searchlib/src/tests/queryeval/weak_and/weak_and_test_expensive.cpp @@ -16,15 +16,16 @@ void checkWandHits(WandFactory &vespa, WandFactory &rise, uint32_t step, uint32_ s1->initFullRange(); SearchIterator::UP s2 = riseSetup.create(); s2->initFullRange(); - ASSERT_TRUE(dynamic_cast<WeakAndType*>(s1.get()) != 0); - ASSERT_TRUE(dynamic_cast<WeakAndType*>(s2.get()) == 0); - ASSERT_TRUE(dynamic_cast<RiseType*>(s2.get()) != 0); - ASSERT_TRUE(dynamic_cast<RiseType*>(s1.get()) == 0); + ASSERT_TRUE(dynamic_cast<WeakAndType*>(s1.get()) != nullptr); + ASSERT_TRUE(dynamic_cast<WeakAndType*>(s2.get()) == nullptr); + ASSERT_TRUE(dynamic_cast<RiseType*>(s2.get()) != nullptr); + ASSERT_TRUE(dynamic_cast<RiseType*>(s1.get()) == nullptr); s1->seek(1); s2->seek(1); while (!s1->isAtEnd() && !s2->isAtEnd()) { + if (s1->getDocId() != s2->getDocId()) assert(true); ASSERT_EQUAL(s1->getDocId(), s2->getDocId()); if ((filter == 0) || ((s1->getDocId() % filter) != 0)) { s1->unpack(s1->getDocId()); diff --git a/searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp b/searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp index e1f3f0805d9..8a0bc28f4dd 100644 --- a/searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp +++ b/searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp @@ -63,4 +63,27 @@ TEST("require that DotProductScorer calculates term score") EXPECT_EQUAL(11u, itr->_unpackDocId); } +TEST("test bm25 idf scorer for wand") +{ + wand::Bm25TermFrequencyScorer scorer(1000000, 1.0); + EXPECT_EQUAL(13410046, scorer.calculateMaxScore(1, 1)); + EXPECT_EQUAL(11464136, scorer.calculateMaxScore(10, 1)); + EXPECT_EQUAL(6907256, scorer.calculateMaxScore(1000, 1)); + EXPECT_EQUAL(4605121, scorer.calculateMaxScore(10000, 1)); + EXPECT_EQUAL(2302581, scorer.calculateMaxScore(100000, 1)); + EXPECT_EQUAL(693147, scorer.calculateMaxScore(500000, 1)); + EXPECT_EQUAL(105360, scorer.calculateMaxScore(900000, 1)); + EXPECT_EQUAL(10050, scorer.calculateMaxScore(990000, 1)); +} + +TEST("test limited range of bm25 idf scorer for wand") +{ + wand::Bm25TermFrequencyScorer scorer08(1000000, 0.8); + wand::Bm25TermFrequencyScorer scorer10(1000000, 1.0); + EXPECT_EQUAL(8207814, scorer08.calculateMaxScore(1000, 1)); + EXPECT_EQUAL(2690049, scorer08.calculateMaxScore(990000, 1)); + EXPECT_EQUAL(6907256, scorer10.calculateMaxScore(1000, 1)); + EXPECT_EQUAL(10050, scorer10.calculateMaxScore(990000, 1)); +} + TEST_MAIN() { TEST_RUN_ALL(); } |