diff options
author | Tor Egge <Tor.Egge@online.no> | 2024-05-24 13:19:03 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2024-05-24 13:19:03 +0200 |
commit | f6e379f78ee1591c6fcd7ae59caa79f74021b4f5 (patch) | |
tree | 15d017ed94fcb7d6ce958ff78486a9e7ccee08ca | |
parent | fd2776e9fd77fb5a836fd0a84c55474ac4536e2e (diff) |
Add firstPhaseRank feature (step 1, not yet integrated with match loop).
11 files changed, 379 insertions, 0 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index a5453ac5273..570bffa59c2 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -140,6 +140,7 @@ vespa_define_module( src/tests/features/element_completeness src/tests/features/element_similarity_feature src/tests/features/euclidean_distance + src/tests/features/first_phase_rank src/tests/features/imported_dot_product src/tests/features/internal_max_reduce_prod_join_feature src/tests/features/item_raw_score diff --git a/searchlib/src/tests/features/first_phase_rank/CMakeLists.txt b/searchlib/src/tests/features/first_phase_rank/CMakeLists.txt new file mode 100644 index 00000000000..5aa83399d3d --- /dev/null +++ b/searchlib/src/tests/features/first_phase_rank/CMakeLists.txt @@ -0,0 +1,11 @@ +# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +vespa_add_executable(searchlib_features_first_phase_rank_test_app TEST + SOURCES + first_phase_rank_test.cpp + DEPENDS + searchlib + searchlib_test + GTest::GTest +) +vespa_add_test(NAME searchlib_features_first_phase_rank_test_app COMMAND searchlib_features_first_phase_rank_test_app) diff --git a/searchlib/src/tests/features/first_phase_rank/first_phase_rank_test.cpp b/searchlib/src/tests/features/first_phase_rank/first_phase_rank_test.cpp new file mode 100644 index 00000000000..01ba6c36124 --- /dev/null +++ b/searchlib/src/tests/features/first_phase_rank/first_phase_rank_test.cpp @@ -0,0 +1,143 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+
+#include <vespa/searchlib/features/first_phase_rank_feature.h>
+#include <vespa/searchlib/features/setup.h>
+#include <vespa/searchlib/fef/blueprintfactory.h>
+#include <vespa/searchlib/fef/test/dummy_dependency_handler.h>
+#include <vespa/searchlib/fef/test/indexenvironment.h>
+#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h>
+#define ENABLE_GTEST_MIGRATION
+#include <vespa/searchlib/test/ft_test_app_base.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <limits>
+#include <memory>
+#include <vector>
+
+using search::features::FirstPhaseRankBlueprint;
+using search::features::FirstPhaseRankLookup;
+using search::features::setup_search_features;
+using search::fef::Blueprint;
+using search::fef::BlueprintFactory;
+using search::fef::ObjectStore;
+using search::fef::test::IndexEnvironment;
+using search::fef::test::DummyDependencyHandler;
+using StringVector = std::vector<vespalib::string>;
+
+// Score the feature is expected to output for a docid that has no first phase rank registered.
+constexpr feature_t unranked = std::numeric_limits<feature_t>::max();
+
+/*
+ * Fixture for the blueprint level tests. Owns a blueprint factory that is
+ * populated by setup_search_features() and a test index environment.
+ */
+struct FirstPhaseRankBlueprintTest : public ::testing::Test {
+    BlueprintFactory factory;
+    IndexEnvironment index_env;
+
+    FirstPhaseRankBlueprintTest()
+        : ::testing::Test(),
+          factory(),
+          index_env()
+    {
+        setup_search_features(factory);
+    }
+
+    ~FirstPhaseRankBlueprintTest() override;
+
+    // Creates a new blueprint for the "firstPhaseRank" feature from the factory.
+    std::shared_ptr<Blueprint> make_blueprint() const {
+        return factory.createBlueprint("firstPhaseRank");
+    }
+
+    // Expects blueprint setup with 'params' to fail and the dependency handler
+    // to have recorded exactly 'exp_fail_msg'.
+    void expect_setup_fail(const StringVector& params, const vespalib::string& exp_fail_msg) {
+        auto blueprint = make_blueprint();
+        DummyDependencyHandler deps(*blueprint);
+        EXPECT_FALSE(blueprint->setup(index_env, params));
+        EXPECT_EQ(exp_fail_msg, deps.fail_msg);
+    }
+
+    // Expects blueprint setup with 'params' to succeed with no inputs and the
+    // single output "score". Returns the blueprint that was set up.
+    std::shared_ptr<Blueprint> expect_setup_succeed(const StringVector& params) {
+        auto blueprint = make_blueprint();
+        DummyDependencyHandler deps(*blueprint);
+        EXPECT_TRUE(blueprint->setup(index_env, params));
+        // Unsigned literal: size() is unsigned, avoids a signed/unsigned comparison.
+        EXPECT_EQ(0u, deps.input.size());
+        EXPECT_EQ(StringVector({"score"}), deps.output);
+        return blueprint;
+    }
+};
+
+FirstPhaseRankBlueprintTest::~FirstPhaseRankBlueprintTest() = default;
+
+TEST_F(FirstPhaseRankBlueprintTest, blueprint_can_be_created_from_factory)
+{
+    auto bp = make_blueprint();
+    EXPECT_TRUE(bp);
+    EXPECT_TRUE(dynamic_pointer_cast<FirstPhaseRankBlueprint>(bp));
+}
+
+TEST_F(FirstPhaseRankBlueprintTest, blueprint_setup_fails_when_parameter_list_is_not_empty)
+{
+    expect_setup_fail({"is"},
+                      "The parameter list used for setting up rank feature firstPhaseRank is not valid: "
+                      "Expected 0 parameter(s), but got 1");
+}
+
+TEST_F(FirstPhaseRankBlueprintTest, blueprint_setup_succeeds)
+{
+    expect_setup_succeed({});
+}
+
+// The shared lookup state must be absent before prepareSharedState() and present afterwards.
+TEST_F(FirstPhaseRankBlueprintTest, blueprint_can_prepare_shared_state)
+{
+    auto blueprint = expect_setup_succeed({});
+    search::fef::test::QueryEnvironment query_env;
+    ObjectStore store;
+    EXPECT_EQ(nullptr, FirstPhaseRankLookup::get_mutable_shared_state(store));
+    EXPECT_EQ(nullptr, FirstPhaseRankLookup::get_shared_state(store));
+    blueprint->prepareSharedState(query_env, store);
+    EXPECT_NE(nullptr, FirstPhaseRankLookup::get_mutable_shared_state(store));
+    EXPECT_NE(nullptr, FirstPhaseRankLookup::get_shared_state(store));
+}
+
+TEST_F(FirstPhaseRankBlueprintTest, dump_features)
+{
+    FtTestAppBase::FT_DUMP_EMPTY(factory, "firstPhaseRank", index_env);
+}
+
+/*
+ * Fixture for the executor level tests. Wraps an FtFeatureTest for the
+ * "firstPhaseRank" feature.
+ */
+struct FirstPhaseRankExecutorTest : public ::testing::Test {
+    BlueprintFactory factory;
+    FtFeatureTest test;
+
+    FirstPhaseRankExecutorTest()
+        : ::testing::Test(),
+          factory(),
+          test(factory, "firstPhaseRank")
+    {
+        setup_search_features(factory);
+    }
+    ~FirstPhaseRankExecutorTest() override;
+    // Sets up the feature test and registers each (docid, rank) pair in the
+    // shared lookup state. Stops at the first failing step.
+    void setup(const std::vector<std::pair<uint32_t,uint32_t>>& ranks) {
+        ASSERT_TRUE(test.setup());
+        auto* lookup = FirstPhaseRankLookup::get_mutable_shared_state(test.getQueryEnv().getObjectStore());
+        ASSERT_NE(nullptr, lookup);
+        for (const auto& [docid, rank] : ranks) {
+            lookup->add(docid, rank);
+        }
+    }
+    // Executes the feature for 'docid' and checks the output against 'exp_score'.
+    // NOTE(review): 0.000001 is presumably the comparison tolerance - confirm against FtFeatureTest.
+    bool execute(feature_t exp_score, uint32_t docid) {
+        return test.execute(exp_score, 0.000001, docid);
+    }
+};
+
+FirstPhaseRankExecutorTest::~FirstPhaseRankExecutorTest() = default;
+
+TEST_F(FirstPhaseRankExecutorTest, unranked_docid_gives_huge_output)
+{
+    setup({});
+    EXPECT_TRUE(execute(unranked, 1));
+}
+
+TEST_F(FirstPhaseRankExecutorTest, ranked_docid_gives_expected_output)
+{
+    setup({{3, 5}, {7, 4}});
+    EXPECT_TRUE(execute(unranked, 2));
+    EXPECT_TRUE(execute(5, 3));
+    EXPECT_TRUE(execute(unranked, 4));
+    EXPECT_TRUE(execute(unranked, 5));
+    EXPECT_TRUE(execute(unranked, 6));
+    EXPECT_TRUE(execute(4, 7));
+    EXPECT_TRUE(execute(unranked, 8));
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/vespa/searchlib/features/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
index 4736dbecb86..27c2b6d5e41 100644
--- a/searchlib/src/vespa/searchlib/features/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
@@ -26,6 +26,8 @@ vespa_add_library(searchlib_features OBJECT
     fieldmatchfeature.cpp
     fieldtermmatchfeature.cpp
     firstphasefeature.cpp
+    first_phase_rank_feature.cpp
+    first_phase_rank_lookup.cpp
     flow_completeness_feature.cpp
     foreachfeature.cpp
     freshnessfeature.cpp
diff --git a/searchlib/src/vespa/searchlib/features/first_phase_rank_feature.cpp b/searchlib/src/vespa/searchlib/features/first_phase_rank_feature.cpp
new file mode 100644
index 00000000000..5c8a9a391ff
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/first_phase_rank_feature.cpp
@@ -0,0 +1,71 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "first_phase_rank_feature.h"
+#include "valuefeature.h"
+#include <vespa/vespalib/util/stash.h>
+#include <limits>
+#include <memory>
+#include <vector>
+
+namespace search::features {
+
+// Stores a reference to 'lookup'; the lookup is not copied.
+FirstPhaseRankExecutor::FirstPhaseRankExecutor(const FirstPhaseRankLookup& lookup)
+    : FeatureExecutor(),
+      _lookup(lookup)
+{
+}
+FirstPhaseRankExecutor::~FirstPhaseRankExecutor() = default;
+
+// Writes the value the lookup returns for 'docid' to output 0.
+void
+FirstPhaseRankExecutor::execute(uint32_t docid)
+{
+    outputs().set_number(0, _lookup.lookup(docid));
+}
+
+FirstPhaseRankBlueprint::FirstPhaseRankBlueprint()
+    : Blueprint("firstPhaseRank")
+{
+}
+
+FirstPhaseRankBlueprint::~FirstPhaseRankBlueprint() = default;
+
+// Intentionally empty: this feature contributes no dump features.
+void
+FirstPhaseRankBlueprint::visitDumpFeatures(const fef::IIndexEnvironment&, fef::IDumpFeatureVisitor&) const
+{
+}
+
+std::unique_ptr<fef::Blueprint>
+FirstPhaseRankBlueprint::createInstance() const
+{
+    return std::make_unique<FirstPhaseRankBlueprint>();
+}
+
+// A single description without any parameters.
+fef::ParameterDescriptions
+FirstPhaseRankBlueprint::getDescriptions() const
+{
+    return fef::ParameterDescriptions().desc();
+}
+
+// Declares the single output "score"; always succeeds.
+bool
+FirstPhaseRankBlueprint::setup(const fef::IIndexEnvironment&, const fef::ParameterList&)
+{
+    describeOutput("score", "The first phase rank.");
+    return true;
+}
+
+// Makes sure the object store holds a FirstPhaseRankLookup for executors to read.
+void
+FirstPhaseRankBlueprint::prepareSharedState(const fef::IQueryEnvironment&, fef::IObjectStore& store) const
+{
+    FirstPhaseRankLookup::make_shared_state(store);
+}
+
+// Creates an executor reading from the shared lookup. Without a shared lookup
+// the feature is a constant: the same value the lookup reports for a docid
+// it does not know.
+fef::FeatureExecutor&
+FirstPhaseRankBlueprint::createExecutor(const fef::IQueryEnvironment& env, vespalib::Stash& stash) const
+{
+    const auto* lookup = FirstPhaseRankLookup::get_shared_state(env.getObjectStore());
+    if (lookup != nullptr) {
+        return stash.create<FirstPhaseRankExecutor>(*lookup);
+    }
+    std::vector<feature_t> values{std::numeric_limits<feature_t>::max()};
+    return stash.create<ValueExecutor>(values);
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/features/first_phase_rank_feature.h b/searchlib/src/vespa/searchlib/features/first_phase_rank_feature.h
new file mode 100644
index 00000000000..f90ea26f859
---
/dev/null
+++ b/searchlib/src/vespa/searchlib/features/first_phase_rank_feature.h
@@ -0,0 +1,40 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "first_phase_rank_lookup.h"
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+
+namespace search::features {
+
+/*
+ * Executor for first phase rank feature that outputs the first phase rank
+ * for the given docid on this search node (1.0, 2.0, 3.0, etc.).
+ *
+ * Only a reference to the lookup is kept, so the lookup must stay alive for
+ * as long as the executor is used.
+ */
+class FirstPhaseRankExecutor : public fef::FeatureExecutor {
+    const FirstPhaseRankLookup& _lookup;
+public:
+    explicit FirstPhaseRankExecutor(const FirstPhaseRankLookup& lookup);
+    ~FirstPhaseRankExecutor() override;
+    void execute(uint32_t docid) override;
+};
+
+/*
+ * Blueprint for first phase rank feature. Takes no parameters and has the
+ * single output "score".
+ */
+class FirstPhaseRankBlueprint : public fef::Blueprint {
+public:
+    FirstPhaseRankBlueprint();
+    ~FirstPhaseRankBlueprint() override;
+    void visitDumpFeatures(const fef::IIndexEnvironment& env, fef::IDumpFeatureVisitor& visitor) const override;
+    std::unique_ptr<fef::Blueprint> createInstance() const override;
+    fef::ParameterDescriptions getDescriptions() const override;
+    bool setup(const fef::IIndexEnvironment& env, const fef::ParameterList& params) override;
+    void prepareSharedState(const fef::IQueryEnvironment& env, fef::IObjectStore& store) const override;
+    fef::FeatureExecutor& createExecutor(const fef::IQueryEnvironment& env, vespalib::Stash& stash) const override;
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.cpp b/searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.cpp
new file mode 100644
index 00000000000..2dfaabb8326
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.cpp
@@ -0,0 +1,67 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "first_phase_rank_lookup.h"
+#include <vespa/searchlib/fef/objectstore.h>
+#include <cassert>
+#include <limits>
+#include <memory>
+#include <utility>
+
+using search::fef::AnyWrapper;
+
+namespace search::features {
+
+namespace {
+
+// Object store key under which the shared lookup is registered.
+const vespalib::string key = "firstPhaseRankLookup";
+
+}
+
+FirstPhaseRankLookup::FirstPhaseRankLookup()
+    : _map()
+{
+}
+
+FirstPhaseRankLookup::FirstPhaseRankLookup(FirstPhaseRankLookup&&) = default;
+
+FirstPhaseRankLookup::~FirstPhaseRankLookup() = default;
+
+// Returns the rank registered for 'docid', or the largest finite feature_t
+// value when the docid has no registered rank.
+feature_t
+FirstPhaseRankLookup::lookup(uint32_t docid) const noexcept
+{
+    auto itr = _map.find(docid);
+    if (itr != _map.end()) [[likely]] {
+        return static_cast<feature_t>(itr->second);
+    }
+    return std::numeric_limits<feature_t>::max();
+}
+
+// Registers 'rank' for 'docid'. A docid must not be registered twice; this is
+// only checked when asserts are enabled.
+void
+FirstPhaseRankLookup::add(uint32_t docid, uint32_t rank)
+{
+    // Only read by the assert below, hence [[maybe_unused]] for builds with NDEBUG.
+    [[maybe_unused]] auto insres = _map.insert(std::make_pair(docid, rank));
+    assert(insres.second);
+}
+
+// Adds an empty lookup to 'store' unless something is already stored under the key.
+void
+FirstPhaseRankLookup::make_shared_state(fef::IObjectStore& store)
+{
+    if (store.get(key) == nullptr) {
+        store.add(key, std::make_unique<AnyWrapper<FirstPhaseRankLookup>>(FirstPhaseRankLookup()));
+    }
+}
+
+// Returns the lookup in 'store' for modification, or nullptr when the key is
+// missing or holds an object of another type.
+FirstPhaseRankLookup*
+FirstPhaseRankLookup::get_mutable_shared_state(fef::IObjectStore& store)
+{
+    auto* wrapper = dynamic_cast<AnyWrapper<FirstPhaseRankLookup>*>(store.get_mutable(key));
+    return (wrapper == nullptr) ? nullptr : &wrapper->getValue();
+}
+
+// Read-only variant of get_mutable_shared_state().
+const FirstPhaseRankLookup*
+FirstPhaseRankLookup::get_shared_state(const fef::IObjectStore& store)
+{
+    const auto* wrapper = dynamic_cast<const AnyWrapper<FirstPhaseRankLookup>*>(store.get(key));
+    return (wrapper == nullptr) ? nullptr : &wrapper->getValue();
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.h b/searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.h
new file mode 100644
index 00000000000..83d89ed2dd1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.h
@@ -0,0 +1,32 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license.
See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/common/feature.h>
+#include <vespa/vespalib/stllike/hash_map.h>
+
+namespace search::fef { class IObjectStore; }
+
+namespace search::features {
+
+/*
+ * This class contains a mapping from docids used by second phase to
+ * first phase rank.
+ *
+ * An instance is shared through an object store: make_shared_state()
+ * registers it and get_shared_state() / get_mutable_shared_state()
+ * retrieve it. The class can be move constructed, but not copied or assigned.
+ */
+class FirstPhaseRankLookup {
+    vespalib::hash_map<uint32_t, uint32_t> _map; // docid -> first phase rank
+public:
+    FirstPhaseRankLookup();
+    FirstPhaseRankLookup(const FirstPhaseRankLookup&) = delete;
+    FirstPhaseRankLookup(FirstPhaseRankLookup&&);
+    ~FirstPhaseRankLookup();
+    FirstPhaseRankLookup& operator=(const FirstPhaseRankLookup&) = delete;
+    FirstPhaseRankLookup& operator=(FirstPhaseRankLookup&&) = delete;
+    // Rank registered for docid, or std::numeric_limits<feature_t>::max() when none is registered.
+    feature_t lookup(uint32_t docid) const noexcept;
+    // Registers rank for docid; the implementation asserts that docid was not already registered.
+    void add(uint32_t docid, uint32_t rank);
+    // Adds an empty lookup to the store unless the store already has an entry for it.
+    static void make_shared_state(fef::IObjectStore& store);
+    // Both getters return nullptr when the store holds no lookup.
+    static FirstPhaseRankLookup* get_mutable_shared_state(fef::IObjectStore& store);
+    static const FirstPhaseRankLookup* get_shared_state(const fef::IObjectStore& store);
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/features/setup.cpp b/searchlib/src/vespa/searchlib/features/setup.cpp
index bdffbd1c6aa..d65459817f0 100644
--- a/searchlib/src/vespa/searchlib/features/setup.cpp
+++ b/searchlib/src/vespa/searchlib/features/setup.cpp
@@ -22,6 +22,7 @@
 #include "fieldmatchfeature.h"
 #include "fieldtermmatchfeature.h"
 #include "firstphasefeature.h"
+#include "first_phase_rank_feature.h"
 #include "flow_completeness_feature.h"
 #include "foreachfeature.h"
 #include "freshnessfeature.h"
@@ -91,6 +92,7 @@ void setup_search_features(fef::IBlueprintRegistry & registry)
     registry.addPrototype(std::make_shared<FieldMatchBlueprint>());
     registry.addPrototype(std::make_shared<FieldTermMatchBlueprint>());
     registry.addPrototype(std::make_shared<FirstPhaseBlueprint>());
+    registry.addPrototype(std::make_shared<FirstPhaseRankBlueprint>());
     registry.addPrototype(std::make_shared<FlowCompletenessBlueprint>());
registry.addPrototype(std::make_shared<ForeachBlueprint>());
     registry.addPrototype(std::make_shared<FreshnessBlueprint>());
diff --git a/searchlib/src/vespa/searchlib/fef/objectstore.cpp b/searchlib/src/vespa/searchlib/fef/objectstore.cpp
index 3e5baf49116..a90702a88a6 100644
--- a/searchlib/src/vespa/searchlib/fef/objectstore.cpp
+++ b/searchlib/src/vespa/searchlib/fef/objectstore.cpp
@@ -35,4 +35,11 @@ ObjectStore::get(const vespalib::string & key) const
     return (found != _objectMap.end()) ? found->second : NULL;
 }
 
+// Returns the object stored under key for modification, or nullptr when the
+// key is not present in the store.
+Anything *
+ObjectStore::get_mutable(const vespalib::string& key)
+{
+    auto found = _objectMap.find(key);
+    return (found != _objectMap.end()) ? found->second : nullptr;
+}
+
 }
diff --git a/searchlib/src/vespa/searchlib/fef/objectstore.h b/searchlib/src/vespa/searchlib/fef/objectstore.h
index 9d1671e521c..d2d768ee338 100644
--- a/searchlib/src/vespa/searchlib/fef/objectstore.h
+++ b/searchlib/src/vespa/searchlib/fef/objectstore.h
@@ -24,6 +24,7 @@ class AnyWrapper : public Anything
 public:
     explicit AnyWrapper(T value) : _value(std::move(value)) { }
     const T & getValue() const { return _value; }
+    // Mutable access to the wrapped value.
+    T& getValue() { return _value; }
     static const T & getValue(const Anything & any) { return static_cast<const AnyWrapper &>(any).getValue(); }
 private:
     T _value;
@@ -38,6 +39,7 @@ public:
     virtual ~IObjectStore() = default;
     virtual void add(const vespalib::string & key, Anything::UP value) = 0;
     virtual const Anything * get(const vespalib::string & key) const = 0;
+    // Non-const counterpart of get().
+    virtual Anything* get_mutable(const vespalib::string& key) = 0;
 };
 
 /**
@@ -50,6 +52,7 @@ public:
     ~ObjectStore() override;
     void add(const vespalib::string & key, Anything::UP value) override;
     const Anything * get(const vespalib::string & key) const override;
+    Anything* get_mutable(const vespalib::string & key) override;
 private:
     using ObjectMap = vespalib::hash_map<vespalib::string, Anything *>;
     ObjectMap _objectMap;
|