summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Tor Egge <Tor.Egge@online.no> 2024-05-24 13:19:03 +0200
committer: Tor Egge <Tor.Egge@online.no> 2024-05-24 13:19:03 +0200
commit: f6e379f78ee1591c6fcd7ae59caa79f74021b4f5 (patch)
tree: 15d017ed94fcb7d6ce958ff78486a9e7ccee08ca
parent: fd2776e9fd77fb5a836fd0a84c55474ac4536e2e (diff)
Add firstPhaseRank feature (step 1, not yet integrated with match loop).
-rw-r--r-- searchlib/CMakeLists.txt 1
-rw-r--r-- searchlib/src/tests/features/first_phase_rank/CMakeLists.txt 11
-rw-r--r-- searchlib/src/tests/features/first_phase_rank/first_phase_rank_test.cpp 143
-rw-r--r-- searchlib/src/vespa/searchlib/features/CMakeLists.txt 2
-rw-r--r-- searchlib/src/vespa/searchlib/features/first_phase_rank_feature.cpp 71
-rw-r--r-- searchlib/src/vespa/searchlib/features/first_phase_rank_feature.h 40
-rw-r--r-- searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.cpp 67
-rw-r--r-- searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.h 32
-rw-r--r-- searchlib/src/vespa/searchlib/features/setup.cpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/fef/objectstore.cpp 7
-rw-r--r-- searchlib/src/vespa/searchlib/fef/objectstore.h 3
11 files changed, 379 insertions, 0 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt
index a5453ac5273..570bffa59c2 100644
--- a/searchlib/CMakeLists.txt
+++ b/searchlib/CMakeLists.txt
@@ -140,6 +140,7 @@ vespa_define_module(
src/tests/features/element_completeness
src/tests/features/element_similarity_feature
src/tests/features/euclidean_distance
+ src/tests/features/first_phase_rank
src/tests/features/imported_dot_product
src/tests/features/internal_max_reduce_prod_join_feature
src/tests/features/item_raw_score
diff --git a/searchlib/src/tests/features/first_phase_rank/CMakeLists.txt b/searchlib/src/tests/features/first_phase_rank/CMakeLists.txt
new file mode 100644
index 00000000000..5aa83399d3d
--- /dev/null
+++ b/searchlib/src/tests/features/first_phase_rank/CMakeLists.txt
@@ -0,0 +1,11 @@
+# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+# Unit test executable for the firstPhaseRank rank feature; built from
+# first_phase_rank_test.cpp and linked against searchlib, searchlib_test and
+# GoogleTest.
+vespa_add_executable(searchlib_features_first_phase_rank_test_app TEST
+ SOURCES
+ first_phase_rank_test.cpp
+ DEPENDS
+ searchlib
+ searchlib_test
+ GTest::GTest
+)
+# Registers the executable above as a test that is run without arguments.
+vespa_add_test(NAME searchlib_features_first_phase_rank_test_app COMMAND searchlib_features_first_phase_rank_test_app)
diff --git a/searchlib/src/tests/features/first_phase_rank/first_phase_rank_test.cpp b/searchlib/src/tests/features/first_phase_rank/first_phase_rank_test.cpp
new file mode 100644
index 00000000000..01ba6c36124
--- /dev/null
+++ b/searchlib/src/tests/features/first_phase_rank/first_phase_rank_test.cpp
@@ -0,0 +1,143 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchlib/features/first_phase_rank_feature.h>
+#include <vespa/searchlib/features/setup.h>
+#include <vespa/searchlib/fef/blueprintfactory.h>
+#include <vespa/searchlib/fef/test/dummy_dependency_handler.h>
+#include <vespa/searchlib/fef/test/indexenvironment.h>
+#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h>
+#define ENABLE_GTEST_MIGRATION
+#include <vespa/searchlib/test/ft_test_app_base.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+using search::features::FirstPhaseRankBlueprint;
+using search::features::FirstPhaseRankLookup;
+using search::features::setup_search_features;
+using search::fef::Blueprint;
+using search::fef::BlueprintFactory;
+using search::fef::ObjectStore;
+using search::fef::test::IndexEnvironment;
+using search::fef::test::DummyDependencyHandler;
+using StringVector = std::vector<vespalib::string>;
+
+constexpr feature_t unranked = std::numeric_limits<feature_t>::max();
+
+/*
+ * Test fixture for the firstPhaseRank blueprint. Owns a blueprint factory
+ * that setup_search_features() has populated and a test index environment.
+ */
+struct FirstPhaseRankBlueprintTest : public ::testing::Test {
+    BlueprintFactory factory;
+    IndexEnvironment index_env;
+
+    FirstPhaseRankBlueprintTest()
+        : ::testing::Test(),
+          factory(),
+          index_env()
+    {
+        setup_search_features(factory);
+    }
+
+    ~FirstPhaseRankBlueprintTest() override;
+
+    // Asks the factory for a blueprint registered under "firstPhaseRank".
+    std::shared_ptr<Blueprint> make_blueprint() const {
+        return factory.createBlueprint("firstPhaseRank");
+    }
+
+    // Expects blueprint setup with 'params' to fail and the dependency
+    // handler to have recorded exactly 'exp_fail_msg'.
+    void expect_setup_fail(const StringVector& params, const vespalib::string& exp_fail_msg) {
+        auto blueprint = make_blueprint();
+        DummyDependencyHandler deps(*blueprint);
+        EXPECT_FALSE(blueprint->setup(index_env, params));
+        EXPECT_EQ(exp_fail_msg, deps.fail_msg);
+    }
+
+    // Expects blueprint setup with 'params' to succeed, declaring no inputs
+    // and a single output named "score". Returns the blueprint that was set up.
+    std::shared_ptr<Blueprint> expect_setup_succeed(const StringVector& params) {
+        auto blueprint = make_blueprint();
+        DummyDependencyHandler deps(*blueprint);
+        EXPECT_TRUE(blueprint->setup(index_env, params));
+        // Unsigned literal: size() is presumably an unsigned size type, so a
+        // plain int literal would make this a signed/unsigned comparison.
+        EXPECT_EQ(0u, deps.input.size());
+        EXPECT_EQ(StringVector({"score"}), deps.output);
+        return blueprint;
+    }
+};
+
+FirstPhaseRankBlueprintTest::~FirstPhaseRankBlueprintTest() = default;
+
+// The factory must hand out a non-null blueprint whose dynamic type is
+// FirstPhaseRankBlueprint.
+TEST_F(FirstPhaseRankBlueprintTest, blueprint_can_be_created_from_factory)
+{
+    auto blueprint = make_blueprint();
+    EXPECT_TRUE(blueprint);
+    auto typed_blueprint = std::dynamic_pointer_cast<FirstPhaseRankBlueprint>(blueprint);
+    EXPECT_TRUE(typed_blueprint);
+}
+
+// The feature takes no parameters, so passing one must be rejected with the
+// parameter validation message below.
+TEST_F(FirstPhaseRankBlueprintTest, blueprint_setup_fails_when_parameter_list_is_not_empty)
+{
+    const vespalib::string expected_message =
+        "The parameter list used for setting up rank feature firstPhaseRank is not valid: "
+        "Expected 0 parameter(s), but got 1";
+    expect_setup_fail({"is"}, expected_message);
+}
+
+// Setup with an empty parameter list is the valid configuration.
+TEST_F(FirstPhaseRankBlueprintTest, blueprint_setup_succeeds)
+{
+    const StringVector no_params;
+    expect_setup_succeed(no_params);
+}
+
+// prepareSharedState() must place a lookup table in the object store that is
+// reachable through both the mutable and the read-only accessor.
+TEST_F(FirstPhaseRankBlueprintTest, blueprint_can_prepare_shared_state)
+{
+    auto bp = expect_setup_succeed({});
+    search::fef::test::QueryEnvironment query_env;
+    ObjectStore object_store;
+
+    // Nothing is stored before the blueprint has prepared its shared state.
+    EXPECT_EQ(nullptr, FirstPhaseRankLookup::get_mutable_shared_state(object_store));
+    EXPECT_EQ(nullptr, FirstPhaseRankLookup::get_shared_state(object_store));
+
+    bp->prepareSharedState(query_env, object_store);
+
+    EXPECT_NE(nullptr, FirstPhaseRankLookup::get_mutable_shared_state(object_store));
+    EXPECT_NE(nullptr, FirstPhaseRankLookup::get_shared_state(object_store));
+}
+
+// The blueprint is expected to contribute no dump features.
+TEST_F(FirstPhaseRankBlueprintTest, dump_features)
+{
+    FtTestAppBase::FT_DUMP_EMPTY(factory,
+                                 "firstPhaseRank",
+                                 index_env);
+}
+
+/*
+ * Test fixture for the firstPhaseRank executor. Wraps an FtFeatureTest for
+ * the "firstPhaseRank" feature on top of a factory populated by
+ * setup_search_features().
+ */
+struct FirstPhaseRankExecutorTest : public ::testing::Test {
+    BlueprintFactory factory;
+    FtFeatureTest test;
+
+    FirstPhaseRankExecutorTest()
+        : ::testing::Test(),
+          factory(),
+          test(factory, "firstPhaseRank")
+    {
+        setup_search_features(factory);
+    }
+    ~FirstPhaseRankExecutorTest() override;
+
+    // Sets up the feature test and registers each (docid, rank) pair in the
+    // shared lookup table found in the query environment's object store.
+    // Taken by const reference: the pairs are only read.
+    void setup(const std::vector<std::pair<uint32_t,uint32_t>>& ranks) {
+        EXPECT_TRUE(test.setup());
+        auto* lookup = FirstPhaseRankLookup::get_mutable_shared_state(test.getQueryEnv().getObjectStore());
+        ASSERT_NE(nullptr, lookup);
+        for (const auto& [docid, rank] : ranks) {
+            lookup->add(docid, rank);
+        }
+    }
+
+    // Executes the feature for 'docid' and checks the output against
+    // 'exp_score' using a tolerance of 0.000001.
+    bool execute(feature_t exp_score, uint32_t docid) {
+        return test.execute(exp_score, 0.000001, docid);
+    }
+};
+
+FirstPhaseRankExecutorTest::~FirstPhaseRankExecutorTest() = default;
+
+// With no ranks registered, every docid is reported with the 'unranked'
+// sentinel value.
+TEST_F(FirstPhaseRankExecutorTest, unranked_docid_gives_huge_output)
+{
+    setup({});
+    const uint32_t docid = 1;
+    EXPECT_TRUE(execute(unranked, docid));
+}
+
+// Registered docids (3 and 7) report their rank; all neighbouring docids
+// report the 'unranked' sentinel value.
+TEST_F(FirstPhaseRankExecutorTest, ranked_docid_gives_expected_output)
+{
+    setup({{3, 5}, {7, 4}});
+    // (docid, expected score), checked in ascending docid order.
+    const std::vector<std::pair<uint32_t, feature_t>> expectations = {
+        {2, unranked},
+        {3, 5},
+        {4, unranked},
+        {5, unranked},
+        {6, unranked},
+        {7, 4},
+        {8, unranked}
+    };
+    for (const auto& [docid, exp_score] : expectations) {
+        EXPECT_TRUE(execute(exp_score, docid)) << "docid " << docid;
+    }
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/vespa/searchlib/features/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
index 4736dbecb86..27c2b6d5e41 100644
--- a/searchlib/src/vespa/searchlib/features/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
@@ -26,6 +26,8 @@ vespa_add_library(searchlib_features OBJECT
fieldmatchfeature.cpp
fieldtermmatchfeature.cpp
firstphasefeature.cpp
+ first_phase_rank_feature.cpp
+ first_phase_rank_lookup.cpp
flow_completeness_feature.cpp
foreachfeature.cpp
freshnessfeature.cpp
diff --git a/searchlib/src/vespa/searchlib/features/first_phase_rank_feature.cpp b/searchlib/src/vespa/searchlib/features/first_phase_rank_feature.cpp
new file mode 100644
index 00000000000..5c8a9a391ff
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/first_phase_rank_feature.cpp
@@ -0,0 +1,71 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "first_phase_rank_feature.h"
+#include "valuefeature.h"
+#include <vespa/vespalib/util/stash.h>
+
+namespace search::features {
+
+// Binds the executor to 'lookup'. Only a reference is kept, so the lookup
+// table must stay alive for as long as the executor is used.
+FirstPhaseRankExecutor::FirstPhaseRankExecutor(const FirstPhaseRankLookup& lookup)
+    : FeatureExecutor(),
+      _lookup(lookup)
+{
+}
+
+FirstPhaseRankExecutor::~FirstPhaseRankExecutor() = default;
+
+// Writes the value the lookup table holds for 'docid' to output 0.
+void
+FirstPhaseRankExecutor::execute(uint32_t docid)
+{
+    const feature_t rank = _lookup.lookup(docid);
+    outputs().set_number(0, rank);
+}
+
+// The blueprint is constructed with the base name "firstPhaseRank".
+FirstPhaseRankBlueprint::FirstPhaseRankBlueprint()
+    : Blueprint("firstPhaseRank")
+{
+}
+
+FirstPhaseRankBlueprint::~FirstPhaseRankBlueprint() = default;
+
+// Intentionally empty: the visitor is never called, so this feature adds
+// nothing to the set of dump features.
+void
+FirstPhaseRankBlueprint::visitDumpFeatures(const fef::IIndexEnvironment&,
+                                           fef::IDumpFeatureVisitor&) const
+{
+}
+
+// Returns a new, default-constructed FirstPhaseRankBlueprint owned by the caller.
+std::unique_ptr<fef::Blueprint>
+FirstPhaseRankBlueprint::createInstance() const
+{
+    auto instance = std::make_unique<FirstPhaseRankBlueprint>();
+    return instance;
+}
+
+// Returns a description set holding a single desc() entry with nothing added
+// to it; presumably this is what makes only an empty parameter list valid.
+fef::ParameterDescriptions
+FirstPhaseRankBlueprint::getDescriptions() const
+{
+    using fef::ParameterDescriptions;
+    return ParameterDescriptions().desc();
+}
+
+// Declares the single output "score". Neither the index environment nor the
+// parameter list is consulted, and setup always reports success.
+bool
+FirstPhaseRankBlueprint::setup(const fef::IIndexEnvironment&,
+                               const fef::ParameterList&)
+{
+    describeOutput("score", "The first phase rank.");
+    return true;
+}
+
+// Makes sure the object store holds a FirstPhaseRankLookup that executors
+// can later pick up. The query environment is not used.
+void
+FirstPhaseRankBlueprint::prepareSharedState(const fef::IQueryEnvironment&,
+                                            fef::IObjectStore& store) const
+{
+    FirstPhaseRankLookup::make_shared_state(store);
+}
+
+// Creates the executor in 'stash'. When the query environment's object store
+// holds a lookup table, the executor reads ranks from it; otherwise a
+// constant executor emitting the largest feature_t value is used.
+fef::FeatureExecutor&
+FirstPhaseRankBlueprint::createExecutor(const fef::IQueryEnvironment& env, vespalib::Stash& stash) const
+{
+    if (const auto* lookup = FirstPhaseRankLookup::get_shared_state(env.getObjectStore())) {
+        return stash.create<FirstPhaseRankExecutor>(*lookup);
+    }
+    // No shared state available: every document is reported as unranked.
+    std::vector<feature_t> values{std::numeric_limits<feature_t>::max()};
+    return stash.create<ValueExecutor>(values);
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/features/first_phase_rank_feature.h b/searchlib/src/vespa/searchlib/features/first_phase_rank_feature.h
new file mode 100644
index 00000000000..f90ea26f859
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/first_phase_rank_feature.h
@@ -0,0 +1,40 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "first_phase_rank_lookup.h"
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+
+namespace search::features {
+
+class FirstPhaseRankLookup;
+
+/*
+ * Executor for first phase rank feature that outputs the first phase rank
+ * for the given docid on this search node (1.0, 2.0, 3.0, etc.).
+ *
+ * Only a reference to the lookup table is held, so the table must outlive
+ * the executor.
+ */
+class FirstPhaseRankExecutor : public fef::FeatureExecutor {
+    const FirstPhaseRankLookup& _lookup;
+public:
+    // explicit: a lookup table must never convert silently into an executor.
+    explicit FirstPhaseRankExecutor(const FirstPhaseRankLookup& lookup);
+    ~FirstPhaseRankExecutor() override;
+    // Writes the value looked up for 'docid' to output 0.
+    void execute(uint32_t docid) override;
+};
+
+/*
+ * Blueprint for the first phase rank feature ("firstPhaseRank").
+ *
+ * Takes no parameters and exposes one output, "score". The lookup table the
+ * executors read from is created in prepareSharedState().
+ */
+class FirstPhaseRankBlueprint : public fef::Blueprint {
+public:
+    FirstPhaseRankBlueprint();
+    ~FirstPhaseRankBlueprint() override;
+
+    // Blueprint description and setup.
+    void visitDumpFeatures(const fef::IIndexEnvironment& env, fef::IDumpFeatureVisitor& visitor) const override;
+    std::unique_ptr<fef::Blueprint> createInstance() const override;
+    fef::ParameterDescriptions getDescriptions() const override;
+    bool setup(const fef::IIndexEnvironment& env, const fef::ParameterList& params) override;
+
+    // Per-query state and executor creation.
+    void prepareSharedState(const fef::IQueryEnvironment& env, fef::IObjectStore& store) const override;
+    fef::FeatureExecutor& createExecutor(const fef::IQueryEnvironment& env, vespalib::Stash& stash) const override;
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.cpp b/searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.cpp
new file mode 100644
index 00000000000..2dfaabb8326
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.cpp
@@ -0,0 +1,67 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "first_phase_rank_lookup.h"
+#include <vespa/searchlib/fef/objectstore.h>
+#include <cassert>
+#include <limits>
+
+using search::fef::AnyWrapper;
+
+namespace search::features {
+
+namespace {
+
+// Object store key under which the shared lookup table is kept.
+const vespalib::string key("firstPhaseRankLookup");
+
+}
+
+// Starts out with an empty docid -> rank map.
+FirstPhaseRankLookup::FirstPhaseRankLookup()
+    : _map()
+{
+}
+
+// Defaulted here rather than in the header.
+FirstPhaseRankLookup::FirstPhaseRankLookup(FirstPhaseRankLookup&&) = default;
+
+FirstPhaseRankLookup::~FirstPhaseRankLookup() = default;
+
+// Returns the rank registered for 'docid', or the largest feature_t value
+// when the docid has not been registered.
+feature_t
+FirstPhaseRankLookup::lookup(uint32_t docid) const noexcept
+{
+    auto pos = _map.find(docid);
+    if (pos == _map.end()) [[unlikely]] {
+        return std::numeric_limits<feature_t>::max();
+    }
+    return pos->second;
+}
+
+// Registers 'rank' as the first phase rank of 'docid'. Each docid may be
+// added only once; a second insert for the same docid trips the assert.
+void
+FirstPhaseRankLookup::add(uint32_t docid, uint32_t rank)
+{
+    // The insert result is only read by the assert below, hence
+    // [[maybe_unused]] for builds where NDEBUG compiles the assert away.
+    [[maybe_unused]] auto insres = _map.insert(std::make_pair(docid, rank));
+    assert(insres.second);
+}
+
+// Adds an empty lookup table to 'store' unless something is already stored
+// under the lookup key.
+void
+FirstPhaseRankLookup::make_shared_state(fef::IObjectStore& store)
+{
+    if (store.get(key) != nullptr) {
+        return;
+    }
+    store.add(key, std::make_unique<AnyWrapper<FirstPhaseRankLookup>>(FirstPhaseRankLookup()));
+}
+
+// Returns the lookup table kept in 'store' for modification, or nullptr when
+// nothing of the expected wrapper type is stored under the lookup key.
+FirstPhaseRankLookup*
+FirstPhaseRankLookup::get_mutable_shared_state(fef::IObjectStore& store)
+{
+    auto* wrapper = dynamic_cast<AnyWrapper<FirstPhaseRankLookup>*>(store.get_mutable(key));
+    if (wrapper == nullptr) {
+        return nullptr;
+    }
+    return &wrapper->getValue();
+}
+
+// Read-only counterpart of get_mutable_shared_state(): returns the lookup
+// table kept in 'store', or nullptr when nothing of the expected wrapper type
+// is stored under the lookup key.
+const FirstPhaseRankLookup*
+FirstPhaseRankLookup::get_shared_state(const fef::IObjectStore& store)
+{
+    const auto* wrapper = dynamic_cast<const AnyWrapper<FirstPhaseRankLookup>*>(store.get(key));
+    if (wrapper == nullptr) {
+        return nullptr;
+    }
+    return &wrapper->getValue();
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.h b/searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.h
new file mode 100644
index 00000000000..83d89ed2dd1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.h
@@ -0,0 +1,32 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/common/feature.h>
+#include <vespa/vespalib/stllike/hash_map.h>
+
+namespace search::fef { class IObjectStore; }
+
+namespace search::features {
+
+/*
+ * This class contains a mapping from docids used by second phase to
+ * first phase rank.
+ *
+ * Instances can be move constructed but neither copied nor assigned. The
+ * static functions manage the instance kept in an object store.
+ */
+class FirstPhaseRankLookup {
+    vespalib::hash_map<uint32_t, uint32_t> _map;
+public:
+    FirstPhaseRankLookup();
+    FirstPhaseRankLookup(FirstPhaseRankLookup&&);
+    FirstPhaseRankLookup(const FirstPhaseRankLookup&) = delete;
+    FirstPhaseRankLookup& operator=(FirstPhaseRankLookup&&) = delete;
+    FirstPhaseRankLookup& operator=(const FirstPhaseRankLookup&) = delete;
+    ~FirstPhaseRankLookup();
+
+    // Rank registered for 'docid', or the largest feature_t value when the
+    // docid has not been added.
+    feature_t lookup(uint32_t docid) const noexcept;
+    // Registers 'rank' for 'docid'; a docid may be added only once (asserted).
+    void add(uint32_t docid, uint32_t rank);
+
+    // Adds an empty lookup to 'store' unless one is already present.
+    static void make_shared_state(fef::IObjectStore& store);
+    // Accessors for the lookup in 'store'; both return nullptr when absent.
+    static FirstPhaseRankLookup* get_mutable_shared_state(fef::IObjectStore& store);
+    static const FirstPhaseRankLookup* get_shared_state(const fef::IObjectStore& store);
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/features/setup.cpp b/searchlib/src/vespa/searchlib/features/setup.cpp
index bdffbd1c6aa..d65459817f0 100644
--- a/searchlib/src/vespa/searchlib/features/setup.cpp
+++ b/searchlib/src/vespa/searchlib/features/setup.cpp
@@ -22,6 +22,7 @@
#include "fieldmatchfeature.h"
#include "fieldtermmatchfeature.h"
#include "firstphasefeature.h"
+#include "first_phase_rank_feature.h"
#include "flow_completeness_feature.h"
#include "foreachfeature.h"
#include "freshnessfeature.h"
@@ -91,6 +92,7 @@ void setup_search_features(fef::IBlueprintRegistry & registry)
registry.addPrototype(std::make_shared<FieldMatchBlueprint>());
registry.addPrototype(std::make_shared<FieldTermMatchBlueprint>());
registry.addPrototype(std::make_shared<FirstPhaseBlueprint>());
+ registry.addPrototype(std::make_shared<FirstPhaseRankBlueprint>());
registry.addPrototype(std::make_shared<FlowCompletenessBlueprint>());
registry.addPrototype(std::make_shared<ForeachBlueprint>());
registry.addPrototype(std::make_shared<FreshnessBlueprint>());
diff --git a/searchlib/src/vespa/searchlib/fef/objectstore.cpp b/searchlib/src/vespa/searchlib/fef/objectstore.cpp
index 3e5baf49116..a90702a88a6 100644
--- a/searchlib/src/vespa/searchlib/fef/objectstore.cpp
+++ b/searchlib/src/vespa/searchlib/fef/objectstore.cpp
@@ -35,4 +35,11 @@ ObjectStore::get(const vespalib::string & key) const
return (found != _objectMap.end()) ? found->second : NULL;
}
+// Returns the object stored under 'key' for modification, or nullptr when
+// the key is not present.
+Anything *
+ObjectStore::get_mutable(const vespalib::string& key)
+{
+    auto itr = _objectMap.find(key);
+    if (itr == _objectMap.end()) {
+        return nullptr;
+    }
+    return itr->second;
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/fef/objectstore.h b/searchlib/src/vespa/searchlib/fef/objectstore.h
index 9d1671e521c..d2d768ee338 100644
--- a/searchlib/src/vespa/searchlib/fef/objectstore.h
+++ b/searchlib/src/vespa/searchlib/fef/objectstore.h
@@ -24,6 +24,7 @@ class AnyWrapper : public Anything
public:
explicit AnyWrapper(T value) : _value(std::move(value)) { }
const T & getValue() const { return _value; }
+ T& getValue() { return _value; }
static const T & getValue(const Anything & any) { return static_cast<const AnyWrapper &>(any).getValue(); }
private:
T _value;
@@ -38,6 +39,7 @@ public:
virtual ~IObjectStore() = default;
virtual void add(const vespalib::string & key, Anything::UP value) = 0;
virtual const Anything * get(const vespalib::string & key) const = 0;
+ virtual Anything* get_mutable(const vespalib::string& key) = 0;
};
/**
@@ -50,6 +52,7 @@ public:
~ObjectStore() override;
void add(const vespalib::string & key, Anything::UP value) override;
const Anything * get(const vespalib::string & key) const override;
+ Anything* get_mutable(const vespalib::string & key) override;
private:
using ObjectMap = vespalib::hash_map<vespalib::string, Anything *>;
ObjectMap _objectMap;