diff options
Diffstat (limited to 'searchlib/src/vespa/searchlib/features')
10 files changed, 311 insertions, 2 deletions
diff --git a/searchlib/src/vespa/searchlib/features/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
index 0690801ee61..27c2b6d5e41 100644
--- a/searchlib/src/vespa/searchlib/features/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
@@ -26,6 +26,8 @@ vespa_add_library(searchlib_features OBJECT
     fieldmatchfeature.cpp
     fieldtermmatchfeature.cpp
     firstphasefeature.cpp
+    first_phase_rank_feature.cpp
+    first_phase_rank_lookup.cpp
     flow_completeness_feature.cpp
     foreachfeature.cpp
     freshnessfeature.cpp
@@ -57,6 +59,7 @@ vespa_add_library(searchlib_features OBJECT
     rankingexpressionfeature.cpp
     raw_score_feature.cpp
     reverseproximityfeature.cpp
+    second_phase_feature.cpp
     setup.cpp
     subqueries_feature.cpp
     tensor_attribute_executor.cpp
diff --git a/searchlib/src/vespa/searchlib/features/bm25_feature.cpp b/searchlib/src/vespa/searchlib/features/bm25_feature.cpp
index 505b8166ee7..03d2e94b5d0 100644
--- a/searchlib/src/vespa/searchlib/features/bm25_feature.cpp
+++ b/searchlib/src/vespa/searchlib/features/bm25_feature.cpp
@@ -68,7 +68,7 @@ Bm25Executor::Bm25Executor(const fef::FieldInfo& field,
 }
 
 double
-Bm25Executor::calculate_inverse_document_frequency(uint32_t matching_doc_count, uint32_t total_doc_count)
+Bm25Executor::calculate_inverse_document_frequency(uint32_t matching_doc_count, uint32_t total_doc_count) noexcept
 {
     return std::log(1 + (static_cast<double>(total_doc_count - matching_doc_count + 0.5) /
                          static_cast<double>(matching_doc_count + 0.5)));
diff --git a/searchlib/src/vespa/searchlib/features/bm25_feature.h b/searchlib/src/vespa/searchlib/features/bm25_feature.h
index a1b45375285..637d656990b 100644
--- a/searchlib/src/vespa/searchlib/features/bm25_feature.h
+++ b/searchlib/src/vespa/searchlib/features/bm25_feature.h
@@ -39,7 +39,7 @@ public:
                  double k1_param,
                  double b_param);
 
-    double static calculate_inverse_document_frequency(uint32_t matching_doc_count, uint32_t total_doc_count);
+    double static calculate_inverse_document_frequency(uint32_t matching_doc_count, uint32_t total_doc_count) noexcept;
 
     void handle_bind_match_data(const fef::MatchData& match_data) override;
     void execute(uint32_t docId) override;
diff --git a/searchlib/src/vespa/searchlib/features/first_phase_rank_feature.cpp b/searchlib/src/vespa/searchlib/features/first_phase_rank_feature.cpp
new file mode 100644
index 00000000000..5c8a9a391ff
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/first_phase_rank_feature.cpp
@@ -0,0 +1,71 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "first_phase_rank_feature.h"
+#include "valuefeature.h"
+#include <vespa/vespalib/util/stash.h>
+
+namespace search::features {
+
+FirstPhaseRankExecutor::FirstPhaseRankExecutor(const FirstPhaseRankLookup& lookup)
+    : FeatureExecutor(),
+      _lookup(lookup) // stored as a reference; the lookup is not copied
+{
+}
+FirstPhaseRankExecutor::~FirstPhaseRankExecutor() = default;
+
+void
+FirstPhaseRankExecutor::execute(uint32_t docid)
+{
+    outputs().set_number(0, _lookup.lookup(docid)); // output 0 = value the lookup returns for this docid
+}
+
+FirstPhaseRankBlueprint::FirstPhaseRankBlueprint()
+    : Blueprint("firstPhaseRank") // base name passed to fef::Blueprint
+{
+}
+
+FirstPhaseRankBlueprint::~FirstPhaseRankBlueprint() = default;
+
+void
+FirstPhaseRankBlueprint::visitDumpFeatures(const fef::IIndexEnvironment&, fef::IDumpFeatureVisitor&) const
+{ // empty body: the visitor is never called
+}
+
+std::unique_ptr<fef::Blueprint>
+FirstPhaseRankBlueprint::createInstance() const
+{
+    return std::make_unique<FirstPhaseRankBlueprint>(); // fresh default-constructed blueprint; no state is copied
+}
+
+fef::ParameterDescriptions
+FirstPhaseRankBlueprint::getDescriptions() const
+{
+    return fef::ParameterDescriptions().desc(); // NOTE(review): presumably the "no parameters" signature - confirm
+}
+
+bool
+FirstPhaseRankBlueprint::setup(const fef::IIndexEnvironment&, const fef::ParameterList&)
+{
+    describeOutput("score", "The first phase rank."); // declares the single output, named "score"
+    return true; // this setup has no failure path
+}
+
+void
+FirstPhaseRankBlueprint::prepareSharedState(const fef::IQueryEnvironment&, fef::IObjectStore& store) const
+{
+    FirstPhaseRankLookup::make_shared_state(store); // adds a lookup to the store unless one is already registered
+}
+
+fef::FeatureExecutor&
+FirstPhaseRankBlueprint::createExecutor(const fef::IQueryEnvironment& env, vespalib::Stash& stash) const
+{
+    const auto* lookup = FirstPhaseRankLookup::get_shared_state(env.getObjectStore());
+    if (lookup != nullptr) { // a lookup was found in the query's object store
+        return stash.create<FirstPhaseRankExecutor>(*lookup);
+    } else { // no lookup available: the executor is built from a single max() value instead
+        std::vector<feature_t> values{std::numeric_limits<feature_t>::max()};
+        return stash.create<ValueExecutor>(values);
+    }
+}
+
+} // namespace search::features
diff --git a/searchlib/src/vespa/searchlib/features/first_phase_rank_feature.h b/searchlib/src/vespa/searchlib/features/first_phase_rank_feature.h
new file mode 100644
index 00000000000..f90ea26f859
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/first_phase_rank_feature.h
@@ -0,0 +1,40 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "first_phase_rank_lookup.h"
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchlib/fef/featureexecutor.h>
+
+namespace search::features {
+
+class FirstPhaseRankLookup; // redundant with the include of first_phase_rank_lookup.h above; harmless
+
+/*
+ * Executor for first phase rank feature that outputs the first phase rank
+ * for the given docid on this search node (1.0, 2.0, 3.0, etc.).
+ */
+class FirstPhaseRankExecutor : public fef::FeatureExecutor {
+    const FirstPhaseRankLookup& _lookup; // non-owning reference; the lookup must outlive this executor
+public:
+    explicit FirstPhaseRankExecutor(const FirstPhaseRankLookup& lookup); // explicit: no implicit lookup -> executor conversion
+    ~FirstPhaseRankExecutor() override;
+    void execute(uint32_t docid) override; // writes _lookup.lookup(docid) to output 0
+};
+
+/*
+ * Blueprint for the "firstPhaseRank" feature; it declares one output, "score".
+ */
+class FirstPhaseRankBlueprint : public fef::Blueprint {
+public:
+    FirstPhaseRankBlueprint();
+    ~FirstPhaseRankBlueprint() override;
+    void visitDumpFeatures(const fef::IIndexEnvironment& env, fef::IDumpFeatureVisitor& visitor) const override;
+    std::unique_ptr<fef::Blueprint> createInstance() const override;
+    fef::ParameterDescriptions getDescriptions() const override;
+    bool setup(const fef::IIndexEnvironment& env, const fef::ParameterList& params) override;
+    void prepareSharedState(const fef::IQueryEnvironment& env, fef::IObjectStore& store) const override;
+    fef::FeatureExecutor& createExecutor(const fef::IQueryEnvironment& env, vespalib::Stash& stash) const override;
+};
+
+} // namespace search::features
diff --git a/searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.cpp b/searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.cpp
new file mode 100644
index 00000000000..2dfaabb8326
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.cpp
@@ -0,0 +1,67 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "first_phase_rank_lookup.h"
+#include <vespa/searchlib/fef/objectstore.h>
+#include <cassert>
+#include <limits>
+
+using search::fef::AnyWrapper;
+
+namespace search::features {
+
+namespace {
+
+const vespalib::string key = "firstPhaseRankLookup"; // object store key used by the shared-state helpers below
+
+}
+
+FirstPhaseRankLookup::FirstPhaseRankLookup()
+    : _map()
+{
+}
+
+FirstPhaseRankLookup::FirstPhaseRankLookup(FirstPhaseRankLookup&&) = default;
+
+FirstPhaseRankLookup::~FirstPhaseRankLookup() = default;
+
+feature_t
+FirstPhaseRankLookup::lookup(uint32_t docid) const noexcept
+{
+    auto itr = _map.find(docid);
+    if (itr != _map.end()) [[likely]] {
+        return itr->second; // stored uint32_t rank, converted to feature_t
+    } else {
+        return std::numeric_limits<feature_t>::max(); // docid is not in the map
+    }
+}
+
+void
+FirstPhaseRankLookup::add(uint32_t docid, uint32_t rank)
+{
+    [[maybe_unused]] auto insres = _map.insert(std::make_pair(docid, rank)); // only read by the assert; unused under NDEBUG
+    assert(insres.second); // a docid must not be added twice
+}
+
+void
+FirstPhaseRankLookup::make_shared_state(fef::IObjectStore& store)
+{
+    if (store.get(key) == nullptr) { // keep an existing entry; add an empty lookup only when the key is absent
+        store.add(key, std::make_unique<AnyWrapper<FirstPhaseRankLookup>>(FirstPhaseRankLookup()));
+    }
+}
+
+FirstPhaseRankLookup*
+FirstPhaseRankLookup::get_mutable_shared_state(fef::IObjectStore& store)
+{
+    auto* wrapper = dynamic_cast<AnyWrapper<FirstPhaseRankLookup>*>(store.get_mutable(key));
+    return (wrapper == nullptr) ? nullptr : &wrapper->getValue(); // nullptr when the cast fails (including a null entry)
+}
+
+const FirstPhaseRankLookup*
+FirstPhaseRankLookup::get_shared_state(const fef::IObjectStore& store)
+{
+    const auto* wrapper = dynamic_cast<const AnyWrapper<FirstPhaseRankLookup>*>(store.get(key));
+    return (wrapper == nullptr) ? nullptr : &wrapper->getValue(); // nullptr when the cast fails (including a null entry)
+}
+
+} // namespace search::features
diff --git a/searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.h b/searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.h
new file mode 100644
index 00000000000..83d89ed2dd1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/first_phase_rank_lookup.h
@@ -0,0 +1,32 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/common/feature.h>
+#include <vespa/vespalib/stllike/hash_map.h>
+
+namespace search::fef { class IObjectStore; }
+
+namespace search::features {
+
+/*
+ * This class contains a mapping from docids used by second phase to
+ * first phase rank.
+ */
+class FirstPhaseRankLookup {
+    vespalib::hash_map<uint32_t, uint32_t> _map; // docid -> rank, filled through add()
+public:
+    FirstPhaseRankLookup();
+    FirstPhaseRankLookup(const FirstPhaseRankLookup&) = delete;
+    FirstPhaseRankLookup(FirstPhaseRankLookup&&);
+    ~FirstPhaseRankLookup();
+    FirstPhaseRankLookup& operator=(const FirstPhaseRankLookup&) = delete;
+    FirstPhaseRankLookup& operator=(FirstPhaseRankLookup&&) = delete;
+    feature_t lookup(uint32_t docid) const noexcept; // rank for docid, or max feature_t when docid was never added
+    void add(uint32_t docid, uint32_t rank); // asserts that docid has not been added before
+    static void make_shared_state(fef::IObjectStore& store); // registers an empty lookup if none is present
+    static FirstPhaseRankLookup* get_mutable_shared_state(fef::IObjectStore& store); // may return nullptr
+    static const FirstPhaseRankLookup* get_shared_state(const fef::IObjectStore& store); // may return nullptr
+};
+
+} // namespace search::features
diff --git a/searchlib/src/vespa/searchlib/features/second_phase_feature.cpp b/searchlib/src/vespa/searchlib/features/second_phase_feature.cpp
new file mode 100644
index 00000000000..82ce36be859
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/second_phase_feature.cpp
@@ -0,0 +1,57 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "second_phase_feature.h"
+#include <vespa/searchlib/fef/featureexecutor.h>
+#include <vespa/searchlib/fef/indexproperties.h>
+#include <vespa/searchlib/fef/properties.h>
+#include <vespa/vespalib/util/stash.h>
+
+using namespace search::fef;
+
+namespace search::features {
+
+void
+SecondPhaseExecutor::execute(uint32_t)
+{
+    outputs().set_number(0, inputs().get_number(0)); // forward input 0 unchanged as output 0; docid is unused
+}
+
+
+SecondPhaseBlueprint::SecondPhaseBlueprint()
+    : Blueprint("secondPhase") // base name passed to fef::Blueprint
+{
+}
+
+void
+SecondPhaseBlueprint::visitDumpFeatures(const IIndexEnvironment&,
+                                        IDumpFeatureVisitor&) const
+{ // empty body: the visitor is never called
+}
+
+Blueprint::UP
+SecondPhaseBlueprint::createInstance() const
+{
+    return std::make_unique<SecondPhaseBlueprint>(); // fresh default-constructed blueprint; no state is copied
+}
+
+bool
+SecondPhaseBlueprint::setup(const IIndexEnvironment& env,
+                            const ParameterList&)
+{
+    if (auto maybe_input = defineInput(indexproperties::rank::SecondPhase::lookup(env.getProperties()), // input name comes from the index properties
+                                       AcceptInput::ANY))
+    {
+        describeOutput("score", "The ranking score for second phase.", maybe_input.value()); // output "score" described using the value defineInput() returned
+        return true;
+    } else {
+        return false; // defineInput() yielded no value, so setup fails
+    }
+}
+
+FeatureExecutor &
+SecondPhaseBlueprint::createExecutor(const IQueryEnvironment&, vespalib::Stash& stash) const
+{
+    return stash.create<SecondPhaseExecutor>(); // created via the stash; the query environment is not consulted
+}
+
+} // namespace search::features
diff --git a/searchlib/src/vespa/searchlib/features/second_phase_feature.h b/searchlib/src/vespa/searchlib/features/second_phase_feature.h
new file mode 100644
index 00000000000..61805186453
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/second_phase_feature.h
@@ -0,0 +1,35 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+
+namespace search::features {
+
+/**
+ * Implements the executor outputting the second phase ranking.
+ */
+class SecondPhaseExecutor : public fef::FeatureExecutor {
+public:
+    bool isPure() override { return true; } // always reports true
+    void execute(uint32_t docId) override; // copies input 0 to output 0
+};
+
+/**
+ * Implements the blueprint for the second phase feature.
+ */
+class SecondPhaseBlueprint : public fef::Blueprint {
+public:
+    SecondPhaseBlueprint();
+    void visitDumpFeatures(const fef::IIndexEnvironment & env, fef::IDumpFeatureVisitor & visitor) const override;
+    fef::Blueprint::UP createInstance() const override;
+
+    fef::ParameterDescriptions getDescriptions() const override {
+        return fef::ParameterDescriptions().desc(); // NOTE(review): presumably the "no parameters" signature - confirm
+    }
+    bool setup(const fef::IIndexEnvironment & env, const fef::ParameterList & params) override;
+
+    fef::FeatureExecutor &createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const override;
+};
+
+} // namespace search::features
diff --git a/searchlib/src/vespa/searchlib/features/setup.cpp b/searchlib/src/vespa/searchlib/features/setup.cpp
index 71e083e2326..d65459817f0 100644
--- a/searchlib/src/vespa/searchlib/features/setup.cpp
+++ b/searchlib/src/vespa/searchlib/features/setup.cpp
@@ -22,6 +22,7 @@
 #include "fieldmatchfeature.h"
 #include "fieldtermmatchfeature.h"
 #include "firstphasefeature.h"
+#include "first_phase_rank_feature.h"
 #include "flow_completeness_feature.h"
 #include "foreachfeature.h"
 #include "freshnessfeature.h"
@@ -48,6 +49,7 @@
 #include "rankingexpressionfeature.h"
 #include "raw_score_feature.h"
 #include "reverseproximityfeature.h"
+#include "second_phase_feature.h"
 #include "subqueries_feature.h"
 #include "tensor_from_labels_feature.h"
 #include "tensor_from_weighted_set_feature.h"
@@ -90,6 +92,7 @@ void setup_search_features(fef::IBlueprintRegistry & registry)
     registry.addPrototype(std::make_shared<FieldMatchBlueprint>());
     registry.addPrototype(std::make_shared<FieldTermMatchBlueprint>());
     registry.addPrototype(std::make_shared<FirstPhaseBlueprint>());
+    registry.addPrototype(std::make_shared<FirstPhaseRankBlueprint>());
     registry.addPrototype(std::make_shared<FlowCompletenessBlueprint>());
     registry.addPrototype(std::make_shared<ForeachBlueprint>());
     registry.addPrototype(std::make_shared<FreshnessBlueprint>());
@@ -109,6 +112,7 @@ void setup_search_features(fef::IBlueprintRegistry & registry)
     registry.addPrototype(std::make_shared<RandomNormalBlueprint>());
     registry.addPrototype(std::make_shared<RandomNormalStableBlueprint>());
     registry.addPrototype(std::make_shared<RawScoreBlueprint>());
+    registry.addPrototype(std::make_shared<SecondPhaseBlueprint>());
     registry.addPrototype(std::make_shared<SubqueriesBlueprint>());
     registry.addPrototype(std::make_shared<TensorFromLabelsBlueprint>());
     registry.addPrototype(std::make_shared<TensorFromWeightedSetBlueprint>());
|