diff options
author | Geir Storli <geirst@yahooinc.com> | 2022-07-07 13:17:55 +0000 |
---|---|---|
committer | Geir Storli <geirst@yahooinc.com> | 2022-07-08 12:59:38 +0000 |
commit | 2a601ec4acee4ef3bdd9dece2d098b004d9420b3 (patch) | |
tree | 108db2c461819c18c5afae0d20a0aebd90d4d2b7 /searchlib | |
parent | d26d5f3aa3d47eb85049d57acc881942b1db5329 (diff) |
Set up distance calculators per term-field pair in distance and closeness features.
Diffstat (limited to 'searchlib')
8 files changed, 182 insertions, 16 deletions
diff --git a/searchlib/src/vespa/searchlib/features/closenessfeature.cpp b/searchlib/src/vespa/searchlib/features/closenessfeature.cpp index 17bd914d690..dfeeebf3584 100644 --- a/searchlib/src/vespa/searchlib/features/closenessfeature.cpp +++ b/searchlib/src/vespa/searchlib/features/closenessfeature.cpp @@ -29,13 +29,13 @@ public: }; ConvertRawScoreToCloseness::ConvertRawScoreToCloseness(const fef::IQueryEnvironment &env, uint32_t fieldId) - : _bundle(env, fieldId), + : _bundle(env, fieldId, "closeness"), _md(nullptr) { } ConvertRawScoreToCloseness::ConvertRawScoreToCloseness(const fef::IQueryEnvironment &env, const vespalib::string &label) - : _bundle(env, label), + : _bundle(env, label, "closeness"), _md(nullptr) { } @@ -185,15 +185,26 @@ ClosenessBlueprint::createInstance() const return std::make_unique<ClosenessBlueprint>(); } -FeatureExecutor & -ClosenessBlueprint::createExecutor(const IQueryEnvironment &env, vespalib::Stash &stash) const +void +ClosenessBlueprint::prepareSharedState(const fef::IQueryEnvironment& env, fef::IObjectStore& store) const { + if (_use_nns_tensor) { + DistanceCalculatorBundle::prepare_shared_state(env, store, _attr_id, "closeness"); + } if (_use_item_label) { - return stash.create<ConvertRawScoreToCloseness>(env, _arg_string); + DistanceCalculatorBundle::prepare_shared_state(env, store, _arg_string, "closeness"); } +} + +FeatureExecutor & +ClosenessBlueprint::createExecutor(const IQueryEnvironment &env, vespalib::Stash &stash) const +{ if (_use_nns_tensor) { return stash.create<ConvertRawScoreToCloseness>(env, _attr_id); } + if (_use_item_label) { + return stash.create<ConvertRawScoreToCloseness>(env, _arg_string); + } assert(_use_geo_pos); return stash.create<ClosenessExecutor>(_maxDistance, _scaleDistance); } diff --git a/searchlib/src/vespa/searchlib/features/closenessfeature.h b/searchlib/src/vespa/searchlib/features/closenessfeature.h index 799495eaff5..6e265e5dcb8 100644 --- 
a/searchlib/src/vespa/searchlib/features/closenessfeature.h +++ b/searchlib/src/vespa/searchlib/features/closenessfeature.h @@ -45,6 +45,7 @@ public: return fef::ParameterDescriptions().desc().string().desc().string().string(); } bool setup(const fef::IIndexEnvironment & env, const fef::ParameterList & params) override; + void prepareSharedState(const fef::IQueryEnvironment& env, fef::IObjectStore& store) const override; fef::FeatureExecutor &createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const override; }; diff --git a/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.cpp b/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.cpp index 361ccce2fe2..90386dffd51 100644 --- a/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.cpp +++ b/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.cpp @@ -3,50 +3,170 @@ #include "distance_calculator_bundle.h" #include "utils.h" #include <vespa/searchlib/fef/iqueryenvironment.h> +#include <vespa/searchlib/fef/query_value.h> #include <vespa/searchlib/tensor/distance_calculator.h> +#include <vespa/vespalib/util/exceptions.h> +#include <vespa/vespalib/util/issue.h> using search::fef::ITermData; using search::fef::IllegalHandle; +using search::fef::InvalidValueTypeException; +using search::fef::QueryValue; using search::fef::TermFieldHandle; +using search::tensor::DistanceCalculator; +using vespalib::Issue; namespace search::features { +namespace { + +void +prepare_query_tensor(const fef::IQueryEnvironment& env, + fef::IObjectStore& store, + const vespalib::string& query_tensor_name, + const vespalib::string& feature_name) +{ + try { + auto qvalue = QueryValue::from_config(query_tensor_name, env.getIndexEnvironment()); + qvalue.prepare_shared_state(env, store); + } catch (const InvalidValueTypeException& ex) { + Issue::report("%s feature: Query tensor '%s' has invalid type '%s'.", + feature_name.c_str(), query_tensor_name.c_str(), 
ex.type_str().c_str()); + } +} + +std::unique_ptr<DistanceCalculator> +make_distance_calculator(const fef::IQueryEnvironment& env, + const search::attribute::IAttributeVector& attr, + const vespalib::string& query_tensor_name, + const vespalib::string& feature_name) +{ + try { + auto qvalue = QueryValue::from_config(query_tensor_name, env.getIndexEnvironment()); + const auto* query_tensor = qvalue.lookup_value(env.getObjectStore()); + if (query_tensor == nullptr) { + Issue::report("%s feature: Query tensor '%s' is not found in the object store.", + feature_name.c_str(), query_tensor_name.c_str()); + return {}; + } + return DistanceCalculator::make_with_validation(attr, *query_tensor); + } catch (const InvalidValueTypeException& ex) { + Issue::report("%s feature: Query tensor '%s' has invalid type '%s'.", + feature_name.c_str(), query_tensor_name.c_str(), ex.type_str().c_str()); + } catch (const vespalib::IllegalArgumentException& ex) { + Issue::report("%s feature: Could not create distance calculator for attribute '%s' and query tensor '%s': %s.", + feature_name.c_str(), attr.getName().c_str(), query_tensor_name.c_str(), ex.getMessage().c_str()); + } + return {}; +} + +const search::attribute::IAttributeVector* +resolve_attribute_for_field(const fef::IQueryEnvironment& env, + uint32_t field_id, + const vespalib::string& feature_name) +{ + const auto* field = env.getIndexEnvironment().getField(field_id); + if (field != nullptr) { + const auto* attr = env.getAttributeContext().getAttribute(field->name()); + if (attr == nullptr) { + Issue::report("%s feature: The attribute vector '%s' for field id '%u' doesn't exist.", + feature_name.c_str(), field->name().c_str(), field_id); + } + return attr; + } + return nullptr; +} + +} + DistanceCalculatorBundle::Element::Element(fef::TermFieldHandle handle_in) : handle(handle_in), calc() { } +DistanceCalculatorBundle::Element::Element(fef::TermFieldHandle handle_in, std::unique_ptr<search::tensor::DistanceCalculator> calc_in) + 
: handle(handle_in), + calc(std::move(calc_in)) +{ +} + DistanceCalculatorBundle::Element::~Element() = default; DistanceCalculatorBundle::DistanceCalculatorBundle(const fef::IQueryEnvironment& env, - uint32_t field_id) + uint32_t field_id, + const vespalib::string& feature_name) + : _elems() { _elems.reserve(env.getNumTerms()); + const auto* attr = resolve_attribute_for_field(env, field_id, feature_name); for (uint32_t i = 0; i < env.getNumTerms(); ++i) { search::fef::TermFieldHandle handle = util::getTermFieldHandle(env, i, field_id); if (handle != search::fef::IllegalHandle) { - _elems.emplace_back(handle); + const auto* term = env.getTerm(i); + if (term->query_tensor_name().has_value() && (attr != nullptr)) { + _elems.emplace_back(handle, make_distance_calculator(env, *attr, term->query_tensor_name().value(), feature_name)); + } else { + _elems.emplace_back(handle); + } } } } DistanceCalculatorBundle::DistanceCalculatorBundle(const fef::IQueryEnvironment& env, - const vespalib::string& label) + const vespalib::string& label, + const vespalib::string& feature_name) : _elems() { - const ITermData *term = util::getTermByLabel(env, label); + const ITermData* term = util::getTermByLabel(env, label); if (term != nullptr) { // expect numFields() == 1 for (uint32_t i = 0; i < term->numFields(); ++i) { - TermFieldHandle handle = term->field(i).getHandle(); + const auto& term_field = term->field(i); + TermFieldHandle handle = term_field.getHandle(); if (handle != IllegalHandle) { - _elems.emplace_back(handle); + std::unique_ptr<DistanceCalculator> calc; + if (term->query_tensor_name().has_value()) { + const auto* attr = resolve_attribute_for_field(env, term_field.getFieldId(), feature_name); + if (attr != nullptr) { + calc = make_distance_calculator(env, *attr, term->query_tensor_name().value(), feature_name); + } + } + _elems.emplace_back(handle, std::move(calc)); } } } } +void +DistanceCalculatorBundle::prepare_shared_state(const fef::IQueryEnvironment& env, + 
fef::IObjectStore& store, + uint32_t field_id, + const vespalib::string& feature_name) +{ + for (uint32_t i = 0; i < env.getNumTerms(); ++i) { + search::fef::TermFieldHandle handle = util::getTermFieldHandle(env, i, field_id); + if (handle != search::fef::IllegalHandle) { + const auto* term = env.getTerm(i); + if (term->query_tensor_name().has_value()) { + prepare_query_tensor(env, store, term->query_tensor_name().value(), feature_name); + } + } + } +} + +void +DistanceCalculatorBundle::prepare_shared_state(const fef::IQueryEnvironment& env, + fef::IObjectStore& store, + const vespalib::string& label, + const vespalib::string& feature_name) +{ + const auto* term = util::getTermByLabel(env, label); + if ((term != nullptr) && term->query_tensor_name().has_value()) { + prepare_query_tensor(env, store, term->query_tensor_name().value(), feature_name); + } +} + } diff --git a/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.h b/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.h index d28a315edd1..dd3fc521d96 100644 --- a/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.h +++ b/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.h @@ -8,7 +8,10 @@ #include <vector> namespace search::tensor { class DistanceCalculator; } -namespace search::fef { class IQueryEnvironment; } +namespace search::fef { +class IObjectStore; +class IQueryEnvironment; +} namespace search::features { @@ -25,16 +28,32 @@ public: std::unique_ptr<search::tensor::DistanceCalculator> calc; Element(Element&& rhs) noexcept = default; // Needed as std::vector::reserve() is used. 
Element(fef::TermFieldHandle handle_in); + Element(fef::TermFieldHandle handle_in, std::unique_ptr<search::tensor::DistanceCalculator> calc_in); ~Element(); }; private: std::vector<Element> _elems; public: - DistanceCalculatorBundle(const fef::IQueryEnvironment& env, uint32_t field_id); - DistanceCalculatorBundle(const fef::IQueryEnvironment& env, const vespalib::string& label); + DistanceCalculatorBundle(const fef::IQueryEnvironment& env, + uint32_t field_id, + const vespalib::string& feature_name); + + DistanceCalculatorBundle(const fef::IQueryEnvironment& env, + const vespalib::string& label, + const vespalib::string& feature_name); const std::vector<Element>& elements() const { return _elems; } + + static void prepare_shared_state(const fef::IQueryEnvironment& env, + fef::IObjectStore& store, + uint32_t field_id, + const vespalib::string& feature_name); + + static void prepare_shared_state(const fef::IQueryEnvironment& env, + fef::IObjectStore& store, + const vespalib::string& label, + const vespalib::string& feature_name); }; } diff --git a/searchlib/src/vespa/searchlib/features/distancefeature.cpp b/searchlib/src/vespa/searchlib/features/distancefeature.cpp index 4a279da9bdd..260d78cf7a1 100644 --- a/searchlib/src/vespa/searchlib/features/distancefeature.cpp +++ b/searchlib/src/vespa/searchlib/features/distancefeature.cpp @@ -37,13 +37,13 @@ public: }; ConvertRawscoreToDistance::ConvertRawscoreToDistance(const fef::IQueryEnvironment &env, uint32_t fieldId) - : _bundle(env, fieldId), + : _bundle(env, fieldId, "distance"), _md(nullptr) { } ConvertRawscoreToDistance::ConvertRawscoreToDistance(const fef::IQueryEnvironment &env, const vespalib::string &label) - : _bundle(env, label), + : _bundle(env, label, "distance"), _md(nullptr) { } @@ -233,6 +233,17 @@ DistanceBlueprint::setup(const IIndexEnvironment & env, return false; } +void +DistanceBlueprint::prepareSharedState(const fef::IQueryEnvironment& env, fef::IObjectStore& store) const +{ + if (_use_nns_tensor) 
{ + DistanceCalculatorBundle::prepare_shared_state(env, store, _attr_id, "distance"); + } + if (_use_item_label) { + DistanceCalculatorBundle::prepare_shared_state(env, store, _arg_string, "distance"); + } +} + FeatureExecutor & DistanceBlueprint::createExecutor(const IQueryEnvironment &env, vespalib::Stash &stash) const { diff --git a/searchlib/src/vespa/searchlib/features/distancefeature.h b/searchlib/src/vespa/searchlib/features/distancefeature.h index 6eff0380c3a..bf578d45f42 100644 --- a/searchlib/src/vespa/searchlib/features/distancefeature.h +++ b/searchlib/src/vespa/searchlib/features/distancefeature.h @@ -63,6 +63,7 @@ public: return fef::ParameterDescriptions().desc().string().desc().string().string(); } bool setup(const fef::IIndexEnvironment & env, const fef::ParameterList & params) override; + void prepareSharedState(const fef::IQueryEnvironment& env, fef::IObjectStore& store) const override; fef::FeatureExecutor &createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const override; }; diff --git a/searchlib/src/vespa/searchlib/fef/query_value.cpp b/searchlib/src/vespa/searchlib/fef/query_value.cpp index d9cdb0aa23d..a60a24425b5 100644 --- a/searchlib/src/vespa/searchlib/fef/query_value.cpp +++ b/searchlib/src/vespa/searchlib/fef/query_value.cpp @@ -161,6 +161,8 @@ QueryValue::QueryValue(const vespalib::string& key, const vespalib::eval::ValueT { } +QueryValue::~QueryValue() = default; + QueryValue QueryValue::from_config(const vespalib::string& key, const IIndexEnvironment& env) { diff --git a/searchlib/src/vespa/searchlib/fef/query_value.h b/searchlib/src/vespa/searchlib/fef/query_value.h index 477b6aa451f..3cdb90ea871 100644 --- a/searchlib/src/vespa/searchlib/fef/query_value.h +++ b/searchlib/src/vespa/searchlib/fef/query_value.h @@ -59,6 +59,7 @@ private: public: QueryValue(); QueryValue(const vespalib::string& key, const vespalib::eval::ValueType& type); + ~QueryValue(); /** * Create a QueryValue using properties from the 
given index environment to extract the value type. |