about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Geir Storli <geirst@yahooinc.com> 2022-07-07 13:17:55 +0000
committer Geir Storli <geirst@yahooinc.com> 2022-07-08 12:59:38 +0000
commit 2a601ec4acee4ef3bdd9dece2d098b004d9420b3 (patch)
tree 108db2c461819c18c5afae0d20a0aebd90d4d2b7
parent d26d5f3aa3d47eb85049d57acc881942b1db5329 (diff)
Setup distance calculators per term-field pair in distance and closeness features.
-rw-r--r-- searchlib/src/vespa/searchlib/features/closenessfeature.cpp 21
-rw-r--r-- searchlib/src/vespa/searchlib/features/closenessfeature.h 1
-rw-r--r-- searchlib/src/vespa/searchlib/features/distance_calculator_bundle.cpp 132
-rw-r--r-- searchlib/src/vespa/searchlib/features/distance_calculator_bundle.h 25
-rw-r--r-- searchlib/src/vespa/searchlib/features/distancefeature.cpp 15
-rw-r--r-- searchlib/src/vespa/searchlib/features/distancefeature.h 1
-rw-r--r-- searchlib/src/vespa/searchlib/fef/query_value.cpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/fef/query_value.h 1
8 files changed, 182 insertions, 16 deletions
diff --git a/searchlib/src/vespa/searchlib/features/closenessfeature.cpp b/searchlib/src/vespa/searchlib/features/closenessfeature.cpp
index 17bd914d690..dfeeebf3584 100644
--- a/searchlib/src/vespa/searchlib/features/closenessfeature.cpp
+++ b/searchlib/src/vespa/searchlib/features/closenessfeature.cpp
@@ -29,13 +29,13 @@ public:
};
ConvertRawScoreToCloseness::ConvertRawScoreToCloseness(const fef::IQueryEnvironment &env, uint32_t fieldId)
- : _bundle(env, fieldId),
+ : _bundle(env, fieldId, "closeness"),
_md(nullptr)
{
}
ConvertRawScoreToCloseness::ConvertRawScoreToCloseness(const fef::IQueryEnvironment &env, const vespalib::string &label)
- : _bundle(env, label),
+ : _bundle(env, label, "closeness"),
_md(nullptr)
{
}
@@ -185,15 +185,26 @@ ClosenessBlueprint::createInstance() const
return std::make_unique<ClosenessBlueprint>();
}
-FeatureExecutor &
-ClosenessBlueprint::createExecutor(const IQueryEnvironment &env, vespalib::Stash &stash) const
+void
+ClosenessBlueprint::prepareSharedState(const fef::IQueryEnvironment& env, fef::IObjectStore& store) const
{
+ if (_use_nns_tensor) {
+ DistanceCalculatorBundle::prepare_shared_state(env, store, _attr_id, "closeness");
+ }
if (_use_item_label) {
- return stash.create<ConvertRawScoreToCloseness>(env, _arg_string);
+ DistanceCalculatorBundle::prepare_shared_state(env, store, _arg_string, "closeness");
}
+}
+
+FeatureExecutor &
+ClosenessBlueprint::createExecutor(const IQueryEnvironment &env, vespalib::Stash &stash) const
+{
if (_use_nns_tensor) {
return stash.create<ConvertRawScoreToCloseness>(env, _attr_id);
}
+ if (_use_item_label) {
+ return stash.create<ConvertRawScoreToCloseness>(env, _arg_string);
+ }
assert(_use_geo_pos);
return stash.create<ClosenessExecutor>(_maxDistance, _scaleDistance);
}
diff --git a/searchlib/src/vespa/searchlib/features/closenessfeature.h b/searchlib/src/vespa/searchlib/features/closenessfeature.h
index 799495eaff5..6e265e5dcb8 100644
--- a/searchlib/src/vespa/searchlib/features/closenessfeature.h
+++ b/searchlib/src/vespa/searchlib/features/closenessfeature.h
@@ -45,6 +45,7 @@ public:
return fef::ParameterDescriptions().desc().string().desc().string().string();
}
bool setup(const fef::IIndexEnvironment & env, const fef::ParameterList & params) override;
+ void prepareSharedState(const fef::IQueryEnvironment& env, fef::IObjectStore& store) const override;
fef::FeatureExecutor &createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const override;
};
diff --git a/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.cpp b/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.cpp
index 361ccce2fe2..90386dffd51 100644
--- a/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.cpp
+++ b/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.cpp
@@ -3,50 +3,170 @@
#include "distance_calculator_bundle.h"
#include "utils.h"
#include <vespa/searchlib/fef/iqueryenvironment.h>
+#include <vespa/searchlib/fef/query_value.h>
#include <vespa/searchlib/tensor/distance_calculator.h>
+#include <vespa/vespalib/util/exceptions.h>
+#include <vespa/vespalib/util/issue.h>
using search::fef::ITermData;
using search::fef::IllegalHandle;
+using search::fef::InvalidValueTypeException;
+using search::fef::QueryValue;
using search::fef::TermFieldHandle;
+using search::tensor::DistanceCalculator;
+using vespalib::Issue;
namespace search::features {
+namespace {
+
+void
+prepare_query_tensor(const fef::IQueryEnvironment& env,
+ fef::IObjectStore& store,
+ const vespalib::string& query_tensor_name,
+ const vespalib::string& feature_name)
+{
+ try {
+ auto qvalue = QueryValue::from_config(query_tensor_name, env.getIndexEnvironment());
+ qvalue.prepare_shared_state(env, store);
+ } catch (const InvalidValueTypeException& ex) {
+ Issue::report("%s feature: Query tensor '%s' has invalid type '%s'.",
+ feature_name.c_str(), query_tensor_name.c_str(), ex.type_str().c_str());
+ }
+}
+
+std::unique_ptr<DistanceCalculator>
+make_distance_calculator(const fef::IQueryEnvironment& env,
+ const search::attribute::IAttributeVector& attr,
+ const vespalib::string& query_tensor_name,
+ const vespalib::string& feature_name)
+{
+ try {
+ auto qvalue = QueryValue::from_config(query_tensor_name, env.getIndexEnvironment());
+ const auto* query_tensor = qvalue.lookup_value(env.getObjectStore());
+ if (query_tensor == nullptr) {
+ Issue::report("%s feature: Query tensor '%s' is not found in the object store.",
+ feature_name.c_str(), query_tensor_name.c_str());
+ return {};
+ }
+ return DistanceCalculator::make_with_validation(attr, *query_tensor);
+ } catch (const InvalidValueTypeException& ex) {
+ Issue::report("%s feature: Query tensor '%s' has invalid type '%s'.",
+ feature_name.c_str(), query_tensor_name.c_str(), ex.type_str().c_str());
+ } catch (const vespalib::IllegalArgumentException& ex) {
+ Issue::report("%s feature: Could not create distance calculator for attribute '%s' and query tensor '%s': %s.",
+ feature_name.c_str(), attr.getName().c_str(), query_tensor_name.c_str(), ex.getMessage().c_str());
+ }
+ return {};
+}
+
+const search::attribute::IAttributeVector*
+resolve_attribute_for_field(const fef::IQueryEnvironment& env,
+ uint32_t field_id,
+ const vespalib::string& feature_name)
+{
+ const auto* field = env.getIndexEnvironment().getField(field_id);
+ if (field != nullptr) {
+ const auto* attr = env.getAttributeContext().getAttribute(field->name());
+ if (attr == nullptr) {
+ Issue::report("%s feature: The attribute vector '%s' for field id '%u' doesn't exist.",
+ feature_name.c_str(), field->name().c_str(), field_id);
+ }
+ return attr;
+ }
+ return nullptr;
+}
+
+}
+
DistanceCalculatorBundle::Element::Element(fef::TermFieldHandle handle_in)
: handle(handle_in),
calc()
{
}
+DistanceCalculatorBundle::Element::Element(fef::TermFieldHandle handle_in, std::unique_ptr<search::tensor::DistanceCalculator> calc_in)
+ : handle(handle_in),
+ calc(std::move(calc_in))
+{
+}
+
DistanceCalculatorBundle::Element::~Element() = default;
DistanceCalculatorBundle::DistanceCalculatorBundle(const fef::IQueryEnvironment& env,
- uint32_t field_id)
+ uint32_t field_id,
+ const vespalib::string& feature_name)
+
: _elems()
{
_elems.reserve(env.getNumTerms());
+ const auto* attr = resolve_attribute_for_field(env, field_id, feature_name);
for (uint32_t i = 0; i < env.getNumTerms(); ++i) {
search::fef::TermFieldHandle handle = util::getTermFieldHandle(env, i, field_id);
if (handle != search::fef::IllegalHandle) {
- _elems.emplace_back(handle);
+ const auto* term = env.getTerm(i);
+ if (term->query_tensor_name().has_value() && (attr != nullptr)) {
+ _elems.emplace_back(handle, make_distance_calculator(env, *attr, term->query_tensor_name().value(), feature_name));
+ } else {
+ _elems.emplace_back(handle);
+ }
}
}
}
DistanceCalculatorBundle::DistanceCalculatorBundle(const fef::IQueryEnvironment& env,
- const vespalib::string& label)
+ const vespalib::string& label,
+ const vespalib::string& feature_name)
: _elems()
{
- const ITermData *term = util::getTermByLabel(env, label);
+ const ITermData* term = util::getTermByLabel(env, label);
if (term != nullptr) {
// expect numFields() == 1
for (uint32_t i = 0; i < term->numFields(); ++i) {
- TermFieldHandle handle = term->field(i).getHandle();
+ const auto& term_field = term->field(i);
+ TermFieldHandle handle = term_field.getHandle();
if (handle != IllegalHandle) {
- _elems.emplace_back(handle);
+ std::unique_ptr<DistanceCalculator> calc;
+ if (term->query_tensor_name().has_value()) {
+ const auto* attr = resolve_attribute_for_field(env, term_field.getFieldId(), feature_name);
+ if (attr != nullptr) {
+ calc = make_distance_calculator(env, *attr, term->query_tensor_name().value(), feature_name);
+ }
+ }
+ _elems.emplace_back(handle, std::move(calc));
}
}
}
}
+void
+DistanceCalculatorBundle::prepare_shared_state(const fef::IQueryEnvironment& env,
+ fef::IObjectStore& store,
+ uint32_t field_id,
+ const vespalib::string& feature_name)
+{
+ for (uint32_t i = 0; i < env.getNumTerms(); ++i) {
+ search::fef::TermFieldHandle handle = util::getTermFieldHandle(env, i, field_id);
+ if (handle != search::fef::IllegalHandle) {
+ const auto* term = env.getTerm(i);
+ if (term->query_tensor_name().has_value()) {
+ prepare_query_tensor(env, store, term->query_tensor_name().value(), feature_name);
+ }
+ }
+ }
+}
+
+void
+DistanceCalculatorBundle::prepare_shared_state(const fef::IQueryEnvironment& env,
+ fef::IObjectStore& store,
+ const vespalib::string& label,
+ const vespalib::string& feature_name)
+{
+ const auto* term = util::getTermByLabel(env, label);
+ if ((term != nullptr) && term->query_tensor_name().has_value()) {
+ prepare_query_tensor(env, store, term->query_tensor_name().value(), feature_name);
+ }
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.h b/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.h
index d28a315edd1..dd3fc521d96 100644
--- a/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.h
+++ b/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.h
@@ -8,7 +8,10 @@
#include <vector>
namespace search::tensor { class DistanceCalculator; }
-namespace search::fef { class IQueryEnvironment; }
+namespace search::fef {
+class IObjectStore;
+class IQueryEnvironment;
+}
namespace search::features {
@@ -25,16 +28,32 @@ public:
std::unique_ptr<search::tensor::DistanceCalculator> calc;
Element(Element&& rhs) noexcept = default; // Needed as std::vector::reserve() is used.
Element(fef::TermFieldHandle handle_in);
+ Element(fef::TermFieldHandle handle_in, std::unique_ptr<search::tensor::DistanceCalculator> calc_in);
~Element();
};
private:
std::vector<Element> _elems;
public:
- DistanceCalculatorBundle(const fef::IQueryEnvironment& env, uint32_t field_id);
- DistanceCalculatorBundle(const fef::IQueryEnvironment& env, const vespalib::string& label);
+ DistanceCalculatorBundle(const fef::IQueryEnvironment& env,
+ uint32_t field_id,
+ const vespalib::string& feature_name);
+
+ DistanceCalculatorBundle(const fef::IQueryEnvironment& env,
+ const vespalib::string& label,
+ const vespalib::string& feature_name);
const std::vector<Element>& elements() const { return _elems; }
+
+ static void prepare_shared_state(const fef::IQueryEnvironment& env,
+ fef::IObjectStore& store,
+ uint32_t field_id,
+ const vespalib::string& feature_name);
+
+ static void prepare_shared_state(const fef::IQueryEnvironment& env,
+ fef::IObjectStore& store,
+ const vespalib::string& label,
+ const vespalib::string& feature_name);
};
}
diff --git a/searchlib/src/vespa/searchlib/features/distancefeature.cpp b/searchlib/src/vespa/searchlib/features/distancefeature.cpp
index 4a279da9bdd..260d78cf7a1 100644
--- a/searchlib/src/vespa/searchlib/features/distancefeature.cpp
+++ b/searchlib/src/vespa/searchlib/features/distancefeature.cpp
@@ -37,13 +37,13 @@ public:
};
ConvertRawscoreToDistance::ConvertRawscoreToDistance(const fef::IQueryEnvironment &env, uint32_t fieldId)
- : _bundle(env, fieldId),
+ : _bundle(env, fieldId, "distance"),
_md(nullptr)
{
}
ConvertRawscoreToDistance::ConvertRawscoreToDistance(const fef::IQueryEnvironment &env, const vespalib::string &label)
- : _bundle(env, label),
+ : _bundle(env, label, "distance"),
_md(nullptr)
{
}
@@ -233,6 +233,17 @@ DistanceBlueprint::setup(const IIndexEnvironment & env,
return false;
}
+void
+DistanceBlueprint::prepareSharedState(const fef::IQueryEnvironment& env, fef::IObjectStore& store) const
+{
+ if (_use_nns_tensor) {
+ DistanceCalculatorBundle::prepare_shared_state(env, store, _attr_id, "distance");
+ }
+ if (_use_item_label) {
+ DistanceCalculatorBundle::prepare_shared_state(env, store, _arg_string, "distance");
+ }
+}
+
FeatureExecutor &
DistanceBlueprint::createExecutor(const IQueryEnvironment &env, vespalib::Stash &stash) const
{
diff --git a/searchlib/src/vespa/searchlib/features/distancefeature.h b/searchlib/src/vespa/searchlib/features/distancefeature.h
index 6eff0380c3a..bf578d45f42 100644
--- a/searchlib/src/vespa/searchlib/features/distancefeature.h
+++ b/searchlib/src/vespa/searchlib/features/distancefeature.h
@@ -63,6 +63,7 @@ public:
return fef::ParameterDescriptions().desc().string().desc().string().string();
}
bool setup(const fef::IIndexEnvironment & env, const fef::ParameterList & params) override;
+ void prepareSharedState(const fef::IQueryEnvironment& env, fef::IObjectStore& store) const override;
fef::FeatureExecutor &createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const override;
};
diff --git a/searchlib/src/vespa/searchlib/fef/query_value.cpp b/searchlib/src/vespa/searchlib/fef/query_value.cpp
index d9cdb0aa23d..a60a24425b5 100644
--- a/searchlib/src/vespa/searchlib/fef/query_value.cpp
+++ b/searchlib/src/vespa/searchlib/fef/query_value.cpp
@@ -161,6 +161,8 @@ QueryValue::QueryValue(const vespalib::string& key, const vespalib::eval::ValueT
{
}
+QueryValue::~QueryValue() = default;
+
QueryValue
QueryValue::from_config(const vespalib::string& key, const IIndexEnvironment& env)
{
diff --git a/searchlib/src/vespa/searchlib/fef/query_value.h b/searchlib/src/vespa/searchlib/fef/query_value.h
index 477b6aa451f..3cdb90ea871 100644
--- a/searchlib/src/vespa/searchlib/fef/query_value.h
+++ b/searchlib/src/vespa/searchlib/fef/query_value.h
@@ -59,6 +59,7 @@ private:
public:
QueryValue();
QueryValue(const vespalib::string& key, const vespalib::eval::ValueType& type);
+ ~QueryValue();
/**
* Create a QueryValue using properties from the given index environment to extract the value type.