summary | refs | log | tree | commit | diff | stats
path: root/searchlib
diff options
context:
space:
mode:
author: Tor Egge <Tor.Egge@online.no> 2023-02-27 15:17:49 +0100
committer: Tor Egge <Tor.Egge@online.no> 2023-02-27 15:17:49 +0100
commit: 7ec09d5ab437590cdeea50397ea845b07e18f786 (patch)
tree: 2ff51fda437ffcff5aed999dd48087f1e9d56406 /searchlib
parent: 023d8a0851d321c0f33ba6dde16e1b1fa2fe12ce (diff)
Add closest feature.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/CMakeLists.txt 1
-rw-r--r-- searchlib/src/tests/features/closest/CMakeLists.txt 10
-rw-r--r-- searchlib/src/tests/features/closest/closest_test.cpp 149
-rw-r--r-- searchlib/src/vespa/searchlib/features/CMakeLists.txt 1
-rw-r--r-- searchlib/src/vespa/searchlib/features/closenessfeature.cpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/features/closest_feature.cpp 288
-rw-r--r-- searchlib/src/vespa/searchlib/features/closest_feature.h 33
-rw-r--r-- searchlib/src/vespa/searchlib/features/distance_calculator_bundle.cpp 4
-rw-r--r-- searchlib/src/vespa/searchlib/features/distance_calculator_bundle.h 2
-rw-r--r-- searchlib/src/vespa/searchlib/features/distancefeature.cpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/features/setup.cpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/fef/parameterdescriptions.h 4
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/distance_calculator.h 10
-rw-r--r-- searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.cpp 30
-rw-r--r-- searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.h 12
15 files changed, 539 insertions, 11 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt
index 03429b956a4..4ccab1a9380 100644
--- a/searchlib/CMakeLists.txt
+++ b/searchlib/CMakeLists.txt
@@ -129,6 +129,7 @@ vespa_define_module(
src/tests/features
src/tests/features/beta
src/tests/features/bm25
+ src/tests/features/closest
src/tests/features/constant
src/tests/features/element_completeness
src/tests/features/element_similarity_feature
diff --git a/searchlib/src/tests/features/closest/CMakeLists.txt b/searchlib/src/tests/features/closest/CMakeLists.txt
new file mode 100644
index 00000000000..71572c5e5a2
--- /dev/null
+++ b/searchlib/src/tests/features/closest/CMakeLists.txt
@@ -0,0 +1,10 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+vespa_add_executable(searchlib_closest_test_app TEST
+ SOURCES
+ closest_test.cpp
+ DEPENDS
+ searchlib
+ searchlib_test
+)
+vespa_add_test(NAME searchlib_closest_test_app COMMAND searchlib_closest_test_app)
diff --git a/searchlib/src/tests/features/closest/closest_test.cpp b/searchlib/src/tests/features/closest/closest_test.cpp
new file mode 100644
index 00000000000..c903cc79aac
--- /dev/null
+++ b/searchlib/src/tests/features/closest/closest_test.cpp
@@ -0,0 +1,149 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/eval/eval/fast_value.h>
+#include <vespa/eval/eval/tensor_spec.h>
+#include <vespa/eval/eval/value.h>
+#include <vespa/eval/eval/value_codec.h>
+#include <vespa/searchlib/features/closest_feature.h>
+#include <vespa/searchlib/features/setup.h>
+#include <vespa/searchlib/fef/test/dummy_dependency_handler.h>
+#include <vespa/searchlib/fef/test/labels.h>
+#include <vespa/searchlib/test/features/distance_closeness_fixture.h>
+#include <vespa/vespalib/stllike/asciistream.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <vespa/vespalib/util/stringfmt.h>
+
+using search::feature_t;
+using search::features::test::BlueprintFactoryFixture;
+using search::features::test::DistanceClosenessFixture;
+using search::features::test::FeatureDumpFixture;
+using search::features::test::IndexEnvironmentFixture;
+using search::features::ClosestBlueprint;
+using vespalib::eval::FastValueBuilderFactory;
+using vespalib::eval::TensorSpec;
+using vespalib::eval::Value;
+using vespalib::eval::spec_from_value;
+using vespalib::eval::value_from_spec;
+
+const vespalib::string field_and_label_feature_name("closest(bar,nns)");
+const vespalib::string field_feature_name("closest(bar)");
+
+const vespalib::string dense_tensor_type("tensor(x[2])");
+const vespalib::string mixed_tensor_type("tensor(a{},x[2])");
+const vespalib::string sparse_tensor_type("tensor(a{})");
+
+TensorSpec no_subspace(sparse_tensor_type);
+TensorSpec subspace_a = TensorSpec::from_expr("tensor(a{}):{{a:\"a\"}:1}");
+TensorSpec subspace_b = TensorSpec::from_expr("tensor(a{}):{{a:\"b\"}:1}");
+
+TensorSpec doc_tensor = TensorSpec::from_expr("tensor(a{},x[2]):{{a:\"a\",x:0}:3,{a:\"a\",x:1}:10,{a:\"b\",x:0}:5,{a:\"b\",x:1}:10}");
+
+using RankFixture = DistanceClosenessFixture;
+
+TensorSpec get_spec(RankFixture& f, uint32_t docid) { // Returns the fixture's object feature for docid, converted to a TensorSpec.
+ return spec_from_value(f.getObject(docid).get());
+}
+
+struct TestParam // Parameter for ClosestTest: a display name plus the attribute flavour to exercise.
+{
+ vespalib::string _name; // text written by operator<< for this parameter
+ bool _direct_tensor; // handed to the rank fixture as its direct_tensor argument
+ TestParam(vespalib::string name, bool direct_tensor)
+ : _name(std::move(name)),
+ _direct_tensor(direct_tensor)
+ {
+ }
+ ~TestParam();
+};
+
+TestParam::~TestParam() = default; // defined out of line
+
+std::ostream& operator<<(std::ostream& os, const TestParam& param) // Writes the parameter's name to os and returns os; takes the parameter by const reference to avoid copying its string.
+{
+ os << param._name;
+ return os;
+}
+
+
+class ClosestTest : public ::testing::TestWithParam<TestParam> // Parameterized fixture; the parameter selects the attribute flavour via direct_tensor().
+{
+protected:
+ ClosestTest();
+ ~ClosestTest();
+ bool direct_tensor() const noexcept { return GetParam()._direct_tensor; } // flag taken from the current test parameter
+ void assert_closest(const Labels& labels, const vespalib::string& feature_name, const vespalib::string& query_tensor, const TensorSpec& exp_spec); // one query tensor, one expected output
+ void assert_closest(const Labels& labels, const vespalib::string& feature_name, const std::vector<TensorSpec>& exp_specs); // two fixed query tensors, one expected output each
+};
+
+ClosestTest::ClosestTest()
+ : testing::TestWithParam<TestParam>()
+{
+}
+
+ClosestTest::~ClosestTest() = default;
+
+void
+ClosestTest::assert_closest(const Labels& labels, const vespalib::string& feature_name, const vespalib::string& query_tensor, const TensorSpec& exp_spec) // Builds a rank fixture for the given labels, feature and query tensor, then checks the feature output for docid 9.
+{
+ RankFixture f2(mixed_tensor_type, direct_tensor(), 0, 1, labels, feature_name,
+ dense_tensor_type + ":" + query_tensor); // query tensor expression is "tensor(x[2]):" followed by the cell list
+ ASSERT_FALSE(f2.failed());
+ SCOPED_TRACE(query_tensor); // tag later failures with the query tensor in use
+ f2.set_attribute_tensor(9, doc_tensor); // the document tensor is stored for docid 9 only
+ EXPECT_EQ(exp_spec, get_spec(f2, 9));
+}
+
+void
+ClosestTest::assert_closest(const Labels& labels, const vespalib::string& feature_name, const std::vector<TensorSpec>& exp_specs) // Runs the single-query check twice; exp_specs must hold at least two entries (indexing is unchecked).
+{
+ assert_closest(labels, feature_name, "[9,10]", exp_specs[0]); // expectation for query cells [9,10]
+ assert_closest(labels, feature_name, "[1,10]", exp_specs[1]); // expectation for query cells [1,10]
+}
+
+INSTANTIATE_TEST_SUITE_P(ClosestMultiTest, // Instantiates the TEST_P cases of ClosestTest once per parameter value listed here.
+ ClosestTest,
+ testing::Values(TestParam("Serialized", false), // direct_tensor == false
+ TestParam("Direct", true)), // direct_tensor == true
+ testing::PrintToStringParamName());
+
+TEST(ClosestTest, require_that_blueprint_can_be_created_from_factory)
+{
+ BlueprintFactoryFixture f;
+ Blueprint::SP bp = f.factory.createBlueprint("closest");
+ EXPECT_TRUE(bp.get() != nullptr); // the factory must return a blueprint for the name "closest"
+ EXPECT_TRUE(dynamic_cast<ClosestBlueprint*>(bp.get()) != nullptr); // and that blueprint must be a ClosestBlueprint
+}
+
+TEST(ClosestTest, require_that_no_features_are_dumped)
+{
+ ClosestBlueprint f1;
+ IndexEnvironmentFixture f2;
+ FeatureDumpFixture f3;
+ f1.visitDumpFeatures(f2.indexEnv, f3); // NOTE(review): no explicit expectation follows; presumably FeatureDumpFixture reports a failure if anything is dumped - confirm
+}
+
+TEST_P(ClosestTest, require_that_no_label_gives_empty_result)
+{
+ NoLabel f1;
+ assert_closest(f1, field_and_label_feature_name, {no_subspace, no_subspace}); // closest(bar,nns) with no label injected: the empty sparse tensor is expected for both queries
+}
+
+TEST_P(ClosestTest, require_that_unrelated_label_gives_empty_result)
+{
+ SingleLabel f1("unrelated", 1); // label differs from the "nns" label named in closest(bar,nns)
+ assert_closest(f1, field_and_label_feature_name, {no_subspace, no_subspace}); // the empty sparse tensor is expected for both queries
+}
+
+TEST_P(ClosestTest, closest_using_field_setup)
+{
+ NoLabel f1;
+ assert_closest(f1, field_feature_name, {subspace_b, subspace_a}); // doc subspaces are a=[3,10] and b=[5,10]: query [9,10] expects b, query [1,10] expects a
+}
+
+TEST_P(ClosestTest, closest_using_field_and_label_setup)
+{
+ SingleLabel f1("nns", 1); // same label as the one named in closest(bar,nns)
+ assert_closest(f1, field_and_label_feature_name, {subspace_b, subspace_a}); // doc subspaces are a=[3,10] and b=[5,10]: query [9,10] expects b, query [1,10] expects a
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/vespa/searchlib/features/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
index 8acf28f4a2f..4af5c0e561e 100644
--- a/searchlib/src/vespa/searchlib/features/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
@@ -7,6 +7,7 @@ vespa_add_library(searchlib_features OBJECT
attributematchfeature.cpp
bm25_feature.cpp
closenessfeature.cpp
+ closest_feature.cpp
constant_feature.cpp
debug_attribute_wait.cpp
debug_wait.cpp
diff --git a/searchlib/src/vespa/searchlib/features/closenessfeature.cpp b/searchlib/src/vespa/searchlib/features/closenessfeature.cpp
index e44c94dbb2d..048a507b3fd 100644
--- a/searchlib/src/vespa/searchlib/features/closenessfeature.cpp
+++ b/searchlib/src/vespa/searchlib/features/closenessfeature.cpp
@@ -36,7 +36,7 @@ ConvertRawScoreToCloseness::ConvertRawScoreToCloseness(const fef::IQueryEnvironm
}
ConvertRawScoreToCloseness::ConvertRawScoreToCloseness(const fef::IQueryEnvironment &env, const vespalib::string &label)
- : _bundle(env, label, "closeness"),
+ : _bundle(env, std::nullopt, label, "closeness"),
_md(nullptr)
{
}
diff --git a/searchlib/src/vespa/searchlib/features/closest_feature.cpp b/searchlib/src/vespa/searchlib/features/closest_feature.cpp
new file mode 100644
index 00000000000..335d5c6a1c9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/closest_feature.cpp
@@ -0,0 +1,288 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "closest_feature.h"
+#include "constant_tensor_executor.h"
+#include "distance_calculator_bundle.h"
+#include "valuefeature.h"
+#include <vespa/eval/eval/fast_value.h>
+#include <vespa/eval/eval/value_codec.h>
+#include <vespa/searchcommon/common/schema.h>
+#include <vespa/searchlib/fef/indexproperties.h>
+#include <vespa/searchlib/fef/parameterdescriptions.h>
+#include <vespa/searchlib/fef/test/dummy_dependency_handler.h>
+#include <vespa/searchlib/tensor/distance_calculator.h>
+#include <vespa/searchlib/tensor/fast_value_view.h>
+#include <vespa/searchlib/tensor/i_tensor_attribute.h>
+#include <vespa/searchlib/tensor/serialized_tensor_ref.h>
+#include <vespa/searchlib/tensor/subspace_type.h>
+#include <vespa/vespalib/util/stash.h>
+
+#include <vespa/log/log.h>
+LOG_SETUP(".features.closest_feature");
+
+using search::fef::FeatureType;
+using search::fef::FieldInfo;
+using search::fef::ParameterDataTypeSet;
+using search::tensor::FastValueView;
+using search::tensor::ITensorAttribute;
+using search::tensor::SubspaceType;
+using search::tensor::VectorBundle;
+using vespalib::eval::CellType;
+using vespalib::eval::FastValueBuilderFactory;
+using vespalib::eval::TypedCells;
+using vespalib::eval::TypifyCellType;
+using vespalib::eval::Value;
+using vespalib::eval::ValueType;
+using vespalib::string_id;
+using vespalib::typify_invoke;
+
+using namespace search::fef::indexproperties;
+
+namespace {
+
+struct SetIdentity { // Functor used with typify_invoke to write the value 1.0 as one cell of the selected cell type.
+ template <typename T>
+ static void invoke(void *space, size_t size) { // space must provide exactly sizeof(T) bytes
+ assert(size == sizeof(T));
+ *static_cast<T *>(space) = 1.0; // named cast replaces the C-style cast; the stored value is unchanged
+ }
+};
+
+void setup_identity_cells(const ValueType& type, std::vector<char>& space, TypedCells& cells) // Sizes space for one cell of type's cell type, points cells at it and stores 1.0 there.
+{
+ if (type.is_double()) {
+ return; // for a double type, space and cells are left untouched
+ }
+ space.resize(vespalib::eval::CellTypeUtils::mem_size(type.cell_type(), 1)); // room for exactly one cell
+ cells = TypedCells(space.data(), type.cell_type(), 1); // cells refers to the buffer in space, so space must stay alive and unresized while cells is used
+ typify_invoke<1,TypifyCellType,SetIdentity>(type.cell_type(), space.data(), space.size()); // dispatch on the cell type to write the value
+}
+
+}
+
+namespace search::features {
+
+/**
+ * Implements the executor for the closest feature. Base class holding the state shared by the serialized and direct variants.
+ */
+class ClosestExecutor : public fef::FeatureExecutor {
+protected:
+ DistanceCalculatorBundle _bundle; // distance calculators evaluated for each document
+ Value& _empty_output; // output used when no closest subspace is found; referenced, not owned
+ TypedCells _identity; // cells passed to the FastValueView built for a found subspace
+ const ITensorAttribute& _attr; // attribute the document tensors are read from
+ std::unique_ptr<Value> _output; // last non-empty output value; owned here because outputs() only receives a reference
+public:
+ ClosestExecutor(DistanceCalculatorBundle&& bundle, Value& empty_output, TypedCells identity, const ITensorAttribute& attr);
+ ~ClosestExecutor() override;
+ static fef::FeatureExecutor& make(DistanceCalculatorBundle&& bundle, Value& empty_output, TypedCells identity, const ITensorAttribute& attr, vespalib::Stash& stash); // creates the variant matching what attr supports
+};
+
+class ClosestSerializedExecutor : public ClosestExecutor { // Variant that reads documents through get_serialized_tensor_ref().
+public:
+ ClosestSerializedExecutor(DistanceCalculatorBundle&& bundle, Value& empty_output, TypedCells identity, const ITensorAttribute& attr);
+ ~ClosestSerializedExecutor() override;
+ void execute(uint32_t docId) override; // sets output 0 for docId
+};
+
+class ClosestDirectExecutor : public ClosestExecutor { // Variant that reads documents through get_tensor_ref().
+ SubspaceType _subspace_type; // built from the attribute's tensor type
+ std::vector<string_id> _labels; // one slot per mapped dimension of the attribute's tensor type
+ std::vector<string_id*> _label_ptrs; // pointers to the _labels slots, handed to next_result()
+public:
+ ClosestDirectExecutor(DistanceCalculatorBundle&& bundle, Value& empty_output, TypedCells identity, const ITensorAttribute& attr);
+ ~ClosestDirectExecutor() override;
+ void execute(uint32_t docId) override; // sets output 0 for docId
+};
+
+ClosestExecutor::ClosestExecutor(DistanceCalculatorBundle&& bundle, Value& empty_output, TypedCells identity, const ITensorAttribute& attr) // Takes over the bundle; empty_output and attr are kept as references.
+ : _bundle(std::move(bundle)),
+ _empty_output(empty_output),
+ _identity(identity),
+ _attr(attr),
+ _output() // stays null until an execute() call finds a closest subspace
+{
+}
+
+ClosestExecutor::~ClosestExecutor() = default;
+
+fef::FeatureExecutor&
+ClosestExecutor::make(DistanceCalculatorBundle&& bundle, Value& empty_output, TypedCells identity, const ITensorAttribute& attr, vespalib::Stash& stash) // Creates the executor in stash, choosing the variant from what attr supports.
+{
+ if (attr.supports_get_serialized_tensor_ref()) { // serialized access is tried first
+ return stash.create<ClosestSerializedExecutor>(std::move(bundle), empty_output, identity, attr);
+ } else if (attr.supports_get_tensor_ref()) {
+ return stash.create<ClosestDirectExecutor>(std::move(bundle), empty_output, identity, attr);
+ } else {
+ return ConstantTensorExecutor::createEmpty(empty_output.type(), stash); // neither access method is supported: fall back to an empty-tensor executor and leave bundle unused
+ }
+}
+
+ClosestSerializedExecutor::ClosestSerializedExecutor(DistanceCalculatorBundle&& bundle, Value& empty_output, TypedCells identity, const ITensorAttribute& attr) // Forwards everything to the base class; this variant adds no state.
+ : ClosestExecutor(std::move(bundle), empty_output, identity, attr)
+{
+}
+
+ClosestSerializedExecutor::~ClosestSerializedExecutor() = default;
+
+void
+ClosestSerializedExecutor::execute(uint32_t docId) // Sets output 0 to a value built from the labels of the closest subspace of docId's tensor, or to the empty output if none is found.
+{
+ double best_distance = 0.0; // only meaningful once closest_subspace holds a value
+ std::optional<uint32_t> closest_subspace;
+ auto ref = _attr.get_serialized_tensor_ref(docId);
+ for (const auto& elem : _bundle.elements()) {
+ elem.calc->calc_closest_subspace(ref.get_vectors(), closest_subspace, best_distance); // the same two out-arguments are passed to every calculator in the bundle
+ }
+ if (closest_subspace.has_value()) {
+ auto labels = ref.get_labels(closest_subspace.value());
+ _output = std::make_unique<FastValueView>(_empty_output.type(), labels, _identity, labels.size(), 1); // stored in a member so the value outlives this call
+ outputs().set_object(0, *_output);
+ } else {
+ outputs().set_object(0, _empty_output);
+ }
+}
+
+ClosestDirectExecutor::ClosestDirectExecutor(DistanceCalculatorBundle&& bundle, Value& empty_output, TypedCells identity, const ITensorAttribute& attr) // Sets up the label slots and the pointer table used when iterating a tensor's index.
+ : ClosestExecutor(std::move(bundle), empty_output, identity, attr),
+ _subspace_type(attr.getTensorType()),
+ _labels(attr.getTensorType().count_mapped_dimensions()), // one label slot per mapped dimension
+ _label_ptrs(_labels.size())
+{
+ for (size_t i = 0; i < _labels.size(); ++i) {
+ _label_ptrs[i] = &_labels[i]; // _labels is never resized after this, so the pointers stay valid
+ }
+}
+
+ClosestDirectExecutor::~ClosestDirectExecutor() = default;
+
+void
+ClosestDirectExecutor::execute(uint32_t docId) // Sets output 0 to a value built from the labels of the closest subspace of docId's tensor, or to the empty output if none is found.
+{
+ double best_distance = 0.0; // only meaningful once closest_subspace holds a value
+ std::optional<uint32_t> closest_subspace;
+ auto& tensor = _attr.get_tensor_ref(docId);
+ VectorBundle vectors(tensor.cells().data, tensor.index().size(), _subspace_type); // wraps the tensor's cells, with index().size() as the subspace count
+ for (const auto& elem : _bundle.elements()) {
+ elem.calc->calc_closest_subspace(vectors, closest_subspace, best_distance); // the same two out-arguments are passed to every calculator in the bundle
+ }
+ if (closest_subspace.has_value()) {
+ size_t subspace_id = 0;
+ auto view = tensor.index().create_view({});
+ view->lookup({}); // NOTE(review): empty dimension and address lists; presumably this iterates every subspace - confirm
+ while (view->next_result(_label_ptrs, subspace_id)) { // presumably writes the labels into _labels through _label_ptrs - confirm
+ if (subspace_id == closest_subspace.value()) {
+ _output = std::make_unique<FastValueView>(_empty_output.type(), _labels, _identity, _labels.size(), 1); // stored in a member so the value outlives this call
+ outputs().set_object(0, *_output);
+ return;
+ }
+ }
+ }
+ outputs().set_object(0, _empty_output); // reached when no subspace was selected or the selected id was not seen in the index
+}
+
+ClosestBlueprint::ClosestBlueprint() // Registers under the base name "closest"; every member starts in an unset state until setup() runs.
+ : Blueprint("closest"),
+ _field_name(),
+ _field_tensor_type(ValueType::error_type()), // replaced in setup()
+ _output_tensor_type(ValueType::error_type()), // replaced in setup()
+ _field_id(search::index::Schema::UNKNOWN_FIELD_ID),
+ _item_label(), // empty optional: no label parameter given
+ _empty_output(),
+ _identity_space(),
+ _identity_cells()
+{
+}
+
+ClosestBlueprint::~ClosestBlueprint() = default;
+
+void
+ClosestBlueprint::visitDumpFeatures(const fef::IIndexEnvironment&, fef::IDumpFeatureVisitor&) const // Intentionally empty: this blueprint never calls the visitor.
+{
+}
+
+std::unique_ptr<fef::Blueprint>
+ClosestBlueprint::createInstance() const // Returns a fresh, default-constructed ClosestBlueprint; no state is copied from this object.
+{
+ return std::make_unique<ClosestBlueprint>();
+}
+
+fef::ParameterDescriptions
+ClosestBlueprint::getDescriptions() const // Declares the two accepted parameter lists: (attribute) and (attribute, string).
+{
+ auto data_type_set = ParameterDataTypeSet::tensor_type_set(); // restricts the attribute parameter to the tensor data type
+ return fef::ParameterDescriptions().
+ desc().attribute(data_type_set, fef::ParameterCollection::SINGLE). // form 1: single-value tensor attribute
+ desc().attribute(data_type_set, fef::ParameterCollection::SINGLE).string(); // form 2: same attribute followed by a string (stored as the item label in setup())
+}
+
+bool
+ClosestBlueprint::setup(const fef::IIndexEnvironment & env, const fef::ParameterList & params) // Validates the parameters and the field, derives the output type and prepares the shared empty/identity values. Returns false (after logging) on any validation failure.
+{
+ if (params.size() < 1 || params.size() > 2) {
+ LOG(error, "%s: Wrong number of parameters, was %d, must be 1 or 2", getName().c_str(), (int) params.size());
+ return false;
+ }
+ _field_name = params[0].getValue();
+ if (params.size() == 2) {
+ _item_label = params[1].getValue(); // optional second parameter: query item label
+ }
+ auto fi = env.getFieldByName(_field_name);
+ if (fi == nullptr) {
+ LOG(error, "%s: Unknown field %s", getName().c_str(), _field_name.c_str());
+ return false;
+ }
+ auto dt = fi->get_data_type();
+ auto ct = fi->collection();
+ if (dt != search::index::schema::DataType::TENSOR ||
+ ct != search::index::schema::CollectionType::SINGLE) {
+ LOG(error, "%s: Field %s is not a single value tensor field", getName().c_str(), _field_name.c_str());
+ return false;
+ }
+ if (!fi->hasAttribute()) {
+ LOG(error, "%s: Field %s does not have an attribute", getName().c_str(), _field_name.c_str());
+ return false;
+ }
+ vespalib::string attr_type_spec = type::Attribute::lookup(env.getProperties(), _field_name); // tensor type spec is read from the index properties
+ if (attr_type_spec.empty()) {
+ LOG(error, "%s: Field %s lacks a type in index properties", getName().c_str(), _field_name.c_str());
+ return false;
+ }
+ _field_tensor_type = ValueType::from_spec(attr_type_spec);
+ if (_field_tensor_type.is_error() || _field_tensor_type.is_double() || _field_tensor_type.count_mapped_dimensions() == 0) { // at least one mapped dimension is required
+ LOG(error, "%s: Field %s invalid type: '%s'", getName().c_str(), _field_name.c_str(), attr_type_spec.c_str());
+ return false;
+ }
+ _output_tensor_type = ValueType::make_type(_field_tensor_type.cell_type(), _field_tensor_type.mapped_dimensions()); // output keeps the cell type and only the mapped dimensions of the field type
+ assert(!_output_tensor_type.is_double());
+ FeatureType output_type = FeatureType::object(_output_tensor_type);
+ describeOutput("out", "The closest tensor subspace.", output_type);
+ _field_id = fi->id();
+ _empty_output = vespalib::eval::value_from_spec(_output_tensor_type.to_spec(), FastValueBuilderFactory::get()); // value built from the bare type spec, used as the "nothing found" output
+ setup_identity_cells(_output_tensor_type, _identity_space, _identity_cells); // _identity_cells refers to storage in _identity_space
+ return true;
+}
+
+void
+ClosestBlueprint::prepareSharedState(const fef::IQueryEnvironment& env, fef::IObjectStore& store) const // Delegates to DistanceCalculatorBundle::prepare_shared_state, keyed by label if one was given, else by field id.
+{
+ if (_item_label.has_value()) {
+ DistanceCalculatorBundle::prepare_shared_state(env, store, _item_label.value(), "closest");
+ } else {
+ DistanceCalculatorBundle::prepare_shared_state(env, store, _field_id, "closest");
+ }
+}
+
+fef::FeatureExecutor&
+ClosestBlueprint::createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const // Builds the distance calculator bundle for this query and returns an executor allocated in stash.
+{
+ auto bundle = _item_label.has_value() ? DistanceCalculatorBundle(env, _field_id, _item_label.value(), "closest") : DistanceCalculatorBundle(env, _field_id, "closest"); // with a label: field id plus label; without: field id only
+ if (bundle.elements().empty()) {
+ return ConstantTensorExecutor::createEmpty(_output_tensor_type, stash); // no calculators for this query: constant empty tensor
+ } else {
+ auto& attr = bundle.elements().front().calc->attribute_tensor(); // the attribute is taken from the first calculator in the bundle
+ return ClosestExecutor::make(std::move(bundle), *_empty_output, _identity_cells, attr, stash); // the executor receives a reference and a cell view into state owned by this blueprint
+ }
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/features/closest_feature.h b/searchlib/src/vespa/searchlib/features/closest_feature.h
new file mode 100644
index 00000000000..840f896abe2
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/closest_feature.h
@@ -0,0 +1,33 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+#include <optional>
+
+namespace search::features {
+
+/**
+ * Implements the blueprint for the closest executor. The feature takes a tensor attribute field and an optional label.
+ */
+class ClosestBlueprint : public fef::Blueprint {
+ vespalib::string _field_name; // first feature parameter
+ vespalib::eval::ValueType _field_tensor_type; // tensor type of the field, parsed in setup()
+ vespalib::eval::ValueType _output_tensor_type; // type of the feature output, derived in setup()
+ uint32_t _field_id; // id of the field, set in setup()
+ std::optional<vespalib::string> _item_label; // second feature parameter, if given
+ std::unique_ptr<vespalib::eval::Value> _empty_output; // value handed to executors as their empty output
+ std::vector<char> _identity_space; // backing storage for _identity_cells
+ vespalib::eval::TypedCells _identity_cells; // single-cell view handed to executors
+public:
+ ClosestBlueprint();
+ ~ClosestBlueprint() override;
+ void visitDumpFeatures(const fef::IIndexEnvironment & env, fef::IDumpFeatureVisitor & visitor) const override;
+ std::unique_ptr<fef::Blueprint> createInstance() const override;
+ fef::ParameterDescriptions getDescriptions() const override;
+ bool setup(const fef::IIndexEnvironment & env, const fef::ParameterList & params) override;
+ void prepareSharedState(const fef::IQueryEnvironment& env, fef::IObjectStore& store) const override;
+ fef::FeatureExecutor &createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const override;
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.cpp b/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.cpp
index 4b2d67c933d..fad4c649165 100644
--- a/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.cpp
+++ b/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.cpp
@@ -115,6 +115,7 @@ DistanceCalculatorBundle::DistanceCalculatorBundle(const fef::IQueryEnvironment&
}
DistanceCalculatorBundle::DistanceCalculatorBundle(const fef::IQueryEnvironment& env,
+ std::optional<uint32_t> field_id,
const vespalib::string& label,
const vespalib::string& feature_name)
: _elems()
@@ -124,6 +125,9 @@ DistanceCalculatorBundle::DistanceCalculatorBundle(const fef::IQueryEnvironment&
// expect numFields() == 1
for (uint32_t i = 0; i < term->numFields(); ++i) {
const auto& term_field = term->field(i);
+ if (field_id.has_value() && field_id.value() != term_field.getFieldId()) {
+ continue;
+ }
TermFieldHandle handle = term_field.getHandle();
if (handle != IllegalHandle) {
std::unique_ptr<DistanceCalculator> calc;
diff --git a/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.h b/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.h
index 35295c771a6..e3be52aecc5 100644
--- a/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.h
+++ b/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.h
@@ -5,6 +5,7 @@
#include <vespa/searchlib/fef/handle.h>
#include <vespa/vespalib/stllike/string.h>
#include <memory>
+#include <optional>
#include <vector>
namespace search::tensor { class DistanceCalculator; }
@@ -40,6 +41,7 @@ public:
const vespalib::string& feature_name);
DistanceCalculatorBundle(const fef::IQueryEnvironment& env,
+ std::optional<uint32_t> field_id,
const vespalib::string& label,
const vespalib::string& feature_name);
diff --git a/searchlib/src/vespa/searchlib/features/distancefeature.cpp b/searchlib/src/vespa/searchlib/features/distancefeature.cpp
index 40f994c18e9..f601c91a0b2 100644
--- a/searchlib/src/vespa/searchlib/features/distancefeature.cpp
+++ b/searchlib/src/vespa/searchlib/features/distancefeature.cpp
@@ -44,7 +44,7 @@ ConvertRawscoreToDistance::ConvertRawscoreToDistance(const fef::IQueryEnvironmen
}
ConvertRawscoreToDistance::ConvertRawscoreToDistance(const fef::IQueryEnvironment &env, const vespalib::string &label)
- : _bundle(env, label, "distance"),
+ : _bundle(env, std::nullopt, label, "distance"),
_md(nullptr)
{
}
diff --git a/searchlib/src/vespa/searchlib/features/setup.cpp b/searchlib/src/vespa/searchlib/features/setup.cpp
index 2bc8a349d1b..5e152d4b455 100644
--- a/searchlib/src/vespa/searchlib/features/setup.cpp
+++ b/searchlib/src/vespa/searchlib/features/setup.cpp
@@ -6,6 +6,7 @@
#include "attributematchfeature.h"
#include "bm25_feature.h"
#include "closenessfeature.h"
+#include "closest_feature.h"
#include "constant_feature.h"
#include "debug_attribute_wait.h"
#include "debug_wait.h"
@@ -75,6 +76,7 @@ void setup_search_features(fef::IBlueprintRegistry & registry)
registry.addPrototype(std::make_shared<AttributeMatchBlueprint>());
registry.addPrototype(std::make_shared<Bm25Blueprint>());
registry.addPrototype(std::make_shared<ClosenessBlueprint>());
+ registry.addPrototype(std::make_shared<ClosestBlueprint>());
registry.addPrototype(std::make_shared<DebugAttributeWaitBlueprint>());
registry.addPrototype(std::make_shared<DebugWaitBlueprint>());
registry.addPrototype(std::make_shared<DistanceBlueprint>());
diff --git a/searchlib/src/vespa/searchlib/fef/parameterdescriptions.h b/searchlib/src/vespa/searchlib/fef/parameterdescriptions.h
index e47ce0df7a5..46a932696ca 100644
--- a/searchlib/src/vespa/searchlib/fef/parameterdescriptions.h
+++ b/searchlib/src/vespa/searchlib/fef/parameterdescriptions.h
@@ -71,6 +71,7 @@ private:
asMask(DataType::REFERENCE) |
asMask(DataType::COMBINED));
}
+ static uint32_t tensor_type_mask() { return asMask(DataType::TENSOR); }
ParameterDataTypeSet(uint32_t typeMask)
: _typeMask(typeMask)
{
@@ -87,8 +88,9 @@ public:
return ParameterDataTypeSet(asMask(DataType::INT32) | asMask(DataType::INT64));
}
static ParameterDataTypeSet normalOrTensorTypeSet() {
- return ParameterDataTypeSet(normalTypesMask() | asMask(DataType::TENSOR));
+ return ParameterDataTypeSet(normalTypesMask() | tensor_type_mask());
}
+ static ParameterDataTypeSet tensor_type_set() { return ParameterDataTypeSet(tensor_type_mask()); }
bool allowedType(DataType dataType) const {
return ((asMask(dataType) & _typeMask) != 0);
}
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_calculator.h b/searchlib/src/vespa/searchlib/tensor/distance_calculator.h
index f501b004254..6b4cf142264 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_calculator.h
+++ b/searchlib/src/vespa/searchlib/tensor/distance_calculator.h
@@ -65,9 +65,7 @@ public:
return result;
}
- std::optional<uint32_t> calc_closest_subspace(VectorBundle vectors) {
- double best_distance = 0.0;
- std::optional<uint32_t> closest_subspace;
+ void calc_closest_subspace(VectorBundle vectors, std::optional<uint32_t>& closest_subspace, double& best_distance) {
for (uint32_t i = 0; i < vectors.subspaces(); ++i) {
double distance = _dist_fun->calc(_query_tensor_cells, vectors.cells(i));
if (!closest_subspace.has_value() || distance < best_distance) {
@@ -75,6 +73,12 @@ public:
closest_subspace = i;
}
}
+ }
+
+ std::optional<uint32_t> calc_closest_subspace(VectorBundle vectors) {
+ double best_distance = 0.0;
+ std::optional<uint32_t> closest_subspace;
+ calc_closest_subspace(vectors, closest_subspace, best_distance);
return closest_subspace;
}
diff --git a/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.cpp b/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.cpp
index 275ee145240..e0444e8dca7 100644
--- a/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.cpp
+++ b/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.cpp
@@ -7,6 +7,8 @@
#include <vespa/eval/eval/value_type.h>
#include <vespa/searchcommon/attribute/config.h>
#include <vespa/searchlib/tensor/dense_tensor_attribute.h>
+#include <vespa/searchlib/tensor/direct_tensor_attribute.h>
+#include <vespa/searchlib/tensor/serialized_fast_value_attribute.h>
using search::attribute::BasicType;
using search::attribute::CollectionType;
@@ -15,6 +17,9 @@ using search::attribute::DistanceMetric;
using search::fef::test::IndexEnvironment;
using search::fef::test::QueryEnvironment;
using search::tensor::DenseTensorAttribute;
+using search::tensor::DirectTensorAttribute;
+using search::tensor::SerializedFastValueAttribute;
+using search::tensor::TensorAttribute;
using vespalib::eval::SimpleValue;
using vespalib::eval::TensorSpec;
using vespalib::eval::Value;
@@ -24,15 +29,23 @@ namespace search::features::test {
namespace {
-std::shared_ptr<DenseTensorAttribute>
+std::shared_ptr<TensorAttribute>
create_tensor_attribute(const vespalib::string& attr_name,
const vespalib::string& tensor_type,
+ bool direct_tensor,
uint32_t docid_limit)
{
Config cfg(BasicType::TENSOR, CollectionType::SINGLE);
cfg.setTensorType(ValueType::from_spec(tensor_type));
cfg.set_distance_metric(DistanceMetric::Euclidean);
- auto result = std::make_shared<DenseTensorAttribute>(attr_name, cfg);
+ std::shared_ptr<TensorAttribute> result;
+ if (cfg.tensorType().is_dense()) {
+ result = std::make_shared<DenseTensorAttribute>(attr_name, cfg);
+ } else if (direct_tensor) {
+ result = std::make_shared<DirectTensorAttribute>(attr_name, cfg);
+ } else {
+ result = std::make_shared<SerializedFastValueAttribute>(attr_name, cfg);
+ }
result->addReservedDoc();
result->addDocs(docid_limit-1);
result->commit();
@@ -47,6 +60,16 @@ DistanceClosenessFixture::DistanceClosenessFixture(size_t fooCnt, size_t barCnt,
const Labels& labels,
const vespalib::string& featureName,
const vespalib::string& query_tensor)
+ : DistanceClosenessFixture("tensor(x[2])", false, fooCnt, barCnt, labels, featureName, query_tensor)
+{
+}
+
+DistanceClosenessFixture::DistanceClosenessFixture(const vespalib::string& tensor_type,
+ bool direct_tensor,
+ size_t fooCnt, size_t barCnt,
+ const Labels& labels,
+ const vespalib::string& featureName,
+ const vespalib::string& query_tensor)
: queryEnv(&indexEnv), rankSetup(factory, indexEnv),
mdl(), match_data(), rankProgram(), fooHandles(), barHandles(),
tensor_attr(),
@@ -73,8 +96,9 @@ DistanceClosenessFixture::DistanceClosenessFixture(size_t fooCnt, size_t barCnt,
queryEnv.getTerms().push_back(term);
}
if (!query_tensor.empty()) {
- tensor_attr = create_tensor_attribute("bar", "tensor(x[2])", docid_limit);
+ tensor_attr = create_tensor_attribute("bar", tensor_type, direct_tensor, docid_limit);
indexEnv.getAttributeMap().add(tensor_attr);
+ search::fef::indexproperties::type::Attribute::set(indexEnv.getProperties(), "bar", tensor_type);
set_query_tensor("qbar", "tensor(x[2])", TensorSpec::from_expr(query_tensor));
}
labels.inject(queryEnv.getProperties());
diff --git a/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.h b/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.h
index 15ff68e325c..02879b8f0e3 100644
--- a/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.h
+++ b/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.h
@@ -13,7 +13,7 @@
using namespace search::fef;
using namespace search::fef::test;
-namespace search::tensor { class DenseTensorAttribute; }
+namespace search::tensor { class TensorAttribute; }
namespace vespalib::eval { class TensorSpec; }
namespace search::features::test {
@@ -55,12 +55,17 @@ struct DistanceClosenessFixture : BlueprintFactoryFixture, IndexEnvironmentFixtu
RankProgram::UP rankProgram;
std::vector<TermFieldHandle> fooHandles;
std::vector<TermFieldHandle> barHandles;
- std::shared_ptr<search::tensor::DenseTensorAttribute> tensor_attr;
+ std::shared_ptr<search::tensor::TensorAttribute> tensor_attr;
uint32_t docid_limit;
bool _failed;
DistanceClosenessFixture(size_t fooCnt, size_t barCnt,
const Labels &labels, const vespalib::string &featureName,
const vespalib::string& query_tensor = "");
+ DistanceClosenessFixture(const vespalib::string& tensor_type,
+ bool direct_tensor,
+ size_t fooCnt, size_t barCnt,
+ const Labels &labels, const vespalib::string &featureName,
+ const vespalib::string& query_tensor = "");
~DistanceClosenessFixture();
void set_attribute_tensor(uint32_t docid, const vespalib::eval::TensorSpec& spec);
void set_query_tensor(const vespalib::string& query_tensor_name,
@@ -69,6 +74,9 @@ struct DistanceClosenessFixture : BlueprintFactoryFixture, IndexEnvironmentFixtu
feature_t getScore(uint32_t docId) {
return Utils::getScoreFeature(*rankProgram, docId);
}
+ vespalib::eval::Value::CREF getObject(uint32_t docId) {
+ return Utils::getObjectFeature(*rankProgram, docId);
+ }
void setScore(TermFieldHandle handle, uint32_t docId, feature_t score) {
match_data->resolveTermField(handle)->setRawScore(docId, score);
}