diff options
author | Tor Egge <Tor.Egge@online.no> | 2023-02-27 15:17:49 +0100 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2023-02-27 15:17:49 +0100 |
commit | 7ec09d5ab437590cdeea50397ea845b07e18f786 (patch) | |
tree | 2ff51fda437ffcff5aed999dd48087f1e9d56406 /searchlib | |
parent | 023d8a0851d321c0f33ba6dde16e1b1fa2fe12ce (diff) |
Add closest feature.
Diffstat (limited to 'searchlib')
15 files changed, 539 insertions, 11 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index 03429b956a4..4ccab1a9380 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -129,6 +129,7 @@ vespa_define_module( src/tests/features src/tests/features/beta src/tests/features/bm25 + src/tests/features/closest src/tests/features/constant src/tests/features/element_completeness src/tests/features/element_similarity_feature diff --git a/searchlib/src/tests/features/closest/CMakeLists.txt b/searchlib/src/tests/features/closest/CMakeLists.txt new file mode 100644 index 00000000000..71572c5e5a2 --- /dev/null +++ b/searchlib/src/tests/features/closest/CMakeLists.txt @@ -0,0 +1,10 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +vespa_add_executable(searchlib_closest_test_app TEST + SOURCES + closest_test.cpp + DEPENDS + searchlib + searchlib_test +) +vespa_add_test(NAME searchlib_closest_test_app COMMAND searchlib_closest_test_app) diff --git a/searchlib/src/tests/features/closest/closest_test.cpp b/searchlib/src/tests/features/closest/closest_test.cpp new file mode 100644 index 00000000000..c903cc79aac --- /dev/null +++ b/searchlib/src/tests/features/closest/closest_test.cpp @@ -0,0 +1,149 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/eval/eval/fast_value.h> +#include <vespa/eval/eval/tensor_spec.h> +#include <vespa/eval/eval/value.h> +#include <vespa/eval/eval/value_codec.h> +#include <vespa/searchlib/features/closest_feature.h> +#include <vespa/searchlib/features/setup.h> +#include <vespa/searchlib/fef/test/dummy_dependency_handler.h> +#include <vespa/searchlib/fef/test/labels.h> +#include <vespa/searchlib/test/features/distance_closeness_fixture.h> +#include <vespa/vespalib/stllike/asciistream.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <vespa/vespalib/util/stringfmt.h> + +using search::feature_t; +using search::features::test::BlueprintFactoryFixture; +using search::features::test::DistanceClosenessFixture; +using search::features::test::FeatureDumpFixture; +using search::features::test::IndexEnvironmentFixture; +using search::features::ClosestBlueprint; +using vespalib::eval::FastValueBuilderFactory; +using vespalib::eval::TensorSpec; +using vespalib::eval::Value; +using vespalib::eval::spec_from_value; +using vespalib::eval::value_from_spec; + +const vespalib::string field_and_label_feature_name("closest(bar,nns)"); +const vespalib::string field_feature_name("closest(bar)"); + +const vespalib::string dense_tensor_type("tensor(x[2])"); +const vespalib::string mixed_tensor_type("tensor(a{},x[2])"); +const vespalib::string sparse_tensor_type("tensor(a{})"); + +TensorSpec no_subspace(sparse_tensor_type); +TensorSpec subspace_a = TensorSpec::from_expr("tensor(a{}):{{a:\"a\"}:1}"); +TensorSpec subspace_b = TensorSpec::from_expr("tensor(a{}):{{a:\"b\"}:1}"); + +TensorSpec doc_tensor = TensorSpec::from_expr("tensor(a{},x[2]):{{a:\"a\",x:0}:3,{a:\"a\",x:1}:10,{a:\"b\",x:0}:5,{a:\"b\",x:1}:10}"); + +using RankFixture = DistanceClosenessFixture; + +TensorSpec get_spec(RankFixture& f, uint32_t docid) { + return spec_from_value(f.getObject(docid).get()); +} + +struct TestParam +{ + vespalib::string _name; + bool _direct_tensor; + TestParam(vespalib::string name, bool direct_tensor) + : _name(std::move(name)), + _direct_tensor(direct_tensor) + { + } + ~TestParam(); +}; + +TestParam::~TestParam() = default; + +std::ostream& operator<<(std::ostream& os, const TestParam param) +{ + os << param._name; + return os; +} + + +class ClosestTest : public ::testing::TestWithParam<TestParam> +{ +protected: + ClosestTest(); + ~ClosestTest(); + bool direct_tensor() const noexcept { return GetParam()._direct_tensor; } + void assert_closest(const Labels& labels, const vespalib::string& feature_name, const vespalib::string& query_tensor, const TensorSpec& exp_spec); + void assert_closest(const Labels& labels, const vespalib::string& feature_name, const std::vector<TensorSpec>& exp_specs); +}; + +ClosestTest::ClosestTest() + : testing::TestWithParam<TestParam>() +{ +} + +ClosestTest::~ClosestTest() = default; + +void +ClosestTest::assert_closest(const Labels& labels, const vespalib::string& feature_name, const vespalib::string& query_tensor, const TensorSpec& exp_spec) +{ + RankFixture f2(mixed_tensor_type, direct_tensor(), 0, 1, labels, feature_name, + dense_tensor_type + ":" + query_tensor); + ASSERT_FALSE(f2.failed()); + SCOPED_TRACE(query_tensor); + f2.set_attribute_tensor(9, doc_tensor); + EXPECT_EQ(exp_spec, get_spec(f2, 9)); +} + +void +ClosestTest::assert_closest(const Labels& labels, const vespalib::string& feature_name, const std::vector<TensorSpec>& exp_specs) +{ + assert_closest(labels, feature_name, "[9,10]", exp_specs[0]); + assert_closest(labels, feature_name, "[1,10]", exp_specs[1]); +} + +INSTANTIATE_TEST_SUITE_P(ClosestMultiTest, + ClosestTest, + testing::Values(TestParam("Serialized", false), + TestParam("Direct", true)), + testing::PrintToStringParamName()); + +TEST(ClosestTest, require_that_blueprint_can_be_created_from_factory) +{ + BlueprintFactoryFixture f; + Blueprint::SP bp = f.factory.createBlueprint("closest"); + EXPECT_TRUE(bp.get() != 0); + EXPECT_TRUE(dynamic_cast<ClosestBlueprint*>(bp.get()) != 0); +} + +TEST(ClosestTest, require_that_no_features_are_dumped) +{ + ClosestBlueprint f1; + IndexEnvironmentFixture f2; + FeatureDumpFixture f3; + f1.visitDumpFeatures(f2.indexEnv, f3); +} + +TEST_P(ClosestTest, require_that_no_label_gives_empty_result) +{ + NoLabel f1; + assert_closest(f1, field_and_label_feature_name, {no_subspace, no_subspace}); +} + +TEST_P(ClosestTest, require_that_unrelated_label_gives_empty_result) +{ + SingleLabel f1("unrelated", 1); + assert_closest(f1, field_and_label_feature_name, {no_subspace, no_subspace}); +} + +TEST_P(ClosestTest, closest_using_field_setup) +{ + NoLabel f1; + assert_closest(f1, field_feature_name, {subspace_b, subspace_a}); +} + +TEST_P(ClosestTest, closest_using_field_and_label_setup) +{ + SingleLabel f1("nns", 1); + assert_closest(f1, field_and_label_feature_name, {subspace_b, subspace_a}); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/vespa/searchlib/features/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/CMakeLists.txt index 8acf28f4a2f..4af5c0e561e 100644 --- a/searchlib/src/vespa/searchlib/features/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/features/CMakeLists.txt @@ -7,6 +7,7 @@ vespa_add_library(searchlib_features OBJECT attributematchfeature.cpp bm25_feature.cpp closenessfeature.cpp + closest_feature.cpp constant_feature.cpp debug_attribute_wait.cpp debug_wait.cpp diff --git a/searchlib/src/vespa/searchlib/features/closenessfeature.cpp b/searchlib/src/vespa/searchlib/features/closenessfeature.cpp index e44c94dbb2d..048a507b3fd 100644 --- a/searchlib/src/vespa/searchlib/features/closenessfeature.cpp +++ b/searchlib/src/vespa/searchlib/features/closenessfeature.cpp @@ -36,7 +36,7 @@ ConvertRawScoreToCloseness::ConvertRawScoreToCloseness(const fef::IQueryEnvironm } ConvertRawScoreToCloseness::ConvertRawScoreToCloseness(const fef::IQueryEnvironment &env, const vespalib::string &label) - : _bundle(env, label, "closeness"), + : _bundle(env, std::nullopt, label, "closeness"), _md(nullptr) { } diff --git a/searchlib/src/vespa/searchlib/features/closest_feature.cpp b/searchlib/src/vespa/searchlib/features/closest_feature.cpp new file mode 100644 index 00000000000..335d5c6a1c9 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/closest_feature.cpp @@ -0,0 +1,288 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "closest_feature.h" +#include "constant_tensor_executor.h" +#include "distance_calculator_bundle.h" +#include "valuefeature.h" +#include <vespa/eval/eval/fast_value.h> +#include <vespa/eval/eval/value_codec.h> +#include <vespa/searchcommon/common/schema.h> +#include <vespa/searchlib/fef/indexproperties.h> +#include <vespa/searchlib/fef/parameterdescriptions.h> +#include <vespa/searchlib/fef/test/dummy_dependency_handler.h> +#include <vespa/searchlib/tensor/distance_calculator.h> +#include <vespa/searchlib/tensor/fast_value_view.h> +#include <vespa/searchlib/tensor/i_tensor_attribute.h> +#include <vespa/searchlib/tensor/serialized_tensor_ref.h> +#include <vespa/searchlib/tensor/subspace_type.h> +#include <vespa/vespalib/util/stash.h> + +#include <vespa/log/log.h> +LOG_SETUP(".features.closest_feature"); + +using search::fef::FeatureType; +using search::fef::FieldInfo; +using search::fef::ParameterDataTypeSet; +using search::tensor::FastValueView; +using search::tensor::ITensorAttribute; +using search::tensor::SubspaceType; +using search::tensor::VectorBundle; +using vespalib::eval::CellType; +using vespalib::eval::FastValueBuilderFactory; +using vespalib::eval::TypedCells; +using vespalib::eval::TypifyCellType; +using vespalib::eval::Value; +using vespalib::eval::ValueType; +using vespalib::string_id; +using vespalib::typify_invoke; + +using namespace search::fef::indexproperties; + +namespace { + +struct SetIdentity { + template <typename T> + static void invoke(void *space, size_t size) { + assert(size == sizeof(T)); + *(T *) space = 1.0; + } +}; + +void setup_identity_cells(const ValueType& type, std::vector<char>& space, TypedCells& cells) +{ + if (type.is_double()) { + return; + } + space.resize(vespalib::eval::CellTypeUtils::mem_size(type.cell_type(), 1)); + cells = TypedCells(space.data(), type.cell_type(), 1); + typify_invoke<1,TypifyCellType,SetIdentity>(type.cell_type(), space.data(), space.size()); +} + +} + +namespace search::features { + +/** + * Implements the executor for the closest feature. + */ +class ClosestExecutor : public fef::FeatureExecutor { +protected: + DistanceCalculatorBundle _bundle; + Value& _empty_output; + TypedCells _identity; + const ITensorAttribute& _attr; + std::unique_ptr<Value> _output; +public: + ClosestExecutor(DistanceCalculatorBundle&& bundle, Value& empty_output, TypedCells identity, const ITensorAttribute& attr); + ~ClosestExecutor() override; + static fef::FeatureExecutor& make(DistanceCalculatorBundle&& bundle, Value& empty_output, TypedCells identity, const ITensorAttribute& attr, vespalib::Stash& stash); +}; + +class ClosestSerializedExecutor : public ClosestExecutor { +public: + ClosestSerializedExecutor(DistanceCalculatorBundle&& bundle, Value& empty_output, TypedCells identity, const ITensorAttribute& attr); + ~ClosestSerializedExecutor() override; + void execute(uint32_t docId) override; +}; + +class ClosestDirectExecutor : public ClosestExecutor { + SubspaceType _subspace_type; + std::vector<string_id> _labels; + std::vector<string_id*> _label_ptrs; +public: + ClosestDirectExecutor(DistanceCalculatorBundle&& bundle, Value& empty_output, TypedCells identity, const ITensorAttribute& attr); + ~ClosestDirectExecutor() override; + void execute(uint32_t docId) override; +}; + +ClosestExecutor::ClosestExecutor(DistanceCalculatorBundle&& bundle, Value& empty_output, TypedCells identity, const ITensorAttribute& attr) + : _bundle(std::move(bundle)), + _empty_output(empty_output), + _identity(identity), + _attr(attr), + _output() +{ +} + +ClosestExecutor::~ClosestExecutor() = default; + +fef::FeatureExecutor& +ClosestExecutor::make(DistanceCalculatorBundle&& bundle, Value& empty_output, TypedCells identity, const ITensorAttribute& attr, vespalib::Stash& stash) +{ + if (attr.supports_get_serialized_tensor_ref()) { + return stash.create<ClosestSerializedExecutor>(std::move(bundle), empty_output, identity, attr); + } else if (attr.supports_get_tensor_ref()) { + return stash.create<ClosestDirectExecutor>(std::move(bundle), empty_output, identity, attr); + } else { + return ConstantTensorExecutor::createEmpty(empty_output.type(), stash); + } +} + +ClosestSerializedExecutor::ClosestSerializedExecutor(DistanceCalculatorBundle&& bundle, Value& empty_output, TypedCells identity, const ITensorAttribute& attr) + : ClosestExecutor(std::move(bundle), empty_output, identity, attr) +{ +} + +ClosestSerializedExecutor::~ClosestSerializedExecutor() = default; + +void +ClosestSerializedExecutor::execute(uint32_t docId) +{ + double best_distance = 0.0; + std::optional<uint32_t> closest_subspace; + auto ref = _attr.get_serialized_tensor_ref(docId); + for (const auto& elem : _bundle.elements()) { + elem.calc->calc_closest_subspace(ref.get_vectors(), closest_subspace, best_distance); + } + if (closest_subspace.has_value()) { + auto labels = ref.get_labels(closest_subspace.value()); + _output = std::make_unique<FastValueView>(_empty_output.type(), labels, _identity, labels.size(), 1); + outputs().set_object(0, *_output); + } else { + outputs().set_object(0, _empty_output); + } +} + +ClosestDirectExecutor::ClosestDirectExecutor(DistanceCalculatorBundle&& bundle, Value& empty_output, TypedCells identity, const ITensorAttribute& attr) + : ClosestExecutor(std::move(bundle), empty_output, identity, attr), + _subspace_type(attr.getTensorType()), + _labels(attr.getTensorType().count_mapped_dimensions()), + _label_ptrs(_labels.size()) +{ + for (size_t i = 0; i < _labels.size(); ++i) { + _label_ptrs[i] = &_labels[i]; + } +} + +ClosestDirectExecutor::~ClosestDirectExecutor() = default; + +void +ClosestDirectExecutor::execute(uint32_t docId) +{ + double best_distance = 0.0; + std::optional<uint32_t> closest_subspace; + auto& tensor = _attr.get_tensor_ref(docId); + VectorBundle vectors(tensor.cells().data, tensor.index().size(), _subspace_type); + for (const auto& elem : _bundle.elements()) { + elem.calc->calc_closest_subspace(vectors, closest_subspace, best_distance); + } + if (closest_subspace.has_value()) { + size_t subspace_id = 0; + auto view = tensor.index().create_view({}); + view->lookup({}); + while (view->next_result(_label_ptrs, subspace_id)) { + if (subspace_id == closest_subspace.value()) { + _output = std::make_unique<FastValueView>(_empty_output.type(), _labels, _identity, _labels.size(), 1); + outputs().set_object(0, *_output); + return; + } + } + } + outputs().set_object(0, _empty_output); +} + +ClosestBlueprint::ClosestBlueprint() + : Blueprint("closest"), + _field_name(), + _field_tensor_type(ValueType::error_type()), + _output_tensor_type(ValueType::error_type()), + _field_id(search::index::Schema::UNKNOWN_FIELD_ID), + _item_label(), + _empty_output(), + _identity_space(), + _identity_cells() +{ +} + +ClosestBlueprint::~ClosestBlueprint() = default; + +void +ClosestBlueprint::visitDumpFeatures(const fef::IIndexEnvironment&, fef::IDumpFeatureVisitor&) const +{ +} + +std::unique_ptr<fef::Blueprint> +ClosestBlueprint::createInstance() const +{ + return std::make_unique<ClosestBlueprint>(); +} + +fef::ParameterDescriptions +ClosestBlueprint::getDescriptions() const +{ + auto data_type_set = ParameterDataTypeSet::tensor_type_set(); + return fef::ParameterDescriptions(). + desc().attribute(data_type_set, fef::ParameterCollection::SINGLE). + desc().attribute(data_type_set, fef::ParameterCollection::SINGLE).string(); +} + +bool +ClosestBlueprint::setup(const fef::IIndexEnvironment & env, const fef::ParameterList & params) +{ + if (params.size() < 1 || params.size() > 2) { + LOG(error, "%s: Wrong number of parameters, was %d, must be 1 or 2", getName().c_str(), (int) params.size()); + return false; + } + _field_name = params[0].getValue(); + if (params.size() == 2) { + _item_label = params[1].getValue(); + } + auto fi = env.getFieldByName(_field_name); + if (fi == nullptr) { + LOG(error, "%s: Unknown field %s", getName().c_str(), _field_name.c_str()); + return false; + } + auto dt = fi->get_data_type(); + auto ct = fi->collection(); + if (dt != search::index::schema::DataType::TENSOR || + ct != search::index::schema::CollectionType::SINGLE) { + LOG(error, "%s: Field %s is not a single value tensor field", getName().c_str(), _field_name.c_str()); + return false; + } + if (!fi->hasAttribute()) { + LOG(error, "%s: Field %s does not have an attribute", getName().c_str(), _field_name.c_str()); + return false; + } + vespalib::string attr_type_spec = type::Attribute::lookup(env.getProperties(), _field_name); + if (attr_type_spec.empty()) { + LOG(error, "%s: Field %s lacks a type in index properties", getName().c_str(), _field_name.c_str()); + return false; + } + _field_tensor_type = ValueType::from_spec(attr_type_spec); + if (_field_tensor_type.is_error() || _field_tensor_type.is_double() || _field_tensor_type.count_mapped_dimensions() == 0) { + LOG(error, "%s: Field %s invalid type: '%s'", getName().c_str(), _field_name.c_str(), attr_type_spec.c_str()); + return false; + } + _output_tensor_type = ValueType::make_type(_field_tensor_type.cell_type(), _field_tensor_type.mapped_dimensions()); + assert(!_output_tensor_type.is_double()); + FeatureType output_type = FeatureType::object(_output_tensor_type); + describeOutput("out", "The closest tensor subspace.", output_type); + _field_id = fi->id(); + _empty_output = vespalib::eval::value_from_spec(_output_tensor_type.to_spec(), FastValueBuilderFactory::get()); + setup_identity_cells(_output_tensor_type, _identity_space, _identity_cells); + return true; +} + +void +ClosestBlueprint::prepareSharedState(const fef::IQueryEnvironment& env, fef::IObjectStore& store) const +{ + if (_item_label.has_value()) { + DistanceCalculatorBundle::prepare_shared_state(env, store, _item_label.value(), "closest"); + } else { + DistanceCalculatorBundle::prepare_shared_state(env, store, _field_id, "closest"); + } +} + +fef::FeatureExecutor& +ClosestBlueprint::createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const +{ + auto bundle = _item_label.has_value() ? DistanceCalculatorBundle(env, _field_id, _item_label.value(), "closest") : DistanceCalculatorBundle(env, _field_id, "closest"); + if (bundle.elements().empty()) { + return ConstantTensorExecutor::createEmpty(_output_tensor_type, stash); + } else { + auto& attr = bundle.elements().front().calc->attribute_tensor(); + return ClosestExecutor::make(std::move(bundle), *_empty_output, _identity_cells, attr, stash); + } +} + +} diff --git a/searchlib/src/vespa/searchlib/features/closest_feature.h b/searchlib/src/vespa/searchlib/features/closest_feature.h new file mode 100644 index 00000000000..840f896abe2 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/closest_feature.h @@ -0,0 +1,33 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/searchlib/fef/blueprint.h> +#include <optional> + +namespace search::features { + +/** + * Implements the blueprint for the closest executor. + */ +class ClosestBlueprint : public fef::Blueprint { + vespalib::string _field_name; + vespalib::eval::ValueType _field_tensor_type; + vespalib::eval::ValueType _output_tensor_type; + uint32_t _field_id; + std::optional<vespalib::string> _item_label; + std::unique_ptr<vespalib::eval::Value> _empty_output; + std::vector<char> _identity_space; + vespalib::eval::TypedCells _identity_cells; +public: + ClosestBlueprint(); + ~ClosestBlueprint() override; + void visitDumpFeatures(const fef::IIndexEnvironment & env, fef::IDumpFeatureVisitor & visitor) const override; + std::unique_ptr<fef::Blueprint> createInstance() const override; + fef::ParameterDescriptions getDescriptions() const override; + bool setup(const fef::IIndexEnvironment & env, const fef::ParameterList & params) override; + void prepareSharedState(const fef::IQueryEnvironment& env, fef::IObjectStore& store) const override; + fef::FeatureExecutor &createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const override; +}; + +} diff --git a/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.cpp b/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.cpp index 4b2d67c933d..fad4c649165 100644 --- a/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.cpp +++ b/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.cpp @@ -115,6 +115,7 @@ DistanceCalculatorBundle::DistanceCalculatorBundle(const fef::IQueryEnvironment& } DistanceCalculatorBundle::DistanceCalculatorBundle(const fef::IQueryEnvironment& env, + std::optional<uint32_t> field_id, const vespalib::string& label, const vespalib::string& feature_name) : _elems() @@ -124,6 +125,9 @@ DistanceCalculatorBundle::DistanceCalculatorBundle(const fef::IQueryEnvironment& // expect numFields() == 1 for (uint32_t i = 0; i < term->numFields(); ++i) { const auto& term_field = term->field(i); + if (field_id.has_value() && field_id.value() != term_field.getFieldId()) { + continue; + } TermFieldHandle handle = term_field.getHandle(); if (handle != IllegalHandle) { std::unique_ptr<DistanceCalculator> calc; diff --git a/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.h b/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.h index 35295c771a6..e3be52aecc5 100644 --- a/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.h +++ b/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.h @@ -5,6 +5,7 @@ #include <vespa/searchlib/fef/handle.h> #include <vespa/vespalib/stllike/string.h> #include <memory> +#include <optional> #include <vector> namespace search::tensor { class DistanceCalculator; } @@ -40,6 +41,7 @@ public: const vespalib::string& feature_name); DistanceCalculatorBundle(const fef::IQueryEnvironment& env, + std::optional<uint32_t> field_id, const vespalib::string& label, const vespalib::string& feature_name); diff --git a/searchlib/src/vespa/searchlib/features/distancefeature.cpp b/searchlib/src/vespa/searchlib/features/distancefeature.cpp index 40f994c18e9..f601c91a0b2 100644 --- a/searchlib/src/vespa/searchlib/features/distancefeature.cpp +++ b/searchlib/src/vespa/searchlib/features/distancefeature.cpp @@ -44,7 +44,7 @@ ConvertRawscoreToDistance::ConvertRawscoreToDistance(const fef::IQueryEnvironmen } ConvertRawscoreToDistance::ConvertRawscoreToDistance(const fef::IQueryEnvironment &env, const vespalib::string &label) - : _bundle(env, label, "distance"), + : _bundle(env, std::nullopt, label, "distance"), _md(nullptr) { } diff --git a/searchlib/src/vespa/searchlib/features/setup.cpp b/searchlib/src/vespa/searchlib/features/setup.cpp index 2bc8a349d1b..5e152d4b455 100644 --- a/searchlib/src/vespa/searchlib/features/setup.cpp +++ b/searchlib/src/vespa/searchlib/features/setup.cpp @@ -6,6 +6,7 @@ #include "attributematchfeature.h" #include "bm25_feature.h" #include "closenessfeature.h" +#include "closest_feature.h" #include "constant_feature.h" #include "debug_attribute_wait.h" #include "debug_wait.h" @@ -75,6 +76,7 @@ void setup_search_features(fef::IBlueprintRegistry & registry) registry.addPrototype(std::make_shared<AttributeMatchBlueprint>()); registry.addPrototype(std::make_shared<Bm25Blueprint>()); registry.addPrototype(std::make_shared<ClosenessBlueprint>()); + registry.addPrototype(std::make_shared<ClosestBlueprint>()); registry.addPrototype(std::make_shared<DebugAttributeWaitBlueprint>()); registry.addPrototype(std::make_shared<DebugWaitBlueprint>()); registry.addPrototype(std::make_shared<DistanceBlueprint>()); diff --git a/searchlib/src/vespa/searchlib/fef/parameterdescriptions.h b/searchlib/src/vespa/searchlib/fef/parameterdescriptions.h index e47ce0df7a5..46a932696ca 100644 --- a/searchlib/src/vespa/searchlib/fef/parameterdescriptions.h +++ b/searchlib/src/vespa/searchlib/fef/parameterdescriptions.h @@ -71,6 +71,7 @@ private: asMask(DataType::REFERENCE) | asMask(DataType::COMBINED)); } + static uint32_t tensor_type_mask() { return asMask(DataType::TENSOR); } ParameterDataTypeSet(uint32_t typeMask) : _typeMask(typeMask) { @@ -87,8 +88,9 @@ public: return ParameterDataTypeSet(asMask(DataType::INT32) | asMask(DataType::INT64)); } static ParameterDataTypeSet normalOrTensorTypeSet() { - return ParameterDataTypeSet(normalTypesMask() | asMask(DataType::TENSOR)); + return ParameterDataTypeSet(normalTypesMask() | tensor_type_mask()); } + static ParameterDataTypeSet tensor_type_set() { return ParameterDataTypeSet(tensor_type_mask()); } bool allowedType(DataType dataType) const { return ((asMask(dataType) & _typeMask) != 0); } diff --git a/searchlib/src/vespa/searchlib/tensor/distance_calculator.h b/searchlib/src/vespa/searchlib/tensor/distance_calculator.h index f501b004254..6b4cf142264 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_calculator.h +++ b/searchlib/src/vespa/searchlib/tensor/distance_calculator.h @@ -65,9 +65,7 @@ public: return result; } - std::optional<uint32_t> calc_closest_subspace(VectorBundle vectors) { - double best_distance = 0.0; - std::optional<uint32_t> closest_subspace; + void calc_closest_subspace(VectorBundle vectors, std::optional<uint32_t>& closest_subspace, double& best_distance) { for (uint32_t i = 0; i < vectors.subspaces(); ++i) { double distance = _dist_fun->calc(_query_tensor_cells, vectors.cells(i)); if (!closest_subspace.has_value() || distance < best_distance) { @@ -75,6 +73,12 @@ public: closest_subspace = i; } } + } + + std::optional<uint32_t> calc_closest_subspace(VectorBundle vectors) { + double best_distance = 0.0; + std::optional<uint32_t> closest_subspace; + calc_closest_subspace(vectors, closest_subspace, best_distance); return closest_subspace; } diff --git a/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.cpp b/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.cpp index 275ee145240..e0444e8dca7 100644 --- a/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.cpp +++ b/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.cpp @@ -7,6 +7,8 @@ #include <vespa/eval/eval/value_type.h> #include <vespa/searchcommon/attribute/config.h> #include <vespa/searchlib/tensor/dense_tensor_attribute.h> +#include <vespa/searchlib/tensor/direct_tensor_attribute.h> +#include <vespa/searchlib/tensor/serialized_fast_value_attribute.h> using search::attribute::BasicType; using search::attribute::CollectionType; @@ -15,6 +17,9 @@ using search::attribute::DistanceMetric; using search::fef::test::IndexEnvironment; using search::fef::test::QueryEnvironment; using search::tensor::DenseTensorAttribute; +using search::tensor::DirectTensorAttribute; +using search::tensor::SerializedFastValueAttribute; +using search::tensor::TensorAttribute; using vespalib::eval::SimpleValue; using vespalib::eval::TensorSpec; using vespalib::eval::Value; @@ -24,15 +29,23 @@ namespace search::features::test { namespace { -std::shared_ptr<DenseTensorAttribute> +std::shared_ptr<TensorAttribute> create_tensor_attribute(const vespalib::string& attr_name, const vespalib::string& tensor_type, + bool direct_tensor, uint32_t docid_limit) { Config cfg(BasicType::TENSOR, CollectionType::SINGLE); cfg.setTensorType(ValueType::from_spec(tensor_type)); cfg.set_distance_metric(DistanceMetric::Euclidean); - auto result = std::make_shared<DenseTensorAttribute>(attr_name, cfg); + std::shared_ptr<TensorAttribute> result; + if (cfg.tensorType().is_dense()) { + result = std::make_shared<DenseTensorAttribute>(attr_name, cfg); + } else if (direct_tensor) { + result = std::make_shared<DirectTensorAttribute>(attr_name, cfg); + } else { + result = std::make_shared<SerializedFastValueAttribute>(attr_name, cfg); + } result->addReservedDoc(); result->addDocs(docid_limit-1); result->commit(); @@ -47,6 +60,16 @@ DistanceClosenessFixture::DistanceClosenessFixture(size_t fooCnt, size_t barCnt, const Labels& labels, const vespalib::string& featureName, const vespalib::string& query_tensor) + : DistanceClosenessFixture("tensor(x[2])", false, fooCnt, barCnt, labels, featureName, query_tensor) +{ +} + +DistanceClosenessFixture::DistanceClosenessFixture(const vespalib::string& tensor_type, + bool direct_tensor, + size_t fooCnt, size_t barCnt, + const Labels& labels, + const vespalib::string& featureName, + const vespalib::string& query_tensor) : queryEnv(&indexEnv), rankSetup(factory, indexEnv), mdl(), match_data(), rankProgram(), fooHandles(), barHandles(), tensor_attr(), @@ -73,8 +96,9 @@ DistanceClosenessFixture::DistanceClosenessFixture(size_t fooCnt, size_t barCnt, queryEnv.getTerms().push_back(term); } if (!query_tensor.empty()) { - tensor_attr = create_tensor_attribute("bar", "tensor(x[2])", docid_limit); + tensor_attr = create_tensor_attribute("bar", tensor_type, direct_tensor, docid_limit); indexEnv.getAttributeMap().add(tensor_attr); + search::fef::indexproperties::type::Attribute::set(indexEnv.getProperties(), "bar", tensor_type); set_query_tensor("qbar", "tensor(x[2])", TensorSpec::from_expr(query_tensor)); } labels.inject(queryEnv.getProperties()); diff --git a/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.h b/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.h index 15ff68e325c..02879b8f0e3 100644 --- a/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.h +++ b/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.h @@ -13,7 +13,7 @@ using namespace search::fef; using namespace search::fef::test; -namespace search::tensor { class DenseTensorAttribute; } +namespace search::tensor { class TensorAttribute; } namespace vespalib::eval { class TensorSpec; } namespace search::features::test { @@ -55,12 +55,17 @@ struct DistanceClosenessFixture : BlueprintFactoryFixture, IndexEnvironmentFixtu RankProgram::UP rankProgram; std::vector<TermFieldHandle> fooHandles; std::vector<TermFieldHandle> barHandles; - std::shared_ptr<search::tensor::DenseTensorAttribute> tensor_attr; + std::shared_ptr<search::tensor::TensorAttribute> tensor_attr; uint32_t docid_limit; bool _failed; DistanceClosenessFixture(size_t fooCnt, size_t barCnt, const Labels &labels, const vespalib::string &featureName, const vespalib::string& query_tensor = ""); + DistanceClosenessFixture(const vespalib::string& tensor_type, + bool direct_tensor, + size_t fooCnt, size_t barCnt, + const Labels &labels, const vespalib::string &featureName, + const vespalib::string& query_tensor = ""); ~DistanceClosenessFixture(); void set_attribute_tensor(uint32_t docid, const vespalib::eval::TensorSpec& spec); void set_query_tensor(const vespalib::string& query_tensor_name, @@ -69,6 +74,9 @@ struct DistanceClosenessFixture : BlueprintFactoryFixture, IndexEnvironmentFixtu feature_t getScore(uint32_t docId) { return Utils::getScoreFeature(*rankProgram, docId); } + vespalib::eval::Value::CREF getObject(uint32_t docId) { + return Utils::getObjectFeature(*rankProgram, docId); + } void setScore(TermFieldHandle handle, uint32_t docId, feature_t score) { match_data->resolveTermField(handle)->setRawScore(docId, score); } |