diff options
author | Geir Storli <geirst@yahooinc.com> | 2023-06-01 15:28:58 +0000 |
---|---|---|
committer | Geir Storli <geirst@yahooinc.com> | 2023-06-01 15:28:58 +0000 |
commit | 6479ec6926adff29b2f86d32e0062bb30b6790ee (patch) | |
tree | 5d41cedc11e6c83b1b38e218074e8c927c07e1f8 | |
parent | 4cae2ecf6c9c5ac7f329a112451909d0f7ac3950 (diff) |
Allow negative values from closeness when using dotproduct distance metric.
9 files changed, 58 insertions, 12 deletions
diff --git a/searchlib/src/tests/features/nns_closeness/nns_closeness_test.cpp b/searchlib/src/tests/features/nns_closeness/nns_closeness_test.cpp index 8cb060c08e4..703f03918d8 100644 --- a/searchlib/src/tests/features/nns_closeness/nns_closeness_test.cpp +++ b/searchlib/src/tests/features/nns_closeness/nns_closeness_test.cpp @@ -16,6 +16,7 @@ using namespace search::features; using namespace search::fef::test; using namespace search::fef; +using search::attribute::DistanceMetric; using vespalib::eval::TensorSpec; const vespalib::string labelFeatureName("closeness(label,nns)"); @@ -146,4 +147,23 @@ TEST(NnsClosenessTest, raw_score_is_calculated_on_the_fly_using_label_setup) expect_raw_score_calculated_on_the_fly(f2); } +TEST(NnsClosenessTest, can_return_negative_values_with_dotproduct_distance_metric) +{ + NoLabel f1; + RankFixture f2(0, 2, f1, fieldFeatureName, "tensor(x[2]):[2,3]", DistanceMetric::Dotproduct); + ASSERT_FALSE(f2.failed()); + + f2.set_bar_rawscore(0, 7, 5.0); + f2.set_bar_rawscore(1, 8, -5.0); + f2.set_attribute_tensor(9, TensorSpec::from_expr("tensor(x[2]):[4,5]")); + f2.set_attribute_tensor(10, TensorSpec::from_expr("tensor(x[2]):[-4,-5]")); + + // For docids 9 and 10 the raw score is calculated on the fly + // using a distance calculator over the attribute and query tensors. + EXPECT_EQ(5.0, f2.getScore(7)); + EXPECT_EQ(-5.0, f2.getScore(8)); + EXPECT_EQ(23.0, f2.getScore(9)); + EXPECT_EQ(-23.0, f2.getScore(10)); +} + GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/vespa/searchlib/features/closenessfeature.cpp b/searchlib/src/vespa/searchlib/features/closenessfeature.cpp index 048a507b3fd..05579ad4fc1 100644 --- a/searchlib/src/vespa/searchlib/features/closenessfeature.cpp +++ b/searchlib/src/vespa/searchlib/features/closenessfeature.cpp @@ -44,7 +44,7 @@ ConvertRawScoreToCloseness::ConvertRawScoreToCloseness(const fef::IQueryEnvironm void ConvertRawScoreToCloseness::execute(uint32_t docId) { - feature_t max_closeness = 0.0; + feature_t max_closeness = _bundle.min_rawscore(); assert(_md); for (const auto& elem : _bundle.elements()) { const TermFieldMatchData *tfmd = _md->resolveTermField(elem.handle); diff --git a/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.cpp b/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.cpp index fad4c649165..22afaa3ca84 100644 --- a/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.cpp +++ b/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.cpp @@ -97,7 +97,8 @@ DistanceCalculatorBundle::DistanceCalculatorBundle(const fef::IQueryEnvironment& uint32_t field_id, const vespalib::string& feature_name) - : _elems() + : _elems(), + _min_rawscore(0.0) { _elems.reserve(env.getNumTerms()); const auto* attr = resolve_attribute_for_field(env, field_id, feature_name); @@ -107,6 +108,7 @@ DistanceCalculatorBundle::DistanceCalculatorBundle(const fef::IQueryEnvironment& const auto* term = env.getTerm(i); if (term->query_tensor_name().has_value() && (attr != nullptr)) { _elems.emplace_back(handle, make_distance_calculator(env, *attr, term->query_tensor_name().value(), feature_name)); + _min_rawscore = _elems.back().calc->function().min_rawscore(); } else { _elems.emplace_back(handle); } @@ -118,7 +120,8 @@ DistanceCalculatorBundle::DistanceCalculatorBundle(const fef::IQueryEnvironment& std::optional<uint32_t> field_id, const vespalib::string& label, const vespalib::string& feature_name) - : _elems() + : _elems(), + _min_rawscore(0.0) { const ITermData* term = util::getTermByLabel(env, label); if (term != nullptr) { @@ -135,6 +138,7 @@ DistanceCalculatorBundle::DistanceCalculatorBundle(const fef::IQueryEnvironment& const auto* attr = resolve_attribute_for_field(env, term_field.getFieldId(), feature_name); if (attr != nullptr) { calc = make_distance_calculator(env, *attr, term->query_tensor_name().value(), feature_name); + _min_rawscore = calc->function().min_rawscore(); } } _elems.emplace_back(handle, std::move(calc)); diff --git a/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.h b/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.h index e3be52aecc5..cb85985cc09 100644 --- a/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.h +++ b/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.h @@ -34,6 +34,7 @@ public: }; private: std::vector<Element> _elems; + double _min_rawscore; public: DistanceCalculatorBundle(const fef::IQueryEnvironment& env, @@ -47,6 +48,8 @@ public: const std::vector<Element>& elements() const { return _elems; } + double min_rawscore() const { return _min_rawscore; } + static void prepare_shared_state(const fef::IQueryEnvironment& env, fef::IObjectStore& store, uint32_t field_id, diff --git a/searchlib/src/vespa/searchlib/tensor/distance_calculator.h b/searchlib/src/vespa/searchlib/tensor/distance_calculator.h index f29cd389732..b65f4ff1868 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_calculator.h +++ b/searchlib/src/vespa/searchlib/tensor/distance_calculator.h @@ -46,7 +46,7 @@ public: double calc_raw_score(uint32_t docid) const { auto vectors = _attr_tensor.get_vectors(docid); - double result = 0.0; + double result = _dist_fun->min_rawscore(); for (uint32_t i = 0; i < vectors.subspaces(); ++i) { double distance = _dist_fun->calc(vectors.cells(i)); double score = _dist_fun->to_rawscore(distance); diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function.h b/searchlib/src/vespa/searchlib/tensor/distance_function.h index a06c451d5e2..0df7fe6cc1d 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_function.h +++ b/searchlib/src/vespa/searchlib/tensor/distance_function.h @@ -30,6 +30,13 @@ public: virtual double to_distance(double rawscore) const { return (1.0 / rawscore) - 1.0; } + + /** + * The minimum rawscore (also used as closeness) that this distance function can return. + */ + virtual double min_rawscore() const { + return 0.0; + } }; } diff --git a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp index 16f9eeeabc2..5ad3a044df6 100644 --- a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp +++ b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp @@ -67,6 +67,9 @@ public: double to_distance(double rawscore) const override { return -rawscore; } + double min_rawscore() const override { + return std::numeric_limits<double>::lowest(); + } double calc_with_limit(const vespalib::eval::TypedCells& rhs, double) const override { return calc(rhs); } diff --git a/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.cpp b/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.cpp index e0444e8dca7..f6fb96cb74b 100644 --- a/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.cpp +++ b/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.cpp @@ -32,12 +32,13 @@ namespace { std::shared_ptr<TensorAttribute> create_tensor_attribute(const vespalib::string& attr_name, const vespalib::string& tensor_type, + DistanceMetric distance_metric, bool direct_tensor, uint32_t docid_limit) { Config cfg(BasicType::TENSOR, CollectionType::SINGLE); cfg.setTensorType(ValueType::from_spec(tensor_type)); - cfg.set_distance_metric(DistanceMetric::Euclidean); + cfg.set_distance_metric(distance_metric); std::shared_ptr<TensorAttribute> result; if (cfg.tensorType().is_dense()) { result = std::make_shared<DenseTensorAttribute>(attr_name, cfg); @@ -59,8 +60,9 @@ FeatureDumpFixture::~FeatureDumpFixture() = default; DistanceClosenessFixture::DistanceClosenessFixture(size_t fooCnt, size_t barCnt, const Labels& labels, const vespalib::string& featureName, - const vespalib::string& query_tensor) - : DistanceClosenessFixture("tensor(x[2])", false, fooCnt, barCnt, labels, featureName, query_tensor) + const vespalib::string& query_tensor, + DistanceMetric distance_metric) + : DistanceClosenessFixture("tensor(x[2])", false, fooCnt, barCnt, labels, featureName, query_tensor, distance_metric) { } @@ -69,7 +71,8 @@ DistanceClosenessFixture::DistanceClosenessFixture(const vespalib::string& tenso size_t fooCnt, size_t barCnt, const Labels& labels, const vespalib::string& featureName, - const vespalib::string& query_tensor) + const vespalib::string& query_tensor, + DistanceMetric distance_metric) : queryEnv(&indexEnv), rankSetup(factory, indexEnv), mdl(), match_data(), rankProgram(), fooHandles(), barHandles(), tensor_attr(), @@ -96,7 +99,7 @@ DistanceClosenessFixture::DistanceClosenessFixture(const vespalib::string& tenso queryEnv.getTerms().push_back(term); } if (!query_tensor.empty()) { - tensor_attr = create_tensor_attribute("bar", tensor_type, direct_tensor, docid_limit); + tensor_attr = create_tensor_attribute("bar", tensor_type, distance_metric, direct_tensor, docid_limit); indexEnv.getAttributeMap().add(tensor_attr); search::fef::indexproperties::type::Attribute::set(indexEnv.getProperties(), "bar", tensor_type); set_query_tensor("qbar", "tensor(x[2])", TensorSpec::from_expr(query_tensor)); diff --git a/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.h b/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.h index 8aae1ecb942..768e54cc19b 100644 --- a/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.h +++ b/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.h @@ -2,6 +2,7 @@ #pragma once +#include <vespa/searchcommon/attribute/distance_metric.h> #include <vespa/searchlib/features/setup.h> #include <vespa/searchlib/fef/fef.h> #include <vespa/searchlib/fef/test/indexenvironment.h> @@ -61,12 +62,14 @@ struct DistanceClosenessFixture : BlueprintFactoryFixture, IndexEnvironmentFixtu bool _failed; DistanceClosenessFixture(size_t fooCnt, size_t barCnt, const Labels &labels, const vespalib::string &featureName, - const vespalib::string& query_tensor = ""); + const vespalib::string& query_tensor = "", + search::attribute::DistanceMetric distance_metric = search::attribute::DistanceMetric::Euclidean); DistanceClosenessFixture(const vespalib::string& tensor_type, bool direct_tensor, size_t fooCnt, size_t barCnt, const Labels &labels, const vespalib::string &featureName, - const vespalib::string& query_tensor = ""); + const vespalib::string& query_tensor = "", + search::attribute::DistanceMetric distance_metric = search::attribute::DistanceMetric::Euclidean); ~DistanceClosenessFixture(); void set_attribute_tensor(uint32_t docid, const vespalib::eval::TensorSpec& spec); void set_query_tensor(const vespalib::string& query_tensor_name, @@ -86,8 +89,11 @@ struct DistanceClosenessFixture : BlueprintFactoryFixture, IndexEnvironmentFixtu setScore(fooHandles[i], docId, 1.0/(1.0+distance)); } void setBarScore(uint32_t i, uint32_t docId, feature_t distance) { + set_bar_rawscore(i, docId, 1.0/(1.0+distance)); + } + void set_bar_rawscore(uint32_t i, uint32_t docid, feature_t rawscore) { ASSERT_LT(i, barHandles.size()); - setScore(barHandles[i], docId, 1.0/(1.0+distance)); + setScore(barHandles[i], docid, rawscore); } bool failed() const noexcept { return _failed; } }; |