summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2023-06-01 22:33:11 +0200
committer GitHub <noreply@github.com> 2023-06-01 22:33:11 +0200
commit 74c1b6bcfe2e3021e500e0592933e6eb14a36605 (patch)
tree 22073df7b8ba2440316f2f32c9665b48d4c21aac
parent b103bebf5eb80262ccb7dfcced1a54c99334459e (diff)
parent 6479ec6926adff29b2f86d32e0062bb30b6790ee (diff)
Merge pull request #27264 from vespa-engine/geirst/closeness-dotproduct-negative-values
Allow negative values from closeness when using dotproduct distance metric
-rw-r--r-- searchlib/src/tests/features/nns_closeness/nns_closeness_test.cpp | 20
-rw-r--r-- searchlib/src/vespa/searchlib/features/closenessfeature.cpp | 2
-rw-r--r-- searchlib/src/vespa/searchlib/features/distance_calculator_bundle.cpp | 8
-rw-r--r-- searchlib/src/vespa/searchlib/features/distance_calculator_bundle.h | 3
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/distance_calculator.h | 2
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/distance_function.h | 7
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp | 3
-rw-r--r-- searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.cpp | 13
-rw-r--r-- searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.h | 12
9 files changed, 58 insertions, 12 deletions
diff --git a/searchlib/src/tests/features/nns_closeness/nns_closeness_test.cpp b/searchlib/src/tests/features/nns_closeness/nns_closeness_test.cpp
index 8cb060c08e4..703f03918d8 100644
--- a/searchlib/src/tests/features/nns_closeness/nns_closeness_test.cpp
+++ b/searchlib/src/tests/features/nns_closeness/nns_closeness_test.cpp
@@ -16,6 +16,7 @@ using namespace search::features;
using namespace search::fef::test;
using namespace search::fef;
+using search::attribute::DistanceMetric;
using vespalib::eval::TensorSpec;
const vespalib::string labelFeatureName("closeness(label,nns)");
@@ -146,4 +147,23 @@ TEST(NnsClosenessTest, raw_score_is_calculated_on_the_fly_using_label_setup)
expect_raw_score_calculated_on_the_fly(f2);
}
+TEST(NnsClosenessTest, can_return_negative_values_with_dotproduct_distance_metric)
+{
+ NoLabel f1;
+ RankFixture f2(0, 2, f1, fieldFeatureName, "tensor(x[2]):[2,3]", DistanceMetric::Dotproduct);
+ ASSERT_FALSE(f2.failed());
+
+ f2.set_bar_rawscore(0, 7, 5.0);
+ f2.set_bar_rawscore(1, 8, -5.0);
+ f2.set_attribute_tensor(9, TensorSpec::from_expr("tensor(x[2]):[4,5]"));
+ f2.set_attribute_tensor(10, TensorSpec::from_expr("tensor(x[2]):[-4,-5]"));
+
+ // Docids 7 and 8 have their raw scores set explicitly above and are expected back unchanged (including the negative one).
+ // For docids 9 and 10 the raw score is calculated on the fly using a distance calculator over the attribute and query tensors: [2,3].[4,5] = 23 and [2,3].[-4,-5] = -23.
+ EXPECT_EQ(5.0, f2.getScore(7));
+ EXPECT_EQ(-5.0, f2.getScore(8));
+ EXPECT_EQ(23.0, f2.getScore(9));
+ EXPECT_EQ(-23.0, f2.getScore(10));
+}
+
GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/vespa/searchlib/features/closenessfeature.cpp b/searchlib/src/vespa/searchlib/features/closenessfeature.cpp
index 048a507b3fd..05579ad4fc1 100644
--- a/searchlib/src/vespa/searchlib/features/closenessfeature.cpp
+++ b/searchlib/src/vespa/searchlib/features/closenessfeature.cpp
@@ -44,7 +44,7 @@ ConvertRawScoreToCloseness::ConvertRawScoreToCloseness(const fef::IQueryEnvironm
void
ConvertRawScoreToCloseness::execute(uint32_t docId)
{
- feature_t max_closeness = 0.0;
+ feature_t max_closeness = _bundle.min_rawscore();
assert(_md);
for (const auto& elem : _bundle.elements()) {
const TermFieldMatchData *tfmd = _md->resolveTermField(elem.handle);
diff --git a/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.cpp b/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.cpp
index fad4c649165..22afaa3ca84 100644
--- a/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.cpp
+++ b/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.cpp
@@ -97,7 +97,8 @@ DistanceCalculatorBundle::DistanceCalculatorBundle(const fef::IQueryEnvironment&
uint32_t field_id,
const vespalib::string& feature_name)
- : _elems()
+ : _elems(),
+ _min_rawscore(0.0)
{
_elems.reserve(env.getNumTerms());
const auto* attr = resolve_attribute_for_field(env, field_id, feature_name);
@@ -107,6 +108,7 @@ DistanceCalculatorBundle::DistanceCalculatorBundle(const fef::IQueryEnvironment&
const auto* term = env.getTerm(i);
if (term->query_tensor_name().has_value() && (attr != nullptr)) {
_elems.emplace_back(handle, make_distance_calculator(env, *attr, term->query_tensor_name().value(), feature_name));
+ _min_rawscore = _elems.back().calc->function().min_rawscore();
} else {
_elems.emplace_back(handle);
}
@@ -118,7 +120,8 @@ DistanceCalculatorBundle::DistanceCalculatorBundle(const fef::IQueryEnvironment&
std::optional<uint32_t> field_id,
const vespalib::string& label,
const vespalib::string& feature_name)
- : _elems()
+ : _elems(),
+ _min_rawscore(0.0)
{
const ITermData* term = util::getTermByLabel(env, label);
if (term != nullptr) {
@@ -135,6 +138,7 @@ DistanceCalculatorBundle::DistanceCalculatorBundle(const fef::IQueryEnvironment&
const auto* attr = resolve_attribute_for_field(env, term_field.getFieldId(), feature_name);
if (attr != nullptr) {
calc = make_distance_calculator(env, *attr, term->query_tensor_name().value(), feature_name);
+ _min_rawscore = calc->function().min_rawscore();
}
}
_elems.emplace_back(handle, std::move(calc));
diff --git a/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.h b/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.h
index e3be52aecc5..cb85985cc09 100644
--- a/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.h
+++ b/searchlib/src/vespa/searchlib/features/distance_calculator_bundle.h
@@ -34,6 +34,7 @@ public:
};
private:
std::vector<Element> _elems;
+ double _min_rawscore;
public:
DistanceCalculatorBundle(const fef::IQueryEnvironment& env,
@@ -47,6 +48,8 @@ public:
const std::vector<Element>& elements() const { return _elems; }
+ double min_rawscore() const { return _min_rawscore; }
+
static void prepare_shared_state(const fef::IQueryEnvironment& env,
fef::IObjectStore& store,
uint32_t field_id,
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_calculator.h b/searchlib/src/vespa/searchlib/tensor/distance_calculator.h
index f29cd389732..b65f4ff1868 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_calculator.h
+++ b/searchlib/src/vespa/searchlib/tensor/distance_calculator.h
@@ -46,7 +46,7 @@ public:
double calc_raw_score(uint32_t docid) const {
auto vectors = _attr_tensor.get_vectors(docid);
- double result = 0.0;
+ double result = _dist_fun->min_rawscore();
for (uint32_t i = 0; i < vectors.subspaces(); ++i) {
double distance = _dist_fun->calc(vectors.cells(i));
double score = _dist_fun->to_rawscore(distance);
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function.h b/searchlib/src/vespa/searchlib/tensor/distance_function.h
index a06c451d5e2..0df7fe6cc1d 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_function.h
+++ b/searchlib/src/vespa/searchlib/tensor/distance_function.h
@@ -30,6 +30,13 @@ public:
virtual double to_distance(double rawscore) const {
return (1.0 / rawscore) - 1.0;
}
+
+ /**
+ * The minimum rawscore (also used as closeness) that this distance function can return. Defaults to 0.0; a distance function whose rawscore can go below zero must override this.
+ */
+ virtual double min_rawscore() const {
+ return 0.0;
+ }
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp
index 16f9eeeabc2..5ad3a044df6 100644
--- a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp
@@ -67,6 +67,9 @@ public:
double to_distance(double rawscore) const override {
return -rawscore;
}
+ double min_rawscore() const override {
+ return std::numeric_limits<double>::lowest(); // most negative finite double: to_distance() above maps rawscore to -rawscore, so rawscores here are not bounded below by 0.0
+ }
double calc_with_limit(const vespalib::eval::TypedCells& rhs, double) const override {
return calc(rhs);
}
diff --git a/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.cpp b/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.cpp
index e0444e8dca7..f6fb96cb74b 100644
--- a/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.cpp
+++ b/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.cpp
@@ -32,12 +32,13 @@ namespace {
std::shared_ptr<TensorAttribute>
create_tensor_attribute(const vespalib::string& attr_name,
const vespalib::string& tensor_type,
+ DistanceMetric distance_metric,
bool direct_tensor,
uint32_t docid_limit)
{
Config cfg(BasicType::TENSOR, CollectionType::SINGLE);
cfg.setTensorType(ValueType::from_spec(tensor_type));
- cfg.set_distance_metric(DistanceMetric::Euclidean);
+ cfg.set_distance_metric(distance_metric);
std::shared_ptr<TensorAttribute> result;
if (cfg.tensorType().is_dense()) {
result = std::make_shared<DenseTensorAttribute>(attr_name, cfg);
@@ -59,8 +60,9 @@ FeatureDumpFixture::~FeatureDumpFixture() = default;
DistanceClosenessFixture::DistanceClosenessFixture(size_t fooCnt, size_t barCnt,
const Labels& labels,
const vespalib::string& featureName,
- const vespalib::string& query_tensor)
- : DistanceClosenessFixture("tensor(x[2])", false, fooCnt, barCnt, labels, featureName, query_tensor)
+ const vespalib::string& query_tensor,
+ DistanceMetric distance_metric)
+ : DistanceClosenessFixture("tensor(x[2])", false, fooCnt, barCnt, labels, featureName, query_tensor, distance_metric)
{
}
@@ -69,7 +71,8 @@ DistanceClosenessFixture::DistanceClosenessFixture(const vespalib::string& tenso
size_t fooCnt, size_t barCnt,
const Labels& labels,
const vespalib::string& featureName,
- const vespalib::string& query_tensor)
+ const vespalib::string& query_tensor,
+ DistanceMetric distance_metric)
: queryEnv(&indexEnv), rankSetup(factory, indexEnv),
mdl(), match_data(), rankProgram(), fooHandles(), barHandles(),
tensor_attr(),
@@ -96,7 +99,7 @@ DistanceClosenessFixture::DistanceClosenessFixture(const vespalib::string& tenso
queryEnv.getTerms().push_back(term);
}
if (!query_tensor.empty()) {
- tensor_attr = create_tensor_attribute("bar", tensor_type, direct_tensor, docid_limit);
+ tensor_attr = create_tensor_attribute("bar", tensor_type, distance_metric, direct_tensor, docid_limit);
indexEnv.getAttributeMap().add(tensor_attr);
search::fef::indexproperties::type::Attribute::set(indexEnv.getProperties(), "bar", tensor_type);
set_query_tensor("qbar", "tensor(x[2])", TensorSpec::from_expr(query_tensor));
diff --git a/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.h b/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.h
index 8aae1ecb942..768e54cc19b 100644
--- a/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.h
+++ b/searchlib/src/vespa/searchlib/test/features/distance_closeness_fixture.h
@@ -2,6 +2,7 @@
#pragma once
+#include <vespa/searchcommon/attribute/distance_metric.h>
#include <vespa/searchlib/features/setup.h>
#include <vespa/searchlib/fef/fef.h>
#include <vespa/searchlib/fef/test/indexenvironment.h>
@@ -61,12 +62,14 @@ struct DistanceClosenessFixture : BlueprintFactoryFixture, IndexEnvironmentFixtu
bool _failed;
DistanceClosenessFixture(size_t fooCnt, size_t barCnt,
const Labels &labels, const vespalib::string &featureName,
- const vespalib::string& query_tensor = "");
+ const vespalib::string& query_tensor = "",
+ search::attribute::DistanceMetric distance_metric = search::attribute::DistanceMetric::Euclidean);
DistanceClosenessFixture(const vespalib::string& tensor_type,
bool direct_tensor,
size_t fooCnt, size_t barCnt,
const Labels &labels, const vespalib::string &featureName,
- const vespalib::string& query_tensor = "");
+ const vespalib::string& query_tensor = "",
+ search::attribute::DistanceMetric distance_metric = search::attribute::DistanceMetric::Euclidean);
~DistanceClosenessFixture();
void set_attribute_tensor(uint32_t docid, const vespalib::eval::TensorSpec& spec);
void set_query_tensor(const vespalib::string& query_tensor_name,
@@ -86,8 +89,11 @@ struct DistanceClosenessFixture : BlueprintFactoryFixture, IndexEnvironmentFixtu
setScore(fooHandles[i], docId, 1.0/(1.0+distance));
}
void setBarScore(uint32_t i, uint32_t docId, feature_t distance) {
+ set_bar_rawscore(i, docId, 1.0/(1.0+distance));
+ }
+ void set_bar_rawscore(uint32_t i, uint32_t docid, feature_t rawscore) {
ASSERT_LT(i, barHandles.size());
- setScore(barHandles[i], docId, 1.0/(1.0+distance));
+ setScore(barHandles[i], docid, rawscore);
}
bool failed() const noexcept { return _failed; }
};