about | summary | refs | log | tree | commit | diff | stats
path: root/searchlib
diff options
context:
space:
mode:
author Geir Storli <geirst@yahooinc.com> 2023-05-08 11:36:27 +0000
committer Geir Storli <geirst@yahooinc.com> 2023-05-08 11:36:27 +0000
commit a961f9922c280d5033d1578a105bf39fa53d2c99 (patch)
tree ac61e3e1313f714a9cf8317cd8dd57b24a477dfc /searchlib
parent 005d0b28426912fb7c77fb29ad2760f41112e3f4 (diff)
Update closeness (rawscore) and distance for the dotproduct distance metric.
The closeness and rawScore rank features return the dot product. The distance rank feature returns the negative dot product.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp 17
-rw-r--r-- searchlib/src/vespa/searchlib/features/distancefeature.cpp 4
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/distance_function.h 16
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp 9
4 files changed, 24 insertions, 22 deletions
diff --git a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp
index 9d0b7259912..363193da110 100644
--- a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp
+++ b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp
@@ -528,8 +528,10 @@ double computeTransformedMipsChecked(TypedCells a, TypedCells b, bool check_inse
double closeness_r = d_r->to_rawscore(result);
EXPECT_DOUBLE_EQ(closeness_n, closeness_f);
EXPECT_DOUBLE_EQ(closeness_n, closeness_r);
- EXPECT_GT(closeness_n, 0.0);
- EXPECT_LE(closeness_n, 1.0);
+ EXPECT_DOUBLE_EQ(closeness_n, -result);
+ EXPECT_DOUBLE_EQ(result, d_n->to_distance(closeness_n));
+ EXPECT_DOUBLE_EQ(result, d_f->to_distance(closeness_f));
+ EXPECT_DOUBLE_EQ(result, d_r->to_distance(closeness_r));
if (check_insert) {
auto d_i = dbl_dff.for_insertion_vector(a);
EXPECT_DOUBLE_EQ(d_i->calc(b), result);
@@ -601,11 +603,6 @@ TEST(DistanceFunctionsTest, transformed_mips_growing_norm)
EXPECT_DOUBLE_EQ(4.0, f->calc(t(p7)));
EXPECT_DOUBLE_EQ(-4.0, f->calc(t(p8)));
- // closeness
- EXPECT_DOUBLE_EQ(0.25, f->to_rawscore(1.0));
- EXPECT_DOUBLE_EQ(0.50, f->to_rawscore(0.0));
- EXPECT_DOUBLE_EQ(0.75, f->to_rawscore(-1.0));
-
// now "insert" a bigger vector
f = dff.for_insertion_vector(t(p6));
EXPECT_DOUBLE_EQ(0.0, f->calc(t(p1)));
@@ -618,12 +615,6 @@ TEST(DistanceFunctionsTest, transformed_mips_growing_norm)
// now max squared norm is 32, so p1 is "closer" to itself
f = dff.for_insertion_vector(t(p1));
EXPECT_DOUBLE_EQ(-32.0, f->calc(t(p1)));
- // closeness (rawscore) is also different:
- EXPECT_DOUBLE_EQ(0.25, f->to_rawscore(32.0));
- EXPECT_DOUBLE_EQ(1/3., f->to_rawscore(16.0));
- EXPECT_DOUBLE_EQ(0.50, f->to_rawscore(0.0));
- EXPECT_DOUBLE_EQ(2/3., f->to_rawscore(-16.0));
- EXPECT_DOUBLE_EQ(0.75, f->to_rawscore(-32.0));
// also closer to other small vectors
EXPECT_DOUBLE_EQ(-31.0, f->calc(t(p2)));
diff --git a/searchlib/src/vespa/searchlib/features/distancefeature.cpp b/searchlib/src/vespa/searchlib/features/distancefeature.cpp
index f601c91a0b2..4f98625d0e1 100644
--- a/searchlib/src/vespa/searchlib/features/distancefeature.cpp
+++ b/searchlib/src/vespa/searchlib/features/distancefeature.cpp
@@ -58,11 +58,11 @@ ConvertRawscoreToDistance::execute(uint32_t docId)
const TermFieldMatchData *tfmd = _md->resolveTermField(elem.handle);
if (tfmd->getDocId() == docId) {
feature_t invdist = tfmd->getRawScore();
- feature_t converted = (1.0 / invdist) - 1.0;
+ feature_t converted = elem.calc ? elem.calc->function().to_distance(invdist) : ((1.0 / invdist) - 1.0);
min_distance = std::min(min_distance, converted);
} else if (elem.calc) {
feature_t invdist = elem.calc->calc_raw_score(docId);
- feature_t converted = (1.0 / invdist) - 1.0;
+ feature_t converted = elem.calc->function().to_distance(invdist);
min_distance = std::min(min_distance, converted);
}
}
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function.h b/searchlib/src/vespa/searchlib/tensor/distance_function.h
index 31a837d17f4..a06c451d5e2 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_function.h
+++ b/searchlib/src/vespa/searchlib/tensor/distance_function.h
@@ -13,11 +13,23 @@ class DistanceConverter {
public:
virtual ~DistanceConverter() = default;
- // convert threshold (external distance units) to internal units
+ /**
+ * Convert threshold (external distance units) to internal units.
+ */
virtual double convert_threshold(double threshold) const = 0;
- // convert internal distance to rawscore (1.0 / (1.0 + d))
+ /**
+ * Convert internal distance to rawscore (also used as closeness).
+ */
virtual double to_rawscore(double distance) const = 0;
+
+ /**
+ * Convert rawscore to external distance.
+ * Override this when the rawscore is NOT defined as (1.0 / (1.0 + external_distance)).
+ */
+ virtual double to_distance(double rawscore) const {
+ return (1.0 / rawscore) - 1.0;
+ }
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp
index 1e238aaacc7..16f9eeeabc2 100644
--- a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp
@@ -62,11 +62,10 @@ public:
return threshold;
}
double to_rawscore(double distance) const override {
- double dp = -distance;
- double t1 = dp / _max_sq_norm;
- double t2 = t1 / (1.0 + std::fabs(t1));
- double r = (t2 + 1.0) * 0.5;
- return r;
+ return -distance;
+ }
+ double to_distance(double rawscore) const override {
+ return -rawscore;
}
double calc_with_limit(const vespalib::eval::TypedCells& rhs, double) const override {
return calc(rhs);