diff options
author | Geir Storli <geirst@yahooinc.com> | 2023-05-08 11:36:27 +0000 |
---|---|---|
committer | Geir Storli <geirst@yahooinc.com> | 2023-05-08 11:36:27 +0000 |
commit | a961f9922c280d5033d1578a105bf39fa53d2c99 (patch) | |
tree | ac61e3e1313f714a9cf8317cd8dd57b24a477dfc /searchlib | |
parent | 005d0b28426912fb7c77fb29ad2760f41112e3f4 (diff) |
Update closeness (rawscore) and distance for the dotproduct distance metric.
The closeness and rawScore rank features return the dot product.
The distance rank feature returns the negative dot product.
Diffstat (limited to 'searchlib')
4 files changed, 24 insertions, 22 deletions
diff --git a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp index 9d0b7259912..363193da110 100644 --- a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp +++ b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp @@ -528,8 +528,10 @@ double computeTransformedMipsChecked(TypedCells a, TypedCells b, bool check_inse double closeness_r = d_r->to_rawscore(result); EXPECT_DOUBLE_EQ(closeness_n, closeness_f); EXPECT_DOUBLE_EQ(closeness_n, closeness_r); - EXPECT_GT(closeness_n, 0.0); - EXPECT_LE(closeness_n, 1.0); + EXPECT_DOUBLE_EQ(closeness_n, -result); + EXPECT_DOUBLE_EQ(result, d_n->to_distance(closeness_n)); + EXPECT_DOUBLE_EQ(result, d_f->to_distance(closeness_f)); + EXPECT_DOUBLE_EQ(result, d_r->to_distance(closeness_r)); if (check_insert) { auto d_i = dbl_dff.for_insertion_vector(a); EXPECT_DOUBLE_EQ(d_i->calc(b), result); @@ -601,11 +603,6 @@ TEST(DistanceFunctionsTest, transformed_mips_growing_norm) EXPECT_DOUBLE_EQ(4.0, f->calc(t(p7))); EXPECT_DOUBLE_EQ(-4.0, f->calc(t(p8))); - // closeness - EXPECT_DOUBLE_EQ(0.25, f->to_rawscore(1.0)); - EXPECT_DOUBLE_EQ(0.50, f->to_rawscore(0.0)); - EXPECT_DOUBLE_EQ(0.75, f->to_rawscore(-1.0)); - // now "insert" a bigger vector f = dff.for_insertion_vector(t(p6)); EXPECT_DOUBLE_EQ(0.0, f->calc(t(p1))); @@ -618,12 +615,6 @@ TEST(DistanceFunctionsTest, transformed_mips_growing_norm) // now max squared norm is 32, so p1 is "closer" to itself f = dff.for_insertion_vector(t(p1)); EXPECT_DOUBLE_EQ(-32.0, f->calc(t(p1))); - // closeness (rawscore) is also different: - EXPECT_DOUBLE_EQ(0.25, f->to_rawscore(32.0)); - EXPECT_DOUBLE_EQ(1/3., f->to_rawscore(16.0)); - EXPECT_DOUBLE_EQ(0.50, f->to_rawscore(0.0)); - EXPECT_DOUBLE_EQ(2/3., f->to_rawscore(-16.0)); - EXPECT_DOUBLE_EQ(0.75, f->to_rawscore(-32.0)); // also closer to other small vectors EXPECT_DOUBLE_EQ(-31.0, f->calc(t(p2))); diff 
--git a/searchlib/src/vespa/searchlib/features/distancefeature.cpp b/searchlib/src/vespa/searchlib/features/distancefeature.cpp index f601c91a0b2..4f98625d0e1 100644 --- a/searchlib/src/vespa/searchlib/features/distancefeature.cpp +++ b/searchlib/src/vespa/searchlib/features/distancefeature.cpp @@ -58,11 +58,11 @@ ConvertRawscoreToDistance::execute(uint32_t docId) const TermFieldMatchData *tfmd = _md->resolveTermField(elem.handle); if (tfmd->getDocId() == docId) { feature_t invdist = tfmd->getRawScore(); - feature_t converted = (1.0 / invdist) - 1.0; + feature_t converted = elem.calc ? elem.calc->function().to_distance(invdist) : ((1.0 / invdist) - 1.0); min_distance = std::min(min_distance, converted); } else if (elem.calc) { feature_t invdist = elem.calc->calc_raw_score(docId); - feature_t converted = (1.0 / invdist) - 1.0; + feature_t converted = elem.calc->function().to_distance(invdist); min_distance = std::min(min_distance, converted); } } diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function.h b/searchlib/src/vespa/searchlib/tensor/distance_function.h index 31a837d17f4..a06c451d5e2 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_function.h +++ b/searchlib/src/vespa/searchlib/tensor/distance_function.h @@ -13,11 +13,23 @@ class DistanceConverter { public: virtual ~DistanceConverter() = default; - // convert threshold (external distance units) to internal units + /** + * Convert threshold (external distance units) to internal units. + */ virtual double convert_threshold(double threshold) const = 0; - // convert internal distance to rawscore (1.0 / (1.0 + d)) + /** + * Convert internal distance to rawscore (also used as closeness). + */ virtual double to_rawscore(double distance) const = 0; + + /** + * Convert rawscore to external distance. + * Override this when the rawscore is NOT defined as (1.0 / (1.0 + external_distance)). 
+ */ + virtual double to_distance(double rawscore) const { + return (1.0 / rawscore) - 1.0; + } }; } diff --git a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp index 1e238aaacc7..16f9eeeabc2 100644 --- a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp +++ b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp @@ -62,11 +62,10 @@ public: return threshold; } double to_rawscore(double distance) const override { - double dp = -distance; - double t1 = dp / _max_sq_norm; - double t2 = t1 / (1.0 + std::fabs(t1)); - double r = (t2 + 1.0) * 0.5; - return r; + return -distance; + } + double to_distance(double rawscore) const override { + return -rawscore; } double calc_with_limit(const vespalib::eval::TypedCells& rhs, double) const override { return calc(rhs); |