diff options
author | Arne Juul <arnej@verizonmedia.com> | 2021-01-05 11:45:06 +0000 |
---|---|---|
committer | Arne Juul <arnej@verizonmedia.com> | 2021-01-08 10:56:13 +0000 |
commit | 8aa9ffda4324ddd5baff87be858063c6399a26ca (patch) | |
tree | 2266104c190b3f7d1b7dc64f74ee095da78d9c26 /searchlib | |
parent | f55c3c4c32abdec9803c4ad999d2157705dd6fd4 (diff) |
add method for threshold->internal distance
Diffstat (limited to 'searchlib')
-rw-r--r-- | searchlib/src/vespa/searchlib/tensor/distance_function.h | 11 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/tensor/distance_functions.h | 18 |
2 files changed, 28 insertions, 1 deletions
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function.h b/searchlib/src/vespa/searchlib/tensor/distance_function.h index 30ad1876317..724f83b6129 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_function.h +++ b/searchlib/src/vespa/searchlib/tensor/distance_function.h @@ -11,15 +11,24 @@ namespace search::tensor { /** * Interface used to calculate the distance between two n-dimensional vectors. * - * The vectors must be of same size and same type (float or double). + * The vectors must be of same size and same cell type (float or double). * The actual implementation must know which type the vectors are. */ class DistanceFunction { public: using UP = std::unique_ptr<DistanceFunction>; virtual ~DistanceFunction() {} + + // calculate internal distance (comparable) virtual double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const = 0; + + // convert threshold (external distance units) to internal units + virtual double convert_threshold(double threshold) const = 0; + + // convert internal distance to rawscore (1.0 / (1.0 + d)) virtual double to_rawscore(double distance) const = 0; + + // calculate internal distance, early return allowed if > limit virtual double calc_with_limit(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs, double limit) const = 0; diff --git a/searchlib/src/vespa/searchlib/tensor/distance_functions.h b/searchlib/src/vespa/searchlib/tensor/distance_functions.h index 8db7b1f48f1..bccf8c0bdb6 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_functions.h +++ b/searchlib/src/vespa/searchlib/tensor/distance_functions.h @@ -26,6 +26,9 @@ public: assert(sz == rhs_vector.size()); return _computer.squaredEuclideanDistance(&lhs_vector[0], &rhs_vector[0], sz); } + double convert_threshold(double threshold) const override { + return threshold*threshold; + } double to_rawscore(double distance) const override { double d = sqrt(distance); double score = 1.0 / (1.0 + d); @@ -75,6 +78,10 @@ public: double distance = 1.0 - cosine_similarity; // in range [0,2] return distance; } + double convert_threshold(double threshold) const override { + double cosine_similarity = cos(threshold); + return 1.0 - cosine_similarity; + } double to_rawscore(double distance) const override { double cosine_similarity = 1.0 - distance; // should be in in range [-1,1] but roundoff may cause problems: @@ -112,6 +119,9 @@ public: double score = 1.0 - _computer.dotProduct(&lhs_vector[0], &rhs_vector[0], sz); return std::max(0.0, score); } + double convert_threshold(double threshold) const override { + return threshold; + } double to_rawscore(double distance) const override { double score = 1.0 / (1.0 + distance); return score; @@ -163,6 +173,11 @@ public: double hav_central_angle = hav_lat + cos(lat_A)*cos(lat_B)*hav_lon; return hav_central_angle; } + double convert_threshold(double threshold) const override { + double half_angle = threshold / (2 * 6371.0088); + double rt_hav = sin(half_angle); + return rt_hav * rt_hav; + } double to_rawscore(double distance) const override { double hav_diff = sqrt(distance); // distance in kilometers: @@ -197,6 +212,9 @@ public: } return (double)sum; } + double convert_threshold(double threshold) const override { + return threshold; + } double to_rawscore(double distance) const override { double score = 1.0 / (1.0 + distance); return score; |