aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorArne Juul <arnej@verizonmedia.com>2021-01-05 11:45:06 +0000
committerArne Juul <arnej@verizonmedia.com>2021-01-08 10:56:13 +0000
commit8aa9ffda4324ddd5baff87be858063c6399a26ca (patch)
tree2266104c190b3f7d1b7dc64f74ee095da78d9c26 /searchlib
parentf55c3c4c32abdec9803c4ad999d2157705dd6fd4 (diff)
add method for threshold->internal distance
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/vespa/searchlib/tensor/distance_function.h11
-rw-r--r--searchlib/src/vespa/searchlib/tensor/distance_functions.h18
2 files changed, 28 insertions, 1 deletions
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function.h b/searchlib/src/vespa/searchlib/tensor/distance_function.h
index 30ad1876317..724f83b6129 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_function.h
+++ b/searchlib/src/vespa/searchlib/tensor/distance_function.h
@@ -11,15 +11,24 @@ namespace search::tensor {
/**
* Interface used to calculate the distance between two n-dimensional vectors.
*
- * The vectors must be of same size and same type (float or double).
+ * The vectors must be of same size and same cell type (float or double).
* The actual implementation must know which type the vectors are.
*/
class DistanceFunction {
public:
using UP = std::unique_ptr<DistanceFunction>;
virtual ~DistanceFunction() {}
+
+ // calculate internal distance (comparable)
virtual double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const = 0;
+
+ // convert threshold (external distance units) to internal units
+ virtual double convert_threshold(double threshold) const = 0;
+
+ // convert internal distance to rawscore (1.0 / (1.0 + d))
virtual double to_rawscore(double distance) const = 0;
+
+ // calculate internal distance, early return allowed if > limit
virtual double calc_with_limit(const vespalib::eval::TypedCells& lhs,
const vespalib::eval::TypedCells& rhs,
double limit) const = 0;
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_functions.h b/searchlib/src/vespa/searchlib/tensor/distance_functions.h
index 8db7b1f48f1..bccf8c0bdb6 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_functions.h
+++ b/searchlib/src/vespa/searchlib/tensor/distance_functions.h
@@ -26,6 +26,9 @@ public:
assert(sz == rhs_vector.size());
return _computer.squaredEuclideanDistance(&lhs_vector[0], &rhs_vector[0], sz);
}
+ double convert_threshold(double threshold) const override {
+     // calc() yields the squared euclidean distance (to_rawscore() takes
+     // the square root), so the threshold is squared as well
+     const double squared_threshold = threshold * threshold;
+     return squared_threshold;
+ }
double to_rawscore(double distance) const override {
double d = sqrt(distance);
double score = 1.0 / (1.0 + d);
@@ -75,6 +78,10 @@ public:
double distance = 1.0 - cosine_similarity; // in range [0,2]
return distance;
}
+ double convert_threshold(double threshold) const override {
+     // The threshold is treated as an angle in radians; the internal
+     // distance is (1 - cos(angle)), which lies in [0,2].
+     // cos() is even and periodic, so without clamping a negative
+     // threshold would act like its absolute value and a threshold
+     // above pi would wrap around to a *smaller* internal limit.
+     constexpr double pi = 3.14159265358979323846;
+     if (threshold <= 0.0) {
+         return 0.0;   // same as 1 - cos(0)
+     }
+     if (threshold >= pi) {
+         return 2.0;   // same as 1 - cos(pi), the largest internal distance
+     }
+     double cosine_similarity = cos(threshold);
+     return 1.0 - cosine_similarity;
+ }
double to_rawscore(double distance) const override {
double cosine_similarity = 1.0 - distance;
// should be in range [-1,1] but roundoff may cause problems:
@@ -112,6 +119,9 @@ public:
double score = 1.0 - _computer.dotProduct(&lhs_vector[0], &rhs_vector[0], sz);
return std::max(0.0, score);
}
+ double convert_threshold(double threshold) const override {
+     // to_rawscore() uses the internal distance as-is, so the
+     // threshold needs no conversion
+     const double internal_threshold = threshold;
+     return internal_threshold;
+ }
double to_rawscore(double distance) const override {
double score = 1.0 / (1.0 + distance);
return score;
@@ -163,6 +173,11 @@ public:
double hav_central_angle = hav_lat + cos(lat_A)*cos(lat_B)*hav_lon;
return hav_central_angle;
}
+ double convert_threshold(double threshold) const override {
+     // Converts a great-circle distance threshold (kilometers, matching
+     // to_rawscore()) to the internal unit: the haversine of the central
+     // angle, hav(a) = sin(a/2)^2, which lies in [0,1].
+     constexpr double earth_mean_radius_km = 6371.0088;
+     constexpr double pi = 3.14159265358979323846;
+     // half the circumference: the largest possible great-circle distance
+     constexpr double max_distance_km = pi * earth_mean_radius_km;
+     // sin^2 is even and periodic, so clamp to keep the mapping monotonic:
+     // a negative threshold must not act like its absolute value, and a
+     // threshold beyond the antipode must not wrap to a smaller limit.
+     if (threshold <= 0.0) {
+         return 0.0;
+     }
+     if (threshold >= max_distance_km) {
+         return 1.0;
+     }
+     double half_angle = threshold / (2 * earth_mean_radius_km);
+     double rt_hav = sin(half_angle);
+     return rt_hav * rt_hav;
+ }
double to_rawscore(double distance) const override {
double hav_diff = sqrt(distance);
// distance in kilometers:
@@ -197,6 +212,9 @@ public:
}
return (double)sum;
}
+ double convert_threshold(double threshold) const override {
+     // to_rawscore() uses the internal distance as-is, so the
+     // threshold needs no conversion
+     const double internal_threshold = threshold;
+     return internal_threshold;
+ }
double to_rawscore(double distance) const override {
double score = 1.0 / (1.0 + distance);
return score;