searchlib/src/vespa/searchlib/tensor/distance_functions.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145

// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#pragma once

#include "distance_function.h"
#include <vespa/eval/tensor/dense/typed_cells.h>
#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
#include <cmath>

namespace search::tensor {

/**
 * Calculates the square of the standard Euclidean distance.
 * Will use instruction optimal for the cpu it is running on.
 */
template <typename FloatType>
class SquaredEuclideanDistance : public DistanceFunction {
public:
    SquaredEuclideanDistance()
        : _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator())
    {}
    double calc(const vespalib::tensor::TypedCells& lhs, const vespalib::tensor::TypedCells& rhs) const override {
        auto lhs_vector = lhs.typify<FloatType>();
        auto rhs_vector = rhs.typify<FloatType>();
        size_t sz = lhs_vector.size();
        assert(sz == rhs_vector.size());
        return _computer.squaredEuclideanDistance(&lhs_vector[0], &rhs_vector[0], sz);
    }
    double to_rawscore(double distance) const override {
        double d = sqrt(distance);
        double score = 1.0 / (1.0 + d);
        return score;
    }
    double calc_with_limit(const vespalib::tensor::TypedCells& lhs,
                           const vespalib::tensor::TypedCells& rhs,
                           double limit) const override
    {
        auto lhs_vector = lhs.typify<FloatType>();
        auto rhs_vector = rhs.typify<FloatType>();
        double sum = 0.0;
        size_t sz = lhs_vector.size();
        assert(sz == rhs_vector.size());
        for (size_t i = 0; i < sz && sum <= limit; ++i) {
            double diff = lhs_vector[i] - rhs_vector[i];
            sum += diff*diff;
        }
        return sum;
    }

    const vespalib::hwaccelrated::IAccelrated & _computer;
};

template class SquaredEuclideanDistance<float>;
template class SquaredEuclideanDistance<double>;

/**
 * Calculates angular distance between vectors with assumed norm 1.
 */
template <typename FloatType>
class AngularDistance : public DistanceFunction {
public:
    AngularDistance()
        : _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator())
    {}
    double calc(const vespalib::tensor::TypedCells& lhs, const vespalib::tensor::TypedCells& rhs) const override {
        auto lhs_vector = lhs.typify<FloatType>();
        auto rhs_vector = rhs.typify<FloatType>();
        size_t sz = lhs_vector.size();
        assert(sz == rhs_vector.size());
        double score = 1.0 - _computer.dotProduct(&lhs_vector[0], &rhs_vector[0], sz);
        return std::max(0.0, score);
    }
    double to_rawscore(double distance) const override {
        double score = 1.0 / (1.0 + distance);
        return score;
    }
    double calc_with_limit(const vespalib::tensor::TypedCells& lhs,
                           const vespalib::tensor::TypedCells& rhs,
                           double /*limit*/) const override
    {
        return calc(lhs, rhs);
    }

    const vespalib::hwaccelrated::IAccelrated & _computer;
};

template class AngularDistance<float>;
template class AngularDistance<double>;

/**
 * Calculates great-circle distance between Latitude/Longitude pairs,
 * measured in degrees.  Output distance is measured in meters.
 * Uses the haversine formula directly from:
 * https://en.wikipedia.org/wiki/Haversine_formula
 **/
template <typename FloatType>
class GeoDegreesDistance : public DistanceFunction {
public:
    GeoDegreesDistance() {}
    // haversine function:
    static double hav(double angle) {
        double s = sin(0.5*angle);
        return s*s;
    }
    double calc(const vespalib::tensor::TypedCells& lhs, const vespalib::tensor::TypedCells& rhs) const override {
        auto lhs_vector = lhs.typify<FloatType>();
        auto rhs_vector = rhs.typify<FloatType>();
        assert(2 == lhs_vector.size());
        assert(2 == rhs_vector.size());
        // convert to radians:
        double lat_A = lhs_vector[0] * M_PI / 180.0;
        double lat_B = rhs_vector[0] * M_PI / 180.0;
        double lon_A = lhs_vector[1] * M_PI / 180.0;
        double lon_B = rhs_vector[1] * M_PI / 180.0;

        double lat_diff = lat_A - lat_B;
        double lon_diff = lon_A - lon_B;

        // haversines of differences:
        double hav_lat = hav(lat_diff);
        double hav_lon = hav(lon_diff);

        // haversine of central angle between the two points:
        double hav_central_angle = hav_lat + cos(lat_A)*cos(lat_B)*hav_lon;
        return hav_central_angle;
    }
    double to_rawscore(double distance) const override {
        double hav_diff = sqrt(distance);
        // distance in meters:
        double d = 2 * asin(hav_diff) * 6371008.8; // Earth mean radius
        return 1.0 / (1.0 + d);
    }
    double calc_with_limit(const vespalib::tensor::TypedCells& lhs,
                           const vespalib::tensor::TypedCells& rhs,
                           double /*limit*/) const override
    {
        return calc(lhs, rhs);
    }

};

template class GeoDegreesDistance<float>;
template class GeoDegreesDistance<double>;

}