diff options
author | Arne Juul <arnej@yahooinc.com> | 2023-04-28 14:26:51 +0000 |
---|---|---|
committer | Arne Juul <arnej@yahooinc.com> | 2023-04-28 14:26:51 +0000 |
commit | 22e933ddb54a825c646296e0153df3cba73fcfe9 (patch) | |
tree | f74b4492fac9fcdb530ffa3414c91add0cce893d | |
parent | 78e64bf345d40a863c4a31a79a50482bedff04c6 (diff) |
add some documentation comments
-rw-r--r-- | searchlib/src/vespa/searchlib/attribute/configconverter.cpp | 2 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h | 19 |
2 files changed, 20 insertions, 1 deletion
```diff
diff --git a/searchlib/src/vespa/searchlib/attribute/configconverter.cpp b/searchlib/src/vespa/searchlib/attribute/configconverter.cpp
index 365c092ba3a..2119f441a14 100644
--- a/searchlib/src/vespa/searchlib/attribute/configconverter.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/configconverter.cpp
@@ -137,7 +137,7 @@ ConfigConverter::convert(const AttributesConfig::Attribute & cfg)
         case CfgDm::PRENORMALIZED_ANGULAR:
             dm = DistanceMetric::PrenormalizedAngular;
 /*
-        case CfgDm::TRANFORMED_MIPS:
+        case CfgDm::TRANSFORMED_MIPS:
             dm = DistanceMetric::TransformedMips;
 */
             break;
diff --git a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h
index 929bfdcc8c4..fabd6bfcc57 100644
--- a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h
+++ b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h
@@ -10,12 +10,24 @@
 
 namespace search::tensor {
 
+/**
+ * Thread-safe storage of maximum value for squared vector norm.
+ * sq_norm = |x|^2 = sum(x[i]*x[i]) = dotproduct(x,x)
+ * Note that the initial value is 1.0; so even if all
+ * vectors seen have 0 or very small length, you will never
+ * get a value < 1.0.
+ */
 class MaximumSquaredNormStore {
 private:
     std::mutex _lock;
     double _max_sq_norm;
 public:
     MaximumSquaredNormStore() noexcept : _lock(), _max_sq_norm(1.0) {}
+    /**
+     * Fetch the maximum value seen so far.
+     * Usually you will also supply a value computed for a newly seen
+     * vector, which may update the maximum value.
+     */
     double get_max(double value = 0.0) {
         std::lock_guard<std::mutex> guard(_lock);
         if (value > _max_sq_norm) [[unlikely]] {
@@ -25,6 +37,13 @@ public:
     }
 };
 
+/**
+ * Factory for distance functions which can apply a transformation
+ * mapping Maximum Inner Product Search to a nearest neighbor
+ * problem.  When inserting vectors, an extra dimension is
+ * added ensuring behavior "as if" all vectors had length equal
+ * to the longest vector inserted so far, or at least length 1.
+ */
 template<typename FloatType>
 class MipsDistanceFunctionFactory : public DistanceFunctionFactory {
     std::shared_ptr<MaximumSquaredNormStore> _sq_norm_store;
```