summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArne Juul <arnej@yahooinc.com>2023-04-28 14:26:51 +0000
committerArne Juul <arnej@yahooinc.com>2023-04-28 14:26:51 +0000
commit22e933ddb54a825c646296e0153df3cba73fcfe9 (patch)
treef74b4492fac9fcdb530ffa3414c91add0cce893d
parent78e64bf345d40a863c4a31a79a50482bedff04c6 (diff)
add some documentation comments
-rw-r--r--searchlib/src/vespa/searchlib/attribute/configconverter.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h19
2 files changed, 20 insertions, 1 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/configconverter.cpp b/searchlib/src/vespa/searchlib/attribute/configconverter.cpp
index 365c092ba3a..2119f441a14 100644
--- a/searchlib/src/vespa/searchlib/attribute/configconverter.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/configconverter.cpp
@@ -137,7 +137,7 @@ ConfigConverter::convert(const AttributesConfig::Attribute & cfg)
case CfgDm::PRENORMALIZED_ANGULAR:
dm = DistanceMetric::PrenormalizedAngular;
/*
- case CfgDm::TRANFORMED_MIPS:
+ case CfgDm::TRANSFORMED_MIPS:
dm = DistanceMetric::TransformedMips;
*/
break;
diff --git a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h
index 929bfdcc8c4..fabd6bfcc57 100644
--- a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h
+++ b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h
@@ -10,12 +10,24 @@
namespace search::tensor {
+/**
+ * Thread-safe storage of maximum value for squared vector norm.
+ * sq_norm = |x|^2 = sum(x[i]*x[i]) = dotproduct(x,x)
+ * Note that the initial value is 1.0, so even if all
+ * vectors seen have zero or very small length, you will never
+ * get a value < 1.0.
+ */
class MaximumSquaredNormStore {
private:
std::mutex _lock;
double _max_sq_norm;
public:
MaximumSquaredNormStore() noexcept : _lock(), _max_sq_norm(1.0) {}
+ /**
+ * Fetch the maximum value seen so far.
+ * Usually you will also supply a value computed for a newly seen
+ * vector, which may update the maximum value.
+ */
double get_max(double value = 0.0) {
std::lock_guard<std::mutex> guard(_lock);
if (value > _max_sq_norm) [[unlikely]] {
@@ -25,6 +37,13 @@ public:
}
};
+/**
+ * Factory for distance functions which can apply a transformation
+ * mapping Maximum Inner Product Search to a nearest neighbor
+ * problem. When inserting vectors, an extra dimension is
+ * added, ensuring behavior "as if" all vectors had length equal
+ * to the longest vector inserted so far, or at least length 1.
+ */
template<typename FloatType>
class MipsDistanceFunctionFactory : public DistanceFunctionFactory {
std::shared_ptr<MaximumSquaredNormStore> _sq_norm_store;