summary | refs | log | tree | commit | diff | stats
path: root/searchlib
diff options
context:
space:
mode:
author: Arne Juul <arnej@verizonmedia.com> 2021-04-12 14:16:54 +0000
committer: Arne Juul <arnej@verizonmedia.com> 2021-04-12 14:16:54 +0000
commit: d083381fadef275687e7af2a566acac1b2ebc56b (patch)
tree: 706c610f6a9eb92cff67aa384fd07b043fdad630 /searchlib
parent: 0e48c75de4e9e4ae343cad61bb4b267474a0c4c1 (diff)
let the distance function decide how it wants the query tensor converted
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp 2
-rw-r--r-- searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp 2
-rw-r--r-- searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp 52
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/angular_distance.h 38
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/distance_function.h 11
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp 10
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/euclidean_distance.h 20
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h 2
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/hamming_distance.h 2
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/inner_product_distance.h 22
11 files changed, 85 insertions, 78 deletions
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
index 2a509031e24..9621b93fd37 100644
--- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
+++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
@@ -230,7 +230,7 @@ public:
const search::tensor::DistanceFunction *distance_function() const override {
- static search::tensor::SquaredEuclideanDistance my_dist_fun;
+ static search::tensor::SquaredEuclideanDistance my_dist_fun(vespalib::eval::CellType::DOUBLE);
return &my_dist_fun;
}
};
diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
index 6ffe118aa65..3c6668dd410 100644
--- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
+++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
@@ -78,7 +78,7 @@ public:
void init(bool heuristic_select_neighbors) {
auto generator = std::make_unique<LevelGenerator>();
level_generator = generator.get();
- index = std::make_unique<HnswIndex>(vectors, std::make_unique<SquaredEuclideanDistance>(),
+ index = std::make_unique<HnswIndex>(vectors, std::make_unique<SquaredEuclideanDistance>(vespalib::eval::CellType::DOUBLE),
std::move(generator),
HnswIndex::Config(5, 2, 10, 0, heuristic_select_neighbors));
}
diff --git a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp
index 7acdb4df983..090f0306fa7 100644
--- a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp
+++ b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp
@@ -231,7 +231,7 @@ public:
void init() {
uint32_t m = 16;
- index = std::make_unique<HnswIndex>(vectors, std::make_unique<SquaredEuclideanDistance>(),
+ index = std::make_unique<HnswIndex>(vectors, std::make_unique<SquaredEuclideanDistance>(vespalib::eval::CellType::DOUBLE),
std::make_unique<InvLogLevelGenerator>(m),
HnswIndex::Config(2*m, m, 200, 10, true));
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp
index 8012c48a04a..243cf4d3911 100644
--- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp
@@ -4,7 +4,7 @@
#include "nearest_neighbor_blueprint.h"
#include "nearest_neighbor_iterator.h"
#include "nns_index_iterator.h"
-#include <vespa/eval/eval/dense_cells_value.h>
+#include <vespa/eval/eval/fast_value.h>
#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
#include <vespa/searchlib/tensor/dense_tensor_attribute.h>
#include <vespa/searchlib/tensor/distance_function_factory.h>
@@ -12,35 +12,36 @@
LOG_SETUP(".searchlib.queryeval.nearest_neighbor_blueprint");
-using vespalib::eval::DenseCellsValue;
+using vespalib::eval::CellType;
+using vespalib::eval::FastValueBuilderFactory;
+using vespalib::eval::TypedCells;
using vespalib::eval::Value;
+using vespalib::eval::ValueType;
namespace search::queryeval {
namespace {
template<typename LCT, typename RCT>
-void
-convert_cells(std::unique_ptr<Value> &original, const vespalib::eval::ValueType &want_type)
+std::unique_ptr<Value>
+convert_cells(const ValueType &new_type, TypedCells cells)
{
- if constexpr (std::is_same<LCT,RCT>::value) {
- return;
- } else {
- auto old_cells = original->cells().typify<LCT>();
- std::vector<RCT> new_cells;
- new_cells.reserve(old_cells.size());
- for (LCT value : old_cells) {
- RCT conv(value);
- new_cells.push_back(conv);
- }
- original = std::make_unique<DenseCellsValue<RCT>>(want_type, std::move(new_cells));
+ auto old_cells = cells.typify<LCT>();
+ auto builder = FastValueBuilderFactory::get().create_value_builder<RCT>(new_type);
+ auto new_cells = builder->add_subspace();
+ assert(old_cells.size() == new_cells.size());
+ auto p = new_cells.begin();
+ for (LCT value : old_cells) {
+ RCT conv(value);
+ *p++ = conv;
}
+ return builder->build(std::move(builder));
}
struct ConvertCellsSelector
{
template <typename LCT, typename RCT>
- static auto invoke() { return convert_cells<LCT, RCT>; }
+ static auto invoke(const ValueType &new_type, TypedCells old_cells) { return convert_cells<LCT, RCT>(new_type, old_cells); }
};
} // namespace <unnamed>
@@ -63,15 +64,8 @@ NearestNeighborBlueprint::NearestNeighborBlueprint(const queryeval::FieldSpec& f
_found_hits(),
_global_filter(GlobalFilter::create())
{
- auto lct = _query_tensor->cells().type;
- auto rct = _attr_tensor.getTensorType().cell_type();
- if (rct == vespalib::eval::CellType::FLOAT || rct == vespalib::eval::CellType::DOUBLE) {
- // avoid downcasting to bfloat16 etc, that is just extra work
- using MyTypify = vespalib::eval::TypifyCellType;
- auto fixup_fun = vespalib::typify_invoke<2,MyTypify,ConvertCellsSelector>(lct, rct);
- fixup_fun(_query_tensor, _attr_tensor.getTensorType());
- }
- _fallback_dist_fun = search::tensor::make_distance_function(_attr_tensor.distance_metric(), rct);
+ CellType attr_ct = _attr_tensor.getTensorType().cell_type();
+ _fallback_dist_fun = search::tensor::make_distance_function(_attr_tensor.distance_metric(), attr_ct);
_dist_fun = _fallback_dist_fun.get();
assert(_dist_fun);
auto nns_index = _attr_tensor.nearest_neighbor_index();
@@ -79,6 +73,14 @@ NearestNeighborBlueprint::NearestNeighborBlueprint(const queryeval::FieldSpec& f
_dist_fun = nns_index->distance_function();
assert(_dist_fun);
}
+ auto query_ct = _query_tensor->cells().type;
+ CellType want_ct = _dist_fun->expected_cell_type();
+ if (query_ct != want_ct) {
+ ValueType new_type = ValueType::make_type(want_ct, _query_tensor->type().dimensions());
+ using MyTypify = vespalib::eval::TypifyCellType;
+ TypedCells old_cells = _query_tensor->cells();
+ _query_tensor = vespalib::typify_invoke<2,MyTypify,ConvertCellsSelector>(query_ct, want_ct, new_type, old_cells);
+ }
if (distance_threshold < std::numeric_limits<double>::max()) {
_distance_threshold = _dist_fun->convert_threshold(distance_threshold);
_distance_heap.set_distance_threshold(_distance_threshold);
diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.h b/searchlib/src/vespa/searchlib/tensor/angular_distance.h
index c480ba2879e..2c8643e7747 100644
--- a/searchlib/src/vespa/searchlib/tensor/angular_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.h
@@ -14,7 +14,7 @@ namespace search::tensor {
*/
class AngularDistance : public DistanceFunction {
public:
- AngularDistance() {}
+ AngularDistance(vespalib::eval::CellType expected) : DistanceFunction(expected) {}
double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override;
double convert_threshold(double threshold) const override {
double cosine_similarity = cos(threshold);
@@ -46,28 +46,26 @@ template <typename FloatType>
class AngularDistanceHW : public AngularDistance {
public:
AngularDistanceHW()
- : _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator())
+ : AngularDistance(vespalib::eval::get_cell_type<FloatType>()),
+ _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator())
{}
double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override {
constexpr vespalib::eval::CellType expected = vespalib::eval::get_cell_type<FloatType>();
- if (__builtin_expect((lhs.type == expected && rhs.type == expected), true)) {
- auto lhs_vector = lhs.unsafe_typify<FloatType>();
- auto rhs_vector = rhs.unsafe_typify<FloatType>();
- size_t sz = lhs_vector.size();
- assert(sz == rhs_vector.size());
- auto a = &lhs_vector[0];
- auto b = &rhs_vector[0];
- double a_norm_sq = _computer.dotProduct(a, a, sz);
- double b_norm_sq = _computer.dotProduct(b, b, sz);
- double squared_norms = a_norm_sq * b_norm_sq;
- double dot_product = _computer.dotProduct(a, b, sz);
- double div = (squared_norms > 0) ? sqrt(squared_norms) : 1.0;
- double cosine_similarity = dot_product / div;
- double distance = 1.0 - cosine_similarity; // in range [0,2]
- return distance;
- } else {
- return AngularDistance::calc(lhs, rhs);
- }
+ assert(lhs.type == expected && rhs.type == expected);
+ auto lhs_vector = lhs.typify<FloatType>();
+ auto rhs_vector = rhs.typify<FloatType>();
+ size_t sz = lhs_vector.size();
+ assert(sz == rhs_vector.size());
+ auto a = &lhs_vector[0];
+ auto b = &rhs_vector[0];
+ double a_norm_sq = _computer.dotProduct(a, a, sz);
+ double b_norm_sq = _computer.dotProduct(b, b, sz);
+ double squared_norms = a_norm_sq * b_norm_sq;
+ double dot_product = _computer.dotProduct(a, b, sz);
+ double div = (squared_norms > 0) ? sqrt(squared_norms) : 1.0;
+ double cosine_similarity = dot_product / div;
+ double distance = 1.0 - cosine_similarity; // in range [0,2]
+ return distance;
}
private:
const vespalib::hwaccelrated::IAccelrated & _computer;
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function.h b/searchlib/src/vespa/searchlib/tensor/distance_function.h
index 08f90fec041..531d7faf2b3 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_function.h
+++ b/searchlib/src/vespa/searchlib/tensor/distance_function.h
@@ -3,6 +3,7 @@
#pragma once
#include <memory>
+#include <vespa/eval/eval/cell_type.h>
namespace vespalib::eval { struct TypedCells; }
@@ -15,10 +16,20 @@ namespace search::tensor {
* The actual implementation must know which type the vectors are.
*/
class DistanceFunction {
+protected:
+ vespalib::eval::CellType expect_cell_type;
public:
using UP = std::unique_ptr<DistanceFunction>;
+
+ DistanceFunction(vespalib::eval::CellType expected) : expect_cell_type(expected) {}
+
virtual ~DistanceFunction() {}
+ // input (query) vectors must be converted to this cell type:
+ vespalib::eval::CellType expected_cell_type() const {
+ return expect_cell_type;
+ }
+
// calculate internal distance (comparable)
virtual double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const = 0;
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
index 1d58c01fd99..8ae9441ff11 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
@@ -21,24 +21,24 @@ make_distance_function(DistanceMetric variant, CellType cell_type)
switch (cell_type) {
case CellType::FLOAT: return std::make_unique<SquaredEuclideanDistanceHW<float>>();
case CellType::DOUBLE: return std::make_unique<SquaredEuclideanDistanceHW<double>>();
- default: return std::make_unique<SquaredEuclideanDistance>();
+ default: return std::make_unique<SquaredEuclideanDistance>(CellType::FLOAT);
}
case DistanceMetric::Angular:
switch (cell_type) {
case CellType::FLOAT: return std::make_unique<AngularDistanceHW<float>>();
case CellType::DOUBLE: return std::make_unique<AngularDistanceHW<double>>();
- default: return std::make_unique<AngularDistance>();
+ default: return std::make_unique<AngularDistance>(CellType::FLOAT);
}
case DistanceMetric::GeoDegrees:
- return std::make_unique<GeoDegreesDistance>();
+ return std::make_unique<GeoDegreesDistance>(CellType::DOUBLE);
case DistanceMetric::InnerProduct:
switch (cell_type) {
case CellType::FLOAT: return std::make_unique<InnerProductDistanceHW<float>>();
case CellType::DOUBLE: return std::make_unique<InnerProductDistanceHW<double>>();
- default: return std::make_unique<InnerProductDistance>();
+ default: return std::make_unique<InnerProductDistance>(CellType::FLOAT);
}
case DistanceMetric::Hamming:
- return std::make_unique<HammingDistance>();
+ return std::make_unique<HammingDistance>(cell_type);
}
// not reached:
return DistanceFunction::UP();
diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h
index 6d4d982834f..380c6b2add2 100644
--- a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h
@@ -14,7 +14,7 @@ namespace search::tensor {
*/
class SquaredEuclideanDistance : public DistanceFunction {
public:
- SquaredEuclideanDistance() {}
+ SquaredEuclideanDistance(vespalib::eval::CellType expected) : DistanceFunction(expected) {}
double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override;
double calc_with_limit(const vespalib::eval::TypedCells& lhs,
const vespalib::eval::TypedCells& rhs,
@@ -38,19 +38,17 @@ template <typename FloatType>
class SquaredEuclideanDistanceHW : public SquaredEuclideanDistance {
public:
SquaredEuclideanDistanceHW()
- : _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator())
+ : SquaredEuclideanDistance(vespalib::eval::get_cell_type<FloatType>()),
+ _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator())
{}
double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override {
constexpr vespalib::eval::CellType expected = vespalib::eval::get_cell_type<FloatType>();
- if (__builtin_expect((lhs.type == expected && rhs.type == expected), true)) {
- auto lhs_vector = lhs.unsafe_typify<FloatType>();
- auto rhs_vector = rhs.unsafe_typify<FloatType>();
- size_t sz = lhs_vector.size();
- assert(sz == rhs_vector.size());
- return _computer.squaredEuclideanDistance(&lhs_vector[0], &rhs_vector[0], sz);
- } else {
- return SquaredEuclideanDistance::calc(lhs, rhs);
- }
+ assert(lhs.type == expected && rhs.type == expected);
+ auto lhs_vector = lhs.typify<FloatType>();
+ auto rhs_vector = rhs.typify<FloatType>();
+ size_t sz = lhs_vector.size();
+ assert(sz == rhs_vector.size());
+ return _computer.squaredEuclideanDistance(&lhs_vector[0], &rhs_vector[0], sz);
}
private:
const vespalib::hwaccelrated::IAccelrated & _computer;
diff --git a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h
index b8b9bec50d5..7ce69ef8aae 100644
--- a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h
@@ -23,7 +23,7 @@ public:
static constexpr double earth_mean_radius = 6371.0088;
static constexpr double degrees_to_radians = M_PI / 180.0;
- GeoDegreesDistance() {}
+ GeoDegreesDistance(vespalib::eval::CellType expected) : DistanceFunction(expected) {}
// haversine function:
static double hav(double angle) {
double s = sin(0.5*angle);
diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.h b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h
index c2cd7af3863..d92671e4922 100644
--- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h
@@ -17,7 +17,7 @@ namespace search::tensor {
*/
class HammingDistance : public DistanceFunction {
public:
- HammingDistance() {}
+ HammingDistance(vespalib::eval::CellType expected) : DistanceFunction(expected) {}
double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override;
double convert_threshold(double threshold) const override {
return threshold;
diff --git a/searchlib/src/vespa/searchlib/tensor/inner_product_distance.h b/searchlib/src/vespa/searchlib/tensor/inner_product_distance.h
index cb60d18c0f5..a6ecb4eb7bb 100644
--- a/searchlib/src/vespa/searchlib/tensor/inner_product_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/inner_product_distance.h
@@ -15,7 +15,7 @@ namespace search::tensor {
*/
class InnerProductDistance : public DistanceFunction {
public:
- InnerProductDistance() {}
+ InnerProductDistance(vespalib::eval::CellType expected) : DistanceFunction(expected) {}
double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override;
double convert_threshold(double threshold) const override {
return threshold;
@@ -42,20 +42,18 @@ template <typename FloatType>
class InnerProductDistanceHW : public InnerProductDistance {
public:
InnerProductDistanceHW()
- : _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator())
+ : InnerProductDistance(vespalib::eval::get_cell_type<FloatType>()),
+ _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator())
{}
double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override {
constexpr vespalib::eval::CellType expected = vespalib::eval::get_cell_type<FloatType>();
- if (__builtin_expect((lhs.type == expected && rhs.type == expected), true)) {
- auto lhs_vector = lhs.unsafe_typify<FloatType>();
- auto rhs_vector = rhs.unsafe_typify<FloatType>();
- size_t sz = lhs_vector.size();
- assert(sz == rhs_vector.size());
- double score = 1.0 - _computer.dotProduct(&lhs_vector[0], &rhs_vector[0], sz);
- return std::max(0.0, score);
- } else {
- return InnerProductDistance::calc(lhs, rhs);
- }
+ assert(lhs.type == expected && rhs.type == expected);
+ auto lhs_vector = lhs.typify<FloatType>();
+ auto rhs_vector = rhs.typify<FloatType>();
+ size_t sz = lhs_vector.size();
+ assert(sz == rhs_vector.size());
+ double score = 1.0 - _computer.dotProduct(&lhs_vector[0], &rhs_vector[0], sz);
+ return std::max(0.0, score);
}
private:
const vespalib::hwaccelrated::IAccelrated & _computer;