aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/vespa/searchlib/tensor
diff options
context:
space:
mode:
Diffstat (limited to 'searchlib/src/vespa/searchlib/tensor')
-rw-r--r--searchlib/src/vespa/searchlib/tensor/angular_distance.cpp12
-rw-r--r--searchlib/src/vespa/searchlib/tensor/angular_distance.h4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/bound_distance_function.h5
-rw-r--r--searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp13
-rw-r--r--searchlib/src/vespa/searchlib/tensor/distance_function_factory.h4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp21
-rw-r--r--searchlib/src/vespa/searchlib/tensor/euclidean_distance.h4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hamming_distance.h4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp39
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.h6
-rw-r--r--searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp7
-rw-r--r--searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h6
-rw-r--r--searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp10
-rw-r--r--searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp21
-rw-r--r--searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp23
-rw-r--r--searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp14
-rw-r--r--searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h1
22 files changed, 144 insertions, 70 deletions
diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp
index af99260979d..d1ebb1f4e4e 100644
--- a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp
@@ -9,6 +9,7 @@
using vespalib::typify_invoke;
using vespalib::eval::TypifyCellType;
using vespalib::eval::TypedCells;
+using vespalib::eval::Int8Float;
namespace search::tensor {
@@ -26,16 +27,16 @@ public:
_lhs(_tmpSpace.storeLhs(lhs))
{
auto a = _lhs.data();
- _lhs_norm_sq = _computer.dotProduct(a, a, lhs.size);
+ _lhs_norm_sq = _computer.dotProduct(cast(a), cast(a), lhs.size);
}
double calc(TypedCells rhs) const noexcept override {
size_t sz = _lhs.size();
vespalib::ConstArrayRef<FloatType> rhs_vector = _tmpSpace.convertRhs(rhs);
auto a = _lhs.data();
auto b = rhs_vector.data();
- double b_norm_sq = _computer.dotProduct(b, b, sz);
+ double b_norm_sq = _computer.dotProduct(cast(b), cast(b), sz);
double squared_norms = _lhs_norm_sq * b_norm_sq;
- double dot_product = _computer.dotProduct(a, b, sz);
+ double dot_product = _computer.dotProduct(cast(a), cast(b), sz);
double div = (squared_norms > 0) ? sqrt(squared_norms) : 1.0;
double cosine_similarity = dot_product / div;
double distance = 1.0 - cosine_similarity; // in range [0,2]
@@ -70,19 +71,20 @@ template class BoundAngularDistance<double>;
template <typename FloatType>
BoundDistanceFunction::UP
-AngularDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) {
+AngularDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) const {
using DFT = BoundAngularDistance<FloatType>;
return std::make_unique<DFT>(lhs);
}
template <typename FloatType>
BoundDistanceFunction::UP
-AngularDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) {
+AngularDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) const {
using DFT = BoundAngularDistance<FloatType>;
return std::make_unique<DFT>(lhs);
}
template class AngularDistanceFunctionFactory<float>;
template class AngularDistanceFunctionFactory<double>;
+template class AngularDistanceFunctionFactory<Int8Float>;
}
diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.h b/searchlib/src/vespa/searchlib/tensor/angular_distance.h
index 5e0a060e060..aa51f58b3cd 100644
--- a/searchlib/src/vespa/searchlib/tensor/angular_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.h
@@ -15,8 +15,8 @@ template <typename FloatType>
class AngularDistanceFunctionFactory : public DistanceFunctionFactory {
public:
AngularDistanceFunctionFactory() = default;
- BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override;
- BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override;
+ BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const override;
+ BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) const override;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/bound_distance_function.h b/searchlib/src/vespa/searchlib/tensor/bound_distance_function.h
index 85089196a7a..318271835ad 100644
--- a/searchlib/src/vespa/searchlib/tensor/bound_distance_function.h
+++ b/searchlib/src/vespa/searchlib/tensor/bound_distance_function.h
@@ -19,6 +19,7 @@ class BoundDistanceFunction : public DistanceConverter {
public:
using UP = std::unique_ptr<BoundDistanceFunction>;
using TypedCells = vespalib::eval::TypedCells;
+ using Int8Float = vespalib::eval::Int8Float;
BoundDistanceFunction() noexcept = default;
@@ -29,6 +30,10 @@ public:
// calculate internal distance, early return allowed if > limit
virtual double calc_with_limit(TypedCells rhs, double limit) const noexcept = 0;
+protected:
+ static const double *cast(const double * p) { return p; }
+ static const float *cast(const float * p) { return p; }
+ static const int8_t *cast(const Int8Float * p) { return reinterpret_cast<const int8_t *>(p); }
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
index ed08df5866e..f39994dfdcf 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
@@ -6,6 +6,7 @@
using search::attribute::DistanceMetric;
using vespalib::eval::CellType;
+using vespalib::eval::Int8Float;
namespace search::tensor {
@@ -16,25 +17,27 @@ make_distance_function_factory(DistanceMetric variant, CellType cell_type)
case DistanceMetric::Angular:
switch (cell_type) {
case CellType::DOUBLE: return std::make_unique<AngularDistanceFunctionFactory<double>>();
+ case CellType::INT8: return std::make_unique<AngularDistanceFunctionFactory<Int8Float>>();
default: return std::make_unique<AngularDistanceFunctionFactory<float>>();
}
case DistanceMetric::Euclidean:
switch (cell_type) {
- case CellType::DOUBLE: return std::make_unique<EuclideanDistanceFunctionFactory<double>>();
- case CellType::INT8: return std::make_unique<EuclideanDistanceFunctionFactory<vespalib::eval::Int8Float>>();
+ case CellType::DOUBLE: return std::make_unique<EuclideanDistanceFunctionFactory<double>>();
+ case CellType::INT8: return std::make_unique<EuclideanDistanceFunctionFactory<Int8Float>>();
case CellType::BFLOAT16: return std::make_unique<EuclideanDistanceFunctionFactory<vespalib::BFloat16>>();
- default: return std::make_unique<EuclideanDistanceFunctionFactory<float>>();
+ default: return std::make_unique<EuclideanDistanceFunctionFactory<float>>();
}
case DistanceMetric::InnerProduct:
case DistanceMetric::PrenormalizedAngular:
switch (cell_type) {
case CellType::DOUBLE: return std::make_unique<PrenormalizedAngularDistanceFunctionFactory<double>>();
+ case CellType::INT8: return std::make_unique<PrenormalizedAngularDistanceFunctionFactory<Int8Float>>();
default: return std::make_unique<PrenormalizedAngularDistanceFunctionFactory<float>>();
}
case DistanceMetric::Dotproduct:
switch (cell_type) {
case CellType::DOUBLE: return std::make_unique<MipsDistanceFunctionFactory<double>>();
- case CellType::INT8: return std::make_unique<MipsDistanceFunctionFactory<vespalib::eval::Int8Float>>();
+ case CellType::INT8: return std::make_unique<MipsDistanceFunctionFactory<Int8Float>>();
default: return std::make_unique<MipsDistanceFunctionFactory<float>>();
}
case DistanceMetric::GeoDegrees:
@@ -42,7 +45,7 @@ make_distance_function_factory(DistanceMetric variant, CellType cell_type)
case DistanceMetric::Hamming:
switch (cell_type) {
case CellType::DOUBLE: return std::make_unique<HammingDistanceFunctionFactory<double>>();
- case CellType::INT8: return std::make_unique<HammingDistanceFunctionFactory<vespalib::eval::Int8Float>>();
+ case CellType::INT8: return std::make_unique<HammingDistanceFunctionFactory<Int8Float>>();
default: return std::make_unique<HammingDistanceFunctionFactory<float>>();
}
}
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h
index 356366d6a77..3b0a0ac91fd 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h
+++ b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h
@@ -17,8 +17,8 @@ struct DistanceFunctionFactory {
using TypedCells = vespalib::eval::TypedCells;
DistanceFunctionFactory() noexcept = default;
virtual ~DistanceFunctionFactory() = default;
- virtual BoundDistanceFunction::UP for_query_vector(TypedCells lhs) = 0;
- virtual BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) = 0;
+ virtual BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const = 0;
+ virtual BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) const = 0;
using UP = std::unique_ptr<DistanceFunctionFactory>;
};
diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp
index 3ab3a1123eb..62b92b43ad9 100644
--- a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp
@@ -16,14 +16,11 @@ using vespalib::BFloat16;
template<typename AttributeCellType>
class BoundEuclideanDistance final : public BoundDistanceFunction {
- using FloatType = std::conditional_t<std::is_same<AttributeCellType,BFloat16>::value,float,AttributeCellType>;
+ using FloatType = std::conditional_t<std::is_same<AttributeCellType, BFloat16>::value, float, AttributeCellType>;
private:
const vespalib::hwaccelrated::IAccelrated & _computer;
mutable TemporaryVectorStore<FloatType> _tmpSpace;
const vespalib::ConstArrayRef<FloatType> _lhs_vector;
- static const double *cast(const double * p) { return p; }
- static const float *cast(const float * p) { return p; }
- static const int8_t *cast(const Int8Float * p) { return reinterpret_cast<const int8_t *>(p); }
public:
explicit BoundEuclideanDistance(TypedCells lhs)
: _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator()),
@@ -44,16 +41,8 @@ public:
double score = 1.0 / (1.0 + d);
return score;
}
- double calc_with_limit(TypedCells rhs, double limit) const noexcept override {
- vespalib::ConstArrayRef<AttributeCellType> rhs_vector = rhs.typify<AttributeCellType>();
- double sum = 0.0;
- size_t sz = _lhs_vector.size();
- assert(sz == rhs_vector.size());
- for (size_t i = 0; i < sz && sum <= limit; ++i) {
- double diff = _lhs_vector[i] - rhs_vector[i];
- sum += diff*diff;
- }
- return sum;
+ double calc_with_limit(TypedCells rhs, double) const noexcept override {
+ return calc(rhs);
}
};
@@ -64,14 +53,14 @@ template class BoundEuclideanDistance<double>;
template <typename FloatType>
BoundDistanceFunction::UP
-EuclideanDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) {
+EuclideanDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) const {
using DFT = BoundEuclideanDistance<FloatType>;
return std::make_unique<DFT>(lhs);
}
template <typename FloatType>
BoundDistanceFunction::UP
-EuclideanDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) {
+EuclideanDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) const {
using DFT = BoundEuclideanDistance<FloatType>;
return std::make_unique<DFT>(lhs);
}
diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h
index 8c39a12bf86..78460c93307 100644
--- a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h
@@ -15,8 +15,8 @@ template <typename FloatType>
class EuclideanDistanceFunctionFactory : public DistanceFunctionFactory {
public:
EuclideanDistanceFunctionFactory() noexcept = default;
- BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override;
- BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override;
+ BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const override;
+ BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) const override;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp
index f5484f40271..a8a48ae4116 100644
--- a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp
@@ -82,12 +82,12 @@ public:
};
BoundDistanceFunction::UP
-GeoDistanceFunctionFactory::for_query_vector(TypedCells lhs) {
+GeoDistanceFunctionFactory::for_query_vector(TypedCells lhs) const {
return std::make_unique<BoundGeoDistance>(lhs);
}
BoundDistanceFunction::UP
-GeoDistanceFunctionFactory::for_insertion_vector(TypedCells lhs) {
+GeoDistanceFunctionFactory::for_insertion_vector(TypedCells lhs) const {
return std::make_unique<BoundGeoDistance>(lhs);
}
diff --git a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h
index 1464898421b..a85e31e8ecc 100644
--- a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h
@@ -14,8 +14,8 @@ namespace search::tensor {
class GeoDistanceFunctionFactory : public DistanceFunctionFactory {
public:
GeoDistanceFunctionFactory() = default;
- BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override;
- BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override;
+ BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const override;
+ BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) const override;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp
index 7f29a100492..7ea2e440a51 100644
--- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp
@@ -49,14 +49,14 @@ public:
template <typename FloatType>
BoundDistanceFunction::UP
-HammingDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) {
+HammingDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) const {
using DFT = BoundHammingDistance<FloatType>;
return std::make_unique<DFT>(lhs);
}
template <typename FloatType>
BoundDistanceFunction::UP
-HammingDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) {
+HammingDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) const {
using DFT = BoundHammingDistance<FloatType>;
return std::make_unique<DFT>(lhs);
}
diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.h b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h
index 6e7f96e1e2f..2e3b75cc61f 100644
--- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h
@@ -16,8 +16,8 @@ template <typename FloatType>
class HammingDistanceFunctionFactory : public DistanceFunctionFactory {
public:
HammingDistanceFunctionFactory() = default;
- BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override;
- BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override;
+ BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const override;
+ BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) const override;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
index 1db688156e0..b542c422f50 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
@@ -672,15 +672,18 @@ HnswIndex<type>::mutual_reconnect(const LinkArrayRef &cluster, uint32_t level)
std::vector<PairDist> pairs;
for (uint32_t i = 0; i + 1 < cluster.size(); ++i) {
uint32_t n_id_1 = cluster[i];
+ TypedCells n_cells_1 = get_vector(n_id_1);
+ if (n_cells_1.non_existing_attribute_value()) [[unlikely]] continue;
LinkArrayRef n_list_1 = _graph.get_link_array(n_id_1, level);
- std::unique_ptr<BoundDistanceFunction> df;
+ std::unique_ptr<BoundDistanceFunction> df = _distance_ff->for_insertion_vector(n_cells_1);
for (uint32_t j = i + 1; j < cluster.size(); ++j) {
uint32_t n_id_2 = cluster[j];
- if (has_link_to(n_list_1, n_id_2)) continue;
- if (!df) {
- df = _distance_ff->for_insertion_vector(get_vector(n_id_1));
+ if ( ! has_link_to(n_list_1, n_id_2)) {
+ auto n_cells_2 = get_vector(n_id_2);
+ if (!n_cells_2.non_existing_attribute_value()) {
+ pairs.emplace_back(n_id_1, n_id_2, df->calc(n_cells_2));
+ }
}
- pairs.emplace_back(n_id_1, n_id_2, calc_distance(*df, n_id_2));
}
}
std::sort(pairs.begin(), pairs.end());
@@ -1120,6 +1123,32 @@ HnswIndex<type>::count_reachable_nodes() const
return {found_cnt, true};
}
+template <HnswIndexType type>
+uint32_t
+HnswIndex<type>::get_subspaces(uint32_t docid) const noexcept
+{
+ if constexpr (type == HnswIndexType::SINGLE) {
+ return (docid < _graph.nodes.get_size() && _graph.nodes.get_elem_ref(docid).levels_ref().load_relaxed().valid()) ? 1 : 0;
+ } else {
+ return _id_mapping.get_ids(docid).size();
+ }
+}
+
+template <HnswIndexType type>
+uint32_t
+HnswIndex<type>::check_consistency(uint32_t docid_limit) const noexcept
+{
+ uint32_t inconsistencies = 0;
+ for (uint32_t docid = 1; docid < docid_limit; ++docid) {
+ auto index_subspaces = get_subspaces(docid);
+ auto store_subspaces = get_vectors(docid).subspaces();
+ if (index_subspaces != store_subspaces) {
+ ++inconsistencies;
+ }
+ }
+ return inconsistencies;
+}
+
template class HnswIndex<HnswIndexType::SINGLE>;
template class HnswIndex<HnswIndexType::MULTI>;
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
index 616140f426f..4d4440c1bcb 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
@@ -193,6 +193,9 @@ protected:
LinkArray filter_valid_nodeids(uint32_t level, const internal::PreparedAddNode::Links &neighbors, uint32_t self_nodeid);
void internal_complete_add(uint32_t docid, internal::PreparedAddDoc &op);
void internal_complete_add_node(uint32_t nodeid, uint32_t docid, uint32_t subspace, internal::PreparedAddNode &prepared_node);
+
+ // Called from writer only.
+ uint32_t get_subspaces(uint32_t docid) const noexcept;
public:
HnswIndex(const DocVectorAccess& vectors, DistanceFunctionFactory::UP distance_ff,
RandomLevelGenerator::UP level_generator, const HnswIndexConfig& cfg);
@@ -248,6 +251,9 @@ public:
uint32_t get_active_nodes() const noexcept { return _graph.get_active_nodes(); }
+ // Called from writer only.
+ uint32_t check_consistency(uint32_t docid_limit) const noexcept override;
+
// Should only be used by unit tests.
HnswTestNode get_node(uint32_t nodeid) const;
void set_node(uint32_t nodeid, const HnswTestNode &node);
diff --git a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp
index c42242d8dc8..5bc727ebd97 100644
--- a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp
@@ -19,9 +19,6 @@ class BoundMipsDistanceFunction final : public BoundDistanceFunction {
using ExtraDimT = std::conditional_t<extra_dim,double,std::monostate>;
[[no_unique_address]] ExtraDimT _lhs_extra_dim;
- static const double *cast(const double * p) { return p; }
- static const float *cast(const float * p) { return p; }
- static const int8_t *cast(const Int8Float * p) { return reinterpret_cast<const int8_t *>(p); }
public:
BoundMipsDistanceFunction(TypedCells lhs, MaximumSquaredNormStore& sq_norm_store)
: BoundDistanceFunction(),
@@ -76,13 +73,13 @@ public:
template<typename FloatType>
BoundDistanceFunction::UP
-MipsDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) {
+MipsDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) const {
return std::make_unique<BoundMipsDistanceFunction<FloatType, false>>(lhs, *_sq_norm_store);
}
template<typename FloatType>
BoundDistanceFunction::UP
-MipsDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) {
+MipsDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) const {
return std::make_unique<BoundMipsDistanceFunction<FloatType, true>>(lhs, *_sq_norm_store);
};
diff --git a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h
index 67a6eb58de0..336511ab78f 100644
--- a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h
+++ b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h
@@ -62,8 +62,8 @@ public:
MipsDistanceFunctionFactory() noexcept = default;
~MipsDistanceFunctionFactory() override = default;
- BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override;
- BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override;
+ BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const override;
+ BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) const override;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
index 8462ff05eca..c2bbd17ce63 100644
--- a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
@@ -114,6 +114,12 @@ public:
double distance_threshold) const = 0;
virtual DistanceFunctionFactory &distance_function_factory() const = 0;
+
+ /*
+ * Used when checking consistency during load.
+ * Called from writer only.
+ */
+ virtual uint32_t check_consistency(uint32_t docid_limit) const noexcept = 0;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp
index 4bc90001227..6f0966e7fb3 100644
--- a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp
@@ -6,6 +6,7 @@
using vespalib::typify_invoke;
using vespalib::eval::TypifyCellType;
+using vespalib::eval::Int8Float;
namespace search::tensor {
@@ -23,7 +24,7 @@ public:
_lhs(_tmpSpace.storeLhs(lhs))
{
auto a = _lhs.data();
- _lhs_norm_sq = _computer.dotProduct(a, a, lhs.size);
+ _lhs_norm_sq = _computer.dotProduct(cast(a), cast(a), lhs.size);
if (_lhs_norm_sq <= 0.0) {
_lhs_norm_sq = 1.0;
}
@@ -32,7 +33,7 @@ public:
vespalib::ConstArrayRef<FloatType> rhs_vector = _tmpSpace.convertRhs(rhs);
auto a = _lhs.data();
auto b = rhs_vector.data();
- double dot_product = _computer.dotProduct(a, b, _lhs.size());
+ double dot_product = _computer.dotProduct(cast(a), cast(b), _lhs.size());
double distance = _lhs_norm_sq - dot_product;
return distance;
}
@@ -62,19 +63,20 @@ template class BoundPrenormalizedAngularDistance<double>;
template <typename FloatType>
BoundDistanceFunction::UP
-PrenormalizedAngularDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) {
+PrenormalizedAngularDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) const {
using DFT = BoundPrenormalizedAngularDistance<FloatType>;
return std::make_unique<DFT>(lhs);
}
template <typename FloatType>
BoundDistanceFunction::UP
-PrenormalizedAngularDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) {
+PrenormalizedAngularDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) const {
using DFT = BoundPrenormalizedAngularDistance<FloatType>;
return std::make_unique<DFT>(lhs);
}
template class PrenormalizedAngularDistanceFunctionFactory<float>;
template class PrenormalizedAngularDistanceFunctionFactory<double>;
+template class PrenormalizedAngularDistanceFunctionFactory<Int8Float>;
}
diff --git a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h
index 7e3a8c2c676..6a791e0b6ec 100644
--- a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h
@@ -14,8 +14,8 @@ template <typename FloatType>
class PrenormalizedAngularDistanceFunctionFactory : public DistanceFunctionFactory {
public:
PrenormalizedAngularDistanceFunctionFactory() = default;
- BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override;
- BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override;
+ BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const override;
+ BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) const override;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp b/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp
index 4753e9d7c87..097ea67cc9e 100644
--- a/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp
@@ -1,11 +1,13 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "temporary_vector_store.h"
+#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
using vespalib::ConstArrayRef;
using vespalib::ArrayRef;
using vespalib::eval::CellType;
using vespalib::eval::TypedCells;
+using vespalib::hwaccelrated::IAccelrated;
namespace search::tensor {
@@ -13,18 +15,29 @@ namespace {
template<typename FromType, typename ToType>
ConstArrayRef<ToType>
+convert_cells(ArrayRef<ToType> space, TypedCells cells) noexcept __attribute__((noinline));
+
+template<typename FromType, typename ToType>
+ConstArrayRef<ToType>
convert_cells(ArrayRef<ToType> space, TypedCells cells) noexcept
{
- assert(cells.size == space.size());
- auto old_cells = cells.typify<FromType>();
+ auto old_cells = cells.unsafe_typify<FromType>();
ToType *p = space.data();
for (FromType value : old_cells) {
- ToType conv(value);
- *p++ = conv;
+ *p++ = static_cast<ToType>(value);
}
return space;
}
+template<>
+ConstArrayRef<float>
+convert_cells<vespalib::BFloat16, float>(ArrayRef<float> space, TypedCells cells) noexcept
+{
+ static const IAccelrated & accelerator = IAccelrated::getAccelerator();
+ accelerator.convert_bfloat16_to_float(reinterpret_cast<const uint16_t *>(cells.data), space.data(), space.size());
+ return space;
+}
+
template <typename ToType>
struct ConvertCellsSelector
{
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp
index a5d670096ab..9f551166a1d 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp
@@ -418,18 +418,25 @@ TensorAttribute::complete_set_tensor(DocId docid, const vespalib::eval::Value& t
std::unique_ptr<PrepareResult> prepare_result)
{
if (_index && !prepare_result) {
- // The tensor cells are unchanged
- if (!_is_dense) {
- // but labels might have changed.
- EntryRef ref = _tensorStore.store_tensor(tensor);
- assert(ref.valid());
- setTensorRef(docid, ref);
+ VectorBundle vectors(tensor.cells().data, tensor.index().size(), _subspace_type);
+ if (tensor_cells_are_unchanged(docid, vectors)) {
+ // The tensor cells are unchanged
+ if (!_is_dense) {
+ // but labels might have changed.
+ EntryRef ref = _tensorStore.store_tensor(tensor);
+ assert(ref.valid());
+ setTensorRef(docid, ref);
+ }
+ return;
}
- return;
}
internal_set_tensor(docid, tensor);
if (_index) {
- _index->complete_add_document(docid, std::move(prepare_result));
+ if (prepare_result) {
+ _index->complete_add_document(docid, std::move(prepare_result));
+ } else {
+ _index->add_document(docid);
+ }
}
}
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp
index 28c4099c38b..223c9d7d1f2 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp
@@ -322,6 +322,9 @@ TensorAttributeLoader::on_load(vespalib::Executor* executor)
if (!load_index()) {
return false;
}
+ if (dense_store == nullptr) {
+ check_consistency(docid_limit);
+ }
} else {
build_index(executor, docid_limit);
}
@@ -329,4 +332,15 @@ TensorAttributeLoader::on_load(vespalib::Executor* executor)
return true;
}
+void
+TensorAttributeLoader::check_consistency(uint32_t docid_limit)
+{
+ auto before = vespalib::steady_clock::now();
+ uint32_t inconsistencies = _index->check_consistency(docid_limit);
+ auto after = vespalib::steady_clock::now();
+ double elapsed = vespalib::to_s(after - before);
+ LOG(info, "%u inconsistencies detected after loading index for attribute %s, (check used %6.3fs)",
+ inconsistencies, _attr.getName().c_str(), elapsed);
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h
index 6bf68957adc..59baaf0b6dc 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.h
@@ -34,6 +34,7 @@ class TensorAttributeLoader {
void load_tensor_store(search::attribute::BlobSequenceReader& reader, uint32_t docid_limit);
void build_index(vespalib::Executor* executor, uint32_t docid_limit);
bool load_index();
+ void check_consistency(uint32_t docid_limit);
public:
TensorAttributeLoader(TensorAttribute& attr, GenerationHandler& generation_handler, RefVector& ref_vector, TensorStore& store, NearestNeighborIndex* index);