diff options
author | Tor Egge <Tor.Egge@online.no> | 2022-11-10 14:23:36 +0100 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2022-11-10 14:23:36 +0100 |
commit | 558e133864f44a9e5b43a6b4d54624f710665b74 (patch) | |
tree | a02bf7347b99599b8e01762efd8a8fa49b1ec561 /searchlib/src | |
parent | 8ca78146bf4cb02dd5b3e8aa51b750bd13805e1a (diff) | |
Move prepare_set_tensor() and complete_set_tensor() to TensorAttribute.
Diffstat (limited to 'searchlib/src')
4 files changed, 53 insertions, 51 deletions
diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp index 5cd5455bcac..bd7fe2d3276 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp @@ -13,18 +13,10 @@ using vespalib::slime::ObjectInserter; namespace search::tensor { -bool -DenseTensorAttribute::tensor_is_unchanged(DocId docid, const Value& new_tensor) const -{ - auto old_tensor = extract_cells_ref(docid); - return _comp.equals(old_tensor, new_tensor.cells()); -} - DenseTensorAttribute::DenseTensorAttribute(vespalib::stringref baseFileName, const Config& cfg, const NearestNeighborIndexFactory& index_factory) : TensorAttribute(baseFileName, cfg, _denseTensorStore), - _denseTensorStore(cfg.tensorType(), get_memory_allocator()), - _comp(cfg.tensorType()) + _denseTensorStore(cfg.tensorType(), get_memory_allocator()) { if (cfg.hnsw_index_params().has_value()) { auto tensor_type = cfg.tensorType(); @@ -42,36 +34,6 @@ DenseTensorAttribute::~DenseTensorAttribute() _tensorStore.reclaim_all_memory(); } -std::unique_ptr<PrepareResult> -DenseTensorAttribute::prepare_set_tensor(DocId docid, const Value& tensor) const -{ - checkTensorType(tensor); - if (_index) { - if (tensor_is_unchanged(docid, tensor)) { - // Don't make changes to the nearest neighbor index when the inserted tensor is unchanged. - // With this optimization we avoid doing unnecessary costly work, first removing the vector point, then inserting the same point. 
- return {}; - } - VectorBundle vectors(tensor.cells().data, tensor.index().size(), _denseTensorStore.get_subspace_type()); - return _index->prepare_add_document(docid, vectors, getGenerationHandler().takeGuard()); - } - return {}; -} - -void -DenseTensorAttribute::complete_set_tensor(DocId docid, const Value& tensor, - std::unique_ptr<PrepareResult> prepare_result) -{ - if (_index && !prepare_result) { - // The tensor is unchanged. - return; - } - internal_set_tensor(docid, tensor); - if (_index) { - _index->complete_add_document(docid, std::move(prepare_result)); - } -} - vespalib::eval::TypedCells DenseTensorAttribute::extract_cells_ref(DocId docId) const { diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h index 2db7bb332a9..45bd0d98274 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h @@ -5,7 +5,6 @@ #include "default_nearest_neighbor_index_factory.h" #include "dense_tensor_store.h" #include "tensor_attribute.h" -#include "typed_cells_comparator.h" #include <memory> namespace search::tensor { @@ -19,16 +18,12 @@ class NearestNeighborIndex; class DenseTensorAttribute : public TensorAttribute { private: DenseTensorStore _denseTensorStore; - TypedCellsComparator _comp; - bool tensor_is_unchanged(DocId docid, const vespalib::eval::Value& new_tensor) const; public: DenseTensorAttribute(vespalib::stringref baseFileName, const Config& cfg, const NearestNeighborIndexFactory& index_factory = DefaultNearestNeighborIndexFactory()); ~DenseTensorAttribute() override; // Implements AttributeVector and ITensorAttribute - std::unique_ptr<PrepareResult> prepare_set_tensor(DocId docid, const vespalib::eval::Value& tensor) const override; - void complete_set_tensor(DocId docid, const vespalib::eval::Value& tensor, std::unique_ptr<PrepareResult> prepare_result) override; vespalib::eval::TypedCells 
extract_cells_ref(DocId docId) const override; bool supports_extract_cells_ref() const override { return true; } void get_state(const vespalib::slime::Inserter& inserter) const override; diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp index 43fc602ff4c..b66dc29be6e 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp @@ -58,7 +58,9 @@ TensorAttribute::TensorAttribute(vespalib::stringref name, const Config &cfg, Te _index(), _is_dense(cfg.tensorType().is_dense()), _emptyTensor(createEmptyTensor(cfg.tensorType())), - _compactGeneration(0) + _compactGeneration(0), + _subspace_type(cfg.tensorType()), + _comp(cfg.tensorType()) { } @@ -393,18 +395,37 @@ TensorAttribute::update_tensor(DocId docId, std::unique_ptr<PrepareResult> TensorAttribute::prepare_set_tensor(DocId docid, const vespalib::eval::Value& tensor) const { - (void) docid; - (void) tensor; - return std::unique_ptr<PrepareResult>(); + checkTensorType(tensor); + if (_index) { + VectorBundle vectors(tensor.cells().data, tensor.index().size(), _subspace_type); + if (tensor_cells_are_unchanged(docid, vectors)) { + // Don't make changes to the nearest neighbor index when the inserted tensor cells are unchanged. + // With this optimization we avoid doing unnecessary costly work, first removing the vector point, then inserting the same point. + return {}; + } + return _index->prepare_add_document(docid, vectors, getGenerationHandler().takeGuard()); + } + return {}; } void TensorAttribute::complete_set_tensor(DocId docid, const vespalib::eval::Value& tensor, std::unique_ptr<PrepareResult> prepare_result) { - (void) docid; - (void) tensor; - (void) prepare_result; + if (_index && !prepare_result) { + // The tensor cells are unchanged + if (!_is_dense) { + // but labels might have changed. 
+ EntryRef ref = _tensorStore.store_tensor(tensor); + assert(ref.valid()); + setTensorRef(docid, ref); + } + return; + } + internal_set_tensor(docid, tensor); + if (_index) { + _index->complete_add_document(docid, std::move(prepare_result)); + } } attribute::DistanceMetric @@ -412,4 +433,23 @@ TensorAttribute::distance_metric() const { return getConfig().distance_metric(); } +bool +TensorAttribute::tensor_cells_are_unchanged(DocId docid, VectorBundle vectors) const +{ + if (docid >= getCommittedDocIdLimit()) { + return false; + } + auto old_vectors = get_vectors(docid); + auto old_subspaces = old_vectors.subspaces(); + if (old_subspaces != vectors.subspaces()) { + return false; + } + for (uint32_t subspace = 0; subspace < old_subspaces; ++subspace) { + if (!_comp.equals(old_vectors.cells(subspace), vectors.cells(subspace))) { + return false; + } + } + return true; +} + } diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h index a91bcd80731..c25ac7833ef 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h +++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h @@ -5,7 +5,9 @@ #include "i_tensor_attribute.h" #include "doc_vector_access.h" #include "prepare_result.h" +#include "subspace_type.h" #include "tensor_store.h" +#include "typed_cells_comparator.h" #include <vespa/searchlib/attribute/not_implemented_attribute.h> #include <vespa/vespalib/util/rcuvector.h> #include <vespa/document/update/tensor_update.h> @@ -29,6 +31,8 @@ protected: bool _is_dense; std::unique_ptr<vespalib::eval::Value> _emptyTensor; uint64_t _compactGeneration; // Generation when last compact occurred + SubspaceType _subspace_type; + TypedCellsComparator _comp; void checkTensorType(const vespalib::eval::Value &tensor) const; void setTensorRef(DocId docId, EntryRef ref); @@ -41,6 +45,7 @@ protected: EntryRef acquire_entry_ref(DocId doc_id) const noexcept { return 
_refVector.acquire_elem_ref(doc_id).load_acquire(); } bool onLoad(vespalib::Executor *executor) override; std::unique_ptr<AttributeSaver> onInitSave(vespalib::stringref fileName) override; + bool tensor_cells_are_unchanged(DocId docid, VectorBundle vectors) const; public: using RefCopyVector = vespalib::Array<EntryRef>; |