diff options
Diffstat (limited to 'searchlib')
8 files changed, 7 insertions, 90 deletions
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp index 72b2f1e320a..bf0b74b0003 100644 --- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp +++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp @@ -157,12 +157,6 @@ public: void expect_empty_add() const { EXPECT_TRUE(_adds.empty()); } - void expect_empty_prepare_add() const { - EXPECT_TRUE(_prepare_adds.empty()); - } - void expect_empty_complete_add() const { - EXPECT_TRUE(_complete_adds.empty()); - } void expect_entry(uint32_t exp_docid, const DoubleVector& exp_vector, const EntryVector& entries) const { EXPECT_EQUAL(1u, entries.size()); EXPECT_EQUAL(exp_docid, entries.back().first); @@ -887,30 +881,6 @@ TEST_F("nearest neighbor index can be updated in two phases", DenseTensorAttribu } } -TEST_F("nearest neighbor index is NOT updated when tensor value is unchanged", DenseTensorAttributeMockIndex) -{ - auto& index = f.mock_index(); - { - auto vec_a = vec_2d(3, 5); - auto prepare_result = f.prepare_set_tensor(1, vec_a); - index.expect_prepare_add(1, {3, 5}); - f.complete_set_tensor(1, vec_a, std::move(prepare_result)); - f.assertGetTensor(vec_a, 1); - index.expect_complete_add(1, {3, 5}); - } - index.clear(); - { - // Replaces previous value with the same value - auto vec_b = vec_2d(3, 5); - auto prepare_result = f.prepare_set_tensor(1, vec_b); - EXPECT_TRUE(prepare_result.get() == nullptr); - index.expect_empty_prepare_add(); - f.complete_set_tensor(1, vec_b, std::move(prepare_result)); - f.assertGetTensor(vec_b, 1); - index.expect_empty_complete_add(); - } -} - TEST_F("clearDoc() updates nearest neighbor index", DenseTensorAttributeMockIndex) { auto& index = f.mock_index(); diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp index c713b3ef335..2fdb73fcf96 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp @@ -8,9 +8,9 @@ #include "tensor_attribute.hpp" #include <vespa/eval/eval/value.h> #include <vespa/fastlib/io/bufferedfile.h> -#include <vespa/searchcommon/attribute/config.h> #include <vespa/searchlib/attribute/load_utils.h> #include <vespa/searchlib/attribute/readerbase.h> +#include <vespa/searchcommon/attribute/config.h> #include <vespa/vespalib/data/slime/inserter.h> #include <vespa/vespalib/util/cpu_usage.h> #include <vespa/vespalib/util/lambdatask.h> @@ -102,16 +102,10 @@ BlobSequenceReader::is_present() { } -bool -DenseTensorAttribute::tensor_is_unchanged(DocId docid, const vespalib::eval::Value& new_tensor) const -{ - auto old_tensor = extract_cells_ref(docid); - return _comp.equals(old_tensor, new_tensor.cells()); -} - void DenseTensorAttribute::internal_set_tensor(DocId docid, const vespalib::eval::Value& tensor) { + checkTensorType(tensor); consider_remove_from_index(docid); EntryRef ref = _denseTensorStore.setTensor(tensor); setTensorRef(docid, ref); @@ -158,8 +152,7 @@ DenseTensorAttribute::DenseTensorAttribute(vespalib::stringref baseFileName, con const NearestNeighborIndexFactory& index_factory) : TensorAttribute(baseFileName, cfg, _denseTensorStore), _denseTensorStore(cfg.tensorType(), get_memory_allocator()), - _index(), - _comp(cfg.tensorType()) + _index() { if (cfg.hnsw_index_params().has_value()) { auto tensor_type = cfg.tensorType(); @@ -187,7 +180,6 @@ DenseTensorAttribute::clearDoc(DocId docId) void DenseTensorAttribute::setTensor(DocId docId, const vespalib::eval::Value &tensor) { - checkTensorType(tensor); internal_set_tensor(docId, tensor); if (_index) { _index->add_document(docId); @@ -197,26 +189,16 @@ DenseTensorAttribute::setTensor(DocId docId, const vespalib::eval::Value &tensor std::unique_ptr<PrepareResult> DenseTensorAttribute::prepare_set_tensor(DocId docid, const vespalib::eval::Value& tensor) const { - checkTensorType(tensor); if (_index) { - if (tensor_is_unchanged(docid, tensor)) { - // Don't make changes to the nearest neighbor index when the inserted tensor is unchanged. - // With this optimization we avoid doing unnecessary costly work, first removing the vector point, then inserting the same point. - return {}; - } return _index->prepare_add_document(docid, tensor.cells(), getGenerationHandler().takeGuard()); } - return {}; + return std::unique_ptr<PrepareResult>(); } void DenseTensorAttribute::complete_set_tensor(DocId docid, const vespalib::eval::Value& tensor, std::unique_ptr<PrepareResult> prepare_result) { - if (_index && !prepare_result) { - // The tensor is unchanged. - return; - } internal_set_tensor(docid, tensor); if (_index) { _index->complete_add_document(docid, std::move(prepare_result)); diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h index 1138a4f4433..da7a88af1be 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h @@ -6,7 +6,6 @@ #include "dense_tensor_store.h" #include "doc_vector_access.h" #include "tensor_attribute.h" -#include "typed_cells_comparator.h" #include <memory> namespace search::tensor { @@ -21,9 +20,7 @@ class DenseTensorAttribute : public TensorAttribute, public DocVectorAccess { private: DenseTensorStore _denseTensorStore; std::unique_ptr<NearestNeighborIndex> _index; - TypedCellsComparator _comp; - bool tensor_is_unchanged(DocId docid, const vespalib::eval::Value& new_tensor) const; void internal_set_tensor(DocId docid, const vespalib::eval::Value& tensor); void consider_remove_from_index(DocId docid); vespalib::MemoryUsage update_stat() override; diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp index 2ee1b268449..e82f31df38e 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp @@ -436,7 +436,7 @@ HnswIndex::prepare_add_document(uint32_t docid, if (max_nodes < _cfg.min_size_before_two_phase()) { // the first documents added will do all work in write thread // to ensure they are linked together: - return std::make_unique<PreparedFirstAddDoc>(); + return std::unique_ptr<PrepareResult>(); } PreparedAddDoc op = internal_prepare_add(docid, vector, std::move(read_guard)); return std::make_unique<PreparedAddDoc>(std::move(op)); diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h index 3f5a9d514ed..72a10724ff1 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h @@ -152,8 +152,6 @@ protected: const BitVector *filter, uint32_t explore_k, double distance_threshold) const; - struct PreparedFirstAddDoc : public PrepareResult {}; - struct PreparedAddDoc : public PrepareResult { using ReadGuard = vespalib::GenerationHandler::Guard; uint32_t docid; diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp index 78c58e86a3b..add5184c4eb 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp @@ -135,7 +135,7 @@ TensorAttribute::addDoc(DocId &docId) } void -TensorAttribute::checkTensorType(const vespalib::eval::Value &tensor) const +TensorAttribute::checkTensorType(const vespalib::eval::Value &tensor) { const ValueType &fieldTensorType = getConfig().tensorType(); const ValueType &tensorType = tensor.type(); diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h index c8aa42c6133..ae6a4a302ea 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h +++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h @@ -32,7 +32,7 @@ protected: template <typename RefType> void doCompactWorst(); - void checkTensorType(const vespalib::eval::Value &tensor) const; + void checkTensorType(const vespalib::eval::Value &tensor); void setTensorRef(DocId docId, EntryRef ref); virtual vespalib::MemoryUsage update_stat(); virtual vespalib::MemoryUsage memory_usage() const; diff --git a/searchlib/src/vespa/searchlib/tensor/typed_cells_comparator.h b/searchlib/src/vespa/searchlib/tensor/typed_cells_comparator.h deleted file mode 100644 index d1c890be961..00000000000 --- a/searchlib/src/vespa/searchlib/tensor/typed_cells_comparator.h +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include <vespa/eval/eval/cell_type.h> -#include <vespa/eval/eval/typed_cells.h> -#include <vespa/eval/eval/value_type.h> -#include <cstring> - -namespace search::tensor { - -/** - * Comparator used to compare two vespalib::eval::TypedCells instances. - * - * The caller must first validate that they are of the same vespalib::eval::ValueType. - */ -class TypedCellsComparator { -private: - size_t _mem_size; - -public: - TypedCellsComparator(const vespalib::eval::ValueType& type) - : _mem_size(vespalib::eval::CellTypeUtils::mem_size(type.cell_type(), type.dense_subspace_size())) - {} - bool equals(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const { - return std::memcmp(lhs.data, rhs.data, _mem_size) == 0; - } -}; - -} |