From ffe28ab39fe90676dd0f29f00c75bdb4014e2159 Mon Sep 17 00:00:00 2001 From: Geir Storli Date: Tue, 25 Feb 2020 14:32:41 +0000 Subject: Add proper memory management to hnsw index and integrate with dense tensor attribute. --- .../tensorattribute/tensorattribute_test.cpp | 50 +++++++++++++++++++++- .../searchlib/tensor/dense_tensor_attribute.cpp | 20 +++++++++ .../searchlib/tensor/dense_tensor_attribute.h | 4 +- .../src/vespa/searchlib/tensor/hnsw_index.cpp | 18 ++++++++ searchlib/src/vespa/searchlib/tensor/hnsw_index.h | 4 ++ .../searchlib/tensor/nearest_neighbor_index.h | 6 +++ .../vespa/searchlib/tensor/tensor_attribute.cpp | 8 ---- 7 files changed, 100 insertions(+), 10 deletions(-) (limited to 'searchlib') diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp index 5089743a54a..761d403ac6c 100644 --- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp +++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +42,7 @@ using vespalib::tensor::DenseTensor; using vespalib::tensor::Tensor; using DoubleVector = std::vector; +using generation_t = vespalib::GenerationHandler::generation_t; namespace vespalib::tensor { @@ -80,12 +82,16 @@ private: const DocVectorAccess& _vectors; EntryVector _adds; EntryVector _removes; + generation_t _transfer_gen; + generation_t _trim_gen; public: MockNearestNeighborIndex(const DocVectorAccess& vectors) : _vectors(vectors), _adds(), - _removes() + _removes(), + _transfer_gen(std::numeric_limits::max()), + _trim_gen(std::numeric_limits::max()) { } void clear() { @@ -111,6 +117,9 @@ public: EXPECT_EQUAL(exp_docid, _removes.back().first); EXPECT_EQUAL(exp_vector, _removes.back().second); } + generation_t get_transfer_gen() const { return _transfer_gen; } + generation_t get_trim_gen() const { return _trim_gen; } + void add_document(uint32_t docid) override { auto vector = _vectors.get_vector(docid).typify(); _adds.emplace_back(docid, DoubleVector(vector.begin(), vector.end())); @@ -119,6 +128,12 @@ public: auto vector = _vectors.get_vector(docid).typify(); _removes.emplace_back(docid, DoubleVector(vector.begin(), vector.end())); } + void transfer_hold_lists(generation_t current_gen) override { + _transfer_gen = current_gen; + } + void trim_hold_lists(generation_t first_used_gen) override { + _trim_gen = first_used_gen; + } std::vector find_top_k(uint32_t k, vespalib::tensor::TypedCells vector, uint32_t explore_k) const override { (void) k; (void) vector; @@ -232,6 +247,10 @@ struct Fixture _attr->commit(); } + generation_t get_current_gen() const { + return _attr->getCurrentGeneration(); + } + search::attribute::Status getStatus() { _attr->commit(true); return _attr->getStatus(); @@ -531,4 +550,33 @@ TEST_F("onLoad() updates nearest neighbor index", DenseTensorAttributeMockIndex) index.expect_adds({{1, {3, 5}}, {2, {7, 9}}}); } + +TEST_F("commit() ensures transfer and trim hold lists on nearest neighbor index", DenseTensorAttributeMockIndex) +{ + auto& index = f.mock_index(); + TensorSpec spec = vec_2d(3, 5); + + f.set_tensor(1, spec); + generation_t gen_1 = f.get_current_gen(); + EXPECT_EQUAL(gen_1 - 1, index.get_transfer_gen()); + EXPECT_EQUAL(gen_1, index.get_trim_gen()); + + generation_t gen_2 = 0; + { + // Takes guard on gen_1 + auto guard = f._attr->makeReadGuard(false); + f.set_tensor(2, spec); + gen_2 = f.get_current_gen(); + EXPECT_GREATER(gen_2, gen_1); + EXPECT_EQUAL(gen_2 - 1, index.get_transfer_gen()); + EXPECT_EQUAL(gen_1, index.get_trim_gen()); + } + + f.set_tensor(3, spec); + generation_t gen_3 = f.get_current_gen(); + EXPECT_GREATER(gen_3, gen_2); + EXPECT_EQUAL(gen_3 - 1, index.get_transfer_gen()); + EXPECT_EQUAL(gen_3, index.get_trim_gen()); +} + TEST_MAIN() { TEST_RUN_ALL(); vespalib::unlink("test.dat"); } diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp index 171340e07f1..229c6c2c34f 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp @@ -183,6 +183,26 @@ DenseTensorAttribute::getVersion() const return DENSE_TENSOR_ATTRIBUTE_VERSION; } +void +DenseTensorAttribute::onGenerationChange(generation_t next_gen) +{ + // TODO: Change onGenerationChange() to send current generation instead of next generation. + // This applies for entire attribute vector code. + TensorAttribute::onGenerationChange(next_gen); + if (_index) { + _index->transfer_hold_lists(next_gen - 1); + } +} + +void +DenseTensorAttribute::removeOldGenerations(generation_t first_used_gen) +{ + TensorAttribute::removeOldGenerations(first_used_gen); + if (_index) { + _index->trim_hold_lists(first_used_gen); + } +} + vespalib::tensor::TypedCells DenseTensorAttribute::get_vector(uint32_t docid) const { diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h index f9a8a81b56b..84a60376fe7 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h @@ -29,7 +29,7 @@ public: DenseTensorAttribute(vespalib::stringref baseFileName, const Config& cfg, const NearestNeighborIndexFactory& index_factory = DefaultNearestNeighborIndexFactory()); virtual ~DenseTensorAttribute(); - // Implements TensorAttribute + // Implements AttributeVector and ITensorAttribute uint32_t clearDoc(DocId docId) override; void setTensor(DocId docId, const Tensor &tensor) override; std::unique_ptr getTensor(DocId docId) const override; @@ -38,6 +38,8 @@ public: std::unique_ptr onInitSave(vespalib::stringref fileName) override; void compactWorst() override; uint32_t getVersion() const override; + void onGenerationChange(generation_t next_gen) override; + void removeOldGenerations(generation_t first_used_gen) override; // Implements DocVectorAccess vespalib::tensor::TypedCells get_vector(uint32_t docid) const override; diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp index 54779408b37..e0b999bf5c5 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp @@ -310,6 +310,24 @@ HnswIndex::remove_document(uint32_t docid) _node_refs[docid].store_release(invalid); } +void +HnswIndex::transfer_hold_lists(generation_t current_gen) +{ + // Note: RcuVector transfers hold lists as part of reallocation based on current generation. + // We need to set the next generation here, as it is incremented on a higher level right after this call. + _node_refs.setGeneration(current_gen + 1); + _nodes.transferHoldLists(current_gen); + _links.transferHoldLists(current_gen); +} + +void +HnswIndex::trim_hold_lists(generation_t first_used_gen) +{ + _node_refs.removeOldGenerations(first_used_gen); + _nodes.trimHoldLists(first_used_gen); + _links.trimHoldLists(first_used_gen); +} + struct NeighborsByDocId { bool operator() (const NearestNeighborIndex::Neighbor &lhs, const NearestNeighborIndex::Neighbor &rhs) diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h index a8129032c11..afbb3207fc2 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h @@ -133,9 +133,13 @@ public: const Config& config() const { return _cfg; } + // Implements NearestNeighborIndex void add_document(uint32_t docid) override; void remove_document(uint32_t docid) override; + void transfer_hold_lists(generation_t current_gen) override; + void trim_hold_lists(generation_t first_used_gen) override; std::vector find_top_k(uint32_t k, TypedCells vector, uint32_t explore_k) const override; + FurthestPriQ top_k_candidates(const TypedCells &vector, uint32_t k) const; // TODO: Add support for generation handling and cleanup (transfer_hold_lists, trim_hold_lists) diff --git a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h index f933af0147e..003daa3185e 100644 --- a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h +++ b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace search::tensor { @@ -13,6 +14,7 @@ namespace search::tensor { */ class NearestNeighborIndex { public: + using generation_t = vespalib::GenerationHandler::generation_t; struct Neighbor { uint32_t docid; double distance; @@ -24,9 +26,13 @@ public: virtual ~NearestNeighborIndex() {} virtual void add_document(uint32_t docid) = 0; virtual void remove_document(uint32_t docid) = 0; + virtual void transfer_hold_lists(generation_t current_gen) = 0; + virtual void trim_hold_lists(generation_t first_used_gen) = 0; + virtual std::vector find_top_k(uint32_t k, vespalib::tensor::TypedCells vector, uint32_t explore_k) const = 0; + }; } diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp index 89b8e77e136..0b9628e6872 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp @@ -71,7 +71,6 @@ TensorAttribute::TensorAttribute(vespalib::stringref name, const Config &cfg, Te { } - TensorAttribute::~TensorAttribute() = default; const ITensorAttribute * @@ -93,7 +92,6 @@ TensorAttribute::clearDoc(DocId docId) return 0u; } - void TensorAttribute::onCommit() { @@ -110,7 +108,6 @@ TensorAttribute::onCommit() } } - void TensorAttribute::onUpdateStat() { @@ -126,7 +123,6 @@ TensorAttribute::onUpdateStat() total.allocatedBytesOnHold()); } - void TensorAttribute::removeOldGenerations(generation_t firstUsed) { @@ -141,7 +137,6 @@ TensorAttribute::onGenerationChange(generation_t generation) _tensorStore.transferHoldLists(generation - 1); } - bool TensorAttribute::addDoc(DocId &docId) { @@ -209,7 +204,6 @@ TensorAttribute::clearDocs(DocId lidLow, DocId lidLimit) } } - void TensorAttribute::onShrinkLidSpace() { @@ -220,14 +214,12 @@ TensorAttribute::onShrinkLidSpace() setNumDocs(committedDocIdLimit); } - uint32_t TensorAttribute::getVersion() const { return TENSOR_ATTRIBUTE_VERSION; } - TensorAttribute::RefCopyVector TensorAttribute::getRefCopy() const { -- cgit v1.2.3