Merge pull request #12337 from vespa-engine/geirst/memory-management-in-hnsw-index

Add proper memory management to hnsw index and integrate with dense t…
author: Arne H Juul <arnej27959@users.noreply.github.com> 2020-02-26 12:33:01 +0100
committer: GitHub <noreply@github.com> 2020-02-26 12:33:01 +0100
commit: 9432751c53285669b9a3b3ab605d673459bcbf37 (patch)
tree: cd37e55362be3466f5d403f443093862088cecc7 /searchlib
parent: 16f36db67f619098dc3c97f60d697037face1ce2 (diff)
parent: 8cd46748e4e326e6f96a9060abc77b112c7642f0 (diff)
8 files changed, 204 insertions, 18 deletions
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
index 5089743a54a..1f3ad7c2fec 100644
--- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
+++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
@@ -6,6 +6,7 @@
 #include <vespa/eval/tensor/tensor.h>
 #include <vespa/fastos/file.h>
 #include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/attribute/attribute_read_guard.h>
 #include <vespa/searchlib/tensor/default_nearest_neighbor_index_factory.h>
 #include <vespa/searchlib/tensor/dense_tensor_attribute.h>
 #include <vespa/searchlib/tensor/doc_vector_access.h>
@@ -41,6 +42,7 @@ using vespalib::tensor::DenseTensor;
 using vespalib::tensor::Tensor;
 
 using DoubleVector = std::vector<double>;
+using generation_t = vespalib::GenerationHandler::generation_t;
 
 namespace vespalib::tensor {
 
@@ -80,12 +82,16 @@ private:
     const DocVectorAccess& _vectors;
     EntryVector _adds;
     EntryVector _removes;
+    generation_t _transfer_gen;
+    generation_t _trim_gen;
 
 public:
     MockNearestNeighborIndex(const DocVectorAccess& vectors)
         : _vectors(vectors),
           _adds(),
-          _removes()
+          _removes(),
+          _transfer_gen(std::numeric_limits<generation_t>::max()),
+          _trim_gen(std::numeric_limits<generation_t>::max())
     {
     }
     void clear() {
@@ -111,6 +117,9 @@ public:
         EXPECT_EQUAL(exp_docid, _removes.back().first);
         EXPECT_EQUAL(exp_vector, _removes.back().second);
     }
+    generation_t get_transfer_gen() const { return _transfer_gen; }
+    generation_t get_trim_gen() const { return _trim_gen; }
+
     void add_document(uint32_t docid) override {
         auto vector = _vectors.get_vector(docid).typify<double>();
         _adds.emplace_back(docid, DoubleVector(vector.begin(), vector.end()));
@@ -119,6 +128,15 @@ public:
         auto vector = _vectors.get_vector(docid).typify<double>();
         _removes.emplace_back(docid, DoubleVector(vector.begin(), vector.end()));
     }
+    void transfer_hold_lists(generation_t current_gen) override {
+        _transfer_gen = current_gen;
+    }
+    void trim_hold_lists(generation_t first_used_gen) override {
+        _trim_gen = first_used_gen;
+    }
+    vespalib::MemoryUsage memory_usage() const override {
+        return vespalib::MemoryUsage();
+    }
     std::vector<Neighbor> find_top_k(uint32_t k, vespalib::tensor::TypedCells vector, uint32_t explore_k) const override {
         (void) k;
         (void) vector;
@@ -232,6 +250,10 @@ struct Fixture
         _attr->commit();
     }
 
+    generation_t get_current_gen() const {
+        return _attr->getCurrentGeneration();
+    }
+
     search::attribute::Status getStatus() {
         _attr->commit(true);
         return _attr->getStatus();
@@ -531,4 +553,33 @@ TEST_F("onLoad() updates nearest neighbor index", DenseTensorAttributeMockIndex)
     index.expect_adds({{1, {3, 5}}, {2, {7, 9}}});
 }
 
+
+TEST_F("commit() ensures transfer and trim hold lists on nearest neighbor index", DenseTensorAttributeMockIndex)
+{
+    auto& index = f.mock_index();
+    TensorSpec spec = vec_2d(3, 5);
+
+    f.set_tensor(1, spec);
+    generation_t gen_1 = f.get_current_gen();
+    EXPECT_EQUAL(gen_1 - 1, index.get_transfer_gen());
+    EXPECT_EQUAL(gen_1, index.get_trim_gen());
+
+    generation_t gen_2 = 0;
+    {
+        // Takes guard on gen_1
+        auto guard = f._attr->makeReadGuard(false);
+        f.set_tensor(2, spec);
+        gen_2 = f.get_current_gen();
+        EXPECT_GREATER(gen_2, gen_1);
+        EXPECT_EQUAL(gen_2 - 1, index.get_transfer_gen());
+        EXPECT_EQUAL(gen_1, index.get_trim_gen());
+    }
+
+    f.set_tensor(3, spec);
+    generation_t gen_3 = f.get_current_gen();
+    EXPECT_GREATER(gen_3, gen_2);
+    EXPECT_EQUAL(gen_3 - 1, index.get_transfer_gen());
+    EXPECT_EQUAL(gen_3, index.get_trim_gen());
+}
+
 TEST_MAIN() { TEST_RUN_ALL(); vespalib::unlink("test.dat"); }
diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
index 52f45860c1e..37c4d02017f 100644
--- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
+++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
@@ -6,11 +6,14 @@
 #include <vespa/searchlib/tensor/hnsw_index.h>
 #include <vespa/searchlib/tensor/random_level_generator.h>
 #include <vespa/vespalib/gtest/gtest.h>
+#include <vespa/vespalib/util/generationhandler.h>
 #include <vector>
 
 #include <vespa/log/log.h>
 LOG_SETUP("hnsw_index_test");
 
+using vespalib::GenerationHandler;
+using vespalib::MemoryUsage;
 using namespace search::tensor;
 
 template <typename FloatType>
@@ -49,11 +52,13 @@ class HnswIndexTest : public ::testing::Test {
 public:
     FloatVectors vectors;
     LevelGenerator* level_generator;
+    GenerationHandler gen_handler;
     HnswIndexUP index;
 
     HnswIndexTest()
         : vectors(),
           level_generator(),
+          gen_handler(),
           index()
     {
         vectors.set(1, {2, 2}).set(2, {3, 2}).set(3, {2, 3})
@@ -70,9 +75,23 @@ public:
     void add_document(uint32_t docid, uint32_t max_level = 0) {
         level_generator->level = max_level;
         index->add_document(docid);
+        commit();
     }
     void remove_document(uint32_t docid) {
         index->remove_document(docid);
+        commit();
+    }
+    void commit() {
+        index->transfer_hold_lists(gen_handler.getCurrentGeneration());
+        gen_handler.incGeneration();
+        gen_handler.updateFirstUsedGeneration();
+        index->trim_hold_lists(gen_handler.getFirstUsedGeneration());
+    }
+    GenerationHandler::Guard take_read_guard() {
+        return gen_handler.takeGuard();
+    }
+    MemoryUsage memory_usage() const {
+        return index->memory_usage();
     }
     void expect_entry_point(uint32_t exp_docid, uint32_t exp_level) {
         EXPECT_EQ(exp_docid, index->get_entry_docid());
@@ -83,6 +102,10 @@ public:
         ASSERT_EQ(1, node.size());
         EXPECT_EQ(exp_links, node.level(0));
     }
+    void expect_empty_level_0(uint32_t docid) {
+        auto node = index->get_node(docid);
+        EXPECT_TRUE(node.empty());
+    }
     void expect_levels(uint32_t docid, const HnswNode::LevelArray& exp_levels) {
         auto act_node = index->get_node(docid);
         ASSERT_EQ(exp_levels.size(), act_node.size());
@@ -299,5 +322,50 @@ TEST_F(HnswIndexTest, manual_insert)
     expect_levels(5, {{1,2}, {4}});
 }
 
+TEST_F(HnswIndexTest, memory_is_reclaimed_when_doing_changes_to_graph)
+{
+    init(true);
+
+    add_document(1);
+    add_document(3);
+    auto mem_1 = memory_usage();
+
+    add_document(2);
+    expect_level_0(1, {2,3});
+    expect_level_0(2, {1,3});
+    expect_level_0(3, {1,2});
+    auto mem_2 = memory_usage();
+    // We should use more memory with larger link arrays and extra document.
+    EXPECT_GT((mem_2.usedBytes() - mem_2.deadBytes()), (mem_1.usedBytes() - mem_1.deadBytes()));
+    EXPECT_EQ(0, mem_2.allocatedBytesOnHold());
+
+    remove_document(2);
+    expect_level_0(1, {3});
+    expect_empty_level_0(2);
+    expect_level_0(3, {1});
+    auto mem_3 = memory_usage();
+    // We end up in the same state as before document 2 was added and effectively use the same amount of memory.
+    EXPECT_EQ((mem_1.usedBytes() - mem_1.deadBytes()), (mem_3.usedBytes() - mem_3.deadBytes()));
+    EXPECT_EQ(0, mem_3.allocatedBytesOnHold());
+}
+
+TEST_F(HnswIndexTest, memory_is_put_on_hold_while_read_guard_is_held)
+{
+    init(true);
+
+    add_document(1);
+    add_document(3);
+    {
+        auto guard = take_read_guard();
+        add_document(2);
+        auto mem = memory_usage();
+        // As read guard is held memory to reclaim is put on hold
+        EXPECT_GT(mem.allocatedBytesOnHold(), 0);
+    }
+    commit();
+    auto mem = memory_usage();
+    EXPECT_EQ(0, mem.allocatedBytesOnHold());
+}
+
 GTEST_MAIN_RUN_ALL_TESTS()
 
diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
index 171340e07f1..229c6c2c34f 100644
--- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
@@ -183,6 +183,26 @@ DenseTensorAttribute::getVersion() const
     return DENSE_TENSOR_ATTRIBUTE_VERSION;
 }
 
+void
+DenseTensorAttribute::onGenerationChange(generation_t next_gen)
+{
+    // TODO: Change onGenerationChange() to send current generation instead of next generation.
+    //       This applies for entire attribute vector code.
+    TensorAttribute::onGenerationChange(next_gen);
+    if (_index) {
+        _index->transfer_hold_lists(next_gen - 1);
+    }
+}
+
+void
+DenseTensorAttribute::removeOldGenerations(generation_t first_used_gen)
+{
+    TensorAttribute::removeOldGenerations(first_used_gen);
+    if (_index) {
+        _index->trim_hold_lists(first_used_gen);
+    }
+}
+
 vespalib::tensor::TypedCells
 DenseTensorAttribute::get_vector(uint32_t docid) const
 {
diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h
index f9a8a81b56b..84a60376fe7 100644
--- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h
+++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h
@@ -29,7 +29,7 @@ public:
     DenseTensorAttribute(vespalib::stringref baseFileName, const Config& cfg,
                          const NearestNeighborIndexFactory& index_factory = DefaultNearestNeighborIndexFactory());
     virtual ~DenseTensorAttribute();
-    // Implements TensorAttribute
+    // Implements AttributeVector and ITensorAttribute
     uint32_t clearDoc(DocId docId) override;
     void setTensor(DocId docId, const Tensor &tensor) override;
     std::unique_ptr<Tensor> getTensor(DocId docId) const override;
@@ -38,6 +38,8 @@ public:
     std::unique_ptr<AttributeSaver> onInitSave(vespalib::stringref fileName) override;
     void compactWorst() override;
     uint32_t getVersion() const override;
+    void onGenerationChange(generation_t next_gen) override;
+    void removeOldGenerations(generation_t first_used_gen) override;
 
     // Implements DocVectorAccess
     vespalib::tensor::TypedCells get_vector(uint32_t docid) const override;
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
index 54779408b37..467e41d83fc 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
@@ -9,6 +9,8 @@
 
 namespace search::tensor {
 
+using search::datastore::EntryRef;
+
 namespace {
 
 // TODO: Move this to MemoryAllocator, with name PAGE_SIZE.
@@ -44,6 +46,9 @@ HnswIndex::max_links_for_level(uint32_t level) const
 uint32_t
 HnswIndex::make_node_for_document(uint32_t docid)
 {
+    // A document cannot be added twice.
+    assert(!_node_refs[docid].load_acquire().valid());
+
     uint32_t max_level = _level_generator->max_level();
     // TODO: Add capping on num_levels
     uint32_t num_levels = max_level + 1;
@@ -54,6 +59,15 @@ HnswIndex::make_node_for_document(uint32_t docid)
     return max_level;
 }
 
+void
+HnswIndex::remove_node_for_document(uint32_t docid)
+{
+    auto node_ref = _node_refs[docid].load_acquire();
+    _nodes.remove(node_ref);
+    EntryRef invalid;
+    _node_refs[docid].store_release(invalid);
+}
+
 HnswIndex::LevelArrayRef
 HnswIndex::get_level_array(uint32_t docid) const
 {
@@ -72,10 +86,12 @@ HnswIndex::get_link_array(uint32_t docid, uint32_t level) const
 void
 HnswIndex::set_link_array(uint32_t docid, uint32_t level, const LinkArrayRef& links)
 {
-    auto links_ref = _links.add(links);
+    auto new_links_ref = _links.add(links);
     auto node_ref = _node_refs[docid].load_acquire();
     auto levels = _nodes.get_writable(node_ref);
-    levels[level].store_release(links_ref);
+    auto old_links_ref = levels[level].load_acquire();
+    levels[level].store_release(new_links_ref);
+    _links.remove(old_links_ref);
 }
 
 bool
@@ -248,8 +264,6 @@ HnswIndex::add_document(uint32_t docid)
 {
     auto input = get_vector(docid);
     _node_refs.ensure_size(docid + 1, AtomicEntryRef());
-    // A document cannot be added twice.
-    assert(!_node_refs[docid].load_acquire().valid());
     int level = make_node_for_document(docid);
     if (_entry_docid == 0) {
         _entry_docid = docid;
@@ -306,8 +320,35 @@ HnswIndex::remove_document(uint32_t docid)
         _entry_docid = 0;
         _entry_level = -1;
     }
-    search::datastore::EntryRef invalid;
-    _node_refs[docid].store_release(invalid);
+    remove_node_for_document(docid);
+}
+
+void
+HnswIndex::transfer_hold_lists(generation_t current_gen)
+{
+    // Note: RcuVector transfers hold lists as part of reallocation based on current generation.
+    //       We need to set the next generation here, as it is incremented on a higher level right after this call.
+    _node_refs.setGeneration(current_gen + 1);
+    _nodes.transferHoldLists(current_gen);
+    _links.transferHoldLists(current_gen);
+}
+
+void
+HnswIndex::trim_hold_lists(generation_t first_used_gen)
+{
+    _node_refs.removeOldGenerations(first_used_gen);
+    _nodes.trimHoldLists(first_used_gen);
+    _links.trimHoldLists(first_used_gen);
+}
+
+vespalib::MemoryUsage
+HnswIndex::memory_usage() const
+{
+    vespalib::MemoryUsage result;
+    result.merge(_node_refs.getMemoryUsage());
+    result.merge(_nodes.getMemoryUsage());
+    result.merge(_links.getMemoryUsage());
+    return result;
 }
 
 struct NeighborsByDocId {
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
index a8129032c11..89c45d6b50c 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
@@ -95,6 +95,7 @@ protected:
 
     uint32_t max_links_for_level(uint32_t level) const;
     uint32_t make_node_for_document(uint32_t docid);
+    void remove_node_for_document(uint32_t docid);
     LevelArrayRef get_level_array(uint32_t docid) const;
     LinkArrayRef get_link_array(uint32_t docid, uint32_t level) const;
     void set_link_array(uint32_t docid, uint32_t level, const LinkArrayRef& links);
@@ -133,12 +134,15 @@ public:
 
     const Config& config() const { return _cfg; }
 
+    // Implements NearestNeighborIndex
     void add_document(uint32_t docid) override;
     void remove_document(uint32_t docid) override;
+    void transfer_hold_lists(generation_t current_gen) override;
+    void trim_hold_lists(generation_t first_used_gen) override;
+    vespalib::MemoryUsage memory_usage() const override;
     std::vector<Neighbor> find_top_k(uint32_t k, TypedCells vector, uint32_t explore_k) const override;
-    FurthestPriQ top_k_candidates(const TypedCells &vector, uint32_t k) const;
 
-    // TODO: Add support for generation handling and cleanup (transfer_hold_lists, trim_hold_lists)
+    FurthestPriQ top_k_candidates(const TypedCells &vector, uint32_t k) const;
 
     uint32_t get_entry_docid() const { return _entry_docid; }
     uint32_t get_entry_level() const { return _entry_level; }
diff --git a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
index f933af0147e..bd98623bdd3 100644
--- a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
@@ -5,6 +5,8 @@
 #include <cstdint>
 #include <vector>
 #include <vespa/eval/tensor/dense/typed_cells.h>
+#include <vespa/vespalib/util/generationhandler.h>
+#include <vespa/vespalib/util/memoryusage.h>
 
 namespace search::tensor {
 
@@ -13,6 +15,7 @@ namespace search::tensor {
  */
 class NearestNeighborIndex {
 public:
+    using generation_t = vespalib::GenerationHandler::generation_t;
     struct Neighbor {
         uint32_t docid;
         double distance;
@@ -24,9 +27,14 @@ public:
     virtual ~NearestNeighborIndex() {}
     virtual void add_document(uint32_t docid) = 0;
     virtual void remove_document(uint32_t docid) = 0;
+    virtual void transfer_hold_lists(generation_t current_gen) = 0;
+    virtual void trim_hold_lists(generation_t first_used_gen) = 0;
+    virtual vespalib::MemoryUsage memory_usage() const = 0;
+
     virtual std::vector<Neighbor> find_top_k(uint32_t k,
                                              vespalib::tensor::TypedCells vector,
                                              uint32_t explore_k) const = 0;
+
 };
 
 }
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp
index 89b8e77e136..0b9628e6872 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp
@@ -71,7 +71,6 @@ TensorAttribute::TensorAttribute(vespalib::stringref name, const Config &cfg, Te
 {
 }
 
-
 TensorAttribute::~TensorAttribute() = default;
 
 const ITensorAttribute *
@@ -93,7 +92,6 @@ TensorAttribute::clearDoc(DocId docId)
     return 0u;
 }
 
-
 void
 TensorAttribute::onCommit()
 {
@@ -110,7 +108,6 @@ TensorAttribute::onCommit()
     }
 }
 
-
 void
 TensorAttribute::onUpdateStat()
 {
@@ -126,7 +123,6 @@ TensorAttribute::onUpdateStat()
                            total.allocatedBytesOnHold());
 }
 
-
 void
 TensorAttribute::removeOldGenerations(generation_t firstUsed)
 {
@@ -141,7 +137,6 @@ TensorAttribute::onGenerationChange(generation_t generation)
     _tensorStore.transferHoldLists(generation - 1);
 }
 
-
 bool
 TensorAttribute::addDoc(DocId &docId)
 {
@@ -209,7 +204,6 @@ TensorAttribute::clearDocs(DocId lidLow, DocId lidLimit)
     }
 }
 
-
 void
 TensorAttribute::onShrinkLidSpace()
 {
@@ -220,14 +214,12 @@ TensorAttribute::onShrinkLidSpace()
     setNumDocs(committedDocIdLimit);
 }
 
-
 uint32_t
 TensorAttribute::getVersion() const
 {
     return TENSOR_ATTRIBUTE_VERSION;
 }
 
-
 TensorAttribute::RefCopyVector
 TensorAttribute::getRefCopy() const
 {
author	Arne H Juul <arnej27959@users.noreply.github.com>	2020-02-26 12:33:01 +0100
committer	GitHub <noreply@github.com>	2020-02-26 12:33:01 +0100
commit	9432751c53285669b9a3b3ab605d673459bcbf37 (patch)
tree	cd37e55362be3466f5d403f443093862088cecc7 /searchlib
parent	16f36db67f619098dc3c97f60d697037face1ce2 (diff)
parent	8cd46748e4e326e6f96a9060abc77b112c7642f0 (diff)