diff options
Diffstat (limited to 'searchlib/src')
6 files changed, 61 insertions, 6 deletions
diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp index da58dd749ba..c01fc33767a 100644 --- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp +++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp @@ -273,6 +273,9 @@ public: void reset_doom(vespalib::steady_time::duration time_to_doom) { _doom = std::make_unique<vespalib::FakeDoom>(time_to_doom); } + uint32_t get_active_nodes() const noexcept { + return index->get_active_nodes(); + } static constexpr bool is_single = std::is_same_v<IndexType, HnswIndex<HnswIndexType::SINGLE>>; }; @@ -284,24 +287,29 @@ TYPED_TEST_SUITE(HnswIndexTest, HnswIndexTestTypes); TYPED_TEST(HnswIndexTest, 2d_vectors_inserted_in_level_0_graph_with_simple_select_neighbors) { this->init(false); + EXPECT_EQ(0, this->get_active_nodes()); this->add_document(1); this->expect_level_0(1, {}); + EXPECT_EQ(1, this->get_active_nodes()); this->add_document(2); this->expect_level_0(1, {2}); this->expect_level_0(2, {1}); + EXPECT_EQ(2, this->get_active_nodes()); this->add_document(3); this->expect_level_0(1, {2, 3}); this->expect_level_0(2, {1, 3}); this->expect_level_0(3, {1, 2}); + EXPECT_EQ(3, this->get_active_nodes()); this->add_document(4); this->expect_level_0(1, {2, 3, 4}); this->expect_level_0(2, {1, 3}); this->expect_level_0(3, {1, 2, 4}); this->expect_level_0(4, {1, 3}); + EXPECT_EQ(4, this->get_active_nodes()); this->add_document(5); this->expect_level_0(1, {2, 3, 4}); @@ -309,6 +317,7 @@ TYPED_TEST(HnswIndexTest, 2d_vectors_inserted_in_level_0_graph_with_simple_selec this->expect_level_0(3, {1, 2, 4, 5}); this->expect_level_0(4, {1, 3}); this->expect_level_0(5, {2, 3}); + EXPECT_EQ(5, this->get_active_nodes()); this->add_document(6); this->expect_level_0(1, {2, 3, 4}); @@ -317,6 +326,7 @@ TYPED_TEST(HnswIndexTest, 2d_vectors_inserted_in_level_0_graph_with_simple_selec this->expect_level_0(4, {1, 3}); this->expect_level_0(5, {2, 3, 6}); this->expect_level_0(6, {2, 5}); + EXPECT_EQ(6, this->get_active_nodes()); this->add_document(7); this->expect_level_0(1, {2, 3, 4}); @@ -326,6 +336,7 @@ TYPED_TEST(HnswIndexTest, 2d_vectors_inserted_in_level_0_graph_with_simple_selec this->expect_level_0(5, {2, 3, 6}); this->expect_level_0(6, {2, 5}); this->expect_level_0(7, {2, 3}); + EXPECT_EQ(7, this->get_active_nodes()); this->expect_top_3(1, {1}); this->expect_top_3(2, {2, 1, 3}); @@ -352,47 +363,57 @@ TYPED_TEST(HnswIndexTest, 2d_vectors_inserted_in_level_0_graph_with_simple_selec TYPED_TEST(HnswIndexTest, 2d_vectors_inserted_and_removed) { this->init(false); + EXPECT_EQ(0, this->get_active_nodes()); this->add_document(1); this->expect_level_0(1, {}); this->expect_entry_point(1, 0); + EXPECT_EQ(1, this->get_active_nodes()); this->add_document(2); this->expect_level_0(1, {2}); this->expect_level_0(2, {1}); this->expect_entry_point(1, 0); + EXPECT_EQ(2, this->get_active_nodes()); this->add_document(3); this->expect_level_0(1, {2, 3}); this->expect_level_0(2, {1, 3}); this->expect_level_0(3, {1, 2}); this->expect_entry_point(1, 0); + EXPECT_EQ(3, this->get_active_nodes()); this->remove_document(2); this->expect_level_0(1, {3}); this->expect_level_0(3, {1}); this->expect_entry_point(1, 0); + EXPECT_EQ(2, this->get_active_nodes()); this->remove_document(1); this->expect_level_0(3, {}); this->expect_entry_point(3, 0); + EXPECT_EQ(1, this->get_active_nodes()); this->remove_document(3); this->expect_entry_point(0, -1); + EXPECT_EQ(0, this->get_active_nodes()); } TYPED_TEST(HnswIndexTest, 2d_vectors_inserted_in_hierarchic_graph_with_heuristic_select_neighbors) { this->init(true); + EXPECT_EQ(0, this->get_active_nodes()); this->add_document(1); this->expect_entry_point(1, 0); this->expect_level_0(1, {}); + EXPECT_EQ(1, this->get_active_nodes()); this->add_document(2); this->expect_entry_point(1, 0); this->expect_level_0(1, {2}); this->expect_level_0(2, {1}); + EXPECT_EQ(2, this->get_active_nodes()); // Doc 3 is also added to level 1 this->add_document(3, 1); @@ -402,6 +423,7 @@ TYPED_TEST(HnswIndexTest, 2d_vectors_inserted_in_hierarchic_graph_with_heuristic this->expect_level_0(1, {2, 3}); this->expect_level_0(2, {1}); this->expect_levels(3, {{1}, {}}); + EXPECT_EQ(3, this->get_active_nodes()); // Doc 4 is closest to 1 and they are linked. // Doc 4 is NOT linked to 3 as the distance between 4 and 3 is greater than the distance between 3 and 1. @@ -412,6 +434,7 @@ TYPED_TEST(HnswIndexTest, 2d_vectors_inserted_in_hierarchic_graph_with_heuristic this->expect_level_0(2, {1}); this->expect_levels(3, {{1}, {}}); this->expect_level_0(4, {1}); + EXPECT_EQ(4, this->get_active_nodes()); // Doc 5 is closest to 2 and they are linked. // The other docs are reachable via 2, and no other links are created. Same argument as with doc 4 above. @@ -422,6 +445,7 @@ TYPED_TEST(HnswIndexTest, 2d_vectors_inserted_in_hierarchic_graph_with_heuristic this->expect_levels(3, {{1}, {}}); this->expect_level_0(4, {1}); this->expect_level_0(5, {2}); + EXPECT_EQ(5, this->get_active_nodes()); // Doc 6 is closest to 5 and they are linked. // Doc 6 is also linked to 2 as the distance between 6 and 2 is less than the distance between 2 and 5. @@ -434,6 +458,7 @@ TYPED_TEST(HnswIndexTest, 2d_vectors_inserted_in_hierarchic_graph_with_heuristic this->expect_level_0(4, {1}); this->expect_level_0(5, {2, 6}); this->expect_levels(6, {{2, 5}, {3}, {}}); + EXPECT_EQ(6, this->get_active_nodes()); // Doc 7 is closest to 3 and they are linked. // Doc 7 is also linked to 6 as the distance between 7 and 6 is less than the distance between 6 and 3. @@ -447,13 +472,15 @@ TYPED_TEST(HnswIndexTest, 2d_vectors_inserted_in_hierarchic_graph_with_heuristic this->expect_level_0(5, {2, 6}); this->expect_levels(6, {{2, 5, 7}, {3}, {}}); this->expect_level_0(7, {3, 6}); + EXPECT_EQ(7, this->get_active_nodes()); { Slime actualSlime; SlimeInserter inserter(actualSlime); this->index->get_state(inserter); const auto &root = actualSlime.get(); EXPECT_EQ(0, root["memory_usage"]["onHold"].asLong()); - EXPECT_EQ(8, root["nodes"].asLong()); + EXPECT_EQ(8, root["nodeid_limit"].asLong()); + EXPECT_EQ(7, root["nodes"].asLong()); EXPECT_EQ(7, root["valid_nodes"].asLong()); EXPECT_EQ(0, root["level_histogram"][0].asLong()); EXPECT_EQ(5, root["level_histogram"][1].asLong()); @@ -476,13 +503,15 @@ TYPED_TEST(HnswIndexTest, 2d_vectors_inserted_in_hierarchic_graph_with_heuristic this->expect_level_0(5, {2, 6}); this->expect_levels(6, {{2, 5, 7}, {3}, {}}); this->expect_level_0(7, {3, 6}); + EXPECT_EQ(6, this->get_active_nodes()); { Slime actualSlime; SlimeInserter inserter(actualSlime); this->index->get_state(inserter); const auto &root = actualSlime.get(); EXPECT_EQ(0, root["memory_usage"]["onHold"].asLong()); - EXPECT_EQ(8, root["nodes"].asLong()); + EXPECT_EQ(8, root["nodeid_limit"].asLong()); + EXPECT_EQ(6, root["nodes"].asLong()); EXPECT_EQ(6, root["valid_nodes"].asLong()); EXPECT_EQ(0, root["level_histogram"][0].asLong()); EXPECT_EQ(4, root["level_histogram"][1].asLong()); @@ -498,17 +527,21 @@ TYPED_TEST(HnswIndexTest, 2d_vectors_inserted_in_hierarchic_graph_with_heuristic TYPED_TEST(HnswIndexTest, manual_insert) { this->init(false); + EXPECT_EQ(0, this->get_active_nodes()); std::vector<uint32_t> nbl; HnswTestNode empty{nbl}; this->index->set_node(1, empty); + EXPECT_EQ(1, this->get_active_nodes()); this->index->set_node(2, empty); + EXPECT_EQ(2, this->get_active_nodes()); HnswTestNode three{{1,2}}; this->index->set_node(3, three); this->expect_level_0(1, {3}); this->expect_level_0(2, {3}); this->expect_level_0(3, {1,2}); + EXPECT_EQ(3, this->get_active_nodes()); this->expect_entry_point(1, 0); @@ -517,6 +550,7 @@ TYPED_TEST(HnswIndexTest, manual_insert) this->expect_entry_point(4, 1); this->expect_level_0(1, {3,4}); + EXPECT_EQ(4, this->get_active_nodes()); HnswTestNode five{{{1,2}, {4}}}; this->index->set_node(5, five); @@ -526,6 +560,7 @@ TYPED_TEST(HnswIndexTest, manual_insert) this->expect_levels(3, {{1,2}}); this->expect_levels(4, {{1}, {5}}); this->expect_levels(5, {{1,2}, {4}}); + EXPECT_EQ(5, this->get_active_nodes()); } TYPED_TEST(HnswIndexTest, memory_is_reclaimed_when_doing_changes_to_graph) @@ -540,12 +575,14 @@ TYPED_TEST(HnswIndexTest, memory_is_reclaimed_when_doing_changes_to_graph) this->expect_level_0(1, {2,3}); this->expect_level_0(2, {1,3}); this->expect_level_0(3, {1,2}); + EXPECT_EQ(3, this->get_active_nodes()); auto mem_2 = this->memory_usage(); // We should use more memory with larger link arrays and extra document. EXPECT_GT((mem_2.usedBytes() - mem_2.deadBytes()), (mem_1.usedBytes() - mem_1.deadBytes())); EXPECT_EQ(0, mem_2.allocatedBytesOnHold()); this->remove_document(2); + EXPECT_EQ(2, this->get_active_nodes()); size_t nodes_growth = 0; if constexpr (TestFixture::is_single) { this->expect_level_0(1, {3}); @@ -562,6 +599,7 @@ TYPED_TEST(HnswIndexTest, memory_is_reclaimed_when_doing_changes_to_graph) // We end up in the same state as before document 2 was added and effectively use the same amount of memory. EXPECT_EQ((mem_1.usedBytes() - mem_1.deadBytes() + nodes_growth), (mem_3.usedBytes() - mem_3.deadBytes())); EXPECT_EQ(0, mem_3.allocatedBytesOnHold()); + EXPECT_EQ(2, this->get_active_nodes()); } TYPED_TEST(HnswIndexTest, memory_is_put_on_hold_while_read_guard_is_held) @@ -957,16 +995,21 @@ TYPED_TEST(TwoPhaseTest, two_phase_add) this->expect_levels(4, {{1,2}, {5,6}}); this->expect_levels(5, {{2,3}, {4,6}}); this->expect_levels(6, {{1,3}, {4,5}, {}}); + EXPECT_EQ(6, this->get_active_nodes()); auto up = this->prepare_add(7, 1); // simulate things happening while 7 is in progress: this->add_document(8); // added + EXPECT_EQ(7, this->get_active_nodes()); this->remove_document(1); // removed this->remove_document(5); + EXPECT_EQ(5, this->get_active_nodes()); this->vectors.set(5, {8, 14}); // updated and moved this->add_document(5, 2); this->add_document(9, 1); // added + EXPECT_EQ(7, this->get_active_nodes()); this->complete_add(7, std::move(up)); + EXPECT_EQ(8, this->get_active_nodes()); auto& id_mapping = this->index->get_id_mapping(); auto nodeids = id_mapping.get_ids(7); diff --git a/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp b/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp index b64d87d24ea..efaa8ca0171 100644 --- a/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp +++ b/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp @@ -167,6 +167,7 @@ public: void expect_copy_as_populated() const { EXPECT_EQ(copy.size(), 7); + EXPECT_EQ(4, copy.get_active_nodes()); auto entry = copy.get_entry_node(); EXPECT_EQ(entry.nodeid, 2); EXPECT_EQ(entry.level, 1); diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp index 56459d47dab..16e2af0ad97 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp @@ -11,6 +11,7 @@ template <HnswIndexType type> HnswGraph<type>::HnswGraph() : nodes(), nodes_size(1u), + active_nodes(0u), levels_store(HnswIndex<type>::make_default_level_array_store_config(), {}), links_store(HnswIndex<type>::make_default_link_array_store_config(), {}), entry_nodeid_and_level() @@ -40,6 +41,9 @@ HnswGraph<type>::make_node(uint32_t nodeid, uint32_t docid, uint32_t subspace, u if (nodeid >= nodes_size.load(std::memory_order_relaxed)) { nodes_size.store(nodeid + 1, std::memory_order_release); } + if (levels_ref.valid()) { + set_active_nodes(get_active_nodes() + 1); + } return levels_ref; } @@ -58,6 +62,7 @@ HnswGraph<type>::remove_node(uint32_t nodeid) auto old_links_ref = levels[i].load_relaxed(); links_store.remove(old_links_ref); } + set_active_nodes(get_active_nodes() - 1); if (nodeid + 1 == nodes_size.load(std::memory_order_relaxed)) { trim_nodes_size(); } diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h index 52d227bead1..a1a9e9632be 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h @@ -47,6 +47,7 @@ struct HnswGraph { NodeVector nodes; std::atomic<uint32_t> nodes_size; + std::atomic<uint32_t> active_nodes; LevelArrayStore levels_store; LinkArrayStore links_store; @@ -169,6 +170,8 @@ struct HnswGraph { } size_t size() const { return nodes_size.load(std::memory_order_acquire); } + uint32_t get_active_nodes() const noexcept { return active_nodes.load(std::memory_order_relaxed); } + void set_active_nodes(uint32_t value) noexcept { active_nodes.store(value, std::memory_order_relaxed); } struct Histograms { std::vector<uint32_t> level_histogram; diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp index f0dcbe30317..bf62fcdaa23 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp @@ -609,8 +609,8 @@ HnswIndex<type>::prepare_add_document(uint32_t docid, VectorBundle vectors, vespalib::GenerationHandler::Guard read_guard) const { - uint32_t max_nodes = _graph.nodes_size.load(std::memory_order_acquire); - if (max_nodes < _cfg.min_size_before_two_phase()) { + uint32_t active_nodes = _graph.get_active_nodes(); + if (active_nodes < _cfg.min_size_before_two_phase()) { // the first documents added will do all work in write thread // to ensure they are linked together: return std::make_unique<PreparedFirstAddDoc>(); @@ -628,7 +628,7 @@ HnswIndex<type>::complete_add_document(uint32_t docid, std::unique_ptr<PrepareRe internal_complete_add(docid, *prepared); } else { // we expect this for the first documents added, so no warning for them - if (_graph.nodes.size() > 1.25 * _cfg.min_size_before_two_phase()) { + if (_graph.get_active_nodes() > 1.25 * _cfg.min_size_before_two_phase()) { LOG(warning, "complete_add_document(%u) called with invalid prepare_result %s/%u", docid, (prepared ? "valid ptr" : "nullptr"), (prepared ? prepared->docid : 0u)); } @@ -824,7 +824,8 @@ HnswIndex<type>::get_state(const vespalib::slime::Inserter& inserter) const StateExplorerUtils::memory_usage_to_slime(_graph.nodes.getMemoryUsage(), memUsageObj.setObject("nodes")); StateExplorerUtils::memory_usage_to_slime(_graph.levels_store.getMemoryUsage(), memUsageObj.setObject("levels")); StateExplorerUtils::memory_usage_to_slime(_graph.links_store.getMemoryUsage(), memUsageObj.setObject("links")); - object.setLong("nodes", _graph.size()); + object.setLong("nodeid_limit", _graph.size()); + object.setLong("nodes", _graph.get_active_nodes()); auto& histogram_array = object.setArray("level_histogram"); auto& links_hst_array = object.setArray("level_0_links_histogram"); auto histograms = _graph.histograms(); diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h index eeaa30f55df..d94b6584c35 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h @@ -240,6 +240,8 @@ public: uint32_t get_entry_nodeid() const { return _graph.get_entry_node().nodeid; } int32_t get_entry_level() const { return _graph.get_entry_node().level; } + uint32_t get_active_nodes() const noexcept { return _graph.get_active_nodes(); } + // Should only be used by unit tests. HnswTestNode get_node(uint32_t nodeid) const; void set_node(uint32_t nodeid, const HnswTestNode &node); |