diff options
author | Arne Juul <arnej@verizonmedia.com> | 2020-02-07 13:52:02 +0000 |
---|---|---|
committer | Arne Juul <arnej@verizonmedia.com> | 2020-02-07 13:52:02 +0000 |
commit | eb9ad2afa3d0a687532d80cf233f69a8cc7d2560 (patch) | |
tree | 3e620fdfcb7990f8ec2de865c96c541c379047e4 /searchlib/src | |
parent | ad08f1b293cdda3c0863feb2700302d9c664b44e (diff) |
implement remove_document
Diffstat (limited to 'searchlib/src')
4 files changed, 68 insertions, 2 deletions
diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp index 76548a025a0..634569cbc8c 100644 --- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp +++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp @@ -67,6 +67,9 @@ public: level_generator.level = max_level; index->add_document(docid); } + void remove_document(uint32_t docid) { + index->remove_document(docid); + } void expect_entry_point(uint32_t exp_docid, uint32_t exp_level) { EXPECT_EQ(exp_docid, index->get_entry_docid()); EXPECT_EQ(exp_level, index->get_entry_level()); @@ -131,6 +134,38 @@ TEST_F(HnswIndexTest, 2d_vectors_inserted_in_level_0_graph_with_simple_select_ne expect_level_0(7, {2, 3}); } +TEST_F(HnswIndexTest, 2d_vectors_inserted_and_removed) +{ + init(false); + + add_document(1); + expect_level_0(1, {}); + expect_entry_point(1, 0); + + add_document(2); + expect_level_0(1, {2}); + expect_level_0(2, {1}); + expect_entry_point(1, 0); + + add_document(3); + expect_level_0(1, {2, 3}); + expect_level_0(2, {1, 3}); + expect_level_0(3, {1, 2}); + expect_entry_point(1, 0); + + remove_document(2); + expect_level_0(1, {3}); + expect_level_0(3, {1}); + expect_entry_point(1, 0); + + remove_document(1); + expect_level_0(3, {}); + expect_entry_point(3, 0); + + remove_document(3); + expect_entry_point(0, -1); +} + TEST_F(HnswIndexTest, 2d_vectors_inserted_in_hierarchic_graph_with_heuristic_select_neighbors) { init(true); diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp index 4992a26d8b4..345f7f551a6 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp @@ -140,8 +140,27 @@ template <typename FloatType> void HnswIndex<FloatType>::remove_document(uint32_t docid) { - (void) docid; - // TODO: implement + bool need_new_entrypoint = (docid == _entry_docid); + LinkArray empty; + LevelArrayRef node_levels = get_level_array(docid); + for (int level = node_levels.size(); level-- > 0; ) { + LinkArrayRef my_links = get_link_array(docid, level); + for (uint32_t neighbor_id : my_links) { + if (need_new_entrypoint) { + _entry_docid = neighbor_id; + _entry_level = level; + need_new_entrypoint = false; + } + remove_link_to(neighbor_id, docid, level); + } + set_link_array(docid, level, empty); + } + if (need_new_entrypoint) { + _entry_docid = 0; + _entry_level = -1; + } + search::datastore::EntryRef invalid; + _node_refs[docid].store_release(invalid); } } diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.cpp index 7a6493b6fcf..a7b9c1dba79 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.cpp @@ -145,6 +145,17 @@ HnswIndexBase::connect_new_node(uint32_t docid, const LinkArray& neighbors, uint } } +void +HnswIndexBase::remove_link_to(uint32_t remove_from, uint32_t remove_id, uint32_t level) +{ + LinkArray new_links; + auto old_links = get_link_array(remove_from, level); + for (uint32_t id : old_links) { + if (id != remove_id) new_links.push_back(id); + } + set_link_array(remove_from, level, new_links); +} + HnswIndexBase::HnswIndexBase(const DocVectorAccess& vectors, RandomLevelGenerator& level_generator, const Config& cfg) : _vectors(vectors), _level_generator(level_generator), diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.h index e92ca31e789..9987b61c157 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.h @@ -109,6 +109,7 @@ protected: LinkArray select_neighbors_simple(const HnswCandidateVector& neighbors, uint32_t max_links) const; LinkArray select_neighbors(const HnswCandidateVector& neighbors, uint32_t max_links) const; void connect_new_node(uint32_t docid, const LinkArray& neighbors, uint32_t level); + void remove_link_to(uint32_t remove_from, uint32_t remove_id, uint32_t level); public: HnswIndexBase(const DocVectorAccess& vectors, RandomLevelGenerator& level_generator, const Config& cfg); |