aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src
diff options
context:
space:
mode:
authorGeir Storli <geirst@verizonmedia.com>2020-02-07 16:32:59 +0100
committerGitHub <noreply@github.com>2020-02-07 16:32:59 +0100
commit37582d1a0db24cc07a5843af5e319cc3cb90b642 (patch)
tree3db6abc5cb5d84462cbff0bd78da497b1d0d40ba /searchlib/src
parent96f979c97c2a63354ee40b24334abdef2ed09e24 (diff)
parenteb9ad2afa3d0a687532d80cf233f69a8cc7d2560 (diff)
Merge pull request #12118 from vespa-engine/arnej/add-remove-to-hnsw-index
implement remove_document
Diffstat (limited to 'searchlib/src')
-rw-r--r--searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp35
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp23
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index_base.cpp11
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index_base.h1
4 files changed, 68 insertions, 2 deletions
diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
index 76548a025a0..634569cbc8c 100644
--- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
+++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
@@ -67,6 +67,9 @@ public:
level_generator.level = max_level;
index->add_document(docid);
}
+ void remove_document(uint32_t docid) {
+ index->remove_document(docid);
+ }
void expect_entry_point(uint32_t exp_docid, uint32_t exp_level) {
EXPECT_EQ(exp_docid, index->get_entry_docid());
EXPECT_EQ(exp_level, index->get_entry_level());
@@ -131,6 +134,38 @@ TEST_F(HnswIndexTest, 2d_vectors_inserted_in_level_0_graph_with_simple_select_ne
expect_level_0(7, {2, 3});
}
+TEST_F(HnswIndexTest, 2d_vectors_inserted_and_removed)
+{
+ init(false);
+
+ add_document(1);
+ expect_level_0(1, {});
+ expect_entry_point(1, 0);
+
+ add_document(2);
+ expect_level_0(1, {2});
+ expect_level_0(2, {1});
+ expect_entry_point(1, 0);
+
+ add_document(3);
+ expect_level_0(1, {2, 3});
+ expect_level_0(2, {1, 3});
+ expect_level_0(3, {1, 2});
+ expect_entry_point(1, 0);
+
+ remove_document(2);
+ expect_level_0(1, {3});
+ expect_level_0(3, {1});
+ expect_entry_point(1, 0);
+
+ remove_document(1);
+ expect_level_0(3, {});
+ expect_entry_point(3, 0);
+
+ remove_document(3);
+ expect_entry_point(0, -1);
+}
+
TEST_F(HnswIndexTest, 2d_vectors_inserted_in_hierarchic_graph_with_heuristic_select_neighbors)
{
init(true);
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
index 4992a26d8b4..345f7f551a6 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
@@ -140,8 +140,27 @@ template <typename FloatType>
void
HnswIndex<FloatType>::remove_document(uint32_t docid)
{
- (void) docid;
- // TODO: implement
+ bool need_new_entrypoint = (docid == _entry_docid);
+ LinkArray empty;
+ LevelArrayRef node_levels = get_level_array(docid);
+ for (int level = node_levels.size(); level-- > 0; ) {
+ LinkArrayRef my_links = get_link_array(docid, level);
+ for (uint32_t neighbor_id : my_links) {
+ if (need_new_entrypoint) {
+ _entry_docid = neighbor_id;
+ _entry_level = level;
+ need_new_entrypoint = false;
+ }
+ remove_link_to(neighbor_id, docid, level);
+ }
+ set_link_array(docid, level, empty);
+ }
+ if (need_new_entrypoint) {
+ _entry_docid = 0;
+ _entry_level = -1;
+ }
+ search::datastore::EntryRef invalid;
+ _node_refs[docid].store_release(invalid);
}
}
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.cpp
index 7a6493b6fcf..a7b9c1dba79 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.cpp
@@ -145,6 +145,17 @@ HnswIndexBase::connect_new_node(uint32_t docid, const LinkArray& neighbors, uint
}
}
+void
+HnswIndexBase::remove_link_to(uint32_t remove_from, uint32_t remove_id, uint32_t level)
+{
+ LinkArray new_links;
+ auto old_links = get_link_array(remove_from, level);
+ for (uint32_t id : old_links) {
+ if (id != remove_id) new_links.push_back(id);
+ }
+ set_link_array(remove_from, level, new_links);
+}
+
HnswIndexBase::HnswIndexBase(const DocVectorAccess& vectors, RandomLevelGenerator& level_generator, const Config& cfg)
: _vectors(vectors),
_level_generator(level_generator),
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.h
index e92ca31e789..9987b61c157 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.h
@@ -109,6 +109,7 @@ protected:
LinkArray select_neighbors_simple(const HnswCandidateVector& neighbors, uint32_t max_links) const;
LinkArray select_neighbors(const HnswCandidateVector& neighbors, uint32_t max_links) const;
void connect_new_node(uint32_t docid, const LinkArray& neighbors, uint32_t level);
+ void remove_link_to(uint32_t remove_from, uint32_t remove_id, uint32_t level);
public:
HnswIndexBase(const DocVectorAccess& vectors, RandomLevelGenerator& level_generator, const Config& cfg);