diff options
author | Arne H Juul <arnej27959@users.noreply.github.com> | 2020-06-25 08:32:55 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-06-25 08:32:55 +0200 |
commit | d1cbff19dd37c79fe0dc24b24969f3a0eacee403 (patch) | |
tree | b3c0e96e84ace1676625954d9c9843ef96e94cf6 /searchlib/src | |
parent | c341910e1509d4d607db4baa6fd914d0802e8532 (diff) | |
parent | 213c2ad09f9183975717166a4c0022cbbed63eaa (diff) |
Merge pull request #13684 from vespa-engine/arnej/first-1000-no-twophase
The very first documents added to the HNSW index should use single-phase …
Diffstat (limited to 'searchlib/src')
5 files changed, 18 insertions, 3 deletions
diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp index 7dc0efc106d..cd989c03b4e 100644 --- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp +++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp @@ -82,7 +82,7 @@ public: level_generator = generator.get(); index = std::make_unique<HnswIndex>(vectors, std::make_unique<FloatSqEuclideanDistance>(), std::move(generator), - HnswIndex::Config(5, 2, 10, heuristic_select_neighbors)); + HnswIndex::Config(5, 2, 10, 0, heuristic_select_neighbors)); } void add_document(uint32_t docid, uint32_t max_level = 0) { level_generator->level = max_level; diff --git a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp index 4dec9550f6f..1e10d94bb18 100644 --- a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp +++ b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp @@ -234,7 +234,7 @@ public: uint32_t m = 16; index = std::make_unique<HnswIndex>(vectors, std::make_unique<FloatSqEuclideanDistance>(), std::make_unique<InvLogLevelGenerator>(m), - HnswIndex::Config(2*m, m, 200, true)); + HnswIndex::Config(2*m, m, 200, 10, true)); } size_t get_rnd(size_t size) { return rng.nextUniform() * size; diff --git a/searchlib/src/vespa/searchlib/tensor/default_nearest_neighbor_index_factory.cpp b/searchlib/src/vespa/searchlib/tensor/default_nearest_neighbor_index_factory.cpp index 067280e9a23..0bb6f339455 100644 --- a/searchlib/src/vespa/searchlib/tensor/default_nearest_neighbor_index_factory.cpp +++ b/searchlib/src/vespa/searchlib/tensor/default_nearest_neighbor_index_factory.cpp @@ -36,6 +36,7 @@ DefaultNearestNeighborIndexFactory::make(const DocVectorAccess& vectors, HnswIndex::Config cfg(m * 2, m, params.neighbors_to_explore_at_insert(), + 10000, true); return std::make_unique<HnswIndex>(vectors, make_distance_function(params.distance_metric(), cell_type), diff --git 
a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp index 540a7a86104..5c36f7bcb37 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp @@ -373,6 +373,12 @@ HnswIndex::prepare_add_document(uint32_t docid, TypedCells vector, vespalib::GenerationHandler::Guard read_guard) const { + uint32_t max_nodes = _graph.node_refs.size(); + if (max_nodes < _cfg.min_size_before_two_phase()) { + // the first documents added will do all work in write thread + // to ensure they are linked together: + return std::unique_ptr<PrepareResult>(); + } PreparedAddDoc op = internal_prepare_add(docid, vector); (void) read_guard; // must keep guard until this point return std::make_unique<PreparedAddDoc>(std::move(op)); @@ -385,7 +391,11 @@ HnswIndex::complete_add_document(uint32_t docid, std::unique_ptr<PrepareResult> if (prepared && (prepared->docid == docid)) { internal_complete_add(docid, *prepared); } else { - LOG(warning, "complete_add_document called with invalid prepare_result"); + // we expect this for the first documents added, so no warning for them + if (_graph.node_refs.size() > 1.25 * _cfg.min_size_before_two_phase()) { + LOG(warning, "complete_add_document(%u) called with invalid prepare_result %s/%u", + docid, (prepared ? "valid ptr" : "nullptr"), (prepared ? 
prepared->docid : 0u)); + } // fallback to normal add add_document(docid); } diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h index ab3eced8fdc..c237e3b8fcf 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h @@ -39,21 +39,25 @@ public: uint32_t _max_links_at_level_0; uint32_t _max_links_on_inserts; uint32_t _neighbors_to_explore_at_construction; + uint32_t _min_size_before_two_phase; bool _heuristic_select_neighbors; public: Config(uint32_t max_links_at_level_0_in, uint32_t max_links_on_inserts_in, uint32_t neighbors_to_explore_at_construction_in, + uint32_t min_size_before_two_phase_in, bool heuristic_select_neighbors_in) : _max_links_at_level_0(max_links_at_level_0_in), _max_links_on_inserts(max_links_on_inserts_in), _neighbors_to_explore_at_construction(neighbors_to_explore_at_construction_in), + _min_size_before_two_phase(min_size_before_two_phase_in), _heuristic_select_neighbors(heuristic_select_neighbors_in) {} uint32_t max_links_at_level_0() const { return _max_links_at_level_0; } uint32_t max_links_on_inserts() const { return _max_links_on_inserts; } uint32_t neighbors_to_explore_at_construction() const { return _neighbors_to_explore_at_construction; } + uint32_t min_size_before_two_phase() const { return _min_size_before_two_phase; } bool heuristic_select_neighbors() const { return _heuristic_select_neighbors; } }; |