summary | refs | log | tree | commit | diff | stats
path: root/searchlib/src
diff options
context:
space:
mode:
author: Arne H Juul <arnej27959@users.noreply.github.com> 2020-06-25 08:32:55 +0200
committer: GitHub <noreply@github.com> 2020-06-25 08:32:55 +0200
commit: d1cbff19dd37c79fe0dc24b24969f3a0eacee403 (patch)
tree: b3c0e96e84ace1676625954d9c9843ef96e94cf6 /searchlib/src
parent: c341910e1509d4d607db4baa6fd914d0802e8532 (diff)
parent: 213c2ad09f9183975717166a4c0022cbbed63eaa (diff)
Merge pull request #13684 from vespa-engine/arnej/first-1000-no-twophase
the very first documents added to HNSW index should use single-phase …
Diffstat (limited to 'searchlib/src')
-rw-r--r-- searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp | 2
-rw-r--r-- searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp | 2
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/default_nearest_neighbor_index_factory.cpp | 1
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp | 12
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/hnsw_index.h | 4
5 files changed, 18 insertions, 3 deletions
diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
index 7dc0efc106d..cd989c03b4e 100644
--- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
+++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
@@ -82,7 +82,7 @@ public:
level_generator = generator.get();
index = std::make_unique<HnswIndex>(vectors, std::make_unique<FloatSqEuclideanDistance>(),
std::move(generator),
- HnswIndex::Config(5, 2, 10, heuristic_select_neighbors));
+ HnswIndex::Config(5, 2, 10, 0, heuristic_select_neighbors));
}
void add_document(uint32_t docid, uint32_t max_level = 0) {
level_generator->level = max_level;
diff --git a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp
index 4dec9550f6f..1e10d94bb18 100644
--- a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp
+++ b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp
@@ -234,7 +234,7 @@ public:
uint32_t m = 16;
index = std::make_unique<HnswIndex>(vectors, std::make_unique<FloatSqEuclideanDistance>(),
std::make_unique<InvLogLevelGenerator>(m),
- HnswIndex::Config(2*m, m, 200, true));
+ HnswIndex::Config(2*m, m, 200, 10, true));
}
size_t get_rnd(size_t size) {
return rng.nextUniform() * size;
diff --git a/searchlib/src/vespa/searchlib/tensor/default_nearest_neighbor_index_factory.cpp b/searchlib/src/vespa/searchlib/tensor/default_nearest_neighbor_index_factory.cpp
index 067280e9a23..0bb6f339455 100644
--- a/searchlib/src/vespa/searchlib/tensor/default_nearest_neighbor_index_factory.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/default_nearest_neighbor_index_factory.cpp
@@ -36,6 +36,7 @@ DefaultNearestNeighborIndexFactory::make(const DocVectorAccess& vectors,
HnswIndex::Config cfg(m * 2,
m,
params.neighbors_to_explore_at_insert(),
+ 10000,
true);
return std::make_unique<HnswIndex>(vectors,
make_distance_function(params.distance_metric(), cell_type),
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
index 540a7a86104..5c36f7bcb37 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
@@ -373,6 +373,12 @@ HnswIndex::prepare_add_document(uint32_t docid,
TypedCells vector,
vespalib::GenerationHandler::Guard read_guard) const
{
+ uint32_t max_nodes = _graph.node_refs.size();
+ if (max_nodes < _cfg.min_size_before_two_phase()) {
+ // the first documents added will do all work in write thread
+ // to ensure they are linked together:
+ return std::unique_ptr<PrepareResult>();
+ }
PreparedAddDoc op = internal_prepare_add(docid, vector);
(void) read_guard; // must keep guard until this point
return std::make_unique<PreparedAddDoc>(std::move(op));
@@ -385,7 +391,11 @@ HnswIndex::complete_add_document(uint32_t docid, std::unique_ptr<PrepareResult>
if (prepared && (prepared->docid == docid)) {
internal_complete_add(docid, *prepared);
} else {
- LOG(warning, "complete_add_document called with invalid prepare_result");
+ // we expect this for the first documents added, so no warning for them
+ if (_graph.node_refs.size() > 1.25 * _cfg.min_size_before_two_phase()) {
+ LOG(warning, "complete_add_document(%u) called with invalid prepare_result %s/%u",
+ docid, (prepared ? "valid ptr" : "nullptr"), (prepared ? prepared->docid : 0u));
+ }
// fallback to normal add
add_document(docid);
}
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
index ab3eced8fdc..c237e3b8fcf 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
@@ -39,21 +39,25 @@ public:
uint32_t _max_links_at_level_0;
uint32_t _max_links_on_inserts;
uint32_t _neighbors_to_explore_at_construction;
+ uint32_t _min_size_before_two_phase;
bool _heuristic_select_neighbors;
public:
Config(uint32_t max_links_at_level_0_in,
uint32_t max_links_on_inserts_in,
uint32_t neighbors_to_explore_at_construction_in,
+ uint32_t min_size_before_two_phase_in,
bool heuristic_select_neighbors_in)
: _max_links_at_level_0(max_links_at_level_0_in),
_max_links_on_inserts(max_links_on_inserts_in),
_neighbors_to_explore_at_construction(neighbors_to_explore_at_construction_in),
+ _min_size_before_two_phase(min_size_before_two_phase_in),
_heuristic_select_neighbors(heuristic_select_neighbors_in)
{}
uint32_t max_links_at_level_0() const { return _max_links_at_level_0; }
uint32_t max_links_on_inserts() const { return _max_links_on_inserts; }
uint32_t neighbors_to_explore_at_construction() const { return _neighbors_to_explore_at_construction; }
+ uint32_t min_size_before_two_phase() const { return _min_size_before_two_phase; }
bool heuristic_select_neighbors() const { return _heuristic_select_neighbors; }
};