summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTor Egge <Tor.Egge@online.no>2021-08-16 13:04:48 +0200
committerTor Egge <Tor.Egge@online.no>2021-08-16 13:04:48 +0200
commitb74e8425c532cf2f24c82fd118306f36ceef81e7 (patch)
tree7e22be7bdf527614139b49821336758d1f970d28
parent90a67fbeab441a67e1893690b9e67ff75d3213bb (diff)
Use 4096 buffers for HNSW link array store.
Configure link array store to handle arrays of 193 elements or less without indirect storage.
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_graph.h9
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp2
-rw-r--r--vespalib/src/vespa/vespalib/datastore/entryref.cpp1
3 files changed, 8 insertions, 4 deletions
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h
index ce2bc172752..4d07f74f8e3 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h
@@ -18,7 +18,10 @@ struct HnswGraph {
// This uses 10 bits for buffer id -> 1024 buffers.
// As we have very short arrays we get less fragmentation with fewer and larger buffers.
- using EntryRefType = vespalib::datastore::EntryRefT<22>;
+ using LevelArrayEntryRefType = vespalib::datastore::EntryRefT<22>;
+
+ // This uses 12 bits for buffer id -> 4096 buffers.
+ using LinkArrayEntryRefType = vespalib::datastore::EntryRefT<20>;
// Provides mapping from document id -> node reference.
// The reference is used to lookup the node data in NodeStore.
@@ -27,13 +30,13 @@ struct HnswGraph {
// This stores the level arrays for all nodes.
// Each node consists of an array of levels (from level 0 to n) where each entry is a reference to the link array at that level.
- using NodeStore = vespalib::datastore::ArrayStore<AtomicEntryRef, EntryRefType>;
+ using NodeStore = vespalib::datastore::ArrayStore<AtomicEntryRef, LevelArrayEntryRefType>;
using StoreConfig = vespalib::datastore::ArrayStoreConfig;
using LevelArrayRef = NodeStore::ConstArrayRef;
// This stores all link arrays.
// A link array consists of the document ids of the nodes a particular node is linked to.
- using LinkStore = vespalib::datastore::ArrayStore<uint32_t, EntryRefType>;
+ using LinkStore = vespalib::datastore::ArrayStore<uint32_t, LinkArrayEntryRefType>;
using LinkArrayRef = LinkStore::ConstArrayRef;
NodeRefVector node_refs;
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
index 3d780d0b534..8183a7caf3d 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
@@ -30,7 +30,7 @@ constexpr size_t min_num_arrays_for_new_buffer = 512_Ki;
constexpr float alloc_grow_factor = 0.3;
// TODO: Adjust these numbers to what we accept as max in config.
constexpr size_t max_level_array_size = 16;
-constexpr size_t max_link_array_size = 64;
+constexpr size_t max_link_array_size = 193;
constexpr vespalib::duration MAX_COUNT_DURATION(100ms);
bool has_link_to(vespalib::ConstArrayRef<uint32_t> links, uint32_t id) {
diff --git a/vespalib/src/vespa/vespalib/datastore/entryref.cpp b/vespalib/src/vespa/vespalib/datastore/entryref.cpp
index 0551e617283..83501e49ed3 100644
--- a/vespalib/src/vespa/vespalib/datastore/entryref.cpp
+++ b/vespalib/src/vespa/vespalib/datastore/entryref.cpp
@@ -8,6 +8,7 @@ namespace vespalib::datastore {
template EntryRefT<24u, 8u>::EntryRefT(size_t, uint32_t);
template EntryRefT<31u, 1u>::EntryRefT(size_t, uint32_t);
template EntryRefT<22u,10u>::EntryRefT(size_t, uint32_t);
+template EntryRefT<20u,12u>::EntryRefT(size_t, uint32_t);
template EntryRefT<19u,13u>::EntryRefT(size_t, uint32_t);
template EntryRefT<18u, 6u>::EntryRefT(size_t, uint32_t);
template EntryRefT<15u,17u>::EntryRefT(size_t, uint32_t);