summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2021-08-16 12:06:29 +0000
committerHenning Baldersheim <balder@yahoo-inc.com>2021-08-16 12:06:29 +0000
commit1f97c8c7a45eaf4823354645eba3718ff3d1c264 (patch)
tree43fcf99c7d2b12cf7928507b58ce7eb13637e918 /searchlib
parent4324d883cbd8fe08a919d776772052c4e2fef297 (diff)
Instead of having one large array of individually allocated vectors use
2 large, optionally mmapped, vectors where the first just points into the second. In order to avoid resizing, count first.
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp49
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.h12
2 files changed, 46 insertions, 15 deletions
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp
index 6593a60d6b5..b21f7ec0d07 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp
@@ -6,7 +6,31 @@
namespace search::tensor {
-HnswIndexSaver::~HnswIndexSaver() {}
+namespace {
+
+size_t
+countValidLinks(const HnswGraph & graph) {
+ size_t count(0);
+ size_t num_nodes = graph.node_refs.size();
+ for (size_t i = 0; i < num_nodes; ++i) {
+ auto node_ref = graph.node_refs[i].load_acquire();
+ if (node_ref.valid()) {
+ count += graph.nodes.get(node_ref).size();
+ }
+ }
+ return count;
+}
+
+}
+
+HnswIndexSaver::MetaData::MetaData()
+ : entry_docid(0),
+ entry_level(-1),
+ refs(),
+ nodes()
+{}
+HnswIndexSaver::MetaData::~MetaData() = default;
+HnswIndexSaver::~HnswIndexSaver() = default;
HnswIndexSaver::HnswIndexSaver(const HnswGraph &graph)
: _graph_links(graph.links), _meta_data()
@@ -15,19 +39,22 @@ HnswIndexSaver::HnswIndexSaver(const HnswGraph &graph)
_meta_data.entry_docid = entry.docid;
_meta_data.entry_level = entry.level;
size_t num_nodes = graph.node_refs.size();
- _meta_data.nodes.reserve(num_nodes);
+ assert (num_nodes <= 0xfffffffful);
+ size_t linkCount = countValidLinks(graph);
+ assert (linkCount <= 0xfffffffful);
+ _meta_data.refs.reserve(linkCount);
+ _meta_data.nodes.reserve(num_nodes+1);
for (size_t i = 0; i < num_nodes; ++i) {
- LevelVector node;
+ _meta_data.nodes.push_back(_meta_data.refs.size());
auto node_ref = graph.node_refs[i].load_acquire();
if (node_ref.valid()) {
auto levels = graph.nodes.get(node_ref);
for (const auto& links_ref : levels) {
- auto level = links_ref.load_acquire();
- node.push_back(level);
+ _meta_data.refs.push_back(links_ref.load_acquire());
}
}
- _meta_data.nodes.emplace_back(std::move(node));
}
+ _meta_data.nodes.push_back(_meta_data.refs.size());
}
void
@@ -35,12 +62,14 @@ HnswIndexSaver::save(BufferWriter& writer) const
{
writer.write(&_meta_data.entry_docid, sizeof(uint32_t));
writer.write(&_meta_data.entry_level, sizeof(int32_t));
- uint32_t num_nodes = _meta_data.nodes.size();
+ uint32_t num_nodes = _meta_data.nodes.size() - 1;
writer.write(&num_nodes, sizeof(uint32_t));
- for (const auto &node : _meta_data.nodes) {
- uint32_t num_levels = node.size();
+ for (uint32_t i(0); i < num_nodes; i++) {
+ uint32_t offset = _meta_data.nodes[i];
+ uint32_t num_levels = _meta_data.nodes[i+1] - offset;
writer.write(&num_levels, sizeof(uint32_t));
- for (auto links_ref : node) {
+ for (; offset <_meta_data.nodes[i+1]; offset++) {
+ auto links_ref = _meta_data.refs[offset];
if (links_ref.valid()) {
vespalib::ConstArrayRef<uint32_t> link_array = _graph_links.get(links_ref);
uint32_t num_links = link_array.size();
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.h
index 387e35efd8b..9052c3f1909 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.h
@@ -5,6 +5,7 @@
#include "nearest_neighbor_index_saver.h"
#include "hnsw_graph.h"
#include <vespa/vespalib/datastore/entryref.h>
+#include <vespa/vespalib/stllike/allocator.h>
#include <vector>
namespace search::tensor {
@@ -17,18 +18,19 @@ namespace search::tensor {
**/
class HnswIndexSaver : public NearestNeighborIndexSaver {
public:
- using LevelVector = std::vector<vespalib::datastore::EntryRef>;
-
HnswIndexSaver(const HnswGraph &graph);
- ~HnswIndexSaver();
+ ~HnswIndexSaver() override;
void save(BufferWriter& writer) const override;
private:
struct MetaData {
+ using EntryRef = vespalib::datastore::EntryRef;
uint32_t entry_docid;
int32_t entry_level;
- std::vector<LevelVector> nodes;
- MetaData() : entry_docid(0), entry_level(-1), nodes() {}
+ std::vector<EntryRef, vespalib::allocator_large<EntryRef>> refs;
+ std::vector<uint32_t, vespalib::allocator_large<uint32_t>> nodes;
+ MetaData();
+ ~MetaData();
};
const HnswGraph::LinkStore &_graph_links;
MetaData _meta_data;