summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorArne Juul <arnej@verizonmedia.com>2020-03-27 12:59:14 +0000
committerArne Juul <arnej@verizonmedia.com>2020-03-27 13:08:38 +0000
commitd217c94fcbf129e4a557264b64b76f8563c1d892 (patch)
tree2e5a91436cf68c321a5574f379fca94226197e99 /searchlib
parent3c2f98129e223c0dbf713ad046d7e913228873a0 (diff)
review follow-up:
* add documentation comments * style fixes * return bool from load() for error handling
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp6
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp6
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.h2
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.cpp23
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.h15
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp14
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.h23
-rw-r--r--searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h2
9 files changed, 55 insertions, 38 deletions
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
index b6bdee4f94d..00450eab21a 100644
--- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
+++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
@@ -148,7 +148,7 @@ public:
std::unique_ptr<NearestNeighborIndexSaver> make_saver() const override {
return std::unique_ptr<NearestNeighborIndexSaver>();
}
- void load(const search::fileutil::LoadedBuffer&) override {}
+ bool load(const search::fileutil::LoadedBuffer&) override { return false; }
std::vector<Neighbor> find_top_k(uint32_t k, vespalib::tensor::TypedCells vector, uint32_t explore_k) const override {
(void) k;
(void) vector;
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp
index a77cc5b8f63..db8d1067980 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp
@@ -18,7 +18,8 @@ HnswGraph::HnswGraph()
HnswGraph::~HnswGraph() {}
void
-HnswGraph::make_node_for_document(uint32_t docid, uint32_t num_levels) {
+HnswGraph::make_node_for_document(uint32_t docid, uint32_t num_levels)
+{
node_refs.ensure_size(docid + 1, AtomicEntryRef());
// A document cannot be added twice.
assert(!node_refs[docid].load_acquire().valid());
@@ -29,7 +30,8 @@ HnswGraph::make_node_for_document(uint32_t docid, uint32_t num_levels) {
}
void
-HnswGraph::remove_node_for_document(uint32_t docid) {
+HnswGraph::remove_node_for_document(uint32_t docid)
+{
auto node_ref = node_refs[docid].load_acquire();
nodes.remove(node_ref);
search::datastore::EntryRef invalid;
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
index 7ed2a160107..de6daba650c 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
@@ -389,14 +389,12 @@ HnswIndex::make_saver() const
return std::make_unique<HnswIndexSaver>(_graph);
}
-void
+bool
HnswIndex::load(const fileutil::LoadedBuffer& buf)
{
assert(get_entry_docid() == 0); // cannot load after index has data
HnswIndexLoader loader(_graph);
- if (! loader.load(buf)) {
- fprintf(stderr, "ERROR loading HNSW graph\n");
- }
+ return loader.load(buf);
}
struct NeighborsByDocId {
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
index 12eecb7e44c..b5d57c2ebfd 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
@@ -133,7 +133,7 @@ public:
void get_state(const vespalib::slime::Inserter& inserter) const override;
std::unique_ptr<NearestNeighborIndexSaver> make_saver() const override;
- void load(const fileutil::LoadedBuffer& buf) override;
+ bool load(const fileutil::LoadedBuffer& buf) override;
std::vector<Neighbor> find_top_k(uint32_t k, TypedCells vector, uint32_t explore_k) const override;
const DistanceFunction *distance_function() const override { return _distance_func.get(); }
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.cpp
index 6626515afdb..75c62e5202f 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.cpp
@@ -9,7 +9,7 @@ namespace search::tensor {
HnswIndexLoader::~HnswIndexLoader() {}
HnswIndexLoader::HnswIndexLoader(HnswGraph &graph)
- : _graph(graph)
+ : _graph(graph), _ptr(nullptr), _end(nullptr), _failed(false)
{
}
@@ -17,29 +17,28 @@ bool
HnswIndexLoader::load(const fileutil::LoadedBuffer& buf)
{
size_t num_readable = buf.size(sizeof(uint32_t));
- if (num_readable < 3) return false;
- const uint32_t *ptr = static_cast<const uint32_t *>(buf.buffer());
- const uint32_t *end = ptr + num_readable;
- uint32_t entry_docid = *ptr++;
- int32_t entry_level = *ptr++;
- uint32_t num_nodes = *ptr++;
+ _ptr = static_cast<const uint32_t *>(buf.buffer());
+ _end = _ptr + num_readable;
+ uint32_t entry_docid = nextVal();
+ int32_t entry_level = nextVal();
+ uint32_t num_nodes = nextVal();
std::vector<uint32_t> link_array;
for (uint32_t docid = 0; docid < num_nodes; ++docid) {
- if (ptr == end) return false;
- uint32_t num_levels = *ptr++;
+ uint32_t num_levels = nextVal();
if (num_levels > 0) {
_graph.make_node_for_document(docid, num_levels);
for (uint32_t level = 0; level < num_levels; ++level) {
- uint32_t num_links = *ptr++;
+ uint32_t num_links = nextVal();
link_array.clear();
while (num_links-- > 0) {
- if (ptr == end) return false;
- link_array.push_back(*ptr++);
+ link_array.push_back(nextVal());
}
_graph.set_link_array(docid, level, link_array);
}
}
}
+ if (_failed) return false;
+ _graph.node_refs.ensure_size(num_nodes);
_graph.set_entry_node(entry_docid, entry_level);
return true;
}
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.h
index e7653b37946..abc68889a1b 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.h
@@ -2,12 +2,17 @@
#pragma once
+#include <cstdint>
+
namespace search::fileutil { class LoadedBuffer; }
namespace search::tensor {
class HnswGraph;
+/**
+ * Implements loading of HNSW graph structure from binary format.
+ **/
class HnswIndexLoader {
public:
HnswIndexLoader(HnswGraph &graph);
@@ -15,6 +20,16 @@ public:
bool load(const fileutil::LoadedBuffer& buf);
private:
HnswGraph &_graph;
+ const uint32_t *_ptr;
+ const uint32_t *_end;
+ bool _failed;
+ uint32_t nextVal() {
+ if (__builtin_expect((_ptr == _end), false)) {
+ _failed = true;
+ return 0;
+ }
+ return *_ptr++;
+ }
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp
index e9d70a17794..acff30f8cbf 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp
@@ -9,17 +9,17 @@ namespace search::tensor {
HnswIndexSaver::~HnswIndexSaver() {}
HnswIndexSaver::HnswIndexSaver(const HnswGraph &graph)
- : _graph(graph), _meta_data()
+ : _graph_links(graph.links), _meta_data()
{
- _meta_data.entry_docid = _graph.entry_docid;
- _meta_data.entry_level = _graph.entry_level;
- size_t num_nodes = _graph.node_refs.size();
+ _meta_data.entry_docid = graph.entry_docid;
+ _meta_data.entry_level = graph.entry_level;
+ size_t num_nodes = graph.node_refs.size();
_meta_data.nodes.reserve(num_nodes);
for (size_t i = 0; i < num_nodes; ++i) {
LevelVector node;
- auto node_ref = _graph.node_refs[i].load_acquire();
+ auto node_ref = graph.node_refs[i].load_acquire();
if (node_ref.valid()) {
- auto levels = _graph.nodes.get(node_ref);
+ auto levels = graph.nodes.get(node_ref);
for (const auto& links_ref : levels) {
auto level = links_ref.load_acquire();
node.push_back(level);
@@ -41,7 +41,7 @@ HnswIndexSaver::save(BufferWriter& writer) const
writer.write(&num_levels, sizeof(uint32_t));
for (auto links_ref : node) {
if (links_ref.valid()) {
- vespalib::ConstArrayRef<uint32_t> link_array = _graph.links.get(links_ref);
+ vespalib::ConstArrayRef<uint32_t> link_array = _graph_links.get(links_ref);
uint32_t num_links = link_array.size();
writer.write(&num_links, sizeof(uint32_t));
writer.write(link_array.cbegin(), sizeof(uint32_t)*num_links);
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.h
index 70dc37e38c5..d1d8e0db19d 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.h
@@ -3,32 +3,35 @@
#pragma once
#include "nearest_neighbor_index_saver.h"
+#include "hnsw_graph.h"
#include <vespa/vespalib/datastore/entryref.h>
#include <vector>
namespace search::tensor {
-class HnswGraph;
-
+/**
+ * Implements saving of HNSW graph structure in binary format.
+ * The constructor takes a snapshot of all meta-data, but
+ * the links will be fetched from the graph in the save()
+ * method.
+ **/
class HnswIndexSaver : public NearestNeighborIndexSaver {
public:
using LevelVector = std::vector<search::datastore::EntryRef>;
+ HnswIndexSaver(const HnswGraph &graph);
+ ~HnswIndexSaver();
+ void save(BufferWriter& writer) const override;
+
+private:
struct MetaData {
uint32_t entry_docid;
int32_t entry_level;
std::vector<LevelVector> nodes;
MetaData() : entry_docid(0), entry_level(-1), nodes() {}
};
-
- HnswIndexSaver(const HnswGraph &graph);
- ~HnswIndexSaver();
- void save(BufferWriter& writer) const override;
-
-private:
- const HnswGraph &_graph;
+ const HnswGraph::LinkStore &_graph_links;
MetaData _meta_data;
-
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
index bb6ef012a56..aca2ce2af66 100644
--- a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
@@ -47,7 +47,7 @@ public:
* and the caller ensures that an attribute read guard is held during the lifetime of the saver.
*/
virtual std::unique_ptr<NearestNeighborIndexSaver> make_saver() const = 0;
- virtual void load(const fileutil::LoadedBuffer& buf) = 0;
+ virtual bool load(const fileutil::LoadedBuffer& buf) = 0;
virtual std::vector<Neighbor> find_top_k(uint32_t k,
vespalib::tensor::TypedCells vector,