diff options
author | Arne Juul <arnej@verizonmedia.com> | 2020-03-27 12:59:14 +0000 |
---|---|---|
committer | Arne Juul <arnej@verizonmedia.com> | 2020-03-27 13:08:38 +0000 |
commit | d217c94fcbf129e4a557264b64b76f8563c1d892 (patch) | |
tree | 2e5a91436cf68c321a5574f379fca94226197e99 /searchlib | |
parent | 3c2f98129e223c0dbf713ad046d7e913228873a0 (diff) |
review follow-up:
* add documentation comments
* style fixes
* return bool from load() for error handling
Diffstat (limited to 'searchlib')
9 files changed, 55 insertions, 38 deletions
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp index b6bdee4f94d..00450eab21a 100644 --- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp +++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp @@ -148,7 +148,7 @@ public: std::unique_ptr<NearestNeighborIndexSaver> make_saver() const override { return std::unique_ptr<NearestNeighborIndexSaver>(); } - void load(const search::fileutil::LoadedBuffer&) override {} + bool load(const search::fileutil::LoadedBuffer&) override { return false; } std::vector<Neighbor> find_top_k(uint32_t k, vespalib::tensor::TypedCells vector, uint32_t explore_k) const override { (void) k; (void) vector; diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp index a77cc5b8f63..db8d1067980 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp @@ -18,7 +18,8 @@ HnswGraph::HnswGraph() HnswGraph::~HnswGraph() {} void -HnswGraph::make_node_for_document(uint32_t docid, uint32_t num_levels) { +HnswGraph::make_node_for_document(uint32_t docid, uint32_t num_levels) +{ node_refs.ensure_size(docid + 1, AtomicEntryRef()); // A document cannot be added twice. assert(!node_refs[docid].load_acquire().valid()); @@ -29,7 +30,8 @@ HnswGraph::make_node_for_document(uint32_t docid, uint32_t num_levels) { } void -HnswGraph::remove_node_for_document(uint32_t docid) { +HnswGraph::remove_node_for_document(uint32_t docid) +{ auto node_ref = node_refs[docid].load_acquire(); nodes.remove(node_ref); search::datastore::EntryRef invalid; diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp index 7ed2a160107..de6daba650c 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp @@ -389,14 +389,12 @@ HnswIndex::make_saver() const return std::make_unique<HnswIndexSaver>(_graph); } -void +bool HnswIndex::load(const fileutil::LoadedBuffer& buf) { assert(get_entry_docid() == 0); // cannot load after index has data HnswIndexLoader loader(_graph); - if (! loader.load(buf)) { - fprintf(stderr, "ERROR loading HNSW graph\n"); - } + return loader.load(buf); } struct NeighborsByDocId { diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h index 12eecb7e44c..b5d57c2ebfd 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h @@ -133,7 +133,7 @@ public: void get_state(const vespalib::slime::Inserter& inserter) const override; std::unique_ptr<NearestNeighborIndexSaver> make_saver() const override; - void load(const fileutil::LoadedBuffer& buf) override; + bool load(const fileutil::LoadedBuffer& buf) override; std::vector<Neighbor> find_top_k(uint32_t k, TypedCells vector, uint32_t explore_k) const override; const DistanceFunction *distance_function() const override { return _distance_func.get(); } diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.cpp index 6626515afdb..75c62e5202f 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.cpp @@ -9,7 +9,7 @@ namespace search::tensor { HnswIndexLoader::~HnswIndexLoader() {} HnswIndexLoader::HnswIndexLoader(HnswGraph &graph) - : _graph(graph) + : _graph(graph), _ptr(nullptr), _end(nullptr), _failed(false) { } @@ -17,29 +17,28 @@ bool HnswIndexLoader::load(const fileutil::LoadedBuffer& buf) { size_t num_readable = buf.size(sizeof(uint32_t)); - if (num_readable < 3) return false; - const uint32_t *ptr = static_cast<const uint32_t *>(buf.buffer()); - const uint32_t *end = ptr + num_readable; - uint32_t entry_docid = *ptr++; - int32_t entry_level = *ptr++; - uint32_t num_nodes = *ptr++; + _ptr = static_cast<const uint32_t *>(buf.buffer()); + _end = _ptr + num_readable; + uint32_t entry_docid = nextVal(); + int32_t entry_level = nextVal(); + uint32_t num_nodes = nextVal(); std::vector<uint32_t> link_array; for (uint32_t docid = 0; docid < num_nodes; ++docid) { - if (ptr == end) return false; - uint32_t num_levels = *ptr++; + uint32_t num_levels = nextVal(); if (num_levels > 0) { _graph.make_node_for_document(docid, num_levels); for (uint32_t level = 0; level < num_levels; ++level) { - uint32_t num_links = *ptr++; + uint32_t num_links = nextVal(); link_array.clear(); while (num_links-- > 0) { - if (ptr == end) return false; - link_array.push_back(*ptr++); + link_array.push_back(nextVal()); } _graph.set_link_array(docid, level, link_array); } } } + if (_failed) return false; + _graph.node_refs.ensure_size(num_nodes); _graph.set_entry_node(entry_docid, entry_level); return true; } diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.h index e7653b37946..abc68889a1b 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.h @@ -2,12 +2,17 @@ #pragma once +#include <cstdint> + namespace search::fileutil { class LoadedBuffer; } namespace search::tensor { class HnswGraph; +/** + * Implements loading of HNSW graph structure from binary format. + **/ class HnswIndexLoader { public: HnswIndexLoader(HnswGraph &graph); @@ -15,6 +20,16 @@ public: bool load(const fileutil::LoadedBuffer& buf); private: HnswGraph &_graph; + const uint32_t *_ptr; + const uint32_t *_end; + bool _failed; + uint32_t nextVal() { + if (__builtin_expect((_ptr == _end), false)) { + _failed = true; + return 0; + } + return *_ptr++; + } }; } diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp index e9d70a17794..acff30f8cbf 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp @@ -9,17 +9,17 @@ namespace search::tensor { HnswIndexSaver::~HnswIndexSaver() {} HnswIndexSaver::HnswIndexSaver(const HnswGraph &graph) - : _graph(graph), _meta_data() + : _graph_links(graph.links), _meta_data() { - _meta_data.entry_docid = _graph.entry_docid; - _meta_data.entry_level = _graph.entry_level; - size_t num_nodes = _graph.node_refs.size(); + _meta_data.entry_docid = graph.entry_docid; + _meta_data.entry_level = graph.entry_level; + size_t num_nodes = graph.node_refs.size(); _meta_data.nodes.reserve(num_nodes); for (size_t i = 0; i < num_nodes; ++i) { LevelVector node; - auto node_ref = _graph.node_refs[i].load_acquire(); + auto node_ref = graph.node_refs[i].load_acquire(); if (node_ref.valid()) { - auto levels = _graph.nodes.get(node_ref); + auto levels = graph.nodes.get(node_ref); for (const auto& links_ref : levels) { auto level = links_ref.load_acquire(); node.push_back(level); @@ -41,7 +41,7 @@ HnswIndexSaver::save(BufferWriter& writer) const writer.write(&num_levels, sizeof(uint32_t)); for (auto links_ref : node) { if (links_ref.valid()) { - vespalib::ConstArrayRef<uint32_t> link_array = _graph.links.get(links_ref); + vespalib::ConstArrayRef<uint32_t> link_array = _graph_links.get(links_ref); uint32_t num_links = link_array.size(); writer.write(&num_links, sizeof(uint32_t)); writer.write(link_array.cbegin(), sizeof(uint32_t)*num_links); diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.h index 70dc37e38c5..d1d8e0db19d 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.h @@ -3,32 +3,35 @@ #pragma once #include "nearest_neighbor_index_saver.h" +#include "hnsw_graph.h" #include <vespa/vespalib/datastore/entryref.h> #include <vector> namespace search::tensor { -class HnswGraph; - +/** + * Implements saving of HNSW graph structure in binary format. + * The constructor takes a snapshot of all meta-data, but + * the links will be fetched from the graph in the save() + * method. + **/ class HnswIndexSaver : public NearestNeighborIndexSaver { public: using LevelVector = std::vector<search::datastore::EntryRef>; + HnswIndexSaver(const HnswGraph &graph); + ~HnswIndexSaver(); + void save(BufferWriter& writer) const override; + +private: struct MetaData { uint32_t entry_docid; int32_t entry_level; std::vector<LevelVector> nodes; MetaData() : entry_docid(0), entry_level(-1), nodes() {} }; - - HnswIndexSaver(const HnswGraph &graph); - ~HnswIndexSaver(); - void save(BufferWriter& writer) const override; - -private: - const HnswGraph &_graph; + const HnswGraph::LinkStore &_graph_links; MetaData _meta_data; - }; } diff --git a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h index bb6ef012a56..aca2ce2af66 100644 --- a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h +++ b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h @@ -47,7 +47,7 @@ public: * and the caller ensures that an attribute read guard is held during the lifetime of the saver. */ virtual std::unique_ptr<NearestNeighborIndexSaver> make_saver() const = 0; - virtual void load(const fileutil::LoadedBuffer& buf) = 0; + virtual bool load(const fileutil::LoadedBuffer& buf) = 0; virtual std::vector<Neighbor> find_top_k(uint32_t k, vespalib::tensor::TypedCells vector, |