summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorArne Juul <arnej@verizonmedia.com>2020-04-01 06:50:51 +0000
committerArne Juul <arnej@verizonmedia.com>2020-04-01 06:55:58 +0000
commit4c5b231949fabd1e91c3251c56480529acf5328d (patch)
treef3e73e52d07a2a963af3f837d08b1bc164f6552c /searchlib
parent95ed12aff1b091f5a8434e7c80642cf02d403028 (diff)
count reachables for statistics
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp40
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.h1
2 files changed, 38 insertions, 3 deletions
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
index 10be90a7f30..b08d862ae6d 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
@@ -382,10 +382,17 @@ HnswIndex::get_state(const vespalib::slime::Inserter& inserter) const
auto& object = inserter.insertObject();
StateExplorerUtils::memory_usage_to_slime(memory_usage(), object.setObject("memory_usage"));
object.setLong("nodes", _graph.size());
- auto& level_histogram = object.setArray("level_histogram");
- for (uint32_t hist_val : _graph.level_histogram()) {
- level_histogram.addLong(hist_val);
+ auto& histogram_array = object.setArray("level_histogram");
+ auto level_histogram = _graph.level_histogram();
+ for (uint32_t hist_val : level_histogram) {
+ histogram_array.addLong(hist_val);
}
+ uint32_t reachable = count_reachable_nodes();
+ uint32_t unreachable = _graph.size() - reachable;
+ if (level_histogram.size() > 0) {
+ unreachable -= level_histogram[0];
+ }
+ object.setLong("unreachable_nodes", unreachable);
object.setLong("entry_docid", _graph.entry_docid);
object.setLong("entry_level", _graph.entry_level);
}
@@ -505,4 +512,31 @@ HnswIndex::check_link_symmetry() const
return all_sym;
}
+uint32_t
+HnswIndex::count_reachable_nodes() const
+{
+ int search_level = get_entry_level();
+ if (search_level < 0) {
+ return 0;
+ }
+ auto visited = _visited_set_pool.get(_graph.size());
+ uint32_t entry_id = get_entry_docid();
+ LinkArray found_links;
+ found_links.push_back(entry_id);
+ visited.mark(entry_id);
+ while (search_level >= 0) {
+ for (uint32_t idx = 0; idx < found_links.size(); ++idx) {
+ uint32_t docid = found_links[idx];
+ auto neighbors = _graph.get_link_array(docid, search_level);
+ for (uint32_t neighbor : neighbors) {
+ if (visited.is_marked(neighbor)) continue;
+ visited.mark(neighbor);
+ found_links.push_back(neighbor);
+ }
+ }
+ --search_level;
+ }
+ return found_links.size();
+}
+
} // namespace
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
index b5d57c2ebfd..95001853710 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
@@ -147,6 +147,7 @@ public:
HnswNode get_node(uint32_t docid) const;
void set_node(uint32_t docid, const HnswNode &node);
bool check_link_symmetry() const;
+ uint32_t count_reachable_nodes() const;
static search::datastore::ArrayStoreConfig make_default_node_store_config();
static search::datastore::ArrayStoreConfig make_default_link_store_config();