aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.h
blob: b0a36ba9b8457b9baa614194f56b7e799db602d3 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#pragma once

#include <vespa/vespalib/datastore/array_store.h>
#include <vespa/vespalib/datastore/entryref.h>
#include <vespa/vespalib/util/arrayref.h>
#include <vespa/vespalib/util/generation_hold_list.h>
#include <vespa/vespalib/util/growstrategy.h>
#include <vespa/vespalib/util/memoryusage.h>
#include <cstdint>
#include <vector>

namespace search::tensor {

class HnswNode;

/**
 * Class used to keep track of the mapping from docid to array of nodeids.
 * A nodeid is an identifier for a node in the HNSW graph that represents a single vector.
 *
 * The nodeids are allocated by this class.
 * Nodeids that are freed are reused when no reader threads are accessing them (after a hold cycle).
 *
 * Note: Only the writer thread should use this class.
 */
class HnswNodeidMapping {
private:
    using generation_t = vespalib::GenerationHandler::generation_t;
    using NodeidStore = vespalib::datastore::ArrayStore<uint32_t>;
    using NodeidHoldList = vespalib::GenerationHoldList<uint32_t, false, true>;
    using NodeidFreeList = std::vector<uint32_t>;

    // Maps from docid to EntryRef used to get the array of nodeids from the NodeidStore.
    std::vector<vespalib::datastore::EntryRef> _refs;
    vespalib::GrowStrategy _grow_strategy;
    uint32_t _nodeid_limit;
    NodeidStore _nodeids;
    NodeidHoldList _hold_list;
    NodeidFreeList _free_list;

    void ensure_refs_size(uint32_t docid);
    uint32_t allocate_id();
    void allocate_docid_to_nodeids_mapping(std::vector<uint32_t> histogram);
    void populate_docid_to_nodeids_mapping_and_free_list(vespalib::ConstArrayRef<HnswNode> nodes);
    void assert_all_subspaces_have_valid_nodeid(uint32_t docid_limit);

public:
    HnswNodeidMapping();
    ~HnswNodeidMapping();
    vespalib::ConstArrayRef<uint32_t> allocate_ids(uint32_t docid, uint32_t subspaces);
    vespalib::ConstArrayRef<uint32_t> get_ids(uint32_t docid) const;
    void free_ids(uint32_t docid);

    void assign_generation(generation_t current_gen);
    void reclaim_memory(generation_t oldest_used_gen);
    void on_load(vespalib::ConstArrayRef<HnswNode> nodes);
    vespalib::AddressSpace address_space_usage() const { return _nodeids.addressSpaceUsage(); }
    vespalib::MemoryUsage memory_usage() const;
    vespalib::MemoryUsage update_stat(const vespalib::datastore::CompactionStrategy& compaction_strategy);
    bool consider_compact() const noexcept { return _nodeids.consider_compact(); }
    void compact_worst(const vespalib::datastore::CompactionStrategy& compaction_strategy);
};

}

namespace vespalib {
    extern template class GenerationHoldList<uint32_t, false, true>;
}