aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/vespa/searchlib/diskindex/field_length_scanner.h
blob: ac105de50cff0a4a285e6e393d61dbed052b2ba0 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#pragma once

#include <algorithm>
#include <cstdint>
#include <limits>
#include <unordered_map>
#include <vector>

namespace search::index { class DocIdAndFeatures; }

namespace search::diskindex {

/*
 * Reconstructs field lengths from the element lengths found in a
 * posting list file.
 */
class FieldLengthScanner {
    /*
     * Field length accumulated for one slot of _field_length_vector,
     * together with a bitmask remembering which low-numbered elements
     * have already contributed to it.
     */
    class FieldLengthEntry {
    public:
        // Number of element ids (0 .. tracked_element_count - 1) that have
        // a bit in the _elements bitmask.
        static constexpr uint32_t tracked_element_count = std::numeric_limits<uint16_t>::digits;

    private:
        static constexpr uint16_t max_field_length = std::numeric_limits<uint16_t>::max();

        uint16_t _field_length; // saturates at max_field_length
        uint16_t _elements;     // bit N set => element N already counted

        /*
         * Returns the bit representing element_id in _elements, or 0 when
         * element_id is outside the tracked range.
         *
         * The range check keeps the shift well defined: shifting a 32-bit
         * value by 32 or more is undefined behavior and could otherwise
         * alias the bit of an unrelated tracked element.
         */
        static constexpr uint16_t make_element_mask(uint32_t element_id) noexcept {
            if (element_id >= tracked_element_count) {
                return 0;
            }
            return static_cast<uint16_t>(1u << element_id);
        }

    public:
        FieldLengthEntry() noexcept
            : _field_length(0),
              _elements(0)
        {
        }

        /*
         * Unconditionally adds element_length to the field length. The
         * result is capped at max_field_length instead of wrapping around.
         */
        void add_element_length(uint32_t element_length) noexcept {
            if (element_length >= max_field_length) {
                // A single element at or above the cap saturates the field
                // length; this also keeps the 32-bit sum below from overflowing.
                _field_length = max_field_length;
                return;
            }
            const uint32_t field_length32 = _field_length + element_length;
            _field_length = std::min(field_length32, static_cast<uint32_t>(max_field_length));
        }

        /*
         * Adds element_length unless element_id has already been counted
         * for this entry.
         *
         * Element ids outside the tracked range have no bit in _elements,
         * so they are never deduplicated here and are always added.
         * NOTE(review): callers presumably deduplicate such ids themselves
         * (see _scanned_elements_map) - confirm against the implementation.
         */
        void add_element_length(uint32_t element_length, uint32_t element_id) noexcept {
            const uint16_t element_mask = make_element_mask(element_id);
            if ((_elements & element_mask) != 0) {
                return; // already counted
            }
            _elements |= element_mask;
            add_element_length(element_length);
        }

        // Returns the (possibly capped) accumulated field length.
        uint16_t get_field_length() const noexcept { return _field_length; }
    };

    // Indexed by doc id, see get_field_length().
    std::vector<FieldLengthEntry> _field_length_vector;
    // Element ids below this value are tracked by the bitmask inside
    // FieldLengthEntry.
    static constexpr uint32_t element_id_bias = FieldLengthEntry::tracked_element_count;
    // bit vectors for element >= element_id_bias
    // NOTE(review): the key is presumably the doc id - confirm against the implementation.
    std::unordered_map<uint32_t, std::vector<bool>> _scanned_elements_map;

public:
    /*
     * Defined out of line.
     * NOTE(review): doc_id_limit presumably sizes _field_length_vector so
     * that every doc id below the limit is a valid index - confirm.
     */
    explicit FieldLengthScanner(uint32_t doc_id_limit);
    ~FieldLengthScanner();

    // Defined out of line; feeds the element lengths found in features
    // into the scanner.
    void scan_features(const index::DocIdAndFeatures &features);

    /*
     * Returns the field length accumulated for doc_id. The value is capped
     * at the maximum of uint16_t. doc_id is not bounds checked and must be
     * a valid index into _field_length_vector.
     */
    uint16_t get_field_length(uint32_t doc_id) const { return _field_length_vector[doc_id].get_field_length(); }
};

}