aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/vespa/searchlib/predicate/predicate_ref_cache.h
blob: c663cebb19b7b08aae0e4e65a7b147cace36cebe (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.


#pragma once

#include <set>
#include <cstdint>
#include <cassert>

namespace search::predicate {

/**
 * Holds the data used in a cache lookup operation.
 */
struct CurrentZeroRef {
    const uint32_t *buf;
    uint32_t size;
    void set(const uint32_t *b, uint32_t s) {
        buf = b;
        size = s;
    }
};

/**
 * Comparator (less) used in std::set. It holds a reference to a data
 * store, from which it looks up buffers from the "data_ref"-part of the
 * cached references.
 */
template <typename BufferStore, int SIZE_BITS>
class RefCacheComparator {
    enum { DATA_REF_BITS = 32 - SIZE_BITS,
           DATA_REF_MASK = (1 << DATA_REF_BITS) - 1,
           MAX_SIZE = (1 << SIZE_BITS) - 1,
           SIZE_SHIFT = DATA_REF_BITS };
    const BufferStore &_store;
    const CurrentZeroRef &_current_zero_ref;
public:
    RefCacheComparator(const BufferStore &store,
                       const CurrentZeroRef &zero_ref)
        : _store(store),
          _current_zero_ref(zero_ref){
    }

    void getSizeAndBuf(uint32_t ref, uint32_t &size,
                       const uint32_t *&buf) const {
        if (ref) {
            size = ref >> SIZE_SHIFT;
            buf = _store.getBuffer(ref & DATA_REF_MASK);
            if (size == MAX_SIZE) {
                size = *buf++;
            }
        } else {
            size = _current_zero_ref.size;
            buf = _current_zero_ref.buf;
        }
    }

    bool compareWithZeroRef(uint32_t lhs, uint32_t rhs) const {
        uint32_t lhs_size;
        const uint32_t *lhs_buf;
        getSizeAndBuf(lhs, lhs_size, lhs_buf);
        uint32_t rhs_size;
        const uint32_t *rhs_buf;
        getSizeAndBuf(rhs, rhs_size, rhs_buf);

        if (lhs_size != rhs_size) {
            return lhs_size < rhs_size;
        }
        for (uint32_t i = 0; i < lhs_size; ++i) {
            if (lhs_buf[i] != rhs_buf[i]) {
                return lhs_buf[i] < rhs_buf[i];
            }
        }
        return false;
    }

    bool operator() (uint32_t lhs, uint32_t rhs) const {
        if (!lhs || !rhs) {
            return compareWithZeroRef(lhs, rhs);
        }
        uint32_t lhs_size = lhs >> SIZE_SHIFT;
        uint32_t rhs_size = rhs >> SIZE_SHIFT;
        if (lhs_size != rhs_size) {
            return lhs_size < rhs_size;
        }
        if (lhs == rhs) {
            return false;
        }
        const uint32_t *lhs_buf = _store.getBuffer(lhs & DATA_REF_MASK);
        const uint32_t *rhs_buf = _store.getBuffer(rhs & DATA_REF_MASK);
        uint32_t size = lhs_size;
        if (lhs_size == MAX_SIZE) {
            size = lhs_buf[0] + 1;  // Compare sizes and data in loop
                                    // below. If actual size differs
                                    // then loop will exit in first
                                    // iteration.
        }
        for (uint32_t i = 0; i < size; ++i) {
            if (lhs_buf[i] != rhs_buf[i]) {
                return lhs_buf[i] < rhs_buf[i];
            }
        }
        return false;
    }
};

/**
 * Holds a set of refs and a reference to a datastore that is used to
 * lookup data based on the "data_ref"-part of the ref. Each ref also
 * uses the upper bits to hold the size of the data refered to. If the
 * size is too large to represent by the allocated bits, the max size
 * is used, and the actual size is stored in the first 32-bit value of
 * the data buffer.
 *
 * Note that this class is inherently single threaded, and thus needs
 * external synchronization if used from multiple threads. (Both
 * insert and find)
 */
template <typename BufferStore, int SIZE_BITS = 8>
class PredicateRefCache {
    using ComparatorType = RefCacheComparator<BufferStore, SIZE_BITS>;

    mutable CurrentZeroRef _current_zero_ref;
    std::set<uint32_t, ComparatorType> _ref_cache;

public:
    enum { DATA_REF_BITS = 32 - SIZE_BITS,
           DATA_REF_MASK = (1 << DATA_REF_BITS) - 1,
           MAX_SIZE = (1 << SIZE_BITS) - 1,
           SIZE_SHIFT = DATA_REF_BITS,
           SIZE_MASK = MAX_SIZE << SIZE_SHIFT};

    PredicateRefCache(const BufferStore &store)
        : _ref_cache(ComparatorType(store, _current_zero_ref)) {
    }

    /**
     * Inserts a ref into the cache. The ref refers to data already
     * inserted in the underlying data store.
     */
    uint32_t insert(uint32_t ref) {
        assert(ref);
        return *_ref_cache.insert(ref).first;
    }

    /**
     * Checks if a data sequence is already present in the
     * cache. Returns the datastore ref, or 0 if not present.
     */
    uint32_t find(const uint32_t *buf, uint32_t size) const {
        _current_zero_ref.set(buf, size);
        auto it = _ref_cache.find(0);
        if (it != _ref_cache.end()) {
            return *it;
        }
        return 0;
    }
};

}