aboutsummaryrefslogtreecommitdiffstats
path: root/document/src/vespa/document/fieldvalue/serializablearray.h
blob: 6e4734c01100fe8887e8bf8c751cc2fd5df1bbac (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
/**
 * \class document::SerializableArray
 * \brief key/value array that can be serialized and deserialized efficiently.
 *
 * The SerializableArray class is optimized for doing multiple
 * serialize()/deserialize() without changing attributes. Once
 * an attribute is changed, serialization is much slower. This makes
 * sense, since a document travels between a lot of processes and
 * queues, where nothing happens except serialization and deserialization.
 *
 * It also supports multiple deserializations, where serializations
 * from multiple other arrays are merged into one array.
 * Attributes that overlap get the last known value.
 */

#pragma once

#include <vespa/vespalib/util/buffer.h>
#include <vespa/document/util/bytebuffer.h>
#include <vector>

namespace document {

// Forward declaration only: within this header BufferMap is referenced solely
// through a std::unique_ptr member of SerializableArray, so the complete type
// is not required here.
namespace serializablearray {
    class BufferMap;
}

class SerializableArray
{
public:
    /**
     * Contains the id of a field, the size and a buffer reference that is either
     * a relative offset into a common buffer, or a pointer to the buffer itself
     * if it is not.
     * The most significant bit of the _sz member indicates which of the 2 it is:
     * when it is set the entry carries a pointer, otherwise an offset.
     */
    class Entry {
    public:
        /** Creates an entry with id 0, size 0 and no buffer. */
        Entry() : _id(0), _sz(0), _data() {}
        /**
         * Creates an empty entry carrying only an id.
         * NOTE(review): deliberately left non-explicit; code outside this
         * header may rely on the implicit int -> Entry conversion - confirm
         * before tightening.
         */
        Entry(int i) : _id(i), _sz(0), _data()  {}
        /**
         * Creates an entry whose data is addressed by an offset.
         * The top bit of sz doubles as the "has buffer" flag, so sz is
         * expected to be below 2^31; it is stored unmodified.
         */
        Entry(uint32_t i, uint32_t sz, uint32_t off) : _id(i), _sz(sz), _data(off) {}
        /** Creates an entry that points directly at buf and marks it as such. */
        Entry(uint32_t i, uint32_t sz, const char * buf) : _id(i), _sz(sz | BUFFER_MASK), _data(buf) {}

        int32_t id() const { return _id; }
        /** @return the size with the flag bit stripped. */
        uint32_t size() const { return _sz & ~BUFFER_MASK; }
        /** @return true if this entry holds a direct buffer pointer rather than an offset. */
        bool hasBuffer() const { return (_sz & BUFFER_MASK) != 0; }
        /** Orders entries by id. */
        bool operator < (const Entry & e) const { return _id < e._id; }
        /**
         * Three-way comparison on id.
         *
         * @return a negative value, zero or a positive value if this id is
         * less than, equal to or greater than the id of e. Only the sign is
         * meaningful. Implemented with comparisons rather than subtraction,
         * since subtracting two int32_t ids can overflow.
         */
        int cmp(const Entry & e) const {
            return (_id < e._id) ? -1 : ((_id > e._id) ? 1 : 0);
        }
        /** Points this entry directly at buffer and sets the flag bit; the size is kept. */
        void setBuffer(const char * buffer) { _data._buffer = buffer; _sz |= BUFFER_MASK; }
        /**
         * Returns the data pointer for this entry (defined out of line).
         * Presumably the stored pointer when hasBuffer() is true, otherwise a
         * position inside readOnlyBuffer given by the stored offset - confirm
         * against the implementation.
         */
        VESPA_DLL_LOCAL const char * getBuffer(const ByteBuffer * readOnlyBuffer) const;
    private:
        uint32_t getOffset() const { return _data._offset; }
        // Flag bit kept in the most significant bit of _sz.
        enum { BUFFER_MASK=0x80000000 };
        int32_t      _id;
        uint32_t     _sz;
        // Which member is active is recorded by the BUFFER_MASK bit of _sz.
        union Data {
           Data() : _buffer(nullptr) { }
           Data(const char * buffer) : _buffer(buffer) { }
           Data(uint32_t offset) : _offset(offset) { }
           const char * _buffer;
           uint32_t     _offset;
        } _data;
    };
    /** The collection of entries; a thin wrapper around std::vector<Entry>. */
    class EntryMap : public std::vector<Entry>
    {
    private:
        using V=std::vector<Entry>;
    public:
        EntryMap() : V() { }
    };

    static const uint32_t ReservedId = 100;
    static const uint32_t ReservedIdUpper = 128;

    using UP = std::unique_ptr<SerializableArray>;

    SerializableArray();
    SerializableArray(const SerializableArray&);
    SerializableArray& operator=(const SerializableArray&);
    SerializableArray(SerializableArray &&) noexcept;
    SerializableArray& operator=(SerializableArray &&) noexcept;
    ~SerializableArray();

    /**
     * Installs a set of entries together with a buffer.
     * Presumably the entries refer into the given buffer and replace the
     * current content - confirm against the implementation.
     */
    void set(EntryMap entries, ByteBuffer buffer);
    /**
     * Stores a value in the array.
     *
     * @param id The ID to associate the value with.
     * @param value The value to store.
     * @param len The length of the buffer.
     */
    void set(int id, const char* value, int len);

    /** Stores a value in the array. */
    void set(int id, ByteBuffer buffer);

    /**
     * Gets a value from the array.
     * It will just give you the pointers needed. No refcounting or anything.
     *
     * @param id The ID of the value to get.
     *
     * @return Returns a reference to a buffer. c_str and size will be zero if
     * none is found.
     */
    vespalib::ConstBufferRef get(int id) const;

    /** @return Returns true if the given ID is set in the array. */
    bool has(int id) const;

    /**
     * Clears an attribute.
     *
     * @param id The ID of the attribute to remove from the array.
     */
    void clear(int id);

    /** Deletes all stored attributes. */
    void clear();

    /** @return true if the array holds no entries. */
    bool empty() const { return _entries.empty(); }

    /** @return a pointer to the buffer member; never null. */
    const ByteBuffer* getSerializedBuffer() const {
        return &_uncompSerData;
    }

    /** @return read-only access to the stored entries. */
    const EntryMap & getEntries() const { return _entries; }
private:
    /** Contains the stored attributes, with reference to the real data. */
    EntryMap                  _entries;
    /** Data we deserialized from, if applicable. */
    ByteBuffer                _uncompSerData;
    /**
     * NOTE(review): presumably holds buffers owned by this array for values
     * that are not backed by _uncompSerData - confirm against BufferMap.
     */
    std::unique_ptr<serializablearray::BufferMap> _owned;

    /** Locates the entry with the given id (defined out of line). */
    VESPA_DLL_LOCAL EntryMap::const_iterator find(int id) const;
    VESPA_DLL_LOCAL EntryMap::iterator find(int id);
};

} // document