aboutsummaryrefslogtreecommitdiffstats
path: root/storage/src/vespa/storageapi/message/bucketsplitting.h
blob: a6399f3d8f9209d018c36d30279a6814b7c258a2 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once

#include <vespa/storageapi/buckets/bucketinfo.h>
#include <vespa/storageapi/messageapi/bucketcommand.h>
#include <vespa/storageapi/messageapi/bucketinforeply.h>
#include <vespa/storageapi/messageapi/maintenancecommand.h>

namespace storage::api {

/**
 * @class SplitBucketCommand
 * @ingroup message
 *
 * @brief Split a bucket
 *
 * Splits a bucket into two parts using the next split bit that is unused.
 *
 * Distributors can issue splits for multiple reasons:
 *   - Inconsistent buckets, so we need to split buckets containing others until
 *     they are either split equally, or no longer contain others.
 *   - Buckets that are too large are split to reduce file size.
 *   - Buckets with too many entries are split to reduce amount of metadata.
 *
 * In the first case, min and max split bits can be set. This will make storage
 * able to split several bits at a time, but know where to stop.
 *
 * In the second case, min byte size can be set, to ensure that we don't split
 * the bucket more than one step if the copy at the time of processing is
 * actually smaller. Since removes can happen in the meantime, the min byte size
 * should be smaller than the limit we use for splitting; half is suggested.
 *
 * Similarly, the third case can be handled like the second case, just using
 * min doc count as limiter instead.
 *
 * If neither is specified, the min/max split bits limit nothing, but the sizes
 * are set to max, which ensures that only one split step is taken.
 */
class SplitBucketCommand : public MaintenanceCommand {
private:
    // Bounds on the split-bit range. Per the class documentation these let
    // storage split several bits at a time while still knowing where to stop.
    uint8_t _minSplitBits;
    uint8_t _maxSplitBits;
    // Size-based limiters (bytes / document count). Their initial values are
    // assigned by the out-of-line constructor, which is not visible here.
    uint32_t _minByteSize;
    uint32_t _minDocCount;

public:
    /**
     * Creates a split command for the given bucket.
     *
     * Declared explicit, like the other message constructors in this file,
     * so that a document::Bucket is never implicitly converted to a command.
     */
    explicit SplitBucketCommand(const document::Bucket& bucket);

    /** @return the currently configured minimum split bits. */
    uint8_t getMinSplitBits() const { return _minSplitBits; }
    /** @return the currently configured maximum split bits. */
    uint8_t getMaxSplitBits() const { return _maxSplitBits; }
    /** @return the currently configured minimum byte size limiter. */
    uint32_t getMinByteSize() const { return _minByteSize; }
    /** @return the currently configured minimum document count limiter. */
    uint32_t getMinDocCount() const { return _minDocCount; }

    /** Sets the minimum split bits; the value is stored without validation. */
    void setMinSplitBits(uint8_t v) { _minSplitBits = v; }
    /** Sets the maximum split bits; the value is stored without validation. */
    void setMaxSplitBits(uint8_t v) { _maxSplitBits = v; }
    /** Sets the minimum byte size limiter; stored without validation. */
    void setMinByteSize(uint32_t v) { _minByteSize = v; }
    /** Sets the minimum document count limiter; stored without validation. */
    void setMinDocCount(uint32_t v) { _minDocCount = v; }

    /** Prints this command to out; implemented out of line. */
    void print(std::ostream& out, bool verbose, const std::string& indent) const override;

    // Kept last: the macro is defined elsewhere and may change the current
    // access level, so nothing should be declared after it without an
    // explicit access specifier.
    DECLARE_STORAGECOMMAND(SplitBucketCommand, onSplitBucket)
};

/**
 * @class SplitBucketReply
 * @ingroup message
 *
 * @brief Reply of a split bucket command.
 */
class SplitBucketReply : public BucketReply {
public:
    /** One result entry: a bucket id paired with its bucket info. */
    using Entry = std::pair<document::BucketId, BucketInfo>;

private:
    // Entries exposed through getSplitInfo().
    std::vector<Entry> _result;

public:
    /** Creates the reply belonging to the given split command. */
    explicit SplitBucketReply(const SplitBucketCommand& cmd);

    /** Read-only view of the split result entries. */
    const std::vector<Entry>& getSplitInfo() const { return _result; }
    /** Mutable access to the split result entries, e.g. for filling them in. */
    std::vector<Entry>& getSplitInfo() { return _result; }

    /** Prints this reply to out; implemented out of line. */
    void print(std::ostream& out, bool verbose, const std::string& indent) const override;

    // Kept last so that any access specifier the macro leaves active cannot
    // affect other declarations.
    DECLARE_STORAGEREPLY(SplitBucketReply, onSplitBucketReply)
};

/**
 * @class JoinBucketsCommand
 * @ingroup message
 *
 * @brief Join two buckets
 *
 * Joins two buckets on the same node into a bucket with one fewer split bit.
 */
class JoinBucketsCommand : public MaintenanceCommand {
private:
    // Buckets to be joined; callers populate this via getSourceBuckets().
    std::vector<document::BucketId> _sources;
    // Value stored by setMinJoinBits(); its initial value comes from the
    // out-of-line constructor, which is not visible in this header.
    uint8_t _minJoinBits;

public:
    /** Creates a join command whose result bucket is the given target. */
    explicit JoinBucketsCommand(const document::Bucket &target);

    /** Read-only view of the source buckets. */
    const std::vector<document::BucketId>& getSourceBuckets() const { return _sources; }
    /** Mutable access to the source buckets. */
    std::vector<document::BucketId>& getSourceBuckets() { return _sources; }

    /** @return the currently configured minimum join bits. */
    uint8_t getMinJoinBits() const { return _minJoinBits; }
    /** Sets the minimum join bits; the value is stored without validation. */
    void setMinJoinBits(uint8_t minJoinBits) { _minJoinBits = minJoinBits; }

    /** Prints this command to out; implemented out of line. */
    void print(std::ostream& out, bool verbose, const std::string& indent) const override;

    // Kept last so that any access specifier the macro leaves active cannot
    // affect other declarations.
    DECLARE_STORAGECOMMAND(JoinBucketsCommand, onJoinBuckets)
};

/**
 * @class JoinBucketsReply
 * @ingroup message
 *
 * @brief Reply of a join bucket command.
 */
class JoinBucketsReply : public BucketInfoReply {
public:
    /** Creates the reply belonging to the given join command. */
    explicit JoinBucketsReply(const JoinBucketsCommand& cmd);
    /** Creates the reply belonging to the given join command, supplying bucket info. */
    JoinBucketsReply(const JoinBucketsCommand& cmd, const BucketInfo& bucketInfo);

    /** Read-only view of the source buckets held by this reply. */
    const std::vector<document::BucketId>& getSourceBuckets() const { return _sources; }

    /** Prints this reply to out; implemented out of line. */
    void print(std::ostream& out, bool verbose, const std::string& indent) const override;

    DECLARE_STORAGEREPLY(JoinBucketsReply, onJoinBucketsReply)

private:
    // Source bucket ids; presumably taken from the command by the out-of-line
    // constructors (not visible in this header).
    std::vector<document::BucketId> _sources;
};

}