aboutsummaryrefslogtreecommitdiffstats
path: root/storage/src/vespa/storage/distributor/idealstatemetricsset.cpp
blob: ad480b0cec2ecbc372562adc09c39b411973570c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#include "idealstatemetricsset.h"
namespace storage {

namespace distributor {

// Builds the metric set shared by all ideal state operation types.
// The name, tags, description and owner are handed on to the MetricSet base;
// every member metric below is constructed with this set as its owner argument.
OperationMetricSet::OperationMetricSet(const std::string& name, metrics::Metric::Tags tags, const std::string& description, MetricSet* owner)
    : MetricSet(name, std::move(tags), description, owner),
      pending(
          "pending",
          metrics::Metric::Tags{{"logdefault"}, {"yamasdefault"}},
          "The number of operations pending",
          this),
      ok(
          "done_ok",
          metrics::Metric::Tags{{"logdefault"}, {"yamasdefault"}},
          "The number of operations successfully performed",
          this),
      failed(
          "done_failed",
          metrics::Metric::Tags{{"logdefault"}, {"yamasdefault"}},
          "The number of operations that failed",
          this),
      blocked(
          "blocked",
          metrics::Metric::Tags{{"logdefault"}, {"yamasdefault"}},
          "The number of operations blocked by blocking operation starter",
          this),
      throttled(
          "throttled",
          metrics::Metric::Tags{{"logdefault"}, {"yamasdefault"}},
          "The number of operations throttled by throttling operation starter",
          this)
{
}

// Defaulted destructor, defined out of line in this file rather than in the class body.
OperationMetricSet::~OperationMetricSet() = default;

// Operation metric set for garbage collection. All constructor arguments are
// forwarded to OperationMetricSet, and one extra metric counting removed
// documents is added with this set as its owner argument.
GcMetricSet::GcMetricSet(const std::string& name, metrics::Metric::Tags tags, const std::string& description, MetricSet* owner)
    : OperationMetricSet(name, std::move(tags), description, owner),
      documents_removed(
          "documents_removed",
          metrics::Metric::Tags{{"logdefault"}, {"yamasdefault"}},
          "Number of documents removed by GC operations",
          this)
{
}

// Defaulted destructor, defined out of line in this file rather than in the class body.
GcMetricSet::~GcMetricSet() = default;

// Operation metric set for bucket merges. All constructor arguments are
// forwarded to OperationMetricSet, and three extra metrics describing the
// handling of source-only copies are added with this set as their owner argument.
MergeBucketMetricSet::MergeBucketMetricSet(const std::string& name, metrics::Metric::Tags tags, const std::string& description, MetricSet* owner)
    : OperationMetricSet(name, std::move(tags), description, owner),
      source_only_copy_changed(
          "source_only_copy_changed",
          metrics::Metric::Tags{{"logdefault"}, {"yamasdefault"}},
          "The number of merge operations where source-only copy changed",
          this),
      source_only_copy_delete_blocked(
          "source_only_copy_delete_blocked",
          metrics::Metric::Tags{{"logdefault"}, {"yamasdefault"}},
          "The number of merge operations where delete of unchanged source-only copies was blocked",
          this),
      source_only_copy_delete_failed(
          "source_only_copy_delete_failed",
          metrics::Metric::Tags{{"logdefault"}, {"yamasdefault"}},
          "The number of merge operations where delete of unchanged source-only copies failed",
          this)
{}

// Defaulted destructor, defined out of line in this file rather than in the class body.
MergeBucketMetricSet::~MergeBucketMetricSet() = default;

void
IdealStateMetricSet::createOperationMetrics() {
    using ISO = IdealStateOperation;
    operations.resize(ISO::OPERATION_COUNT);
    // Note: naked new is used instead of make_shared due to the latter not being
    // able to properly transitively deduce the types for the tag initializer lists.
    operations[ISO::DELETE_BUCKET] = std::shared_ptr<OperationMetricSet>(
            new OperationMetricSet("delete_bucket",
                                   {{"logdefault"},{"yamasdefault"}},
                                   "Operations to delete excess buckets on storage nodes", this));
    operations[ISO::MERGE_BUCKET] = std::make_shared<MergeBucketMetricSet>
                                    ("merge_bucket",
                                     metrics::Metric::Tags{{"logdefault"},{"yamasdefault"}},
                                     "Operations to merge buckets that are out of sync", this);
    operations[ISO::SPLIT_BUCKET] = std::shared_ptr<OperationMetricSet>(
            new OperationMetricSet("split_bucket",
                                   {{"logdefault"},{"yamasdefault"}},
                                   "Operations to split buckets that are larger than the configured size", this));
    operations[ISO::JOIN_BUCKET] = std::shared_ptr<OperationMetricSet>(
            new OperationMetricSet("join_bucket",
                                   {{"logdefault"},{"yamasdefault"}},
                                   "Operations to join buckets that in sum are smaller than the configured size", this));
    operations[ISO::SET_BUCKET_STATE] = std::shared_ptr<OperationMetricSet>(
            new OperationMetricSet("set_bucket_state",
                                   {{"logdefault"},{"yamasdefault"}},
                                   "Operations to set active/ready state for bucket copies", this));
    operations[ISO::GARBAGE_COLLECTION] = std::shared_ptr<OperationMetricSet>(
            new GcMetricSet("garbage_collection",
                            {{"logdefault"},{"yamasdefault"}},
                            "Operations to garbage collect data from buckets", this));
}

// Sets up the top-level "idealstate" metric set. Each member metric is
// constructed with this set as its owner argument, and the per-operation
// metric sets are created last, in the constructor body.
IdealStateMetricSet::IdealStateMetricSet()
    : MetricSet("idealstate", metrics::Metric::Tags{{"idealstate"}}, "Statistics for ideal state generation"),
      idealstate_diff(
          "idealstate_diff",
          metrics::Metric::Tags{{"logdefault"}, {"yamasdefault"}},
          "A number representing the current difference from the ideal "
          "state. This is a number that decreases steadily as the system "
          "is getting closer to the ideal state",
          this),
      buckets_toofewcopies(
          "buckets_toofewcopies",
          metrics::Metric::Tags{{"logdefault"}, {"yamasdefault"}},
          "The number of buckets the distributor controls that have less "
          "than the desired redundancy",
          this),
      buckets_toomanycopies(
          "buckets_toomanycopies",
          metrics::Metric::Tags{{"logdefault"}, {"yamasdefault"}},
          "The number of buckets the distributor controls that have more "
          "than the desired redundancy",
          this),
      buckets(
          "buckets",
          metrics::Metric::Tags{{"logdefault"}, {"yamasdefault"}},
          "The number of buckets the distributor controls",
          this),
      buckets_notrusted(
          "buckets_notrusted",
          metrics::Metric::Tags{{"logdefault"}, {"yamasdefault"}},
          "The number of buckets that have no trusted copies.",
          this),
      buckets_rechecking(
          "buckets_rechecking",
          metrics::Metric::Tags{{"logdefault"}, {"yamasdefault"}},
          "The number of buckets that we are rechecking for "
          "ideal state operations",
          this),
      buckets_replicas_moving_out(
          "bucket_replicas_moving_out",
          metrics::Metric::Tags{{"logdefault"}, {"yamasdefault"}},
          "Bucket replicas that should be moved out, e.g. retirement case or node "
          "added to cluster that has higher ideal state priority.",
          this),
      buckets_replicas_copying_in(
          "bucket_replicas_copying_in",
          metrics::Metric::Tags{{"logdefault"}, {"yamasdefault"}},
          "Bucket replicas that should be copied in, e.g. node does not have a "
          "replica for a bucket that it is in ideal state for",
          this),
      buckets_replicas_copying_out(
          "bucket_replicas_copying_out",
          metrics::Metric::Tags{{"logdefault"}, {"yamasdefault"}},
          "Bucket replicas that should be copied out, e.g. node is in ideal state "
          "but might have to provide data other nodes in a merge",
          this),
      buckets_replicas_syncing(
          "bucket_replicas_syncing",
          metrics::Metric::Tags{{"logdefault"}, {"yamasdefault"}},
          "Bucket replicas that need syncing due to mismatching metadata",
          this),
      max_observed_time_since_last_gc_sec(
          "max_observed_time_since_last_gc_sec",
          metrics::Metric::Tags{{"logdefault"}, {"yamasdefault"}},
          "Maximum time (in seconds) since GC was last successfully run for a bucket. "
          "Aggregated max value across all buckets on the distributor.",
          this),
      // This metric is the only one constructed with an empty tag list.
      nodesPerMerge(
          "nodes_per_merge",
          metrics::Metric::Tags{},
          "The number of nodes involved in a single merge operation.",
          this)
{
    createOperationMetrics();
}

// Defaulted destructor, defined out of line in this file rather than in the class body.
IdealStateMetricSet::~IdealStateMetricSet() = default;

// Stores the supplied pending-operation counts, one per operation type, and
// then recomputes idealstate_diff from the values that were just stored.
//
// newMetrics holds exactly OPERATION_COUNT entries; entry i is written to the
// pending metric of operations[i].
void IdealStateMetricSet::setPendingOperations(std::span<uint64_t, IdealStateOperation::OPERATION_COUNT> newMetrics) {
    using ISO = IdealStateOperation;

    for (uint32_t op = 0; op < ISO::OPERATION_COUNT; ++op) {
        operations[op]->pending.set(newMetrics[op]);
    }

    // Reads back the value most recently set on an operation's pending metric.
    const auto last_pending = [this](auto op_type) {
        return operations[op_type]->pending.getLast();
    };

    // Weighted sum of pending operations: merges count 10x, splits 4x, joins 2x,
    // deletes and bucket state changes 1x. Pending garbage collection
    // operations are not part of the sum.
    const auto weighted_diff = last_pending(ISO::DELETE_BUCKET)
                             + last_pending(ISO::MERGE_BUCKET) * 10
                             + last_pending(ISO::SPLIT_BUCKET) * 4
                             + last_pending(ISO::JOIN_BUCKET) * 2
                             + last_pending(ISO::SET_BUCKET_STATE);
    idealstate_diff.set(weighted_diff);
}

}

}