summaryrefslogtreecommitdiffstats
path: root/storage
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@yahoo-inc.com>2017-01-12 17:22:11 +0100
committerTor Brede Vekterli <vekterli@yahoo-inc.com>2017-01-12 17:22:11 +0100
commita2dff014ac0bf54860ef885711e3ae3a6ed8fdee (patch)
tree9e7f740064f1a5ba7f432c08040f88c100a28c83 /storage
parentd89de4d0f4ef794a36c5e0a0c0b1824f94f4ca5f (diff)
Track time spent transitioning between cluster states
Allows observing whether state transitions take an unexpected amount of time.
Diffstat (limited to 'storage')
-rw-r--r--storage/src/tests/distributor/bucketdbupdatertest.cpp86
-rw-r--r--storage/src/vespa/storage/distributor/bucketdbupdater.cpp25
-rw-r--r--storage/src/vespa/storage/distributor/bucketdbupdater.h3
-rw-r--r--storage/src/vespa/storage/distributor/distributormetricsset.cpp17
-rw-r--r--storage/src/vespa/storage/distributor/distributormetricsset.h1
5 files changed, 120 insertions, 12 deletions
diff --git a/storage/src/tests/distributor/bucketdbupdatertest.cpp b/storage/src/tests/distributor/bucketdbupdatertest.cpp
index d76132e24ab..e0c0a76f3e7 100644
--- a/storage/src/tests/distributor/bucketdbupdatertest.cpp
+++ b/storage/src/tests/distributor/bucketdbupdatertest.cpp
@@ -69,6 +69,10 @@ class BucketDBUpdaterTest : public CppUnit::TestFixture,
CPPUNIT_TEST(changed_distributor_set_implies_ownership_transfer);
CPPUNIT_TEST(unchanged_distributor_set_implies_no_ownership_transfer);
CPPUNIT_TEST(changed_distribution_config_implies_ownership_transfer);
+ CPPUNIT_TEST(transition_time_tracked_for_single_state_change);
+ CPPUNIT_TEST(transition_time_reset_across_non_preempting_state_changes);
+ CPPUNIT_TEST(transition_time_tracked_for_distribution_config_change);
+ CPPUNIT_TEST(transition_time_tracked_across_preempted_transitions);
CPPUNIT_TEST_SUITE_END();
protected:
@@ -119,6 +123,10 @@ protected:
void changed_distributor_set_implies_ownership_transfer();
void unchanged_distributor_set_implies_no_ownership_transfer();
void changed_distribution_config_implies_ownership_transfer();
+ void transition_time_tracked_for_single_state_change();
+ void transition_time_reset_across_non_preempting_state_changes();
+ void transition_time_tracked_for_distribution_config_change();
+ void transition_time_tracked_across_preempted_transitions();
bool bucketExistsThatHasNode(int bucketCount, uint16_t node) const;
@@ -302,11 +310,10 @@ public:
sortSentMessagesByIndex(_sender, sizeBeforeState);
}
- void setAndEnableClusterState(const lib::ClusterState& state,
- uint32_t expectedMsgs,
- uint32_t nBuckets) {
- _sender.clear();
- setSystemState(state);
+ void completeBucketInfoGathering(const lib::ClusterState& state,
+ uint32_t expectedMsgs,
+ uint32_t nBuckets = 1)
+ {
CPPUNIT_ASSERT_EQUAL(size_t(expectedMsgs), _sender.commands.size());
for (uint32_t i = 0; i < _sender.commands.size(); i++) {
@@ -323,6 +330,29 @@ public:
}
}
+ void setAndEnableClusterState(const lib::ClusterState& state,
+ uint32_t expectedMsgs,
+ uint32_t nBuckets)
+ {
+ _sender.clear();
+ setSystemState(state);
+ completeBucketInfoGathering(state, expectedMsgs, nBuckets);
+ }
+
+ void completeStateTransitionInSeconds(const std::string& stateStr,
+ uint32_t seconds,
+ uint32_t expectedMsgs)
+ {
+ _sender.clear();
+ lib::ClusterState state(stateStr);
+ setSystemState(state);
+ getClock().addSecondsToTime(seconds);
+ completeBucketInfoGathering(state, expectedMsgs);
+ }
+
+ uint64_t lastTransitionTimeInMillis() {
+ return uint64_t(getDistributor().getMetrics().stateTransitionTime.getLast());
+ }
void setStorageNodes(uint32_t numStorageNodes) {
_sender.clear();
@@ -2338,5 +2368,51 @@ BucketDBUpdaterTest::changed_distribution_config_implies_ownership_transfer()
CPPUNIT_ASSERT(fixture->state->hasBucketOwnershipTransfer());
}
+void
+BucketDBUpdaterTest::transition_time_tracked_for_single_state_change()
+{
+ completeStateTransitionInSeconds("distributor:2 storage:2", 5, 2);
+
+ CPPUNIT_ASSERT_EQUAL(uint64_t(5000), lastTransitionTimeInMillis());
+}
+
+void
+BucketDBUpdaterTest::transition_time_reset_across_non_preempting_state_changes()
+{
+ completeStateTransitionInSeconds("distributor:2 storage:2", 5, 2);
+ completeStateTransitionInSeconds("distributor:2 storage:3", 3, 1);
+
+ CPPUNIT_ASSERT_EQUAL(uint64_t(3000), lastTransitionTimeInMillis());
+}
+
+void
+BucketDBUpdaterTest::transition_time_tracked_for_distribution_config_change()
+{
+ lib::ClusterState state("distributor:2 storage:2");
+ setAndEnableClusterState(state, 2, 1);
+
+ _sender.clear();
+ std::string distConfig(getDistConfig3Nodes1Group());
+ setDistribution(distConfig);
+ getClock().addSecondsToTime(4);
+ completeBucketInfoGathering(state, 2);
+ CPPUNIT_ASSERT_EQUAL(uint64_t(4000), lastTransitionTimeInMillis());
+}
+
+void
+BucketDBUpdaterTest::transition_time_tracked_across_preempted_transitions()
+{
+ _sender.clear();
+ lib::ClusterState state("distributor:2 storage:2");
+ setSystemState(state);
+ getClock().addSecondsToTime(5);
+ // Pre-empted with new state here, which will push out the old pending
+ // state and replace it with a new one. We should still count the time
+ // used processing the old state.
+ completeStateTransitionInSeconds("distributor:2 storage:3", 3, 3);
+
+ CPPUNIT_ASSERT_EQUAL(uint64_t(8000), lastTransitionTimeInMillis());
+}
+
} // distributor
} // storage
diff --git a/storage/src/vespa/storage/distributor/bucketdbupdater.cpp b/storage/src/vespa/storage/distributor/bucketdbupdater.cpp
index b88a22c8042..b3e2cf36955 100644
--- a/storage/src/vespa/storage/distributor/bucketdbupdater.cpp
+++ b/storage/src/vespa/storage/distributor/bucketdbupdater.cpp
@@ -28,7 +28,8 @@ BucketDBUpdater::BucketDBUpdater(Distributor& owner,
DistributorComponentRegister& compReg)
: framework::StatusReporter("bucketdb", "Bucket DB Updater"),
_bucketSpaceComponent(owner, bucketSpace, compReg, "Bucket DB Updater"),
- _sender(sender)
+ _sender(sender),
+ _transitionTimer(_bucketSpaceComponent.getClock())
{
}
@@ -160,9 +161,29 @@ BucketDBUpdater::removeSuperfluousBuckets(
}
void
+BucketDBUpdater::ensureTransitionTimerStarted()
+{
+ if (!_pendingClusterState) {
+ // Don't overwrite start time if we're already processing a state, as
+ // that will make transition times appear artificially low.
+ _transitionTimer = framework::MilliSecTimer(
+ _bucketSpaceComponent.getClock());
+ }
+}
+
+void
+BucketDBUpdater::completeTransitionTimer()
+{
+ _bucketSpaceComponent.getDistributor().getMetrics()
+ .stateTransitionTime.addValue(_transitionTimer.getElapsedTimeAsDouble());
+}
+
+void
BucketDBUpdater::storageDistributionChanged(
const lib::Distribution& distribution)
{
+ ensureTransitionTimerStarted();
+
removeSuperfluousBuckets(distribution,
_bucketSpaceComponent.getClusterState());
@@ -204,6 +225,7 @@ BucketDBUpdater::onSetSystemState(
if (state == oldState) {
return false;
}
+ ensureTransitionTimerStarted();
removeSuperfluousBuckets(
_bucketSpaceComponent.getDistribution(),
@@ -524,6 +546,7 @@ BucketDBUpdater::processCompletedPendingClusterState()
_pendingClusterState.reset();
_outdatedNodes.clear();
sendAllQueuedBucketRechecks();
+ completeTransitionTimer();
}
void
diff --git a/storage/src/vespa/storage/distributor/bucketdbupdater.h b/storage/src/vespa/storage/distributor/bucketdbupdater.h
index c87907fe0b2..8f050d87d95 100644
--- a/storage/src/vespa/storage/distributor/bucketdbupdater.h
+++ b/storage/src/vespa/storage/distributor/bucketdbupdater.h
@@ -170,6 +170,8 @@ private:
const BucketDatabase::Entry& e,
uint16_t node,
BucketListMerger::BucketList& existing) const;
+ void ensureTransitionTimerStarted();
+ void completeTransitionTimer();
/**
* Adds all buckets contained in the bucket database
* that are either contained
@@ -272,6 +274,7 @@ private:
DistributorMessageSender& _sender;
std::set<EnqueuedBucketRecheck> _enqueuedRechecks;
std::unordered_set<uint16_t> _outdatedNodes;
+ framework::MilliSecTimer _transitionTimer;
};
}
diff --git a/storage/src/vespa/storage/distributor/distributormetricsset.cpp b/storage/src/vespa/storage/distributor/distributormetricsset.cpp
index f842ed771c9..af22ec52c24 100644
--- a/storage/src/vespa/storage/distributor/distributormetricsset.cpp
+++ b/storage/src/vespa/storage/distributor/distributormetricsset.cpp
@@ -19,15 +19,20 @@ DistributorMetricSet::DistributorMetricSet(const metrics::LoadTypeSet& lt)
stats(lt, PersistenceOperationMetricSet("stats"), this),
multioperations(lt, PersistenceOperationMetricSet("multioperations"), this),
visits(lt, VisitorMetricSet(), this),
+ stateTransitionTime("state_transition_time", "",
+ "Time it takes to complete a cluster state transition. If a "
+ "state transition is preempted before completing, its elapsed "
+ "time is counted as part of the total time spent for the final, "
+ "completed state transition", this),
recoveryModeTime("recoverymodeschedulingtime", "",
- "Time spent scheduling operations in recovery mode "
- "after receiving new cluster state", this),
+ "Time spent scheduling operations in recovery mode "
+ "after receiving new cluster state", this),
docsStored("docsstored", "logdefault yamasdefault",
- "Number of documents stored in all buckets controlled by "
- "this distributor", this),
+ "Number of documents stored in all buckets controlled by "
+ "this distributor", this),
bytesStored("bytesstored", "logdefault yamasdefault",
- "Number of bytes stored in all buckets controlled by "
- "this distributor", this)
+ "Number of bytes stored in all buckets controlled by "
+ "this distributor", this)
{
docsStored.logOnlyIfSet();
bytesStored.logOnlyIfSet();
diff --git a/storage/src/vespa/storage/distributor/distributormetricsset.h b/storage/src/vespa/storage/distributor/distributormetricsset.h
index 705e7305e0d..753d9ae0056 100644
--- a/storage/src/vespa/storage/distributor/distributormetricsset.h
+++ b/storage/src/vespa/storage/distributor/distributormetricsset.h
@@ -21,6 +21,7 @@ public:
metrics::LoadMetric<PersistenceOperationMetricSet> stats;
metrics::LoadMetric<PersistenceOperationMetricSet> multioperations;
metrics::LoadMetric<VisitorMetricSet> visits;
+ metrics::DoubleAverageMetric stateTransitionTime;
metrics::DoubleAverageMetric recoveryModeTime;
metrics::LongValueMetric docsStored;
metrics::LongValueMetric bytesStored;