From a6a09ca19473b3629e82b7c426c63059ffb5f66b Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Thu, 1 Dec 2016 13:54:54 +0100 Subject: Use high resolution latency timing for persistence op metrics --- .../src/vespa/storage/distributor/persistencemessagetracker.cpp | 8 ++++---- storage/src/vespa/storage/distributor/persistencemessagetracker.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'storage') diff --git a/storage/src/vespa/storage/distributor/persistencemessagetracker.cpp b/storage/src/vespa/storage/distributor/persistencemessagetracker.cpp index 34193bbb3c1..e09948928ef 100644 --- a/storage/src/vespa/storage/distributor/persistencemessagetracker.cpp +++ b/storage/src/vespa/storage/distributor/persistencemessagetracker.cpp @@ -21,11 +21,11 @@ PersistenceMessageTrackerImpl::PersistenceMessageTrackerImpl( _metric(metric), _reply(reply), _manager(link), - _success(true), _revertTimestamp(revertTimestamp), - _priority(reply->getPriority()) + _requestTimer(link.getClock()), + _priority(reply->getPriority()), + _success(true) { - _creationTime.SetNow(); } void @@ -62,7 +62,7 @@ PersistenceMessageTrackerImpl::updateMetrics() } else { ++_metric.failures.storagefailure; } - _metric.latency.addValue(_creationTime.MilliSecsToNow()); + _metric.latency.addValue(_requestTimer.getElapsedTimeAsDouble()); } void diff --git a/storage/src/vespa/storage/distributor/persistencemessagetracker.h b/storage/src/vespa/storage/distributor/persistencemessagetracker.h index b5c3e57e05e..06c8bd1135c 100644 --- a/storage/src/vespa/storage/distributor/persistencemessagetracker.h +++ b/storage/src/vespa/storage/distributor/persistencemessagetracker.h @@ -78,12 +78,12 @@ private: PersistenceOperationMetricSet& _metric; std::shared_ptr _reply; DistributorComponent& _manager; - FastOS_Time _creationTime; - bool _success; api::Timestamp _revertTimestamp; std::vector > _revertNodes; mbus::TraceNode _trace; + framework::MilliSecTimer _requestTimer; uint8_t _priority; + bool _success; bool canSendReplyEarly() const; void addBucketInfoFromReply(uint16_t node, -- cgit v1.2.3 From d2097443dd5e5cac9a2693de2f0d3d4ba77a79b7 Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Fri, 2 Dec 2016 15:14:39 +0100 Subject: Factor out return code-based count metric updates --- storage/src/tests/distributor/CMakeLists.txt | 1 + .../distributor/distributor_metrics_set_test.cpp | 85 ++++++++++++++++++++++ .../storage/distributor/distributormetricsset.cpp | 19 +++++ .../storage/distributor/distributormetricsset.h | 12 +++ .../distributor/persistencemessagetracker.cpp | 12 +-- 5 files changed, 118 insertions(+), 11 deletions(-) create mode 100644 storage/src/tests/distributor/distributor_metrics_set_test.cpp (limited to 'storage') diff --git a/storage/src/tests/distributor/CMakeLists.txt b/storage/src/tests/distributor/CMakeLists.txt index 600a8231a8e..8be15645ea8 100644 --- a/storage/src/tests/distributor/CMakeLists.txt +++ b/storage/src/tests/distributor/CMakeLists.txt @@ -39,6 +39,7 @@ vespa_add_library(storage_testdistributor TEST nodemaintenancestatstrackertest.cpp distributor_host_info_reporter_test.cpp ownership_transfer_safe_time_point_calculator_test.cpp + distributor_metrics_set_test.cpp DEPENDS storage_distributor storage_testcommon diff --git a/storage/src/tests/distributor/distributor_metrics_set_test.cpp b/storage/src/tests/distributor/distributor_metrics_set_test.cpp new file mode 100644 index 00000000000..abee97dce68 --- /dev/null +++ b/storage/src/tests/distributor/distributor_metrics_set_test.cpp @@ -0,0 +1,85 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +#include + +namespace storage { +namespace distributor { + +// TODO name this PersistenceMetricsSetTest instead? +struct DistributorMetricsSetTest : CppUnit::TestFixture { + void successful_return_codes_are_counted_as_ok(); + void wrong_distribution_failure_is_counted(); + void timeout_failure_is_counted(); + // Note for these tests: busy, connection failures et al are sets of + // failure codes and not just a single code. We only test certain members + // of these sets here. See api::ReturnCode implementation for an exhaustive + // list. + void busy_failure_is_counted(); + void connection_failure_is_counted(); + void non_special_cased_failure_codes_are_catchall_counted(); + + CPPUNIT_TEST_SUITE(DistributorMetricsSetTest); + CPPUNIT_TEST(successful_return_codes_are_counted_as_ok); + CPPUNIT_TEST(wrong_distribution_failure_is_counted); + CPPUNIT_TEST(timeout_failure_is_counted); + CPPUNIT_TEST(busy_failure_is_counted); + CPPUNIT_TEST(connection_failure_is_counted); + CPPUNIT_TEST(non_special_cased_failure_codes_are_catchall_counted); + CPPUNIT_TEST_SUITE_END(); + + void assert_failure_is_counted(PersistenceOperationMetricSet& metrics, + api::ReturnCode::Result failure_code, + const metrics::LongCountMetric& checked) + { + metrics.updateFromResult(api::ReturnCode(failure_code)); + CPPUNIT_ASSERT_EQUAL(int64_t(1), checked.getLongValue("count")); + CPPUNIT_ASSERT_EQUAL(int64_t(0), metrics.ok.getLongValue("count")); + } +}; + +CPPUNIT_TEST_SUITE_REGISTRATION(DistributorMetricsSetTest); + +void DistributorMetricsSetTest::successful_return_codes_are_counted_as_ok() { + PersistenceOperationMetricSet metrics("foo"); + metrics.updateFromResult(api::ReturnCode()); + CPPUNIT_ASSERT_EQUAL(int64_t(1), metrics.ok.getLongValue("count")); +} + +void DistributorMetricsSetTest::wrong_distribution_failure_is_counted() { + PersistenceOperationMetricSet metrics("foo"); + assert_failure_is_counted(metrics, api::ReturnCode::WRONG_DISTRIBUTION, + metrics.failures.wrongdistributor); +} + +void DistributorMetricsSetTest::timeout_failure_is_counted() { + PersistenceOperationMetricSet metrics("foo"); + assert_failure_is_counted(metrics, api::ReturnCode::TIMEOUT, + metrics.failures.timeout); +} + +void DistributorMetricsSetTest::busy_failure_is_counted() { + PersistenceOperationMetricSet metrics("foo"); + assert_failure_is_counted(metrics, api::ReturnCode::BUSY, + metrics.failures.busy); +} + +void DistributorMetricsSetTest::connection_failure_is_counted() { + PersistenceOperationMetricSet metrics("foo"); + // This is dirty enum value coercion, but this is how "parent protocol" + // error codes are handled already. + api::ReturnCode::Result error_code(static_cast( + mbus::ErrorCode::CONNECTION_ERROR)); + assert_failure_is_counted(metrics, error_code, + metrics.failures.notconnected); +} + +void DistributorMetricsSetTest::non_special_cased_failure_codes_are_catchall_counted() { + PersistenceOperationMetricSet metrics("foo"); + assert_failure_is_counted(metrics, api::ReturnCode::REJECTED, + metrics.failures.storagefailure); +} + +} +} diff --git a/storage/src/vespa/storage/distributor/distributormetricsset.cpp b/storage/src/vespa/storage/distributor/distributormetricsset.cpp index 0760ab6223d..608d4d93da2 100644 --- a/storage/src/vespa/storage/distributor/distributormetricsset.cpp +++ b/storage/src/vespa/storage/distributor/distributormetricsset.cpp @@ -1,5 +1,6 @@ // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "distributormetricsset.h" +#include #include #include @@ -66,6 +67,24 @@ PersistenceOperationMetricSet::clone(std::vector& ownerList, CopyTyp ->assignValues(*this); } +void +PersistenceOperationMetricSet::updateFromResult(const api::ReturnCode& result) +{ + if (result.success()) { + ++ok; + } else if (result.getResult() == api::ReturnCode::WRONG_DISTRIBUTION) { + ++failures.wrongdistributor; + } else if (result.getResult() == api::ReturnCode::TIMEOUT) { + ++failures.timeout; + } else if (result.isBusy()) { + ++failures.busy; + } else if (result.isNodeDownOrNetwork()) { + ++failures.notconnected; + } else { + ++failures.storagefailure; + } +} + DistributorMetricSet::DistributorMetricSet(const metrics::LoadTypeSet& lt) : MetricSet("distributor", "distributor", ""), puts(lt, PersistenceOperationMetricSet("puts"), this), diff --git a/storage/src/vespa/storage/distributor/distributormetricsset.h b/storage/src/vespa/storage/distributor/distributormetricsset.h index b8458d119e7..3ae975913de 100644 --- a/storage/src/vespa/storage/distributor/distributormetricsset.h +++ b/storage/src/vespa/storage/distributor/distributormetricsset.h @@ -7,6 +7,10 @@ namespace storage { +namespace api { +class ReturnCode; +} + class PersistenceFailuresMetricSet : public metrics::MetricSet { public: @@ -39,6 +43,14 @@ public: MetricSet * clone(std::vector& ownerList, CopyType copyType, metrics::MetricSet* owner, bool includeUnused) const override; + + /** + * Increments appropriate success/failure count metrics based on the return + * code provided in `result`. + * + * Does _not_ update latency metric. + */ + void updateFromResult(const api::ReturnCode& result); }; class DistributorMetricSet : public metrics::MetricSet diff --git a/storage/src/vespa/storage/distributor/persistencemessagetracker.cpp b/storage/src/vespa/storage/distributor/persistencemessagetracker.cpp index e09948928ef..4ea765a2d4a 100644 --- a/storage/src/vespa/storage/distributor/persistencemessagetracker.cpp +++ b/storage/src/vespa/storage/distributor/persistencemessagetracker.cpp @@ -51,17 +51,7 @@ void PersistenceMessageTrackerImpl::updateMetrics() { const api::ReturnCode& result(_reply->getResult()); - if (result.success()) { - ++_metric.ok; - } else if (result.getResult() == api::ReturnCode::TIMEOUT) { - ++_metric.failures.timeout; - } else if (result.isBusy()) { - ++_metric.failures.busy; - } else if (result.isNodeDownOrNetwork()) { - ++_metric.failures.notconnected; - } else { - ++_metric.failures.storagefailure; - } + _metric.updateFromResult(result); _metric.latency.addValue(_requestTimer.getElapsedTimeAsDouble()); } -- cgit v1.2.3 From 06b6276640a4d9ed2c4f548814a4ba73ab5805fd Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Mon, 5 Dec 2016 14:00:48 +0100 Subject: Wire in persistence metrics for visitor metrics Add failure metric for failure codes indicating bucket inconsistencies --- .../distributor/distributor_metrics_set_test.cpp | 8 ++ .../src/tests/distributor/visitoroperationtest.cpp | 15 ++++ .../src/vespa/storage/distributor/CMakeLists.txt | 1 + .../storage/distributor/distributormetricsset.cpp | 80 +----------------- .../storage/distributor/distributormetricsset.h | 47 +---------- .../operations/external/visitoroperation.cpp | 1 + .../persistence_operation_metric_set.cpp | 98 ++++++++++++++++++++++ .../distributor/persistence_operation_metric_set.h | 57 +++++++++++++ .../storage/distributor/visitormetricsset.cpp | 7 +- .../vespa/storage/distributor/visitormetricsset.h | 6 +- 10 files changed, 187 insertions(+), 133 deletions(-) create mode 100644 storage/src/vespa/storage/distributor/persistence_operation_metric_set.cpp create mode 100644 storage/src/vespa/storage/distributor/persistence_operation_metric_set.h (limited to 'storage') diff --git a/storage/src/tests/distributor/distributor_metrics_set_test.cpp b/storage/src/tests/distributor/distributor_metrics_set_test.cpp index abee97dce68..6311e789dac 100644 --- a/storage/src/tests/distributor/distributor_metrics_set_test.cpp +++ b/storage/src/tests/distributor/distributor_metrics_set_test.cpp @@ -18,6 +18,7 @@ struct DistributorMetricsSetTest : CppUnit::TestFixture { // list. void busy_failure_is_counted(); void connection_failure_is_counted(); + void inconsistent_bucket_is_counted(); void non_special_cased_failure_codes_are_catchall_counted(); CPPUNIT_TEST_SUITE(DistributorMetricsSetTest); @@ -26,6 +27,7 @@ struct DistributorMetricsSetTest : CppUnit::TestFixture { CPPUNIT_TEST(timeout_failure_is_counted); CPPUNIT_TEST(busy_failure_is_counted); CPPUNIT_TEST(connection_failure_is_counted); + CPPUNIT_TEST(inconsistent_bucket_is_counted); CPPUNIT_TEST(non_special_cased_failure_codes_are_catchall_counted); CPPUNIT_TEST_SUITE_END(); @@ -75,6 +77,12 @@ void DistributorMetricsSetTest::connection_failure_is_counted() { metrics.failures.notconnected); } +void DistributorMetricsSetTest::inconsistent_bucket_is_counted() { + PersistenceOperationMetricSet metrics("foo"); + assert_failure_is_counted(metrics, api::ReturnCode::BUCKET_NOT_FOUND, + metrics.failures.inconsistent_bucket); +} + void DistributorMetricsSetTest::non_special_cased_failure_codes_are_catchall_counted() { PersistenceOperationMetricSet metrics("foo"); assert_failure_is_counted(metrics, api::ReturnCode::REJECTED, diff --git a/storage/src/tests/distributor/visitoroperationtest.cpp b/storage/src/tests/distributor/visitoroperationtest.cpp index 9b674029959..8891dab547c 100644 --- a/storage/src/tests/distributor/visitoroperationtest.cpp +++ b/storage/src/tests/distributor/visitoroperationtest.cpp @@ -188,6 +188,10 @@ private: return ost.str(); } + VisitorMetricSet& defaultVisitorMetrics() { + return getDistributor().getMetrics().visits[documentapi::LoadType::DEFAULT]; + } + std::unique_ptr createOpWithConfig( api::CreateVisitorCommand::SP msg, const VisitorOperation::Config& config) @@ -307,6 +311,8 @@ VisitorOperationTest::doStandardVisitTest(const std::string& clusterState) "last=BucketId(0x000000007fffffff)) " "ReturnCode(NONE)"), _sender.getLastReply()); + CPPUNIT_ASSERT_EQUAL(int64_t(1), defaultVisitorMetrics(). + ok.getLongValue("count")); } void @@ -603,6 +609,8 @@ VisitorOperationTest::testBucketRemovedWhileVisitorPending() std::string("CreateVisitorReply(last=BucketId(0x0000000000000000)) " "ReturnCode(BUCKET_NOT_FOUND)"), _sender.getLastReply()); + CPPUNIT_ASSERT_EQUAL(int64_t(1), defaultVisitorMetrics().failures. + inconsistent_bucket.getLongValue("count")); } void @@ -725,6 +733,8 @@ VisitorOperationTest::testTimeoutDoesNotOverrideCriticalError() "CreateVisitorReply(last=BucketId(0x0000000000000000)) " "ReturnCode(INTERNAL_FAILURE, [from content node 0] )"s, _sender.getLastReply()); + CPPUNIT_ASSERT_EQUAL(int64_t(1), defaultVisitorMetrics().failures. + storagefailure.getLongValue("count")); } void @@ -737,6 +747,8 @@ VisitorOperationTest::testWrongDistribution() std::string("CreateVisitorReply(last=BucketId(0x0000000000000000)) " "ReturnCode(WRONG_DISTRIBUTION, distributor:100 storage:2)"), runEmptyVisitor(createVisitorCommand("wrongdist", id, nullId))); + CPPUNIT_ASSERT_EQUAL(int64_t(1), defaultVisitorMetrics().failures. + wrongdistributor.getLongValue("count")); } void @@ -1181,6 +1193,9 @@ VisitorOperationTest::testFailureOnAllNodes() std::string("CreateVisitorReply(last=BucketId(0x0000000000000000)) " "ReturnCode(BUCKET_NOT_FOUND)"), _sender.getLastReply()); + // TODO it'd be much more accurate to increase the "notconnected" metric + // here, but our metrics are currently based on the reply sent back to the + // client, not the ones sent from the content nodes to the distributor. } diff --git a/storage/src/vespa/storage/distributor/CMakeLists.txt b/storage/src/vespa/storage/distributor/CMakeLists.txt index 4f6dd7da153..3b8843d39ea 100644 --- a/storage/src/vespa/storage/distributor/CMakeLists.txt +++ b/storage/src/vespa/storage/distributor/CMakeLists.txt @@ -4,6 +4,7 @@ vespa_add_library(storage_distributor distributor.cpp distributormetricsset.cpp visitormetricsset.cpp + persistence_operation_metric_set.cpp idealstatemetricsset.cpp operationowner.cpp distributorcomponent.cpp diff --git a/storage/src/vespa/storage/distributor/distributormetricsset.cpp b/storage/src/vespa/storage/distributor/distributormetricsset.cpp index 608d4d93da2..f842ed771c9 100644 --- a/storage/src/vespa/storage/distributor/distributormetricsset.cpp +++ b/storage/src/vespa/storage/distributor/distributormetricsset.cpp @@ -1,6 +1,5 @@ // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "distributormetricsset.h" -#include #include #include @@ -8,83 +7,6 @@ namespace storage { using metrics::MetricSet; -PersistenceFailuresMetricSet::PersistenceFailuresMetricSet(MetricSet* owner) - : MetricSet("failures", "", "Detailed failure statistics", owner), - sum("total", "logdefault yamasdefault", "Sum of all failures", this), - notready("notready", "", "The number of operations discarded because distributor was not ready", this), - notconnected("notconnected", "", "The number of operations discarded because there were no available storage nodes to send to", this), - wrongdistributor("wrongdistributor", "", "The number of operations discarded because they were sent to the wrong distributor", this), - safe_time_not_reached("safe_time_not_reached", "", - "The number of operations that were transiently" - " failed due to them arriving before the safe " - "time point for bucket ownership handovers has " - "passed", this), - storagefailure("storagefailure", "", "The number of operations that failed in storage", this), - timeout("timeout", "", "The number of operations that failed because the operation timed out towards storage", this), - busy("busy", "", "The number of messages from storage that failed because the storage node was busy", this), - notfound("notfound", "", "The number of operations that failed because the document did not exist", this) -{ - sum.addMetricToSum(notready); - sum.addMetricToSum(notconnected); - sum.addMetricToSum(wrongdistributor); - sum.addMetricToSum(safe_time_not_reached); - sum.addMetricToSum(storagefailure); - sum.addMetricToSum(timeout); - sum.addMetricToSum(busy); - sum.addMetricToSum(notfound); -} -PersistenceFailuresMetricSet::~PersistenceFailuresMetricSet() { } - -MetricSet * -PersistenceFailuresMetricSet::clone(std::vector& ownerList, CopyType copyType, - MetricSet* owner, bool includeUnused) const -{ - if (copyType == INACTIVE) { - return MetricSet::clone(ownerList, INACTIVE, owner, includeUnused); - } - return (PersistenceFailuresMetricSet*) - (new PersistenceFailuresMetricSet(owner))->assignValues(*this); -} - -PersistenceOperationMetricSet::PersistenceOperationMetricSet(const std::string& name, MetricSet* owner) - : MetricSet(name, "", vespalib::make_string("Statistics for the %s command", name.c_str()), owner, "operationtype"), - latency("latency", "yamasdefault", vespalib::make_string("The average latency of %s operations", name.c_str()), this), - ok("ok", "logdefault yamasdefault", vespalib::make_string("The number of successful %s operations performed", name.c_str()), this), - failures(this) -{ } - -PersistenceOperationMetricSet::~PersistenceOperationMetricSet() { } - -MetricSet * -PersistenceOperationMetricSet::clone(std::vector& ownerList, CopyType copyType, - MetricSet* owner, bool includeUnused) const -{ - if (copyType == INACTIVE) { - return MetricSet::clone(ownerList, INACTIVE, owner, includeUnused); - } - return (PersistenceOperationMetricSet*) - (new PersistenceOperationMetricSet(getName(), owner)) - ->assignValues(*this); -} - -void -PersistenceOperationMetricSet::updateFromResult(const api::ReturnCode& result) -{ - if (result.success()) { - ++ok; - } else if (result.getResult() == api::ReturnCode::WRONG_DISTRIBUTION) { - ++failures.wrongdistributor; - } else if (result.getResult() == api::ReturnCode::TIMEOUT) { - ++failures.timeout; - } else if (result.isBusy()) { - ++failures.busy; - } else if (result.isNodeDownOrNetwork()) { - ++failures.notconnected; - } else { - ++failures.storagefailure; - } -} - DistributorMetricSet::DistributorMetricSet(const metrics::LoadTypeSet& lt) : MetricSet("distributor", "distributor", ""), puts(lt, PersistenceOperationMetricSet("puts"), this), @@ -113,7 +35,7 @@ DistributorMetricSet::DistributorMetricSet(const metrics::LoadTypeSet& lt) DistributorMetricSet::~DistributorMetricSet() { } -} +} // storage template class metrics::LoadMetric; template class metrics::SumMetric; diff --git a/storage/src/vespa/storage/distributor/distributormetricsset.h b/storage/src/vespa/storage/distributor/distributormetricsset.h index 3ae975913de..705e7305e0d 100644 --- a/storage/src/vespa/storage/distributor/distributormetricsset.h +++ b/storage/src/vespa/storage/distributor/distributormetricsset.h @@ -1,58 +1,13 @@ // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once +#include "persistence_operation_metric_set.h" #include "visitormetricsset.h" #include #include namespace storage { -namespace api { -class ReturnCode; -} - -class PersistenceFailuresMetricSet : public metrics::MetricSet -{ -public: - PersistenceFailuresMetricSet(metrics::MetricSet* owner); - ~PersistenceFailuresMetricSet(); - - metrics::SumMetric sum; - metrics::LongCountMetric notready; - metrics::LongCountMetric notconnected; - metrics::LongCountMetric wrongdistributor; - metrics::LongCountMetric safe_time_not_reached; - metrics::LongCountMetric storagefailure; - metrics::LongCountMetric timeout; - metrics::LongCountMetric busy; - metrics::LongCountMetric notfound; - - MetricSet * clone(std::vector& ownerList, CopyType copyType, - metrics::MetricSet* owner, bool includeUnused) const; -}; - -class PersistenceOperationMetricSet : public metrics::MetricSet -{ -public: - metrics::DoubleAverageMetric latency; - metrics::LongCountMetric ok; - PersistenceFailuresMetricSet failures; - - PersistenceOperationMetricSet(const std::string& name, metrics::MetricSet* owner = nullptr); - ~PersistenceOperationMetricSet(); - - MetricSet * clone(std::vector& ownerList, CopyType copyType, - metrics::MetricSet* owner, bool includeUnused) const override; - - /** - * Increments appropriate success/failure count metrics based on the return - * code provided in `result`. - * - * Does _not_ update latency metric. - */ - void updateFromResult(const api::ReturnCode& result); -}; - class DistributorMetricSet : public metrics::MetricSet { public: diff --git a/storage/src/vespa/storage/distributor/operations/external/visitoroperation.cpp b/storage/src/vespa/storage/distributor/operations/external/visitoroperation.cpp index 7bb99ea4b43..412657cd615 100644 --- a/storage/src/vespa/storage/distributor/operations/external/visitoroperation.cpp +++ b/storage/src/vespa/storage/distributor/operations/external/visitoroperation.cpp @@ -985,6 +985,7 @@ VisitorOperation::sendReply(const api::ReturnCode& code, DistributorMessageSende sender.sendReply(reply); + _metrics.updateFromResult(code); _metrics.latency.addValue(_operationTimer.getElapsedTimeAsDouble()); _sentReply = true; } diff --git a/storage/src/vespa/storage/distributor/persistence_operation_metric_set.cpp b/storage/src/vespa/storage/distributor/persistence_operation_metric_set.cpp new file mode 100644 index 00000000000..4555e454e70 --- /dev/null +++ b/storage/src/vespa/storage/distributor/persistence_operation_metric_set.cpp @@ -0,0 +1,98 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "distributormetricsset.h" +#include +#include +#include + +namespace storage { + +using metrics::MetricSet; + +PersistenceFailuresMetricSet::PersistenceFailuresMetricSet(MetricSet* owner) + : MetricSet("failures", "", "Detailed failure statistics", owner), + sum("total", "logdefault yamasdefault", "Sum of all failures", this), + notready("notready", "", "The number of operations discarded because distributor was not ready", this), + notconnected("notconnected", "", "The number of operations discarded because there were no available storage nodes to send to", this), + wrongdistributor("wrongdistributor", "", "The number of operations discarded because they were sent to the wrong distributor", this), + safe_time_not_reached("safe_time_not_reached", "", + "The number of operations that were transiently" + " failed due to them arriving before the safe " + "time point for bucket ownership handovers has " + "passed", this), + storagefailure("storagefailure", "", "The number of operations that failed in storage", this), + timeout("timeout", "", "The number of operations that failed because the operation timed out towards storage", this), + busy("busy", "", "The number of messages from storage that failed because the storage node was busy", this), + inconsistent_bucket("inconsistent_bucket", "", + "The number of operations failed due to buckets " + "being in an inconsistent state or not found", this), + notfound("notfound", "", "The number of operations that failed because the document did not exist", this) +{ + sum.addMetricToSum(notready); + sum.addMetricToSum(notconnected); + sum.addMetricToSum(wrongdistributor); + sum.addMetricToSum(safe_time_not_reached); + sum.addMetricToSum(storagefailure); + sum.addMetricToSum(timeout); + sum.addMetricToSum(busy); + sum.addMetricToSum(inconsistent_bucket); + sum.addMetricToSum(notfound); +} +PersistenceFailuresMetricSet::~PersistenceFailuresMetricSet() { } + +MetricSet * +PersistenceFailuresMetricSet::clone(std::vector& ownerList, CopyType copyType, + MetricSet* owner, bool includeUnused) const +{ + if (copyType == INACTIVE) { + return MetricSet::clone(ownerList, INACTIVE, owner, includeUnused); + } + return (PersistenceFailuresMetricSet*) + (new PersistenceFailuresMetricSet(owner))->assignValues(*this); +} + +PersistenceOperationMetricSet::PersistenceOperationMetricSet(const std::string& name, MetricSet* owner) + : MetricSet(name, "", vespalib::make_string("Statistics for the %s command", name.c_str()), owner, "operationtype"), + latency("latency", "yamasdefault", vespalib::make_string("The average latency of %s operations", name.c_str()), this), + ok("ok", "logdefault yamasdefault", vespalib::make_string("The number of successful %s operations performed", name.c_str()), this), + failures(this) +{ } + +PersistenceOperationMetricSet::~PersistenceOperationMetricSet() { } + +MetricSet * +PersistenceOperationMetricSet::clone(std::vector& ownerList, CopyType copyType, + MetricSet* owner, bool includeUnused) const +{ + if (copyType == INACTIVE) { + return MetricSet::clone(ownerList, INACTIVE, owner, includeUnused); + } + return (PersistenceOperationMetricSet*) + (new PersistenceOperationMetricSet(getName(), owner)) + ->assignValues(*this); +} + +void +PersistenceOperationMetricSet::updateFromResult(const api::ReturnCode& result) +{ + if (result.success()) { + ++ok; + } else if (result.getResult() == api::ReturnCode::WRONG_DISTRIBUTION) { + ++failures.wrongdistributor; + } else if (result.getResult() == api::ReturnCode::TIMEOUT) { + ++failures.timeout; + } else if (result.isBusy()) { + ++failures.busy; + } else if (result.isBucketDisappearance()) { + // Bucket not found/deleted codes imply that replicas are transiently + // inconsistent in our DB or across replicas. + ++failures.inconsistent_bucket; + } else if (result.isNodeDownOrNetwork()) { + ++failures.notconnected; + } else { + ++failures.storagefailure; + } +} + +} // storage + diff --git a/storage/src/vespa/storage/distributor/persistence_operation_metric_set.h b/storage/src/vespa/storage/distributor/persistence_operation_metric_set.h new file mode 100644 index 00000000000..221662a2ba6 --- /dev/null +++ b/storage/src/vespa/storage/distributor/persistence_operation_metric_set.h @@ -0,0 +1,57 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include + +namespace storage { + +namespace api { +class ReturnCode; +} + +class PersistenceFailuresMetricSet : public metrics::MetricSet +{ +public: + PersistenceFailuresMetricSet(metrics::MetricSet* owner); + ~PersistenceFailuresMetricSet(); + + metrics::SumMetric sum; + metrics::LongCountMetric notready; + metrics::LongCountMetric notconnected; + metrics::LongCountMetric wrongdistributor; + metrics::LongCountMetric safe_time_not_reached; + metrics::LongCountMetric storagefailure; + metrics::LongCountMetric timeout; + metrics::LongCountMetric busy; + metrics::LongCountMetric inconsistent_bucket; + metrics::LongCountMetric notfound; + + MetricSet * clone(std::vector& ownerList, CopyType copyType, + metrics::MetricSet* owner, bool includeUnused) const; +}; + +class PersistenceOperationMetricSet : public metrics::MetricSet +{ +public: + metrics::DoubleAverageMetric latency; + metrics::LongCountMetric ok; + PersistenceFailuresMetricSet failures; + + PersistenceOperationMetricSet(const std::string& name, metrics::MetricSet* owner = nullptr); + ~PersistenceOperationMetricSet(); + + MetricSet * clone(std::vector& ownerList, CopyType copyType, + metrics::MetricSet* owner, bool includeUnused) const override; + + /** + * Increments appropriate success/failure count metrics based on the return + * code provided in `result`. + * + * Does _not_ update latency metric. + */ + void updateFromResult(const api::ReturnCode& result); +}; + +} diff --git a/storage/src/vespa/storage/distributor/visitormetricsset.cpp b/storage/src/vespa/storage/distributor/visitormetricsset.cpp index ae8aeeb8463..e6235f4cdc9 100644 --- a/storage/src/vespa/storage/distributor/visitormetricsset.cpp +++ b/storage/src/vespa/storage/distributor/visitormetricsset.cpp @@ -9,10 +9,9 @@ namespace storage { using metrics::MetricSet; VisitorMetricSet::VisitorMetricSet(MetricSet* owner) - : MetricSet("visitor", "visitor", "", owner), - latency("latency", "", "Latency of visitor (in ms)", this), - failed("failed", "", "Number of visitors that failed or were aborted by the user", this) -{ } + : PersistenceOperationMetricSet("visitor", owner) +{ +} VisitorMetricSet::~VisitorMetricSet() { } diff --git a/storage/src/vespa/storage/distributor/visitormetricsset.h b/storage/src/vespa/storage/distributor/visitormetricsset.h index 24a3bbcce2f..fb9bdf099aa 100644 --- a/storage/src/vespa/storage/distributor/visitormetricsset.h +++ b/storage/src/vespa/storage/distributor/visitormetricsset.h @@ -1,15 +1,13 @@ // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once +#include "persistence_operation_metric_set.h" #include #include namespace storage { -struct VisitorMetricSet : public metrics::MetricSet { - metrics::DoubleAverageMetric latency; - metrics::LongCountMetric failed; - +struct VisitorMetricSet : public PersistenceOperationMetricSet { VisitorMetricSet(MetricSet* owner = nullptr); ~VisitorMetricSet(); -- cgit v1.2.3 From 76ba68d79e0fee68060b4a6c99157debd4b8eb3e Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Mon, 5 Dec 2016 16:25:04 +0100 Subject: Add metrics for counting buckets/docs/bytes per visitor Don't modify statistical metrics upon WrongDistributionReply, as this happens as a natural part of client visitor sessions and doesn't indicate a "proper" error. --- .../src/tests/distributor/visitoroperationtest.cpp | 60 ++++++++++++++++++++++ .../operations/external/visitoroperation.cpp | 19 ++++++- .../operations/external/visitoroperation.h | 1 + .../storage/distributor/visitormetricsset.cpp | 11 +++- .../vespa/storage/distributor/visitormetricsset.h | 4 ++ 5 files changed, 92 insertions(+), 3 deletions(-) (limited to 'storage') diff --git a/storage/src/tests/distributor/visitoroperationtest.cpp b/storage/src/tests/distributor/visitoroperationtest.cpp index 8891dab547c..7378896f017 100644 --- a/storage/src/tests/distributor/visitoroperationtest.cpp +++ b/storage/src/tests/distributor/visitoroperationtest.cpp @@ -64,6 +64,8 @@ class VisitorOperationTest : public CppUnit::TestFixture, CPPUNIT_TEST(testNoClientReplyBeforeAllStorageRepliesReceived); CPPUNIT_TEST(testSkipFailedSubBucketsWhenVisitingInconsistent); CPPUNIT_TEST(testQueueTimeoutIsFactorOfTotalTimeout); + CPPUNIT_TEST(metrics_updated_with_visitor_statistics); + CPPUNIT_TEST(statistical_metrics_not_updated_on_wrong_distribution); CPPUNIT_TEST_SUITE_END(); protected: @@ -109,6 +111,8 @@ protected: void testNoClientReplyBeforeAllStorageRepliesReceived(); void testSkipFailedSubBucketsWhenVisitingInconsistent(); void testQueueTimeoutIsFactorOfTotalTimeout(); + void metrics_updated_with_visitor_statistics(); + void statistical_metrics_not_updated_on_wrong_distribution(); public: VisitorOperationTest() : defaultConfig(framework::MilliSecTime(0), @@ -237,6 +241,8 @@ private: std::unique_ptr startOperationWith2StorageNodeVisitors( bool inconsistent); + + void do_visitor_roundtrip_with_statistics(const api::ReturnCode& result); }; CPPUNIT_TEST_SUITE_REGISTRATION(VisitorOperationTest); @@ -1621,5 +1627,59 @@ VisitorOperationTest::testQueueTimeoutIsFactorOfTotalTimeout() CPPUNIT_ASSERT_EQUAL(uint32_t(5000), cmd.getQueueTimeout()); } +void +VisitorOperationTest::do_visitor_roundtrip_with_statistics( + const api::ReturnCode& result) +{ + document::BucketId id(0x400000000000007bULL); + _distributor->enableClusterState(ClusterState("distributor:1 storage:1")); + addNodesToBucketDB(id, "0=1/1/1/t"); + + auto op = createOpWithDefaultConfig( + createVisitorCommand("metricstats", id, nullId)); + + op->start(_sender, framework::MilliSecTime(0)); + CPPUNIT_ASSERT_EQUAL(std::string("Visitor Create => 0"), + _sender.getCommands(true)); + auto& cmd(dynamic_cast(*_sender.commands[0])); + auto reply = cmd.makeReply(); + vdslib::VisitorStatistics stats; + stats.setBucketsVisited(50); + stats.setDocumentsVisited(100); + stats.setBytesVisited(2000); + static_cast(*reply).setVisitorStatistics(stats); + reply->setResult(result); + + op->receive(_sender, api::StorageReply::SP(std::move(reply))); +} + +void +VisitorOperationTest::metrics_updated_with_visitor_statistics() +{ + do_visitor_roundtrip_with_statistics(api::ReturnCode(api::ReturnCode::OK)); + + CPPUNIT_ASSERT_EQUAL(int64_t(50), defaultVisitorMetrics().buckets_per_visitor.getLast()); + CPPUNIT_ASSERT_EQUAL(int64_t(100), defaultVisitorMetrics().docs_per_visitor.getLast()); + CPPUNIT_ASSERT_EQUAL(int64_t(2000), defaultVisitorMetrics().bytes_per_visitor.getLast()); +} + +void +VisitorOperationTest::statistical_metrics_not_updated_on_wrong_distribution() +{ + setupDistributor(1, 100, "distributor:100 storage:2"); + + document::BucketId id(uint64_t(0x400000000000127b)); + CPPUNIT_ASSERT_EQUAL( + std::string("CreateVisitorReply(last=BucketId(0x0000000000000000)) " + "ReturnCode(WRONG_DISTRIBUTION, distributor:100 storage:2)"), + runEmptyVisitor(createVisitorCommand("wrongdist", id, nullId))); + + CPPUNIT_ASSERT_EQUAL(int64_t(0), defaultVisitorMetrics().buckets_per_visitor.getCount()); + CPPUNIT_ASSERT_EQUAL(int64_t(0), defaultVisitorMetrics().docs_per_visitor.getCount()); + CPPUNIT_ASSERT_EQUAL(int64_t(0), defaultVisitorMetrics().bytes_per_visitor.getCount()); + // Fascinating that count is also a double... + CPPUNIT_ASSERT_EQUAL(0.0, defaultVisitorMetrics().latency.getCount()); +} + } // distributor } // storage diff --git a/storage/src/vespa/storage/distributor/operations/external/visitoroperation.cpp b/storage/src/vespa/storage/distributor/operations/external/visitoroperation.cpp index 412657cd615..1bcc6abafb6 100644 --- a/storage/src/vespa/storage/distributor/operations/external/visitoroperation.cpp +++ b/storage/src/vespa/storage/distributor/operations/external/visitoroperation.cpp @@ -983,14 +983,29 @@ VisitorOperation::sendReply(const api::ReturnCode& code, DistributorMessageSende code.toString().c_str(), _msg->getInstanceId().c_str(), _msg->getMsgId()); + updateReplyMetrics(code); sender.sendReply(reply); - _metrics.updateFromResult(code); - _metrics.latency.addValue(_operationTimer.getElapsedTimeAsDouble()); _sentReply = true; } } +void +VisitorOperation::updateReplyMetrics(const api::ReturnCode& result) +{ + _metrics.updateFromResult(result); + // WrongDistributionReply happens as a normal and expected part of a visitor + // session's lifetime. If we pollute the metrics with measurements taken + // from such replies, the averages will not be representative. + if (result.getResult() == api::ReturnCode::WRONG_DISTRIBUTION) { + return; + } + _metrics.latency.addValue(_operationTimer.getElapsedTimeAsDouble()); + _metrics.buckets_per_visitor.inc(_visitorStatistics.getBucketsVisited()); + _metrics.docs_per_visitor.inc(_visitorStatistics.getDocumentsVisited()); + _metrics.bytes_per_visitor.inc(_visitorStatistics.getBytesVisited()); +} + void VisitorOperation::onClose(DistributorMessageSender& sender) { diff --git a/storage/src/vespa/storage/distributor/operations/external/visitoroperation.h b/storage/src/vespa/storage/distributor/operations/external/visitoroperation.h index 1824610355d..6fd18d8d01e 100644 --- a/storage/src/vespa/storage/distributor/operations/external/visitoroperation.h +++ b/storage/src/vespa/storage/distributor/operations/external/visitoroperation.h @@ -90,6 +90,7 @@ private: typedef std::map SentMessagesMap; void sendReply(const api::ReturnCode& code, DistributorMessageSender& sender); + void updateReplyMetrics(const api::ReturnCode& result); void verifyDistributorsAreAvailable(); void verifyVisitorDistributionBitCount(const document::BucketId&); void verifyDistributorIsNotDown(const lib::ClusterState&); diff --git a/storage/src/vespa/storage/distributor/visitormetricsset.cpp b/storage/src/vespa/storage/distributor/visitormetricsset.cpp index e6235f4cdc9..fb2bab51d1a 100644 --- a/storage/src/vespa/storage/distributor/visitormetricsset.cpp +++ b/storage/src/vespa/storage/distributor/visitormetricsset.cpp @@ -9,7 +9,16 @@ namespace storage { using metrics::MetricSet; VisitorMetricSet::VisitorMetricSet(MetricSet* owner) - : PersistenceOperationMetricSet("visitor", owner) + : PersistenceOperationMetricSet("visitor", owner), + buckets_per_visitor("buckets_per_visitor", "", + "The number of sub buckets visited as part of a " + "single client visitor command", this), + docs_per_visitor("docs_per_visitor", "", + "The number of documents visited on content nodes as " + "part of a single client visitor command", this), + bytes_per_visitor("bytes_per_visitor", "", + "The number of bytes visited on content nodes as part " + "of a single client visitor command", this) { } diff --git a/storage/src/vespa/storage/distributor/visitormetricsset.h b/storage/src/vespa/storage/distributor/visitormetricsset.h index fb9bdf099aa..34d65b9ec07 100644 --- a/storage/src/vespa/storage/distributor/visitormetricsset.h +++ b/storage/src/vespa/storage/distributor/visitormetricsset.h @@ -8,6 +8,10 @@ namespace storage { struct VisitorMetricSet : public PersistenceOperationMetricSet { + metrics::LongAverageMetric buckets_per_visitor; + metrics::LongAverageMetric docs_per_visitor; + metrics::LongAverageMetric bytes_per_visitor; + VisitorMetricSet(MetricSet* owner = nullptr); ~VisitorMetricSet(); -- cgit v1.2.3 From f8d883660f1b07b7e36e8f1693b8f9355bd06560 Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Tue, 6 Dec 2016 12:55:52 +0100 Subject: Rename tests --- storage/src/tests/distributor/CMakeLists.txt | 2 +- .../distributor/distributor_metrics_set_test.cpp | 93 ---------------------- .../distributor/persistence_metrics_set_test.cpp | 92 +++++++++++++++++++++ 3 files changed, 93 insertions(+), 94 deletions(-) delete mode 100644 storage/src/tests/distributor/distributor_metrics_set_test.cpp create mode 100644 storage/src/tests/distributor/persistence_metrics_set_test.cpp (limited to 'storage') diff --git a/storage/src/tests/distributor/CMakeLists.txt b/storage/src/tests/distributor/CMakeLists.txt index 8be15645ea8..2676a31e832 100644 --- a/storage/src/tests/distributor/CMakeLists.txt +++ b/storage/src/tests/distributor/CMakeLists.txt @@ -39,7 +39,7 @@ vespa_add_library(storage_testdistributor TEST nodemaintenancestatstrackertest.cpp distributor_host_info_reporter_test.cpp ownership_transfer_safe_time_point_calculator_test.cpp - distributor_metrics_set_test.cpp + persistence_metrics_set_test.cpp DEPENDS storage_distributor storage_testcommon diff --git a/storage/src/tests/distributor/distributor_metrics_set_test.cpp b/storage/src/tests/distributor/distributor_metrics_set_test.cpp deleted file mode 100644 index 6311e789dac..00000000000 --- a/storage/src/tests/distributor/distributor_metrics_set_test.cpp +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include -#include -#include - -namespace storage { -namespace distributor { - -// TODO name this PersistenceMetricsSetTest instead? -struct DistributorMetricsSetTest : CppUnit::TestFixture { - void successful_return_codes_are_counted_as_ok(); - void wrong_distribution_failure_is_counted(); - void timeout_failure_is_counted(); - // Note for these tests: busy, connection failures et al are sets of - // failure codes and not just a single code. We only test certain members - // of these sets here. See api::ReturnCode implementation for an exhaustive - // list. - void busy_failure_is_counted(); - void connection_failure_is_counted(); - void inconsistent_bucket_is_counted(); - void non_special_cased_failure_codes_are_catchall_counted(); - - CPPUNIT_TEST_SUITE(DistributorMetricsSetTest); - CPPUNIT_TEST(successful_return_codes_are_counted_as_ok); - CPPUNIT_TEST(wrong_distribution_failure_is_counted); - CPPUNIT_TEST(timeout_failure_is_counted); - CPPUNIT_TEST(busy_failure_is_counted); - CPPUNIT_TEST(connection_failure_is_counted); - CPPUNIT_TEST(inconsistent_bucket_is_counted); - CPPUNIT_TEST(non_special_cased_failure_codes_are_catchall_counted); - CPPUNIT_TEST_SUITE_END(); - - void assert_failure_is_counted(PersistenceOperationMetricSet& metrics, - api::ReturnCode::Result failure_code, - const metrics::LongCountMetric& checked) - { - metrics.updateFromResult(api::ReturnCode(failure_code)); - CPPUNIT_ASSERT_EQUAL(int64_t(1), checked.getLongValue("count")); - CPPUNIT_ASSERT_EQUAL(int64_t(0), metrics.ok.getLongValue("count")); - } -}; - -CPPUNIT_TEST_SUITE_REGISTRATION(DistributorMetricsSetTest); - -void DistributorMetricsSetTest::successful_return_codes_are_counted_as_ok() { - PersistenceOperationMetricSet metrics("foo"); - metrics.updateFromResult(api::ReturnCode()); - CPPUNIT_ASSERT_EQUAL(int64_t(1), metrics.ok.getLongValue("count")); -} - -void DistributorMetricsSetTest::wrong_distribution_failure_is_counted() { - PersistenceOperationMetricSet metrics("foo"); - assert_failure_is_counted(metrics, api::ReturnCode::WRONG_DISTRIBUTION, - metrics.failures.wrongdistributor); -} - -void DistributorMetricsSetTest::timeout_failure_is_counted() { - PersistenceOperationMetricSet metrics("foo"); - assert_failure_is_counted(metrics, api::ReturnCode::TIMEOUT, - metrics.failures.timeout); -} - -void DistributorMetricsSetTest::busy_failure_is_counted() { - PersistenceOperationMetricSet metrics("foo"); - assert_failure_is_counted(metrics, api::ReturnCode::BUSY, - metrics.failures.busy); -} - -void DistributorMetricsSetTest::connection_failure_is_counted() { - PersistenceOperationMetricSet metrics("foo"); - // This is dirty enum value coercion, but this is how "parent protocol" - // error codes are handled already. - api::ReturnCode::Result error_code(static_cast( - mbus::ErrorCode::CONNECTION_ERROR)); - assert_failure_is_counted(metrics, error_code, - metrics.failures.notconnected); -} - -void DistributorMetricsSetTest::inconsistent_bucket_is_counted() { - PersistenceOperationMetricSet metrics("foo"); - assert_failure_is_counted(metrics, api::ReturnCode::BUCKET_NOT_FOUND, - metrics.failures.inconsistent_bucket); -} - -void DistributorMetricsSetTest::non_special_cased_failure_codes_are_catchall_counted() { - PersistenceOperationMetricSet metrics("foo"); - assert_failure_is_counted(metrics, api::ReturnCode::REJECTED, - metrics.failures.storagefailure); -} - -} -} diff --git a/storage/src/tests/distributor/persistence_metrics_set_test.cpp b/storage/src/tests/distributor/persistence_metrics_set_test.cpp new file mode 100644 index 00000000000..131a588413b --- /dev/null +++ b/storage/src/tests/distributor/persistence_metrics_set_test.cpp @@ -0,0 +1,92 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +#include + +namespace storage { +namespace distributor { + +struct PersistenceMetricsSetTest : CppUnit::TestFixture { + void successful_return_codes_are_counted_as_ok(); + void wrong_distribution_failure_is_counted(); + void timeout_failure_is_counted(); + // Note for these tests: busy, connection failures et al are sets of + // failure codes and not just a single code. We only test certain members + // of these sets here. See api::ReturnCode implementation for an exhaustive + // list. + void busy_failure_is_counted(); + void connection_failure_is_counted(); + void inconsistent_bucket_is_counted(); + void non_special_cased_failure_codes_are_catchall_counted(); + + CPPUNIT_TEST_SUITE(PersistenceMetricsSetTest); + CPPUNIT_TEST(successful_return_codes_are_counted_as_ok); + CPPUNIT_TEST(wrong_distribution_failure_is_counted); + CPPUNIT_TEST(timeout_failure_is_counted); + CPPUNIT_TEST(busy_failure_is_counted); + CPPUNIT_TEST(connection_failure_is_counted); + CPPUNIT_TEST(inconsistent_bucket_is_counted); + CPPUNIT_TEST(non_special_cased_failure_codes_are_catchall_counted); + CPPUNIT_TEST_SUITE_END(); + + void assert_failure_is_counted(PersistenceOperationMetricSet& metrics, + api::ReturnCode::Result failure_code, + const metrics::LongCountMetric& checked) + { + metrics.updateFromResult(api::ReturnCode(failure_code)); + CPPUNIT_ASSERT_EQUAL(int64_t(1), checked.getLongValue("count")); + CPPUNIT_ASSERT_EQUAL(int64_t(0), metrics.ok.getLongValue("count")); + } +}; + +CPPUNIT_TEST_SUITE_REGISTRATION(PersistenceMetricsSetTest); + +void PersistenceMetricsSetTest::successful_return_codes_are_counted_as_ok() { + PersistenceOperationMetricSet metrics("foo"); + metrics.updateFromResult(api::ReturnCode()); + CPPUNIT_ASSERT_EQUAL(int64_t(1), metrics.ok.getLongValue("count")); +} + +void PersistenceMetricsSetTest::wrong_distribution_failure_is_counted() { + PersistenceOperationMetricSet metrics("foo"); + assert_failure_is_counted(metrics, api::ReturnCode::WRONG_DISTRIBUTION, + metrics.failures.wrongdistributor); +} + +void PersistenceMetricsSetTest::timeout_failure_is_counted() { + PersistenceOperationMetricSet metrics("foo"); + assert_failure_is_counted(metrics, api::ReturnCode::TIMEOUT, + metrics.failures.timeout); +} + +void PersistenceMetricsSetTest::busy_failure_is_counted() { + PersistenceOperationMetricSet metrics("foo"); + assert_failure_is_counted(metrics, api::ReturnCode::BUSY, + metrics.failures.busy); +} + +void PersistenceMetricsSetTest::connection_failure_is_counted() { + PersistenceOperationMetricSet metrics("foo"); + // This is dirty enum value coercion, but this is how "parent protocol" + // error codes are handled already. + api::ReturnCode::Result error_code(static_cast( + mbus::ErrorCode::CONNECTION_ERROR)); + assert_failure_is_counted(metrics, error_code, + metrics.failures.notconnected); +} + +void PersistenceMetricsSetTest::inconsistent_bucket_is_counted() { + PersistenceOperationMetricSet metrics("foo"); + assert_failure_is_counted(metrics, api::ReturnCode::BUCKET_NOT_FOUND, + metrics.failures.inconsistent_bucket); +} + +void PersistenceMetricsSetTest::non_special_cased_failure_codes_are_catchall_counted() { + PersistenceOperationMetricSet metrics("foo"); + assert_failure_is_counted(metrics, api::ReturnCode::REJECTED, + metrics.failures.storagefailure); +} + +} +} -- cgit v1.2.3 From 7bc5edc9a3e9e7b998eb0def3930fc4ea9a72ac1 Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Tue, 6 Dec 2016 13:16:58 +0100 Subject: Minor grammar tweaks --- storage/src/tests/distributor/visitoroperationtest.cpp | 6 +++--- .../vespa/storage/distributor/persistence_operation_metric_set.cpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'storage') diff --git a/storage/src/tests/distributor/visitoroperationtest.cpp b/storage/src/tests/distributor/visitoroperationtest.cpp index 7378896f017..055fe9b9ab9 100644 --- a/storage/src/tests/distributor/visitoroperationtest.cpp +++ b/storage/src/tests/distributor/visitoroperationtest.cpp @@ -64,7 +64,7 @@ class VisitorOperationTest : public CppUnit::TestFixture, CPPUNIT_TEST(testNoClientReplyBeforeAllStorageRepliesReceived); CPPUNIT_TEST(testSkipFailedSubBucketsWhenVisitingInconsistent); CPPUNIT_TEST(testQueueTimeoutIsFactorOfTotalTimeout); - CPPUNIT_TEST(metrics_updated_with_visitor_statistics); + CPPUNIT_TEST(metrics_are_updated_with_visitor_statistics_upon_replying); CPPUNIT_TEST(statistical_metrics_not_updated_on_wrong_distribution); CPPUNIT_TEST_SUITE_END(); @@ -111,7 +111,7 @@ protected: void testNoClientReplyBeforeAllStorageRepliesReceived(); void testSkipFailedSubBucketsWhenVisitingInconsistent(); void testQueueTimeoutIsFactorOfTotalTimeout(); - void metrics_updated_with_visitor_statistics(); + void metrics_are_updated_with_visitor_statistics_upon_replying(); void statistical_metrics_not_updated_on_wrong_distribution(); public: VisitorOperationTest() @@ -1654,7 +1654,7 @@ VisitorOperationTest::do_visitor_roundtrip_with_statistics( } void -VisitorOperationTest::metrics_updated_with_visitor_statistics() +VisitorOperationTest::metrics_are_updated_with_visitor_statistics_upon_replying() { do_visitor_roundtrip_with_statistics(api::ReturnCode(api::ReturnCode::OK)); diff --git a/storage/src/vespa/storage/distributor/persistence_operation_metric_set.cpp b/storage/src/vespa/storage/distributor/persistence_operation_metric_set.cpp index 4555e454e70..b7766713b01 100644 --- a/storage/src/vespa/storage/distributor/persistence_operation_metric_set.cpp +++ b/storage/src/vespa/storage/distributor/persistence_operation_metric_set.cpp @@ -85,7 +85,7 @@ PersistenceOperationMetricSet::updateFromResult(const api::ReturnCode& result) ++failures.busy; } else if (result.isBucketDisappearance()) { // Bucket not found/deleted codes imply that replicas are transiently - // inconsistent in our DB or across replicas. + // inconsistent in our DB or across replica nodes. ++failures.inconsistent_bucket; } else if (result.isNodeDownOrNetwork()) { ++failures.notconnected; -- cgit v1.2.3 From ba1bd01788166317d75805b976475297ce9794c7 Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Tue, 6 Dec 2016 13:45:06 +0100 Subject: Add clarifying comment on metric update testing --- storage/src/tests/distributor/visitoroperationtest.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'storage') diff --git a/storage/src/tests/distributor/visitoroperationtest.cpp b/storage/src/tests/distributor/visitoroperationtest.cpp index 055fe9b9ab9..7bd9eea29b0 100644 --- a/storage/src/tests/distributor/visitoroperationtest.cpp +++ b/storage/src/tests/distributor/visitoroperationtest.cpp @@ -1674,6 +1674,9 @@ VisitorOperationTest::statistical_metrics_not_updated_on_wrong_distribution() "ReturnCode(WRONG_DISTRIBUTION, distributor:100 storage:2)"), runEmptyVisitor(createVisitorCommand("wrongdist", id, nullId))); + // Note that we're testing the number of _times_ the metric has been + // updated, not the value with which it's been updated (which would be zero + // even in the case we actually did update the statistical metrics). CPPUNIT_ASSERT_EQUAL(int64_t(0), defaultVisitorMetrics().buckets_per_visitor.getCount()); CPPUNIT_ASSERT_EQUAL(int64_t(0), defaultVisitorMetrics().docs_per_visitor.getCount()); CPPUNIT_ASSERT_EQUAL(int64_t(0), defaultVisitorMetrics().bytes_per_visitor.getCount()); -- cgit v1.2.3