diff options
author | Tor Brede Vekterli <vekterli@yahoo-inc.com> | 2016-12-05 16:25:04 +0100 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@yahoo-inc.com> | 2016-12-05 16:39:05 +0100 |
commit | 76ba68d79e0fee68060b4a6c99157debd4b8eb3e (patch) | |
tree | 012079df8c3c0822234b30995b3befa6e190c72d /storage | |
parent | 06b6276640a4d9ed2c4f548814a4ba73ab5805fd (diff) |
Add metrics for counting buckets/docs/bytes per visitor
Don't modify statistical metrics upon WrongDistributionReply, as this
happens as a natural part of client visitor sessions and doesn't
indicate a "proper" error.
Diffstat (limited to 'storage')
5 files changed, 92 insertions, 3 deletions
diff --git a/storage/src/tests/distributor/visitoroperationtest.cpp b/storage/src/tests/distributor/visitoroperationtest.cpp index 8891dab547c..7378896f017 100644 --- a/storage/src/tests/distributor/visitoroperationtest.cpp +++ b/storage/src/tests/distributor/visitoroperationtest.cpp @@ -64,6 +64,8 @@ class VisitorOperationTest : public CppUnit::TestFixture, CPPUNIT_TEST(testNoClientReplyBeforeAllStorageRepliesReceived); CPPUNIT_TEST(testSkipFailedSubBucketsWhenVisitingInconsistent); CPPUNIT_TEST(testQueueTimeoutIsFactorOfTotalTimeout); + CPPUNIT_TEST(metrics_updated_with_visitor_statistics); + CPPUNIT_TEST(statistical_metrics_not_updated_on_wrong_distribution); CPPUNIT_TEST_SUITE_END(); protected: @@ -109,6 +111,8 @@ protected: void testNoClientReplyBeforeAllStorageRepliesReceived(); void testSkipFailedSubBucketsWhenVisitingInconsistent(); void testQueueTimeoutIsFactorOfTotalTimeout(); + void metrics_updated_with_visitor_statistics(); + void statistical_metrics_not_updated_on_wrong_distribution(); public: VisitorOperationTest() : defaultConfig(framework::MilliSecTime(0), @@ -237,6 +241,8 @@ private: std::unique_ptr<VisitorOperation> startOperationWith2StorageNodeVisitors( bool inconsistent); + + void do_visitor_roundtrip_with_statistics(const api::ReturnCode& result); }; CPPUNIT_TEST_SUITE_REGISTRATION(VisitorOperationTest); @@ -1621,5 +1627,59 @@ VisitorOperationTest::testQueueTimeoutIsFactorOfTotalTimeout() CPPUNIT_ASSERT_EQUAL(uint32_t(5000), cmd.getQueueTimeout()); } +void +VisitorOperationTest::do_visitor_roundtrip_with_statistics( + const api::ReturnCode& result) +{ + document::BucketId id(0x400000000000007bULL); + _distributor->enableClusterState(ClusterState("distributor:1 storage:1")); + addNodesToBucketDB(id, "0=1/1/1/t"); + + auto op = createOpWithDefaultConfig( + createVisitorCommand("metricstats", id, nullId)); + + op->start(_sender, framework::MilliSecTime(0)); + CPPUNIT_ASSERT_EQUAL(std::string("Visitor Create => 0"), + _sender.getCommands(true)); + auto& cmd(dynamic_cast<CreateVisitorCommand&>(*_sender.commands[0])); + auto reply = cmd.makeReply(); + vdslib::VisitorStatistics stats; + stats.setBucketsVisited(50); + stats.setDocumentsVisited(100); + stats.setBytesVisited(2000); + static_cast<CreateVisitorReply&>(*reply).setVisitorStatistics(stats); + reply->setResult(result); + + op->receive(_sender, api::StorageReply::SP(std::move(reply))); +} + +void +VisitorOperationTest::metrics_updated_with_visitor_statistics() +{ + do_visitor_roundtrip_with_statistics(api::ReturnCode(api::ReturnCode::OK)); + + CPPUNIT_ASSERT_EQUAL(int64_t(50), defaultVisitorMetrics().buckets_per_visitor.getLast()); + CPPUNIT_ASSERT_EQUAL(int64_t(100), defaultVisitorMetrics().docs_per_visitor.getLast()); + CPPUNIT_ASSERT_EQUAL(int64_t(2000), defaultVisitorMetrics().bytes_per_visitor.getLast()); +} + +void +VisitorOperationTest::statistical_metrics_not_updated_on_wrong_distribution() +{ + setupDistributor(1, 100, "distributor:100 storage:2"); + + document::BucketId id(uint64_t(0x400000000000127b)); + CPPUNIT_ASSERT_EQUAL( + std::string("CreateVisitorReply(last=BucketId(0x0000000000000000)) " + "ReturnCode(WRONG_DISTRIBUTION, distributor:100 storage:2)"), + runEmptyVisitor(createVisitorCommand("wrongdist", id, nullId))); + + CPPUNIT_ASSERT_EQUAL(int64_t(0), defaultVisitorMetrics().buckets_per_visitor.getCount()); + CPPUNIT_ASSERT_EQUAL(int64_t(0), defaultVisitorMetrics().docs_per_visitor.getCount()); + CPPUNIT_ASSERT_EQUAL(int64_t(0), defaultVisitorMetrics().bytes_per_visitor.getCount()); + // Fascinating that count is also a double... + CPPUNIT_ASSERT_EQUAL(0.0, defaultVisitorMetrics().latency.getCount()); +} + } // distributor } // storage diff --git a/storage/src/vespa/storage/distributor/operations/external/visitoroperation.cpp b/storage/src/vespa/storage/distributor/operations/external/visitoroperation.cpp index 412657cd615..1bcc6abafb6 100644 --- a/storage/src/vespa/storage/distributor/operations/external/visitoroperation.cpp +++ b/storage/src/vespa/storage/distributor/operations/external/visitoroperation.cpp @@ -983,15 +983,30 @@ VisitorOperation::sendReply(const api::ReturnCode& code, DistributorMessageSende code.toString().c_str(), _msg->getInstanceId().c_str(), _msg->getMsgId()); + updateReplyMetrics(code); sender.sendReply(reply); - _metrics.updateFromResult(code); - _metrics.latency.addValue(_operationTimer.getElapsedTimeAsDouble()); _sentReply = true; } } void +VisitorOperation::updateReplyMetrics(const api::ReturnCode& result) +{ + _metrics.updateFromResult(result); + // WrongDistributionReply happens as a normal and expected part of a visitor + // session's lifetime. If we pollute the metrics with measurements taken + // from such replies, the averages will not be representative. + if (result.getResult() == api::ReturnCode::WRONG_DISTRIBUTION) { + return; + } + _metrics.latency.addValue(_operationTimer.getElapsedTimeAsDouble()); + _metrics.buckets_per_visitor.inc(_visitorStatistics.getBucketsVisited()); + _metrics.docs_per_visitor.inc(_visitorStatistics.getDocumentsVisited()); + _metrics.bytes_per_visitor.inc(_visitorStatistics.getBytesVisited()); +} + +void VisitorOperation::onClose(DistributorMessageSender& sender) { sendReply(api::ReturnCode(api::ReturnCode::ABORTED, "Process is shutting down"), diff --git a/storage/src/vespa/storage/distributor/operations/external/visitoroperation.h b/storage/src/vespa/storage/distributor/operations/external/visitoroperation.h index 1824610355d..6fd18d8d01e 100644 --- a/storage/src/vespa/storage/distributor/operations/external/visitoroperation.h +++ b/storage/src/vespa/storage/distributor/operations/external/visitoroperation.h @@ -90,6 +90,7 @@ private: typedef std::map<uint64_t, api::CreateVisitorCommand::SP> SentMessagesMap; void sendReply(const api::ReturnCode& code, DistributorMessageSender& sender); + void updateReplyMetrics(const api::ReturnCode& result); void verifyDistributorsAreAvailable(); void verifyVisitorDistributionBitCount(const document::BucketId&); void verifyDistributorIsNotDown(const lib::ClusterState&); diff --git a/storage/src/vespa/storage/distributor/visitormetricsset.cpp b/storage/src/vespa/storage/distributor/visitormetricsset.cpp index e6235f4cdc9..fb2bab51d1a 100644 --- a/storage/src/vespa/storage/distributor/visitormetricsset.cpp +++ b/storage/src/vespa/storage/distributor/visitormetricsset.cpp @@ -9,7 +9,16 @@ namespace storage { using metrics::MetricSet; VisitorMetricSet::VisitorMetricSet(MetricSet* owner) - : PersistenceOperationMetricSet("visitor", owner) + : PersistenceOperationMetricSet("visitor", owner), + buckets_per_visitor("buckets_per_visitor", "", + "The number of sub buckets visited as part of a " + "single client visitor command", this), + docs_per_visitor("docs_per_visitor", "", + "The number of documents visited on content nodes as " + "part of a single client visitor command", this), + bytes_per_visitor("bytes_per_visitor", "", + "The number of bytes visited on content nodes as part " + "of a single client visitor command", this) { } diff --git a/storage/src/vespa/storage/distributor/visitormetricsset.h b/storage/src/vespa/storage/distributor/visitormetricsset.h index fb9bdf099aa..34d65b9ec07 100644 --- a/storage/src/vespa/storage/distributor/visitormetricsset.h +++ b/storage/src/vespa/storage/distributor/visitormetricsset.h @@ -8,6 +8,10 @@ namespace storage { struct VisitorMetricSet : public PersistenceOperationMetricSet { + metrics::LongAverageMetric buckets_per_visitor; + metrics::LongAverageMetric docs_per_visitor; + metrics::LongAverageMetric bytes_per_visitor; + VisitorMetricSet(MetricSet* owner = nullptr); ~VisitorMetricSet(); |