From 89974a6ae0d6112a0a30f01731a23ad7794622f6 Mon Sep 17 00:00:00 2001 From: Geir Storli Date: Tue, 13 Mar 2018 11:58:22 +0000 Subject: Remove never used per storage node ops latencies in host info. --- .../core/hostinfo/StorageNode.java | 32 ------- .../core/hostinfo/HostInfoTest.java | 9 -- .../core/restapiv2/StateRestApiTest.java | 32 +------ protocols/getnodestate/distributor.json | 12 --- protocols/getnodestate/host_info.json | 12 --- .../distributor_host_info_reporter_test.cpp | 101 +++------------------ .../src/vespa/storage/distributor/distributor.cpp | 2 +- .../distributor/distributor_host_info_reporter.cpp | 30 +----- .../distributor/distributor_host_info_reporter.h | 4 +- 9 files changed, 23 insertions(+), 211 deletions(-) diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/StorageNode.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/StorageNode.java index bf7c1fad6ca..f53c3eed28a 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/StorageNode.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/StorageNode.java @@ -14,31 +14,6 @@ import java.util.List; */ public class StorageNode { - static public class Put { - private final Long latencyMsSum; - private final Long count; - - @JsonCreator - public Put(@JsonProperty("latency-ms-sum") Long latencyMsSum, @JsonProperty("count") Long count) { - this.latencyMsSum = latencyMsSum; - this.count = count; - } - - public Long getLatencyMsSum() { return latencyMsSum; } - public Long getCount() { return count; } - } - - static public class OpsLatency { - private final Put put; - - @JsonCreator - public OpsLatency(@JsonProperty("put") Put put) { - this.put = put; - } - - public Put getPut() { return put; } - } - static public class BucketStats { private final long total; private final long pending; @@ -80,9 +55,6 @@ public class StorageNode { private final Integer index; - @JsonProperty("ops-latency") - private OpsLatency opsLatencies; - // If a Distributor does not manage any bucket copies for a particular storage node, // then the distributor will not return any min-current-replication-factor for that // storage node. @@ -101,10 +73,6 @@ public class StorageNode { return index; } - public OpsLatency getOpsLatenciesOrNull() { - return opsLatencies; - } - // See documentation on minCurrentReplicationFactor. public Integer getMinCurrentReplicationFactorOrNull() { return minCurrentReplicationFactor; diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfoTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfoTest.java index 9a69b998976..b96f5282971 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfoTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfoTest.java @@ -48,9 +48,6 @@ public class HostInfoTest { List storageNodeList = hostInfo.getDistributor().getStorageNodes(); assertThat(storageNodeList.size(), is(2)); assertThat(storageNodeList.get(0).getIndex(), is(0)); - assertThat(storageNodeList.get(0).getOpsLatenciesOrNull().getPut().getCount(), is(16L)); - assertThat(storageNodeList.get(1).getOpsLatenciesOrNull().getPut().getCount(), is(18L)); - assertThat(storageNodeList.get(0).getOpsLatenciesOrNull().getPut().getLatencyMsSum(), is(15L)); List metrics = hostInfo.getMetrics().getMetrics(); assertThat(metrics.size(), is(2)); Metrics.Value value = metrics.get(0).getValue(); @@ -93,15 +90,9 @@ public class HostInfoTest { assertTrue(storageNodeByIndex.containsKey(0)); assertThat(storageNodeByIndex.get(0).getIndex(), is(0)); assertThat(storageNodeByIndex.get(0).getMinCurrentReplicationFactorOrNull(), is(2)); - assertNotNull(storageNodeByIndex.get(0).getOpsLatenciesOrNull()); - assertThat(storageNodeByIndex.get(0).getOpsLatenciesOrNull().getPut().getLatencyMsSum(), is(10000L)); - assertThat(storageNodeByIndex.get(0).getOpsLatenciesOrNull().getPut().getCount(), is(3L)); assertTrue(storageNodeByIndex.containsKey(5)); assertThat(storageNodeByIndex.get(5).getIndex(), is(5)); assertThat(storageNodeByIndex.get(5).getMinCurrentReplicationFactorOrNull(), is(9)); - assertNotNull(storageNodeByIndex.get(5).getOpsLatenciesOrNull()); - assertThat(storageNodeByIndex.get(5).getOpsLatenciesOrNull().getPut().getLatencyMsSum(), is(25000L)); - assertThat(storageNodeByIndex.get(5).getOpsLatenciesOrNull().getPut().getCount(), is(7L)); } } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/StateRestApiTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/StateRestApiTest.java index 5079f91a2b9..46a50800c53 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/StateRestApiTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/StateRestApiTest.java @@ -125,43 +125,19 @@ public abstract class StateRestApiTest { " \"storage-nodes\": [\n" + " {\n" + " \"node-index\": 1,\n" + - " \"min-current-replication-factor\": 2,\n" + - " \"ops-latency\": {\n" + - " \"put\": {\n" + - " \"latency-ms-sum\": 6,\n" + - " \"count\": 7\n" + - " }\n" + - " }\n" + + " \"min-current-replication-factor\": 2\n" + " },\n" + " {\n" + " \"node-index\": 3,\n" + - " \"min-current-replication-factor\": 2,\n" + - " \"ops-latency\": {\n" + - " \"put\": {\n" + - " \"latency-ms-sum\": 5,\n" + - " \"count\": 4\n" + - " }\n" + - " }\n" + + " \"min-current-replication-factor\": 2\n" + " },\n" + " {\n" + " \"node-index\": 5,\n" + - " \"min-current-replication-factor\": 2,\n" + - " \"ops-latency\": {\n" + - " \"put\": {\n" + - " \"latency-ms-sum\": 4,\n" + - " \"count\": 5\n" + - " }\n" + - " }\n" + + " \"min-current-replication-factor\": 2\n" + " },\n" + " {\n" + " \"node-index\": 7,\n" + - " \"min-current-replication-factor\": 2,\n" + - " \"ops-latency\": {\n" + - " \"put\": {\n" + - " \"latency-ms-sum\": 6,\n" + - " \"count\": 7\n" + - " }\n" + - " }\n" + + " \"min-current-replication-factor\": 2\n" + " }\n" + " ]\n" + " }\n" + diff --git a/protocols/getnodestate/distributor.json b/protocols/getnodestate/distributor.json index 970fd09f253..1db812bc78c 100644 --- a/protocols/getnodestate/distributor.json +++ b/protocols/getnodestate/distributor.json @@ -4,12 +4,6 @@ { "node-index": 0, "min-current-replication-factor": 2, - "ops-latency" : { - "put": { - "latency-ms-sum": 10000, - "count": 3 - } - }, "bucket-spaces" : [ { "name": "default", @@ -30,12 +24,6 @@ { "node-index": 5, "min-current-replication-factor": 9, - "ops-latency" : { - "put": { - "latency-ms-sum": 25000, - "count": 7 - } - }, "bucket-spaces" : [ { "name": "default" diff --git a/protocols/getnodestate/host_info.json b/protocols/getnodestate/host_info.json index 305f279c847..5bfa4f3171c 100644 --- a/protocols/getnodestate/host_info.json +++ b/protocols/getnodestate/host_info.json @@ -42,12 +42,6 @@ { "node-index": 0, "min-current-replication-factor": 2, - "ops-latency" : { - "put": { - "latency-ms-sum": 15, - "count": 16 - } - }, "bucket-spaces": [ { "name": "default", @@ -68,12 +62,6 @@ { "node-index": 1, "min-current-replication-factor": 2, - "ops-latency": { - "put": { - "latency-ms-sum": 17, - "count": 18 - } - }, "bucket-spaces": [ { "name": "default" diff --git a/storage/src/tests/distributor/distributor_host_info_reporter_test.cpp b/storage/src/tests/distributor/distributor_host_info_reporter_test.cpp index 64df05acb8f..df01b3e30a7 100644 --- a/storage/src/tests/distributor/distributor_host_info_reporter_test.cpp +++ b/storage/src/tests/distributor/distributor_host_info_reporter_test.cpp @@ -21,19 +21,13 @@ using Object = vespalib::JsonStream::Object; class DistributorHostInfoReporterTest : public CppUnit::TestFixture { CPPUNIT_TEST_SUITE(DistributorHostInfoReporterTest); - CPPUNIT_TEST(hostInfoWithPutLatenciesOnly); - CPPUNIT_TEST(hostInfoAllInfo); - CPPUNIT_TEST(generateExampleJson); - CPPUNIT_TEST(noReportGeneratedIfDisabled); + CPPUNIT_TEST(min_replica_stats_are_reported); + CPPUNIT_TEST(generate_example_json); + CPPUNIT_TEST(no_report_generated_if_disabled); CPPUNIT_TEST(bucket_spaces_stats_are_reported); CPPUNIT_TEST_SUITE_END(); - void hostInfoWithPutLatenciesOnly(); - void hostInfoAllInfo(); - void verifyReportedNodeLatencies(const vespalib::Slime& root, - uint16_t nodeIndex, - int64_t latencySum, - int64_t count); + void min_replica_stats_are_reported(); void verifyBucketSpaceStats(const vespalib::Slime& root, uint16_t nodeIndex, const vespalib::string& bucketSpaceName, @@ -42,8 +36,8 @@ class DistributorHostInfoReporterTest : public CppUnit::TestFixture void verifyBucketSpaceStats(const vespalib::Slime& root, uint16_t nodeIndex, const vespalib::string& bucketSpaceName); - void generateExampleJson(); - void noReportGeneratedIfDisabled(); + void generate_example_json(); + void no_report_generated_if_disabled(); void bucket_spaces_stats_are_reported(); }; @@ -53,25 +47,7 @@ using ms = std::chrono::milliseconds; namespace { -OperationStats -makeOpStats(std::chrono::milliseconds totalLatency, uint64_t numRequests) -{ - OperationStats stats; - stats.totalLatency = totalLatency; - stats.numRequests = numRequests; - return stats; -} - // My kingdom for GoogleMock! -struct MockedLatencyStatisticsProvider : LatencyStatisticsProvider -{ - NodeStatsSnapshot returnedSnapshot; - - NodeStatsSnapshot doGetLatencyStatistics() const override { - return returnedSnapshot; - } -}; - struct MockedMinReplicaProvider : MinReplicaProvider { std::unordered_map minReplica; @@ -109,12 +85,6 @@ getMinReplica(const vespalib::Slime& root, uint16_t nodeIndex) return getNode(root, nodeIndex)["min-current-replication-factor"].asLong(); } -const vespalib::slime::Inspector& -getLatenciesForNode(const vespalib::Slime& root, uint16_t nodeIndex) -{ - return getNode(root, nodeIndex)["ops-latency"]; -} - const vespalib::slime::Inspector& getBucketSpaceStats(const vespalib::Slime& root, uint16_t nodeIndex, const vespalib::string& bucketSpaceName) { @@ -129,18 +99,6 @@ getBucketSpaceStats(const vespalib::Slime& root, uint16_t nodeIndex, const vespa } -void -DistributorHostInfoReporterTest::verifyReportedNodeLatencies(const vespalib::Slime& root, - uint16_t nodeIndex, - int64_t latencySum, - int64_t count) -{ - auto& latencies = getLatenciesForNode(root, nodeIndex); - CPPUNIT_ASSERT_EQUAL(latencySum, - latencies["put"]["latency-ms-sum"].asLong()); - CPPUNIT_ASSERT_EQUAL(count, latencies["put"]["count"].asLong()); -} - void DistributorHostInfoReporterTest::verifyBucketSpaceStats(const vespalib::Slime& root, uint16_t nodeIndex, @@ -164,43 +122,21 @@ DistributorHostInfoReporterTest::verifyBucketSpaceStats(const vespalib::Slime& r } struct Fixture { - MockedLatencyStatisticsProvider latencyStatsProvider; MockedMinReplicaProvider minReplicaProvider; MockedBucketSpacesStatsProvider bucketSpacesStatsProvider; DistributorHostInfoReporter reporter; Fixture() - : latencyStatsProvider(), - minReplicaProvider(), + : minReplicaProvider(), bucketSpacesStatsProvider(), - reporter(latencyStatsProvider, minReplicaProvider, bucketSpacesStatsProvider) + reporter(minReplicaProvider, bucketSpacesStatsProvider) {} ~Fixture() {} }; void -DistributorHostInfoReporterTest::hostInfoWithPutLatenciesOnly() +DistributorHostInfoReporterTest::min_replica_stats_are_reported() { Fixture f; - NodeStatsSnapshot snapshot; - snapshot.nodeToStats[0] = { makeOpStats(ms(10000), 3) }; - snapshot.nodeToStats[5] = { makeOpStats(ms(25000), 7) }; - - f.latencyStatsProvider.returnedSnapshot = snapshot; - - vespalib::Slime root; - util::reporterToSlime(f.reporter, root); - verifyReportedNodeLatencies(root, 0, 10000, 3); - verifyReportedNodeLatencies(root, 5, 25000, 7); -} - -void -DistributorHostInfoReporterTest::hostInfoAllInfo() -{ - Fixture f; - NodeStatsSnapshot latencySnapshot; - latencySnapshot.nodeToStats[0] = { makeOpStats(ms(10000), 3) }; - latencySnapshot.nodeToStats[5] = { makeOpStats(ms(25000), 7) }; - f.latencyStatsProvider.returnedSnapshot = latencySnapshot; std::unordered_map minReplica; minReplica[0] = 2; @@ -209,21 +145,15 @@ DistributorHostInfoReporterTest::hostInfoAllInfo() vespalib::Slime root; util::reporterToSlime(f.reporter, root); - verifyReportedNodeLatencies(root, 0, 10000, 3); - verifyReportedNodeLatencies(root, 5, 25000, 7); CPPUNIT_ASSERT_EQUAL(2, getMinReplica(root, 0)); CPPUNIT_ASSERT_EQUAL(9, getMinReplica(root, 5)); } void -DistributorHostInfoReporterTest::generateExampleJson() +DistributorHostInfoReporterTest::generate_example_json() { Fixture f; - NodeStatsSnapshot snapshot; - snapshot.nodeToStats[0] = { makeOpStats(ms(10000), 3) }; - snapshot.nodeToStats[5] = { makeOpStats(ms(25000), 7) }; - f.latencyStatsProvider.returnedSnapshot = snapshot; std::unordered_map minReplica; minReplica[0] = 2; @@ -261,16 +191,15 @@ DistributorHostInfoReporterTest::generateExampleJson() } void -DistributorHostInfoReporterTest::noReportGeneratedIfDisabled() +DistributorHostInfoReporterTest::no_report_generated_if_disabled() { Fixture f; f.reporter.enableReporting(false); - NodeStatsSnapshot snapshot; - snapshot.nodeToStats[0] = { makeOpStats(ms(10000), 3) }; - snapshot.nodeToStats[5] = { makeOpStats(ms(25000), 7) }; - - f.latencyStatsProvider.returnedSnapshot = snapshot; + std::unordered_map minReplica; + minReplica[0] = 2; + minReplica[5] = 9; + f.minReplicaProvider.minReplica = minReplica; vespalib::Slime root; util::reporterToSlime(f.reporter, root); diff --git a/storage/src/vespa/storage/distributor/distributor.cpp b/storage/src/vespa/storage/distributor/distributor.cpp index ca0ca80bb4e..d698d07fa34 100644 --- a/storage/src/vespa/storage/distributor/distributor.cpp +++ b/storage/src/vespa/storage/distributor/distributor.cpp @@ -96,7 +96,7 @@ Distributor::Distributor(DistributorComponentRegister& compReg, _maintenanceStats(), _bucketSpacesStats(), _bucketDbStats(), - _hostInfoReporter(_pendingMessageTracker.getLatencyStatisticsProvider(), *this, *this), + _hostInfoReporter(*this, *this), _ownershipSafeTimeCalc( std::make_unique( std::chrono::seconds(0))) // Set by config later diff --git a/storage/src/vespa/storage/distributor/distributor_host_info_reporter.cpp b/storage/src/vespa/storage/distributor/distributor_host_info_reporter.cpp index 8d2e45655d2..3a588179eb9 100644 --- a/storage/src/vespa/storage/distributor/distributor_host_info_reporter.cpp +++ b/storage/src/vespa/storage/distributor/distributor_host_info_reporter.cpp @@ -20,11 +20,9 @@ using Array = vespalib::JsonStream::Array; using End = vespalib::JsonStream::End; DistributorHostInfoReporter::DistributorHostInfoReporter( - LatencyStatisticsProvider& latencyProvider, MinReplicaProvider& minReplicaProvider, BucketSpacesStatsProvider& bucketSpacesStatsProvider) - : _latencyProvider(latencyProvider), - _minReplicaProvider(minReplicaProvider), + : _minReplicaProvider(minReplicaProvider), _bucketSpacesStatsProvider(bucketSpacesStatsProvider), _enabled(true) { @@ -32,16 +30,6 @@ DistributorHostInfoReporter::DistributorHostInfoReporter( namespace { -void -writeOperationStats(vespalib::JsonStream& stream, - const OperationStats& stats) -{ - stream << "put" << Object() - << "latency-ms-sum" << stats.totalLatency.count() - << "count" << stats.numRequests - << End(); -} - void writeBucketSpacesStats(vespalib::JsonStream& stream, const BucketSpacesStats& stats) @@ -60,14 +48,10 @@ writeBucketSpacesStats(vespalib::JsonStream& stream, void outputStorageNodes(vespalib::JsonStream& output, - const unordered_map& nodeStats, const unordered_map& minReplica, const PerNodeBucketSpacesStats& bucketSpacesStats) { set nodes; - for (const auto& element : nodeStats) { - nodes.insert(element.first); - } for (const auto& element : minReplica) { nodes.insert(element.first); } @@ -80,15 +64,6 @@ outputStorageNodes(vespalib::JsonStream& output, { output << "node-index" << node; - auto nodeStatsIt = nodeStats.find(node); - if (nodeStatsIt != nodeStats.end()) { - output << "ops-latency" << Object(); - { - writeOperationStats(output, nodeStatsIt->second.puts); - } - output << End(); - } - auto minReplicaIt = minReplica.find(node); if (minReplicaIt != minReplica.end()) { output << "min-current-replication-factor" @@ -115,7 +90,6 @@ DistributorHostInfoReporter::report(vespalib::JsonStream& output) return; } - auto nodeStats = _latencyProvider.getLatencyStatistics(); auto minReplica = _minReplicaProvider.getMinReplica(); auto bucketSpacesStats = _bucketSpacesStatsProvider.getBucketSpacesStats(); @@ -123,7 +97,7 @@ DistributorHostInfoReporter::report(vespalib::JsonStream& output) { output << "storage-nodes" << Array(); - outputStorageNodes(output, nodeStats.nodeToStats, minReplica, bucketSpacesStats); + outputStorageNodes(output, minReplica, bucketSpacesStats); output << End(); } diff --git a/storage/src/vespa/storage/distributor/distributor_host_info_reporter.h b/storage/src/vespa/storage/distributor/distributor_host_info_reporter.h index 3e6a02120c2..f664b44135d 100644 --- a/storage/src/vespa/storage/distributor/distributor_host_info_reporter.h +++ b/storage/src/vespa/storage/distributor/distributor_host_info_reporter.h @@ -15,8 +15,7 @@ struct OperationStats; class DistributorHostInfoReporter : public HostReporter { public: - DistributorHostInfoReporter(LatencyStatisticsProvider& latencyProvider, - MinReplicaProvider& minReplicaProvider, + DistributorHostInfoReporter(MinReplicaProvider& minReplicaProvider, BucketSpacesStatsProvider& bucketSpacesStatsProvider); DistributorHostInfoReporter(const DistributorHostInfoReporter&) = delete; @@ -43,7 +42,6 @@ public: } private: - LatencyStatisticsProvider& _latencyProvider; MinReplicaProvider& _minReplicaProvider; BucketSpacesStatsProvider& _bucketSpacesStatsProvider; std::atomic _enabled; -- cgit v1.2.3 From fa5b695fa2dac2871125493ce71ef6c6bcc22497 Mon Sep 17 00:00:00 2001 From: Geir Storli Date: Tue, 13 Mar 2018 12:36:54 +0000 Subject: Remove LatencyStatisticsProvider and usage in PendingMessageTracker. --- .../distributor_host_info_reporter_test.cpp | 1 - .../distributor/pendingmessagetrackertest.cpp | 172 --------------------- .../src/vespa/storage/distributor/CMakeLists.txt | 1 - .../distributor/distributor_host_info_reporter.h | 1 - .../distributor/latency_statistics_provider.cpp | 28 ---- .../distributor/latency_statistics_provider.h | 58 ------- .../storage/distributor/pendingmessagetracker.cpp | 60 +------ .../storage/distributor/pendingmessagetracker.h | 55 ------- 8 files changed, 1 insertion(+), 375 deletions(-) delete mode 100644 storage/src/vespa/storage/distributor/latency_statistics_provider.cpp delete mode 100644 storage/src/vespa/storage/distributor/latency_statistics_provider.h diff --git a/storage/src/tests/distributor/distributor_host_info_reporter_test.cpp b/storage/src/tests/distributor/distributor_host_info_reporter_test.cpp index df01b3e30a7..c2e8367fa30 100644 --- a/storage/src/tests/distributor/distributor_host_info_reporter_test.cpp +++ b/storage/src/tests/distributor/distributor_host_info_reporter_test.cpp @@ -2,7 +2,6 @@ #include #include -#include #include #include #include diff --git a/storage/src/tests/distributor/pendingmessagetrackertest.cpp b/storage/src/tests/distributor/pendingmessagetrackertest.cpp index 7adadd226d7..a7fec5ac460 100644 --- a/storage/src/tests/distributor/pendingmessagetrackertest.cpp +++ b/storage/src/tests/distributor/pendingmessagetrackertest.cpp @@ -26,17 +26,6 @@ class PendingMessageTrackerTest : public CppUnit::TestFixture { CPPUNIT_TEST(testGetPendingMessageTypes); CPPUNIT_TEST(testHasPendingMessage); CPPUNIT_TEST(testGetAllMessagesForSingleBucket); - CPPUNIT_TEST(nodeStatsCanBeOutputStreamed); - CPPUNIT_TEST(totalPutLatencyIsInitiallyZero); - CPPUNIT_TEST(statsNotAlteredBeforeReplyReceived); - CPPUNIT_TEST(totalPutLatencyIsTrackedForSingleRequest); - CPPUNIT_TEST(statsAreTrackedSeparatelyPerNode); - CPPUNIT_TEST(onlyPutMessagesAreTracked); - CPPUNIT_TEST(totalPutLatencyIsAggregatedAcrossRequests); - CPPUNIT_TEST(clearingMessagesDoesNotAffectStats); - CPPUNIT_TEST(timeTravellingClockLatenciesNotRegistered); - CPPUNIT_TEST(statsSnapshotIncludesAllNodes); - CPPUNIT_TEST(latencyProviderForwardsToImplementation); CPPUNIT_TEST(busy_reply_marks_node_as_busy); CPPUNIT_TEST(busy_node_duration_can_be_adjusted); CPPUNIT_TEST_SUITE_END(); @@ -48,40 +37,14 @@ public: void testGetPendingMessageTypes(); void testHasPendingMessage(); void testGetAllMessagesForSingleBucket(); - void nodeStatsCanBeOutputStreamed(); - void totalPutLatencyIsInitiallyZero(); - void statsNotAlteredBeforeReplyReceived(); - void totalPutLatencyIsTrackedForSingleRequest(); - void statsAreTrackedSeparatelyPerNode(); - void onlyPutMessagesAreTracked(); - void totalPutLatencyIsAggregatedAcrossRequests(); - void clearingMessagesDoesNotAffectStats(); - void timeTravellingClockLatenciesNotRegistered(); - void statsSnapshotIncludesAllNodes(); - void latencyProviderForwardsToImplementation(); void busy_reply_marks_node_as_busy(); void busy_node_duration_can_be_adjusted(); private: void insertMessages(PendingMessageTracker& tracker); - OperationStats makeOpStats(std::chrono::milliseconds totalLatency, - uint64_t numRequests) const - { - OperationStats stats; - stats.totalLatency = totalLatency; - stats.numRequests = numRequests; - return stats; - } }; -bool -operator==(const OperationStats& a, const OperationStats& b) -{ - return (a.totalLatency == b.totalLatency - && a.numRequests == b.numRequests); -} - namespace { class RequestBuilder { @@ -160,10 +123,6 @@ public: sendPutReply(*put, RequestBuilder().atTime(1000ms + latency)); } - OperationStats getNodePutOperationStats(uint16_t node) { - return _tracker->getNodeStats(node).puts; - } - PendingMessageTracker& tracker() { return *_tracker; } auto& clock() { return _clock; } @@ -541,137 +500,6 @@ PendingMessageTrackerTest::testGetAllMessagesForSingleBucket() } } -void -PendingMessageTrackerTest::nodeStatsCanBeOutputStreamed() -{ - NodeStats stats; - stats.puts = makeOpStats(56789ms, 10); - - std::ostringstream os; - os << stats; - std::string expected( - "NodeStats(puts=OperationStats(" - "totalLatency=56789ms, " - "numRequests=10))"); - CPPUNIT_ASSERT_EQUAL(expected, os.str()); -} - -void -PendingMessageTrackerTest::totalPutLatencyIsInitiallyZero() -{ - Fixture fixture; - CPPUNIT_ASSERT_EQUAL(makeOpStats(0ms, 0), - fixture.getNodePutOperationStats(0)); -} - -void -PendingMessageTrackerTest::statsNotAlteredBeforeReplyReceived() -{ - Fixture fixture; - fixture.sendPut(RequestBuilder().atTime(1000ms).toNode(0)); - CPPUNIT_ASSERT_EQUAL(makeOpStats(0ms, 0), - fixture.getNodePutOperationStats(0)); -} - -void -PendingMessageTrackerTest::totalPutLatencyIsTrackedForSingleRequest() -{ - Fixture fixture; - fixture.sendPutAndReplyWithLatency(0, 500ms); - - CPPUNIT_ASSERT_EQUAL(makeOpStats(500ms, 1), - fixture.getNodePutOperationStats(0)); -} - -void -PendingMessageTrackerTest::statsAreTrackedSeparatelyPerNode() -{ - Fixture fixture; - fixture.sendPutAndReplyWithLatency(0, 500ms); - fixture.sendPutAndReplyWithLatency(1, 600ms); - - CPPUNIT_ASSERT_EQUAL(makeOpStats(500ms, 1), - fixture.getNodePutOperationStats(0)); - CPPUNIT_ASSERT_EQUAL(makeOpStats(600ms, 1), - fixture.getNodePutOperationStats(1)); -} - -// Necessarily, this test will have to be altered when we add tracking of -// other message types as well. -void -PendingMessageTrackerTest::onlyPutMessagesAreTracked() -{ - Fixture fixture; - auto remove = fixture.sendRemove( - RequestBuilder().atTime(1000ms).toNode(0)); - fixture.sendRemoveReply(*remove, RequestBuilder().atTime(2000ms)); - CPPUNIT_ASSERT_EQUAL(makeOpStats(0ms, 0), - fixture.getNodePutOperationStats(0)); -} - -void -PendingMessageTrackerTest::totalPutLatencyIsAggregatedAcrossRequests() -{ - Fixture fixture; - // Model 2 concurrent puts to node 0. - fixture.sendPutAndReplyWithLatency(0, 500ms); - fixture.sendPutAndReplyWithLatency(0, 600ms); - CPPUNIT_ASSERT_EQUAL(makeOpStats(1100ms, 2), - fixture.getNodePutOperationStats(0)); -} - -void -PendingMessageTrackerTest::clearingMessagesDoesNotAffectStats() -{ - Fixture fixture; - fixture.sendPutAndReplyWithLatency(2, 2000ms); - fixture.tracker().clearMessagesForNode(2); - CPPUNIT_ASSERT_EQUAL(makeOpStats(2000ms, 1), - fixture.getNodePutOperationStats(2)); -} - -void -PendingMessageTrackerTest::timeTravellingClockLatenciesNotRegistered() -{ - Fixture fixture; - auto put = fixture.sendPut(RequestBuilder().atTime(1000ms).toNode(0)); - fixture.sendPutReply(*put, RequestBuilder().atTime(999ms)); - // Latency increase of zero, but we do count the request itself. - CPPUNIT_ASSERT_EQUAL(makeOpStats(0ms, 1), - fixture.getNodePutOperationStats(0)); -} - -void -PendingMessageTrackerTest::statsSnapshotIncludesAllNodes() -{ - Fixture fixture; - fixture.sendPutAndReplyWithLatency(0, 500ms); - fixture.sendPutAndReplyWithLatency(1, 600ms); - - NodeStatsSnapshot snapshot = fixture.tracker().getLatencyStatistics(); - - CPPUNIT_ASSERT_EQUAL(size_t(2), snapshot.nodeToStats.size()); - CPPUNIT_ASSERT_EQUAL(makeOpStats(500ms, 1), - snapshot.nodeToStats[0].puts); - CPPUNIT_ASSERT_EQUAL(makeOpStats(600ms, 1), - snapshot.nodeToStats[1].puts); -} - -void -PendingMessageTrackerTest::latencyProviderForwardsToImplementation() -{ - Fixture fixture; - fixture.sendPutAndReplyWithLatency(0, 500ms); - - LatencyStatisticsProvider& provider( - fixture.tracker().getLatencyStatisticsProvider()); - NodeStatsSnapshot snapshot = provider.getLatencyStatistics(); - - CPPUNIT_ASSERT_EQUAL(size_t(1), snapshot.nodeToStats.size()); - CPPUNIT_ASSERT_EQUAL(makeOpStats(500ms, 1), - snapshot.nodeToStats[0].puts); -} - // TODO don't set busy for visitor replies? These will mark the node as busy today, // but have the same actual semantics as busy merges (i.e. "queue is full", not "node // is too busy to accept new requests in general"). diff --git a/storage/src/vespa/storage/distributor/CMakeLists.txt b/storage/src/vespa/storage/distributor/CMakeLists.txt index 8adbfcaf9da..2fd51433306 100644 --- a/storage/src/vespa/storage/distributor/CMakeLists.txt +++ b/storage/src/vespa/storage/distributor/CMakeLists.txt @@ -17,7 +17,6 @@ vespa_add_library(storage_distributor externaloperationhandler.cpp idealstatemanager.cpp idealstatemetricsset.cpp - latency_statistics_provider.cpp messagetracker.cpp nodeinfo.cpp operation_sequencer.cpp diff --git a/storage/src/vespa/storage/distributor/distributor_host_info_reporter.h b/storage/src/vespa/storage/distributor/distributor_host_info_reporter.h index f664b44135d..cfffb54799f 100644 --- a/storage/src/vespa/storage/distributor/distributor_host_info_reporter.h +++ b/storage/src/vespa/storage/distributor/distributor_host_info_reporter.h @@ -8,7 +8,6 @@ namespace storage { namespace distributor { class BucketSpacesStatsProvider; -class LatencyStatisticsProvider; class MinReplicaProvider; struct OperationStats; diff --git a/storage/src/vespa/storage/distributor/latency_statistics_provider.cpp b/storage/src/vespa/storage/distributor/latency_statistics_provider.cpp deleted file mode 100644 index 5f06e0b74a6..00000000000 --- a/storage/src/vespa/storage/distributor/latency_statistics_provider.cpp +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "latency_statistics_provider.h" -#include - -namespace storage { -namespace distributor { - -std::ostream& -operator<<(std::ostream& os, const OperationStats& op) -{ - os << "OperationStats(" - << "totalLatency=" << op.totalLatency.count() - << "ms, numRequests=" << op.numRequests - << ')'; - return os; -} - -std::ostream& -operator<<(std::ostream& os, const NodeStats& stats) -{ - os << "NodeStats(puts=" << stats.puts << ')'; - return os; -} - -} // distributor -} // storage - diff --git a/storage/src/vespa/storage/distributor/latency_statistics_provider.h b/storage/src/vespa/storage/distributor/latency_statistics_provider.h deleted file mode 100644 index 1aa0b5a74cb..00000000000 --- a/storage/src/vespa/storage/distributor/latency_statistics_provider.h +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include -#include -#include -#include - -namespace storage { -namespace distributor { - -struct OperationStats { - std::chrono::milliseconds totalLatency; - uint64_t numRequests; - - OperationStats() - : totalLatency(0), numRequests(0) - { - } -}; - -struct NodeStats { - OperationStats puts; -}; - -std::ostream& -operator<<(std::ostream&, const OperationStats&); - -std::ostream& -operator<<(std::ostream&, const NodeStats&); - -struct NodeStatsSnapshot -{ - std::unordered_map nodeToStats; -}; - -class LatencyStatisticsProvider -{ -public: - virtual ~LatencyStatisticsProvider() {} - - /** - * Get a snapshot representation of the latency statistics towards a set of - * nodes at the point of the call. - * - * Can be called at any time after registration from another thread context - * and the call must thus be thread safe and data race free. - */ - NodeStatsSnapshot getLatencyStatistics() const { - return doGetLatencyStatistics(); - } - -private: - virtual NodeStatsSnapshot doGetLatencyStatistics() const = 0; -}; - -} // distributor -} // storage diff --git a/storage/src/vespa/storage/distributor/pendingmessagetracker.cpp b/storage/src/vespa/storage/distributor/pendingmessagetracker.cpp index f7e207d7b8c..5035e6feeab 100644 --- a/storage/src/vespa/storage/distributor/pendingmessagetracker.cpp +++ b/storage/src/vespa/storage/distributor/pendingmessagetracker.cpp @@ -7,17 +7,13 @@ #include LOG_SETUP(".pendingmessages"); -namespace storage { - -namespace distributor { +namespace storage::distributor { PendingMessageTracker::PendingMessageTracker(framework::ComponentRegister& cr) : framework::HtmlStatusReporter("pendingmessages", "Pending messages to storage nodes"), _component(cr, "pendingmessagetracker"), - _nodeIndexToStats(), _nodeInfo(_component.getClock()), - _statisticsForwarder(*this), _nodeBusyDuration(60), _lock() { @@ -130,7 +126,6 @@ PendingMessageTracker::reply(const api::StorageReply& r) if (iter != msgs.end()) { bucket = iter->bucket; _nodeInfo.decPending(r.getAddress()->getIndex()); - updateNodeStatsOnReply(*iter); api::ReturnCode::Result code = r.getResult().getResult(); if (code == api::ReturnCode::BUSY || code == api::ReturnCode::TIMEOUT) { _nodeInfo.setBusy(r.getAddress()->getIndex(), _nodeBusyDuration); @@ -142,49 +137,6 @@ PendingMessageTracker::reply(const api::StorageReply& r) return bucket; } -void -PendingMessageTracker::updateNodeStatsOnReply(const MessageEntry& entry) -{ - NodeStats& stats(_nodeIndexToStats[entry.nodeIdx]); - switch (entry.msgType) { - case api::MessageType::PUT_ID: - updateOperationStats(stats.puts, entry); - break; - default: - return; // Message was for type not tracked by stats. - } -} - -void -PendingMessageTracker::updateOperationStats(OperationStats& opStats, - const MessageEntry& entry) const -{ - // Time might go backwards due to clock adjustments (here assuming clock - // implementation in storage framework is non-monotonic), so avoid negative - // latencies by clamping to delta of 0. - auto now = std::max(currentTime(), entry.timeStamp); - opStats.totalLatency += (now - entry.timeStamp); - ++opStats.numRequests; -} - - -NodeStatsSnapshot -PendingMessageTracker::getLatencyStatistics() const -{ - std::lock_guard guard(_lock); - NodeStatsSnapshot snapshot; - // Conveniently, snapshot data structure is exactly the same as our own. - snapshot.nodeToStats = _nodeIndexToStats; - return snapshot; -} - -NodeStatsSnapshot -PendingMessageTracker::ForwardingLatencyStatisticsProvider -::doGetLatencyStatistics() const -{ - return _messageTracker.getLatencyStatistics(); -} - namespace { template @@ -334,14 +286,4 @@ PendingMessageTracker::print(std::ostream& /*out*/, } -NodeStats -PendingMessageTracker::getNodeStats(uint16_t node) const -{ - std::lock_guard guard(_lock); - auto nodeIter = _nodeIndexToStats.find(node); - return (nodeIter != _nodeIndexToStats.end() ? nodeIter->second - : NodeStats()); } - -} // distributor -} // storage diff --git a/storage/src/vespa/storage/distributor/pendingmessagetracker.h b/storage/src/vespa/storage/distributor/pendingmessagetracker.h index e7bcf85a38d..59bcb206127 100644 --- a/storage/src/vespa/storage/distributor/pendingmessagetracker.h +++ b/storage/src/vespa/storage/distributor/pendingmessagetracker.h @@ -2,7 +2,6 @@ #pragma once #include "nodeinfo.h" -#include "latency_statistics_provider.h" #include #include #include @@ -28,19 +27,6 @@ namespace distributor { class PendingMessageTracker : public framework::HtmlStatusReporter { - class ForwardingLatencyStatisticsProvider - : public LatencyStatisticsProvider - { - PendingMessageTracker& _messageTracker; - public: - ForwardingLatencyStatisticsProvider( - PendingMessageTracker& messageTracker) - : _messageTracker(messageTracker) - { - } - - NodeStatsSnapshot doGetLatencyStatistics() const override; - }; public: class Checker { @@ -102,42 +88,12 @@ public: const NodeInfo& getNodeInfo() const { return _nodeInfo; } NodeInfo& getNodeInfo() { return _nodeInfo; } - /** - * Get the statistics for all completed operations towards a specific - * storage node. "Completed" here means both successful and failed - * operations. Statistics are monotonically increasing within the scope of - * the process' lifetime and are never reset. This models how the Linux - * kernel reports its internal stats and means the caller must maintain - * value snapshots to extract meaningful time series information. - * - * If stats are requested for a node that has not had any operations - * complete towards it, the returned stats will be all zero. - * - * Method is thread safe and data race free. - * - * It is assumed that NodeStats is sufficiently small that returning it - * by value does not incur a measurable performance impact. This also - * prevents any data race issues in case returned stats are e.g. - * concurrently read by another thread such the metric snapshotting thread. - */ - NodeStats getNodeStats(uint16_t node) const; - /** * Clears all pending messages for the given node, and returns * the messages erased. */ std::vector clearMessagesForNode(uint16_t node); - /** - * Must not be called when _lock is already held or there will be a - * deadlock. - */ - NodeStatsSnapshot getLatencyStatistics() const; - - LatencyStatisticsProvider& getLatencyStatisticsProvider() { - return _statisticsForwarder; - } - void setNodeBusyDuration(std::chrono::seconds secs) noexcept { _nodeBusyDuration = secs; } @@ -212,9 +168,7 @@ private: Messages _messages; framework::Component _component; - std::unordered_map _nodeIndexToStats; NodeInfo _nodeInfo; - ForwardingLatencyStatisticsProvider _statisticsForwarder; std::chrono::seconds _nodeBusyDuration; // Since distributor is currently single-threaded, this will only @@ -234,15 +188,6 @@ private: */ void updateNodeStatsOnReply(const MessageEntry& entry); - - /** - * Modifies opStats in-place with added latency based on delta from send - * time to current time and incremented operation count. - */ - void updateOperationStats(OperationStats& opStats, - const MessageEntry& entry) const; - - void getStatusStartPage(std::ostream& out) const; void getStatusPerNode(std::ostream& out) const; void getStatusPerBucket(std::ostream& out) const; -- cgit v1.2.3