summaryrefslogtreecommitdiffstats
path: root/storage
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@oath.com>2018-01-30 14:24:22 +0000
committerTor Brede Vekterli <vekterli@oath.com>2018-01-30 14:24:22 +0000
commit18a9a403b80aeb1464958b622a6eccb9b620c4e2 (patch)
treeda802007ab8666d06898ba67469fdc3d80adf235 /storage
parentee4d047e1a18cb0166d511ed0cad699b1accc8b7 (diff)
Log and increment failure metrics on concurrent mutation aborts
Greatly increases visibility for edge cases where multiple clients are sending many potentially conflicting operations to the same document set.
Diffstat (limited to 'storage')
-rw-r--r--storage/src/tests/distributor/externaloperationhandlertest.cpp12
-rw-r--r--storage/src/vespa/storage/distributor/externaloperationhandler.cpp37
-rw-r--r--storage/src/vespa/storage/distributor/externaloperationhandler.h3
-rw-r--r--storage/src/vespa/storage/distributor/persistence_operation_metric_set.cpp7
-rw-r--r--storage/src/vespa/storage/distributor/persistence_operation_metric_set.h3
5 files changed, 38 insertions, 24 deletions
diff --git a/storage/src/tests/distributor/externaloperationhandlertest.cpp b/storage/src/tests/distributor/externaloperationhandlertest.cpp
index a0b8cd424ac..8276e38997f 100644
--- a/storage/src/tests/distributor/externaloperationhandlertest.cpp
+++ b/storage/src/tests/distributor/externaloperationhandlertest.cpp
@@ -60,6 +60,12 @@ class ExternalOperationHandlerTest : public CppUnit::TestFixture,
.safe_time_not_reached.getLongValue("count");
}
+ int64_t concurrent_mutatations_metric_count(
+ const metrics::LoadMetric<PersistenceOperationMetricSet>& metrics) const {
+ return metrics[documentapi::LoadType::DEFAULT].failures
+ .concurrent_mutations.getLongValue("count");
+ }
+
void set_up_distributor_for_sequencing_test();
const vespalib::string _dummy_id{"id:foo:testdoctype1::bar"};
@@ -395,35 +401,41 @@ void ExternalOperationHandlerTest::reject_put_with_concurrent_mutation_to_same_i
assert_second_command_rejected_due_to_concurrent_mutation(
makePutCommand("testdoctype1", _dummy_id),
makePutCommand("testdoctype1", _dummy_id), _dummy_id);
+ CPPUNIT_ASSERT_EQUAL(int64_t(1), concurrent_mutatations_metric_count(getDistributor().getMetrics().puts));
}
void ExternalOperationHandlerTest::do_not_reject_put_operations_to_different_ids() {
assert_second_command_not_rejected_due_to_concurrent_mutation(
makePutCommand("testdoctype1", "id:foo:testdoctype1::baz"),
makePutCommand("testdoctype1", "id:foo:testdoctype1::foo"));
+ CPPUNIT_ASSERT_EQUAL(int64_t(0), concurrent_mutatations_metric_count(getDistributor().getMetrics().puts));
}
void ExternalOperationHandlerTest::reject_remove_with_concurrent_mutation_to_same_id() {
assert_second_command_rejected_due_to_concurrent_mutation(
makeRemoveCommand(_dummy_id), makeRemoveCommand(_dummy_id), _dummy_id);
+ CPPUNIT_ASSERT_EQUAL(int64_t(1), concurrent_mutatations_metric_count(getDistributor().getMetrics().removes));
}
void ExternalOperationHandlerTest::do_not_reject_remove_operations_to_different_ids() {
assert_second_command_not_rejected_due_to_concurrent_mutation(
makeRemoveCommand("id:foo:testdoctype1::baz"),
makeRemoveCommand("id:foo:testdoctype1::foo"));
+ CPPUNIT_ASSERT_EQUAL(int64_t(0), concurrent_mutatations_metric_count(getDistributor().getMetrics().removes));
}
void ExternalOperationHandlerTest::reject_update_with_concurrent_mutation_to_same_id() {
assert_second_command_rejected_due_to_concurrent_mutation(
makeUpdateCommand("testdoctype1", _dummy_id),
makeUpdateCommand("testdoctype1", _dummy_id), _dummy_id);
+ CPPUNIT_ASSERT_EQUAL(int64_t(1), concurrent_mutatations_metric_count(getDistributor().getMetrics().updates));
}
void ExternalOperationHandlerTest::do_not_reject_update_operations_to_different_ids() {
assert_second_command_not_rejected_due_to_concurrent_mutation(
makeUpdateCommand("testdoctype1", "id:foo:testdoctype1::baz"),
makeUpdateCommand("testdoctype1", "id:foo:testdoctype1::foo"));
+ CPPUNIT_ASSERT_EQUAL(int64_t(0), concurrent_mutatations_metric_count(getDistributor().getMetrics().updates));
}
void ExternalOperationHandlerTest::operation_destruction_allows_new_mutations_for_id() {
diff --git a/storage/src/vespa/storage/distributor/externaloperationhandler.cpp b/storage/src/vespa/storage/distributor/externaloperationhandler.cpp
index 902726fff41..5985d990fcd 100644
--- a/storage/src/vespa/storage/distributor/externaloperationhandler.cpp
+++ b/storage/src/vespa/storage/distributor/externaloperationhandler.cpp
@@ -105,12 +105,15 @@ ExternalOperationHandler::checkTimestampMutationPreconditions(
std::shared_ptr<api::StorageMessage>
ExternalOperationHandler::makeConcurrentMutationRejectionReply(
api::StorageCommand& cmd,
- const document::DocumentId& docId) const {
+ const document::DocumentId& docId,
+ PersistenceOperationMetricSet& persistenceMetrics) const {
+ auto err_msg = vespalib::make_string(
+ "A mutating operation for document '%s' is already in progress",
+ docId.toString().c_str());
+ LOG(debug, "Aborting incoming %s operation: %s", cmd.getType().toString().c_str(), err_msg.c_str());
+ persistenceMetrics.failures.concurrent_mutations++;
api::StorageReply::UP reply(cmd.makeReply());
- reply->setResult(api::ReturnCode(
- api::ReturnCode::BUSY, vespalib::make_string(
- "A mutating operation for document '%s' is already in progress",
- docId.toString().c_str())));
+ reply->setResult(api::ReturnCode(api::ReturnCode::BUSY, err_msg));
return std::shared_ptr<api::StorageMessage>(reply.release());
}
@@ -125,10 +128,8 @@ bool ExternalOperationHandler::allowMutation(const SequencingHandle& handle) con
IMPL_MSG_COMMAND_H(ExternalOperationHandler, Put)
{
- if (!checkTimestampMutationPreconditions(
- *cmd, getBucketId(cmd->getDocumentId()),
- getMetrics().puts[cmd->getLoadType()]))
- {
+ auto& metrics = getMetrics().puts[cmd->getLoadType()];
+ if (!checkTimestampMutationPreconditions(*cmd, getBucketId(cmd->getDocumentId()), metrics)) {
return true;
}
@@ -142,7 +143,7 @@ IMPL_MSG_COMMAND_H(ExternalOperationHandler, Put)
_bucketSpaceRepo.get(cmd->getBucket().getBucketSpace()),
cmd, getMetrics().puts[cmd->getLoadType()], std::move(handle));
} else {
- sendUp(makeConcurrentMutationRejectionReply(*cmd, cmd->getDocumentId()));
+ sendUp(makeConcurrentMutationRejectionReply(*cmd, cmd->getDocumentId(), metrics));
}
return true;
@@ -151,10 +152,8 @@ IMPL_MSG_COMMAND_H(ExternalOperationHandler, Put)
IMPL_MSG_COMMAND_H(ExternalOperationHandler, Update)
{
- if (!checkTimestampMutationPreconditions(
- *cmd, getBucketId(cmd->getDocumentId()),
- getMetrics().updates[cmd->getLoadType()]))
- {
+ auto& metrics = getMetrics().updates[cmd->getLoadType()];
+ if (!checkTimestampMutationPreconditions(*cmd, getBucketId(cmd->getDocumentId()), metrics)) {
return true;
}
@@ -167,7 +166,7 @@ IMPL_MSG_COMMAND_H(ExternalOperationHandler, Update)
_bucketSpaceRepo.get(cmd->getBucket().getBucketSpace()),
cmd, getMetrics(), std::move(handle));
} else {
- sendUp(makeConcurrentMutationRejectionReply(*cmd, cmd->getDocumentId()));
+ sendUp(makeConcurrentMutationRejectionReply(*cmd, cmd->getDocumentId(), metrics));
}
return true;
@@ -176,10 +175,8 @@ IMPL_MSG_COMMAND_H(ExternalOperationHandler, Update)
IMPL_MSG_COMMAND_H(ExternalOperationHandler, Remove)
{
- if (!checkTimestampMutationPreconditions(
- *cmd, getBucketId(cmd->getDocumentId()),
- getMetrics().removes[cmd->getLoadType()]))
- {
+ auto& metrics = getMetrics().removes[cmd->getLoadType()];
+ if (!checkTimestampMutationPreconditions(*cmd, getBucketId(cmd->getDocumentId()), metrics)) {
return true;
}
@@ -196,7 +193,7 @@ IMPL_MSG_COMMAND_H(ExternalOperationHandler, Remove)
getMetrics().removes[cmd->getLoadType()],
std::move(handle));
} else {
- sendUp(makeConcurrentMutationRejectionReply(*cmd, cmd->getDocumentId()));
+ sendUp(makeConcurrentMutationRejectionReply(*cmd, cmd->getDocumentId(), metrics));
}
return true;
diff --git a/storage/src/vespa/storage/distributor/externaloperationhandler.h b/storage/src/vespa/storage/distributor/externaloperationhandler.h
index c405b63aa81..ae5b4ae21ac 100644
--- a/storage/src/vespa/storage/distributor/externaloperationhandler.h
+++ b/storage/src/vespa/storage/distributor/externaloperationhandler.h
@@ -64,7 +64,8 @@ private:
PersistenceOperationMetricSet& persistenceMetrics);
std::shared_ptr<api::StorageMessage> makeConcurrentMutationRejectionReply(
api::StorageCommand& cmd,
- const document::DocumentId& docId) const;
+ const document::DocumentId& docId,
+ PersistenceOperationMetricSet& persistenceMetrics) const;
bool allowMutation(const SequencingHandle& handle) const;
DistributorMetricSet& getMetrics() { return getDistributor().getMetrics(); }
diff --git a/storage/src/vespa/storage/distributor/persistence_operation_metric_set.cpp b/storage/src/vespa/storage/distributor/persistence_operation_metric_set.cpp
index b619325c036..88b28941e65 100644
--- a/storage/src/vespa/storage/distributor/persistence_operation_metric_set.cpp
+++ b/storage/src/vespa/storage/distributor/persistence_operation_metric_set.cpp
@@ -26,7 +26,9 @@ PersistenceFailuresMetricSet::PersistenceFailuresMetricSet(MetricSet* owner)
inconsistent_bucket("inconsistent_bucket", "",
"The number of operations failed due to buckets "
"being in an inconsistent state or not found", this),
- notfound("notfound", "", "The number of operations that failed because the document did not exist", this)
+ notfound("notfound", "", "The number of operations that failed because the document did not exist", this),
+ concurrent_mutations("concurrent_mutations", "", "The number of operations that were transiently failed due "
+ "to a mutating operation already being in progress for its document ID", this)
{
sum.addMetricToSum(notready);
sum.addMetricToSum(notconnected);
@@ -38,7 +40,8 @@ PersistenceFailuresMetricSet::PersistenceFailuresMetricSet(MetricSet* owner)
sum.addMetricToSum(inconsistent_bucket);
sum.addMetricToSum(notfound);
}
-PersistenceFailuresMetricSet::~PersistenceFailuresMetricSet() { }
+
+PersistenceFailuresMetricSet::~PersistenceFailuresMetricSet() = default;
MetricSet *
PersistenceFailuresMetricSet::clone(std::vector<Metric::UP>& ownerList, CopyType copyType,
diff --git a/storage/src/vespa/storage/distributor/persistence_operation_metric_set.h b/storage/src/vespa/storage/distributor/persistence_operation_metric_set.h
index 5346aa70912..4f51c664daf 100644
--- a/storage/src/vespa/storage/distributor/persistence_operation_metric_set.h
+++ b/storage/src/vespa/storage/distributor/persistence_operation_metric_set.h
@@ -14,7 +14,7 @@ class ReturnCode;
class PersistenceFailuresMetricSet : public metrics::MetricSet
{
public:
- PersistenceFailuresMetricSet(metrics::MetricSet* owner);
+ explicit PersistenceFailuresMetricSet(metrics::MetricSet* owner);
~PersistenceFailuresMetricSet();
metrics::SumMetric<metrics::LongCountMetric> sum;
@@ -27,6 +27,7 @@ public:
metrics::LongCountMetric busy;
metrics::LongCountMetric inconsistent_bucket;
metrics::LongCountMetric notfound;
+ metrics::LongCountMetric concurrent_mutations;
MetricSet * clone(std::vector<Metric::UP>& ownerList, CopyType copyType,
metrics::MetricSet* owner, bool includeUnused) const override;