aboutsummaryrefslogtreecommitdiffstats
path: root/storage
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@verizonmedia.com>2019-02-25 10:49:23 +0100
committerGitHub <noreply@github.com>2019-02-25 10:49:23 +0100
commita53aa969015699f3518bd32e3acb8a1339150de8 (patch)
treeb321d297bddce54e70975c0c470632895d743847 /storage
parentc00ca40c3f022acbecf45bff12e0dc8abff81cd3 (diff)
parentf5414eadef7354893f64032d7ed1779777875502 (diff)
Merge pull request #8588 from vespa-engine/vekterli/do-not-bruteforce-abort-client-ops-during-orchestrated-down
Fail client ops gracefully when distributor is marked down
Diffstat (limited to 'storage')
-rw-r--r--storage/src/tests/common/teststorageapp.cpp5
-rw-r--r--storage/src/tests/storageserver/bouncertest.cpp75
-rw-r--r--storage/src/vespa/storage/storageserver/bouncer.cpp19
-rw-r--r--storage/src/vespa/storage/storageserver/bouncer.h2
-rw-r--r--storage/src/vespa/storage/storageserver/bouncer_metrics.cpp4
-rw-r--r--storage/src/vespa/storage/storageserver/bouncer_metrics.h1
6 files changed, 70 insertions, 36 deletions
diff --git a/storage/src/tests/common/teststorageapp.cpp b/storage/src/tests/common/teststorageapp.cpp
index 8fd80d47bb3..a720cd191e4 100644
--- a/storage/src/tests/common/teststorageapp.cpp
+++ b/storage/src/tests/common/teststorageapp.cpp
@@ -248,8 +248,7 @@ TestDistributorApp::configure(vespalib::stringref id)
TestDistributorApp::TestDistributorApp(vespalib::stringref configId)
: TestStorageApp(
- StorageComponentRegisterImpl::UP(
- new DistributorComponentRegisterImpl),
+ std::make_unique<DistributorComponentRegisterImpl>(),
lib::NodeType::DISTRIBUTOR, getIndexFromConfig(configId), configId),
_compReg(dynamic_cast<DistributorComponentRegisterImpl&>(
TestStorageApp::getComponentRegister())),
@@ -263,7 +262,7 @@ TestDistributorApp::TestDistributorApp(vespalib::stringref configId)
TestDistributorApp::TestDistributorApp(NodeIndex index,
vespalib::stringref configId)
: TestStorageApp(
- StorageComponentRegisterImpl::UP(new StorageComponentRegisterImpl),
+ std::make_unique<DistributorComponentRegisterImpl>(),
lib::NodeType::DISTRIBUTOR, index, configId),
_compReg(dynamic_cast<DistributorComponentRegisterImpl&>(
TestStorageApp::getComponentRegister())),
diff --git a/storage/src/tests/storageserver/bouncertest.cpp b/storage/src/tests/storageserver/bouncertest.cpp
index be926722497..27c13a3707e 100644
--- a/storage/src/tests/storageserver/bouncertest.cpp
+++ b/storage/src/tests/storageserver/bouncertest.cpp
@@ -19,7 +19,7 @@ using document::test::makeDocumentBucket;
namespace storage {
struct BouncerTest : public CppUnit::TestFixture {
- std::unique_ptr<TestServiceLayerApp> _node;
+ std::unique_ptr<TestStorageApp> _node;
std::unique_ptr<DummyStorageLink> _upper;
Bouncer* _manager;
DummyStorageLink* _lower;
@@ -29,6 +29,8 @@ struct BouncerTest : public CppUnit::TestFixture {
void setUp() override;
void tearDown() override;
+ void setUpAsNode(const lib::NodeType& type);
+
void testFutureTimestamp();
void testAllowNotifyBucketChangeEvenWhenDistributorDown();
void rejectLowerPrioritizedFeedMessagesWhenConfigured();
@@ -40,6 +42,7 @@ struct BouncerTest : public CppUnit::TestFixture {
void internalOperationsAreNotRejected();
void outOfBoundsConfigValuesThrowException();
void abort_request_when_derived_bucket_space_node_state_is_marked_down();
+ void client_operations_are_allowed_through_on_cluster_state_down_distributor();
CPPUNIT_TEST_SUITE(BouncerTest);
CPPUNIT_TEST(testFutureTimestamp);
@@ -53,6 +56,7 @@ struct BouncerTest : public CppUnit::TestFixture {
CPPUNIT_TEST(internalOperationsAreNotRejected);
CPPUNIT_TEST(outOfBoundsConfigValuesThrowException);
CPPUNIT_TEST(abort_request_when_derived_bucket_space_node_state_is_marked_down);
+ CPPUNIT_TEST(client_operations_are_allowed_through_on_cluster_state_down_distributor);
CPPUNIT_TEST_SUITE_END();
using Priority = api::StorageMessage::Priority;
@@ -81,39 +85,42 @@ CPPUNIT_TEST_SUITE_REGISTRATION(BouncerTest);
BouncerTest::BouncerTest()
: _node(),
_upper(),
- _manager(0),
- _lower(0)
+ _manager(nullptr),
+ _lower(nullptr)
{
}
-void
-BouncerTest::setUp() {
- try{
- vdstestlib::DirConfig config(getStandardConfig(true));
- _node.reset(new TestServiceLayerApp(
- DiskCount(1), NodeIndex(2), config.getConfigId()));
- _upper.reset(new DummyStorageLink());
- _manager = new Bouncer(_node->getComponentRegister(),
- config.getConfigId());
- _lower = new DummyStorageLink();
- _upper->push_back(std::unique_ptr<StorageLink>(_manager));
- _upper->push_back(std::unique_ptr<StorageLink>(_lower));
- _upper->open();
- } catch (std::exception& e) {
- std::cerr << "Failed to static initialize objects: " << e.what()
- << "\n";
+void BouncerTest::setUpAsNode(const lib::NodeType& type) {
+ vdstestlib::DirConfig config(getStandardConfig(type == lib::NodeType::STORAGE));
+ if (type == lib::NodeType::STORAGE) {
+ _node.reset(new TestServiceLayerApp(DiskCount(1), NodeIndex(2), config.getConfigId()));
+ } else {
+ _node.reset(new TestDistributorApp(NodeIndex(2), config.getConfigId()));
}
+ _upper.reset(new DummyStorageLink());
+ _manager = new Bouncer(_node->getComponentRegister(), config.getConfigId());
+ _lower = new DummyStorageLink();
+ _upper->push_back(std::unique_ptr<StorageLink>(_manager));
+ _upper->push_back(std::unique_ptr<StorageLink>(_lower));
+ _upper->open();
_node->getClock().setAbsoluteTimeInSeconds(10);
}
void
+BouncerTest::setUp() {
+ setUpAsNode(lib::NodeType::STORAGE);
+}
+
+void
BouncerTest::tearDown() {
- _manager = 0;
- _lower = 0;
- _upper->close();
- _upper->flush();
- _upper.reset(0);
- _node.reset(0);
+ _manager = nullptr;
+ _lower = nullptr;
+ if (_upper) {
+ _upper->close();
+ _upper->flush();
+ _upper.reset();
+ }
+ _node.reset();
}
std::shared_ptr<api::StorageCommand>
@@ -334,13 +341,31 @@ makeClusterStateBundle(const vespalib::string &baselineState, const std::map<doc
void
BouncerTest::abort_request_when_derived_bucket_space_node_state_is_marked_down()
{
+ CPPUNIT_ASSERT_EQUAL(uint64_t(0), _manager->metrics().unavailable_node_aborts.getValue());
+
auto state = makeClusterStateBundle("distributor:3 storage:3", {{ document::FixedBucketSpaces::default_space(), "distributor:3 storage:3 .2.s:d" }});
_node->getNodeStateUpdater().setClusterStateBundle(state);
_upper->sendDown(createDummyFeedMessage(11 * 1000000, document::FixedBucketSpaces::default_space()));
assertMessageBouncedWithAbort();
+ CPPUNIT_ASSERT_EQUAL(uint64_t(1), _manager->metrics().unavailable_node_aborts.getValue());
+
_upper->reset();
_upper->sendDown(createDummyFeedMessage(11 * 1000000, document::FixedBucketSpaces::global_space()));
assertMessageNotBounced();
+ CPPUNIT_ASSERT_EQUAL(uint64_t(1), _manager->metrics().unavailable_node_aborts.getValue());
+}
+
+void BouncerTest::client_operations_are_allowed_through_on_cluster_state_down_distributor() {
+ tearDown();
+ setUpAsNode(lib::NodeType::DISTRIBUTOR);
+
+ // Distributor states never vary across bucket spaces, so not necessary to test with
+ // anything except baseline state here.
+ auto state = makeClusterStateBundle("distributor:3 .2.s:d storage:3", {});
+ _node->getNodeStateUpdater().setClusterStateBundle(state);
+ _upper->sendDown(createDummyFeedMessage(11 * 1000000, document::FixedBucketSpaces::default_space()));
+ assertMessageNotBounced();
+ CPPUNIT_ASSERT_EQUAL(uint64_t(0), _manager->metrics().unavailable_node_aborts.getValue());
}
} // storage
diff --git a/storage/src/vespa/storage/storageserver/bouncer.cpp b/storage/src/vespa/storage/storageserver/bouncer.cpp
index 82cf64423c6..0541c7322f1 100644
--- a/storage/src/vespa/storage/storageserver/bouncer.cpp
+++ b/storage/src/vespa/storage/storageserver/bouncer.cpp
@@ -118,6 +118,7 @@ Bouncer::abortCommandForUnavailableNode(api::StorageMessage& msg,
<< " when node is in state " << state.toString(true);
append_node_identity(ost);
reply->setResult(api::ReturnCode(api::ReturnCode::ABORTED, ost.str()));
+ _metrics->unavailable_node_aborts.inc();
sendUp(reply);
}
@@ -158,6 +159,10 @@ Bouncer::clusterIsUp() const
return (*_clusterState == lib::State::UP);
}
+bool Bouncer::isDistributor() const {
+ return (_component.getNodeType() == lib::NodeType::DISTRIBUTOR);
+}
+
uint64_t
Bouncer::extractMutationTimestampIfAny(const api::StorageMessage& msg)
{
@@ -243,12 +248,11 @@ Bouncer::onDown(const std::shared_ptr<api::StorageMessage>& msg)
int feedPriorityLowerBound;
{
vespalib::LockGuard lock(_lock);
- state = &getDerivedNodeState(msg->getBucket().getBucketSpace()).getState();
- maxClockSkewInSeconds = _config->maxClockSkewSeconds;
+ state = &getDerivedNodeState(msg->getBucket().getBucketSpace()).getState();
+ maxClockSkewInSeconds = _config->maxClockSkewSeconds;
abortLoadWhenClusterDown = _config->stopExternalLoadWhenClusterDown;
- isInAvailableState = state->oneOf(
- _config->stopAllLoadWhenNodestateNotIn.c_str());
- feedPriorityLowerBound = _config->feedRejectionPriorityThreshold;
+ isInAvailableState = state->oneOf(_config->stopAllLoadWhenNodestateNotIn.c_str());
+ feedPriorityLowerBound = _config->feedRejectionPriorityThreshold;
}
// Special case for messages storage nodes are expected to get during
// initializing. Request bucket info will be queued so storage can
@@ -258,13 +262,14 @@ Bouncer::onDown(const std::shared_ptr<api::StorageMessage>& msg)
{
return false;
}
- if (!isInAvailableState) {
+ const bool externalLoad = isExternalLoad(type);
+ if (!isInAvailableState && !(isDistributor() && externalLoad)) {
abortCommandForUnavailableNode(*msg, *state);
return true;
}
// Allow all internal load to go through at this point
- if (!isExternalLoad(type)) {
+ if (!externalLoad) {
return false;
}
if (priorityRejectionIsEnabled(feedPriorityLowerBound)
diff --git a/storage/src/vespa/storage/storageserver/bouncer.h b/storage/src/vespa/storage/storageserver/bouncer.h
index 258b0b18a32..19c8e084c7d 100644
--- a/storage/src/vespa/storage/storageserver/bouncer.h
+++ b/storage/src/vespa/storage/storageserver/bouncer.h
@@ -70,6 +70,8 @@ private:
bool clusterIsUp() const;
+ bool isDistributor() const;
+
bool isExternalLoad(const api::MessageType&) const noexcept;
bool isExternalWriteOperation(const api::MessageType&) const noexcept;
diff --git a/storage/src/vespa/storage/storageserver/bouncer_metrics.cpp b/storage/src/vespa/storage/storageserver/bouncer_metrics.cpp
index c0fac35263e..5f3ba5a236d 100644
--- a/storage/src/vespa/storage/storageserver/bouncer_metrics.cpp
+++ b/storage/src/vespa/storage/storageserver/bouncer_metrics.cpp
@@ -7,7 +7,9 @@ namespace storage {
BouncerMetrics::BouncerMetrics()
: MetricSet("bouncer", {}, "Metrics for Bouncer component", nullptr),
clock_skew_aborts("clock_skew_aborts", {}, "Number of client operations that were aborted due to "
- "clock skew between sender and receiver exceeding acceptable range", this)
+ "clock skew between sender and receiver exceeding acceptable range", this),
+ unavailable_node_aborts("unavailable_node_aborts", {}, "Number of operations that were aborted due "
+ "to the node (or target bucket space) being unavailable", this)
{
}
diff --git a/storage/src/vespa/storage/storageserver/bouncer_metrics.h b/storage/src/vespa/storage/storageserver/bouncer_metrics.h
index 9beca6c73b7..9842bed1c6f 100644
--- a/storage/src/vespa/storage/storageserver/bouncer_metrics.h
+++ b/storage/src/vespa/storage/storageserver/bouncer_metrics.h
@@ -8,6 +8,7 @@ namespace storage {
struct BouncerMetrics : metrics::MetricSet {
metrics::LongCountMetric clock_skew_aborts;
+ metrics::LongCountMetric unavailable_node_aborts;
BouncerMetrics();
~BouncerMetrics() override;