diff options
author | Arne H Juul <arnej27959@users.noreply.github.com> | 2021-06-07 23:10:18 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-06-07 23:10:18 +0200 |
commit | 429ff98c842a124515f66b8bcdaf0c5f64c678e9 (patch) | |
tree | 99d73b2f7810d005ff29f5db78685290fc3292f3 /configd/src/apps/sentinel/connectivity.cpp | |
parent | 4ffd09f5678559a5f71d3957514f1d52c61e88f0 (diff) | |
parent | a402fb4fab902b9f8c9a8859b1142e436bf439e3 (diff) |
Merge pull request #18132 from vespa-engine/arnej/actually-wait-for-connectivity
Arnej/actually wait for connectivity
Diffstat (limited to 'configd/src/apps/sentinel/connectivity.cpp')
-rw-r--r-- | configd/src/apps/sentinel/connectivity.cpp | 58 |
1 files changed, 34 insertions, 24 deletions
diff --git a/configd/src/apps/sentinel/connectivity.cpp b/configd/src/apps/sentinel/connectivity.cpp index 9cced1d3475..132b57fc884 100644 --- a/configd/src/apps/sentinel/connectivity.cpp +++ b/configd/src/apps/sentinel/connectivity.cpp @@ -1,5 +1,6 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include "config-owner.h" #include "connectivity.h" #include "outward-check.h" #include <vespa/defaults.h> @@ -16,19 +17,14 @@ using namespace std::chrono_literals; namespace config::sentinel { -Connectivity::Connectivity(const SentinelConfig::Connectivity & config, RpcServer &rpcServer) - : _config(config), - _rpcServer(rpcServer) -{ - LOG(config, "connectivity.maxBadReverseCount = %d", _config.maxBadReverseCount); - LOG(config, "connectivity.maxBadOutPercent = %d", _config.maxBadOutPercent); -} +constexpr std::chrono::milliseconds MODEL_TIMEOUT_MS = 60s; +Connectivity::Connectivity() = default; Connectivity::~Connectivity() = default; namespace { -const char *toString(CcResult value) { +std::string toString(CcResult value) { switch (value) { case CcResult::UNKNOWN: return "BAD: missing result"; // very very bad case CcResult::REVERSE_FAIL: return "connect OK, but reverse check FAILED"; // very bad @@ -65,16 +61,28 @@ std::map<std::string, std::string> specsFrom(const ModelConfig &model) { } -Connectivity::CheckResult -Connectivity::checkConnectivity(const ModelConfig &model) { - const auto checkSpecs = specsFrom(model); - size_t clusterSize = checkSpecs.size(); +void Connectivity::configure(const SentinelConfig::Connectivity &config) { + _config = config; + LOG(config, "connectivity.maxBadReverseCount = %d", _config.maxBadReverseCount); + LOG(config, "connectivity.maxBadOutPercent = %d", _config.maxBadOutPercent); + if (auto up = ConfigOwner::fetchModelConfig(MODEL_TIMEOUT_MS)) { + _checkSpecs = specsFrom(*up); + } +} + +bool +Connectivity::checkConnectivity(RpcServer &rpcServer) { + size_t clusterSize = _checkSpecs.size(); + if (clusterSize == 0) { + LOG(warning, "could not get model config, skipping connectivity checks"); + return true; + } OutwardCheckContext checkContext(clusterSize, vespa::Defaults::vespaHostname(), - _rpcServer.getPort(), - _rpcServer.orb()); + rpcServer.getPort(), + rpcServer.orb()); std::map<std::string, OutwardCheck> connectivityMap; - for (const auto & [ hn, spec ] : checkSpecs) { + for (const auto & [ hn, spec ] : _checkSpecs) { connectivityMap.try_emplace(hn, spec, checkContext); } checkContext.latch.await(); @@ -82,6 +90,12 @@ Connectivity::checkConnectivity(const ModelConfig &model) { size_t numFailedReverse = 0; bool allChecksOk = true; for (const auto & [hostname, check] : connectivityMap) { + std::string detail = toString(check.result()); + std::string prev = _detailsPerHost[hostname]; + if (prev != detail) { + LOG(info, "Connectivity check details: %s -> %s", hostname.c_str(), detail.c_str()); + } + _detailsPerHost[hostname] = detail; LOG_ASSERT(check.result() != CcResult::UNKNOWN); if (check.result() == CcResult::CONN_FAIL) ++numFailedConns; if (check.result() == CcResult::REVERSE_FAIL) ++numFailedReverse; @@ -97,16 +111,12 @@ Connectivity::checkConnectivity(const ModelConfig &model) { numFailedConns, clusterSize, pct, _config.maxBadOutPercent); allChecksOk = false; } - std::vector<std::string> details; - for (const auto & [hostname, check] : connectivityMap) { - std::string detail = fmt("%s -> %s", hostname.c_str(), toString(check.result())); - details.push_back(detail); + if (allChecksOk && (numFailedConns == 0) && (numFailedReverse == 0)) { + LOG(info, "All connectivity checks OK, proceeding with service startup"); + } else if (allChecksOk) { + LOG(info, "Enough connectivity checks OK, proceeding with service startup"); } - CheckResult result{false, false, {}}; - result.enoughOk = allChecksOk; - result.allOk = (numFailedConns == 0) && (numFailedReverse == 0); - result.details = std::move(details); - return result; + return allChecksOk; } } |