diff options
author | Arne Juul <arnej@verizonmedia.com> | 2021-06-04 11:14:35 +0000 |
---|---|---|
committer | Arne Juul <arnej@verizonmedia.com> | 2021-06-04 11:14:35 +0000 |
commit | c585fcd3e4732c8f0c7b0ad85e3602d2a5e9d61c (patch) | |
tree | 72cf153d2f66d077e2ad9dec8781a6f571a5456a /configd | |
parent | 439e913530f2a566cbc514df99c982e3b318298c (diff) |
add separate class for connectivity check
Diffstat (limited to 'configd')
-rw-r--r-- | configd/src/apps/sentinel/CMakeLists.txt | 1 | ||||
-rw-r--r-- | configd/src/apps/sentinel/connectivity.cpp | 113 | ||||
-rw-r--r-- | configd/src/apps/sentinel/connectivity.h | 36 |
3 files changed, 150 insertions, 0 deletions
diff --git a/configd/src/apps/sentinel/CMakeLists.txt b/configd/src/apps/sentinel/CMakeLists.txt
index e77abc19077..43b4f79a0b2 100644
--- a/configd/src/apps/sentinel/CMakeLists.txt
+++ b/configd/src/apps/sentinel/CMakeLists.txt
@@ -4,6 +4,7 @@ vespa_add_executable(configd_config-sentinel_app
     check-completion-handler.cpp
     cmdq.cpp
     config-owner.cpp
+    connectivity.cpp
     env.cpp
     line-splitter.cpp
     manager.cpp
diff --git a/configd/src/apps/sentinel/connectivity.cpp b/configd/src/apps/sentinel/connectivity.cpp
new file mode 100644
index 00000000000..4ba16f95e15
--- /dev/null
+++ b/configd/src/apps/sentinel/connectivity.cpp
@@ -0,0 +1,139 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "connectivity.h"
+#include "outward-check.h"
+#include <vespa/defaults.h>
+#include <vespa/log/log.h>
+#include <vespa/vespalib/util/exceptions.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <chrono>
+#include <map>
+#include <string>
+#include <thread>
+#include <utility>
+
+LOG_SETUP(".connectivity");
+
+using vespalib::make_string_short::fmt;
+using namespace std::chrono_literals;
+
+namespace config::sentinel {
+
+/**
+ * Keeps references to the connectivity limits and to the RPC server used for
+ * the checks, and logs the configured limits.
+ **/
+Connectivity::Connectivity(const SentinelConfig::Connectivity & config, RpcServer &rpcServer)
+  : _config(config),
+    _rpcServer(rpcServer)
+{
+    LOG(config, "connectivity.maxBadReverseCount = %d", _config.maxBadReverseCount);
+    LOG(config, "connectivity.maxBadOutPercent = %d", _config.maxBadOutPercent);
+}
+
+Connectivity::~Connectivity() = default;
+
+namespace {
+
+/** Human-readable description of a check result, used in the per-host details. **/
+const char *toString(CcResult value) {
+    switch (value) {
+        case CcResult::UNKNOWN:         return "BAD: missing result"; // very very bad
+        case CcResult::REVERSE_FAIL:    return "connect OK, but reverse check FAILED"; // very bad
+        case CcResult::CONN_FAIL:       return "failed to connect"; // bad
+        case CcResult::REVERSE_UNAVAIL: return "connect OK (but reverse check unavailable)"; // unfortunate
+        case CcResult::ALL_OK:          return "OK: both ways connectivity verified"; // good
+    }
+    // Only reached when value matches none of the enumerators handled above.
+    LOG(error, "Unknown CcResult enum value: %d", static_cast<int>(value));
+    LOG_ABORT("Unknown CcResult enum value");
+}
+
+/**
+ * Builds a map from host name to the connection spec ("tcp/HOST:PORT") of
+ * that host's 'config-sentinel' service port whose tags contain "rpc".
+ * Hosts without such a port get no entry, only a warning in the log.
+ **/
+std::map<std::string, std::string> specsFrom(const ModelConfig &model) {
+    std::map<std::string, std::string> checkSpecs;
+    for (const auto & h : model.hosts) {
+        bool foundSentinelPort = false;
+        for (const auto & s : h.services) {
+            if (s.name == "config-sentinel") {
+                for (const auto & p : s.ports) {
+                    if (p.tags.find("rpc") != p.tags.npos) {
+                        auto spec = fmt("tcp/%s:%d", h.name.c_str(), p.number);
+                        checkSpecs[h.name] = spec;
+                        foundSentinelPort = true;
+                    }
+                }
+            }
+        }
+        if (! foundSentinelPort) {
+            // size() is unsigned, so the conversion is %zu (%zd is the signed variant).
+            LOG(warning, "Did not find 'config-sentinel' RPC port in model for host %s [%zu services]",
+                h.name.c_str(), h.services.size());
+        }
+    }
+    return checkSpecs;
+}
+
+}
+
+/**
+ * Starts one outward check per sentinel spec found in the model, waits on the
+ * check context's latch, and compares the results with the configured limits.
+ *
+ * @param model model config listing the hosts and their services
+ * @return enoughOk: no result was missing and both limits were respected;
+ *         allOk: no CONN_FAIL and no REVERSE_FAIL result (missing results are not considered);
+ *         details: one "HOST -> description" line per checked host
+ **/
+Connectivity::CheckResult
+Connectivity::checkConnectivity(const ModelConfig &model) {
+    CheckResult result{false, false, {}};
+    const auto checkSpecs = specsFrom(model);
+    size_t clusterSize = checkSpecs.size();
+    OutwardCheckContext checkContext(clusterSize,
+                                     vespa::Defaults::vespaHostname(),
+                                     _rpcServer.getPort(),
+                                     _rpcServer.orb());
+    std::map<std::string, OutwardCheck> connectivityMap;
+    for (const auto & [ hn, spec ] : checkSpecs) {
+        connectivityMap.try_emplace(hn, spec, checkContext);
+    }
+    checkContext.latch.await();
+    size_t numFailedConns = 0;
+    size_t numFailedReverse = 0;
+    bool allChecksOk = true;
+    for (const auto & [hostname, check] : connectivityMap) {
+        if (check.result() == CcResult::CONN_FAIL) ++numFailedConns;
+        if (check.result() == CcResult::REVERSE_FAIL) ++numFailedReverse;
+        if (check.result() == CcResult::UNKNOWN) {
+            LOG(error, "Missing ConnectivityCheck result from %s", hostname.c_str());
+            allChecksOk = false;
+        }
+    }
+    if (numFailedReverse > size_t(_config.maxBadReverseCount)) {
+        LOG(warning, "%zu of %zu nodes report problems connecting to me (max is %d)",
+            numFailedReverse, clusterSize, _config.maxBadReverseCount);
+        allChecksOk = false;
+    }
+    if (numFailedConns * 100.0 > _config.maxBadOutPercent * clusterSize) {
+        // Floating-point division so %.2f shows the real fraction; clusterSize is non-zero here.
+        double pct = numFailedConns * 100.0 / clusterSize;
+        LOG(warning, "Problems connecting to %zu of %zu nodes, %.2f %% (max is %d)",
+            numFailedConns, clusterSize, pct, _config.maxBadOutPercent);
+        allChecksOk = false;
+    }
+    result.details.reserve(connectivityMap.size());
+    for (const auto & [hostname, check] : connectivityMap) {
+        std::string detail = fmt("%s -> %s", hostname.c_str(), toString(check.result()));
+        result.details.push_back(std::move(detail));
+    }
+    result.enoughOk = allChecksOk;
+    result.allOk = (numFailedConns == 0) && (numFailedReverse == 0);
+    return result;
+}
+
+}
diff --git a/configd/src/apps/sentinel/connectivity.h b/configd/src/apps/sentinel/connectivity.h
new file mode 100644
index 00000000000..d2ec075b75e
--- /dev/null
+++ b/configd/src/apps/sentinel/connectivity.h
@@ -0,0 +1,41 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "rpcserver.h"
+#include <vespa/config-sentinel.h>
+#include <vespa/config-model.h>
+#include <string>
+#include <vector>
+
+using cloud::config::SentinelConfig;
+using cloud::config::ModelConfig;
+
+namespace config::sentinel {
+
+/**
+ * Utility class for running connectivity check.
+ *
+ * Holds references to the config and RPC server given to the constructor
+ * (no copies are made), so both must stay alive while this object is used.
+ **/
+class Connectivity {
+public:
+    Connectivity(const SentinelConfig::Connectivity & config, RpcServer &rpcServer);
+    ~Connectivity();
+
+    /** Outcome of one call to checkConnectivity(). **/
+    struct CheckResult {
+        bool enoughOk;                    // no missing results, and failures within the configured limits
+        bool allOk;                       // no failed connection and no failed reverse check
+        std::vector<std::string> details; // one line per checked host
+    };
+
+    /** Checks connectivity with the config sentinels listed in the model. **/
+    CheckResult checkConnectivity(const ModelConfig &model);
+private:
+    const SentinelConfig::Connectivity & _config; // limits; not owned
+    RpcServer &_rpcServer;                        // not owned
+};
+
+}