aboutsummaryrefslogtreecommitdiffstats
path: root/configd
diff options
context:
space:
mode:
author Arne Juul <arnej@verizonmedia.com> 2021-06-04 11:14:35 +0000
committer Arne Juul <arnej@verizonmedia.com> 2021-06-04 11:14:35 +0000
commit c585fcd3e4732c8f0c7b0ad85e3602d2a5e9d61c (patch)
tree 72cf153d2f66d077e2ad9dec8781a6f571a5456a /configd
parent 439e913530f2a566cbc514df99c982e3b318298c (diff)
add separate class for connectivity check
Diffstat (limited to 'configd')
-rw-r--r-- configd/src/apps/sentinel/CMakeLists.txt 1
-rw-r--r-- configd/src/apps/sentinel/connectivity.cpp 113
-rw-r--r-- configd/src/apps/sentinel/connectivity.h 36
3 files changed, 150 insertions, 0 deletions
diff --git a/configd/src/apps/sentinel/CMakeLists.txt b/configd/src/apps/sentinel/CMakeLists.txt
index e77abc19077..43b4f79a0b2 100644
--- a/configd/src/apps/sentinel/CMakeLists.txt
+++ b/configd/src/apps/sentinel/CMakeLists.txt
@@ -4,6 +4,7 @@ vespa_add_executable(configd_config-sentinel_app
check-completion-handler.cpp
cmdq.cpp
config-owner.cpp
+ connectivity.cpp
env.cpp
line-splitter.cpp
manager.cpp
diff --git a/configd/src/apps/sentinel/connectivity.cpp b/configd/src/apps/sentinel/connectivity.cpp
new file mode 100644
index 00000000000..4ba16f95e15
--- /dev/null
+++ b/configd/src/apps/sentinel/connectivity.cpp
@@ -0,0 +1,113 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "connectivity.h"
+#include "outward-check.h"
+#include <vespa/defaults.h>
+#include <vespa/log/log.h>
+#include <vespa/vespalib/util/exceptions.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <thread>
+#include <chrono>
+
+LOG_SETUP(".connectivity");
+
+using vespalib::make_string_short::fmt;
+using namespace std::chrono_literals;
+
+namespace config::sentinel {
+
+/**
+ * Binds the connectivity settings and the RPC server used by later
+ * checks, and logs the two configured thresholds.
+ **/
+Connectivity::Connectivity(const SentinelConfig::Connectivity & config, RpcServer &rpcServer)
+    : _config{config},
+      _rpcServer{rpcServer}
+{
+    LOG(config, "connectivity.maxBadReverseCount = %d", _config.maxBadReverseCount);
+    LOG(config, "connectivity.maxBadOutPercent = %d", _config.maxBadOutPercent);
+}
+
+// Defined here rather than in the header; nothing needs explicit cleanup.
+Connectivity::~Connectivity() = default;
+
+namespace {
+
+/**
+ * Human-readable description of a single connectivity check outcome.
+ * Logs an error and aborts if the value matches none of the enumerators.
+ **/
+const char *toString(CcResult value) {
+    const char *description = nullptr;
+    // Deliberately no 'default' label: every enumerator is listed explicitly.
+    switch (value) {
+    case CcResult::UNKNOWN:          // very very bad
+        description = "BAD: missing result";
+        break;
+    case CcResult::REVERSE_FAIL:     // very bad
+        description = "connect OK, but reverse check FAILED";
+        break;
+    case CcResult::CONN_FAIL:        // bad
+        description = "failed to connect";
+        break;
+    case CcResult::REVERSE_UNAVAIL:  // unfortunate
+        description = "connect OK (but reverse check unavailable)";
+        break;
+    case CcResult::ALL_OK:           // good
+        description = "OK: both ways connectivity verified";
+        break;
+    }
+    if (description != nullptr) {
+        return description;
+    }
+    // Only reachable when 'value' holds something outside the enum.
+    LOG(error, "Unknown CcResult enum value: %d", static_cast<int>(value));
+    LOG_ABORT("Unknown CcResult enum value");
+}
+
+/**
+ * Collect the connection spec of the config-sentinel RPC port for each
+ * host in the model.
+ *
+ * @param model model config listing hosts with their services and ports
+ * @return map from host name to a "tcp/<host>:<port>" spec. A host is
+ *         included only if it has a service named "config-sentinel" with
+ *         a port whose tags contain "rpc"; a warning is logged for every
+ *         host where no such port is found.
+ **/
+std::map<std::string, std::string> specsFrom(const ModelConfig &model) {
+    std::map<std::string, std::string> checkSpecs;
+    for (const auto & h : model.hosts) {
+        bool foundSentinelPort = false;
+        for (const auto & s : h.services) {
+            if (s.name == "config-sentinel") {
+                for (const auto & p : s.ports) {
+                    if (p.tags.find("rpc") != p.tags.npos) {
+                        auto spec = fmt("tcp/%s:%d", h.name.c_str(), p.number);
+                        // if several ports match, the last one seen wins
+                        checkSpecs[h.name] = spec;
+                        foundSentinelPort = true;
+                    }
+                }
+            }
+        }
+        if (! foundSentinelPort) {
+            // size() is an unsigned size_t, so the matching conversion is %zu
+            LOG(warning, "Did not find 'config-sentinel' RPC port in model for host %s [%zu services]",
+                h.name.c_str(), h.services.size());
+        }
+    }
+    return checkSpecs;
+}
+
+}
+
+/**
+ * Run an outward connectivity check against the config-sentinel RPC port
+ * of every host found in the model, wait for all checks to finish, and
+ * summarize the outcome.
+ *
+ * @param model model config used to find the hosts to check
+ * @return CheckResult where
+ *         - enoughOk is false if any check has no result, if more than
+ *           maxBadReverseCount reverse checks failed, or if the share of
+ *           failed outgoing connections exceeds maxBadOutPercent;
+ *         - allOk is true when there were no failed connections and no
+ *           failed reverse checks;
+ *         - details holds one "<host> -> <status>" line per checked host.
+ **/
+Connectivity::CheckResult
+Connectivity::checkConnectivity(const ModelConfig &model) {
+    CheckResult result{false, false, {}};
+    const auto checkSpecs = specsFrom(model);
+    size_t clusterSize = checkSpecs.size();
+    OutwardCheckContext checkContext(clusterSize,
+                                     vespa::Defaults::vespaHostname(),
+                                     _rpcServer.getPort(),
+                                     _rpcServer.orb());
+    std::map<std::string, OutwardCheck> connectivityMap;
+    for (const auto & [ hn, spec ] : checkSpecs) {
+        connectivityMap.try_emplace(hn, spec, checkContext);
+    }
+    // block until the shared latch in the check context is released
+    checkContext.latch.await();
+    size_t numFailedConns = 0;
+    size_t numFailedReverse = 0;
+    bool allChecksOk = true;
+    for (const auto & [hostname, check] : connectivityMap) {
+        const auto outcome = check.result();
+        if (outcome == CcResult::CONN_FAIL) ++numFailedConns;
+        if (outcome == CcResult::REVERSE_FAIL) ++numFailedReverse;
+        if (outcome == CcResult::UNKNOWN) {
+            LOG(error, "Missing ConnectivityCheck result from %s", hostname.c_str());
+            allChecksOk = false;
+        }
+    }
+    if (numFailedReverse > size_t(_config.maxBadReverseCount)) {
+        LOG(warning, "%zu of %zu nodes report problems connecting to me (max is %d)",
+            numFailedReverse, clusterSize, _config.maxBadReverseCount);
+        allChecksOk = false;
+    }
+    if (numFailedConns * 100.0 > _config.maxBadOutPercent * clusterSize) {
+        // Compute in floating point: the integer form (n * 100ul / size)
+        // truncates, so "%.2f" would always print ".00".
+        // clusterSize is non-zero here, since numFailedConns must be > 0
+        // for the condition above to hold.
+        const double pct = (100.0 * numFailedConns) / clusterSize;
+        LOG(warning, "Problems connecting to %zu of %zu nodes, %.2f %% (max is %d)",
+            numFailedConns, clusterSize, pct, _config.maxBadOutPercent);
+        allChecksOk = false;
+    }
+    result.details.reserve(connectivityMap.size());
+    for (const auto & [hostname, check] : connectivityMap) {
+        std::string detail = fmt("%s -> %s", hostname.c_str(), toString(check.result()));
+        result.details.push_back(detail);
+    }
+    result.enoughOk = allChecksOk;
+    // NOTE(review): a missing (UNKNOWN) result clears enoughOk but does not
+    // affect allOk - confirm with callers whether that is intended.
+    result.allOk = (numFailedConns == 0) && (numFailedReverse == 0);
+    return result;
+}
+
+}
diff --git a/configd/src/apps/sentinel/connectivity.h b/configd/src/apps/sentinel/connectivity.h
new file mode 100644
index 00000000000..d2ec075b75e
--- /dev/null
+++ b/configd/src/apps/sentinel/connectivity.h
@@ -0,0 +1,36 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "rpcserver.h"
+#include <vespa/config-sentinel.h>
+#include <vespa/config-model.h>
+#include <string>
+#include <vector>
+
+using cloud::config::SentinelConfig;
+using cloud::config::ModelConfig;
+
+namespace config::sentinel {
+
+/**
+ * Helper that checks connectivity towards the config sentinels on the
+ * hosts listed in a model config, and summarizes the outcome.
+ **/
+class Connectivity {
+public:
+    /** Summary produced by one checkConnectivity() run. */
+    struct CheckResult {
+        // false when a result is missing or a configured limit is exceeded
+        bool enoughOk;
+        // true when no connection and no reverse check failed
+        bool allOk;
+        // one "<host> -> <status>" line per checked host
+        std::vector<std::string> details;
+    };
+
+    Connectivity(const SentinelConfig::Connectivity & config, RpcServer &rpcServer);
+    ~Connectivity();
+
+    /** Check all hosts found in the given model and report the outcome. */
+    CheckResult checkConnectivity(const ModelConfig &model);
+
+private:
+    // NOTE(review): both members are references, so the objects passed to
+    // the constructor presumably must outlive this instance - confirm at
+    // the call site.
+    const SentinelConfig::Connectivity & _config;
+    RpcServer &_rpcServer;
+};
+
+}