aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@yahooinc.com>2023-02-17 10:22:38 +0000
committerTor Brede Vekterli <vekterli@yahooinc.com>2023-02-17 13:20:08 +0000
commit9bad60ef6d692745fbbf98338dfb17751f47dac3 (patch)
tree282424a1d8f2072f37237522b94b10cccf0f30ef
parent8ca01ebd0196d2f01087ae1440f65e3584e87a0f (diff)
Add metrics tracking failed RPC and status page capability checks
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java3
-rw-r--r--fnet/src/tests/frt/rpc/invoke.cpp10
-rw-r--r--fnet/src/vespa/fnet/frt/require_capabilities.cpp2
-rw-r--r--storage/src/vespa/storage/frameworkimpl/status/statuswebserver.cpp2
-rw-r--r--storage/src/vespa/storage/storageserver/tls_statistics_metrics_wrapper.cpp14
-rw-r--r--storage/src/vespa/storage/storageserver/tls_statistics_metrics_wrapper.h4
-rw-r--r--vespalib/src/vespa/vespalib/net/tls/statistics.cpp16
-rw-r--r--vespalib/src/vespa/vespalib/net/tls/statistics.h39
8 files changed, 83 insertions, 7 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java
index b8e71e76afc..95ec1431cdd 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java
@@ -92,6 +92,9 @@ public class VespaMetricSet {
addMetric(metrics, "vds.server.network.server.insecure-connections-established");
addMetric(metrics, "vds.server.network.tls-connections-broken");
addMetric(metrics, "vds.server.network.failed-tls-config-reloads");
+ // C++ capability metrics
+ addMetric(metrics, "vds.server.network.rpc-capability-checks-failed");
+ addMetric(metrics, "vds.server.network.status-capability-checks-failed");
// C++ Fnet metrics
addMetric(metrics, "vds.server.fnet.num-connections");
diff --git a/fnet/src/tests/frt/rpc/invoke.cpp b/fnet/src/tests/frt/rpc/invoke.cpp
index 38f260dd202..e930c1252bf 100644
--- a/fnet/src/tests/frt/rpc/invoke.cpp
+++ b/fnet/src/tests/frt/rpc/invoke.cpp
@@ -2,6 +2,7 @@
#include <vespa/vespalib/testkit/test_kit.h>
#include <vespa/vespalib/net/socket_spec.h>
#include <vespa/vespalib/net/tls/capability_env_config.h>
+#include <vespa/vespalib/net/tls/statistics.h>
#include <vespa/vespalib/util/benchmark_timer.h>
#include <vespa/vespalib/util/latch.h>
#include <vespa/fnet/frt/supervisor.h>
@@ -16,6 +17,7 @@
using vespalib::SocketSpec;
using vespalib::BenchmarkTimer;
+using vespalib::net::tls::CapabilityStatistics;
using namespace vespalib::net::tls;
constexpr double timeout = 60.0;
@@ -486,6 +488,7 @@ TEST_F("request allowed by access filter invokes server method as usual", Fixtur
}
TEST_F("capability checking filter is enforced under mTLS unless overridden by env var", Fixture()) {
+ const auto cap_stats_before = CapabilityStatistics::get().snapshot();
MyReq req("capabilityRestricted"); // Requires content node cap set; disallowed
f1.target().InvokeSync(req.borrow(), timeout);
auto cap_mode = capability_enforcement_mode_from_env();
@@ -494,6 +497,9 @@ TEST_F("capability checking filter is enforced under mTLS unless overridden by e
// Default authz rule does not give required capabilities; must fail.
EXPECT_EQUAL(req.get().GetErrorCode(), FRTE_RPC_PERMISSION_DENIED);
EXPECT_FALSE(f1.server_instance().restricted_method_was_invoked());
+ // Permission denied should bump capability check failure statistic
+ const auto cap_stats = CapabilityStatistics::get().snapshot().subtract(cap_stats_before);
+ EXPECT_EQUAL(cap_stats.rpc_capability_checks_failed, 1u);
} else {
// Either no mTLS configured (implicit full capability set) or capabilities not enforced.
ASSERT_FALSE(req.get().IsError());
@@ -502,11 +508,15 @@ TEST_F("capability checking filter is enforced under mTLS unless overridden by e
}
TEST_F("access is allowed by capability filter when peer is granted the required capability", Fixture()) {
+ const auto cap_stats_before = CapabilityStatistics::get().snapshot();
MyReq req("capabilityAllowed"); // Requires telemetry cap set; allowed
f1.target().InvokeSync(req.borrow(), timeout);
// Should always be allowed, regardless of mTLS mode or capability enforcement
ASSERT_FALSE(req.get().IsError());
EXPECT_TRUE(f1.server_instance().restricted_method_was_invoked());
+ // Should _not_ bump capability check failure statistic
+ const auto cap_stats = CapabilityStatistics::get().snapshot().subtract(cap_stats_before);
+ EXPECT_EQUAL(cap_stats.rpc_capability_checks_failed, 0u);
}
TEST_F("access is allowed by capability filter when required capability set is empty", Fixture()) {
diff --git a/fnet/src/vespa/fnet/frt/require_capabilities.cpp b/fnet/src/vespa/fnet/frt/require_capabilities.cpp
index 6996557c91e..26504d06e0f 100644
--- a/fnet/src/vespa/fnet/frt/require_capabilities.cpp
+++ b/fnet/src/vespa/fnet/frt/require_capabilities.cpp
@@ -5,6 +5,7 @@
#include <vespa/fnet/connection.h>
#include <vespa/vespalib/net/connection_auth_context.h>
#include <vespa/vespalib/net/tls/capability_env_config.h>
+#include <vespa/vespalib/net/tls/statistics.h>
#include <vespa/log/bufferedlogger.h>
LOG_SETUP(".fnet.frt.require_capabilities");
@@ -19,6 +20,7 @@ FRT_RequireCapabilities::allow(FRT_RPCRequest& req) const noexcept
if (is_authorized) {
return true;
} else {
+ CapabilityStatistics::get().inc_rpc_capability_checks_failed();
const auto mode = capability_enforcement_mode_from_env();
if (mode == CapabilityEnforcementMode::Disable) {
return true;
diff --git a/storage/src/vespa/storage/frameworkimpl/status/statuswebserver.cpp b/storage/src/vespa/storage/frameworkimpl/status/statuswebserver.cpp
index 8690f6e122d..0b4e32d637d 100644
--- a/storage/src/vespa/storage/frameworkimpl/status/statuswebserver.cpp
+++ b/storage/src/vespa/storage/frameworkimpl/status/statuswebserver.cpp
@@ -10,6 +10,7 @@
#include <vespa/vespalib/component/vtag.h>
#include <vespa/vespalib/net/connection_auth_context.h>
#include <vespa/vespalib/net/crypto_engine.h>
+#include <vespa/vespalib/net/tls/statistics.h>
#include <vespa/config/subscription/configuri.h>
#include <vespa/config/helper/configfetcher.hpp>
#include <functional>
@@ -203,6 +204,7 @@ StatusWebServer::handlePage(const framework::HttpUrlPath& urlpath, vespalib::Por
if (auth_ctx.capabilities().contains_all(reporter->required_capabilities())) {
invoke_reporter(*reporter, urlpath, request);
} else {
+ vespalib::net::tls::CapabilityStatistics::get().inc_status_capability_checks_failed();
// TODO should print peer address as well; not currently exposed
LOG(warning, "Peer with %s denied status page access to '%s' due to insufficient "
"credentials (had %s, needed %s)",
diff --git a/storage/src/vespa/storage/storageserver/tls_statistics_metrics_wrapper.cpp b/storage/src/vespa/storage/storageserver/tls_statistics_metrics_wrapper.cpp
index 5e281152b2b..ad74e020a82 100644
--- a/storage/src/vespa/storage/storageserver/tls_statistics_metrics_wrapper.cpp
+++ b/storage/src/vespa/storage/storageserver/tls_statistics_metrics_wrapper.cpp
@@ -27,9 +27,14 @@ TlsStatisticsMetricsWrapper::TlsStatisticsMetricsWrapper(metrics::MetricSet* own
"connections broken due to failures during frame encoding or decoding", this),
failed_tls_config_reloads("failed-tls-config-reloads", {}, "Number of times "
"background reloading of TLS config has failed", this),
+ rpc_capability_checks_failed("rpc-capability-checks-failed", {},
+ "Number of RPC operations that failed to due one or more missing capabilities", this),
+ status_capability_checks_failed("status-capability-checks-failed", {},
+ "Number of status page operations that failed to due one or more missing capabilities", this),
last_client_stats_snapshot(),
last_server_stats_snapshot(),
- last_config_stats_snapshot()
+ last_config_stats_snapshot(),
+ last_capability_stats_snapshot()
{}
TlsStatisticsMetricsWrapper::~TlsStatisticsMetricsWrapper() = default;
@@ -60,9 +65,16 @@ void TlsStatisticsMetricsWrapper::update_metrics_with_snapshot_delta() {
failed_tls_config_reloads.set(config_delta.failed_config_reloads);
+ auto capability_current = vespalib::net::tls::CapabilityStatistics::get().snapshot();
+ auto capability_delta = capability_current.subtract(last_capability_stats_snapshot);
+
+ rpc_capability_checks_failed.set(capability_delta.rpc_capability_checks_failed);
+ status_capability_checks_failed.set(capability_delta.status_capability_checks_failed);
+
last_server_stats_snapshot = server_current;
last_client_stats_snapshot = client_current;
last_config_stats_snapshot = config_current;
+ last_capability_stats_snapshot = capability_current;
}
}
diff --git a/storage/src/vespa/storage/storageserver/tls_statistics_metrics_wrapper.h b/storage/src/vespa/storage/storageserver/tls_statistics_metrics_wrapper.h
index 7bb51acd1fe..daf02b53b7a 100644
--- a/storage/src/vespa/storage/storageserver/tls_statistics_metrics_wrapper.h
+++ b/storage/src/vespa/storage/storageserver/tls_statistics_metrics_wrapper.h
@@ -29,9 +29,13 @@ class TlsStatisticsMetricsWrapper : public metrics::MetricSet {
metrics::LongCountMetric failed_tls_config_reloads;
+ metrics::LongCountMetric rpc_capability_checks_failed;
+ metrics::LongCountMetric status_capability_checks_failed;
+
vespalib::net::tls::ConnectionStatistics::Snapshot last_client_stats_snapshot;
vespalib::net::tls::ConnectionStatistics::Snapshot last_server_stats_snapshot;
vespalib::net::tls::ConfigStatistics::Snapshot last_config_stats_snapshot;
+ vespalib::net::tls::CapabilityStatistics::Snapshot last_capability_stats_snapshot;
public:
explicit TlsStatisticsMetricsWrapper(metrics::MetricSet* owner);
diff --git a/vespalib/src/vespa/vespalib/net/tls/statistics.cpp b/vespalib/src/vespa/vespalib/net/tls/statistics.cpp
index a308a20bf2f..dc642518abb 100644
--- a/vespalib/src/vespa/vespalib/net/tls/statistics.cpp
+++ b/vespalib/src/vespa/vespalib/net/tls/statistics.cpp
@@ -9,6 +9,8 @@ ConnectionStatistics ConnectionStatistics::server_stats = {};
ConfigStatistics ConfigStatistics::instance = {};
+CapabilityStatistics CapabilityStatistics::instance = {};
+
ConnectionStatistics::Snapshot ConnectionStatistics::snapshot() const noexcept {
Snapshot s;
s.insecure_connections = insecure_connections.load(std::memory_order_relaxed);
@@ -43,4 +45,18 @@ ConfigStatistics::Snapshot ConfigStatistics::Snapshot::subtract(const Snapshot&
return s;
}
+CapabilityStatistics::Snapshot CapabilityStatistics::snapshot() const noexcept {
+ Snapshot s;
+ s.rpc_capability_checks_failed = rpc_capability_checks_failed.load(std::memory_order_relaxed);
+ s.status_capability_checks_failed = status_capability_checks_failed.load(std::memory_order_relaxed);
+ return s;
+}
+
+CapabilityStatistics::Snapshot CapabilityStatistics::Snapshot::subtract(const Snapshot& rhs) const noexcept {
+ Snapshot s;
+ s.rpc_capability_checks_failed = rpc_capability_checks_failed - rhs.rpc_capability_checks_failed;
+ s.status_capability_checks_failed = status_capability_checks_failed - rhs.status_capability_checks_failed;
+ return s;
+}
+
}
diff --git a/vespalib/src/vespa/vespalib/net/tls/statistics.h b/vespalib/src/vespa/vespalib/net/tls/statistics.h
index 3e94ed95590..e693b09a3d2 100644
--- a/vespalib/src/vespa/vespalib/net/tls/statistics.h
+++ b/vespalib/src/vespa/vespalib/net/tls/statistics.h
@@ -55,12 +55,12 @@ struct ConnectionStatistics {
uint64_t invalid_peer_credentials = 0;
uint64_t broken_tls_connections = 0;
- Snapshot subtract(const Snapshot& rhs) const noexcept;
+ [[nodiscard]] Snapshot subtract(const Snapshot& rhs) const noexcept;
};
- // Acquires a snapshot of statistics that is expected to be reasonably up to date.
+ // Acquires a snapshot of statistics that is expected to be reasonably up-to-date.
// Thread safe.
- Snapshot snapshot() const noexcept;
+ [[nodiscard]] Snapshot snapshot() const noexcept;
static ConnectionStatistics client_stats;
static ConnectionStatistics server_stats;
@@ -85,15 +85,42 @@ struct ConfigStatistics {
uint64_t successful_config_reloads = 0;
uint64_t failed_config_reloads = 0;
- Snapshot subtract(const Snapshot& rhs) const noexcept;
+ [[nodiscard]] Snapshot subtract(const Snapshot& rhs) const noexcept;
};
- // Acquires a snapshot of statistics that is expected to be reasonably up to date.
+ // Acquires a snapshot of statistics that is expected to be reasonably up-to-date.
// Thread safe.
- Snapshot snapshot() const noexcept;
+ [[nodiscard]] Snapshot snapshot() const noexcept;
static ConfigStatistics instance;
static ConfigStatistics& get() noexcept { return instance; }
};
+struct CapabilityStatistics {
+ std::atomic<uint64_t> rpc_capability_checks_failed = 0;
+ std::atomic<uint64_t> status_capability_checks_failed = 0;
+
+ void inc_rpc_capability_checks_failed() noexcept {
+ rpc_capability_checks_failed.fetch_add(1, std::memory_order_relaxed);
+ }
+
+ void inc_status_capability_checks_failed() noexcept {
+ status_capability_checks_failed.fetch_add(1, std::memory_order_relaxed);
+ }
+
+ struct Snapshot {
+ uint64_t rpc_capability_checks_failed = 0;
+ uint64_t status_capability_checks_failed = 0;
+
+ [[nodiscard]] Snapshot subtract(const Snapshot& rhs) const noexcept;
+ };
+
+ // Acquires a snapshot of statistics that is expected to be reasonably up-to-date.
+ // Thread safe.
+ [[nodiscard]] Snapshot snapshot() const noexcept;
+
+ static CapabilityStatistics instance;
+ static CapabilityStatistics& get() noexcept { return instance; }
+};
+
}