diff options
author | Tor Brede Vekterli <vekterli@yahooinc.com> | 2023-02-17 10:22:38 +0000 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@yahooinc.com> | 2023-02-17 13:20:08 +0000 |
commit | 9bad60ef6d692745fbbf98338dfb17751f47dac3 (patch) | |
tree | 282424a1d8f2072f37237522b94b10cccf0f30ef | |
parent | 8ca01ebd0196d2f01087ae1440f65e3584e87a0f (diff) |
Add metrics tracking failed RPC and status page capability checks
8 files changed, 83 insertions, 7 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java index b8e71e76afc..95ec1431cdd 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java @@ -92,6 +92,9 @@ public class VespaMetricSet { addMetric(metrics, "vds.server.network.server.insecure-connections-established"); addMetric(metrics, "vds.server.network.tls-connections-broken"); addMetric(metrics, "vds.server.network.failed-tls-config-reloads"); + // C++ capability metrics + addMetric(metrics, "vds.server.network.rpc-capability-checks-failed"); + addMetric(metrics, "vds.server.network.status-capability-checks-failed"); // C++ Fnet metrics addMetric(metrics, "vds.server.fnet.num-connections"); diff --git a/fnet/src/tests/frt/rpc/invoke.cpp b/fnet/src/tests/frt/rpc/invoke.cpp index 38f260dd202..e930c1252bf 100644 --- a/fnet/src/tests/frt/rpc/invoke.cpp +++ b/fnet/src/tests/frt/rpc/invoke.cpp @@ -2,6 +2,7 @@ #include <vespa/vespalib/testkit/test_kit.h> #include <vespa/vespalib/net/socket_spec.h> #include <vespa/vespalib/net/tls/capability_env_config.h> +#include <vespa/vespalib/net/tls/statistics.h> #include <vespa/vespalib/util/benchmark_timer.h> #include <vespa/vespalib/util/latch.h> #include <vespa/fnet/frt/supervisor.h> @@ -16,6 +17,7 @@ using vespalib::SocketSpec; using vespalib::BenchmarkTimer; +using vespalib::net::tls::CapabilityStatistics; using namespace vespalib::net::tls; constexpr double timeout = 60.0; @@ -486,6 +488,7 @@ TEST_F("request allowed by access filter invokes server method as usual", Fixtur } TEST_F("capability checking filter is enforced under mTLS unless overridden by env var", Fixture()) { + const auto cap_stats_before = CapabilityStatistics::get().snapshot(); MyReq req("capabilityRestricted"); // Requires content node cap set; disallowed f1.target().InvokeSync(req.borrow(), timeout); auto cap_mode = capability_enforcement_mode_from_env(); @@ -494,6 +497,9 @@ TEST_F("capability checking filter is enforced under mTLS unless overridden by e // Default authz rule does not give required capabilities; must fail. EXPECT_EQUAL(req.get().GetErrorCode(), FRTE_RPC_PERMISSION_DENIED); EXPECT_FALSE(f1.server_instance().restricted_method_was_invoked()); + // Permission denied should bump capability check failure statistic + const auto cap_stats = CapabilityStatistics::get().snapshot().subtract(cap_stats_before); + EXPECT_EQUAL(cap_stats.rpc_capability_checks_failed, 1u); } else { // Either no mTLS configured (implicit full capability set) or capabilities not enforced. ASSERT_FALSE(req.get().IsError()); @@ -502,11 +508,15 @@ TEST_F("capability checking filter is enforced under mTLS unless overridden by e } TEST_F("access is allowed by capability filter when peer is granted the required capability", Fixture()) { + const auto cap_stats_before = CapabilityStatistics::get().snapshot(); MyReq req("capabilityAllowed"); // Requires telemetry cap set; allowed f1.target().InvokeSync(req.borrow(), timeout); // Should always be allowed, regardless of mTLS mode or capability enforcement ASSERT_FALSE(req.get().IsError()); EXPECT_TRUE(f1.server_instance().restricted_method_was_invoked()); + // Should _not_ bump capability check failure statistic + const auto cap_stats = CapabilityStatistics::get().snapshot().subtract(cap_stats_before); + EXPECT_EQUAL(cap_stats.rpc_capability_checks_failed, 0u); } TEST_F("access is allowed by capability filter when required capability set is empty", Fixture()) { diff --git a/fnet/src/vespa/fnet/frt/require_capabilities.cpp b/fnet/src/vespa/fnet/frt/require_capabilities.cpp index 6996557c91e..26504d06e0f 100644 --- a/fnet/src/vespa/fnet/frt/require_capabilities.cpp +++ b/fnet/src/vespa/fnet/frt/require_capabilities.cpp @@ -5,6 +5,7 @@ #include <vespa/fnet/connection.h> #include <vespa/vespalib/net/connection_auth_context.h> #include <vespa/vespalib/net/tls/capability_env_config.h> +#include <vespa/vespalib/net/tls/statistics.h> #include <vespa/log/bufferedlogger.h> LOG_SETUP(".fnet.frt.require_capabilities"); @@ -19,6 +20,7 @@ FRT_RequireCapabilities::allow(FRT_RPCRequest& req) const noexcept if (is_authorized) { return true; } else { + CapabilityStatistics::get().inc_rpc_capability_checks_failed(); const auto mode = capability_enforcement_mode_from_env(); if (mode == CapabilityEnforcementMode::Disable) { return true; diff --git a/storage/src/vespa/storage/frameworkimpl/status/statuswebserver.cpp b/storage/src/vespa/storage/frameworkimpl/status/statuswebserver.cpp index 8690f6e122d..0b4e32d637d 100644 --- a/storage/src/vespa/storage/frameworkimpl/status/statuswebserver.cpp +++ b/storage/src/vespa/storage/frameworkimpl/status/statuswebserver.cpp @@ -10,6 +10,7 @@ #include <vespa/vespalib/component/vtag.h> #include <vespa/vespalib/net/connection_auth_context.h> #include <vespa/vespalib/net/crypto_engine.h> +#include <vespa/vespalib/net/tls/statistics.h> #include <vespa/config/subscription/configuri.h> #include <vespa/config/helper/configfetcher.hpp> #include <functional> @@ -203,6 +204,7 @@ StatusWebServer::handlePage(const framework::HttpUrlPath& urlpath, vespalib::Por if (auth_ctx.capabilities().contains_all(reporter->required_capabilities())) { invoke_reporter(*reporter, urlpath, request); } else { + vespalib::net::tls::CapabilityStatistics::get().inc_status_capability_checks_failed(); // TODO should print peer address as well; not currently exposed LOG(warning, "Peer with %s denied status page access to '%s' due to insufficient " "credentials (had %s, needed %s)", diff --git a/storage/src/vespa/storage/storageserver/tls_statistics_metrics_wrapper.cpp b/storage/src/vespa/storage/storageserver/tls_statistics_metrics_wrapper.cpp index 5e281152b2b..ad74e020a82 100644 --- a/storage/src/vespa/storage/storageserver/tls_statistics_metrics_wrapper.cpp +++ b/storage/src/vespa/storage/storageserver/tls_statistics_metrics_wrapper.cpp @@ -27,9 +27,14 @@ TlsStatisticsMetricsWrapper::TlsStatisticsMetricsWrapper(metrics::MetricSet* own "connections broken due to failures during frame encoding or decoding", this), failed_tls_config_reloads("failed-tls-config-reloads", {}, "Number of times " "background reloading of TLS config has failed", this), + rpc_capability_checks_failed("rpc-capability-checks-failed", {}, + "Number of RPC operations that failed to due one or more missing capabilities", this), + status_capability_checks_failed("status-capability-checks-failed", {}, + "Number of status page operations that failed to due one or more missing capabilities", this), last_client_stats_snapshot(), last_server_stats_snapshot(), - last_config_stats_snapshot() + last_config_stats_snapshot(), + last_capability_stats_snapshot() {} TlsStatisticsMetricsWrapper::~TlsStatisticsMetricsWrapper() = default; @@ -60,9 +65,16 @@ void TlsStatisticsMetricsWrapper::update_metrics_with_snapshot_delta() { failed_tls_config_reloads.set(config_delta.failed_config_reloads); + auto capability_current = vespalib::net::tls::CapabilityStatistics::get().snapshot(); + auto capability_delta = capability_current.subtract(last_capability_stats_snapshot); + + rpc_capability_checks_failed.set(capability_delta.rpc_capability_checks_failed); + status_capability_checks_failed.set(capability_delta.status_capability_checks_failed); + last_server_stats_snapshot = server_current; last_client_stats_snapshot = client_current; last_config_stats_snapshot = config_current; + last_capability_stats_snapshot = capability_current; } } diff --git a/storage/src/vespa/storage/storageserver/tls_statistics_metrics_wrapper.h b/storage/src/vespa/storage/storageserver/tls_statistics_metrics_wrapper.h index 7bb51acd1fe..daf02b53b7a 100644 --- a/storage/src/vespa/storage/storageserver/tls_statistics_metrics_wrapper.h +++ b/storage/src/vespa/storage/storageserver/tls_statistics_metrics_wrapper.h @@ -29,9 +29,13 @@ class TlsStatisticsMetricsWrapper : public metrics::MetricSet { metrics::LongCountMetric failed_tls_config_reloads; + metrics::LongCountMetric rpc_capability_checks_failed; + metrics::LongCountMetric status_capability_checks_failed; + vespalib::net::tls::ConnectionStatistics::Snapshot last_client_stats_snapshot; vespalib::net::tls::ConnectionStatistics::Snapshot last_server_stats_snapshot; vespalib::net::tls::ConfigStatistics::Snapshot last_config_stats_snapshot; + vespalib::net::tls::CapabilityStatistics::Snapshot last_capability_stats_snapshot; public: explicit TlsStatisticsMetricsWrapper(metrics::MetricSet* owner); diff --git a/vespalib/src/vespa/vespalib/net/tls/statistics.cpp b/vespalib/src/vespa/vespalib/net/tls/statistics.cpp index a308a20bf2f..dc642518abb 100644 --- a/vespalib/src/vespa/vespalib/net/tls/statistics.cpp +++ b/vespalib/src/vespa/vespalib/net/tls/statistics.cpp @@ -9,6 +9,8 @@ ConnectionStatistics ConnectionStatistics::server_stats = {}; ConfigStatistics ConfigStatistics::instance = {}; +CapabilityStatistics CapabilityStatistics::instance = {}; + ConnectionStatistics::Snapshot ConnectionStatistics::snapshot() const noexcept { Snapshot s; s.insecure_connections = insecure_connections.load(std::memory_order_relaxed); @@ -43,4 +45,18 @@ ConfigStatistics::Snapshot ConfigStatistics::Snapshot::subtract(const Snapshot& return s; } +CapabilityStatistics::Snapshot CapabilityStatistics::snapshot() const noexcept { + Snapshot s; + s.rpc_capability_checks_failed = rpc_capability_checks_failed.load(std::memory_order_relaxed); + s.status_capability_checks_failed = status_capability_checks_failed.load(std::memory_order_relaxed); + return s; +} + +CapabilityStatistics::Snapshot CapabilityStatistics::Snapshot::subtract(const Snapshot& rhs) const noexcept { + Snapshot s; + s.rpc_capability_checks_failed = rpc_capability_checks_failed - rhs.rpc_capability_checks_failed; + s.status_capability_checks_failed = status_capability_checks_failed - rhs.status_capability_checks_failed; + return s; +} + } diff --git a/vespalib/src/vespa/vespalib/net/tls/statistics.h b/vespalib/src/vespa/vespalib/net/tls/statistics.h index 3e94ed95590..e693b09a3d2 100644 --- a/vespalib/src/vespa/vespalib/net/tls/statistics.h +++ b/vespalib/src/vespa/vespalib/net/tls/statistics.h @@ -55,12 +55,12 @@ struct ConnectionStatistics { uint64_t invalid_peer_credentials = 0; uint64_t broken_tls_connections = 0; - Snapshot subtract(const Snapshot& rhs) const noexcept; + [[nodiscard]] Snapshot subtract(const Snapshot& rhs) const noexcept; }; - // Acquires a snapshot of statistics that is expected to be reasonably up to date. + // Acquires a snapshot of statistics that is expected to be reasonably up-to-date. // Thread safe. - Snapshot snapshot() const noexcept; + [[nodiscard]] Snapshot snapshot() const noexcept; static ConnectionStatistics client_stats; static ConnectionStatistics server_stats; @@ -85,15 +85,42 @@ struct ConfigStatistics { uint64_t successful_config_reloads = 0; uint64_t failed_config_reloads = 0; - Snapshot subtract(const Snapshot& rhs) const noexcept; + [[nodiscard]] Snapshot subtract(const Snapshot& rhs) const noexcept; }; - // Acquires a snapshot of statistics that is expected to be reasonably up to date. + // Acquires a snapshot of statistics that is expected to be reasonably up-to-date. // Thread safe. - Snapshot snapshot() const noexcept; + [[nodiscard]] Snapshot snapshot() const noexcept; static ConfigStatistics instance; static ConfigStatistics& get() noexcept { return instance; } }; +struct CapabilityStatistics { + std::atomic<uint64_t> rpc_capability_checks_failed = 0; + std::atomic<uint64_t> status_capability_checks_failed = 0; + + void inc_rpc_capability_checks_failed() noexcept { + rpc_capability_checks_failed.fetch_add(1, std::memory_order_relaxed); + } + + void inc_status_capability_checks_failed() noexcept { + status_capability_checks_failed.fetch_add(1, std::memory_order_relaxed); + } + + struct Snapshot { + uint64_t rpc_capability_checks_failed = 0; + uint64_t status_capability_checks_failed = 0; + + [[nodiscard]] Snapshot subtract(const Snapshot& rhs) const noexcept; + }; + + // Acquires a snapshot of statistics that is expected to be reasonably up-to-date. + // Thread safe. + [[nodiscard]] Snapshot snapshot() const noexcept; + + static CapabilityStatistics instance; + static CapabilityStatistics& get() noexcept { return instance; } +}; + } |