diff options
5 files changed, 165 insertions, 20 deletions
diff --git a/vespalib/src/tests/net/tls/openssl_impl/openssl_impl_test.cpp b/vespalib/src/tests/net/tls/openssl_impl/openssl_impl_test.cpp index 1d1520c64e0..8ee10deead1 100644 --- a/vespalib/src/tests/net/tls/openssl_impl/openssl_impl_test.cpp +++ b/vespalib/src/tests/net/tls/openssl_impl/openssl_impl_test.cpp @@ -34,6 +34,7 @@ const char* hs_state_to_str(HandshakeResult::State state) noexcept { case HandshakeResult::State::Failed: return "Broken"; case HandshakeResult::State::Done: return "Done"; case HandshakeResult::State::NeedsMorePeerData: return "NeedsMorePeerData"; + case HandshakeResult::State::NeedsWork: return "NeedsWork"; } abort(); } @@ -183,14 +184,24 @@ struct Fixture { return hs_result; } + void do_handshake_work(CryptoCodec& codec) { + codec.do_handshake_work(); + } + bool handshake() { HandshakeResult cli_res; HandshakeResult serv_res; while (!(cli_res.done() && serv_res.done())) { - cli_res = do_handshake(*client, server_to_client, client_to_server); - serv_res = do_handshake(*server, client_to_server, server_to_client); - print_handshake_result("client", cli_res); - print_handshake_result("server", serv_res); + while ((cli_res = do_handshake(*client, server_to_client, client_to_server)).needs_work()) { + fprintf(stderr, "doing client handshake work\n"); + do_handshake_work(*client); + } + print_handshake_result("client handshake()", cli_res); + while ((serv_res = do_handshake(*server, client_to_server, server_to_client)).needs_work()) { + fprintf(stderr, "doing server handshake work\n"); + do_handshake_work(*server); + } + print_handshake_result("server handshake()", serv_res); if (cli_res.failed() || serv_res.failed()) { return false; @@ -207,6 +218,21 @@ TEST_F("client and server can complete handshake", Fixture) { EXPECT_TRUE(f.handshake()); } +TEST_F("client handshake() initially returns NeedsWork without producing anything", Fixture) { + auto res = f.do_handshake(*f.client, f.server_to_client, f.client_to_server); + EXPECT_TRUE(res.needs_work()); + EXPECT_EQUAL(0u, res.bytes_consumed); + EXPECT_EQUAL(0u, res.bytes_produced); +} + +TEST_F("server handshake() returns NeedsPeerData with empty input", Fixture) { + auto res = f.do_handshake(*f.server, f.client_to_server, f.server_to_client); + EXPECT_EQUAL(static_cast<int>(HandshakeResult::State::NeedsMorePeerData), + static_cast<int>(res.state)); + EXPECT_EQUAL(0u, res.bytes_consumed); + EXPECT_EQUAL(0u, res.bytes_produced); +} + TEST_F("clients and servers can send single data frame after handshake (not full duplex)", Fixture) { ASSERT_TRUE(f.handshake()); diff --git a/vespalib/src/vespa/vespalib/net/tls/crypto_codec.h b/vespalib/src/vespa/vespalib/net/tls/crypto_codec.h index 1eed1ae0612..fc5303dbc5b 100644 --- a/vespalib/src/vespa/vespalib/net/tls/crypto_codec.h +++ b/vespalib/src/vespa/vespalib/net/tls/crypto_codec.h @@ -13,12 +13,14 @@ struct HandshakeResult { enum class State { Failed, Done, - NeedsMorePeerData + NeedsMorePeerData, + NeedsWork }; State state = State::Failed; bool failed() const noexcept { return (state == State::Failed); } bool done() const noexcept { return (state == State::Done); } + bool needs_work() const noexcept { return (state == State::NeedsWork); } }; struct EncodeResult { @@ -76,14 +78,56 @@ public: virtual size_t min_decode_buffer_size() const noexcept = 0; /* + * Initiates or progresses a handshake towards a peer. Guaranteed to be lightweight + * in the sense that it will not perform any CPU-heavy operations by itself. When + * handshaking requires more heavy lifting (such as cryptographic operations), handshake() + * will return a result with needs_work() == true. When this is the case, the caller + * must call do_handshake_work() before retrying handshake() again. At that point, + * handshake() will return the result of the CPU-heavy work (which MAY itself be + * needs_work() again). + * + * Basic call flow: + * + * handshake() --> needs_work() ----. + * ^ | + * | | + * `---- do_handshake_work() <---' (possibly in different thread) + * * Precondition: to_peer_buf_size >= min_encode_buffer_size() - * Postcondition: if result.done(), the handshake process has completed + * + * The handshake() <-> do_handshake_work() flow invariant must hold. + * + * Postcondition: If result.done(), the handshake process has completed * and data may be passed through encode()/decode(). + * + * If result.needs_work(), do_handshake_work() MUST be called prior + * to calling handshake() again. The next time handshake() is called, + * it will return the result of the work performed as part of + * do_handshake_work(). The from/to buffers MUST remain valid and + * stable until do_handshake_work() is called. + * + * If result.needs_work() it is guaranteed that zero bytes have been + * consumed from the from_peer buffer or produced to the to_peer buffer. */ virtual HandshakeResult handshake(const char* from_peer, size_t from_peer_buf_size, char* to_peer, size_t to_peer_buf_size) noexcept = 0; /* + * Perform any CPU-heavy handshake operations that have been initiated by handshake(). + * + * MAY be called from a different thread than handshake() as long as caller guarantees + * external synchronization between the threads. MUST NOT be called concurrently with + * handshake() on the same instance. + * + * Precondition: handshake() has been called immediately prior on this instance with + * a result of needs_work() + * do_handshake_work() has NOT been called immediately prior on this instance. + * Postcondition: The next call to handshake() on this instance will return the result + * of the handshake work performed. + */ + virtual void do_handshake_work() noexcept = 0; + + /* * Encodes a single ciphertext frame into `ciphertext`. If plaintext_size * is greater than can fit into a frame, the returned result's consumed_bytes * field will be < plaintext_size. The number of actual ciphertext bytes produced diff --git a/vespalib/src/vespa/vespalib/net/tls/crypto_codec_adapter.cpp b/vespalib/src/vespa/vespalib/net/tls/crypto_codec_adapter.cpp index d8ee3ea89ab..46c3e2d3195 100644 --- a/vespalib/src/vespa/vespalib/net/tls/crypto_codec_adapter.cpp +++ b/vespalib/src/vespa/vespalib/net/tls/crypto_codec_adapter.cpp @@ -72,12 +72,16 @@ CryptoCodecAdapter::handshake() for (;;) { auto in = _input.obtain(); auto out = _output.reserve(_codec->min_encode_buffer_size()); - auto hs_res = _codec->handshake(in.data, in.size, out.data, out.size); + ::vespalib::net::tls::HandshakeResult hs_res; + while ((hs_res = _codec->handshake(in.data, in.size, out.data, out.size)).needs_work()) { + _codec->do_handshake_work(); + } _input.evict(hs_res.bytes_consumed); _output.commit(hs_res.bytes_produced); switch (hs_res.state) { case ::vespalib::net::tls::HandshakeResult::State::Failed: return HandshakeResult::FAIL; case ::vespalib::net::tls::HandshakeResult::State::Done: return hs_try_flush(); + case ::vespalib::net::tls::HandshakeResult::State::NeedsWork: abort(); // Impossible case ::vespalib::net::tls::HandshakeResult::State::NeedsMorePeerData: auto flush_res = hs_try_flush(); if (flush_res != HandshakeResult::DONE) { diff --git a/vespalib/src/vespa/vespalib/net/tls/impl/openssl_crypto_codec_impl.cpp b/vespalib/src/vespa/vespalib/net/tls/impl/openssl_crypto_codec_impl.cpp index e8b1e32213d..3049c02b798 100644 --- a/vespalib/src/vespa/vespalib/net/tls/impl/openssl_crypto_codec_impl.cpp +++ b/vespalib/src/vespa/vespalib/net/tls/impl/openssl_crypto_codec_impl.cpp @@ -97,6 +97,14 @@ HandshakeResult handshake_completed() noexcept { return {0, 0, HandshakeResult::State::Done}; } +HandshakeResult handshake_needs_work() noexcept { + return {0, 0, HandshakeResult::State::NeedsWork}; +} + +HandshakeResult handshake_needs_peer_data() noexcept { + return {0, 0, HandshakeResult::State::NeedsMorePeerData}; +} + HandshakeResult handshake_failed() noexcept { return {0, 0, HandshakeResult::State::Failed}; } @@ -161,7 +169,9 @@ void log_ssl_error(const char* source, int ssl_error) { OpenSslCryptoCodecImpl::OpenSslCryptoCodecImpl(std::shared_ptr<OpenSslTlsContextImpl> ctx, Mode mode) : _ctx(std::move(ctx)), _ssl(::SSL_new(_ctx->native_context())), - _mode(mode) + _mode(mode), + _deferred_handshake_params(), + _deferred_handshake_result() { if (!_ssl) { throw CryptoException("Failed to create new SSL from SSL_CTX"); @@ -208,27 +218,62 @@ OpenSslCryptoCodecImpl::OpenSslCryptoCodecImpl(std::shared_ptr<OpenSslTlsContext OpenSslCryptoCodecImpl::~OpenSslCryptoCodecImpl() = default; -// TODO remove spammy logging once code is stable - HandshakeResult OpenSslCryptoCodecImpl::handshake(const char* from_peer, size_t from_peer_buf_size, - char* to_peer, size_t to_peer_buf_size) noexcept { + char* to_peer, size_t to_peer_buf_size) noexcept +{ LOG_ASSERT(verify_buf(from_peer, from_peer_buf_size) && verify_buf(to_peer, to_peer_buf_size)); + LOG_ASSERT(!_deferred_handshake_params.has_value()); // do_handshake_work() not called as expected + if (_deferred_handshake_result.has_value()) { + const auto result = *_deferred_handshake_result; + _deferred_handshake_result = std::optional<HandshakeResult>(); + return result; + } if (SSL_is_init_finished(_ssl.get())) { return handshake_completed(); } - ConstBufferViewGuard const_view_guard(*_input_bio, from_peer, from_peer_buf_size); - MutableBufferViewGuard mut_view_guard(*_output_bio, to_peer, to_peer_buf_size); + // We make the assumption that TLS handshake processing is primarily reactive, + // i.e. a handshake frame is received from the peer and this either produces + // output to send back and/or marks the handshake as complete or failed. + // One exception to this rule is if if we're a client. In this case we have to + // do work the first time we're called in order to prepare a ClientHello message. + // At that point there will be nothing on the wire to react to. + // + // Note that we will return a "needs work" false positive in the case of a short read, + // as whether or not a complete TLS frame has been received is entirely opaque to us. + // The end result will still be correct, as the do_handshake_work() call will signal + // "needs read" as expected, but we get extra thread round-trips and added latency. + // It is expected that this is not a common case. + const bool first_client_send = ((_mode == Mode::Client) && SSL_in_before(_ssl.get())); + const bool needs_work = ((from_peer_buf_size > 0) || first_client_send); + if (needs_work) { + _deferred_handshake_params = DeferredHandshakeParams(from_peer, from_peer_buf_size, to_peer, to_peer_buf_size); + return handshake_needs_work(); + } + return handshake_needs_peer_data(); +} + +void OpenSslCryptoCodecImpl::do_handshake_work() noexcept { + LOG_ASSERT(_deferred_handshake_params.has_value()); // handshake() not called as expected + LOG_ASSERT(!_deferred_handshake_result.has_value()); // do_handshake_work() called multiple times without handshake() + const auto params = *_deferred_handshake_params; + _deferred_handshake_params = std::optional<DeferredHandshakeParams>(); + + ConstBufferViewGuard const_view_guard(*_input_bio, params.from_peer, params.from_peer_buf_size); + MutableBufferViewGuard mut_view_guard(*_output_bio, params.to_peer, params.to_peer_buf_size); const auto consume_res = do_handshake_and_consume_peer_input_bytes(); - LOG_ASSERT(consume_res.bytes_produced == 0); + LOG_ASSERT(consume_res.bytes_produced == 0); // Measured via BIO_pending below, not from result. if (consume_res.failed()) { - return consume_res; + _deferred_handshake_result = consume_res; + return; } // SSL_do_handshake() might have produced more data to send. Note: handshake may // be complete at this point. - int produced = BIO_pending(_output_bio); - return handshaked_bytes(consume_res.bytes_consumed, static_cast<size_t>(produced), consume_res.state); + const int produced = BIO_pending(_output_bio); + _deferred_handshake_result = handshaked_bytes(consume_res.bytes_consumed, + static_cast<size_t>(produced), + consume_res.state); } HandshakeResult OpenSslCryptoCodecImpl::do_handshake_and_consume_peer_input_bytes() noexcept { diff --git a/vespalib/src/vespa/vespalib/net/tls/impl/openssl_crypto_codec_impl.h b/vespalib/src/vespa/vespalib/net/tls/impl/openssl_crypto_codec_impl.h index 3c491c2bd72..ef7e0998994 100644 --- a/vespalib/src/vespa/vespalib/net/tls/impl/openssl_crypto_codec_impl.h +++ b/vespalib/src/vespa/vespalib/net/tls/impl/openssl_crypto_codec_impl.h @@ -5,6 +5,7 @@ #include <vespa/vespalib/net/tls/transport_security_options.h> #include <vespa/vespalib/net/tls/crypto_codec.h> #include <memory> +#include <optional> namespace vespalib::net::tls { struct TlsContext; } @@ -16,10 +17,31 @@ class OpenSslTlsContextImpl; * Frame-level OpenSSL-backed TLSv1.2/TLSv1.3 (depending on OpenSSL version) * crypto codec implementation. * - * NOT thread safe per instance, but independent instances may be - * used by different threads safely. + * NOT generally thread safe per instance, but independent instances may be + * used by different threads safely. One exception is that handshake() and + * do_handshake_work() may be called from different threads, as long as it + * happens with appropriate data visibility synchronization and not concurrently. */ class OpenSslCryptoCodecImpl : public CryptoCodec { + + struct DeferredHandshakeParams { + const char* from_peer = nullptr; + size_t from_peer_buf_size = 0; + char* to_peer = nullptr; + size_t to_peer_buf_size = 0; + + DeferredHandshakeParams(const char* from_peer_, size_t from_peer_buf_size_, + char* to_peer_, size_t to_peer_buf_size_) noexcept + : from_peer(from_peer_), + from_peer_buf_size(from_peer_buf_size_), + to_peer(to_peer_), + to_peer_buf_size(to_peer_buf_size_) + {} + + DeferredHandshakeParams(const DeferredHandshakeParams&) noexcept = default; + DeferredHandshakeParams& operator=(const DeferredHandshakeParams&) noexcept = default; + }; + // The context maintains shared verification callback state, so it must be // kept alive explictly for at least as long as any codecs. std::shared_ptr<OpenSslTlsContextImpl> _ctx; @@ -27,6 +49,8 @@ class OpenSslCryptoCodecImpl : public CryptoCodec { ::BIO* _input_bio; // Owned by _ssl ::BIO* _output_bio; // Owned by _ssl Mode _mode; + std::optional<DeferredHandshakeParams> _deferred_handshake_params; + std::optional<HandshakeResult> _deferred_handshake_result; public: OpenSslCryptoCodecImpl(std::shared_ptr<OpenSslTlsContextImpl> ctx, Mode mode); ~OpenSslCryptoCodecImpl() override; @@ -39,7 +63,7 @@ public: * typically this expansion is only 16 octets). TLS 1.3 reduces * the allowance for expansion to 256 octets." * - * We're on TLSv1.2, so make room for the worst case. + * We may be on TLSv1.2, so make room for the worst case. */ static constexpr size_t MaximumTlsFrameSize = 16384 + 2048; static constexpr size_t MaximumFramePlaintextSize = 16384; @@ -54,6 +78,8 @@ public: HandshakeResult handshake(const char* from_peer, size_t from_peer_buf_size, char* to_peer, size_t to_peer_buf_size) noexcept override; + void do_handshake_work() noexcept override; + EncodeResult encode(const char* plaintext, size_t plaintext_size, char* ciphertext, size_t ciphertext_size) noexcept override; DecodeResult decode(const char* ciphertext, size_t ciphertext_size, |