Merge pull request #8563 from vespa-engine/vekterli/enable-deferred-handshake-work-in-openssl-codec-impl

Enable deferred handshake work in OpenSSL codec implementation
author: Håvard Pettersen <havardpe@gmail.com> 2019-02-20 15:27:52 +0100
committer: GitHub <noreply@github.com> 2019-02-20 15:27:52 +0100
commit: 3946ef2e62b6ff041afe127ed499ac1ee4cd1a68 (patch)
tree: bbdc56e4509b312c7c68e053b0fb0edd22f7f851 /vespalib
parent: 89bd2ef74191280ea516066acf6f95f1a28ba0b8 (diff)
parent: ad25ac52d0e6629b3aabbf243342bbaa88d508f6 (diff)
5 files changed, 165 insertions, 20 deletions
diff --git a/vespalib/src/tests/net/tls/openssl_impl/openssl_impl_test.cpp b/vespalib/src/tests/net/tls/openssl_impl/openssl_impl_test.cpp
index 1d1520c64e0..8ee10deead1 100644
--- a/vespalib/src/tests/net/tls/openssl_impl/openssl_impl_test.cpp
+++ b/vespalib/src/tests/net/tls/openssl_impl/openssl_impl_test.cpp
@@ -34,6 +34,7 @@ const char* hs_state_to_str(HandshakeResult::State state) noexcept {
     case HandshakeResult::State::Failed: return "Broken";
     case HandshakeResult::State::Done:   return "Done";
     case HandshakeResult::State::NeedsMorePeerData: return "NeedsMorePeerData";
+    case HandshakeResult::State::NeedsWork: return "NeedsWork";
     }
     abort();
 }
@@ -183,14 +184,24 @@ struct Fixture {
         return hs_result;
     }
 
+    void do_handshake_work(CryptoCodec& codec) {
+        codec.do_handshake_work();
+    }
+
     bool handshake() {
         HandshakeResult cli_res;
         HandshakeResult serv_res;
         while (!(cli_res.done() && serv_res.done())) {
-            cli_res  = do_handshake(*client, server_to_client, client_to_server);
-            serv_res = do_handshake(*server, client_to_server, server_to_client);
-            print_handshake_result("client", cli_res);
-            print_handshake_result("server", serv_res);
+            while ((cli_res  = do_handshake(*client, server_to_client, client_to_server)).needs_work()) {
+                fprintf(stderr, "doing client handshake work\n");
+                do_handshake_work(*client);
+            }
+            print_handshake_result("client handshake()", cli_res);
+            while ((serv_res = do_handshake(*server, client_to_server, server_to_client)).needs_work()) {
+                fprintf(stderr, "doing server handshake work\n");
+                do_handshake_work(*server);
+            }
+            print_handshake_result("server handshake()", serv_res);
 
             if (cli_res.failed() || serv_res.failed()) {
                 return false;
@@ -207,6 +218,21 @@ TEST_F("client and server can complete handshake", Fixture) {
     EXPECT_TRUE(f.handshake());
 }
 
+TEST_F("client handshake() initially returns NeedsWork without producing anything", Fixture) {
+    auto res = f.do_handshake(*f.client, f.server_to_client, f.client_to_server);
+    EXPECT_TRUE(res.needs_work());
+    EXPECT_EQUAL(0u, res.bytes_consumed);
+    EXPECT_EQUAL(0u, res.bytes_produced);
+}
+
+TEST_F("server handshake() returns NeedsPeerData with empty input", Fixture) {
+    auto res = f.do_handshake(*f.server, f.client_to_server, f.server_to_client);
+    EXPECT_EQUAL(static_cast<int>(HandshakeResult::State::NeedsMorePeerData),
+                 static_cast<int>(res.state));
+    EXPECT_EQUAL(0u, res.bytes_consumed);
+    EXPECT_EQUAL(0u, res.bytes_produced);
+}
+
 TEST_F("clients and servers can send single data frame after handshake (not full duplex)", Fixture) {
     ASSERT_TRUE(f.handshake());
 
diff --git a/vespalib/src/vespa/vespalib/net/tls/crypto_codec.h b/vespalib/src/vespa/vespalib/net/tls/crypto_codec.h
index 1eed1ae0612..fc5303dbc5b 100644
--- a/vespalib/src/vespa/vespalib/net/tls/crypto_codec.h
+++ b/vespalib/src/vespa/vespalib/net/tls/crypto_codec.h
@@ -13,12 +13,14 @@ struct HandshakeResult {
     enum class State {
         Failed,
         Done,
-        NeedsMorePeerData
+        NeedsMorePeerData,
+        NeedsWork
     };
     State state = State::Failed;
 
     bool failed() const noexcept { return (state == State::Failed); }
     bool done() const noexcept { return (state == State::Done); }
+    bool needs_work() const noexcept { return (state == State::NeedsWork); }
 };
 
 struct EncodeResult {
@@ -76,14 +78,56 @@ public:
     virtual size_t min_decode_buffer_size() const noexcept = 0;
 
     /*
+     * Initiates or progresses a handshake towards a peer. Guaranteed to be lightweight
+     * in the sense that it will not perform any CPU-heavy operations by itself. When
+     * handshaking requires more heavy lifting (such as cryptographic operations), handshake()
+     * will return a result with needs_work() == true. When this is the case, the caller
+     * must call do_handshake_work() before retrying handshake() again. At that point,
+     * handshake() will return the result of the CPU-heavy work (which MAY itself be
+     * needs_work() again).
+     *
+     * Basic call flow:
+     *
+     *     handshake() --> needs_work() ----.
+     *        ^                             |
+     *        |                             |
+     *        `---- do_handshake_work() <---'  (possibly in different thread)
+     *
      * Precondition:  to_peer_buf_size >= min_encode_buffer_size()
-     * Postcondition: if result.done(), the handshake process has completed
+     *
+     *                The handshake() <-> do_handshake_work() flow invariant must hold.
+     *
+     * Postcondition: If result.done(), the handshake process has completed
      *                and data may be passed through encode()/decode().
+     *
+     *                If result.needs_work(), do_handshake_work() MUST be called prior
+     *                to calling handshake() again. The next time handshake() is called,
+     *                it will return the result of the work performed as part of
+     *                do_handshake_work(). The from/to buffers MUST remain valid and
+     *                stable until do_handshake_work() is called.
+     *
+     *                If result.needs_work() it is guaranteed that zero bytes have been
+     *                consumed from the from_peer buffer or produced to the to_peer buffer.
      */
     virtual HandshakeResult handshake(const char* from_peer, size_t from_peer_buf_size,
                                       char* to_peer, size_t to_peer_buf_size) noexcept = 0;
 
     /*
+     * Perform any CPU-heavy handshake operations that have been initiated by handshake().
+     *
+     * MAY be called from a different thread than handshake() as long as caller guarantees
+     * external synchronization between the threads. MUST NOT be called concurrently with
+     * handshake() on the same instance.
+     *
+     * Precondition:  handshake() has been called immediately prior on this instance with
+     *                a result of needs_work()
+     *                do_handshake_work() has NOT been called immediately prior on this instance.
+     * Postcondition: The next call to handshake() on this instance will return the result
+     *                of the handshake work performed.
+     */
+    virtual void do_handshake_work() noexcept = 0;
+
+    /*
      * Encodes a single ciphertext frame into `ciphertext`. If plaintext_size
      * is greater than can fit into a frame, the returned result's consumed_bytes
      * field will be < plaintext_size. The number of actual ciphertext bytes produced
diff --git a/vespalib/src/vespa/vespalib/net/tls/crypto_codec_adapter.cpp b/vespalib/src/vespa/vespalib/net/tls/crypto_codec_adapter.cpp
index d8ee3ea89ab..46c3e2d3195 100644
--- a/vespalib/src/vespa/vespalib/net/tls/crypto_codec_adapter.cpp
+++ b/vespalib/src/vespa/vespalib/net/tls/crypto_codec_adapter.cpp
@@ -72,12 +72,16 @@ CryptoCodecAdapter::handshake()
     for (;;) {
         auto in = _input.obtain();
         auto out = _output.reserve(_codec->min_encode_buffer_size());
-        auto hs_res = _codec->handshake(in.data, in.size, out.data, out.size);
+        ::vespalib::net::tls::HandshakeResult hs_res;
+        while ((hs_res = _codec->handshake(in.data, in.size, out.data, out.size)).needs_work()) {
+            _codec->do_handshake_work();
+        }
         _input.evict(hs_res.bytes_consumed);
         _output.commit(hs_res.bytes_produced);
         switch (hs_res.state) {
         case ::vespalib::net::tls::HandshakeResult::State::Failed: return HandshakeResult::FAIL;
         case ::vespalib::net::tls::HandshakeResult::State::Done: return hs_try_flush();
+        case ::vespalib::net::tls::HandshakeResult::State::NeedsWork: abort(); // Impossible
         case ::vespalib::net::tls::HandshakeResult::State::NeedsMorePeerData:
             auto flush_res = hs_try_flush();
             if (flush_res != HandshakeResult::DONE) {
diff --git a/vespalib/src/vespa/vespalib/net/tls/impl/openssl_crypto_codec_impl.cpp b/vespalib/src/vespa/vespalib/net/tls/impl/openssl_crypto_codec_impl.cpp
index e8b1e32213d..3049c02b798 100644
--- a/vespalib/src/vespa/vespalib/net/tls/impl/openssl_crypto_codec_impl.cpp
+++ b/vespalib/src/vespa/vespalib/net/tls/impl/openssl_crypto_codec_impl.cpp
@@ -97,6 +97,14 @@ HandshakeResult handshake_completed() noexcept {
     return {0, 0, HandshakeResult::State::Done};
 }
 
+HandshakeResult handshake_needs_work() noexcept {
+    return {0, 0, HandshakeResult::State::NeedsWork};
+}
+
+HandshakeResult handshake_needs_peer_data() noexcept {
+    return {0, 0, HandshakeResult::State::NeedsMorePeerData};
+}
+
 HandshakeResult handshake_failed() noexcept {
     return {0, 0, HandshakeResult::State::Failed};
 }
@@ -161,7 +169,9 @@ void log_ssl_error(const char* source, int ssl_error) {
 OpenSslCryptoCodecImpl::OpenSslCryptoCodecImpl(std::shared_ptr<OpenSslTlsContextImpl> ctx, Mode mode)
     : _ctx(std::move(ctx)),
       _ssl(::SSL_new(_ctx->native_context())),
-      _mode(mode)
+      _mode(mode),
+      _deferred_handshake_params(),
+      _deferred_handshake_result()
 {
     if (!_ssl) {
         throw CryptoException("Failed to create new SSL from SSL_CTX");
@@ -208,27 +218,62 @@ OpenSslCryptoCodecImpl::OpenSslCryptoCodecImpl(std::shared_ptr<OpenSslTlsContext
 
 OpenSslCryptoCodecImpl::~OpenSslCryptoCodecImpl() = default;
 
-// TODO remove spammy logging once code is stable
-
 HandshakeResult OpenSslCryptoCodecImpl::handshake(const char* from_peer, size_t from_peer_buf_size,
-                                                  char* to_peer, size_t to_peer_buf_size) noexcept {
+                                                  char* to_peer, size_t to_peer_buf_size) noexcept
+{
     LOG_ASSERT(verify_buf(from_peer, from_peer_buf_size) && verify_buf(to_peer, to_peer_buf_size));
+    LOG_ASSERT(!_deferred_handshake_params.has_value()); // do_handshake_work() not called as expected
 
+    if (_deferred_handshake_result.has_value()) {
+        const auto result = *_deferred_handshake_result;
+        _deferred_handshake_result = std::optional<HandshakeResult>();
+        return result;
+    }
     if (SSL_is_init_finished(_ssl.get())) {
         return handshake_completed();
     }
-    ConstBufferViewGuard const_view_guard(*_input_bio, from_peer, from_peer_buf_size);
-    MutableBufferViewGuard mut_view_guard(*_output_bio, to_peer, to_peer_buf_size);
+    // We make the assumption that TLS handshake processing is primarily reactive,
+    // i.e. a handshake frame is received from the peer and this either produces
+    // output to send back and/or marks the handshake as complete or failed.
+    // One exception to this rule is if if we're a client. In this case we have to
+    // do work the first time we're called in order to prepare a ClientHello message.
+    // At that point there will be nothing on the wire to react to.
+    //
+    // Note that we will return a "needs work" false positive in the case of a short read,
+    // as whether or not a complete TLS frame has been received is entirely opaque to us.
+    // The end result will still be correct, as the do_handshake_work() call will signal
+    // "needs read" as expected, but we get extra thread round-trips and added latency.
+    // It is expected that this is not a common case.
+    const bool first_client_send = ((_mode == Mode::Client) && SSL_in_before(_ssl.get()));
+    const bool needs_work = ((from_peer_buf_size > 0) || first_client_send);
+    if (needs_work) {
+        _deferred_handshake_params = DeferredHandshakeParams(from_peer, from_peer_buf_size, to_peer, to_peer_buf_size);
+        return handshake_needs_work();
+    }
+    return handshake_needs_peer_data();
+}
+
+void OpenSslCryptoCodecImpl::do_handshake_work() noexcept {
+    LOG_ASSERT(_deferred_handshake_params.has_value());  // handshake() not called as expected
+    LOG_ASSERT(!_deferred_handshake_result.has_value()); // do_handshake_work() called multiple times without handshake()
+    const auto params = *_deferred_handshake_params;
+    _deferred_handshake_params = std::optional<DeferredHandshakeParams>();
+
+    ConstBufferViewGuard const_view_guard(*_input_bio, params.from_peer, params.from_peer_buf_size);
+    MutableBufferViewGuard mut_view_guard(*_output_bio, params.to_peer, params.to_peer_buf_size);
 
     const auto consume_res = do_handshake_and_consume_peer_input_bytes();
-    LOG_ASSERT(consume_res.bytes_produced == 0);
+    LOG_ASSERT(consume_res.bytes_produced == 0); // Measured via BIO_pending below, not from result.
     if (consume_res.failed()) {
-        return consume_res;
+        _deferred_handshake_result = consume_res;
+        return;
     }
     // SSL_do_handshake() might have produced more data to send. Note: handshake may
     // be complete at this point.
-    int produced = BIO_pending(_output_bio);
-    return handshaked_bytes(consume_res.bytes_consumed, static_cast<size_t>(produced), consume_res.state);
+    const int produced = BIO_pending(_output_bio);
+    _deferred_handshake_result = handshaked_bytes(consume_res.bytes_consumed,
+                                                  static_cast<size_t>(produced),
+                                                  consume_res.state);
 }
 
 HandshakeResult OpenSslCryptoCodecImpl::do_handshake_and_consume_peer_input_bytes() noexcept {
diff --git a/vespalib/src/vespa/vespalib/net/tls/impl/openssl_crypto_codec_impl.h b/vespalib/src/vespa/vespalib/net/tls/impl/openssl_crypto_codec_impl.h
index 3c491c2bd72..ef7e0998994 100644
--- a/vespalib/src/vespa/vespalib/net/tls/impl/openssl_crypto_codec_impl.h
+++ b/vespalib/src/vespa/vespalib/net/tls/impl/openssl_crypto_codec_impl.h
@@ -5,6 +5,7 @@
 #include <vespa/vespalib/net/tls/transport_security_options.h>
 #include <vespa/vespalib/net/tls/crypto_codec.h>
 #include <memory>
+#include <optional>
 
 namespace vespalib::net::tls { struct TlsContext; }
 
@@ -16,10 +17,31 @@ class OpenSslTlsContextImpl;
  * Frame-level OpenSSL-backed TLSv1.2/TLSv1.3 (depending on OpenSSL version)
  * crypto codec implementation.
  *
- * NOT thread safe per instance, but independent instances may be
- * used by different threads safely.
+ * NOT generally thread safe per instance, but independent instances may be
+ * used by different threads safely. One exception is that handshake() and
+ * do_handshake_work() may be called from different threads, as long as it
+ * happens with appropriate data visibility synchronization and not concurrently.
  */
 class OpenSslCryptoCodecImpl : public CryptoCodec {
+
+    struct DeferredHandshakeParams {
+        const char* from_peer          = nullptr;
+        size_t      from_peer_buf_size = 0;
+        char*       to_peer            = nullptr;
+        size_t      to_peer_buf_size   = 0;
+
+        DeferredHandshakeParams(const char* from_peer_, size_t from_peer_buf_size_,
+                                char* to_peer_, size_t to_peer_buf_size_) noexcept
+            : from_peer(from_peer_),
+              from_peer_buf_size(from_peer_buf_size_),
+              to_peer(to_peer_),
+              to_peer_buf_size(to_peer_buf_size_)
+        {}
+
+        DeferredHandshakeParams(const DeferredHandshakeParams&) noexcept = default;
+        DeferredHandshakeParams& operator=(const DeferredHandshakeParams&) noexcept = default;
+    };
+
     // The context maintains shared verification callback state, so it must be
     // kept alive explictly for at least as long as any codecs.
     std::shared_ptr<OpenSslTlsContextImpl> _ctx;
@@ -27,6 +49,8 @@ class OpenSslCryptoCodecImpl : public CryptoCodec {
     ::BIO* _input_bio;  // Owned by _ssl
     ::BIO* _output_bio; // Owned by _ssl
     Mode _mode;
+    std::optional<DeferredHandshakeParams> _deferred_handshake_params;
+    std::optional<HandshakeResult> _deferred_handshake_result;
 public:
     OpenSslCryptoCodecImpl(std::shared_ptr<OpenSslTlsContextImpl> ctx, Mode mode);
     ~OpenSslCryptoCodecImpl() override;
@@ -39,7 +63,7 @@ public:
      *    typically this expansion is only 16 octets). TLS 1.3 reduces
      *    the allowance for expansion to 256 octets."
      *
-     * We're on TLSv1.2, so make room for the worst case.
+     * We may be on TLSv1.2, so make room for the worst case.
      */
     static constexpr size_t MaximumTlsFrameSize = 16384 + 2048;
     static constexpr size_t MaximumFramePlaintextSize = 16384;
@@ -54,6 +78,8 @@ public:
     HandshakeResult handshake(const char* from_peer, size_t from_peer_buf_size,
                               char* to_peer, size_t to_peer_buf_size) noexcept override;
 
+    void do_handshake_work() noexcept override;
+
     EncodeResult encode(const char* plaintext, size_t plaintext_size,
                         char* ciphertext, size_t ciphertext_size) noexcept override;
     DecodeResult decode(const char* ciphertext, size_t ciphertext_size,
author	Håvard Pettersen <havardpe@gmail.com>	2019-02-20 15:27:52 +0100
committer	GitHub <noreply@github.com>	2019-02-20 15:27:52 +0100
commit	3946ef2e62b6ff041afe127ed499ac1ee4cd1a68 (patch)
tree	bbdc56e4509b312c7c68e053b0fb0edd22f7f851 /vespalib
parent	89bd2ef74191280ea516066acf6f95f1a28ba0b8 (diff)
parent	ad25ac52d0e6629b3aabbf243342bbaa88d508f6 (diff)