summaryrefslogtreecommitdiffstats
path: root/vespalib
diff options
context:
space:
mode:
authorHÃ¥vard Pettersen <havardpe@gmail.com>2019-02-20 15:27:52 +0100
committerGitHub <noreply@github.com>2019-02-20 15:27:52 +0100
commit3946ef2e62b6ff041afe127ed499ac1ee4cd1a68 (patch)
treebbdc56e4509b312c7c68e053b0fb0edd22f7f851 /vespalib
parent89bd2ef74191280ea516066acf6f95f1a28ba0b8 (diff)
parentad25ac52d0e6629b3aabbf243342bbaa88d508f6 (diff)
Merge pull request #8563 from vespa-engine/vekterli/enable-deferred-handshake-work-in-openssl-codec-impl
Enable deferred handshake work in OpenSSL codec implementation
Diffstat (limited to 'vespalib')
-rw-r--r--vespalib/src/tests/net/tls/openssl_impl/openssl_impl_test.cpp34
-rw-r--r--vespalib/src/vespa/vespalib/net/tls/crypto_codec.h48
-rw-r--r--vespalib/src/vespa/vespalib/net/tls/crypto_codec_adapter.cpp6
-rw-r--r--vespalib/src/vespa/vespalib/net/tls/impl/openssl_crypto_codec_impl.cpp65
-rw-r--r--vespalib/src/vespa/vespalib/net/tls/impl/openssl_crypto_codec_impl.h32
5 files changed, 165 insertions, 20 deletions
diff --git a/vespalib/src/tests/net/tls/openssl_impl/openssl_impl_test.cpp b/vespalib/src/tests/net/tls/openssl_impl/openssl_impl_test.cpp
index 1d1520c64e0..8ee10deead1 100644
--- a/vespalib/src/tests/net/tls/openssl_impl/openssl_impl_test.cpp
+++ b/vespalib/src/tests/net/tls/openssl_impl/openssl_impl_test.cpp
@@ -34,6 +34,7 @@ const char* hs_state_to_str(HandshakeResult::State state) noexcept {
case HandshakeResult::State::Failed: return "Broken";
case HandshakeResult::State::Done: return "Done";
case HandshakeResult::State::NeedsMorePeerData: return "NeedsMorePeerData";
+ case HandshakeResult::State::NeedsWork: return "NeedsWork";
}
abort();
}
@@ -183,14 +184,24 @@ struct Fixture {
return hs_result;
}
+ void do_handshake_work(CryptoCodec& codec) {
+ codec.do_handshake_work();
+ }
+
bool handshake() {
HandshakeResult cli_res;
HandshakeResult serv_res;
while (!(cli_res.done() && serv_res.done())) {
- cli_res = do_handshake(*client, server_to_client, client_to_server);
- serv_res = do_handshake(*server, client_to_server, server_to_client);
- print_handshake_result("client", cli_res);
- print_handshake_result("server", serv_res);
+ while ((cli_res = do_handshake(*client, server_to_client, client_to_server)).needs_work()) {
+ fprintf(stderr, "doing client handshake work\n");
+ do_handshake_work(*client);
+ }
+ print_handshake_result("client handshake()", cli_res);
+ while ((serv_res = do_handshake(*server, client_to_server, server_to_client)).needs_work()) {
+ fprintf(stderr, "doing server handshake work\n");
+ do_handshake_work(*server);
+ }
+ print_handshake_result("server handshake()", serv_res);
if (cli_res.failed() || serv_res.failed()) {
return false;
@@ -207,6 +218,21 @@ TEST_F("client and server can complete handshake", Fixture) {
EXPECT_TRUE(f.handshake());
}
+TEST_F("client handshake() initially returns NeedsWork without producing anything", Fixture) {
+ auto res = f.do_handshake(*f.client, f.server_to_client, f.client_to_server);
+ EXPECT_TRUE(res.needs_work());
+ EXPECT_EQUAL(0u, res.bytes_consumed);
+ EXPECT_EQUAL(0u, res.bytes_produced);
+}
+
+TEST_F("server handshake() returns NeedsPeerData with empty input", Fixture) {
+ auto res = f.do_handshake(*f.server, f.client_to_server, f.server_to_client);
+ EXPECT_EQUAL(static_cast<int>(HandshakeResult::State::NeedsMorePeerData),
+ static_cast<int>(res.state));
+ EXPECT_EQUAL(0u, res.bytes_consumed);
+ EXPECT_EQUAL(0u, res.bytes_produced);
+}
+
TEST_F("clients and servers can send single data frame after handshake (not full duplex)", Fixture) {
ASSERT_TRUE(f.handshake());
diff --git a/vespalib/src/vespa/vespalib/net/tls/crypto_codec.h b/vespalib/src/vespa/vespalib/net/tls/crypto_codec.h
index 1eed1ae0612..fc5303dbc5b 100644
--- a/vespalib/src/vespa/vespalib/net/tls/crypto_codec.h
+++ b/vespalib/src/vespa/vespalib/net/tls/crypto_codec.h
@@ -13,12 +13,14 @@ struct HandshakeResult {
enum class State {
Failed,
Done,
- NeedsMorePeerData
+ NeedsMorePeerData,
+ NeedsWork
};
State state = State::Failed;
bool failed() const noexcept { return (state == State::Failed); }
bool done() const noexcept { return (state == State::Done); }
+ bool needs_work() const noexcept { return (state == State::NeedsWork); }
};
struct EncodeResult {
@@ -76,14 +78,56 @@ public:
virtual size_t min_decode_buffer_size() const noexcept = 0;
/*
+ * Initiates or progresses a handshake towards a peer. Guaranteed to be lightweight
+ * in the sense that it will not perform any CPU-heavy operations by itself. When
+ * handshaking requires more heavy lifting (such as cryptographic operations), handshake()
+ * will return a result with needs_work() == true. When this is the case, the caller
+ * must call do_handshake_work() before retrying handshake() again. At that point,
+ * handshake() will return the result of the CPU-heavy work (which MAY itself be
+ * needs_work() again).
+ *
+ * Basic call flow:
+ *
+ * handshake() --> needs_work() ----.
+ * ^ |
+ * | |
+ * `---- do_handshake_work() <---' (possibly in different thread)
+ *
* Precondition: to_peer_buf_size >= min_encode_buffer_size()
- * Postcondition: if result.done(), the handshake process has completed
+ *
+ * The handshake() <-> do_handshake_work() flow invariant must hold.
+ *
+ * Postcondition: If result.done(), the handshake process has completed
* and data may be passed through encode()/decode().
+ *
+ * If result.needs_work(), do_handshake_work() MUST be called prior
+ * to calling handshake() again. The next time handshake() is called,
+ * it will return the result of the work performed as part of
+ * do_handshake_work(). The from/to buffers MUST remain valid and
+ * stable until do_handshake_work() is called.
+ *
+ * If result.needs_work() it is guaranteed that zero bytes have been
+ * consumed from the from_peer buffer or produced to the to_peer buffer.
*/
virtual HandshakeResult handshake(const char* from_peer, size_t from_peer_buf_size,
char* to_peer, size_t to_peer_buf_size) noexcept = 0;
/*
+ * Perform any CPU-heavy handshake operations that have been initiated by handshake().
+ *
+ * MAY be called from a different thread than handshake() as long as caller guarantees
+ * external synchronization between the threads. MUST NOT be called concurrently with
+ * handshake() on the same instance.
+ *
+ * Precondition: handshake() has been called immediately prior on this instance with
+ * a result of needs_work()
+ * do_handshake_work() has NOT been called immediately prior on this instance.
+ * Postcondition: The next call to handshake() on this instance will return the result
+ * of the handshake work performed.
+ */
+ virtual void do_handshake_work() noexcept = 0;
+
+ /*
* Encodes a single ciphertext frame into `ciphertext`. If plaintext_size
* is greater than can fit into a frame, the returned result's consumed_bytes
* field will be < plaintext_size. The number of actual ciphertext bytes produced
diff --git a/vespalib/src/vespa/vespalib/net/tls/crypto_codec_adapter.cpp b/vespalib/src/vespa/vespalib/net/tls/crypto_codec_adapter.cpp
index d8ee3ea89ab..46c3e2d3195 100644
--- a/vespalib/src/vespa/vespalib/net/tls/crypto_codec_adapter.cpp
+++ b/vespalib/src/vespa/vespalib/net/tls/crypto_codec_adapter.cpp
@@ -72,12 +72,16 @@ CryptoCodecAdapter::handshake()
for (;;) {
auto in = _input.obtain();
auto out = _output.reserve(_codec->min_encode_buffer_size());
- auto hs_res = _codec->handshake(in.data, in.size, out.data, out.size);
+ ::vespalib::net::tls::HandshakeResult hs_res;
+ while ((hs_res = _codec->handshake(in.data, in.size, out.data, out.size)).needs_work()) {
+ _codec->do_handshake_work();
+ }
_input.evict(hs_res.bytes_consumed);
_output.commit(hs_res.bytes_produced);
switch (hs_res.state) {
case ::vespalib::net::tls::HandshakeResult::State::Failed: return HandshakeResult::FAIL;
case ::vespalib::net::tls::HandshakeResult::State::Done: return hs_try_flush();
+ case ::vespalib::net::tls::HandshakeResult::State::NeedsWork: abort(); // Impossible
case ::vespalib::net::tls::HandshakeResult::State::NeedsMorePeerData:
auto flush_res = hs_try_flush();
if (flush_res != HandshakeResult::DONE) {
diff --git a/vespalib/src/vespa/vespalib/net/tls/impl/openssl_crypto_codec_impl.cpp b/vespalib/src/vespa/vespalib/net/tls/impl/openssl_crypto_codec_impl.cpp
index e8b1e32213d..3049c02b798 100644
--- a/vespalib/src/vespa/vespalib/net/tls/impl/openssl_crypto_codec_impl.cpp
+++ b/vespalib/src/vespa/vespalib/net/tls/impl/openssl_crypto_codec_impl.cpp
@@ -97,6 +97,14 @@ HandshakeResult handshake_completed() noexcept {
return {0, 0, HandshakeResult::State::Done};
}
+HandshakeResult handshake_needs_work() noexcept {
+ return {0, 0, HandshakeResult::State::NeedsWork};
+}
+
+HandshakeResult handshake_needs_peer_data() noexcept {
+ return {0, 0, HandshakeResult::State::NeedsMorePeerData};
+}
+
HandshakeResult handshake_failed() noexcept {
return {0, 0, HandshakeResult::State::Failed};
}
@@ -161,7 +169,9 @@ void log_ssl_error(const char* source, int ssl_error) {
OpenSslCryptoCodecImpl::OpenSslCryptoCodecImpl(std::shared_ptr<OpenSslTlsContextImpl> ctx, Mode mode)
: _ctx(std::move(ctx)),
_ssl(::SSL_new(_ctx->native_context())),
- _mode(mode)
+ _mode(mode),
+ _deferred_handshake_params(),
+ _deferred_handshake_result()
{
if (!_ssl) {
throw CryptoException("Failed to create new SSL from SSL_CTX");
@@ -208,27 +218,62 @@ OpenSslCryptoCodecImpl::OpenSslCryptoCodecImpl(std::shared_ptr<OpenSslTlsContext
OpenSslCryptoCodecImpl::~OpenSslCryptoCodecImpl() = default;
-// TODO remove spammy logging once code is stable
-
HandshakeResult OpenSslCryptoCodecImpl::handshake(const char* from_peer, size_t from_peer_buf_size,
- char* to_peer, size_t to_peer_buf_size) noexcept {
+ char* to_peer, size_t to_peer_buf_size) noexcept
+{
LOG_ASSERT(verify_buf(from_peer, from_peer_buf_size) && verify_buf(to_peer, to_peer_buf_size));
+ LOG_ASSERT(!_deferred_handshake_params.has_value()); // do_handshake_work() not called as expected
+ if (_deferred_handshake_result.has_value()) {
+ const auto result = *_deferred_handshake_result;
+ _deferred_handshake_result = std::optional<HandshakeResult>();
+ return result;
+ }
if (SSL_is_init_finished(_ssl.get())) {
return handshake_completed();
}
- ConstBufferViewGuard const_view_guard(*_input_bio, from_peer, from_peer_buf_size);
- MutableBufferViewGuard mut_view_guard(*_output_bio, to_peer, to_peer_buf_size);
+ // We make the assumption that TLS handshake processing is primarily reactive,
+ // i.e. a handshake frame is received from the peer and this either produces
+ // output to send back and/or marks the handshake as complete or failed.
+ // One exception to this rule is if if we're a client. In this case we have to
+ // do work the first time we're called in order to prepare a ClientHello message.
+ // At that point there will be nothing on the wire to react to.
+ //
+ // Note that we will return a "needs work" false positive in the case of a short read,
+ // as whether or not a complete TLS frame has been received is entirely opaque to us.
+ // The end result will still be correct, as the do_handshake_work() call will signal
+ // "needs read" as expected, but we get extra thread round-trips and added latency.
+ // It is expected that this is not a common case.
+ const bool first_client_send = ((_mode == Mode::Client) && SSL_in_before(_ssl.get()));
+ const bool needs_work = ((from_peer_buf_size > 0) || first_client_send);
+ if (needs_work) {
+ _deferred_handshake_params = DeferredHandshakeParams(from_peer, from_peer_buf_size, to_peer, to_peer_buf_size);
+ return handshake_needs_work();
+ }
+ return handshake_needs_peer_data();
+}
+
+void OpenSslCryptoCodecImpl::do_handshake_work() noexcept {
+ LOG_ASSERT(_deferred_handshake_params.has_value()); // handshake() not called as expected
+ LOG_ASSERT(!_deferred_handshake_result.has_value()); // do_handshake_work() called multiple times without handshake()
+ const auto params = *_deferred_handshake_params;
+ _deferred_handshake_params = std::optional<DeferredHandshakeParams>();
+
+ ConstBufferViewGuard const_view_guard(*_input_bio, params.from_peer, params.from_peer_buf_size);
+ MutableBufferViewGuard mut_view_guard(*_output_bio, params.to_peer, params.to_peer_buf_size);
const auto consume_res = do_handshake_and_consume_peer_input_bytes();
- LOG_ASSERT(consume_res.bytes_produced == 0);
+ LOG_ASSERT(consume_res.bytes_produced == 0); // Measured via BIO_pending below, not from result.
if (consume_res.failed()) {
- return consume_res;
+ _deferred_handshake_result = consume_res;
+ return;
}
// SSL_do_handshake() might have produced more data to send. Note: handshake may
// be complete at this point.
- int produced = BIO_pending(_output_bio);
- return handshaked_bytes(consume_res.bytes_consumed, static_cast<size_t>(produced), consume_res.state);
+ const int produced = BIO_pending(_output_bio);
+ _deferred_handshake_result = handshaked_bytes(consume_res.bytes_consumed,
+ static_cast<size_t>(produced),
+ consume_res.state);
}
HandshakeResult OpenSslCryptoCodecImpl::do_handshake_and_consume_peer_input_bytes() noexcept {
diff --git a/vespalib/src/vespa/vespalib/net/tls/impl/openssl_crypto_codec_impl.h b/vespalib/src/vespa/vespalib/net/tls/impl/openssl_crypto_codec_impl.h
index 3c491c2bd72..ef7e0998994 100644
--- a/vespalib/src/vespa/vespalib/net/tls/impl/openssl_crypto_codec_impl.h
+++ b/vespalib/src/vespa/vespalib/net/tls/impl/openssl_crypto_codec_impl.h
@@ -5,6 +5,7 @@
#include <vespa/vespalib/net/tls/transport_security_options.h>
#include <vespa/vespalib/net/tls/crypto_codec.h>
#include <memory>
+#include <optional>
namespace vespalib::net::tls { struct TlsContext; }
@@ -16,10 +17,31 @@ class OpenSslTlsContextImpl;
* Frame-level OpenSSL-backed TLSv1.2/TLSv1.3 (depending on OpenSSL version)
* crypto codec implementation.
*
- * NOT thread safe per instance, but independent instances may be
- * used by different threads safely.
+ * NOT generally thread safe per instance, but independent instances may be
+ * used by different threads safely. One exception is that handshake() and
+ * do_handshake_work() may be called from different threads, as long as it
+ * happens with appropriate data visibility synchronization and not concurrently.
*/
class OpenSslCryptoCodecImpl : public CryptoCodec {
+
+ struct DeferredHandshakeParams {
+ const char* from_peer = nullptr;
+ size_t from_peer_buf_size = 0;
+ char* to_peer = nullptr;
+ size_t to_peer_buf_size = 0;
+
+ DeferredHandshakeParams(const char* from_peer_, size_t from_peer_buf_size_,
+ char* to_peer_, size_t to_peer_buf_size_) noexcept
+ : from_peer(from_peer_),
+ from_peer_buf_size(from_peer_buf_size_),
+ to_peer(to_peer_),
+ to_peer_buf_size(to_peer_buf_size_)
+ {}
+
+ DeferredHandshakeParams(const DeferredHandshakeParams&) noexcept = default;
+ DeferredHandshakeParams& operator=(const DeferredHandshakeParams&) noexcept = default;
+ };
+
// The context maintains shared verification callback state, so it must be
// kept alive explictly for at least as long as any codecs.
std::shared_ptr<OpenSslTlsContextImpl> _ctx;
@@ -27,6 +49,8 @@ class OpenSslCryptoCodecImpl : public CryptoCodec {
::BIO* _input_bio; // Owned by _ssl
::BIO* _output_bio; // Owned by _ssl
Mode _mode;
+ std::optional<DeferredHandshakeParams> _deferred_handshake_params;
+ std::optional<HandshakeResult> _deferred_handshake_result;
public:
OpenSslCryptoCodecImpl(std::shared_ptr<OpenSslTlsContextImpl> ctx, Mode mode);
~OpenSslCryptoCodecImpl() override;
@@ -39,7 +63,7 @@ public:
* typically this expansion is only 16 octets). TLS 1.3 reduces
* the allowance for expansion to 256 octets."
*
- * We're on TLSv1.2, so make room for the worst case.
+ * We may be on TLSv1.2, so make room for the worst case.
*/
static constexpr size_t MaximumTlsFrameSize = 16384 + 2048;
static constexpr size_t MaximumFramePlaintextSize = 16384;
@@ -54,6 +78,8 @@ public:
HandshakeResult handshake(const char* from_peer, size_t from_peer_buf_size,
char* to_peer, size_t to_peer_buf_size) noexcept override;
+ void do_handshake_work() noexcept override;
+
EncodeResult encode(const char* plaintext, size_t plaintext_size,
char* ciphertext, size_t ciphertext_size) noexcept override;
DecodeResult decode(const char* ciphertext, size_t ciphertext_size,