diff options
author | Henning Baldersheim <balder@oath.com> | 2018-09-21 13:18:36 +0200 |
---|---|---|
committer | Henning Baldersheim <balder@oath.com> | 2018-09-21 13:18:36 +0200 |
commit | 950e355e0fe616a89b4e7a9ddd73251da1bea92e (patch) | |
tree | 8424e4a94fa050adf371c621c5669354e9373959 /searchlib | |
parent | d27c24d43b83a216d6f7096dae4b49a0577eccf1 (diff) |
Add crc for uncompressed content to catch shady bugs.
Diffstat (limited to 'searchlib')
4 files changed, 54 insertions, 28 deletions
diff --git a/searchlib/src/tests/docstore/document_store/document_store_test.cpp b/searchlib/src/tests/docstore/document_store/document_store_test.cpp index a36f2585f28..da0c2ba3392 100644 --- a/searchlib/src/tests/docstore/document_store/document_store_test.cpp +++ b/searchlib/src/tests/docstore/document_store/document_store_test.cpp @@ -95,10 +95,11 @@ Value createValue(vespalib::stringref s, const CompressionConfig & cfg) { return v; } void verifyValue(vespalib::stringref s, const Value & v) { - vespalib::DataBuffer buf = v.decompressed(); + Value::Result result = v.decompressed(); + ASSERT_TRUE(result.second); EXPECT_EQUAL(s.size(), v.getUncompressedSize()); EXPECT_EQUAL(7u, v.getSyncToken()); - EXPECT_EQUAL(0, memcmp(s.data(), buf.getData(), buf.getDataLen())); + EXPECT_EQUAL(0, memcmp(s.data(), result.first.getData(), result.first.getDataLen())); } TEST("require that Value can store uncompressed data") { @@ -133,4 +134,10 @@ TEST("require that Value can store zstd compressed data") { verifyValue(S1, v); } +TEST("require that Value can detect if output not equal to input") { + Value v = createValue(S1, CompressionConfig::NONE); + static_cast<uint8_t *>(v.get())[8] ^= 0xff; + EXPECT_FALSE(v.decompressed().second); +} + TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/vespa/searchlib/docstore/documentstore.cpp b/searchlib/src/vespa/searchlib/docstore/documentstore.cpp index 03fb89de1c3..513550bff96 100644 --- a/searchlib/src/vespa/searchlib/docstore/documentstore.cpp +++ b/searchlib/src/vespa/searchlib/docstore/documentstore.cpp @@ -10,6 +10,10 @@ #include <vespa/vespalib/data/databuffer.h> #include <vespa/vespalib/util/compressor.h> +#include <vespa/log/log.h> + +LOG_SETUP(".searchlib.docstore.documentstore"); + using document::DocumentTypeRepo; using vespalib::compression::CompressionConfig; @@ -39,8 +43,7 @@ DocumentVisitorAdapter::visit(uint32_t lid, vespalib::ConstBufferRef buf) { } document::Document::UP -deserializeDocument(const docstore::Value & value, const DocumentTypeRepo &repo) { - vespalib::DataBuffer uncompressed(value.decompressed()); +deserializeDocument(const vespalib::DataBuffer & uncompressed, const DocumentTypeRepo &repo) { vespalib::nbostream is(uncompressed.getData(), uncompressed.getDataLen()); return std::make_unique<document::Document>(repo, is); } @@ -91,8 +94,9 @@ BackingStore::read(DocumentIdT key, Value &value) const { void BackingStore::write(DocumentIdT lid, const Value & value) { - vespalib::DataBuffer buf = value.decompressed(); - _backingStore.write(value.getSyncToken(), lid, buf.getData(), buf.getDataLen()); + Value::Result buf = value.decompressed(); + assert(buf.second); + _backingStore.write(value.getSyncToken(), lid, buf.first.getData(), buf.first.getDataLen()); } void @@ -168,21 +172,32 @@ DocumentStore::visit(const LidVector & lids, const DocumentTypeRepo &repo, IDocu } } -document::Document::UP +std::unique_ptr<document::Document> DocumentStore::read(DocumentIdT lid, const DocumentTypeRepo &repo) const { - document::Document::UP retval; Value value; if (useCache()) { value = _cache->read(lid); - } else { - _uncached_lookups.fetch_add(1); - _store->read(lid, value); + if (value.empty()) { + return std::unique_ptr<document::Document>(); + } + Value::Result result = value.decompressed(); + if ( result.second ) { + return deserializeDocument(result.first, repo); + } else { + LOG(warning, "Summary cache for lid %u is corrupt. Invalidating and reading directly backing store", lid); + _cache->invalidate(lid); + } } + + _uncached_lookups.fetch_add(1); + _store->read(lid, value); if ( ! value.empty() ) { - retval = deserializeDocument(value, repo); + Value::Result result = value.decompressed(); + assert(result.second); + return deserializeDocument(result.first, repo); } - return retval; + return std::unique_ptr<document::Document>(); } void @@ -354,7 +369,7 @@ DocumentStore::WrapVisitor<Visitor>::visit(uint32_t lid, const void *buffer, siz value.set(std::move(buf), len); } if (! value.empty()) { - std::shared_ptr<document::Document> doc(deserializeDocument(value, _repo)); + std::shared_ptr<document::Document> doc(deserializeDocument(value.decompressed().first, _repo)); _visitor.visit(lid, doc); rewrite(lid, *doc); } else { diff --git a/searchlib/src/vespa/searchlib/docstore/value.cpp b/searchlib/src/vespa/searchlib/docstore/value.cpp index 94d0cb6fe55..61fcf8bac06 100644 --- a/searchlib/src/vespa/searchlib/docstore/value.cpp +++ b/searchlib/src/vespa/searchlib/docstore/value.cpp @@ -3,6 +3,7 @@ #include "value.h" #include <vespa/vespalib/data/databuffer.h> #include <vespa/vespalib/util/compressor.h> +#include <vespa/vespalib/xxhash/xxhash.h> using vespalib::compression::compress; using vespalib::compression::decompress; @@ -13,6 +14,7 @@ Value::Value() : _syncToken(0), _compressedSize(0), _uncompressedSize(0), + _uncompressedCrc(0), _compression(CompressionConfig::NONE) {} @@ -20,6 +22,7 @@ Value::Value(uint64_t syncToken) : _syncToken(syncToken), _compressedSize(0), _uncompressedSize(0), + _uncompressedCrc(0), _compression(CompressionConfig::NONE) {} @@ -27,6 +30,7 @@ Value::Value(const Value &rhs) : _syncToken(rhs._syncToken), _compressedSize(rhs._compressedSize), _uncompressedSize(rhs._uncompressedSize), + _uncompressedCrc(rhs._uncompressedCrc), _compression(rhs._compression), _buf(Alloc::alloc(rhs.size())) { @@ -34,12 +38,6 @@ Value::Value(const Value &rhs) } void -Value::setCompression(CompressionConfig::Type comp, size_t uncompressedSize) { - _compression = comp; - _uncompressedSize = uncompressedSize; -} - -void Value::set(vespalib::DataBuffer &&buf, ssize_t len) { set(std::move(buf), len, CompressionConfig()); } @@ -48,7 +46,8 @@ void Value::set(vespalib::DataBuffer &&buf, ssize_t len, const CompressionConfig &compression) { //Underlying buffer must be identical to allow swap. vespalib::DataBuffer compressed(buf.getData(), 0u); - CompressionConfig::Type type = compress(compression, vespalib::ConstBufferRef(buf.getData(), len), compressed, true); + vespalib::ConstBufferRef input(buf.getData(), len); + CompressionConfig::Type type = compress(compression, input, compressed, true); _compressedSize = compressed.getDataLen(); if (buf.getData() == compressed.getData()) { // Uncompressed so we can just steal the underlying buffer. @@ -60,14 +59,17 @@ Value::set(vespalib::DataBuffer &&buf, ssize_t len, const CompressionConfig &com (len == ssize_t(_compressedSize))) || ((type != CompressionConfig::NONE) && (len > ssize_t(_compressedSize)))); - setCompression(type, len); + _compression = type; + _uncompressedSize = len; + _uncompressedCrc = XXH64(input.c_str(), input.size(), 0); } -vespalib::DataBuffer +Value::Result Value::decompressed() const { vespalib::DataBuffer uncompressed(_buf.get(), (size_t) 0); decompress(getCompression(), getUncompressedSize(), vespalib::ConstBufferRef(*this, size()), uncompressed, true); - return uncompressed; + uint64_t crc = XXH64(uncompressed.getData(), uncompressed.getDataLen(), 0); + return std::make_pair<vespalib::DataBuffer, bool>(std::move(uncompressed), crc == _uncompressedCrc); } } diff --git a/searchlib/src/vespa/searchlib/docstore/value.h b/searchlib/src/vespa/searchlib/docstore/value.h index a3d6af984bd..cd60cac726a 100644 --- a/searchlib/src/vespa/searchlib/docstore/value.h +++ b/searchlib/src/vespa/searchlib/docstore/value.h @@ -11,8 +11,9 @@ class Value { public: using Alloc = vespalib::alloc::Alloc; using CompressionConfig = vespalib::compression::CompressionConfig; + using Result = std::pair<vespalib::DataBuffer, bool>; - Value(); + Value(); Value(uint64_t syncToken); Value(Value &&rhs) = default; @@ -33,7 +34,7 @@ public: // Keep buffer uncompressed void set(vespalib::DataBuffer &&buf, ssize_t len); - vespalib::DataBuffer decompressed() const; + Result decompressed() const; size_t size() const { return _compressedSize; } bool empty() const { return size() == 0; } @@ -42,8 +43,9 @@ public: void *get() { return _buf.get(); } private: uint64_t _syncToken; - size_t _compressedSize; - size_t _uncompressedSize; + size_t _compressedSize; + size_t _uncompressedSize; + uint64_t _uncompressedCrc; CompressionConfig::Type _compression; Alloc _buf; }; |