aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@oath.com>2018-09-21 13:18:36 +0200
committerHenning Baldersheim <balder@oath.com>2018-09-21 13:18:36 +0200
commit950e355e0fe616a89b4e7a9ddd73251da1bea92e (patch)
tree8424e4a94fa050adf371c621c5669354e9373959 /searchlib
parentd27c24d43b83a216d6f7096dae4b49a0577eccf1 (diff)
Add crc for uncompressed content to catch shady bugs.
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/tests/docstore/document_store/document_store_test.cpp11
-rw-r--r--searchlib/src/vespa/searchlib/docstore/documentstore.cpp39
-rw-r--r--searchlib/src/vespa/searchlib/docstore/value.cpp22
-rw-r--r--searchlib/src/vespa/searchlib/docstore/value.h10
4 files changed, 54 insertions, 28 deletions
diff --git a/searchlib/src/tests/docstore/document_store/document_store_test.cpp b/searchlib/src/tests/docstore/document_store/document_store_test.cpp
index a36f2585f28..da0c2ba3392 100644
--- a/searchlib/src/tests/docstore/document_store/document_store_test.cpp
+++ b/searchlib/src/tests/docstore/document_store/document_store_test.cpp
@@ -95,10 +95,11 @@ Value createValue(vespalib::stringref s, const CompressionConfig & cfg) {
return v;
}
void verifyValue(vespalib::stringref s, const Value & v) {
- vespalib::DataBuffer buf = v.decompressed();
+ Value::Result result = v.decompressed();
+ ASSERT_TRUE(result.second);
EXPECT_EQUAL(s.size(), v.getUncompressedSize());
EXPECT_EQUAL(7u, v.getSyncToken());
- EXPECT_EQUAL(0, memcmp(s.data(), buf.getData(), buf.getDataLen()));
+ EXPECT_EQUAL(0, memcmp(s.data(), result.first.getData(), result.first.getDataLen()));
}
TEST("require that Value can store uncompressed data") {
@@ -133,4 +134,10 @@ TEST("require that Value can store zstd compressed data") {
verifyValue(S1, v);
}
+TEST("require that Value can detect if output not equal to input") {
+ Value v = createValue(S1, CompressionConfig::NONE);
+ static_cast<uint8_t *>(v.get())[8] ^= 0xff;
+ EXPECT_FALSE(v.decompressed().second);
+}
+
TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/vespa/searchlib/docstore/documentstore.cpp b/searchlib/src/vespa/searchlib/docstore/documentstore.cpp
index 03fb89de1c3..513550bff96 100644
--- a/searchlib/src/vespa/searchlib/docstore/documentstore.cpp
+++ b/searchlib/src/vespa/searchlib/docstore/documentstore.cpp
@@ -10,6 +10,10 @@
#include <vespa/vespalib/data/databuffer.h>
#include <vespa/vespalib/util/compressor.h>
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.docstore.documentstore");
+
using document::DocumentTypeRepo;
using vespalib::compression::CompressionConfig;
@@ -39,8 +43,7 @@ DocumentVisitorAdapter::visit(uint32_t lid, vespalib::ConstBufferRef buf) {
}
document::Document::UP
-deserializeDocument(const docstore::Value & value, const DocumentTypeRepo &repo) {
- vespalib::DataBuffer uncompressed(value.decompressed());
+deserializeDocument(const vespalib::DataBuffer & uncompressed, const DocumentTypeRepo &repo) {
vespalib::nbostream is(uncompressed.getData(), uncompressed.getDataLen());
return std::make_unique<document::Document>(repo, is);
}
@@ -91,8 +94,9 @@ BackingStore::read(DocumentIdT key, Value &value) const {
void
BackingStore::write(DocumentIdT lid, const Value & value)
{
- vespalib::DataBuffer buf = value.decompressed();
- _backingStore.write(value.getSyncToken(), lid, buf.getData(), buf.getDataLen());
+ Value::Result buf = value.decompressed();
+ assert(buf.second);
+ _backingStore.write(value.getSyncToken(), lid, buf.first.getData(), buf.first.getDataLen());
}
void
@@ -168,21 +172,32 @@ DocumentStore::visit(const LidVector & lids, const DocumentTypeRepo &repo, IDocu
}
}
-document::Document::UP
+std::unique_ptr<document::Document>
DocumentStore::read(DocumentIdT lid, const DocumentTypeRepo &repo) const
{
- document::Document::UP retval;
Value value;
if (useCache()) {
value = _cache->read(lid);
- } else {
- _uncached_lookups.fetch_add(1);
- _store->read(lid, value);
+ if (value.empty()) {
+ return std::unique_ptr<document::Document>();
+ }
+ Value::Result result = value.decompressed();
+ if ( result.second ) {
+ return deserializeDocument(result.first, repo);
+ } else {
+ LOG(warning, "Summary cache for lid %u is corrupt. Invalidating and reading directly backing store", lid);
+ _cache->invalidate(lid);
+ }
}
+
+ _uncached_lookups.fetch_add(1);
+ _store->read(lid, value);
if ( ! value.empty() ) {
- retval = deserializeDocument(value, repo);
+ Value::Result result = value.decompressed();
+ assert(result.second);
+ return deserializeDocument(result.first, repo);
}
- return retval;
+ return std::unique_ptr<document::Document>();
}
void
@@ -354,7 +369,7 @@ DocumentStore::WrapVisitor<Visitor>::visit(uint32_t lid, const void *buffer, siz
value.set(std::move(buf), len);
}
if (! value.empty()) {
- std::shared_ptr<document::Document> doc(deserializeDocument(value, _repo));
+ std::shared_ptr<document::Document> doc(deserializeDocument(value.decompressed().first, _repo));
_visitor.visit(lid, doc);
rewrite(lid, *doc);
} else {
diff --git a/searchlib/src/vespa/searchlib/docstore/value.cpp b/searchlib/src/vespa/searchlib/docstore/value.cpp
index 94d0cb6fe55..61fcf8bac06 100644
--- a/searchlib/src/vespa/searchlib/docstore/value.cpp
+++ b/searchlib/src/vespa/searchlib/docstore/value.cpp
@@ -3,6 +3,7 @@
#include "value.h"
#include <vespa/vespalib/data/databuffer.h>
#include <vespa/vespalib/util/compressor.h>
+#include <vespa/vespalib/xxhash/xxhash.h>
using vespalib::compression::compress;
using vespalib::compression::decompress;
@@ -13,6 +14,7 @@ Value::Value()
: _syncToken(0),
_compressedSize(0),
_uncompressedSize(0),
+ _uncompressedCrc(0),
_compression(CompressionConfig::NONE)
{}
@@ -20,6 +22,7 @@ Value::Value(uint64_t syncToken)
: _syncToken(syncToken),
_compressedSize(0),
_uncompressedSize(0),
+ _uncompressedCrc(0),
_compression(CompressionConfig::NONE)
{}
@@ -27,6 +30,7 @@ Value::Value(const Value &rhs)
: _syncToken(rhs._syncToken),
_compressedSize(rhs._compressedSize),
_uncompressedSize(rhs._uncompressedSize),
+ _uncompressedCrc(rhs._uncompressedCrc),
_compression(rhs._compression),
_buf(Alloc::alloc(rhs.size()))
{
@@ -34,12 +38,6 @@ Value::Value(const Value &rhs)
}
void
-Value::setCompression(CompressionConfig::Type comp, size_t uncompressedSize) {
- _compression = comp;
- _uncompressedSize = uncompressedSize;
-}
-
-void
Value::set(vespalib::DataBuffer &&buf, ssize_t len) {
set(std::move(buf), len, CompressionConfig());
}
@@ -48,7 +46,8 @@ void
Value::set(vespalib::DataBuffer &&buf, ssize_t len, const CompressionConfig &compression) {
//Underlying buffer must be identical to allow swap.
vespalib::DataBuffer compressed(buf.getData(), 0u);
- CompressionConfig::Type type = compress(compression, vespalib::ConstBufferRef(buf.getData(), len), compressed, true);
+ vespalib::ConstBufferRef input(buf.getData(), len);
+ CompressionConfig::Type type = compress(compression, input, compressed, true);
_compressedSize = compressed.getDataLen();
if (buf.getData() == compressed.getData()) {
// Uncompressed so we can just steal the underlying buffer.
@@ -60,14 +59,17 @@ Value::set(vespalib::DataBuffer &&buf, ssize_t len, const CompressionConfig &com
(len == ssize_t(_compressedSize))) ||
((type != CompressionConfig::NONE) &&
(len > ssize_t(_compressedSize))));
- setCompression(type, len);
+ _compression = type;
+ _uncompressedSize = len;
+ _uncompressedCrc = XXH64(input.c_str(), input.size(), 0);
}
-vespalib::DataBuffer
+Value::Result
Value::decompressed() const {
vespalib::DataBuffer uncompressed(_buf.get(), (size_t) 0);
decompress(getCompression(), getUncompressedSize(), vespalib::ConstBufferRef(*this, size()), uncompressed, true);
- return uncompressed;
+ uint64_t crc = XXH64(uncompressed.getData(), uncompressed.getDataLen(), 0);
+ return std::make_pair<vespalib::DataBuffer, bool>(std::move(uncompressed), crc == _uncompressedCrc);
}
}
diff --git a/searchlib/src/vespa/searchlib/docstore/value.h b/searchlib/src/vespa/searchlib/docstore/value.h
index a3d6af984bd..cd60cac726a 100644
--- a/searchlib/src/vespa/searchlib/docstore/value.h
+++ b/searchlib/src/vespa/searchlib/docstore/value.h
@@ -11,8 +11,9 @@ class Value {
public:
using Alloc = vespalib::alloc::Alloc;
using CompressionConfig = vespalib::compression::CompressionConfig;
+ using Result = std::pair<vespalib::DataBuffer, bool>;
- Value();
+ Value();
Value(uint64_t syncToken);
Value(Value &&rhs) = default;
@@ -33,7 +34,7 @@ public:
// Keep buffer uncompressed
void set(vespalib::DataBuffer &&buf, ssize_t len);
- vespalib::DataBuffer decompressed() const;
+ Result decompressed() const;
size_t size() const { return _compressedSize; }
bool empty() const { return size() == 0; }
@@ -42,8 +43,9 @@ public:
void *get() { return _buf.get(); }
private:
uint64_t _syncToken;
- size_t _compressedSize;
- size_t _uncompressedSize;
+ size_t _compressedSize;
+ size_t _uncompressedSize;
+ uint64_t _uncompressedCrc;
CompressionConfig::Type _compression;
Alloc _buf;
};