diff options
author | Henning Baldersheim <balder@oath.com> | 2018-09-21 12:08:28 +0200 |
---|---|---|
committer | Henning Baldersheim <balder@oath.com> | 2018-09-21 12:09:03 +0200 |
commit | c1c5842611dbb3643f374946a8d6107246c13f5b (patch) | |
tree | 7d7ed795483ac6c4efb2d5e3ee4efd92493ae8f9 | |
parent | 0fbc4d579dd3a05a42dc224f7e883d56051052f4 (diff) |
Refactor for testability.
6 files changed, 201 insertions, 178 deletions
diff --git a/searchlib/src/tests/docstore/document_store/document_store_test.cpp b/searchlib/src/tests/docstore/document_store/document_store_test.cpp index 0ef04d0e722..fc2d2ab0536 100644 --- a/searchlib/src/tests/docstore/document_store/document_store_test.cpp +++ b/searchlib/src/tests/docstore/document_store/document_store_test.cpp @@ -1,6 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include <vespa/vespalib/testkit/test_kit.h> #include <vespa/searchlib/docstore/logdocumentstore.h> +#include <vespa/searchlib/docstore/value.h> #include <vespa/searchlib/docstore/cachestats.h> #include <vespa/document/repo/documenttyperepo.h> #include <vespa/document/fieldvalue/document.h> @@ -81,4 +82,39 @@ TEST("require that LogDocumentStore::Config equality operator detects inequality EXPECT_FALSE(C(DC(), LC().setMaxBucketSpread(7)) == C()); } +using search::docstore::Value; +vespalib::stringref S1("this is a string long enough to be compressed and is just used for sanity checking of compression" + "Adding some repeatble sequences like aaaaaaaaaaaaaaaaaaaaaa bbbbbbbbbbbbbbbbbbbbbbb to ensure compression"); + +Value createValue(vespalib::stringref s, const CompressionConfig & cfg) { + Value v(7); + vespalib::DataBuffer input; + input.writeBytes(s.data(), s.size()); + v.set(std::move(input), s.size(), cfg); + return v; +} +void verifyValue(vespalib::stringref s, const Value & v) { + vespalib::DataBuffer buf = v.decompressed(); + EXPECT_EQUAL(s.size(), v.getUncompressedSize()); + EXPECT_EQUAL(7u, v.getSyncToken()); + EXPECT_EQUAL(0, memcmp(s.data(), buf.getData(), buf.getDataLen())); +} +TEST("require that Value can store uncompressed data") { + Value v = createValue(S1, CompressionConfig::NONE); + verifyValue(S1, v); +} + +TEST("require that Value can be moved") { + Value v = createValue(S1, CompressionConfig::NONE); + Value m = std::move(v); + verifyValue(S1, m); +} + +TEST("require that Value can be copied") { + Value v = createValue(S1, CompressionConfig::NONE); + Value copy(v); + verifyValue(S1, v); + verifyValue(S1, copy); +} + TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/vespa/searchlib/docstore/CMakeLists.txt b/searchlib/src/vespa/searchlib/docstore/CMakeLists.txt index 735b836cf17..2b82d9e5af7 100644 --- a/searchlib/src/vespa/searchlib/docstore/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/docstore/CMakeLists.txt @@ -18,6 +18,7 @@ vespa_add_library(searchlib_docstore OBJECT randreaders.cpp storebybucket.cpp summaryexceptions.cpp + value.cpp visitcache.cpp writeablefilechunk.cpp DEPENDS diff --git a/searchlib/src/vespa/searchlib/docstore/documentstore.cpp b/searchlib/src/vespa/searchlib/docstore/documentstore.cpp index 95b6c3b1584..03fb89de1c3 100644 --- a/searchlib/src/vespa/searchlib/docstore/documentstore.cpp +++ b/searchlib/src/vespa/searchlib/docstore/documentstore.cpp @@ -4,6 +4,7 @@ #include "documentstore.h" #include "visitcache.h" #include "ibucketizer.h" +#include "value.h" #include <vespa/document/fieldvalue/document.h> #include <vespa/vespalib/stllike/cache.hpp> #include <vespa/vespalib/data/databuffer.h> @@ -11,8 +12,6 @@ using document::DocumentTypeRepo; using vespalib::compression::CompressionConfig; -using vespalib::compression::compress; -using vespalib::compression::decompress; namespace search { @@ -39,82 +38,19 @@ DocumentVisitorAdapter::visit(uint32_t lid, vespalib::ConstBufferRef buf) { } } +document::Document::UP +deserializeDocument(const docstore::Value & value, const DocumentTypeRepo &repo) { + vespalib::DataBuffer uncompressed(value.decompressed()); + vespalib::nbostream is(uncompressed.getData(), uncompressed.getDataLen()); + return std::make_unique<document::Document>(repo, is); +} + } using vespalib::nbostream; namespace docstore { -class Value { -public: - using Alloc = vespalib::alloc::Alloc; - typedef std::unique_ptr<Value> UP; - - Value() - : _syncToken(0), - _compressedSize(0), - _uncompressedSize(0), - _compression(CompressionConfig::NONE) - {} - - Value(uint64_t syncToken) - : _syncToken(syncToken), - _compressedSize(0), - _uncompressedSize(0), - _compression(CompressionConfig::NONE) - {} - - Value(Value &&rhs) = default; - Value &operator=(Value &&rhs) = default; - - Value(const Value &rhs) - : _syncToken(rhs._syncToken), - _compressedSize(rhs._compressedSize), - _uncompressedSize(rhs._uncompressedSize), - _compression(rhs._compression), - _buf(Alloc::alloc(rhs.size())) - { - memcpy(get(), rhs.get(), size()); - } - - void setCompression(CompressionConfig::Type comp, size_t uncompressedSize) { - _compression = comp; - _uncompressedSize = uncompressedSize; - } - uint64_t getSyncToken() const { return _syncToken; } - - CompressionConfig::Type getCompression() const { return _compression; } - - size_t getUncompressedSize() const { return _uncompressedSize; } - - /** - * Compress buffer into temporary buffer and copy temporary buffer to - * value along with compression config. - */ - void set(vespalib::DataBuffer &&buf, ssize_t len, const CompressionConfig &compression); - // Keep buffer uncompressed - void set(vespalib::DataBuffer &&buf, ssize_t len); - - /** - * Decompress value into temporary buffer and deserialize document from - * the temporary buffer. - */ - document::Document::UP deserializeDocument(const DocumentTypeRepo &repo) const; - vespalib::DataBuffer decompressed() const; - - size_t size() const { return _compressedSize; } - bool empty() const { return size() == 0; } - operator const void *() const { return _buf.get(); } - const void *get() const { return _buf.get(); } - void *get() { return _buf.get(); } -private: - uint64_t _syncToken; - size_t _compressedSize; - size_t _uncompressedSize; - CompressionConfig::Type _compression; - Alloc _buf; -}; - class BackingStore { public: BackingStore(IDataStore &store, const CompressionConfig &compression) : @@ -133,45 +69,6 @@ private: CompressionConfig _compression; }; - -void -Value::set(vespalib::DataBuffer &&buf, ssize_t len) { - set(std::move(buf), len, CompressionConfig()); -} - -void -Value::set(vespalib::DataBuffer &&buf, ssize_t len, const CompressionConfig &compression) { - //Underlying buffer must be identical to allow swap. - vespalib::DataBuffer compressed(buf.getData(), 0u); - CompressionConfig::Type type = compress(compression, vespalib::ConstBufferRef(buf.getData(), len), compressed, true); - _compressedSize = compressed.getDataLen(); - if (buf.getData() == compressed.getData()) { - // Uncompressed so we can just steal the underlying buffer. - buf.stealBuffer().swap(_buf); - } else { - compressed.stealBuffer().swap(_buf); - } - assert(((type == CompressionConfig::NONE) && - (len == ssize_t(_compressedSize))) || - ((type != CompressionConfig::NONE) && - (len > ssize_t(_compressedSize)))); - setCompression(type, len); -} - -vespalib::DataBuffer -Value::decompressed() const { - vespalib::DataBuffer uncompressed(_buf.get(), (size_t) 0); - decompress(getCompression(), getUncompressedSize(), vespalib::ConstBufferRef(*this, size()), uncompressed, true); - return uncompressed; -} - -document::Document::UP -Value::deserializeDocument(const DocumentTypeRepo &repo) const { - vespalib::DataBuffer uncompressed(decompressed()); - vespalib::nbostream is(uncompressed.getData(), uncompressed.getDataLen()); - return std::make_unique<document::Document>(repo, is); -} - void BackingStore::visit(const IDocumentStore::LidVector &lids, const DocumentTypeRepo &repo, IDocumentVisitor &visitor) const { @@ -203,8 +100,6 @@ BackingStore::reconfigure(const CompressionConfig &compression) { _compression = compression; } -} - using CacheParams = vespalib::CacheParam< vespalib::LruParam<DocumentIdT, docstore::Value>, docstore::BackingStore, @@ -216,12 +111,14 @@ public: Cache(BackingStore & b, size_t maxBytes) : vespalib::cache<CacheParams>(b, maxBytes) { } }; +} + using VisitCache = docstore::VisitCache; using docstore::Value; bool DocumentStore::Config::operator == (const Config &rhs) const { - return (_maxCacheBytes == rhs._maxCacheBytes) && + return (_maxCacheBytes == rhs._maxCacheBytes) && (_allowVisitCaching == rhs._allowVisitCaching) && (_initialCacheEntries == rhs._initialCacheEntries) && (_updateStrategy == rhs._updateStrategy) && @@ -233,9 +130,9 @@ DocumentStore::DocumentStore(const Config & config, IDataStore & store) : IDocumentStore(), _config(config), _backingStore(store), - _store(new docstore::BackingStore(_backingStore, config.getCompression())), - _cache(new Cache(*_store, config.getMaxCacheBytes())), - _visitCache(new VisitCache(store, config.getMaxCacheBytes(), config.getCompression())), + _store(std::make_unique<docstore::BackingStore>(_backingStore, config.getCompression())), + _cache(std::make_unique<docstore::Cache>(*_store, config.getMaxCacheBytes())), + _visitCache(std::make_unique<docstore::VisitCache>(store, config.getMaxCacheBytes(), config.getCompression())), _uncached_lookups(0) { _cache->reserveElements(config.getInitialCacheEntries()); @@ -283,7 +180,7 @@ DocumentStore::read(DocumentIdT lid, const DocumentTypeRepo &repo) const _store->read(lid, value); } if ( ! value.empty() ) { - retval = value.deserializeDocument(repo); + retval = deserializeDocument(value, repo); } return retval; } @@ -380,15 +277,12 @@ class DocumentStore::WrapVisitor : public IDataStoreVisitor public: void visit(uint32_t lid, const void *buffer, size_t sz) override; - WrapVisitor(Visitor &visitor, - const DocumentTypeRepo &repo, - const CompressionConfig &compresion, - IDocumentStore &ds, - uint64_t syncToken); + WrapVisitor(Visitor &visitor, const DocumentTypeRepo &repo, const CompressionConfig &compresion, + IDocumentStore &ds, uint64_t syncToken); - inline void rewrite(uint32_t lid, const document::Document &doc); - inline void rewrite(uint32_t lid); - inline void visitRemove(uint32_t lid); + void rewrite(uint32_t lid, const document::Document &doc); + void rewrite(uint32_t lid); + void visitRemove(uint32_t lid); }; @@ -406,34 +300,26 @@ public: } }; - template <> void DocumentStore::WrapVisitor<IDocumentStoreReadVisitor>:: -rewrite(uint32_t lid, const document::Document &doc) +rewrite(uint32_t , const document::Document &) { - (void) lid; - (void) doc; } template <> void -DocumentStore::WrapVisitor<IDocumentStoreReadVisitor>:: -rewrite(uint32_t lid) +DocumentStore::WrapVisitor<IDocumentStoreReadVisitor>::rewrite(uint32_t ) { - (void) lid; } - template <> void -DocumentStore::WrapVisitor<IDocumentStoreReadVisitor>:: -visitRemove(uint32_t lid) +DocumentStore::WrapVisitor<IDocumentStoreReadVisitor>::visitRemove(uint32_t lid) { _visitor.visit(lid); } - template <> void DocumentStore::WrapVisitor<IDocumentStoreRewriteVisitor>:: @@ -444,33 +330,21 @@ rewrite(uint32_t lid, const document::Document &doc) template <> void -DocumentStore::WrapVisitor<IDocumentStoreRewriteVisitor>:: -rewrite(uint32_t lid) +DocumentStore::WrapVisitor<IDocumentStoreRewriteVisitor>::rewrite(uint32_t lid) { _ds.remove(_syncToken, lid); } - template <> void -DocumentStore::WrapVisitor<IDocumentStoreRewriteVisitor>:: -visitRemove(uint32_t lid) +DocumentStore::WrapVisitor<IDocumentStoreRewriteVisitor>::visitRemove(uint32_t ) { - (void) lid; } - - template <class Visitor> void -DocumentStore::WrapVisitor<Visitor>::visit(uint32_t lid, - const void *buffer, - size_t sz) +DocumentStore::WrapVisitor<Visitor>::visit(uint32_t lid, const void *buffer, size_t sz) { - (void) lid; - (void) buffer; - (void) sz; - Value value; vespalib::DataBuffer buf(4096); buf.clear(); @@ -480,7 +354,7 @@ DocumentStore::WrapVisitor<Visitor>::visit(uint32_t lid, value.set(std::move(buf), len); } if (! value.empty()) { - std::shared_ptr<document::Document> doc(value.deserializeDocument(_repo)); + std::shared_ptr<document::Document> doc(deserializeDocument(value, _repo)); _visitor.visit(lid, doc); rewrite(lid, *doc); } else { @@ -489,14 +363,10 @@ DocumentStore::WrapVisitor<Visitor>::visit(uint32_t lid, } } - template <class Visitor> DocumentStore::WrapVisitor<Visitor>:: -WrapVisitor(Visitor &visitor, - const DocumentTypeRepo &repo, - const CompressionConfig &compression, - IDocumentStore &ds, - uint64_t syncToken) +WrapVisitor(Visitor &visitor, const DocumentTypeRepo &repo, const CompressionConfig &compression, + IDocumentStore &ds, uint64_t syncToken) : _visitor(visitor), _repo(repo), _compression(compression), @@ -505,7 +375,6 @@ WrapVisitor(Visitor &visitor, { } - void DocumentStore::accept(IDocumentStoreReadVisitor &visitor, IDocumentStoreVisitorProgress &visitorProgress, const DocumentTypeRepo &repo) @@ -516,7 +385,6 @@ DocumentStore::accept(IDocumentStoreReadVisitor &visitor, IDocumentStoreVisitorP _backingStore.accept(wrap, wrapVisitorProgress, false); } - void DocumentStore::accept(IDocumentStoreRewriteVisitor &visitor, IDocumentStoreVisitorProgress &visitorProgress, const DocumentTypeRepo &repo) @@ -527,7 +395,6 @@ DocumentStore::accept(IDocumentStoreRewriteVisitor &visitor, IDocumentStoreVisit _backingStore.accept(wrap, wrapVisitorProgress, true); } - double DocumentStore::getVisitCost() const { @@ -584,5 +451,4 @@ DocumentStore::shrinkLidSpace() _backingStore.shrinkLidSpace(); } -} // namespace search - +} diff --git a/searchlib/src/vespa/searchlib/docstore/documentstore.h b/searchlib/src/vespa/searchlib/docstore/documentstore.h index 7bc3ab1c21d..baeed106531 100644 --- a/searchlib/src/vespa/searchlib/docstore/documentstore.h +++ b/searchlib/src/vespa/searchlib/docstore/documentstore.h @@ -5,17 +5,13 @@ #include "idocumentstore.h" #include <vespa/vespalib/util/compressionconfig.h> - -namespace search { - -namespace docstore { +namespace search::docstore { class VisitCache; class BackingStore; class Cache; } -using docstore::VisitCache; -using docstore::BackingStore; -using docstore::Cache; + +namespace search { /** * Simple document store that contains serialized Document instances. @@ -67,7 +63,7 @@ public: * @param baseDir The path to a directory where "simpledocstore.dat" will exist. **/ DocumentStore(const Config & config, IDataStore & store); - ~DocumentStore(); + ~DocumentStore() override; DocumentUP read(DocumentIdT lid, const document::DocumentTypeRepo &repo) const override; void visit(const LidVector & lids, const document::DocumentTypeRepo &repo, IDocumentVisitor & visitor) const override; @@ -111,12 +107,12 @@ private: template <class> class WrapVisitor; class WrapVisitorProgress; - Config _config; - IDataStore & _backingStore; - std::unique_ptr<BackingStore> _store; - std::shared_ptr<Cache> _cache; - std::shared_ptr<VisitCache> _visitCache; - mutable std::atomic<uint64_t> _uncached_lookups; + Config _config; + IDataStore & _backingStore; + std::unique_ptr<docstore::BackingStore> _store; + std::unique_ptr<docstore::Cache> _cache; + std::unique_ptr<docstore::VisitCache> _visitCache; + mutable std::atomic<uint64_t> _uncached_lookups; }; } // namespace search diff --git a/searchlib/src/vespa/searchlib/docstore/value.cpp b/searchlib/src/vespa/searchlib/docstore/value.cpp new file mode 100644 index 00000000000..94d0cb6fe55 --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/value.cpp @@ -0,0 +1,73 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "value.h" +#include <vespa/vespalib/data/databuffer.h> +#include <vespa/vespalib/util/compressor.h> + +using vespalib::compression::compress; +using vespalib::compression::decompress; + +namespace search::docstore { + +Value::Value() + : _syncToken(0), + _compressedSize(0), + _uncompressedSize(0), + _compression(CompressionConfig::NONE) +{} + +Value::Value(uint64_t syncToken) + : _syncToken(syncToken), + _compressedSize(0), + _uncompressedSize(0), + _compression(CompressionConfig::NONE) +{} + +Value::Value(const Value &rhs) + : _syncToken(rhs._syncToken), + _compressedSize(rhs._compressedSize), + _uncompressedSize(rhs._uncompressedSize), + _compression(rhs._compression), + _buf(Alloc::alloc(rhs.size())) +{ + memcpy(get(), rhs.get(), size()); +} + +void +Value::setCompression(CompressionConfig::Type comp, size_t uncompressedSize) { + _compression = comp; + _uncompressedSize = uncompressedSize; +} + +void +Value::set(vespalib::DataBuffer &&buf, ssize_t len) { + set(std::move(buf), len, CompressionConfig()); +} + +void +Value::set(vespalib::DataBuffer &&buf, ssize_t len, const CompressionConfig &compression) { + //Underlying buffer must be identical to allow swap. + vespalib::DataBuffer compressed(buf.getData(), 0u); + CompressionConfig::Type type = compress(compression, vespalib::ConstBufferRef(buf.getData(), len), compressed, true); + _compressedSize = compressed.getDataLen(); + if (buf.getData() == compressed.getData()) { + // Uncompressed so we can just steal the underlying buffer. + buf.stealBuffer().swap(_buf); + } else { + compressed.stealBuffer().swap(_buf); + } + assert(((type == CompressionConfig::NONE) && + (len == ssize_t(_compressedSize))) || + ((type != CompressionConfig::NONE) && + (len > ssize_t(_compressedSize)))); + setCompression(type, len); +} + +vespalib::DataBuffer +Value::decompressed() const { + vespalib::DataBuffer uncompressed(_buf.get(), (size_t) 0); + decompress(getCompression(), getUncompressedSize(), vespalib::ConstBufferRef(*this, size()), uncompressed, true); + return uncompressed; +} + +} diff --git a/searchlib/src/vespa/searchlib/docstore/value.h b/searchlib/src/vespa/searchlib/docstore/value.h new file mode 100644 index 00000000000..a3d6af984bd --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/value.h @@ -0,0 +1,51 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/util/compressionconfig.h> +#include <vespa/vespalib/data/databuffer.h> + +namespace search::docstore { + +class Value { +public: + using Alloc = vespalib::alloc::Alloc; + using CompressionConfig = vespalib::compression::CompressionConfig; + + Value(); + Value(uint64_t syncToken); + + Value(Value &&rhs) = default; + Value &operator=(Value &&rhs) = default; + + Value(const Value &rhs); + + void setCompression(CompressionConfig::Type comp, size_t uncompressedSize); + uint64_t getSyncToken() const { return _syncToken; } + CompressionConfig::Type getCompression() const { return _compression; } + size_t getUncompressedSize() const { return _uncompressedSize; } + + /** + * Compress buffer into temporary buffer and copy temporary buffer to + * value along with compression config. + */ + void set(vespalib::DataBuffer &&buf, ssize_t len, const CompressionConfig &compression); + // Keep buffer uncompressed + void set(vespalib::DataBuffer &&buf, ssize_t len); + + vespalib::DataBuffer decompressed() const; + + size_t size() const { return _compressedSize; } + bool empty() const { return size() == 0; } + operator const void *() const { return _buf.get(); } + const void *get() const { return _buf.get(); } + void *get() { return _buf.get(); } +private: + uint64_t _syncToken; + size_t _compressedSize; + size_t _uncompressedSize; + CompressionConfig::Type _compression; + Alloc _buf; +}; + +} |