summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@oath.com>2018-09-21 12:08:28 +0200
committerHenning Baldersheim <balder@oath.com>2018-09-21 12:09:03 +0200
commitc1c5842611dbb3643f374946a8d6107246c13f5b (patch)
tree7d7ed795483ac6c4efb2d5e3ee4efd92493ae8f9 /searchlib
parent0fbc4d579dd3a05a42dc224f7e883d56051052f4 (diff)
Refactor for testability.
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/tests/docstore/document_store/document_store_test.cpp36
-rw-r--r--searchlib/src/vespa/searchlib/docstore/CMakeLists.txt1
-rw-r--r--searchlib/src/vespa/searchlib/docstore/documentstore.cpp194
-rw-r--r--searchlib/src/vespa/searchlib/docstore/documentstore.h24
-rw-r--r--searchlib/src/vespa/searchlib/docstore/value.cpp73
-rw-r--r--searchlib/src/vespa/searchlib/docstore/value.h51
6 files changed, 201 insertions, 178 deletions
diff --git a/searchlib/src/tests/docstore/document_store/document_store_test.cpp b/searchlib/src/tests/docstore/document_store/document_store_test.cpp
index 0ef04d0e722..fc2d2ab0536 100644
--- a/searchlib/src/tests/docstore/document_store/document_store_test.cpp
+++ b/searchlib/src/tests/docstore/document_store/document_store_test.cpp
@@ -1,6 +1,7 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/vespalib/testkit/test_kit.h>
#include <vespa/searchlib/docstore/logdocumentstore.h>
+#include <vespa/searchlib/docstore/value.h>
#include <vespa/searchlib/docstore/cachestats.h>
#include <vespa/document/repo/documenttyperepo.h>
#include <vespa/document/fieldvalue/document.h>
@@ -81,4 +82,39 @@ TEST("require that LogDocumentStore::Config equality operator detects inequality
EXPECT_FALSE(C(DC(), LC().setMaxBucketSpread(7)) == C());
}
+using search::docstore::Value;
+vespalib::stringref S1("this is a string long enough to be compressed and is just used for sanity checking of compression"
+ "Adding some repeatble sequences like aaaaaaaaaaaaaaaaaaaaaa bbbbbbbbbbbbbbbbbbbbbbb to ensure compression");
+
+Value createValue(vespalib::stringref s, const CompressionConfig & cfg) {
+ Value v(7);
+ vespalib::DataBuffer input;
+ input.writeBytes(s.data(), s.size());
+ v.set(std::move(input), s.size(), cfg);
+ return v;
+}
+void verifyValue(vespalib::stringref s, const Value & v) {
+ vespalib::DataBuffer buf = v.decompressed();
+ EXPECT_EQUAL(s.size(), v.getUncompressedSize());
+ EXPECT_EQUAL(7u, v.getSyncToken());
+ EXPECT_EQUAL(0, memcmp(s.data(), buf.getData(), buf.getDataLen()));
+}
+TEST("require that Value can store uncompressed data") {
+ Value v = createValue(S1, CompressionConfig::NONE);
+ verifyValue(S1, v);
+}
+
+TEST("require that Value can be moved") {
+ Value v = createValue(S1, CompressionConfig::NONE);
+ Value m = std::move(v);
+ verifyValue(S1, m);
+}
+
+TEST("require that Value can be copied") {
+ Value v = createValue(S1, CompressionConfig::NONE);
+ Value copy(v);
+ verifyValue(S1, v);
+ verifyValue(S1, copy);
+}
+
TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/vespa/searchlib/docstore/CMakeLists.txt b/searchlib/src/vespa/searchlib/docstore/CMakeLists.txt
index 735b836cf17..2b82d9e5af7 100644
--- a/searchlib/src/vespa/searchlib/docstore/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/docstore/CMakeLists.txt
@@ -18,6 +18,7 @@ vespa_add_library(searchlib_docstore OBJECT
randreaders.cpp
storebybucket.cpp
summaryexceptions.cpp
+ value.cpp
visitcache.cpp
writeablefilechunk.cpp
DEPENDS
diff --git a/searchlib/src/vespa/searchlib/docstore/documentstore.cpp b/searchlib/src/vespa/searchlib/docstore/documentstore.cpp
index 95b6c3b1584..03fb89de1c3 100644
--- a/searchlib/src/vespa/searchlib/docstore/documentstore.cpp
+++ b/searchlib/src/vespa/searchlib/docstore/documentstore.cpp
@@ -4,6 +4,7 @@
#include "documentstore.h"
#include "visitcache.h"
#include "ibucketizer.h"
+#include "value.h"
#include <vespa/document/fieldvalue/document.h>
#include <vespa/vespalib/stllike/cache.hpp>
#include <vespa/vespalib/data/databuffer.h>
@@ -11,8 +12,6 @@
using document::DocumentTypeRepo;
using vespalib::compression::CompressionConfig;
-using vespalib::compression::compress;
-using vespalib::compression::decompress;
namespace search {
@@ -39,82 +38,19 @@ DocumentVisitorAdapter::visit(uint32_t lid, vespalib::ConstBufferRef buf) {
}
}
+document::Document::UP
+deserializeDocument(const docstore::Value & value, const DocumentTypeRepo &repo) {
+ vespalib::DataBuffer uncompressed(value.decompressed());
+ vespalib::nbostream is(uncompressed.getData(), uncompressed.getDataLen());
+ return std::make_unique<document::Document>(repo, is);
+}
+
}
using vespalib::nbostream;
namespace docstore {
-class Value {
-public:
- using Alloc = vespalib::alloc::Alloc;
- typedef std::unique_ptr<Value> UP;
-
- Value()
- : _syncToken(0),
- _compressedSize(0),
- _uncompressedSize(0),
- _compression(CompressionConfig::NONE)
- {}
-
- Value(uint64_t syncToken)
- : _syncToken(syncToken),
- _compressedSize(0),
- _uncompressedSize(0),
- _compression(CompressionConfig::NONE)
- {}
-
- Value(Value &&rhs) = default;
- Value &operator=(Value &&rhs) = default;
-
- Value(const Value &rhs)
- : _syncToken(rhs._syncToken),
- _compressedSize(rhs._compressedSize),
- _uncompressedSize(rhs._uncompressedSize),
- _compression(rhs._compression),
- _buf(Alloc::alloc(rhs.size()))
- {
- memcpy(get(), rhs.get(), size());
- }
-
- void setCompression(CompressionConfig::Type comp, size_t uncompressedSize) {
- _compression = comp;
- _uncompressedSize = uncompressedSize;
- }
- uint64_t getSyncToken() const { return _syncToken; }
-
- CompressionConfig::Type getCompression() const { return _compression; }
-
- size_t getUncompressedSize() const { return _uncompressedSize; }
-
- /**
- * Compress buffer into temporary buffer and copy temporary buffer to
- * value along with compression config.
- */
- void set(vespalib::DataBuffer &&buf, ssize_t len, const CompressionConfig &compression);
- // Keep buffer uncompressed
- void set(vespalib::DataBuffer &&buf, ssize_t len);
-
- /**
- * Decompress value into temporary buffer and deserialize document from
- * the temporary buffer.
- */
- document::Document::UP deserializeDocument(const DocumentTypeRepo &repo) const;
- vespalib::DataBuffer decompressed() const;
-
- size_t size() const { return _compressedSize; }
- bool empty() const { return size() == 0; }
- operator const void *() const { return _buf.get(); }
- const void *get() const { return _buf.get(); }
- void *get() { return _buf.get(); }
-private:
- uint64_t _syncToken;
- size_t _compressedSize;
- size_t _uncompressedSize;
- CompressionConfig::Type _compression;
- Alloc _buf;
-};
-
class BackingStore {
public:
BackingStore(IDataStore &store, const CompressionConfig &compression) :
@@ -133,45 +69,6 @@ private:
CompressionConfig _compression;
};
-
-void
-Value::set(vespalib::DataBuffer &&buf, ssize_t len) {
- set(std::move(buf), len, CompressionConfig());
-}
-
-void
-Value::set(vespalib::DataBuffer &&buf, ssize_t len, const CompressionConfig &compression) {
- //Underlying buffer must be identical to allow swap.
- vespalib::DataBuffer compressed(buf.getData(), 0u);
- CompressionConfig::Type type = compress(compression, vespalib::ConstBufferRef(buf.getData(), len), compressed, true);
- _compressedSize = compressed.getDataLen();
- if (buf.getData() == compressed.getData()) {
- // Uncompressed so we can just steal the underlying buffer.
- buf.stealBuffer().swap(_buf);
- } else {
- compressed.stealBuffer().swap(_buf);
- }
- assert(((type == CompressionConfig::NONE) &&
- (len == ssize_t(_compressedSize))) ||
- ((type != CompressionConfig::NONE) &&
- (len > ssize_t(_compressedSize))));
- setCompression(type, len);
-}
-
-vespalib::DataBuffer
-Value::decompressed() const {
- vespalib::DataBuffer uncompressed(_buf.get(), (size_t) 0);
- decompress(getCompression(), getUncompressedSize(), vespalib::ConstBufferRef(*this, size()), uncompressed, true);
- return uncompressed;
-}
-
-document::Document::UP
-Value::deserializeDocument(const DocumentTypeRepo &repo) const {
- vespalib::DataBuffer uncompressed(decompressed());
- vespalib::nbostream is(uncompressed.getData(), uncompressed.getDataLen());
- return std::make_unique<document::Document>(repo, is);
-}
-
void
BackingStore::visit(const IDocumentStore::LidVector &lids, const DocumentTypeRepo &repo,
IDocumentVisitor &visitor) const {
@@ -203,8 +100,6 @@ BackingStore::reconfigure(const CompressionConfig &compression) {
_compression = compression;
}
-}
-
using CacheParams = vespalib::CacheParam<
vespalib::LruParam<DocumentIdT, docstore::Value>,
docstore::BackingStore,
@@ -216,12 +111,14 @@ public:
Cache(BackingStore & b, size_t maxBytes) : vespalib::cache<CacheParams>(b, maxBytes) { }
};
+}
+
using VisitCache = docstore::VisitCache;
using docstore::Value;
bool
DocumentStore::Config::operator == (const Config &rhs) const {
- return (_maxCacheBytes == rhs._maxCacheBytes) &&
+ return (_maxCacheBytes == rhs._maxCacheBytes) &&
(_allowVisitCaching == rhs._allowVisitCaching) &&
(_initialCacheEntries == rhs._initialCacheEntries) &&
(_updateStrategy == rhs._updateStrategy) &&
@@ -233,9 +130,9 @@ DocumentStore::DocumentStore(const Config & config, IDataStore & store)
: IDocumentStore(),
_config(config),
_backingStore(store),
- _store(new docstore::BackingStore(_backingStore, config.getCompression())),
- _cache(new Cache(*_store, config.getMaxCacheBytes())),
- _visitCache(new VisitCache(store, config.getMaxCacheBytes(), config.getCompression())),
+ _store(std::make_unique<docstore::BackingStore>(_backingStore, config.getCompression())),
+ _cache(std::make_unique<docstore::Cache>(*_store, config.getMaxCacheBytes())),
+ _visitCache(std::make_unique<docstore::VisitCache>(store, config.getMaxCacheBytes(), config.getCompression())),
_uncached_lookups(0)
{
_cache->reserveElements(config.getInitialCacheEntries());
@@ -283,7 +180,7 @@ DocumentStore::read(DocumentIdT lid, const DocumentTypeRepo &repo) const
_store->read(lid, value);
}
if ( ! value.empty() ) {
- retval = value.deserializeDocument(repo);
+ retval = deserializeDocument(value, repo);
}
return retval;
}
@@ -380,15 +277,12 @@ class DocumentStore::WrapVisitor : public IDataStoreVisitor
public:
void visit(uint32_t lid, const void *buffer, size_t sz) override;
- WrapVisitor(Visitor &visitor,
- const DocumentTypeRepo &repo,
- const CompressionConfig &compresion,
- IDocumentStore &ds,
- uint64_t syncToken);
+ WrapVisitor(Visitor &visitor, const DocumentTypeRepo &repo, const CompressionConfig &compresion,
+ IDocumentStore &ds, uint64_t syncToken);
- inline void rewrite(uint32_t lid, const document::Document &doc);
- inline void rewrite(uint32_t lid);
- inline void visitRemove(uint32_t lid);
+ void rewrite(uint32_t lid, const document::Document &doc);
+ void rewrite(uint32_t lid);
+ void visitRemove(uint32_t lid);
};
@@ -406,34 +300,26 @@ public:
}
};
-
template <>
void
DocumentStore::WrapVisitor<IDocumentStoreReadVisitor>::
-rewrite(uint32_t lid, const document::Document &doc)
+rewrite(uint32_t , const document::Document &)
{
- (void) lid;
- (void) doc;
}
template <>
void
-DocumentStore::WrapVisitor<IDocumentStoreReadVisitor>::
-rewrite(uint32_t lid)
+DocumentStore::WrapVisitor<IDocumentStoreReadVisitor>::rewrite(uint32_t )
{
- (void) lid;
}
-
template <>
void
-DocumentStore::WrapVisitor<IDocumentStoreReadVisitor>::
-visitRemove(uint32_t lid)
+DocumentStore::WrapVisitor<IDocumentStoreReadVisitor>::visitRemove(uint32_t lid)
{
_visitor.visit(lid);
}
-
template <>
void
DocumentStore::WrapVisitor<IDocumentStoreRewriteVisitor>::
@@ -444,33 +330,21 @@ rewrite(uint32_t lid, const document::Document &doc)
template <>
void
-DocumentStore::WrapVisitor<IDocumentStoreRewriteVisitor>::
-rewrite(uint32_t lid)
+DocumentStore::WrapVisitor<IDocumentStoreRewriteVisitor>::rewrite(uint32_t lid)
{
_ds.remove(_syncToken, lid);
}
-
template <>
void
-DocumentStore::WrapVisitor<IDocumentStoreRewriteVisitor>::
-visitRemove(uint32_t lid)
+DocumentStore::WrapVisitor<IDocumentStoreRewriteVisitor>::visitRemove(uint32_t )
{
- (void) lid;
}
-
-
template <class Visitor>
void
-DocumentStore::WrapVisitor<Visitor>::visit(uint32_t lid,
- const void *buffer,
- size_t sz)
+DocumentStore::WrapVisitor<Visitor>::visit(uint32_t lid, const void *buffer, size_t sz)
{
- (void) lid;
- (void) buffer;
- (void) sz;
-
Value value;
vespalib::DataBuffer buf(4096);
buf.clear();
@@ -480,7 +354,7 @@ DocumentStore::WrapVisitor<Visitor>::visit(uint32_t lid,
value.set(std::move(buf), len);
}
if (! value.empty()) {
- std::shared_ptr<document::Document> doc(value.deserializeDocument(_repo));
+ std::shared_ptr<document::Document> doc(deserializeDocument(value, _repo));
_visitor.visit(lid, doc);
rewrite(lid, *doc);
} else {
@@ -489,14 +363,10 @@ DocumentStore::WrapVisitor<Visitor>::visit(uint32_t lid,
}
}
-
template <class Visitor>
DocumentStore::WrapVisitor<Visitor>::
-WrapVisitor(Visitor &visitor,
- const DocumentTypeRepo &repo,
- const CompressionConfig &compression,
- IDocumentStore &ds,
- uint64_t syncToken)
+WrapVisitor(Visitor &visitor, const DocumentTypeRepo &repo, const CompressionConfig &compression,
+ IDocumentStore &ds, uint64_t syncToken)
: _visitor(visitor),
_repo(repo),
_compression(compression),
@@ -505,7 +375,6 @@ WrapVisitor(Visitor &visitor,
{
}
-
void
DocumentStore::accept(IDocumentStoreReadVisitor &visitor, IDocumentStoreVisitorProgress &visitorProgress,
const DocumentTypeRepo &repo)
@@ -516,7 +385,6 @@ DocumentStore::accept(IDocumentStoreReadVisitor &visitor, IDocumentStoreVisitorP
_backingStore.accept(wrap, wrapVisitorProgress, false);
}
-
void
DocumentStore::accept(IDocumentStoreRewriteVisitor &visitor, IDocumentStoreVisitorProgress &visitorProgress,
const DocumentTypeRepo &repo)
@@ -527,7 +395,6 @@ DocumentStore::accept(IDocumentStoreRewriteVisitor &visitor, IDocumentStoreVisit
_backingStore.accept(wrap, wrapVisitorProgress, true);
}
-
double
DocumentStore::getVisitCost() const
{
@@ -584,5 +451,4 @@ DocumentStore::shrinkLidSpace()
_backingStore.shrinkLidSpace();
}
-} // namespace search
-
+}
diff --git a/searchlib/src/vespa/searchlib/docstore/documentstore.h b/searchlib/src/vespa/searchlib/docstore/documentstore.h
index 7bc3ab1c21d..baeed106531 100644
--- a/searchlib/src/vespa/searchlib/docstore/documentstore.h
+++ b/searchlib/src/vespa/searchlib/docstore/documentstore.h
@@ -5,17 +5,13 @@
#include "idocumentstore.h"
#include <vespa/vespalib/util/compressionconfig.h>
-
-namespace search {
-
-namespace docstore {
+namespace search::docstore {
class VisitCache;
class BackingStore;
class Cache;
}
-using docstore::VisitCache;
-using docstore::BackingStore;
-using docstore::Cache;
+
+namespace search {
/**
* Simple document store that contains serialized Document instances.
@@ -67,7 +63,7 @@ public:
* @param baseDir The path to a directory where "simpledocstore.dat" will exist.
**/
DocumentStore(const Config & config, IDataStore & store);
- ~DocumentStore();
+ ~DocumentStore() override;
DocumentUP read(DocumentIdT lid, const document::DocumentTypeRepo &repo) const override;
void visit(const LidVector & lids, const document::DocumentTypeRepo &repo, IDocumentVisitor & visitor) const override;
@@ -111,12 +107,12 @@ private:
template <class> class WrapVisitor;
class WrapVisitorProgress;
- Config _config;
- IDataStore & _backingStore;
- std::unique_ptr<BackingStore> _store;
- std::shared_ptr<Cache> _cache;
- std::shared_ptr<VisitCache> _visitCache;
- mutable std::atomic<uint64_t> _uncached_lookups;
+ Config _config;
+ IDataStore & _backingStore;
+ std::unique_ptr<docstore::BackingStore> _store;
+ std::unique_ptr<docstore::Cache> _cache;
+ std::unique_ptr<docstore::VisitCache> _visitCache;
+ mutable std::atomic<uint64_t> _uncached_lookups;
};
} // namespace search
diff --git a/searchlib/src/vespa/searchlib/docstore/value.cpp b/searchlib/src/vespa/searchlib/docstore/value.cpp
new file mode 100644
index 00000000000..94d0cb6fe55
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/value.cpp
@@ -0,0 +1,73 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "value.h"
+#include <vespa/vespalib/data/databuffer.h>
+#include <vespa/vespalib/util/compressor.h>
+
+using vespalib::compression::compress;
+using vespalib::compression::decompress;
+
+namespace search::docstore {
+
+Value::Value()
+ : _syncToken(0),
+ _compressedSize(0),
+ _uncompressedSize(0),
+ _compression(CompressionConfig::NONE)
+{}
+
+Value::Value(uint64_t syncToken)
+ : _syncToken(syncToken),
+ _compressedSize(0),
+ _uncompressedSize(0),
+ _compression(CompressionConfig::NONE)
+{}
+
+Value::Value(const Value &rhs)
+ : _syncToken(rhs._syncToken),
+ _compressedSize(rhs._compressedSize),
+ _uncompressedSize(rhs._uncompressedSize),
+ _compression(rhs._compression),
+ _buf(Alloc::alloc(rhs.size()))
+{
+ memcpy(get(), rhs.get(), size());
+}
+
+void
+Value::setCompression(CompressionConfig::Type comp, size_t uncompressedSize) {
+ _compression = comp;
+ _uncompressedSize = uncompressedSize;
+}
+
+void
+Value::set(vespalib::DataBuffer &&buf, ssize_t len) {
+ set(std::move(buf), len, CompressionConfig());
+}
+
+void
+Value::set(vespalib::DataBuffer &&buf, ssize_t len, const CompressionConfig &compression) {
+ //Underlying buffer must be identical to allow swap.
+ vespalib::DataBuffer compressed(buf.getData(), 0u);
+ CompressionConfig::Type type = compress(compression, vespalib::ConstBufferRef(buf.getData(), len), compressed, true);
+ _compressedSize = compressed.getDataLen();
+ if (buf.getData() == compressed.getData()) {
+ // Uncompressed so we can just steal the underlying buffer.
+ buf.stealBuffer().swap(_buf);
+ } else {
+ compressed.stealBuffer().swap(_buf);
+ }
+ assert(((type == CompressionConfig::NONE) &&
+ (len == ssize_t(_compressedSize))) ||
+ ((type != CompressionConfig::NONE) &&
+ (len > ssize_t(_compressedSize))));
+ setCompression(type, len);
+}
+
+vespalib::DataBuffer
+Value::decompressed() const {
+ vespalib::DataBuffer uncompressed(_buf.get(), (size_t) 0);
+ decompress(getCompression(), getUncompressedSize(), vespalib::ConstBufferRef(*this, size()), uncompressed, true);
+ return uncompressed;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/docstore/value.h b/searchlib/src/vespa/searchlib/docstore/value.h
new file mode 100644
index 00000000000..a3d6af984bd
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/docstore/value.h
@@ -0,0 +1,51 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/util/compressionconfig.h>
+#include <vespa/vespalib/data/databuffer.h>
+
+namespace search::docstore {
+
+class Value {
+public:
+ using Alloc = vespalib::alloc::Alloc;
+ using CompressionConfig = vespalib::compression::CompressionConfig;
+
+ Value();
+ Value(uint64_t syncToken);
+
+ Value(Value &&rhs) = default;
+ Value &operator=(Value &&rhs) = default;
+
+ Value(const Value &rhs);
+
+ void setCompression(CompressionConfig::Type comp, size_t uncompressedSize);
+ uint64_t getSyncToken() const { return _syncToken; }
+ CompressionConfig::Type getCompression() const { return _compression; }
+ size_t getUncompressedSize() const { return _uncompressedSize; }
+
+ /**
+ * Compress buffer into temporary buffer and copy temporary buffer to
+ * value along with compression config.
+ */
+ void set(vespalib::DataBuffer &&buf, ssize_t len, const CompressionConfig &compression);
+ // Keep buffer uncompressed
+ void set(vespalib::DataBuffer &&buf, ssize_t len);
+
+ vespalib::DataBuffer decompressed() const;
+
+ size_t size() const { return _compressedSize; }
+ bool empty() const { return size() == 0; }
+ operator const void *() const { return _buf.get(); }
+ const void *get() const { return _buf.get(); }
+ void *get() { return _buf.get(); }
+private:
+ uint64_t _syncToken;
+ size_t _compressedSize;
+ size_t _uncompressedSize;
+ CompressionConfig::Type _compression;
+ Alloc _buf;
+};
+
+}