aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2017-06-13 13:46:02 +0200
committer Henning Baldersheim <balder@yahoo-inc.com> 2017-06-13 13:46:02 +0200
commit 024f86caf8031731f24054688a4c783b493eb116 (patch)
tree 5d282cdc35bd43d0129a3d2a72d2812ee371c706 /searchlib
parent 3b6a3b0f4beafa3ea9baec7e17197042d947767d (diff)
Move to separate compression namespace to avoid pollution.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/vespa/searchlib/common/packets.cpp 6
-rw-r--r-- searchlib/src/vespa/searchlib/docstore/chunkformat.cpp 33
-rw-r--r-- searchlib/src/vespa/searchlib/docstore/documentstore.cpp 80
-rw-r--r-- searchlib/src/vespa/searchlib/docstore/visitcache.cpp 6
-rw-r--r-- searchlib/src/vespa/searchlib/grouping/sketch.h 4
5 files changed, 60 insertions, 69 deletions
diff --git a/searchlib/src/vespa/searchlib/common/packets.cpp b/searchlib/src/vespa/searchlib/common/packets.cpp
index 50a02995a40..d885442ec03 100644
--- a/searchlib/src/vespa/searchlib/common/packets.cpp
+++ b/searchlib/src/vespa/searchlib/common/packets.cpp
@@ -151,7 +151,7 @@ FS4PersistentPacketStreamer::Decode(FNET_DataBuffer *src, uint32_t plen, uint32_
uint32_t uncompressed_size = src->ReadInt32();
ConstBufferRef org(src->GetData(), plen - sizeof(uint32_t));
vespalib::DataBuffer uncompressed(uncompressed_size);
- document::decompress(compressionType, uncompressed_size, org, uncompressed, false);
+ document::compression::decompress(compressionType, uncompressed_size, org, uncompressed, false);
FNET_DataBuffer buf(uncompressed.getData(), uncompressed.getDataLen());
decodePacket(packet, buf, uncompressed_size, pcode);
src->DataToDead(plen - sizeof(uint32_t));
@@ -192,7 +192,7 @@ FS4PersistentPacketStreamer::Encode(FNET_Packet *packet, uint32_t chid, FNET_Dat
CompressionConfig config(_compressionType, _compressionLevel, 90);
ConstBufferRef org(dst->GetData() + packet_start + header_len, body_len);
vespalib::DataBuffer compressed(org.size());
- CompressionConfig::Type r = document::compress(config, org, compressed, false);
+ CompressionConfig::Type r = document::compression::compress(config, org, compressed, false);
if (r != CompressionConfig::NONE) {
dst->DataToFree(body_len + header_len);
// sizeof(data + header + uncompressed_size) - sizeof(uint32_t)
@@ -455,7 +455,7 @@ FS4Packet_PreSerialized::FS4Packet_PreSerialized(FNET_Packet & packet)
90);
ConstBufferRef org(tmp.GetData(), tmp.GetDataLen());
vespalib::DataBuffer compressed(org.size());
- _compressionType = document::compress(config, org, compressed, false);
+ _compressionType = document::compression::compress(config, org, compressed, false);
if (_compressionType != CompressionConfig::NONE) {
_data.WriteInt32Fast(body_len);
_data.WriteBytes(compressed.getData(), compressed.getDataLen());
diff --git a/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp b/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp
index c690e662136..4d4d4c48130 100644
--- a/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp
+++ b/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp
@@ -1,14 +1,17 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "chunkformats.h"
-#include <vespa/document/util/lz4compressor.h>
-#include <vespa/document/util/zstdcompressor.h>
+#include <vespa/document/util/compressor.h>
#include <vespa/vespalib/util/stringfmt.h>
namespace search {
using vespalib::make_string;
using vespalib::Exception;
+using document::compression::compress;
+using document::compression::decompress;
+using document::compression::computeMaxCompressedsize;
+using document::CompressionConfig;
ChunkException::ChunkException(const vespalib::stringref & msg, const vespalib::stringref & location) :
Exception(make_string("Illegal chunk: %s", msg.c_str()), location)
@@ -16,7 +19,7 @@ ChunkException::ChunkException(const vespalib::stringref & msg, const vespalib::
}
void
-ChunkFormat::pack(uint64_t lastSerial, vespalib::DataBuffer & compressed, const document::CompressionConfig & compression)
+ChunkFormat::pack(uint64_t lastSerial, vespalib::DataBuffer & compressed, const CompressionConfig & compression)
{
vespalib::nbostream & os = _dataBuf;
os << lastSerial;
@@ -30,7 +33,7 @@ ChunkFormat::pack(uint64_t lastSerial, vespalib::DataBuffer & compressed, const
const size_t oldPos(compressed.getDataLen());
compressed.writeInt8(compression.type);
compressed.writeInt32(os.size());
- document::CompressionConfig::Type type(document::compress(compression, vespalib::ConstBufferRef(os.c_str(), os.size()), compressed, false));
+ CompressionConfig::Type type(compress(compression, vespalib::ConstBufferRef(os.c_str(), os.size()), compressed, false));
if (compression.type != type) {
compressed.getData()[oldPos] = type;
}
@@ -43,32 +46,22 @@ ChunkFormat::pack(uint64_t lastSerial, vespalib::DataBuffer & compressed, const
}
size_t
-ChunkFormat::getMaxPackSize(const document::CompressionConfig & compression) const
+ChunkFormat::getMaxPackSize(const CompressionConfig & compression) const
{
const size_t OVERHEAD(0);
const size_t MINSIZE(1 + 1 + 4 + 4 + includeSerializedSize() ? 4 : 0); // version + type + real length + crc + lastserial
const size_t formatSpecificSize(getHeaderSize());
size_t rawSize(MINSIZE + formatSpecificSize + OVERHEAD);
const size_t payloadSize(_dataBuf.size() + 8);
- // This is a little dirty -> need interface.
- if (compression.type == document::CompressionConfig::LZ4) {
- document::LZ4Compressor lz4;
- rawSize += lz4.adjustProcessLen(0, payloadSize);
- } else if (compression.type == document::CompressionConfig::ZSTD) {
- document::ZStdCompressor zstd;
- rawSize += zstd.adjustProcessLen(0, payloadSize);
- } else {
- rawSize += payloadSize;
- }
- return rawSize;
+ return rawSize + computeMaxCompressedsize(compression.type, payloadSize);
}
void
ChunkFormat::verifyCompression(uint8_t type)
{
- if ((type != document::CompressionConfig::LZ4) &&
- (type != document::CompressionConfig::ZSTD) &&
- (type != document::CompressionConfig::NONE)) {
+ if ((type != CompressionConfig::LZ4) &&
+ (type != CompressionConfig::ZSTD) &&
+ (type != CompressionConfig::NONE)) {
throw ChunkException(make_string("Unknown compressiontype %d", type), VESPA_STRLOC);
}
}
@@ -150,7 +143,7 @@ ChunkFormat::deserializeBody(vespalib::nbostream & is)
// This is a dirty trick to fool some odd sanity checking in DataBuffer::swap
vespalib::DataBuffer uncompressed(const_cast<char *>(is.peek()), (size_t)0);
vespalib::ConstBufferRef data(is.peek(), is.size() - sizeof(uint32_t));
- document::decompress(document::CompressionConfig::Type(type), uncompressedLen, data, uncompressed, true);
+ decompress(CompressionConfig::Type(type), uncompressedLen, data, uncompressed, true);
assert(uncompressed.getData() == uncompressed.getDead());
if (uncompressed.getData() != data.c_str()) {
const size_t sz(uncompressed.getDataLen());
diff --git a/searchlib/src/vespa/searchlib/docstore/documentstore.cpp b/searchlib/src/vespa/searchlib/docstore/documentstore.cpp
index fd7df29cd49..ffb56407198 100644
--- a/searchlib/src/vespa/searchlib/docstore/documentstore.cpp
+++ b/searchlib/src/vespa/searchlib/docstore/documentstore.cpp
@@ -8,6 +8,11 @@
#include <vespa/vespalib/data/databuffer.h>
#include <vespa/document/util/compressor.h>
+using document::DocumentTypeRepo;
+using document::CompressionConfig;
+using document::compression::compress;
+using document::compression::decompress;
+
namespace search {
namespace {
@@ -15,13 +20,13 @@ namespace {
class DocumentVisitorAdapter : public IBufferVisitor
{
public:
- DocumentVisitorAdapter(const document::DocumentTypeRepo & repo, IDocumentVisitor & visitor) :
+ DocumentVisitorAdapter(const DocumentTypeRepo & repo, IDocumentVisitor & visitor) :
_repo(repo),
_visitor(visitor)
{ }
void visit(uint32_t lid, vespalib::ConstBufferRef buf) override;
private:
- const document::DocumentTypeRepo & _repo;
+ const DocumentTypeRepo & _repo;
IDocumentVisitor & _visitor;
};
@@ -45,7 +50,7 @@ public:
using Alloc = vespalib::alloc::Alloc;
typedef std::unique_ptr<Value> UP;
- Value() : _compressedSize(0), _uncompressedSize(0), _compression(document::CompressionConfig::NONE) {}
+ Value() : _compressedSize(0), _uncompressedSize(0), _compression(CompressionConfig::NONE) {}
Value(Value &&rhs) :
_compressedSize(rhs._compressedSize),
@@ -69,12 +74,12 @@ public:
return *this;
}
- void setCompression(document::CompressionConfig::Type comp, size_t uncompressedSize) {
+ void setCompression(CompressionConfig::Type comp, size_t uncompressedSize) {
_compression = comp;
_uncompressedSize = uncompressedSize;
}
- document::CompressionConfig::Type getCompression() const { return _compression; }
+ CompressionConfig::Type getCompression() const { return _compression; }
size_t getUncompressedSize() const { return _uncompressedSize; }
@@ -82,13 +87,13 @@ public:
* Compress buffer into temporary buffer and copy temporary buffer to
* value along with compression config.
*/
- void set(vespalib::DataBuffer &&buf, ssize_t len, const document::CompressionConfig &compression);
+ void set(vespalib::DataBuffer &&buf, ssize_t len, const CompressionConfig &compression);
/**
* Decompress value into temporary buffer and deserialize document from
* the temporary buffer.
*/
- document::Document::UP deserializeDocument(const document::DocumentTypeRepo &repo);
+ document::Document::UP deserializeDocument(const DocumentTypeRepo &repo);
size_t size() const { return _compressedSize; }
bool empty() const { return size() == 0; }
@@ -98,36 +103,32 @@ public:
private:
size_t _compressedSize;
size_t _uncompressedSize;
- document::CompressionConfig::Type _compression;
+ CompressionConfig::Type _compression;
Alloc _buf;
};
class BackingStore {
public:
- BackingStore(IDataStore &store, const document::CompressionConfig &compression) :
+ BackingStore(IDataStore &store, const CompressionConfig &compression) :
_backingStore(store),
_compression(compression) { }
bool read(DocumentIdT key, Value &value) const;
- void visit(const IDocumentStore::LidVector &lids, const document::DocumentTypeRepo &repo, IDocumentVisitor &visitor) const;
+ void visit(const IDocumentStore::LidVector &lids, const DocumentTypeRepo &repo, IDocumentVisitor &visitor) const;
void write(DocumentIdT, const Value &) {}
void erase(DocumentIdT) {}
- const document::CompressionConfig &getCompression(void) const { return _compression; }
+ const CompressionConfig &getCompression(void) const { return _compression; }
private:
IDataStore &_backingStore;
- const document::CompressionConfig _compression;
+ const CompressionConfig _compression;
};
void
-Value::set(vespalib::DataBuffer &&buf,
- ssize_t len,
- const document::CompressionConfig &compression) {
+Value::set(vespalib::DataBuffer &&buf, ssize_t len, const CompressionConfig &compression) {
//Underlying buffer must be identical to allow swap.
vespalib::DataBuffer compressed(buf.getData(), 0u);
- document::CompressionConfig::Type type =
- document::compress(compression,
- vespalib::ConstBufferRef(buf.getData(), len),
- compressed, true);
+ CompressionConfig::Type type = compress(compression, vespalib::ConstBufferRef(buf.getData(), len),
+ compressed, true);
_compressedSize = compressed.getDataLen();
if (buf.getData() == compressed.getData()) {
// Uncompressed so we can just steal the underlying buffer.
@@ -135,28 +136,25 @@ Value::set(vespalib::DataBuffer &&buf,
} else {
compressed.stealBuffer().swap(_buf);
}
- assert(((type == document::CompressionConfig::NONE) &&
+ assert(((type == CompressionConfig::NONE) &&
(len == ssize_t(_compressedSize))) ||
- ((type != document::CompressionConfig::NONE) &&
+ ((type != CompressionConfig::NONE) &&
(len > ssize_t(_compressedSize))));
setCompression(type, len);
}
document::Document::UP
-Value::deserializeDocument(const document::DocumentTypeRepo &repo) {
+Value::deserializeDocument(const DocumentTypeRepo &repo) {
vespalib::DataBuffer uncompressed((char *) _buf.get(), (size_t) 0);
- document::decompress(getCompression(),
- getUncompressedSize(),
- vespalib::ConstBufferRef(*this, size()),
- uncompressed, true);
+ decompress(getCompression(), getUncompressedSize(), vespalib::ConstBufferRef(*this, size()), uncompressed, true);
vespalib::nbostream is(uncompressed.getData(), uncompressed.getDataLen());
return document::Document::UP(new document::Document(repo, is));
}
void
-BackingStore::visit(const IDocumentStore::LidVector &lids, const document::DocumentTypeRepo &repo,
+BackingStore::visit(const IDocumentStore::LidVector &lids, const DocumentTypeRepo &repo,
IDocumentVisitor &visitor) const {
DocumentVisitorAdapter adapter(repo, visitor);
_backingStore.read(lids, adapter);
@@ -213,7 +211,7 @@ DocumentStore::useCache() const {
}
void
-DocumentStore::visit(const LidVector & lids, const document::DocumentTypeRepo &repo, IDocumentVisitor & visitor) const
+DocumentStore::visit(const LidVector & lids, const DocumentTypeRepo &repo, IDocumentVisitor & visitor) const
{
if (useCache() && _config.allowVisitCaching() && visitor.allowVisitCaching()) {
docstore::BlobSet blobSet = _visitCache->read(lids).getBlobSet();
@@ -227,7 +225,7 @@ DocumentStore::visit(const LidVector & lids, const document::DocumentTypeRepo &r
}
document::Document::UP
-DocumentStore::read(DocumentIdT lid, const document::DocumentTypeRepo &repo) const
+DocumentStore::read(DocumentIdT lid, const DocumentTypeRepo &repo) const
{
document::Document::UP retval;
Value value;
@@ -305,25 +303,23 @@ DocumentStore::getLastFlushTime() const
template <class Visitor>
class DocumentStore::WrapVisitor : public IDataStoreVisitor
{
- Visitor &_visitor;
- const document::DocumentTypeRepo &_repo;
- const document::CompressionConfig &_compression;
- IDocumentStore &_ds;
- uint64_t _syncToken;
+ Visitor &_visitor;
+ const DocumentTypeRepo &_repo;
+ const CompressionConfig &_compression;
+ IDocumentStore &_ds;
+ uint64_t _syncToken;
public:
void visit(uint32_t lid, const void *buffer, size_t sz) override;
WrapVisitor(Visitor &visitor,
- const document::DocumentTypeRepo &repo,
- const document::CompressionConfig &compresion,
+ const DocumentTypeRepo &repo,
+ const CompressionConfig &compresion,
IDocumentStore &ds,
uint64_t syncToken);
inline void rewrite(uint32_t lid, const document::Document &doc);
-
inline void rewrite(uint32_t lid);
-
inline void visitRemove(uint32_t lid);
};
@@ -429,8 +425,8 @@ DocumentStore::WrapVisitor<Visitor>::visit(uint32_t lid,
template <class Visitor>
DocumentStore::WrapVisitor<Visitor>::
WrapVisitor(Visitor &visitor,
- const document::DocumentTypeRepo &repo,
- const document::CompressionConfig &compression,
+ const DocumentTypeRepo &repo,
+ const CompressionConfig &compression,
IDocumentStore &ds,
uint64_t syncToken)
: _visitor(visitor),
@@ -445,7 +441,7 @@ WrapVisitor(Visitor &visitor,
void
DocumentStore::accept(IDocumentStoreReadVisitor &visitor,
IDocumentStoreVisitorProgress &visitorProgress,
- const document::DocumentTypeRepo &repo)
+ const DocumentTypeRepo &repo)
{
WrapVisitor<IDocumentStoreReadVisitor> wrap(visitor, repo,
_store->getCompression(),
@@ -460,7 +456,7 @@ DocumentStore::accept(IDocumentStoreReadVisitor &visitor,
void
DocumentStore::accept(IDocumentStoreRewriteVisitor &visitor,
IDocumentStoreVisitorProgress &visitorProgress,
- const document::DocumentTypeRepo &repo)
+ const DocumentTypeRepo &repo)
{
WrapVisitor<IDocumentStoreRewriteVisitor> wrap(visitor,
repo,
diff --git a/searchlib/src/vespa/searchlib/docstore/visitcache.cpp b/searchlib/src/vespa/searchlib/docstore/visitcache.cpp
index 8fac288a23a..6771fb3bec6 100644
--- a/searchlib/src/vespa/searchlib/docstore/visitcache.cpp
+++ b/searchlib/src/vespa/searchlib/docstore/visitcache.cpp
@@ -90,7 +90,7 @@ CompressedBlobSet::CompressedBlobSet(const document::CompressionConfig &compress
if ( ! _positions.empty() ) {
DataBuffer compressed;
ConstBufferRef org = uncompressed.getBuffer();
- _compression = document::compress(compression, org, compressed, false);
+ _compression = document::compression::compress(compression, org, compressed, false);
_buffer.resize(compressed.getDataLen());
memcpy(_buffer, compressed.getData(), compressed.getDataLen());
}
@@ -99,10 +99,12 @@ CompressedBlobSet::CompressedBlobSet(const document::CompressionConfig &compress
BlobSet
CompressedBlobSet::getBlobSet() const
{
+ using document::compression::decompress;
// These are frequent large allocations that are too expensive to mmap.
DataBuffer uncompressed(0, 1, Alloc::alloc(0, 16 * MemoryAllocator::HUGEPAGE_SIZE));
if ( ! _positions.empty() ) {
- document::decompress(_compression, getBufferSize(_positions), ConstBufferRef(_buffer.c_str(), _buffer.size()), uncompressed, false);
+ decompress(_compression, getBufferSize(_positions),
+ ConstBufferRef(_buffer.c_str(), _buffer.size()), uncompressed, false);
}
return BlobSet(_positions, uncompressed.stealBuffer());
}
diff --git a/searchlib/src/vespa/searchlib/grouping/sketch.h b/searchlib/src/vespa/searchlib/grouping/sketch.h
index 50209b19bbd..a036d23ef36 100644
--- a/searchlib/src/vespa/searchlib/grouping/sketch.h
+++ b/searchlib/src/vespa/searchlib/grouping/sketch.h
@@ -209,7 +209,7 @@ compress_buckets_into(char *buffer, uint32_t size) const {
vespalib::ConstBufferRef org(&bucket[0], BUCKET_COUNT);
vespalib::DataBuffer compress_buffer(buffer, size);
document::CompressionConfig::Type r =
- document::compress(config, org, compress_buffer, false);
+ document::compression::compress(config, org, compress_buffer, false);
assert(compress_buffer.getDead() == buffer);
if (r == document::CompressionConfig::LZ4) {
assert(compress_buffer.getDataLen() < BUCKET_COUNT);
@@ -228,7 +228,7 @@ decompress_buckets_from(char *buffer, uint32_t size) {
} else {
vespalib::ConstBufferRef compressed(buffer, size);
vespalib::DataBuffer uncompressed(reinterpret_cast<char *>(&bucket[0]), BUCKET_COUNT);
- document::decompress(document::CompressionConfig::LZ4, BUCKET_COUNT, compressed, uncompressed, false);
+ document::compression::decompress(document::CompressionConfig::LZ4, BUCKET_COUNT, compressed, uncompressed, false);
}
}
template <int BucketBits, typename HashT>