summary | refs | log | tree | commit | diff | stats
path: root/document
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2021-11-10 11:09:04 +0000
committer Henning Baldersheim <balder@yahoo-inc.com> 2021-11-10 11:37:54 +0000
commit 518a7e7e470f1c0f792dd9e9fd36b23359b31048 (patch)
tree c253ccfc4cab67d389bf6c64fbb27301c8d6e10d /document
parent 8efde630ff73357e0f31a34f131a120d327bf224 (diff)
remove support for compression of document content in backend.
Diffstat (limited to 'document')
-rw-r--r-- document/src/tests/documenttestcase.cpp 107
-rw-r--r-- document/src/tests/repo/documenttyperepo_test.cpp 24
-rw-r--r-- document/src/tests/serialization/vespadocumentserializer_test.cpp 15
-rw-r--r-- document/src/vespa/document/datatype/structdatatype.cpp 14
-rw-r--r-- document/src/vespa/document/datatype/structdatatype.h 6
-rw-r--r-- document/src/vespa/document/fieldvalue/serializablearray.cpp 48
-rw-r--r-- document/src/vespa/document/fieldvalue/serializablearray.h 7
-rw-r--r-- document/src/vespa/document/fieldvalue/structfieldvalue.cpp 15
-rw-r--r-- document/src/vespa/document/fieldvalue/structfieldvalue.h 10
-rw-r--r-- document/src/vespa/document/repo/documenttyperepo.cpp 9
-rw-r--r-- document/src/vespa/document/serialization/vespadocumentdeserializer.cpp 56
-rw-r--r-- document/src/vespa/document/serialization/vespadocumentserializer.cpp 59
12 files changed, 66 insertions, 304 deletions
diff --git a/document/src/tests/documenttestcase.cpp b/document/src/tests/documenttestcase.cpp
index 9fa72b7c1e7..aae4b5c6f40 100644
--- a/document/src/tests/documenttestcase.cpp
+++ b/document/src/tests/documenttestcase.cpp
@@ -19,7 +19,6 @@
#include <gmock/gmock.h>
using vespalib::nbostream;
-using vespalib::compression::CompressionConfig;
using namespace ::testing;
using namespace document::config_builder;
@@ -699,7 +698,6 @@ TEST(DocumentTest,testReadSerializedAllVersions)
doc.setValue("rawfield", RawFieldValue("RAW DATA", 8));
Document docInDoc(*docInDocType, DocumentId("id:ns:docindoc::http://doc.in.doc/"));
docInDoc.set("stringindocfield", "Elvis is dead");
- //docInDoc.setCompression(CompressionConfig(CompressionConfig::NONE, 0, 0));
doc.setValue("docfield", docInDoc);
ArrayFieldValue floatArray(*arrayOfFloatDataType);
floatArray.add(1.0);
@@ -713,7 +711,6 @@ TEST(DocumentTest,testReadSerializedAllVersions)
// Write document to disk, (when you bump version and alter stuff,
// you can copy this current to new test for new version)
{
- //doc.setCompression(CompressionConfig(CompressionConfig::NONE, 0, 0));
nbostream buf = doc.serialize();
int fd = open(TEST_PATH("data/document-cpp-currentversion-uncompressed.dat").c_str(),
O_WRONLY | O_CREAT | O_TRUNC, 0644);
@@ -722,20 +719,6 @@ TEST(DocumentTest,testReadSerializedAllVersions)
EXPECT_EQ(buf.size(), len);
close(fd);
}
- {
- CompressionConfig oldCfg(doc.getType().getFieldsType().getCompressionConfig());
- CompressionConfig newCfg(CompressionConfig::LZ4, 9, 95);
- const_cast<StructDataType &>(doc.getType().getFieldsType()).setCompressionConfig(newCfg);
- nbostream buf = doc.serialize();
- EXPECT_TRUE(buf.size() <= buf.capacity());
- int fd = open(TEST_PATH("data/document-cpp-currentversion-lz4-9.dat").c_str(),
- O_WRONLY | O_CREAT | O_TRUNC, 0644);
- EXPECT_TRUE(fd > 0);
- size_t len = write(fd, buf.peek(), buf.size());
- EXPECT_EQ(buf.size(), len);
- close(fd);
- const_cast<StructDataType &>(doc.getType().getFieldsType()).setCompressionConfig(oldCfg);
- }
}
std::string jpath = TEST_PATH("../test/serializeddocuments/");
@@ -877,18 +860,6 @@ TEST(DocumentTest, testGenerateSerializedFile)
throw vespalib::Exception("write failed");
}
close(fd);
-
- CompressionConfig newCfg(CompressionConfig::LZ4, 9, 95);
- const_cast<StructDataType &>(doc.getType().getFieldsType()).setCompressionConfig(newCfg);
-
- nbostream lz4buf = doc.serialize();
-
- fd = open((serializedDir + "/serializecpp-lz4-level9.dat").c_str(),
- O_WRONLY | O_TRUNC | O_CREAT, 0644);
- if (write(fd, lz4buf.data(), lz4buf.size()) != (ssize_t)lz4buf.size()) {
- throw vespalib::Exception("write failed");
- }
- close(fd);
}
TEST(DocumentTest, testBogusserialize)
@@ -1013,84 +984,6 @@ TEST(DocumentTest, testSliceSerialize)
EXPECT_EQ(*doc2, doc4);
}
-TEST(DocumentTest, testCompression)
-{
- TestDocMan testDocMan;
- Document::UP doc = testDocMan.createDocument();
-
- std::string bigString("compress me");
- for (int i = 0; i < 8; ++i) { bigString += bigString; }
-
- doc->setValue("hstringval", StringFieldValue(bigString));
-
- nbostream buf_uncompressed = doc->serialize();
-
- CompressionConfig oldCfg(doc->getType().getFieldsType().getCompressionConfig());
-
- CompressionConfig newCfg(CompressionConfig::LZ4, 9, 95);
- const_cast<StructDataType &>(doc->getType().getFieldsType()).setCompressionConfig(newCfg);
- nbostream buf_lz4 = doc->serialize();
-
- const_cast<StructDataType &>(doc->getType().getFieldsType()).setCompressionConfig(oldCfg);
-
- EXPECT_TRUE(buf_lz4.size() < buf_uncompressed.size());
-
- Document doc_lz4(testDocMan.getTypeRepo(), buf_lz4);
-
- EXPECT_EQ(*doc, doc_lz4);
-}
-
-TEST(DocumentTest, testCompressionConfigured)
-{
- DocumenttypesConfigBuilderHelper builder;
- builder.document(43, "serializetest",
- Struct("serializetest.header").setId(44),
- Struct("serializetest.body").setId(45)
- .addField("stringfield", DataType::T_STRING));
- DocumentTypeRepo repo(builder.config());
- Document doc_uncompressed(*repo.getDocumentType("serializetest"),
- DocumentId("id:ns:serializetest::1"));
-
- std::string bigString("compress me");
- for (int i = 0; i < 8; ++i) { bigString += bigString; }
-
- doc_uncompressed.setValue("stringfield", StringFieldValue(bigString));
- nbostream buf_uncompressed;
- doc_uncompressed.serialize(buf_uncompressed);
-
- size_t uncompressedSize = buf_uncompressed.size();
-
- DocumenttypesConfigBuilderHelper builder2;
- builder2.document(43, "serializetest",
- Struct("serializetest.header").setId(44),
- Struct("serializetest.body").setId(45)
- .addField("stringfield", DataType::T_STRING)
- .setCompression(DocumenttypesConfig::Documenttype::
- Datatype::Sstruct::Compression::Type::LZ4,
- 9, 99, 0));
- DocumentTypeRepo repo2(builder2.config());
-
- Document doc(repo2, buf_uncompressed);
-
- nbostream buf_compressed;
- doc.serialize(buf_compressed);
- size_t compressedSize = buf_compressed.size();
-
- EXPECT_TRUE(compressedSize < uncompressedSize);
-
- Document doc2(repo2, buf_compressed);
-
- nbostream buf_compressed2;
- doc2.serialize(buf_compressed2);
-
- EXPECT_EQ(compressedSize, buf_compressed2.size());
-
- Document doc3(repo2, buf_compressed2);
-
- EXPECT_EQ(doc2, doc_uncompressed);
- EXPECT_EQ(doc2, doc3);
-}
-
TEST(DocumentTest, testUnknownEntries)
{
// We should be able to deserialize a document with unknown values in it.
diff --git a/document/src/tests/repo/documenttyperepo_test.cpp b/document/src/tests/repo/documenttyperepo_test.cpp
index d1456fb40c2..6619d53e609 100644
--- a/document/src/tests/repo/documenttyperepo_test.cpp
+++ b/document/src/tests/repo/documenttyperepo_test.cpp
@@ -17,7 +17,6 @@
#include <vespa/vespalib/testkit/testapp.h>
#include <vespa/vespalib/util/exceptions.h>
#include <set>
-#include <vespa/config/helper/configgetter.h>
using config::AsciiConfigWriter;
@@ -26,7 +25,6 @@ using std::vector;
using vespalib::Identifiable;
using vespalib::IllegalArgumentException;
using vespalib::string;
-using vespalib::compression::CompressionConfig;
using namespace document::config_builder;
using namespace document;
@@ -42,9 +40,6 @@ const int32_t body_id = 31;
const string type_name_2 = "test_2";
const string header_name_2 = type_name_2 + ".header";
const string body_name_2 = type_name_2 + ".body";
-const int32_t comp_level = 10;
-const int32_t comp_minres = 80;
-const size_t comp_minsize = 120;
const string field_name = "field_name";
const string derived_name = "derived";
@@ -80,25 +75,6 @@ TEST("requireThatDocumentTypeCanBeLookedUpWhenIdIsNotAHash") {
ASSERT_TRUE(type);
}
-TEST("requireThatStructsCanConfigureCompression") {
- DocumenttypesConfigBuilderHelper builder;
- typedef DocumenttypesConfig::Documenttype::Datatype::Sstruct Sstruct;
- builder.document(doc_type_id, type_name,
- Struct(header_name),
- Struct(body_name).setCompression(
- Sstruct::Compression::Type::LZ4,
- comp_level, comp_minres, comp_minsize));
- DocumentTypeRepo repo(builder.config());
-
- const CompressionConfig &comp_config =
- repo.getDocumentType(type_name)->getFieldsType()
- .getCompressionConfig();
- EXPECT_EQUAL(CompressionConfig::LZ4, comp_config.type);
- EXPECT_EQUAL(comp_level, comp_config.compressionLevel);
- EXPECT_EQUAL(comp_minres, comp_config.threshold);
- EXPECT_EQUAL(comp_minsize, comp_config.minSize);
-}
-
TEST("requireThatStructsCanHaveFields") {
DocumenttypesConfigBuilderHelper builder;
builder.document(doc_type_id, type_name,
diff --git a/document/src/tests/serialization/vespadocumentserializer_test.cpp b/document/src/tests/serialization/vespadocumentserializer_test.cpp
index fc5dd2c110a..443c7d1885a 100644
--- a/document/src/tests/serialization/vespadocumentserializer_test.cpp
+++ b/document/src/tests/serialization/vespadocumentserializer_test.cpp
@@ -44,6 +44,7 @@
#include <vespa/vespalib/objects/nbostream.h>
#include <vespa/vespalib/testkit/testapp.h>
#include <vespa/document/base/exceptions.h>
+#include <vespa/vespalib/util/compressionconfig.h>
using document::DocumenttypesConfig;
using vespalib::File;
@@ -488,21 +489,11 @@ TEST("requireThatUncompressedStructFieldValueCanBeSerialized") {
checkStructSerialization(value, CompressionConfig::NONE);
}
-TEST("requireThatCompressedStructFieldValueCanBeSerialized") {
- StructDataType structType(getStructDataType());
- StructFieldValue value = getStructFieldValue(structType);
- const_cast<StructDataType *>(static_cast<const StructDataType *>(value.getDataType()))
- ->setCompressionConfig(CompressionConfig(CompressionConfig::LZ4, 0, 95));
- checkStructSerialization(value, CompressionConfig::LZ4);
-}
-
TEST("requireThatReserializationIsUnompressedIfUnmodified") {
StructDataType structType(getStructDataType());
StructFieldValue value = getStructFieldValue(structType);
- const_cast<StructDataType *>(static_cast<const StructDataType *>(value.getDataType()))
- ->setCompressionConfig(CompressionConfig(CompressionConfig::LZ4, 0, 95));
- TEST_DO(checkStructSerialization(value, CompressionConfig::LZ4));
+ TEST_DO(checkStructSerialization(value, CompressionConfig::NONE));
nbostream os;
VespaDocumentSerializer serializer(os);
@@ -512,7 +503,7 @@ TEST("requireThatReserializationIsUnompressedIfUnmodified") {
StructFieldValue value2(struct_type);
VespaDocumentDeserializer deserializer(repo, os, serialization_version);
deserializer.read(value2);
- TEST_DO(checkStructSerialization(value, CompressionConfig::LZ4));
+ TEST_DO(checkStructSerialization(value, CompressionConfig::NONE));
// Lazy serialization of structs....
TEST_DO(checkStructSerialization(value2, CompressionConfig::NONE));
EXPECT_EQUAL(value, value2);
diff --git a/document/src/vespa/document/datatype/structdatatype.cpp b/document/src/vespa/document/datatype/structdatatype.cpp
index bb927e6e872..e4b39099d08 100644
--- a/document/src/vespa/document/datatype/structdatatype.cpp
+++ b/document/src/vespa/document/datatype/structdatatype.cpp
@@ -22,22 +22,19 @@ IMPLEMENT_IDENTIFIABLE(StructDataType, StructuredDataType);
StructDataType::StructDataType() :
StructuredDataType(),
_nameFieldMap(),
- _idFieldMap(),
- _compressionConfig()
+ _idFieldMap()
{ }
StructDataType::StructDataType(vespalib::stringref name)
: StructuredDataType(name),
_nameFieldMap(),
- _idFieldMap(),
- _compressionConfig()
+ _idFieldMap()
{ }
StructDataType::StructDataType(vespalib::stringref name, int32_t dataTypeId)
: StructuredDataType(name, dataTypeId),
_nameFieldMap(),
- _idFieldMap(),
- _compressionConfig()
+ _idFieldMap()
{ }
StructDataType::~StructDataType() = default;
@@ -54,11 +51,6 @@ StructDataType::print(std::ostream& out, bool verbose,
out << "StructDataType(" << getName();
if (verbose) {
out << ", id " << getId();
- if (_compressionConfig.type != CompressionConfig::NONE) {
- out << ", Compression(" << _compressionConfig.type << ","
- << int(_compressionConfig.compressionLevel) << ","
- << int(_compressionConfig.threshold) << ")";
- }
}
out << ")";
if (verbose) {
diff --git a/document/src/vespa/document/datatype/structdatatype.h b/document/src/vespa/document/datatype/structdatatype.h
index 624ce3011ff..ace9edfb0ab 100644
--- a/document/src/vespa/document/datatype/structdatatype.h
+++ b/document/src/vespa/document/datatype/structdatatype.h
@@ -11,7 +11,6 @@
#include <vespa/document/datatype/structureddatatype.h>
#include <vespa/vespalib/stllike/hash_map.h>
-#include <vespa/vespalib/util/compressionconfig.h>
#include <memory>
namespace document {
@@ -20,7 +19,6 @@ class StructDataType final : public StructuredDataType {
public:
using UP = std::unique_ptr<StructDataType>;
using SP = std::shared_ptr<StructDataType>;
- using CompressionConfig = vespalib::compression::CompressionConfig;
StructDataType();
StructDataType(vespalib::stringref name);
@@ -65,9 +63,6 @@ public:
Field::Set getFieldSet() const override;
StructDataType* clone() const override;
- void setCompressionConfig(const CompressionConfig& cfg) { _compressionConfig = cfg; };
- const CompressionConfig& getCompressionConfig() const { return _compressionConfig; }
-
DECLARE_IDENTIFIABLE(StructDataType);
private:
@@ -75,7 +70,6 @@ private:
using IntFieldMap = vespalib::hash_map<int32_t, Field::SP>;
StringFieldMap _nameFieldMap;
IntFieldMap _idFieldMap;
- CompressionConfig _compressionConfig;
/** @return "" if not conflicting. Error message otherwise. */
vespalib::string containsConflictingField(const Field& field) const;
diff --git a/document/src/vespa/document/fieldvalue/serializablearray.cpp b/document/src/vespa/document/fieldvalue/serializablearray.cpp
index 7fdb7005fa8..605e4a698df 100644
--- a/document/src/vespa/document/fieldvalue/serializablearray.cpp
+++ b/document/src/vespa/document/fieldvalue/serializablearray.cpp
@@ -2,11 +2,9 @@
#include "serializablearray.h"
#include <vespa/document/util/serializableexceptions.h>
#include <vespa/document/util/bytebuffer.h>
-#include <vespa/vespalib/util/compressor.h>
#include <vespa/vespalib/stllike/hash_map.hpp>
-#include <vespa/vespalib/data/databuffer.h>
#include <algorithm>
-
+#include <cassert>
#include <vespa/log/log.h>
LOG_SETUP(".document.serializable-array");
@@ -27,15 +25,10 @@ public:
}
void
-SerializableArray::set(EntryMap entries, ByteBuffer buffer,
- CompressionConfig::Type comp_type, uint32_t uncompressed_length)
+SerializableArray::set(EntryMap entries, ByteBuffer buffer)
{
_entries = std::move(entries);
- if (CompressionConfig::isCompressed(comp_type)) {
- _uncompSerData = deCompress(comp_type, uncompressed_length, std::move(buffer));
- } else {
- _uncompSerData = std::move(buffer);
- }
+ _uncompSerData = std::move(buffer);
}
SerializableArray::SerializableArray() = default;
@@ -148,41 +141,6 @@ SerializableArray::clear(int id)
}
}
-ByteBuffer
-SerializableArray::deCompress(CompressionConfig::Type compression, uint32_t uncompressedLength, ByteBuffer compressed)
-{
- using vespalib::compression::decompress;
- // will only do this once
-
- assert(compressed.getRemaining() != 0);
- assert(CompressionConfig::isCompressed(compression));
-
- ByteBuffer newSerialization(vespalib::alloc::Alloc::alloc(uncompressedLength), uncompressedLength);
- vespalib::DataBuffer unCompressed(newSerialization.getBuffer(), newSerialization.getLength());
- unCompressed.clear();
- try {
- decompress(compression,
- uncompressedLength,
- vespalib::ConstBufferRef(compressed.getBufferAtPos(), compressed.getRemaining()),
- unCompressed,
- false);
- } catch (const std::runtime_error & e) {
- throw DeserializeException(
- make_string( "Document was compressed with code unknown code %d", compression),
- VESPA_STRLOC);
- }
-
- if (unCompressed.getDataLen() != (size_t)uncompressedLength) {
- throw DeserializeException(
- make_string("Did not decompress to the expected length: had %u, wanted %d, got %zu",
- compressed.getRemaining(), uncompressedLength, unCompressed.getDataLen()),
- VESPA_STRLOC);
- }
- assert(newSerialization.getBuffer() == unCompressed.getData());
- LOG_ASSERT(uncompressedLength == newSerialization.getRemaining());
- return newSerialization;
-}
-
const char *
SerializableArray::Entry::getBuffer(const ByteBuffer * readOnlyBuffer) const {
return hasBuffer() ? _data._buffer : readOnlyBuffer->getBuffer() + getOffset();
diff --git a/document/src/vespa/document/fieldvalue/serializablearray.h b/document/src/vespa/document/fieldvalue/serializablearray.h
index a396fd01a39..3fef1d21c3c 100644
--- a/document/src/vespa/document/fieldvalue/serializablearray.h
+++ b/document/src/vespa/document/fieldvalue/serializablearray.h
@@ -16,7 +16,6 @@
#pragma once
-#include <vespa/vespalib/util/compressionconfig.h>
#include <vespa/vespalib/util/buffer.h>
#include <vespa/vespalib/util/memory.h>
#include <vespa/document/util/bytebuffer.h>
@@ -80,7 +79,6 @@ public:
using CP = vespalib::CloneablePtr<SerializableArray>;
using UP = std::unique_ptr<SerializableArray>;
- using CompressionConfig = vespalib::compression::CompressionConfig;
SerializableArray();
SerializableArray(const SerializableArray&);
@@ -89,8 +87,7 @@ public:
SerializableArray& operator=(SerializableArray &&) noexcept;
~SerializableArray();
- void set(EntryMap entries, ByteBuffer buffer,
- CompressionConfig::Type comp_type, uint32_t uncompressed_length);
+ void set(EntryMap entries, ByteBuffer buffer);
/**
* Stores a value in the array.
*
@@ -141,8 +138,6 @@ private:
ByteBuffer _uncompSerData;
std::unique_ptr<serializablearray::BufferMap> _owned;
-
- static ByteBuffer deCompress(CompressionConfig::Type compression, uint32_t uncompressedLength, ByteBuffer compressed); // throw (DeserializeException);
VESPA_DLL_LOCAL EntryMap::const_iterator find(int id) const;
VESPA_DLL_LOCAL EntryMap::iterator find(int id);
};
diff --git a/document/src/vespa/document/fieldvalue/structfieldvalue.cpp b/document/src/vespa/document/fieldvalue/structfieldvalue.cpp
index fc87fbe3a59..555964d8b34 100644
--- a/document/src/vespa/document/fieldvalue/structfieldvalue.cpp
+++ b/document/src/vespa/document/fieldvalue/structfieldvalue.cpp
@@ -23,7 +23,6 @@ using std::vector;
using vespalib::nbostream;
using vespalib::nbostream_longlivedbuf;
using vespalib::make_string;
-using vespalib::compression::CompressionConfig;
using namespace vespalib::xml;
namespace document {
@@ -50,24 +49,14 @@ StructFieldValue::getStructType() const {
return static_cast<const StructDataType &>(getType());
}
-const CompressionConfig &
-StructFieldValue::getCompressionConfig() const {
- return getStructType().getCompressionConfig();
-}
-
void
-StructFieldValue::lazyDeserialize(const FixedTypeRepo &repo,
- uint16_t version,
- SerializableArray::EntryMap && fm,
- ByteBuffer buffer,
- CompressionConfig::Type comp_type,
- int32_t uncompressed_length)
+StructFieldValue::lazyDeserialize(const FixedTypeRepo &repo, uint16_t version, SerializableArray::EntryMap && fm, ByteBuffer buffer)
{
_repo = &repo.getDocumentTypeRepo();
_doc_type = &repo.getDocumentType();
_version = version;
- _fields.set(std::move(fm), std::move(buffer), comp_type, uncompressed_length);
+ _fields.set(std::move(fm), std::move(buffer));
_hasChanged = false;
}
diff --git a/document/src/vespa/document/fieldvalue/structfieldvalue.h b/document/src/vespa/document/fieldvalue/structfieldvalue.h
index cd8bd0fea0f..ab35dc04421 100644
--- a/document/src/vespa/document/fieldvalue/structfieldvalue.h
+++ b/document/src/vespa/document/fieldvalue/structfieldvalue.h
@@ -34,7 +34,6 @@ private:
public:
using UP = std::unique_ptr<StructFieldValue>;
- using CompressionConfig = vespalib::compression::CompressionConfig;
StructFieldValue(const DataType &type);
StructFieldValue(const StructFieldValue & rhs);
@@ -48,12 +47,7 @@ public:
void setDocumentType(const DocumentType & docType) { _doc_type = & docType; }
const SerializableArray & getFields() const { return _fields; }
- void lazyDeserialize(const FixedTypeRepo &repo,
- uint16_t version,
- SerializableArray::EntryMap && fields,
- ByteBuffer buffer,
- CompressionConfig::Type comp_type,
- int32_t uncompressed_length);
+ void lazyDeserialize(const FixedTypeRepo &repo, uint16_t version, SerializableArray::EntryMap && fields, ByteBuffer buffer);
// returns false if the field could not be serialized.
bool serializeField(int raw_field_id, uint16_t version, FieldValueWriter &writer) const;
@@ -70,8 +64,6 @@ public:
const Field& getField(vespalib::stringref name) const override;
void clear() override;
- const CompressionConfig &getCompressionConfig() const;
-
// FieldValue implementation.
FieldValue& assign(const FieldValue&) override;
int compare(const FieldValue& other) const override;
diff --git a/document/src/vespa/document/repo/documenttyperepo.cpp b/document/src/vespa/document/repo/documenttyperepo.cpp
index b993f50d7b6..15730d14a86 100644
--- a/document/src/vespa/document/repo/documenttyperepo.cpp
+++ b/document/src/vespa/document/repo/documenttyperepo.cpp
@@ -31,7 +31,6 @@ using vespalib::hash_map;
using vespalib::make_string;
using vespalib::string;
using vespalib::stringref;
-using vespalib::compression::CompressionConfig;
namespace document {
@@ -304,14 +303,6 @@ void addStruct(int32_t id, const Datatype::Sstruct &s, Repo &repo) {
}
}
- CompressionConfig::Type type = CompressionConfig::NONE;
- if (s.compression.type == Datatype::Sstruct::Compression::Type::LZ4) {
- type = CompressionConfig::LZ4;
- }
-
- struct_type->setCompressionConfig(
- CompressionConfig(type, s.compression.level, s.compression.threshold, s.compression.minsize));
-
for (size_t i = 0; i < s.field.size(); ++i) {
addField(s.field[i], repo, *struct_type);
}
diff --git a/document/src/vespa/document/serialization/vespadocumentdeserializer.cpp b/document/src/vespa/document/serialization/vespadocumentdeserializer.cpp
index 6dd6a4c21bd..d15ebf311bb 100644
--- a/document/src/vespa/document/serialization/vespadocumentdeserializer.cpp
+++ b/document/src/vespa/document/serialization/vespadocumentdeserializer.cpp
@@ -22,6 +22,8 @@
#include <vespa/vespalib/data/slime/slime.h>
#include <vespa/vespalib/stllike/asciistream.h>
#include <vespa/vespalib/util/backtrace.h>
+#include <vespa/vespalib/util/compressor.h>
+#include <vespa/vespalib/data/databuffer.h>
#include <vespa/eval/eval/fast_value.h>
#include <vespa/eval/eval/value_codec.h>
#include <vespa/eval/eval/value.h>
@@ -42,6 +44,8 @@ using vespalib::nbostream;
using vespalib::Memory;
using vespalib::stringref;
using vespalib::compression::CompressionConfig;
+using vespalib::ConstBufferRef;
+using vespalib::make_string_short::fmt;
using vespalib::eval::FastValueBuilderFactory;
namespace document {
@@ -274,7 +278,8 @@ typedef SerializableArray::EntryMap FieldInfo;
void readFieldInfo(nbostream& input, SerializableArray::EntryMap & field_info) __attribute__((noinline));
-void readFieldInfo(nbostream& input, SerializableArray::EntryMap & field_info) {
+void
+readFieldInfo(nbostream& input, SerializableArray::EntryMap & field_info) {
size_t field_count = getInt1_4Bytes(input);
field_info.reserve(field_count);
uint32_t offset = 0;
@@ -285,6 +290,36 @@ void readFieldInfo(nbostream& input, SerializableArray::EntryMap & field_info) {
offset += size;
}
}
+
+ByteBuffer
+deCompress(CompressionConfig::Type compression, uint32_t uncompressedLength, vespalib::ConstBufferRef compressed) __attribute__((noinline));
+
+ByteBuffer
+deCompress(CompressionConfig::Type compression, uint32_t uncompressedLength, vespalib::ConstBufferRef compressed)
+{
+ using vespalib::compression::decompress;
+
+ assert(compressed.size() != 0);
+
+ ByteBuffer newSerialization(vespalib::alloc::Alloc::alloc(uncompressedLength), uncompressedLength);
+ vespalib::DataBuffer unCompressed(newSerialization.getBuffer(), newSerialization.getLength());
+ unCompressed.clear();
+ try {
+ decompress(compression, uncompressedLength, compressed,unCompressed,false);
+ } catch (const std::runtime_error & e) {
+ throw DeserializeException(fmt( "Document was compressed with code unknown code %d", compression), VESPA_STRLOC);
+ }
+
+ if (unCompressed.getDataLen() != (size_t)uncompressedLength) {
+ throw DeserializeException(fmt("Did not decompress to the expected length: had %lu, wanted %d, got %zu",
+ compressed.size(), uncompressedLength, unCompressed.getDataLen()),
+ VESPA_STRLOC);
+ }
+ assert(newSerialization.getBuffer() == unCompressed.getData());
+ LOG_ASSERT(uncompressedLength == newSerialization.getRemaining());
+ return newSerialization;
+}
+
} // namespace
void VespaDocumentDeserializer::readStructNoReset(StructFieldValue &value) {
@@ -308,19 +343,18 @@ void VespaDocumentDeserializer::readStructNoReset(StructFieldValue &value) {
}
if (data_size > 0) {
- ByteBuffer buffer(_stream.isLongLivedBuffer()
- ? ByteBuffer(_stream.peek(), data_size)
- : ByteBuffer::copyBuffer(_stream.peek(), data_size));
+ ByteBuffer buffer = CompressionConfig::isCompressed(compression_type)
+ ? deCompress(compression_type, uncompressed_size, ConstBufferRef(_stream.peek(), data_size))
+ : _stream.isLongLivedBuffer()
+ ? ByteBuffer(_stream.peek(), data_size)
+ : ByteBuffer::copyBuffer(_stream.peek(), data_size);
if (value.getFields().empty()) {
- LOG(spam, "Lazy deserializing into %s with _version %u",
- value.getDataType()->getName().c_str(), _version);
- value.lazyDeserialize(_repo, _version, std::move(field_info),
- std::move(buffer), compression_type, uncompressed_size);
+ LOG(spam, "Lazy deserializing into %s with _version %u", value.getDataType()->getName().c_str(), _version);
+ value.lazyDeserialize(_repo, _version, std::move(field_info), std::move(buffer));
} else {
LOG(debug, "Legacy dual header/body format. -> Merging.");
StructFieldValue tmp(*value.getDataType());
- tmp.lazyDeserialize(_repo, _version, std::move(field_info),
- std::move(buffer), compression_type, uncompressed_size);
+ tmp.lazyDeserialize(_repo, _version, std::move(field_info), std::move(buffer));
for (const auto & entry : tmp) {
try {
FieldValue::UP decoded = tmp.getValue(entry);
@@ -367,7 +401,7 @@ VespaDocumentDeserializer::readTensor()
{
size_t length = _stream.getInt1_4Bytes();
if (length > _stream.size()) {
- throw DeserializeException(vespalib::make_string("Stream failed size(%zu), needed(%zu) to deserialize tensor field value", _stream.size(), length),
+ throw DeserializeException(fmt("Stream failed size(%zu), needed(%zu) to deserialize tensor field value", _stream.size(), length),
VESPA_STRLOC);
}
std::unique_ptr<vespalib::eval::Value> tensor;
diff --git a/document/src/vespa/document/serialization/vespadocumentserializer.cpp b/document/src/vespa/document/serialization/vespadocumentserializer.cpp
index 45a4375a19d..dc37e036775 100644
--- a/document/src/vespa/document/serialization/vespadocumentserializer.cpp
+++ b/document/src/vespa/document/serialization/vespadocumentserializer.cpp
@@ -235,36 +235,6 @@ serializeFields(const StructFieldValue &value, nbostream &stream,
}
}
-bool compressionSufficient(const CompressionConfig &config, uint64_t old_size, size_t new_size)
-{
- return (new_size + 8) < (old_size * config.threshold / 100);
-}
-
-bool bigEnough(size_t size, const CompressionConfig &config)
-{
- return (size >= config.minSize);
-}
-
-vespalib::ConstBufferRef
-compressStream(const CompressionConfig &config, nbostream &stream, vespalib::DataBuffer & compressed_data)
-{
- using vespalib::compression::compress;
- vespalib::ConstBufferRef buf(stream.data(), stream.size());
- if (config.useCompression() && bigEnough(stream.size(), config)) {
- CompressionConfig::Type compressedType = compress(config,
- vespalib::ConstBufferRef(stream.data(), stream.size()),
- compressed_data, false);
- if (compressedType != config.type ||
- ! compressionSufficient(config, stream.size(), compressed_data.getDataLen()))
- {
- compressed_data.clear();
- } else {
- buf = vespalib::ConstBufferRef(compressed_data.getData(), compressed_data.getDataLen());
- }
- }
- return buf;
-}
-
void
putFieldInfo(nbostream &output, const vector<pair<uint32_t, uint32_t> > &field_info) {
putInt1_4Bytes(output, field_info.size());
@@ -293,14 +263,11 @@ VespaDocumentSerializer::structNeedsReserialization(const StructFieldValue &valu
return true;
}
- if (value.getCompressionConfig().type == CompressionConfig::NONE) {
- return false;
- }
-
- return true;
+ return false;
}
-void VespaDocumentSerializer::writeUnchanged(const SerializableArray &value) {
+void
+VespaDocumentSerializer::writeUnchanged(const SerializableArray &value) {
vector<pair<uint32_t, uint32_t> > field_info;
const std::vector<SerializableArray::Entry>& entries = value.getEntries();
@@ -321,28 +288,18 @@ void VespaDocumentSerializer::writeUnchanged(const SerializableArray &value) {
}
}
-void VespaDocumentSerializer::write(const StructFieldValue &value, const FieldSet& fieldSet)
+void
+VespaDocumentSerializer::write(const StructFieldValue &value, const FieldSet& fieldSet)
{
nbostream value_stream;
vector<pair<uint32_t, uint32_t> > field_info;
serializeFields(value, value_stream, field_info, fieldSet);
- const CompressionConfig &comp_config = value.getCompressionConfig();
- vespalib::DataBuffer compressed_data;
- vespalib::ConstBufferRef toSerialize = compressStream(comp_config, value_stream, compressed_data);
-
- uint8_t comp_type = (compressed_data.getDataLen() == 0)
- ? (comp_config.type == CompressionConfig::NONE
- ? CompressionConfig::NONE
- : CompressionConfig::UNCOMPRESSABLE)
- : comp_config.type;
- _stream << static_cast<uint32_t>(toSerialize.size());
+ uint8_t comp_type = CompressionConfig::NONE;
+ _stream << static_cast<uint32_t>(value_stream.size());
_stream << comp_type;
- if (compressed_data.getDataLen() != 0) {
- putInt2_4_8Bytes(_stream, value_stream.size());
- }
putFieldInfo(_stream, field_info);
- _stream.write(toSerialize.c_str(), toSerialize.size());
+ _stream.write(value_stream.data(), value_stream.size());
}
void