aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2021-12-08 09:01:12 +0000
committer Henning Baldersheim <balder@yahoo-inc.com> 2021-12-08 11:36:25 +0000
commit bf37710f4ce4dd8e676e19e102e77272237d8b52 (patch)
tree 95c98ec72fb7877206e2626797aba83066500fbb
parent 2dd0a7ad258fa182ed3ac8a199751dd60f73b0f7 (diff)
Separate spread and bloat
-rw-r--r-- searchcore/src/tests/proton/documentdb/document_subdbs/document_subdbs_test.cpp 18
-rw-r--r-- searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_common.h 1
-rw-r--r-- searchcore/src/vespa/searchcore/proton/docsummary/summarycompacttarget.cpp 85
-rw-r--r-- searchcore/src/vespa/searchcore/proton/docsummary/summarycompacttarget.h 38
-rw-r--r-- searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp 3
-rw-r--r-- searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp 5
-rw-r--r-- searchcore/src/vespa/searchcore/proton/test/dummy_document_store.h 11
-rw-r--r-- searchcore/src/vespa/searchcore/proton/test/test.h 1
-rw-r--r-- searchlib/src/tests/docstore/document_store/document_store_test.cpp 2
-rw-r--r-- searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp 15
-rw-r--r-- searchlib/src/vespa/searchlib/docstore/documentstore.cpp 10
-rw-r--r-- searchlib/src/vespa/searchlib/docstore/documentstore.h 5
-rw-r--r-- searchlib/src/vespa/searchlib/docstore/idatastore.h 2
-rw-r--r-- searchlib/src/vespa/searchlib/docstore/idocumentstore.h 12
-rw-r--r-- searchlib/src/vespa/searchlib/docstore/logdatastore.cpp 90
-rw-r--r-- searchlib/src/vespa/searchlib/docstore/logdatastore.h 15
-rw-r--r-- searchlib/src/vespa/searchlib/docstore/logdocumentstore.h 3
17 files changed, 186 insertions, 130 deletions
diff --git a/searchcore/src/tests/proton/documentdb/document_subdbs/document_subdbs_test.cpp b/searchcore/src/tests/proton/documentdb/document_subdbs/document_subdbs_test.cpp
index 7f28ccd0737..c16dc349d83 100644
--- a/searchcore/src/tests/proton/documentdb/document_subdbs/document_subdbs_test.cpp
+++ b/searchcore/src/tests/proton/documentdb/document_subdbs/document_subdbs_test.cpp
@@ -702,29 +702,31 @@ assertTarget(const vespalib::string &name,
TEST_F("require that flush targets can be retrieved", FastAccessFixture)
{
IFlushTarget::List targets = getFlushTargets(f);
- EXPECT_EQUAL(7u, targets.size());
+ EXPECT_EQUAL(8u, targets.size());
EXPECT_EQUAL("subdb.attribute.flush.attr1", targets[0]->getName());
EXPECT_EQUAL("subdb.attribute.shrink.attr1", targets[1]->getName());
EXPECT_EQUAL("subdb.documentmetastore.flush", targets[2]->getName());
EXPECT_EQUAL("subdb.documentmetastore.shrink", targets[3]->getName());
- EXPECT_EQUAL("subdb.summary.compact", targets[4]->getName());
- EXPECT_EQUAL("subdb.summary.flush", targets[5]->getName());
- EXPECT_EQUAL("subdb.summary.shrink", targets[6]->getName());
+ EXPECT_EQUAL("subdb.summary.compact_bloat", targets[4]->getName());
+ EXPECT_EQUAL("subdb.summary.compact_spread", targets[5]->getName());
+ EXPECT_EQUAL("subdb.summary.flush", targets[6]->getName());
+ EXPECT_EQUAL("subdb.summary.shrink", targets[7]->getName());
}
TEST_F("require that flush targets can be retrieved", SearchableFixture)
{
IFlushTarget::List targets = getFlushTargets(f);
- EXPECT_EQUAL(9u, targets.size());
+ EXPECT_EQUAL(10u, targets.size());
EXPECT_TRUE(assertTarget("subdb.attribute.flush.attr1", FType::SYNC, FComponent::ATTRIBUTE, *targets[0]));
EXPECT_TRUE(assertTarget("subdb.attribute.shrink.attr1", FType::GC, FComponent::ATTRIBUTE, *targets[1]));
EXPECT_TRUE(assertTarget("subdb.documentmetastore.flush", FType::SYNC, FComponent::ATTRIBUTE, *targets[2]));
EXPECT_TRUE(assertTarget("subdb.documentmetastore.shrink", FType::GC, FComponent::ATTRIBUTE, *targets[3]));
EXPECT_TRUE(assertTarget("subdb.memoryindex.flush", FType::FLUSH, FComponent::INDEX, *targets[4]));
EXPECT_TRUE(assertTarget("subdb.memoryindex.fusion", FType::GC, FComponent::INDEX, *targets[5]));
- EXPECT_TRUE(assertTarget("subdb.summary.compact", FType::GC, FComponent::DOCUMENT_STORE, *targets[6]));
- EXPECT_TRUE(assertTarget("subdb.summary.flush", FType::SYNC, FComponent::DOCUMENT_STORE, *targets[7]));
- EXPECT_TRUE(assertTarget("subdb.summary.shrink", FType::GC, FComponent::DOCUMENT_STORE, *targets[8]));
+ EXPECT_TRUE(assertTarget("subdb.summary.compact_bloat", FType::GC, FComponent::DOCUMENT_STORE, *targets[6]));
+ EXPECT_TRUE(assertTarget("subdb.summary.compact_spread", FType::GC, FComponent::DOCUMENT_STORE, *targets[7]));
+ EXPECT_TRUE(assertTarget("subdb.summary.flush", FType::SYNC, FComponent::DOCUMENT_STORE, *targets[8]));
+ EXPECT_TRUE(assertTarget("subdb.summary.shrink", FType::GC, FComponent::DOCUMENT_STORE, *targets[9]));
}
TEST_F("require that only fast-access attributes are instantiated", FastAccessOnlyFixture)
diff --git a/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_common.h b/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_common.h
index 42976104836..8e1b23eba67 100644
--- a/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_common.h
+++ b/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_common.h
@@ -15,6 +15,7 @@
#include <vespa/searchcore/proton/test/clusterstatehandler.h>
#include <vespa/searchcore/proton/test/disk_mem_usage_notifier.h>
#include <vespa/searchcore/proton/test/test.h>
+#include <vespa/searchcore/proton/test/dummy_document_store.h>
#include <vespa/vespalib/util/idestructorcallback.h>
#include <vespa/searchlib/index/docbuilder.h>
diff --git a/searchcore/src/vespa/searchcore/proton/docsummary/summarycompacttarget.cpp b/searchcore/src/vespa/searchcore/proton/docsummary/summarycompacttarget.cpp
index 4e0cf3f9059..06bf8d0a8a6 100644
--- a/searchcore/src/vespa/searchcore/proton/docsummary/summarycompacttarget.cpp
+++ b/searchcore/src/vespa/searchcore/proton/docsummary/summarycompacttarget.cpp
@@ -9,21 +9,26 @@ using search::SerialNum;
using vespalib::makeLambdaTask;
using searchcorespi::FlushStats;
using searchcorespi::IFlushTarget;
+using searchcorespi::FlushTask;
namespace proton {
namespace {
-class Compacter : public searchcorespi::FlushTask {
+class Compacter : public FlushTask {
private:
IDocumentStore & _docStore;
FlushStats & _stats;
SerialNum _currSerial;
+ virtual void compact(IDocumentStore & docStore, SerialNum currSerial) const = 0;
public:
- Compacter(IDocumentStore & docStore, FlushStats & stats, SerialNum currSerial) :
- _docStore(docStore), _stats(stats), _currSerial(currSerial) {}
+ Compacter(IDocumentStore & docStore, FlushStats & stats, SerialNum currSerial)
+ : _docStore(docStore),
+ _stats(stats),
+ _currSerial(currSerial)
+ {}
void run() override {
- _docStore.compact(_currSerial);
+ compact(_docStore, _currSerial);
updateStats();
}
void updateStats() {
@@ -36,10 +41,32 @@ public:
}
};
+class CompactBloat : public Compacter {
+public:
+ CompactBloat(IDocumentStore & docStore, FlushStats & stats, SerialNum currSerial)
+ : Compacter(docStore, stats, currSerial)
+ {}
+private:
+ void compact(IDocumentStore & docStore, SerialNum currSerial) const override {
+ docStore.compactBloat(currSerial);
+ }
+};
+
+class CompactSpread : public Compacter {
+public:
+ CompactSpread(IDocumentStore & docStore, FlushStats & stats, SerialNum currSerial)
+ : Compacter(docStore, stats, currSerial)
+ {}
+private:
+ void compact(IDocumentStore & docStore, SerialNum currSerial) const override {
+ docStore.compactSpread(currSerial);
+ }
+};
+
}
-SummaryCompactTarget::SummaryCompactTarget(vespalib::Executor & summaryService, IDocumentStore & docStore)
- : IFlushTarget("summary.compact", Type::GC, Component::DOCUMENT_STORE),
+SummaryGCTarget::SummaryGCTarget(const vespalib::string & name, vespalib::Executor & summaryService, IDocumentStore & docStore)
+ : IFlushTarget(name, Type::GC, Component::DOCUMENT_STORE),
_summaryService(summaryService),
_docStore(docStore),
_lastStats()
@@ -48,37 +75,69 @@ SummaryCompactTarget::SummaryCompactTarget(vespalib::Executor & summaryService,
}
IFlushTarget::MemoryGain
-SummaryCompactTarget::getApproxMemoryGain() const
+SummaryGCTarget::getApproxMemoryGain() const
{
return MemoryGain::noGain(_docStore.memoryUsed());
}
IFlushTarget::DiskGain
-SummaryCompactTarget::getApproxDiskGain() const
+SummaryGCTarget::getApproxDiskGain() const
{
size_t total(_docStore.getDiskFootprint());
- return DiskGain(total, total - std::min(total, _docStore.getMaxCompactGain()));
+ return DiskGain(total, total - std::min(total, getBloat(_docStore)));
}
IFlushTarget::Time
-SummaryCompactTarget::getLastFlushTime() const
+SummaryGCTarget::getLastFlushTime() const
{
return vespalib::system_clock::now();
}
SerialNum
-SummaryCompactTarget::getFlushedSerialNum() const
+SummaryGCTarget::getFlushedSerialNum() const
{
return _docStore.tentativeLastSyncToken();
}
IFlushTarget::Task::UP
-SummaryCompactTarget::initFlush(SerialNum currentSerial, std::shared_ptr<search::IFlushToken>)
+SummaryGCTarget::initFlush(SerialNum currentSerial, std::shared_ptr<search::IFlushToken>)
{
std::promise<Task::UP> promise;
std::future<Task::UP> future = promise.get_future();
- _summaryService.execute(makeLambdaTask([&]() { promise.set_value(std::make_unique<Compacter>(_docStore, _lastStats, currentSerial)); }));
+ _summaryService.execute(makeLambdaTask([this, &promise,currentSerial]() {
+ promise.set_value(create(_docStore, _lastStats, currentSerial));
+ }));
return future.get();
}
+SummaryCompactBloatTarget::SummaryCompactBloatTarget(vespalib::Executor & summaryService, IDocumentStore & docStore)
+ : SummaryGCTarget("summary.compact_bloat", summaryService, docStore)
+{
+}
+
+size_t
+SummaryCompactBloatTarget::getBloat(const search::IDocumentStore & docStore) const {
+ return docStore.getDiskBloat();
+}
+
+FlushTask::UP
+SummaryCompactBloatTarget::create(IDocumentStore & docStore, FlushStats & stats, SerialNum currSerial) {
+ return std::make_unique<CompactBloat>(docStore, stats, currSerial);
+}
+
+SummaryCompactSpreadTarget::SummaryCompactSpreadTarget(vespalib::Executor & summaryService, IDocumentStore & docStore)
+ : SummaryGCTarget("summary.compact_spread", summaryService, docStore)
+{
+}
+
+size_t
+SummaryCompactSpreadTarget::getBloat(const search::IDocumentStore & docStore) const {
+ return docStore.getMaxSpreadAsBloat();
+}
+
+FlushTask::UP
+SummaryCompactSpreadTarget::create(IDocumentStore & docStore, FlushStats & stats, SerialNum currSerial) {
+ return std::make_unique<CompactSpread>(docStore, stats, currSerial);
+}
+
} // namespace proton
diff --git a/searchcore/src/vespa/searchcore/proton/docsummary/summarycompacttarget.h b/searchcore/src/vespa/searchcore/proton/docsummary/summarycompacttarget.h
index c8035a544f2..529aa3b816e 100644
--- a/searchcore/src/vespa/searchcore/proton/docsummary/summarycompacttarget.h
+++ b/searchcore/src/vespa/searchcore/proton/docsummary/summarycompacttarget.h
@@ -12,16 +12,10 @@ namespace proton {
/**
* This class implements the IFlushTarget interface to proxy a summary manager.
*/
-class SummaryCompactTarget : public searchcorespi::IFlushTarget {
-private:
- using FlushStats = searchcorespi::FlushStats;
- vespalib::Executor &_summaryService;
- search::IDocumentStore & _docStore;
- FlushStats _lastStats;
-
+class SummaryGCTarget : public searchcorespi::IFlushTarget {
public:
- SummaryCompactTarget(vespalib::Executor & summaryService, search::IDocumentStore & docStore);
-
+ using FlushStats = searchcorespi::FlushStats;
+ using IDocumentStore = search::IDocumentStore;
MemoryGain getApproxMemoryGain() const override;
DiskGain getApproxDiskGain() const override;
SerialNum getFlushedSerialNum() const override;
@@ -31,6 +25,32 @@ public:
FlushStats getLastFlushStats() const override { return _lastStats; }
uint64_t getApproxBytesToWriteToDisk() const override { return 0; }
+protected:
+ SummaryGCTarget(const vespalib::string &, vespalib::Executor & summaryService, IDocumentStore & docStore);
+private:
+
+ virtual size_t getBloat(const IDocumentStore & docStore) const = 0;
+ virtual Task::UP create(IDocumentStore & docStore, FlushStats & stats, SerialNum currSerial) = 0;
+
+ vespalib::Executor &_summaryService;
+ IDocumentStore & _docStore;
+ FlushStats _lastStats;
+};
+
+class SummaryCompactBloatTarget : public SummaryGCTarget {
+private:
+ size_t getBloat(const search::IDocumentStore & docStore) const override;
+ Task::UP create(IDocumentStore & docStore, FlushStats & stats, SerialNum currSerial) override;
+public:
+ SummaryCompactBloatTarget(vespalib::Executor & summaryService, IDocumentStore & docStore);
+};
+
+class SummaryCompactSpreadTarget : public SummaryGCTarget {
+private:
+ size_t getBloat(const search::IDocumentStore & docStore) const override;
+ Task::UP create(IDocumentStore & docStore, FlushStats & stats, SerialNum currSerial) override;
+public:
+ SummaryCompactSpreadTarget(vespalib::Executor & summaryService, IDocumentStore & docStore);
};
} // namespace proton
diff --git a/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp b/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp
index eaf5a907808..28a91e1444d 100644
--- a/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp
+++ b/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp
@@ -200,7 +200,8 @@ SummaryManager::getFlushTargets(vespalib::Executor & summaryService)
IFlushTarget::List ret;
ret.push_back(std::make_shared<SummaryFlushTarget>(getBackingStore(), summaryService));
if (dynamic_cast<LogDocumentStore *>(_docStore.get()) != nullptr) {
- ret.push_back(std::make_shared<SummaryCompactTarget>(summaryService, getBackingStore()));
+ ret.push_back(std::make_shared<SummaryCompactBloatTarget>(summaryService, getBackingStore()));
+ ret.push_back(std::make_shared<SummaryCompactSpreadTarget>(summaryService, getBackingStore()));
}
ret.push_back(createShrinkLidSpaceFlushTarget(summaryService, _docStore));
return ret;
diff --git a/searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp b/searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp
index a9873a80d0e..a77ff93c002 100644
--- a/searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp
+++ b/searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp
@@ -197,7 +197,7 @@ getStoreConfig(const ProtonConfig::Summary::Cache & cache, const HwInfo & hwInfo
}
LogDocumentStore::Config
-deriveConfig(const ProtonConfig::Summary & summary, const ProtonConfig::Flush::Memory & flush, const HwInfo & hwInfo) {
+deriveConfig(const ProtonConfig::Summary & summary, const HwInfo & hwInfo) {
DocumentStore::Config config(getStoreConfig(summary.cache, hwInfo));
const ProtonConfig::Summary::Log & log(summary.log);
const ProtonConfig::Summary::Log::Chunk & chunk(log.chunk);
@@ -205,7 +205,6 @@ deriveConfig(const ProtonConfig::Summary & summary, const ProtonConfig::Flush::M
LogDataStore::Config logConfig;
logConfig.setMaxFileSize(log.maxfilesize)
.setMaxNumLids(log.maxnumlids)
- .setMaxDiskBloatFactor(std::min(flush.diskbloatfactor, flush.each.diskbloatfactor))
.setMaxBucketSpread(log.maxbucketspread).setMinFileSizeFactor(log.minfilesizefactor)
.compactCompression(deriveCompression(log.compact.compression))
.setFileConfig(fileConfig).disableCrcOnRead(chunk.skipcrconread);
@@ -213,7 +212,7 @@ deriveConfig(const ProtonConfig::Summary & summary, const ProtonConfig::Flush::M
}
search::LogDocumentStore::Config buildStoreConfig(const ProtonConfig & proton, const HwInfo & hwInfo) {
- return deriveConfig(proton.summary, proton.flush.memory, hwInfo);
+ return deriveConfig(proton.summary, hwInfo);
}
using AttributesConfigSP = DocumentDBConfig::AttributesConfigSP;
diff --git a/searchcore/src/vespa/searchcore/proton/test/dummy_document_store.h b/searchcore/src/vespa/searchcore/proton/test/dummy_document_store.h
index d9b83bfc3a8..7194cc4d403 100644
--- a/searchcore/src/vespa/searchcore/proton/test/dummy_document_store.h
+++ b/searchcore/src/vespa/searchcore/proton/test/dummy_document_store.h
@@ -10,13 +10,11 @@ struct DummyDocumentStore : public search::IDocumentStore
{
vespalib::string _baseDir;
- DummyDocumentStore()
- : _baseDir("")
- {}
+ DummyDocumentStore() = default;
DummyDocumentStore(const vespalib::string &baseDir)
: _baseDir(baseDir)
{}
- ~DummyDocumentStore() {}
+ ~DummyDocumentStore() = default;
DocumentUP read(search::DocumentIdT, const document::DocumentTypeRepo &) const override {
return DocumentUP();
}
@@ -25,7 +23,8 @@ struct DummyDocumentStore : public search::IDocumentStore
void remove(uint64_t, search::DocumentIdT) override {}
void flush(uint64_t) override {}
uint64_t initFlush(uint64_t) override { return 0; }
- void compact(uint64_t) override {}
+ void compactBloat(uint64_t) override {}
+ void compactSpread(uint64_t) override {}
uint64_t lastSyncToken() const override { return 0; }
uint64_t tentativeLastSyncToken() const override { return 0; }
vespalib::system_time getLastFlushTime() const override { return vespalib::system_time(); }
@@ -34,7 +33,7 @@ struct DummyDocumentStore : public search::IDocumentStore
size_t memoryMeta() const override { return 0; }
size_t getDiskFootprint() const override { return 0; }
size_t getDiskBloat() const override { return 0; }
- size_t getMaxCompactGain() const override { return getDiskBloat(); }
+ size_t getMaxSpreadAsBloat() const override { return getDiskBloat(); }
search::CacheStats getCacheStats() const override { return search::CacheStats(); }
const vespalib::string &getBaseDir() const override { return _baseDir; }
void accept(search::IDocumentStoreReadVisitor &,
diff --git a/searchcore/src/vespa/searchcore/proton/test/test.h b/searchcore/src/vespa/searchcore/proton/test/test.h
index 1494823e899..4231d5e7717 100644
--- a/searchcore/src/vespa/searchcore/proton/test/test.h
+++ b/searchcore/src/vespa/searchcore/proton/test/test.h
@@ -5,7 +5,6 @@
#include "bucketdocuments.h"
#include "bucketstatecalculator.h"
#include "document.h"
-#include "dummy_document_store.h"
#include "dummy_feed_view.h"
#include "dummy_summary_manager.h"
#include "resulthandler.h"
diff --git a/searchlib/src/tests/docstore/document_store/document_store_test.cpp b/searchlib/src/tests/docstore/document_store/document_store_test.cpp
index 1a6b0a5a1c6..f2bec30a349 100644
--- a/searchlib/src/tests/docstore/document_store/document_store_test.cpp
+++ b/searchlib/src/tests/docstore/document_store/document_store_test.cpp
@@ -25,7 +25,7 @@ struct NullDataStore : IDataStore {
size_t memoryMeta() const override { return 0; }
size_t getDiskFootprint() const override { return 0; }
size_t getDiskBloat() const override { return 0; }
- size_t getMaxCompactGain() const override { return 0; }
+ size_t getMaxSpreadAsBloat() const override { return 0; }
uint64_t lastSyncToken() const override { return 0; }
uint64_t tentativeLastSyncToken() const override { return 0; }
vespalib::system_time getLastFlushTime() const override { return vespalib::system_time(); }
diff --git a/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp b/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp
index 07652dfd336..378babb6ee1 100644
--- a/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp
+++ b/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp
@@ -236,7 +236,7 @@ void verifyGrowing(const LogDataStore::Config & config, uint32_t minFiles, uint3
datastore.remove(i + 20000, i);
}
datastore.flush(datastore.initFlush(lastSyncToken));
- datastore.compact(30000);
+ datastore.compactBloat(30000);
datastore.remove(31000, 0);
checkStats(datastore, 31000, 30000);
EXPECT_LESS_EQUAL(minFiles, datastore.getAllActiveFiles().size());
@@ -252,7 +252,7 @@ void verifyGrowing(const LogDataStore::Config & config, uint32_t minFiles, uint3
}
TEST("testGrowingChunkedBySize") {
LogDataStore::Config config;
- config.setMaxFileSize(100000).setMaxDiskBloatFactor(0.1).setMaxBucketSpread(3.0).setMinFileSizeFactor(0.2)
+ config.setMaxFileSize(100000).setMaxBucketSpread(3.0).setMinFileSizeFactor(0.2)
.compactCompression({CompressionConfig::LZ4})
.setFileConfig({{CompressionConfig::LZ4, 9, 60}, 1000});
verifyGrowing(config, 40, 120);
@@ -260,7 +260,7 @@ TEST("testGrowingChunkedBySize") {
TEST("testGrowingChunkedByNumLids") {
LogDataStore::Config config;
- config.setMaxNumLids(1000).setMaxDiskBloatFactor(0.1).setMaxBucketSpread(3.0).setMinFileSizeFactor(0.2)
+ config.setMaxNumLids(1000).setMaxBucketSpread(3.0).setMinFileSizeFactor(0.2)
.compactCompression({CompressionConfig::LZ4})
.setFileConfig({{CompressionConfig::LZ4, 9, 60}, 1000});
verifyGrowing(config,10, 10);
@@ -679,7 +679,7 @@ TEST("testWriteRead") {
EXPECT_LESS(0u, headerFootprint);
EXPECT_EQUAL(datastore.getDiskFootprint(), headerFootprint);
EXPECT_EQUAL(datastore.getDiskBloat(), 0ul);
- EXPECT_EQUAL(datastore.getMaxCompactGain(), 0ul);
+ EXPECT_EQUAL(datastore.getMaxSpreadAsBloat(), 0ul);
datastore.write(1, 0, a[0].c_str(), a[0].size());
fetchAndTest(datastore, 0, a[0].c_str(), a[0].size());
datastore.write(2, 0, a[1].c_str(), a[1].size());
@@ -701,7 +701,7 @@ TEST("testWriteRead") {
EXPECT_EQUAL(datastore.getDiskFootprint(),
2711ul + headerFootprint);
EXPECT_EQUAL(datastore.getDiskBloat(), 0ul);
- EXPECT_EQUAL(datastore.getMaxCompactGain(), 0ul);
+ EXPECT_EQUAL(datastore.getMaxSpreadAsBloat(), 0ul);
datastore.flush(datastore.initFlush(lastSyncToken));
}
{
@@ -715,7 +715,7 @@ TEST("testWriteRead") {
EXPECT_LESS(0u, headerFootprint);
EXPECT_EQUAL(4944ul + headerFootprint, datastore.getDiskFootprint());
EXPECT_EQUAL(0ul, datastore.getDiskBloat());
- EXPECT_EQUAL(0ul, datastore.getMaxCompactGain());
+ EXPECT_EQUAL(0ul, datastore.getMaxSpreadAsBloat());
for(size_t i=0; i < 100; i++) {
fetchAndTest(datastore, i, a[i%2].c_str(), a[i%2].size());
@@ -730,7 +730,7 @@ TEST("testWriteRead") {
EXPECT_EQUAL(7594ul + headerFootprint, datastore.getDiskFootprint());
EXPECT_EQUAL(0ul, datastore.getDiskBloat());
- EXPECT_EQUAL(0ul, datastore.getMaxCompactGain());
+ EXPECT_EQUAL(0ul, datastore.getMaxSpreadAsBloat());
}
FastOS_File::EmptyAndRemoveDirectory("empty");
}
@@ -1050,7 +1050,6 @@ TEST("require that config equality operator detects inequality") {
using C = LogDataStore::Config;
EXPECT_TRUE(C() == C());
EXPECT_FALSE(C() == C().setMaxFileSize(1));
- EXPECT_FALSE(C() == C().setMaxDiskBloatFactor(0.3));
EXPECT_FALSE(C() == C().setMaxBucketSpread(0.3));
EXPECT_FALSE(C() == C().setMinFileSizeFactor(0.3));
EXPECT_FALSE(C() == C().setFileConfig(WriteableFileChunk::Config({}, 70)));
diff --git a/searchlib/src/vespa/searchlib/docstore/documentstore.cpp b/searchlib/src/vespa/searchlib/docstore/documentstore.cpp
index 7aaee7180df..b4ff050c0f6 100644
--- a/searchlib/src/vespa/searchlib/docstore/documentstore.cpp
+++ b/searchlib/src/vespa/searchlib/docstore/documentstore.cpp
@@ -112,7 +112,6 @@ public:
}
-using VisitCache = docstore::VisitCache;
using docstore::Value;
bool
@@ -239,7 +238,14 @@ DocumentStore::remove(uint64_t syncToken, DocumentIdT lid)
}
void
-DocumentStore::compact(uint64_t syncToken)
+DocumentStore::compactBloat(uint64_t syncToken)
+{
+ (void) syncToken;
+ // Most implementations does not offer compact.
+}
+
+void
+DocumentStore::compactSpread(uint64_t syncToken)
{
(void) syncToken;
// Most implementations does not offer compact.
diff --git a/searchlib/src/vespa/searchlib/docstore/documentstore.h b/searchlib/src/vespa/searchlib/docstore/documentstore.h
index b6021d34bef..6402c16cd5e 100644
--- a/searchlib/src/vespa/searchlib/docstore/documentstore.h
+++ b/searchlib/src/vespa/searchlib/docstore/documentstore.h
@@ -72,7 +72,8 @@ public:
void remove(uint64_t syncToken, DocumentIdT lid) override;
void flush(uint64_t syncToken) override;
uint64_t initFlush(uint64_t synctoken) override;
- void compact(uint64_t syncToken) override;
+ void compactBloat(uint64_t syncToken) override;
+ void compactSpread(uint64_t syncToken) override;
uint64_t lastSyncToken() const override;
uint64_t tentativeLastSyncToken() const override;
vespalib::system_time getLastFlushTime() const override;
@@ -80,7 +81,7 @@ public:
size_t memoryUsed() const override { return _backingStore.memoryUsed(); }
size_t getDiskFootprint() const override { return _backingStore.getDiskFootprint(); }
size_t getDiskBloat() const override { return _backingStore.getDiskBloat(); }
- size_t getMaxCompactGain() const override { return _backingStore.getMaxCompactGain(); }
+ size_t getMaxSpreadAsBloat() const override { return _backingStore.getMaxSpreadAsBloat(); }
CacheStats getCacheStats() const override;
size_t memoryMeta() const override { return _backingStore.memoryMeta(); }
const vespalib::string & getBaseDir() const override { return _backingStore.getBaseDir(); }
diff --git a/searchlib/src/vespa/searchlib/docstore/idatastore.h b/searchlib/src/vespa/searchlib/docstore/idatastore.h
index 82656ad7e69..fc0eae1d15e 100644
--- a/searchlib/src/vespa/searchlib/docstore/idatastore.h
+++ b/searchlib/src/vespa/searchlib/docstore/idatastore.h
@@ -121,7 +121,7 @@ public:
* to avoid misuse we let the report a more conservative number here if necessary.
* @return diskspace to be gained.
*/
- virtual size_t getMaxCompactGain() const = 0;
+ virtual size_t getMaxSpreadAsBloat() const = 0;
/**
diff --git a/searchlib/src/vespa/searchlib/docstore/idocumentstore.h b/searchlib/src/vespa/searchlib/docstore/idocumentstore.h
index 0e73e4d7993..d84a5ad7e7e 100644
--- a/searchlib/src/vespa/searchlib/docstore/idocumentstore.h
+++ b/searchlib/src/vespa/searchlib/docstore/idocumentstore.h
@@ -100,7 +100,8 @@ public:
/**
* If possible compact the disk.
**/
- virtual void compact(uint64_t syncToken) = 0;
+ virtual void compactBloat(uint64_t syncToken) = 0;
+ virtual void compactSpread(uint64_t syncToken) = 0;
/**
* The sync token used for the last successful flush() operation,
@@ -153,12 +154,11 @@ public:
virtual size_t getDiskBloat() const = 0;
/**
- * Calculates how much diskspace can be compacted during a flush.
- * default is to return th ebloat limit, but as some targets have some internal limits
- * to avoid misuse we let the report a more conservative number here if necessary.
- * @return diskspace to be gained.
+ * Calculates the gain from keeping buckets close. It is converted to diskbloat
+ * so it can be prioritized accordingly.
+ * @return spread as disk bloat.
*/
- virtual size_t getMaxCompactGain() const = 0;
+ virtual size_t getMaxSpreadAsBloat() const = 0;
/**
* Returns statistics about the cache.
diff --git a/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp
index fd25dd56235..e98cfc810d3 100644
--- a/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp
+++ b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp
@@ -36,7 +36,6 @@ using namespace std::literals;
LogDataStore::Config::Config()
: _maxFileSize(DEFAULT_MAX_FILESIZE),
- _maxDiskBloatFactor(0.2),
_maxBucketSpread(2.5),
_minFileSizeFactor(0.2),
_maxNumLids(DEFAULT_MAX_LIDS_PER_FILE),
@@ -48,7 +47,6 @@ LogDataStore::Config::Config()
bool
LogDataStore::Config::operator == (const Config & rhs) const {
return (_maxBucketSpread == rhs._maxBucketSpread) &&
- (_maxDiskBloatFactor == rhs._maxDiskBloatFactor) &&
(_maxFileSize == rhs._maxFileSize) &&
(_minFileSizeFactor == rhs._minFileSizeFactor) &&
(_skipCrcOnRead == rhs._skipCrcOnRead) &&
@@ -294,46 +292,14 @@ vespalib::string bloatMsg(size_t bloat, size_t usage) {
}
-void
-LogDataStore::compact(uint64_t syncToken)
-{
- uint64_t usage = getDiskFootprint();
- uint64_t bloat = getDiskBloat();
- LOG(debug, "%s", bloatMsg(bloat, usage).c_str());
- const bool doCompact = (_fileChunks.size() > 1);
- if (doCompact) {
- LOG(info, "%s. Will compact", bloatMsg(bloat, usage).c_str());
- compactWorst(_config.getMaxDiskBloatFactor(), _config.getMaxBucketSpread(), isTotalDiskBloatExceeded(usage, bloat));
- }
- flushActiveAndWait(syncToken);
- if (doCompact) {
- usage = getDiskFootprint();
- bloat = getDiskBloat();
- LOG(info, "Done compacting. %s", bloatMsg(bloat, usage).c_str());
- }
-}
-
-bool
-LogDataStore::isTotalDiskBloatExceeded(size_t diskFootPrint, size_t bloat) const {
- const size_t maxConfiguredDiskBloat = diskFootPrint * _config.getMaxDiskBloatFactor();
- return bloat > maxConfiguredDiskBloat;
-}
-
size_t
-LogDataStore::getMaxCompactGain() const
+LogDataStore::getMaxSpreadAsBloat() const
{
- size_t bloat = getDiskBloat();
const size_t diskFootPrint = getDiskFootprint();
- if ( ! isTotalDiskBloatExceeded(diskFootPrint, bloat) ) {
- bloat = 0;
- }
-
const double maxSpread = getMaxBucketSpread();
- size_t spreadAsBloat = diskFootPrint * (1.0 - 1.0/maxSpread);
- if ( maxSpread < _config.getMaxBucketSpread()) {
- spreadAsBloat = 0;
- }
- return (bloat + spreadAsBloat);
+ return (maxSpread > _config.getMaxBucketSpread())
+ ? diskFootPrint * (1.0 - 1.0/maxSpread)
+ : 0;
}
void
@@ -380,40 +346,34 @@ LogDataStore::getMaxBucketSpread() const
}
std::pair<bool, LogDataStore::FileId>
-LogDataStore::findNextToCompact(double bloatLimit, double spreadLimit, bool prioritizeDiskBloat)
+LogDataStore::findNextToCompact(bool dueToBloat)
{
typedef std::multimap<double, FileId, std::greater<double>> CostMap;
- CostMap worstBloat;
- CostMap worstSpread;
+ CostMap worst;
MonitorGuard guard(_updateLock);
for (size_t i(0); i < _fileChunks.size(); i++) {
const auto & fc(_fileChunks[i]);
if (fc && fc->frozen() && (_currentlyCompacting.find(fc->getNameId()) == _currentlyCompacting.end())) {
uint64_t usage = fc->getDiskFootprint();
- uint64_t bloat = fc->getDiskBloat();
- if (_bucketizer) {
- worstSpread.emplace(fc->getBucketSpread(), FileId(i));
- }
- if (usage > 0) {
- double tmp(double(bloat)/usage);
- worstBloat.emplace(tmp, FileId(i));
+ if ( ! dueToBloat && _bucketizer) {
+ worst.emplace(fc->getBucketSpread(), FileId(i));
+ } else if (dueToBloat && usage > 0) {
+ double tmp(double(fc->getDiskBloat())/usage);
+ worst.emplace(tmp, FileId(i));
}
}
}
if (LOG_WOULD_LOG(debug)) {
- for (const auto & it : worstBloat) {
+ for (const auto & it : worst) {
const FileChunk & fc = *_fileChunks[it.second.getId()];
LOG(debug, "File '%s' has bloat '%2.2f' and bucket-spread '%1.4f numChunks=%d , numBuckets=%ld, numUniqueBuckets=%ld",
fc.getName().c_str(), it.first * 100, fc.getBucketSpread(), fc.getNumChunks(), fc.getNumBuckets(), fc.getNumUniqueBuckets());
}
}
std::pair<bool, FileId> retval(false, FileId(-1));
- if ( ! worstBloat.empty() && (worstBloat.begin()->first > bloatLimit) && prioritizeDiskBloat) {
- retval.first = true;
- retval.second = worstBloat.begin()->second;
- } else if ( ! worstSpread.empty() && (worstSpread.begin()->first > spreadLimit)) {
+ if ( ! worst.empty()) {
retval.first = true;
- retval.second = worstSpread.begin()->second;
+ retval.second = worst.begin()->second;
}
if (retval.first) {
_currentlyCompacting.insert(_fileChunks[retval.second.getId()]->getNameId());
@@ -422,10 +382,24 @@ LogDataStore::findNextToCompact(double bloatLimit, double spreadLimit, bool prio
}
void
-LogDataStore::compactWorst(double bloatLimit, double spreadLimit, bool prioritizeDiskBloat) {
- auto worst = findNextToCompact(bloatLimit, spreadLimit, prioritizeDiskBloat);
- if (worst.first) {
- compactFile(worst.second);
+LogDataStore::compactWorst(uint64_t syncToken, bool compactDiskBloat) {
+ uint64_t usage = getDiskFootprint();
+ uint64_t bloat = getDiskBloat();
+ const char * reason = compactDiskBloat ? "bloat" : "spread"; // trigger name used in the log messages below
+ LOG(debug, "%s", bloatMsg(bloat, usage).c_str());
+ const bool doCompact = (_fileChunks.size() > 1); // compaction is only attempted with more than one file chunk
+ if (doCompact) {
+ LOG(info, "%s. Will compact due to %s", bloatMsg(bloat, usage).c_str(), reason); // bloat message first, then reason, matching the format string
+ auto worst = findNextToCompact(compactDiskBloat);
+ if (worst.first) {
+ compactFile(worst.second);
+ }
+ flushActiveAndWait(syncToken);
+ usage = getDiskFootprint(); // re-read so the closing message reports post-compaction numbers
+ bloat = getDiskBloat();
+ LOG(info, "Done compacting due to %s. %s", reason, bloatMsg(bloat, usage).c_str());
+ } else {
+ flushActiveAndWait(syncToken); // nothing to compact, but the active file is still flushed
}
}
diff --git a/searchlib/src/vespa/searchlib/docstore/logdatastore.h b/searchlib/src/vespa/searchlib/docstore/logdatastore.h
index f43dc96fac9..62f87076759 100644
--- a/searchlib/src/vespa/searchlib/docstore/logdatastore.h
+++ b/searchlib/src/vespa/searchlib/docstore/logdatastore.h
@@ -41,7 +41,6 @@ public:
Config & setMaxFileSize(size_t v) { _maxFileSize = v; return *this; }
Config & setMaxNumLids(size_t v) { _maxNumLids = v; return *this; }
- Config & setMaxDiskBloatFactor(double v) { _maxDiskBloatFactor = v; return *this; }
Config & setMaxBucketSpread(double v) { _maxBucketSpread = v; return *this; }
Config & setMinFileSizeFactor(double v) { _minFileSizeFactor = v; return *this; }
@@ -49,7 +48,6 @@ public:
Config & setFileConfig(WriteableFileChunk::Config v) { _fileConfig = v; return *this; }
size_t getMaxFileSize() const { return _maxFileSize; }
- double getMaxDiskBloatFactor() const { return _maxDiskBloatFactor; }
double getMaxBucketSpread() const { return _maxBucketSpread; }
double getMinFileSizeFactor() const { return _minFileSizeFactor; }
uint32_t getMaxNumLids() const { return _maxNumLids; }
@@ -63,7 +61,6 @@ public:
bool operator == (const Config &) const;
private:
size_t _maxFileSize;
- double _maxDiskBloatFactor;
double _maxBucketSpread;
double _minFileSizeFactor;
uint32_t _maxNumLids;
@@ -109,9 +106,10 @@ public:
size_t getDiskFootprint() const override;
size_t getDiskHeaderFootprint() const override;
size_t getDiskBloat() const override;
- size_t getMaxCompactGain() const override;
+ size_t getMaxSpreadAsBloat() const override;
- void compact(uint64_t syncToken);
+ void compactBloat(uint64_t syncToken) { compactWorst(syncToken, true); }
+ void compactSpread(uint64_t syncToken) { compactWorst(syncToken, false);}
const Config & getConfig() const { return _config; }
Config & getConfig() { return _config; }
@@ -180,10 +178,9 @@ private:
class WrapVisitorProgress;
class FileChunkHolder;
- // Implements ISetLid API
void setLid(const ISetLid::unique_lock & guard, uint32_t lid, const LidInfo & lm) override;
- void compactWorst(double bloatLimit, double spreadLimit, bool prioritizeDiskBloat);
+ void compactWorst(uint64_t syncToken, bool compactDiskBloat);
void compactFile(FileId chunkId);
typedef vespalib::RcuVector<uint64_t> LidInfoVector;
@@ -199,8 +196,6 @@ private:
NameIdSet eraseIncompleteCompactedFiles(NameIdSet partList);
void internalFlushAll();
- bool isTotalDiskBloatExceeded(size_t diskFootPrint, size_t bloat) const;
-
NameIdSet scanDir(const vespalib::string &dir, const vespalib::string &suffix);
FileId allocateFileId(const MonitorGuard & guard);
void setNewFileChunk(const MonitorGuard & guard, FileChunk::UP fileChunk);
@@ -245,7 +240,7 @@ private:
return (_fileChunks.empty() ? 0 : _fileChunks.back()->getLastPersistedSerialNum());
}
bool shouldCompactToActiveFile(size_t compactedSize) const;
- std::pair<bool, FileId> findNextToCompact(double bloatLimit, double spreadLimit, bool prioritizeDiskBloat);
+ std::pair<bool, FileId> findNextToCompact(bool compactDiskBloat);
void incGeneration();
bool canShrinkLidSpace(const MonitorGuard &guard) const;
diff --git a/searchlib/src/vespa/searchlib/docstore/logdocumentstore.h b/searchlib/src/vespa/searchlib/docstore/logdocumentstore.h
index de36155bedb..2931f8bce2d 100644
--- a/searchlib/src/vespa/searchlib/docstore/logdocumentstore.h
+++ b/searchlib/src/vespa/searchlib/docstore/logdocumentstore.h
@@ -51,7 +51,8 @@ public:
~LogDocumentStore() override;
void reconfigure(const Config & config);
private:
- void compact(uint64_t syncToken) override { _backingStore.compact(syncToken); }
+ void compactBloat(uint64_t syncToken) override { _backingStore.compactBloat(syncToken); }
+ void compactSpread(uint64_t syncToken) override { _backingStore.compactSpread(syncToken); }
LogDataStore _backingStore;
};