diff options
17 files changed, 186 insertions, 130 deletions
diff --git a/searchcore/src/tests/proton/documentdb/document_subdbs/document_subdbs_test.cpp b/searchcore/src/tests/proton/documentdb/document_subdbs/document_subdbs_test.cpp index 7f28ccd0737..c16dc349d83 100644 --- a/searchcore/src/tests/proton/documentdb/document_subdbs/document_subdbs_test.cpp +++ b/searchcore/src/tests/proton/documentdb/document_subdbs/document_subdbs_test.cpp @@ -702,29 +702,31 @@ assertTarget(const vespalib::string &name, TEST_F("require that flush targets can be retrieved", FastAccessFixture) { IFlushTarget::List targets = getFlushTargets(f); - EXPECT_EQUAL(7u, targets.size()); + EXPECT_EQUAL(8u, targets.size()); EXPECT_EQUAL("subdb.attribute.flush.attr1", targets[0]->getName()); EXPECT_EQUAL("subdb.attribute.shrink.attr1", targets[1]->getName()); EXPECT_EQUAL("subdb.documentmetastore.flush", targets[2]->getName()); EXPECT_EQUAL("subdb.documentmetastore.shrink", targets[3]->getName()); - EXPECT_EQUAL("subdb.summary.compact", targets[4]->getName()); - EXPECT_EQUAL("subdb.summary.flush", targets[5]->getName()); - EXPECT_EQUAL("subdb.summary.shrink", targets[6]->getName()); + EXPECT_EQUAL("subdb.summary.compact_bloat", targets[4]->getName()); + EXPECT_EQUAL("subdb.summary.compact_spread", targets[5]->getName()); + EXPECT_EQUAL("subdb.summary.flush", targets[6]->getName()); + EXPECT_EQUAL("subdb.summary.shrink", targets[7]->getName()); } TEST_F("require that flush targets can be retrieved", SearchableFixture) { IFlushTarget::List targets = getFlushTargets(f); - EXPECT_EQUAL(9u, targets.size()); + EXPECT_EQUAL(10u, targets.size()); EXPECT_TRUE(assertTarget("subdb.attribute.flush.attr1", FType::SYNC, FComponent::ATTRIBUTE, *targets[0])); EXPECT_TRUE(assertTarget("subdb.attribute.shrink.attr1", FType::GC, FComponent::ATTRIBUTE, *targets[1])); EXPECT_TRUE(assertTarget("subdb.documentmetastore.flush", FType::SYNC, FComponent::ATTRIBUTE, *targets[2])); EXPECT_TRUE(assertTarget("subdb.documentmetastore.shrink", FType::GC, FComponent::ATTRIBUTE, 
*targets[3])); EXPECT_TRUE(assertTarget("subdb.memoryindex.flush", FType::FLUSH, FComponent::INDEX, *targets[4])); EXPECT_TRUE(assertTarget("subdb.memoryindex.fusion", FType::GC, FComponent::INDEX, *targets[5])); - EXPECT_TRUE(assertTarget("subdb.summary.compact", FType::GC, FComponent::DOCUMENT_STORE, *targets[6])); - EXPECT_TRUE(assertTarget("subdb.summary.flush", FType::SYNC, FComponent::DOCUMENT_STORE, *targets[7])); - EXPECT_TRUE(assertTarget("subdb.summary.shrink", FType::GC, FComponent::DOCUMENT_STORE, *targets[8])); + EXPECT_TRUE(assertTarget("subdb.summary.compact_bloat", FType::GC, FComponent::DOCUMENT_STORE, *targets[6])); + EXPECT_TRUE(assertTarget("subdb.summary.compact_spread", FType::GC, FComponent::DOCUMENT_STORE, *targets[7])); + EXPECT_TRUE(assertTarget("subdb.summary.flush", FType::SYNC, FComponent::DOCUMENT_STORE, *targets[8])); + EXPECT_TRUE(assertTarget("subdb.summary.shrink", FType::GC, FComponent::DOCUMENT_STORE, *targets[9])); } TEST_F("require that only fast-access attributes are instantiated", FastAccessOnlyFixture) diff --git a/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_common.h b/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_common.h index 42976104836..8e1b23eba67 100644 --- a/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_common.h +++ b/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_common.h @@ -15,6 +15,7 @@ #include <vespa/searchcore/proton/test/clusterstatehandler.h> #include <vespa/searchcore/proton/test/disk_mem_usage_notifier.h> #include <vespa/searchcore/proton/test/test.h> +#include <vespa/searchcore/proton/test/dummy_document_store.h> #include <vespa/vespalib/util/idestructorcallback.h> #include <vespa/searchlib/index/docbuilder.h> diff --git a/searchcore/src/vespa/searchcore/proton/docsummary/summarycompacttarget.cpp b/searchcore/src/vespa/searchcore/proton/docsummary/summarycompacttarget.cpp index 4e0cf3f9059..06bf8d0a8a6 100644 
--- a/searchcore/src/vespa/searchcore/proton/docsummary/summarycompacttarget.cpp +++ b/searchcore/src/vespa/searchcore/proton/docsummary/summarycompacttarget.cpp @@ -9,21 +9,26 @@ using search::SerialNum; using vespalib::makeLambdaTask; using searchcorespi::FlushStats; using searchcorespi::IFlushTarget; +using searchcorespi::FlushTask; namespace proton { namespace { -class Compacter : public searchcorespi::FlushTask { +class Compacter : public FlushTask { private: IDocumentStore & _docStore; FlushStats & _stats; SerialNum _currSerial; + virtual void compact(IDocumentStore & docStore, SerialNum currSerial) const = 0; public: - Compacter(IDocumentStore & docStore, FlushStats & stats, SerialNum currSerial) : - _docStore(docStore), _stats(stats), _currSerial(currSerial) {} + Compacter(IDocumentStore & docStore, FlushStats & stats, SerialNum currSerial) + : _docStore(docStore), + _stats(stats), + _currSerial(currSerial) + {} void run() override { - _docStore.compact(_currSerial); + compact(_docStore, _currSerial); updateStats(); } void updateStats() { @@ -36,10 +41,32 @@ public: } }; +class CompactBloat : public Compacter { +public: + CompactBloat(IDocumentStore & docStore, FlushStats & stats, SerialNum currSerial) + : Compacter(docStore, stats, currSerial) + {} +private: + void compact(IDocumentStore & docStore, SerialNum currSerial) const override { + docStore.compactBloat(currSerial); + } +}; + +class CompactSpread : public Compacter { +public: + CompactSpread(IDocumentStore & docStore, FlushStats & stats, SerialNum currSerial) + : Compacter(docStore, stats, currSerial) + {} +private: + void compact(IDocumentStore & docStore, SerialNum currSerial) const override { + docStore.compactSpread(currSerial); + } +}; + } -SummaryCompactTarget::SummaryCompactTarget(vespalib::Executor & summaryService, IDocumentStore & docStore) - : IFlushTarget("summary.compact", Type::GC, Component::DOCUMENT_STORE), +SummaryGCTarget::SummaryGCTarget(const vespalib::string & name, 
vespalib::Executor & summaryService, IDocumentStore & docStore) + : IFlushTarget(name, Type::GC, Component::DOCUMENT_STORE), _summaryService(summaryService), _docStore(docStore), _lastStats() @@ -48,37 +75,69 @@ SummaryCompactTarget::SummaryCompactTarget(vespalib::Executor & summaryService, } IFlushTarget::MemoryGain -SummaryCompactTarget::getApproxMemoryGain() const +SummaryGCTarget::getApproxMemoryGain() const { return MemoryGain::noGain(_docStore.memoryUsed()); } IFlushTarget::DiskGain -SummaryCompactTarget::getApproxDiskGain() const +SummaryGCTarget::getApproxDiskGain() const { size_t total(_docStore.getDiskFootprint()); - return DiskGain(total, total - std::min(total, _docStore.getMaxCompactGain())); + return DiskGain(total, total - std::min(total, getBloat(_docStore))); } IFlushTarget::Time -SummaryCompactTarget::getLastFlushTime() const +SummaryGCTarget::getLastFlushTime() const { return vespalib::system_clock::now(); } SerialNum -SummaryCompactTarget::getFlushedSerialNum() const +SummaryGCTarget::getFlushedSerialNum() const { return _docStore.tentativeLastSyncToken(); } IFlushTarget::Task::UP -SummaryCompactTarget::initFlush(SerialNum currentSerial, std::shared_ptr<search::IFlushToken>) +SummaryGCTarget::initFlush(SerialNum currentSerial, std::shared_ptr<search::IFlushToken>) { std::promise<Task::UP> promise; std::future<Task::UP> future = promise.get_future(); - _summaryService.execute(makeLambdaTask([&]() { promise.set_value(std::make_unique<Compacter>(_docStore, _lastStats, currentSerial)); })); + _summaryService.execute(makeLambdaTask([this, &promise,currentSerial]() { + promise.set_value(create(_docStore, _lastStats, currentSerial)); + })); return future.get(); } +SummaryCompactBloatTarget::SummaryCompactBloatTarget(vespalib::Executor & summaryService, IDocumentStore & docStore) + : SummaryGCTarget("summary.compact_bloat", summaryService, docStore) +{ +} + +size_t +SummaryCompactBloatTarget::getBloat(const search::IDocumentStore & docStore) const { + 
return docStore.getDiskBloat(); +} + +FlushTask::UP +SummaryCompactBloatTarget::create(IDocumentStore & docStore, FlushStats & stats, SerialNum currSerial) { + return std::make_unique<CompactBloat>(docStore, stats, currSerial); +} + +SummaryCompactSpreadTarget::SummaryCompactSpreadTarget(vespalib::Executor & summaryService, IDocumentStore & docStore) + : SummaryGCTarget("summary.compact_spread", summaryService, docStore) +{ +} + +size_t +SummaryCompactSpreadTarget::getBloat(const search::IDocumentStore & docStore) const { + return docStore.getMaxSpreadAsBloat(); +} + +FlushTask::UP +SummaryCompactSpreadTarget::create(IDocumentStore & docStore, FlushStats & stats, SerialNum currSerial) { + return std::make_unique<CompactSpread>(docStore, stats, currSerial); +} + } // namespace proton diff --git a/searchcore/src/vespa/searchcore/proton/docsummary/summarycompacttarget.h b/searchcore/src/vespa/searchcore/proton/docsummary/summarycompacttarget.h index c8035a544f2..529aa3b816e 100644 --- a/searchcore/src/vespa/searchcore/proton/docsummary/summarycompacttarget.h +++ b/searchcore/src/vespa/searchcore/proton/docsummary/summarycompacttarget.h @@ -12,16 +12,10 @@ namespace proton { /** * This class implements the IFlushTarget interface to proxy a summary manager. 
*/ -class SummaryCompactTarget : public searchcorespi::IFlushTarget { -private: - using FlushStats = searchcorespi::FlushStats; - vespalib::Executor &_summaryService; - search::IDocumentStore & _docStore; - FlushStats _lastStats; - +class SummaryGCTarget : public searchcorespi::IFlushTarget { public: - SummaryCompactTarget(vespalib::Executor & summaryService, search::IDocumentStore & docStore); - + using FlushStats = searchcorespi::FlushStats; + using IDocumentStore = search::IDocumentStore; MemoryGain getApproxMemoryGain() const override; DiskGain getApproxDiskGain() const override; SerialNum getFlushedSerialNum() const override; @@ -31,6 +25,32 @@ public: FlushStats getLastFlushStats() const override { return _lastStats; } uint64_t getApproxBytesToWriteToDisk() const override { return 0; } +protected: + SummaryGCTarget(const vespalib::string &, vespalib::Executor & summaryService, IDocumentStore & docStore); +private: + + virtual size_t getBloat(const IDocumentStore & docStore) const = 0; + virtual Task::UP create(IDocumentStore & docStore, FlushStats & stats, SerialNum currSerial) = 0; + + vespalib::Executor &_summaryService; + IDocumentStore & _docStore; + FlushStats _lastStats; +}; + +class SummaryCompactBloatTarget : public SummaryGCTarget { +private: + size_t getBloat(const search::IDocumentStore & docStore) const override; + Task::UP create(IDocumentStore & docStore, FlushStats & stats, SerialNum currSerial) override; +public: + SummaryCompactBloatTarget(vespalib::Executor & summaryService, IDocumentStore & docStore); +}; + +class SummaryCompactSpreadTarget : public SummaryGCTarget { +private: + size_t getBloat(const search::IDocumentStore & docStore) const override; + Task::UP create(IDocumentStore & docStore, FlushStats & stats, SerialNum currSerial) override; +public: + SummaryCompactSpreadTarget(vespalib::Executor & summaryService, IDocumentStore & docStore); }; } // namespace proton diff --git 
a/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp b/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp index eaf5a907808..28a91e1444d 100644 --- a/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp +++ b/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp @@ -200,7 +200,8 @@ SummaryManager::getFlushTargets(vespalib::Executor & summaryService) IFlushTarget::List ret; ret.push_back(std::make_shared<SummaryFlushTarget>(getBackingStore(), summaryService)); if (dynamic_cast<LogDocumentStore *>(_docStore.get()) != nullptr) { - ret.push_back(std::make_shared<SummaryCompactTarget>(summaryService, getBackingStore())); + ret.push_back(std::make_shared<SummaryCompactBloatTarget>(summaryService, getBackingStore())); + ret.push_back(std::make_shared<SummaryCompactSpreadTarget>(summaryService, getBackingStore())); } ret.push_back(createShrinkLidSpaceFlushTarget(summaryService, _docStore)); return ret; diff --git a/searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp b/searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp index a9873a80d0e..a77ff93c002 100644 --- a/searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp @@ -197,7 +197,7 @@ getStoreConfig(const ProtonConfig::Summary::Cache & cache, const HwInfo & hwInfo } LogDocumentStore::Config -deriveConfig(const ProtonConfig::Summary & summary, const ProtonConfig::Flush::Memory & flush, const HwInfo & hwInfo) { +deriveConfig(const ProtonConfig::Summary & summary, const HwInfo & hwInfo) { DocumentStore::Config config(getStoreConfig(summary.cache, hwInfo)); const ProtonConfig::Summary::Log & log(summary.log); const ProtonConfig::Summary::Log::Chunk & chunk(log.chunk); @@ -205,7 +205,6 @@ deriveConfig(const ProtonConfig::Summary & summary, const ProtonConfig::Flush::M LogDataStore::Config logConfig; 
logConfig.setMaxFileSize(log.maxfilesize) .setMaxNumLids(log.maxnumlids) - .setMaxDiskBloatFactor(std::min(flush.diskbloatfactor, flush.each.diskbloatfactor)) .setMaxBucketSpread(log.maxbucketspread).setMinFileSizeFactor(log.minfilesizefactor) .compactCompression(deriveCompression(log.compact.compression)) .setFileConfig(fileConfig).disableCrcOnRead(chunk.skipcrconread); @@ -213,7 +212,7 @@ deriveConfig(const ProtonConfig::Summary & summary, const ProtonConfig::Flush::M } search::LogDocumentStore::Config buildStoreConfig(const ProtonConfig & proton, const HwInfo & hwInfo) { - return deriveConfig(proton.summary, proton.flush.memory, hwInfo); + return deriveConfig(proton.summary, hwInfo); } using AttributesConfigSP = DocumentDBConfig::AttributesConfigSP; diff --git a/searchcore/src/vespa/searchcore/proton/test/dummy_document_store.h b/searchcore/src/vespa/searchcore/proton/test/dummy_document_store.h index d9b83bfc3a8..7194cc4d403 100644 --- a/searchcore/src/vespa/searchcore/proton/test/dummy_document_store.h +++ b/searchcore/src/vespa/searchcore/proton/test/dummy_document_store.h @@ -10,13 +10,11 @@ struct DummyDocumentStore : public search::IDocumentStore { vespalib::string _baseDir; - DummyDocumentStore() - : _baseDir("") - {} + DummyDocumentStore() = default; DummyDocumentStore(const vespalib::string &baseDir) : _baseDir(baseDir) {} - ~DummyDocumentStore() {} + ~DummyDocumentStore() = default; DocumentUP read(search::DocumentIdT, const document::DocumentTypeRepo &) const override { return DocumentUP(); } @@ -25,7 +23,8 @@ struct DummyDocumentStore : public search::IDocumentStore void remove(uint64_t, search::DocumentIdT) override {} void flush(uint64_t) override {} uint64_t initFlush(uint64_t) override { return 0; } - void compact(uint64_t) override {} + void compactBloat(uint64_t) override {} + void compactSpread(uint64_t) override {} uint64_t lastSyncToken() const override { return 0; } uint64_t tentativeLastSyncToken() const override { return 0; } 
vespalib::system_time getLastFlushTime() const override { return vespalib::system_time(); } @@ -34,7 +33,7 @@ struct DummyDocumentStore : public search::IDocumentStore size_t memoryMeta() const override { return 0; } size_t getDiskFootprint() const override { return 0; } size_t getDiskBloat() const override { return 0; } - size_t getMaxCompactGain() const override { return getDiskBloat(); } + size_t getMaxSpreadAsBloat() const override { return getDiskBloat(); } search::CacheStats getCacheStats() const override { return search::CacheStats(); } const vespalib::string &getBaseDir() const override { return _baseDir; } void accept(search::IDocumentStoreReadVisitor &, diff --git a/searchcore/src/vespa/searchcore/proton/test/test.h b/searchcore/src/vespa/searchcore/proton/test/test.h index 1494823e899..4231d5e7717 100644 --- a/searchcore/src/vespa/searchcore/proton/test/test.h +++ b/searchcore/src/vespa/searchcore/proton/test/test.h @@ -5,7 +5,6 @@ #include "bucketdocuments.h" #include "bucketstatecalculator.h" #include "document.h" -#include "dummy_document_store.h" #include "dummy_feed_view.h" #include "dummy_summary_manager.h" #include "resulthandler.h" diff --git a/searchlib/src/tests/docstore/document_store/document_store_test.cpp b/searchlib/src/tests/docstore/document_store/document_store_test.cpp index 1a6b0a5a1c6..f2bec30a349 100644 --- a/searchlib/src/tests/docstore/document_store/document_store_test.cpp +++ b/searchlib/src/tests/docstore/document_store/document_store_test.cpp @@ -25,7 +25,7 @@ struct NullDataStore : IDataStore { size_t memoryMeta() const override { return 0; } size_t getDiskFootprint() const override { return 0; } size_t getDiskBloat() const override { return 0; } - size_t getMaxCompactGain() const override { return 0; } + size_t getMaxSpreadAsBloat() const override { return 0; } uint64_t lastSyncToken() const override { return 0; } uint64_t tentativeLastSyncToken() const override { return 0; } vespalib::system_time getLastFlushTime() const 
override { return vespalib::system_time(); } diff --git a/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp b/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp index 07652dfd336..378babb6ee1 100644 --- a/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp +++ b/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp @@ -236,7 +236,7 @@ void verifyGrowing(const LogDataStore::Config & config, uint32_t minFiles, uint3 datastore.remove(i + 20000, i); } datastore.flush(datastore.initFlush(lastSyncToken)); - datastore.compact(30000); + datastore.compactBloat(30000); datastore.remove(31000, 0); checkStats(datastore, 31000, 30000); EXPECT_LESS_EQUAL(minFiles, datastore.getAllActiveFiles().size()); @@ -252,7 +252,7 @@ void verifyGrowing(const LogDataStore::Config & config, uint32_t minFiles, uint3 } TEST("testGrowingChunkedBySize") { LogDataStore::Config config; - config.setMaxFileSize(100000).setMaxDiskBloatFactor(0.1).setMaxBucketSpread(3.0).setMinFileSizeFactor(0.2) + config.setMaxFileSize(100000).setMaxBucketSpread(3.0).setMinFileSizeFactor(0.2) .compactCompression({CompressionConfig::LZ4}) .setFileConfig({{CompressionConfig::LZ4, 9, 60}, 1000}); verifyGrowing(config, 40, 120); @@ -260,7 +260,7 @@ TEST("testGrowingChunkedBySize") { TEST("testGrowingChunkedByNumLids") { LogDataStore::Config config; - config.setMaxNumLids(1000).setMaxDiskBloatFactor(0.1).setMaxBucketSpread(3.0).setMinFileSizeFactor(0.2) + config.setMaxNumLids(1000).setMaxBucketSpread(3.0).setMinFileSizeFactor(0.2) .compactCompression({CompressionConfig::LZ4}) .setFileConfig({{CompressionConfig::LZ4, 9, 60}, 1000}); verifyGrowing(config,10, 10); @@ -679,7 +679,7 @@ TEST("testWriteRead") { EXPECT_LESS(0u, headerFootprint); EXPECT_EQUAL(datastore.getDiskFootprint(), headerFootprint); EXPECT_EQUAL(datastore.getDiskBloat(), 0ul); - EXPECT_EQUAL(datastore.getMaxCompactGain(), 0ul); + EXPECT_EQUAL(datastore.getMaxSpreadAsBloat(), 0ul); datastore.write(1, 0, 
a[0].c_str(), a[0].size()); fetchAndTest(datastore, 0, a[0].c_str(), a[0].size()); datastore.write(2, 0, a[1].c_str(), a[1].size()); @@ -701,7 +701,7 @@ TEST("testWriteRead") { EXPECT_EQUAL(datastore.getDiskFootprint(), 2711ul + headerFootprint); EXPECT_EQUAL(datastore.getDiskBloat(), 0ul); - EXPECT_EQUAL(datastore.getMaxCompactGain(), 0ul); + EXPECT_EQUAL(datastore.getMaxSpreadAsBloat(), 0ul); datastore.flush(datastore.initFlush(lastSyncToken)); } { @@ -715,7 +715,7 @@ TEST("testWriteRead") { EXPECT_LESS(0u, headerFootprint); EXPECT_EQUAL(4944ul + headerFootprint, datastore.getDiskFootprint()); EXPECT_EQUAL(0ul, datastore.getDiskBloat()); - EXPECT_EQUAL(0ul, datastore.getMaxCompactGain()); + EXPECT_EQUAL(0ul, datastore.getMaxSpreadAsBloat()); for(size_t i=0; i < 100; i++) { fetchAndTest(datastore, i, a[i%2].c_str(), a[i%2].size()); @@ -730,7 +730,7 @@ TEST("testWriteRead") { EXPECT_EQUAL(7594ul + headerFootprint, datastore.getDiskFootprint()); EXPECT_EQUAL(0ul, datastore.getDiskBloat()); - EXPECT_EQUAL(0ul, datastore.getMaxCompactGain()); + EXPECT_EQUAL(0ul, datastore.getMaxSpreadAsBloat()); } FastOS_File::EmptyAndRemoveDirectory("empty"); } @@ -1050,7 +1050,6 @@ TEST("require that config equality operator detects inequality") { using C = LogDataStore::Config; EXPECT_TRUE(C() == C()); EXPECT_FALSE(C() == C().setMaxFileSize(1)); - EXPECT_FALSE(C() == C().setMaxDiskBloatFactor(0.3)); EXPECT_FALSE(C() == C().setMaxBucketSpread(0.3)); EXPECT_FALSE(C() == C().setMinFileSizeFactor(0.3)); EXPECT_FALSE(C() == C().setFileConfig(WriteableFileChunk::Config({}, 70))); diff --git a/searchlib/src/vespa/searchlib/docstore/documentstore.cpp b/searchlib/src/vespa/searchlib/docstore/documentstore.cpp index 7aaee7180df..b4ff050c0f6 100644 --- a/searchlib/src/vespa/searchlib/docstore/documentstore.cpp +++ b/searchlib/src/vespa/searchlib/docstore/documentstore.cpp @@ -112,7 +112,6 @@ public: } -using VisitCache = docstore::VisitCache; using docstore::Value; bool @@ -239,7 +238,14 @@ 
DocumentStore::remove(uint64_t syncToken, DocumentIdT lid) } void -DocumentStore::compact(uint64_t syncToken) +DocumentStore::compactBloat(uint64_t syncToken) +{ + (void) syncToken; + // Most implementations does not offer compact. +} + +void +DocumentStore::compactSpread(uint64_t syncToken) { (void) syncToken; // Most implementations does not offer compact. diff --git a/searchlib/src/vespa/searchlib/docstore/documentstore.h b/searchlib/src/vespa/searchlib/docstore/documentstore.h index b6021d34bef..6402c16cd5e 100644 --- a/searchlib/src/vespa/searchlib/docstore/documentstore.h +++ b/searchlib/src/vespa/searchlib/docstore/documentstore.h @@ -72,7 +72,8 @@ public: void remove(uint64_t syncToken, DocumentIdT lid) override; void flush(uint64_t syncToken) override; uint64_t initFlush(uint64_t synctoken) override; - void compact(uint64_t syncToken) override; + void compactBloat(uint64_t syncToken) override; + void compactSpread(uint64_t syncToken) override; uint64_t lastSyncToken() const override; uint64_t tentativeLastSyncToken() const override; vespalib::system_time getLastFlushTime() const override; @@ -80,7 +81,7 @@ public: size_t memoryUsed() const override { return _backingStore.memoryUsed(); } size_t getDiskFootprint() const override { return _backingStore.getDiskFootprint(); } size_t getDiskBloat() const override { return _backingStore.getDiskBloat(); } - size_t getMaxCompactGain() const override { return _backingStore.getMaxCompactGain(); } + size_t getMaxSpreadAsBloat() const override { return _backingStore.getMaxSpreadAsBloat(); } CacheStats getCacheStats() const override; size_t memoryMeta() const override { return _backingStore.memoryMeta(); } const vespalib::string & getBaseDir() const override { return _backingStore.getBaseDir(); } diff --git a/searchlib/src/vespa/searchlib/docstore/idatastore.h b/searchlib/src/vespa/searchlib/docstore/idatastore.h index 82656ad7e69..fc0eae1d15e 100644 --- a/searchlib/src/vespa/searchlib/docstore/idatastore.h +++ 
b/searchlib/src/vespa/searchlib/docstore/idatastore.h @@ -121,7 +121,7 @@ public: * to avoid misuse we let the report a more conservative number here if necessary. * @return diskspace to be gained. */ - virtual size_t getMaxCompactGain() const = 0; + virtual size_t getMaxSpreadAsBloat() const = 0; /** diff --git a/searchlib/src/vespa/searchlib/docstore/idocumentstore.h b/searchlib/src/vespa/searchlib/docstore/idocumentstore.h index 0e73e4d7993..d84a5ad7e7e 100644 --- a/searchlib/src/vespa/searchlib/docstore/idocumentstore.h +++ b/searchlib/src/vespa/searchlib/docstore/idocumentstore.h @@ -100,7 +100,8 @@ public: /** * If possible compact the disk. **/ - virtual void compact(uint64_t syncToken) = 0; + virtual void compactBloat(uint64_t syncToken) = 0; + virtual void compactSpread(uint64_t syncToken) = 0; /** * The sync token used for the last successful flush() operation, @@ -153,12 +154,11 @@ public: virtual size_t getDiskBloat() const = 0; /** - * Calculates how much diskspace can be compacted during a flush. - * default is to return th ebloat limit, but as some targets have some internal limits - * to avoid misuse we let the report a more conservative number here if necessary. - * @return diskspace to be gained. + * Calculates the gain from keeping buckets close. It is converted to diskbloat + * so it can be prioritized accordingly. + * @return spread as disk bloat. */ - virtual size_t getMaxCompactGain() const = 0; + virtual size_t getMaxSpreadAsBloat() const = 0; /** * Returns statistics about the cache. 
diff --git a/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp index fd25dd56235..e98cfc810d3 100644 --- a/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp +++ b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp @@ -36,7 +36,6 @@ using namespace std::literals; LogDataStore::Config::Config() : _maxFileSize(DEFAULT_MAX_FILESIZE), - _maxDiskBloatFactor(0.2), _maxBucketSpread(2.5), _minFileSizeFactor(0.2), _maxNumLids(DEFAULT_MAX_LIDS_PER_FILE), @@ -48,7 +47,6 @@ LogDataStore::Config::Config() bool LogDataStore::Config::operator == (const Config & rhs) const { return (_maxBucketSpread == rhs._maxBucketSpread) && - (_maxDiskBloatFactor == rhs._maxDiskBloatFactor) && (_maxFileSize == rhs._maxFileSize) && (_minFileSizeFactor == rhs._minFileSizeFactor) && (_skipCrcOnRead == rhs._skipCrcOnRead) && @@ -294,46 +292,14 @@ vespalib::string bloatMsg(size_t bloat, size_t usage) { } -void -LogDataStore::compact(uint64_t syncToken) -{ - uint64_t usage = getDiskFootprint(); - uint64_t bloat = getDiskBloat(); - LOG(debug, "%s", bloatMsg(bloat, usage).c_str()); - const bool doCompact = (_fileChunks.size() > 1); - if (doCompact) { - LOG(info, "%s. Will compact", bloatMsg(bloat, usage).c_str()); - compactWorst(_config.getMaxDiskBloatFactor(), _config.getMaxBucketSpread(), isTotalDiskBloatExceeded(usage, bloat)); - } - flushActiveAndWait(syncToken); - if (doCompact) { - usage = getDiskFootprint(); - bloat = getDiskBloat(); - LOG(info, "Done compacting. %s", bloatMsg(bloat, usage).c_str()); - } -} - -bool -LogDataStore::isTotalDiskBloatExceeded(size_t diskFootPrint, size_t bloat) const { - const size_t maxConfiguredDiskBloat = diskFootPrint * _config.getMaxDiskBloatFactor(); - return bloat > maxConfiguredDiskBloat; -} - size_t -LogDataStore::getMaxCompactGain() const +LogDataStore::getMaxSpreadAsBloat() const { - size_t bloat = getDiskBloat(); const size_t diskFootPrint = getDiskFootprint(); - if ( ! 
isTotalDiskBloatExceeded(diskFootPrint, bloat) ) { - bloat = 0; - } - const double maxSpread = getMaxBucketSpread(); - size_t spreadAsBloat = diskFootPrint * (1.0 - 1.0/maxSpread); - if ( maxSpread < _config.getMaxBucketSpread()) { - spreadAsBloat = 0; - } - return (bloat + spreadAsBloat); + return (maxSpread > _config.getMaxBucketSpread()) + ? diskFootPrint * (1.0 - 1.0/maxSpread) + : 0; } void @@ -380,40 +346,34 @@ LogDataStore::getMaxBucketSpread() const } std::pair<bool, LogDataStore::FileId> -LogDataStore::findNextToCompact(double bloatLimit, double spreadLimit, bool prioritizeDiskBloat) +LogDataStore::findNextToCompact(bool dueToBloat) { typedef std::multimap<double, FileId, std::greater<double>> CostMap; - CostMap worstBloat; - CostMap worstSpread; + CostMap worst; MonitorGuard guard(_updateLock); for (size_t i(0); i < _fileChunks.size(); i++) { const auto & fc(_fileChunks[i]); if (fc && fc->frozen() && (_currentlyCompacting.find(fc->getNameId()) == _currentlyCompacting.end())) { uint64_t usage = fc->getDiskFootprint(); - uint64_t bloat = fc->getDiskBloat(); - if (_bucketizer) { - worstSpread.emplace(fc->getBucketSpread(), FileId(i)); - } - if (usage > 0) { - double tmp(double(bloat)/usage); - worstBloat.emplace(tmp, FileId(i)); + if ( ! dueToBloat && _bucketizer) { + worst.emplace(fc->getBucketSpread(), FileId(i)); + } else if (dueToBloat && usage > 0) { + double tmp(double(fc->getDiskBloat())/usage); + worst.emplace(tmp, FileId(i)); } } } if (LOG_WOULD_LOG(debug)) { - for (const auto & it : worstBloat) { + for (const auto & it : worst) { const FileChunk & fc = *_fileChunks[it.second.getId()]; LOG(debug, "File '%s' has bloat '%2.2f' and bucket-spread '%1.4f numChunks=%d , numBuckets=%ld, numUniqueBuckets=%ld", fc.getName().c_str(), it.first * 100, fc.getBucketSpread(), fc.getNumChunks(), fc.getNumBuckets(), fc.getNumUniqueBuckets()); } } std::pair<bool, FileId> retval(false, FileId(-1)); - if ( ! 
worstBloat.empty() && (worstBloat.begin()->first > bloatLimit) && prioritizeDiskBloat) { - retval.first = true; - retval.second = worstBloat.begin()->second; - } else if ( ! worstSpread.empty() && (worstSpread.begin()->first > spreadLimit)) { + if ( ! worst.empty()) { retval.first = true; - retval.second = worstSpread.begin()->second; + retval.second = worst.begin()->second; } if (retval.first) { _currentlyCompacting.insert(_fileChunks[retval.second.getId()]->getNameId()); @@ -422,10 +382,24 @@ LogDataStore::findNextToCompact(double bloatLimit, double spreadLimit, bool prio } void -LogDataStore::compactWorst(double bloatLimit, double spreadLimit, bool prioritizeDiskBloat) { - auto worst = findNextToCompact(bloatLimit, spreadLimit, prioritizeDiskBloat); - if (worst.first) { - compactFile(worst.second); +LogDataStore::compactWorst(uint64_t syncToken, bool compactDiskBloat) { + uint64_t usage = getDiskFootprint(); + uint64_t bloat = getDiskBloat(); + const char * reason = compactDiskBloat ? "bloat" : "spread"; + LOG(debug, "%s", bloatMsg(bloat, usage).c_str()); + const bool doCompact = (_fileChunks.size() > 1); + if (doCompact) { + LOG(info, "%s. Will compact due to %s", bloatMsg(bloat, usage).c_str(), reason); /* bloat summary first, then the reason, to match the format string */ + auto worst = findNextToCompact(compactDiskBloat); + if (worst.first) { + compactFile(worst.second); + } + flushActiveAndWait(syncToken); + usage = getDiskFootprint(); + bloat = getDiskBloat(); + LOG(info, "Done compacting due to %s. 
%s", reason, bloatMsg(bloat, usage).c_str()); + } else { + flushActiveAndWait(syncToken); } } diff --git a/searchlib/src/vespa/searchlib/docstore/logdatastore.h b/searchlib/src/vespa/searchlib/docstore/logdatastore.h index f43dc96fac9..62f87076759 100644 --- a/searchlib/src/vespa/searchlib/docstore/logdatastore.h +++ b/searchlib/src/vespa/searchlib/docstore/logdatastore.h @@ -41,7 +41,6 @@ public: Config & setMaxFileSize(size_t v) { _maxFileSize = v; return *this; } Config & setMaxNumLids(size_t v) { _maxNumLids = v; return *this; } - Config & setMaxDiskBloatFactor(double v) { _maxDiskBloatFactor = v; return *this; } Config & setMaxBucketSpread(double v) { _maxBucketSpread = v; return *this; } Config & setMinFileSizeFactor(double v) { _minFileSizeFactor = v; return *this; } @@ -49,7 +48,6 @@ public: Config & setFileConfig(WriteableFileChunk::Config v) { _fileConfig = v; return *this; } size_t getMaxFileSize() const { return _maxFileSize; } - double getMaxDiskBloatFactor() const { return _maxDiskBloatFactor; } double getMaxBucketSpread() const { return _maxBucketSpread; } double getMinFileSizeFactor() const { return _minFileSizeFactor; } uint32_t getMaxNumLids() const { return _maxNumLids; } @@ -63,7 +61,6 @@ public: bool operator == (const Config &) const; private: size_t _maxFileSize; - double _maxDiskBloatFactor; double _maxBucketSpread; double _minFileSizeFactor; uint32_t _maxNumLids; @@ -109,9 +106,10 @@ public: size_t getDiskFootprint() const override; size_t getDiskHeaderFootprint() const override; size_t getDiskBloat() const override; - size_t getMaxCompactGain() const override; + size_t getMaxSpreadAsBloat() const override; - void compact(uint64_t syncToken); + void compactBloat(uint64_t syncToken) { compactWorst(syncToken, true); } + void compactSpread(uint64_t syncToken) { compactWorst(syncToken, false);} const Config & getConfig() const { return _config; } Config & getConfig() { return _config; } @@ -180,10 +178,9 @@ private: class WrapVisitorProgress; 
class FileChunkHolder; - // Implements ISetLid API void setLid(const ISetLid::unique_lock & guard, uint32_t lid, const LidInfo & lm) override; - void compactWorst(double bloatLimit, double spreadLimit, bool prioritizeDiskBloat); + void compactWorst(uint64_t syncToken, bool compactDiskBloat); void compactFile(FileId chunkId); typedef vespalib::RcuVector<uint64_t> LidInfoVector; @@ -199,8 +196,6 @@ private: NameIdSet eraseIncompleteCompactedFiles(NameIdSet partList); void internalFlushAll(); - bool isTotalDiskBloatExceeded(size_t diskFootPrint, size_t bloat) const; - NameIdSet scanDir(const vespalib::string &dir, const vespalib::string &suffix); FileId allocateFileId(const MonitorGuard & guard); void setNewFileChunk(const MonitorGuard & guard, FileChunk::UP fileChunk); @@ -245,7 +240,7 @@ private: return (_fileChunks.empty() ? 0 : _fileChunks.back()->getLastPersistedSerialNum()); } bool shouldCompactToActiveFile(size_t compactedSize) const; - std::pair<bool, FileId> findNextToCompact(double bloatLimit, double spreadLimit, bool prioritizeDiskBloat); + std::pair<bool, FileId> findNextToCompact(bool compactDiskBloat); void incGeneration(); bool canShrinkLidSpace(const MonitorGuard &guard) const; diff --git a/searchlib/src/vespa/searchlib/docstore/logdocumentstore.h b/searchlib/src/vespa/searchlib/docstore/logdocumentstore.h index de36155bedb..2931f8bce2d 100644 --- a/searchlib/src/vespa/searchlib/docstore/logdocumentstore.h +++ b/searchlib/src/vespa/searchlib/docstore/logdocumentstore.h @@ -51,7 +51,8 @@ public: ~LogDocumentStore() override; void reconfigure(const Config & config); private: - void compact(uint64_t syncToken) override { _backingStore.compact(syncToken); } + void compactBloat(uint64_t syncToken) override { _backingStore.compactBloat(syncToken); } + void compactSpread(uint64_t syncToken) override { _backingStore.compactSpread(syncToken); } LogDataStore _backingStore; }; |