diff options
author | Geir Storli <geirst@oath.com> | 2017-11-09 14:08:22 +0000 |
---|---|---|
committer | Geir Storli <geirst@oath.com> | 2017-11-09 14:08:22 +0000 |
commit | 7fd45bb656a845b66be56827627e44d45afd4567 (patch) | |
tree | 23b11297fcace27ccfd57c9586638af09e6b38eb /searchcore | |
parent | 4632be4f92f3d06f805a8818f7a66f6402172045 (diff) |
Incorporate replay cost of a single operation in prepare restart flush strategy.
Diffstat (limited to 'searchcore')
7 files changed, 72 insertions, 30 deletions
diff --git a/searchcore/src/tests/proton/flushengine/prepare_restart_flush_strategy/prepare_restart_flush_strategy_test.cpp b/searchcore/src/tests/proton/flushengine/prepare_restart_flush_strategy/prepare_restart_flush_strategy_test.cpp index 428cbf60996..6bc7e1b5556 100644 --- a/searchcore/src/tests/proton/flushengine/prepare_restart_flush_strategy/prepare_restart_flush_strategy_test.cpp +++ b/searchcore/src/tests/proton/flushengine/prepare_restart_flush_strategy/prepare_restart_flush_strategy_test.cpp @@ -15,7 +15,7 @@ using SimpleFlushHandler = test::DummyFlushHandler; using FlushCandidatesList = std::vector<FlushTargetCandidates>; using Config = PrepareRestartFlushStrategy::Config; -const Config DEFAULT_CFG(2.0, 4.0); +const Config DEFAULT_CFG(2.0, 0.0, 4.0); struct SimpleFlushTarget : public test::DummyFlushTarget { @@ -107,7 +107,7 @@ public: : _sortedFlushContexts(&sortedFlushContexts), _numCandidates(sortedFlushContexts.size()), _tlsStats(1000, 11, 110), - _cfg(DEFAULT_CFG) + _cfg(2.0, 3.0, 4.0) {} CandidatesBuilder &flushContexts(const FlushContext::List &sortedFlushContexts) { _sortedFlushContexts = &sortedFlushContexts; @@ -140,28 +140,35 @@ struct CandidatesFixture CandidatesFixture() : emptyContexts(), builder(emptyContexts) {} }; +void +assertCosts(double tlsReplayBytesCost, double tlsReplayOperationsCost, double flushTargetsWriteCost, const FlushTargetCandidates &candidates) +{ + EXPECT_EQUAL(tlsReplayBytesCost, candidates.getTlsReplayCost().bytesCost); + EXPECT_EQUAL(tlsReplayOperationsCost, candidates.getTlsReplayCost().operationsCost); + EXPECT_EQUAL(flushTargetsWriteCost, candidates.getFlushTargetsWriteCost()); + EXPECT_EQUAL(tlsReplayBytesCost + tlsReplayOperationsCost + flushTargetsWriteCost, candidates.getTotalCost()); +} + TEST_F("require that tls replay cost is correct for 100% replay", CandidatesFixture) { - EXPECT_EQUAL(2000, f.builder.replayEnd(110).build().getTlsReplayCost()); + TEST_DO(assertCosts(1000 * 2, 100 * 3, 0, f.builder.replayEnd(110).build())); } TEST_F("require that tls replay cost is correct for 75% replay", CandidatesFixture) { FlushContext::List contexts = ContextsBuilder().add("target1", 10, 0).add("target2", 35, 0).build(); - EXPECT_EQUAL(1500, f.builder.flushContexts(contexts).numCandidates(1).replayEnd(110). - build().getTlsReplayCost()); + TEST_DO(assertCosts(750 * 2, 75 * 3, 0, f.builder.flushContexts(contexts).numCandidates(1).replayEnd(110).build())); } TEST_F("require that tls replay cost is correct for 25% replay", CandidatesFixture) { FlushContext::List contexts = ContextsBuilder().add("target1", 10, 0).add("target2", 85, 0).build(); - EXPECT_EQUAL(500, f.builder.flushContexts(contexts).numCandidates(1).replayEnd(110). - build().getTlsReplayCost()); + TEST_DO(assertCosts(250 * 2, 25 * 3, 0, f.builder.flushContexts(contexts).numCandidates(1).replayEnd(110).build())); } TEST_F("require that tls replay cost is correct for zero operations to replay", CandidatesFixture) { - EXPECT_EQUAL(0, f.builder.replayEnd(10).build().getTlsReplayCost()); + TEST_DO(assertCosts(0, 0, 0, f.builder.replayEnd(10).build())); } TEST_F("require that flush cost is correct for zero flush targets", CandidatesFixture) @@ -172,7 +179,7 @@ TEST_F("require that flush cost is correct for zero flush targets", CandidatesFi TEST_F("require that flush cost is sum of flush targets", CandidatesFixture) { FlushContext::List contexts = ContextsBuilder().add("target1", 20, 1000).add("target2", 30, 2000).build(); - EXPECT_EQUAL(12000, f.builder.flushContexts(contexts).build().getFlushTargetsWriteCost()); + TEST_DO(assertCosts(0, 0, 1000 * 4 + 2000 * 4, f.builder.flushContexts(contexts).build())); } @@ -227,7 +234,7 @@ assertFlushContexts(const vespalib::string &expected, const FlushContext::List & * - handler1: serial numbers 10 -> 110, 1000 bytes * - handler2: serial numbers 10 -> 110, 2000 bytes * - * The cost config is: tlsReplayCost=2.0, flushTargetsWriteCost=4.0. + * The cost config is: tlsReplayByteCost=2.0, tlsReplayOperationCost=0.0, flushTargetsWriteCost=4.0. * The cost of replaying the complete TLS is then: * - handler1: 1000*2.0 = 2000 * - handler2: 2000*2.0 = 4000 diff --git a/searchcore/src/vespa/searchcore/config/proton.def b/searchcore/src/vespa/searchcore/config/proton.def index eab889ea28c..c6c810ae72f 100644 --- a/searchcore/src/vespa/searchcore/config/proton.def +++ b/searchcore/src/vespa/searchcore/config/proton.def @@ -82,17 +82,31 @@ flush.memory.conservative.disklimitfactor double default=0.5 ## watermark indicating when to go back from conservative to normal mode for the flush strategy. flush.memory.conservative.lowwatermarkfactor double default=0.9 -## The cost of doing replay when replaying the transaction log. +## The cost of replaying a byte when replaying the transaction log. ## -## The number of bytes to replay * replaycost gives an estimate of the -## total cost of replaying the transaction log. +## The estimate of the total cost of replaying the transaction log: +## (number of bytes to replay) * replaycost + (number of operations to replay) * replayoperationcost ## ## The prepare for restart flush strategy will choose a set of components to flush ## such that the cost of flushing these + the cost of replaying the transaction log ## is as low as possible. -flush.preparerestart.replaycost double default=4.0 +flush.preparerestart.replaycost double default=2.0 -## The cost of doing writes when flushing components to disk. +## The cost of replaying an operation when replaying the transaction log. +## +## The estimate of the total cost of replaying the transaction log: +## (number of bytes to replay) * replaycost + (number of operations to replay) * replayoperationcost +## +## The default value is chosen based on the following example: +## Assume we can replay 9 MB/s and this corresponds to 24000 ops/s. +## replayoperationcost = (bytes to replay) * replaycost / (operations to replay) = 9 MB * 2.0 / 24000 = 750 +## +## The prepare for restart flush strategy will choose a set of components to flush +## such that the cost of flushing these + the cost of replaying the transaction log +## is as low as possible. +flush.preparerestart.replayoperationcost double default=750.0 + +## The cost of writing a byte when flushing components to disk. ## ## The number of bytes to write (for a set of flushed components) * writecost ## gives an estimate of the total cost of flushing this set of components. diff --git a/searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidates.cpp b/searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidates.cpp index da54b909759..0051c209ef9 100644 --- a/searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidates.cpp +++ b/searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidates.cpp @@ -8,6 +8,7 @@ namespace proton { using search::SerialNum; using Config = PrepareRestartFlushStrategy::Config; +using TlsReplayCost = FlushTargetCandidates::TlsReplayCost; namespace { @@ -25,7 +26,7 @@ calculateReplayStartSerial(const FlushContext::List &sortedFlushContexts, return sortedFlushContexts[numCandidates]->getTarget()->getFlushedSerialNum() + 1; } -double +TlsReplayCost calculateTlsReplayCost(const flushengine::TlsStats &tlsStats, const Config &cfg, SerialNum replayStartSerial) @@ -33,13 +34,13 @@ calculateTlsReplayCost(const flushengine::TlsStats &tlsStats, SerialNum replayEndSerial = tlsStats.getLastSerial(); SerialNum numTotalOperations = replayEndSerial - tlsStats.getFirstSerial() + 1; if (numTotalOperations == 0) { - return 0; + return TlsReplayCost(0.0, 0.0); } double numBytesPerOperation = (double)tlsStats.getNumBytes() / (double)numTotalOperations; SerialNum numOperationsToReplay = replayEndSerial + 1 - replayStartSerial; double numBytesToReplay = numBytesPerOperation * numOperationsToReplay; - return numBytesToReplay * cfg.tlsReplayCost; + return TlsReplayCost((numBytesToReplay * cfg.tlsReplayByteCost), (numOperationsToReplay * cfg.tlsReplayOperationCost)); } double diff --git a/searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidates.h b/searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidates.h index 5498d8c46a8..ea09989de31 100644 --- a/searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidates.h +++ b/searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidates.h @@ -16,10 +16,20 @@ namespace flushengine { class TlsStats; } */ class FlushTargetCandidates { +public: + struct TlsReplayCost { + double bytesCost; + double operationsCost; + TlsReplayCost(double bytesCost_, double operationsCost_) + : bytesCost(bytesCost_), + operationsCost(operationsCost_) + {} + double totalCost() const { return bytesCost + operationsCost; } + }; private: const FlushContext::List *_sortedFlushContexts; // NOTE: ownership is handled outside size_t _numCandidates; - double _tlsReplayCost; + TlsReplayCost _tlsReplayCost; double _flushTargetsWriteCost; using Config = PrepareRestartFlushStrategy::Config; @@ -32,9 +42,9 @@ public: const flushengine::TlsStats &tlsStats, const Config &cfg); - double getTlsReplayCost() const { return _tlsReplayCost; } + TlsReplayCost getTlsReplayCost() const { return _tlsReplayCost; } double getFlushTargetsWriteCost() const { return _flushTargetsWriteCost; } - double getTotalCost() const { return getTlsReplayCost() + getFlushTargetsWriteCost(); } + double getTotalCost() const { return getTlsReplayCost().totalCost() + getFlushTargetsWriteCost(); } FlushContext::List getCandidates() const; }; diff --git a/searchcore/src/vespa/searchcore/proton/flushengine/prepare_restart_flush_strategy.cpp b/searchcore/src/vespa/searchcore/proton/flushengine/prepare_restart_flush_strategy.cpp index e9df78dbf4f..6cfb8cb6c3d 100644 --- a/searchcore/src/vespa/searchcore/proton/flushengine/prepare_restart_flush_strategy.cpp +++ b/searchcore/src/vespa/searchcore/proton/flushengine/prepare_restart_flush_strategy.cpp @@ -18,9 +18,11 @@ using Config = PrepareRestartFlushStrategy::Config; using FlushContextsMap = std::map<vespalib::string, FlushContext::List>; using FlushTargetCandidatesList = std::vector<FlushTargetCandidates::UP>; -PrepareRestartFlushStrategy::Config::Config(double tlsReplayCost_, +PrepareRestartFlushStrategy::Config::Config(double tlsReplayByteCost_, + double tlsReplayOperationCost_, double flushTargetWriteCost_) - : tlsReplayCost(tlsReplayCost_), + : tlsReplayByteCost(tlsReplayByteCost_), + tlsReplayOperationCost(tlsReplayOperationCost_), flushTargetWriteCost(flushTargetWriteCost_) { } @@ -108,18 +110,22 @@ findBestTargetsToFlush(const FlushContext::List &unsortedFlushContexts, for (size_t numCandidates = 1; numCandidates <= sortedFlushContexts.size(); ++numCandidates) { FlushTargetCandidates nextSet(sortedFlushContexts, numCandidates, tlsStats, cfg); LOG(debug, "findBestTargetsToFlush(): Created candidate set: " - "flushTargets=[%s], tlsReplayCost=%f, flushTargetsWriteCost=%f, totalCost=%f", + "flushTargets=[%s], tlsReplayBytesCost=%f, tlsReplayOperationsCost=%f, flushTargetsWriteCost=%f, totalCost=%f", toString(nextSet.getCandidates()).c_str(), - nextSet.getTlsReplayCost(), nextSet.getFlushTargetsWriteCost(), + nextSet.getTlsReplayCost().bytesCost, + nextSet.getTlsReplayCost().operationsCost, + nextSet.getFlushTargetsWriteCost(), nextSet.getTotalCost()); if (nextSet.getTotalCost() < bestSet.getTotalCost()) { bestSet = nextSet; } } LOG(info, "findBestTargetsToFlush(): Best candidate set: " - "flushTargets=[%s], tlsReplayCost=%f, flushTargetsWriteCost=%f, totalCost=%f", + "flushTargets=[%s], tlsReplayBytesCost=%f, tlsReplayOperationsCost=%f, flushTargetsWriteCost=%f, totalCost=%f", toString(bestSet.getCandidates()).c_str(), - bestSet.getTlsReplayCost(), bestSet.getFlushTargetsWriteCost(), + bestSet.getTlsReplayCost().bytesCost, + bestSet.getTlsReplayCost().operationsCost, + bestSet.getFlushTargetsWriteCost(), bestSet.getTotalCost()); return bestSet.getCandidates(); } diff --git a/searchcore/src/vespa/searchcore/proton/flushengine/prepare_restart_flush_strategy.h b/searchcore/src/vespa/searchcore/proton/flushengine/prepare_restart_flush_strategy.h index 19a5cf45670..df5c5a9c569 100644 --- a/searchcore/src/vespa/searchcore/proton/flushengine/prepare_restart_flush_strategy.h +++ b/searchcore/src/vespa/searchcore/proton/flushengine/prepare_restart_flush_strategy.h @@ -21,9 +21,12 @@ class PrepareRestartFlushStrategy : public IFlushStrategy public: struct Config { - double tlsReplayCost; + double tlsReplayByteCost; + double tlsReplayOperationCost; double flushTargetWriteCost; - Config(double tlsReplayCost_, double flushTargetWriteCost_); + Config(double tlsReplayByteCost_, + double tlsReplayOperationCost_, + double flushTargetWriteCost_); }; private: diff --git a/searchcore/src/vespa/searchcore/proton/server/proton.cpp b/searchcore/src/vespa/searchcore/proton/server/proton.cpp index cd016e5cfc4..2794619273c 100644 --- a/searchcore/src/vespa/searchcore/proton/server/proton.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/proton.cpp @@ -655,7 +655,8 @@ PrepareRestartFlushStrategy::Config createPrepareRestartConfig(const ProtonConfig &protonConfig) { return PrepareRestartFlushStrategy::Config(protonConfig.flush.preparerestart.replaycost, - protonConfig.flush.preparerestart.writecost); + protonConfig.flush.preparerestart.replayoperationcost, + protonConfig.flush.preparerestart.writecost); } } |