Incorporate replay cost of a single operation in prepare restart flush strategy.

author: Geir Storli <geirst@oath.com> 2017-11-09 14:08:22 +0000
committer: Geir Storli <geirst@oath.com> 2017-11-09 14:08:22 +0000
commit: 7fd45bb656a845b66be56827627e44d45afd4567 (patch)
tree: 23b11297fcace27ccfd57c9586638af09e6b38eb /searchcore
parent: 4632be4f92f3d06f805a8818f7a66f6402172045 (diff)
7 files changed, 72 insertions, 30 deletions
diff --git a/searchcore/src/tests/proton/flushengine/prepare_restart_flush_strategy/prepare_restart_flush_strategy_test.cpp b/searchcore/src/tests/proton/flushengine/prepare_restart_flush_strategy/prepare_restart_flush_strategy_test.cpp
index 428cbf60996..6bc7e1b5556 100644
--- a/searchcore/src/tests/proton/flushengine/prepare_restart_flush_strategy/prepare_restart_flush_strategy_test.cpp
+++ b/searchcore/src/tests/proton/flushengine/prepare_restart_flush_strategy/prepare_restart_flush_strategy_test.cpp
@@ -15,7 +15,7 @@ using SimpleFlushHandler = test::DummyFlushHandler;
 using FlushCandidatesList = std::vector<FlushTargetCandidates>;
 using Config = PrepareRestartFlushStrategy::Config;
 
-const Config DEFAULT_CFG(2.0, 4.0);
+const Config DEFAULT_CFG(2.0, 0.0, 4.0);
 
 struct SimpleFlushTarget : public test::DummyFlushTarget
 {
@@ -107,7 +107,7 @@ public:
         : _sortedFlushContexts(&sortedFlushContexts),
           _numCandidates(sortedFlushContexts.size()),
           _tlsStats(1000, 11, 110),
-          _cfg(DEFAULT_CFG)
+          _cfg(2.0, 3.0, 4.0)
     {}
     CandidatesBuilder &flushContexts(const FlushContext::List &sortedFlushContexts) {
         _sortedFlushContexts = &sortedFlushContexts;
@@ -140,28 +140,35 @@ struct CandidatesFixture
     CandidatesFixture() : emptyContexts(), builder(emptyContexts) {}
 };
 
+void
+assertCosts(double tlsReplayBytesCost, double tlsReplayOperationsCost, double flushTargetsWriteCost, const FlushTargetCandidates &candidates)
+{ // Checks each cost component reported by 'candidates', and their total, against the expected values.
+    EXPECT_EQUAL(tlsReplayBytesCost, candidates.getTlsReplayCost().bytesCost); // bytes part of the TLS replay cost
+    EXPECT_EQUAL(tlsReplayOperationsCost, candidates.getTlsReplayCost().operationsCost); // operations part of the TLS replay cost
+    EXPECT_EQUAL(flushTargetsWriteCost, candidates.getFlushTargetsWriteCost());
+    EXPECT_EQUAL(tlsReplayBytesCost + tlsReplayOperationsCost + flushTargetsWriteCost, candidates.getTotalCost()); // total is expected to be the sum of the three parts
+}
+
 TEST_F("require that tls replay cost is correct for 100% replay", CandidatesFixture)
 {
-    EXPECT_EQUAL(2000, f.builder.replayEnd(110).build().getTlsReplayCost());
+    TEST_DO(assertCosts(1000 * 2, 100 * 3, 0, f.builder.replayEnd(110).build()));
 }
 
 TEST_F("require that tls replay cost is correct for 75% replay", CandidatesFixture)
 {
     FlushContext::List contexts = ContextsBuilder().add("target1", 10, 0).add("target2", 35, 0).build();
-    EXPECT_EQUAL(1500, f.builder.flushContexts(contexts).numCandidates(1).replayEnd(110).
-            build().getTlsReplayCost());
+    TEST_DO(assertCosts(750 * 2, 75 * 3, 0, f.builder.flushContexts(contexts).numCandidates(1).replayEnd(110).build()));
 }
 
 TEST_F("require that tls replay cost is correct for 25% replay", CandidatesFixture)
 {
     FlushContext::List contexts = ContextsBuilder().add("target1", 10, 0).add("target2", 85, 0).build();
-    EXPECT_EQUAL(500, f.builder.flushContexts(contexts).numCandidates(1).replayEnd(110).
-            build().getTlsReplayCost());
+    TEST_DO(assertCosts(250 * 2, 25 * 3, 0, f.builder.flushContexts(contexts).numCandidates(1).replayEnd(110).build()));
 }
 
 TEST_F("require that tls replay cost is correct for zero operations to replay", CandidatesFixture)
 {
-    EXPECT_EQUAL(0, f.builder.replayEnd(10).build().getTlsReplayCost());
+    TEST_DO(assertCosts(0, 0, 0, f.builder.replayEnd(10).build()));
 }
 
 TEST_F("require that flush cost is correct for zero flush targets", CandidatesFixture)
@@ -172,7 +179,7 @@ TEST_F("require that flush cost is correct for zero flush targets", CandidatesFi
 TEST_F("require that flush cost is sum of flush targets", CandidatesFixture)
 {
     FlushContext::List contexts = ContextsBuilder().add("target1", 20, 1000).add("target2", 30, 2000).build();
-    EXPECT_EQUAL(12000, f.builder.flushContexts(contexts).build().getFlushTargetsWriteCost());
+    TEST_DO(assertCosts(0, 0, 1000 * 4 + 2000 * 4, f.builder.flushContexts(contexts).build()));
 }
 
 
@@ -227,7 +234,7 @@ assertFlushContexts(const vespalib::string &expected, const FlushContext::List &
  *   - handler1: serial numbers 10 -> 110, 1000 bytes
  *   - handler2: serial numbers 10 -> 110, 2000 bytes
  *
- * The cost config is: tlsReplayCost=2.0, flushTargetsWriteCost=4.0.
+ * The cost config is: tlsReplayByteCost=2.0, tlsReplayOperationCost=0.0, flushTargetsWriteCost=4.0.
  * The cost of replaying the complete TLS is then:
  *   - handler1: 1000*2.0 = 2000
  *   - handler2: 2000*2.0 = 4000
diff --git a/searchcore/src/vespa/searchcore/config/proton.def b/searchcore/src/vespa/searchcore/config/proton.def
index eab889ea28c..c6c810ae72f 100644
--- a/searchcore/src/vespa/searchcore/config/proton.def
+++ b/searchcore/src/vespa/searchcore/config/proton.def
@@ -82,17 +82,31 @@ flush.memory.conservative.disklimitfactor double default=0.5
 ## watermark indicating when to go back from conservative to normal mode for the flush strategy.
 flush.memory.conservative.lowwatermarkfactor double default=0.9
 
-## The cost of doing replay when replaying the transaction log.
+## The cost of replaying a byte when replaying the transaction log.
 ##
-## The number of bytes to replay * replaycost gives an estimate of the
-## total cost of replaying the transaction log.
+## The estimate of the total cost of replaying the transaction log:
+## (number of bytes to replay) * replaycost + (number of operations to replay) * replayoperationcost
 ##
 ## The prepare for restart flush strategy will choose a set of components to flush
 ## such that the cost of flushing these + the cost of replaying the transaction log
 ## is as low as possible.
-flush.preparerestart.replaycost double default=4.0
+flush.preparerestart.replaycost double default=2.0
 
-## The cost of doing writes when flushing components to disk.
+## The cost of replaying an operation when replaying the transaction log.
+##
+## The estimate of the total cost of replaying the transaction log:
+## (number of bytes to replay) * replaycost + (number of operations to replay) * replayoperationcost
+##
+## The default value is chosen based on the following example:
+## Assume we can replay 9 MB/s (9000000 bytes/s) and that this corresponds to 24000 ops/s.
+## Weighting the two terms equally gives: replayoperationcost = (bytes to replay) * replaycost / (operations to replay) = 9000000 * 2.0 / 24000 = 750
+##
+## The prepare for restart flush strategy will choose a set of components to flush
+## such that the cost of flushing these + the cost of replaying the transaction log
+## is as low as possible.
+flush.preparerestart.replayoperationcost double default=750.0
+
+## The cost of writing a byte when flushing components to disk.
 ##
 ## The number of bytes to write (for a set of flushed components) * writecost
 ## gives an estimate of the total cost of flushing this set of components.
diff --git a/searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidates.cpp b/searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidates.cpp
index da54b909759..0051c209ef9 100644
--- a/searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidates.cpp
+++ b/searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidates.cpp
@@ -8,6 +8,7 @@ namespace proton {
 using search::SerialNum;
 
 using Config = PrepareRestartFlushStrategy::Config;
+using TlsReplayCost = FlushTargetCandidates::TlsReplayCost;
 
 namespace {
 
@@ -25,7 +26,7 @@ calculateReplayStartSerial(const FlushContext::List &sortedFlushContexts,
     return sortedFlushContexts[numCandidates]->getTarget()->getFlushedSerialNum() + 1;
 }
 
-double
+TlsReplayCost
 calculateTlsReplayCost(const flushengine::TlsStats &tlsStats,
                        const Config &cfg,
                        SerialNum replayStartSerial)
@@ -33,13 +34,13 @@ calculateTlsReplayCost(const flushengine::TlsStats &tlsStats,
     SerialNum replayEndSerial = tlsStats.getLastSerial();
     SerialNum numTotalOperations = replayEndSerial - tlsStats.getFirstSerial() + 1;
     if (numTotalOperations == 0) {
-        return 0;
+        return TlsReplayCost(0.0, 0.0);
     }
     double numBytesPerOperation =
         (double)tlsStats.getNumBytes() / (double)numTotalOperations;
     SerialNum numOperationsToReplay = replayEndSerial + 1 - replayStartSerial;
     double numBytesToReplay = numBytesPerOperation * numOperationsToReplay;
-    return numBytesToReplay * cfg.tlsReplayCost;
+    return TlsReplayCost((numBytesToReplay * cfg.tlsReplayByteCost), (numOperationsToReplay * cfg.tlsReplayOperationCost));
 }
 
 double
diff --git a/searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidates.h b/searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidates.h
index 5498d8c46a8..ea09989de31 100644
--- a/searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidates.h
+++ b/searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidates.h
@@ -16,10 +16,20 @@ namespace flushengine { class TlsStats; }
  */
 class FlushTargetCandidates
 {
+public:
+    struct TlsReplayCost { // TLS replay cost, kept as separate bytes and operations parts.
+        double bytesCost; // part of the cost that comes from the bytes to replay
+        double operationsCost; // part of the cost that comes from the operations to replay
+        TlsReplayCost(double bytesCost_, double operationsCost_)
+            : bytesCost(bytesCost_),
+              operationsCost(operationsCost_)
+        {}
+        double totalCost() const { return bytesCost + operationsCost; } // sum of both parts
+    };
 private:
     const FlushContext::List *_sortedFlushContexts; // NOTE: ownership is handled outside
     size_t _numCandidates;
-    double _tlsReplayCost;
+    TlsReplayCost _tlsReplayCost;
     double _flushTargetsWriteCost;
 
     using Config = PrepareRestartFlushStrategy::Config;
@@ -32,9 +42,9 @@ public:
                           const flushengine::TlsStats &tlsStats,
                           const Config &cfg);
 
-    double getTlsReplayCost() const { return _tlsReplayCost; }
+    TlsReplayCost getTlsReplayCost() const { return _tlsReplayCost; }
     double getFlushTargetsWriteCost() const { return _flushTargetsWriteCost; }
-    double getTotalCost() const { return getTlsReplayCost() + getFlushTargetsWriteCost(); }
+    double getTotalCost() const { return getTlsReplayCost().totalCost() + getFlushTargetsWriteCost(); }
     FlushContext::List getCandidates() const;
 };
 
diff --git a/searchcore/src/vespa/searchcore/proton/flushengine/prepare_restart_flush_strategy.cpp b/searchcore/src/vespa/searchcore/proton/flushengine/prepare_restart_flush_strategy.cpp
index e9df78dbf4f..6cfb8cb6c3d 100644
--- a/searchcore/src/vespa/searchcore/proton/flushengine/prepare_restart_flush_strategy.cpp
+++ b/searchcore/src/vespa/searchcore/proton/flushengine/prepare_restart_flush_strategy.cpp
@@ -18,9 +18,11 @@ using Config = PrepareRestartFlushStrategy::Config;
 using FlushContextsMap = std::map<vespalib::string, FlushContext::List>;
 using FlushTargetCandidatesList = std::vector<FlushTargetCandidates::UP>;
 
-PrepareRestartFlushStrategy::Config::Config(double tlsReplayCost_,
+PrepareRestartFlushStrategy::Config::Config(double tlsReplayByteCost_,
+                                            double tlsReplayOperationCost_,
                                             double flushTargetWriteCost_)
-    : tlsReplayCost(tlsReplayCost_),
+    : tlsReplayByteCost(tlsReplayByteCost_),
+      tlsReplayOperationCost(tlsReplayOperationCost_),
       flushTargetWriteCost(flushTargetWriteCost_)
 {
 }
@@ -108,18 +110,22 @@ findBestTargetsToFlush(const FlushContext::List &unsortedFlushContexts,
     for (size_t numCandidates = 1; numCandidates <= sortedFlushContexts.size(); ++numCandidates) {
         FlushTargetCandidates nextSet(sortedFlushContexts, numCandidates, tlsStats, cfg);
         LOG(debug, "findBestTargetsToFlush(): Created candidate set: "
-                "flushTargets=[%s], tlsReplayCost=%f, flushTargetsWriteCost=%f, totalCost=%f",
+                "flushTargets=[%s], tlsReplayBytesCost=%f, tlsReplayOperationsCost=%f, flushTargetsWriteCost=%f, totalCost=%f",
                 toString(nextSet.getCandidates()).c_str(),
-                nextSet.getTlsReplayCost(), nextSet.getFlushTargetsWriteCost(),
+                nextSet.getTlsReplayCost().bytesCost,
+                nextSet.getTlsReplayCost().operationsCost,
+                nextSet.getFlushTargetsWriteCost(),
                 nextSet.getTotalCost());
         if (nextSet.getTotalCost() < bestSet.getTotalCost()) {
             bestSet = nextSet;
         }
     }
     LOG(info, "findBestTargetsToFlush(): Best candidate set: "
-            "flushTargets=[%s], tlsReplayCost=%f, flushTargetsWriteCost=%f, totalCost=%f",
+            "flushTargets=[%s], tlsReplayBytesCost=%f, tlsReplayOperationsCost=%f, flushTargetsWriteCost=%f, totalCost=%f",
             toString(bestSet.getCandidates()).c_str(),
-            bestSet.getTlsReplayCost(), bestSet.getFlushTargetsWriteCost(),
+            bestSet.getTlsReplayCost().bytesCost,
+            bestSet.getTlsReplayCost().operationsCost,
+            bestSet.getFlushTargetsWriteCost(),
             bestSet.getTotalCost());
     return bestSet.getCandidates();
 }
diff --git a/searchcore/src/vespa/searchcore/proton/flushengine/prepare_restart_flush_strategy.h b/searchcore/src/vespa/searchcore/proton/flushengine/prepare_restart_flush_strategy.h
index 19a5cf45670..df5c5a9c569 100644
--- a/searchcore/src/vespa/searchcore/proton/flushengine/prepare_restart_flush_strategy.h
+++ b/searchcore/src/vespa/searchcore/proton/flushengine/prepare_restart_flush_strategy.h
@@ -21,9 +21,12 @@ class PrepareRestartFlushStrategy : public IFlushStrategy
 public:
     struct Config
     {
-        double tlsReplayCost;
+        double tlsReplayByteCost; // multiplied with the number of bytes to replay from the TLS
+        double tlsReplayOperationCost; // multiplied with the number of operations to replay from the TLS
         double flushTargetWriteCost;
-        Config(double tlsReplayCost_, double flushTargetWriteCost_);
+        Config(double tlsReplayByteCost_,
+               double tlsReplayOperationCost_,
+               double flushTargetWriteCost_);
     };
 
 private:
diff --git a/searchcore/src/vespa/searchcore/proton/server/proton.cpp b/searchcore/src/vespa/searchcore/proton/server/proton.cpp
index cd016e5cfc4..2794619273c 100644
--- a/searchcore/src/vespa/searchcore/proton/server/proton.cpp
+++ b/searchcore/src/vespa/searchcore/proton/server/proton.cpp
@@ -655,7 +655,8 @@ PrepareRestartFlushStrategy::Config
 createPrepareRestartConfig(const ProtonConfig &protonConfig)
 {
     return PrepareRestartFlushStrategy::Config(protonConfig.flush.preparerestart.replaycost,
-            protonConfig.flush.preparerestart.writecost);
+                                               protonConfig.flush.preparerestart.replayoperationcost,
+                                               protonConfig.flush.preparerestart.writecost);
 }
 
 }
author	Geir Storli <geirst@oath.com>	2017-11-09 14:08:22 +0000
committer	Geir Storli <geirst@oath.com>	2017-11-09 14:08:22 +0000
commit	7fd45bb656a845b66be56827627e44d45afd4567 (patch)
tree	23b11297fcace27ccfd57c9586638af09e6b38eb /searchcore
parent	4632be4f92f3d06f805a8818f7a66f6402172045 (diff)