aboutsummaryrefslogtreecommitdiffstats
path: root/searchcore
diff options
context:
space:
mode:
authorTor Egge <Tor.Egge@broadpark.no>2020-06-10 13:42:16 +0200
committerTor Egge <Tor.Egge@broadpark.no>2020-06-10 15:57:21 +0200
commitec1c7434958d1a0f2f4854477627ae864354b313 (patch)
tree36c38a2a446b411a5be5f27e261024dbfea1d546 /searchcore
parent3d1b445b10f70771cf7a4bce1ba2d285c553f930 (diff)
Adjust prepare restart strategy for differentiated replay operation costs.
Diffstat (limited to 'searchcore')
-rw-r--r--searchcore/src/tests/proton/flushengine/prepare_restart_flush_strategy/prepare_restart_flush_strategy_test.cpp64
-rw-r--r--searchcore/src/vespa/searchcore/proton/attribute/flushableattribute.cpp14
-rw-r--r--searchcore/src/vespa/searchcore/proton/attribute/flushableattribute.h2
-rw-r--r--searchcore/src/vespa/searchcore/proton/flushengine/CMakeLists.txt1
-rw-r--r--searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidate.cpp22
-rw-r--r--searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidate.h37
-rw-r--r--searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidates.cpp48
-rw-r--r--searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidates.h11
-rw-r--r--searchcore/src/vespa/searchcore/proton/flushengine/prepare_restart_flush_strategy.cpp33
9 files changed, 169 insertions, 63 deletions
diff --git a/searchcore/src/tests/proton/flushengine/prepare_restart_flush_strategy/prepare_restart_flush_strategy_test.cpp b/searchcore/src/tests/proton/flushengine/prepare_restart_flush_strategy/prepare_restart_flush_strategy_test.cpp
index 0f55a4c30de..5f93f97f165 100644
--- a/searchcore/src/tests/proton/flushengine/prepare_restart_flush_strategy/prepare_restart_flush_strategy_test.cpp
+++ b/searchcore/src/tests/proton/flushengine/prepare_restart_flush_strategy/prepare_restart_flush_strategy_test.cpp
@@ -3,6 +3,7 @@
#include <vespa/searchcore/proton/flushengine/prepare_restart_flush_strategy.h>
#include <vespa/searchcore/proton/flushengine/flush_target_candidates.h>
+#include <vespa/searchcore/proton/flushengine/flush_target_candidate.h>
#include <vespa/searchcore/proton/flushengine/tls_stats_map.h>
#include <vespa/searchcore/proton/test/dummy_flush_handler.h>
#include <vespa/searchcore/proton/test/dummy_flush_target.h>
@@ -21,20 +22,16 @@ struct SimpleFlushTarget : public test::DummyFlushTarget
{
SerialNum flushedSerial;
uint64_t approxDiskBytes;
- SimpleFlushTarget(const vespalib::string &name,
- SerialNum flushedSerial_,
- uint64_t approxDiskBytes_)
- : test::DummyFlushTarget(name),
- flushedSerial(flushedSerial_),
- approxDiskBytes(approxDiskBytes_)
- {}
+ double replay_operation_cost;
SimpleFlushTarget(const vespalib::string &name,
const Type &type,
SerialNum flushedSerial_,
- uint64_t approxDiskBytes_)
+ uint64_t approxDiskBytes_,
+ double replay_operation_cost_)
: test::DummyFlushTarget(name, type, Component::OTHER),
flushedSerial(flushedSerial_),
- approxDiskBytes(approxDiskBytes_)
+ approxDiskBytes(approxDiskBytes_),
+ replay_operation_cost(replay_operation_cost_)
{}
virtual SerialNum getFlushedSerialNum() const override {
return flushedSerial;
@@ -42,6 +39,9 @@ struct SimpleFlushTarget : public test::DummyFlushTarget
virtual uint64_t getApproxBytesToWriteToDisk() const override {
return approxDiskBytes;
}
+ double get_replay_operation_cost() const override {
+ return replay_operation_cost;
+ }
};
class ContextsBuilder
@@ -66,30 +66,35 @@ public:
const vespalib::string &targetName,
IFlushTarget::Type targetType,
SerialNum flushedSerial,
- uint64_t approxDiskBytes) {
+ uint64_t approxDiskBytes,
+ double replay_operation_cost) {
IFlushHandler::SP handler = createAndGetHandler(handlerName);
IFlushTarget::SP target = std::make_shared<SimpleFlushTarget>(targetName,
targetType,
flushedSerial,
- approxDiskBytes);
+ approxDiskBytes,
+ replay_operation_cost);
_result.push_back(std::make_shared<FlushContext>(handler, target, 0));
return *this;
}
ContextsBuilder &add(const vespalib::string &handlerName,
const vespalib::string &targetName,
SerialNum flushedSerial,
- uint64_t approxDiskBytes) {
- return add(handlerName, targetName, IFlushTarget::Type::FLUSH, flushedSerial, approxDiskBytes);
+ uint64_t approxDiskBytes,
+ double replay_operation_cost = 0.0) {
+ return add(handlerName, targetName, IFlushTarget::Type::FLUSH, flushedSerial, approxDiskBytes, replay_operation_cost);
}
ContextsBuilder &add(const vespalib::string &targetName,
SerialNum flushedSerial,
- uint64_t approxDiskBytes) {
- return add("handler1", targetName, IFlushTarget::Type::FLUSH, flushedSerial, approxDiskBytes);
+ uint64_t approxDiskBytes,
+ double replay_operation_cost = 0.0) {
+ return add("handler1", targetName, IFlushTarget::Type::FLUSH, flushedSerial, approxDiskBytes, replay_operation_cost);
}
ContextsBuilder &addGC(const vespalib::string &targetName,
SerialNum flushedSerial,
- uint64_t approxDiskBytes) {
- return add("handler1", targetName, IFlushTarget::Type::GC, flushedSerial, approxDiskBytes);
+ uint64_t approxDiskBytes,
+ double replay_operation_cost = 0.0) {
+ return add("handler1", targetName, IFlushTarget::Type::GC, flushedSerial, approxDiskBytes, replay_operation_cost);
}
FlushContext::List build() const { return _result; }
};
@@ -99,6 +104,7 @@ class CandidatesBuilder
private:
const FlushContext::List *_sortedFlushContexts;
size_t _numCandidates;
+ mutable std::vector<FlushTargetCandidate> _candidates;
flushengine::TlsStats _tlsStats;
Config _cfg;
@@ -106,6 +112,7 @@ public:
CandidatesBuilder(const FlushContext::List &sortedFlushContexts)
: _sortedFlushContexts(&sortedFlushContexts),
_numCandidates(sortedFlushContexts.size()),
+ _candidates(),
_tlsStats(1000, 11, 110),
_cfg(2.0, 3.0, 4.0)
{}
@@ -125,8 +132,16 @@ public:
replayEndSerial);
return *this;
}
+ void setup_candidates() const {
+ _candidates.clear();
+ _candidates.reserve(_sortedFlushContexts->size());
+ for (const auto &flush_context : *_sortedFlushContexts) {
+ _candidates.emplace_back(flush_context, _tlsStats.getLastSerial(), _cfg);
+ }
+ }
FlushTargetCandidates build() const {
- return FlushTargetCandidates(*_sortedFlushContexts,
+ setup_candidates();
+ return FlushTargetCandidates(_candidates,
_numCandidates,
_tlsStats,
_cfg);
@@ -196,9 +211,12 @@ struct FlushStrategyFixture
{
flushengine::TlsStatsMap _tlsStatsMap;
PrepareRestartFlushStrategy strategy;
- FlushStrategyFixture()
+ FlushStrategyFixture(const Config &config)
: _tlsStatsMap(defaultTransactionLogStats()),
- strategy(DEFAULT_CFG)
+ strategy(config)
+ {}
+ FlushStrategyFixture()
+ : FlushStrategyFixture(DEFAULT_CFG)
{}
FlushContext::List getFlushTargets(const FlushContext::List &targetList,
const flushengine::TlsStatsMap &tlsStatsMap) const {
@@ -297,6 +315,12 @@ TEST_F("require that flush targets for different flush handlers are treated inde
TEST_DO(assertFlushContexts("[foo,baz,quz]", targets));
}
+TEST_F("require that expensive to replay target is flushed", FlushStrategyFixture(Config(2.0, 1.0, 4.0)))
+{
+ FlushContext::List targets = f.getFlushTargets(ContextsBuilder().
+ add("foo", 10, 249).add("bar", 60, 150).add("baz", 60, 150, 12.0).build(), f._tlsStatsMap);
+ TEST_DO(assertFlushContexts("[foo,baz]", targets));
+}
TEST_MAIN()
{
diff --git a/searchcore/src/vespa/searchcore/proton/attribute/flushableattribute.cpp b/searchcore/src/vespa/searchcore/proton/attribute/flushableattribute.cpp
index 0a61ec8d882..2b5f4b028dc 100644
--- a/searchcore/src/vespa/searchcore/proton/attribute/flushableattribute.cpp
+++ b/searchcore/src/vespa/searchcore/proton/attribute/flushableattribute.cpp
@@ -165,9 +165,15 @@ FlushableAttribute::FlushableAttribute(const AttributeVectorSP attr,
_fileHeaderContext(fileHeaderContext),
_attributeFieldWriter(attributeFieldWriter),
_hwInfo(hwInfo),
- _attrDir(attrDir)
+ _attrDir(attrDir),
+ _replay_operation_cost(0.0)
{
_lastStats.setPathElementsToLog(8);
+ auto &config = attr->getConfig();
+ if (config.basicType() == search::attribute::BasicType::Type::TENSOR &&
+ config.tensorType().is_tensor() && config.tensorType().is_dense() && config.hnsw_index_params().has_value()) {
+ _replay_operation_cost = 100.0; // replaying operations to hnsw index is 100 times more expensive than reading from tls
+ }
}
@@ -236,4 +242,10 @@ FlushableAttribute::getApproxBytesToWriteToDisk() const
return _attr->getEstimatedSaveByteSize();
}
+double
+FlushableAttribute::get_replay_operation_cost() const
+{
+ return _replay_operation_cost;
+}
+
} // namespace proton
diff --git a/searchcore/src/vespa/searchcore/proton/attribute/flushableattribute.h b/searchcore/src/vespa/searchcore/proton/attribute/flushableattribute.h
index 8d807c153c0..a759bcce26e 100644
--- a/searchcore/src/vespa/searchcore/proton/attribute/flushableattribute.h
+++ b/searchcore/src/vespa/searchcore/proton/attribute/flushableattribute.h
@@ -38,6 +38,7 @@ private:
vespalib::ISequencedTaskExecutor &_attributeFieldWriter;
HwInfo _hwInfo;
std::shared_ptr<AttributeDirectory> _attrDir;
+ double _replay_operation_cost;
Task::UP internalInitFlush(SerialNum currentSerial);
@@ -71,6 +72,7 @@ public:
virtual Task::UP initFlush(SerialNum currentSerial) override;
virtual FlushStats getLastFlushStats() const override { return _lastStats; }
virtual uint64_t getApproxBytesToWriteToDisk() const override;
+ virtual double get_replay_operation_cost() const override;
};
} // namespace proton
diff --git a/searchcore/src/vespa/searchcore/proton/flushengine/CMakeLists.txt b/searchcore/src/vespa/searchcore/proton/flushengine/CMakeLists.txt
index ecd90d8a992..340007f4513 100644
--- a/searchcore/src/vespa/searchcore/proton/flushengine/CMakeLists.txt
+++ b/searchcore/src/vespa/searchcore/proton/flushengine/CMakeLists.txt
@@ -7,6 +7,7 @@ vespa_add_library(searchcore_flushengine STATIC
flushcontext.cpp
flushengine.cpp
flush_engine_explorer.cpp
+ flush_target_candidate.cpp
flush_target_candidates.cpp
flushtargetproxy.cpp
flushtask.cpp
diff --git a/searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidate.cpp b/searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidate.cpp
new file mode 100644
index 00000000000..6be6d31372a
--- /dev/null
+++ b/searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidate.cpp
@@ -0,0 +1,22 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "flush_target_candidate.h"
+#include "flushcontext.h"
+
+namespace proton {
+
+FlushTargetCandidate::FlushTargetCandidate(std::shared_ptr<FlushContext> flush_context, search::SerialNum current_serial, const Config &cfg)
+ : _flush_context(std::move(flush_context)),
+ _replay_operation_cost(_flush_context->getTarget()->get_replay_operation_cost() * cfg.tlsReplayOperationCost),
+ _flushed_serial(_flush_context->getTarget()->getFlushedSerialNum()),
+ _current_serial(current_serial),
+ _replay_cost(_replay_operation_cost * (_current_serial - _flushed_serial)),
+ _approx_bytes_to_write_to_disk(_flush_context->getTarget()->getApproxBytesToWriteToDisk()),
+ _write_cost(_approx_bytes_to_write_to_disk * cfg.flushTargetWriteCost),
+ _always_flush(_replay_cost >= _write_cost)
+{
+}
+
+FlushTargetCandidate::~FlushTargetCandidate() = default;
+
+}
diff --git a/searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidate.h b/searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidate.h
new file mode 100644
index 00000000000..5920fff6942
--- /dev/null
+++ b/searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidate.h
@@ -0,0 +1,37 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "prepare_restart_flush_strategy.h"
+#include <memory>
+#include <vespa/searchlib/common/serialnum.h>
+
+namespace proton {
+
+class FlushContext;
+
+/**
+ * Class describing a flush target candidate for the prepare restart flush strategy.
+ */
+class FlushTargetCandidate
+{
+ std::shared_ptr<FlushContext> _flush_context;
+ double _replay_operation_cost;
+ search::SerialNum _flushed_serial;
+ search::SerialNum _current_serial;
+ double _replay_cost;
+ uint64_t _approx_bytes_to_write_to_disk;
+ double _write_cost;
+ bool _always_flush;
+
+ using Config = PrepareRestartFlushStrategy::Config;
+public:
+ FlushTargetCandidate(std::shared_ptr<FlushContext> flush_context, search::SerialNum current_serial, const Config &cfg);
+ ~FlushTargetCandidate();
+ const std::shared_ptr<FlushContext> &get_flush_context() const { return _flush_context; }
+ search::SerialNum get_flushed_serial() const { return _flushed_serial; }
+ double get_write_cost() const { return _write_cost; }
+ bool get_always_flush() const { return _always_flush; }
+};
+
+}
diff --git a/searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidates.cpp b/searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidates.cpp
index 0051c209ef9..f71da453559 100644
--- a/searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidates.cpp
+++ b/searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidates.cpp
@@ -1,6 +1,7 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "flush_target_candidates.h"
+#include "flush_target_candidate.h"
#include "tls_stats.h"
namespace proton {
@@ -13,17 +14,17 @@ using TlsReplayCost = FlushTargetCandidates::TlsReplayCost;
namespace {
SerialNum
-calculateReplayStartSerial(const FlushContext::List &sortedFlushContexts,
- size_t numCandidates,
+calculateReplayStartSerial(vespalib::ConstArrayRef<FlushTargetCandidate> candidates,
+ size_t num_candidates,
const flushengine::TlsStats &tlsStats)
{
- if (numCandidates == 0) {
+ if (num_candidates == 0) {
return tlsStats.getFirstSerial();
}
- if (numCandidates == sortedFlushContexts.size()) {
+ if (num_candidates == candidates.size()) {
return tlsStats.getLastSerial() + 1;
}
- return sortedFlushContexts[numCandidates]->getTarget()->getFlushedSerialNum() + 1;
+ return candidates[num_candidates].get_flushed_serial() + 1;
}
TlsReplayCost
@@ -44,43 +45,44 @@ calculateTlsReplayCost(const flushengine::TlsStats &tlsStats,
}
double
-calculateFlushTargetsWriteCost(const FlushContext::List &sortedFlushContexts,
- size_t numCandidates,
- const Config &cfg)
+calculateFlushTargetsWriteCost(vespalib::ConstArrayRef<FlushTargetCandidate> candidates,
+ size_t num_candidates)
{
double result = 0;
- for (size_t i = 0; i < numCandidates; ++i) {
- const auto &flushContext = sortedFlushContexts[i];
- result += (flushContext->getTarget()->getApproxBytesToWriteToDisk() *
- cfg.flushTargetWriteCost);
+ for (size_t i = 0; i < num_candidates; ++i) {
+ result += candidates[i].get_write_cost();
}
return result;
}
}
-FlushTargetCandidates::FlushTargetCandidates(const FlushContext::List &sortedFlushContexts,
- size_t numCandidates,
+FlushTargetCandidates::FlushTargetCandidates(vespalib::ConstArrayRef<FlushTargetCandidate> candidates,
+ size_t num_candidates,
const flushengine::TlsStats &tlsStats,
const Config &cfg)
- : _sortedFlushContexts(&sortedFlushContexts),
- _numCandidates(numCandidates),
+ : _candidates(candidates),
+ _num_candidates(std::min(num_candidates, _candidates.size())),
_tlsReplayCost(calculateTlsReplayCost(tlsStats,
cfg,
- calculateReplayStartSerial(sortedFlushContexts,
- numCandidates,
+ calculateReplayStartSerial(_candidates,
+ _num_candidates,
tlsStats))),
- _flushTargetsWriteCost(calculateFlushTargetsWriteCost(sortedFlushContexts,
- numCandidates,
- cfg))
+ _flushTargetsWriteCost(calculateFlushTargetsWriteCost(_candidates,
+ _num_candidates))
{
}
FlushContext::List
FlushTargetCandidates::getCandidates() const
{
- FlushContext::List result(_sortedFlushContexts->begin(),
- _sortedFlushContexts->begin() + _numCandidates);
+ FlushContext::List result;
+ result.reserve(_num_candidates);
+ for (const auto &candidate : _candidates) {
+ if (result.size() < _num_candidates || candidate.get_always_flush()) {
+ result.emplace_back(candidate.get_flush_context());
+ }
+ }
return result;
}
diff --git a/searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidates.h b/searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidates.h
index ea09989de31..2979173331c 100644
--- a/searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidates.h
+++ b/searchcore/src/vespa/searchcore/proton/flushengine/flush_target_candidates.h
@@ -2,11 +2,14 @@
#pragma once
#include "prepare_restart_flush_strategy.h"
+#include <vespa/vespalib/util/arrayref.h>
namespace proton {
namespace flushengine { class TlsStats; }
+class FlushTargetCandidate;
+
/**
* A set of flush targets that are candidates to be flushed.
*
@@ -27,8 +30,8 @@ public:
double totalCost() const { return bytesCost + operationsCost; }
};
private:
- const FlushContext::List *_sortedFlushContexts; // NOTE: ownership is handled outside
- size_t _numCandidates;
+ vespalib::ConstArrayRef<FlushTargetCandidate> _candidates; // NOTE: ownership is handled outside
+ size_t _num_candidates;
TlsReplayCost _tlsReplayCost;
double _flushTargetsWriteCost;
@@ -37,8 +40,8 @@ private:
public:
using UP = std::unique_ptr<FlushTargetCandidates>;
- FlushTargetCandidates(const FlushContext::List &sortedFlushContexts,
- size_t numCandidates,
+ FlushTargetCandidates(vespalib::ConstArrayRef<FlushTargetCandidate> candidates,
+ size_t num_candidates,
const flushengine::TlsStats &tlsStats,
const Config &cfg);
diff --git a/searchcore/src/vespa/searchcore/proton/flushengine/prepare_restart_flush_strategy.cpp b/searchcore/src/vespa/searchcore/proton/flushengine/prepare_restart_flush_strategy.cpp
index 6cfb8cb6c3d..21f9c8465b0 100644
--- a/searchcore/src/vespa/searchcore/proton/flushengine/prepare_restart_flush_strategy.cpp
+++ b/searchcore/src/vespa/searchcore/proton/flushengine/prepare_restart_flush_strategy.cpp
@@ -2,6 +2,7 @@
#include "prepare_restart_flush_strategy.h"
#include "flush_target_candidates.h"
+#include "flush_target_candidate.h"
#include "tls_stats_map.h"
#include <sstream>
#include <algorithm>
@@ -70,17 +71,15 @@ flatten(const FlushContextsMap &flushContextsPerHandler)
}
void
-sortByOldestFlushedSerialNumber(FlushContext::List &flushContexts)
+sortByOldestFlushedSerialNumber(std::vector<FlushTargetCandidate>& candidates)
{
- std::sort(flushContexts.begin(), flushContexts.end(),
- [](const auto &lhs, const auto &rhs) {
- if (lhs->getTarget()->getFlushedSerialNum() ==
- rhs->getTarget()->getFlushedSerialNum()) {
- return lhs->getName() < rhs->getName();
- }
- return lhs->getTarget()->getFlushedSerialNum() <
- rhs->getTarget()->getFlushedSerialNum();
- });
+ std::sort(candidates.begin(), candidates.end(),
+ [](const auto &lhs, const auto &rhs) {
+ if (lhs.get_flushed_serial() == rhs.get_flushed_serial()) {
+ return lhs.get_flush_context()->getName() < rhs.get_flush_context()->getName();
+ }
+ return lhs.get_flushed_serial() < rhs.get_flushed_serial();
+ });
}
vespalib::string
@@ -103,12 +102,16 @@ findBestTargetsToFlush(const FlushContext::List &unsortedFlushContexts,
const flushengine::TlsStats &tlsStats,
const Config &cfg)
{
- FlushContext::List sortedFlushContexts = unsortedFlushContexts;
- sortByOldestFlushedSerialNumber(sortedFlushContexts);
+ std::vector<FlushTargetCandidate> candidates;
+ candidates.reserve(unsortedFlushContexts.size());
+ for (const auto &flush_context : unsortedFlushContexts) {
+ candidates.emplace_back(flush_context, tlsStats.getLastSerial(), cfg);
+ }
+ sortByOldestFlushedSerialNumber(candidates);
- FlushTargetCandidates bestSet(sortedFlushContexts, 0, tlsStats, cfg);
- for (size_t numCandidates = 1; numCandidates <= sortedFlushContexts.size(); ++numCandidates) {
- FlushTargetCandidates nextSet(sortedFlushContexts, numCandidates, tlsStats, cfg);
+ FlushTargetCandidates bestSet(candidates, 0, tlsStats, cfg);
+ for (size_t numCandidates = 1; numCandidates <= candidates.size(); ++numCandidates) {
+ FlushTargetCandidates nextSet(candidates, numCandidates, tlsStats, cfg);
LOG(debug, "findBestTargetsToFlush(): Created candidate set: "
"flushTargets=[%s], tlsReplayBytesCost=%f, tlsReplayOperationsCost=%f, flushTargetsWriteCost=%f, totalCost=%f",
toString(nextSet.getCandidates()).c_str(),