diff options
32 files changed, 225 insertions, 395 deletions
diff --git a/client/js/app/yarn.lock b/client/js/app/yarn.lock index 175dd9efb23..7e833cfb8fb 100644 --- a/client/js/app/yarn.lock +++ b/client/js/app/yarn.lock @@ -4553,9 +4553,9 @@ prettier-linter-helpers@^1.0.0: fast-diff "^1.1.2" prettier@3: - version "3.2.4" - resolved "https://registry.yarnpkg.com/prettier/-/prettier-3.2.4.tgz#4723cadeac2ce7c9227de758e5ff9b14e075f283" - integrity sha512-FWu1oLHKCrtpO1ypU6J0SbK2d9Ckwysq6bHj/uaCP26DxrPpppCLQRGVuqAxSTvhF00AcvDRyYrLNW7ocBhFFQ== + version "3.2.5" + resolved "https://registry.yarnpkg.com/prettier/-/prettier-3.2.5.tgz#e52bc3090586e824964a8813b09aba6233b28368" + integrity sha512-3/GWa9aOC0YeD7LUfvOG2NiDyhOWRvt1k+rcKhOuYnMY24iiCphgneUfJDyFXd6rZCAnuLBv6UeAULtrhT/F4A== pretty-format@^29.7.0: version "29.7.0" diff --git a/config-model-api/abi-spec.json b/config-model-api/abi-spec.json index 21374061bfa..f5f9215d29d 100644 --- a/config-model-api/abi-spec.json +++ b/config-model-api/abi-spec.json @@ -1517,7 +1517,6 @@ "abstract" ], "methods" : [ - "public com.yahoo.config.model.api.OnnxModelCost$Calculator newCalculator(com.yahoo.config.application.api.ApplicationPackage, com.yahoo.config.provision.ApplicationId)", "public abstract com.yahoo.config.model.api.OnnxModelCost$Calculator newCalculator(com.yahoo.config.application.api.ApplicationPackage, com.yahoo.config.provision.ApplicationId, com.yahoo.config.provision.ClusterSpec$Id)", "public static com.yahoo.config.model.api.OnnxModelCost disabled()" ], diff --git a/config-model-api/src/main/java/com/yahoo/config/model/api/OnnxModelCost.java b/config-model-api/src/main/java/com/yahoo/config/model/api/OnnxModelCost.java index 1efd98184cc..49ef3cf4929 100644 --- a/config-model-api/src/main/java/com/yahoo/config/model/api/OnnxModelCost.java +++ b/config-model-api/src/main/java/com/yahoo/config/model/api/OnnxModelCost.java @@ -15,10 +15,6 @@ import java.util.Map; */ public interface OnnxModelCost { - // TODO: Remove when no longer in use (oldest model version is 8.283) - default Calculator newCalculator(ApplicationPackage appPkg, ApplicationId applicationId) { - return newCalculator(appPkg, applicationId, null); - } Calculator newCalculator(ApplicationPackage appPkg, ApplicationId applicationId, ClusterSpec.Id clusterId); interface Calculator { diff --git a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidatorTest.java b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidatorTest.java index a53ef233746..52d861ac902 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidatorTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidatorTest.java @@ -29,8 +29,6 @@ import java.util.concurrent.atomic.AtomicLong; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; /** * @author bjorncs @@ -119,7 +117,6 @@ class JvmHeapSizeValidatorTest { ModelCostDummy(long modelCost) { this.modelCost = modelCost; } - @Override public Calculator newCalculator(ApplicationPackage appPkg, ApplicationId applicationId) { return this; } @Override public Calculator newCalculator(ApplicationPackage appPkg, ApplicationId applicationId, ClusterSpec.Id clusterId) { return this; } @Override public Map<String, ModelInfo> models() { return Map.of(); } @Override public void setRestartOnDeploy() {} diff --git a/config-model/src/test/java/com/yahoo/vespa/model/content/StorageClusterTest.java b/config-model/src/test/java/com/yahoo/vespa/model/content/StorageClusterTest.java index c25f623b41c..574f5b04075 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/content/StorageClusterTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/content/StorageClusterTest.java @@ -336,7 +336,6 @@ public class StorageClusterTest { assertEquals(20, config.async_operation_throttler().min_window_size()); assertEquals(-1, config.async_operation_throttler().max_window_size()); // <=0 implies +inf assertEquals(3.0, config.async_operation_throttler().resize_rate(), 0.0001); - assertTrue(config.async_operation_throttler().throttle_individual_merge_feed_ops()); } @Test diff --git a/configd/src/apps/sentinel/manager.cpp b/configd/src/apps/sentinel/manager.cpp index 9fef1af0fa8..36bdef0dd8a 100644 --- a/configd/src/apps/sentinel/manager.cpp +++ b/configd/src/apps/sentinel/manager.cpp @@ -106,6 +106,9 @@ Manager::doConfigure() } } _env.notifyConfigUpdated(); + if (_services.empty()) { + _env.metrics().reset(); + } } @@ -316,10 +319,4 @@ Manager::handleCmd(const Cmd& cmd) } } -void -Manager::updateMetrics() -{ - _env.metrics().maybeLog(); -} - } diff --git a/configd/src/apps/sentinel/manager.h b/configd/src/apps/sentinel/manager.h index c5de8c99b91..765803b5da6 100644 --- a/configd/src/apps/sentinel/manager.h +++ b/configd/src/apps/sentinel/manager.h @@ -47,7 +47,6 @@ private: void handleChildDeaths(); void handleRestarts(); - void updateMetrics(); void terminateServices(bool catchable, bool printDebug = false); void doConfigure(); public: diff --git a/configd/src/apps/sentinel/metrics.cpp b/configd/src/apps/sentinel/metrics.cpp index d94e1b94465..f68311f6792 100644 --- a/configd/src/apps/sentinel/metrics.cpp +++ b/configd/src/apps/sentinel/metrics.cpp @@ -36,8 +36,7 @@ StartMetrics::maybeLog() using namespace std::chrono_literals; vespalib::steady_time curTime = vespalib::steady_clock::now(); if (curTime - lastRestartTime > 2h) { - totalRestartsCounter = 0; - lastRestartTime = vespalib::steady_clock::now(); + reset(); } sentinel_totalRestarts.sample(totalRestartsCounter); sentinel_running.sample(currentlyRunningServices); @@ -51,4 +50,9 @@ StartMetrics::incRestartsCounter() lastRestartTime = vespalib::steady_clock::now(); } +void StartMetrics::reset() { + totalRestartsCounter = 0; + lastRestartTime = vespalib::steady_clock::now(); +} + } diff --git a/configd/src/apps/sentinel/metrics.h b/configd/src/apps/sentinel/metrics.h index a2099ca1add..729b143a5da 100644 --- a/configd/src/apps/sentinel/metrics.h +++ b/configd/src/apps/sentinel/metrics.h @@ -27,6 +27,7 @@ struct StartMetrics { void maybeLog(); void incRestartsCounter(); + void reset(); }; } diff --git a/configdefinitions/src/vespa/stor-filestor.def b/configdefinitions/src/vespa/stor-filestor.def index cefce5fc648..e064cbefb46 100644 --- a/configdefinitions/src/vespa/stor-filestor.def +++ b/configdefinitions/src/vespa/stor-filestor.def @@ -24,11 +24,6 @@ num_network_threads int default=2 restart ## Type of sequenced thread executor use for persistence replies. response_sequencer_type enum {LATENCY, THROUGHPUT, ADAPTIVE} default=ADAPTIVE restart -## When merging, if we find more than this number of documents that exist on all -## of the same copies, send a separate apply bucket diff with these entries -## to an optimized merge chain that guarantuees minimum data transfer. -common_merge_chain_optimalization_minimum_size int default=64 restart - ## Chunksize to use while merging buckets between nodes. ## ## Should follow stor-distributormanager:splitsize (16MB). @@ -37,6 +32,7 @@ bucket_merge_chunk_size int default=16772216 restart ## Whether or not to enable the multibit split optimalization. This is useful ## if splitting is expensive, but listing document identifiers is fairly cheap. ## This is true for memfile persistence layer, but not for vespa search. +## TODO verify its usage enable_multibit_split_optimalization bool default=true restart ## Whether or not to use async message handling when scheduling storage messages from FileStorManager. @@ -71,8 +67,3 @@ async_operation_throttler.window_size_backoff double default=0.95 async_operation_throttler.min_window_size int default=20 async_operation_throttler.max_window_size int default=-1 # < 0 implies INT_MAX async_operation_throttler.resize_rate double default=3.0 -## If true, each put/remove contained within a merge is individually throttled as if it -## were a put/remove from a client. If false, merges are throttled at a persistence thread -## level, i.e. per ApplyBucketDiff message, regardless of how many document operations -## are contained within. -async_operation_throttler.throttle_individual_merge_feed_ops bool default=true diff --git a/dependency-versions/pom.xml b/dependency-versions/pom.xml index 852b3c7fdcf..28f563bfbaa 100644 --- a/dependency-versions/pom.xml +++ b/dependency-versions/pom.xml @@ -65,7 +65,7 @@ <assertj.vespa.version>3.25.2</assertj.vespa.version> <!-- Athenz dependencies. Make sure these dependencies match those in Vespa's internal repositories --> - <aws-sdk.vespa.version>1.12.650</aws-sdk.vespa.version> + <aws-sdk.vespa.version>1.12.651</aws-sdk.vespa.version> <athenz.vespa.version>1.11.51</athenz.vespa.version> <!-- Athenz END --> @@ -123,7 +123,7 @@ <netty.vespa.version>4.1.106.Final</netty.vespa.version> <netty-tcnative.vespa.version>2.0.62.Final</netty-tcnative.vespa.version> <onnxruntime.vespa.version>1.16.3</onnxruntime.vespa.version> - <opennlp.vespa.version>2.3.1</opennlp.vespa.version> + <opennlp.vespa.version>2.3.2</opennlp.vespa.version> <opentest4j.vespa.version>1.3.0</opentest4j.vespa.version> <org.json.vespa.version>20231013</org.json.vespa.version> <org.lz4.vespa.version>1.8.0</org.lz4.vespa.version> diff --git a/screwdriver.yaml b/screwdriver.yaml index 1a3bada2c42..65633be75c3 100644 --- a/screwdriver.yaml +++ b/screwdriver.yaml @@ -303,7 +303,7 @@ jobs: screwdriver.cd/buildPeriodically: H 6 1 * * environment: - IMAGE_NAME: "docker.io/vespaengine/vespa-generic-intel-x86_64" + IMAGE_NAME: "vespaengine/vespa-generic-intel-x86_64" secrets: - DOCKER_HUB_DEPLOY_KEY @@ -357,7 +357,7 @@ jobs: $SD_SOURCE_DIR/screwdriver/test-quick-start-guide.sh - publish-image: | if [[ -z $SD_PULL_REQUEST ]]; then - if curl -fsSL https://index.docker.io/v1/repositories/$IMAGE_NAME/tags/$VESPA_VERSION &> /dev/null; then + if curl -fsSL https://hub.docker.com/v2/repositories/$IMAGE_NAME/tags/$VESPA_VERSION/ &> /dev/null; then echo "Container image docker.io/$IMAGE_NAME:$VESPA_VERSION aldready exists." else OPT_STATE="$(set +o)" @@ -382,7 +382,7 @@ jobs: environment: BASE_IMAGE: "el9" - IMAGE_NAME: "vespa-el9-preview" + IMAGE_NAME: "vespaengine/vespa-el9-preview" secrets: - DOCKER_HUB_DEPLOY_KEY @@ -418,14 +418,14 @@ jobs: --build-arg VESPA_VERSION=$VESPA_VERSION \ --file Dockerfile \ --tag docker.io/vespaengine/vespa:latest \ - --tag docker.io/vespaengine/$IMAGE_NAME:latest \ + --tag docker.io/$IMAGE_NAME:latest \ . - verify-container-image: | # Run quick start guide $SD_SOURCE_DIR/screwdriver/test-quick-start-guide.sh - publish-image: | if [[ -z $SD_PULL_REQUEST ]]; then - if curl -fsSL https://index.docker.io/v1/repositories/$IMAGE_NAME/tags/$VESPA_VERSION &> /dev/null; then + if curl -fsSL https://hub.docker.com/v2/repositories/$IMAGE_NAME/tags/$VESPA_VERSION/ &> /dev/null; then echo "Container image docker.io/$IMAGE_NAME:$VESPA_VERSION aldready exists." else OPT_STATE="$(set +o)" @@ -439,9 +439,9 @@ jobs: --build-arg VESPA_BASE_IMAGE=$BASE_IMAGE \ --build-arg VESPA_VERSION=$VESPA_VERSION \ --file Dockerfile \ - --tag docker.io/vespaengine/$IMAGE_NAME:$VESPA_VERSION \ - --tag docker.io/vespaengine/$IMAGE_NAME:$VESPA_MAJOR \ - --tag docker.io/vespaengine/$IMAGE_NAME:latest \ + --tag docker.io/$IMAGE_NAME:$VESPA_VERSION \ + --tag docker.io/$IMAGE_NAME:$VESPA_MAJOR \ + --tag docker.io/$IMAGE_NAME:latest \ . fi fi diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.cpp b/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.cpp index 9e9199bc8ba..5a58aa869e0 100644 --- a/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.cpp +++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.cpp @@ -13,6 +13,7 @@ LOG_SETUP(".proton.documentmetastore.lid_allocator"); using search::fef::TermFieldMatchDataArray; using search::queryeval::Blueprint; +using search::queryeval::FlowStats; using search::queryeval::FullSearch; using search::queryeval::SearchIterator; using search::queryeval::SimpleLeafBlueprint; @@ -222,8 +223,9 @@ public: setEstimate(HitEstimate(_activeLids.size(), false)); } - double calculate_relative_estimate() const final { - return abs_to_rel_est(getState().estimate().estHits, get_docid_limit()); + FlowStats calculate_flow_stats(uint32_t docid_limit) const override { + auto est = abs_to_rel_est(getState().estimate().estHits, docid_limit); + return {est, 1.0, est}; } bool isWhiteList() const noexcept final { return true; } diff --git a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp index 90452f1d12b..51164427690 100644 --- a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp @@ -23,14 +23,10 @@ class MyOr : public IntermediateBlueprint { private: public: - double calculate_relative_estimate() const final { - return OrFlow::estimate_of(get_children()); - } - double calculate_cost() const final { - return OrFlow::cost_of(get_children(), false); - } - double calculate_strict_cost() const final { - return OrFlow::cost_of(get_children(), true); + FlowStats calculate_flow_stats(uint32_t) const final { + return {OrFlow::estimate_of(get_children()), + OrFlow::cost_of(get_children(), false), + OrFlow::cost_of(get_children(), true)}; } HitEstimate combine(const std::vector<HitEstimate> &data) const override { return max(data); @@ -798,7 +794,7 @@ TEST("requireThatDocIdLimitInjectionWorks") TEST("Control object sizes") { EXPECT_EQUAL(32u, sizeof(Blueprint::State)); EXPECT_EQUAL(56u, sizeof(Blueprint)); - EXPECT_EQUAL(96u, sizeof(LeafBlueprint)); + EXPECT_EQUAL(88u, sizeof(LeafBlueprint)); } TEST_MAIN() { diff --git a/searchlib/src/tests/queryeval/blueprint/mysearch.h b/searchlib/src/tests/queryeval/blueprint/mysearch.h index 6eb27364c2b..79c9885bb7d 100644 --- a/searchlib/src/tests/queryeval/blueprint/mysearch.h +++ b/searchlib/src/tests/queryeval/blueprint/mysearch.h @@ -117,8 +117,15 @@ public: MyLeaf() : SimpleLeafBlueprint() {} MyLeaf(FieldSpecBaseList fields) : SimpleLeafBlueprint(std::move(fields)) {} void set_cost(double value) noexcept { _cost = value; } - double calculate_cost() const override { return _cost; } - + FlowStats calculate_flow_stats(uint32_t docid_limit) const override { + double rel_est = abs_to_rel_est(getState().estimate().estHits, docid_limit); + if (rel_est > 0.9) { + return {0.5, _cost, _cost}; + } else { + return {rel_est, _cost, _cost * rel_est}; + } + } + MyLeaf &estimate(uint32_t hits, bool empty = false) { setEstimate(HitEstimate(hits, empty)); return *this; diff --git a/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp b/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp index 4fc8922b9a3..ca450c6d712 100644 --- a/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp +++ b/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp @@ -47,9 +47,7 @@ concept ChildCollector = requires(T a, std::unique_ptr<Blueprint> bp) { // inherit Blueprint to capture the default filter factory struct DefaultBlueprint : Blueprint { - double calculate_relative_estimate() const override { abort(); } - double calculate_cost() const override { abort(); } - double calculate_strict_cost() const override { abort(); } + FlowStats calculate_flow_stats(uint32_t) const override { abort(); } void optimize(Blueprint* &, OptimizePass) override { abort(); } void sort(bool, bool) override { abort(); } const State &getState() const override { abort(); } diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp index 2b9b45c990e..7f34bc82c03 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp @@ -118,9 +118,7 @@ Blueprint::State::~State() = default; Blueprint::Blueprint() noexcept : _parent(nullptr), - _relative_estimate(0.0), - _cost(0.0), - _strict_cost(0.0), + _flow_stats(0.0, 0.0, 0.0), _sourceId(0xffffffff), _docid_limit(0), _frozen(false) @@ -357,9 +355,9 @@ Blueprint::visitMembers(vespalib::ObjectVisitor &visitor) const visitor.visitInt("tree_size", state.tree_size()); visitor.visitBool("allow_termwise_eval", state.allow_termwise_eval()); visitor.closeStruct(); - visitor.visitFloat("relative_estimate", _relative_estimate); - visitor.visitFloat("cost", _cost); - visitor.visitFloat("strict_cost", _strict_cost); + visitor.visitFloat("relative_estimate", estimate()); + visitor.visitFloat("cost", cost()); + visitor.visitFloat("strict_cost", strict_cost()); visitor.visitInt("sourceId", _sourceId); visitor.visitInt("docid_limit", _docid_limit); } @@ -558,9 +556,7 @@ IntermediateBlueprint::optimize(Blueprint* &self, OptimizePass pass) } optimize_self(pass); if (pass == OptimizePass::LAST) { - set_relative_estimate(calculate_relative_estimate()); - set_cost(calculate_cost()); - set_strict_cost(calculate_strict_cost()); + update_flow_stats(get_docid_limit()); } maybe_eliminate_self(self, get_replacement()); } @@ -718,34 +714,20 @@ IntermediateBlueprint::calculateUnpackInfo(const fef::MatchData & md) const //----------------------------------------------------------------------------- -double -LeafBlueprint::calculate_relative_estimate() const +FlowStats +LeafBlueprint::calculate_flow_stats(uint32_t docid_limit) const { - double rel_est = abs_to_rel_est(_state.estimate().estHits, get_docid_limit()); + double rel_est = abs_to_rel_est(_state.estimate().estHits, docid_limit); if (rel_est > 0.9) { // Assume we do not really know how much we are matching when // we claim to match 'everything'. Also assume we are not able // to skip documents efficiently when strict. - _can_skip = false; - return 0.5; + return {0.5, 1.0, 1.0}; } else { - _can_skip = true; - return rel_est; + return {rel_est, 1.0, rel_est}; } } -double -LeafBlueprint::calculate_cost() const -{ - return 1.0; -} - -double -LeafBlueprint::calculate_strict_cost() const -{ - return _can_skip ? estimate() * cost() : cost(); -} - void LeafBlueprint::fetchPostings(const ExecuteInfo &) { @@ -780,9 +762,7 @@ LeafBlueprint::optimize(Blueprint* &self, OptimizePass pass) assert(self == this); optimize_self(pass); if (pass == OptimizePass::LAST) { - set_relative_estimate(calculate_relative_estimate()); - set_cost(calculate_cost()); - set_strict_cost(calculate_strict_cost()); + update_flow_stats(get_docid_limit()); } maybe_eliminate_self(self, get_replacement()); } diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h index e080d667dfa..20606c713a5 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h @@ -2,6 +2,7 @@ #pragma once +#include "flow.h" #include "field_spec.h" #include "unpackinfo.h" #include "executeinfo.h" @@ -178,9 +179,7 @@ public: private: Blueprint *_parent; - double _relative_estimate; - double _cost; - double _strict_cost; + FlowStats _flow_stats; uint32_t _sourceId; uint32_t _docid_limit; bool _frozen; @@ -196,10 +195,6 @@ protected: _frozen = true; } - void set_relative_estimate(double value) noexcept { _relative_estimate = value; } - void set_cost(double value) noexcept { _cost = value; } - void set_strict_cost(double value) noexcept { _strict_cost = value; } - public: class IPredicate { public: @@ -259,25 +254,31 @@ public: double hit_ratio() const { return getState().hit_ratio(_docid_limit); } // The flow statistics for a blueprint is calculated during the - // LAST optimize pass (just prior to sorting). The relative - // estimate may be used to calculate the costs and the non-strict - // cost may be used to calculate the strict cost. After being + // LAST optimize pass (just prior to sorting). After being // calculated, each value is available through a simple accessor - // function. Note that these values may not be available for - // blueprints used inside complex leafs (this case will probably - // be solved using custom flow adapters that has knowledge of - // docid limit). + // function. Since the optimize process is performed bottom-up, a + // blueprint can expect all children to already have these values + // calculated when the calculate_flow_stats function is called. + // + // Note that values are not automatically available for blueprints + // used inside complex leafs since they are not part of the tree + // seen by optimize. When the calculate_flow_stats function is + // called on a complex leaf, it can call the update_flow_stats + // function directly (the function that is normally called by + // optimize) on interal blueprints to make these values available + // before using them to calculate its own flow stats. // // 'estimate': relative estimate in the range [0,1] // 'cost': per-document cost of non-strict evaluation // 'strict_cost': per-document cost of strict evaluation - double estimate() const noexcept { return _relative_estimate; } - double cost() const noexcept { return _cost; } - double strict_cost() const noexcept { return _strict_cost; } - virtual double calculate_relative_estimate() const = 0; - virtual double calculate_cost() const = 0; - virtual double calculate_strict_cost() const = 0; - + double estimate() const noexcept { return _flow_stats.estimate; } + double cost() const noexcept { return _flow_stats.cost; } + double strict_cost() const noexcept { return _flow_stats.strict_cost; } + virtual FlowStats calculate_flow_stats(uint32_t docid_limit) const = 0; + void update_flow_stats(uint32_t docid_limit) { + _flow_stats = calculate_flow_stats(docid_limit); + } + virtual void fetchPostings(const ExecuteInfo &execInfo) = 0; virtual void freeze() = 0; bool frozen() const { return _frozen; } @@ -417,7 +418,6 @@ class LeafBlueprint : public Blueprint { private: State _state; - mutable bool _can_skip = true; protected: void optimize(Blueprint* &self, OptimizePass pass) final; void sort(bool strict, bool sort_by_cost) override; @@ -453,9 +453,7 @@ protected: public: ~LeafBlueprint() override = default; const State &getState() const final { return _state; } - double calculate_relative_estimate() const override; - double calculate_cost() const override; - double calculate_strict_cost() const override; + FlowStats calculate_flow_stats(uint32_t docid_limit) const override; void fetchPostings(const ExecuteInfo &execInfo) override; void freeze() final; SearchIteratorUP createSearch(fef::MatchData &md, bool strict) const override; diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.h b/searchlib/src/vespa/searchlib/queryeval/flow.h index b90321581b5..01a5922d864 100644 --- a/searchlib/src/vespa/searchlib/queryeval/flow.h +++ b/searchlib/src/vespa/searchlib/queryeval/flow.h @@ -10,6 +10,14 @@ namespace search::queryeval { +struct FlowStats { + double estimate; + double cost; + double strict_cost; + constexpr FlowStats(double estimate_in, double cost_in, double strict_cost_in) noexcept + : estimate(estimate_in), cost(cost_in), strict_cost(strict_cost_in) {} +}; + namespace flow { // the default adapter expects the shape of std::unique_ptr<Blueprint> diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp index 8cabe189b0e..089351b4ae5 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp @@ -1,7 +1,6 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "intermediate_blueprints.h" -#include "flow.h" #include "andnotsearch.h" #include "andsearch.h" #include "orsearch.h" @@ -86,22 +85,12 @@ need_normal_features_for_children(const IntermediateBlueprint &blueprint, fef::M //----------------------------------------------------------------------------- -double -AndNotBlueprint::calculate_relative_estimate() const -{ - return AndNotFlow::estimate_of(get_children()); -} - -double -AndNotBlueprint::calculate_cost() const -{ - return AndNotFlow::cost_of(get_children(), false); -} - -double -AndNotBlueprint::calculate_strict_cost() const +FlowStats +AndNotBlueprint::calculate_flow_stats(uint32_t) const { - return AndNotFlow::cost_of(get_children(), true); + return {AndNotFlow::estimate_of(get_children()), + AndNotFlow::cost_of(get_children(), false), + AndNotFlow::cost_of(get_children(), true)}; } Blueprint::HitEstimate @@ -218,19 +207,11 @@ AndNotBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) co //----------------------------------------------------------------------------- -double -AndBlueprint::calculate_relative_estimate() const { - return AndFlow::estimate_of(get_children()); -} - -double -AndBlueprint::calculate_cost() const { - return AndFlow::cost_of(get_children(), false); -} - -double -AndBlueprint::calculate_strict_cost() const { - return AndFlow::cost_of(get_children(), true); +FlowStats +AndBlueprint::calculate_flow_stats(uint32_t) const { + return {AndFlow::estimate_of(get_children()), + AndFlow::cost_of(get_children(), false), + AndFlow::cost_of(get_children(), true)}; } Blueprint::HitEstimate @@ -332,19 +313,11 @@ OrBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate) const OrBlueprint::~OrBlueprint() = default; -double -OrBlueprint::calculate_relative_estimate() const { - return OrFlow::estimate_of(get_children()); -} - -double -OrBlueprint::calculate_cost() const { - return OrFlow::cost_of(get_children(), false); -} - -double -OrBlueprint::calculate_strict_cost() const { - return OrFlow::cost_of(get_children(), true); +FlowStats +OrBlueprint::calculate_flow_stats(uint32_t) const { + return {OrFlow::estimate_of(get_children()), + OrFlow::cost_of(get_children(), false), + OrFlow::cost_of(get_children(), true)}; } Blueprint::HitEstimate @@ -442,21 +415,13 @@ OrBlueprint::calculate_cost_tier() const //----------------------------------------------------------------------------- WeakAndBlueprint::~WeakAndBlueprint() = default; -double -WeakAndBlueprint::calculate_relative_estimate() const { +FlowStats +WeakAndBlueprint::calculate_flow_stats(uint32_t docid_limit) const { double child_est = OrFlow::estimate_of(get_children()); - double my_est = abs_to_rel_est(_n, get_docid_limit()); - return std::min(my_est, child_est); -} - -double -WeakAndBlueprint::calculate_cost() const { - return OrFlow::cost_of(get_children(), false); -} - -double -WeakAndBlueprint::calculate_strict_cost() const { - return OrFlow::cost_of(get_children(), true); + double my_est = abs_to_rel_est(_n, docid_limit); + return {std::min(my_est, child_est), + OrFlow::cost_of(get_children(), false), + OrFlow::cost_of(get_children(), true)}; } Blueprint::HitEstimate @@ -518,19 +483,12 @@ WeakAndBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) c //----------------------------------------------------------------------------- -double -NearBlueprint::calculate_relative_estimate() const { - return AndFlow::estimate_of(get_children()); -} - -double -NearBlueprint::calculate_cost() const { - return AndFlow::cost_of(get_children(), false) + childCnt() * estimate(); -} - -double -NearBlueprint::calculate_strict_cost() const { - return AndFlow::cost_of(get_children(), true) + childCnt() * estimate(); +FlowStats +NearBlueprint::calculate_flow_stats(uint32_t) const { + double est = AndFlow::estimate_of(get_children()); + return {est, + AndFlow::cost_of(get_children(), false) + childCnt() * est, + AndFlow::cost_of(get_children(), true) + childCnt() * est}; } Blueprint::HitEstimate @@ -590,19 +548,12 @@ NearBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) cons //----------------------------------------------------------------------------- -double -ONearBlueprint::calculate_relative_estimate() const { - return AndFlow::estimate_of(get_children()); -} - -double -ONearBlueprint::calculate_cost() const { - return AndFlow::cost_of(get_children(), false) + childCnt() * estimate(); -} - -double -ONearBlueprint::calculate_strict_cost() const { - return AndFlow::cost_of(get_children(), true) + childCnt() * estimate(); +FlowStats +ONearBlueprint::calculate_flow_stats(uint32_t) const { + double est = AndFlow::estimate_of(get_children()); + return {est, + AndFlow::cost_of(get_children(), false) + childCnt() * est, + AndFlow::cost_of(get_children(), true) + childCnt() * est}; } Blueprint::HitEstimate @@ -660,19 +611,14 @@ ONearBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) con //----------------------------------------------------------------------------- -double -RankBlueprint::calculate_relative_estimate() const { - return (childCnt() == 0) ? 0.0 : getChild(0).estimate(); -} - -double -RankBlueprint::calculate_cost() const { - return (childCnt() == 0) ? 0.0 : getChild(0).cost(); -} - -double -RankBlueprint::calculate_strict_cost() const { - return (childCnt() == 0) ? 0.0 : getChild(0).strict_cost(); +FlowStats +RankBlueprint::calculate_flow_stats(uint32_t) const { + if (childCnt() == 0) { + return {0.0, 0.0, 0.0}; + } + return {getChild(0).estimate(), + getChild(0).cost(), + getChild(0).strict_cost()}; } Blueprint::HitEstimate @@ -766,27 +712,15 @@ SourceBlenderBlueprint::SourceBlenderBlueprint(const ISourceSelector &selector) SourceBlenderBlueprint::~SourceBlenderBlueprint() = default; -double -SourceBlenderBlueprint::calculate_relative_estimate() const { - return OrFlow::estimate_of(get_children()); -} - -double -SourceBlenderBlueprint::calculate_cost() const { +FlowStats +SourceBlenderBlueprint::calculate_flow_stats(uint32_t) const { double my_cost = 0.0; + double my_strict_cost = 0.0; for (const auto &child: get_children()) { my_cost = std::max(my_cost, child->cost()); + my_strict_cost = std::max(my_strict_cost, child->strict_cost()); } - return my_cost; -} - -double -SourceBlenderBlueprint::calculate_strict_cost() const { - double my_cost = 0.0; - for (const auto &child: get_children()) { - my_cost = std::max(my_cost, child->strict_cost()); - } - return my_cost; + return {OrFlow::estimate_of(get_children()), my_cost, my_strict_cost}; } Blueprint::HitEstimate diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h index 368cbd35c69..1da70b4fa70 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h @@ -15,9 +15,7 @@ class AndNotBlueprint : public IntermediateBlueprint { public: bool supports_termwise_children() const override { return true; } - double calculate_relative_estimate() const final; - double calculate_cost() const final; - double calculate_strict_cost() const final; + FlowStats calculate_flow_stats(uint32_t docid_limit) const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; void optimize_self(OptimizePass pass) override; @@ -44,9 +42,7 @@ class AndBlueprint : public IntermediateBlueprint { public: bool supports_termwise_children() const override { return true; } - double calculate_relative_estimate() const final; - double calculate_cost() const final; - double calculate_strict_cost() const final; + FlowStats calculate_flow_stats(uint32_t docid_limit) const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; void optimize_self(OptimizePass pass) override; @@ -71,9 +67,7 @@ class OrBlueprint : public IntermediateBlueprint public: ~OrBlueprint() override; bool supports_termwise_children() const override { return true; } - double calculate_relative_estimate() const final; - double calculate_cost() const final; - double calculate_strict_cost() const final; + FlowStats calculate_flow_stats(uint32_t docid_limit) const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; void optimize_self(OptimizePass pass) override; @@ -100,9 +94,7 @@ private: std::vector<uint32_t> _weights; public: - double calculate_relative_estimate() const final; - double calculate_cost() const final; - double calculate_strict_cost() const final; + FlowStats calculate_flow_stats(uint32_t docid_limit) const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; void sort(Children &children, bool strict, bool sort_on_cost) const override; @@ -132,9 +124,7 @@ private: uint32_t _window; public: - double calculate_relative_estimate() const final; - double calculate_cost() const final; - double calculate_strict_cost() const final; + FlowStats calculate_flow_stats(uint32_t docid_limit) const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; void sort(Children &children, bool strict, bool sort_by_cost) const override; @@ -156,9 +146,7 @@ private: uint32_t _window; public: - double calculate_relative_estimate() const final; - double calculate_cost() const final; - double calculate_strict_cost() const final; + FlowStats calculate_flow_stats(uint32_t docid_limit) const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; void sort(Children &children, bool strict, bool sort_by_cost) const override; @@ -177,9 +165,7 @@ public: class RankBlueprint final : public IntermediateBlueprint { public: - double calculate_relative_estimate() const final; - double calculate_cost() const final; - double calculate_strict_cost() const final; + FlowStats calculate_flow_stats(uint32_t docid_limit) const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; void optimize_self(OptimizePass pass) override; @@ -207,9 +193,7 @@ private: public: explicit SourceBlenderBlueprint(const ISourceSelector &selector) noexcept; ~SourceBlenderBlueprint() override; - double calculate_relative_estimate() const final; - double calculate_cost() const final; - double calculate_strict_cost() const final; + FlowStats calculate_flow_stats(uint32_t docid_limit) const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; void sort(Children &children, bool strict, bool sort_by_cost) const override; diff --git a/storage/src/tests/persistence/mergehandlertest.cpp b/storage/src/tests/persistence/mergehandlertest.cpp index f79235ae505..4bd0570efa8 100644 --- a/storage/src/tests/persistence/mergehandlertest.cpp +++ b/storage/src/tests/persistence/mergehandlertest.cpp @@ -170,11 +170,11 @@ struct MergeHandlerTest : PersistenceTestUtils { MergeHandler createHandler(size_t maxChunkSize = 0x400000) { return MergeHandler(getEnv(), getPersistenceProvider(), - getEnv()._component.cluster_context(), getEnv()._component.getClock(), *_sequenceTaskExecutor, maxChunkSize, 64); + getEnv()._component.cluster_context(), getEnv()._component.getClock(), *_sequenceTaskExecutor, maxChunkSize); } MergeHandler createHandler(spi::PersistenceProvider & spi) { return MergeHandler(getEnv(), spi, - getEnv()._component.cluster_context(), getEnv()._component.getClock(), *_sequenceTaskExecutor, 4190208, 64); + getEnv()._component.cluster_context(), getEnv()._component.getClock(), *_sequenceTaskExecutor, 4190208); } std::shared_ptr<api::StorageMessage> get_queued_reply() { diff --git a/storage/src/vespa/storage/config/stor-distributormanager.def b/storage/src/vespa/storage/config/stor-distributormanager.def index 7e0d557776c..a880ab6f00d 100644 --- a/storage/src/vespa/storage/config/stor-distributormanager.def +++ b/storage/src/vespa/storage/config/stor-distributormanager.def @@ -181,3 +181,7 @@ enable_operation_cancellation bool default=false ## TODO GC very soon, it has no effect. priority_merge_out_of_sync_copies int default=120 + +## TODO GC as it has no effect +use_btree_database bool default=true + diff --git a/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp b/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp index 90703050009..093c11fb913 100644 --- a/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp +++ b/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp @@ -25,7 +25,6 @@ #include <vespa/vespalib/util/sequencedtaskexecutor.h> #include <vespa/vespalib/util/string_escape.h> #include <vespa/vespalib/util/stringfmt.h> -#include <vespa/config/subscription/configuri.h> #include <vespa/config/helper/configfetcher.hpp> #include <thread> @@ -49,7 +48,7 @@ namespace { class BucketExecutorWrapper : public spi::BucketExecutor { public: - BucketExecutorWrapper(spi::BucketExecutor & executor) noexcept : _executor(executor) { } + explicit BucketExecutorWrapper(spi::BucketExecutor & executor) noexcept : _executor(executor) { } void execute(const spi::Bucket &bucket, std::unique_ptr<spi::BucketTask> task) override { _executor.execute(bucket, std::move(task)); @@ -213,7 +212,6 @@ FileStorManager::on_configure(const StorFilestorConfig& config) _use_async_message_handling_on_schedule = config.useAsyncMessageHandlingOnSchedule; _host_info_reporter.set_noise_level(config.resourceUsageReporterNoiseLevel); const bool use_dynamic_throttling = (config.asyncOperationThrottler.type == StorFilestorConfig::AsyncOperationThrottler::Type::DYNAMIC); - const bool throttle_merge_feed_ops = config.asyncOperationThrottler.throttleIndividualMergeFeedOps; if (!liveUpdate) { _config = std::make_unique<StorFilestorConfig>(config); @@ -243,11 +241,7 @@ FileStorManager::on_configure(const StorFilestorConfig& config) // TODO remove once desired throttling behavior is set in stone { _filestorHandler->use_dynamic_operation_throttling(use_dynamic_throttling); - _filestorHandler->set_throttle_apply_bucket_diff_ops(!throttle_merge_feed_ops); - std::lock_guard guard(_lock); - for (auto& ph : _persistenceHandlers) { - ph->set_throttle_merge_feed_ops(throttle_merge_feed_ops); - } + _filestorHandler->set_throttle_apply_bucket_diff_ops(false); } } @@ -312,7 +306,7 @@ FileStorManager::mapOperationToBucketAndDisk(api::BucketCommand& cmd, const docu if (results.size() > 1) { error << "Bucket was inconsistent with " << results.size() << " entries so no automatic remapping done:"; - BucketMap::const_iterator it = results.begin(); + auto it = results.begin(); for (uint32_t i=0; i <= 4 && it != results.end(); ++it, ++i) { error << " " << it->first; } @@ -551,10 +545,7 @@ FileStorManager::onDeleteBucket(const shared_ptr<api::DeleteBucketCommand>& cmd) StorBucketDatabase::WrappedEntry -FileStorManager::ensureConsistentBucket( - const document::Bucket& bucket, - api::StorageMessage& msg, - const char* callerId) +FileStorManager::ensureConsistentBucket(const document::Bucket& bucket, api::StorageMessage& msg, const char* callerId) { StorBucketDatabase::WrappedEntry entry(_component.getBucketDatabase(bucket.getBucketSpace()).get( bucket.getBucketId(), callerId, StorBucketDatabase::CREATE_IF_NONEXISTING)); @@ -565,7 +556,7 @@ FileStorManager::ensureConsistentBucket( entry.remove(); } replyDroppedOperation(msg, bucket, api::ReturnCode::ABORTED, "bucket is inconsistently split"); - return StorBucketDatabase::WrappedEntry(); + return {}; } return entry; @@ -899,7 +890,7 @@ FileStorManager::maintenance_in_all_spaces(const lib::Node& node) const noexcept if (!derived_cluster_state->getNodeState(node).getState().oneOf("m")) { return false; } - }; + } return true; } diff --git a/storage/src/vespa/storage/persistence/mergehandler.cpp b/storage/src/vespa/storage/persistence/mergehandler.cpp index 1b7041583e8..7ee2d9f37bf 100644 --- a/storage/src/vespa/storage/persistence/mergehandler.cpp +++ b/storage/src/vespa/storage/persistence/mergehandler.cpp @@ -2,7 +2,6 @@ #include "mergehandler.h" #include "persistenceutil.h" -#include "shared_operation_throttler.h" #include "apply_bucket_diff_entry_complete.h" #include "apply_bucket_diff_state.h" #include <vespa/storage/persistence/filestorage/mergestatus.h> @@ -28,17 +27,14 @@ namespace storage { MergeHandler::MergeHandler(PersistenceUtil& env, spi::PersistenceProvider& spi, const ClusterContext& cluster_context, const framework::Clock & clock, vespalib::ISequencedTaskExecutor& executor, - uint32_t maxChunkSize, - uint32_t commonMergeChainOptimalizationMinimumSize) + uint32_t maxChunkSize) : _clock(clock), _cluster_context(cluster_context), _env(env), _spi(spi), _monitored_ref_count(std::make_unique<MonitoredRefCount>()), _maxChunkSize(maxChunkSize), - _commonMergeChainOptimalizationMinimumSize(commonMergeChainOptimalizationMinimumSize), - _executor(executor), - _throttle_merge_feed_ops(true) + _executor(executor) { } @@ -50,6 +46,8 @@ MergeHandler::~MergeHandler() namespace { +constexpr uint32_t COMMON_MERGE_CHAIN_OPTIMIZATION_SIZE = 64u; + constexpr int getDeleteFlag() { // Referred into old slotfile code before. Where should this number come from? return 2; @@ -177,7 +175,7 @@ MergeHandler::buildBucketInfoList( std::vector<api::GetBucketDiffCommand::Entry>& output, spi::Context& context) const { - assert(output.size() == 0); + assert(output.empty()); assert(myNodeIndex < 16); uint32_t oldSize = output.size(); using DbBucketInfo = api::BucketInfo; @@ -489,13 +487,12 @@ MergeHandler::fetchLocalData( } document::Document::UP -MergeHandler::deserializeDiffDocument( - const api::ApplyBucketDiffCommand::Entry& e, - const document::DocumentTypeRepo& repo) const +MergeHandler::deserializeDiffDocument(const api::ApplyBucketDiffCommand::Entry& e, + const document::DocumentTypeRepo& repo) const { auto doc = std::make_unique<document::Document>(); vespalib::nbostream hbuf(&e._headerBlob[0], e._headerBlob.size()); - if (e._bodyBlob.size() > 0) { + if (!e._bodyBlob.empty()) { // TODO Remove this branch and add warning on error. vespalib::nbostream bbuf(&e._bodyBlob[0], e._bodyBlob.size()); doc->deserialize(repo, hbuf, bbuf); @@ -511,8 +508,7 @@ MergeHandler::applyDiffEntry(std::shared_ptr<ApplyBucketDiffState> async_results const api::ApplyBucketDiffCommand::Entry& e, const document::DocumentTypeRepo& repo) const { - auto throttle_token = throttle_merge_feed_ops() ? _env._fileStorHandler.operation_throttler().blocking_acquire_one() - : vespalib::SharedOperationThrottler::Token(); + auto throttle_token = _env._fileStorHandler.operation_throttler().blocking_acquire_one(); spi::Timestamp timestamp(e._entry._timestamp); if (!(e._entry._flags & (DELETED | DELETED_IN_PLACE))) { // Regular put entry @@ -536,17 +532,13 @@ MergeHandler::applyDiffEntry(std::shared_ptr<ApplyBucketDiffState> async_results * Apply the diffs needed locally. */ void -MergeHandler::applyDiffLocally( - const spi::Bucket& bucket, - std::vector<api::ApplyBucketDiffCommand::Entry>& diff, - uint8_t nodeIndex, - spi::Context& context, - std::shared_ptr<ApplyBucketDiffState> async_results) const +MergeHandler::applyDiffLocally(const spi::Bucket& bucket, std::vector<api::ApplyBucketDiffCommand::Entry>& diff, + uint8_t nodeIndex, spi::Context& context, + const std::shared_ptr<ApplyBucketDiffState> & async_results) const { // Sort the data to apply by which file they should be added to LOG(spam, "Merge(%s): Applying data locally. Diff has %zu entries", - bucket.toString().c_str(), - diff.size()); + bucket.toString().c_str(), diff.size()); uint32_t nodeMask = 1 << nodeIndex; uint32_t byteCount = 0; uint32_t addedCount = 0; @@ -566,9 +558,8 @@ MergeHandler::applyDiffLocally( if (spi::Timestamp(e._entry._timestamp) > existing.getTimestamp()) { ++j; - LOG(spam, "ApplyBucketDiff(%s): slot %s not in diff and " - "already present in persistence", bucket.toString().c_str(), - existing.toString().c_str()); + LOG(spam, "ApplyBucketDiff(%s): slot %s not in diff and already present in persistence", + bucket.toString().c_str(), existing.toString().c_str()); continue; } if ((e._entry._hasMask & nodeMask) != 0) { @@ -579,8 +570,7 @@ MergeHandler::applyDiffLocally( } if (!e.filled()) { ++i; - LOG(debug, "Failed to apply unretrieved entry %s to diff " - "locally on %s. Entry was probably compacted away.", + LOG(debug, "Failed to apply unretrieved entry %s to diff locally on %s. Entry was probably compacted away.", e.toString().c_str(), bucket.toString().c_str()); continue; } @@ -599,19 +589,14 @@ MergeHandler::applyDiffLocally( ++i; ++j; if ((e._entry._flags & DELETED) && !existing.isRemove()) { - LOG(debug, "Slot in diff is remove for existing " - "timestamp in %s. Diff slot: %s. Existing slot: %s", - bucket.toString().c_str(), e.toString().c_str(), - existing.toString().c_str()); + LOG(debug, "Slot in diff is remove for existing timestamp in %s. Diff slot: %s. Existing slot: %s", + bucket.toString().c_str(), e.toString().c_str(), existing.toString().c_str()); applyDiffEntry(async_results, bucket, e, repo); } else { // Duplicate put, just ignore it. - LOG(debug, "During diff apply, attempting to add slot " - "whose timestamp already exists in %s, but assuming " - "these are for the same entry--ignoring it. " - "Diff slot: %s. Existing slot: %s", - bucket.toString().c_str(), e.toString().c_str(), - existing.toString().c_str()); + LOG(debug, "During diff apply, attempting to add slot whose timestamp already exists in %s, " + " but assuming these are for the same entry--ignoring it. Diff slot: %s. Existing slot: %s", + bucket.toString().c_str(), e.toString().c_str(), existing.toString().c_str()); } continue; } @@ -626,8 +611,7 @@ MergeHandler::applyDiffLocally( continue; } if (!e.filled()) { - LOG(debug, "Failed to apply unretrieved entry %s to diff " - "locally on %s. Entry was probably compacted away.", + LOG(debug, "Failed to apply unretrieved entry %s to diff locally on %s. Entry was probably compacted away.", e.toString().c_str(), bucket.toString().c_str()); continue; } @@ -653,19 +637,13 @@ MergeHandler::sync_bucket_info(const spi::Bucket& bucket) const spi::BucketInfoResult infoResult(_spi.getBucketInfo(bucket)); if (infoResult.getErrorCode() != spi::Result::ErrorType::NONE) { LOG(warning, "Failed to get bucket info for %s: %s", - bucket.toString().c_str(), - infoResult.getErrorMessage().c_str()); - throw std::runtime_error("Failed to invoke getBucketInfo on " - "persistence provider"); + bucket.toString().c_str(), infoResult.getErrorMessage().c_str()); + throw std::runtime_error("Failed to invoke getBucketInfo on persistence provider"); } const spi::BucketInfo& tmpInfo(infoResult.getBucketInfo()); - api::BucketInfo providerInfo(tmpInfo.getChecksum(), - tmpInfo.getDocumentCount(), - tmpInfo.getDocumentSize(), - tmpInfo.getEntryCount(), - tmpInfo.getUsedSize(), - tmpInfo.isReady(), - tmpInfo.isActive()); + api::BucketInfo providerInfo(tmpInfo.getChecksum(), tmpInfo.getDocumentCount(), tmpInfo.getDocumentSize(), + tmpInfo.getEntryCount(), tmpInfo.getUsedSize(), + tmpInfo.isReady(), tmpInfo.isActive()); _env.updateBucketDatabase(bucket.getBucket(), providerInfo); } @@ -701,9 +679,7 @@ MergeHandler::processBucketMerge(const spi::Bucket& bucket, MergeStatus& status, // If last action failed, fail the whole merge if (status.reply->getResult().failed()) { LOG(warning, "Done with merge of %s (failed: %s) %s", - bucket.toString().c_str(), - status.reply->getResult().toString().c_str(), - status.toString().c_str()); + bucket.toString().c_str(), status.reply->getResult().toString().c_str(), status.toString().c_str()); return status.reply; } @@ -735,13 +711,9 @@ MergeHandler::processBucketMerge(const spi::Bucket& bucket, MergeStatus& status, cmd = std::make_shared<api::ApplyBucketDiffCommand>(bucket.getBucket(), nodes); cmd->setAddress(createAddress(_cluster_context.cluster_name_ptr(), nodes[1].index)); - findCandidates(status, - active_nodes_mask, - true, - 1 << (status.nodeList.size() - 1), - 1 << (nodes.size() - 1), - *cmd); - if (cmd->getDiff().size() != 0) { + findCandidates(status, active_nodes_mask, true, 1 << (status.nodeList.size() - 1), + 1 << (nodes.size() - 1), *cmd); + if (!cmd->getDiff().empty()) { break; } cmd.reset(); @@ -751,8 +723,7 @@ MergeHandler::processBucketMerge(const spi::Bucket& bucket, MergeStatus& status, active_nodes_mask = (1u << status.nodeList.size()) - 1; // If only one node left in the merge, return ok. if (status.nodeList.size() == 1) { - LOG(debug, "Done with merge of %s as there is only one node " - "that is not source only left in the merge.", + LOG(debug, "Done with merge of %s as there is only one node that is not source only left in the merge.", bucket.toString().c_str()); return status.reply; } @@ -780,11 +751,8 @@ MergeHandler::processBucketMerge(const spi::Bucket& bucket, MergeStatus& status, if (e.first == 0u) { continue; } - if (e.second >= uint32_t(_commonMergeChainOptimalizationMinimumSize) - || counts.size() == 1) - { - LOG(spam, "Sending separate apply bucket diff for path %x " - "with size %u", + if ((e.second >= COMMON_MERGE_CHAIN_OPTIMIZATION_SIZE) || (counts.size() == 1)) { + LOG(spam, "Sending separate apply bucket diff for path %x with size %u", e.first, e.second); std::vector<api::MergeBucketCommand::Node> nodes; // This node always has to be first in chain. @@ -840,7 +808,7 @@ MergeHandler::processBucketMerge(const spi::Bucket& bucket, MergeStatus& status, status.pendingId = cmd->getMsgId(); LOG(debug, "Sending %s", cmd->toString().c_str()); sender.sendCommand(cmd); - return api::StorageReply::SP(); + return {}; } /** Ensures merge states are deleted if we fail operation */ @@ -1206,7 +1174,7 @@ MergeHandler::handleGetBucketDiffReply(api::GetBucketDiffReply& reply, MessageSe assert(reply.getNodes().size() >= 2); // Get bucket diff should retrieve all info at once - assert(s->diff.size() == 0); + assert(s->diff.empty()); s->diff.insert(s->diff.end(), reply.getDiff().begin(), reply.getDiff().end()); diff --git a/storage/src/vespa/storage/persistence/mergehandler.h b/storage/src/vespa/storage/persistence/mergehandler.h index 43b51662fe6..f3bef802229 100644 --- a/storage/src/vespa/storage/persistence/mergehandler.h +++ b/storage/src/vespa/storage/persistence/mergehandler.h @@ -20,7 +20,6 @@ #include <vespa/storage/common/messagesender.h> #include <vespa/vespalib/util/monitored_refcount.h> #include <vespa/storageframework/generic/clock/time.h> -#include <atomic> namespace vespalib { class ISequencedTaskExecutor; } namespace document { class Document; } @@ -52,26 +51,17 @@ public: MergeHandler(PersistenceUtil& env, spi::PersistenceProvider& spi, const ClusterContext& cluster_context, const framework::Clock & clock, vespalib::ISequencedTaskExecutor& executor, - uint32_t maxChunkSize = 4190208, - uint32_t commonMergeChainOptimalizationMinimumSize = 64); + uint32_t maxChunkSize = 4190208); ~MergeHandler() override; - bool buildBucketInfoList( - const spi::Bucket& bucket, - Timestamp maxTimestamp, - uint8_t myNodeIndex, - std::vector<api::GetBucketDiffCommand::Entry>& output, - spi::Context& context) const; - void fetchLocalData(const spi::Bucket& bucket, - std::vector<api::ApplyBucketDiffCommand::Entry>& diff, - uint8_t nodeIndex, - spi::Context& context) const; - void applyDiffLocally(const spi::Bucket& bucket, - std::vector<api::ApplyBucketDiffCommand::Entry>& diff, - uint8_t nodeIndex, - spi::Context& context, - std::shared_ptr<ApplyBucketDiffState> async_results) const; + bool buildBucketInfoList(const spi::Bucket& bucket, Timestamp maxTimestamp, uint8_t myNodeIndex, + std::vector<api::GetBucketDiffCommand::Entry>& output, spi::Context& context) const; + void fetchLocalData(const spi::Bucket& bucket, std::vector<api::ApplyBucketDiffCommand::Entry>& diff, + uint8_t nodeIndex, spi::Context& context) const; + void applyDiffLocally(const spi::Bucket& bucket, std::vector<api::ApplyBucketDiffCommand::Entry>& diff, + uint8_t nodeIndex, spi::Context& context, + const std::shared_ptr<ApplyBucketDiffState> & async_results) const; void sync_bucket_info(const spi::Bucket& bucket) const override; void schedule_delayed_delete(std::unique_ptr<ApplyBucketDiffState>) const override; @@ -82,15 +72,6 @@ public: void handleApplyBucketDiffReply(api::ApplyBucketDiffReply&, MessageSender&, MessageTrackerUP) const; void drain_async_writes(); - // Thread safe, as it's set during live reconfig from the main filestor manager. - void set_throttle_merge_feed_ops(bool throttle) noexcept { - _throttle_merge_feed_ops.store(throttle, std::memory_order_relaxed); - } - - [[nodiscard]] bool throttle_merge_feed_ops() const noexcept { - return _throttle_merge_feed_ops.load(std::memory_order_relaxed); - } - private: using DocEntryList = std::vector<std::unique_ptr<spi::DocEntry>>; const framework::Clock &_clock; @@ -99,36 +80,26 @@ private: spi::PersistenceProvider &_spi; std::unique_ptr<vespalib::MonitoredRefCount> _monitored_ref_count; const uint32_t _maxChunkSize; - const uint32_t _commonMergeChainOptimalizationMinimumSize; vespalib::ISequencedTaskExecutor& _executor; - std::atomic<bool> _throttle_merge_feed_ops; MessageTrackerUP handleGetBucketDiffStage2(api::GetBucketDiffCommand&, MessageTrackerUP) const; /** Returns a reply if merge is complete */ - api::StorageReply::SP processBucketMerge(const spi::Bucket& bucket, - MergeStatus& status, - MessageSender& sender, - spi::Context& context, - std::shared_ptr<ApplyBucketDiffState>& async_results) const; + api::StorageReply::SP processBucketMerge(const spi::Bucket& bucket, MergeStatus& status, MessageSender& sender, + spi::Context& context, std::shared_ptr<ApplyBucketDiffState>& async_results) const; /** * Invoke either put, remove or unrevertable remove on the SPI * depending on the flags in the diff entry. */ - void applyDiffEntry(std::shared_ptr<ApplyBucketDiffState> async_results, - const spi::Bucket&, - const api::ApplyBucketDiffCommand::Entry&, - const document::DocumentTypeRepo& repo) const; + void applyDiffEntry(std::shared_ptr<ApplyBucketDiffState> async_results, const spi::Bucket&, + const api::ApplyBucketDiffCommand::Entry&, const document::DocumentTypeRepo& repo) const; /** * Fill entries-vector with metadata for bucket up to maxTimestamp, * sorted ascendingly on entry timestamp. * Throws std::runtime_error upon iteration failure. */ - void populateMetaData(const spi::Bucket&, - Timestamp maxTimestamp, - DocEntryList & entries, - spi::Context& context) const; + void populateMetaData(const spi::Bucket&, Timestamp maxTimestamp, DocEntryList & entries, spi::Context& context) const; std::unique_ptr<document::Document> deserializeDiffDocument(const api::ApplyBucketDiffCommand::Entry& e, const document::DocumentTypeRepo& repo) const; diff --git a/storage/src/vespa/storage/persistence/persistencehandler.cpp b/storage/src/vespa/storage/persistence/persistencehandler.cpp index 29d39845f5a..87c1f83794e 100644 --- a/storage/src/vespa/storage/persistence/persistencehandler.cpp +++ b/storage/src/vespa/storage/persistence/persistencehandler.cpp @@ -19,9 +19,7 @@ PersistenceHandler::PersistenceHandler(vespalib::ISequencedTaskExecutor & sequen : _clock(component.getClock()), _env(component, filestorHandler, metrics, provider), _processAllHandler(_env, provider), - _mergeHandler(_env, provider, component.cluster_context(), _clock, sequencedExecutor, - cfg.bucketMergeChunkSize, - cfg.commonMergeChainOptimalizationMinimumSize), + _mergeHandler(_env, provider, component.cluster_context(), _clock, sequencedExecutor, cfg.bucketMergeChunkSize), _asyncHandler(_env, provider, bucketOwnershipNotifier, sequencedExecutor, component.getBucketIdFactory()), _splitJoinHandler(_env, provider, bucketOwnershipNotifier, cfg.enableMultibitSplitOptimalization), _simpleHandler(_env, provider, component.getBucketIdFactory()) @@ -175,10 +173,4 @@ PersistenceHandler::processLockedMessage(FileStorHandler::LockedMessage lock) co } } -void -PersistenceHandler::set_throttle_merge_feed_ops(bool throttle) noexcept -{ - _mergeHandler.set_throttle_merge_feed_ops(throttle); -} - } diff --git a/storage/src/vespa/storage/persistence/persistencehandler.h b/storage/src/vespa/storage/persistence/persistencehandler.h index 595815d2bb3..1835b56528e 100644 --- a/storage/src/vespa/storage/persistence/persistencehandler.h +++ b/storage/src/vespa/storage/persistence/persistencehandler.h @@ -36,8 +36,6 @@ public: const AsyncHandler & asyncHandler() const { return _asyncHandler; } const SplitJoinHandler & splitjoinHandler() const { return _splitJoinHandler; } const SimpleMessageHandler & simpleMessageHandler() const { return _simpleHandler; } - - void set_throttle_merge_feed_ops(bool throttle) noexcept; private: // Message handling functions MessageTracker::UP handleCommandSplitByType(api::StorageCommand&, MessageTracker::UP tracker) const; diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp index 070563859a5..96e8ca89a04 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp @@ -120,7 +120,7 @@ RankProcessor::initHitCollector(size_t wantedHitCount) void RankProcessor::setupRankProgram(RankProgram &program) { - program.setup(*_match_data, _queryEnv, search::fef::Properties()); + program.setup(*_match_data, _queryEnv, _featureOverrides); } void @@ -153,13 +153,15 @@ RankProcessor::RankProcessor(std::shared_ptr<const RankManager::Snapshot> snapsh const vespalib::string &rankProfile, Query & query, const vespalib::string & location, - Properties & queryProperties, + const Properties & queryProperties, + const Properties & featureOverrides, const search::IAttributeManager * attrMgr) : _rankManagerSnapshot(std::move(snapshot)), _rankSetup(_rankManagerSnapshot->getRankSetup(rankProfile)), _query(query), _queryEnv(location, _rankManagerSnapshot->getIndexEnvironment(rankProfile), queryProperties, attrMgr), + _featureOverrides(featureOverrides), _mdLayout(), _match_data(), _rankProgram(), diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h index 3001da086ec..5651917ce7a 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h +++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h @@ -31,6 +31,7 @@ private: QueryWrapper _query; QueryEnvironment _queryEnv; + const search::fef::Properties &_featureOverrides; search::fef::MatchDataLayout _mdLayout; search::fef::MatchData::UP _match_data; search::fef::RankProgram::UP _rankProgram; @@ -62,7 +63,8 @@ public: const vespalib::string &rankProfile, search::streaming::Query & query, const vespalib::string & location, - search::fef::Properties & queryProperties, + const search::fef::Properties & queryProperties, + const search::fef::Properties & featureOverrides, const search::IAttributeManager * attrMgr); void initForRanking(size_t wantedHitCount); diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp index 979e5f25b6a..a1e8fddc3bf 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp @@ -431,9 +431,18 @@ SearchVisitor::init(const Parameters & params) LOG(debug, "Properties[%u]: name '%s', size '%u'", i, prop.getName(), prop.size()); if (strcmp(prop.getName(), "rank") == 0) { // pick up rank properties for (uint32_t j = 0; j < prop.size(); ++j) { - LOG(debug, "Properties[%u][%u]: key '%s' -> value '%s'", i, j, prop.getKey(j), prop.getValue(j)); - _rankController.getQueryProperties().add(vespalib::string(prop.getKey(j), prop.getKeyLen(j)), - vespalib::string(prop.getValue(j), prop.getValueLen(j))); + vespalib::string k{prop.getKey(j), prop.getKeyLen(j)}; + vespalib::string v{prop.getValue(j), prop.getValueLen(j)}; + LOG(debug, "Properties[%u][%u]: key '%s' -> value '%s'", i, j, k.c_str(), v.c_str()); + _rankController.getQueryProperties().add(k, v); + } + } + if (strcmp(prop.getName(), "feature") == 0) { // pick up feature overrides + for (uint32_t j = 0; j < prop.size(); ++j) { + vespalib::string k{prop.getKey(j), prop.getKeyLen(j)}; + vespalib::string v{prop.getValue(j), prop.getValueLen(j)}; + LOG(debug, "Feature override[%u][%u]: key '%s' -> value '%s'", i, j, k.c_str(), v.c_str()); + _rankController.getFeatureOverrides().add(k, v); } } } @@ -647,6 +656,7 @@ SearchVisitor::RankController::RankController() : _rankManagerSnapshot(nullptr), _rankSetup(nullptr), _queryProperties(), + _featureOverrides(), _hasRanking(false), _rankProcessor(), _dumpFeatures(false), @@ -664,13 +674,13 @@ SearchVisitor::RankController::setupRankProcessors(Query & query, std::vector<AttrInfo> & attributeFields) { _rankSetup = &_rankManagerSnapshot->getRankSetup(_rankProfile); - _rankProcessor = std::make_unique<RankProcessor>(_rankManagerSnapshot, _rankProfile, query, location, _queryProperties, &attrMan); + _rankProcessor = std::make_unique<RankProcessor>(_rankManagerSnapshot, _rankProfile, query, location, _queryProperties, _featureOverrides, &attrMan); _rankProcessor->initForRanking(wantedHitCount); // register attribute vectors needed for ranking processAccessedAttributes(_rankProcessor->get_real_query_env(), true, attrMan, attributeFields); if (_dumpFeatures) { - _dumpProcessor = std::make_unique<RankProcessor>(_rankManagerSnapshot, _rankProfile, query, location, _queryProperties, &attrMan); + _dumpProcessor = std::make_unique<RankProcessor>(_rankManagerSnapshot, _rankProfile, query, location, _queryProperties, _featureOverrides, &attrMan); LOG(debug, "Initialize dump processor"); _dumpProcessor->initForDumping(wantedHitCount); // register attribute vectors needed for dumping diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h index ce40b5ba742..98d0747baec 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h +++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h @@ -129,6 +129,7 @@ private: std::shared_ptr<const RankManager::Snapshot> _rankManagerSnapshot; const search::fef::RankSetup * _rankSetup; search::fef::Properties _queryProperties; + search::fef::Properties _featureOverrides; bool _hasRanking; RankProcessor::UP _rankProcessor; bool _dumpFeatures; @@ -151,6 +152,7 @@ private: const vespalib::string &getRankProfile() const { return _rankProfile; } void setRankManagerSnapshot(const std::shared_ptr<const RankManager::Snapshot>& snapshot) { _rankManagerSnapshot = snapshot; } search::fef::Properties & getQueryProperties() { return _queryProperties; } + search::fef::Properties & getFeatureOverrides() { return _featureOverrides; } RankProcessor * getRankProcessor() { return _rankProcessor.get(); } void setDumpFeatures(bool dumpFeatures) { _dumpFeatures = dumpFeatures; } bool getDumpFeatures() const { return _dumpFeatures; } |