diff options
Diffstat (limited to 'searchlib')
26 files changed, 203 insertions, 179 deletions
diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp index 96472200952..db0fe76b7af 100644 --- a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp +++ b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp @@ -26,6 +26,18 @@ using vespalib::make_string_short::fmt; const vespalib::string field_name = "myfield"; double budget_sec = 1.0; +double estimate_actual_cost(Blueprint &bp, InFlow in_flow) { + if (in_flow.strict()) { + assert(bp.strict()); + return bp.strict_cost(); + } else if (bp.strict()) { + auto stats = FlowStats::from(flow::DefaultAdapter(), &bp); + return flow::forced_strict_cost(stats, in_flow.rate()); + } else { + return bp.cost() * in_flow.rate(); + } +} + enum class PlanningAlgo { Order, Estimate, @@ -236,7 +248,7 @@ strict_search(BenchmarkBlueprintFactory& factory, uint32_t docid_limit, Planning timer.after(); } FlowStats flow(ctx.blueprint->estimate(), ctx.blueprint->cost(), ctx.blueprint->strict_cost()); - double actual_cost = ctx.blueprint->estimate_actual_cost(InFlow(true)); + double actual_cost = estimate_actual_cost(*ctx.blueprint, InFlow(true)); return {timer.min_time() * 1000.0, hits + 1, hits, flow, actual_cost, get_class_name(*ctx.iterator), factory.get_name(*ctx.blueprint)}; } @@ -270,7 +282,7 @@ non_strict_search(BenchmarkBlueprintFactory& factory, uint32_t docid_limit, doub timer.after(); } FlowStats flow(ctx.blueprint->estimate(), ctx.blueprint->cost(), ctx.blueprint->strict_cost()); - double actual_cost = ctx.blueprint->estimate_actual_cost(InFlow(filter_hit_ratio)); + double actual_cost = estimate_actual_cost(*ctx.blueprint, InFlow(filter_hit_ratio)); return {timer.min_time() * 1000.0, seeks, hits, flow, actual_cost, get_class_name(*ctx.iterator), factory.get_name(*ctx.blueprint)}; } diff --git a/searchlib/src/tests/tensor/distance_calculator/distance_calculator_test.cpp b/searchlib/src/tests/tensor/distance_calculator/distance_calculator_test.cpp index 4ffc1fe366e..136878f0ea5 100644 --- a/searchlib/src/tests/tensor/distance_calculator/distance_calculator_test.cpp +++ b/searchlib/src/tests/tensor/distance_calculator/distance_calculator_test.cpp @@ -9,7 +9,6 @@ #include <vespa/searchlib/test/attribute_builder.h> #include <vespa/vespalib/gtest/gtest.h> #include <vespa/vespalib/util/exceptions.h> -#include <iostream> using namespace search::attribute::test; using namespace search::attribute; diff --git a/searchlib/src/tests/tensor/distance_functions/CMakeLists.txt b/searchlib/src/tests/tensor/distance_functions/CMakeLists.txt index e1a54f7883a..92ad9ae2648 100644 --- a/searchlib/src/tests/tensor/distance_functions/CMakeLists.txt +++ b/searchlib/src/tests/tensor/distance_functions/CMakeLists.txt @@ -7,3 +7,10 @@ vespa_add_executable(searchlib_distance_functions_test_app TEST GTest::GTest ) vespa_add_test(NAME searchlib_distance_functions_test_app COMMAND searchlib_distance_functions_test_app) + +vespa_add_executable(searchlib_distance_functions_benchmark_app TEST + SOURCES + distance_functions_benchmark.cpp + DEPENDS + searchlib +) diff --git a/searchlib/src/tests/tensor/distance_functions/distance_functions_benchmark.cpp b/searchlib/src/tests/tensor/distance_functions/distance_functions_benchmark.cpp new file mode 100644 index 00000000000..15d6040a11a --- /dev/null +++ b/searchlib/src/tests/tensor/distance_functions/distance_functions_benchmark.cpp @@ -0,0 +1,129 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/eval/eval/typed_cells.h> +#include <vespa/searchlib/common/geo_gcd.h> +#include <vespa/searchlib/tensor/distance_functions.h> +#include <vespa/searchlib/tensor/distance_function_factory.h> +#include <vespa/searchlib/tensor/mips_distance_transform.h> +#include <vespa/vespalib/util/benchmark_timer.h> +#include <vespa/vespalib/util/classname.h> + +using namespace search::tensor; +using vespalib::eval::Int8Float; +using vespalib::BFloat16; +using vespalib::eval::TypedCells; +using search::attribute::DistanceMetric; + +size_t npos = std::string::npos; + +double run_calc(size_t iterations, TypedCells b, const BoundDistanceFunction & df) __attribute_noinline__; +double run_calc_with_limit(size_t iterations, TypedCells b, const BoundDistanceFunction & df) __attribute_noinline__; + +double +run_calc(size_t iterations, TypedCells b, const BoundDistanceFunction & df) { + vespalib::BenchmarkTimer timer(1.0); + double min_result = std::numeric_limits<double>::max(); + while (timer.has_budget()) { + timer.before(); + for (size_t i(0); i < iterations; i++) { + min_result = std::min(df.calc(b), min_result); + } + timer.after(); + } + printf("%s::calc: Time used = %1.3f, min_result=%3.3f\n", + vespalib::getClassName(df).c_str(), timer.min_time(), min_result); + return min_result; +} + +double +run_calc_with_limit(size_t iterations, TypedCells b, const BoundDistanceFunction & df) { + vespalib::BenchmarkTimer timer(1.0); + double min_result = std::numeric_limits<double>::max(); + while (timer.has_budget()) { + timer.before(); + for (size_t i(0); i < iterations; i++) { + min_result = std::min(df.calc_with_limit(b, std::numeric_limits<double>::max()), min_result); + } + timer.after(); + } + + printf("%s::calc_with_limit: Time used = %1.3f, min_result=%3.3f\n", + vespalib::getClassName(df).c_str(), timer.min_time(), min_result); + return min_result; +} + +template<typename T> +void benchmark(size_t iterations, size_t elems) __attribute_noinline__; + +template<typename T> +void benchmark(size_t iterations, size_t elems, const DistanceFunctionFactory & df) { + std::vector<T> av, bv; + srand(7); + av.reserve(elems); + bv.reserve(elems); + for (size_t i(0); i < elems; i++) { + av.push_back(rand()); + bv.push_back(rand()); + } + TypedCells a_cells(av), b_cells(bv); + + double calc_result = run_calc(iterations, b_cells, *df.for_query_vector(a_cells)); + double calc_with_limit_result = run_calc_with_limit(iterations, b_cells, *df.for_query_vector(a_cells)); + assert(calc_result == calc_with_limit_result); +} + +template<typename T> +void benchmark(size_t iterations, size_t elems, const std::string & dist_functions) { + if (dist_functions.find("euclid") != npos) { + benchmark<T>(iterations, elems, EuclideanDistanceFunctionFactory<T>()); + } + if (dist_functions.find("angular") != npos) { + if (std::is_same<T, double>() || std::is_same<T, float>()) { + benchmark<T>(iterations, elems, AngularDistanceFunctionFactory<T>()); + } + } + if (dist_functions.find("prenorm") != npos) { + if (std::is_same<T, double>() || std::is_same<T, float>()) { + benchmark<T>(iterations, elems, PrenormalizedAngularDistanceFunctionFactory<T>()); + } + } + if (dist_functions.find("mips") != npos) { + if (std::is_same<T, double>() || std::is_same<T, float>() || std::is_same<T, Int8Float>()) { + benchmark<T>(iterations, elems, MipsDistanceFunctionFactory<T>()); + } + } +} + +void +benchmark(size_t iterations, size_t elems, const std::string & dist_functions, const std::string & data_types) { + if (data_types.find("double") != npos) { + benchmark<double>(iterations, elems, dist_functions); + } + if (data_types.find("float32") != npos) { + benchmark<float>(iterations, elems, dist_functions); + } + if (data_types.find("bfloat16") != npos) { + benchmark<BFloat16>(iterations, elems, dist_functions); + } + if (data_types.find("float8") != npos) { + benchmark<Int8Float>(iterations, elems, dist_functions); + } +} + +int +main(int argc, char *argv[]) { + size_t num_iterations = 10000000; + size_t num_elems = 1024; + std::string dist_functions = "angular euclid prenorm mips"; + std::string data_types = "double float32 bfloat16 float8"; + if (argc > 1) { num_iterations = atol(argv[1]); } + if (argc > 2) { num_elems = atol(argv[2]); } + if (argc > 3) { dist_functions = argv[3]; } + if (argc > 4) { data_types = argv[4]; } + + printf("Benchmarking %ld iterations with vector length %ld with distance functions '%s' for data types '%s'\n", + num_iterations, num_elems, dist_functions.c_str(), data_types.c_str()); + benchmark(num_iterations, num_elems, dist_functions, data_types); + + return 0; +} diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp index a1cf86c95cc..97b88bc787a 100644 --- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp +++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp @@ -111,7 +111,7 @@ class MyBoundDistanceFunction : public BoundDistanceFunction { std::unique_ptr<BoundDistanceFunction> _real; public: - MyBoundDistanceFunction(std::unique_ptr<BoundDistanceFunction> real) + explicit MyBoundDistanceFunction(std::unique_ptr<BoundDistanceFunction> real) : _real(std::move(real)) { } @@ -147,19 +147,19 @@ class MyDistanceFunctionFactory : public DistanceFunctionFactory { std::unique_ptr<DistanceFunctionFactory> _real; public: - MyDistanceFunctionFactory(std::unique_ptr<DistanceFunctionFactory> real) + explicit MyDistanceFunctionFactory(std::unique_ptr<DistanceFunctionFactory> real) : _real(std::move(real)) { } ~MyDistanceFunctionFactory() override; - std::unique_ptr<BoundDistanceFunction> for_query_vector(TypedCells lhs) override { + std::unique_ptr<BoundDistanceFunction> for_query_vector(TypedCells lhs) const override { EXPECT_FALSE(lhs.non_existing_attribute_value()); return std::make_unique<MyBoundDistanceFunction>(_real->for_query_vector(lhs)); } - std::unique_ptr<BoundDistanceFunction> for_insertion_vector(TypedCells lhs) override { + std::unique_ptr<BoundDistanceFunction> for_insertion_vector(TypedCells lhs) const override { EXPECT_FALSE(lhs.non_existing_attribute_value()); return std::make_unique<MyBoundDistanceFunction>(_real->for_insertion_vector(lhs)); } diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp index 25588cf3229..ba5abb35141 100644 --- a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp +++ b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp @@ -195,7 +195,7 @@ RankSetup::compile() _firstPhaseRankFeature = parser.featureName(); _first_phase_resolver->addSeed(_firstPhaseRankFeature); } else { - vespalib::string e = fmt("invalid feature name for initial rank: '%s'", _firstPhaseRankFeature.c_str()); + vespalib::string e = fmt("invalid feature name for first phase rank: '%s'", _firstPhaseRankFeature.c_str()); _warnings.emplace_back(e); _compileError = true; } @@ -206,7 +206,7 @@ RankSetup::compile() _secondPhaseRankFeature = parser.featureName(); _second_phase_resolver->addSeed(_secondPhaseRankFeature); } else { - vespalib::string e = fmt("invalid feature name for final rank: '%s'", _secondPhaseRankFeature.c_str()); + vespalib::string e = fmt("invalid feature name for second phase rank: '%s'", _secondPhaseRankFeature.c_str()); _warnings.emplace_back(e); _compileError = true; } diff --git a/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp b/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp index 3645496e4fb..41551ac1062 100644 --- a/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp +++ b/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp @@ -10,11 +10,11 @@ LOG_SETUP(".fef.matchdatabuilder"); namespace search::fef::test { -MatchDataBuilder::MatchDataBuilder(QueryEnvironment &queryEnv, MatchData &data) : - _queryEnv(queryEnv), - _data(data), - _index(), - _match() +MatchDataBuilder::MatchDataBuilder(QueryEnvironment &queryEnv, MatchData &data) + : _queryEnv(queryEnv), + _data(data), + _index(), + _match() { // reset all match data objects. for (TermFieldHandle handle = 0; handle < _data.getNumTermFields(); ++handle) { @@ -22,7 +22,7 @@ MatchDataBuilder::MatchDataBuilder(QueryEnvironment &queryEnv, MatchData &data) } } -MatchDataBuilder::~MatchDataBuilder() {} +MatchDataBuilder::~MatchDataBuilder() = default; TermFieldMatchData * MatchDataBuilder::getTermFieldMatchData(uint32_t termId, uint32_t fieldId) @@ -59,7 +59,7 @@ MatchDataBuilder::addElement(const vespalib::string &fieldName, int32_t weight, LOG(error, "Field '%s' does not exist.", fieldName.c_str()); return false; } - _index[info->id()].elements.push_back(MyElement(weight, length)); + _index[info->id()].elements.emplace_back(weight, length); return true; } @@ -77,8 +77,7 @@ MatchDataBuilder::addOccurence(const vespalib::string &fieldName, uint32_t termI } const ITermFieldData *tfd = _queryEnv.getTerm(termId)->lookupField(info->id()); if (tfd == nullptr) { - LOG(error, "Field '%s' is not searched by the given term.", - fieldName.c_str()); + LOG(error, "Field '%s' is not searched by the given term.", fieldName.c_str()); return false; } _match[termId][info->id()].insert(Position(pos, element)); @@ -99,14 +98,13 @@ MatchDataBuilder::setWeight(const vespalib::string &fieldName, uint32_t termId, } const ITermFieldData *tfd = _queryEnv.getTerm(termId)->lookupField(info->id()); if (tfd == nullptr) { - LOG(error, "Field '%s' is not searched by the given term.", - fieldName.c_str()); + LOG(error, "Field '%s' is not searched by the given term.", fieldName.c_str()); return false; } uint32_t eid = _index[info->id()].elements.size(); _match[termId][info->id()].clear(); _match[termId][info->id()].insert(Position(0, eid)); - _index[info->id()].elements.push_back(MyElement(weight, 1)); + _index[info->id()].elements.emplace_back(weight, 1); return true; } @@ -142,19 +140,13 @@ MatchDataBuilder::apply(uint32_t docId) // For each occurence of that term, in that field, do for (const auto& occ : field_elem.second) { // Append a term match position to the term match data. - match->appendPosition(TermFieldMatchDataPosition( - occ.eid, - occ.pos, - field.getWeight(occ.eid), - field.getLength(occ.eid))); - LOG(debug, - "Added occurence of term '%u' in field '%s'" - " at position '%u'.", + match->appendPosition(TermFieldMatchDataPosition(occ.eid, occ.pos, + field.getWeight(occ.eid), + field.getLength(occ.eid))); + LOG(debug, "Added occurence of term '%u' in field '%s' at position '%u'.", termId, name.c_str(), occ.pos); if (occ.pos >= field.getLength(occ.eid)) { - LOG(warning, - "Added occurence of term '%u' in field '%s'" - " at position '%u' >= fieldLen '%u'.", + LOG(warning, "Added occurence of term '%u' in field '%s' at position '%u' >= fieldLen '%u'.", termId, name.c_str(), occ.pos, field.getLength(occ.eid)); } } diff --git a/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.h b/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.h index 0e5025efd37..753e1596520 100644 --- a/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.h +++ b/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.h @@ -13,7 +13,7 @@ public: struct MyElement { int32_t weight; uint32_t length; - MyElement(int32_t w, uint32_t l) : weight(w), length(l) {} + MyElement(int32_t w, uint32_t l) noexcept : weight(w), length(l) {} }; struct MyField { uint32_t fieldLength; @@ -21,7 +21,7 @@ public: MyField() : fieldLength(0), elements() {} MyElement &getElement(uint32_t eid) { while (elements.size() <= eid) { - elements.push_back(MyElement(0, 0)); + elements.emplace_back(0, 0); } return elements[eid]; } @@ -68,6 +68,8 @@ public: * @param data The match data to build in. */ MatchDataBuilder(QueryEnvironment &queryEnv, MatchData &data); + MatchDataBuilder(const MatchDataBuilder &) = delete; + MatchDataBuilder & operator=(const MatchDataBuilder &) = delete; ~MatchDataBuilder(); /** @@ -133,10 +135,6 @@ public: bool apply(uint32_t docId); private: - MatchDataBuilder(const MatchDataBuilder &); // hide - MatchDataBuilder & operator=(const MatchDataBuilder &); // hide - -private: QueryEnvironment &_queryEnv; MatchData &_data; IndexData _index; diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp index cfa165be067..412a5973ad8 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp @@ -169,31 +169,6 @@ Blueprint::null_plan(InFlow in_flow, uint32_t docid_limit) sort(in_flow); } -double -Blueprint::estimate_actual_cost(InFlow in_flow) const noexcept -{ - double res = estimate_strict_cost_diff(in_flow); - if (in_flow.strict()) { - res += strict_cost(); - } else { - res += in_flow.rate() * cost(); - } - return res; -} - -double -Blueprint::estimate_strict_cost_diff(InFlow &in_flow) const noexcept -{ - if (in_flow.strict()) { - REQUIRE(strict()); - } else if (strict()) { - double rate = in_flow.rate(); - in_flow.force_strict(); - return flow::strict_cost_diff(estimate(), rate); - } - return 0.0; -} - Blueprint::UP Blueprint::optimize(Blueprint::UP bp) { Blueprint *root = bp.release(); @@ -624,24 +599,6 @@ IntermediateBlueprint::should_do_termwise_eval(const UnpackInfo &unpack, double return (count_termwise_nodes(unpack) > 1); } -double -IntermediateBlueprint::estimate_self_cost(InFlow) const noexcept -{ - return 0.0; -} - -double -IntermediateBlueprint::estimate_actual_cost(InFlow in_flow) const noexcept -{ - double res = estimate_strict_cost_diff(in_flow); - auto cost_of = [](const auto &child, InFlow child_flow)noexcept{ - return child->estimate_actual_cost(child_flow); - }; - res += flow::actual_cost_of(flow::DefaultAdapter(), _children, my_flow(in_flow), cost_of); - res += estimate_self_cost(in_flow); - return res; -} - void IntermediateBlueprint::optimize(Blueprint* &self, OptimizePass pass) { diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h index a493c725407..a443f34f856 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h @@ -313,20 +313,6 @@ public: // optimal ordering. Used for testing. void null_plan(InFlow in_flow, uint32_t docid_limit); - // Estimate the actual cost of evaluating the (sub-)query - // represented by this blueprint with the given in-flow. This - // function should be called after query planning has been - // performed. This function could be useful to predict very - // expensive queries, but the initial use-case is to understand - // query cost better in micro-benchmarks to improve low-level cost - // tuning. - virtual double estimate_actual_cost(InFlow in_flow) const noexcept; - // Estimate the change in cost caused by having a strict iterator - // with a non-strict in-flow. Note that this function might force - // the in_flow to be strict in order to align it with the - // strictness of this blueprint. - double estimate_strict_cost_diff(InFlow &in_flow) const noexcept; - static Blueprint::UP optimize(Blueprint::UP bp); virtual void sort(InFlow in_flow) = 0; static Blueprint::UP optimize_and_sort(Blueprint::UP bp, InFlow in_flow, const Options &opts) { @@ -496,9 +482,6 @@ public: void setDocIdLimit(uint32_t limit) noexcept final; void each_node_post_order(const std::function<void(Blueprint&)> &f) override; - // additional cost not attributed to the children flow (heap merge/unpack/etc) - virtual double estimate_self_cost(InFlow in_flow) const noexcept; - double estimate_actual_cost(InFlow in_flow) const noexcept override; void optimize(Blueprint* &self, OptimizePass pass) final; void sort(InFlow in_flow) override; void set_global_filter(const GlobalFilter &global_filter, double estimated_hit_ratio) override; diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.h b/searchlib/src/vespa/searchlib/queryeval/flow.h index be7b9031c00..b7841dc2017 100644 --- a/searchlib/src/vespa/searchlib/queryeval/flow.h +++ b/searchlib/src/vespa/searchlib/queryeval/flow.h @@ -204,16 +204,6 @@ double ordered_cost_of(ADAPTER adapter, const T &children, F flow, bool allow_fo return total_cost; } -static double actual_cost_of(auto adapter, const auto &children, auto flow, auto cost_of) noexcept { - double total_cost = 0.0; - for (const auto &child: children) { - double child_cost = cost_of(child, InFlow(flow.strict(), flow.flow())); - flow.update_cost(total_cost, child_cost); - flow.add(adapter.estimate(child)); - } - return total_cost; -} - auto select_strict_and_child(auto adapter, const auto &children, size_t first, double est, bool native_strict) { double cost = 0.0; size_t best_idx = first; diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp index b0b3b302e82..5b8fa79b8af 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp @@ -318,11 +318,6 @@ OrBlueprint::calculate_flow_stats(uint32_t) const { OrFlow::cost_of(get_children(), true) + flow::heap_cost(est, get_children().size())}; } -double -OrBlueprint::estimate_self_cost(InFlow in_flow) const noexcept { - return in_flow.strict() ? flow::heap_cost(estimate(), get_children().size()) : 0.0; -} - Blueprint::HitEstimate OrBlueprint::combine(const std::vector<HitEstimate> &data) const { @@ -436,11 +431,6 @@ WeakAndBlueprint::calculate_flow_stats(uint32_t docid_limit) const { OrFlow::cost_of(get_children(), true) + flow::heap_cost(est, get_children().size())}; } -double -WeakAndBlueprint::estimate_self_cost(InFlow in_flow) const noexcept { - return in_flow.strict() ? flow::heap_cost(estimate(), get_children().size()) : 0.0; -} - Blueprint::HitEstimate WeakAndBlueprint::combine(const std::vector<HitEstimate> &data) const { @@ -519,11 +509,6 @@ NearBlueprint::calculate_flow_stats(uint32_t) const { AndFlow::cost_of(get_children(), true) + childCnt() * est}; } -double -NearBlueprint::estimate_self_cost(InFlow) const noexcept { - return childCnt() * estimate(); -} - Blueprint::HitEstimate NearBlueprint::combine(const std::vector<HitEstimate> &data) const { @@ -589,11 +574,6 @@ ONearBlueprint::calculate_flow_stats(uint32_t) const { AndFlow::cost_of(get_children(), true) + childCnt() * est}; } -double -ONearBlueprint::estimate_self_cost(InFlow) const noexcept { - return childCnt() * estimate(); -} - Blueprint::HitEstimate ONearBlueprint::combine(const std::vector<HitEstimate> &data) const { @@ -762,16 +742,6 @@ SourceBlenderBlueprint::calculate_flow_stats(uint32_t) const { return {my_est, my_cost + 1.0, my_strict_cost + my_est}; } -double -SourceBlenderBlueprint::estimate_self_cost(InFlow in_flow) const noexcept -{ - if (in_flow.strict()) { - return estimate(); - } else { - return in_flow.rate(); - } -} - Blueprint::HitEstimate SourceBlenderBlueprint::combine(const std::vector<HitEstimate> &data) const { diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h index f7eeace3e8b..913370caae1 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h @@ -67,7 +67,6 @@ public: ~OrBlueprint() override; bool supports_termwise_children() const override { return true; } FlowStats calculate_flow_stats(uint32_t docid_limit) const final; - double estimate_self_cost(InFlow in_flow) const noexcept override; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; void optimize_self(OptimizePass pass) override; @@ -96,7 +95,6 @@ private: AnyFlow my_flow(InFlow in_flow) const override; public: FlowStats calculate_flow_stats(uint32_t docid_limit) const final; - double estimate_self_cost(InFlow in_flow) const noexcept override; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; Blueprint::UP get_replacement() override; @@ -129,7 +127,6 @@ private: AnyFlow my_flow(InFlow in_flow) const override; public: FlowStats calculate_flow_stats(uint32_t docid_limit) const final; - double estimate_self_cost(InFlow in_flow) const noexcept override; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; void sort(Children &children, InFlow in_flow) const override; @@ -152,7 +149,6 @@ private: AnyFlow my_flow(InFlow in_flow) const override; public: FlowStats calculate_flow_stats(uint32_t docid_limit) const final; - double estimate_self_cost(InFlow in_flow) const noexcept override; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; void sort(Children &children, InFlow in_flow) const override; @@ -201,7 +197,6 @@ public: explicit SourceBlenderBlueprint(const ISourceSelector &selector) noexcept; ~SourceBlenderBlueprint() override; FlowStats calculate_flow_stats(uint32_t docid_limit) const final; - double estimate_self_cost(InFlow in_flow) const noexcept override; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; void sort(Children &children, InFlow in_flow) const override; diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp index af99260979d..ef40381c807 100644 --- a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp @@ -70,14 +70,14 @@ template class BoundAngularDistance<double>; template <typename FloatType> BoundDistanceFunction::UP -AngularDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) { +AngularDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) const { using DFT = BoundAngularDistance<FloatType>; return std::make_unique<DFT>(lhs); } template <typename FloatType> BoundDistanceFunction::UP -AngularDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) { +AngularDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) const { using DFT = BoundAngularDistance<FloatType>; return std::make_unique<DFT>(lhs); } diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.h b/searchlib/src/vespa/searchlib/tensor/angular_distance.h index 5e0a060e060..aa51f58b3cd 100644 --- a/searchlib/src/vespa/searchlib/tensor/angular_distance.h +++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.h @@ -15,8 +15,8 @@ template <typename FloatType> class AngularDistanceFunctionFactory : public DistanceFunctionFactory { public: AngularDistanceFunctionFactory() = default; - BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override; - BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override; + BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const override; + BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) const override; }; } diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h index 356366d6a77..3b0a0ac91fd 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h +++ b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h @@ -17,8 +17,8 @@ struct DistanceFunctionFactory { using TypedCells = vespalib::eval::TypedCells; DistanceFunctionFactory() noexcept = default; virtual ~DistanceFunctionFactory() = default; - virtual BoundDistanceFunction::UP for_query_vector(TypedCells lhs) = 0; - virtual BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) = 0; + virtual BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const = 0; + virtual BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) const = 0; using UP = std::unique_ptr<DistanceFunctionFactory>; }; diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp index 3ab3a1123eb..355110b2f90 100644 --- a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp @@ -44,16 +44,8 @@ public: double score = 1.0 / (1.0 + d); return score; } - double calc_with_limit(TypedCells rhs, double limit) const noexcept override { - vespalib::ConstArrayRef<AttributeCellType> rhs_vector = rhs.typify<AttributeCellType>(); - double sum = 0.0; - size_t sz = _lhs_vector.size(); - assert(sz == rhs_vector.size()); - for (size_t i = 0; i < sz && sum <= limit; ++i) { - double diff = _lhs_vector[i] - rhs_vector[i]; - sum += diff*diff; - } - return sum; + double calc_with_limit(TypedCells rhs, double) const noexcept override { + return calc(rhs); } }; @@ -64,14 +56,14 @@ template class BoundEuclideanDistance<double>; template <typename FloatType> BoundDistanceFunction::UP -EuclideanDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) { +EuclideanDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) const { using DFT = BoundEuclideanDistance<FloatType>; return std::make_unique<DFT>(lhs); } template <typename FloatType> BoundDistanceFunction::UP -EuclideanDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) { +EuclideanDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) const { using DFT = BoundEuclideanDistance<FloatType>; return std::make_unique<DFT>(lhs); } diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h index 8c39a12bf86..78460c93307 100644 --- a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h +++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h @@ -15,8 +15,8 @@ template <typename FloatType> class EuclideanDistanceFunctionFactory : public DistanceFunctionFactory { public: EuclideanDistanceFunctionFactory() noexcept = default; - BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override; - BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override; + BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const override; + BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) const override; }; } diff --git a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp index f5484f40271..a8a48ae4116 100644 --- a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp @@ -82,12 +82,12 @@ public: }; BoundDistanceFunction::UP -GeoDistanceFunctionFactory::for_query_vector(TypedCells lhs) { +GeoDistanceFunctionFactory::for_query_vector(TypedCells lhs) const { return std::make_unique<BoundGeoDistance>(lhs); } BoundDistanceFunction::UP -GeoDistanceFunctionFactory::for_insertion_vector(TypedCells lhs) { +GeoDistanceFunctionFactory::for_insertion_vector(TypedCells lhs) const { return std::make_unique<BoundGeoDistance>(lhs); } diff --git a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h index 1464898421b..a85e31e8ecc 100644 --- a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h +++ b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h @@ -14,8 +14,8 @@ namespace search::tensor { class GeoDistanceFunctionFactory : public DistanceFunctionFactory { public: GeoDistanceFunctionFactory() = default; - BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override; - BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override; + BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const override; + BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) const override; }; } diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp index 7f29a100492..7ea2e440a51 100644 --- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp @@ -49,14 +49,14 @@ public: template <typename FloatType> BoundDistanceFunction::UP -HammingDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) { +HammingDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) const { using DFT = BoundHammingDistance<FloatType>; return std::make_unique<DFT>(lhs); } template <typename FloatType> BoundDistanceFunction::UP -HammingDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) { +HammingDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) const { using DFT = BoundHammingDistance<FloatType>; return std::make_unique<DFT>(lhs); } diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.h b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h index 6e7f96e1e2f..2e3b75cc61f 100644 --- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.h +++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h @@ -16,8 +16,8 @@ template <typename FloatType> class HammingDistanceFunctionFactory : public DistanceFunctionFactory { public: HammingDistanceFunctionFactory() = default; - BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override; - BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override; + BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const override; + BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) const override; }; } diff --git a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp index c42242d8dc8..fa47187fec9 100644 --- a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp +++ b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp @@ -76,13 +76,13 @@ public: template<typename FloatType> BoundDistanceFunction::UP -MipsDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) { +MipsDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) const { return std::make_unique<BoundMipsDistanceFunction<FloatType, false>>(lhs, *_sq_norm_store); } template<typename FloatType> BoundDistanceFunction::UP -MipsDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) { +MipsDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) const { return std::make_unique<BoundMipsDistanceFunction<FloatType, true>>(lhs, *_sq_norm_store); }; diff --git a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h index 67a6eb58de0..336511ab78f 100644 --- a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h +++ b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h @@ -62,8 +62,8 @@ public: MipsDistanceFunctionFactory() noexcept = default; ~MipsDistanceFunctionFactory() override = default; - BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override; - BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override; + BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const override; + BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) const override; }; } diff --git a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp index 4bc90001227..58e92cbe2d4 100644 --- a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp @@ -62,14 +62,14 @@ template class BoundPrenormalizedAngularDistance<double>; template <typename FloatType> BoundDistanceFunction::UP -PrenormalizedAngularDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) { +PrenormalizedAngularDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) const { using DFT = BoundPrenormalizedAngularDistance<FloatType>; return std::make_unique<DFT>(lhs); } template <typename FloatType> BoundDistanceFunction::UP -PrenormalizedAngularDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) { +PrenormalizedAngularDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) const { using DFT = BoundPrenormalizedAngularDistance<FloatType>; return std::make_unique<DFT>(lhs); } diff --git a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h index 7e3a8c2c676..6a791e0b6ec 100644 --- a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h +++ b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h @@ -14,8 +14,8 @@ template <typename FloatType> class PrenormalizedAngularDistanceFunctionFactory : public DistanceFunctionFactory { public: PrenormalizedAngularDistanceFunctionFactory() = default; - BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override; - BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override; + BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const override; + BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) const override; }; } |