diff options
author | Jon Bratseth <bratseth@gmail.com> | 2024-05-13 21:55:47 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-13 21:55:47 +0200 |
commit | ee0cd3294fbfea91aa20816fd56c621724017939 (patch) | |
tree | 5c28c45090422e270c6163f81e75e4fcb5f17b55 /searchlib | |
parent | e1bb1b57d9cef2aba9a5c0191e649d7aac8147eb (diff) | |
parent | e59b79fdc60d6b6994013caf50ab1f5decb930ce (diff) |
Merge branch 'master' into bratseth/stemming-trace
Diffstat (limited to 'searchlib')
14 files changed, 171 insertions, 107 deletions
diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.cpp index 8591ec1415d..51177850155 100644 --- a/searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.cpp +++ b/searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.cpp @@ -2,14 +2,14 @@ #include "intermediate_blueprint_factory.h" #include <vespa/searchlib/queryeval/intermediate_blueprints.h> +#include <vespa/searchlib/attribute/singlenumericattribute.h> #include <iomanip> #include <sstream> namespace search::queryeval::test { -template <typename BlueprintType> char -IntermediateBlueprintFactory<BlueprintType>::child_name(void* blueprint) const +IntermediateBlueprintFactory::child_name(void* blueprint) const { auto itr = _child_names.find(blueprint); if (itr != _child_names.end()) { @@ -18,35 +18,33 @@ IntermediateBlueprintFactory<BlueprintType>::child_name(void* blueprint) const return '?'; } -template <typename BlueprintType> -IntermediateBlueprintFactory<BlueprintType>::IntermediateBlueprintFactory(vespalib::stringref name) +IntermediateBlueprintFactory::IntermediateBlueprintFactory(vespalib::stringref name) : _name(name), _children(), _child_names() { } -template <typename BlueprintType> -IntermediateBlueprintFactory<BlueprintType>::~IntermediateBlueprintFactory() = default; +IntermediateBlueprintFactory::~IntermediateBlueprintFactory() = default; -template <typename BlueprintType> std::unique_ptr<Blueprint> -IntermediateBlueprintFactory<BlueprintType>::make_blueprint() +IntermediateBlueprintFactory::make_blueprint() { - auto res = std::make_unique<BlueprintType>(); + auto res = make_self(); _child_names.clear(); char name = 'A'; + uint32_t source = 1; for (const auto& factory : _children) { auto child = factory->make_blueprint(); _child_names[child.get()] = name++; + child->setSourceId(source++); // ignored by non-source-blender blueprints res->addChild(std::move(child)); } return res; } -template <typename BlueprintType> vespalib::string -IntermediateBlueprintFactory<BlueprintType>::get_name(Blueprint& blueprint) const +IntermediateBlueprintFactory::get_name(Blueprint& blueprint) const { auto* intermediate = blueprint.asIntermediate(); if (intermediate != nullptr) { @@ -69,11 +67,29 @@ IntermediateBlueprintFactory<BlueprintType>::get_name(Blueprint& blueprint) cons return get_class_name(blueprint); } -template class IntermediateBlueprintFactory<AndBlueprint>; +//----------------------------------------------------------------------------- AndBlueprintFactory::AndBlueprintFactory() - : IntermediateBlueprintFactory<AndBlueprint>("AND") + : IntermediateBlueprintFactory("AND") {} +std::unique_ptr<IntermediateBlueprint> +AndBlueprintFactory::make_self() const +{ + return std::make_unique<AndBlueprint>(); +} + +//----------------------------------------------------------------------------- + +SourceBlenderBlueprintFactory::SourceBlenderBlueprintFactory() + : IntermediateBlueprintFactory("SB"), + _selector(250, "my_source_blender", 1000) +{} + +std::unique_ptr<IntermediateBlueprint> +SourceBlenderBlueprintFactory::make_self() const +{ + return std::make_unique<SourceBlenderBlueprint>(_selector); } +} diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.h b/searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.h index 6f7fe4f9ee7..c791d866612 100644 --- a/searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.h +++ b/searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.h @@ -4,6 +4,7 @@ #include "benchmark_blueprint_factory.h" #include <vespa/searchlib/queryeval/intermediate_blueprints.h> +#include <vespa/searchlib/attribute/fixedsourceselector.h> #include <unordered_map> namespace search::queryeval::test { @@ -11,7 +12,6 @@ namespace search::queryeval::test { /** * Factory that creates an IntermediateBlueprint (of the given type) with children created by the given factories. */ -template <typename BlueprintType> class IntermediateBlueprintFactory : public BenchmarkBlueprintFactory { private: vespalib::string _name; @@ -19,7 +19,8 @@ private: std::unordered_map<void*, char> _child_names; char child_name(void* blueprint) const; - +protected: + virtual std::unique_ptr<IntermediateBlueprint> make_self() const = 0; public: IntermediateBlueprintFactory(vespalib::stringref name); ~IntermediateBlueprintFactory(); @@ -30,10 +31,26 @@ public: vespalib::string get_name(Blueprint& blueprint) const override; }; -class AndBlueprintFactory : public IntermediateBlueprintFactory<AndBlueprint> { +class AndBlueprintFactory : public IntermediateBlueprintFactory { +protected: + std::unique_ptr<IntermediateBlueprint> make_self() const override; public: AndBlueprintFactory(); }; -} +class SourceBlenderBlueprintFactory : public IntermediateBlueprintFactory +{ +private: + FixedSourceSelector _selector; +protected: + std::unique_ptr<IntermediateBlueprint> make_self() const override; +public: + SourceBlenderBlueprintFactory(); + void init_selector(auto f, uint32_t limit) { + for (uint32_t i = 0; i < limit; ++i) { + _selector.setSource(i, f(i)); + } + } +}; +} diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp index 2977664f6ad..96472200952 100644 --- a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp +++ b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp @@ -292,10 +292,6 @@ benchmark_search(BenchmarkBlueprintFactory& factory, uint32_t docid_limit, bool } } - - - - //----------------------------------------------------------------------------- double est_forced_strict_cost(double estimate, double strict_cost, double rate) { @@ -430,15 +426,29 @@ to_string(bool val) void print_result_header() { - std::cout << "| chn | f_ratio | o_ratio | a_ratio | f.est | f.cost | f.act_cost | f.scost | f.act_scost | hits | seeks | time_ms | act_cost | ns_per_seek | ms_per_act_cost | iterator | blueprint |" << std::endl; + std::cout << "| in_flow | chn | o_ratio | a_ratio | f.est | f.cost | f.act_cost | f.scost | f.act_scost | hits | seeks | time_ms | act_cost | ns_per_seek | ms_per_act_cost | iterator | blueprint |" << std::endl; +} + +std::ostream &operator<<(std::ostream &dst, InFlow in_flow) { + auto old_w = dst.width(); + auto old_p = dst.precision(); + dst << std::setw(7) << std::setprecision(5); + if (in_flow.strict()) { + dst << " STRICT"; + } else { + dst << in_flow.rate(); + } + dst << std::setw(old_w); + dst << std::setprecision(old_p); + return dst; } void print_result(const BenchmarkResult& res, uint32_t children, double op_hit_ratio, InFlow in_flow, uint32_t num_docs) { std::cout << std::fixed << std::setprecision(5) - << "| " << std::setw(5) << children - << " | " << std::setw(7) << in_flow.rate() + << "| " << in_flow + << " | " << std::setw(5) << children << " | " << std::setw(7) << op_hit_ratio << " | " << std::setw(7) << ((double) res.hits / (double) num_docs) << " | " << std::setw(6) << res.flow.estimate @@ -684,23 +694,25 @@ run_benchmarks(const BenchmarkSetup& setup) void print_intermediate_blueprint_result_header(size_t children) { + std::cout << "| in_flow"; // This matches the naming scheme in IntermediateBlueprintFactory. char name = 'A'; for (size_t i = 0; i < children; ++i) { - std::cout << "| " << name++ << ".ratio "; + std::cout << " | " << name++ << ".ratio"; } - std::cout << "| flow.cost | flow.scost | flow.est | ratio | hits | seeks | ms_per_cost | time_ms | algo | blueprint |" << std::endl; + std::cout << " | flow.cost | flow.scost | flow.est | ratio | hits | seeks | ms_per_cost | time_ms | algo | blueprint |" << std::endl; } void -print_intermediate_blueprint_result(const BenchmarkResult& res, const std::vector<double>& children_ratios, PlanningAlgo algo, uint32_t num_docs) +print_intermediate_blueprint_result(const BenchmarkResult& res, const std::vector<double>& children_ratios, PlanningAlgo algo, InFlow in_flow, uint32_t num_docs) { - std::cout << std::fixed << std::setprecision(5); + std::cout << std::fixed << std::setprecision(5) + << "| " << in_flow; for (auto ratio : children_ratios) { - std::cout << "| " << std::setw(7) << ratio << " "; + std::cout << " | " << std::setw(7) << ratio; } std::cout << std::setprecision(5) - << "| " << std::setw(10) << res.flow.cost + << " | " << std::setw(10) << res.flow.cost << " | " << std::setw(10) << res.flow.strict_cost << " | " << std::setw(8) << res.flow.estimate << " | " << std::setw(7) << ((double) res.hits / (double) num_docs) @@ -748,9 +760,8 @@ struct BlueprintFactorySetup { BlueprintFactorySetup::~BlueprintFactorySetup() = default; -template <typename IntermediateBlueprintFactoryType> void -run_intermediate_blueprint_benchmark(const BlueprintFactorySetup& a, const BlueprintFactorySetup& b, size_t num_docs) +run_intermediate_blueprint_benchmark(auto factory_factory, std::vector<InFlow> in_flows, const BlueprintFactorySetup& a, const BlueprintFactorySetup& b, size_t num_docs) { print_intermediate_blueprint_result_header(2); double max_speedup = 0.0; @@ -758,26 +769,28 @@ run_intermediate_blueprint_benchmark(const BlueprintFactorySetup& a, const Bluep for (double b_hit_ratio: b.op_hit_ratios) { auto b_factory = b.make_factory_shared(num_docs, b_hit_ratio); for (double a_hit_ratio : a.op_hit_ratios) { - IntermediateBlueprintFactoryType factory; - factory.add_child(a.make_factory(num_docs, a_hit_ratio)); - factory.add_child(b_factory); + auto factory = factory_factory(); + factory->add_child(a.make_factory(num_docs, a_hit_ratio)); + factory->add_child(b_factory); double time_ms_esti = 0.0; - for (auto algo: {PlanningAlgo::Order, PlanningAlgo::Estimate, PlanningAlgo::Cost, - PlanningAlgo::CostForceStrict}) { - auto res = benchmark_search(factory, num_docs + 1, true, false, false, 1.0, algo); - print_intermediate_blueprint_result(res, {a_hit_ratio, b_hit_ratio}, algo, num_docs); - if (algo == PlanningAlgo::Estimate) { - time_ms_esti = res.time_ms; - } - if (algo == PlanningAlgo::CostForceStrict) { - double speedup = time_ms_esti / res.time_ms; - if (speedup > max_speedup) { - max_speedup = speedup; + for (InFlow in_flow: in_flows) { + for (auto algo: {PlanningAlgo::Order, PlanningAlgo::Estimate, PlanningAlgo::Cost, + PlanningAlgo::CostForceStrict}) { + auto res = benchmark_search(*factory, num_docs + 1, in_flow.strict(), false, false, in_flow.rate(), algo); + print_intermediate_blueprint_result(res, {a_hit_ratio, b_hit_ratio}, algo, in_flow, num_docs); + if (algo == PlanningAlgo::Estimate) { + time_ms_esti = res.time_ms; } - if (speedup < min_speedup) { - min_speedup = speedup; + if (algo == PlanningAlgo::CostForceStrict) { + double speedup = time_ms_esti / res.time_ms; + if (speedup > max_speedup) { + max_speedup = speedup; + } + if (speedup < min_speedup) { + min_speedup = speedup; + } + std::cout << "speedup (esti/forc)=" << std::setprecision(4) << speedup << std::endl; } - std::cout << "speedup (esti/forc)=" << std::setprecision(4) << speedup << std::endl; } } } @@ -789,7 +802,19 @@ void run_and_benchmark(const BlueprintFactorySetup& a, const BlueprintFactorySetup& b, size_t num_docs) { std::cout << "AND[A={" << a.to_string() << "},B={" << b.to_string() << "}]" << std::endl; - run_intermediate_blueprint_benchmark<AndBlueprintFactory>(a, b, num_docs); + run_intermediate_blueprint_benchmark([](){ return std::make_unique<AndBlueprintFactory>(); }, {true}, a, b, num_docs); +} + +void +run_source_blender_benchmark(const BlueprintFactorySetup& a, const BlueprintFactorySetup& b, size_t num_docs) +{ + std::cout << "SB[A={" << a.to_string() << "},B={" << b.to_string() << "}]" << std::endl; + auto factory_factory = [&](){ + auto factory = std::make_unique<SourceBlenderBlueprintFactory>(); + factory->init_selector([](uint32_t i){ return (i%10 == 0) ? 1 : 2; }, num_docs + 1); + return factory; + }; + run_intermediate_blueprint_benchmark(factory_factory, {true, 0.75, 0.5, 0.25, 0.1, 0.01, 0.001}, a, b, num_docs); } //------------------------------------------------------------------------------------- @@ -973,6 +998,15 @@ TEST(IteratorBenchmark, analyze_AND_bitvector_vs_IN) } } +TEST(IteratorBenchmark, analyze_strict_SOURCEBLENDER_memory_and_disk) +{ + for (double small_ratio: {0.001, 0.005, 0.01, 0.05}) { + run_source_blender_benchmark({str_fs, QueryOperator::Term, {small_ratio}}, + {str_index, QueryOperator::Term, {small_ratio * 10}}, + num_docs); + } +} + TEST(IteratorBenchmark, analyze_OR_non_strict_fs) { for (auto or_hit_ratio : {0.01, 0.1, 0.5}) { diff --git a/searchlib/src/tests/util/token_extractor/token_extractor_test.cpp b/searchlib/src/tests/util/token_extractor/token_extractor_test.cpp index e6944e257e9..5eb42bb8ac4 100644 --- a/searchlib/src/tests/util/token_extractor/token_extractor_test.cpp +++ b/searchlib/src/tests/util/token_extractor/token_extractor_test.cpp @@ -118,7 +118,7 @@ TEST_F(TokenExtractorTest, empty_string) TEST_F(TokenExtractorTest, plain_string) { - EXPECT_EQ((Words{"Plain string"}), process(StringFieldValue("Plain string"))); + EXPECT_EQ((Words{}), process(StringFieldValue("Plain string"))); } TEST_F(TokenExtractorTest, normal_string) diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index 5b17b491a20..70b86bf22a1 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -94,6 +94,7 @@ using search::queryeval::StrictHeapOrSearch; using search::queryeval::WeightedSetTermBlueprint; using search::queryeval::flow::btree_cost; using search::queryeval::flow::btree_strict_cost; +using search::queryeval::flow::estimate_when_unknown; using search::queryeval::flow::get_num_indirections; using search::queryeval::flow::lookup_cost; using search::queryeval::flow::lookup_strict_cost; @@ -150,10 +151,9 @@ public: search::queryeval::FlowStats calculate_flow_stats(uint32_t docid_limit) const override { if (_hit_estimate.is_unknown()) { // E.g. attributes without fast-search are not able to provide a hit estimate. - // In this case we just assume matching half of the document corpus. // In addition, matching is lookup based, and we are not able to skip documents efficiently when being strict. size_t indirections = get_num_indirections(_attr.getBasicType(), _attr.getCollectionType()); - return {0.5, lookup_cost(indirections), lookup_strict_cost(indirections)}; + return {estimate_when_unknown(), lookup_cost(indirections), lookup_strict_cost(indirections)}; } else { double rel_est = abs_to_rel_est(_hit_estimate.est_hits(), docid_limit); return {rel_est, btree_cost(rel_est), btree_strict_cost(rel_est)}; diff --git a/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp b/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp index 3645496e4fb..41551ac1062 100644 --- a/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp +++ b/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp @@ -10,11 +10,11 @@ LOG_SETUP(".fef.matchdatabuilder"); namespace search::fef::test { -MatchDataBuilder::MatchDataBuilder(QueryEnvironment &queryEnv, MatchData &data) : - _queryEnv(queryEnv), - _data(data), - _index(), - _match() +MatchDataBuilder::MatchDataBuilder(QueryEnvironment &queryEnv, MatchData &data) + : _queryEnv(queryEnv), + _data(data), + _index(), + _match() { // reset all match data objects. for (TermFieldHandle handle = 0; handle < _data.getNumTermFields(); ++handle) { @@ -22,7 +22,7 @@ MatchDataBuilder::MatchDataBuilder(QueryEnvironment &queryEnv, MatchData &data) } } -MatchDataBuilder::~MatchDataBuilder() {} +MatchDataBuilder::~MatchDataBuilder() = default; TermFieldMatchData * MatchDataBuilder::getTermFieldMatchData(uint32_t termId, uint32_t fieldId) @@ -59,7 +59,7 @@ MatchDataBuilder::addElement(const vespalib::string &fieldName, int32_t weight, LOG(error, "Field '%s' does not exist.", fieldName.c_str()); return false; } - _index[info->id()].elements.push_back(MyElement(weight, length)); + _index[info->id()].elements.emplace_back(weight, length); return true; } @@ -77,8 +77,7 @@ MatchDataBuilder::addOccurence(const vespalib::string &fieldName, uint32_t termI } const ITermFieldData *tfd = _queryEnv.getTerm(termId)->lookupField(info->id()); if (tfd == nullptr) { - LOG(error, "Field '%s' is not searched by the given term.", - fieldName.c_str()); + LOG(error, "Field '%s' is not searched by the given term.", fieldName.c_str()); return false; } _match[termId][info->id()].insert(Position(pos, element)); @@ -99,14 +98,13 @@ MatchDataBuilder::setWeight(const vespalib::string &fieldName, uint32_t termId, } const ITermFieldData *tfd = _queryEnv.getTerm(termId)->lookupField(info->id()); if (tfd == nullptr) { - LOG(error, "Field '%s' is not searched by the given term.", - fieldName.c_str()); + LOG(error, "Field '%s' is not searched by the given term.", fieldName.c_str()); return false; } uint32_t eid = _index[info->id()].elements.size(); _match[termId][info->id()].clear(); _match[termId][info->id()].insert(Position(0, eid)); - _index[info->id()].elements.push_back(MyElement(weight, 1)); + _index[info->id()].elements.emplace_back(weight, 1); return true; } @@ -142,19 +140,13 @@ MatchDataBuilder::apply(uint32_t docId) // For each occurence of that term, in that field, do for (const auto& occ : field_elem.second) { // Append a term match position to the term match data. - match->appendPosition(TermFieldMatchDataPosition( - occ.eid, - occ.pos, - field.getWeight(occ.eid), - field.getLength(occ.eid))); - LOG(debug, - "Added occurence of term '%u' in field '%s'" - " at position '%u'.", + match->appendPosition(TermFieldMatchDataPosition(occ.eid, occ.pos, + field.getWeight(occ.eid), + field.getLength(occ.eid))); + LOG(debug, "Added occurence of term '%u' in field '%s' at position '%u'.", termId, name.c_str(), occ.pos); if (occ.pos >= field.getLength(occ.eid)) { - LOG(warning, - "Added occurence of term '%u' in field '%s'" - " at position '%u' >= fieldLen '%u'.", + LOG(warning, "Added occurence of term '%u' in field '%s' at position '%u' >= fieldLen '%u'.", termId, name.c_str(), occ.pos, field.getLength(occ.eid)); } } diff --git a/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.h b/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.h index 0e5025efd37..753e1596520 100644 --- a/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.h +++ b/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.h @@ -13,7 +13,7 @@ public: struct MyElement { int32_t weight; uint32_t length; - MyElement(int32_t w, uint32_t l) : weight(w), length(l) {} + MyElement(int32_t w, uint32_t l) noexcept : weight(w), length(l) {} }; struct MyField { uint32_t fieldLength; @@ -21,7 +21,7 @@ public: MyField() : fieldLength(0), elements() {} MyElement &getElement(uint32_t eid) { while (elements.size() <= eid) { - elements.push_back(MyElement(0, 0)); + elements.emplace_back(0, 0); } return elements[eid]; } @@ -68,6 +68,8 @@ public: * @param data The match data to build in. */ MatchDataBuilder(QueryEnvironment &queryEnv, MatchData &data); + MatchDataBuilder(const MatchDataBuilder &) = delete; + MatchDataBuilder & operator=(const MatchDataBuilder &) = delete; ~MatchDataBuilder(); /** @@ -133,10 +135,6 @@ public: bool apply(uint32_t docId); private: - MatchDataBuilder(const MatchDataBuilder &); // hide - MatchDataBuilder & operator=(const MatchDataBuilder &); // hide - -private: QueryEnvironment &_queryEnv; MatchData &_data; IndexData _index; diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp index 2bc94073c92..49a0f0621d2 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp @@ -213,6 +213,17 @@ FieldIndex<interleaved_features>::getMemoryUsage() const } template <bool interleaved_features> +void +FieldIndex<interleaved_features>::commit() +{ + _remover.flush(); + freeze(); + assign_generation(); + incGeneration(); + reclaim_memory(); +} + +template <bool interleaved_features> queryeval::SearchIterator::UP FieldIndex<interleaved_features>::make_search_iterator(const vespalib::string& term, uint32_t field_id, @@ -248,7 +259,7 @@ public: : SimpleLeafBlueprint(field), _guard(), _field(field), - _posting_itr(posting_itr), + _posting_itr(std::move(posting_itr)), _feature_store(feature_store), _field_id(field_id), _query_term(query_term), @@ -302,7 +313,7 @@ FieldIndex<interleaved_features>::make_term_blueprint(const vespalib::string& te auto posting_itr = findFrozen(term); bool use_bit_vector = field.isFilter(); return std::make_unique<MemoryTermBlueprint<interleaved_features>> - (std::move(guard), posting_itr, getFeatureStore(), field, field_id, term, use_bit_vector); + (std::move(guard), std::move(posting_itr), getFeatureStore(), field, field_id, term, use_bit_vector); } template class FieldIndex<false>; diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.h b/searchlib/src/vespa/searchlib/memoryindex/field_index.h index 0b245300a7b..18e60cf2194 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index.h +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.h @@ -87,13 +87,7 @@ public: vespalib::MemoryUsage getMemoryUsage() const override; PostingListStore &getPostingListStore() { return _postingListStore; } - void commit() override { - _remover.flush(); - freeze(); - assign_generation(); - incGeneration(); - reclaim_memory(); - } + void commit() override; /** * Should only by used by unit tests. diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp index 7334db4b716..cfa165be067 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp @@ -1,14 +1,15 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "blueprint.h" -#include "leaf_blueprints.h" +#include "andnotsearch.h" +#include "andsearch.h" #include "emptysearch.h" -#include "full_search.h" #include "field_spec.hpp" -#include "andsearch.h" -#include "orsearch.h" -#include "andnotsearch.h" +#include "flow_tuning.h" +#include "full_search.h" +#include "leaf_blueprints.h" #include "matching_elements_search.h" +#include "orsearch.h" #include <vespa/searchlib/fef/termfieldmatchdataarray.h> #include <vespa/vespalib/objects/visit.hpp> #include <vespa/vespalib/objects/objectdumper.h> @@ -238,7 +239,7 @@ Blueprint::default_flow_stats(uint32_t docid_limit, uint32_t abs_est, size_t chi FlowStats Blueprint::default_flow_stats(size_t child_cnt) { - return {0.5, 1.0 + child_cnt, 1.0 + child_cnt}; + return {flow::estimate_when_unknown(), 1.0 + child_cnt, 1.0 + child_cnt}; } std::unique_ptr<MatchingElementsSearch> diff --git a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h index 22faa920bc0..5ed61ef9fc8 100644 --- a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h +++ b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h @@ -60,6 +60,12 @@ inline size_t get_num_indirections(const attribute::BasicType& basic_type, return res; } +// Some blueprints are not able to provide a hit estimate (e.g. attributes without fast-search). +// In such cases the following estimate is used instead. In most cases this is an overestimate. +inline double estimate_when_unknown() { + return 0.1; +} + // Non-strict cost of lookup based matching in an attribute (not fast-search). // Test used: IteratorBenchmark::analyze_term_search_in_attributes_non_strict inline double lookup_cost(size_t num_indirections) { diff --git a/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp index 2b25aa29747..c5435b557b0 100644 --- a/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp @@ -191,16 +191,14 @@ SimplePhraseSearch::doSeek(uint32_t doc_id) { void SimplePhraseSearch::doStrictSeek(uint32_t doc_id) { uint32_t next_candidate = doc_id; - while (getDocId() < doc_id || getDocId() == beginId()) { - getChildren()[0]->seek(next_candidate + 1); - next_candidate = getChildren()[0]->getDocId(); + auto &best_child = *getChildren()[_eval_order[0]]; + while (getDocId() < doc_id) { + best_child.seek(next_candidate + 1); + next_candidate = best_child.getDocId(); if (isAtEnd(next_candidate)) { setAtEnd(); return; } - // child must behave as strict. - assert(next_candidate > doc_id && next_candidate != beginId()); - phraseSeek(next_candidate); } } diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp index 3ab3a1123eb..441ade27d1f 100644 --- a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp @@ -45,10 +45,9 @@ public: return score; } double calc_with_limit(TypedCells rhs, double limit) const noexcept override { - vespalib::ConstArrayRef<AttributeCellType> rhs_vector = rhs.typify<AttributeCellType>(); + vespalib::ConstArrayRef<AttributeCellType> rhs_vector = rhs.unsafe_typify<AttributeCellType>(); double sum = 0.0; size_t sz = _lhs_vector.size(); - assert(sz == rhs_vector.size()); for (size_t i = 0; i < sz && sum <= limit; ++i) { double diff = _lhs_vector[i] - rhs_vector[i]; sum += diff*diff; diff --git a/searchlib/src/vespa/searchlib/util/token_extractor.cpp b/searchlib/src/vespa/searchlib/util/token_extractor.cpp index a78f30afe21..6e1573c4551 100644 --- a/searchlib/src/vespa/searchlib/util/token_extractor.cpp +++ b/searchlib/src/vespa/searchlib/util/token_extractor.cpp @@ -143,8 +143,6 @@ TokenExtractor::extract(std::vector<SpanTerm>& terms, const document::StringFiel { auto tree = StringFieldValue::findTree(trees, SPANTREE_NAME); if (tree == nullptr) { - /* field might not be annotated if match type is exact */ - consider_word(terms, text, Span(0, text.size()), nullptr, doc); return; } for (const Annotation & annotation : *tree) { |