aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@gmail.com>2024-05-13 21:55:47 +0200
committerGitHub <noreply@github.com>2024-05-13 21:55:47 +0200
commitee0cd3294fbfea91aa20816fd56c621724017939 (patch)
tree5c28c45090422e270c6163f81e75e4fcb5f17b55 /searchlib
parente1bb1b57d9cef2aba9a5c0191e649d7aac8147eb (diff)
parente59b79fdc60d6b6994013caf50ab1f5decb930ce (diff)
Merge branch 'master' into bratseth/stemming-trace
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.cpp42
-rw-r--r--searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.h25
-rw-r--r--searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp100
-rw-r--r--searchlib/src/tests/util/token_extractor/token_extractor_test.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp38
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.h10
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/field_index.cpp15
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/field_index.h8
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/blueprint.cpp13
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/flow_tuning.h6
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp10
-rw-r--r--searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp3
-rw-r--r--searchlib/src/vespa/searchlib/util/token_extractor.cpp2
14 files changed, 171 insertions, 107 deletions
diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.cpp
index 8591ec1415d..51177850155 100644
--- a/searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.cpp
+++ b/searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.cpp
@@ -2,14 +2,14 @@
#include "intermediate_blueprint_factory.h"
#include <vespa/searchlib/queryeval/intermediate_blueprints.h>
+#include <vespa/searchlib/attribute/singlenumericattribute.h>
#include <iomanip>
#include <sstream>
namespace search::queryeval::test {
-template <typename BlueprintType>
char
-IntermediateBlueprintFactory<BlueprintType>::child_name(void* blueprint) const
+IntermediateBlueprintFactory::child_name(void* blueprint) const
{
auto itr = _child_names.find(blueprint);
if (itr != _child_names.end()) {
@@ -18,35 +18,33 @@ IntermediateBlueprintFactory<BlueprintType>::child_name(void* blueprint) const
return '?';
}
-template <typename BlueprintType>
-IntermediateBlueprintFactory<BlueprintType>::IntermediateBlueprintFactory(vespalib::stringref name)
+IntermediateBlueprintFactory::IntermediateBlueprintFactory(vespalib::stringref name)
: _name(name),
_children(),
_child_names()
{
}
-template <typename BlueprintType>
-IntermediateBlueprintFactory<BlueprintType>::~IntermediateBlueprintFactory() = default;
+IntermediateBlueprintFactory::~IntermediateBlueprintFactory() = default;
-template <typename BlueprintType>
std::unique_ptr<Blueprint>
-IntermediateBlueprintFactory<BlueprintType>::make_blueprint()
+IntermediateBlueprintFactory::make_blueprint()
{
- auto res = std::make_unique<BlueprintType>();
+ auto res = make_self();
_child_names.clear();
char name = 'A';
+ uint32_t source = 1;
for (const auto& factory : _children) {
auto child = factory->make_blueprint();
_child_names[child.get()] = name++;
+ child->setSourceId(source++); // ignored by non-source-blender blueprints
res->addChild(std::move(child));
}
return res;
}
-template <typename BlueprintType>
vespalib::string
-IntermediateBlueprintFactory<BlueprintType>::get_name(Blueprint& blueprint) const
+IntermediateBlueprintFactory::get_name(Blueprint& blueprint) const
{
auto* intermediate = blueprint.asIntermediate();
if (intermediate != nullptr) {
@@ -69,11 +67,29 @@ IntermediateBlueprintFactory<BlueprintType>::get_name(Blueprint& blueprint) cons
return get_class_name(blueprint);
}
-template class IntermediateBlueprintFactory<AndBlueprint>;
+//-----------------------------------------------------------------------------
AndBlueprintFactory::AndBlueprintFactory()
- : IntermediateBlueprintFactory<AndBlueprint>("AND")
+ : IntermediateBlueprintFactory("AND")
{}
+std::unique_ptr<IntermediateBlueprint>
+AndBlueprintFactory::make_self() const
+{
+ return std::make_unique<AndBlueprint>();
+}
+
+//-----------------------------------------------------------------------------
+
+SourceBlenderBlueprintFactory::SourceBlenderBlueprintFactory()
+ : IntermediateBlueprintFactory("SB"),
+ _selector(250, "my_source_blender", 1000)
+{}
+
+std::unique_ptr<IntermediateBlueprint>
+SourceBlenderBlueprintFactory::make_self() const
+{
+ return std::make_unique<SourceBlenderBlueprint>(_selector);
}
+}
diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.h b/searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.h
index 6f7fe4f9ee7..c791d866612 100644
--- a/searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.h
+++ b/searchlib/src/tests/queryeval/iterator_benchmark/intermediate_blueprint_factory.h
@@ -4,6 +4,7 @@
#include "benchmark_blueprint_factory.h"
#include <vespa/searchlib/queryeval/intermediate_blueprints.h>
+#include <vespa/searchlib/attribute/fixedsourceselector.h>
#include <unordered_map>
namespace search::queryeval::test {
@@ -11,7 +12,6 @@ namespace search::queryeval::test {
/**
* Factory that creates an IntermediateBlueprint (of the given type) with children created by the given factories.
*/
-template <typename BlueprintType>
class IntermediateBlueprintFactory : public BenchmarkBlueprintFactory {
private:
vespalib::string _name;
@@ -19,7 +19,8 @@ private:
std::unordered_map<void*, char> _child_names;
char child_name(void* blueprint) const;
-
+protected:
+ virtual std::unique_ptr<IntermediateBlueprint> make_self() const = 0;
public:
IntermediateBlueprintFactory(vespalib::stringref name);
~IntermediateBlueprintFactory();
@@ -30,10 +31,26 @@ public:
vespalib::string get_name(Blueprint& blueprint) const override;
};
-class AndBlueprintFactory : public IntermediateBlueprintFactory<AndBlueprint> {
+class AndBlueprintFactory : public IntermediateBlueprintFactory {
+protected:
+ std::unique_ptr<IntermediateBlueprint> make_self() const override;
public:
AndBlueprintFactory();
};
-}
+class SourceBlenderBlueprintFactory : public IntermediateBlueprintFactory
+{
+private:
+ FixedSourceSelector _selector;
+protected:
+ std::unique_ptr<IntermediateBlueprint> make_self() const override;
+public:
+ SourceBlenderBlueprintFactory();
+ void init_selector(auto f, uint32_t limit) {
+ for (uint32_t i = 0; i < limit; ++i) {
+ _selector.setSource(i, f(i));
+ }
+ }
+};
+}
diff --git a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
index 2977664f6ad..96472200952 100644
--- a/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
+++ b/searchlib/src/tests/queryeval/iterator_benchmark/iterator_benchmark_test.cpp
@@ -292,10 +292,6 @@ benchmark_search(BenchmarkBlueprintFactory& factory, uint32_t docid_limit, bool
}
}
-
-
-
-
//-----------------------------------------------------------------------------
double est_forced_strict_cost(double estimate, double strict_cost, double rate) {
@@ -430,15 +426,29 @@ to_string(bool val)
void
print_result_header()
{
- std::cout << "| chn | f_ratio | o_ratio | a_ratio | f.est | f.cost | f.act_cost | f.scost | f.act_scost | hits | seeks | time_ms | act_cost | ns_per_seek | ms_per_act_cost | iterator | blueprint |" << std::endl;
+ std::cout << "| in_flow | chn | o_ratio | a_ratio | f.est | f.cost | f.act_cost | f.scost | f.act_scost | hits | seeks | time_ms | act_cost | ns_per_seek | ms_per_act_cost | iterator | blueprint |" << std::endl;
+}
+
+std::ostream &operator<<(std::ostream &dst, InFlow in_flow) {
+ auto old_w = dst.width();
+ auto old_p = dst.precision();
+ dst << std::setw(7) << std::setprecision(5);
+ if (in_flow.strict()) {
+ dst << " STRICT";
+ } else {
+ dst << in_flow.rate();
+ }
+ dst << std::setw(old_w);
+ dst << std::setprecision(old_p);
+ return dst;
}
void
print_result(const BenchmarkResult& res, uint32_t children, double op_hit_ratio, InFlow in_flow, uint32_t num_docs)
{
std::cout << std::fixed << std::setprecision(5)
- << "| " << std::setw(5) << children
- << " | " << std::setw(7) << in_flow.rate()
+ << "| " << in_flow
+ << " | " << std::setw(5) << children
<< " | " << std::setw(7) << op_hit_ratio
<< " | " << std::setw(7) << ((double) res.hits / (double) num_docs)
<< " | " << std::setw(6) << res.flow.estimate
@@ -684,23 +694,25 @@ run_benchmarks(const BenchmarkSetup& setup)
void
print_intermediate_blueprint_result_header(size_t children)
{
+ std::cout << "| in_flow";
// This matches the naming scheme in IntermediateBlueprintFactory.
char name = 'A';
for (size_t i = 0; i < children; ++i) {
- std::cout << "| " << name++ << ".ratio ";
+ std::cout << " | " << name++ << ".ratio";
}
- std::cout << "| flow.cost | flow.scost | flow.est | ratio | hits | seeks | ms_per_cost | time_ms | algo | blueprint |" << std::endl;
+ std::cout << " | flow.cost | flow.scost | flow.est | ratio | hits | seeks | ms_per_cost | time_ms | algo | blueprint |" << std::endl;
}
void
-print_intermediate_blueprint_result(const BenchmarkResult& res, const std::vector<double>& children_ratios, PlanningAlgo algo, uint32_t num_docs)
+print_intermediate_blueprint_result(const BenchmarkResult& res, const std::vector<double>& children_ratios, PlanningAlgo algo, InFlow in_flow, uint32_t num_docs)
{
- std::cout << std::fixed << std::setprecision(5);
+ std::cout << std::fixed << std::setprecision(5)
+ << "| " << in_flow;
for (auto ratio : children_ratios) {
- std::cout << "| " << std::setw(7) << ratio << " ";
+ std::cout << " | " << std::setw(7) << ratio;
}
std::cout << std::setprecision(5)
- << "| " << std::setw(10) << res.flow.cost
+ << " | " << std::setw(10) << res.flow.cost
<< " | " << std::setw(10) << res.flow.strict_cost
<< " | " << std::setw(8) << res.flow.estimate
<< " | " << std::setw(7) << ((double) res.hits / (double) num_docs)
@@ -748,9 +760,8 @@ struct BlueprintFactorySetup {
BlueprintFactorySetup::~BlueprintFactorySetup() = default;
-template <typename IntermediateBlueprintFactoryType>
void
-run_intermediate_blueprint_benchmark(const BlueprintFactorySetup& a, const BlueprintFactorySetup& b, size_t num_docs)
+run_intermediate_blueprint_benchmark(auto factory_factory, std::vector<InFlow> in_flows, const BlueprintFactorySetup& a, const BlueprintFactorySetup& b, size_t num_docs)
{
print_intermediate_blueprint_result_header(2);
double max_speedup = 0.0;
@@ -758,26 +769,28 @@ run_intermediate_blueprint_benchmark(const BlueprintFactorySetup& a, const Bluep
for (double b_hit_ratio: b.op_hit_ratios) {
auto b_factory = b.make_factory_shared(num_docs, b_hit_ratio);
for (double a_hit_ratio : a.op_hit_ratios) {
- IntermediateBlueprintFactoryType factory;
- factory.add_child(a.make_factory(num_docs, a_hit_ratio));
- factory.add_child(b_factory);
+ auto factory = factory_factory();
+ factory->add_child(a.make_factory(num_docs, a_hit_ratio));
+ factory->add_child(b_factory);
double time_ms_esti = 0.0;
- for (auto algo: {PlanningAlgo::Order, PlanningAlgo::Estimate, PlanningAlgo::Cost,
- PlanningAlgo::CostForceStrict}) {
- auto res = benchmark_search(factory, num_docs + 1, true, false, false, 1.0, algo);
- print_intermediate_blueprint_result(res, {a_hit_ratio, b_hit_ratio}, algo, num_docs);
- if (algo == PlanningAlgo::Estimate) {
- time_ms_esti = res.time_ms;
- }
- if (algo == PlanningAlgo::CostForceStrict) {
- double speedup = time_ms_esti / res.time_ms;
- if (speedup > max_speedup) {
- max_speedup = speedup;
+ for (InFlow in_flow: in_flows) {
+ for (auto algo: {PlanningAlgo::Order, PlanningAlgo::Estimate, PlanningAlgo::Cost,
+ PlanningAlgo::CostForceStrict}) {
+ auto res = benchmark_search(*factory, num_docs + 1, in_flow.strict(), false, false, in_flow.rate(), algo);
+ print_intermediate_blueprint_result(res, {a_hit_ratio, b_hit_ratio}, algo, in_flow, num_docs);
+ if (algo == PlanningAlgo::Estimate) {
+ time_ms_esti = res.time_ms;
}
- if (speedup < min_speedup) {
- min_speedup = speedup;
+ if (algo == PlanningAlgo::CostForceStrict) {
+ double speedup = time_ms_esti / res.time_ms;
+ if (speedup > max_speedup) {
+ max_speedup = speedup;
+ }
+ if (speedup < min_speedup) {
+ min_speedup = speedup;
+ }
+ std::cout << "speedup (esti/forc)=" << std::setprecision(4) << speedup << std::endl;
}
- std::cout << "speedup (esti/forc)=" << std::setprecision(4) << speedup << std::endl;
}
}
}
@@ -789,7 +802,19 @@ void
run_and_benchmark(const BlueprintFactorySetup& a, const BlueprintFactorySetup& b, size_t num_docs)
{
std::cout << "AND[A={" << a.to_string() << "},B={" << b.to_string() << "}]" << std::endl;
- run_intermediate_blueprint_benchmark<AndBlueprintFactory>(a, b, num_docs);
+ run_intermediate_blueprint_benchmark([](){ return std::make_unique<AndBlueprintFactory>(); }, {true}, a, b, num_docs);
+}
+
+void
+run_source_blender_benchmark(const BlueprintFactorySetup& a, const BlueprintFactorySetup& b, size_t num_docs)
+{
+ std::cout << "SB[A={" << a.to_string() << "},B={" << b.to_string() << "}]" << std::endl;
+ auto factory_factory = [&](){
+ auto factory = std::make_unique<SourceBlenderBlueprintFactory>();
+ factory->init_selector([](uint32_t i){ return (i%10 == 0) ? 1 : 2; }, num_docs + 1);
+ return factory;
+ };
+ run_intermediate_blueprint_benchmark(factory_factory, {true, 0.75, 0.5, 0.25, 0.1, 0.01, 0.001}, a, b, num_docs);
}
//-------------------------------------------------------------------------------------
@@ -973,6 +998,15 @@ TEST(IteratorBenchmark, analyze_AND_bitvector_vs_IN)
}
}
+TEST(IteratorBenchmark, analyze_strict_SOURCEBLENDER_memory_and_disk)
+{
+ for (double small_ratio: {0.001, 0.005, 0.01, 0.05}) {
+ run_source_blender_benchmark({str_fs, QueryOperator::Term, {small_ratio}},
+ {str_index, QueryOperator::Term, {small_ratio * 10}},
+ num_docs);
+ }
+}
+
TEST(IteratorBenchmark, analyze_OR_non_strict_fs)
{
for (auto or_hit_ratio : {0.01, 0.1, 0.5}) {
diff --git a/searchlib/src/tests/util/token_extractor/token_extractor_test.cpp b/searchlib/src/tests/util/token_extractor/token_extractor_test.cpp
index e6944e257e9..5eb42bb8ac4 100644
--- a/searchlib/src/tests/util/token_extractor/token_extractor_test.cpp
+++ b/searchlib/src/tests/util/token_extractor/token_extractor_test.cpp
@@ -118,7 +118,7 @@ TEST_F(TokenExtractorTest, empty_string)
TEST_F(TokenExtractorTest, plain_string)
{
- EXPECT_EQ((Words{"Plain string"}), process(StringFieldValue("Plain string")));
+ EXPECT_EQ((Words{}), process(StringFieldValue("Plain string")));
}
TEST_F(TokenExtractorTest, normal_string)
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
index 5b17b491a20..70b86bf22a1 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
@@ -94,6 +94,7 @@ using search::queryeval::StrictHeapOrSearch;
using search::queryeval::WeightedSetTermBlueprint;
using search::queryeval::flow::btree_cost;
using search::queryeval::flow::btree_strict_cost;
+using search::queryeval::flow::estimate_when_unknown;
using search::queryeval::flow::get_num_indirections;
using search::queryeval::flow::lookup_cost;
using search::queryeval::flow::lookup_strict_cost;
@@ -150,10 +151,9 @@ public:
search::queryeval::FlowStats calculate_flow_stats(uint32_t docid_limit) const override {
if (_hit_estimate.is_unknown()) {
// E.g. attributes without fast-search are not able to provide a hit estimate.
- // In this case we just assume matching half of the document corpus.
// In addition, matching is lookup based, and we are not able to skip documents efficiently when being strict.
size_t indirections = get_num_indirections(_attr.getBasicType(), _attr.getCollectionType());
- return {0.5, lookup_cost(indirections), lookup_strict_cost(indirections)};
+ return {estimate_when_unknown(), lookup_cost(indirections), lookup_strict_cost(indirections)};
} else {
double rel_est = abs_to_rel_est(_hit_estimate.est_hits(), docid_limit);
return {rel_est, btree_cost(rel_est), btree_strict_cost(rel_est)};
diff --git a/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp b/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp
index 3645496e4fb..41551ac1062 100644
--- a/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp
+++ b/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp
@@ -10,11 +10,11 @@ LOG_SETUP(".fef.matchdatabuilder");
namespace search::fef::test {
-MatchDataBuilder::MatchDataBuilder(QueryEnvironment &queryEnv, MatchData &data) :
- _queryEnv(queryEnv),
- _data(data),
- _index(),
- _match()
+MatchDataBuilder::MatchDataBuilder(QueryEnvironment &queryEnv, MatchData &data)
+ : _queryEnv(queryEnv),
+ _data(data),
+ _index(),
+ _match()
{
// reset all match data objects.
for (TermFieldHandle handle = 0; handle < _data.getNumTermFields(); ++handle) {
@@ -22,7 +22,7 @@ MatchDataBuilder::MatchDataBuilder(QueryEnvironment &queryEnv, MatchData &data)
}
}
-MatchDataBuilder::~MatchDataBuilder() {}
+MatchDataBuilder::~MatchDataBuilder() = default;
TermFieldMatchData *
MatchDataBuilder::getTermFieldMatchData(uint32_t termId, uint32_t fieldId)
@@ -59,7 +59,7 @@ MatchDataBuilder::addElement(const vespalib::string &fieldName, int32_t weight,
LOG(error, "Field '%s' does not exist.", fieldName.c_str());
return false;
}
- _index[info->id()].elements.push_back(MyElement(weight, length));
+ _index[info->id()].elements.emplace_back(weight, length);
return true;
}
@@ -77,8 +77,7 @@ MatchDataBuilder::addOccurence(const vespalib::string &fieldName, uint32_t termI
}
const ITermFieldData *tfd = _queryEnv.getTerm(termId)->lookupField(info->id());
if (tfd == nullptr) {
- LOG(error, "Field '%s' is not searched by the given term.",
- fieldName.c_str());
+ LOG(error, "Field '%s' is not searched by the given term.", fieldName.c_str());
return false;
}
_match[termId][info->id()].insert(Position(pos, element));
@@ -99,14 +98,13 @@ MatchDataBuilder::setWeight(const vespalib::string &fieldName, uint32_t termId,
}
const ITermFieldData *tfd = _queryEnv.getTerm(termId)->lookupField(info->id());
if (tfd == nullptr) {
- LOG(error, "Field '%s' is not searched by the given term.",
- fieldName.c_str());
+ LOG(error, "Field '%s' is not searched by the given term.", fieldName.c_str());
return false;
}
uint32_t eid = _index[info->id()].elements.size();
_match[termId][info->id()].clear();
_match[termId][info->id()].insert(Position(0, eid));
- _index[info->id()].elements.push_back(MyElement(weight, 1));
+ _index[info->id()].elements.emplace_back(weight, 1);
return true;
}
@@ -142,19 +140,13 @@ MatchDataBuilder::apply(uint32_t docId)
// For each occurence of that term, in that field, do
for (const auto& occ : field_elem.second) {
// Append a term match position to the term match data.
- match->appendPosition(TermFieldMatchDataPosition(
- occ.eid,
- occ.pos,
- field.getWeight(occ.eid),
- field.getLength(occ.eid)));
- LOG(debug,
- "Added occurence of term '%u' in field '%s'"
- " at position '%u'.",
+ match->appendPosition(TermFieldMatchDataPosition(occ.eid, occ.pos,
+ field.getWeight(occ.eid),
+ field.getLength(occ.eid)));
+ LOG(debug, "Added occurence of term '%u' in field '%s' at position '%u'.",
termId, name.c_str(), occ.pos);
if (occ.pos >= field.getLength(occ.eid)) {
- LOG(warning,
- "Added occurence of term '%u' in field '%s'"
- " at position '%u' >= fieldLen '%u'.",
+ LOG(warning, "Added occurence of term '%u' in field '%s' at position '%u' >= fieldLen '%u'.",
termId, name.c_str(), occ.pos, field.getLength(occ.eid));
}
}
diff --git a/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.h b/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.h
index 0e5025efd37..753e1596520 100644
--- a/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.h
+++ b/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.h
@@ -13,7 +13,7 @@ public:
struct MyElement {
int32_t weight;
uint32_t length;
- MyElement(int32_t w, uint32_t l) : weight(w), length(l) {}
+ MyElement(int32_t w, uint32_t l) noexcept : weight(w), length(l) {}
};
struct MyField {
uint32_t fieldLength;
@@ -21,7 +21,7 @@ public:
MyField() : fieldLength(0), elements() {}
MyElement &getElement(uint32_t eid) {
while (elements.size() <= eid) {
- elements.push_back(MyElement(0, 0));
+ elements.emplace_back(0, 0);
}
return elements[eid];
}
@@ -68,6 +68,8 @@ public:
* @param data The match data to build in.
*/
MatchDataBuilder(QueryEnvironment &queryEnv, MatchData &data);
+ MatchDataBuilder(const MatchDataBuilder &) = delete;
+ MatchDataBuilder & operator=(const MatchDataBuilder &) = delete;
~MatchDataBuilder();
/**
@@ -133,10 +135,6 @@ public:
bool apply(uint32_t docId);
private:
- MatchDataBuilder(const MatchDataBuilder &); // hide
- MatchDataBuilder & operator=(const MatchDataBuilder &); // hide
-
-private:
QueryEnvironment &_queryEnv;
MatchData &_data;
IndexData _index;
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
index 2bc94073c92..49a0f0621d2 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
@@ -213,6 +213,17 @@ FieldIndex<interleaved_features>::getMemoryUsage() const
}
template <bool interleaved_features>
+void
+FieldIndex<interleaved_features>::commit()
+{
+ _remover.flush();
+ freeze();
+ assign_generation();
+ incGeneration();
+ reclaim_memory();
+}
+
+template <bool interleaved_features>
queryeval::SearchIterator::UP
FieldIndex<interleaved_features>::make_search_iterator(const vespalib::string& term,
uint32_t field_id,
@@ -248,7 +259,7 @@ public:
: SimpleLeafBlueprint(field),
_guard(),
_field(field),
- _posting_itr(posting_itr),
+ _posting_itr(std::move(posting_itr)),
_feature_store(feature_store),
_field_id(field_id),
_query_term(query_term),
@@ -302,7 +313,7 @@ FieldIndex<interleaved_features>::make_term_blueprint(const vespalib::string& te
auto posting_itr = findFrozen(term);
bool use_bit_vector = field.isFilter();
return std::make_unique<MemoryTermBlueprint<interleaved_features>>
- (std::move(guard), posting_itr, getFeatureStore(), field, field_id, term, use_bit_vector);
+ (std::move(guard), std::move(posting_itr), getFeatureStore(), field, field_id, term, use_bit_vector);
}
template class FieldIndex<false>;
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.h b/searchlib/src/vespa/searchlib/memoryindex/field_index.h
index 0b245300a7b..18e60cf2194 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_index.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.h
@@ -87,13 +87,7 @@ public:
vespalib::MemoryUsage getMemoryUsage() const override;
PostingListStore &getPostingListStore() { return _postingListStore; }
- void commit() override {
- _remover.flush();
- freeze();
- assign_generation();
- incGeneration();
- reclaim_memory();
- }
+ void commit() override;
/**
* Should only by used by unit tests.
diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
index 7334db4b716..cfa165be067 100644
--- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
@@ -1,14 +1,15 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "blueprint.h"
-#include "leaf_blueprints.h"
+#include "andnotsearch.h"
+#include "andsearch.h"
#include "emptysearch.h"
-#include "full_search.h"
#include "field_spec.hpp"
-#include "andsearch.h"
-#include "orsearch.h"
-#include "andnotsearch.h"
+#include "flow_tuning.h"
+#include "full_search.h"
+#include "leaf_blueprints.h"
#include "matching_elements_search.h"
+#include "orsearch.h"
#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
#include <vespa/vespalib/objects/visit.hpp>
#include <vespa/vespalib/objects/objectdumper.h>
@@ -238,7 +239,7 @@ Blueprint::default_flow_stats(uint32_t docid_limit, uint32_t abs_est, size_t chi
FlowStats
Blueprint::default_flow_stats(size_t child_cnt)
{
- return {0.5, 1.0 + child_cnt, 1.0 + child_cnt};
+ return {flow::estimate_when_unknown(), 1.0 + child_cnt, 1.0 + child_cnt};
}
std::unique_ptr<MatchingElementsSearch>
diff --git a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
index 22faa920bc0..5ed61ef9fc8 100644
--- a/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
+++ b/searchlib/src/vespa/searchlib/queryeval/flow_tuning.h
@@ -60,6 +60,12 @@ inline size_t get_num_indirections(const attribute::BasicType& basic_type,
return res;
}
+// Some blueprints are not able to provide a hit estimate (e.g. attributes without fast-search).
+// In such cases the following estimate is used instead. In most cases this is an overestimate.
+inline double estimate_when_unknown() {
+ return 0.1;
+}
+
// Non-strict cost of lookup based matching in an attribute (not fast-search).
// Test used: IteratorBenchmark::analyze_term_search_in_attributes_non_strict
inline double lookup_cost(size_t num_indirections) {
diff --git a/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp
index 2b25aa29747..c5435b557b0 100644
--- a/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp
@@ -191,16 +191,14 @@ SimplePhraseSearch::doSeek(uint32_t doc_id) {
void
SimplePhraseSearch::doStrictSeek(uint32_t doc_id) {
uint32_t next_candidate = doc_id;
- while (getDocId() < doc_id || getDocId() == beginId()) {
- getChildren()[0]->seek(next_candidate + 1);
- next_candidate = getChildren()[0]->getDocId();
+ auto &best_child = *getChildren()[_eval_order[0]];
+ while (getDocId() < doc_id) {
+ best_child.seek(next_candidate + 1);
+ next_candidate = best_child.getDocId();
if (isAtEnd(next_candidate)) {
setAtEnd();
return;
}
- // child must behave as strict.
- assert(next_candidate > doc_id && next_candidate != beginId());
-
phraseSeek(next_candidate);
}
}
diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp
index 3ab3a1123eb..441ade27d1f 100644
--- a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp
@@ -45,10 +45,9 @@ public:
return score;
}
double calc_with_limit(TypedCells rhs, double limit) const noexcept override {
- vespalib::ConstArrayRef<AttributeCellType> rhs_vector = rhs.typify<AttributeCellType>();
+ vespalib::ConstArrayRef<AttributeCellType> rhs_vector = rhs.unsafe_typify<AttributeCellType>();
double sum = 0.0;
size_t sz = _lhs_vector.size();
- assert(sz == rhs_vector.size());
for (size_t i = 0; i < sz && sum <= limit; ++i) {
double diff = _lhs_vector[i] - rhs_vector[i];
sum += diff*diff;
diff --git a/searchlib/src/vespa/searchlib/util/token_extractor.cpp b/searchlib/src/vespa/searchlib/util/token_extractor.cpp
index a78f30afe21..6e1573c4551 100644
--- a/searchlib/src/vespa/searchlib/util/token_extractor.cpp
+++ b/searchlib/src/vespa/searchlib/util/token_extractor.cpp
@@ -143,8 +143,6 @@ TokenExtractor::extract(std::vector<SpanTerm>& terms, const document::StringFiel
{
auto tree = StringFieldValue::findTree(trees, SPANTREE_NAME);
if (tree == nullptr) {
- /* field might not be annotated if match type is exact */
- consider_word(terms, text, Span(0, text.size()), nullptr, doc);
return;
}
for (const Annotation & annotation : *tree) {