aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2024-04-30 17:35:32 +0200
committerGitHub <noreply@github.com>2024-04-30 17:35:32 +0200
commite07817698a49af176f34b58f8a9b479f401a7a64 (patch)
treeacd10c507fa86f25ea87b122ca563acd2e55c594
parentd6a4ccf4ce3c9528cfe30821236d04e3d2618721 (diff)
parentd8409af89e3dc4cab8133a3f0e8bef739046e306 (diff)
Merge pull request #31081 from vespa-engine/balder/wire-in-wand-range
Balder/wire in wand range
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/blueprintbuilder.cpp5
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp4
-rw-r--r--searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp3
-rw-r--r--searchlib/src/tests/queryeval/weak_and/wand_bench_setup.hpp98
-rw-r--r--searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp23
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h10
-rw-r--r--searchlib/src/vespa/searchlib/features/bm25_feature.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/features/bm25_feature.h2
-rw-r--r--searchlib/src/vespa/searchlib/fef/indexproperties.cpp15
-rw-r--r--searchlib/src/vespa/searchlib/fef/indexproperties.h12
-rw-r--r--searchlib/src/vespa/searchlib/fef/ranksetup.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/fef/ranksetup.h3
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h4
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h53
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp37
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.h8
17 files changed, 199 insertions, 86 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/matching/blueprintbuilder.cpp b/searchcore/src/vespa/searchcore/proton/matching/blueprintbuilder.cpp
index 0b2660824c0..919309c5dae 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/blueprintbuilder.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/blueprintbuilder.cpp
@@ -9,6 +9,7 @@
#include <vespa/searchlib/queryeval/intermediate_blueprints.h>
#include <vespa/searchlib/queryeval/equiv_blueprint.h>
#include <vespa/searchlib/queryeval/get_weight_from_node.h>
+#include <vespa/searchlib/attribute/attribute_blueprint_params.h>
#include <vespa/vespalib/util/issue.h>
using namespace search::queryeval;
@@ -21,7 +22,7 @@ namespace {
struct Mixer {
std::unique_ptr<OrBlueprint> attributes;
- Mixer() : attributes() {}
+ Mixer() noexcept: attributes() {}
void addAttribute(Blueprint::UP attr) {
if ( ! attributes) {
@@ -66,7 +67,7 @@ private:
void buildIntermediate(IntermediateBlueprint *b, NodeType &n) __attribute__((noinline));
void buildWeakAnd(ProtonWeakAnd &n) {
- auto *wand = new WeakAndBlueprint(n.getTargetNumHits());
+ auto *wand = new WeakAndBlueprint(n.getTargetNumHits(), _requestContext.get_attribute_blueprint_params().weakand_range);
Blueprint::UP result(wand);
for (auto node : n.getChildren()) {
uint32_t weight = getWeightFromNode(*node).percent();
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
index 532ec2f63bd..06290386a31 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
@@ -340,6 +340,7 @@ MatchToolsFactory::extract_attribute_blueprint_params(const RankSetup& rank_setu
double upper_limit = GlobalFilterUpperLimit::lookup(rank_properties, rank_setup.get_global_filter_upper_limit());
double target_hits_max_adjustment_factor = TargetHitsMaxAdjustmentFactor::lookup(rank_properties, rank_setup.get_target_hits_max_adjustment_factor());
auto fuzzy_matching_algorithm = FuzzyAlgorithm::lookup(rank_properties, rank_setup.get_fuzzy_matching_algorithm());
+ double weakand_range = temporary::WeakAndRange::lookup(rank_properties, rank_setup.get_weakand_range());
// Note that we count the reserved docid 0 as active.
// This ensures that when searchable-copies=1, the ratio is 1.0.
@@ -348,7 +349,8 @@ MatchToolsFactory::extract_attribute_blueprint_params(const RankSetup& rank_setu
return {lower_limit * active_hit_ratio,
upper_limit * active_hit_ratio,
target_hits_max_adjustment_factor,
- fuzzy_matching_algorithm};
+ fuzzy_matching_algorithm,
+ weakand_range};
}
AttributeOperationTask::AttributeOperationTask(const RequestContext & requestContext,
diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
index bddc9f92111..48ddeed47e9 100644
--- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
+++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
@@ -27,8 +27,9 @@
LOG_SETUP("blueprint_test");
using namespace search::queryeval;
-using namespace search::fef;
using namespace search::query;
+using search::fef::MatchData;
+using search::queryeval::Blueprint;
using search::BitVector;
using BlueprintVector = std::vector<std::unique_ptr<Blueprint>>;
using vespalib::Slime;
diff --git a/searchlib/src/tests/queryeval/weak_and/wand_bench_setup.hpp b/searchlib/src/tests/queryeval/weak_and/wand_bench_setup.hpp
index 5e056eb6c0e..55dc3868ed4 100644
--- a/searchlib/src/tests/queryeval/weak_and/wand_bench_setup.hpp
+++ b/searchlib/src/tests/queryeval/weak_and/wand_bench_setup.hpp
@@ -29,17 +29,17 @@ struct Stats {
size_t unpackCnt;
size_t skippedDocs;
size_t skippedHits;
- Stats() : hitCnt(0), seekCnt(0), unpackCnt(0),
+ Stats() noexcept : hitCnt(0), seekCnt(0), unpackCnt(0),
skippedDocs(0), skippedHits(0) {}
- void hit() {
+ void hit() noexcept {
++hitCnt;
}
- void seek(size_t docs, size_t hits) {
+ void seek(size_t docs, size_t hits) noexcept {
++seekCnt;
skippedDocs += docs;
skippedHits += hits;
}
- void unpack() {
+ void unpack() noexcept {
++unpackCnt;
}
void print() {
@@ -77,7 +77,7 @@ struct ModSearch : SearchIterator {
}
}
void doUnpack(uint32_t docid) override {
- if (tfmd != NULL) {
+ if (tfmd != nullptr) {
tfmd->reset(docid);
search::fef::TermFieldMatchDataPosition pos;
pos.setElementWeight(info.getMaxWeight());
@@ -96,16 +96,16 @@ ModSearch::~ModSearch() = default;
struct WandFactory {
virtual std::string name() const = 0;
virtual SearchIterator::UP create(const wand::Terms &terms) = 0;
- virtual ~WandFactory() {}
+ virtual ~WandFactory() = default;
};
struct VespaWandFactory : WandFactory {
uint32_t n;
- VespaWandFactory(uint32_t n_in) : n(n_in) {}
+ explicit VespaWandFactory(uint32_t n_in) noexcept : n(n_in) {}
~VespaWandFactory() override;
- virtual std::string name() const override { return make_string("VESPA WAND (n=%u)", n); }
- virtual SearchIterator::UP create(const wand::Terms &terms) override {
- return SearchIterator::UP(WeakAndSearch::create(terms, n, true));
+ std::string name() const override { return make_string("VESPA WAND (n=%u)", n); }
+ SearchIterator::UP create(const wand::Terms &terms) override {
+ return WeakAndSearch::create(terms, n, true);
}
};
@@ -113,11 +113,11 @@ VespaWandFactory::~VespaWandFactory() = default;
struct VespaArrayWandFactory : WandFactory {
uint32_t n;
- VespaArrayWandFactory(uint32_t n_in) : n(n_in) {}
+ explicit VespaArrayWandFactory(uint32_t n_in) noexcept : n(n_in) {}
~VespaArrayWandFactory() override;
- virtual std::string name() const override { return make_string("VESPA ARRAY WAND (n=%u)", n); }
- virtual SearchIterator::UP create(const wand::Terms &terms) override {
- return SearchIterator::UP(WeakAndSearch::createArrayWand(terms, n, true));
+ std::string name() const override { return make_string("VESPA ARRAY WAND (n=%u)", n); }
+ SearchIterator::UP create(const wand::Terms &terms) override {
+ return WeakAndSearch::createArrayWand(terms, wand::TermFrequencyScorer(), n, true);
}
};
@@ -125,11 +125,11 @@ VespaArrayWandFactory::~VespaArrayWandFactory() = default;
struct VespaHeapWandFactory : WandFactory {
uint32_t n;
- VespaHeapWandFactory(uint32_t n_in) : n(n_in) {}
+ explicit VespaHeapWandFactory(uint32_t n_in) noexcept : n(n_in) {}
~VespaHeapWandFactory() override;
- virtual std::string name() const override { return make_string("VESPA HEAP WAND (n=%u)", n); }
- virtual SearchIterator::UP create(const wand::Terms &terms) override {
- return SearchIterator::UP(WeakAndSearch::createHeapWand(terms, n, true));
+ std::string name() const override { return make_string("VESPA HEAP WAND (n=%u)", n); }
+ SearchIterator::UP create(const wand::Terms &terms) override {
+ return WeakAndSearch::createHeapWand(terms, wand::TermFrequencyScorer(), n, true);
}
};
@@ -138,39 +138,39 @@ VespaHeapWandFactory::~VespaHeapWandFactory() = default;
struct VespaParallelWandFactory : public WandFactory {
SharedWeakAndPriorityQueue scores;
TermFieldMatchData rootMatchData;
- VespaParallelWandFactory(uint32_t n) : scores(n), rootMatchData() {}
+ explicit VespaParallelWandFactory(uint32_t n) noexcept : scores(n), rootMatchData() {}
~VespaParallelWandFactory() override;
- virtual std::string name() const override { return make_string("VESPA PWAND (n=%u)", scores.getScoresToTrack()); }
- virtual SearchIterator::UP create(const wand::Terms &terms) override {
- return SearchIterator::UP(ParallelWeakAndSearch::create(terms,
+ std::string name() const override { return make_string("VESPA PWAND (n=%u)", scores.getScoresToTrack()); }
+ SearchIterator::UP create(const wand::Terms &terms) override {
+ return ParallelWeakAndSearch::create(terms,
PWMatchParams(scores, 0, 1, 1),
- PWRankParams(rootMatchData, MatchData::UP()), true));
+ PWRankParams(rootMatchData, {}), true);
}
};
VespaParallelWandFactory::~VespaParallelWandFactory() = default;
struct VespaParallelArrayWandFactory : public VespaParallelWandFactory {
- VespaParallelArrayWandFactory(uint32_t n) : VespaParallelWandFactory(n) {}
+ VespaParallelArrayWandFactory(uint32_t n) noexcept : VespaParallelWandFactory(n) {}
~VespaParallelArrayWandFactory() override;
- virtual std::string name() const override { return make_string("VESPA ARRAY PWAND (n=%u)", scores.getScoresToTrack()); }
- virtual SearchIterator::UP create(const wand::Terms &terms) override {
- return SearchIterator::UP(ParallelWeakAndSearch::createArrayWand(terms,
+ std::string name() const override { return make_string("VESPA ARRAY PWAND (n=%u)", scores.getScoresToTrack()); }
+ SearchIterator::UP create(const wand::Terms &terms) override {
+ return ParallelWeakAndSearch::createArrayWand(terms,
PWMatchParams(scores, 0, 1, 1),
- PWRankParams(rootMatchData, MatchData::UP()), true));
+ PWRankParams(rootMatchData, {}), true);
}
};
VespaParallelArrayWandFactory::~VespaParallelArrayWandFactory() = default;
struct VespaParallelHeapWandFactory : public VespaParallelWandFactory {
- VespaParallelHeapWandFactory(uint32_t n) : VespaParallelWandFactory(n) {}
+ explicit VespaParallelHeapWandFactory(uint32_t n) noexcept : VespaParallelWandFactory(n) {}
~VespaParallelHeapWandFactory() override;
- virtual std::string name() const override { return make_string("VESPA HEAP PWAND (n=%u)", scores.getScoresToTrack()); }
- virtual SearchIterator::UP create(const wand::Terms &terms) override {
- return SearchIterator::UP(ParallelWeakAndSearch::createHeapWand(terms,
+ std::string name() const override { return make_string("VESPA HEAP PWAND (n=%u)", scores.getScoresToTrack()); }
+ SearchIterator::UP create(const wand::Terms &terms) override {
+ return ParallelWeakAndSearch::createHeapWand(terms,
PWMatchParams(scores, 0, 1, 1),
- PWRankParams(rootMatchData, MatchData::UP()), true));
+ PWRankParams(rootMatchData, {}), true);
}
};
@@ -178,11 +178,11 @@ VespaParallelHeapWandFactory::~VespaParallelHeapWandFactory() = default;
struct TermFrequencyRiseWandFactory : WandFactory {
uint32_t n;
- TermFrequencyRiseWandFactory(uint32_t n_in) : n(n_in) {}
+ explicit TermFrequencyRiseWandFactory(uint32_t n_in) noexcept : n(n_in) {}
~TermFrequencyRiseWandFactory() override;
- virtual std::string name() const override { return make_string("RISE WAND TF (n=%u)", n); }
- virtual SearchIterator::UP create(const wand::Terms &terms) override {
- return SearchIterator::UP(new rise::TermFrequencyRiseWand(terms, n));
+ std::string name() const override { return make_string("RISE WAND TF (n=%u)", n); }
+ SearchIterator::UP create(const wand::Terms &terms) override {
+ return std::make_unique<rise::TermFrequencyRiseWand>(terms, n);
}
};
@@ -190,11 +190,11 @@ TermFrequencyRiseWandFactory::~TermFrequencyRiseWandFactory() = default;
struct DotProductRiseWandFactory : WandFactory {
uint32_t n;
- DotProductRiseWandFactory(uint32_t n_in) : n(n_in) {}
+ explicit DotProductRiseWandFactory(uint32_t n_in) noexcept : n(n_in) {}
~DotProductRiseWandFactory() override;
- virtual std::string name() const override { return make_string("RISE WAND DP (n=%u)", n); }
- virtual SearchIterator::UP create(const wand::Terms &terms) override {
- return SearchIterator::UP(new rise::DotProductRiseWand(terms, n));
+ std::string name() const override { return make_string("RISE WAND DP (n=%u)", n); }
+ SearchIterator::UP create(const wand::Terms &terms) override {
+ return std::make_unique<rise::DotProductRiseWand>(terms, n);
}
};
@@ -204,13 +204,13 @@ struct FilterFactory : WandFactory {
WandFactory &factory;
Stats stats;
uint32_t n;
- FilterFactory(WandFactory &f, uint32_t n_in) : factory(f), n(n_in) {}
+ FilterFactory(WandFactory &f, uint32_t n_in) noexcept : factory(f), n(n_in) {}
~FilterFactory() override;
- virtual std::string name() const override { return make_string("Filter (mod=%u) [%s]", n, factory.name().c_str()); }
- virtual SearchIterator::UP create(const wand::Terms &terms) override {
+ std::string name() const override { return make_string("Filter (mod=%u) [%s]", n, factory.name().c_str()); }
+ SearchIterator::UP create(const wand::Terms &terms) override {
AndNotSearch::Children children;
children.push_back(factory.create(terms));
- children.emplace_back(new ModSearch(stats, n, search::endDocId, n, NULL));
+ children.emplace_back(new ModSearch(stats, n, search::endDocId, n, nullptr));
return AndNotSearch::create(std::move(children), true);
}
};
@@ -220,8 +220,8 @@ FilterFactory::~FilterFactory() = default;
struct Setup {
Stats stats;
vespalib::duration minTime;
- Setup() : stats(), minTime(10000s) {}
- virtual ~Setup() {}
+ Setup() noexcept : stats(), minTime(10000s) {}
+ virtual ~Setup() = default;
virtual std::string name() const = 0;
virtual SearchIterator::UP create() = 0;
void perform() {
@@ -256,10 +256,10 @@ struct WandSetup : Setup {
MatchData::UP matchData;
WandSetup(WandFactory &f, uint32_t c, uint32_t l) : Setup(), factory(f), childCnt(c), limit(l), weight(100), matchData() {}
~WandSetup() override;
- virtual std::string name() const override {
+ std::string name() const override {
return make_string("Wand Setup (terms=%u,docs=%u) [%s]", childCnt, limit, factory.name().c_str());
}
- virtual SearchIterator::UP create() override {
+ SearchIterator::UP create() override {
MatchDataLayout layout;
std::vector<TermFieldHandle> handles;
for (size_t i = 0; i < childCnt; ++i) {
diff --git a/searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp b/searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp
index e1f3f0805d9..8a0bc28f4dd 100644
--- a/searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp
+++ b/searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp
@@ -63,4 +63,27 @@ TEST("require that DotProductScorer calculates term score")
EXPECT_EQUAL(11u, itr->_unpackDocId);
}
+TEST("test bm25 idf scorer for wand")
+{
+ wand::Bm25TermFrequencyScorer scorer(1000000, 1.0);
+ EXPECT_EQUAL(13410046, scorer.calculateMaxScore(1, 1));
+ EXPECT_EQUAL(11464136, scorer.calculateMaxScore(10, 1));
+ EXPECT_EQUAL(6907256, scorer.calculateMaxScore(1000, 1));
+ EXPECT_EQUAL(4605121, scorer.calculateMaxScore(10000, 1));
+ EXPECT_EQUAL(2302581, scorer.calculateMaxScore(100000, 1));
+ EXPECT_EQUAL(693147, scorer.calculateMaxScore(500000, 1));
+ EXPECT_EQUAL(105360, scorer.calculateMaxScore(900000, 1));
+ EXPECT_EQUAL(10050, scorer.calculateMaxScore(990000, 1));
+}
+
+TEST("test limited range of bm25 idf scorer for wand")
+{
+ wand::Bm25TermFrequencyScorer scorer08(1000000, 0.8);
+ wand::Bm25TermFrequencyScorer scorer10(1000000, 1.0);
+ EXPECT_EQUAL(8207814, scorer08.calculateMaxScore(1000, 1));
+ EXPECT_EQUAL(2690049, scorer08.calculateMaxScore(990000, 1));
+ EXPECT_EQUAL(6907256, scorer10.calculateMaxScore(1000, 1));
+ EXPECT_EQUAL(10050, scorer10.calculateMaxScore(990000, 1));
+}
+
TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h
index e2928710a32..ac6fc6f603a 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h
@@ -16,15 +16,18 @@ struct AttributeBlueprintParams
double global_filter_upper_limit;
double target_hits_max_adjustment_factor;
vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm;
+ double weakand_range;
AttributeBlueprintParams(double global_filter_lower_limit_in,
double global_filter_upper_limit_in,
double target_hits_max_adjustment_factor_in,
- vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm_in)
+ vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm_in,
+ double weakand_range_in)
: global_filter_lower_limit(global_filter_lower_limit_in),
global_filter_upper_limit(global_filter_upper_limit_in),
target_hits_max_adjustment_factor(target_hits_max_adjustment_factor_in),
- fuzzy_matching_algorithm(fuzzy_matching_algorithm_in)
+ fuzzy_matching_algorithm(fuzzy_matching_algorithm_in),
+ weakand_range(weakand_range_in)
{
}
@@ -32,7 +35,8 @@ struct AttributeBlueprintParams
: AttributeBlueprintParams(fef::indexproperties::matching::GlobalFilterLowerLimit::DEFAULT_VALUE,
fef::indexproperties::matching::GlobalFilterUpperLimit::DEFAULT_VALUE,
fef::indexproperties::matching::TargetHitsMaxAdjustmentFactor::DEFAULT_VALUE,
- fef::indexproperties::matching::FuzzyAlgorithm::DEFAULT_VALUE)
+ fef::indexproperties::matching::FuzzyAlgorithm::DEFAULT_VALUE,
+ fef::indexproperties::temporary::WeakAndRange::DEFAULT_VALUE)
{
}
};
diff --git a/searchlib/src/vespa/searchlib/features/bm25_feature.cpp b/searchlib/src/vespa/searchlib/features/bm25_feature.cpp
index 505b8166ee7..03d2e94b5d0 100644
--- a/searchlib/src/vespa/searchlib/features/bm25_feature.cpp
+++ b/searchlib/src/vespa/searchlib/features/bm25_feature.cpp
@@ -68,7 +68,7 @@ Bm25Executor::Bm25Executor(const fef::FieldInfo& field,
}
double
-Bm25Executor::calculate_inverse_document_frequency(uint32_t matching_doc_count, uint32_t total_doc_count)
+Bm25Executor::calculate_inverse_document_frequency(uint32_t matching_doc_count, uint32_t total_doc_count) noexcept
{
return std::log(1 + (static_cast<double>(total_doc_count - matching_doc_count + 0.5) /
static_cast<double>(matching_doc_count + 0.5)));
diff --git a/searchlib/src/vespa/searchlib/features/bm25_feature.h b/searchlib/src/vespa/searchlib/features/bm25_feature.h
index a1b45375285..637d656990b 100644
--- a/searchlib/src/vespa/searchlib/features/bm25_feature.h
+++ b/searchlib/src/vespa/searchlib/features/bm25_feature.h
@@ -39,7 +39,7 @@ public:
double k1_param,
double b_param);
- double static calculate_inverse_document_frequency(uint32_t matching_doc_count, uint32_t total_doc_count);
+ double static calculate_inverse_document_frequency(uint32_t matching_doc_count, uint32_t total_doc_count) noexcept;
void handle_bind_match_data(const fef::MatchData& match_data) override;
void execute(uint32_t docId) override;
diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
index 4637ad5a4e8..1f88c34bef3 100644
--- a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
+++ b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
@@ -179,6 +179,21 @@ namespace onsummary {
namespace temporary {
+const vespalib::string WeakAndRange::NAME("vespa.weakand.range");
+const double WeakAndRange::DEFAULT_VALUE(0.0);
+
+double
+WeakAndRange::lookup(const Properties &props)
+{
+ return lookup(props, DEFAULT_VALUE);
+}
+
+double
+WeakAndRange::lookup(const Properties &props, double defaultValue)
+{
+ return lookupDouble(props, NAME, defaultValue);
+}
+
}
namespace mutate {
diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.h b/searchlib/src/vespa/searchlib/fef/indexproperties.h
index db8de8209a9..d047eb13347 100644
--- a/searchlib/src/vespa/searchlib/fef/indexproperties.h
+++ b/searchlib/src/vespa/searchlib/fef/indexproperties.h
@@ -178,6 +178,18 @@ namespace mutate {
// Add temporary flags used for safe rollout of new features here
namespace temporary {
+/**
+ * A number in the range [0,1] giving the effective idf range used by WeakAndOperator.
+ * 1.0 gives the complete idf range, as bm25 uses by default.
+ * scaled_idf = (1.0 - range) * max_idf + (range * idf)
+ * 0.0, which is the default, gives the legacy behavior.
+ **/
+struct WeakAndRange {
+ static const vespalib::string NAME;
+ static const double DEFAULT_VALUE;
+ static double lookup(const Properties &props);
+ static double lookup(const Properties &props, double defaultValue);
+};
}
namespace mutate::on_match {
diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
index aadc5300ede..25588cf3229 100644
--- a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
+++ b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
@@ -71,6 +71,7 @@ RankSetup::RankSetup(const BlueprintFactory &factory, const IIndexEnvironment &i
_global_filter_lower_limit(0.0),
_global_filter_upper_limit(1.0),
_target_hits_max_adjustment_factor(20.0),
+ _weakand_range(0.0),
_fuzzy_matching_algorithm(vespalib::FuzzyMatchingAlgorithm::DfaTable),
_mutateOnMatch(),
_mutateOnFirstPhase(),
@@ -126,6 +127,7 @@ RankSetup::configure()
set_global_filter_upper_limit(matching::GlobalFilterUpperLimit::lookup(_indexEnv.getProperties()));
set_target_hits_max_adjustment_factor(matching::TargetHitsMaxAdjustmentFactor::lookup(_indexEnv.getProperties()));
set_fuzzy_matching_algorithm(matching::FuzzyAlgorithm::lookup(_indexEnv.getProperties()));
+ set_weakand_range(temporary::WeakAndRange::lookup(_indexEnv.getProperties()));
_mutateOnMatch._attribute = mutate::on_match::Attribute::lookup(_indexEnv.getProperties());
_mutateOnMatch._operation = mutate::on_match::Operation::lookup(_indexEnv.getProperties());
_mutateOnFirstPhase._attribute = mutate::on_first_phase::Attribute::lookup(_indexEnv.getProperties());
diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.h b/searchlib/src/vespa/searchlib/fef/ranksetup.h
index d8b977a0331..f20ecd4b42b 100644
--- a/searchlib/src/vespa/searchlib/fef/ranksetup.h
+++ b/searchlib/src/vespa/searchlib/fef/ranksetup.h
@@ -80,6 +80,7 @@ private:
double _global_filter_lower_limit;
double _global_filter_upper_limit;
double _target_hits_max_adjustment_factor;
+ double _weakand_range;
vespalib::FuzzyMatchingAlgorithm _fuzzy_matching_algorithm;
MutateOperation _mutateOnMatch;
MutateOperation _mutateOnFirstPhase;
@@ -402,6 +403,8 @@ public:
double get_target_hits_max_adjustment_factor() const { return _target_hits_max_adjustment_factor; }
void set_fuzzy_matching_algorithm(vespalib::FuzzyMatchingAlgorithm v) { _fuzzy_matching_algorithm = v; }
vespalib::FuzzyMatchingAlgorithm get_fuzzy_matching_algorithm() const { return _fuzzy_matching_algorithm; }
+ void set_weakand_range(double v) { _weakand_range = v; }
+ double get_weakand_range() const { return _weakand_range; }
/**
* This method may be used to indicate that certain features
diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
index 33b249572f0..99f7604e1a3 100644
--- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
@@ -492,7 +492,9 @@ WeakAndBlueprint::createIntermediateSearch(MultiSearch::Children sub_searches,
_weights[i],
getChild(i).getState().estimate().estHits);
}
- return WeakAndSearch::create(terms, _n, strict());
+ return (_idf_range == 0.0)
+ ? WeakAndSearch::create(terms, wand::TermFrequencyScorer(), _n, strict())
+ : WeakAndSearch::create(terms, wand::Bm25TermFrequencyScorer(get_docid_limit(), _idf_range), _n, strict());
}
SearchIterator::UP
diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
index ade4c9318e4..7f4796c5f43 100644
--- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
+++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
@@ -90,6 +90,7 @@ class WeakAndBlueprint : public IntermediateBlueprint
{
private:
uint32_t _n;
+ float _idf_range;
std::vector<uint32_t> _weights;
AnyFlow my_flow(InFlow in_flow) const override;
@@ -107,7 +108,8 @@ public:
fef::MatchData &md) const override;
SearchIterator::UP createFilterSearch(FilterConstraint constraint) const override;
- explicit WeakAndBlueprint(uint32_t n) noexcept : _n(n) {}
+ explicit WeakAndBlueprint(uint32_t n) noexcept : WeakAndBlueprint(n, 0.0) {}
+ WeakAndBlueprint(uint32_t n, float idf_range) noexcept : _n(n), _idf_range(idf_range), _weights() {}
~WeakAndBlueprint() override;
void addTerm(Blueprint::UP bp, uint32_t weight) {
addChild(std::move(bp));
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h b/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h
index 4e781f8497b..88f0c9288f9 100644
--- a/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h
@@ -2,10 +2,9 @@
#pragma once
-#include <algorithm>
-#include <cmath>
#include <vespa/searchlib/fef/matchdata.h>
#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/features/bm25_feature.h>
#include <vespa/searchlib/queryeval/searchiterator.h>
#include <vespa/searchlib/queryeval/iterator_pack.h>
#include <vespa/searchlib/attribute/posting_iterator_pack.h>
@@ -13,20 +12,16 @@
#include <vespa/vespalib/util/priority_queue.h>
#include <vespa/searchlib/attribute/i_docid_with_weight_posting_store.h>
#include <vespa/vespalib/util/stringfmt.h>
+#include <cmath>
namespace search::queryeval::wand {
//-----------------------------------------------------------------------------
-struct Term;
-using Terms = std::vector<Term>;
using score_t = int64_t;
using docid_t = uint32_t;
using ref_t = uint16_t;
-using Attr = IDirectPostingStore;
-using AttrDictEntry = Attr::LookupResult;
-
//-----------------------------------------------------------------------------
/**
@@ -46,7 +41,7 @@ struct Term {
Term(SearchIterator *s, int32_t w, uint32_t e) noexcept : Term(s, w, e, nullptr) {}
Term(SearchIterator::UP s, int32_t w, uint32_t e) noexcept : Term(s.release(), w, e, nullptr) {}
};
-
+using Terms = std::vector<Term>;
//-----------------------------------------------------------------------------
// input manipulation utilities
@@ -75,7 +70,7 @@ auto assemble(const F &f, const Order &order)->std::vector<decltype(f(0))> {
}
int32_t get_max_weight(const SearchIterator &search) {
- const MinMaxPostingInfo *minMax = dynamic_cast<const MinMaxPostingInfo *>(search.getPostingInfo());
+ const auto *minMax = dynamic_cast<const MinMaxPostingInfo *>(search.getPostingInfo());
return (minMax != nullptr) ? minMax->getMaxWeight() : std::numeric_limits<int32_t>::max();
}
@@ -291,7 +286,7 @@ struct VectorizedAttributeTerms : VectorizedState<DocidWithWeightIteratorPack> {
**/
struct DocIdOrder {
const docid_t *termPos;
- explicit DocIdOrder(docid_t *pos) noexcept : termPos(pos) {}
+ explicit DocIdOrder(const docid_t *pos) noexcept : termPos(pos) {}
bool at_end(ref_t ref) const noexcept { return termPos[ref] == search::endDocId; }
docid_t get_pos(ref_t ref) const noexcept { return termPos[ref]; }
bool operator()(ref_t a, ref_t b) const noexcept {
@@ -389,7 +384,7 @@ DualHeap<FutureHeap, PastHeap>::stringify() const {
}
//-----------------------------------------------------------------------------
-#define TermFrequencyScorer_TERM_SCORE_FACTOR 1000000.0
+constexpr double TermFrequencyScorer_TERM_SCORE_FACTOR = 1000000.0;
/**
* Scorer used with WeakAndAlgorithm that calculates a pseudo term frequency
@@ -412,6 +407,38 @@ struct TermFrequencyScorer
}
};
+class Bm25TermFrequencyScorer
+{
+public:
+ using Bm25Executor = features::Bm25Executor;
+ Bm25TermFrequencyScorer(uint32_t num_docs, float range) noexcept
+ : _num_docs(num_docs),
+ _range(range),
+ _max_idf(Bm25Executor::calculate_inverse_document_frequency(1, _num_docs))
+ { }
+ double apply_range(double idf) const noexcept {
+ return (1.0 - _range)*_max_idf + _range * idf;
+ }
+ // weight * scaled_bm25_idf, converted to a fixed-point score
+ score_t calculateMaxScore(double estHits, double weight) const noexcept {
+ return score_t(TermFrequencyScorer_TERM_SCORE_FACTOR * weight *
+ apply_range(Bm25Executor::calculate_inverse_document_frequency(estHits, _num_docs)));
+ }
+
+ score_t calculateMaxScore(const Term &term) const noexcept {
+ return calculateMaxScore(term.estHits, term.weight) + 1;
+ }
+
+ template <typename Input>
+ score_t calculate_max_score(const Input &input, ref_t ref) const noexcept {
+ return calculateMaxScore(input.get_est_hits(ref), input.get_weight(ref)) + 1;
+ }
+private:
+ uint32_t _num_docs;
+ float _range;
+ double _max_idf;
+};
+
//-----------------------------------------------------------------------------
/**
@@ -453,14 +480,14 @@ struct DotProductScorer
// used with parallel wand where we can safely discard hits based on score
struct GreaterThan {
score_t threshold;
- GreaterThan(score_t t) : threshold(t) {}
+ explicit GreaterThan(score_t t) noexcept : threshold(t) {}
bool operator()(score_t score) const { return (score > threshold); }
};
// used with old-style vespa wand to ensure at least AND'ish results
struct GreaterThanEqual {
score_t threshold;
- GreaterThanEqual(score_t t) : threshold(t) {}
+ explicit GreaterThanEqual(score_t t) noexcept : threshold(t) {}
bool operator()(score_t score) const { return (score >= threshold); }
};
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp
index 04b1cb75da4..cf3fd44ad4f 100644
--- a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp
@@ -42,8 +42,9 @@ private:
}
public:
- WeakAndSearchLR(const Terms &terms, uint32_t n)
- : _terms(terms, TermFrequencyScorer(), 0, {}),
+ template<typename Scorer>
+ WeakAndSearchLR(const Terms &terms, const Scorer & scorer, uint32_t n)
+ : _terms(terms, scorer, 0, {}),
_heaps(DocIdOrder(_terms.docId()), _terms.size()),
_algo(),
_threshold(1),
@@ -102,36 +103,50 @@ WeakAndSearch::visitMembers(vespalib::ObjectVisitor &visitor) const
//-----------------------------------------------------------------------------
+template<typename Scorer>
SearchIterator::UP
-WeakAndSearch::createArrayWand(const Terms &terms, uint32_t n, bool strict)
+WeakAndSearch::createArrayWand(const Terms &terms, const Scorer & scorer, uint32_t n, bool strict)
{
if (strict) {
- return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftArrayHeap, vespalib::RightArrayHeap, true>>(terms, n);
+ return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftArrayHeap, vespalib::RightArrayHeap, true>>(terms, scorer, n);
} else {
- return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftArrayHeap, vespalib::RightArrayHeap, false>>(terms, n);
+ return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftArrayHeap, vespalib::RightArrayHeap, false>>(terms, scorer, n);
}
}
+template<typename Scorer>
SearchIterator::UP
-WeakAndSearch::createHeapWand(const Terms &terms, uint32_t n, bool strict)
+WeakAndSearch::createHeapWand(const Terms &terms, const Scorer & scorer, uint32_t n, bool strict)
{
if (strict) {
- return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftHeap, vespalib::RightHeap, true>>(terms, n);
+ return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftHeap, vespalib::RightHeap, true>>(terms, scorer, n);
} else {
- return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftHeap, vespalib::RightHeap, false>>(terms, n);
+ return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftHeap, vespalib::RightHeap, false>>(terms, scorer, n);
}
}
+template<typename Scorer>
SearchIterator::UP
-WeakAndSearch::create(const Terms &terms, uint32_t n, bool strict)
+WeakAndSearch::create(const Terms &terms, const Scorer & scorer, uint32_t n, bool strict)
{
if (terms.size() < 128) {
- return createArrayWand(terms, n, strict);
+ return createArrayWand(terms, scorer, n, strict);
} else {
- return createHeapWand(terms, n, strict);
+ return createHeapWand(terms, scorer, n, strict);
}
}
+SearchIterator::UP
+WeakAndSearch::create(const Terms &terms, uint32_t n, bool strict)
+{
+ return create(terms, wand::TermFrequencyScorer(), n, strict);
+}
+
//-----------------------------------------------------------------------------
+template SearchIterator::UP WeakAndSearch::create<wand::TermFrequencyScorer>(const Terms &terms, const wand::TermFrequencyScorer & scorer, uint32_t n, bool strict);
+template SearchIterator::UP WeakAndSearch::create<wand::Bm25TermFrequencyScorer>(const Terms &terms, const wand::Bm25TermFrequencyScorer & scorer, uint32_t n, bool strict);
+template SearchIterator::UP WeakAndSearch::createArrayWand<wand::TermFrequencyScorer>(const Terms &terms, const wand::TermFrequencyScorer & scorer, uint32_t n, bool strict);
+template SearchIterator::UP WeakAndSearch::createHeapWand<wand::TermFrequencyScorer>(const Terms &terms, const wand::TermFrequencyScorer & scorer, uint32_t n, bool strict);
+
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.h b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.h
index 6a56a04887c..a91b2860a63 100644
--- a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.h
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.h
@@ -15,8 +15,12 @@ struct WeakAndSearch : SearchIterator {
virtual const Terms &getTerms() const = 0;
virtual uint32_t getN() const = 0;
void visitMembers(vespalib::ObjectVisitor &visitor) const override;
- static SearchIterator::UP createArrayWand(const Terms &terms, uint32_t n, bool strict);
- static SearchIterator::UP createHeapWand(const Terms &terms, uint32_t n, bool strict);
+ template<typename Scorer>
+ static SearchIterator::UP createArrayWand(const Terms &terms, const Scorer & scorer, uint32_t n, bool strict);
+ template<typename Scorer>
+ static SearchIterator::UP createHeapWand(const Terms &terms, const Scorer & scorer, uint32_t n, bool strict);
+ template<typename Scorer>
+ static SearchIterator::UP create(const Terms &terms, const Scorer & scorer, uint32_t n, bool strict);
static SearchIterator::UP create(const Terms &terms, uint32_t n, bool strict);
};