aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2024-04-29 13:44:11 +0000
committerHenning Baldersheim <balder@yahoo-inc.com>2024-04-29 13:44:11 +0000
commit388c29c6db7dea0ad256db7b3abf9e5f9d5cfb60 (patch)
tree5248ae2b5212eb9b28df1509e487ff1a5f951916 /searchlib
parentf58fab3b4c021ff03f9d8e70f3bc029de8c0d13b (diff)
Wire control of scoring range for weakand scorer from rank/query properties.
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp23
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h10
-rw-r--r--searchlib/src/vespa/searchlib/fef/indexproperties.cpp15
-rw-r--r--searchlib/src/vespa/searchlib/fef/indexproperties.h12
-rw-r--r--searchlib/src/vespa/searchlib/fef/ranksetup.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/fef/ranksetup.h3
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h4
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h8
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp1
10 files changed, 74 insertions, 8 deletions
diff --git a/searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp b/searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp
index e1f3f0805d9..8a0bc28f4dd 100644
--- a/searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp
+++ b/searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp
@@ -63,4 +63,27 @@ TEST("require that DotProductScorer calculates term score")
EXPECT_EQUAL(11u, itr->_unpackDocId);
}
+TEST("test bm25 idf scorer for wand")
+{
+ wand::Bm25TermFrequencyScorer scorer(1000000, 1.0);
+ EXPECT_EQUAL(13410046, scorer.calculateMaxScore(1, 1));
+ EXPECT_EQUAL(11464136, scorer.calculateMaxScore(10, 1));
+ EXPECT_EQUAL(6907256, scorer.calculateMaxScore(1000, 1));
+ EXPECT_EQUAL(4605121, scorer.calculateMaxScore(10000, 1));
+ EXPECT_EQUAL(2302581, scorer.calculateMaxScore(100000, 1));
+ EXPECT_EQUAL(693147, scorer.calculateMaxScore(500000, 1));
+ EXPECT_EQUAL(105360, scorer.calculateMaxScore(900000, 1));
+ EXPECT_EQUAL(10050, scorer.calculateMaxScore(990000, 1));
+}
+
+TEST("test limited range of bm25 idf scorer for wand")
+{
+ wand::Bm25TermFrequencyScorer scorer08(1000000, 0.8);
+ wand::Bm25TermFrequencyScorer scorer10(1000000, 1.0);
+ EXPECT_EQUAL(8207814, scorer08.calculateMaxScore(1000, 1));
+ EXPECT_EQUAL(2690049, scorer08.calculateMaxScore(990000, 1));
+ EXPECT_EQUAL(6907256, scorer10.calculateMaxScore(1000, 1));
+ EXPECT_EQUAL(10050, scorer10.calculateMaxScore(990000, 1));
+}
+
TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h
index e2928710a32..ac6fc6f603a 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h
@@ -16,15 +16,18 @@ struct AttributeBlueprintParams
double global_filter_upper_limit;
double target_hits_max_adjustment_factor;
vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm;
+ double weakand_range;
AttributeBlueprintParams(double global_filter_lower_limit_in,
double global_filter_upper_limit_in,
double target_hits_max_adjustment_factor_in,
- vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm_in)
+ vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm_in,
+ double weakand_range_in)
: global_filter_lower_limit(global_filter_lower_limit_in),
global_filter_upper_limit(global_filter_upper_limit_in),
target_hits_max_adjustment_factor(target_hits_max_adjustment_factor_in),
- fuzzy_matching_algorithm(fuzzy_matching_algorithm_in)
+ fuzzy_matching_algorithm(fuzzy_matching_algorithm_in),
+ weakand_range(weakand_range_in)
{
}
@@ -32,7 +35,8 @@ struct AttributeBlueprintParams
: AttributeBlueprintParams(fef::indexproperties::matching::GlobalFilterLowerLimit::DEFAULT_VALUE,
fef::indexproperties::matching::GlobalFilterUpperLimit::DEFAULT_VALUE,
fef::indexproperties::matching::TargetHitsMaxAdjustmentFactor::DEFAULT_VALUE,
- fef::indexproperties::matching::FuzzyAlgorithm::DEFAULT_VALUE)
+ fef::indexproperties::matching::FuzzyAlgorithm::DEFAULT_VALUE,
+ fef::indexproperties::temporary::WeakAndRange::DEFAULT_VALUE)
{
}
};
diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
index 4637ad5a4e8..1f88c34bef3 100644
--- a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
+++ b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
@@ -179,6 +179,21 @@ namespace onsummary {
namespace temporary {
+const vespalib::string WeakAndRange::NAME("vespa.weakand.range");
+const double WeakAndRange::DEFAULT_VALUE(0.0);
+
+double
+WeakAndRange::lookup(const Properties &props)
+{
+ return lookup(props, DEFAULT_VALUE);
+}
+
+double
+WeakAndRange::lookup(const Properties &props, double defaultValue)
+{
+ return lookupDouble(props, NAME, defaultValue);
+}
+
}
namespace mutate {
diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.h b/searchlib/src/vespa/searchlib/fef/indexproperties.h
index db8de8209a9..db97db0d894 100644
--- a/searchlib/src/vespa/searchlib/fef/indexproperties.h
+++ b/searchlib/src/vespa/searchlib/fef/indexproperties.h
@@ -178,6 +178,18 @@ namespace mutate {
// Add temporary flags used for safe rollout of new features here
namespace temporary {
+/**
+ * A number in the range [0,1] for the effective idf range for WeakAndOperator.
+ * 1.0 will give the complete range as used by default by bm25.
+ * idf = (1.0 - range) + (range * idf)
+ * 0.0 which is default gives default legacy behavior.
+ **/
+struct WeakAndRange {
+ static const vespalib::string NAME;
+ static const double DEFAULT_VALUE;
+ static double lookup(const Properties &props);
+ static double lookup(const Properties &props, double defaultValue);
+};
}
namespace mutate::on_match {
diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
index aadc5300ede..25588cf3229 100644
--- a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
+++ b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
@@ -71,6 +71,7 @@ RankSetup::RankSetup(const BlueprintFactory &factory, const IIndexEnvironment &i
_global_filter_lower_limit(0.0),
_global_filter_upper_limit(1.0),
_target_hits_max_adjustment_factor(20.0),
+ _weakand_range(0.0),
_fuzzy_matching_algorithm(vespalib::FuzzyMatchingAlgorithm::DfaTable),
_mutateOnMatch(),
_mutateOnFirstPhase(),
@@ -126,6 +127,7 @@ RankSetup::configure()
set_global_filter_upper_limit(matching::GlobalFilterUpperLimit::lookup(_indexEnv.getProperties()));
set_target_hits_max_adjustment_factor(matching::TargetHitsMaxAdjustmentFactor::lookup(_indexEnv.getProperties()));
set_fuzzy_matching_algorithm(matching::FuzzyAlgorithm::lookup(_indexEnv.getProperties()));
+ set_weakand_range(temporary::WeakAndRange::lookup(_indexEnv.getProperties()));
_mutateOnMatch._attribute = mutate::on_match::Attribute::lookup(_indexEnv.getProperties());
_mutateOnMatch._operation = mutate::on_match::Operation::lookup(_indexEnv.getProperties());
_mutateOnFirstPhase._attribute = mutate::on_first_phase::Attribute::lookup(_indexEnv.getProperties());
diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.h b/searchlib/src/vespa/searchlib/fef/ranksetup.h
index d8b977a0331..f20ecd4b42b 100644
--- a/searchlib/src/vespa/searchlib/fef/ranksetup.h
+++ b/searchlib/src/vespa/searchlib/fef/ranksetup.h
@@ -80,6 +80,7 @@ private:
double _global_filter_lower_limit;
double _global_filter_upper_limit;
double _target_hits_max_adjustment_factor;
+ double _weakand_range;
vespalib::FuzzyMatchingAlgorithm _fuzzy_matching_algorithm;
MutateOperation _mutateOnMatch;
MutateOperation _mutateOnFirstPhase;
@@ -402,6 +403,8 @@ public:
double get_target_hits_max_adjustment_factor() const { return _target_hits_max_adjustment_factor; }
void set_fuzzy_matching_algorithm(vespalib::FuzzyMatchingAlgorithm v) { _fuzzy_matching_algorithm = v; }
vespalib::FuzzyMatchingAlgorithm get_fuzzy_matching_algorithm() const { return _fuzzy_matching_algorithm; }
+ void set_weakand_range(double v) { _weakand_range = v; }
+ double get_weakand_range() const { return _weakand_range; }
/**
* This method may be used to indicate that certain features
diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
index 74c8b3534b8..99f7604e1a3 100644
--- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
@@ -492,9 +492,9 @@ WeakAndBlueprint::createIntermediateSearch(MultiSearch::Children sub_searches,
_weights[i],
getChild(i).getState().estimate().estHits);
}
- return (true)
+ return (_idf_range == 0.0)
? WeakAndSearch::create(terms, wand::TermFrequencyScorer(), _n, strict())
- : WeakAndSearch::create(terms, wand::Bm25TermFrequencyScorer(get_docid_limit(), 1.0), _n, strict());
+ : WeakAndSearch::create(terms, wand::Bm25TermFrequencyScorer(get_docid_limit(), _idf_range), _n, strict());
}
SearchIterator::UP
diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
index ade4c9318e4..7f4796c5f43 100644
--- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
+++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
@@ -90,6 +90,7 @@ class WeakAndBlueprint : public IntermediateBlueprint
{
private:
uint32_t _n;
+ float _idf_range;
std::vector<uint32_t> _weights;
AnyFlow my_flow(InFlow in_flow) const override;
@@ -107,7 +108,8 @@ public:
fef::MatchData &md) const override;
SearchIterator::UP createFilterSearch(FilterConstraint constraint) const override;
- explicit WeakAndBlueprint(uint32_t n) noexcept : _n(n) {}
+ explicit WeakAndBlueprint(uint32_t n) noexcept : WeakAndBlueprint(n, 0.0) {}
+ WeakAndBlueprint(uint32_t n, float idf_range) noexcept : _n(n), _idf_range(idf_range), _weights() {}
~WeakAndBlueprint() override;
void addTerm(Blueprint::UP bp, uint32_t weight) {
addChild(std::move(bp));
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h b/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h
index 69901993dfe..bc666c9996b 100644
--- a/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h
@@ -384,7 +384,7 @@ DualHeap<FutureHeap, PastHeap>::stringify() const {
}
//-----------------------------------------------------------------------------
-#define TermFrequencyScorer_TERM_SCORE_FACTOR 1000000.0
+constexpr double TermFrequencyScorer_TERM_SCORE_FACTOR = 1000000.0;
/**
* Scorer used with WeakAndAlgorithm that calculates a pseudo term frequency
@@ -416,9 +416,13 @@ public:
_range(range),
_max_idf(Bm25Executor::calculate_inverse_document_frequency(1, _num_docs))
{ }
+ double apply_range(double idf) const noexcept {
+ return (1.0 - _range)*_max_idf + _range * idf;
+ }
// weight * idf, scaled to fixedpoint
score_t calculateMaxScore(double estHits, double weight) const noexcept {
- return weight * Bm25Executor::calculate_inverse_document_frequency(estHits, _num_docs);
+ return score_t(TermFrequencyScorer_TERM_SCORE_FACTOR * weight *
+ apply_range(Bm25Executor::calculate_inverse_document_frequency(estHits, _num_docs)));
}
score_t calculateMaxScore(const Term &term) const noexcept {
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp
index 58ffcfe17bc..b0e1fee8e51 100644
--- a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp
@@ -145,6 +145,7 @@ WeakAndSearch::create(const Terms &terms, uint32_t n, bool strict)
//-----------------------------------------------------------------------------
template SearchIterator::UP WeakAndSearch::create<wand::TermFrequencyScorer>(const Terms &terms, const wand::TermFrequencyScorer & scorer, uint32_t n, bool strict);
+template SearchIterator::UP WeakAndSearch::create<wand::Bm25TermFrequencyScorer>(const Terms &terms, const wand::Bm25TermFrequencyScorer & scorer, uint32_t n, bool strict);
template SearchIterator::UP WeakAndSearch::createArrayWand<wand::TermFrequencyScorer>(const Terms &terms, const wand::TermFrequencyScorer & scorer, uint32_t n, bool strict);
template SearchIterator::UP WeakAndSearch::createHeapWand<wand::TermFrequencyScorer>(const Terms &terms, const wand::TermFrequencyScorer & scorer, uint32_t n, bool strict);