summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Geir Storli <geirst@verizonmedia.com>, 2019-06-18 09:32:08 +0200
committer: GitHub <noreply@github.com>, 2019-06-18 09:32:08 +0200
commit: a62b4b191fcdde36066e9ea362e4ea2dd4fd0114 (patch)
tree: 391d0eb46af6681306f968b36584604fe6195e78
parent: 348803531e06434b30ad39624359c4a3d4ca41ec (diff)
parent: 9066be8c9738ea438cb3320fcbcaab2e13573706 (diff)
Merge pull request #9828 from vespa-engine/geirst/bm25-feature-optimize-inner-loop
Reduce number of math operations in inner loop of bm25 feature.
-rw-r--r-- searchlib/src/vespa/searchlib/features/bm25_feature.cpp | 11
-rw-r--r-- searchlib/src/vespa/searchlib/features/bm25_feature.h | 13
2 files changed, 14 insertions, 10 deletions
diff --git a/searchlib/src/vespa/searchlib/features/bm25_feature.cpp b/searchlib/src/vespa/searchlib/features/bm25_feature.cpp
index 6e889b48343..f2114e4705d 100644
--- a/searchlib/src/vespa/searchlib/features/bm25_feature.cpp
+++ b/searchlib/src/vespa/searchlib/features/bm25_feature.cpp
@@ -47,8 +47,8 @@ Bm25Executor::Bm25Executor(const fef::FieldInfo& field,
: FeatureExecutor(),
_terms(),
_avg_field_length(avg_field_length),
- _k1_param(k1_param),
- _b_param(b_param)
+ _k1_mul_b(k1_param * b_param),
+ _k1_mul_one_minus_b(k1_param * (1 - b_param))
{
for (size_t i = 0; i < env.getNumTerms(); ++i) {
const ITermData* term = env.getTerm(i);
@@ -56,7 +56,8 @@ Bm25Executor::Bm25Executor(const fef::FieldInfo& field,
const ITermFieldData& term_field = term->field(j);
if (field.id() == term_field.getFieldId()) {
_terms.emplace_back(term_field.getHandle(MatchDataDetails::Cheap),
- get_inverse_document_frequency(term_field, env, *term));
+ get_inverse_document_frequency(term_field, env, *term),
+ k1_param);
}
}
}
@@ -86,8 +87,8 @@ Bm25Executor::execute(uint32_t doc_id)
feature_t num_occs = term.tfmd->getNumOccs();
feature_t norm_field_length = ((feature_t)term.tfmd->getFieldLength()) / _avg_field_length;
- feature_t numerator = term.inverse_doc_freq * num_occs * (_k1_param + 1);
- feature_t denominator = num_occs + (_k1_param * (1 - _b_param + (_b_param * norm_field_length)));
+ feature_t numerator = num_occs * term.idf_mul_k1_plus_one;
+ feature_t denominator = num_occs + (_k1_mul_one_minus_b + _k1_mul_b * norm_field_length);
score += numerator / denominator;
}
diff --git a/searchlib/src/vespa/searchlib/features/bm25_feature.h b/searchlib/src/vespa/searchlib/features/bm25_feature.h
index 533c7487a2f..0afd14e7ac8 100644
--- a/searchlib/src/vespa/searchlib/features/bm25_feature.h
+++ b/searchlib/src/vespa/searchlib/features/bm25_feature.h
@@ -13,11 +13,11 @@ private:
struct QueryTerm {
fef::TermFieldHandle handle;
const fef::TermFieldMatchData* tfmd;
- double inverse_doc_freq;
- QueryTerm(fef::TermFieldHandle handle_, double inverse_doc_freq_)
+ double idf_mul_k1_plus_one;
+ QueryTerm(fef::TermFieldHandle handle_, double inverse_doc_freq, double k1_param)
: handle(handle_),
tfmd(nullptr),
- inverse_doc_freq(inverse_doc_freq_)
+ idf_mul_k1_plus_one(inverse_doc_freq * (k1_param + 1))
{}
};
@@ -25,8 +25,11 @@ private:
QueryTermVector _terms;
double _avg_field_length;
- double _k1_param; // Determines term frequency saturation characteristics.
- double _b_param; // Adjusts the effects of the field length of the document matched compared to the average field length.
+
+ // The 'k1' param determines term frequency saturation characteristics.
+ // The 'b' param adjusts the effects of the field length of the document matched compared to the average field length.
+ double _k1_mul_b;
+ double _k1_mul_one_minus_b;
public:
Bm25Executor(const fef::FieldInfo& field,