From 9066be8c9738ea438cb3320fcbcaab2e13573706 Mon Sep 17 00:00:00 2001 From: Geir Storli Date: Mon, 17 Jun 2019 11:58:29 +0000 Subject: Reduce number of math operations in inner loop of bm25 feature. --- searchlib/src/vespa/searchlib/features/bm25_feature.cpp | 11 ++++++----- searchlib/src/vespa/searchlib/features/bm25_feature.h | 13 ++++++++----- 2 files changed, 14 insertions(+), 10 deletions(-) (limited to 'searchlib') diff --git a/searchlib/src/vespa/searchlib/features/bm25_feature.cpp b/searchlib/src/vespa/searchlib/features/bm25_feature.cpp index 6e889b48343..f2114e4705d 100644 --- a/searchlib/src/vespa/searchlib/features/bm25_feature.cpp +++ b/searchlib/src/vespa/searchlib/features/bm25_feature.cpp @@ -47,8 +47,8 @@ Bm25Executor::Bm25Executor(const fef::FieldInfo& field, : FeatureExecutor(), _terms(), _avg_field_length(avg_field_length), - _k1_param(k1_param), - _b_param(b_param) + _k1_mul_b(k1_param * b_param), + _k1_mul_one_minus_b(k1_param * (1 - b_param)) { for (size_t i = 0; i < env.getNumTerms(); ++i) { const ITermData* term = env.getTerm(i); @@ -56,7 +56,8 @@ Bm25Executor::Bm25Executor(const fef::FieldInfo& field, const ITermFieldData& term_field = term->field(j); if (field.id() == term_field.getFieldId()) { _terms.emplace_back(term_field.getHandle(MatchDataDetails::Cheap), - get_inverse_document_frequency(term_field, env, *term)); + get_inverse_document_frequency(term_field, env, *term), + k1_param); } } } @@ -86,8 +87,8 @@ Bm25Executor::execute(uint32_t doc_id) feature_t num_occs = term.tfmd->getNumOccs(); feature_t norm_field_length = ((feature_t)term.tfmd->getFieldLength()) / _avg_field_length; - feature_t numerator = term.inverse_doc_freq * num_occs * (_k1_param + 1); - feature_t denominator = num_occs + (_k1_param * (1 - _b_param + (_b_param * norm_field_length))); + feature_t numerator = num_occs * term.idf_mul_k1_plus_one; + feature_t denominator = num_occs + (_k1_mul_one_minus_b + _k1_mul_b * norm_field_length); score += numerator / denominator; } diff --git a/searchlib/src/vespa/searchlib/features/bm25_feature.h b/searchlib/src/vespa/searchlib/features/bm25_feature.h index 533c7487a2f..0afd14e7ac8 100644 --- a/searchlib/src/vespa/searchlib/features/bm25_feature.h +++ b/searchlib/src/vespa/searchlib/features/bm25_feature.h @@ -13,11 +13,11 @@ private: struct QueryTerm { fef::TermFieldHandle handle; const fef::TermFieldMatchData* tfmd; - double inverse_doc_freq; - QueryTerm(fef::TermFieldHandle handle_, double inverse_doc_freq_) + double idf_mul_k1_plus_one; + QueryTerm(fef::TermFieldHandle handle_, double inverse_doc_freq, double k1_param) : handle(handle_), tfmd(nullptr), - inverse_doc_freq(inverse_doc_freq_) + idf_mul_k1_plus_one(inverse_doc_freq * (k1_param + 1)) {} }; @@ -25,8 +25,11 @@ private: QueryTermVector _terms; double _avg_field_length; - double _k1_param; // Determines term frequency saturation characteristics. - double _b_param; // Adjusts the effects of the field length of the document matched compared to the average field length. + + // The 'k1' param determines term frequency saturation characteristics. + // The 'b' param adjusts the effects of the field length of the document matched compared to the average field length. + double _k1_mul_b; + double _k1_mul_one_minus_b; public: Bm25Executor(const fef::FieldInfo& field, -- cgit v1.2.3