diff options
7 files changed, 41 insertions, 47 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/matching/querynodes.cpp b/searchcore/src/vespa/searchcore/proton/matching/querynodes.cpp index 6d810594aa7..bb8a669f91a 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/querynodes.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/querynodes.cpp @@ -34,10 +34,10 @@ ProtonTermData & ProtonTermData::operator = (const ProtonTermData &) = default; ProtonTermData::~ProtonTermData() = default; void -ProtonTermData::setDocumentFrequency(double freq) +ProtonTermData::propagate_document_frequency(uint32_t matching_doc_count, uint32_t total_doc_count) { for (size_t i = 0; i < _fields.size(); ++i) { - _fields[i].setDocFreq(freq); + _fields[i].setDocFreq(matching_doc_count, total_doc_count); } } @@ -97,10 +97,9 @@ void ProtonTermData::setDocumentFrequency(uint32_t estHits, uint32_t docIdLimit) { if (docIdLimit > 1) { - double hits = estHits; - setDocumentFrequency(hits / (docIdLimit - 1)); + propagate_document_frequency(estHits, docIdLimit - 1); } else { - setDocumentFrequency(0.0); + propagate_document_frequency(0, 1); } } diff --git a/searchcore/src/vespa/searchcore/proton/matching/querynodes.h b/searchcore/src/vespa/searchcore/proton/matching/querynodes.h index 8cf65c1e67b..6454845b247 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/querynodes.h +++ b/searchcore/src/vespa/searchcore/proton/matching/querynodes.h @@ -47,7 +47,7 @@ public: private: std::vector<FieldEntry> _fields; - void setDocumentFrequency(double docFreq); + void propagate_document_frequency(uint32_t matching_count_doc, uint32_t total_doc_count); protected: void resolve(const ViewResolver &resolver, diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp index 626a470cb5c..70250b05bf1 100644 --- a/searchlib/src/tests/features/prod_features.cpp +++ b/searchlib/src/tests/features/prod_features.cpp @@ -1968,8 +1968,10 @@ Test::testTerm() .addField(FieldType::INDEX, CollectionType::SINGLE, "idx2") // field 1 .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "attr"); // field 2 ft.getQueryEnv().getBuilder().addAllFields().setUniqueId(0); - ft.getQueryEnv().getBuilder().addAllFields().setUniqueId(1).setWeight(search::query::Weight(200)).lookupField(0)->setDocFreq(0.5); - ft.getQueryEnv().getBuilder().addAttributeNode("attr")->setUniqueId(2).setWeight(search::query::Weight(400)).lookupField(2)->setDocFreq(0.25); + ft.getQueryEnv().getBuilder().addAllFields().setUniqueId(1) + .setWeight(search::query::Weight(200)).lookupField(0)->setDocFreq(50, 100); + ft.getQueryEnv().getBuilder().addAttributeNode("attr")->setUniqueId(2) + .setWeight(search::query::Weight(400)).lookupField(2)->setDocFreq(25, 100); // setup connectedness between term 1 and term 0 ft.getQueryEnv().getProperties().add("vespa.term.1.connexity", "0"); ft.getQueryEnv().getProperties().add("vespa.term.1.connexity", "0.7"); diff --git a/searchlib/src/tests/fef/termfieldmodel/termfieldmodel_test.cpp b/searchlib/src/tests/fef/termfieldmodel/termfieldmodel_test.cpp index 9ed94c02287..3a0c334fbba 100644 --- a/searchlib/src/tests/fef/termfieldmodel/termfieldmodel_test.cpp +++ b/searchlib/src/tests/fef/termfieldmodel/termfieldmodel_test.cpp @@ -50,7 +50,7 @@ void testSetup(State &state) { { int i = 1; for (SFR iter(state.term); iter.valid(); iter.next()) { - iter.get().setDocFreq(0.25 * i++); + iter.get().setDocFreq(25 * i++, 100); } } diff --git a/searchlib/src/vespa/searchlib/fef/itermfielddata.h b/searchlib/src/vespa/searchlib/fef/itermfielddata.h index 80343db2250..6fb467ce25c 100644 --- a/searchlib/src/vespa/searchlib/fef/itermfielddata.h +++ b/searchlib/src/vespa/searchlib/fef/itermfielddata.h @@ -27,13 +27,26 @@ public: **/ virtual uint32_t getFieldId() const = 0; + + /** + * Returns the number of documents matching this term. + */ + virtual uint32_t get_matching_doc_count() const = 0; + + /** + * Returns the total number of documents in the corpus. + */ + virtual uint32_t get_total_doc_count() const = 0; + /** * Obtain the document frequency. This is a value between 0 and 1 * indicating the ratio of the matching documents to the corpus. * * @return document frequency - **/ - virtual double getDocFreq() const = 0; + **/ + double getDocFreq() const { + return (double)get_matching_doc_count() / (double)get_total_doc_count(); + } /** * Obtain the match handle for this field, diff --git a/searchlib/src/vespa/searchlib/fef/simpletermfielddata.cpp b/searchlib/src/vespa/searchlib/fef/simpletermfielddata.cpp index d1edee7fd07..64906eed22e 100644 --- a/searchlib/src/vespa/searchlib/fef/simpletermfielddata.cpp +++ b/searchlib/src/vespa/searchlib/fef/simpletermfielddata.cpp @@ -2,22 +2,22 @@ #include "simpletermfielddata.h" -namespace search { -namespace fef { +namespace search::fef { SimpleTermFieldData::SimpleTermFieldData(uint32_t fieldId) : _fieldId(fieldId), - _docFreq(0), + _matching_doc_count(0), + _total_doc_count(1), _handle(IllegalHandle) { } SimpleTermFieldData::SimpleTermFieldData(const ITermFieldData &rhs) : _fieldId(rhs.getFieldId()), - _docFreq(rhs.getDocFreq()), + _matching_doc_count(rhs.get_matching_doc_count()), + _total_doc_count(rhs.get_total_doc_count()), _handle(rhs.getHandle()) { } -} // namespace fef -} // namespace search +} diff --git a/searchlib/src/vespa/searchlib/fef/simpletermfielddata.h b/searchlib/src/vespa/searchlib/fef/simpletermfielddata.h index 6f0fbc9af64..d92d3a48f03 100644 --- a/searchlib/src/vespa/searchlib/fef/simpletermfielddata.h +++ b/searchlib/src/vespa/searchlib/fef/simpletermfielddata.h @@ -4,8 +4,7 @@ #include "itermfielddata.h" -namespace search { -namespace fef { +namespace search::fef { /** * Information about a single field that is being searched for a term @@ -17,7 +16,8 @@ class SimpleTermFieldData : public ITermFieldData { private: uint32_t _fieldId; - double _docFreq; + uint32_t _matching_doc_count; + uint32_t _total_doc_count; TermFieldHandle _handle; public: @@ -33,28 +33,14 @@ public: **/ SimpleTermFieldData(uint32_t fieldId); - /** - * Obtain the field id. - * - * @return field id - **/ uint32_t getFieldId() const override final { return _fieldId; } - /** - * Obtain the document frequency. - * - * @return document frequency - **/ - double getDocFreq() const override final { return _docFreq; } + uint32_t get_matching_doc_count() const override { return _matching_doc_count; } + + uint32_t get_total_doc_count() const override { return _total_doc_count; } using ITermFieldData::getHandle; - /** - * Obtain the match handle for this field, - * requesting match data with the given details in the corresponding TermFieldMatchData. - * - * @return match handle (or IllegalHandle) - **/ TermFieldHandle getHandle(MatchDataDetails requestedDetails) const override { (void) requestedDetails; return _handle; @@ -62,20 +48,15 @@ public: /** * Sets the document frequency. - * - * @return this object (for chaining) - * @param docFreq document frequency **/ - SimpleTermFieldData &setDocFreq(double docFreq) { - _docFreq = docFreq; + SimpleTermFieldData &setDocFreq(uint32_t matching_doc_count, uint32_t total_doc_count) { + _matching_doc_count = matching_doc_count; + _total_doc_count = total_doc_count; return *this; } /** * Sets the match handle for this field. - * - * @return this object (for chaining) - * @param handle match handle **/ SimpleTermFieldData &setHandle(TermFieldHandle handle) { _handle = handle; @@ -83,6 +64,5 @@ public: } }; -} // namespace fef -} // namespace search +} |