summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/querynodes.cpp 9
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/querynodes.h 2
-rw-r--r-- searchlib/src/tests/features/prod_features.cpp 6
-rw-r--r-- searchlib/src/tests/fef/termfieldmodel/termfieldmodel_test.cpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/fef/itermfielddata.h 17
-rw-r--r-- searchlib/src/vespa/searchlib/fef/simpletermfielddata.cpp 12
-rw-r--r-- searchlib/src/vespa/searchlib/fef/simpletermfielddata.h 40
7 files changed, 41 insertions, 47 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/matching/querynodes.cpp b/searchcore/src/vespa/searchcore/proton/matching/querynodes.cpp
index 6d810594aa7..bb8a669f91a 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/querynodes.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/querynodes.cpp
@@ -34,10 +34,10 @@ ProtonTermData & ProtonTermData::operator = (const ProtonTermData &) = default;
ProtonTermData::~ProtonTermData() = default;
void
-ProtonTermData::setDocumentFrequency(double freq)
+ProtonTermData::propagate_document_frequency(uint32_t matching_doc_count, uint32_t total_doc_count)
{
for (size_t i = 0; i < _fields.size(); ++i) {
- _fields[i].setDocFreq(freq);
+ _fields[i].setDocFreq(matching_doc_count, total_doc_count);
}
}
@@ -97,10 +97,9 @@ void
ProtonTermData::setDocumentFrequency(uint32_t estHits, uint32_t docIdLimit)
{
if (docIdLimit > 1) {
- double hits = estHits;
- setDocumentFrequency(hits / (docIdLimit - 1));
+ propagate_document_frequency(estHits, docIdLimit - 1);
} else {
- setDocumentFrequency(0.0);
+ propagate_document_frequency(0, 1);
}
}
diff --git a/searchcore/src/vespa/searchcore/proton/matching/querynodes.h b/searchcore/src/vespa/searchcore/proton/matching/querynodes.h
index 8cf65c1e67b..6454845b247 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/querynodes.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/querynodes.h
@@ -47,7 +47,7 @@ public:
private:
std::vector<FieldEntry> _fields;
- void setDocumentFrequency(double docFreq);
+ // Applies the given matching/total document counts to every field entry of this term.
+ void propagate_document_frequency(uint32_t matching_doc_count, uint32_t total_doc_count);
protected:
void resolve(const ViewResolver &resolver,
diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp
index 626a470cb5c..70250b05bf1 100644
--- a/searchlib/src/tests/features/prod_features.cpp
+++ b/searchlib/src/tests/features/prod_features.cpp
@@ -1968,8 +1968,10 @@ Test::testTerm()
.addField(FieldType::INDEX, CollectionType::SINGLE, "idx2") // field 1
.addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "attr"); // field 2
ft.getQueryEnv().getBuilder().addAllFields().setUniqueId(0);
- ft.getQueryEnv().getBuilder().addAllFields().setUniqueId(1).setWeight(search::query::Weight(200)).lookupField(0)->setDocFreq(0.5);
- ft.getQueryEnv().getBuilder().addAttributeNode("attr")->setUniqueId(2).setWeight(search::query::Weight(400)).lookupField(2)->setDocFreq(0.25);
+ ft.getQueryEnv().getBuilder().addAllFields().setUniqueId(1)
+ .setWeight(search::query::Weight(200)).lookupField(0)->setDocFreq(50, 100);
+ ft.getQueryEnv().getBuilder().addAttributeNode("attr")->setUniqueId(2)
+ .setWeight(search::query::Weight(400)).lookupField(2)->setDocFreq(25, 100);
// setup connectedness between term 1 and term 0
ft.getQueryEnv().getProperties().add("vespa.term.1.connexity", "0");
ft.getQueryEnv().getProperties().add("vespa.term.1.connexity", "0.7");
diff --git a/searchlib/src/tests/fef/termfieldmodel/termfieldmodel_test.cpp b/searchlib/src/tests/fef/termfieldmodel/termfieldmodel_test.cpp
index 9ed94c02287..3a0c334fbba 100644
--- a/searchlib/src/tests/fef/termfieldmodel/termfieldmodel_test.cpp
+++ b/searchlib/src/tests/fef/termfieldmodel/termfieldmodel_test.cpp
@@ -50,7 +50,7 @@ void testSetup(State &state) {
{
int i = 1;
for (SFR iter(state.term); iter.valid(); iter.next()) {
- iter.get().setDocFreq(0.25 * i++);
+ iter.get().setDocFreq(25 * i++, 100);
}
}
diff --git a/searchlib/src/vespa/searchlib/fef/itermfielddata.h b/searchlib/src/vespa/searchlib/fef/itermfielddata.h
index 80343db2250..6fb467ce25c 100644
--- a/searchlib/src/vespa/searchlib/fef/itermfielddata.h
+++ b/searchlib/src/vespa/searchlib/fef/itermfielddata.h
@@ -27,13 +27,26 @@ public:
**/
virtual uint32_t getFieldId() const = 0;
+
+ /**
+ * Returns the number of documents matching this term.
+ */
+ virtual uint32_t get_matching_doc_count() const = 0;
+
+ /**
+ * Returns the total number of documents in the corpus.
+ */
+ virtual uint32_t get_total_doc_count() const = 0;
+
/**
* Obtain the document frequency. This is a value between 0 and 1
* indicating the ratio of the matching documents to the corpus.
*
* @return document frequency
- **/
- virtual double getDocFreq() const = 0;
+ **/
+ double getDocFreq() const {
+ // A total of 0 has no meaningful ratio; report 0.0 instead of dividing by zero,
+ // which would yield NaN (0/0) or inf (n/0) and break the documented [0, 1] range.
+ const uint32_t total_doc_count = get_total_doc_count();
+ if (total_doc_count == 0) {
+ return 0.0;
+ }
+ return static_cast<double>(get_matching_doc_count()) / static_cast<double>(total_doc_count);
+ }
/**
* Obtain the match handle for this field,
diff --git a/searchlib/src/vespa/searchlib/fef/simpletermfielddata.cpp b/searchlib/src/vespa/searchlib/fef/simpletermfielddata.cpp
index d1edee7fd07..64906eed22e 100644
--- a/searchlib/src/vespa/searchlib/fef/simpletermfielddata.cpp
+++ b/searchlib/src/vespa/searchlib/fef/simpletermfielddata.cpp
@@ -2,22 +2,22 @@
#include "simpletermfielddata.h"
-namespace search {
-namespace fef {
+namespace search::fef {
SimpleTermFieldData::SimpleTermFieldData(uint32_t fieldId)
: _fieldId(fieldId),
- _docFreq(0),
+ _matching_doc_count(0),
+ _total_doc_count(1),
_handle(IllegalHandle)
{
}
SimpleTermFieldData::SimpleTermFieldData(const ITermFieldData &rhs)
: _fieldId(rhs.getFieldId()),
- _docFreq(rhs.getDocFreq()),
+ _matching_doc_count(rhs.get_matching_doc_count()),
+ _total_doc_count(rhs.get_total_doc_count()),
_handle(rhs.getHandle())
{
}
-} // namespace fef
-} // namespace search
+}
diff --git a/searchlib/src/vespa/searchlib/fef/simpletermfielddata.h b/searchlib/src/vespa/searchlib/fef/simpletermfielddata.h
index 6f0fbc9af64..d92d3a48f03 100644
--- a/searchlib/src/vespa/searchlib/fef/simpletermfielddata.h
+++ b/searchlib/src/vespa/searchlib/fef/simpletermfielddata.h
@@ -4,8 +4,7 @@
#include "itermfielddata.h"
-namespace search {
-namespace fef {
+namespace search::fef {
/**
* Information about a single field that is being searched for a term
@@ -17,7 +16,8 @@ class SimpleTermFieldData : public ITermFieldData
{
private:
uint32_t _fieldId;
- double _docFreq;
+ uint32_t _matching_doc_count;
+ uint32_t _total_doc_count;
TermFieldHandle _handle;
public:
@@ -33,28 +33,14 @@ public:
**/
SimpleTermFieldData(uint32_t fieldId);
- /**
- * Obtain the field id.
- *
- * @return field id
- **/
uint32_t getFieldId() const override final { return _fieldId; }
- /**
- * Obtain the document frequency.
- *
- * @return document frequency
- **/
- double getDocFreq() const override final { return _docFreq; }
+ uint32_t get_matching_doc_count() const override { return _matching_doc_count; }
+
+ uint32_t get_total_doc_count() const override { return _total_doc_count; }
using ITermFieldData::getHandle;
- /**
- * Obtain the match handle for this field,
- * requesting match data with the given details in the corresponding TermFieldMatchData.
- *
- * @return match handle (or IllegalHandle)
- **/
TermFieldHandle getHandle(MatchDataDetails requestedDetails) const override {
(void) requestedDetails;
return _handle;
@@ -62,20 +48,15 @@ public:
/**
* Sets the document frequency.
- *
- * @return this object (for chaining)
- * @param docFreq document frequency
**/
- SimpleTermFieldData &setDocFreq(double docFreq) {
- _docFreq = docFreq;
+ SimpleTermFieldData &setDocFreq(uint32_t matching_doc_count, uint32_t total_doc_count) {
+ _matching_doc_count = matching_doc_count;
+ _total_doc_count = total_doc_count;
return *this;
}
/**
* Sets the match handle for this field.
- *
- * @return this object (for chaining)
- * @param handle match handle
**/
SimpleTermFieldData &setHandle(TermFieldHandle handle) {
_handle = handle;
@@ -83,6 +64,5 @@ public:
}
};
-} // namespace fef
-} // namespace search
+}