aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Geir Storli <geirst@yahooinc.com> 2024-02-02 15:33:36 +0000
committer Geir Storli <geirst@yahooinc.com> 2024-02-02 15:33:36 +0000
commit 2a051981b4f41c0d6e35f2c8d65ece7c47b994e3 (patch)
tree 556260c5c7ea2c691b776114fe7600c53973b55f
parent fdff9a2b553d2c3c0c81aca3bd8bb6d9a491e443 (diff)
Tag hit estimates from attribute search contexts as unknown when applicable.
-rw-r--r-- searchcore/src/vespa/searchcore/proton/documentmetastore/search_context.cpp | 6
-rw-r--r-- searchcore/src/vespa/searchcore/proton/documentmetastore/search_context.h | 2
-rw-r--r-- searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp | 20
-rw-r--r-- searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp | 8
-rw-r--r-- searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp | 36
-rw-r--r-- searchlib/src/vespa/searchcommon/attribute/hit_estimate.h | 31
-rw-r--r-- searchlib/src/vespa/searchcommon/attribute/i_search_context.h | 3
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp | 4
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp | 2
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/empty_search_context.cpp | 6
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/empty_search_context.h | 2
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.cpp | 8
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.h | 2
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/imported_search_context.cpp | 18
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/imported_search_context.h | 3
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/ipostinglistsearchcontext.h | 3
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h | 10
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp | 8
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/search_context.cpp | 10
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/search_context.h | 2
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp | 15
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp | 2
22 files changed, 136 insertions, 65 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/search_context.cpp b/searchcore/src/vespa/searchcore/proton/documentmetastore/search_context.cpp
index 3b8759aac77..a93e7543ca1 100644
--- a/searchcore/src/vespa/searchcore/proton/documentmetastore/search_context.cpp
+++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/search_context.cpp
@@ -113,10 +113,10 @@ SearchContext::onFind(DocId, int32_t ) const
throw vespalib::IllegalStateException("The function is not implemented for documentmetastore::SearchContext");
}
-unsigned int
-SearchContext::approximateHits() const
+search::attribute::HitEstimate
+SearchContext::calc_hit_estimate() const
{
- return _isWord ? 1 : search::attribute::SearchContext::approximateHits();
+ return _isWord ? search::attribute::HitEstimate(1) : search::attribute::SearchContext::calc_hit_estimate();
}
SearchIterator::UP
diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/search_context.h b/searchcore/src/vespa/searchcore/proton/documentmetastore/search_context.h
index 9c868bb1454..d7bd2c7b630 100644
--- a/searchcore/src/vespa/searchcore/proton/documentmetastore/search_context.h
+++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/search_context.h
@@ -20,7 +20,7 @@ private:
document::GlobalId _gid;
uint32_t _docid_limit;
- unsigned int approximateHits() const override;
+ search::attribute::HitEstimate calc_hit_estimate() const override;
int32_t onFind(DocId docId, int32_t elemId, int32_t &weight) const override;
int32_t onFind(DocId docId, int32_t elemId) const override;
diff --git a/searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp b/searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp
index 79bdd83dc88..93ae9cb13cb 100644
--- a/searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp
+++ b/searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp
@@ -76,15 +76,17 @@ bool is_strict_hit_with_weight(Iterator& iter, TermFieldMatchData& match,
EXPECT_EQUAL(weight, match.getWeight()));
}
-TEST_F("approximateHits() returns document count of reference attribute when not using fast-search target attribute", Fixture) {
+TEST_F("calc_hit_estimate() returns document count of reference attribute when not using fast-search target attribute", Fixture) {
add_n_docs_with_undefined_values(*f.target_attr, 10);
add_n_docs_with_undefined_values(*f.reference_attr, 101);
auto ctx = f.create_context(word_term("foo"));
- EXPECT_EQUAL(101u, ctx->approximateHits());
+ auto est = ctx->calc_hit_estimate();
+ EXPECT_EQUAL(101u, est.est_hits());
+ EXPECT_TRUE(est.is_unknown());
}
-TEST_F("approximateHits() estimates hits when using fast-search target attribute", Fixture(false, FastSearchConfig::ExplicitlyEnabled))
+TEST_F("calc_hit_estimate() estimates hits when using fast-search target attribute", Fixture(false, FastSearchConfig::ExplicitlyEnabled))
{
constexpr uint32_t target_docs = 1000;
constexpr uint32_t docs = 10000;
@@ -115,16 +117,22 @@ TEST_F("approximateHits() estimates hits when using fast-search target attribute
f.reference_attr->commit();
auto ctx = f.create_context(word_term("10"));
// Exact count: 0 target hits => 0
- EXPECT_EQUAL(0u, ctx->approximateHits());
+ auto est = ctx->calc_hit_estimate();
+ EXPECT_EQUAL(0u, est.est_hits());
+ EXPECT_FALSE(est.is_unknown());
TermFieldMatchData match;
auto iter = f.create_iterator(*ctx, match, false);
EXPECT_TRUE(iter->matches_any() == Trinary::False);
ctx = f.create_context(word_term("20"));
// Exact count: 2 target hits, 2 docs / target doc => 2 * 2 = 4
- EXPECT_EQUAL(4u, ctx->approximateHits());
+ est = ctx->calc_hit_estimate();
+ EXPECT_EQUAL(4u, est.est_hits());
+ EXPECT_FALSE(est.is_unknown());
ctx = f.create_context(word_term("30"));
// Approximation: 110 target hits => 110 * 10001 / 1001 = 1099
- EXPECT_EQUAL(1099u, ctx->approximateHits());
+ est = ctx->calc_hit_estimate();
+ EXPECT_EQUAL(1099u, est.est_hits());
+ EXPECT_FALSE(est.is_unknown());
}
TEST_F("attributeName() returns imported attribute name", Fixture) {
diff --git a/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp b/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp
index 3d3ff469546..ed91cefd35c 100644
--- a/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp
+++ b/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp
@@ -498,8 +498,10 @@ PostingListAttributeTest::checkSearch(bool useBitVector, bool need_unpack, bool
SearchContextPtr sc = getSearch(vec, term, false, attribute::SearchContextParams().useBitVector(useBitVector));
EXPECT_FALSE( ! sc );
sc->fetchPostings(queryeval::ExecuteInfo::TRUE);
- size_t approx = sc->approximateHits();
- EXPECT_EQ(numHits, approx);
+ auto est = sc->calc_hit_estimate();
+ uint32_t est_hits = est.est_hits();
+ EXPECT_FALSE(est.is_unknown());
+ EXPECT_EQ(numHits, est_hits);
if (docBegin == 0) {
// Approximation does not know about the special 0
// But the iterator does....
@@ -523,7 +525,7 @@ PostingListAttributeTest::checkSearch(bool useBitVector, bool need_unpack, bool
hits++;
}
EXPECT_EQ(numHits, hits);
- EXPECT_GE(approx, hits);
+ EXPECT_GE(est_hits, hits);
EXPECT_EQ(docEnd, lastDocId+1);
}
diff --git a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp
index 741a86b0beb..85b13c20f88 100644
--- a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp
+++ b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp
@@ -60,6 +60,7 @@ using largeint_t = AttributeVector::largeint_t;
using attribute::BasicType;
using attribute::CollectionType;
using attribute::Config;
+using attribute::HitEstimate;
using attribute::SearchContextParams;
using attribute::test::AttributeBuilder;
using fef::MatchData;
@@ -87,6 +88,12 @@ public:
DocSet::DocSet() noexcept = default;
DocSet::~DocSet() = default;
+bool is_flag_attribute(const Config& cfg) {
+    // Treated as a flag attribute here: fast-search + int8 + array.
+    if (!cfg.fastSearch()) { return false; }
+    return (cfg.basicType() == BasicType::INT8) && (cfg.collectionType() == CollectionType::ARRAY);
+}
+
template <typename V, typename T>
class PostingList
{
@@ -104,6 +111,21 @@ public:
DocSet & getHits() { return _hits; }
const DocSet & getHits() const { return _hits; }
uint32_t getHitCount() const { return _hits.size(); }
+    attribute::HitEstimate expected_hit_estimate() const {
+        // Mirrors what the search context is expected to report for this posting list.
+        const uint32_t hit_count = getHitCount();
+        if (hit_count == 0) {
+            return HitEstimate(0);
+        }
+        const auto& cfg = _vec->getConfig();
+        const uint32_t doc_count = _vec->getStatus().getNumDocs();
+        if (cfg.fastSearch()) {
+            // Flag attributes are fast-search too, but are still expected to report an unknown estimate.
+            return is_flag_attribute(cfg) ? HitEstimate::unknown(doc_count) : HitEstimate(hit_count);
+        }
+        const bool single = (cfg.collectionType() == CollectionType::SINGLE);
+        return HitEstimate::unknown(single ? uint64_t(doc_count) : std::max(uint64_t(doc_count), _vec->getStatus().getNumValues()));
+    }
};
template <typename V, typename T>
@@ -166,7 +188,7 @@ private:
void testSearchIteratorConformance();
// test search functionality
template <typename V, typename T>
- void testFind(const PostingList<V, T> & first);
+ void testFind(const PostingList<V, T> & first, bool verify_hit_estimate);
template <typename V, typename T>
void testSearch(V & attribute, uint32_t numDocs, const std::vector<T> & values);
@@ -536,10 +558,16 @@ SearchContextTest::checkResultSet(const ResultSet & rs, const DocSet & expected,
//-----------------------------------------------------------------------------
template <typename V, typename T>
void
-SearchContextTest::testFind(const PostingList<V, T> & pl)
+SearchContextTest::testFind(const PostingList<V, T> & pl, bool verify_hit_estimate)
{
{ // strict search iterator
SearchContextPtr sc = getSearch(pl.getAttribute(), pl.getValue());
+ if (verify_hit_estimate) {
+ auto act_est = sc->calc_hit_estimate();
+ auto exp_est = pl.expected_hit_estimate();
+ EXPECT_EQUAL(exp_est.est_hits(), act_est.est_hits());
+ EXPECT_EQUAL(exp_est.is_unknown(), act_est.is_unknown());
+ }
sc->fetchPostings(queryeval::ExecuteInfo::TRUE);
TermFieldMatchData dummy;
SearchBasePtr sb = sc->createIterator(&dummy, true);
@@ -571,7 +599,7 @@ SearchContextTest::testSearch(V & attribute, uint32_t numDocs, const std::vector
// test find()
for (const auto & list : lists) {
- testFind(list);
+ testFind(list, true);
}
}
@@ -591,7 +619,7 @@ SearchContextTest::testMultiValueSearchHelper(V & vec, const std::vector<T> & va
for (const auto & list : lists) {
//std::cout << "testFind(lists[" << i << "]): value = " << lists[i].getValue()
// << ", hit count = " << lists[i].getHitCount() << std::endl;
- testFind(list);
+ testFind(list, false);
}
}
diff --git a/searchlib/src/vespa/searchcommon/attribute/hit_estimate.h b/searchlib/src/vespa/searchcommon/attribute/hit_estimate.h
new file mode 100644
index 00000000000..b9063bec1f6
--- /dev/null
+++ b/searchlib/src/vespa/searchcommon/attribute/hit_estimate.h
@@ -0,0 +1,31 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <stdint.h>
+
+namespace search::attribute {
+
+/**
+ * The number of hits an attribute search context is estimated to provide.
+ *
+ * A known estimate is created with the explicit constructor. When no estimate is known
+ * (e.g. for attributes without fast-search), use unknown() with the total number of values to
+ * match against (at least the docid limit); it is saturated at UINT32_MAX instead of wrapping.
+ */
+class HitEstimate {
+private:
+    uint32_t _est_hits; // estimated hits, or the total value count when _unknown is set
+    bool _unknown;      // true when _est_hits is not an actual hit estimate
+    HitEstimate(uint32_t est_hits_in, bool unknown_in) : _est_hits(est_hits_in), _unknown(unknown_in) {}
+public:
+    explicit HitEstimate(uint32_t est_hits_in) : HitEstimate(est_hits_in, false) {}
+    static HitEstimate unknown(uint64_t total_value_count) { // 64-bit in, so callers' value counts are not truncated
+        return HitEstimate(static_cast<uint32_t>(total_value_count < UINT32_MAX ? total_value_count : UINT32_MAX), true);
+    }
+    uint32_t est_hits() const { return _est_hits; }
+    bool is_unknown() const { return _unknown; }
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchcommon/attribute/i_search_context.h b/searchlib/src/vespa/searchcommon/attribute/i_search_context.h
index 67c88b25d83..8ea6f0f60af 100644
--- a/searchlib/src/vespa/searchcommon/attribute/i_search_context.h
+++ b/searchlib/src/vespa/searchcommon/attribute/i_search_context.h
@@ -2,6 +2,7 @@
#pragma once
+#include "hit_estimate.h"
#include <vespa/searchcommon/common/range.h>
#include <vespa/vespalib/stllike/string.h>
#include <memory>
@@ -27,7 +28,7 @@ private:
public:
virtual ~ISearchContext() = default;
- virtual unsigned int approximateHits() const = 0;
+ virtual HitEstimate calc_hit_estimate() const = 0;
/**
* Creates an attribute search iterator associated with this
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
index d5c67664a5e..8742f53bc8e 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
@@ -184,7 +184,7 @@ AttributeFieldBlueprint::AttributeFieldBlueprint(FieldSpecBase field, const IAtt
_search_context(attribute.createSearchContext(std::move(term), params)),
_type(OTHER)
{
- uint32_t estHits = _search_context->approximateHits();
+ uint32_t estHits = _search_context->calc_hit_estimate().est_hits();
HitEstimate estimate(estHits, estHits == 0);
setEstimate(estimate);
if (attribute.isFloatingPointType()) {
@@ -235,7 +235,7 @@ public:
query::SimpleRangeTerm rt(qr, "", 0, query::Weight(0));
string stack(StackDumpCreator::create(rt));
_rangeSearches.push_back(attr.createSearchContext(QueryTermDecoder::decodeTerm(stack), scParams));
- estHits += _rangeSearches.back()->approximateHits();
+ estHits += _rangeSearches.back()->calc_hit_estimate().est_hits();
LOG(debug, "Range '%s' estHits %" PRId64, qr.getRangeString().c_str(), estHits);
}
if (estHits > attr.getNumDocs()) {
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp
index d0353ab8947..a5003ba542b 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp
@@ -110,7 +110,7 @@ AttributeWeightedSetBlueprint::~AttributeWeightedSetBlueprint()
void
AttributeWeightedSetBlueprint::addToken(std::unique_ptr<ISearchContext> context, int32_t weight)
{
- _estHits = std::min(_estHits + context->approximateHits(), _numDocs);
+ _estHits = std::min(_estHits + context->calc_hit_estimate().est_hits(), _numDocs);
setEstimate(HitEstimate(_estHits, (_estHits == 0)));
_weights.push_back(weight);
_contexts.push_back(context.release());
diff --git a/searchlib/src/vespa/searchlib/attribute/empty_search_context.cpp b/searchlib/src/vespa/searchlib/attribute/empty_search_context.cpp
index 379118226a8..795cbc3d6f0 100644
--- a/searchlib/src/vespa/searchlib/attribute/empty_search_context.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/empty_search_context.cpp
@@ -24,10 +24,10 @@ EmptySearchContext::onFind(DocId, int32_t) const
return -1;
}
-unsigned int
-EmptySearchContext::approximateHits() const
+HitEstimate
+EmptySearchContext::calc_hit_estimate() const
{
- return 0u;
+ return HitEstimate(0);
}
uint32_t
diff --git a/searchlib/src/vespa/searchlib/attribute/empty_search_context.h b/searchlib/src/vespa/searchlib/attribute/empty_search_context.h
index 23e981f18a8..f946288df61 100644
--- a/searchlib/src/vespa/searchlib/attribute/empty_search_context.h
+++ b/searchlib/src/vespa/searchlib/attribute/empty_search_context.h
@@ -13,7 +13,7 @@ class EmptySearchContext : public SearchContext
{
int32_t onFind(DocId, int32_t, int32_t&) const override;
int32_t onFind(DocId, int32_t) const override;
- unsigned int approximateHits() const override;
+ HitEstimate calc_hit_estimate() const override;
std::unique_ptr<queryeval::SearchIterator> createIterator(fef::TermFieldMatchData*, bool) override;
std::unique_ptr<queryeval::SearchIterator> createFilterIterator(fef::TermFieldMatchData*, bool) override;
public:
diff --git a/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.cpp b/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.cpp
index 9c5218be16e..22362231cc1 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.cpp
@@ -54,12 +54,12 @@ EnumHintSearchContext::createPostingIterator(TermFieldMatchData *, bool )
}
-unsigned int
-EnumHintSearchContext::approximateHits() const
+HitEstimate
+EnumHintSearchContext::calc_hit_estimate() const
{
return (_uniqueValues == 0u)
- ? 0u
- : std::max(uint64_t(_docIdLimit), _numValues);
+ ? HitEstimate(0u)
+ : HitEstimate::unknown(std::max(uint64_t(_docIdLimit), _numValues));
}
}
diff --git a/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.h
index ee07f852af5..23a0a6c560b 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.h
+++ b/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.h
@@ -40,7 +40,7 @@ protected:
createPostingIterator(fef::TermFieldMatchData *matchData, bool strict) override;
void fetchPostings(const queryeval::ExecuteInfo & execInfo) override;
- unsigned int approximateHits() const override;
+ HitEstimate calc_hit_estimate() const override;
uint32_t get_committed_docid_limit() const noexcept { return _docIdLimit; }
};
diff --git a/searchlib/src/vespa/searchlib/attribute/imported_search_context.cpp b/searchlib/src/vespa/searchlib/attribute/imported_search_context.cpp
index b27a67e893b..79a021989fc 100644
--- a/searchlib/src/vespa/searchlib/attribute/imported_search_context.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/imported_search_context.cpp
@@ -97,21 +97,21 @@ ImportedSearchContext::calc_exact_hits() const
return sum_hits;
}
-unsigned int
-ImportedSearchContext::approximateHits() const
+HitEstimate
+ImportedSearchContext::calc_hit_estimate() const
{
- uint32_t target_approx_hits = _target_search_context->approximateHits();
- if (target_approx_hits == 0) {
+ uint32_t target_est_hits = _target_search_context->calc_hit_estimate().est_hits();
+ if (target_est_hits == 0) {
_zero_hits.store(true, std::memory_order_relaxed);
- return 0;
+ return HitEstimate(0);
}
if (!_target_attribute.getIsFastSearch()) {
- return _reference_attribute.getNumDocs();
+ return HitEstimate::unknown(_reference_attribute.getNumDocs());
}
- if (target_approx_hits >= MIN_TARGET_HITS_FOR_APPROXIMATION) {
- return calc_approx_hits(target_approx_hits);
+ if (target_est_hits >= MIN_TARGET_HITS_FOR_APPROXIMATION) {
+ return HitEstimate(calc_approx_hits(target_est_hits));
} else {
- return calc_exact_hits();
+ return HitEstimate(calc_exact_hits());
}
}
diff --git a/searchlib/src/vespa/searchlib/attribute/imported_search_context.h b/searchlib/src/vespa/searchlib/attribute/imported_search_context.h
index 6a0c43f8578..305a8bf2379 100644
--- a/searchlib/src/vespa/searchlib/attribute/imported_search_context.h
+++ b/searchlib/src/vespa/searchlib/attribute/imported_search_context.h
@@ -63,10 +63,9 @@ public:
const attribute::IAttributeVector &target_attribute);
~ImportedSearchContext() override;
-
std::unique_ptr<queryeval::SearchIterator>
createIterator(fef::TermFieldMatchData* matchData, bool strict) override;
- unsigned int approximateHits() const override;
+ HitEstimate calc_hit_estimate() const override;
void fetchPostings(const queryeval::ExecuteInfo &execInfo) override;
bool valid() const override;
Int64Range getAsIntegerTerm() const override;
diff --git a/searchlib/src/vespa/searchlib/attribute/ipostinglistsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/ipostinglistsearchcontext.h
index 38dfefb878b..ac1abc56329 100644
--- a/searchlib/src/vespa/searchlib/attribute/ipostinglistsearchcontext.h
+++ b/searchlib/src/vespa/searchlib/attribute/ipostinglistsearchcontext.h
@@ -2,6 +2,7 @@
#pragma once
+#include <vespa/searchcommon/attribute/hit_estimate.h>
#include <memory>
namespace search::queryeval {
@@ -30,7 +31,7 @@ protected:
public:
virtual void fetchPostings(const queryeval::ExecuteInfo & execInfo) = 0;
virtual std::unique_ptr<queryeval::SearchIterator> createPostingIterator(fef::TermFieldMatchData *matchData, bool strict) = 0;
- virtual unsigned int approximateHits() const = 0;
+ virtual HitEstimate calc_hit_estimate() const = 0;
};
}
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
index 3e0794835ae..41a12dce756 100644
--- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
@@ -119,7 +119,7 @@ protected:
createPostingIterator(fef::TermFieldMatchData *matchData, bool strict) override;
unsigned int singleHits() const;
- unsigned int approximateHits() const override;
+ HitEstimate calc_hit_estimate() const override;
void applyRangeLimit(long rangeLimit);
struct FillPart;
};
@@ -217,11 +217,11 @@ private:
void getIterators(bool shouldApplyRangeLimit);
bool valid() const override { return this->isValid(); }
- unsigned int approximateHits() const override {
- const unsigned int estimate = PostingListSearchContextT<DataT>::approximateHits();
+ HitEstimate calc_hit_estimate() const override {
+ HitEstimate estimate = PostingListSearchContextT<DataT>::calc_hit_estimate();
const unsigned int limit = std::abs(this->getRangeLimit());
- return ((limit > 0) && (limit < estimate))
- ? limit
+ return ((limit > 0) && (limit < estimate.est_hits()))
+ ? HitEstimate(limit)
: estimate;
}
void fetchPostings(const ExecuteInfo & execInfo) override {
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp
index f937d567588..77f773d0469 100644
--- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp
@@ -284,8 +284,8 @@ PostingListSearchContextT<DataT>::singleHits() const
}
template <typename DataT>
-unsigned int
-PostingListSearchContextT<DataT>::approximateHits() const
+HitEstimate
+PostingListSearchContextT<DataT>::calc_hit_estimate() const
{
size_t numHits = 0;
if (_uniqueValues == 0u) {
@@ -294,9 +294,9 @@ PostingListSearchContextT<DataT>::approximateHits() const
} else if (_dictionary.get_has_btree_dictionary()) {
numHits = estimated_hits_in_range();
} else {
- numHits = _docIdLimit;
+ return HitEstimate::unknown(_docIdLimit);
}
- return std::min(numHits, size_t(std::numeric_limits<uint32_t>::max()));
+ return HitEstimate(std::min(numHits, size_t(std::numeric_limits<uint32_t>::max())));
}
template <typename DataT>
diff --git a/searchlib/src/vespa/searchlib/attribute/search_context.cpp b/searchlib/src/vespa/searchlib/attribute/search_context.cpp
index e59326bc808..0c0318bf1e3 100644
--- a/searchlib/src/vespa/searchlib/attribute/search_context.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/search_context.cpp
@@ -10,20 +10,20 @@ using search::queryeval::SearchIterator;
namespace search::attribute {
-unsigned int
-SearchContext::approximateHits() const
+HitEstimate
+SearchContext::calc_hit_estimate() const
{
if (_plsc != nullptr) {
- return _plsc->approximateHits();
+ return _plsc->calc_hit_estimate();
}
- return std::max(uint64_t(_attr.getNumDocs()), _attr.getStatus().getNumValues());
+ return HitEstimate::unknown(std::max(uint64_t(_attr.getNumDocs()), _attr.getStatus().getNumValues()));
}
std::unique_ptr<SearchIterator>
SearchContext::createIterator(fef::TermFieldMatchData* matchData, bool strict)
{
if (_plsc != nullptr) {
- std::unique_ptr<SearchIterator> res = _plsc->createPostingIterator(matchData, strict);
+ auto res = _plsc->createPostingIterator(matchData, strict);
if (res) {
return res;
}
diff --git a/searchlib/src/vespa/searchlib/attribute/search_context.h b/searchlib/src/vespa/searchlib/attribute/search_context.h
index a7e87ee86fc..a18e15ae8f4 100644
--- a/searchlib/src/vespa/searchlib/attribute/search_context.h
+++ b/searchlib/src/vespa/searchlib/attribute/search_context.h
@@ -32,7 +32,7 @@ public:
SearchContext& operator=(SearchContext&&) noexcept = delete;
~SearchContext() override = default;
- unsigned int approximateHits() const override;
+ HitEstimate calc_hit_estimate() const override;
std::unique_ptr<queryeval::SearchIterator> createIterator(fef::TermFieldMatchData* matchData, bool strict) override;
void fetchPostings(const queryeval::ExecuteInfo& execInfo) override;
bool valid() const override { return false; }
diff --git a/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp
index c3e47548f85..4713f452adc 100644
--- a/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp
@@ -18,6 +18,7 @@
namespace search {
using attribute::Config;
+using attribute::HitEstimate;
SingleBoolAttribute::
SingleBoolAttribute(const vespalib::string &baseFileName, const GrowStrategy & grow, bool paged)
@@ -131,7 +132,7 @@ public:
createFilterIterator(fef::TermFieldMatchData * matchData, bool strict) override;
void fetchPostings(const queryeval::ExecuteInfo &execInfo) override;
std::unique_ptr<queryeval::SearchIterator> createPostingIterator(fef::TermFieldMatchData *matchData, bool strict) override;
- unsigned int approximateHits() const override;
+ HitEstimate calc_hit_estimate() const override;
uint32_t get_committed_docid_limit() const noexcept override;
};
@@ -169,13 +170,13 @@ BitVectorSearchContext::createPostingIterator(fef::TermFieldMatchData *matchData
return createFilterIterator(matchData, strict);
}
-unsigned int
-BitVectorSearchContext::approximateHits() const {
+HitEstimate
+BitVectorSearchContext::calc_hit_estimate() const {
return valid()
- ? (_invert)
- ? (_bv.size() - _bv.countTrueBits())
- : _bv.countTrueBits()
- : 0;
+ ? (_invert
+ ? HitEstimate(_bv.size() - _bv.countTrueBits())
+ : HitEstimate(_bv.countTrueBits()))
+ : HitEstimate(0);
}
uint32_t
diff --git a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp
index 930f6ed3a21..6c619ee085b 100644
--- a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp
@@ -107,7 +107,7 @@ struct FakeContext : attribute::ISearchContext {
int32_t ignore_weight;
return onFind(docid, elem, ignore_weight);
}
- unsigned int approximateHits() const override { return 0; }
+ attribute::HitEstimate calc_hit_estimate() const override { return attribute::HitEstimate(0); }
std::unique_ptr<SearchIterator> createIterator(fef::TermFieldMatchData *, bool) override { abort(); }
void fetchPostings(const ExecuteInfo &) override { }
bool valid() const override { return true; }