summary refs log tree commit diff stats
path: root/searchlib/src/tests
diff options
context:
space:
mode:
author Geir Storli <geirst@yahooinc.com> 2024-02-02 15:33:36 +0000
committer Geir Storli <geirst@yahooinc.com> 2024-02-02 15:33:36 +0000
commit 2a051981b4f41c0d6e35f2c8d65ece7c47b994e3 (patch)
tree 556260c5c7ea2c691b776114fe7600c53973b55f /searchlib/src/tests
parent fdff9a2b553d2c3c0c81aca3bd8bb6d9a491e443 (diff)
Tag hit estimates from attribute search contexts as unknown when applicable.
Diffstat (limited to 'searchlib/src/tests')
-rw-r--r-- searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp 20
-rw-r--r-- searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp 8
-rw-r--r-- searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp 36
3 files changed, 51 insertions, 13 deletions
diff --git a/searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp b/searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp
index 79bdd83dc88..93ae9cb13cb 100644
--- a/searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp
+++ b/searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp
@@ -76,15 +76,17 @@ bool is_strict_hit_with_weight(Iterator& iter, TermFieldMatchData& match,
EXPECT_EQUAL(weight, match.getWeight()));
}
-TEST_F("approximateHits() returns document count of reference attribute when not using fast-search target attribute", Fixture) {
+TEST_F("calc_hit_estimate() returns document count of reference attribute when not using fast-search target attribute", Fixture) {
add_n_docs_with_undefined_values(*f.target_attr, 10);
add_n_docs_with_undefined_values(*f.reference_attr, 101);
auto ctx = f.create_context(word_term("foo"));
- EXPECT_EQUAL(101u, ctx->approximateHits());
+ auto est = ctx->calc_hit_estimate();
+ EXPECT_EQUAL(101u, est.est_hits());
+ EXPECT_TRUE(est.is_unknown());
}
-TEST_F("approximateHits() estimates hits when using fast-search target attribute", Fixture(false, FastSearchConfig::ExplicitlyEnabled))
+TEST_F("calc_hit_estimate() estimates hits when using fast-search target attribute", Fixture(false, FastSearchConfig::ExplicitlyEnabled))
{
constexpr uint32_t target_docs = 1000;
constexpr uint32_t docs = 10000;
@@ -115,16 +117,22 @@ TEST_F("approximateHits() estimates hits when using fast-search target attribute
f.reference_attr->commit();
auto ctx = f.create_context(word_term("10"));
// Exact count: 0 target hits => 0
- EXPECT_EQUAL(0u, ctx->approximateHits());
+ auto est = ctx->calc_hit_estimate();
+ EXPECT_EQUAL(0u, est.est_hits());
+ EXPECT_FALSE(est.is_unknown());
TermFieldMatchData match;
auto iter = f.create_iterator(*ctx, match, false);
EXPECT_TRUE(iter->matches_any() == Trinary::False);
ctx = f.create_context(word_term("20"));
// Exact count: 2 target hits, 2 docs / target doc => 2 * 2 = 4
- EXPECT_EQUAL(4u, ctx->approximateHits());
+ est = ctx->calc_hit_estimate();
+ EXPECT_EQUAL(4u, est.est_hits());
+ EXPECT_FALSE(est.is_unknown());
ctx = f.create_context(word_term("30"));
// Approximation: 110 target hits => 110 * 10001 / 1001 = 1099
- EXPECT_EQUAL(1099u, ctx->approximateHits());
+ est = ctx->calc_hit_estimate();
+ EXPECT_EQUAL(1099u, est.est_hits());
+ EXPECT_FALSE(est.is_unknown());
}
TEST_F("attributeName() returns imported attribute name", Fixture) {
diff --git a/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp b/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp
index 3d3ff469546..ed91cefd35c 100644
--- a/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp
+++ b/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp
@@ -498,8 +498,10 @@ PostingListAttributeTest::checkSearch(bool useBitVector, bool need_unpack, bool
SearchContextPtr sc = getSearch(vec, term, false, attribute::SearchContextParams().useBitVector(useBitVector));
EXPECT_FALSE( ! sc );
sc->fetchPostings(queryeval::ExecuteInfo::TRUE);
- size_t approx = sc->approximateHits();
- EXPECT_EQ(numHits, approx);
+ auto est = sc->calc_hit_estimate();
+ uint32_t est_hits = est.est_hits();
+ EXPECT_FALSE(est.is_unknown());
+ EXPECT_EQ(numHits, est_hits);
if (docBegin == 0) {
// Approximation does not know about the special 0
// But the iterator does....
@@ -523,7 +525,7 @@ PostingListAttributeTest::checkSearch(bool useBitVector, bool need_unpack, bool
hits++;
}
EXPECT_EQ(numHits, hits);
- EXPECT_GE(approx, hits);
+ EXPECT_GE(est_hits, hits);
EXPECT_EQ(docEnd, lastDocId+1);
}
diff --git a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp
index 741a86b0beb..85b13c20f88 100644
--- a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp
+++ b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp
@@ -60,6 +60,7 @@ using largeint_t = AttributeVector::largeint_t;
using attribute::BasicType;
using attribute::CollectionType;
using attribute::Config;
+using attribute::HitEstimate;
using attribute::SearchContextParams;
using attribute::test::AttributeBuilder;
using fef::MatchData;
@@ -87,6 +88,12 @@ public:
DocSet::DocSet() noexcept = default;
DocSet::~DocSet() = default;
+bool is_flag_attribute(const Config& cfg) {
+ return cfg.fastSearch() &&
+ (cfg.basicType() == BasicType::INT8) &&
+ (cfg.collectionType() == CollectionType::ARRAY);
+}
+
template <typename V, typename T>
class PostingList
{
@@ -104,6 +111,21 @@ public:
DocSet & getHits() { return _hits; }
const DocSet & getHits() const { return _hits; }
uint32_t getHitCount() const { return _hits.size(); }
+ attribute::HitEstimate expected_hit_estimate() const {
+ if (getHitCount() == 0) {
+ return HitEstimate(0);
+ }
+ uint32_t docid_limit = _vec->getStatus().getNumDocs();
+ if (is_flag_attribute(_vec->getConfig())) {
+ return HitEstimate::unknown(docid_limit);
+ } else if (_vec->getConfig().fastSearch()) {
+ return HitEstimate(getHitCount());
+ } else if (_vec->getConfig().collectionType() == CollectionType::SINGLE) {
+ return HitEstimate::unknown(docid_limit);
+ } else {
+ return HitEstimate::unknown(std::max((uint64_t)docid_limit, _vec->getStatus().getNumValues()));
+ }
+ }
};
template <typename V, typename T>
@@ -166,7 +188,7 @@ private:
void testSearchIteratorConformance();
// test search functionality
template <typename V, typename T>
- void testFind(const PostingList<V, T> & first);
+ void testFind(const PostingList<V, T> & first, bool verify_hit_estimate);
template <typename V, typename T>
void testSearch(V & attribute, uint32_t numDocs, const std::vector<T> & values);
@@ -536,10 +558,16 @@ SearchContextTest::checkResultSet(const ResultSet & rs, const DocSet & expected,
//-----------------------------------------------------------------------------
template <typename V, typename T>
void
-SearchContextTest::testFind(const PostingList<V, T> & pl)
+SearchContextTest::testFind(const PostingList<V, T> & pl, bool verify_hit_estimate)
{
{ // strict search iterator
SearchContextPtr sc = getSearch(pl.getAttribute(), pl.getValue());
+ if (verify_hit_estimate) {
+ auto act_est = sc->calc_hit_estimate();
+ auto exp_est = pl.expected_hit_estimate();
+ EXPECT_EQUAL(exp_est.est_hits(), act_est.est_hits());
+ EXPECT_EQUAL(exp_est.is_unknown(), act_est.is_unknown());
+ }
sc->fetchPostings(queryeval::ExecuteInfo::TRUE);
TermFieldMatchData dummy;
SearchBasePtr sb = sc->createIterator(&dummy, true);
@@ -571,7 +599,7 @@ SearchContextTest::testSearch(V & attribute, uint32_t numDocs, const std::vector
// test find()
for (const auto & list : lists) {
- testFind(list);
+ testFind(list, true);
}
}
@@ -591,7 +619,7 @@ SearchContextTest::testMultiValueSearchHelper(V & vec, const std::vector<T> & va
for (const auto & list : lists) {
//std::cout << "testFind(lists[" << i << "]): value = " << lists[i].getValue()
// << ", hit count = " << lists[i].getHitCount() << std::endl;
- testFind(list);
+ testFind(list, false);
}
}