diff options
author | Geir Storli <geirst@yahooinc.com> | 2024-02-02 15:33:36 +0000 |
---|---|---|
committer | Geir Storli <geirst@yahooinc.com> | 2024-02-02 15:33:36 +0000 |
commit | 2a051981b4f41c0d6e35f2c8d65ece7c47b994e3 (patch) | |
tree | 556260c5c7ea2c691b776114fe7600c53973b55f /searchlib/src/tests | |
parent | fdff9a2b553d2c3c0c81aca3bd8bb6d9a491e443 (diff) |
Tag hit estimates from attribute search contexts as unknown when applicable.
Diffstat (limited to 'searchlib/src/tests')
3 files changed, 51 insertions, 13 deletions
diff --git a/searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp b/searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp index 79bdd83dc88..93ae9cb13cb 100644 --- a/searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp +++ b/searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp @@ -76,15 +76,17 @@ bool is_strict_hit_with_weight(Iterator& iter, TermFieldMatchData& match, EXPECT_EQUAL(weight, match.getWeight())); } -TEST_F("approximateHits() returns document count of reference attribute when not using fast-search target attribute", Fixture) { +TEST_F("calc_hit_estimate() returns document count of reference attribute when not using fast-search target attribute", Fixture) { add_n_docs_with_undefined_values(*f.target_attr, 10); add_n_docs_with_undefined_values(*f.reference_attr, 101); auto ctx = f.create_context(word_term("foo")); - EXPECT_EQUAL(101u, ctx->approximateHits()); + auto est = ctx->calc_hit_estimate(); + EXPECT_EQUAL(101u, est.est_hits()); + EXPECT_TRUE(est.is_unknown()); } -TEST_F("approximateHits() estimates hits when using fast-search target attribute", Fixture(false, FastSearchConfig::ExplicitlyEnabled)) +TEST_F("calc_hit_estimate() estimates hits when using fast-search target attribute", Fixture(false, FastSearchConfig::ExplicitlyEnabled)) { constexpr uint32_t target_docs = 1000; constexpr uint32_t docs = 10000; @@ -115,16 +117,22 @@ TEST_F("approximateHits() estimates hits when using fast-search target attribute f.reference_attr->commit(); auto ctx = f.create_context(word_term("10")); // Exact count: 0 target hits => 0 - EXPECT_EQUAL(0u, ctx->approximateHits()); + auto est = ctx->calc_hit_estimate(); + EXPECT_EQUAL(0u, est.est_hits()); + EXPECT_FALSE(est.is_unknown()); TermFieldMatchData match; auto iter = f.create_iterator(*ctx, match, false); EXPECT_TRUE(iter->matches_any() == Trinary::False); ctx = f.create_context(word_term("20")); // Exact count: 2 target hits, 2 docs / target doc => 2 * 2 = 4 - EXPECT_EQUAL(4u, ctx->approximateHits()); + est = ctx->calc_hit_estimate(); + EXPECT_EQUAL(4u, est.est_hits()); + EXPECT_FALSE(est.is_unknown()); ctx = f.create_context(word_term("30")); // Approximation: 110 target hits => 110 * 10001 / 1001 = 1099 - EXPECT_EQUAL(1099u, ctx->approximateHits()); + est = ctx->calc_hit_estimate(); + EXPECT_EQUAL(1099u, est.est_hits()); + EXPECT_FALSE(est.is_unknown()); } TEST_F("attributeName() returns imported attribute name", Fixture) { diff --git a/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp b/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp index 3d3ff469546..ed91cefd35c 100644 --- a/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp +++ b/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp @@ -498,8 +498,10 @@ PostingListAttributeTest::checkSearch(bool useBitVector, bool need_unpack, bool SearchContextPtr sc = getSearch(vec, term, false, attribute::SearchContextParams().useBitVector(useBitVector)); EXPECT_FALSE( ! sc ); sc->fetchPostings(queryeval::ExecuteInfo::TRUE); - size_t approx = sc->approximateHits(); - EXPECT_EQ(numHits, approx); + auto est = sc->calc_hit_estimate(); + uint32_t est_hits = est.est_hits(); + EXPECT_FALSE(est.is_unknown()); + EXPECT_EQ(numHits, est_hits); if (docBegin == 0) { // Approximation does not know about the special 0 // But the iterator does.... @@ -523,7 +525,7 @@ PostingListAttributeTest::checkSearch(bool useBitVector, bool need_unpack, bool hits++; } EXPECT_EQ(numHits, hits); - EXPECT_GE(approx, hits); + EXPECT_GE(est_hits, hits); EXPECT_EQ(docEnd, lastDocId+1); } diff --git a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp index 741a86b0beb..85b13c20f88 100644 --- a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp +++ b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp @@ -60,6 +60,7 @@ using largeint_t = AttributeVector::largeint_t; using attribute::BasicType; using attribute::CollectionType; using attribute::Config; +using attribute::HitEstimate; using attribute::SearchContextParams; using attribute::test::AttributeBuilder; using fef::MatchData; @@ -87,6 +88,12 @@ public: DocSet::DocSet() noexcept = default; DocSet::~DocSet() = default; +bool is_flag_attribute(const Config& cfg) { + return cfg.fastSearch() && + (cfg.basicType() == BasicType::INT8) && + (cfg.collectionType() == CollectionType::ARRAY); +} + template <typename V, typename T> class PostingList { @@ -104,6 +111,21 @@ public: DocSet & getHits() { return _hits; } const DocSet & getHits() const { return _hits; } uint32_t getHitCount() const { return _hits.size(); } + attribute::HitEstimate expected_hit_estimate() const { + if (getHitCount() == 0) { + return HitEstimate(0); + } + uint32_t docid_limit = _vec->getStatus().getNumDocs(); + if (is_flag_attribute(_vec->getConfig())) { + return HitEstimate::unknown(docid_limit); + } else if (_vec->getConfig().fastSearch()) { + return HitEstimate(getHitCount()); + } else if (_vec->getConfig().collectionType() == CollectionType::SINGLE) { + return HitEstimate::unknown(docid_limit); + } else { + return HitEstimate::unknown(std::max((uint64_t)docid_limit, _vec->getStatus().getNumValues())); + } + } }; template <typename V, typename T> @@ -166,7 +188,7 @@ private: void testSearchIteratorConformance(); // test search functionality template <typename V, typename T> - void testFind(const PostingList<V, T> & first); + void testFind(const PostingList<V, T> & first, bool verify_hit_estimate); template <typename V, typename T> void testSearch(V & attribute, uint32_t numDocs, const std::vector<T> & values); @@ -536,10 +558,16 @@ SearchContextTest::checkResultSet(const ResultSet & rs, const DocSet & expected, //----------------------------------------------------------------------------- template <typename V, typename T> void -SearchContextTest::testFind(const PostingList<V, T> & pl) +SearchContextTest::testFind(const PostingList<V, T> & pl, bool verify_hit_estimate) { { // strict search iterator SearchContextPtr sc = getSearch(pl.getAttribute(), pl.getValue()); + if (verify_hit_estimate) { + auto act_est = sc->calc_hit_estimate(); + auto exp_est = pl.expected_hit_estimate(); + EXPECT_EQUAL(exp_est.est_hits(), act_est.est_hits()); + EXPECT_EQUAL(exp_est.is_unknown(), act_est.is_unknown()); + } sc->fetchPostings(queryeval::ExecuteInfo::TRUE); TermFieldMatchData dummy; SearchBasePtr sb = sc->createIterator(&dummy, true); @@ -571,7 +599,7 @@ SearchContextTest::testSearch(V & attribute, uint32_t numDocs, const std::vector // test find() for (const auto & list : lists) { - testFind(list); + testFind(list, true); } } @@ -591,7 +619,7 @@ SearchContextTest::testMultiValueSearchHelper(V & vec, const std::vector<T> & va for (const auto & list : lists) { //std::cout << "testFind(lists[" << i << "]): value = " << lists[i].getValue() // << ", hit count = " << lists[i].getHitCount() << std::endl; - testFind(list); + testFind(list, false); } } |