aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTor Egge <Tor.Egge@online.no>2022-11-14 12:37:47 +0100
committerTor Egge <Tor.Egge@online.no>2022-11-14 12:37:47 +0100
commit3cd84044662d3770bb580e89e39e7c699b19823a (patch)
tree2234a662e74f1d99e7fc8df1839c3caf28a2b255
parent64aa97631e1ac9c2b968efcfeb84be89a15666fc (diff)
Improve hit estimate for ImportedSearchContext.
-rw-r--r--searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp47
-rw-r--r--searchlib/src/vespa/searchlib/attribute/imported_search_context.cpp66
-rw-r--r--searchlib/src/vespa/searchlib/attribute/imported_search_context.h5
3 files changed, 114 insertions, 4 deletions
diff --git a/searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp b/searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp
index 6cb0dec3c1f..847a992d241 100644
--- a/searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp
+++ b/searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp
@@ -6,6 +6,7 @@
#include <vespa/searchlib/query/query_term_ucs4.h>
#include <vespa/searchlib/queryeval/simpleresult.h>
#include <vespa/searchlib/test/imported_attribute_fixture.h>
+#include <vespa/searchlib/test/mock_gid_to_lid_mapping.h>
#include <vespa/vespalib/test/insertion_operators.h>
#include <vespa/searchlib/queryeval/executeinfo.h>
@@ -75,13 +76,57 @@ bool is_strict_hit_with_weight(Iterator& iter, TermFieldMatchData& match,
EXPECT_EQUAL(weight, match.getWeight()));
}
-TEST_F("approximateHits() returns document count of reference attribute", Fixture) {
+TEST_F("approximateHits() returns document count of reference attribute when not using fast-search target attribute", Fixture) {
+ add_n_docs_with_undefined_values(*f.target_attr, 10);
add_n_docs_with_undefined_values(*f.reference_attr, 101);
auto ctx = f.create_context(word_term("foo"));
EXPECT_EQUAL(101u, ctx->approximateHits());
}
+TEST_F("approximateHits() estimates hits when using fast-search target attribute", Fixture(false, FastSearchConfig::ExplicitlyEnabled))
+{
+ constexpr uint32_t target_docs = 1000;
+ constexpr uint32_t docs = 10000;
+ constexpr uint32_t target_gids = 200;
+ f.target_attr->addReservedDoc();
+ add_n_docs_with_undefined_values(*f.target_attr, target_docs);
+ f.reference_attr->addReservedDoc();
+ add_n_docs_with_undefined_values(*f.reference_attr, docs);
+ auto target_attr = dynamic_cast<IntegerAttribute *>(f.target_attr.get());
+ // 2 documents with value 20, 110 documents with value 30.
+ for (uint32_t i = 1; i < 3; ++i) {
+ target_attr->update(i, 20);
+ }
+ for (uint32_t i = 10; i < 120; ++i) {
+ target_attr->update(i, 30);
+ }
+ f.target_attr->commit();
+ // Assign target gids
+ for (uint32_t i = 1; i <= target_gids; ++i) {
+ auto target_gid = dummy_gid(i);
+ f.mapper_factory->_map[target_gid] = i;
+ f.reference_attr->notifyReferencedPut(target_gid, i);
+ }
+ // Add 2 references to each target gid
+ for (uint32_t i = 1; i <= target_gids * 2; ++i) {
+ f.reference_attr->update(i, dummy_gid(((i - 1) % target_gids) + 1));
+ }
+ f.reference_attr->commit();
+ auto ctx = f.create_context(word_term("10"));
+ // Exact count: 0 target hits => 0
+ EXPECT_EQUAL(0u, ctx->approximateHits());
+ TermFieldMatchData match;
+ auto iter = f.create_iterator(*ctx, match, false);
+ EXPECT_TRUE(iter->matches_any() == Trinary::False);
+ ctx = f.create_context(word_term("20"));
+ // Exact count: 2 target hits, 2 docs / target doc => 2 * 2 = 4
+ EXPECT_EQUAL(4u, ctx->approximateHits());
+ ctx = f.create_context(word_term("30"));
+ // Approximation: 110 target hits => 110 * 10001 / 1001 = 1099
+ EXPECT_EQUAL(1099u, ctx->approximateHits());
+}
+
TEST_F("attributeName() returns imported attribute name", Fixture) {
auto ctx = f.create_context(word_term("foo"));
EXPECT_EQUAL(f.default_imported_attr_name(), ctx->attributeName());
diff --git a/searchlib/src/vespa/searchlib/attribute/imported_search_context.cpp b/searchlib/src/vespa/searchlib/attribute/imported_search_context.cpp
index 9c4f8395fec..1e8adc3922e 100644
--- a/searchlib/src/vespa/searchlib/attribute/imported_search_context.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/imported_search_context.cpp
@@ -44,18 +44,78 @@ ImportedSearchContext::ImportedSearchContext(
_target_search_context(_target_attribute.createSearchContext(std::move(term), params)),
_targetLids(_reference_attribute.getTargetLids()),
_merger(_reference_attribute.getCommittedDocIdLimit()),
- _params(params)
+ _params(params),
+ _zero_hits(false)
{
}
ImportedSearchContext::~ImportedSearchContext() = default;
-unsigned int ImportedSearchContext::approximateHits() const {
- return _reference_attribute.getNumDocs();
+uint32_t
+ImportedSearchContext::calc_approx_hits(uint32_t target_approx_hits) const
+{
+ uint32_t docid_limit = _targetLids.size();
+ uint32_t target_docid_limit = _target_attribute.getCommittedDocIdLimit();
+ double approx_hits_multiplier = static_cast<double>(docid_limit) / target_docid_limit;
+ if (approx_hits_multiplier < 1.0) {
+ approx_hits_multiplier = 1.0;
+ }
+ uint64_t approx_hits = target_approx_hits * approx_hits_multiplier;
+ if (approx_hits > docid_limit) {
+ approx_hits = docid_limit;
+ }
+ return approx_hits;
+}
+
+uint32_t
+ImportedSearchContext::calc_exact_hits() const
+{
+ uint32_t docid_limit = _targetLids.size();
+ uint32_t target_docid_limit = _target_attribute.getCommittedDocIdLimit();
+ auto reverse_mapping_refs = _reference_attribute.getReverseMappingRefs();
+ auto& reverse_mapping = _reference_attribute.getReverseMapping();
+ if (target_docid_limit > reverse_mapping_refs.size()) {
+ target_docid_limit = reverse_mapping_refs.size();
+ }
+ fef::TermFieldMatchData matchData;
+ auto it = _target_search_context->createIterator(&matchData, true);
+ uint64_t sum_hits = 0;
+ it->initRange(1, target_docid_limit);
+ for (uint32_t lid = it->seekFirst(1); !it->isAtEnd(); lid = it->seekNext(lid + 1)) {
+ EntryRef ref = reverse_mapping_refs[lid].load_acquire();
+ if (__builtin_expect(ref.valid(), true)) {
+ sum_hits += reverse_mapping.frozenSize(ref);
+ }
+ }
+ if (sum_hits > docid_limit) {
+ sum_hits = docid_limit;
+ }
+ return sum_hits;
+}
+
+unsigned int
+ImportedSearchContext::approximateHits() const
+{
+ uint32_t target_approx_hits = _target_search_context->approximateHits();
+ if (target_approx_hits == 0) {
+ _zero_hits.store(true, std::memory_order_relaxed);
+ return 0;
+ }
+ if (!_target_attribute.getIsFastSearch()) {
+ return _reference_attribute.getNumDocs();
+ }
+ if (target_approx_hits >= MIN_TARGET_HITS_FOR_APPROXIMATION) {
+ return calc_approx_hits(target_approx_hits);
+ } else {
+ return calc_exact_hits();
+ }
}
std::unique_ptr<queryeval::SearchIterator>
ImportedSearchContext::createIterator(fef::TermFieldMatchData* matchData, bool strict) {
+ if (_zero_hits.load(std::memory_order_relaxed)) {
+ return std::make_unique<EmptySearch>();
+ }
if (_searchCacheLookup) {
return BitVectorIterator::create(_searchCacheLookup->bitVector.get(), _searchCacheLookup->docIdLimit, *matchData, strict);
}
diff --git a/searchlib/src/vespa/searchlib/attribute/imported_search_context.h b/searchlib/src/vespa/searchlib/attribute/imported_search_context.h
index c77799ef7a5..0f55bd85a39 100644
--- a/searchlib/src/vespa/searchlib/attribute/imported_search_context.h
+++ b/searchlib/src/vespa/searchlib/attribute/imported_search_context.h
@@ -41,6 +41,9 @@ class ImportedSearchContext : public ISearchContext {
TargetLids _targetLids;
PostingListMerger<int32_t> _merger;
SearchContextParams _params;
+ mutable std::atomic<bool> _zero_hits;
+
+ static constexpr uint32_t MIN_TARGET_HITS_FOR_APPROXIMATION = 100;
uint32_t getTargetLid(uint32_t lid) const {
// Check range to avoid reading memory beyond end of mapping array
@@ -49,6 +52,8 @@ class ImportedSearchContext : public ISearchContext {
void makeMergedPostings(bool isFilter);
void considerAddSearchCacheEntry();
+ uint32_t calc_approx_hits(uint32_t target_approx_hits) const;
+ uint32_t calc_exact_hits() const;
public:
ImportedSearchContext(std::unique_ptr<QueryTermSimple> term,
const SearchContextParams& params,