diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2021-06-11 10:48:25 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2021-06-11 10:48:25 +0000 |
commit | 7bdd3d21d9e3a39a2beb52ea030532fbe06c999e (patch) | |
tree | 05c9957151dd29817bbd0f8b97f2ddc0d3fb9374 /searchlib/src | |
parent | d4fd07237321e8b67fc22906a95ef87b2e712b3e (diff) |
Expose bug related to initialization of most costly features K computation.
Diffstat (limited to 'searchlib/src')
6 files changed, 74 insertions, 41 deletions
diff --git a/searchlib/src/tests/predicate/predicate_index_test.cpp b/searchlib/src/tests/predicate/predicate_index_test.cpp index facf0054c4a..19ad0301b5c 100644 --- a/searchlib/src/tests/predicate/predicate_index_test.cpp +++ b/searchlib/src/tests/predicate/predicate_index_test.cpp @@ -113,7 +113,6 @@ TEST("require that PredicateIndex can index document") { EXPECT_FALSE(index.getIntervalIndex().lookup(hash).valid()); indexFeature(index, doc_id, min_feature, {{hash, interval}}, {}); index.commit(); - auto posting_it = lookupPosting(index, hash); EXPECT_EQUAL(doc_id, posting_it.getKey()); uint32_t size; @@ -123,6 +122,25 @@ TEST("require that PredicateIndex can index document") { EXPECT_EQUAL(interval, interval_list[0]); } +TEST("require that bit vector cache is initialized correctly") { + BitVectorCache::KeyAndCountSet keySet; + keySet.emplace_back(hash, dummy_provider.getDocIdLimit()/2); + PredicateIndex index(generation_holder, dummy_provider, simple_index_config, 10); + EXPECT_FALSE(index.getIntervalIndex().lookup(hash).valid()); + indexFeature(index, doc_id, min_feature, {{hash, interval}}, {}); + index.requireCachePopulation(); + index.populateIfNeeded(dummy_provider.getDocIdLimit()); + EXPECT_TRUE(index.lookupCachedSet(keySet).empty()); + index.commit(); + EXPECT_TRUE(index.getIntervalIndex().lookup(hash).valid()); + EXPECT_TRUE(index.lookupCachedSet(keySet).empty()); + + index.requireCachePopulation(); + index.populateIfNeeded(dummy_provider.getDocIdLimit()); + EXPECT_FALSE(index.lookupCachedSet(keySet).empty()); +} + + TEST("require that PredicateIndex can index document with bounds") { PredicateIndex index(generation_holder, dummy_provider, simple_index_config, 10); EXPECT_FALSE(index.getIntervalIndex().lookup(hash).valid()); diff --git a/searchlib/src/tests/queryeval/predicate/predicate_blueprint_test.cpp b/searchlib/src/tests/queryeval/predicate/predicate_blueprint_test.cpp index 3dd2ec26dea..5b8d5f5b9ce 100644 --- 
a/searchlib/src/tests/queryeval/predicate/predicate_blueprint_test.cpp +++ b/searchlib/src/tests/queryeval/predicate/predicate_blueprint_test.cpp @@ -86,8 +86,7 @@ TEST_F("require that blueprint with empty index estimates empty.", Fixture) { EXPECT_EQUAL(0u, blueprint.getState().estimate().estHits); } -TEST_F("require that blueprint with zero-constraint doc estimates non-empty.", - Fixture) { +TEST_F("require that blueprint with zero-constraint doc estimates non-empty.", Fixture) { f.indexEmptyDocument(42); PredicateBlueprint blueprint(f.field, f.guard(), f.query); EXPECT_FALSE(blueprint.getState().estimate().empty); @@ -98,11 +97,9 @@ const int min_feature = 1; const uint32_t doc_id = 2; const uint32_t interval = 0x0001ffff; -TEST_F("require that blueprint with posting list entry estimates non-empty.", - Fixture) { +TEST_F("require that blueprint with posting list entry estimates non-empty.", Fixture) { PredicateTreeAnnotations annotations(min_feature); - annotations.interval_map[PredicateHash::hash64("key=value")] = - std::vector<Interval>{{interval}}; + annotations.interval_map[PredicateHash::hash64("key=value")] = std::vector<Interval>{{interval}}; f.indexDocument(doc_id, annotations); PredicateBlueprint blueprint(f.field, f.guard(), f.query); @@ -110,8 +107,7 @@ TEST_F("require that blueprint with posting list entry estimates non-empty.", EXPECT_EQUAL(0u, blueprint.getState().estimate().estHits); } -TEST_F("require that blueprint with 'bounds' posting list entry estimates " - "non-empty.", Fixture) { +TEST_F("require that blueprint with 'bounds' posting list entry estimates non-empty.", Fixture) { PredicateTreeAnnotations annotations(min_feature); annotations.bounds_map[PredicateHash::hash64("range_key=40")] = std::vector<IntervalWithBounds>{{interval, 0x80000003}}; @@ -122,34 +118,50 @@ TEST_F("require that blueprint with 'bounds' posting list entry estimates " EXPECT_EQUAL(0u, blueprint.getState().estimate().estHits); } -TEST_F("require that blueprint with 
zstar-compressed estimates non-empty.", - Fixture) { +TEST_F("require that blueprint with zstar-compressed estimates non-empty.", Fixture) { PredicateTreeAnnotations annotations(1); - annotations.interval_map[Constants::z_star_compressed_hash] =std::vector<Interval>{{0xfffe0000}}; + annotations.interval_map[Constants::z_star_compressed_hash] = std::vector<Interval>{{0xfffe0000}}; f.indexDocument(doc_id, annotations); PredicateBlueprint blueprint(f.field, f.guard(), f.query); EXPECT_FALSE(blueprint.getState().estimate().empty); EXPECT_EQUAL(0u, blueprint.getState().estimate().estHits); } -TEST_F("require that blueprint can create search", Fixture) { - PredicateTreeAnnotations annotations(1); - annotations.interval_map[PredicateHash::hash64("key=value")] =std::vector<Interval>{{interval}}; - f.indexDocument(doc_id, annotations); - +void +runQuery(Fixture & f, std::vector<uint32_t> expected, bool expectCachedSize, uint32_t expectedKV) { PredicateBlueprint blueprint(f.field, f.guard(), f.query); blueprint.fetchPostings(ExecuteInfo::TRUE); + EXPECT_EQUAL(expectCachedSize, blueprint.getCachedFeatures().size()); + for (uint32_t docId : expected) { + EXPECT_EQUAL(expectedKV, uint32_t(blueprint.getKV()[docId])); + } TermFieldMatchDataArray tfmda; SearchIterator::UP it = blueprint.createLeafSearch(tfmda, true); ASSERT_TRUE(it.get()); it->initFullRange(); EXPECT_EQUAL(SearchIterator::beginId(), it->getDocId()); - EXPECT_FALSE(it->seek(doc_id - 1)); - EXPECT_EQUAL(doc_id, it->getDocId()); - EXPECT_TRUE(it->seek(doc_id)); - EXPECT_EQUAL(doc_id, it->getDocId()); - EXPECT_FALSE(it->seek(doc_id + 1)); - EXPECT_TRUE(it->isAtEnd()); + std::vector<uint32_t> actual; + for (it->seek(1); ! 
it->isAtEnd(); it->seek(it->getDocId()+1)) { + actual.push_back(it->getDocId()); + } + EXPECT_EQUAL(expected.size(), actual.size()); + for (size_t i(0); i < expected.size(); i++) { + EXPECT_EQUAL(expected[i], actual[i]); + } +} + +TEST_F("require that blueprint can create search", Fixture) { + PredicateTreeAnnotations annotations(1); + annotations.interval_map[PredicateHash::hash64("key=value")] = std::vector<Interval>{{interval}}; + for (size_t i(0); i < 9; i++) { + f.indexDocument(doc_id + i, annotations); + } + runQuery(f, {2,3,4,5,6,7,8,9,10}, 0, 1); + f.indexDocument(doc_id+9, annotations); + runQuery(f, {2, 3,4,5,6,7,8,9,10,11}, 0, 1); + f.index().requireCachePopulation(); + f.indexDocument(doc_id+10, annotations); + runQuery(f, {2,3,4,5,6,7,8,9,10,11,12}, 1, 1); } TEST_F("require that blueprint can create more advanced search", Fixture) { diff --git a/searchlib/src/vespa/searchlib/common/bitvectorcache.h b/searchlib/src/vespa/searchlib/common/bitvectorcache.h index f81fd0163d8..a642d66f42f 100644 --- a/searchlib/src/vespa/searchlib/common/bitvectorcache.h +++ b/searchlib/src/vespa/searchlib/common/bitvectorcache.h @@ -41,6 +41,7 @@ public: void adjustDocIdLimit(uint32_t docId); void populate(uint32_t count, const PopulateInterface &); bool needPopulation() const { return _needPopulation; } + void requirePopulation() { _needPopulation = true; } private: class KeyMeta { public: diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_index.h b/searchlib/src/vespa/searchlib/predicate/predicate_index.h index f4c89a2b369..49bf77f2fcc 100644 --- a/searchlib/src/vespa/searchlib/predicate/predicate_index.h +++ b/searchlib/src/vespa/searchlib/predicate/predicate_index.h @@ -54,8 +54,6 @@ private: template <typename IntervalT> void indexDocumentFeatures(uint32_t doc_id, const FeatureMap<IntervalT> &interval_map); - PopulateInterface::Iterator::UP lookup(uint64_t key) const override; - public: PredicateIndex(GenerationHolder &genHolder, const DocIdLimitProvider 
&limit_provider, @@ -105,6 +103,9 @@ public: * Adjust size of structures to have space for docId. */ void adjustDocIdLimit(uint32_t docId); + PopulateInterface::Iterator::UP lookup(uint64_t key) const override; + // Exposed for testing + void requireCachePopulation() const { _cache.requirePopulation(); } }; extern template class SimpleIndex<vespalib::datastore::EntryRef>; diff --git a/searchlib/src/vespa/searchlib/predicate/simple_index.h b/searchlib/src/vespa/searchlib/predicate/simple_index.h index 1398bb0817c..75dc540f787 100644 --- a/searchlib/src/vespa/searchlib/predicate/simple_index.h +++ b/searchlib/src/vespa/searchlib/predicate/simple_index.h @@ -168,8 +168,6 @@ private: } public: - SimpleIndex(GenerationHolder &generation_holder, const DocIdLimitProvider &provider) : - SimpleIndex(generation_holder, provider, SimpleIndexConfig()) {} SimpleIndex(GenerationHolder &generation_holder, const DocIdLimitProvider &provider, const SimpleIndexConfig &config) : _generation_holder(generation_holder), _config(config), _limit_provider(provider) {} diff --git a/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.h index 9609cd4f6c9..ef225e86c50 100644 --- a/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.h @@ -50,8 +50,11 @@ public: void fetchPostings(const ExecuteInfo &execInfo) override; SearchIterator::UP - createLeafSearch(const fef::TermFieldMatchDataArray &tfmda, - bool strict) const override; + createLeafSearch(const fef::TermFieldMatchDataArray &tfmda, bool strict) const override; + + // Exposed for testing + const BitVectorCache::CountVector & getKV() const { return _kV; } + const BitVectorCache::KeySet & getCachedFeatures() const { return _cachedFeatures; } private: using BTreeIterator = predicate::SimpleIndex<vespalib::datastore::EntryRef>::BTreeIterator; using VectorIterator = 
predicate::SimpleIndex<vespalib::datastore::EntryRef>::VectorIterator; @@ -70,24 +73,24 @@ private: void addZeroConstraintToK(); std::vector<predicate::PredicatePostingList::UP> createPostingLists() const; - const PredicateAttribute & _attribute; + const PredicateAttribute & _attribute; const predicate::PredicateIndex &_index; - Alloc _kVBacking; - BitVectorCache::CountVector _kV; - BitVectorCache::KeySet _cachedFeatures; + Alloc _kVBacking; + BitVectorCache::CountVector _kV; + BitVectorCache::KeySet _cachedFeatures; - std::vector<IntervalEntry> _interval_dict_entries; - std::vector<BoundsEntry> _bounds_dict_entries; - vespalib::datastore::EntryRef _zstar_dict_entry; + std::vector<IntervalEntry> _interval_dict_entries; + std::vector<BoundsEntry> _bounds_dict_entries; + vespalib::datastore::EntryRef _zstar_dict_entry; - std::vector<IntervalIteratorEntry<BTreeIterator>> _interval_btree_iterators; + std::vector<IntervalIteratorEntry<BTreeIterator>> _interval_btree_iterators; std::vector<IntervalIteratorEntry<VectorIterator>> _interval_vector_iterators; - std::vector<BoundsIteratorEntry<BTreeIterator>> _bounds_btree_iterators; - std::vector<BoundsIteratorEntry<VectorIterator>> _bounds_vector_iterators; + std::vector<BoundsIteratorEntry<BTreeIterator>> _bounds_btree_iterators; + std::vector<BoundsIteratorEntry<VectorIterator>> _bounds_vector_iterators; // The zstar iterator is either a vector or a btree iterator. - optional<BTreeIterator> _zstar_btree_iterator; + optional<BTreeIterator> _zstar_btree_iterator; optional<VectorIterator> _zstar_vector_iterator; - bool _fetch_postings_done; + bool _fetch_postings_done; }; } |