summary refs log tree commit diff stats
path: root/searchlib
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2021-06-11 10:48:25 +0000
committer Henning Baldersheim <balder@yahoo-inc.com> 2021-06-11 10:48:25 +0000
commit 7bdd3d21d9e3a39a2beb52ea030532fbe06c999e (patch)
tree 05c9957151dd29817bbd0f8b97f2ddc0d3fb9374 /searchlib
parent d4fd07237321e8b67fc22906a95ef87b2e712b3e (diff)
Expose bug related to initialization of most costly features K computation.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/predicate/predicate_index_test.cpp 20
-rw-r--r-- searchlib/src/tests/queryeval/predicate/predicate_blueprint_test.cpp 56
-rw-r--r-- searchlib/src/vespa/searchlib/common/bitvectorcache.h 1
-rw-r--r-- searchlib/src/vespa/searchlib/predicate/predicate_index.h 5
-rw-r--r-- searchlib/src/vespa/searchlib/predicate/simple_index.h 2
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.h 31
6 files changed, 74 insertions, 41 deletions
diff --git a/searchlib/src/tests/predicate/predicate_index_test.cpp b/searchlib/src/tests/predicate/predicate_index_test.cpp
index facf0054c4a..19ad0301b5c 100644
--- a/searchlib/src/tests/predicate/predicate_index_test.cpp
+++ b/searchlib/src/tests/predicate/predicate_index_test.cpp
@@ -113,7 +113,6 @@ TEST("require that PredicateIndex can index document") {
EXPECT_FALSE(index.getIntervalIndex().lookup(hash).valid());
indexFeature(index, doc_id, min_feature, {{hash, interval}}, {});
index.commit();
-
auto posting_it = lookupPosting(index, hash);
EXPECT_EQUAL(doc_id, posting_it.getKey());
uint32_t size;
@@ -123,6 +122,25 @@ TEST("require that PredicateIndex can index document") {
EXPECT_EQUAL(interval, interval_list[0]);
}
+TEST("require that bit vector cache is initialized correctly") {
+ BitVectorCache::KeyAndCountSet keySet;
+ keySet.emplace_back(hash, dummy_provider.getDocIdLimit()/2);
+ PredicateIndex index(generation_holder, dummy_provider, simple_index_config, 10);
+ EXPECT_FALSE(index.getIntervalIndex().lookup(hash).valid());
+ indexFeature(index, doc_id, min_feature, {{hash, interval}}, {});
+ index.requireCachePopulation();
+ index.populateIfNeeded(dummy_provider.getDocIdLimit());
+ EXPECT_TRUE(index.lookupCachedSet(keySet).empty());
+ index.commit();
+ EXPECT_TRUE(index.getIntervalIndex().lookup(hash).valid());
+ EXPECT_TRUE(index.lookupCachedSet(keySet).empty());
+
+ index.requireCachePopulation();
+ index.populateIfNeeded(dummy_provider.getDocIdLimit());
+ EXPECT_FALSE(index.lookupCachedSet(keySet).empty());
+}
+
+
TEST("require that PredicateIndex can index document with bounds") {
PredicateIndex index(generation_holder, dummy_provider, simple_index_config, 10);
EXPECT_FALSE(index.getIntervalIndex().lookup(hash).valid());
diff --git a/searchlib/src/tests/queryeval/predicate/predicate_blueprint_test.cpp b/searchlib/src/tests/queryeval/predicate/predicate_blueprint_test.cpp
index 3dd2ec26dea..5b8d5f5b9ce 100644
--- a/searchlib/src/tests/queryeval/predicate/predicate_blueprint_test.cpp
+++ b/searchlib/src/tests/queryeval/predicate/predicate_blueprint_test.cpp
@@ -86,8 +86,7 @@ TEST_F("require that blueprint with empty index estimates empty.", Fixture) {
EXPECT_EQUAL(0u, blueprint.getState().estimate().estHits);
}
-TEST_F("require that blueprint with zero-constraint doc estimates non-empty.",
- Fixture) {
+TEST_F("require that blueprint with zero-constraint doc estimates non-empty.", Fixture) {
f.indexEmptyDocument(42);
PredicateBlueprint blueprint(f.field, f.guard(), f.query);
EXPECT_FALSE(blueprint.getState().estimate().empty);
@@ -98,11 +97,9 @@ const int min_feature = 1;
const uint32_t doc_id = 2;
const uint32_t interval = 0x0001ffff;
-TEST_F("require that blueprint with posting list entry estimates non-empty.",
- Fixture) {
+TEST_F("require that blueprint with posting list entry estimates non-empty.", Fixture) {
PredicateTreeAnnotations annotations(min_feature);
- annotations.interval_map[PredicateHash::hash64("key=value")] =
- std::vector<Interval>{{interval}};
+ annotations.interval_map[PredicateHash::hash64("key=value")] = std::vector<Interval>{{interval}};
f.indexDocument(doc_id, annotations);
PredicateBlueprint blueprint(f.field, f.guard(), f.query);
@@ -110,8 +107,7 @@ TEST_F("require that blueprint with posting list entry estimates non-empty.",
EXPECT_EQUAL(0u, blueprint.getState().estimate().estHits);
}
-TEST_F("require that blueprint with 'bounds' posting list entry estimates "
- "non-empty.", Fixture) {
+TEST_F("require that blueprint with 'bounds' posting list entry estimates non-empty.", Fixture) {
PredicateTreeAnnotations annotations(min_feature);
annotations.bounds_map[PredicateHash::hash64("range_key=40")] =
std::vector<IntervalWithBounds>{{interval, 0x80000003}};
@@ -122,34 +118,50 @@ TEST_F("require that blueprint with 'bounds' posting list entry estimates "
EXPECT_EQUAL(0u, blueprint.getState().estimate().estHits);
}
-TEST_F("require that blueprint with zstar-compressed estimates non-empty.",
- Fixture) {
+TEST_F("require that blueprint with zstar-compressed estimates non-empty.", Fixture) {
PredicateTreeAnnotations annotations(1);
- annotations.interval_map[Constants::z_star_compressed_hash] =std::vector<Interval>{{0xfffe0000}};
+ annotations.interval_map[Constants::z_star_compressed_hash] = std::vector<Interval>{{0xfffe0000}};
f.indexDocument(doc_id, annotations);
PredicateBlueprint blueprint(f.field, f.guard(), f.query);
EXPECT_FALSE(blueprint.getState().estimate().empty);
EXPECT_EQUAL(0u, blueprint.getState().estimate().estHits);
}
-TEST_F("require that blueprint can create search", Fixture) {
- PredicateTreeAnnotations annotations(1);
- annotations.interval_map[PredicateHash::hash64("key=value")] =std::vector<Interval>{{interval}};
- f.indexDocument(doc_id, annotations);
-
+void
+runQuery(Fixture & f, std::vector<uint32_t> expected, bool expectCachedSize, uint32_t expectedKV) {
PredicateBlueprint blueprint(f.field, f.guard(), f.query);
blueprint.fetchPostings(ExecuteInfo::TRUE);
+ EXPECT_EQUAL(expectCachedSize, blueprint.getCachedFeatures().size());
+ for (uint32_t docId : expected) {
+ EXPECT_EQUAL(expectedKV, uint32_t(blueprint.getKV()[docId]));
+ }
TermFieldMatchDataArray tfmda;
SearchIterator::UP it = blueprint.createLeafSearch(tfmda, true);
ASSERT_TRUE(it.get());
it->initFullRange();
EXPECT_EQUAL(SearchIterator::beginId(), it->getDocId());
- EXPECT_FALSE(it->seek(doc_id - 1));
- EXPECT_EQUAL(doc_id, it->getDocId());
- EXPECT_TRUE(it->seek(doc_id));
- EXPECT_EQUAL(doc_id, it->getDocId());
- EXPECT_FALSE(it->seek(doc_id + 1));
- EXPECT_TRUE(it->isAtEnd());
+ std::vector<uint32_t> actual;
+ for (it->seek(1); ! it->isAtEnd(); it->seek(it->getDocId()+1)) {
+ actual.push_back(it->getDocId());
+ }
+ EXPECT_EQUAL(expected.size(), actual.size());
+ for (size_t i(0); i < expected.size(); i++) {
+ EXPECT_EQUAL(expected[i], actual[i]);
+ }
+}
+
+TEST_F("require that blueprint can create search", Fixture) {
+ PredicateTreeAnnotations annotations(1);
+ annotations.interval_map[PredicateHash::hash64("key=value")] = std::vector<Interval>{{interval}};
+ for (size_t i(0); i < 9; i++) {
+ f.indexDocument(doc_id + i, annotations);
+ }
+ runQuery(f, {2,3,4,5,6,7,8,9,10}, 0, 1);
+ f.indexDocument(doc_id+9, annotations);
+ runQuery(f, {2, 3,4,5,6,7,8,9,10,11}, 0, 1);
+ f.index().requireCachePopulation();
+ f.indexDocument(doc_id+10, annotations);
+ runQuery(f, {2,3,4,5,6,7,8,9,10,11,12}, 1, 1);
}
TEST_F("require that blueprint can create more advanced search", Fixture) {
diff --git a/searchlib/src/vespa/searchlib/common/bitvectorcache.h b/searchlib/src/vespa/searchlib/common/bitvectorcache.h
index f81fd0163d8..a642d66f42f 100644
--- a/searchlib/src/vespa/searchlib/common/bitvectorcache.h
+++ b/searchlib/src/vespa/searchlib/common/bitvectorcache.h
@@ -41,6 +41,7 @@ public:
void adjustDocIdLimit(uint32_t docId);
void populate(uint32_t count, const PopulateInterface &);
bool needPopulation() const { return _needPopulation; }
+ void requirePopulation() { _needPopulation = true; }
private:
class KeyMeta {
public:
diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_index.h b/searchlib/src/vespa/searchlib/predicate/predicate_index.h
index f4c89a2b369..49bf77f2fcc 100644
--- a/searchlib/src/vespa/searchlib/predicate/predicate_index.h
+++ b/searchlib/src/vespa/searchlib/predicate/predicate_index.h
@@ -54,8 +54,6 @@ private:
template <typename IntervalT>
void indexDocumentFeatures(uint32_t doc_id, const FeatureMap<IntervalT> &interval_map);
- PopulateInterface::Iterator::UP lookup(uint64_t key) const override;
-
public:
PredicateIndex(GenerationHolder &genHolder,
const DocIdLimitProvider &limit_provider,
@@ -105,6 +103,9 @@ public:
* Adjust size of structures to have space for docId.
*/
void adjustDocIdLimit(uint32_t docId);
+ PopulateInterface::Iterator::UP lookup(uint64_t key) const override;
+ // Exposed for testing
+ void requireCachePopulation() const { _cache.requirePopulation(); }
};
extern template class SimpleIndex<vespalib::datastore::EntryRef>;
diff --git a/searchlib/src/vespa/searchlib/predicate/simple_index.h b/searchlib/src/vespa/searchlib/predicate/simple_index.h
index 1398bb0817c..75dc540f787 100644
--- a/searchlib/src/vespa/searchlib/predicate/simple_index.h
+++ b/searchlib/src/vespa/searchlib/predicate/simple_index.h
@@ -168,8 +168,6 @@ private:
}
public:
- SimpleIndex(GenerationHolder &generation_holder, const DocIdLimitProvider &provider) :
- SimpleIndex(generation_holder, provider, SimpleIndexConfig()) {}
SimpleIndex(GenerationHolder &generation_holder,
const DocIdLimitProvider &provider, const SimpleIndexConfig &config)
: _generation_holder(generation_holder), _config(config), _limit_provider(provider) {}
diff --git a/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.h
index 9609cd4f6c9..ef225e86c50 100644
--- a/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.h
+++ b/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.h
@@ -50,8 +50,11 @@ public:
void fetchPostings(const ExecuteInfo &execInfo) override;
SearchIterator::UP
- createLeafSearch(const fef::TermFieldMatchDataArray &tfmda,
- bool strict) const override;
+ createLeafSearch(const fef::TermFieldMatchDataArray &tfmda, bool strict) const override;
+
+ // Exposed for testing
+ const BitVectorCache::CountVector & getKV() const { return _kV; }
+ const BitVectorCache::KeySet & getCachedFeatures() const { return _cachedFeatures; }
private:
using BTreeIterator = predicate::SimpleIndex<vespalib::datastore::EntryRef>::BTreeIterator;
using VectorIterator = predicate::SimpleIndex<vespalib::datastore::EntryRef>::VectorIterator;
@@ -70,24 +73,24 @@ private:
void addZeroConstraintToK();
std::vector<predicate::PredicatePostingList::UP> createPostingLists() const;
- const PredicateAttribute & _attribute;
+ const PredicateAttribute & _attribute;
const predicate::PredicateIndex &_index;
- Alloc _kVBacking;
- BitVectorCache::CountVector _kV;
- BitVectorCache::KeySet _cachedFeatures;
+ Alloc _kVBacking;
+ BitVectorCache::CountVector _kV;
+ BitVectorCache::KeySet _cachedFeatures;
- std::vector<IntervalEntry> _interval_dict_entries;
- std::vector<BoundsEntry> _bounds_dict_entries;
- vespalib::datastore::EntryRef _zstar_dict_entry;
+ std::vector<IntervalEntry> _interval_dict_entries;
+ std::vector<BoundsEntry> _bounds_dict_entries;
+ vespalib::datastore::EntryRef _zstar_dict_entry;
- std::vector<IntervalIteratorEntry<BTreeIterator>> _interval_btree_iterators;
+ std::vector<IntervalIteratorEntry<BTreeIterator>> _interval_btree_iterators;
std::vector<IntervalIteratorEntry<VectorIterator>> _interval_vector_iterators;
- std::vector<BoundsIteratorEntry<BTreeIterator>> _bounds_btree_iterators;
- std::vector<BoundsIteratorEntry<VectorIterator>> _bounds_vector_iterators;
+ std::vector<BoundsIteratorEntry<BTreeIterator>> _bounds_btree_iterators;
+ std::vector<BoundsIteratorEntry<VectorIterator>> _bounds_vector_iterators;
// The zstar iterator is either a vector or a btree iterator.
- optional<BTreeIterator> _zstar_btree_iterator;
+ optional<BTreeIterator> _zstar_btree_iterator;
optional<VectorIterator> _zstar_vector_iterator;
- bool _fetch_postings_done;
+ bool _fetch_postings_done;
};
}