diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2021-06-09 21:15:21 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2021-06-09 21:32:13 +0000 |
commit | 9e031a7d659b6f9ad4afe32792e3fd25faea10ab (patch) | |
tree | ea05140bb84f29e72c984deca749fdf0c9a22229 /searchlib | |
parent | 2a2f3144d8a65302843202156aa0109f583eae75 (diff) |
Clean up code layout and GC unused members. No semantic changes.
Diffstat (limited to 'searchlib')
25 files changed, 239 insertions, 204 deletions
diff --git a/searchlib/src/tests/predicate/predicate_bounds_posting_list_test.cpp b/searchlib/src/tests/predicate/predicate_bounds_posting_list_test.cpp index 191c7495271..31aebf95ea2 100644 --- a/searchlib/src/tests/predicate/predicate_bounds_posting_list_test.cpp +++ b/searchlib/src/tests/predicate/predicate_bounds_posting_list_test.cpp @@ -29,7 +29,7 @@ SimpleIndexConfig config; const uint64_t hash = 0x123; TEST("require that empty bounds posting list starts at 0.") { - PredicateIndex index(generation_handler, generation_holder, limit_provider, config, 8); + PredicateIndex index(generation_holder, limit_provider, config, 8); vespalib::datastore::EntryRef ref; PredicateBoundsPostingList<PredicateIndex::BTreeIterator> posting_list(index.getIntervalStore(), @@ -54,7 +54,7 @@ void checkNext(PredicateBoundsPostingList<PredicateIndex::BTreeIterator> &postin } TEST("require that bounds posting list checks bounds.") { - PredicateIndex index(generation_handler, generation_holder, limit_provider, config, 8); + PredicateIndex index(generation_holder, limit_provider, config, 8); const auto &bounds_index = index.getBoundsIndex(); for (uint32_t id = 1; id < 100; ++id) { PredicateTreeAnnotations annotations(id); diff --git a/searchlib/src/tests/predicate/predicate_index_test.cpp b/searchlib/src/tests/predicate/predicate_index_test.cpp index 669f70dd544..facf0054c4a 100644 --- a/searchlib/src/tests/predicate/predicate_index_test.cpp +++ b/searchlib/src/tests/predicate/predicate_index_test.cpp @@ -33,7 +33,7 @@ DummyDocIdLimitProvider dummy_provider; SimpleIndexConfig simple_index_config; TEST("require that PredicateIndex can index empty documents") { - PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + PredicateIndex index(generation_holder, dummy_provider, simple_index_config, 10); EXPECT_EQUAL(0u, index.getZeroConstraintDocs().size()); index.indexEmptyDocument(2); index.commit(); @@ -41,7 +41,7 @@ TEST("require that 
PredicateIndex can index empty documents") { } TEST("require that indexDocument don't index empty documents") { - PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + PredicateIndex index(generation_holder, dummy_provider, simple_index_config, 10); EXPECT_EQUAL(0u, index.getZeroConstraintDocs().size()); PredicateTreeAnnotations annotations; index.indexDocument(3, annotations); @@ -50,7 +50,7 @@ TEST("require that indexDocument don't index empty documents") { } TEST("require that PredicateIndex can remove empty documents") { - PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + PredicateIndex index(generation_holder, dummy_provider, simple_index_config, 10); EXPECT_EQUAL(0u, index.getZeroConstraintDocs().size()); index.indexEmptyDocument(2); index.commit(); @@ -61,7 +61,7 @@ TEST("require that PredicateIndex can remove empty documents") { } TEST("require that indexing the same empty document multiple times is ok") { - PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + PredicateIndex index(generation_holder, dummy_provider, simple_index_config, 10); EXPECT_EQUAL(0u, index.getZeroConstraintDocs().size()); index.indexEmptyDocument(2); index.commit(); @@ -109,7 +109,7 @@ const IntervalWithBounds bounds = {0x0001ffff, 0x03}; Interval single_buf; TEST("require that PredicateIndex can index document") { - PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + PredicateIndex index(generation_holder, dummy_provider, simple_index_config, 10); EXPECT_FALSE(index.getIntervalIndex().lookup(hash).valid()); indexFeature(index, doc_id, min_feature, {{hash, interval}}, {}); index.commit(); @@ -124,7 +124,7 @@ TEST("require that PredicateIndex can index document") { } TEST("require that PredicateIndex can index document with bounds") { - PredicateIndex index(generation_handler, 
generation_holder, dummy_provider, simple_index_config, 10); + PredicateIndex index(generation_holder, dummy_provider, simple_index_config, 10); EXPECT_FALSE(index.getIntervalIndex().lookup(hash).valid()); indexFeature(index, doc_id, min_feature, {}, {{hash, bounds}}); index.commit(); @@ -149,7 +149,7 @@ TEST("require that PredicateIndex can index document with bounds") { TEST("require that PredicateIndex can index multiple documents " "with the same feature") { - PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + PredicateIndex index(generation_holder, dummy_provider, simple_index_config, 10); EXPECT_FALSE(index.getIntervalIndex().lookup(hash).valid()); for (uint32_t id = 1; id < 100; ++id) { indexFeature(index, id, min_feature, {{hash, interval}}, {}); @@ -171,7 +171,7 @@ TEST("require that PredicateIndex can index multiple documents " } TEST("require that PredicateIndex can remove indexed documents") { - PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + PredicateIndex index(generation_holder, dummy_provider, simple_index_config, 10); EXPECT_FALSE(index.getIntervalIndex().lookup(hash).valid()); indexFeature(index, doc_id, min_feature, {{hash, interval}}, {{hash2, bounds}}); @@ -187,7 +187,7 @@ TEST("require that PredicateIndex can remove indexed documents") { } TEST("require that PredicateIndex can remove multiple documents") { - PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + PredicateIndex index(generation_holder, dummy_provider, simple_index_config, 10); const auto &interval_index = index.getIntervalIndex(); EXPECT_FALSE(interval_index.lookup(hash).valid()); for (uint32_t id = 1; id < 100; ++id) { @@ -214,7 +214,7 @@ TEST("require that PredicateIndex can remove multiple documents with " intervals.push_back(make_pair(hash + i, interval)); bounds_intervals.push_back(make_pair(hash2 + i, bounds)); } - 
PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + PredicateIndex index(generation_holder, dummy_provider, simple_index_config, 10); const auto &interval_index = index.getIntervalIndex(); EXPECT_FALSE(interval_index.lookup(hash).valid()); for (uint32_t id = 1; id < 100; ++id) { @@ -272,7 +272,7 @@ TEST("require that PredicateIndex can be (de)serialized") { intervals.push_back(make_pair(hash + i, interval)); bounds_intervals.push_back(make_pair(hash2 + i, bounds)); } - PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 8); + PredicateIndex index(generation_holder, dummy_provider, simple_index_config, 8); EXPECT_FALSE(index.getIntervalIndex().lookup(hash).valid()); for (uint32_t id = 1; id < 100; ++id) { indexFeature(index, id, id, intervals, bounds_intervals); @@ -284,7 +284,7 @@ TEST("require that PredicateIndex can be (de)serialized") { index.serialize(buffer); uint32_t doc_id_limit; DocIdLimitFinder finder(doc_id_limit); - PredicateIndex index2(generation_handler, generation_holder, dummy_provider, simple_index_config, + PredicateIndex index2(generation_holder, dummy_provider, simple_index_config, buffer, finder, PredicateAttribute::PREDICATE_ATTRIBUTE_VERSION); const PredicateIntervalStore &interval_store = index2.getIntervalStore(); EXPECT_EQUAL(199u, doc_id_limit); @@ -322,7 +322,7 @@ TEST("require that PredicateIndex can be (de)serialized") { } TEST("require that DocumentFeaturesStore is restored on deserialization") { - PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + PredicateIndex index(generation_holder, dummy_provider, simple_index_config, 10); EXPECT_FALSE(index.getIntervalIndex().lookup(hash).valid()); indexFeature(index, doc_id, min_feature, {{hash, interval}}, {{hash2, bounds}}); @@ -330,7 +330,7 @@ TEST("require that DocumentFeaturesStore is restored on deserialization") { index.serialize(buffer); 
uint32_t doc_id_limit; DocIdLimitFinder finder(doc_id_limit); - PredicateIndex index2(generation_handler, generation_holder, dummy_provider, simple_index_config, + PredicateIndex index2(generation_holder, dummy_provider, simple_index_config, buffer, finder, PredicateAttribute::PREDICATE_ATTRIBUTE_VERSION); const auto &interval_index = index2.getIntervalIndex(); const auto &bounds_index = index2.getBoundsIndex(); @@ -351,7 +351,7 @@ TEST("require that DocumentFeaturesStore is restored on deserialization") { } TEST("require that hold lists are attempted emptied on destruction") { - PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + PredicateIndex index(generation_holder, dummy_provider, simple_index_config, 10); indexFeature(index, doc_id, min_feature, {{hash, interval}}, {{hash2, bounds}}); { diff --git a/searchlib/src/tests/predicate/predicate_interval_posting_list_test.cpp b/searchlib/src/tests/predicate/predicate_interval_posting_list_test.cpp index a77542f364e..660d8556b5c 100644 --- a/searchlib/src/tests/predicate/predicate_interval_posting_list_test.cpp +++ b/searchlib/src/tests/predicate/predicate_interval_posting_list_test.cpp @@ -28,7 +28,7 @@ SimpleIndexConfig config; const uint64_t hash = 0x123; TEST("require that empty posting list starts at 0.") { - PredicateIndex index(generation_handler, generation_holder, limit_provider, config, 8); + PredicateIndex index(generation_holder, limit_provider, config, 8); vespalib::datastore::EntryRef ref; PredicateIntervalPostingList<PredicateIndex::BTreeIterator> posting_list(index.getIntervalStore(), index.getIntervalIndex().getBTreePostingList(ref)); @@ -38,7 +38,7 @@ TEST("require that empty posting list starts at 0.") { } TEST("require that posting list can iterate.") { - PredicateIndex index(generation_handler, generation_holder, limit_provider, config, 8); + PredicateIndex index(generation_holder, limit_provider, config, 8); const auto &interval_index = 
index.getIntervalIndex(); for (uint32_t id = 1; id < 100; ++id) { PredicateTreeAnnotations annotations(id); diff --git a/searchlib/src/tests/predicate/predicate_zero_constraint_posting_list_test.cpp b/searchlib/src/tests/predicate/predicate_zero_constraint_posting_list_test.cpp index e427c99c007..12de48b5d31 100644 --- a/searchlib/src/tests/predicate/predicate_zero_constraint_posting_list_test.cpp +++ b/searchlib/src/tests/predicate/predicate_zero_constraint_posting_list_test.cpp @@ -25,7 +25,7 @@ DummyDocIdLimitProvider limit_provider; SimpleIndexConfig config; TEST("require that empty posting list starts at 0.") { - PredicateIndex index(generation_handler, generation_holder, limit_provider, config, 8); + PredicateIndex index(generation_holder, limit_provider, config, 8); PredicateZeroConstraintPostingList posting_list(index.getZeroConstraintDocs().begin()); EXPECT_EQUAL(0u, posting_list.getDocId()); EXPECT_EQUAL(0x00010001u, posting_list.getInterval()); @@ -33,7 +33,7 @@ TEST("require that empty posting list starts at 0.") { } TEST("require that posting list can iterate.") { - PredicateIndex index(generation_handler, generation_holder, limit_provider, config, 8); + PredicateIndex index(generation_holder, limit_provider, config, 8); for (uint32_t id = 1; id < 100; ++id) { index.indexEmptyDocument(id); } diff --git a/searchlib/src/tests/predicate/predicate_zstar_compressed_posting_list_test.cpp b/searchlib/src/tests/predicate/predicate_zstar_compressed_posting_list_test.cpp index 4e86e996704..6d00b45a283 100644 --- a/searchlib/src/tests/predicate/predicate_zstar_compressed_posting_list_test.cpp +++ b/searchlib/src/tests/predicate/predicate_zstar_compressed_posting_list_test.cpp @@ -29,7 +29,7 @@ SimpleIndexConfig config; const uint64_t hash = 0x123; TEST("require that empty posting list starts at 0.") { - PredicateIndex index(generation_handler, generation_holder, limit_provider, config, 8); + PredicateIndex index(generation_holder, limit_provider, config, 8); 
vespalib::datastore::EntryRef ref; PredicateZstarCompressedPostingList<PredicateIndex::BTreeIterator> posting_list(index.getIntervalStore(), index.getIntervalIndex().getBTreePostingList(ref)); @@ -39,7 +39,7 @@ TEST("require that empty posting list starts at 0.") { } TEST("require that posting list can iterate.") { - PredicateIndex index(generation_handler, generation_holder, limit_provider, config, 8); + PredicateIndex index(generation_holder, limit_provider, config, 8); const auto &interval_index = index.getIntervalIndex(); vector<vector<Interval>> intervals = {{{0x00010000}}, diff --git a/searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp index 7913e617d70..c4a0e036a01 100644 --- a/searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp @@ -26,7 +26,8 @@ constexpr uint8_t MAX_MIN_FEATURE = 255; constexpr uint16_t MAX_INTERVAL_RANGE = static_cast<uint16_t>(predicate::MAX_INTERVAL); -int64_t adjustBound(int32_t arity, int64_t bound) { +int64_t +adjustBound(int32_t arity, int64_t bound) { int64_t adjusted = arity; int64_t value = bound; int64_t max = LLONG_MAX / arity; @@ -39,7 +40,8 @@ int64_t adjustBound(int32_t arity, int64_t bound) { return adjusted - 1; } -int64_t adjustLowerBound(int32_t arity, int64_t lower_bound) { +int64_t +adjustLowerBound(int32_t arity, int64_t lower_bound) { if (lower_bound == LLONG_MIN) { return lower_bound; } else if (lower_bound > 0) { @@ -49,7 +51,8 @@ int64_t adjustLowerBound(int32_t arity, int64_t lower_bound) { } } -int64_t adjustUpperBound(int32_t arity, int64_t upper_bound) { +int64_t +adjustUpperBound(int32_t arity, int64_t upper_bound) { if (upper_bound == LLONG_MAX) { return upper_bound; } else if (upper_bound < 0) { @@ -66,13 +69,11 @@ SimpleIndexConfig createSimpleIndexConfig(const search::attribute::Config &confi } // namespace 
-PredicateAttribute::PredicateAttribute(const vespalib::string &base_file_name, - const Config &config) +PredicateAttribute::PredicateAttribute(const vespalib::string &base_file_name, const Config &config) : NotImplementedAttribute(base_file_name, config), - _base_file_name(base_file_name), _limit_provider(*this), - _index(new PredicateIndex(getGenerationHandler(), getGenerationHolder(), - _limit_provider, createSimpleIndexConfig(config), config.predicateParams().arity())), + _index(std::make_unique<PredicateIndex>(getGenerationHolder(), _limit_provider, + createSimpleIndexConfig(config), config.predicateParams().arity())), _lower_bound(adjustLowerBound(config.predicateParams().arity(), config.predicateParams().lower_bound())), _upper_bound(adjustUpperBound(config.predicateParams().arity(), config.predicateParams().upper_bound())), _min_feature(config.getGrowStrategy().to_generic_strategy(), getGenerationHolder()), @@ -183,7 +184,8 @@ struct DummyObserver : SimpleIndexDeserializeObserver<> { } -bool PredicateAttribute::onLoad() +bool +PredicateAttribute::onLoad() { auto loaded_buffer = attribute::LoadUtils::loadDAT(*this); char *rawBuffer = const_cast<char *>(static_cast<const char *>(loaded_buffer->buffer())); @@ -202,12 +204,12 @@ bool PredicateAttribute::onLoad() DocId highest_doc_id; if (version == 0) { DocIdLimitFinderAndMinFeatureFiller<MinFeatureVector> observer(_min_feature, *_index); - _index = std::make_unique<PredicateIndex>(getGenerationHandler(), getGenerationHolder(), _limit_provider, + _index = std::make_unique<PredicateIndex>(getGenerationHolder(), _limit_provider, createSimpleIndexConfig(getConfig()), buffer, observer, 0); highest_doc_id = observer._highest_doc_id; } else { DummyObserver observer; - _index = std::make_unique<PredicateIndex>(getGenerationHandler(), getGenerationHolder(), _limit_provider, + _index = std::make_unique<PredicateIndex>(getGenerationHolder(), _limit_provider, createSimpleIndexConfig(getConfig()), buffer, observer, 
version); highest_doc_id = buffer.readInt32(); // Deserialize min feature vector @@ -240,6 +242,7 @@ PredicateAttribute::addDoc(DocId &doc_id) _min_feature.ensure_size(doc_id + 1); return true; } + uint32_t PredicateAttribute::clearDoc(DocId doc_id) { diff --git a/searchlib/src/vespa/searchlib/attribute/predicate_attribute.h b/searchlib/src/vespa/searchlib/attribute/predicate_attribute.h index 6e3f0c4399f..4d7fd3c235b 100644 --- a/searchlib/src/vespa/searchlib/attribute/predicate_attribute.h +++ b/searchlib/src/vespa/searchlib/attribute/predicate_attribute.h @@ -80,7 +80,6 @@ public: void populateIfNeeded(); private: - vespalib::string _base_file_name; const AttributeVectorDocIdLimitProvider _limit_provider; std::unique_ptr<predicate::PredicateIndex> _index; int64_t _lower_bound; diff --git a/searchlib/src/vespa/searchlib/common/bitvectorcache.h b/searchlib/src/vespa/searchlib/common/bitvectorcache.h index c1415d9130f..f81fd0163d8 100644 --- a/searchlib/src/vespa/searchlib/common/bitvectorcache.h +++ b/searchlib/src/vespa/searchlib/common/bitvectorcache.h @@ -3,6 +3,7 @@ #include "condensedbitvectors.h" #include <vespa/vespalib/stllike/hash_set.h> +#include <vespa/vespalib/stllike/hash_map.h> #include <vespa/fastos/dynamiclibrary.h> #include <mutex> @@ -76,12 +77,12 @@ private: VESPA_DLL_LOCAL static void populate(Key2Index & newKeys, CondensedBitVector & chunk, const PopulateInterface & lookup); VESPA_DLL_LOCAL bool hasCostChanged(const std::lock_guard<std::mutex> &); - uint64_t _lookupCount; - bool _needPopulation; + uint64_t _lookupCount; + bool _needPopulation; mutable std::mutex _lock; - Key2Index _keys; - ChunkV _chunks; - GenerationHolder &_genHolder; + Key2Index _keys; + ChunkV _chunks; + GenerationHolder &_genHolder; }; } diff --git a/searchlib/src/vespa/searchlib/common/condensedbitvectors.cpp b/searchlib/src/vespa/searchlib/common/condensedbitvectors.cpp index d6efc4fddc2..50b971f499f 100644 --- 
a/searchlib/src/vespa/searchlib/common/condensedbitvectors.cpp +++ b/searchlib/src/vespa/searchlib/common/condensedbitvectors.cpp @@ -129,9 +129,7 @@ void throwIllegalKey(size_t numKeys, size_t key) } -CondensedBitVector::~CondensedBitVector() -{ -} +CondensedBitVector::~CondensedBitVector() = default; void CondensedBitVector::addKey(Key key) const @@ -144,7 +142,7 @@ CondensedBitVector::addKey(Key key) const CondensedBitVector::UP CondensedBitVector::create(size_t size, GenerationHolder &genHolder) { - return UP(new CondensedBitVectorT<uint32_t>(size, genHolder)); + return std::make_unique<CondensedBitVectorT<uint32_t>>(size, genHolder); } } diff --git a/searchlib/src/vespa/searchlib/common/condensedbitvectors.h b/searchlib/src/vespa/searchlib/common/condensedbitvectors.h index 4bda29894cc..02355a61e40 100644 --- a/searchlib/src/vespa/searchlib/common/condensedbitvectors.h +++ b/searchlib/src/vespa/searchlib/common/condensedbitvectors.h @@ -1,7 +1,6 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#pragma once -#include <vespa/vespalib/stllike/hash_map.h> #include <vespa/vespalib/util/generationholder.h> #include <vespa/vespalib/util/arrayref.h> #include <set> @@ -31,9 +30,6 @@ public: bool hasKey(Key key) const { return key < getKeyCapacity(); } void addKey(Key key) const; static CondensedBitVector::UP create(size_t size, vespalib::GenerationHolder &genHolder); -private: - typedef vespalib::hash_map<Key, uint32_t> Key2Index; - Key2Index _keys; }; } diff --git a/searchlib/src/vespa/searchlib/predicate/document_features_store.cpp b/searchlib/src/vespa/searchlib/predicate/document_features_store.cpp index dcda13cac54..ad7d6fe3456 100644 --- a/searchlib/src/vespa/searchlib/predicate/document_features_store.cpp +++ b/searchlib/src/vespa/searchlib/predicate/document_features_store.cpp @@ -7,8 +7,6 @@ #include <vespa/vespalib/btree/btreeroot.hpp> #include <vespa/vespalib/btree/btreenodeallocator.hpp> -//#include "predicate_index.h" - using vespalib::btree::BTreeNoLeafData; using vespalib::datastore::EntryRef; using vespalib::DataBuffer; @@ -38,10 +36,8 @@ DocumentFeaturesStore::DocumentFeaturesStore(uint32_t arity) namespace { template <typename KeyComp, typename WordIndex> -void deserializeWords(DataBuffer &buffer, - memoryindex::WordStore &word_store, - WordIndex &word_index, - vector<EntryRef> &word_refs) { +void +deserializeWords(DataBuffer &buffer, memoryindex::WordStore &word_store, WordIndex &word_index, vector<EntryRef> &word_refs) { uint32_t word_list_size = buffer.readInt32(); word_refs.reserve(word_list_size); vector<char> word; @@ -57,8 +53,8 @@ void deserializeWords(DataBuffer &buffer, } template <typename RangeFeaturesMap> -void deserializeRanges(DataBuffer &buffer, vector<EntryRef> &word_refs, - RangeFeaturesMap &ranges, size_t &num_ranges) { +void +deserializeRanges(DataBuffer &buffer, vector<EntryRef> &word_refs, RangeFeaturesMap &ranges, size_t &num_ranges) { typedef typename RangeFeaturesMap::mapped_type::value_type Range; uint32_t ranges_size = 
buffer.readInt32(); for (uint32_t i = 0; i < ranges_size; ++i) { @@ -78,8 +74,8 @@ void deserializeRanges(DataBuffer &buffer, vector<EntryRef> &word_refs, } template <typename DocumentFeaturesMap> -void deserializeDocs(DataBuffer &buffer, DocumentFeaturesMap &docs, - size_t &num_features) { +void +deserializeDocs(DataBuffer &buffer, DocumentFeaturesMap &docs, size_t &num_features) { uint32_t docs_size = buffer.readInt32(); for (uint32_t i = 0; i < docs_size; ++i) { uint32_t doc_id = buffer.readInt32(); @@ -111,7 +107,8 @@ DocumentFeaturesStore::~DocumentFeaturesStore() { _word_index.clear(); } -void DocumentFeaturesStore::insert(uint64_t featureId, uint32_t docId) { +void +DocumentFeaturesStore::insert(uint64_t featureId, uint32_t docId) { assert(docId != 0); if (_currDocId != docId) { auto docsItr = _docs.find(docId); @@ -125,8 +122,8 @@ void DocumentFeaturesStore::insert(uint64_t featureId, uint32_t docId) { ++_numFeatures; } -void DocumentFeaturesStore::insert(const PredicateTreeAnnotations &annotations, - uint32_t doc_id) { +void +DocumentFeaturesStore::insert(const PredicateTreeAnnotations &annotations, uint32_t doc_id) { assert(doc_id != 0); if (!annotations.features.empty()) { auto it = _docs.find(doc_id); @@ -172,15 +169,15 @@ DocumentFeaturesStore::get(uint32_t docId) const { if (rangeItr != _ranges.end()) { for (auto range : rangeItr->second) { const char *label = _word_store.getWord(range.label_ref); - PredicateRangeExpander::expandRange( - label, range.from, range.to, _arity, - std::inserter(features, features.end())); + PredicateRangeExpander::expandRange(label, range.from, range.to, _arity, + std::inserter(features, features.end())); } } return features; } -void DocumentFeaturesStore::remove(uint32_t doc_id) { +void +DocumentFeaturesStore::remove(uint32_t doc_id) { auto itr = _docs.find(doc_id); if (itr != _docs.end()) { _numFeatures = _numFeatures >= itr->second.size() ? 
@@ -198,7 +195,8 @@ void DocumentFeaturesStore::remove(uint32_t doc_id) { } } -vespalib::MemoryUsage DocumentFeaturesStore::getMemoryUsage() const { +vespalib::MemoryUsage +DocumentFeaturesStore::getMemoryUsage() const { vespalib::MemoryUsage usage; usage.incAllocatedBytes(_docs.getMemoryConsumption()); usage.incUsedBytes(_docs.getMemoryUsed()); @@ -219,9 +217,11 @@ vespalib::MemoryUsage DocumentFeaturesStore::getMemoryUsage() const { namespace { template <typename RangeFeaturesMap> -void findUsedWords(const RangeFeaturesMap &ranges, - unordered_map<uint32_t, uint32_t> &word_map, - vector<EntryRef> &word_list) { +void +findUsedWords(const RangeFeaturesMap &ranges, + unordered_map<uint32_t, uint32_t> &word_map, + vector<EntryRef> &word_list) +{ for (const auto &range_features_entry : ranges) { for (const auto &range : range_features_entry.second) { if (!word_map.count(range.label_ref.ref())) { @@ -232,8 +232,10 @@ void findUsedWords(const RangeFeaturesMap &ranges, } } -void serializeWords(DataBuffer &buffer, const vector<EntryRef> &word_list, - const memoryindex::WordStore &word_store) { +void +serializeWords(DataBuffer &buffer, const vector<EntryRef> &word_list, + const memoryindex::WordStore &word_store) +{ buffer.writeInt32(word_list.size()); for (const auto &word_ref : word_list) { const char *word = word_store.getWord(word_ref); @@ -244,8 +246,10 @@ void serializeWords(DataBuffer &buffer, const vector<EntryRef> &word_list, } template <typename RangeFeaturesMap> -void serializeRanges(DataBuffer &buffer, RangeFeaturesMap &ranges, - unordered_map<uint32_t, uint32_t> &word_map) { +void +serializeRanges(DataBuffer &buffer, RangeFeaturesMap &ranges, + unordered_map<uint32_t, uint32_t> &word_map) +{ buffer.writeInt32(ranges.size()); for (const auto &range_features_entry : ranges) { buffer.writeInt32(range_features_entry.first); // doc id @@ -259,7 +263,8 @@ void serializeRanges(DataBuffer &buffer, RangeFeaturesMap &ranges, } template <typename DocumentFeaturesMap> 
-void serializeDocs(DataBuffer &buffer, DocumentFeaturesMap &docs) { +void +serializeDocs(DataBuffer &buffer, DocumentFeaturesMap &docs) { buffer.writeInt32(docs.size()); for (const auto &doc_features_entry : docs) { buffer.writeInt32(doc_features_entry.first); // doc id @@ -271,7 +276,8 @@ void serializeDocs(DataBuffer &buffer, DocumentFeaturesMap &docs) { } } // namespace -void DocumentFeaturesStore::serialize(DataBuffer &buffer) const { +void +DocumentFeaturesStore::serialize(DataBuffer &buffer) const { vector<EntryRef> word_list; unordered_map<uint32_t, uint32_t> word_map; diff --git a/searchlib/src/vespa/searchlib/predicate/document_features_store.h b/searchlib/src/vespa/searchlib/predicate/document_features_store.h index a45c7ba043a..442249d619a 100644 --- a/searchlib/src/vespa/searchlib/predicate/document_features_store.h +++ b/searchlib/src/vespa/searchlib/predicate/document_features_store.h @@ -54,14 +54,14 @@ class DocumentFeaturesStore { vespalib::btree::NoAggregated, const KeyComp &> WordIndex; DocumentFeaturesMap _docs; - RangeFeaturesMap _ranges; - WordStore _word_store; - WordIndex _word_index; - uint32_t _currDocId; - FeatureVector *_currFeatures; - size_t _numFeatures; - size_t _numRanges; - uint32_t _arity; + RangeFeaturesMap _ranges; + WordStore _word_store; + WordIndex _word_index; + uint32_t _currDocId; + FeatureVector *_currFeatures; + size_t _numFeatures; + size_t _numRanges; + uint32_t _arity; void setCurrent(uint32_t docId, FeatureVector *features); diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_bounds_posting_list.h b/searchlib/src/vespa/searchlib/predicate/predicate_bounds_posting_list.h index 0ef2d81f094..9d2e90af7a5 100644 --- a/searchlib/src/vespa/searchlib/predicate/predicate_bounds_posting_list.h +++ b/searchlib/src/vespa/searchlib/predicate/predicate_bounds_posting_list.h @@ -53,7 +53,8 @@ namespace { } // namespace template<typename Iterator> -bool PredicateBoundsPostingList<Iterator>::next(uint32_t doc_id) { +bool 
+PredicateBoundsPostingList<Iterator>::next(uint32_t doc_id) { if (_iterator.valid() && _iterator.getKey() <= doc_id) { _iterator.linearSeek(doc_id + 1); } @@ -74,7 +75,8 @@ bool PredicateBoundsPostingList<Iterator>::next(uint32_t doc_id) { } template<typename Iterator> -bool PredicateBoundsPostingList<Iterator>::nextInterval() { +bool +PredicateBoundsPostingList<Iterator>::nextInterval() { uint32_t next_bounds; do { if (__builtin_expect(_interval_count == 1, true)) { diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_index.cpp b/searchlib/src/vespa/searchlib/predicate/predicate_index.cpp index e9b1a6bd685..6cbe11e2240 100644 --- a/searchlib/src/vespa/searchlib/predicate/predicate_index.cpp +++ b/searchlib/src/vespa/searchlib/predicate/predicate_index.cpp @@ -17,16 +17,19 @@ using std::vector; namespace search::predicate { template <> -void PredicateIndex::addPosting<Interval>(uint64_t feature, uint32_t doc_id, EntryRef ref) { +void +PredicateIndex::addPosting<Interval>(uint64_t feature, uint32_t doc_id, EntryRef ref) { _interval_index.addPosting(feature, doc_id, ref); } template <> -void PredicateIndex::addPosting<IntervalWithBounds>(uint64_t feature, uint32_t doc_id, EntryRef ref) { +void +PredicateIndex::addPosting<IntervalWithBounds>(uint64_t feature, uint32_t doc_id, EntryRef ref) { _bounds_index.addPosting(feature, doc_id, ref); } template <typename IntervalT> -void PredicateIndex::indexDocumentFeatures(uint32_t doc_id, const PredicateIndex::FeatureMap<IntervalT> &interval_map) { +void +PredicateIndex::indexDocumentFeatures(uint32_t doc_id, const PredicateIndex::FeatureMap<IntervalT> &interval_map) { if (interval_map.empty()) { return; } @@ -80,11 +83,10 @@ public: } // namespace -PredicateIndex::PredicateIndex(GenerationHandler &generation_handler, GenerationHolder &genHolder, +PredicateIndex::PredicateIndex(GenerationHolder &genHolder, const DocIdLimitProvider &limit_provider, const SimpleIndexConfig &simple_index_config, uint32_t arity) : 
_arity(arity), - _generation_handler(generation_handler), _limit_provider(limit_provider), _interval_index(genHolder, limit_provider, simple_index_config), _bounds_index(genHolder, limit_provider, simple_index_config), @@ -95,12 +97,11 @@ PredicateIndex::PredicateIndex(GenerationHandler &generation_handler, Generation { } -PredicateIndex::PredicateIndex(GenerationHandler &generation_handler, GenerationHolder &genHolder, +PredicateIndex::PredicateIndex(GenerationHolder &genHolder, const DocIdLimitProvider &limit_provider, const SimpleIndexConfig &simple_index_config, DataBuffer &buffer, SimpleIndexDeserializeObserver<> & observer, uint32_t version) : _arity(0), - _generation_handler(generation_handler), _limit_provider(limit_provider), _interval_index(genHolder, limit_provider, simple_index_config), _bounds_index(genHolder, limit_provider, simple_index_config), @@ -121,15 +122,15 @@ PredicateIndex::PredicateIndex(GenerationHandler &generation_handler, Generation _zero_constraint_docs.assign(builder); IntervalDeserializer<Interval> interval_deserializer(_interval_store); _interval_index.deserialize(buffer, interval_deserializer, observer, version); - IntervalDeserializer<IntervalWithBounds> - bounds_deserializer(_interval_store); + IntervalDeserializer<IntervalWithBounds> bounds_deserializer(_interval_store); _bounds_index.deserialize(buffer, bounds_deserializer, observer, version); commit(); } PredicateIndex::~PredicateIndex() = default; -void PredicateIndex::serialize(DataBuffer &buffer) const { +void +PredicateIndex::serialize(DataBuffer &buffer) const { _features_store.serialize(buffer); buffer.writeInt16(_arity); buffer.writeInt32(_zero_constraint_docs.size()); @@ -142,25 +143,29 @@ void PredicateIndex::serialize(DataBuffer &buffer) const { _bounds_index.serialize(buffer, bounds_serializer); } -void PredicateIndex::onDeserializationCompleted() { +void +PredicateIndex::onDeserializationCompleted() { _interval_index.promoteOverThresholdVectors(); 
_bounds_index.promoteOverThresholdVectors(); } -void PredicateIndex::indexDocument(uint32_t doc_id, const PredicateTreeAnnotations &annotations) { +void +PredicateIndex::indexDocument(uint32_t doc_id, const PredicateTreeAnnotations &annotations) { indexDocumentFeatures(doc_id, annotations.interval_map); indexDocumentFeatures(doc_id, annotations.bounds_map); _features_store.insert(annotations, doc_id); } -void PredicateIndex::indexEmptyDocument(uint32_t doc_id) +void +PredicateIndex::indexEmptyDocument(uint32_t doc_id) { _zero_constraint_docs.insert(doc_id, vespalib::btree::BTreeNoLeafData::_instance); } namespace { -void removeFromIndex( - uint64_t feature, uint32_t doc_id, SimpleIndex<vespalib::datastore::EntryRef> &index, PredicateIntervalStore &interval_store) +void +removeFromIndex(uint64_t feature, uint32_t doc_id, SimpleIndex<vespalib::datastore::EntryRef> &index, + PredicateIntervalStore &interval_store) { auto result = index.removeFromPostingList(feature, doc_id); if (result.second) { // Posting was removed @@ -189,7 +194,8 @@ private: } // namespace -void PredicateIndex::removeDocument(uint32_t doc_id) { +void +PredicateIndex::removeDocument(uint32_t doc_id) { _zero_constraint_docs.remove(doc_id); auto features = _features_store.get(doc_id); @@ -203,27 +209,31 @@ void PredicateIndex::removeDocument(uint32_t doc_id) { _features_store.remove(doc_id); } -void PredicateIndex::commit() { +void +PredicateIndex::commit() { _interval_index.commit(); _bounds_index.commit(); _zero_constraint_docs.getAllocator().freeze(); } -void PredicateIndex::trimHoldLists(generation_t used_generation) { +void +PredicateIndex::trimHoldLists(generation_t used_generation) { _interval_index.trimHoldLists(used_generation); _bounds_index.trimHoldLists(used_generation); _interval_store.trimHoldLists(used_generation); _zero_constraint_docs.getAllocator().trimHoldLists(used_generation); } -void PredicateIndex::transferHoldLists(generation_t generation) { +void 
+PredicateIndex::transferHoldLists(generation_t generation) { _interval_index.transferHoldLists(generation); _bounds_index.transferHoldLists(generation); _interval_store.transferHoldLists(generation); _zero_constraint_docs.getAllocator().transferHoldLists(generation); } -vespalib::MemoryUsage PredicateIndex::getMemoryUsage() const { +vespalib::MemoryUsage +PredicateIndex::getMemoryUsage() const { // TODO Include bit vector cache memory usage vespalib::MemoryUsage combined; combined.merge(_interval_index.getMemoryUsage()); diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_index.h b/searchlib/src/vespa/searchlib/predicate/predicate_index.h index d2ed70694a2..f4c89a2b369 100644 --- a/searchlib/src/vespa/searchlib/predicate/predicate_index.h +++ b/searchlib/src/vespa/searchlib/predicate/predicate_index.h @@ -38,16 +38,15 @@ public: using BTreeIterator = SimpleIndex<vespalib::datastore::EntryRef>::BTreeIterator; using VectorIterator = SimpleIndex<vespalib::datastore::EntryRef>::VectorIterator; private: - uint32_t _arity; - GenerationHandler &_generation_handler; + uint32_t _arity; const DocIdLimitProvider &_limit_provider; - IntervalIndex _interval_index; - BoundsIndex _bounds_index; - PredicateIntervalStore _interval_store; - BTreeSet _zero_constraint_docs; + IntervalIndex _interval_index; + BoundsIndex _bounds_index; + PredicateIntervalStore _interval_store; + BTreeSet _zero_constraint_docs; - DocumentFeaturesStore _features_store; - mutable BitVectorCache _cache; + DocumentFeaturesStore _features_store; + mutable BitVectorCache _cache; template <typename IntervalT> void addPosting(uint64_t feature, uint32_t doc_id, vespalib::datastore::EntryRef ref); @@ -58,12 +57,12 @@ private: PopulateInterface::Iterator::UP lookup(uint64_t key) const override; public: - PredicateIndex(GenerationHandler &generation_handler, GenerationHolder &genHolder, + PredicateIndex(GenerationHolder &genHolder, const DocIdLimitProvider &limit_provider, const SimpleIndexConfig 
&simple_index_config, uint32_t arity); // deserializes PredicateIndex from buffer. // The observer can be used to gain some insight into what has been added to the index.. - PredicateIndex(GenerationHandler &generation_handler, GenerationHolder &genHolder, + PredicateIndex(GenerationHolder &genHolder, const DocIdLimitProvider &limit_provider, const SimpleIndexConfig &simple_index_config, vespalib::DataBuffer &buffer, SimpleIndexDeserializeObserver<> & observer, uint32_t version); diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_interval.cpp b/searchlib/src/vespa/searchlib/predicate/predicate_interval.cpp index a92c16de462..d98e8a151dc 100644 --- a/searchlib/src/vespa/searchlib/predicate/predicate_interval.cpp +++ b/searchlib/src/vespa/searchlib/predicate/predicate_interval.cpp @@ -5,14 +5,16 @@ namespace search::predicate { -std::ostream &operator<<(std::ostream &out, const Interval &i) { +std::ostream & +operator<<(std::ostream &out, const Interval &i) { std::ios_base::fmtflags flags = out.flags(); out << "0x" << std::hex << i.interval; out.flags(flags); return out; } -std::ostream &operator<<(std::ostream &out, const IntervalWithBounds &i) { +std::ostream & +operator<<(std::ostream &out, const IntervalWithBounds &i) { std::ios_base::fmtflags flags = out.flags(); out << "0x" << std::hex << i.interval << ", 0x" << i.bounds; out.flags(flags); diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_interval_posting_list.h b/searchlib/src/vespa/searchlib/predicate/predicate_interval_posting_list.h index f93d99b550b..33e15b2be33 100644 --- a/searchlib/src/vespa/searchlib/predicate/predicate_interval_posting_list.h +++ b/searchlib/src/vespa/searchlib/predicate/predicate_interval_posting_list.h @@ -14,10 +14,10 @@ namespace search::predicate { template<typename Iterator> class PredicateIntervalPostingList : public PredicatePostingList { const PredicateIntervalStore &_interval_store; - Iterator _iterator; - const Interval *_current_interval; - 
uint32_t _interval_count; - Interval _single_buf; + Iterator _iterator; + const Interval *_current_interval; + uint32_t _interval_count; + Interval _single_buf; public: PredicateIntervalPostingList(const PredicateIntervalStore &interval_store, Iterator it); @@ -46,7 +46,8 @@ PredicateIntervalPostingList<Iterator>::PredicateIntervalPostingList( } template<typename Iterator> -bool PredicateIntervalPostingList<Iterator>::next(uint32_t doc_id) { +bool +PredicateIntervalPostingList<Iterator>::next(uint32_t doc_id) { if (!_iterator.valid()) { return false; } diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_interval_store.cpp b/searchlib/src/vespa/searchlib/predicate/predicate_interval_store.cpp index 28c82cb7a97..13be0f0127b 100644 --- a/searchlib/src/vespa/searchlib/predicate/predicate_interval_store.cpp +++ b/searchlib/src/vespa/searchlib/predicate/predicate_interval_store.cpp @@ -21,7 +21,8 @@ PredicateIntervalStore::PredicateIntervalStore() : _store(), _size1Type(1, 1024u, RefType::offsetSize()), _store_adapter(_store), - _ref_cache(_store_adapter) { + _ref_cache(_store_adapter) +{ // This order determines type ids. _store.addType(&_size1Type); @@ -46,7 +47,8 @@ PredicateIntervalStore::~PredicateIntervalStore() { // anyway. // template <typename IntervalT> -EntryRef PredicateIntervalStore::insert(const vector<IntervalT> &intervals) { +EntryRef +PredicateIntervalStore::insert(const vector<IntervalT> &intervals) { const uint32_t size = entrySize<IntervalT>() * intervals.size(); if (size == 0) { return EntryRef(); @@ -81,7 +83,8 @@ EntryRef PredicateIntervalStore::insert(const vector<Interval> &); template EntryRef PredicateIntervalStore::insert(const vector<IntervalWithBounds> &); -void PredicateIntervalStore::remove(EntryRef ref) { +void +PredicateIntervalStore::remove(EntryRef ref) { if (ref.valid()) { uint32_t buffer_id = RefType(ref).bufferId(); if (buffer_id == 0) { // single interval optimization. 
@@ -96,11 +99,13 @@ void PredicateIntervalStore::remove(EntryRef ref) { } } -void PredicateIntervalStore::trimHoldLists(generation_t used_generation) { +void +PredicateIntervalStore::trimHoldLists(generation_t used_generation) { _store.trimHoldLists(used_generation); } -void PredicateIntervalStore::transferHoldLists(generation_t generation) { +void +PredicateIntervalStore::transferHoldLists(generation_t generation) { _store.transferHoldLists(generation); } diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_interval_store.h b/searchlib/src/vespa/searchlib/predicate/predicate_interval_store.h index e4573866eb8..5f55a2d3d5f 100644 --- a/searchlib/src/vespa/searchlib/predicate/predicate_interval_store.h +++ b/searchlib/src/vespa/searchlib/predicate/predicate_interval_store.h @@ -34,7 +34,7 @@ class PredicateIntervalStore { } }; DataStoreAdapter _store_adapter; - RefCacheType _ref_cache; + RefCacheType _ref_cache; // Return type for private allocation functions template <typename T> @@ -89,7 +89,8 @@ public: * single interval optimization. 
*/ template <typename IntervalT> - const IntervalT *get(vespalib::datastore::EntryRef btree_ref, + const IntervalT + *get(vespalib::datastore::EntryRef btree_ref, uint32_t &size_out, IntervalT *single_buf) const { diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_posting_list.h b/searchlib/src/vespa/searchlib/predicate/predicate_posting_list.h index 93e671f603f..50024913dcb 100644 --- a/searchlib/src/vespa/searchlib/predicate/predicate_posting_list.h +++ b/searchlib/src/vespa/searchlib/predicate/predicate_posting_list.h @@ -16,9 +16,9 @@ class PredicatePostingList { protected: PredicatePostingList() - : _docId(0), - _subquery(UINT64_MAX) { - } + : _docId(0), + _subquery(UINT64_MAX) + { } void setDocId(uint32_t docId) { _docId = docId; } diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_zstar_compressed_posting_list.h b/searchlib/src/vespa/searchlib/predicate/predicate_zstar_compressed_posting_list.h index 965c4ad3042..0268d2bdb0c 100644 --- a/searchlib/src/vespa/searchlib/predicate/predicate_zstar_compressed_posting_list.h +++ b/searchlib/src/vespa/searchlib/predicate/predicate_zstar_compressed_posting_list.h @@ -41,7 +41,8 @@ PredicateZstarCompressedPostingList<Iterator>::PredicateZstarCompressedPostingLi } template<typename Iterator> -bool PredicateZstarCompressedPostingList<Iterator>::next(uint32_t doc_id) { +bool +PredicateZstarCompressedPostingList<Iterator>::next(uint32_t doc_id) { if (_iterator.valid() && _iterator.getKey() <= doc_id) { _iterator.linearSeek(doc_id + 1); } @@ -57,7 +58,8 @@ bool PredicateZstarCompressedPostingList<Iterator>::next(uint32_t doc_id) { } template<typename Iterator> -bool PredicateZstarCompressedPostingList<Iterator>::nextInterval() { +bool +PredicateZstarCompressedPostingList<Iterator>::nextInterval() { uint32_t next_interval = UINT32_MAX; if (_interval_count > 1) { next_interval = _current_interval[1].interval; diff --git a/searchlib/src/vespa/searchlib/predicate/simple_index.cpp 
b/searchlib/src/vespa/searchlib/predicate/simple_index.cpp index 1b0db8f52d4..b0ef11e1c25 100644 --- a/searchlib/src/vespa/searchlib/predicate/simple_index.cpp +++ b/searchlib/src/vespa/searchlib/predicate/simple_index.cpp @@ -6,14 +6,12 @@ #include <vespa/vespalib/btree/btreeiterator.hpp> #include <vespa/vespalib/btree/btreestore.hpp> #include <vespa/vespalib/btree/btreenodeallocator.hpp> -#include <vespa/vespalib/util/array.hpp> #include <vespa/vespalib/datastore/buffer_type.hpp> #include <vespa/log/log.h> LOG_SETUP(".searchlib.predicate.simple_index"); -namespace search::predicate { - namespace simpleindex { +namespace search::predicate::simpleindex { bool log_enabled() { return LOG_WOULD_LOG(debug); @@ -25,6 +23,8 @@ void log_debug(vespalib::string &str) { } // namespace simpleindex +namespace search::predicate { + template class SimpleIndex<vespalib::datastore::EntryRef>; } diff --git a/searchlib/src/vespa/searchlib/predicate/simple_index.h b/searchlib/src/vespa/searchlib/predicate/simple_index.h index cfc288770c8..1398bb0817c 100644 --- a/searchlib/src/vespa/searchlib/predicate/simple_index.h +++ b/searchlib/src/vespa/searchlib/predicate/simple_index.h @@ -141,12 +141,12 @@ private: template <typename T> using optional = std::optional<T>; - Dictionary _dictionary; - BTreeStore _btree_posting_lists; - VectorStore _vector_posting_lists; - GenerationHolder &_generation_holder; - uint32_t _insert_remove_counter = 0; - const SimpleIndexConfig _config; + Dictionary _dictionary; + BTreeStore _btree_posting_lists; + VectorStore _vector_posting_lists; + GenerationHolder &_generation_holder; + uint32_t _insert_remove_counter = 0; + const SimpleIndexConfig _config; const DocIdLimitProvider &_limit_provider; void insertIntoPosting(vespalib::datastore::EntryRef &ref, Key key, DocId doc_id, const Posting &posting); @@ -164,7 +164,7 @@ private: bool shouldRemoveVectorPosting(size_t size, double ratio) const; size_t getVectorPostingSize(const PostingVector &vector) const { 
return std::min(vector.size(), - static_cast<size_t>(_limit_provider.getCommittedDocIdLimit())); + static_cast<size_t>(_limit_provider.getCommittedDocIdLimit())); } public: @@ -219,8 +219,8 @@ public: template<typename Posting, typename Key, typename DocId> template<typename FunctionType> -void SimpleIndex<Posting, Key, DocId>::foreach_frozen_key( - vespalib::datastore::EntryRef ref, Key key, FunctionType func) const { +void +SimpleIndex<Posting, Key, DocId>::foreach_frozen_key(vespalib::datastore::EntryRef ref, Key key, FunctionType func) const { auto it = _vector_posting_lists.getFrozenView().find(key); double ratio = getDocumentRatio(getDocumentCount(ref), _limit_provider.getDocIdLimit()); if (it.valid() && ratio > _config.foreach_vector_threshold) { diff --git a/searchlib/src/vespa/searchlib/predicate/simple_index.hpp b/searchlib/src/vespa/searchlib/predicate/simple_index.hpp index b49218f1ba6..ada77b9fe38 100644 --- a/searchlib/src/vespa/searchlib/predicate/simple_index.hpp +++ b/searchlib/src/vespa/searchlib/predicate/simple_index.hpp @@ -13,8 +13,8 @@ namespace simpleindex { } template <typename Posting, typename Key, typename DocId> -void SimpleIndex<Posting, Key, DocId>::insertIntoPosting( - vespalib::datastore::EntryRef &ref, Key key, DocId doc_id, const Posting &posting) { +void +SimpleIndex<Posting, Key, DocId>::insertIntoPosting(vespalib::datastore::EntryRef &ref, Key key, DocId doc_id, const Posting &posting) { bool ok = _btree_posting_lists.insert(ref, doc_id, posting); if (!ok) { _btree_posting_lists.remove(ref, doc_id); @@ -26,8 +26,8 @@ void SimpleIndex<Posting, Key, DocId>::insertIntoPosting( } template <typename Posting, typename Key, typename DocId> -void SimpleIndex<Posting, Key, DocId>::insertIntoVectorPosting( - vespalib::datastore::EntryRef ref, Key key, DocId doc_id, const Posting &posting) { +void +SimpleIndex<Posting, Key, DocId>::insertIntoVectorPosting(vespalib::datastore::EntryRef ref, Key key, DocId doc_id, const Posting &posting) { 
assert(doc_id < _limit_provider.getDocIdLimit()); auto it = _vector_posting_lists.find(key); if (it.valid()) { @@ -69,9 +69,8 @@ SimpleIndex<Posting, Key, DocId>::~SimpleIndex() { } template <typename Posting, typename Key, typename DocId> -void SimpleIndex<Posting, Key, DocId>::serialize( - vespalib::DataBuffer &buffer, - const PostingSerializer<Posting> &serializer) const { +void +SimpleIndex<Posting, Key, DocId>::serialize(vespalib::DataBuffer &buffer, const PostingSerializer<Posting> &serializer) const { assert(sizeof(Key) <= sizeof(uint64_t)); assert(sizeof(DocId) <= sizeof(uint32_t)); buffer.writeInt32(_dictionary.size()); @@ -90,10 +89,10 @@ void SimpleIndex<Posting, Key, DocId>::serialize( } template <typename Posting, typename Key, typename DocId> -void SimpleIndex<Posting, Key, DocId>::deserialize( - vespalib::DataBuffer &buffer, - PostingDeserializer<Posting> &deserializer, - SimpleIndexDeserializeObserver<Key, DocId> &observer, uint32_t version) { +void +SimpleIndex<Posting, Key, DocId>::deserialize(vespalib::DataBuffer &buffer, PostingDeserializer<Posting> &deserializer, + SimpleIndexDeserializeObserver<Key, DocId> &observer, uint32_t version) +{ typename Dictionary::Builder builder(_dictionary.getAllocator()); uint32_t size = buffer.readInt32(); std::vector<vespalib::btree::BTreeKeyData<DocId, Posting>> postings; @@ -128,8 +127,8 @@ void SimpleIndex<Posting, Key, DocId>::deserialize( } template <typename Posting, typename Key, typename DocId> -void SimpleIndex<Posting, Key, DocId>::addPosting(Key key, DocId doc_id, - const Posting &posting) { +void +SimpleIndex<Posting, Key, DocId>::addPosting(Key key, DocId doc_id, const Posting &posting) { auto iter = _dictionary.find(key); vespalib::datastore::EntryRef ref; if (iter.valid()) { @@ -178,8 +177,8 @@ SimpleIndex<Posting, Key, DocId>::removeFromPostingList(Key key, DocId doc_id) { } template <typename Posting, typename Key, typename DocId> -void SimpleIndex<Posting, Key, 
DocId>::removeFromVectorPostingList( - vespalib::datastore::EntryRef ref, Key key, DocId doc_id) { +void +SimpleIndex<Posting, Key, DocId>::removeFromVectorPostingList(vespalib::datastore::EntryRef ref, Key key, DocId doc_id) { auto it = _vector_posting_lists.find(key); if (it.valid()) { if (!removeVectorIfBelowThreshold(ref, it)) { @@ -189,7 +188,8 @@ void SimpleIndex<Posting, Key, DocId>::removeFromVectorPostingList( }; template <typename Posting, typename Key, typename DocId> -void SimpleIndex<Posting, Key, DocId>::pruneBelowThresholdVectors() { +void +SimpleIndex<Posting, Key, DocId>::pruneBelowThresholdVectors() { // Check if it is time to prune any vector postings if (++_insert_remove_counter % _config.vector_prune_frequency > 0) return; @@ -204,7 +204,8 @@ void SimpleIndex<Posting, Key, DocId>::pruneBelowThresholdVectors() { }; template <typename Posting, typename Key, typename DocId> -void SimpleIndex<Posting, Key, DocId>::promoteOverThresholdVectors() { +void +SimpleIndex<Posting, Key, DocId>::promoteOverThresholdVectors() { for (auto it = _dictionary.begin(); it.valid(); ++it) { Key key = it.getKey(); if (!_vector_posting_lists.find(key).valid()) { @@ -214,8 +215,8 @@ void SimpleIndex<Posting, Key, DocId>::promoteOverThresholdVectors() { } template <typename Posting, typename Key, typename DocId> -void SimpleIndex<Posting, Key, DocId>::logVector( - const char *action, Key key, size_t document_count, double ratio, size_t vector_length) const { +void +SimpleIndex<Posting, Key, DocId>::logVector(const char *action, Key key, size_t document_count, double ratio, size_t vector_length) const { if (!simpleindex::log_enabled()) return; auto msg = vespalib::make_string( "%s vector for key '%016" PRIx64 "' with length %zu. 
Contains %zu documents " @@ -227,7 +228,8 @@ void SimpleIndex<Posting, Key, DocId>::logVector( } template <typename Posting, typename Key, typename DocId> -void SimpleIndex<Posting, Key, DocId>::createVectorIfOverThreshold(vespalib::datastore::EntryRef ref, Key key) { +void +SimpleIndex<Posting, Key, DocId>::createVectorIfOverThreshold(vespalib::datastore::EntryRef ref, Key key) { uint32_t doc_id_limit = _limit_provider.getDocIdLimit(); size_t size = getDocumentCount(ref); double ratio = getDocumentRatio(size, doc_id_limit); @@ -242,8 +244,8 @@ void SimpleIndex<Posting, Key, DocId>::createVectorIfOverThreshold(vespalib::dat } template <typename Posting, typename Key, typename DocId> -bool SimpleIndex<Posting, Key, DocId>::removeVectorIfBelowThreshold( - vespalib::datastore::EntryRef ref, typename VectorStore::Iterator &it) { +bool +SimpleIndex<Posting, Key, DocId>::removeVectorIfBelowThreshold(vespalib::datastore::EntryRef ref, typename VectorStore::Iterator &it) { size_t size = getDocumentCount(ref); double ratio = getDocumentRatio(size, _limit_provider.getDocIdLimit()); if (shouldRemoveVectorPosting(size, ratio)) { @@ -257,36 +259,41 @@ bool SimpleIndex<Posting, Key, DocId>::removeVectorIfBelowThreshold( } template <typename Posting, typename Key, typename DocId> -double SimpleIndex<Posting, Key, DocId>::getDocumentRatio(size_t document_count, - uint32_t doc_id_limit) const { +double +SimpleIndex<Posting, Key, DocId>::getDocumentRatio(size_t document_count, uint32_t doc_id_limit) const { assert(doc_id_limit > 1); return document_count / static_cast<double>(doc_id_limit - 1); }; template <typename Posting, typename Key, typename DocId> -size_t SimpleIndex<Posting, Key, DocId>::getDocumentCount(vespalib::datastore::EntryRef ref) const { +size_t +SimpleIndex<Posting, Key, DocId>::getDocumentCount(vespalib::datastore::EntryRef ref) const { return _btree_posting_lists.size(ref); }; template <typename Posting, typename Key, typename DocId> -bool SimpleIndex<Posting, 
Key, DocId>::shouldRemoveVectorPosting(size_t size, double ratio) const { +bool +SimpleIndex<Posting, Key, DocId>::shouldRemoveVectorPosting(size_t size, double ratio) const { return size < _config.lower_vector_size_threshold || ratio < _config.lower_docid_freq_threshold; }; template <typename Posting, typename Key, typename DocId> -bool SimpleIndex<Posting, Key, DocId>::shouldCreateVectorPosting(size_t size, double ratio) const { +bool +SimpleIndex<Posting, Key, DocId>::shouldCreateVectorPosting(size_t size, double ratio) const { return size >= _config.upper_vector_size_threshold && ratio >= _config.upper_docid_freq_threshold; }; template <typename Posting, typename Key, typename DocId> -void SimpleIndex<Posting, Key, DocId>::commit() { +void +SimpleIndex<Posting, Key, DocId>::commit() { _dictionary.getAllocator().freeze(); _btree_posting_lists.freeze(); _vector_posting_lists.getAllocator().freeze(); } template <typename Posting, typename Key, typename DocId> -void SimpleIndex<Posting, Key, DocId>::trimHoldLists(generation_t used_generation) { +void +SimpleIndex<Posting, Key, DocId>::trimHoldLists(generation_t used_generation) { _btree_posting_lists.trimHoldLists(used_generation); _dictionary.getAllocator().trimHoldLists(used_generation); _vector_posting_lists.getAllocator().trimHoldLists(used_generation); @@ -294,14 +301,16 @@ void SimpleIndex<Posting, Key, DocId>::trimHoldLists(generation_t used_generatio } template <typename Posting, typename Key, typename DocId> -void SimpleIndex<Posting, Key, DocId>::transferHoldLists(generation_t generation) { +void +SimpleIndex<Posting, Key, DocId>::transferHoldLists(generation_t generation) { _dictionary.getAllocator().transferHoldLists(generation); _btree_posting_lists.transferHoldLists(generation); _vector_posting_lists.getAllocator().transferHoldLists(generation); } template <typename Posting, typename Key, typename DocId> -vespalib::MemoryUsage SimpleIndex<Posting, Key, DocId>::getMemoryUsage() const { 
+vespalib::MemoryUsage +SimpleIndex<Posting, Key, DocId>::getMemoryUsage() const { vespalib::MemoryUsage combined; combined.merge(_dictionary.getMemoryUsage()); combined.merge(_btree_posting_lists.getMemoryUsage()); diff --git a/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.cpp index 66acc2f0836..5f11f79aa4b 100644 --- a/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.cpp @@ -15,6 +15,7 @@ #include <vespa/vespalib/btree/btreenodeallocator.hpp> #include <vespa/vespalib/util/memory_allocator.h> #include <algorithm> + #include <vespa/log/log.h> LOG_SETUP(".searchlib.predicate.predicate_blueprint"); #include <vespa/searchlib/predicate/predicate_range_term_expander.h> @@ -73,11 +74,11 @@ struct MyRangeHandler { }; template <typename Entry> -void pushRangeDictionaryEntries( - const Entry &entry, - const PredicateIndex &index, - vector<IntervalEntry> &interval_entries, - vector<BoundsEntry> &bounds_entries) { +void +pushRangeDictionaryEntries(const Entry &entry, const PredicateIndex &index, + vector<IntervalEntry> &interval_entries, + vector<BoundsEntry> &bounds_entries) +{ PredicateRangeTermExpander expander(index.getArity()); MyRangeHandler handler{index.getIntervalIndex(), index.getBoundsIndex(), interval_entries, bounds_entries, entry.getSubQueryBitmap()}; @@ -202,16 +203,17 @@ PredicateBlueprint::PredicateBlueprint(const FieldSpecBase &field, }); - if (zero_constraints_docs.size() == 0 && + if ((zero_constraints_docs.size() == 0) && _interval_dict_entries.empty() && _bounds_dict_entries.empty() && - !_zstar_dict_entry.valid()) { + !_zstar_dict_entry.valid()) + { setEstimate(HitEstimate(0, true)); } else { setEstimate(HitEstimate(static_cast<uint32_t>(zero_constraints_docs.size()), false)); } } -PredicateBlueprint::~PredicateBlueprint() {} +PredicateBlueprint::~PredicateBlueprint() = default; namespace { @@ 
-277,29 +279,30 @@ PredicateBlueprint::createLeafSearch(const fef::TermFieldMatchDataArray &tfmda, PredicateAttribute::MinFeatureHandle mfh = attribute.getMinFeatureVector(); auto interval_range_vector = attribute.getIntervalRangeVector(); auto max_interval_range = attribute.getMaxIntervalRange(); - return SearchIterator::UP(new PredicateSearch(mfh.first, interval_range_vector, max_interval_range, _kV, - createPostingLists(), tfmda)); + return std::make_unique<PredicateSearch>(mfh.first, interval_range_vector, max_interval_range, _kV, + createPostingLists(), tfmda); } namespace { - template<typename IteratorEntry, typename PostingListFactory> - void createPredicatePostingLists(const std::vector<IteratorEntry> &iterator_entries, - std::vector<PredicatePostingList::UP> &posting_lists, - PostingListFactory posting_list_factory) - { - for (const auto &entry : iterator_entries) { - if (entry.iterator.valid()) { - auto posting_list = posting_list_factory(entry); - posting_list->setSubquery(entry.entry.subquery); - posting_lists.emplace_back(PredicatePostingList::UP(posting_list)); - } +template<typename IteratorEntry, typename PostingListFactory> +void createPredicatePostingLists(const std::vector<IteratorEntry> &iterator_entries, + std::vector<PredicatePostingList::UP> &posting_lists, + PostingListFactory posting_list_factory) +{ + for (const auto &entry : iterator_entries) { + if (entry.iterator.valid()) { + auto posting_list = posting_list_factory(entry); + posting_list->setSubquery(entry.entry.subquery); + posting_lists.emplace_back(PredicatePostingList::UP(posting_list)); } } +} } -std::vector<PredicatePostingList::UP> PredicateBlueprint::createPostingLists() const { +std::vector<PredicatePostingList::UP> +PredicateBlueprint::createPostingLists() const { size_t total_size = _interval_btree_iterators.size() + _interval_vector_iterators.size() + _bounds_btree_iterators.size() + _bounds_vector_iterators.size() + 2; std::vector<PredicatePostingList::UP> posting_lists; 
@@ -333,17 +336,15 @@ std::vector<PredicatePostingList::UP> PredicateBlueprint::createPostingLists() c }); if (_zstar_vector_iterator && _zstar_vector_iterator->valid()) { - auto posting_list = PredicatePostingList::UP( - new PredicateZstarCompressedPostingList<VectorIterator>(interval_store, *_zstar_vector_iterator)); + auto posting_list = std::make_unique<PredicateZstarCompressedPostingList<VectorIterator>>(interval_store, *_zstar_vector_iterator); posting_lists.emplace_back(std::move(posting_list)); } else if (_zstar_btree_iterator && _zstar_btree_iterator->valid()) { - auto posting_list = PredicatePostingList::UP( - new PredicateZstarCompressedPostingList<BTreeIterator>(interval_store, *_zstar_btree_iterator)); + auto posting_list = std::make_unique<PredicateZstarCompressedPostingList<BTreeIterator>>(interval_store, *_zstar_btree_iterator); posting_lists.emplace_back(std::move(posting_list)); } auto iterator = _index.getZeroConstraintDocs().begin(); if (iterator.valid()) { - auto posting_list = PredicatePostingList::UP(new PredicateZeroConstraintPostingList(iterator)); + auto posting_list = std::make_unique<PredicateZeroConstraintPostingList>(iterator); posting_lists.emplace_back(std::move(posting_list)); } return posting_lists; |