diff options
Diffstat (limited to 'searchlib/src/vespa/searchlib/attribute')
7 files changed, 69 insertions, 23 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index 5b17b491a20..635851f9f1d 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -11,7 +11,6 @@ #include "in_term_search.h" #include "multi_term_or_filter_search.h" #include "predicate_attribute.h" -#include <vespa/eval/eval/value.h> #include <vespa/searchcommon/attribute/config.h> #include <vespa/searchcommon/attribute/hit_estimate_flow_stats_adapter.h> #include <vespa/searchlib/common/location.h> @@ -94,6 +93,7 @@ using search::queryeval::StrictHeapOrSearch; using search::queryeval::WeightedSetTermBlueprint; using search::queryeval::flow::btree_cost; using search::queryeval::flow::btree_strict_cost; +using search::queryeval::flow::estimate_when_unknown; using search::queryeval::flow::get_num_indirections; using search::queryeval::flow::lookup_cost; using search::queryeval::flow::lookup_strict_cost; @@ -150,10 +150,9 @@ public: search::queryeval::FlowStats calculate_flow_stats(uint32_t docid_limit) const override { if (_hit_estimate.is_unknown()) { // E.g. attributes without fast-search are not able to provide a hit estimate. - // In this case we just assume matching half of the document corpus. // In addition, matching is lookup based, and we are not able to skip documents efficiently when being strict. size_t indirections = get_num_indirections(_attr.getBasicType(), _attr.getCollectionType()); - return {0.5, lookup_cost(indirections), lookup_strict_cost(indirections)}; + return {estimate_when_unknown(), lookup_cost(indirections), lookup_strict_cost(indirections)}; } else { double rel_est = abs_to_rel_est(_hit_estimate.est_hits(), docid_limit); return {rel_est, btree_cost(rel_est), btree_strict_cost(rel_est)}; @@ -443,7 +442,8 @@ private: class DirectWandBlueprint : public queryeval::ComplexLeafBlueprint { private: - mutable queryeval::SharedWeakAndPriorityQueue _scores; + using WeakAndPriorityQueue = queryeval::WeakAndPriorityQueue; + std::unique_ptr<WeakAndPriorityQueue> _scores; const queryeval::wand::score_t _scoreThreshold; double _thresholdBoostFactor; const uint32_t _scoresAdjustFrequency; @@ -452,14 +452,16 @@ private: const IDocidWithWeightPostingStore &_attr; vespalib::datastore::EntryRef _dictionary_snapshot; + public: DirectWandBlueprint(const FieldSpec &field, const IDocidWithWeightPostingStore &attr, uint32_t scoresToTrack, - queryeval::wand::score_t scoreThreshold, double thresholdBoostFactor, size_t size_hint) + queryeval::wand::score_t scoreThreshold, double thresholdBoostFactor, size_t size_hint, + bool thread_safe) : ComplexLeafBlueprint(field), - _scores(scoresToTrack), + _scores(WeakAndPriorityQueue::createHeap(scoresToTrack, thread_safe)), _scoreThreshold(scoreThreshold), _thresholdBoostFactor(thresholdBoostFactor), - _scoresAdjustFrequency(queryeval::DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY), + _scoresAdjustFrequency(queryeval::wand::DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY), _weights(), _terms(), _attr(attr), @@ -496,7 +498,7 @@ public: using OrFlow = search::queryeval::OrFlow; using MyAdapter = attribute::DirectPostingStoreFlowStatsAdapter; double child_est = OrFlow::estimate_of(MyAdapter(docid_limit), _terms); - double my_est = abs_to_rel_est(_scores.getScoresToTrack(), docid_limit); + double my_est = abs_to_rel_est(_scores->getScoresToTrack(), docid_limit); double est = (child_est + my_est) / 2.0; return {est, OrFlow::cost_of(MyAdapter(docid_limit), _terms, false), OrFlow::cost_of(MyAdapter(docid_limit), _terms, true) + queryeval::flow::heap_cost(est, _terms.size())}; @@ -508,9 +510,8 @@ public: return std::make_unique<queryeval::EmptySearch>(); } return queryeval::ParallelWeakAndSearch::create(*tfmda[0], - queryeval::ParallelWeakAndSearch::MatchParams(_scores, _scoreThreshold, - _thresholdBoostFactor, _scoresAdjustFrequency) - .setDocIdLimit(get_docid_limit()), + queryeval::ParallelWeakAndSearch::MatchParams(*_scores, _scoreThreshold, _thresholdBoostFactor, + _scoresAdjustFrequency, get_docid_limit()), _weights, _terms, _attr, strict()); } std::unique_ptr<SearchIterator> createFilterSearch(FilterConstraint constraint) const override; @@ -712,15 +713,12 @@ public: void visit(query::WandTerm &n) override { if (has_always_btree_iterators_with_docid_and_weight()) { - auto *bp = new DirectWandBlueprint(_field, *_dwwps, - n.getTargetNumHits(), n.getScoreThreshold(), n.getThresholdBoostFactor(), - n.getNumTerms()); + auto *bp = new DirectWandBlueprint(_field, *_dwwps, n.getTargetNumHits(), n.getScoreThreshold(), + n.getThresholdBoostFactor(), n.getNumTerms(), is_search_multi_threaded()); createDirectMultiTerm(bp, n); } else { - auto *bp = new ParallelWeakAndBlueprint(_field, - n.getTargetNumHits(), - n.getScoreThreshold(), - n.getThresholdBoostFactor()); + auto *bp = new ParallelWeakAndBlueprint(_field, n.getTargetNumHits(), n.getScoreThreshold(), + n.getThresholdBoostFactor(), is_search_multi_threaded()); createShallowWeightedSet(bp, n, _field, _attr.isIntegerType()); } } diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h index e2928710a32..ac6fc6f603a 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h @@ -16,15 +16,18 @@ struct AttributeBlueprintParams double global_filter_upper_limit; double target_hits_max_adjustment_factor; vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm; + double weakand_range; AttributeBlueprintParams(double global_filter_lower_limit_in, double global_filter_upper_limit_in, double target_hits_max_adjustment_factor_in, - vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm_in) + vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm_in, + double weakand_range_in) : global_filter_lower_limit(global_filter_lower_limit_in), global_filter_upper_limit(global_filter_upper_limit_in), target_hits_max_adjustment_factor(target_hits_max_adjustment_factor_in), - fuzzy_matching_algorithm(fuzzy_matching_algorithm_in) + fuzzy_matching_algorithm(fuzzy_matching_algorithm_in), + weakand_range(weakand_range_in) { } @@ -32,7 +35,8 @@ struct AttributeBlueprintParams : AttributeBlueprintParams(fef::indexproperties::matching::GlobalFilterLowerLimit::DEFAULT_VALUE, fef::indexproperties::matching::GlobalFilterUpperLimit::DEFAULT_VALUE, fef::indexproperties::matching::TargetHitsMaxAdjustmentFactor::DEFAULT_VALUE, - fef::indexproperties::matching::FuzzyAlgorithm::DEFAULT_VALUE) + fef::indexproperties::matching::FuzzyAlgorithm::DEFAULT_VALUE, + fef::indexproperties::temporary::WeakAndRange::DEFAULT_VALUE) { } }; diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp index 928023c0f94..e2d7f0fe312 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp @@ -132,7 +132,7 @@ AttributeWeightedSetBlueprint::calculate_flow_stats(uint32_t docid_limit) const using MyAdapter = attribute::HitEstimateFlowStatsAdapter; size_t num_indirections = queryeval::flow::get_num_indirections(_attr.getBasicType(), _attr.getCollectionType()); double est = OrFlow::estimate_of(MyAdapter(docid_limit, num_indirections), _estimates); - return {est, OrFlow::cost_of(MyAdapter(docid_limit, num_indirections), _estimates, false), + return {est, queryeval::flow::reverse_hash_lookup(), OrFlow::cost_of(MyAdapter(docid_limit, num_indirections), _estimates, true) + queryeval::flow::heap_cost(est, _estimates.size())}; } diff --git a/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.cpp b/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.cpp index e20d02afe50..6762c0516b2 100644 --- a/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.cpp +++ b/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.cpp @@ -3,6 +3,7 @@ #include "bitvector_search_cache.h" #include <vespa/searchlib/common/bitvector.h> #include <vespa/vespalib/stllike/hash_map.hpp> +#include <vespa/vespalib/util/memoryusage.h> #include <mutex> namespace search::attribute { @@ -10,6 +11,7 @@ namespace search::attribute { BitVectorSearchCache::BitVectorSearchCache() : _mutex(), _size(0), + _entries_extra_memory_usage(0), _cache() {} @@ -18,9 +20,19 @@ BitVectorSearchCache::~BitVectorSearchCache() = default; void BitVectorSearchCache::insert(const vespalib::string &term, std::shared_ptr<Entry> entry) { + size_t entry_extra_memory_usage = 0; + if (entry) { + entry_extra_memory_usage = sizeof(Entry); + if (entry->bitVector) { + entry_extra_memory_usage += entry->bitVector->getFileBytes(); + } + } std::unique_lock guard(_mutex); - _cache.insert(std::make_pair(term, std::move(entry))); + auto ins_res = _cache.insert(std::make_pair(term, std::move(entry))); _size.store(_cache.size()); + if (ins_res.second) { + _entries_extra_memory_usage += entry_extra_memory_usage; + } } std::shared_ptr<BitVectorSearchCache::Entry> @@ -36,12 +48,25 @@ BitVectorSearchCache::find(const vespalib::string &term) const return {}; } +vespalib::MemoryUsage +BitVectorSearchCache::get_memory_usage() const +{ + std::lock_guard guard(_mutex); + size_t cache_memory_consumption = _cache.getMemoryConsumption(); + size_t cache_memory_used = _cache.getMemoryUsed(); + size_t self_memory_used = sizeof(BitVectorSearchCache) - sizeof(_cache); + size_t allocated = self_memory_used + cache_memory_consumption + _entries_extra_memory_usage; + size_t used = self_memory_used + cache_memory_used + _entries_extra_memory_usage; + return vespalib::MemoryUsage(allocated, used, 0, 0); +} + void BitVectorSearchCache::clear() { std::unique_lock guard(_mutex); _cache.clear(); _size.store(0ul, std::memory_order_relaxed); + _entries_extra_memory_usage = 0; } } diff --git a/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.h b/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.h index 233f8315aaf..3a38cdcea26 100644 --- a/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.h +++ b/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.h @@ -10,6 +10,8 @@ #include <atomic> namespace search { class BitVector; } +namespace vespalib { class MemoryUsage; } + namespace search::attribute { /** @@ -37,6 +39,7 @@ private: mutable std::shared_mutex _mutex; std::atomic<uint64_t> _size; + size_t _entries_extra_memory_usage; Cache _cache; public: @@ -45,6 +48,7 @@ public: void insert(const vespalib::string &term, std::shared_ptr<Entry> entry); std::shared_ptr<Entry> find(const vespalib::string &term) const; size_t size() const { return _size.load(std::memory_order_relaxed); } + vespalib::MemoryUsage get_memory_usage() const; void clear(); }; diff --git a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.cpp b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.cpp index 029dc155785..f6a33165f0c 100644 --- a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.cpp +++ b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.cpp @@ -3,6 +3,7 @@ #include "imported_attribute_vector.h" #include "imported_attribute_vector_read_guard.h" #include "imported_search_context.h" +#include <vespa/vespalib/util/memoryusage.h> namespace search::attribute { @@ -58,4 +59,15 @@ void ImportedAttributeVector::clearSearchCache() { } } +vespalib::MemoryUsage +ImportedAttributeVector::get_memory_usage() const +{ + constexpr auto self_memory_usage = sizeof(ImportedAttributeVector); + vespalib::MemoryUsage result(self_memory_usage, self_memory_usage, 0, 0); + if (_search_cache) { + result.merge(_search_cache->get_memory_usage()); + } + return result; +} + } diff --git a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.h b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.h index bd018df5273..5b68957b7f5 100644 --- a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.h +++ b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.h @@ -6,6 +6,8 @@ #include <vespa/searchcommon/attribute/i_document_meta_store_context.h> #include <vespa/vespalib/stllike/string.h> +namespace vespalib { class MemoryUsage; } + namespace search::attribute { class BitVectorSearchCache; @@ -62,6 +64,7 @@ public: std::unique_ptr<AttributeReadGuard> makeReadGuard(bool stableEnumGuard) const override; virtual std::unique_ptr<AttributeReadGuard> makeReadGuard(std::shared_ptr<MetaStoreReadGuard> targetMetaStoreReadGuard, bool stableEnumGuard) const; + vespalib::MemoryUsage get_memory_usage() const; protected: vespalib::string _name; |