about summary refs log tree commit diff stats
path: root/searchlib/src/vespa/searchlib/attribute
diff options
context:
space:
mode:
Diffstat (limited to 'searchlib/src/vespa/searchlib/attribute')
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp 34
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h 10
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.cpp 27
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.h 4
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.cpp 12
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.h 3
7 files changed, 69 insertions, 23 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
index 5b17b491a20..635851f9f1d 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
@@ -11,7 +11,6 @@
#include "in_term_search.h"
#include "multi_term_or_filter_search.h"
#include "predicate_attribute.h"
-#include <vespa/eval/eval/value.h>
#include <vespa/searchcommon/attribute/config.h>
#include <vespa/searchcommon/attribute/hit_estimate_flow_stats_adapter.h>
#include <vespa/searchlib/common/location.h>
@@ -94,6 +93,7 @@ using search::queryeval::StrictHeapOrSearch;
using search::queryeval::WeightedSetTermBlueprint;
using search::queryeval::flow::btree_cost;
using search::queryeval::flow::btree_strict_cost;
+using search::queryeval::flow::estimate_when_unknown;
using search::queryeval::flow::get_num_indirections;
using search::queryeval::flow::lookup_cost;
using search::queryeval::flow::lookup_strict_cost;
@@ -150,10 +150,9 @@ public:
search::queryeval::FlowStats calculate_flow_stats(uint32_t docid_limit) const override {
if (_hit_estimate.is_unknown()) {
// E.g. attributes without fast-search are not able to provide a hit estimate.
- // In this case we just assume matching half of the document corpus.
// In addition, matching is lookup based, and we are not able to skip documents efficiently when being strict.
size_t indirections = get_num_indirections(_attr.getBasicType(), _attr.getCollectionType());
- return {0.5, lookup_cost(indirections), lookup_strict_cost(indirections)};
+ return {estimate_when_unknown(), lookup_cost(indirections), lookup_strict_cost(indirections)};
} else {
double rel_est = abs_to_rel_est(_hit_estimate.est_hits(), docid_limit);
return {rel_est, btree_cost(rel_est), btree_strict_cost(rel_est)};
@@ -443,7 +442,8 @@ private:
class DirectWandBlueprint : public queryeval::ComplexLeafBlueprint
{
private:
- mutable queryeval::SharedWeakAndPriorityQueue _scores;
+ using WeakAndPriorityQueue = queryeval::WeakAndPriorityQueue;
+ std::unique_ptr<WeakAndPriorityQueue> _scores;
const queryeval::wand::score_t _scoreThreshold;
double _thresholdBoostFactor;
const uint32_t _scoresAdjustFrequency;
@@ -452,14 +452,16 @@ private:
const IDocidWithWeightPostingStore &_attr;
vespalib::datastore::EntryRef _dictionary_snapshot;
+
public:
DirectWandBlueprint(const FieldSpec &field, const IDocidWithWeightPostingStore &attr, uint32_t scoresToTrack,
- queryeval::wand::score_t scoreThreshold, double thresholdBoostFactor, size_t size_hint)
+ queryeval::wand::score_t scoreThreshold, double thresholdBoostFactor, size_t size_hint,
+ bool thread_safe)
: ComplexLeafBlueprint(field),
- _scores(scoresToTrack),
+ _scores(WeakAndPriorityQueue::createHeap(scoresToTrack, thread_safe)),
_scoreThreshold(scoreThreshold),
_thresholdBoostFactor(thresholdBoostFactor),
- _scoresAdjustFrequency(queryeval::DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY),
+ _scoresAdjustFrequency(queryeval::wand::DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY),
_weights(),
_terms(),
_attr(attr),
@@ -496,7 +498,7 @@ public:
using OrFlow = search::queryeval::OrFlow;
using MyAdapter = attribute::DirectPostingStoreFlowStatsAdapter;
double child_est = OrFlow::estimate_of(MyAdapter(docid_limit), _terms);
- double my_est = abs_to_rel_est(_scores.getScoresToTrack(), docid_limit);
+ double my_est = abs_to_rel_est(_scores->getScoresToTrack(), docid_limit);
double est = (child_est + my_est) / 2.0;
return {est, OrFlow::cost_of(MyAdapter(docid_limit), _terms, false),
OrFlow::cost_of(MyAdapter(docid_limit), _terms, true) + queryeval::flow::heap_cost(est, _terms.size())};
@@ -508,9 +510,8 @@ public:
return std::make_unique<queryeval::EmptySearch>();
}
return queryeval::ParallelWeakAndSearch::create(*tfmda[0],
- queryeval::ParallelWeakAndSearch::MatchParams(_scores, _scoreThreshold,
- _thresholdBoostFactor, _scoresAdjustFrequency)
- .setDocIdLimit(get_docid_limit()),
+ queryeval::ParallelWeakAndSearch::MatchParams(*_scores, _scoreThreshold, _thresholdBoostFactor,
+ _scoresAdjustFrequency, get_docid_limit()),
_weights, _terms, _attr, strict());
}
std::unique_ptr<SearchIterator> createFilterSearch(FilterConstraint constraint) const override;
@@ -712,15 +713,12 @@ public:
void visit(query::WandTerm &n) override {
if (has_always_btree_iterators_with_docid_and_weight()) {
- auto *bp = new DirectWandBlueprint(_field, *_dwwps,
- n.getTargetNumHits(), n.getScoreThreshold(), n.getThresholdBoostFactor(),
- n.getNumTerms());
+ auto *bp = new DirectWandBlueprint(_field, *_dwwps, n.getTargetNumHits(), n.getScoreThreshold(),
+ n.getThresholdBoostFactor(), n.getNumTerms(), is_search_multi_threaded());
createDirectMultiTerm(bp, n);
} else {
- auto *bp = new ParallelWeakAndBlueprint(_field,
- n.getTargetNumHits(),
- n.getScoreThreshold(),
- n.getThresholdBoostFactor());
+ auto *bp = new ParallelWeakAndBlueprint(_field, n.getTargetNumHits(), n.getScoreThreshold(),
+ n.getThresholdBoostFactor(), is_search_multi_threaded());
createShallowWeightedSet(bp, n, _field, _attr.isIntegerType());
}
}
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h
index e2928710a32..ac6fc6f603a 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h
@@ -16,15 +16,18 @@ struct AttributeBlueprintParams
double global_filter_upper_limit;
double target_hits_max_adjustment_factor;
vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm;
+ double weakand_range;
AttributeBlueprintParams(double global_filter_lower_limit_in,
double global_filter_upper_limit_in,
double target_hits_max_adjustment_factor_in,
- vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm_in)
+ vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm_in,
+ double weakand_range_in)
: global_filter_lower_limit(global_filter_lower_limit_in),
global_filter_upper_limit(global_filter_upper_limit_in),
target_hits_max_adjustment_factor(target_hits_max_adjustment_factor_in),
- fuzzy_matching_algorithm(fuzzy_matching_algorithm_in)
+ fuzzy_matching_algorithm(fuzzy_matching_algorithm_in),
+ weakand_range(weakand_range_in)
{
}
@@ -32,7 +35,8 @@ struct AttributeBlueprintParams
: AttributeBlueprintParams(fef::indexproperties::matching::GlobalFilterLowerLimit::DEFAULT_VALUE,
fef::indexproperties::matching::GlobalFilterUpperLimit::DEFAULT_VALUE,
fef::indexproperties::matching::TargetHitsMaxAdjustmentFactor::DEFAULT_VALUE,
- fef::indexproperties::matching::FuzzyAlgorithm::DEFAULT_VALUE)
+ fef::indexproperties::matching::FuzzyAlgorithm::DEFAULT_VALUE,
+ fef::indexproperties::temporary::WeakAndRange::DEFAULT_VALUE)
{
}
};
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp
index 928023c0f94..e2d7f0fe312 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp
@@ -132,7 +132,7 @@ AttributeWeightedSetBlueprint::calculate_flow_stats(uint32_t docid_limit) const
using MyAdapter = attribute::HitEstimateFlowStatsAdapter;
size_t num_indirections = queryeval::flow::get_num_indirections(_attr.getBasicType(), _attr.getCollectionType());
double est = OrFlow::estimate_of(MyAdapter(docid_limit, num_indirections), _estimates);
- return {est, OrFlow::cost_of(MyAdapter(docid_limit, num_indirections), _estimates, false),
+ return {est, queryeval::flow::reverse_hash_lookup(),
OrFlow::cost_of(MyAdapter(docid_limit, num_indirections), _estimates, true) + queryeval::flow::heap_cost(est, _estimates.size())};
}
diff --git a/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.cpp b/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.cpp
index e20d02afe50..6762c0516b2 100644
--- a/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.cpp
@@ -3,6 +3,7 @@
#include "bitvector_search_cache.h"
#include <vespa/searchlib/common/bitvector.h>
#include <vespa/vespalib/stllike/hash_map.hpp>
+#include <vespa/vespalib/util/memoryusage.h>
#include <mutex>
namespace search::attribute {
@@ -10,6 +11,7 @@ namespace search::attribute {
BitVectorSearchCache::BitVectorSearchCache()
: _mutex(),
_size(0),
+ _entries_extra_memory_usage(0),
_cache()
{}
@@ -18,9 +20,19 @@ BitVectorSearchCache::~BitVectorSearchCache() = default;
void
BitVectorSearchCache::insert(const vespalib::string &term, std::shared_ptr<Entry> entry)
{
+ size_t entry_extra_memory_usage = 0;
+ if (entry) {
+ entry_extra_memory_usage = sizeof(Entry);
+ if (entry->bitVector) {
+ entry_extra_memory_usage += entry->bitVector->getFileBytes();
+ }
+ }
std::unique_lock guard(_mutex);
- _cache.insert(std::make_pair(term, std::move(entry)));
+ auto ins_res = _cache.insert(std::make_pair(term, std::move(entry)));
_size.store(_cache.size());
+ if (ins_res.second) {
+ _entries_extra_memory_usage += entry_extra_memory_usage;
+ }
}
std::shared_ptr<BitVectorSearchCache::Entry>
@@ -36,12 +48,25 @@ BitVectorSearchCache::find(const vespalib::string &term) const
return {};
}
+vespalib::MemoryUsage
+BitVectorSearchCache::get_memory_usage() const
+{
+ std::lock_guard guard(_mutex);
+ size_t cache_memory_consumption = _cache.getMemoryConsumption();
+ size_t cache_memory_used = _cache.getMemoryUsed();
+ size_t self_memory_used = sizeof(BitVectorSearchCache) - sizeof(_cache);
+ size_t allocated = self_memory_used + cache_memory_consumption + _entries_extra_memory_usage;
+ size_t used = self_memory_used + cache_memory_used + _entries_extra_memory_usage;
+ return vespalib::MemoryUsage(allocated, used, 0, 0);
+}
+
void
BitVectorSearchCache::clear()
{
std::unique_lock guard(_mutex);
_cache.clear();
_size.store(0ul, std::memory_order_relaxed);
+ _entries_extra_memory_usage = 0;
}
}
diff --git a/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.h b/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.h
index 233f8315aaf..3a38cdcea26 100644
--- a/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.h
+++ b/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.h
@@ -10,6 +10,8 @@
#include <atomic>
namespace search { class BitVector; }
+namespace vespalib { class MemoryUsage; }
+
namespace search::attribute {
/**
@@ -37,6 +39,7 @@ private:
mutable std::shared_mutex _mutex;
std::atomic<uint64_t> _size;
+ size_t _entries_extra_memory_usage;
Cache _cache;
public:
@@ -45,6 +48,7 @@ public:
void insert(const vespalib::string &term, std::shared_ptr<Entry> entry);
std::shared_ptr<Entry> find(const vespalib::string &term) const;
size_t size() const { return _size.load(std::memory_order_relaxed); }
+ vespalib::MemoryUsage get_memory_usage() const;
void clear();
};
diff --git a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.cpp b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.cpp
index 029dc155785..f6a33165f0c 100644
--- a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.cpp
@@ -3,6 +3,7 @@
#include "imported_attribute_vector.h"
#include "imported_attribute_vector_read_guard.h"
#include "imported_search_context.h"
+#include <vespa/vespalib/util/memoryusage.h>
namespace search::attribute {
@@ -58,4 +59,15 @@ void ImportedAttributeVector::clearSearchCache() {
}
}
+vespalib::MemoryUsage
+ImportedAttributeVector::get_memory_usage() const
+{
+ constexpr auto self_memory_usage = sizeof(ImportedAttributeVector);
+ vespalib::MemoryUsage result(self_memory_usage, self_memory_usage, 0, 0);
+ if (_search_cache) {
+ result.merge(_search_cache->get_memory_usage());
+ }
+ return result;
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.h b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.h
index bd018df5273..5b68957b7f5 100644
--- a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.h
+++ b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.h
@@ -6,6 +6,8 @@
#include <vespa/searchcommon/attribute/i_document_meta_store_context.h>
#include <vespa/vespalib/stllike/string.h>
+namespace vespalib { class MemoryUsage; }
+
namespace search::attribute {
class BitVectorSearchCache;
@@ -62,6 +64,7 @@ public:
std::unique_ptr<AttributeReadGuard> makeReadGuard(bool stableEnumGuard) const override;
virtual std::unique_ptr<AttributeReadGuard> makeReadGuard(std::shared_ptr<MetaStoreReadGuard> targetMetaStoreReadGuard, bool stableEnumGuard) const;
+ vespalib::MemoryUsage get_memory_usage() const;
protected:
vespalib::string _name;