summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTor Egge <tegge@vespa.ai>2024-02-13 16:41:39 +0100
committerGitHub <noreply@github.com>2024-02-13 16:41:39 +0100
commitdf26263159179ffd110041076f25ab561115091c (patch)
tree79566000bee85d8a5998898d7b3c1c1b42f11916
parent3ccbe9b115761a54632c772686e81e6d0038c3ab (diff)
parent295eb7bc9a0145d46b574474fe8047f9b2202196 (diff)
Merge pull request #30260 from vespa-engine/toregge/use-array-store-in-search-predicate-document-features-store
Use array store in search::predicate::DocumentFeaturesStore.
-rw-r--r--searchlib/src/tests/predicate/document_features_store_test.cpp14
-rw-r--r--searchlib/src/vespa/searchlib/predicate/document_features_store.cpp294
-rw-r--r--searchlib/src/vespa/searchlib/predicate/document_features_store.h37
-rw-r--r--searchlib/src/vespa/searchlib/predicate/predicate_index.cpp3
4 files changed, 244 insertions, 104 deletions
diff --git a/searchlib/src/tests/predicate/document_features_store_test.cpp b/searchlib/src/tests/predicate/document_features_store_test.cpp
index c09ca1d61c7..fd30041deec 100644
--- a/searchlib/src/tests/predicate/document_features_store_test.cpp
+++ b/searchlib/src/tests/predicate/document_features_store_test.cpp
@@ -165,17 +165,17 @@ TEST("require that both features and ranges are removed by 'remove'") {
TEST("require that both features and ranges counts towards memory usage") {
DocumentFeaturesStore features_store(10);
- EXPECT_EQUAL(50064u, features_store.getMemoryUsage().usedBytes());
+ EXPECT_EQUAL(562024u, features_store.getMemoryUsage().usedBytes());
PredicateTreeAnnotations annotations;
annotations.features.push_back(PredicateHash::hash64("foo=100-199"));
features_store.insert(annotations, doc_id);
- EXPECT_EQUAL(50072u, features_store.getMemoryUsage().usedBytes());
+ EXPECT_EQUAL(562376u, features_store.getMemoryUsage().usedBytes());
annotations.features.clear();
annotations.range_features.push_back({"foo", 100, 199});
features_store.insert(annotations, doc_id + 1);
- EXPECT_EQUAL(50168u, features_store.getMemoryUsage().usedBytes());
+ EXPECT_EQUAL(562480u, features_store.getMemoryUsage().usedBytes());
}
TEST("require that DocumentFeaturesStore can be serialized") {
@@ -205,17 +205,17 @@ TEST("require that serialization cleans up wordstore") {
PredicateTreeAnnotations annotations;
annotations.range_features.push_back({"foo", 100, 199});
features_store.insert(annotations, doc_id);
- EXPECT_EQUAL(50160u, features_store.getMemoryUsage().usedBytes());
+ EXPECT_EQUAL(562464u, features_store.getMemoryUsage().usedBytes());
annotations.range_features.push_back({"bar", 100, 199});
features_store.insert(annotations, doc_id + 1);
- EXPECT_EQUAL(50548u, features_store.getMemoryUsage().usedBytes());
+ EXPECT_EQUAL(562524u, features_store.getMemoryUsage().usedBytes());
features_store.remove(doc_id + 1);
- EXPECT_EQUAL(50500u, features_store.getMemoryUsage().usedBytes());
+ EXPECT_EQUAL(562524u, features_store.getMemoryUsage().usedBytes());
vespalib::DataBuffer buffer;
features_store.serialize(buffer);
DocumentFeaturesStore features_store2(buffer);
- EXPECT_EQUAL(50160u, features_store2.getMemoryUsage().usedBytes());
+ EXPECT_EQUAL(562464u, features_store2.getMemoryUsage().usedBytes());
}
diff --git a/searchlib/src/vespa/searchlib/predicate/document_features_store.cpp b/searchlib/src/vespa/searchlib/predicate/document_features_store.cpp
index 1f580d707e8..6653dc5733b 100644
--- a/searchlib/src/vespa/searchlib/predicate/document_features_store.cpp
+++ b/searchlib/src/vespa/searchlib/predicate/document_features_store.cpp
@@ -2,12 +2,17 @@
#include "document_features_store.h"
#include "predicate_range_expander.h"
-#include <vespa/vespalib/stllike/hash_map.hpp>
#include <vespa/vespalib/btree/btree.hpp>
#include <vespa/vespalib/btree/btreeroot.hpp>
#include <vespa/vespalib/btree/btreenodeallocator.hpp>
+#include <vespa/vespalib/datastore/array_store.hpp>
+#include <vespa/vespalib/datastore/buffer_type.hpp>
+#include <vespa/vespalib/datastore/array_store_dynamic_type_mapper.hpp>
+#include <vespa/vespalib/datastore/dynamic_array_buffer_type.hpp>
using vespalib::btree::BTreeNoLeafData;
+using vespalib::datastore::ArrayStore;
+using vespalib::datastore::ArrayStoreConfig;
using vespalib::datastore::EntryRef;
using vespalib::DataBuffer;
using vespalib::stringref;
@@ -16,20 +21,64 @@ using std::vector;
namespace search::predicate {
+namespace {
+
+constexpr double array_store_grow_factor = 1.03;
+constexpr uint32_t array_store_max_type_id = 300;
+constexpr float alloc_grow_factor = 0.2;
+constexpr size_t max_buffer_size = ArrayStoreConfig::default_max_buffer_size;
+
+}
+
+DocumentFeaturesStore::FeaturesStoreTypeMapper
+DocumentFeaturesStore::make_features_store_type_mapper()
+{
+ return FeaturesStoreTypeMapper(array_store_max_type_id, array_store_grow_factor, max_buffer_size);
+}
+
+ArrayStoreConfig
+DocumentFeaturesStore::make_features_store_config()
+{
+ auto mapper = make_features_store_type_mapper();
+ auto result = FeaturesStore::optimizedConfigForHugePage(array_store_max_type_id, mapper, vespalib::alloc::MemoryAllocator::HUGEPAGE_SIZE, vespalib::alloc::MemoryAllocator::PAGE_SIZE, max_buffer_size, 8_Ki, alloc_grow_factor);
+ result.enable_free_lists(true);
+ return result;
+}
+
+DocumentFeaturesStore::RangesStoreTypeMapper
+DocumentFeaturesStore::make_ranges_store_type_mapper()
+{
+ return RangesStoreTypeMapper(array_store_max_type_id, array_store_grow_factor, max_buffer_size);
+}
+
+ArrayStoreConfig
+DocumentFeaturesStore::make_ranges_store_config()
+{
+ auto mapper = make_ranges_store_type_mapper();
+ auto result = RangesStore::optimizedConfigForHugePage(array_store_max_type_id, mapper, vespalib::alloc::MemoryAllocator::HUGEPAGE_SIZE, vespalib::alloc::MemoryAllocator::PAGE_SIZE, max_buffer_size, 8_Ki, alloc_grow_factor);
+ result.enable_free_lists(true);
+ return result;
+}
+
DocumentFeaturesStore::DocumentFeaturesStore(uint32_t arity)
- : _docs(),
- _ranges(),
+ : _refs(),
+ _features(make_features_store_config(),
+ std::shared_ptr<vespalib::alloc::MemoryAllocator>(),
+ make_features_store_type_mapper()),
+ _ranges(make_ranges_store_config(),
+ std::shared_ptr<vespalib::alloc::MemoryAllocator>(),
+ make_ranges_store_type_mapper()),
_word_store(),
_word_index(),
- _numFeatures(0),
- _numRanges(0),
- _arity(arity) {
+ _arity(arity)
+{
}
namespace {
template <typename KeyComp, typename WordIndex>
void
-deserializeWords(DataBuffer &buffer, memoryindex::WordStore &word_store, WordIndex &word_index, vector<EntryRef> &word_refs) {
+deserializeWords(DataBuffer &buffer, memoryindex::WordStore &word_store, WordIndex &word_index, vector<EntryRef> &word_refs)
+{
uint32_t word_list_size = buffer.readInt32();
word_refs.reserve(word_list_size);
vector<char> word;
@@ -44,15 +93,22 @@ deserializeWords(DataBuffer &buffer, memoryindex::WordStore &word_store, WordInd
}
}
-template <typename RangeFeaturesMap>
+template <typename RefsVector, typename RangesStore>
void
-deserializeRanges(DataBuffer &buffer, vector<EntryRef> &word_refs, RangeFeaturesMap &ranges, size_t &num_ranges) {
- using Range = typename RangeFeaturesMap::mapped_type::value_type;
+deserialize_ranges(DataBuffer &buffer, vector<EntryRef> &word_refs, RefsVector& refs, RangesStore& ranges)
+{
+ using Range = typename RangesStore::ElemType;
+ std::vector<Range> range_vector;
uint32_t ranges_size = buffer.readInt32();
for (uint32_t i = 0; i < ranges_size; ++i) {
uint32_t doc_id = buffer.readInt32();
+ if (doc_id >= refs.size()) {
+ refs.resize(doc_id + 1);
+ }
+ auto& cur_refs = refs[doc_id];
+ assert(!cur_refs._ranges.valid());
uint32_t range_count = buffer.readInt32();
- auto &range_vector = ranges[doc_id];
+ range_vector.clear();
range_vector.reserve(range_count);
for (uint32_t j = 0; j < range_count; ++j) {
Range range;
@@ -61,23 +117,30 @@ deserializeRanges(DataBuffer &buffer, vector<EntryRef> &word_refs, RangeFeatures
range.to = buffer.readInt64();
range_vector.push_back(range);
}
- num_ranges += range_count;
+ cur_refs._ranges = ranges.add(range_vector);
}
}
-template <typename DocumentFeaturesMap>
+template <typename RefsVector, typename FeaturesStore>
void
-deserializeDocs(DataBuffer &buffer, DocumentFeaturesMap &docs, size_t &num_features) {
+deserialize_features(DataBuffer &buffer, RefsVector& refs, FeaturesStore &features)
+{
+ std::vector<typename FeaturesStore::ElemType> feature_vector;
uint32_t docs_size = buffer.readInt32();
for (uint32_t i = 0; i < docs_size; ++i) {
uint32_t doc_id = buffer.readInt32();
+ if (doc_id >= refs.size()) {
+ refs.resize(doc_id + 1);
+ }
+ auto& cur_refs = refs[doc_id];
+ assert(!cur_refs._features.valid());
uint32_t feature_count = buffer.readInt32();
- auto &feature_vector = docs[doc_id];
+ feature_vector.clear();
feature_vector.reserve(feature_count);
for (uint32_t j = 0; j < feature_count; ++j) {
feature_vector.push_back(buffer.readInt64());
}
- num_features += feature_count;
+ cur_refs._features = features.add(feature_vector);
}
}
} // namespace
@@ -88,8 +151,8 @@ DocumentFeaturesStore::DocumentFeaturesStore(DataBuffer &buffer)
vector<EntryRef> word_refs;
deserializeWords<KeyComp>(buffer, _word_store, _word_index, word_refs);
- deserializeRanges(buffer, word_refs, _ranges, _numRanges);
- deserializeDocs(buffer, _docs, _numFeatures);
+ deserialize_ranges(buffer, word_refs, _refs, _ranges);
+ deserialize_features(buffer, _refs, _features);
}
DocumentFeaturesStore::~DocumentFeaturesStore() {
@@ -102,22 +165,27 @@ DocumentFeaturesStore::~DocumentFeaturesStore() {
void
DocumentFeaturesStore::insert(const PredicateTreeAnnotations &annotations, uint32_t doc_id) {
assert(doc_id != 0);
+ if (doc_id >= _refs.size()) {
+ _refs.resize(doc_id + 1);
+ }
+ auto& cur_refs = _refs[doc_id];
if (!annotations.features.empty()) {
- auto it = _docs.find(doc_id);
- if (it == _docs.end()) {
- it = _docs.insert(std::make_pair(doc_id, FeatureVector())).first;
- }
- size_t size = it->second.size();
- it->second.resize(size + annotations.features.size());
- memcpy(&it->second[size], &annotations.features[0],
+ auto old_features_ref = cur_refs._features;
+ auto old_features = _features.get(old_features_ref);
+ std::vector<uint64_t> features(old_features.begin(), old_features.end());
+ size_t size = features.size();
+ features.resize(size + annotations.features.size());
+ memcpy(&features[size], &annotations.features[0],
annotations.features.size() * sizeof(annotations.features[0]));
- _numFeatures += annotations.features.size();
+ cur_refs._features = _features.add(features);
+ if (old_features_ref.valid()) {
+ _features.remove(old_features_ref);
+ }
}
if (!annotations.range_features.empty()) {
- auto it = _ranges.find(doc_id);
- if (it == _ranges.end()) {
- it = _ranges.insert(std::make_pair(doc_id, RangeVector())).first;
- }
+ auto old_ranges_ref = cur_refs._ranges;
+ auto old_ranges = _ranges.get(old_ranges_ref);
+ std::vector<Range> ranges(old_ranges.begin(), old_ranges.end());
for (const auto &range : annotations.range_features) {
stringref word(range.label.data, range.label.size);
KeyComp cmp(_word_store, word);
@@ -129,22 +197,29 @@ DocumentFeaturesStore::insert(const PredicateTreeAnnotations &annotations, uint3
ref = _word_store.addWord(word);
_word_index.insert(ref, BTreeNoLeafData(), cmp);
}
- it->second.push_back({ref, range.from, range.to});
+ ranges.emplace_back(ref, range.from, range.to);
+ }
+ cur_refs._ranges = _ranges.add(ranges);
+ if (old_ranges_ref.valid()) {
+ _ranges.remove(old_ranges_ref);
}
- _numRanges += annotations.range_features.size();
}
}
DocumentFeaturesStore::FeatureSet
DocumentFeaturesStore::get(uint32_t docId) const {
FeatureSet features;
- auto docsItr = _docs.find(docId);
- if (docsItr != _docs.end()) {
- features.insert(docsItr->second.begin(), docsItr->second.end());
+ if (docId >= _refs.size()) {
+ return features;
}
- auto rangeItr = _ranges.find(docId);
- if (rangeItr != _ranges.end()) {
- for (auto range : rangeItr->second) {
+ auto& cur_refs = _refs[docId];
+ if (cur_refs._features.valid()) {
+ auto old_features = _features.get(cur_refs._features);
+ features.insert(old_features.begin(), old_features.end());
+ }
+ if (cur_refs._ranges.valid()) {
+ auto old_ranges = _ranges.get(cur_refs._ranges);
+ for (auto range : old_ranges) {
const char *label = _word_store.getWord(range.label_ref);
PredicateRangeExpander::expandRange(label, range.from, range.to, _arity,
std::inserter(features, features.end()));
@@ -155,34 +230,43 @@ DocumentFeaturesStore::get(uint32_t docId) const {
void
DocumentFeaturesStore::remove(uint32_t doc_id) {
- auto itr = _docs.find(doc_id);
- if (itr != _docs.end()) {
- _numFeatures = _numFeatures >= itr->second.size() ?
- (_numFeatures - itr->second.size()) : 0;
- _docs.erase(itr);
+ if (doc_id >= _refs.size()) {
+ return;
+ }
+ auto& cur_refs = _refs[doc_id];
+ auto old_features_ref = cur_refs._features;
+ if (old_features_ref.valid()) {
+ _features.remove(old_features_ref);
+ cur_refs._features = EntryRef();
}
- auto range_itr = _ranges.find(doc_id);
- if (range_itr != _ranges.end()) {
- _numRanges = _numRanges >= range_itr->second.size() ?
- (_numRanges - range_itr->second.size()) : 0;
- _ranges.erase(range_itr);
+ auto old_ranges_ref = cur_refs._ranges;
+ if (old_ranges_ref.valid()) {
+ _ranges.remove(old_ranges_ref);
+ cur_refs._ranges = EntryRef();
}
}
+void
+DocumentFeaturesStore::reclaim_memory(generation_t oldest_used_gen)
+{
+ _features.reclaim_memory(oldest_used_gen);
+ _ranges.reclaim_memory(oldest_used_gen);
+}
+
+void
+DocumentFeaturesStore::assign_generation(generation_t current_gen)
+{
+ _features.assign_generation(current_gen);
+ _ranges.assign_generation(current_gen);
+}
+
vespalib::MemoryUsage
DocumentFeaturesStore::getMemoryUsage() const {
vespalib::MemoryUsage usage;
- usage.incAllocatedBytes(_docs.getMemoryConsumption());
- usage.incUsedBytes(_docs.getMemoryUsed());
- usage.incAllocatedBytes(_ranges.getMemoryConsumption());
- usage.incUsedBytes(_ranges.getMemoryUsed());
- // Note: allocated bytes in FeatureVector is slighly larger, but
- // this should be good enough.
- usage.incAllocatedBytes(_numFeatures * sizeof(uint64_t));
- usage.incUsedBytes(_numFeatures * sizeof(uint64_t));
- usage.incAllocatedBytes(_numRanges * sizeof(Range));
- usage.incUsedBytes(_numRanges * sizeof(Range));
-
+ usage.incAllocatedBytes(_refs.capacity() * sizeof(Refs));
+ usage.incUsedBytes(_refs.size() * sizeof(Refs));
+ usage.merge(_features.getMemoryUsage());
+ usage.merge(_ranges.getMemoryUsage());
usage.merge(_word_store.getMemoryUsage());
usage.merge(_word_index.getMemoryUsage());
@@ -190,17 +274,22 @@ DocumentFeaturesStore::getMemoryUsage() const {
}
namespace {
-template <typename RangeFeaturesMap>
+
+template <typename RefsVector, typename RangesStore>
void
-findUsedWords(const RangeFeaturesMap &ranges,
+findUsedWords(const RefsVector& refs, const RangesStore& ranges,
unordered_map<uint32_t, uint32_t> &word_map,
vector<EntryRef> &word_list)
{
- for (const auto &range_features_entry : ranges) {
- for (const auto &range : range_features_entry.second) {
- if (!word_map.count(range.label_ref.ref())) {
- word_map[range.label_ref.ref()] = word_list.size();
- word_list.push_back(range.label_ref);
+ for (auto& cur_refs : refs) {
+ auto ranges_ref = cur_refs._ranges;
+ if (ranges_ref.valid()) {
+ auto range_vector = ranges.get(ranges_ref);
+ for (const auto& range : range_vector) {
+ if (!word_map.count(range.label_ref.ref())) {
+ word_map[range.label_ref.ref()] = word_list.size();
+ word_list.push_back(range.label_ref);
+ }
}
}
}
@@ -219,48 +308,77 @@ serializeWords(DataBuffer &buffer, const vector<EntryRef> &word_list,
}
}
-template <typename RangeFeaturesMap>
+template <typename RefsVector, typename RangesStore>
void
-serializeRanges(DataBuffer &buffer, RangeFeaturesMap &ranges,
+serialize_ranges(DataBuffer &buffer, const RefsVector& refs, const RangesStore &ranges,
unordered_map<uint32_t, uint32_t> &word_map)
{
- buffer.writeInt32(ranges.size());
- for (const auto &range_features_entry : ranges) {
- buffer.writeInt32(range_features_entry.first); // doc id
- buffer.writeInt32(range_features_entry.second.size());
- for (const auto &range : range_features_entry.second) {
- buffer.writeInt32(word_map[range.label_ref.ref()]);
- buffer.writeInt64(range.from);
- buffer.writeInt64(range.to);
+ uint32_t ranges_size = 0;
+ if (!refs.empty()) {
+ assert(!refs.front()._ranges.valid());
+ for (auto& cur_refs : refs) {
+ if (cur_refs._ranges.valid()) {
+ ++ranges_size;
+ }
+ }
+ }
+ buffer.writeInt32(ranges_size);
+ for (uint32_t doc_id = 0; doc_id < refs.size(); ++doc_id) {
+ auto ranges_ref = refs[doc_id]._ranges;
+ if (ranges_ref.valid()) {
+ buffer.writeInt32(doc_id);
+ auto range_vector = ranges.get(ranges_ref);
+ buffer.writeInt32(range_vector.size());
+ for (const auto &range : range_vector) {
+ buffer.writeInt32(word_map[range.label_ref.ref()]);
+ buffer.writeInt64(range.from);
+ buffer.writeInt64(range.to);
+ }
}
}
}
-template <typename DocumentFeaturesMap>
+template <typename RefsVector, typename FeaturesStore>
void
-serializeDocs(DataBuffer &buffer, DocumentFeaturesMap &docs) {
- buffer.writeInt32(docs.size());
- for (const auto &doc_features_entry : docs) {
- buffer.writeInt32(doc_features_entry.first); // doc id
- buffer.writeInt32(doc_features_entry.second.size());
- for (const auto &feature : doc_features_entry.second) {
- buffer.writeInt64(feature);
+serialize_features(DataBuffer &buffer, const RefsVector& refs, const FeaturesStore& features)
+{
+ uint32_t features_size = 0;
+ if (!refs.empty()) {
+ assert(!refs.front()._features.valid());
+ for (auto& cur_refs : refs) {
+ if (cur_refs._features.valid()) {
+ ++features_size;
+ }
+ }
+ }
+ buffer.writeInt32(features_size);
+ for (uint32_t doc_id = 0; doc_id < refs.size(); ++doc_id) {
+ auto features_ref = refs[doc_id]._features;
+ if (features_ref.valid()) {
+ buffer.writeInt32(doc_id);
+ auto feature_vector = features.get(features_ref);
+ buffer.writeInt32(feature_vector.size());
+ for (const auto &feature : feature_vector) {
+ buffer.writeInt64(feature);
+ }
}
}
}
+
} // namespace
void
-DocumentFeaturesStore::serialize(DataBuffer &buffer) const {
+DocumentFeaturesStore::serialize(DataBuffer &buffer) const
+{
vector<EntryRef> word_list;
unordered_map<uint32_t, uint32_t> word_map;
- findUsedWords(_ranges, word_map, word_list);
+ findUsedWords(_refs, _ranges, word_map, word_list);
buffer.writeInt16(_arity);
serializeWords(buffer, word_list, _word_store);
- serializeRanges(buffer, _ranges, word_map);
- serializeDocs(buffer, _docs);
+ serialize_ranges(buffer, _refs, _ranges, word_map);
+ serialize_features(buffer, _refs, _features);
}
}
diff --git a/searchlib/src/vespa/searchlib/predicate/document_features_store.h b/searchlib/src/vespa/searchlib/predicate/document_features_store.h
index 3b8aed53ca1..bc4d58b1c04 100644
--- a/searchlib/src/vespa/searchlib/predicate/document_features_store.h
+++ b/searchlib/src/vespa/searchlib/predicate/document_features_store.h
@@ -6,7 +6,10 @@
#include <vespa/searchlib/memoryindex/word_store.h>
#include <vespa/vespalib/btree/btree.h>
#include <vespa/vespalib/data/databuffer.h>
-#include <vespa/vespalib/stllike/hash_map.h>
+#include <vespa/vespalib/datastore/array_store.h>
+#include <vespa/vespalib/datastore/array_store_dynamic_type_mapper.h>
+#include <vespa/vespalib/datastore/dynamic_array_buffer_type.h>
+#include <vespa/vespalib/stllike/allocator.h>
#include <unordered_set>
namespace search::predicate {
@@ -46,21 +49,34 @@ class DocumentFeaturesStore {
return strcmp(getWord(lhs), getWord(rhs)) < 0;
}
};
- using FeatureVector = vespalib::Array<uint64_t>;
- using DocumentFeaturesMap = vespalib::hash_map<uint32_t, FeatureVector>;
- using RangeVector = vespalib::Array<Range>;
- using RangeFeaturesMap = vespalib::hash_map<uint32_t, RangeVector>;
using WordIndex = vespalib::btree::BTree<vespalib::datastore::EntryRef, vespalib::btree::BTreeNoLeafData,
vespalib::btree::NoAggregated, const KeyComp &>;
+ // Array store for features
+ using FeaturesRefType = vespalib::datastore::EntryRefT<19>;
+ using FeaturesStoreTypeMapper = vespalib::datastore::ArrayStoreDynamicTypeMapper<uint64_t>;
+ using FeaturesStore = vespalib::datastore::ArrayStore<uint64_t, FeaturesRefType, FeaturesStoreTypeMapper>;
+ // Array store for ranges
+ using RangesRefType = vespalib::datastore::EntryRefT<19>;
+ using RangesStoreTypeMapper = vespalib::datastore::ArrayStoreDynamicTypeMapper<Range>;
+ using RangesStore = vespalib::datastore::ArrayStore<Range, RangesRefType, RangesStoreTypeMapper>;
+ struct Refs {
+ vespalib::datastore::EntryRef _features;
+ vespalib::datastore::EntryRef _ranges;
+ };
+ using RefsVector = std::vector<Refs, vespalib::allocator_large<Refs>>;
+ using generation_t = vespalib::GenerationHandler::generation_t;
- DocumentFeaturesMap _docs;
- RangeFeaturesMap _ranges;
+ RefsVector _refs;
+ FeaturesStore _features;
+ RangesStore _ranges;
WordStore _word_store;
WordIndex _word_index;
- size_t _numFeatures;
- size_t _numRanges;
uint32_t _arity;
+ static FeaturesStoreTypeMapper make_features_store_type_mapper();
+ static RangesStoreTypeMapper make_ranges_store_type_mapper();
+ static vespalib::datastore::ArrayStoreConfig make_features_store_config();
+ static vespalib::datastore::ArrayStoreConfig make_ranges_store_config();
public:
using FeatureSet = std::unordered_set<uint64_t>;
@@ -71,6 +87,9 @@ public:
void insert(const PredicateTreeAnnotations &annotations, uint32_t docId);
FeatureSet get(uint32_t docId) const;
void remove(uint32_t docId);
+ void commit() { }
+ void reclaim_memory(generation_t oldest_used_gen);
+ void assign_generation(generation_t current_gen);
vespalib::MemoryUsage getMemoryUsage() const;
void serialize(vespalib::DataBuffer &buffer) const;
diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_index.cpp b/searchlib/src/vespa/searchlib/predicate/predicate_index.cpp
index 7e3d62640cf..cf7d7dafa05 100644
--- a/searchlib/src/vespa/searchlib/predicate/predicate_index.cpp
+++ b/searchlib/src/vespa/searchlib/predicate/predicate_index.cpp
@@ -213,6 +213,7 @@ PredicateIndex::commit() {
_interval_index.commit();
_bounds_index.commit();
_zero_constraint_docs.getAllocator().freeze();
+ _features_store.commit();
}
void
@@ -221,6 +222,7 @@ PredicateIndex::reclaim_memory(generation_t oldest_used_gen) {
_bounds_index.reclaim_memory(oldest_used_gen);
_interval_store.reclaim_memory(oldest_used_gen);
_zero_constraint_docs.getAllocator().reclaim_memory(oldest_used_gen);
+ _features_store.reclaim_memory(oldest_used_gen);
}
void
@@ -229,6 +231,7 @@ PredicateIndex::assign_generation(generation_t current_gen) {
_bounds_index.assign_generation(current_gen);
_interval_store.assign_generation(current_gen);
_zero_constraint_docs.getAllocator().assign_generation(current_gen);
+ _features_store.assign_generation(current_gen);
}
vespalib::MemoryUsage