about summary refs log tree commit diff stats
path: root/searchlib
diff options
context:
space:
mode:
author Tor Egge <Tor.Egge@online.no> 2021-12-06 15:44:26 +0100
committer Tor Egge <Tor.Egge@online.no> 2021-12-06 15:44:26 +0100
commit 1330d2c3d3b8647b6053ac37e95503cd0278e2e3 (patch)
tree 1434a85c72ac28563db3802f69fe99f50f86412e /searchlib
parent 2ad949884ee12126f00b18d6e8890af8cbc61391 (diff)
Add EntryRefFilter class.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/attribute/enumstore/enumstore_test.cpp 39
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp 18
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h 5
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h 21
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/postingstore.cpp 32
5 files changed, 64 insertions, 51 deletions
diff --git a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
index c2a3139d83a..40ff25ff976 100644
--- a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
+++ b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
@@ -8,6 +8,7 @@ LOG_SETUP("enumstore_test");
using Type = search::DictionaryConfig::Type;
using vespalib::datastore::EntryRef;
+using vespalib::datastore::EntryRefFilter;
using RefT = vespalib::datastore::EntryRefT<22>;
namespace vespalib::datastore {
@@ -714,6 +715,20 @@ EnumStoreDictionaryTest<EnumStoreTypeAndDictionaryType>::clear_sample_values(uin
}
}
+namespace {
+
+EntryRefFilter make_entry_ref_filter(bool one_filter)
+{
+ if (one_filter) {
+ EntryRefFilter filter(RefT::numBuffers(), RefT::offset_bits);
+ filter.add_buffer(3);
+ return filter;
+ }
+ return EntryRefFilter::create_all_filter(RefT::numBuffers(), RefT::offset_bits);
+}
+
+}
+
template <typename EnumStoreTypeAndDictionaryType>
void
EnumStoreDictionaryTest<EnumStoreTypeAndDictionaryType>::test_normalize_posting_lists(bool use_filter, bool one_filter)
@@ -734,18 +749,12 @@ EnumStoreDictionaryTest<EnumStoreTypeAndDictionaryType>::test_normalize_posting_
}
EXPECT_EQ(exp_refs, get_sample_values(large_population));
if (use_filter) {
- std::vector<bool> filter;
- if (one_filter) {
- filter = std::vector<bool>(RefT::numBuffers());
- filter[3] = true;
- } else {
- filter = std::vector<bool>(RefT::numBuffers(), true);
- }
+ auto filter = make_entry_ref_filter(one_filter);
auto dummy = [](std::vector<EntryRef>&) noexcept { };
auto adjust_refs = [](std::vector<EntryRef> &refs) noexcept { for (auto &ref : refs) { ref = adjust_fake_pidx(ref); } };
- EXPECT_FALSE(dict.normalize_posting_lists(dummy, filter, RefT::offset_bits));
+ EXPECT_FALSE(dict.normalize_posting_lists(dummy, filter));
EXPECT_EQ(exp_refs, get_sample_values(large_population));
- EXPECT_TRUE(dict.normalize_posting_lists(adjust_refs, filter, RefT::offset_bits));
+ EXPECT_TRUE(dict.normalize_posting_lists(adjust_refs, filter));
} else {
auto dummy = [](EntryRef posting_idx) noexcept { return posting_idx; };
auto adjust_refs = [](EntryRef ref) noexcept { return adjust_fake_pidx(ref); };
@@ -761,21 +770,15 @@ template <typename EnumStoreTypeAndDictionaryType>
void
EnumStoreDictionaryTest<EnumStoreTypeAndDictionaryType>::test_foreach_posting_list(bool one_filter)
{
- std::vector<bool> filter;
- if (one_filter) {
- filter = std::vector<bool>(RefT::numBuffers());
- filter[3] = true;
- } else {
- filter = std::vector<bool>(RefT::numBuffers(), true);
- }
+ auto filter = make_entry_ref_filter(one_filter);
populate_sample_data(large_population);
auto& dict = store.get_dictionary();
std::vector<EntryRef> exp_refs;
auto save_exp_refs = [&exp_refs](std::vector<EntryRef>& refs) { exp_refs.insert(exp_refs.end(), refs.begin(), refs.end()); };
- EXPECT_FALSE(dict.normalize_posting_lists(save_exp_refs, filter, RefT::offset_bits));
+ EXPECT_FALSE(dict.normalize_posting_lists(save_exp_refs, filter));
std::vector<EntryRef> act_refs;
auto save_act_refs = [&act_refs](const std::vector<EntryRef>& refs) { act_refs.insert(act_refs.end(), refs.begin(), refs.end()); };
- dict.foreach_posting_list(save_act_refs, filter, RefT::offset_bits);
+ dict.foreach_posting_list(save_act_refs, filter);
EXPECT_EQ(exp_refs, act_refs);
clear_sample_values(large_population);
}
diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp
index f77b9426b32..8bc28abc238 100644
--- a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp
@@ -312,7 +312,7 @@ EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::normalize_posting_lists(
template <>
bool
-EnumStoreDictionary<EnumTree>::normalize_posting_lists(std::function<void(std::vector<EntryRef>&)>, const std::vector<bool> &, uint32_t)
+EnumStoreDictionary<EnumTree>::normalize_posting_lists(std::function<void(std::vector<EntryRef>&)>, const EntryRefFilter&)
{
LOG_ABORT("should not be reached");
}
@@ -399,7 +399,7 @@ ChangeWriter<HashDictionaryT>::write(const std::vector<EntryRef> &refs)
template <typename BTreeDictionaryT, typename HashDictionaryT>
bool
-EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::normalize_posting_lists(std::function<void(std::vector<EntryRef>&)> normalize, const std::vector<bool>& filter, uint32_t entry_ref_offset_bits)
+EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::normalize_posting_lists(std::function<void(std::vector<EntryRef>&)> normalize, const EntryRefFilter& filter)
{
if constexpr (has_btree_dictionary) {
std::vector<EntryRef> refs;
@@ -413,8 +413,7 @@ EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::normalize_posting_lists(
for (auto itr = dict.begin(); itr.valid(); ++itr) {
EntryRef ref(itr.getData());
if (ref.valid()) {
- uint32_t buffer_id = ref.buffer_id(entry_ref_offset_bits);
- if (filter[buffer_id]) {
+ if (filter.has(ref)) {
refs.emplace_back(ref);
change_writer.emplace_back(itr.getKey(), itr.getWData());
if (refs.size() >= refs.capacity()) {
@@ -431,20 +430,20 @@ EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::normalize_posting_lists(
}
return changed;
} else {
- return this->_hash_dict.normalize_values(normalize, filter, entry_ref_offset_bits);
+ return this->_hash_dict.normalize_values(normalize, filter);
}
}
template <>
void
-EnumStoreDictionary<EnumTree>::foreach_posting_list(std::function<void(const std::vector<EntryRef>&)>, const std::vector<bool>&, uint32_t)
+EnumStoreDictionary<EnumTree>::foreach_posting_list(std::function<void(const std::vector<EntryRef>&)>, const EntryRefFilter&)
{
LOG_ABORT("should not be reached");
}
template <typename BTreeDictionaryT, typename HashDictionaryT>
void
-EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::foreach_posting_list(std::function<void(const std::vector<EntryRef>&)> callback, const std::vector<bool>& filter, uint32_t entry_ref_offset_bits)
+EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::foreach_posting_list(std::function<void(const std::vector<EntryRef>&)> callback, const EntryRefFilter& filter)
{
if constexpr (has_btree_dictionary) {
std::vector<EntryRef> refs;
@@ -453,8 +452,7 @@ EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::foreach_posting_list(std
for (auto itr = dict.begin(); itr.valid(); ++itr) {
EntryRef ref(itr.getData());
if (ref.valid()) {
- uint32_t buffer_id = ref.buffer_id(entry_ref_offset_bits);
- if (filter[buffer_id]) {
+ if (filter.has(ref)) {
refs.emplace_back(ref);
if (refs.size() >= refs.capacity()) {
callback(refs);
@@ -467,7 +465,7 @@ EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::foreach_posting_list(std
callback(refs);
}
} else {
- this->_hash_dict.foreach_value(callback, filter, entry_ref_offset_bits);
+ this->_hash_dict.foreach_value(callback, filter);
}
}
diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h
index a17ffd14864..db1176c5484 100644
--- a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h
+++ b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h
@@ -16,6 +16,7 @@ template <typename BTreeDictionaryT, typename HashDictionaryT = vespalib::datast
class EnumStoreDictionary : public vespalib::datastore::UniqueStoreDictionary<BTreeDictionaryT, IEnumStoreDictionary, HashDictionaryT> {
protected:
using EntryRef = IEnumStoreDictionary::EntryRef;
+ using EntryRefFilter = IEnumStoreDictionary::EntryRefFilter;
using Index = IEnumStoreDictionary::Index;
using BTreeDictionaryType = BTreeDictionaryT;
using EntryComparator = IEnumStoreDictionary::EntryComparator;
@@ -54,8 +55,8 @@ public:
void clear_all_posting_lists(std::function<void(EntryRef)> clearer) override;
void update_posting_list(Index idx, const EntryComparator& cmp, std::function<EntryRef(EntryRef)> updater) override;
bool normalize_posting_lists(std::function<EntryRef(EntryRef)> normalize) override;
- bool normalize_posting_lists(std::function<void(std::vector<EntryRef>&)> normalize, const std::vector<bool>& filter, uint32_t entry_ref_offset_bits) override;
- void foreach_posting_list(std::function<void(const std::vector<EntryRef>&)> callback, const std::vector<bool>& filter, uint32_t entry_ref_offset_bits) override;
+ bool normalize_posting_lists(std::function<void(std::vector<EntryRef>&)> normalize, const EntryRefFilter& filter) override;
+ void foreach_posting_list(std::function<void(const std::vector<EntryRef>&)> callback, const EntryRefFilter& filter) override;
const EnumPostingTree& get_posting_dictionary() const override;
};
diff --git a/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h b/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h
index 4a00c72d6ba..a9716ec5d05 100644
--- a/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h
+++ b/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h
@@ -30,6 +30,7 @@ class IEnumStoreDictionary : public vespalib::datastore::IUniqueStoreDictionary
public:
using EntryRef = vespalib::datastore::EntryRef;
using EntryComparator = vespalib::datastore::EntryComparator;
+ using EntryRefFilter = vespalib::datastore::EntryRefFilter;
using EnumVector = IEnumStore::EnumVector;
using Index = IEnumStore::Index;
using IndexList = IEnumStore::IndexList;
@@ -52,9 +53,25 @@ public:
virtual Index remap_index(Index idx) = 0;
virtual void clear_all_posting_lists(std::function<void(EntryRef)> clearer) = 0;
virtual void update_posting_list(Index idx, const EntryComparator& cmp, std::function<EntryRef(EntryRef)> updater) = 0;
+ /*
+ * Scan dictionary and call normalize function for each value. If
+ * returned value is different then write back the modified value to
+ * the dictionary. Only used by unit tests.
+ */
virtual bool normalize_posting_lists(std::function<EntryRef(EntryRef)> normalize) = 0;
- virtual bool normalize_posting_lists(std::function<void(std::vector<EntryRef>&)> normalize, const std::vector<bool>& filter, uint32_t entry_ref_offset_bits) = 0;
- virtual void foreach_posting_list(std::function<void(const std::vector<EntryRef>&)> callback, const std::vector<bool>& filter, uint32_t entry_ref_offset_bits) = 0;
+ /*
+ * Scan dictionary and call normalize function for batches of values
+ * that pass the filter. Write back modified values to the dictionary.
+ * Used by compaction of posting lists when moving short arrays,
+ * bitvectors or btree roots.
+ */
+ virtual bool normalize_posting_lists(std::function<void(std::vector<EntryRef>&)> normalize, const EntryRefFilter& filter) = 0;
+ /*
+ * Scan dictionary and call callback function for batches of values
+ * that pass the filter. Used by compaction of posting lists when
+ * moving btree nodes.
+ */
+ virtual void foreach_posting_list(std::function<void(const std::vector<EntryRef>&)> callback, const EntryRefFilter& filter) = 0;
virtual const EnumPostingTree& get_posting_dictionary() const = 0;
};
diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp
index 2592a2889e4..8ed8a0cfbee 100644
--- a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp
@@ -7,11 +7,13 @@
#include <vespa/vespalib/btree/btreeiterator.hpp>
#include <vespa/vespalib/btree/btreerootbase.cpp>
#include <vespa/vespalib/datastore/datastore.hpp>
+#include <vespa/vespalib/datastore/entry_ref_filter.h>
#include <vespa/vespalib/datastore/buffer_type.hpp>
namespace search::attribute {
using vespalib::btree::BTreeNoLeafData;
+using vespalib::datastore::EntryRefFilter;
// #define FORCE_BITVECTORS
@@ -127,13 +129,11 @@ PostingStore<DataT>::removeSparseBitVectors()
}
}
if (needscan) {
- std::vector<bool> filter(RefType::numBuffers());
- for (uint32_t buffer_id : _bvType.get_active_buffers()) {
- filter[buffer_id] = true;
- }
+ EntryRefFilter filter(RefType::numBuffers(), RefType::offset_bits);
+ filter.add_buffers(_bvType.get_active_buffers());
res = _dictionary.normalize_posting_lists([this](std::vector<EntryRef>& refs)
{ consider_remove_sparse_bitvector(refs); },
- filter, RefType::offset_bits);
+ filter);
}
return res;
}
@@ -727,16 +727,12 @@ void
PostingStore<DataT>::compact_worst_btree_nodes()
{
auto to_hold = this->start_compact_worst_btree_nodes();
- std::vector<bool> filter(RefType::numBuffers());
+ EntryRefFilter filter(RefType::numBuffers(), RefType::offset_bits);
// Only look at buffers containing bitvectors and btree roots
- for (uint32_t buffer_id : this->_treeType.get_active_buffers()) {
- filter[buffer_id] = true;
- }
- for (uint32_t buffer_id : _bvType.get_active_buffers()) {
- filter[buffer_id] = true;
- }
+ filter.add_buffers(this->_treeType.get_active_buffers());
+ filter.add_buffers(_bvType.get_active_buffers());
_dictionary.foreach_posting_list([this](const std::vector<EntryRef>& refs)
- { move_btree_nodes(refs); }, filter, RefType::offset_bits);
+ { move_btree_nodes(refs); }, filter);
this->finish_compact_worst_btree_nodes(to_hold);
}
@@ -746,24 +742,22 @@ PostingStore<DataT>::compact_worst_buffers()
{
auto to_hold = this->start_compact_worst_buffers();
bool compact_btree_roots = false;
- std::vector<bool> filter(RefType::numBuffers());
+ EntryRefFilter filter(RefType::numBuffers(), RefType::offset_bits);
+ filter.add_buffers(to_hold);
// Start with looking at buffers being compacted
for (uint32_t buffer_id : to_hold) {
if (isBTree(_store.getBufferState(buffer_id).getTypeId())) {
compact_btree_roots = true;
}
- filter[buffer_id] = true;
}
if (compact_btree_roots) {
// If we are compacting btree roots then we also have to look at bitvector
// buffers
- for (uint32_t buffer_id : _bvType.get_active_buffers()) {
- filter[buffer_id] = true;
- }
+ filter.add_buffers(_bvType.get_active_buffers());
}
_dictionary.normalize_posting_lists([this](std::vector<EntryRef>& refs)
{ return move(refs); },
- filter, RefType::offset_bits);
+ filter);
this->finishCompact(to_hold);
}