summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Tor Egge <Tor.Egge@verizonmedia.com> 2019-06-14 12:24:50 +0000
committer Tor Egge <Tor.Egge@verizonmedia.com> 2019-06-14 12:50:11 +0000
commit 3ddea2a1c8a5d0da53b97c021c48f878b64c7810 (patch)
tree 186130cb849876ce9f968c62df6088eb570f7480
parent 2c3e1a1ab6000096e8494f2eff7a3c8af31a6d9e (diff)
Change posting list for memory index to have PostingListEntry as value.
-rw-r--r-- searchlib/src/tests/memoryindex/field_index/field_index_test.cpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/field_index.cpp 21
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/field_index.h 5
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.cpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/posting_iterator.cpp 59
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/posting_list_entry.h 31
-rw-r--r-- searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.cpp 4
7 files changed, 106 insertions, 18 deletions
diff --git a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp
index 05c905cdc32..0f1c966ad5d 100644
--- a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp
+++ b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp
@@ -171,7 +171,7 @@ assertPostingList(const std::string &exp,
uint32_t docId = itr.getKey();
ss << docId;
if (store != nullptr) { // consider features as well
- EntryRef ref(itr.getData());
+ EntryRef ref(itr.getData().get_features());
store->setupForField(0, decoder);
store->setupForUnpackFeatures(ref, decoder);
decoder.unpackFeatures(matchData, docId);
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
index e2e1c99a9b9..8daf87e1899 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
@@ -127,36 +127,35 @@ FieldIndex::compactFeatures()
const PostingList *tree = _postingListStore.getTreeEntry(pidx);
auto pitr = tree->begin(_postingListStore.getAllocator());
for (; pitr.valid(); ++pitr) {
- EntryRef oldFeatures(pitr.getData());
+ const PostingListEntry &posting_entry(pitr.getData());
// Filter on which buffers to move features from when
// performing incremental compaction.
- EntryRef newFeatures = _featureStore.moveFeatures(packedIndex, oldFeatures);
+ EntryRef newFeatures = _featureStore.moveFeatures(packedIndex, posting_entry.get_features());
// Features must be written before reference is updated.
std::atomic_thread_fence(std::memory_order_release);
- // Ugly, ugly due to const_cast in iterator
- pitr.writeData(newFeatures.ref());
+ // Reference the moved data
+ posting_entry.update_features(newFeatures);
}
} else {
const PostingListKeyDataType *shortArray = _postingListStore.getKeyDataEntry(pidx, clusterSize);
const PostingListKeyDataType *ite = shortArray + clusterSize;
for (const PostingListKeyDataType *it = shortArray; it < ite; ++it) {
- EntryRef oldFeatures(it->getData());
+ const PostingListEntry &posting_entry(it->getData());
// Filter on which buffers to move features from when
// performing incremental compaction.
- EntryRef newFeatures = _featureStore.moveFeatures(packedIndex, oldFeatures);
+ EntryRef newFeatures = _featureStore.moveFeatures(packedIndex, posting_entry.get_features());
// Features must be written before reference is updated.
std::atomic_thread_fence(std::memory_order_release);
- // Ugly, ugly due to const_cast, but new data is
- // semantically equal to old data
- const_cast<PostingListKeyDataType *>(it)->setData(newFeatures.ref());
+ // Reference the moved data
+ posting_entry.update_features(newFeatures);
}
}
}
@@ -189,7 +188,7 @@ FieldIndex::dump(search::index::IndexBuilder & indexBuilder)
assert(pitr.valid());
for (; pitr.valid(); ++pitr) {
uint32_t docId = pitr.getKey();
- EntryRef featureRef(pitr.getData());
+ EntryRef featureRef(pitr.getData().get_features());
_featureStore.setupForReadFeatures(featureRef, decoder);
decoder.readFeatures(features);
features.set_doc_id(docId);
@@ -202,7 +201,7 @@ FieldIndex::dump(search::index::IndexBuilder & indexBuilder)
const PostingListKeyDataType *kde = kd + clusterSize;
for (; kd != kde; ++kd) {
uint32_t docId = kd->_key;
- EntryRef featureRef(kd->getData());
+ EntryRef featureRef(kd->getData().get_features());
_featureStore.setupForReadFeatures(featureRef, decoder);
decoder.readFeatures(features);
features.set_doc_id(docId);
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.h b/searchlib/src/vespa/searchlib/memoryindex/field_index.h
index dba57f553b5..d5df2fa49c8 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_index.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.h
@@ -5,6 +5,7 @@
#include "feature_store.h"
#include "field_index_remover.h"
#include "word_store.h"
+#include "posting_list_entry.h"
#include <vespa/searchlib/index/docidandfeatures.h>
#include <vespa/searchlib/index/field_length_calculator.h>
#include <vespa/searchlib/index/indexbuilder.h>
@@ -35,8 +36,8 @@ class OrderedFieldIndexInserter;
class FieldIndex {
public:
// Mapping from docid -> feature ref
- using PostingList = btree::BTreeRoot<uint32_t, uint32_t, search::btree::NoAggregated>;
- using PostingListStore = btree::BTreeStore<uint32_t, uint32_t,
+ using PostingList = btree::BTreeRoot<uint32_t, PostingListEntry, search::btree::NoAggregated>;
+ using PostingListStore = btree::BTreeStore<uint32_t, PostingListEntry,
search::btree::NoAggregated,
std::less<uint32_t>,
btree::BTreeDefaultTraits>;
diff --git a/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.cpp b/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.cpp
index 637a13d67be..1d38e88b747 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.cpp
@@ -111,7 +111,7 @@ OrderedFieldIndexInserter::add(uint32_t docId,
assert(_prevDocId == noDocId || _prevDocId < docId ||
(_prevDocId == docId && !_prevAdd));
datastore::EntryRef featureRef = _fieldIndex.addFeatures(features);
- _adds.push_back(PostingListKeyDataType(docId, featureRef.ref()));
+ _adds.push_back(PostingListKeyDataType(docId, featureRef));
_listener.insert(_dItr.getKey()._wordRef, docId);
_prevDocId = docId;
_prevAdd = true;
diff --git a/searchlib/src/vespa/searchlib/memoryindex/posting_iterator.cpp b/searchlib/src/vespa/searchlib/memoryindex/posting_iterator.cpp
index 63040aab66f..290aa16dfe4 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/posting_iterator.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/posting_iterator.cpp
@@ -6,6 +6,7 @@
#include <vespa/vespalib/btree/btreenodeallocator.hpp>
#include <vespa/vespalib/btree/btreenodestore.hpp>
#include <vespa/vespalib/btree/btreeroot.hpp>
+#include <vespa/vespalib/btree/btreestore.hpp>
#include <vespa/log/log.h>
LOG_SETUP(".searchlib.memoryindex.posting_iterator");
@@ -62,7 +63,7 @@ PostingIterator::doUnpack(uint32_t docId)
assert(docId == getDocId());
assert(_itr.valid());
assert(docId == _itr.getKey());
- datastore::EntryRef featureRef(_itr.getData());
+ datastore::EntryRef featureRef(_itr.getData().get_features());
_featureStore.setupForUnpackFeatures(featureRef, _featureDecoder);
_featureDecoder.unpackFeatures(_matchData, docId);
setUnpacked();
@@ -70,3 +71,59 @@ PostingIterator::doUnpack(uint32_t docId)
}
+namespace search::btree {
+// Explicit instantiations of the btree templates with PostingListEntry as the data (value) type.
+template class BTreeNodeTT<uint32_t,
+ search::memoryindex::PostingListEntry,
+ search::btree::NoAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS>;
+
+template class BTreeLeafNode<uint32_t,
+ search::memoryindex::PostingListEntry,
+ search::btree::NoAggregated,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+
+template class BTreeNodeStore<uint32_t,
+ search::memoryindex::PostingListEntry,
+ search::btree::NoAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+
+template class BTreeIteratorBase<uint32_t,
+ search::memoryindex::PostingListEntry,
+ search::btree::NoAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS,
+ BTreeDefaultTraits::PATH_SIZE>;
+
+template class BTreeIterator<uint32_t,
+ search::memoryindex::PostingListEntry,
+ search::btree::NoAggregated,
+ std::less<uint32_t>,
+ BTreeDefaultTraits>;
+
+template class BTree<uint32_t,
+ search::memoryindex::PostingListEntry,
+ search::btree::NoAggregated,
+ std::less<uint32_t>,
+ BTreeDefaultTraits>;
+
+template class BTreeRoot<uint32_t,
+ search::memoryindex::PostingListEntry,
+ search::btree::NoAggregated,
+ std::less<uint32_t>,
+ BTreeDefaultTraits>;
+
+template class BTreeRootBase<uint32_t,
+ search::memoryindex::PostingListEntry,
+ search::btree::NoAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+
+template class BTreeNodeAllocator<uint32_t,
+ search::memoryindex::PostingListEntry,
+ search::btree::NoAggregated,
+ BTreeDefaultTraits::INTERNAL_SLOTS,
+ BTreeDefaultTraits::LEAF_SLOTS>;
+
+}
diff --git a/searchlib/src/vespa/searchlib/memoryindex/posting_list_entry.h b/searchlib/src/vespa/searchlib/memoryindex/posting_list_entry.h
new file mode 100644
index 00000000000..af204c84bbb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/posting_list_entry.h
@@ -0,0 +1,31 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+# pragma once
+
+#include <vespa/vespalib/datastore/entryref.h>
+
+namespace search::memoryindex {
+
+/**
+ * Entry per document in memory index posting list; wraps the reference to that document's compressed features.
+ */
+class PostingListEntry {
+ mutable datastore::EntryRef _features; // reference to compressed features; mutable so the const update_features() can rewrite it
+
+public:
+ PostingListEntry(datastore::EntryRef features) // not explicit: callers in this change pass an EntryRef where an entry is expected
+ : _features(features)
+ {
+ }
+
+ PostingListEntry()
+ : _features()
+ {
+ }
+ datastore::EntryRef get_features() const { return _features; }
+
+ // Reference moved data (used when compacting FeatureStore); const so it can be called via the const references compactFeatures() obtains
+ void update_features(datastore::EntryRef features) const { _features = features; }
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.cpp
index 54c0aa866b4..f5300430bea 100644
--- a/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.cpp
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.cpp
@@ -261,13 +261,13 @@ FakeMemTreeOccMgr::flush()
lastWord = wordIdx;
if (i->getRemove()) {
if (itr.valid() && itr.getKey() == docId) {
- uint64_t bits = _featureStore.bitSize(fw->getPackedIndex(), EntryRef(itr.getData()));
+ uint64_t bits = _featureStore.bitSize(fw->getPackedIndex(), EntryRef(itr.getData().get_features()));
_featureSizes[wordIdx] -= RefType::align((bits + 7) / 8) * 8;
tree.remove(itr);
}
} else {
if (!itr.valid() || docId < itr.getKey()) {
- tree.insert(itr, docId, i->getFeatureRef().ref());
+ tree.insert(itr, docId, i->getFeatureRef());
}
}
}