diff options
author | Tor Egge <Tor.Egge@verizonmedia.com> | 2019-06-14 12:24:50 +0000 |
---|---|---|
committer | Tor Egge <Tor.Egge@verizonmedia.com> | 2019-06-14 12:50:11 +0000 |
commit | 3ddea2a1c8a5d0da53b97c021c48f878b64c7810 (patch) | |
tree | 186130cb849876ce9f968c62df6088eb570f7480 | |
parent | 2c3e1a1ab6000096e8494f2eff7a3c8af31a6d9e (diff) |
Change posting list for memory index to have PostingListEntry as value.
7 files changed, 106 insertions, 18 deletions
diff --git a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp index 05c905cdc32..0f1c966ad5d 100644 --- a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp +++ b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp @@ -171,7 +171,7 @@ assertPostingList(const std::string &exp, uint32_t docId = itr.getKey(); ss << docId; if (store != nullptr) { // consider features as well - EntryRef ref(itr.getData()); + EntryRef ref(itr.getData().get_features()); store->setupForField(0, decoder); store->setupForUnpackFeatures(ref, decoder); decoder.unpackFeatures(matchData, docId); diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp index e2e1c99a9b9..8daf87e1899 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp @@ -127,36 +127,35 @@ FieldIndex::compactFeatures() const PostingList *tree = _postingListStore.getTreeEntry(pidx); auto pitr = tree->begin(_postingListStore.getAllocator()); for (; pitr.valid(); ++pitr) { - EntryRef oldFeatures(pitr.getData()); + const PostingListEntry &posting_entry(pitr.getData()); // Filter on which buffers to move features from when // performing incremental compaction. - EntryRef newFeatures = _featureStore.moveFeatures(packedIndex, oldFeatures); + EntryRef newFeatures = _featureStore.moveFeatures(packedIndex, posting_entry.get_features()); // Features must be written before reference is updated. std::atomic_thread_fence(std::memory_order_release); - // Ugly, ugly due to const_cast in iterator - pitr.writeData(newFeatures.ref()); + // Reference the moved data + posting_entry.update_features(newFeatures); } } else { const PostingListKeyDataType *shortArray = _postingListStore.getKeyDataEntry(pidx, clusterSize); const PostingListKeyDataType *ite = shortArray + clusterSize; for (const PostingListKeyDataType *it = shortArray; it < ite; ++it) { - EntryRef oldFeatures(it->getData()); + const PostingListEntry &posting_entry(it->getData()); // Filter on which buffers to move features from when // performing incremental compaction. - EntryRef newFeatures = _featureStore.moveFeatures(packedIndex, oldFeatures); + EntryRef newFeatures = _featureStore.moveFeatures(packedIndex, posting_entry.get_features()); // Features must be written before reference is updated. std::atomic_thread_fence(std::memory_order_release); - // Ugly, ugly due to const_cast, but new data is - // semantically equal to old data - const_cast<PostingListKeyDataType *>(it)->setData(newFeatures.ref()); + // Reference the moved data + posting_entry.update_features(newFeatures); } } } @@ -189,7 +188,7 @@ FieldIndex::dump(search::index::IndexBuilder & indexBuilder) assert(pitr.valid()); for (; pitr.valid(); ++pitr) { uint32_t docId = pitr.getKey(); - EntryRef featureRef(pitr.getData()); + EntryRef featureRef(pitr.getData().get_features()); _featureStore.setupForReadFeatures(featureRef, decoder); decoder.readFeatures(features); features.set_doc_id(docId); @@ -202,7 +201,7 @@ FieldIndex::dump(search::index::IndexBuilder & indexBuilder) const PostingListKeyDataType *kde = kd + clusterSize; for (; kd != kde; ++kd) { uint32_t docId = kd->_key; - EntryRef featureRef(kd->getData()); + EntryRef featureRef(kd->getData().get_features()); _featureStore.setupForReadFeatures(featureRef, decoder); decoder.readFeatures(features); features.set_doc_id(docId); diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.h b/searchlib/src/vespa/searchlib/memoryindex/field_index.h index dba57f553b5..d5df2fa49c8 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index.h +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.h @@ -5,6 +5,7 @@ #include "feature_store.h" #include "field_index_remover.h" #include "word_store.h" +#include "posting_list_entry.h" #include <vespa/searchlib/index/docidandfeatures.h> #include <vespa/searchlib/index/field_length_calculator.h> #include <vespa/searchlib/index/indexbuilder.h> @@ -35,8 +36,8 @@ class OrderedFieldIndexInserter; class FieldIndex { public: // Mapping from docid -> feature ref - using PostingList = btree::BTreeRoot<uint32_t, uint32_t, search::btree::NoAggregated>; - using PostingListStore = btree::BTreeStore<uint32_t, uint32_t, + using PostingList = btree::BTreeRoot<uint32_t, PostingListEntry, search::btree::NoAggregated>; + using PostingListStore = btree::BTreeStore<uint32_t, PostingListEntry, search::btree::NoAggregated, std::less<uint32_t>, btree::BTreeDefaultTraits>; diff --git a/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.cpp b/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.cpp index 637a13d67be..1d38e88b747 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.cpp @@ -111,7 +111,7 @@ OrderedFieldIndexInserter::add(uint32_t docId, assert(_prevDocId == noDocId || _prevDocId < docId || (_prevDocId == docId && !_prevAdd)); datastore::EntryRef featureRef = _fieldIndex.addFeatures(features); - _adds.push_back(PostingListKeyDataType(docId, featureRef.ref())); + _adds.push_back(PostingListKeyDataType(docId, featureRef)); _listener.insert(_dItr.getKey()._wordRef, docId); _prevDocId = docId; _prevAdd = true; diff --git a/searchlib/src/vespa/searchlib/memoryindex/posting_iterator.cpp b/searchlib/src/vespa/searchlib/memoryindex/posting_iterator.cpp index 63040aab66f..290aa16dfe4 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/posting_iterator.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/posting_iterator.cpp @@ -6,6 +6,7 @@ #include <vespa/vespalib/btree/btreenodeallocator.hpp> #include <vespa/vespalib/btree/btreenodestore.hpp> #include <vespa/vespalib/btree/btreeroot.hpp> +#include <vespa/vespalib/btree/btreestore.hpp> #include <vespa/log/log.h> LOG_SETUP(".searchlib.memoryindex.posting_iterator"); @@ -62,7 +63,7 @@ PostingIterator::doUnpack(uint32_t docId) assert(docId == getDocId()); assert(_itr.valid()); assert(docId == _itr.getKey()); - datastore::EntryRef featureRef(_itr.getData()); + datastore::EntryRef featureRef(_itr.getData().get_features()); _featureStore.setupForUnpackFeatures(featureRef, _featureDecoder); _featureDecoder.unpackFeatures(_matchData, docId); setUnpacked(); @@ -70,3 +71,59 @@ PostingIterator::doUnpack(uint32_t docId) } +namespace search::btree { + +template class BTreeNodeTT<uint32_t, + search::memoryindex::PostingListEntry, + search::btree::NoAggregated, + BTreeDefaultTraits::INTERNAL_SLOTS>; + +template class BTreeLeafNode<uint32_t, + search::memoryindex::PostingListEntry, + search::btree::NoAggregated, + BTreeDefaultTraits::LEAF_SLOTS>; + +template class BTreeNodeStore<uint32_t, + search::memoryindex::PostingListEntry, + search::btree::NoAggregated, + BTreeDefaultTraits::INTERNAL_SLOTS, + BTreeDefaultTraits::LEAF_SLOTS>; + +template class BTreeIteratorBase<uint32_t, + search::memoryindex::PostingListEntry, + search::btree::NoAggregated, + BTreeDefaultTraits::INTERNAL_SLOTS, + BTreeDefaultTraits::LEAF_SLOTS, + BTreeDefaultTraits::PATH_SIZE>; + +template class BTreeIterator<uint32_t, + search::memoryindex::PostingListEntry, + search::btree::NoAggregated, + std::less<uint32_t>, + BTreeDefaultTraits>; + +template class BTree<uint32_t, + search::memoryindex::PostingListEntry, + search::btree::NoAggregated, + std::less<uint32_t>, + BTreeDefaultTraits>; + +template class BTreeRoot<uint32_t, + search::memoryindex::PostingListEntry, + search::btree::NoAggregated, + std::less<uint32_t>, + BTreeDefaultTraits>; + +template class BTreeRootBase<uint32_t, + search::memoryindex::PostingListEntry, + search::btree::NoAggregated, + BTreeDefaultTraits::INTERNAL_SLOTS, + BTreeDefaultTraits::LEAF_SLOTS>; + +template class BTreeNodeAllocator<uint32_t, + search::memoryindex::PostingListEntry, + search::btree::NoAggregated, + BTreeDefaultTraits::INTERNAL_SLOTS, + BTreeDefaultTraits::LEAF_SLOTS>; + +} diff --git a/searchlib/src/vespa/searchlib/memoryindex/posting_list_entry.h b/searchlib/src/vespa/searchlib/memoryindex/posting_list_entry.h new file mode 100644 index 00000000000..af204c84bbb --- /dev/null +++ b/searchlib/src/vespa/searchlib/memoryindex/posting_list_entry.h @@ -0,0 +1,31 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +# pragma once + +#include <vespa/vespalib/datastore/entryref.h> + +namespace search::memoryindex { + +/** + * Entry per document in memory index posting list. + */ +class PostingListEntry { + mutable datastore::EntryRef _features; // reference to compressed features + +public: + PostingListEntry(datastore::EntryRef features) + : _features(features) + { + } + + PostingListEntry() + : _features() + { + } + datastore::EntryRef get_features() const { return _features; } + + // Reference moved data (used when compacting FeatureStore) + void update_features(datastore::EntryRef features) const { _features = features; } +}; + +} diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.cpp index 54c0aa866b4..f5300430bea 100644 --- a/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.cpp +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.cpp @@ -261,13 +261,13 @@ FakeMemTreeOccMgr::flush() lastWord = wordIdx; if (i->getRemove()) { if (itr.valid() && itr.getKey() == docId) { - uint64_t bits = _featureStore.bitSize(fw->getPackedIndex(), EntryRef(itr.getData())); + uint64_t bits = _featureStore.bitSize(fw->getPackedIndex(), EntryRef(itr.getData().get_features())); _featureSizes[wordIdx] -= RefType::align((bits + 7) / 8) * 8; tree.remove(itr); } } else { if (!itr.valid() || docId < itr.getKey()) { - tree.insert(itr, docId, i->getFeatureRef().ref()); + tree.insert(itr, docId, i->getFeatureRef()); } } } |