diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2021-06-04 16:57:05 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-06-04 16:57:05 +0200 |
commit | 11da9c85cc5d44cf0a36baa4ce7c471d58fba5b3 (patch) | |
tree | 3117667fd3b0087ee0737e2a381d374045af7ec1 | |
parent | e7e8208b6b180454d62d47c22d3250e2dfc8b8de (diff) | |
parent | 6c537290ad1df8469b54dee80079542c171fac1a (diff) |
Merge pull request #18114 from vespa-engine/balder/explicit-erase-when-capacity-is-low
Use explicit erase to avoid clearing and resizing the hashtable when …
18 files changed, 190 insertions, 153 deletions
diff --git a/searchlib/src/tests/attribute/attribute_test.cpp b/searchlib/src/tests/attribute/attribute_test.cpp index af1fcea2e21..79e120d0683 100644 --- a/searchlib/src/tests/attribute/attribute_test.cpp +++ b/searchlib/src/tests/attribute/attribute_test.cpp @@ -1670,8 +1670,8 @@ AttributeTest::testStatus() AttributePtr ptr = createAttribute("as", cfg); addDocs(ptr, numDocs); auto & sa = *(static_cast<StringAttribute *>(ptr.get())); - const size_t numUniq(16); - const size_t numValuesPerDoc(16); + const size_t numValuesPerDoc(values.size()); + const size_t numUniq(numValuesPerDoc); for (uint32_t i = 0; i < numDocs; ++i) { EXPECT_TRUE(appendToVector(sa, i, numValuesPerDoc, values)); } @@ -1680,11 +1680,11 @@ AttributeTest::testStatus() EXPECT_EQUAL(ptr->getStatus().getNumValues(), numDocs*numValuesPerDoc); EXPECT_EQUAL(ptr->getStatus().getNumUniqueValues(), numUniq); size_t expUsed = 0; - expUsed += 1 * InternalNodeSize + 1 * LeafNodeSize; // enum store tree - expUsed += numUniq * 32; // enum store (16 unique values, 32 bytes per entry) + expUsed += 1 * InternalNodeSize + 1 * LeafNodeSize; // Approximate enum store tree + expUsed += 272; // TODO Approximate... enum store (16 unique values, 17 bytes per entry) // multi value mapping (numdocs * sizeof(MappingIndex) + numvalues * sizeof(EnumIndex) + - // numdocs * sizeof(Array<EnumIndex>) (due to vector vector)) - expUsed += numDocs * sizeof(vespalib::datastore::EntryRef) + numDocs * numValuesPerDoc * sizeof(IEnumStore::Index) + ((numValuesPerDoc > 1024) ? numDocs * NestedVectorSize : 0); + // 32 + numdocs * sizeof(Array<EnumIndex>) (due to vector vector)) + expUsed += 32 + numDocs * sizeof(vespalib::datastore::EntryRef) + numDocs * numValuesPerDoc * sizeof(IEnumStore::Index) + ((numValuesPerDoc > 1024) ? numDocs * NestedVectorSize : 0); EXPECT_GREATER_EQUAL(ptr->getStatus().getUsed(), expUsed); EXPECT_GREATER_EQUAL(ptr->getStatus().getAllocated(), expUsed); } diff --git a/searchlib/src/tests/attribute/changevector/changevector_test.cpp b/searchlib/src/tests/attribute/changevector/changevector_test.cpp index ad33774e904..93a23630407 100644 --- a/searchlib/src/tests/attribute/changevector/changevector_test.cpp +++ b/searchlib/src/tests/attribute/changevector/changevector_test.cpp @@ -2,17 +2,27 @@ #include <vespa/vespalib/testkit/testapp.h> #include <vespa/searchlib/attribute/changevector.hpp> - +#include <vespa/vespalib/stllike/hash_set.h> using namespace search; +using Change = ChangeTemplate<NumericChangeData<long>>; +using CV = ChangeVectorT<Change>; + template <typename T> void verifyStrictOrdering(const T & v) { - long count(0); - for (const auto & c : v) { - count++; - EXPECT_EQUAL(count, c._data.get()); + vespalib::hash_set<uint32_t> complete; + uint32_t prev_doc(0); + uint32_t prev_value(0); + for (const auto & c : v.getDocIdInsertOrder()) { + if (prev_doc != c._doc) { + complete.insert(prev_doc); + EXPECT_FALSE(complete.contains(c._doc)); + prev_doc = c._doc; + } else { + EXPECT_GREATER(c._data, prev_value); + } + prev_value = c._data; } - EXPECT_EQUAL(v.size(), size_t(count)); } class Accessor { @@ -30,8 +40,6 @@ private: TEST("require insert ordering is preserved for same doc") { - typedef ChangeTemplate<NumericChangeData<long>> Change; - typedef ChangeVectorT<Change> CV; CV a; a.push_back(Change(Change::NOOP, 7, 1)); EXPECT_EQUAL(1u, a.size()); @@ -42,8 +50,6 @@ TEST("require insert ordering is preserved for same doc") TEST("require insert ordering is preserved ") { - typedef ChangeTemplate<NumericChangeData<long>> Change; - typedef ChangeVectorT<Change> CV; CV a; a.push_back(Change(Change::NOOP, 7, 1)); EXPECT_EQUAL(1u, a.size()); @@ -56,8 +62,6 @@ TEST("require insert ordering is preserved ") TEST("require insert ordering is preserved with mix") { - typedef ChangeTemplate<NumericChangeData<long>> Change; - typedef ChangeVectorT<Change> CV; CV a; a.push_back(Change(Change::NOOP, 7, 1)); EXPECT_EQUAL(1u, a.size()); @@ -77,8 +81,6 @@ TEST("require insert ordering is preserved with mix") } TEST("require that inserting empty vector does not affect the vector.") { - typedef ChangeTemplate<NumericChangeData<long>> Change; - typedef ChangeVectorT<Change> CV; CV a; std::vector<long> v; Accessor ac(v); @@ -86,4 +88,42 @@ TEST("require that inserting empty vector does not affect the vector.") { EXPECT_EQUAL(0u, a.size()); } +TEST("require that we have control over buffer construction size") { + CV a; + EXPECT_EQUAL(0u, a.size()); + EXPECT_EQUAL(256u, a.capacity()); + a.clear(); + EXPECT_EQUAL(0u, a.size()); + EXPECT_EQUAL(256u, a.capacity()); +} + +TEST("require that buffer can grow some") { + CV a; + for (size_t i(0); i < 1024; i++) { + a.push_back(Change(Change::NOOP, i, i)); + } + EXPECT_EQUAL(1024u, a.size()); + EXPECT_EQUAL(1024u, a.capacity()); + a.clear(); + EXPECT_EQUAL(0u, a.size()); + EXPECT_EQUAL(1024u, a.capacity()); +} + +TEST("require that buffer can grow some, but not unbound") { + CV a; + for (size_t i(0); i < 1025; i++) { + a.push_back(Change(Change::NOOP, i, i)); + } + EXPECT_EQUAL(1025u, a.size()); + EXPECT_EQUAL(2048u, a.capacity()); + a.clear(); + EXPECT_EQUAL(0u, a.size()); + EXPECT_EQUAL(256u, a.capacity()); +} + +TEST("Control Change size") { + EXPECT_EQUAL(32u, sizeof(ChangeTemplate<NumericChangeData<long>>)); + EXPECT_EQUAL(88u, sizeof(ChangeTemplate<StringChangeData>)); +} + TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.hpp b/searchlib/src/vespa/searchlib/attribute/attributevector.hpp index efc96bc57c2..616096e9091 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributevector.hpp +++ b/searchlib/src/vespa/searchlib/attribute/attributevector.hpp @@ -89,40 +89,36 @@ AttributeVector::adjustWeight(ChangeVectorT< ChangeTemplate<T> >& changes, DocId template<typename T> bool -AttributeVector::applyArithmetic(ChangeVectorT< ChangeTemplate<T> > & changes, DocId doc, const T & v, +AttributeVector::applyArithmetic(ChangeVectorT< ChangeTemplate<T> > & changes, DocId doc, const T &, const ArithmeticValueUpdate & arithm) { - (void) v; - bool retval(!hasMultiValue() && (doc < getNumDocs())); - if (retval) { - size_t oldSz(changes.size()); - ArithmeticValueUpdate::Operator op(arithm.getOperator()); - double aop = arithm.getOperand(); - if (op == ArithmeticValueUpdate::Add) { - changes.push_back(ChangeTemplate<T>(ChangeBase::ADD, doc, 0, 0)); - } else if (op == ArithmeticValueUpdate::Sub) { - changes.push_back(ChangeTemplate<T>(ChangeBase::SUB, doc, 0, 0)); - } else if (op == ArithmeticValueUpdate::Mul) { - changes.push_back(ChangeTemplate<T>(ChangeBase::MUL, doc, 0, 0)); - } else if (op == ArithmeticValueUpdate::Div) { - if (this->getClass().inherits(IntegerAttribute::classId) && aop == 0) { - divideByZeroWarning(); - } else { - changes.push_back(ChangeTemplate<T>(ChangeBase::DIV, doc, 0, 0)); - } + if (hasMultiValue() || (doc >= getNumDocs())) return false; + + size_t oldSz(changes.size()); + ArithmeticValueUpdate::Operator op(arithm.getOperator()); + double aop = arithm.getOperand(); + if (op == ArithmeticValueUpdate::Add) { + changes.push_back(ChangeTemplate<T>(ChangeBase::ADD, doc, 0, 0)); + } else if (op == ArithmeticValueUpdate::Sub) { + changes.push_back(ChangeTemplate<T>(ChangeBase::SUB, doc, 0, 0)); + } else if (op == ArithmeticValueUpdate::Mul) { + changes.push_back(ChangeTemplate<T>(ChangeBase::MUL, doc, 0, 0)); + } else if (op == ArithmeticValueUpdate::Div) { + if (this->getClass().inherits(IntegerAttribute::classId) && aop == 0) { + divideByZeroWarning(); } else { - retval = false; - } - if (retval) { - const size_t diff = changes.size() - oldSz; - _status.incNonIdempotentUpdates(diff); - _status.incUpdates(diff); - if (diff > 0) { - changes.back()._arithOperand = aop; - } + changes.push_back(ChangeTemplate<T>(ChangeBase::DIV, doc, 0, 0)); } + } else { + return false; } - return retval; + const size_t diff = changes.size() - oldSz; + _status.incNonIdempotentUpdates(diff); + _status.incUpdates(diff); + if (diff > 0) { + changes.back()._arithOperand = aop; + } + return true; } template<typename T> diff --git a/searchlib/src/vespa/searchlib/attribute/changevector.h b/searchlib/src/vespa/searchlib/attribute/changevector.h index c3b4d0ce3b0..d63ef2e2b35 100644 --- a/searchlib/src/vespa/searchlib/attribute/changevector.h +++ b/searchlib/src/vespa/searchlib/attribute/changevector.h @@ -2,8 +2,8 @@ #pragma once -#include <vespa/vespalib/stllike/hash_map.h> #include <vespa/searchcommon/common/undefinedvalues.h> +#include <vespa/vespalib/stllike/allocator.h> #include <vector> namespace vespalib { class MemoryUsage; } @@ -26,11 +26,10 @@ struct ChangeBase { DIV, CLEARDOC }; - enum {TAIL=0, UNSET_ENUM = 0xffffffffu}; + enum {UNSET_ENUM = 0xffffffffu}; ChangeBase() : _type(NOOP), - _next(TAIL), _doc(0), _weight(1), _enumScratchPad(UNSET_ENUM), @@ -39,7 +38,6 @@ struct ChangeBase { ChangeBase(Type type, uint32_t d, int32_t w = 1) : _type(type), - _next(TAIL), _doc(d), _weight(w), _enumScratchPad(UNSET_ENUM), @@ -48,18 +46,11 @@ struct ChangeBase { int cmp(const ChangeBase &b) const { int diff(_doc - b._doc); return diff; } bool operator <(const ChangeBase & b) const { return cmp(b) < 0; } - bool isAtEnd() const { return _next == TAIL; } - uint32_t getNext() const { return _next; } - void setNext(uint32_t next) { _next = next; } uint32_t getEnum() const { return _enumScratchPad; } void setEnum(uint32_t value) const { _enumScratchPad = value; } bool isEnumValid() const { return _enumScratchPad != UNSET_ENUM; } - void invalidateEnum() const { _enumScratchPad = UNSET_ENUM; } Type _type; -private: - uint32_t _next; -public: uint32_t _doc; int32_t _weight; mutable uint32_t _enumScratchPad; @@ -108,7 +99,7 @@ struct ChangeTemplate : public ChangeBase { ChangeBase(type, d, w), _data(v) { } - T _data; + T _data; }; template <> @@ -131,54 +122,66 @@ NumericChangeData<double>::operator<(const NumericChangeData<double> &rhs) const return _v < rhs._v; } -class ChangeVectorBase { -protected: -}; - /** - * Maintains a list of changes where changes to the same docid are adjacent, but ordered by insertion order. - * Apart from that no ordering by docid. + * Maintains a list of changes. + * You can select to view the in insert order, + * or unordered, but changes to the same docid are adjacent and ordered by insertion order. */ template <typename T> -class ChangeVectorT : public ChangeVectorBase { +class ChangeVectorT { private: - using Map = vespalib::hash_map<uint32_t, uint32_t>; - using Vector = std::vector<T>; + using Vector = std::vector<T, vespalib::allocator_large<T>>; public: + using const_iterator = typename Vector::const_iterator; ChangeVectorT(); ~ChangeVectorT(); - class const_iterator { - public: - const_iterator(const Vector & vector, uint32_t next) : _v(&vector), _next(next) { } - bool operator == (const const_iterator & rhs) const { return _v == rhs._v && _next == rhs._next; } - bool operator != (const const_iterator & rhs) const { return _v != rhs._v || _next != rhs._next; } - const_iterator& operator++() { advance(); return *this; } - const_iterator operator++(int) { const_iterator other(*this); advance(); return other; } - const T & operator * () const { return v()[_next]; } - const T * operator -> () const { return &v()[_next]; } - private: - void advance() { _next = v()[_next].getNext(); } - const Vector & v() const { return *_v; } - const Vector * _v; - uint32_t _next; - }; - void push_back(const T & c); template <typename Accessor> void push_back(uint32_t doc, Accessor & ac); - const T & back() const { return _v.back(); } T & back() { return _v.back(); } size_t size() const { return _v.size(); } + size_t capacity() const { return _v.capacity(); } bool empty() const { return _v.empty(); } void clear(); - const_iterator begin() const { return const_iterator(_v, 0); } - const_iterator end() const { return const_iterator(_v, size()); } + class InsertOrder { + public: + InsertOrder(const Vector & v) : _v(v) { } + const_iterator begin() const { return _v.begin(); } + const_iterator end() const { return _v.end(); } + private: + const Vector &_v; + }; + class DocIdInsertOrder { + using AdjacentDocIds = std::vector<uint64_t, vespalib::allocator_large<uint64_t>>; + public: + class const_iterator { + public: + const_iterator(const Vector & vector, const AdjacentDocIds & order, uint32_t cur) + : _v(&vector), _o(&order), _cur(cur) { } + bool operator == (const const_iterator & rhs) const { return _v == rhs._v && _cur == rhs._cur; } + bool operator != (const const_iterator & rhs) const { return _v != rhs._v || _cur != rhs._cur; } + const_iterator& operator++() { _cur++; return *this; } + const_iterator operator++(int) { const_iterator other(*this); _cur++; return other; } + const T & operator * () const { return v(); } + const T * operator -> () const { return &v(); } + private: + const T & v() const { return (*_v)[(*_o)[_cur] & 0xffffffff]; } + const Vector * _v; + const AdjacentDocIds * _o; + uint32_t _cur; + }; + DocIdInsertOrder(const Vector & v); + const_iterator begin() const { return const_iterator(_v, _adjacent, 0); } + const_iterator end() const { return const_iterator(_v, _adjacent, _v.size()); } + private: + const Vector &_v; + AdjacentDocIds _adjacent; + }; + InsertOrder getInsertOrder() const { return InsertOrder(_v); } + DocIdInsertOrder getDocIdInsertOrder() const { return DocIdInsertOrder(_v); } vespalib::MemoryUsage getMemoryUsage() const; private: - void linkIn(uint32_t doc, size_t index, size_t last); - Vector _v; - Map _docs; - uint32_t _tail; + Vector _v; }; } // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/changevector.hpp b/searchlib/src/vespa/searchlib/attribute/changevector.hpp index dcb31ebae73..5052f4b9a10 100644 --- a/searchlib/src/vespa/searchlib/attribute/changevector.hpp +++ b/searchlib/src/vespa/searchlib/attribute/changevector.hpp @@ -4,9 +4,12 @@ #include "changevector.h" #include <vespa/vespalib/util/memoryusage.h> +#include <vespa/vespalib/util/alloc.h> namespace search { +using vespalib::roundUp2inN; + namespace { // This number is selected to be large enough to hold bursts between commits @@ -16,11 +19,9 @@ constexpr size_t NUM_ELEMS_TO_RESERVE = 200; template <typename T> ChangeVectorT<T>::ChangeVectorT() - : _v(), - _docs(NUM_ELEMS_TO_RESERVE*2), - _tail(0) + : _v() { - _v.reserve(vespalib::roundUp2inN(NUM_ELEMS_TO_RESERVE, sizeof(T))); + _v.reserve(roundUp2inN<T>(NUM_ELEMS_TO_RESERVE)); } template <typename T> @@ -29,17 +30,21 @@ ChangeVectorT<T>::~ChangeVectorT() = default; template <typename T> void ChangeVectorT<T>::clear() { - _v.clear(); - _docs.clear(); + if (_v.capacity() > roundUp2inN<T>(NUM_ELEMS_TO_RESERVE * 5)) { + // Ensure we do not keep insanely large buffers over time, due to abnormal peaks + // caused by hickups else where. + _v = Vector(); + _v.reserve(roundUp2inN<T>(NUM_ELEMS_TO_RESERVE)); + } else { + _v.clear(); + } } template <typename T> void ChangeVectorT<T>::push_back(const T & c) { - size_t index(size()); _v.push_back(c); - linkIn(c._doc, index, index); } template <typename T> @@ -49,48 +54,32 @@ ChangeVectorT<T>::push_back(uint32_t doc, Accessor & ac) { if (ac.size() <= 0) { return; } - size_t index(size()); - _v.reserve(vespalib::roundUp2inN(index + ac.size(), sizeof(T))); + _v.reserve(roundUp2inN<T>(size() + ac.size())); for (size_t i(0), m(ac.size()); i < m; i++, ac.next()) { _v.push_back(T(ChangeBase::APPEND, doc, typename T::DataType(ac.value()), ac.weight())); - _v.back().setNext(index + i + 1); } - linkIn(doc, index, size() - 1); } template <typename T> -void -ChangeVectorT<T>::linkIn(uint32_t doc, size_t first, size_t last) +vespalib::MemoryUsage +ChangeVectorT<T>::getMemoryUsage() const { - if (first != 0 && (_v[_tail]._doc == doc)) { - _v[_tail].setNext(first); - _tail = last; - } else { - Map::iterator found(_docs.find(doc)); - if (found == _docs.end()) { - _docs[doc] = last; - if (_tail != first) { - _v[_tail].setNext(first); - } - _tail = last; - } else { - uint32_t prev(found->second); - for (; _v[_v[prev].getNext()]._doc == doc; prev = _v[prev].getNext()); - _v[last].setNext(_v[prev].getNext()); - _v[prev].setNext(first); - found->second = last; - } - } - _v[_tail].setNext(size()); + size_t usedBytes = _v.size() * sizeof(T); + size_t allocBytes = _v.capacity() * sizeof(T); + return vespalib::MemoryUsage(allocBytes, usedBytes, 0, 0); } template <typename T> -vespalib::MemoryUsage -ChangeVectorT<T>::getMemoryUsage() const +ChangeVectorT<T>::DocIdInsertOrder::DocIdInsertOrder(const Vector & v) + : _v(v), + _adjacent() { - size_t usedBytes = _v.size() * sizeof(T) + _docs.getMemoryUsed(); - size_t allocBytes = _v.capacity() * sizeof(T) + _docs.getMemoryConsumption(); - return vespalib::MemoryUsage(allocBytes, usedBytes, 0, 0); + _adjacent.reserve(v.size()); + uint32_t index(0); + for (const auto & c : _v) { + _adjacent.push_back((uint64_t(c._doc) << 32) | index++); + } + std::sort(_adjacent.begin(), _adjacent.end()); } } // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp index fd576b3a9ba..164bb411061 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp @@ -63,7 +63,7 @@ void EnumAttribute<B>::insertNewUniqueValues(EnumStoreBatchUpdater& updater) { // find and insert new unique strings - for (const auto & data : this->_changes) { + for (const auto & data : this->_changes.getInsertOrder()) { considerAttributeChange(data, updater); } } diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp index 90bcf92a103..9885613f4e3 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp @@ -207,7 +207,7 @@ template <typename EntryT> vespalib::MemoryUsage EnumStoreT<EntryT>::update_stat() { - auto &store = _store.get_allocator().get_data_store(); + auto &store = _store.get_data_store(); _cached_values_memory_usage = store.getMemoryUsage(); _cached_values_address_space_usage = store.getAddressSpaceUsage(); _cached_dictionary_btree_usage = _dict->get_btree_memory_usage(); diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp index 8475451ba60..072398abcf9 100644 --- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp @@ -25,7 +25,7 @@ template <typename B, typename M> bool MultiValueEnumAttribute<B, M>::extractChangeData(const Change & c, EnumIndex & idx) { - if (c._enumScratchPad == Change::UNSET_ENUM) { + if ( ! c.isEnumValid() ) { return this->_enumStore.find_index(c._data.raw(), idx); } idx = EnumIndex(vespalib::datastore::EntryRef(c._enumScratchPad)); diff --git a/searchlib/src/vespa/searchlib/attribute/multivalueattribute.h b/searchlib/src/vespa/searchlib/attribute/multivalueattribute.h index 66ca6bd2eac..d36777a25a9 100644 --- a/searchlib/src/vespa/searchlib/attribute/multivalueattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/multivalueattribute.h @@ -20,7 +20,6 @@ protected: typedef typename B::DocId DocId; typedef typename B::Change Change; typedef typename B::ChangeVector ChangeVector; - typedef typename B::ChangeVector::const_iterator ChangeVectorIterator; using MultiValueType = M; using MultiValueMapping = attribute::MultiValueMapping<MultiValueType>; diff --git a/searchlib/src/vespa/searchlib/attribute/multivalueattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multivalueattribute.hpp index 0cd2e0bbc27..2e73909ea1e 100644 --- a/searchlib/src/vespa/searchlib/attribute/multivalueattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multivalueattribute.hpp @@ -78,11 +78,12 @@ void MultiValueAttribute<B, M>::apply_attribute_changes_to_array(DocumentValues& docValues) { // compute new values for each document with changes - for (ChangeVectorIterator current(this->_changes.begin()), end(this->_changes.end()); (current != end); ) { + auto iterable = this->_changes.getDocIdInsertOrder(); + for (auto current(iterable.begin()), end(iterable.end()); (current != end); ) { DocId doc = current->_doc; // find last clear doc - ChangeVectorIterator last_clear_doc = end; - for (ChangeVectorIterator iter = current; (iter != end) && (iter->_doc == doc); ++iter) { + auto last_clear_doc = end; + for (auto iter = current; (iter != end) && (iter->_doc == doc); ++iter) { if (iter->_type == ChangeBase::CLEARDOC) { last_clear_doc = iter; } @@ -137,12 +138,13 @@ void MultiValueAttribute<B, M>::apply_attribute_changes_to_wset(DocumentValues& docValues) { // compute new values for each document with changes - for (ChangeVectorIterator current(this->_changes.begin()), end(this->_changes.end()); (current != end); ) { + auto iterable = this->_changes.getDocIdInsertOrder(); + for (auto current(iterable.begin()), end(iterable.end()); (current != end); ) { const DocId doc = current->_doc; // find last clear doc - ChangeVectorIterator last_clear_doc = end; + auto last_clear_doc = end; size_t max_elems_inserted = 0; - for (ChangeVectorIterator iter = current; (iter != end) && (iter->_doc == doc); ++iter) { + for (auto iter = current; (iter != end) && (iter->_doc == doc); ++iter) { if (iter->_type == ChangeBase::CLEARDOC) { last_clear_doc = iter; } diff --git a/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp index 2b40150f87b..6c8edea13cf 100644 --- a/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp @@ -59,7 +59,7 @@ SingleBoolAttribute::onCommit() { if ( ! _changes.empty()) { // apply updates ValueModifier valueGuard(getValueModifier()); - for (const auto & change : _changes) { + for (const auto & change : _changes.getInsertOrder()) { if (change._type == ChangeBase::UPDATE) { std::atomic_thread_fence(std::memory_order_release); setBit(change._doc, change._data != 0); diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp index b39bdeb3b00..bf75400b157 100644 --- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp @@ -175,7 +175,7 @@ void SingleValueEnumAttribute<B>::applyValueChanges(EnumStoreBatchUpdater& updater) { ValueModifier valueGuard(this->getValueModifier()); - for (const auto& change : this->_changes) { + for (const auto& change : this->_changes.getInsertOrder()) { if (change._type == ChangeBase::UPDATE) { applyUpdateValueChange(change, updater); } else if (change._type >= ChangeBase::ADD && change._type <= ChangeBase::DIV) { diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.hpp index fd913f34c3a..671bdc44e22 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.hpp @@ -37,7 +37,7 @@ SingleValueNumericAttribute<B>::onCommit() { // apply updates typename B::ValueModifier valueGuard(this->getValueModifier()); - for (const auto & change : this->_changes) { + for (const auto & change : this->_changes.getInsertOrder()) { if (change._type == ChangeBase::UPDATE) { std::atomic_thread_fence(std::memory_order_release); _data[change._doc] = change._data; diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp index f5ab855565c..e1c2a817af7 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp @@ -84,7 +84,7 @@ SingleValueNumericPostingAttribute<B>::applyValueChanges(EnumStoreBatchUpdater& // used to make sure several arithmetic operations on the same document in a single commit works std::map<DocId, EnumIndex> currEnumIndices; - for (const auto& change : this->_changes) { + for (const auto& change : this->_changes.getInsertOrder()) { auto enumIter = currEnumIndices.find(change._doc); EnumIndex oldIdx; if (enumIter != currEnumIndices.end()) { diff --git a/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp index f1d0da42165..8d460b5c661 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp @@ -53,7 +53,7 @@ SingleValueSmallNumericAttribute::onCommit() { // apply updates B::ValueModifier valueGuard(getValueModifier()); - for (const auto & change : _changes) { + for (const auto & change : _changes.getInsertOrder()) { if (change._type == ChangeBase::UPDATE) { std::atomic_thread_fence(std::memory_order_release); set(change._doc, change._data); diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp index 39ad8d71021..4432acf2c55 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp @@ -85,7 +85,7 @@ SingleValueStringPostingAttributeT<B>::applyValueChanges(EnumStoreBatchUpdater& // used to make sure several arithmetic operations on the same document in a single commit works std::map<DocId, EnumIndex> currEnumIndices; - for (const auto& change : this->_changes) { + for (const auto& change : this->_changes.getInsertOrder()) { auto enumIter = currEnumIndices.find(change._doc); EnumIndex oldIdx; if (enumIter != currEnumIndices.end()) { diff --git a/vespalib/src/vespa/vespalib/util/alloc.h b/vespalib/src/vespa/vespalib/util/alloc.h index f608a244035..a97e8c9f25e 100644 --- a/vespalib/src/vespa/vespalib/util/alloc.h +++ b/vespalib/src/vespa/vespalib/util/alloc.h @@ -102,13 +102,21 @@ private: namespace vespalib { /// Rounds up to the closest number that is a power of 2 -inline size_t roundUp2inN(size_t minimum) { +inline size_t +roundUp2inN(size_t minimum) { return 2ul << Optimized::msbIdx(minimum - 1); } /// Rounds minElems up to the closest number where minElems*elemSize is a power of 2 -inline size_t roundUp2inN(size_t minElems, size_t elemSize) { +inline size_t +roundUp2inN(size_t minElems, size_t elemSize) { return roundUp2inN(minElems * elemSize)/elemSize; } +template <typename T> +size_t +roundUp2inN(size_t elems) { + return roundUp2inN(elems, sizeof(T)); +} + } diff --git a/vespalib/src/vespa/vespalib/util/optimized.h b/vespalib/src/vespa/vespalib/util/optimized.h index 92cf1f0ca24..6c6d1b12a71 100644 --- a/vespalib/src/vespa/vespalib/util/optimized.h +++ b/vespalib/src/vespa/vespalib/util/optimized.h @@ -22,9 +22,9 @@ public: static int lsbIdx(unsigned int v); static int lsbIdx(unsigned long v); static int lsbIdx(unsigned long long v); - static int popCount(unsigned int v) { return __builtin_popcount(v); } - static int popCount(unsigned long v) { return __builtin_popcountl(v); } - static int popCount(unsigned long long v) { return __builtin_popcountll(v); } + static constexpr int popCount(unsigned int v) { return __builtin_popcount(v); } + static constexpr int popCount(unsigned long v) { return __builtin_popcountl(v); } + static constexpr int popCount(unsigned long long v) { return __builtin_popcountll(v); } }; /** |