diff options
10 files changed, 74 insertions, 33 deletions
diff --git a/configdefinitions/src/vespa/attributes.def b/configdefinitions/src/vespa/attributes.def index 0fdb3b2489f..676c890d1a7 100644 --- a/configdefinitions/src/vespa/attributes.def +++ b/configdefinitions/src/vespa/attributes.def @@ -35,6 +35,9 @@ attribute[].tensortype string default="" # Whether this is an imported attribute (from parent document db) or not. attribute[].imported bool default=false +# Max amount of uncommitted memory during feed. (Default just shy of 128k) +attribute[].maxuncommittedmemory long default=130000 + # The distance metric to use for nearest neighbor search. # Is only used when the attribute is a 1-dimensional indexed tensor. attribute[].distancemetric enum { EUCLIDEAN, ANGULAR, GEODEGREES, INNERPRODUCT, HAMMING } default=EUCLIDEAN diff --git a/searchcommon/src/vespa/searchcommon/attribute/config.cpp b/searchcommon/src/vespa/searchcommon/attribute/config.cpp index aae565ff775..0a50faa04c0 100644 --- a/searchcommon/src/vespa/searchcommon/attribute/config.cpp +++ b/searchcommon/src/vespa/searchcommon/attribute/config.cpp @@ -4,25 +4,14 @@ namespace search::attribute { -Config::Config() noexcept : - _basicType(BasicType::NONE), - _type(CollectionType::SINGLE), - _fastSearch(false), - _huge(false), - _enableBitVectors(false), - _enableOnlyBitVector(false), - _isFilter(false), - _fastAccess(false), - _mutable(false), - _paged(false), - _match(Match::UNCASED), - _dictionary(), - _growStrategy(), - _compactionStrategy(), - _predicateParams(), - _tensorType(vespalib::eval::ValueType::error_type()), - _distance_metric(DistanceMetric::Euclidean), - _hnsw_index_params() +namespace { + +static constexpr uint64_t MAX_UNCOMMITTED_MEMORY = 8000; + +} + +Config::Config() noexcept + : Config(BasicType::NONE, CollectionType::SINGLE, false, false) { } @@ -37,6 +26,7 @@ Config::Config(BasicType bt, CollectionType ct, bool fastSearch_, bool huge_) no _fastAccess(false), _mutable(false), _paged(false), + _maxUnCommittedMemory(MAX_UNCOMMITTED_MEMORY), _match(Match::UNCASED), _dictionary(), _growStrategy(), @@ -67,6 +57,7 @@ Config::operator==(const Config &b) const _fastAccess == b._fastAccess && _mutable == b._mutable && _paged == b._paged && + _maxUnCommittedMemory == b._maxUnCommittedMemory && _match == b._match && _dictionary == b._dictionary && _growStrategy == b._growStrategy && diff --git a/searchcommon/src/vespa/searchcommon/attribute/config.h b/searchcommon/src/vespa/searchcommon/attribute/config.h index 9734124ff69..e6a428e5843 100644 --- a/searchcommon/src/vespa/searchcommon/attribute/config.h +++ b/searchcommon/src/vespa/searchcommon/attribute/config.h @@ -129,6 +129,9 @@ public: bool operator!=(const Config &b) const { return !(operator==(b)); } bool operator==(const Config &b) const; + uint64_t getMaxUnCommittedMemory() const { return _maxUnCommittedMemory; } + Config & setMaxUnCommittedMemory(uint64_t value) { _maxUnCommittedMemory = value; return *this; } + private: BasicType _basicType; CollectionType _type; @@ -140,6 +143,7 @@ private: bool _fastAccess; bool _mutable; bool _paged; + uint64_t _maxUnCommittedMemory; Match _match; DictionaryConfig _dictionary; GrowStrategy _growStrategy; diff --git a/searchcore/src/vespa/searchcore/proton/attribute/attribute_writer.cpp b/searchcore/src/vespa/searchcore/proton/attribute/attribute_writer.cpp index 8069190ab17..26374ba1480 100644 --- a/searchcore/src/vespa/searchcore/proton/attribute/attribute_writer.cpp +++ b/searchcore/src/vespa/searchcore/proton/attribute/attribute_writer.cpp @@ -151,6 +151,7 @@ applyPutToAttribute(SerialNum serialNum, const FieldValue::UP &fieldValue, Docum } else { attr.clearDoc(lid); } + attr.commitIfChangeVectorTooLarge(); } void @@ -184,6 +185,7 @@ applyUpdateToAttribute(SerialNum serialNum, const FieldUpdate &fieldUpd, { ensureLidSpace(serialNum, lid, attr); AttributeUpdater::handleUpdate(attr, lid, fieldUpd); + attr.commitIfChangeVectorTooLarge(); } void diff --git a/searchlib/src/tests/attribute/attribute_test.cpp b/searchlib/src/tests/attribute/attribute_test.cpp index 97813a2a65e..6ee97595b79 100644 --- a/searchlib/src/tests/attribute/attribute_test.cpp +++ b/searchlib/src/tests/attribute/attribute_test.cpp @@ -275,6 +275,7 @@ private: void testReaderDuringLastUpdate(); void testPendingCompaction(); + void testConditionalCommit(); public: AttributeTest(); @@ -402,13 +403,18 @@ void AttributeTest::populate(StringAttribute & v, unsigned seed) v.commit(); } -template <> -void AttributeTest::populateSimple(IntegerAttribute & v, uint32_t docIdLow, uint32_t docIdHigh) +void populateSimpleUncommitted(IntegerAttribute & v, uint32_t docIdLow, uint32_t docIdHigh) { - for(uint32_t docId(docIdLow); docId < docIdHigh; ++docId) { + for (uint32_t docId(docIdLow); docId < docIdHigh; ++docId) { v.clearDoc(docId); EXPECT_TRUE( v.update(docId, docId + 1) ); } +} + +template <> +void AttributeTest::populateSimple(IntegerAttribute & v, uint32_t docIdLow, uint32_t docIdHigh) +{ + populateSimpleUncommitted(v, docIdLow, docIdHigh); v.commit(); } @@ -2233,6 +2239,34 @@ AttributeTest::testPendingCompaction() populateSimple(iv, 1, 2); // should not trigger new compaction } +void +AttributeTest::testConditionalCommit() { + Config cfg(BasicType::INT32, CollectionType::SINGLE); + cfg.setFastSearch(true); + cfg.setMaxUnCommittedMemory(70000); + AttributePtr v = createAttribute("sfsint32_cc", cfg); + addClearedDocs(v, 1000); + auto &iv = static_cast<IntegerAttribute &>(*v.get()); + EXPECT_EQUAL(0x8000u, iv.getChangeVectorMemoryUsage().allocatedBytes()); + EXPECT_EQUAL(0u, iv.getChangeVectorMemoryUsage().usedBytes()); + AttributeGuard guard1(v); + populateSimpleUncommitted(iv, 1, 3); + EXPECT_EQUAL(0x8000u, iv.getChangeVectorMemoryUsage().allocatedBytes()); + EXPECT_EQUAL(128u, iv.getChangeVectorMemoryUsage().usedBytes()); + populateSimpleUncommitted(iv, 1, 1000); + EXPECT_EQUAL(0x10000u, iv.getChangeVectorMemoryUsage().allocatedBytes()); + EXPECT_EQUAL(64064u, iv.getChangeVectorMemoryUsage().usedBytes()); + EXPECT_FALSE(v->commitIfChangeVectorTooLarge()); + EXPECT_EQUAL(0x10000u, iv.getChangeVectorMemoryUsage().allocatedBytes()); + EXPECT_EQUAL(64064u, iv.getChangeVectorMemoryUsage().usedBytes()); + populateSimpleUncommitted(iv, 1, 200); + EXPECT_EQUAL(0x20000u, iv.getChangeVectorMemoryUsage().allocatedBytes()); + EXPECT_EQUAL(76800u, iv.getChangeVectorMemoryUsage().usedBytes()); + EXPECT_TRUE(v->commitIfChangeVectorTooLarge()); + EXPECT_EQUAL(0x2000u, iv.getChangeVectorMemoryUsage().allocatedBytes()); + EXPECT_EQUAL(0u, iv.getChangeVectorMemoryUsage().usedBytes()); +} + void testNamePrefix() { Config cfg(BasicType::INT32, CollectionType::SINGLE); AttributeVector::SP vFlat = createAttribute("sfsint32_pc", cfg); @@ -2312,6 +2346,7 @@ int AttributeTest::Main() TEST_DO(requireThatAddressSpaceUsageIsReported()); testReaderDuringLastUpdate(); TEST_DO(testPendingCompaction()); + TEST_DO(testConditionalCommit()); TEST_DO(testNamePrefix()); test_multi_value_mapping_has_free_lists_enabled(); diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp index 15aec41934a..3bc1e5ec25f 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp @@ -756,6 +756,15 @@ AttributeVector::getChangeVectorMemoryUsage() const return vespalib::MemoryUsage(0, 0, 0, 0); } +bool +AttributeVector::commitIfChangeVectorTooLarge() { + bool needCommit = getChangeVectorMemoryUsage().usedBytes() > getConfig().getMaxUnCommittedMemory(); + if (needCommit) { + commit(false); + } + return needCommit; +} + void AttributeVector::logEnumStoreEvent(const char *reason, const char *stage) { diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.h b/searchlib/src/vespa/searchlib/attribute/attributevector.h index 61c86d53dcf..bb88b168474 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributevector.h +++ b/searchlib/src/vespa/searchlib/attribute/attributevector.h @@ -359,8 +359,7 @@ protected: template <typename BaseType, typename ChangeData> static BaseType - applyArithmetic(const BaseType &value, - const ChangeTemplate<ChangeData> & arithmetic) + applyArithmetic(const BaseType &value, const ChangeTemplate<ChangeData> & arithmetic) { typedef typename ChangeData::DataType LargeType; if (attribute::isUndefined(value)) { @@ -371,12 +370,10 @@ protected: return value - static_cast<LargeType>(arithmetic._arithOperand); } else if (arithmetic._type == ChangeBase::MUL) { LargeType r; - return round((static_cast<double>(value) * - arithmetic._arithOperand), r); + return round((static_cast<double>(value) * arithmetic._arithOperand), r); } else if (arithmetic._type == ChangeBase::DIV) { LargeType r; - return round(static_cast<double>(value) / - arithmetic._arithOperand, r); + return round(static_cast<double>(value) / arithmetic._arithOperand, r); } return value; } @@ -669,6 +666,7 @@ public: static bool isEnumerated(const vespalib::GenericHeader &header); virtual vespalib::MemoryUsage getChangeVectorMemoryUsage() const; + bool commitIfChangeVectorTooLarge(); void drain_hold(uint64_t hold_limit); }; diff --git a/searchlib/src/vespa/searchlib/attribute/changevector.h b/searchlib/src/vespa/searchlib/attribute/changevector.h index d8abdfae92a..12ac77febb9 100644 --- a/searchlib/src/vespa/searchlib/attribute/changevector.h +++ b/searchlib/src/vespa/searchlib/attribute/changevector.h @@ -3,7 +3,6 @@ #pragma once #include <vespa/searchcommon/common/undefinedvalues.h> -#include <vespa/vespalib/stllike/allocator.h> #include <vector> namespace vespalib { class MemoryUsage; } @@ -130,7 +129,7 @@ NumericChangeData<double>::operator<(const NumericChangeData<double> &rhs) const template <typename T> class ChangeVectorT { private: - using Vector = std::vector<T, vespalib::allocator_large<T>>; + using Vector = std::vector<T>; public: using const_iterator = typename Vector::const_iterator; ChangeVectorT(); @@ -152,7 +151,7 @@ public: const Vector &_v; }; class DocIdInsertOrder { - using AdjacentDocIds = std::vector<uint64_t, vespalib::allocator_large<uint64_t>>; + using AdjacentDocIds = std::vector<uint64_t>; public: class const_iterator { public: diff --git a/searchlib/src/vespa/searchlib/attribute/configconverter.cpp b/searchlib/src/vespa/searchlib/attribute/configconverter.cpp index 761543f4bad..f6e8266f858 100644 --- a/searchlib/src/vespa/searchlib/attribute/configconverter.cpp +++ b/searchlib/src/vespa/searchlib/attribute/configconverter.cpp @@ -109,6 +109,7 @@ ConfigConverter::convert(const AttributesConfig::Attribute & cfg) retval.setFastAccess(cfg.fastaccess); retval.setMutable(cfg.ismutable); retval.setPaged(cfg.paged); + retval.setMaxUnCommittedMemory(cfg.maxuncommittedmemory); predicateParams.setArity(cfg.arity); predicateParams.setBounds(cfg.lowerbound, cfg.upperbound); predicateParams.setDensePostingListThreshold(cfg.densepostinglistthreshold); diff --git a/searchlib/src/vespa/searchlib/attribute/integerbase.h b/searchlib/src/vespa/searchlib/attribute/integerbase.h index 234666d7845..c4a2035b200 100644 --- a/searchlib/src/vespa/searchlib/attribute/integerbase.h +++ b/searchlib/src/vespa/searchlib/attribute/integerbase.h @@ -30,13 +30,12 @@ public: bool applyWeight(DocId doc, const FieldValue & fv, const ArithmeticValueUpdate & wAdjust) override; bool applyWeight(DocId doc, const FieldValue& fv, const document::AssignValueUpdate& wAdjust) override; uint32_t clearDoc(DocId doc) override; + vespalib::MemoryUsage getChangeVectorMemoryUsage() const override; protected: IntegerAttribute(const vespalib::string & name, const Config & c); using Change = ChangeTemplate<NumericChangeData<largeint_t>>; using ChangeVector = ChangeVectorT<Change>; ChangeVector _changes; - - vespalib::MemoryUsage getChangeVectorMemoryUsage() const override; private: const char * getString(DocId doc, char * s, size_t sz) const override; uint32_t get(DocId doc, vespalib::string * v, uint32_t sz) const override; |