diff options
author | Geir Storli <geirst@yahooinc.com> | 2022-10-17 17:45:29 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-10-17 17:45:29 +0200 |
commit | 169187771dfc974f58238fc19db3b534c4b6c9f4 (patch) | |
tree | 08d8deb07b99b4ab3479f8166cf611e8aba4f098 | |
parent | 6504bf7aa25112cfaf3da6a966d2e7ae4187c014 (diff) | |
parent | 913d46f2fe1a41d3f7d9edda041df98279e1a002 (diff) |
Merge pull request #24477 from vespa-engine/balder/allways-check-for-space
- Always check if there is enough space to write.
6 files changed, 78 insertions, 105 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/floatbase.hpp b/searchlib/src/vespa/searchlib/attribute/floatbase.hpp index ca634629d66..bd8eb7b0ac2 100644 --- a/searchlib/src/vespa/searchlib/attribute/floatbase.hpp +++ b/searchlib/src/vespa/searchlib/attribute/floatbase.hpp @@ -50,28 +50,16 @@ FloatingPointAttributeTemplate<T>::findFoldedEnums(const char *value) const template<typename T> long -FloatingPointAttributeTemplate<T>::onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const { - (void) bc; - if (available >= long(sizeof(T))) { - T origValue(get(doc)); - vespalib::serializeForSort< vespalib::convertForSort<T, true> >(origValue, serTo); - } else { - return -1; - } - return sizeof(T); +FloatingPointAttributeTemplate<T>::onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter *) const { + T origValue(get(doc)); + return vespalib::serializeForSort< vespalib::convertForSort<T, true> >(origValue, serTo, available); } template<typename T> long -FloatingPointAttributeTemplate<T>::onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const { - (void) bc; - if (available >= long(sizeof(T))) { - T origValue(get(doc)); - vespalib::serializeForSort< vespalib::convertForSort<T, false> >(origValue, serTo); - } else { - return -1; - } - return sizeof(T); +FloatingPointAttributeTemplate<T>::onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter *) const { + T origValue(get(doc)); + return vespalib::serializeForSort< vespalib::convertForSort<T, false> >(origValue, serTo, available); } } diff --git a/searchlib/src/vespa/searchlib/attribute/integerbase.hpp b/searchlib/src/vespa/searchlib/attribute/integerbase.hpp index 93e9a68b074..375e1abf831 100644 --- a/searchlib/src/vespa/searchlib/attribute/integerbase.hpp +++ b/searchlib/src/vespa/searchlib/attribute/integerbase.hpp @@ -63,28 
+63,16 @@ IntegerAttributeTemplate<T>::findFoldedEnums(const char *value) const template<typename T> long -IntegerAttributeTemplate<T>::onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const { - (void) bc; - if (available >= long(sizeof(T))) { - T origValue(get(doc)); - vespalib::serializeForSort< vespalib::convertForSort<T, true> >(origValue, serTo); - } else { - return -1; - } - return sizeof(T); +IntegerAttributeTemplate<T>::onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter *) const { + T origValue(get(doc)); + return vespalib::serializeForSort< vespalib::convertForSort<T, true> >(origValue, serTo, available); } template<typename T> long -IntegerAttributeTemplate<T>::onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const { - (void) bc; - if (available >= long(sizeof(T))) { - T origValue(get(doc)); - vespalib::serializeForSort< vespalib::convertForSort<T, false> >(origValue, serTo); - } else { - return -1; - } - return sizeof(T); +IntegerAttributeTemplate<T>::onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter *) const { + T origValue(get(doc)); + return vespalib::serializeForSort< vespalib::convertForSort<T, false> >(origValue, serTo, available); } } diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp index d27c61d6ff0..3a9f88babfe 100644 --- a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp +++ b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp @@ -140,7 +140,7 @@ StringAttribute::onSerializeForDescendingSort(DocId doc, void * serTo, long avai } if (available >= (long)buf.size()) { const auto * src(static_cast<const uint8_t *>(buf.data())); - for (size_t i(0), m(buf.size()); i < m; ++i) { + for (size_t i(0); i < buf.size(); ++i) { dst[i] = 0xff - src[i]; } } else { diff --git 
a/searchlib/src/vespa/searchlib/common/sortresults.cpp b/searchlib/src/vespa/searchlib/common/sortresults.cpp index 4eed49defc5..a09845f3330 100644 --- a/searchlib/src/vespa/searchlib/common/sortresults.cpp +++ b/searchlib/src/vespa/searchlib/common/sortresults.cpp @@ -198,18 +198,6 @@ FastS_SortSpec::Add(IAttributeContext & vecMan, const SortInfo & sInfo) return true; } -uint8_t * -FastS_SortSpec::realloc(uint32_t n, size_t & variableWidth, uint32_t & available, uint32_t & dataSize, uint8_t *mySortData) -{ - // realloc - variableWidth *= 2; - available += variableWidth * n; - dataSize += variableWidth * n; - uint32_t byteUsed = mySortData - _binarySortData.data(); - _binarySortData.resize(dataSize); - return _binarySortData.data() + byteUsed; -} - void FastS_SortSpec::initSortData(const RankedHit *hits, uint32_t n) { @@ -218,7 +206,7 @@ FastS_SortSpec::initSortData(const RankedHit *hits, uint32_t n) size_t variableWidth = 0; for (const auto & vec : _vectors) { if (vec._type >= ASC_DOCID) { // doc id - fixedWidth = (vec._vector != nullptr) + fixedWidth += (vec._vector != nullptr) ? 
vec._vector->getFixedWidth() : sizeof(uint32_t) + sizeof(uint16_t); } else if (vec._type >= ASC_RANK) { // rank value @@ -232,61 +220,15 @@ FastS_SortSpec::initSortData(const RankedHit *hits, uint32_t n) } } } - uint32_t dataSize = (fixedWidth + variableWidth) * n; - uint32_t available = dataSize; - _binarySortData.resize(dataSize); - uint8_t *mySortData = _binarySortData.data(); - + _binarySortData.resize((fixedWidth + variableWidth) * n); _sortDataArray.resize(n); + size_t offset = 0; for (uint32_t i(0), idx(0); (i < n) && !_doom.hard_doom(); ++i) { uint32_t len = 0; for (const auto & vec : _vectors) { - long written(0); - if (available < std::max(sizeof(hits->_docId) + sizeof(_partitionId), sizeof(hits->_rankValue))) { - mySortData = realloc(n, variableWidth, available, dataSize, mySortData); - } - do { - switch (vec._type) { - case ASC_DOCID: - if (vec._vector != nullptr) { - written = vec._vector->serializeForAscendingSort(hits[i].getDocId(), mySortData, available, vec._converter); - } else { - serializeForSort<convertForSort<uint32_t, true> >(hits[i].getDocId(), mySortData); - serializeForSort<convertForSort<uint16_t, true> >(_partitionId, mySortData + sizeof(hits->_docId)); - written = sizeof(hits->_docId) + sizeof(_partitionId); - } - break; - case DESC_DOCID: - if (vec._vector != nullptr) { - written = vec._vector->serializeForDescendingSort(hits[i].getDocId(), mySortData, available, vec._converter); - } else { - serializeForSort<convertForSort<uint32_t, false> >(hits[i].getDocId(), mySortData); - serializeForSort<convertForSort<uint16_t, false> >(_partitionId, mySortData + sizeof(hits->_docId)); - written = sizeof(hits->_docId) + sizeof(_partitionId); - } - break; - case ASC_RANK: - serializeForSort<convertForSort<search::HitRank, true> >(hits[i].getRank(), mySortData); - written = sizeof(hits->_rankValue); - break; - case DESC_RANK: - serializeForSort<convertForSort<search::HitRank, false> >(hits[i].getRank(), mySortData); - written = 
sizeof(hits->_rankValue); - break; - case ASC_VECTOR: - written = vec._vector->serializeForAscendingSort(hits[i].getDocId(), mySortData, available, vec._converter); - break; - case DESC_VECTOR: - written = vec._vector->serializeForDescendingSort(hits[i].getDocId(), mySortData, available, vec._converter); - break; - } - if (written == -1) { - mySortData = realloc(n, variableWidth, available, dataSize, mySortData); - } - } while(written == -1); - available -= written; - mySortData += written; + int written = initSortData(vec, hits[i], offset); + offset += written; len += written; } SortData & sd = _sortDataArray[i]; @@ -299,6 +241,59 @@ FastS_SortSpec::initSortData(const RankedHit *hits, uint32_t n) } } +int +FastS_SortSpec::initSortData(const VectorRef & vec, const RankedHit & hit, size_t offset) { + long written(0); + do { + uint8_t * mySortData = _binarySortData.data() + offset; + uint32_t available = _binarySortData.size() - offset; + switch (vec._type) { + case ASC_DOCID: + if (vec._vector != nullptr) { + written = vec._vector->serializeForAscendingSort(hit.getDocId(), mySortData, available, vec._converter); + } else { + if (available >= (sizeof(hit._docId) + sizeof(_partitionId))) { + serializeForSort<convertForSort<uint32_t, true> >(hit.getDocId(), mySortData, available); + serializeForSort<convertForSort<uint16_t, true> >(_partitionId, mySortData + sizeof(hit._docId), available - sizeof(hit._docId)); + written = sizeof(hit._docId) + sizeof(_partitionId); + } else { + written = -1; + } + } + break; + case DESC_DOCID: + if (vec._vector != nullptr) { + written = vec._vector->serializeForDescendingSort(hit.getDocId(), mySortData, available, vec._converter); + } else { + if (available >= (sizeof(hit._docId) + sizeof(_partitionId))) { + serializeForSort<convertForSort<uint32_t, false> >(hit.getDocId(), mySortData, available); + serializeForSort<convertForSort<uint16_t, false> >(_partitionId, mySortData + sizeof(hit._docId), available - sizeof(hit._docId)); + 
written = sizeof(hit._docId) + sizeof(_partitionId); + } else { + written = -1; + } + } + break; + case ASC_RANK: + written = serializeForSort<convertForSort<search::HitRank, true> >(hit.getRank(), mySortData, available); + break; + case DESC_RANK: + written = serializeForSort<convertForSort<search::HitRank, false> >(hit.getRank(), mySortData, available); + break; + case ASC_VECTOR: + written = vec._vector->serializeForAscendingSort(hit.getDocId(), mySortData, available, vec._converter); + break; + case DESC_VECTOR: + written = vec._vector->serializeForDescendingSort(hit.getDocId(), mySortData, available, vec._converter); + break; + } + if (written < 0) { + _binarySortData.resize(vespalib::roundUp2inN(_binarySortData.size()*2)); + } + } while (written < 0); + return written; +} + FastS_SortSpec::FastS_SortSpec(vespalib::stringref documentmetastore, uint32_t partitionId, const Doom & doom, const ConverterFactory & ucaFactory) : _documentmetastore(documentmetastore), _partitionId(partitionId), @@ -390,7 +385,7 @@ public: int cmp(const FastS_SortSpec::SortData & a, const FastS_SortSpec::SortData & b) const { uint32_t len = std::min(a._len, b._len); int retval = memcmp(_sortSpec + a._idx, _sortSpec + b._idx, len); - return retval ? retval : a._len - b._len; + return retval ? retval : (a._len < b._len) ? 
-1 : 1; } private: const uint8_t * _sortSpec; @@ -417,7 +412,8 @@ public: case 1: r |= _data[a._idx + a._pos + 0] << 24; [[fallthrough]]; - case 0:; + case 0: + break; } a._pos += std::min(4u, left); return r; @@ -444,7 +440,7 @@ FastS_SortSpec::sortResults(RankedHit a[], uint32_t n, uint32_t topn) Array<uint32_t> radixScratchPad(n, Alloc::alloc(0, MMAP_LIMIT)); search::radix_sort(SortDataRadix(binary), StdSortDataCompare(binary), SortDataEof(), 1, sortData, n, radixScratchPad.data(), 0, 96, topn); } - for (uint32_t i(0), m(_sortDataArray.size()); i < m; ++i) { + for (uint32_t i(0); i < _sortDataArray.size(); ++i) { a[i]._rankValue = _sortDataArray[i]._rankValue; a[i]._docId = _sortDataArray[i]._docId; } diff --git a/searchlib/src/vespa/searchlib/common/sortresults.h b/searchlib/src/vespa/searchlib/common/sortresults.h index 337863601d5..a4a23b77ca0 100644 --- a/searchlib/src/vespa/searchlib/common/sortresults.h +++ b/searchlib/src/vespa/searchlib/common/sortresults.h @@ -106,7 +106,7 @@ private: bool Add(search::attribute::IAttributeContext & vecMan, const search::common::SortInfo & sInfo); void initSortData(const search::RankedHit *a, uint32_t n); - uint8_t * realloc(uint32_t n, size_t & variableWidth, uint32_t & available, uint32_t & dataSize, uint8_t *mySortData); + int initSortData(const VectorRef & vec, const search::RankedHit & hit, size_t offset); public: FastS_SortSpec(const FastS_SortSpec &) = delete; diff --git a/vespalib/src/vespa/vespalib/util/sort.h b/vespalib/src/vespa/vespalib/util/sort.h index ce3f6772ef1..7d21cb4fe1a 100644 --- a/vespalib/src/vespa/vespalib/util/sort.h +++ b/vespalib/src/vespa/vespalib/util/sort.h @@ -271,8 +271,9 @@ public: }; template<typename C> -uint32_t serializeForSort(typename C::InputType v, void * dst) { +int32_t serializeForSort(typename C::InputType v, void * dst, uint32_t available) { typename C::UIntType nbo(vespalib::nbo::n2h(C::convert(v))); + if (available < sizeof(nbo)) return -1; memcpy(dst, &nbo, sizeof(nbo)); 
return sizeof(nbo); } |