diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2022-10-17 14:06:59 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2022-10-17 14:06:59 +0000 |
commit | 913d46f2fe1a41d3f7d9edda041df98279e1a002 (patch) | |
tree | 5450bdda138c5276e31837018e332694b98e3e45 /searchlib | |
parent | b7123d3a07bc823961e452ad527d00e236012ebe (diff) |
- Always check that there is enough space to write.
- Split large initSortData method.
- Make buffer resizing explicit and readable.
- Make a better initial estimate for buffer size.
Diffstat (limited to 'searchlib')
5 files changed, 76 insertions, 104 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/floatbase.hpp b/searchlib/src/vespa/searchlib/attribute/floatbase.hpp index ca634629d66..bd8eb7b0ac2 100644 --- a/searchlib/src/vespa/searchlib/attribute/floatbase.hpp +++ b/searchlib/src/vespa/searchlib/attribute/floatbase.hpp @@ -50,28 +50,16 @@ FloatingPointAttributeTemplate<T>::findFoldedEnums(const char *value) const template<typename T> long -FloatingPointAttributeTemplate<T>::onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const { - (void) bc; - if (available >= long(sizeof(T))) { - T origValue(get(doc)); - vespalib::serializeForSort< vespalib::convertForSort<T, true> >(origValue, serTo); - } else { - return -1; - } - return sizeof(T); +FloatingPointAttributeTemplate<T>::onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter *) const { + T origValue(get(doc)); + return vespalib::serializeForSort< vespalib::convertForSort<T, true> >(origValue, serTo, available); } template<typename T> long -FloatingPointAttributeTemplate<T>::onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const { - (void) bc; - if (available >= long(sizeof(T))) { - T origValue(get(doc)); - vespalib::serializeForSort< vespalib::convertForSort<T, false> >(origValue, serTo); - } else { - return -1; - } - return sizeof(T); +FloatingPointAttributeTemplate<T>::onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter *) const { + T origValue(get(doc)); + return vespalib::serializeForSort< vespalib::convertForSort<T, false> >(origValue, serTo, available); } } diff --git a/searchlib/src/vespa/searchlib/attribute/integerbase.hpp b/searchlib/src/vespa/searchlib/attribute/integerbase.hpp index 93e9a68b074..375e1abf831 100644 --- a/searchlib/src/vespa/searchlib/attribute/integerbase.hpp +++ b/searchlib/src/vespa/searchlib/attribute/integerbase.hpp @@ -63,28 
+63,16 @@ IntegerAttributeTemplate<T>::findFoldedEnums(const char *value) const template<typename T> long -IntegerAttributeTemplate<T>::onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const { - (void) bc; - if (available >= long(sizeof(T))) { - T origValue(get(doc)); - vespalib::serializeForSort< vespalib::convertForSort<T, true> >(origValue, serTo); - } else { - return -1; - } - return sizeof(T); +IntegerAttributeTemplate<T>::onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter *) const { + T origValue(get(doc)); + return vespalib::serializeForSort< vespalib::convertForSort<T, true> >(origValue, serTo, available); } template<typename T> long -IntegerAttributeTemplate<T>::onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const { - (void) bc; - if (available >= long(sizeof(T))) { - T origValue(get(doc)); - vespalib::serializeForSort< vespalib::convertForSort<T, false> >(origValue, serTo); - } else { - return -1; - } - return sizeof(T); +IntegerAttributeTemplate<T>::onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter *) const { + T origValue(get(doc)); + return vespalib::serializeForSort< vespalib::convertForSort<T, false> >(origValue, serTo, available); } } diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp index d27c61d6ff0..3a9f88babfe 100644 --- a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp +++ b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp @@ -140,7 +140,7 @@ StringAttribute::onSerializeForDescendingSort(DocId doc, void * serTo, long avai } if (available >= (long)buf.size()) { const auto * src(static_cast<const uint8_t *>(buf.data())); - for (size_t i(0), m(buf.size()); i < m; ++i) { + for (size_t i(0); i < buf.size(); ++i) { dst[i] = 0xff - src[i]; } } else { diff --git 
a/searchlib/src/vespa/searchlib/common/sortresults.cpp b/searchlib/src/vespa/searchlib/common/sortresults.cpp index 4eed49defc5..a09845f3330 100644 --- a/searchlib/src/vespa/searchlib/common/sortresults.cpp +++ b/searchlib/src/vespa/searchlib/common/sortresults.cpp @@ -198,18 +198,6 @@ FastS_SortSpec::Add(IAttributeContext & vecMan, const SortInfo & sInfo) return true; } -uint8_t * -FastS_SortSpec::realloc(uint32_t n, size_t & variableWidth, uint32_t & available, uint32_t & dataSize, uint8_t *mySortData) -{ - // realloc - variableWidth *= 2; - available += variableWidth * n; - dataSize += variableWidth * n; - uint32_t byteUsed = mySortData - _binarySortData.data(); - _binarySortData.resize(dataSize); - return _binarySortData.data() + byteUsed; -} - void FastS_SortSpec::initSortData(const RankedHit *hits, uint32_t n) { @@ -218,7 +206,7 @@ FastS_SortSpec::initSortData(const RankedHit *hits, uint32_t n) size_t variableWidth = 0; for (const auto & vec : _vectors) { if (vec._type >= ASC_DOCID) { // doc id - fixedWidth = (vec._vector != nullptr) + fixedWidth += (vec._vector != nullptr) ? 
vec._vector->getFixedWidth() : sizeof(uint32_t) + sizeof(uint16_t); } else if (vec._type >= ASC_RANK) { // rank value @@ -232,61 +220,15 @@ FastS_SortSpec::initSortData(const RankedHit *hits, uint32_t n) } } } - uint32_t dataSize = (fixedWidth + variableWidth) * n; - uint32_t available = dataSize; - _binarySortData.resize(dataSize); - uint8_t *mySortData = _binarySortData.data(); - + _binarySortData.resize((fixedWidth + variableWidth) * n); _sortDataArray.resize(n); + size_t offset = 0; for (uint32_t i(0), idx(0); (i < n) && !_doom.hard_doom(); ++i) { uint32_t len = 0; for (const auto & vec : _vectors) { - long written(0); - if (available < std::max(sizeof(hits->_docId) + sizeof(_partitionId), sizeof(hits->_rankValue))) { - mySortData = realloc(n, variableWidth, available, dataSize, mySortData); - } - do { - switch (vec._type) { - case ASC_DOCID: - if (vec._vector != nullptr) { - written = vec._vector->serializeForAscendingSort(hits[i].getDocId(), mySortData, available, vec._converter); - } else { - serializeForSort<convertForSort<uint32_t, true> >(hits[i].getDocId(), mySortData); - serializeForSort<convertForSort<uint16_t, true> >(_partitionId, mySortData + sizeof(hits->_docId)); - written = sizeof(hits->_docId) + sizeof(_partitionId); - } - break; - case DESC_DOCID: - if (vec._vector != nullptr) { - written = vec._vector->serializeForDescendingSort(hits[i].getDocId(), mySortData, available, vec._converter); - } else { - serializeForSort<convertForSort<uint32_t, false> >(hits[i].getDocId(), mySortData); - serializeForSort<convertForSort<uint16_t, false> >(_partitionId, mySortData + sizeof(hits->_docId)); - written = sizeof(hits->_docId) + sizeof(_partitionId); - } - break; - case ASC_RANK: - serializeForSort<convertForSort<search::HitRank, true> >(hits[i].getRank(), mySortData); - written = sizeof(hits->_rankValue); - break; - case DESC_RANK: - serializeForSort<convertForSort<search::HitRank, false> >(hits[i].getRank(), mySortData); - written = 
sizeof(hits->_rankValue); - break; - case ASC_VECTOR: - written = vec._vector->serializeForAscendingSort(hits[i].getDocId(), mySortData, available, vec._converter); - break; - case DESC_VECTOR: - written = vec._vector->serializeForDescendingSort(hits[i].getDocId(), mySortData, available, vec._converter); - break; - } - if (written == -1) { - mySortData = realloc(n, variableWidth, available, dataSize, mySortData); - } - } while(written == -1); - available -= written; - mySortData += written; + int written = initSortData(vec, hits[i], offset); + offset += written; len += written; } SortData & sd = _sortDataArray[i]; @@ -299,6 +241,59 @@ FastS_SortSpec::initSortData(const RankedHit *hits, uint32_t n) } } +int +FastS_SortSpec::initSortData(const VectorRef & vec, const RankedHit & hit, size_t offset) { + long written(0); + do { + uint8_t * mySortData = _binarySortData.data() + offset; + uint32_t available = _binarySortData.size() - offset; + switch (vec._type) { + case ASC_DOCID: + if (vec._vector != nullptr) { + written = vec._vector->serializeForAscendingSort(hit.getDocId(), mySortData, available, vec._converter); + } else { + if (available >= (sizeof(hit._docId) + sizeof(_partitionId))) { + serializeForSort<convertForSort<uint32_t, true> >(hit.getDocId(), mySortData, available); + serializeForSort<convertForSort<uint16_t, true> >(_partitionId, mySortData + sizeof(hit._docId), available - sizeof(hit._docId)); + written = sizeof(hit._docId) + sizeof(_partitionId); + } else { + written = -1; + } + } + break; + case DESC_DOCID: + if (vec._vector != nullptr) { + written = vec._vector->serializeForDescendingSort(hit.getDocId(), mySortData, available, vec._converter); + } else { + if (available >= (sizeof(hit._docId) + sizeof(_partitionId))) { + serializeForSort<convertForSort<uint32_t, false> >(hit.getDocId(), mySortData, available); + serializeForSort<convertForSort<uint16_t, false> >(_partitionId, mySortData + sizeof(hit._docId), available - sizeof(hit._docId)); + 
written = sizeof(hit._docId) + sizeof(_partitionId); + } else { + written = -1; + } + } + break; + case ASC_RANK: + written = serializeForSort<convertForSort<search::HitRank, true> >(hit.getRank(), mySortData, available); + break; + case DESC_RANK: + written = serializeForSort<convertForSort<search::HitRank, false> >(hit.getRank(), mySortData, available); + break; + case ASC_VECTOR: + written = vec._vector->serializeForAscendingSort(hit.getDocId(), mySortData, available, vec._converter); + break; + case DESC_VECTOR: + written = vec._vector->serializeForDescendingSort(hit.getDocId(), mySortData, available, vec._converter); + break; + } + if (written < 0) { + _binarySortData.resize(vespalib::roundUp2inN(_binarySortData.size()*2)); + } + } while (written < 0); + return written; +} + FastS_SortSpec::FastS_SortSpec(vespalib::stringref documentmetastore, uint32_t partitionId, const Doom & doom, const ConverterFactory & ucaFactory) : _documentmetastore(documentmetastore), _partitionId(partitionId), @@ -390,7 +385,7 @@ public: int cmp(const FastS_SortSpec::SortData & a, const FastS_SortSpec::SortData & b) const { uint32_t len = std::min(a._len, b._len); int retval = memcmp(_sortSpec + a._idx, _sortSpec + b._idx, len); - return retval ? retval : a._len - b._len; + return retval ? retval : (a._len < b._len) ? 
-1 : 1; } private: const uint8_t * _sortSpec; @@ -417,7 +412,8 @@ public: case 1: r |= _data[a._idx + a._pos + 0] << 24; [[fallthrough]]; - case 0:; + case 0: + break; } a._pos += std::min(4u, left); return r; @@ -444,7 +440,7 @@ FastS_SortSpec::sortResults(RankedHit a[], uint32_t n, uint32_t topn) Array<uint32_t> radixScratchPad(n, Alloc::alloc(0, MMAP_LIMIT)); search::radix_sort(SortDataRadix(binary), StdSortDataCompare(binary), SortDataEof(), 1, sortData, n, radixScratchPad.data(), 0, 96, topn); } - for (uint32_t i(0), m(_sortDataArray.size()); i < m; ++i) { + for (uint32_t i(0); i < _sortDataArray.size(); ++i) { a[i]._rankValue = _sortDataArray[i]._rankValue; a[i]._docId = _sortDataArray[i]._docId; } diff --git a/searchlib/src/vespa/searchlib/common/sortresults.h b/searchlib/src/vespa/searchlib/common/sortresults.h index 337863601d5..a4a23b77ca0 100644 --- a/searchlib/src/vespa/searchlib/common/sortresults.h +++ b/searchlib/src/vespa/searchlib/common/sortresults.h @@ -106,7 +106,7 @@ private: bool Add(search::attribute::IAttributeContext & vecMan, const search::common::SortInfo & sInfo); void initSortData(const search::RankedHit *a, uint32_t n); - uint8_t * realloc(uint32_t n, size_t & variableWidth, uint32_t & available, uint32_t & dataSize, uint8_t *mySortData); + int initSortData(const VectorRef & vec, const search::RankedHit & hit, size_t offset); public: FastS_SortSpec(const FastS_SortSpec &) = delete; |