summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Geir Storli <geirst@yahooinc.com> 2022-10-17 17:45:29 +0200
committer GitHub <noreply@github.com> 2022-10-17 17:45:29 +0200
commit 169187771dfc974f58238fc19db3b534c4b6c9f4 (patch)
tree 08d8deb07b99b4ab3479f8166cf611e8aba4f098
parent 6504bf7aa25112cfaf3da6a966d2e7ae4187c014 (diff)
parent 913d46f2fe1a41d3f7d9edda041df98279e1a002 (diff)
Merge pull request #24477 from vespa-engine/balder/allways-check-for-space
- Always check if enough space to write.
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/floatbase.hpp | 24
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/integerbase.hpp | 24
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/stringbase.cpp | 2
-rw-r--r-- searchlib/src/vespa/searchlib/common/sortresults.cpp | 128
-rw-r--r-- searchlib/src/vespa/searchlib/common/sortresults.h | 2
-rw-r--r-- vespalib/src/vespa/vespalib/util/sort.h | 3
6 files changed, 78 insertions, 105 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/floatbase.hpp b/searchlib/src/vespa/searchlib/attribute/floatbase.hpp
index ca634629d66..bd8eb7b0ac2 100644
--- a/searchlib/src/vespa/searchlib/attribute/floatbase.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/floatbase.hpp
@@ -50,28 +50,16 @@ FloatingPointAttributeTemplate<T>::findFoldedEnums(const char *value) const
template<typename T>
long
-FloatingPointAttributeTemplate<T>::onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const {
- (void) bc;
- if (available >= long(sizeof(T))) {
- T origValue(get(doc));
- vespalib::serializeForSort< vespalib::convertForSort<T, true> >(origValue, serTo);
- } else {
- return -1;
- }
- return sizeof(T);
+FloatingPointAttributeTemplate<T>::onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter *) const {
+ T origValue(get(doc));
+ return vespalib::serializeForSort< vespalib::convertForSort<T, true> >(origValue, serTo, available);
}
template<typename T>
long
-FloatingPointAttributeTemplate<T>::onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const {
- (void) bc;
- if (available >= long(sizeof(T))) {
- T origValue(get(doc));
- vespalib::serializeForSort< vespalib::convertForSort<T, false> >(origValue, serTo);
- } else {
- return -1;
- }
- return sizeof(T);
+FloatingPointAttributeTemplate<T>::onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter *) const {
+ T origValue(get(doc));
+ return vespalib::serializeForSort< vespalib::convertForSort<T, false> >(origValue, serTo, available);
}
}
diff --git a/searchlib/src/vespa/searchlib/attribute/integerbase.hpp b/searchlib/src/vespa/searchlib/attribute/integerbase.hpp
index 93e9a68b074..375e1abf831 100644
--- a/searchlib/src/vespa/searchlib/attribute/integerbase.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/integerbase.hpp
@@ -63,28 +63,16 @@ IntegerAttributeTemplate<T>::findFoldedEnums(const char *value) const
template<typename T>
long
-IntegerAttributeTemplate<T>::onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const {
- (void) bc;
- if (available >= long(sizeof(T))) {
- T origValue(get(doc));
- vespalib::serializeForSort< vespalib::convertForSort<T, true> >(origValue, serTo);
- } else {
- return -1;
- }
- return sizeof(T);
+IntegerAttributeTemplate<T>::onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter *) const {
+ T origValue(get(doc));
+ return vespalib::serializeForSort< vespalib::convertForSort<T, true> >(origValue, serTo, available);
}
template<typename T>
long
-IntegerAttributeTemplate<T>::onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const {
- (void) bc;
- if (available >= long(sizeof(T))) {
- T origValue(get(doc));
- vespalib::serializeForSort< vespalib::convertForSort<T, false> >(origValue, serTo);
- } else {
- return -1;
- }
- return sizeof(T);
+IntegerAttributeTemplate<T>::onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter *) const {
+ T origValue(get(doc));
+ return vespalib::serializeForSort< vespalib::convertForSort<T, false> >(origValue, serTo, available);
}
}
diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp
index d27c61d6ff0..3a9f88babfe 100644
--- a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp
@@ -140,7 +140,7 @@ StringAttribute::onSerializeForDescendingSort(DocId doc, void * serTo, long avai
}
if (available >= (long)buf.size()) {
const auto * src(static_cast<const uint8_t *>(buf.data()));
- for (size_t i(0), m(buf.size()); i < m; ++i) {
+ for (size_t i(0); i < buf.size(); ++i) {
dst[i] = 0xff - src[i];
}
} else {
diff --git a/searchlib/src/vespa/searchlib/common/sortresults.cpp b/searchlib/src/vespa/searchlib/common/sortresults.cpp
index 4eed49defc5..a09845f3330 100644
--- a/searchlib/src/vespa/searchlib/common/sortresults.cpp
+++ b/searchlib/src/vespa/searchlib/common/sortresults.cpp
@@ -198,18 +198,6 @@ FastS_SortSpec::Add(IAttributeContext & vecMan, const SortInfo & sInfo)
return true;
}
-uint8_t *
-FastS_SortSpec::realloc(uint32_t n, size_t & variableWidth, uint32_t & available, uint32_t & dataSize, uint8_t *mySortData)
-{
- // realloc
- variableWidth *= 2;
- available += variableWidth * n;
- dataSize += variableWidth * n;
- uint32_t byteUsed = mySortData - _binarySortData.data();
- _binarySortData.resize(dataSize);
- return _binarySortData.data() + byteUsed;
-}
-
void
FastS_SortSpec::initSortData(const RankedHit *hits, uint32_t n)
{
@@ -218,7 +206,7 @@ FastS_SortSpec::initSortData(const RankedHit *hits, uint32_t n)
size_t variableWidth = 0;
for (const auto & vec : _vectors) {
if (vec._type >= ASC_DOCID) { // doc id
- fixedWidth = (vec._vector != nullptr)
+ fixedWidth += (vec._vector != nullptr)
? vec._vector->getFixedWidth()
: sizeof(uint32_t) + sizeof(uint16_t);
} else if (vec._type >= ASC_RANK) { // rank value
@@ -232,61 +220,15 @@ FastS_SortSpec::initSortData(const RankedHit *hits, uint32_t n)
}
}
}
- uint32_t dataSize = (fixedWidth + variableWidth) * n;
- uint32_t available = dataSize;
- _binarySortData.resize(dataSize);
- uint8_t *mySortData = _binarySortData.data();
-
+ _binarySortData.resize((fixedWidth + variableWidth) * n);
_sortDataArray.resize(n);
+ size_t offset = 0;
for (uint32_t i(0), idx(0); (i < n) && !_doom.hard_doom(); ++i) {
uint32_t len = 0;
for (const auto & vec : _vectors) {
- long written(0);
- if (available < std::max(sizeof(hits->_docId) + sizeof(_partitionId), sizeof(hits->_rankValue))) {
- mySortData = realloc(n, variableWidth, available, dataSize, mySortData);
- }
- do {
- switch (vec._type) {
- case ASC_DOCID:
- if (vec._vector != nullptr) {
- written = vec._vector->serializeForAscendingSort(hits[i].getDocId(), mySortData, available, vec._converter);
- } else {
- serializeForSort<convertForSort<uint32_t, true> >(hits[i].getDocId(), mySortData);
- serializeForSort<convertForSort<uint16_t, true> >(_partitionId, mySortData + sizeof(hits->_docId));
- written = sizeof(hits->_docId) + sizeof(_partitionId);
- }
- break;
- case DESC_DOCID:
- if (vec._vector != nullptr) {
- written = vec._vector->serializeForDescendingSort(hits[i].getDocId(), mySortData, available, vec._converter);
- } else {
- serializeForSort<convertForSort<uint32_t, false> >(hits[i].getDocId(), mySortData);
- serializeForSort<convertForSort<uint16_t, false> >(_partitionId, mySortData + sizeof(hits->_docId));
- written = sizeof(hits->_docId) + sizeof(_partitionId);
- }
- break;
- case ASC_RANK:
- serializeForSort<convertForSort<search::HitRank, true> >(hits[i].getRank(), mySortData);
- written = sizeof(hits->_rankValue);
- break;
- case DESC_RANK:
- serializeForSort<convertForSort<search::HitRank, false> >(hits[i].getRank(), mySortData);
- written = sizeof(hits->_rankValue);
- break;
- case ASC_VECTOR:
- written = vec._vector->serializeForAscendingSort(hits[i].getDocId(), mySortData, available, vec._converter);
- break;
- case DESC_VECTOR:
- written = vec._vector->serializeForDescendingSort(hits[i].getDocId(), mySortData, available, vec._converter);
- break;
- }
- if (written == -1) {
- mySortData = realloc(n, variableWidth, available, dataSize, mySortData);
- }
- } while(written == -1);
- available -= written;
- mySortData += written;
+ int written = initSortData(vec, hits[i], offset);
+ offset += written;
len += written;
}
SortData & sd = _sortDataArray[i];
@@ -299,6 +241,59 @@ FastS_SortSpec::initSortData(const RankedHit *hits, uint32_t n)
}
}
+int
+FastS_SortSpec::initSortData(const VectorRef & vec, const RankedHit & hit, size_t offset) {
+ long written(0);
+ do {
+ uint8_t * mySortData = _binarySortData.data() + offset;
+ uint32_t available = _binarySortData.size() - offset;
+ switch (vec._type) {
+ case ASC_DOCID:
+ if (vec._vector != nullptr) {
+ written = vec._vector->serializeForAscendingSort(hit.getDocId(), mySortData, available, vec._converter);
+ } else {
+ if (available >= (sizeof(hit._docId) + sizeof(_partitionId))) {
+ serializeForSort<convertForSort<uint32_t, true> >(hit.getDocId(), mySortData, available);
+ serializeForSort<convertForSort<uint16_t, true> >(_partitionId, mySortData + sizeof(hit._docId), available - sizeof(hit._docId));
+ written = sizeof(hit._docId) + sizeof(_partitionId);
+ } else {
+ written = -1;
+ }
+ }
+ break;
+ case DESC_DOCID:
+ if (vec._vector != nullptr) {
+ written = vec._vector->serializeForDescendingSort(hit.getDocId(), mySortData, available, vec._converter);
+ } else {
+ if (available >= (sizeof(hit._docId) + sizeof(_partitionId))) {
+ serializeForSort<convertForSort<uint32_t, false> >(hit.getDocId(), mySortData, available);
+ serializeForSort<convertForSort<uint16_t, false> >(_partitionId, mySortData + sizeof(hit._docId), available - sizeof(hit._docId));
+ written = sizeof(hit._docId) + sizeof(_partitionId);
+ } else {
+ written = -1;
+ }
+ }
+ break;
+ case ASC_RANK:
+ written = serializeForSort<convertForSort<search::HitRank, true> >(hit.getRank(), mySortData, available);
+ break;
+ case DESC_RANK:
+ written = serializeForSort<convertForSort<search::HitRank, false> >(hit.getRank(), mySortData, available);
+ break;
+ case ASC_VECTOR:
+ written = vec._vector->serializeForAscendingSort(hit.getDocId(), mySortData, available, vec._converter);
+ break;
+ case DESC_VECTOR:
+ written = vec._vector->serializeForDescendingSort(hit.getDocId(), mySortData, available, vec._converter);
+ break;
+ }
+ if (written < 0) {
+ _binarySortData.resize(vespalib::roundUp2inN(_binarySortData.size()*2));
+ }
+ } while (written < 0);
+ return written;
+}
+
FastS_SortSpec::FastS_SortSpec(vespalib::stringref documentmetastore, uint32_t partitionId, const Doom & doom, const ConverterFactory & ucaFactory)
: _documentmetastore(documentmetastore),
_partitionId(partitionId),
@@ -390,7 +385,7 @@ public:
int cmp(const FastS_SortSpec::SortData & a, const FastS_SortSpec::SortData & b) const {
uint32_t len = std::min(a._len, b._len);
int retval = memcmp(_sortSpec + a._idx, _sortSpec + b._idx, len);
- return retval ? retval : a._len - b._len;
+ return retval ? retval : (a._len < b._len) ? -1 : 1;
}
private:
const uint8_t * _sortSpec;
@@ -417,7 +412,8 @@ public:
case 1:
r |= _data[a._idx + a._pos + 0] << 24;
[[fallthrough]];
- case 0:;
+ case 0:
+ break;
}
a._pos += std::min(4u, left);
return r;
@@ -444,7 +440,7 @@ FastS_SortSpec::sortResults(RankedHit a[], uint32_t n, uint32_t topn)
Array<uint32_t> radixScratchPad(n, Alloc::alloc(0, MMAP_LIMIT));
search::radix_sort(SortDataRadix(binary), StdSortDataCompare(binary), SortDataEof(), 1, sortData, n, radixScratchPad.data(), 0, 96, topn);
}
- for (uint32_t i(0), m(_sortDataArray.size()); i < m; ++i) {
+ for (uint32_t i(0); i < _sortDataArray.size(); ++i) {
a[i]._rankValue = _sortDataArray[i]._rankValue;
a[i]._docId = _sortDataArray[i]._docId;
}
diff --git a/searchlib/src/vespa/searchlib/common/sortresults.h b/searchlib/src/vespa/searchlib/common/sortresults.h
index 337863601d5..a4a23b77ca0 100644
--- a/searchlib/src/vespa/searchlib/common/sortresults.h
+++ b/searchlib/src/vespa/searchlib/common/sortresults.h
@@ -106,7 +106,7 @@ private:
bool Add(search::attribute::IAttributeContext & vecMan, const search::common::SortInfo & sInfo);
void initSortData(const search::RankedHit *a, uint32_t n);
- uint8_t * realloc(uint32_t n, size_t & variableWidth, uint32_t & available, uint32_t & dataSize, uint8_t *mySortData);
+ int initSortData(const VectorRef & vec, const search::RankedHit & hit, size_t offset);
public:
FastS_SortSpec(const FastS_SortSpec &) = delete;
diff --git a/vespalib/src/vespa/vespalib/util/sort.h b/vespalib/src/vespa/vespalib/util/sort.h
index ce3f6772ef1..7d21cb4fe1a 100644
--- a/vespalib/src/vespa/vespalib/util/sort.h
+++ b/vespalib/src/vespa/vespalib/util/sort.h
@@ -271,8 +271,9 @@ public:
};
template<typename C>
-uint32_t serializeForSort(typename C::InputType v, void * dst) {
+int32_t serializeForSort(typename C::InputType v, void * dst, uint32_t available) {
typename C::UIntType nbo(vespalib::nbo::n2h(C::convert(v)));
+ if (available < sizeof(nbo)) return -1;
memcpy(dst, &nbo, sizeof(nbo));
return sizeof(nbo);
}