aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2022-03-25 17:16:47 +0000
committerHenning Baldersheim <balder@yahoo-inc.com>2022-03-25 17:16:47 +0000
commit50de2c8cefcdb4a96b62c01aa19a7e73023248f2 (patch)
treefe1d41a98c7c38a53265582471c5602e3109a705
parentce885ac3588c1d75167928444343c1d5044691e3 (diff)
Use size_t(64 bit) insteda of unsigent int(32 bit) to allow sorting arrays larger than 4G elements.
-rw-r--r--searchlib/src/vespa/searchlib/common/sort.cpp5
-rw-r--r--searchlib/src/vespa/searchlib/common/sort.h122
2 files changed, 64 insertions, 63 deletions
diff --git a/searchlib/src/vespa/searchlib/common/sort.cpp b/searchlib/src/vespa/searchlib/common/sort.cpp
index a85317b2c53..0a4db613649 100644
--- a/searchlib/src/vespa/searchlib/common/sort.cpp
+++ b/searchlib/src/vespa/searchlib/common/sort.cpp
@@ -3,7 +3,8 @@
namespace search {
-bool radix_prepare(unsigned int n, unsigned int last[257], unsigned int ptr[256], unsigned int cnt[256])
+bool
+radix_prepare(size_t n, size_t last[257], size_t ptr[256], size_t cnt[256])
{
// Accumulate cnt positions
bool sorted = (cnt[0]==n);
@@ -12,7 +13,7 @@ bool radix_prepare(unsigned int n, unsigned int last[257], unsigned int ptr[256]
ptr[i] = ptr[i-1] + cnt[i-1];
sorted |= (cnt[i]==n);
}
- memcpy(last, ptr, 256*sizeof(unsigned int));
+ memcpy(last, ptr, 256*sizeof(last[0]));
last[256] = last[255] + cnt[255];
return sorted;
}
diff --git a/searchlib/src/vespa/searchlib/common/sort.h b/searchlib/src/vespa/searchlib/common/sort.h
index 2513c5506e9..b90bda698a0 100644
--- a/searchlib/src/vespa/searchlib/common/sort.h
+++ b/searchlib/src/vespa/searchlib/common/sort.h
@@ -10,22 +10,22 @@
namespace search {
-bool radix_prepare(unsigned int n, unsigned int last[257], unsigned int ptr[256], unsigned int cnt[256]);
+bool radix_prepare(size_t n, size_t last[257], size_t ptr[256], size_t cnt[256]);
template<typename T>
-void radix_sort_core(const unsigned int * last, T * a, unsigned int n, uint32_t * radixScratch, unsigned int shiftWidth) __attribute__ ((noinline));
+void radix_sort_core(const size_t * last, T * a, size_t n, uint32_t * radixScratch, unsigned int shiftWidth) __attribute__ ((noinline));
template<typename T>
-void radix_sort_core(const unsigned int * last, T * a, unsigned int n, uint32_t * radixScratch, unsigned int shiftWidth)
+void radix_sort_core(const size_t * last, T * a, size_t n, uint32_t * radixScratch, unsigned int shiftWidth)
{
T temp, swap;
// Go through all permutation cycles until all
// elements are moved or found to be already in place
- unsigned int ptr[256];
- unsigned int i, j, k;
+ size_t ptr[256];
+ size_t i, j, k;
memcpy(ptr, last, sizeof(ptr));
i = 0;
- unsigned int remain = n;
+ size_t remain = n;
while (remain > 0) {
// Find first uncompleted class
@@ -42,7 +42,7 @@ void radix_sort_core(const unsigned int * last, T * a, unsigned int n, uint32_t
if (i != k) {
swap = a[j];
do {
- unsigned int t(ptr[k]);
+ size_t t(ptr[k]);
temp = a[t];
uint32_t tempK(radixScratch[t]);
radixScratch[t] = swapK;
@@ -63,12 +63,12 @@ void radix_sort_core(const unsigned int * last, T * a, unsigned int n, uint32_t
}
template<typename T, typename GR>
-unsigned int radix_fetch(T *a, unsigned int n, uint32_t * radixScratch, GR R) __attribute__ ((noinline));
+unsigned int radix_fetch(T *a, size_t n, uint32_t * radixScratch, GR R) __attribute__ ((noinline));
template<typename T, typename GR>
-unsigned int radix_fetch(T *a, unsigned int n, uint32_t * radixScratch, GR R)
+unsigned int radix_fetch(T *a, size_t n, uint32_t * radixScratch, GR R)
{
- unsigned int i = 0;
+ size_t i = 0;
uint32_t usedBits = 0;
if (n > 3) {
for(; i < n - 3; i += 4) {
@@ -102,12 +102,12 @@ public:
};
template<typename T, typename ER>
-bool radix_eof(const T *a, unsigned int n, ER E) __attribute__ ((noinline));
+bool radix_eof(const T *a, size_t n, ER E) __attribute__ ((noinline));
template<typename T, typename ER>
-bool radix_eof(const T *a, unsigned int n, ER E)
+bool radix_eof(const T *a, size_t n, ER E)
{
- unsigned int i = 0;
+ size_t i = 0;
bool eof(true);
if (n > 3) {
for(; eof && (i < n - 3); i += 4) {
@@ -137,11 +137,11 @@ bool radix_eof(const T *a, unsigned int n, ER E)
**/
template<typename T, typename GR, typename GE, typename GRE>
void radix_sort(GR R, GE E, GRE EE, int stackDepth,
- T * a, unsigned int n,
+ T * a, size_t n,
uint32_t *radixScratch,
int radixBits,
unsigned insertSortLevel=10,
- unsigned int topn=std::numeric_limits<unsigned int>::max())
+ size_t topn=std::numeric_limits<size_t>::max())
{
if (((stackDepth > 20) && (radixBits == 0)) || (n < insertSortLevel)) {
// switch to simpler sort if few elements
@@ -151,8 +151,8 @@ void radix_sort(GR R, GE E, GRE EE, int stackDepth,
return;
}
- unsigned int last[257];
- unsigned int cnt[256];
+ size_t last[257];
+ size_t cnt[256];
int shiftWidth = radixBits - 8;
for (bool allInOneBucket(true); allInOneBucket;) {
while ( radixBits == 0 ) {
@@ -169,7 +169,7 @@ void radix_sort(GR R, GE E, GRE EE, int stackDepth,
shiftWidth = radixBits - 8;
memset(cnt, 0, sizeof(cnt));
- unsigned int i = 0;
+ size_t i = 0;
if (n > 3) {
for(; i < n - 3; i += 4) {
cnt[(radixScratch[i + 0] >> shiftWidth) & 0xFF]++;
@@ -196,9 +196,9 @@ void radix_sort(GR R, GE E, GRE EE, int stackDepth,
radix_sort_core(last, a, n, radixScratch, shiftWidth);
// Sort on next 8 bits of key
- for(unsigned i(0), sum(0); (i<256) && (sum < topn); i++) {
- const unsigned l(last[i]);
- const unsigned c(cnt[i]);
+ for(size_t i(0), sum(0); (i<256) && (sum < topn); i++) {
+ const size_t l(last[i]);
+ const size_t c(cnt[i]);
if (c) {
if (c > insertSortLevel) {
radix_sort(R, E, EE, stackDepth + 1, &a[l], c, &radixScratch[l], radixBits, insertSortLevel, topn-sum);
@@ -215,15 +215,15 @@ template<typename GR, typename T, int SHIFT>
class ShiftBasedRadixSorterBase
{
protected:
- static void radix_fetch(GR R, unsigned int cnt[256], const T * a, unsigned int n) __attribute__((noinline));
- static void radix_sort_core(GR R, unsigned int ptr[256], unsigned int last[257], T * a, unsigned int n) __attribute__((noinline));
+ static void radix_fetch(GR R, size_t cnt[256], const T * a, size_t n) __attribute__((noinline));
+ static void radix_sort_core(GR R, size_t ptr[256], size_t last[257], T * a, size_t n) __attribute__((noinline));
};
template<typename GR, typename T, int SHIFT>
-void ShiftBasedRadixSorterBase<GR, T, SHIFT>::radix_fetch(GR R, unsigned int cnt[256], const T * a, unsigned int n)
+void ShiftBasedRadixSorterBase<GR, T, SHIFT>::radix_fetch(GR R, size_t cnt[256], const T * a, size_t n)
{
- memset(cnt, 0, 256*sizeof(unsigned int));
- unsigned int p(0);
+ memset(cnt, 0, 256 * sizeof(cnt[0]));
+ size_t p(0);
if (n > 3) {
for(; p < n - 3; p += 4) {
cnt[(R(a[p]) >> SHIFT) & 0xFF]++;
@@ -239,12 +239,12 @@ void ShiftBasedRadixSorterBase<GR, T, SHIFT>::radix_fetch(GR R, unsigned int cnt
template<typename GR, typename T, int SHIFT>
-void ShiftBasedRadixSorterBase<GR, T, SHIFT>::radix_sort_core(GR R, unsigned int ptr[256], unsigned int last[257], T * a, unsigned int n)
+void ShiftBasedRadixSorterBase<GR, T, SHIFT>::radix_sort_core(GR R, size_t ptr[256], size_t last[257], T * a, size_t n)
{
// Go through all permutation cycles until all
// elements are moved or found to be already in place
- unsigned int i(0), remain(n);
- unsigned int j, k;
+ size_t i(0), remain(n);
+ size_t j, k;
T temp, swap;
while(remain>0) {
@@ -286,17 +286,17 @@ template<typename T, typename GR, typename GE, int SHIFT, bool continueAfterRadi
class ShiftBasedRadixSorter : private ShiftBasedRadixSorterBase<GR, T, SHIFT>
{
public:
- static size_t radix_sort(GR R, GE E, T * a, unsigned int n, unsigned int insertSortLevel=10, unsigned int topn=std::numeric_limits<unsigned int>::max());
- static size_t radix_sort_internal(GR R, GE E, T * a, unsigned int n, unsigned int insertSortLevel, unsigned int topn);
+ static size_t radix_sort(GR R, GE E, T * a, size_t n, unsigned int insertSortLevel=10, size_t topn=std::numeric_limits<size_t>::max());
+ static size_t radix_sort_internal(GR R, GE E, T * a, size_t n, unsigned int insertSortLevel, size_t topn);
private:
typedef ShiftBasedRadixSorterBase<GR, T, SHIFT> Base;
};
template<typename T, typename GR, typename GE, int SHIFT, bool continueAfterRadixEnds>
-size_t ShiftBasedRadixSorter<T, GR, GE, SHIFT, continueAfterRadixEnds>::radix_sort_internal(GR R, GE E, T * a, unsigned int n, unsigned int insertSortLevel, unsigned int topn)
+size_t ShiftBasedRadixSorter<T, GR, GE, SHIFT, continueAfterRadixEnds>::radix_sort_internal(GR R, GE E, T * a, size_t n, unsigned int insertSortLevel, size_t topn)
{
- unsigned int last[257], ptr[256], cnt[256];
- unsigned int sum(n);
+ size_t last[257], ptr[256], cnt[256];
+ size_t sum(n);
Base::radix_fetch(R, cnt, a, n);
@@ -312,8 +312,8 @@ size_t ShiftBasedRadixSorter<T, GR, GE, SHIFT, continueAfterRadixEnds>::radix_so
// Sort on next key
sum = 0;
for(unsigned i(0); (i<256) && (sum < topn); i++) {
- const unsigned int c(cnt[i]);
- const unsigned int l(last[i]);
+ const size_t c(cnt[i]);
+ const size_t l(last[i]);
if (c) {
if (c>insertSortLevel) {
sum += ShiftBasedRadixSorter<T, GR, GE, SHIFT - 8, continueAfterRadixEnds>::radix_sort_internal(R, E, &a[l], c, insertSortLevel, topn-sum);
@@ -329,7 +329,7 @@ size_t ShiftBasedRadixSorter<T, GR, GE, SHIFT, continueAfterRadixEnds>::radix_so
template<typename T, typename GR, typename GE, int SHIFT, bool continueAfterRadixEnds>
-size_t ShiftBasedRadixSorter<T, GR, GE, SHIFT, continueAfterRadixEnds>::radix_sort(GR R, GE E, T * a, unsigned int n, unsigned int insertSortLevel, unsigned int topn)
+size_t ShiftBasedRadixSorter<T, GR, GE, SHIFT, continueAfterRadixEnds>::radix_sort(GR R, GE E, T * a, size_t n, unsigned int insertSortLevel, size_t topn)
{
if (n > insertSortLevel) {
return radix_sort_internal(R, E, a, n, insertSortLevel, topn);
@@ -342,7 +342,7 @@ size_t ShiftBasedRadixSorter<T, GR, GE, SHIFT, continueAfterRadixEnds>::radix_so
template<typename A, typename B, typename C>
class ShiftBasedRadixSorter<A, B, C, -8, false> {
public:
- static size_t radix_sort_internal(B, C, A *, unsigned int, unsigned int, unsigned int) {
+ static size_t radix_sort_internal(B, C, A *, size_t, unsigned int, size_t) {
return 0;
}
};
@@ -350,7 +350,7 @@ public:
template<typename A, typename B, typename C>
class ShiftBasedRadixSorter<A, B, C, -8, true> {
public:
- static size_t radix_sort_internal(B, C E, A * v, unsigned int sz, unsigned int, unsigned int) {
+ static size_t radix_sort_internal(B, C E, A * v, size_t sz, unsigned int, size_t) {
std::sort(v, v + sz, E);
return sz;
}
@@ -365,7 +365,7 @@ public:
public:
typename C::UIntType operator () (typename C::InputType v) const { return C::convert(v); }
};
- void operator() (T * start, size_t sz, unsigned topn = std::numeric_limits<uint32_t>::max()) const {
+ void operator() (T * start, size_t sz, size_t topn = std::numeric_limits<size_t>::max()) const {
if (sz > 16) {
ShiftBasedRadixSorter<typename C::InputType, RadixSortable, typename C::Compare, 8*(sizeof(typename C::UIntType) -1)>::radix_sort_internal(RadixSortable(), typename C::Compare(), start, sz, 16, topn);
} else {
@@ -375,13 +375,13 @@ public:
};
template<typename GR, typename T, int IDX>
-void radix_fetch2(GR R, unsigned int cnt[256], const T * a, unsigned int n) __attribute__ ((noinline));
+void radix_fetch2(GR R, size_t cnt[256], const T * a, size_t n) __attribute__ ((noinline));
template<typename GR, typename T, int IDX>
-void radix_fetch2(GR R, unsigned int cnt[256], const T * a, unsigned int n)
+void radix_fetch2(GR R, size_t cnt[256], const T * a, size_t n)
{
- memset(cnt, 0, 256*sizeof(unsigned int));
- unsigned int p(0);
+ memset(cnt, 0, 256*sizeof(cnt[0]));
+ size_t p(0);
if (n > 3) {
for(; p < n - 3; p += 4) {
cnt[R(a[p + 0], IDX)]++;
@@ -396,9 +396,9 @@ void radix_fetch2(GR R, unsigned int cnt[256], const T * a, unsigned int n)
}
template<typename T, typename GR, typename GE, int LEN, int POS>
-void radix_sort_internal(GR R, GE E, T * a, unsigned int n, unsigned int insertSortLevel, unsigned int topn)
+void radix_sort_internal(GR R, GE E, T * a, size_t n, unsigned int insertSortLevel, size_t topn)
{
- unsigned int last[257], ptr[256], cnt[256];
+ size_t last[257], ptr[256], cnt[256];
radix_fetch2<GR, T, LEN-POS>(R, cnt, a, n);
@@ -407,8 +407,8 @@ void radix_sort_internal(GR R, GE E, T * a, unsigned int n, unsigned int insertS
if (!sorted) {
// Go through all permutation cycles until all
// elements are moved or found to be already in place
- unsigned int i(0), remain(n);
- unsigned int j, k;
+ size_t i(0), remain(n);
+ size_t j, k;
T temp, swap;
while(remain>0) {
@@ -443,9 +443,9 @@ void radix_sort_internal(GR R, GE E, T * a, unsigned int n, unsigned int insertS
if (LEN>0) {
// Sort on next key
- for(unsigned i(0), sum(0); (i<256) && (sum < topn); i++) {
- const unsigned int c(cnt[i]);
- const unsigned int l(last[i]);
+ for(size_t i(0), sum(0); (i<256) && (sum < topn); i++) {
+ const size_t c(cnt[i]);
+ const size_t l(last[i]);
if (c) {
if (c>insertSortLevel) {
radix_sort_internal<T, GR, GE, LEN, POS - 1>(R, E, &a[l], c, insertSortLevel, topn-sum);
@@ -460,7 +460,7 @@ void radix_sort_internal(GR R, GE E, T * a, unsigned int n, unsigned int insertS
template<typename T, typename GR, typename GE, int LEN, int POS>
-void radix_sort(GR R, GE E, T * a, unsigned int n, unsigned int insertSortLevel=10, unsigned int topn=std::numeric_limits<unsigned int>::max())
+void radix_sort(GR R, GE E, T * a, size_t n, unsigned int insertSortLevel=10, size_t topn=std::numeric_limits<size_t>::max())
{
if (n > insertSortLevel) {
radix_sort_internal<T, GR, GE, LEN, POS>(R, E, a, n, insertSortLevel, topn);
@@ -471,13 +471,13 @@ void radix_sort(GR R, GE E, T * a, unsigned int n, unsigned int insertSortLevel=
template<typename T, typename GR, int SHIFT>
-void radix_stable_core(GR R, unsigned int ptr[256], const T * a, T * b, unsigned int n) __attribute__ ((noinline));
+void radix_stable_core(GR R, size_t ptr[256], const T * a, T * b, size_t n) __attribute__ ((noinline));
template<typename T, typename GR, int SHIFT>
-void radix_stable_core(GR R, unsigned int ptr[256], const T * a, T * b, unsigned int n)
+void radix_stable_core(GR R, size_t ptr[256], const T * a, T * b, size_t n)
{
- unsigned int k;
- for (unsigned int i(0); i < n; i++) {
+ size_t k;
+ for (size_t i(0); i < n; i++) {
k = (R(a[i]) >> SHIFT) & 0xFF;
b[ptr[k]] = a[i];
ptr[k]++;
@@ -485,9 +485,9 @@ void radix_stable_core(GR R, unsigned int ptr[256], const T * a, T * b, unsigned
}
template<typename T, typename GR, typename GE, int SHIFT>
-T * radix_stable_sort_internal(GR R, GE E, T * a, T * b, unsigned int n, unsigned int insertSortLevel=10)
+T * radix_stable_sort_internal(GR R, GE E, T * a, T * b, size_t n, unsigned int insertSortLevel=10)
{
- unsigned int last[257], ptr[256], cnt[256];
+ size_t last[257], ptr[256], cnt[256];
radix_fetch<GR, T, SHIFT>(R, cnt, a, n);
@@ -502,8 +502,8 @@ T * radix_stable_sort_internal(GR R, GE E, T * a, T * b, unsigned int n, unsigne
if (SHIFT>0) {
// Sort on next key
for(unsigned i(0); i<256 ; i++) {
- const unsigned int c(cnt[i]);
- const unsigned int l(last[i]);
+ const size_t c(cnt[i]);
+ const size_t l(last[i]);
if (c>insertSortLevel) {
const T * r = radix_stable_sort_internal<T, GR, GE, SHIFT - 8>(R, E, &b[l], &a[l], c, insertSortLevel);
if (r != &b[l]) {
@@ -520,7 +520,7 @@ T * radix_stable_sort_internal(GR R, GE E, T * a, T * b, unsigned int n, unsigne
}
template<typename T, typename GR, typename GE, int SHIFT>
-T* radix_stable_sort(GR R, GE E, T * a, T * b, unsigned int n, unsigned int insertSortLevel=10)
+T* radix_stable_sort(GR R, GE E, T * a, T * b, size_t n, unsigned int insertSortLevel=10)
{
if (n > insertSortLevel) {
return radix_stable_sort_internal<T, GR, GE, SHIFT>(R, E, a, b, n, insertSortLevel);