diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2022-03-25 17:16:47 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2022-03-25 17:16:47 +0000 |
commit | 50de2c8cefcdb4a96b62c01aa19a7e73023248f2 (patch) | |
tree | fe1d41a98c7c38a53265582471c5602e3109a705 | |
parent | ce885ac3588c1d75167928444343c1d5044691e3 (diff) |
Use size_t(64 bit) insteda of unsigent int(32 bit) to allow sorting arrays larger than 4G elements.
-rw-r--r-- | searchlib/src/vespa/searchlib/common/sort.cpp | 5 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/common/sort.h | 122 |
2 files changed, 64 insertions, 63 deletions
diff --git a/searchlib/src/vespa/searchlib/common/sort.cpp b/searchlib/src/vespa/searchlib/common/sort.cpp index a85317b2c53..0a4db613649 100644 --- a/searchlib/src/vespa/searchlib/common/sort.cpp +++ b/searchlib/src/vespa/searchlib/common/sort.cpp @@ -3,7 +3,8 @@ namespace search { -bool radix_prepare(unsigned int n, unsigned int last[257], unsigned int ptr[256], unsigned int cnt[256]) +bool +radix_prepare(size_t n, size_t last[257], size_t ptr[256], size_t cnt[256]) { // Accumulate cnt positions bool sorted = (cnt[0]==n); @@ -12,7 +13,7 @@ bool radix_prepare(unsigned int n, unsigned int last[257], unsigned int ptr[256] ptr[i] = ptr[i-1] + cnt[i-1]; sorted |= (cnt[i]==n); } - memcpy(last, ptr, 256*sizeof(unsigned int)); + memcpy(last, ptr, 256*sizeof(last[0])); last[256] = last[255] + cnt[255]; return sorted; } diff --git a/searchlib/src/vespa/searchlib/common/sort.h b/searchlib/src/vespa/searchlib/common/sort.h index 2513c5506e9..b90bda698a0 100644 --- a/searchlib/src/vespa/searchlib/common/sort.h +++ b/searchlib/src/vespa/searchlib/common/sort.h @@ -10,22 +10,22 @@ namespace search { -bool radix_prepare(unsigned int n, unsigned int last[257], unsigned int ptr[256], unsigned int cnt[256]); +bool radix_prepare(size_t n, size_t last[257], size_t ptr[256], size_t cnt[256]); template<typename T> -void radix_sort_core(const unsigned int * last, T * a, unsigned int n, uint32_t * radixScratch, unsigned int shiftWidth) __attribute__ ((noinline)); +void radix_sort_core(const size_t * last, T * a, size_t n, uint32_t * radixScratch, unsigned int shiftWidth) __attribute__ ((noinline)); template<typename T> -void radix_sort_core(const unsigned int * last, T * a, unsigned int n, uint32_t * radixScratch, unsigned int shiftWidth) +void radix_sort_core(const size_t * last, T * a, size_t n, uint32_t * radixScratch, unsigned int shiftWidth) { T temp, swap; // Go through all permutation cycles until all // elements are moved or found to be already in place - unsigned int ptr[256]; - unsigned int i, j, k; + size_t ptr[256]; + size_t i, j, k; memcpy(ptr, last, sizeof(ptr)); i = 0; - unsigned int remain = n; + size_t remain = n; while (remain > 0) { // Find first uncompleted class @@ -42,7 +42,7 @@ void radix_sort_core(const unsigned int * last, T * a, unsigned int n, uint32_t if (i != k) { swap = a[j]; do { - unsigned int t(ptr[k]); + size_t t(ptr[k]); temp = a[t]; uint32_t tempK(radixScratch[t]); radixScratch[t] = swapK; @@ -63,12 +63,12 @@ void radix_sort_core(const unsigned int * last, T * a, unsigned int n, uint32_t } template<typename T, typename GR> -unsigned int radix_fetch(T *a, unsigned int n, uint32_t * radixScratch, GR R) __attribute__ ((noinline)); +unsigned int radix_fetch(T *a, size_t n, uint32_t * radixScratch, GR R) __attribute__ ((noinline)); template<typename T, typename GR> -unsigned int radix_fetch(T *a, unsigned int n, uint32_t * radixScratch, GR R) +unsigned int radix_fetch(T *a, size_t n, uint32_t * radixScratch, GR R) { - unsigned int i = 0; + size_t i = 0; uint32_t usedBits = 0; if (n > 3) { for(; i < n - 3; i += 4) { @@ -102,12 +102,12 @@ public: }; template<typename T, typename ER> -bool radix_eof(const T *a, unsigned int n, ER E) __attribute__ ((noinline)); +bool radix_eof(const T *a, size_t n, ER E) __attribute__ ((noinline)); template<typename T, typename ER> -bool radix_eof(const T *a, unsigned int n, ER E) +bool radix_eof(const T *a, size_t n, ER E) { - unsigned int i = 0; + size_t i = 0; bool eof(true); if (n > 3) { for(; eof && (i < n - 3); i += 4) { @@ -137,11 +137,11 @@ bool radix_eof(const T *a, unsigned int n, ER E) **/ template<typename T, typename GR, typename GE, typename GRE> void radix_sort(GR R, GE E, GRE EE, int stackDepth, - T * a, unsigned int n, + T * a, size_t n, uint32_t *radixScratch, int radixBits, unsigned insertSortLevel=10, - unsigned int topn=std::numeric_limits<unsigned int>::max()) + size_t topn=std::numeric_limits<size_t>::max()) { if (((stackDepth > 20) && (radixBits == 0)) || (n < insertSortLevel)) { // switch to simpler sort if few elements @@ -151,8 +151,8 @@ void radix_sort(GR R, GE E, GRE EE, int stackDepth, return; } - unsigned int last[257]; - unsigned int cnt[256]; + size_t last[257]; + size_t cnt[256]; int shiftWidth = radixBits - 8; for (bool allInOneBucket(true); allInOneBucket;) { while ( radixBits == 0 ) { @@ -169,7 +169,7 @@ void radix_sort(GR R, GE E, GRE EE, int stackDepth, shiftWidth = radixBits - 8; memset(cnt, 0, sizeof(cnt)); - unsigned int i = 0; + size_t i = 0; if (n > 3) { for(; i < n - 3; i += 4) { cnt[(radixScratch[i + 0] >> shiftWidth) & 0xFF]++; @@ -196,9 +196,9 @@ void radix_sort(GR R, GE E, GRE EE, int stackDepth, radix_sort_core(last, a, n, radixScratch, shiftWidth); // Sort on next 8 bits of key - for(unsigned i(0), sum(0); (i<256) && (sum < topn); i++) { - const unsigned l(last[i]); - const unsigned c(cnt[i]); + for(size_t i(0), sum(0); (i<256) && (sum < topn); i++) { + const size_t l(last[i]); + const size_t c(cnt[i]); if (c) { if (c > insertSortLevel) { radix_sort(R, E, EE, stackDepth + 1, &a[l], c, &radixScratch[l], radixBits, insertSortLevel, topn-sum); @@ -215,15 +215,15 @@ template<typename GR, typename T, int SHIFT> class ShiftBasedRadixSorterBase { protected: - static void radix_fetch(GR R, unsigned int cnt[256], const T * a, unsigned int n) __attribute__((noinline)); - static void radix_sort_core(GR R, unsigned int ptr[256], unsigned int last[257], T * a, unsigned int n) __attribute__((noinline)); + static void radix_fetch(GR R, size_t cnt[256], const T * a, size_t n) __attribute__((noinline)); + static void radix_sort_core(GR R, size_t ptr[256], size_t last[257], T * a, size_t n) __attribute__((noinline)); }; template<typename GR, typename T, int SHIFT> -void ShiftBasedRadixSorterBase<GR, T, SHIFT>::radix_fetch(GR R, unsigned int cnt[256], const T * a, unsigned int n) +void ShiftBasedRadixSorterBase<GR, T, SHIFT>::radix_fetch(GR R, size_t cnt[256], const T * a, size_t n) { - memset(cnt, 0, 256*sizeof(unsigned int)); - unsigned int p(0); + memset(cnt, 0, 256 * sizeof(cnt[0])); + size_t p(0); if (n > 3) { for(; p < n - 3; p += 4) { cnt[(R(a[p]) >> SHIFT) & 0xFF]++; @@ -239,12 +239,12 @@ void ShiftBasedRadixSorterBase<GR, T, SHIFT>::radix_fetch(GR R, unsigned int cnt template<typename GR, typename T, int SHIFT> -void ShiftBasedRadixSorterBase<GR, T, SHIFT>::radix_sort_core(GR R, unsigned int ptr[256], unsigned int last[257], T * a, unsigned int n) +void ShiftBasedRadixSorterBase<GR, T, SHIFT>::radix_sort_core(GR R, size_t ptr[256], size_t last[257], T * a, size_t n) { // Go through all permutation cycles until all // elements are moved or found to be already in place - unsigned int i(0), remain(n); - unsigned int j, k; + size_t i(0), remain(n); + size_t j, k; T temp, swap; while(remain>0) { @@ -286,17 +286,17 @@ template<typename T, typename GR, typename GE, int SHIFT, bool continueAfterRadi class ShiftBasedRadixSorter : private ShiftBasedRadixSorterBase<GR, T, SHIFT> { public: - static size_t radix_sort(GR R, GE E, T * a, unsigned int n, unsigned int insertSortLevel=10, unsigned int topn=std::numeric_limits<unsigned int>::max()); - static size_t radix_sort_internal(GR R, GE E, T * a, unsigned int n, unsigned int insertSortLevel, unsigned int topn); + static size_t radix_sort(GR R, GE E, T * a, size_t n, unsigned int insertSortLevel=10, size_t topn=std::numeric_limits<size_t>::max()); + static size_t radix_sort_internal(GR R, GE E, T * a, size_t n, unsigned int insertSortLevel, size_t topn); private: typedef ShiftBasedRadixSorterBase<GR, T, SHIFT> Base; }; template<typename T, typename GR, typename GE, int SHIFT, bool continueAfterRadixEnds> -size_t ShiftBasedRadixSorter<T, GR, GE, SHIFT, continueAfterRadixEnds>::radix_sort_internal(GR R, GE E, T * a, unsigned int n, unsigned int insertSortLevel, unsigned int topn) +size_t ShiftBasedRadixSorter<T, GR, GE, SHIFT, continueAfterRadixEnds>::radix_sort_internal(GR R, GE E, T * a, size_t n, unsigned int insertSortLevel, size_t topn) { - unsigned int last[257], ptr[256], cnt[256]; - unsigned int sum(n); + size_t last[257], ptr[256], cnt[256]; + size_t sum(n); Base::radix_fetch(R, cnt, a, n); @@ -312,8 +312,8 @@ size_t ShiftBasedRadixSorter<T, GR, GE, SHIFT, continueAfterRadixEnds>::radix_so // Sort on next key sum = 0; for(unsigned i(0); (i<256) && (sum < topn); i++) { - const unsigned int c(cnt[i]); - const unsigned int l(last[i]); + const size_t c(cnt[i]); + const size_t l(last[i]); if (c) { if (c>insertSortLevel) { sum += ShiftBasedRadixSorter<T, GR, GE, SHIFT - 8, continueAfterRadixEnds>::radix_sort_internal(R, E, &a[l], c, insertSortLevel, topn-sum); @@ -329,7 +329,7 @@ size_t ShiftBasedRadixSorter<T, GR, GE, SHIFT, continueAfterRadixEnds>::radix_so template<typename T, typename GR, typename GE, int SHIFT, bool continueAfterRadixEnds> -size_t ShiftBasedRadixSorter<T, GR, GE, SHIFT, continueAfterRadixEnds>::radix_sort(GR R, GE E, T * a, unsigned int n, unsigned int insertSortLevel, unsigned int topn) +size_t ShiftBasedRadixSorter<T, GR, GE, SHIFT, continueAfterRadixEnds>::radix_sort(GR R, GE E, T * a, size_t n, unsigned int insertSortLevel, size_t topn) { if (n > insertSortLevel) { return radix_sort_internal(R, E, a, n, insertSortLevel, topn); @@ -342,7 +342,7 @@ size_t ShiftBasedRadixSorter<T, GR, GE, SHIFT, continueAfterRadixEnds>::radix_so template<typename A, typename B, typename C> class ShiftBasedRadixSorter<A, B, C, -8, false> { public: - static size_t radix_sort_internal(B, C, A *, unsigned int, unsigned int, unsigned int) { + static size_t radix_sort_internal(B, C, A *, size_t, unsigned int, size_t) { return 0; } }; @@ -350,7 +350,7 @@ public: template<typename A, typename B, typename C> class ShiftBasedRadixSorter<A, B, C, -8, true> { public: - static size_t radix_sort_internal(B, C E, A * v, unsigned int sz, unsigned int, unsigned int) { + static size_t radix_sort_internal(B, C E, A * v, size_t sz, unsigned int, size_t) { std::sort(v, v + sz, E); return sz; } @@ -365,7 +365,7 @@ public: public: typename C::UIntType operator () (typename C::InputType v) const { return C::convert(v); } }; - void operator() (T * start, size_t sz, unsigned topn = std::numeric_limits<uint32_t>::max()) const { + void operator() (T * start, size_t sz, size_t topn = std::numeric_limits<size_t>::max()) const { if (sz > 16) { ShiftBasedRadixSorter<typename C::InputType, RadixSortable, typename C::Compare, 8*(sizeof(typename C::UIntType) -1)>::radix_sort_internal(RadixSortable(), typename C::Compare(), start, sz, 16, topn); } else { @@ -375,13 +375,13 @@ public: }; template<typename GR, typename T, int IDX> -void radix_fetch2(GR R, unsigned int cnt[256], const T * a, unsigned int n) __attribute__ ((noinline)); +void radix_fetch2(GR R, size_t cnt[256], const T * a, size_t n) __attribute__ ((noinline)); template<typename GR, typename T, int IDX> -void radix_fetch2(GR R, unsigned int cnt[256], const T * a, unsigned int n) +void radix_fetch2(GR R, size_t cnt[256], const T * a, size_t n) { - memset(cnt, 0, 256*sizeof(unsigned int)); - unsigned int p(0); + memset(cnt, 0, 256*sizeof(cnt[0])); + size_t p(0); if (n > 3) { for(; p < n - 3; p += 4) { cnt[R(a[p + 0], IDX)]++; @@ -396,9 +396,9 @@ void radix_fetch2(GR R, unsigned int cnt[256], const T * a, unsigned int n) } template<typename T, typename GR, typename GE, int LEN, int POS> -void radix_sort_internal(GR R, GE E, T * a, unsigned int n, unsigned int insertSortLevel, unsigned int topn) +void radix_sort_internal(GR R, GE E, T * a, size_t n, unsigned int insertSortLevel, size_t topn) { - unsigned int last[257], ptr[256], cnt[256]; + size_t last[257], ptr[256], cnt[256]; radix_fetch2<GR, T, LEN-POS>(R, cnt, a, n); @@ -407,8 +407,8 @@ void radix_sort_internal(GR R, GE E, T * a, unsigned int n, unsigned int insertS if (!sorted) { // Go through all permutation cycles until all // elements are moved or found to be already in place - unsigned int i(0), remain(n); - unsigned int j, k; + size_t i(0), remain(n); + size_t j, k; T temp, swap; while(remain>0) { @@ -443,9 +443,9 @@ void radix_sort_internal(GR R, GE E, T * a, unsigned int n, unsigned int insertS if (LEN>0) { // Sort on next key - for(unsigned i(0), sum(0); (i<256) && (sum < topn); i++) { - const unsigned int c(cnt[i]); - const unsigned int l(last[i]); + for(size_t i(0), sum(0); (i<256) && (sum < topn); i++) { + const size_t c(cnt[i]); + const size_t l(last[i]); if (c) { if (c>insertSortLevel) { radix_sort_internal<T, GR, GE, LEN, POS - 1>(R, E, &a[l], c, insertSortLevel, topn-sum); @@ -460,7 +460,7 @@ void radix_sort_internal(GR R, GE E, T * a, unsigned int n, unsigned int insertS template<typename T, typename GR, typename GE, int LEN, int POS> -void radix_sort(GR R, GE E, T * a, unsigned int n, unsigned int insertSortLevel=10, unsigned int topn=std::numeric_limits<unsigned int>::max()) +void radix_sort(GR R, GE E, T * a, size_t n, unsigned int insertSortLevel=10, size_t topn=std::numeric_limits<size_t>::max()) { if (n > insertSortLevel) { radix_sort_internal<T, GR, GE, LEN, POS>(R, E, a, n, insertSortLevel, topn); @@ -471,13 +471,13 @@ void radix_sort(GR R, GE E, T * a, unsigned int n, unsigned int insertSortLevel= template<typename T, typename GR, int SHIFT> -void radix_stable_core(GR R, unsigned int ptr[256], const T * a, T * b, unsigned int n) __attribute__ ((noinline)); +void radix_stable_core(GR R, size_t ptr[256], const T * a, T * b, size_t n) __attribute__ ((noinline)); template<typename T, typename GR, int SHIFT> -void radix_stable_core(GR R, unsigned int ptr[256], const T * a, T * b, unsigned int n) +void radix_stable_core(GR R, size_t ptr[256], const T * a, T * b, size_t n) { - unsigned int k; - for (unsigned int i(0); i < n; i++) { + size_t k; + for (size_t i(0); i < n; i++) { k = (R(a[i]) >> SHIFT) & 0xFF; b[ptr[k]] = a[i]; ptr[k]++; @@ -485,9 +485,9 @@ void radix_stable_core(GR R, unsigned int ptr[256], const T * a, T * b, unsigned } template<typename T, typename GR, typename GE, int SHIFT> -T * radix_stable_sort_internal(GR R, GE E, T * a, T * b, unsigned int n, unsigned int insertSortLevel=10) +T * radix_stable_sort_internal(GR R, GE E, T * a, T * b, size_t n, unsigned int insertSortLevel=10) { - unsigned int last[257], ptr[256], cnt[256]; + size_t last[257], ptr[256], cnt[256]; radix_fetch<GR, T, SHIFT>(R, cnt, a, n); @@ -502,8 +502,8 @@ T * radix_stable_sort_internal(GR R, GE E, T * a, T * b, unsigned int n, unsigne if (SHIFT>0) { // Sort on next key for(unsigned i(0); i<256 ; i++) { - const unsigned int c(cnt[i]); - const unsigned int l(last[i]); + const size_t c(cnt[i]); + const size_t l(last[i]); if (c>insertSortLevel) { const T * r = radix_stable_sort_internal<T, GR, GE, SHIFT - 8>(R, E, &b[l], &a[l], c, insertSortLevel); if (r != &b[l]) { @@ -520,7 +520,7 @@ T * radix_stable_sort_internal(GR R, GE E, T * a, T * b, unsigned int n, unsigne } template<typename T, typename GR, typename GE, int SHIFT> -T* radix_stable_sort(GR R, GE E, T * a, T * b, unsigned int n, unsigned int insertSortLevel=10) +T* radix_stable_sort(GR R, GE E, T * a, T * b, size_t n, unsigned int insertSortLevel=10) { if (n > insertSortLevel) { return radix_stable_sort_internal<T, GR, GE, SHIFT>(R, E, a, b, n, insertSortLevel); |