diff options
16 files changed, 56 insertions, 50 deletions
diff --git a/eval/src/tests/ann/nns-l2.h b/eval/src/tests/ann/nns-l2.h index 82a95741200..de24df50b6c 100644 --- a/eval/src/tests/ann/nns-l2.h +++ b/eval/src/tests/ann/nns-l2.h @@ -36,7 +36,7 @@ template <typename FltType = float> struct L2DistCalc { const vespalib::hwaccelrated::IAccelrated & _hw; - L2DistCalc() : _hw(vespalib::hwaccelrated::IAccelrated::getAccelrator()) {} + L2DistCalc() : _hw(vespalib::hwaccelrated::IAccelrated::getAccelerator()) {} using Arr = vespalib::ArrayRef<FltType>; using ConstArr = vespalib::ConstArrayRef<FltType>; diff --git a/searchlib/src/vespa/searchlib/common/bitvector.cpp b/searchlib/src/vespa/searchlib/common/bitvector.cpp index 96234e373dc..0a33e23de72 100644 --- a/searchlib/src/vespa/searchlib/common/bitvector.cpp +++ b/searchlib/src/vespa/searchlib/common/bitvector.cpp @@ -167,7 +167,7 @@ BitVector::countInterval(Range range_in) const ++endw; } if (startw < endw) { - res += IAccelrated::getAccelrator().populationCount(bitValues + startw, endw - startw); + res += IAccelrated::getAccelerator().populationCount(bitValues + startw, endw - startw); } if (partialEnd) { res += Optimized::popCount(bitValues[endw] & ~endBits(last)); @@ -185,13 +185,13 @@ BitVector::orWith(const BitVector & right) if (right.size() > 0) { ssize_t commonBytes = numActiveBytes(getStartIndex(), right.size()) - sizeof(Word); if (commonBytes > 0) { - IAccelrated::getAccelrator().orBit(getActiveStart(), right.getWordIndex(getStartIndex()), commonBytes); + IAccelrated::getAccelerator().orBit(getActiveStart(), right.getWordIndex(getStartIndex()), commonBytes); } Index last(right.size() - 1); getWordIndex(last)[0] |= (right.getWordIndex(last)[0] & ~endBits(last)); } } else { - IAccelrated::getAccelrator().orBit(getActiveStart(), right.getWordIndex(getStartIndex()), getActiveBytes()); + IAccelrated::getAccelerator().orBit(getActiveStart(), right.getWordIndex(getStartIndex()), getActiveBytes()); } repairEnds(); invalidateCachedCount(); @@ -216,7 +216,7 @@ BitVector::andWith(const BitVector & right) verifyInclusiveStart(*this, right); uint32_t commonBytes = std::min(getActiveBytes(), numActiveBytes(getStartIndex(), right.size())); - IAccelrated::getAccelrator().andBit(getActiveStart(), right.getWordIndex(getStartIndex()), commonBytes); + IAccelrated::getAccelerator().andBit(getActiveStart(), right.getWordIndex(getStartIndex()), commonBytes); if (right.size() < size()) { clearInterval(right.size(), size()); } @@ -235,13 +235,13 @@ BitVector::andNotWith(const BitVector& right) if (right.size() > 0) { ssize_t commonBytes = numActiveBytes(getStartIndex(), right.size()) - sizeof(Word); if (commonBytes > 0) { - IAccelrated::getAccelrator().andNotBit(getActiveStart(), right.getWordIndex(getStartIndex()), commonBytes); + IAccelrated::getAccelerator().andNotBit(getActiveStart(), right.getWordIndex(getStartIndex()), commonBytes); } Index last(right.size() - 1); getWordIndex(last)[0] &= ~(right.getWordIndex(last)[0] & ~endBits(last)); } } else { - IAccelrated::getAccelrator().andNotBit(getActiveStart(), right.getWordIndex(getStartIndex()), getActiveBytes()); + IAccelrated::getAccelerator().andNotBit(getActiveStart(), right.getWordIndex(getStartIndex()), getActiveBytes()); } repairEnds(); @@ -250,7 +250,7 @@ BitVector::andNotWith(const BitVector& right) void BitVector::notSelf() { - IAccelrated::getAccelrator().notBit(getActiveStart(), getActiveBytes()); + IAccelrated::getAccelerator().notBit(getActiveStart(), getActiveBytes()); setGuardBit(); invalidateCachedCount(); } diff --git a/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp b/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp index a8737a19eec..37fd98c9f20 100644 --- a/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp +++ b/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp @@ -256,7 +256,7 @@ namespace dotproduct::array { template <typename BaseType> DotProductExecutorBase<BaseType>::DotProductExecutorBase(const V & queryVector) : FeatureExecutor(), - _multiplier(IAccelrated::getAccelrator()), + _multiplier(IAccelrated::getAccelerator()), _queryVector(queryVector) { } diff --git a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp index ca82e45cf0e..b1134c3dc6e 100644 --- a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp @@ -24,7 +24,7 @@ public: explicit MultiBitVectorIterator(const Children & children) : MultiBitVectorIteratorBase(children), _update(), - _accel(IAccelrated::getAccelrator()), + _accel(IAccelrated::getAccelerator()), _lastWords() { static_assert(sizeof(_lastWords) == 64, "Latswords should have 64 byte size"); @@ -56,16 +56,16 @@ private: struct And { using Word = BitWord::Word; - void operator () (const IAccelrated & accel, uint32_t offset, const std::vector<std::pair<const Word *, bool>> & src, Word *dest) { - accel.and64(offset, src, dest); + void operator () (const IAccelrated & accel, size_t offset, const std::vector<std::pair<const void *, bool>> & src, Word *dest) { + accel.and64(offset*sizeof(uint64_t), src, dest); } static bool isAnd() { return true; } }; struct Or { using Word = BitWord::Word; - void operator () (const IAccelrated & accel, uint32_t offset, const std::vector<std::pair<const Word *, bool>> & src, Word *dest) { - accel.or64(offset, src, dest); + void operator () (const IAccelrated & accel, size_t offset, const std::vector<std::pair<const void *, bool>> & src, Word *dest) { + accel.or64(offset*sizeof(uint64_t), src, dest); } static bool isAnd() { return false; } }; @@ -159,7 +159,7 @@ MultiBitVectorIteratorBase::MultiBitVectorIteratorBase(const Children & children _bvs.reserve(children.size()); for (const auto & child : children) { const auto * bv = static_cast<const BitVectorIterator *>(child); - _bvs.emplace_back(reinterpret_cast<const Word *>(bv->getBitValues()), bv->isInverted()); + _bvs.emplace_back(bv->getBitValues(), bv->isInverted()); _numDocs = std::min(_numDocs, bv->getDocIdLimit()); } } @@ -180,7 +180,7 @@ MultiBitVectorIteratorBase::andWith(UP filter, uint32_t estimate) (void) estimate; if (filter->isBitVector() && acceptExtraFilter()) { const auto & bv = static_cast<const BitVectorIterator &>(*filter); - _bvs.emplace_back(reinterpret_cast<const Word *>(bv.getBitValues()), bv.isInverted()); + _bvs.emplace_back(bv.getBitValues(), bv.isInverted()); insert(getChildren().size(), std::move(filter)); _lastMaxDocIdLimit = 0; // force reload _lastMaxDocIdLimitRequireFetch = 0; diff --git a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h index cdf3eea8ef2..dbe2d6f8965 100644 --- a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h +++ b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h @@ -21,7 +21,7 @@ public: static SearchIterator::UP optimize(SearchIterator::UP parent); protected: MultiBitVectorIteratorBase(const Children & children); - using MetaWord = std::pair<const Word *, bool>; + using MetaWord = std::pair<const void *, bool>; uint32_t _numDocs; uint32_t _lastMaxDocIdLimit; // next documentid requiring recomputation. diff --git a/searchlib/src/vespa/searchlib/tensor/distance_functions.h b/searchlib/src/vespa/searchlib/tensor/distance_functions.h index 79f987c740c..d37495e85da 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_functions.h +++ b/searchlib/src/vespa/searchlib/tensor/distance_functions.h @@ -17,7 +17,7 @@ template <typename FloatType> class SquaredEuclideanDistance : public DistanceFunction { public: SquaredEuclideanDistance() - : _computer(vespalib::hwaccelrated::IAccelrated::getAccelrator()) + : _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator()) {} double calc(const vespalib::tensor::TypedCells& lhs, const vespalib::tensor::TypedCells& rhs) const override { auto lhs_vector = lhs.typify<FloatType>(); @@ -60,7 +60,7 @@ template <typename FloatType> class AngularDistance : public DistanceFunction { public: AngularDistance() - : _computer(vespalib::hwaccelrated::IAccelrated::getAccelrator()) + : _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator()) {} double calc(const vespalib::tensor::TypedCells& lhs, const vespalib::tensor::TypedCells& rhs) const override { auto lhs_vector = lhs.typify<FloatType>(); diff --git a/vespalib/src/tests/dotproduct/dotproductbenchmark.cpp b/vespalib/src/tests/dotproduct/dotproductbenchmark.cpp index d6e1aef9394..e95e8a5c58b 100644 --- a/vespalib/src/tests/dotproduct/dotproductbenchmark.cpp +++ b/vespalib/src/tests/dotproduct/dotproductbenchmark.cpp @@ -60,7 +60,7 @@ template <typename T> FullBenchmark<T>::FullBenchmark(size_t numDocs, size_t numValues) : _values(numDocs*numValues), _query(numValues), - _dp(IAccelrated::getAccelrator()) + _dp(IAccelrated::getAccelerator()) { for (size_t i(0); i < numDocs; i++) { for (size_t j(0); j < numValues; j++) { diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp index 233609d505b..8588a5510f7 100644 --- a/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp +++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp @@ -21,12 +21,12 @@ Avx2Accelrator::squaredEuclideanDistance(const double * a, const double * b, siz } void -Avx2Accelrator::and64(size_t offset, const std::vector<std::pair<const uint64_t *, bool>> &src, uint64_t *dest) const { +Avx2Accelrator::and64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const { helper::andChunks<32u, 2u>(offset, src, dest); } void -Avx2Accelrator::or64(size_t offset, const std::vector<std::pair<const uint64_t *, bool>> &src, uint64_t *dest) const { +Avx2Accelrator::or64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const { helper::orChunks<32u, 2u>(offset, src, dest); } diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx2.h b/vespalib/src/vespa/vespalib/hwaccelrated/avx2.h index 292961a6f4d..b6f3d299748 100644 --- a/vespalib/src/vespa/vespalib/hwaccelrated/avx2.h +++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx2.h @@ -15,8 +15,8 @@ public: size_t populationCount(const uint64_t *a, size_t sz) const override; double squaredEuclideanDistance(const float * a, const float * b, size_t sz) const override; double squaredEuclideanDistance(const double * a, const double * b, size_t sz) const override; - void and64(size_t offset, const std::vector<std::pair<const uint64_t *, bool>> &src, uint64_t *dest) const override; - void or64(size_t offset, const std::vector<std::pair<const uint64_t *, bool>> &src, uint64_t *dest) const override; + void and64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const override; + void or64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const override; }; } diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp index 9cfae4757b9..4dade08e77a 100644 --- a/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp +++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp @@ -33,12 +33,12 @@ Avx512Accelrator::squaredEuclideanDistance(const double * a, const double * b, s } void -Avx512Accelrator::and64(size_t offset, const std::vector<std::pair<const uint64_t *, bool>> &src, uint64_t *dest) const { +Avx512Accelrator::and64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const { helper::andChunks<64, 1>(offset, src, dest); } void -Avx512Accelrator::or64(size_t offset, const std::vector<std::pair<const uint64_t *, bool>> &src, uint64_t *dest) const { +Avx512Accelrator::or64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const { helper::orChunks<64, 1>(offset, src, dest); } diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx512.h b/vespalib/src/vespa/vespalib/hwaccelrated/avx512.h index ee422b57171..a54d57407b2 100644 --- a/vespalib/src/vespa/vespalib/hwaccelrated/avx512.h +++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx512.h @@ -17,8 +17,8 @@ public: size_t populationCount(const uint64_t *a, size_t sz) const override; double squaredEuclideanDistance(const float * a, const float * b, size_t sz) const override; double squaredEuclideanDistance(const double * a, const double * b, size_t sz) const override; - void and64(size_t offset, const std::vector<std::pair<const uint64_t *, bool>> &src, uint64_t *dest) const override; - void or64(size_t offset, const std::vector<std::pair<const uint64_t *, bool>> &src, uint64_t *dest) const override; + void and64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const override; + void or64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const override; }; } diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp index 460ae7e7388..f9dfaacf626 100644 --- a/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp +++ b/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp @@ -166,12 +166,12 @@ GenericAccelrator::squaredEuclideanDistance(const double * a, const double * b, } void -GenericAccelrator::and64(size_t offset, const std::vector<std::pair<const uint64_t *, bool>> &src, uint64_t *dest) const { +GenericAccelrator::and64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const { helper::andChunks<16, 4>(offset, src, dest); } void -GenericAccelrator::or64(size_t offset, const std::vector<std::pair<const uint64_t *, bool>> &src, uint64_t *dest) const { +GenericAccelrator::or64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const { helper::orChunks<16,4>(offset, src, dest); } diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/generic.h b/vespalib/src/vespa/vespalib/hwaccelrated/generic.h index 8ce320cd4c4..2335b40fe85 100644 --- a/vespalib/src/vespa/vespalib/hwaccelrated/generic.h +++ b/vespalib/src/vespa/vespalib/hwaccelrated/generic.h @@ -25,8 +25,8 @@ public: size_t populationCount(const uint64_t *a, size_t sz) const override; double squaredEuclideanDistance(const float * a, const float * b, size_t sz) const override; double squaredEuclideanDistance(const double * a, const double * b, size_t sz) const override; - void and64(size_t offset, const std::vector<std::pair<const uint64_t *, bool>> &src, uint64_t *dest) const override; - void or64(size_t offset, const std::vector<std::pair<const uint64_t *, bool>> &src, uint64_t *dest) const override; + void and64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const override; + void or64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const override; }; } diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp index 068833357c3..de917c5f065 100644 --- a/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp +++ b/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp @@ -149,7 +149,7 @@ void verifyOr64(const IAccelrated & accel, const std::vector<std::vector<uint64_t>> & vectors, size_t offset, size_t num_vectors, bool invertSome) { - std::vector<std::pair<const uint64_t *, bool>> vRefs; + std::vector<std::pair<const void *, bool>> vRefs; for (size_t j(0); j < num_vectors; j++) { vRefs.emplace_back(&vectors[j][0], shouldInvert(invertSome)); } @@ -160,7 +160,7 @@ verifyOr64(const IAccelrated & accel, const std::vector<std::vector<uint64_t>> & } uint64_t dest[8] __attribute((aligned(64))); - accel.or64(offset, vRefs, dest); + accel.or64(offset*sizeof(uint64_t), vRefs, dest); int diff = memcmp(&expected[offset], dest, sizeof(dest)); if (diff != 0) { LOG_ABORT("Accelerator fails to compute correct 64 bytes OR"); @@ -171,7 +171,7 @@ void verifyAnd64(const IAccelrated & accel, const std::vector<std::vector<uint64_t>> & vectors, size_t offset, size_t num_vectors, bool invertSome) { - std::vector<std::pair<const uint64_t *, bool>> vRefs; + std::vector<std::pair<const void *, bool>> vRefs; for (size_t j(0); j < num_vectors; j++) { vRefs.emplace_back(&vectors[j][0], shouldInvert(invertSome)); } @@ -181,7 +181,7 @@ verifyAnd64(const IAccelrated & accel, const std::vector<std::vector<uint64_t>> } uint64_t dest[8] __attribute((aligned(64))); - accel.and64(offset, vRefs, dest); + accel.and64(offset*sizeof(uint64_t), vRefs, dest); int diff = memcmp(&expected[offset], dest, sizeof(dest)); if (diff != 0) { LOG_ABORT("Accelerator fails to compute correct 64 bytes AND"); @@ -239,7 +239,7 @@ RuntimeVerificator::RuntimeVerificator() GenericAccelrator generic; verify(generic); - const IAccelrated & thisCpu(IAccelrated::getAccelrator()); + const IAccelrated & thisCpu(IAccelrated::getAccelerator()); verify(thisCpu); } @@ -272,7 +272,7 @@ static Selector _G_selector; RuntimeVerificator _G_verifyAccelrator; const IAccelrated & -IAccelrated::getAccelrator() +IAccelrated::getAccelerator() { static IAccelrated::UP accelrator = _G_selector.create(); return *accelrator; diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h b/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h index f352fb292ce..2594a48dd33 100644 --- a/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h +++ b/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h @@ -30,11 +30,12 @@ public: virtual size_t populationCount(const uint64_t *a, size_t sz) const = 0; virtual double squaredEuclideanDistance(const float * a, const float * b, size_t sz) const = 0; virtual double squaredEuclideanDistance(const double * a, const double * b, size_t sz) const = 0; - // And 64 bytes from multiple sources - virtual void and64(size_t offset, const std::vector<std::pair<const uint64_t *, bool>> &src, uint64_t *dest) const = 0; - virtual void or64(size_t offset, const std::vector<std::pair<const uint64_t *, bool>> &src, uint64_t *dest) const = 0; + // AND 64 bytes from multiple, optionally inverted sources + virtual void and64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const = 0; + // OR 64 bytes from multiple, optionally inverted sources + virtual void or64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const = 0; - static const IAccelrated & getAccelrator() __attribute__((noinline)); + static const IAccelrated & getAccelerator() __attribute__((noinline)); }; } diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp b/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp index 2759cc35ba9..6fc49f969f2 100644 --- a/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp +++ b/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp @@ -31,17 +31,22 @@ T get(const void * base, bool invert) { return __builtin_expect(invert, false) ? ~v : v; } +template <typename T> +const T * cast(const void * ptr, size_t offsetBytes) { + return static_cast<const T *>(static_cast<const void *>(static_cast<const char *>(ptr) + offsetBytes)); +} + template<unsigned ChunkSize, unsigned Chunks> void -andChunks(size_t offset, const std::vector<std::pair<const uint64_t *, bool>> & src, uint64_t * dest) { +andChunks(size_t offset, const std::vector<std::pair<const void *, bool>> & src, void * dest) { typedef uint64_t Chunk __attribute__ ((vector_size (ChunkSize))); - Chunk * chunk = reinterpret_cast<Chunk *>(dest); - const Chunk * tmp = reinterpret_cast<const Chunk *>(src[0].first+offset); + Chunk * chunk = static_cast<Chunk *>(dest); + const Chunk * tmp = cast<Chunk>(src[0].first, offset); for (size_t n=0; n < Chunks; n++) { chunk[n] = get<Chunk>(tmp+n, src[0].second); } for (size_t i(1); i < src.size(); i++) { - tmp = reinterpret_cast<const Chunk *>(src[i].first+offset); + tmp = cast<Chunk>(src[i].first, offset); for (size_t n=0; n < Chunks; n++) { chunk[n] &= get<Chunk>(tmp+n, src[i].second); } @@ -50,15 +55,15 @@ andChunks(size_t offset, const std::vector<std::pair<const uint64_t *, bool>> & template<unsigned ChunkSize, unsigned Chunks> void -orChunks(size_t offset, const std::vector<std::pair<const uint64_t *, bool>> & src, uint64_t * dest) { +orChunks(size_t offset, const std::vector<std::pair<const void *, bool>> & src, void * dest) { typedef uint64_t Chunk __attribute__ ((vector_size (ChunkSize))); - Chunk * chunk = reinterpret_cast<Chunk *>(dest); - const Chunk * tmp = reinterpret_cast<const Chunk *>(src[0].first+offset); + Chunk * chunk = static_cast<Chunk *>(dest); + const Chunk * tmp = cast<Chunk>(src[0].first, offset); for (size_t n=0; n < Chunks; n++) { chunk[n] = get<Chunk>(tmp+n, src[0].second); } for (size_t i(1); i < src.size(); i++) { - tmp = reinterpret_cast<const Chunk *>(src[i].first+offset); + tmp = cast<Chunk>(src[i].first, offset); for (size_t n=0; n < Chunks; n++) { chunk[n] |= get<Chunk>(tmp+n, src[i].second); } |