summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHarald Musum <musum@verizonmedia.com>2020-06-08 08:03:16 +0200
committerGitHub <noreply@github.com>2020-06-08 08:03:16 +0200
commit8a1262dcfb1698ca3bf06f2734dd364f25cc1f70 (patch)
tree1da3189429e5c4fd816b7261031ba1000c259a96
parentbda9b2e28e416daaefffa181d4dc8fe8566e8ca0 (diff)
Revert "When we pull in a cacheline, we should use it too."
-rw-r--r--eval/src/tests/ann/nns-l2.h2
-rw-r--r--searchlib/src/vespa/searchlib/common/bitvector.cpp14
-rw-r--r--searchlib/src/vespa/searchlib/features/dotproductfeature.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp94
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h19
-rw-r--r--searchlib/src/vespa/searchlib/tensor/distance_functions.h4
-rw-r--r--vespalib/src/tests/dotproduct/dotproductbenchmark.cpp2
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp10
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/avx2.h2
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp10
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/avx512.h2
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp10
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/generic.h2
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp127
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h7
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp46
16 files changed, 68 insertions, 285 deletions
diff --git a/eval/src/tests/ann/nns-l2.h b/eval/src/tests/ann/nns-l2.h
index de24df50b6c..82a95741200 100644
--- a/eval/src/tests/ann/nns-l2.h
+++ b/eval/src/tests/ann/nns-l2.h
@@ -36,7 +36,7 @@ template <typename FltType = float>
struct L2DistCalc {
const vespalib::hwaccelrated::IAccelrated & _hw;
- L2DistCalc() : _hw(vespalib::hwaccelrated::IAccelrated::getAccelerator()) {}
+ L2DistCalc() : _hw(vespalib::hwaccelrated::IAccelrated::getAccelrator()) {}
using Arr = vespalib::ArrayRef<FltType>;
using ConstArr = vespalib::ConstArrayRef<FltType>;
diff --git a/searchlib/src/vespa/searchlib/common/bitvector.cpp b/searchlib/src/vespa/searchlib/common/bitvector.cpp
index 0a33e23de72..96234e373dc 100644
--- a/searchlib/src/vespa/searchlib/common/bitvector.cpp
+++ b/searchlib/src/vespa/searchlib/common/bitvector.cpp
@@ -167,7 +167,7 @@ BitVector::countInterval(Range range_in) const
++endw;
}
if (startw < endw) {
- res += IAccelrated::getAccelerator().populationCount(bitValues + startw, endw - startw);
+ res += IAccelrated::getAccelrator().populationCount(bitValues + startw, endw - startw);
}
if (partialEnd) {
res += Optimized::popCount(bitValues[endw] & ~endBits(last));
@@ -185,13 +185,13 @@ BitVector::orWith(const BitVector & right)
if (right.size() > 0) {
ssize_t commonBytes = numActiveBytes(getStartIndex(), right.size()) - sizeof(Word);
if (commonBytes > 0) {
- IAccelrated::getAccelerator().orBit(getActiveStart(), right.getWordIndex(getStartIndex()), commonBytes);
+ IAccelrated::getAccelrator().orBit(getActiveStart(), right.getWordIndex(getStartIndex()), commonBytes);
}
Index last(right.size() - 1);
getWordIndex(last)[0] |= (right.getWordIndex(last)[0] & ~endBits(last));
}
} else {
- IAccelrated::getAccelerator().orBit(getActiveStart(), right.getWordIndex(getStartIndex()), getActiveBytes());
+ IAccelrated::getAccelrator().orBit(getActiveStart(), right.getWordIndex(getStartIndex()), getActiveBytes());
}
repairEnds();
invalidateCachedCount();
@@ -216,7 +216,7 @@ BitVector::andWith(const BitVector & right)
verifyInclusiveStart(*this, right);
uint32_t commonBytes = std::min(getActiveBytes(), numActiveBytes(getStartIndex(), right.size()));
- IAccelrated::getAccelerator().andBit(getActiveStart(), right.getWordIndex(getStartIndex()), commonBytes);
+ IAccelrated::getAccelrator().andBit(getActiveStart(), right.getWordIndex(getStartIndex()), commonBytes);
if (right.size() < size()) {
clearInterval(right.size(), size());
}
@@ -235,13 +235,13 @@ BitVector::andNotWith(const BitVector& right)
if (right.size() > 0) {
ssize_t commonBytes = numActiveBytes(getStartIndex(), right.size()) - sizeof(Word);
if (commonBytes > 0) {
- IAccelrated::getAccelerator().andNotBit(getActiveStart(), right.getWordIndex(getStartIndex()), commonBytes);
+ IAccelrated::getAccelrator().andNotBit(getActiveStart(), right.getWordIndex(getStartIndex()), commonBytes);
}
Index last(right.size() - 1);
getWordIndex(last)[0] &= ~(right.getWordIndex(last)[0] & ~endBits(last));
}
} else {
- IAccelrated::getAccelerator().andNotBit(getActiveStart(), right.getWordIndex(getStartIndex()), getActiveBytes());
+ IAccelrated::getAccelrator().andNotBit(getActiveStart(), right.getWordIndex(getStartIndex()), getActiveBytes());
}
repairEnds();
@@ -250,7 +250,7 @@ BitVector::andNotWith(const BitVector& right)
void
BitVector::notSelf() {
- IAccelrated::getAccelerator().notBit(getActiveStart(), getActiveBytes());
+ IAccelrated::getAccelrator().notBit(getActiveStart(), getActiveBytes());
setGuardBit();
invalidateCachedCount();
}
diff --git a/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp b/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp
index 37fd98c9f20..a8737a19eec 100644
--- a/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp
+++ b/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp
@@ -256,7 +256,7 @@ namespace dotproduct::array {
template <typename BaseType>
DotProductExecutorBase<BaseType>::DotProductExecutorBase(const V & queryVector)
: FeatureExecutor(),
- _multiplier(IAccelrated::getAccelerator()),
+ _multiplier(IAccelrated::getAccelrator()),
_queryVector(queryVector)
{
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp
index d36d16a679a..105d57b22b1 100644
--- a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp
@@ -1,19 +1,19 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "multibitvectoriterator.h"
-#include "andsearch.h"
-#include "andnotsearch.h"
-#include "sourceblendersearch.h"
+#include <vespa/searchlib/queryeval/multibitvectoriterator.h>
+#include <vespa/searchlib/queryeval/andsearch.h>
+#include <vespa/searchlib/queryeval/andnotsearch.h>
+#include <vespa/searchlib/queryeval/sourceblendersearch.h>
+#include <vespa/searchlib/queryeval/orsearch.h>
#include <vespa/searchlib/common/bitvectoriterator.h>
+#include <vespa/searchlib/attribute/attributeiterators.h>
#include <vespa/searchlib/fef/termfieldmatchdata.h>
#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
#include <vespa/vespalib/util/optimized.h>
-#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
namespace search::queryeval {
using vespalib::Trinary;
-using vespalib::hwaccelrated::IAccelrated;
namespace {
@@ -21,15 +21,7 @@ template<typename Update>
class MultiBitVectorIterator : public MultiBitVectorIteratorBase
{
public:
- explicit MultiBitVectorIterator(Children children)
- : MultiBitVectorIteratorBase(std::move(children)),
- _update(),
- _accel(IAccelrated::getAccelerator()),
- _lastWords()
- {
- static_assert(sizeof(_lastWords) == 64, "Latswords should have 64 byte size");
- memset(&_lastWords, 0, sizeof(_lastWords));
- }
+ MultiBitVectorIterator(Children children) : MultiBitVectorIteratorBase(std::move(children)) { }
protected:
void updateLastValue(uint32_t docId);
void strictSeek(uint32_t docId);
@@ -37,55 +29,33 @@ private:
void doSeek(uint32_t docId) override;
Trinary is_strict() const override { return Trinary::False; }
bool acceptExtraFilter() const override { return Update::isAnd(); }
- Update _update;
- const IAccelrated & _accel;
- alignas(64) Word _lastWords[8];
+ Update _update;
};
template<typename Update>
class MultiBitVectorIteratorStrict : public MultiBitVectorIterator<Update>
{
public:
- explicit MultiBitVectorIteratorStrict(MultiSearch::Children children)
- : MultiBitVectorIterator<Update>(std::move(children))
- { }
+ MultiBitVectorIteratorStrict(MultiSearch::Children children) : MultiBitVectorIterator<Update>(std::move(children)) { }
private:
void doSeek(uint32_t docId) override { this->strictSeek(docId); }
Trinary is_strict() const override { return Trinary::True; }
};
-struct And {
- using Word = BitWord::Word;
- void operator () (const IAccelrated & accel, size_t offset, const std::vector<std::pair<const void *, bool>> & src, Word *dest) {
- accel.and64(offset*sizeof(uint64_t), src, dest);
- }
- static bool isAnd() { return true; }
-};
-
-struct Or {
- using Word = BitWord::Word;
- void operator () (const IAccelrated & accel, size_t offset, const std::vector<std::pair<const void *, bool>> & src, Word *dest) {
- accel.or64(offset*sizeof(uint64_t), src, dest);
- }
- static bool isAnd() { return false; }
-};
-
template<typename Update>
void MultiBitVectorIterator<Update>::updateLastValue(uint32_t docId)
{
if (docId >= _lastMaxDocIdLimit) {
- if (__builtin_expect(docId >= _numDocs, false)) {
+ if (__builtin_expect(docId < _numDocs, true)) {
+ const uint32_t index(wordNum(docId));
+ _lastValue = _bvs[0][index];
+ for(uint32_t i(1); i < _bvs.size(); i++) {
+ _lastValue = _update(_lastValue, _bvs[i][index]);
+ }
+ _lastMaxDocIdLimit = (index + 1) * WordLen;
+ } else {
setAtEnd();
- return;
- }
- const uint32_t index(wordNum(docId));
- if (docId >= _lastMaxDocIdLimitRequireFetch) {
- uint32_t baseIndex = index & ~(sizeof(_lastWords)/sizeof(Word) - 1);
- _update(_accel, baseIndex, _bvs, _lastWords);
- _lastMaxDocIdLimitRequireFetch = (baseIndex + (sizeof(_lastWords)/sizeof(Word))) * WordLen;
}
- _lastValue = _lastWords[index % (sizeof(_lastWords)/sizeof(Word))];
- _lastMaxDocIdLimit = (index + 1) * WordLen;
}
}
@@ -105,7 +75,7 @@ template<typename Update>
void
MultiBitVectorIterator<Update>::strictSeek(uint32_t docId)
{
- for (updateLastValue(docId), _lastValue = _lastValue & checkTab(docId);
+ for (updateLastValue(docId), _lastValue=_lastValue & checkTab(docId);
(_lastValue == 0) && __builtin_expect(! isAtEnd(), true);
updateLastValue(_lastMaxDocIdLimit));
if (__builtin_expect(!isAtEnd(), true)) {
@@ -118,6 +88,21 @@ MultiBitVectorIterator<Update>::strictSeek(uint32_t docId)
}
}
+struct And {
+ typedef BitWord::Word Word;
+ Word operator () (const Word a, const Word b) {
+ return a & b;
+ }
+ static bool isAnd() { return true; }
+};
+
+struct Or {
+ typedef BitWord::Word Word;
+ Word operator () (const Word a, const Word b) {
+ return a | b;
+ }
+ static bool isAnd() { return false; }
+};
typedef MultiBitVectorIterator<And> AndBVIterator;
typedef MultiBitVectorIteratorStrict<And> AndBVIteratorStrict;
@@ -151,15 +136,14 @@ bool canOptimize(const MultiSearch & s) {
MultiBitVectorIteratorBase::MultiBitVectorIteratorBase(Children children) :
MultiSearch(std::move(children)),
_numDocs(std::numeric_limits<unsigned int>::max()),
- _lastMaxDocIdLimit(0),
- _lastMaxDocIdLimitRequireFetch(0),
_lastValue(0),
+ _lastMaxDocIdLimit(0),
_bvs()
{
_bvs.reserve(getChildren().size());
- for (const auto & child : getChildren()) {
- const auto * bv = static_cast<const BitVectorIterator *>(child.get());
- _bvs.emplace_back(bv->getBitValues(), bv->isInverted());
+ for (size_t i(0); i < getChildren().size(); i++) {
+ const auto * bv = static_cast<const BitVectorIterator *>(getChildren()[i].get());
+ _bvs.emplace_back(reinterpret_cast<const Word *>(bv->getBitValues()), bv->isInverted());
_numDocs = std::min(_numDocs, bv->getDocIdLimit());
}
}
@@ -171,7 +155,6 @@ MultiBitVectorIteratorBase::initRange(uint32_t beginId, uint32_t endId)
{
MultiSearch::initRange(beginId, endId);
_lastMaxDocIdLimit = 0;
- _lastMaxDocIdLimitRequireFetch = 0;
}
SearchIterator::UP
@@ -180,10 +163,9 @@ MultiBitVectorIteratorBase::andWith(UP filter, uint32_t estimate)
(void) estimate;
if (filter->isBitVector() && acceptExtraFilter()) {
const auto & bv = static_cast<const BitVectorIterator &>(*filter);
- _bvs.emplace_back(bv.getBitValues(), bv.isInverted());
+ _bvs.emplace_back(reinterpret_cast<const Word *>(bv.getBitValues()), bv.isInverted());
insert(getChildren().size(), std::move(filter));
_lastMaxDocIdLimit = 0; // force reload
- _lastMaxDocIdLimitRequireFetch = 0;
}
return filter;
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h
index 29e92584ffe..cde9ffcbfe5 100644
--- a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h
+++ b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h
@@ -11,7 +11,7 @@ namespace search::queryeval {
class MultiBitVectorIteratorBase : public MultiSearch, protected BitWord
{
public:
- ~MultiBitVectorIteratorBase() override;
+ ~MultiBitVectorIteratorBase();
void initRange(uint32_t beginId, uint32_t endId) override;
void addUnpackIndex(size_t index) { _unpackInfo.add(index); }
/**
@@ -20,21 +20,26 @@ public:
*/
static SearchIterator::UP optimize(SearchIterator::UP parent);
protected:
- MultiBitVectorIteratorBase(Children hildren);
- using MetaWord = std::pair<const void *, bool>;
+ MultiBitVectorIteratorBase(Children children);
+ class MetaWord {
+ public:
+ MetaWord(const Word * words, bool inverted) : _words(words), _inverted(inverted) { }
+ Word operator [] (uint32_t index) const { return _inverted ? ~_words[index] : _words[index]; }
+ private:
+ const Word * _words;
+ bool _inverted;
+ };
uint32_t _numDocs;
- uint32_t _lastMaxDocIdLimit; // next documentid requiring recomputation.
- uint32_t _lastMaxDocIdLimitRequireFetch;
Word _lastValue; // Last value computed
+ uint32_t _lastMaxDocIdLimit; // next documentid requiring recomputation.
std::vector<MetaWord> _bvs;
private:
virtual bool acceptExtraFilter() const = 0;
UP andWith(UP filter, uint32_t estimate) override;
void doUnpack(uint32_t docid) override;
+ UnpackInfo _unpackInfo;
static SearchIterator::UP optimizeMultiSearch(SearchIterator::UP parent);
-
- UnpackInfo _unpackInfo;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_functions.h b/searchlib/src/vespa/searchlib/tensor/distance_functions.h
index d37495e85da..79f987c740c 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_functions.h
+++ b/searchlib/src/vespa/searchlib/tensor/distance_functions.h
@@ -17,7 +17,7 @@ template <typename FloatType>
class SquaredEuclideanDistance : public DistanceFunction {
public:
SquaredEuclideanDistance()
- : _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator())
+ : _computer(vespalib::hwaccelrated::IAccelrated::getAccelrator())
{}
double calc(const vespalib::tensor::TypedCells& lhs, const vespalib::tensor::TypedCells& rhs) const override {
auto lhs_vector = lhs.typify<FloatType>();
@@ -60,7 +60,7 @@ template <typename FloatType>
class AngularDistance : public DistanceFunction {
public:
AngularDistance()
- : _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator())
+ : _computer(vespalib::hwaccelrated::IAccelrated::getAccelrator())
{}
double calc(const vespalib::tensor::TypedCells& lhs, const vespalib::tensor::TypedCells& rhs) const override {
auto lhs_vector = lhs.typify<FloatType>();
diff --git a/vespalib/src/tests/dotproduct/dotproductbenchmark.cpp b/vespalib/src/tests/dotproduct/dotproductbenchmark.cpp
index e95e8a5c58b..d6e1aef9394 100644
--- a/vespalib/src/tests/dotproduct/dotproductbenchmark.cpp
+++ b/vespalib/src/tests/dotproduct/dotproductbenchmark.cpp
@@ -60,7 +60,7 @@ template <typename T>
FullBenchmark<T>::FullBenchmark(size_t numDocs, size_t numValues)
: _values(numDocs*numValues),
_query(numValues),
- _dp(IAccelrated::getAccelerator())
+ _dp(IAccelrated::getAccelrator())
{
for (size_t i(0); i < numDocs; i++) {
for (size_t j(0); j < numValues; j++) {
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp
index 8588a5510f7..7ff393c87f8 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp
@@ -20,14 +20,4 @@ Avx2Accelrator::squaredEuclideanDistance(const double * a, const double * b, siz
return avx::euclideanDistanceSelectAlignment<double, 32>(a, b, sz);
}
-void
-Avx2Accelrator::and64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const {
- helper::andChunks<32u, 2u>(offset, src, dest);
-}
-
-void
-Avx2Accelrator::or64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const {
- helper::orChunks<32u, 2u>(offset, src, dest);
-}
-
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx2.h b/vespalib/src/vespa/vespalib/hwaccelrated/avx2.h
index b6f3d299748..3e0dbb28110 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/avx2.h
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx2.h
@@ -15,8 +15,6 @@ public:
size_t populationCount(const uint64_t *a, size_t sz) const override;
double squaredEuclideanDistance(const float * a, const float * b, size_t sz) const override;
double squaredEuclideanDistance(const double * a, const double * b, size_t sz) const override;
- void and64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const override;
- void or64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const override;
};
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp
index 4dade08e77a..0941e6d6ad8 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp
@@ -32,14 +32,4 @@ Avx512Accelrator::squaredEuclideanDistance(const double * a, const double * b, s
return avx::euclideanDistanceSelectAlignment<double, 64>(a, b, sz);
}
-void
-Avx512Accelrator::and64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const {
- helper::andChunks<64, 1>(offset, src, dest);
-}
-
-void
-Avx512Accelrator::or64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const {
- helper::orChunks<64, 1>(offset, src, dest);
-}
-
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx512.h b/vespalib/src/vespa/vespalib/hwaccelrated/avx512.h
index a54d57407b2..209ec06c857 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/avx512.h
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx512.h
@@ -17,8 +17,6 @@ public:
size_t populationCount(const uint64_t *a, size_t sz) const override;
double squaredEuclideanDistance(const float * a, const float * b, size_t sz) const override;
double squaredEuclideanDistance(const double * a, const double * b, size_t sz) const override;
- void and64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const override;
- void or64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const override;
};
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp
index f9dfaacf626..f9684e88c63 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp
@@ -165,14 +165,4 @@ GenericAccelrator::squaredEuclideanDistance(const double * a, const double * b,
return euclideanDistanceT<double, 4>(a, b, sz);
}
-void
-GenericAccelrator::and64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const {
- helper::andChunks<16, 4>(offset, src, dest);
-}
-
-void
-GenericAccelrator::or64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const {
- helper::orChunks<16,4>(offset, src, dest);
-}
-
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/generic.h b/vespalib/src/vespa/vespalib/hwaccelrated/generic.h
index 2335b40fe85..50a3d59d49d 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/generic.h
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/generic.h
@@ -25,8 +25,6 @@ public:
size_t populationCount(const uint64_t *a, size_t sz) const override;
double squaredEuclideanDistance(const float * a, const float * b, size_t sz) const override;
double squaredEuclideanDistance(const double * a, const double * b, size_t sz) const override;
- void and64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const override;
- void or64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const override;
};
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp
index de917c5f065..bb132165e53 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp
@@ -46,8 +46,7 @@ std::vector<T> createAndFill(size_t sz) {
}
template<typename T>
-void
-verifyDotproduct(const IAccelrated & accel)
+void verifyDotproduct(const IAccelrated & accel)
{
const size_t testLength(255);
srand(1);
@@ -67,8 +66,7 @@ verifyDotproduct(const IAccelrated & accel)
}
template<typename T>
-void
-verifyEuclideanDistance(const IAccelrated & accel) {
+void verifyEuclideanDistance(const IAccelrated & accel) {
const size_t testLength(255);
srand(1);
std::vector<T> a = createAndFill<T>(testLength);
@@ -86,8 +84,7 @@ verifyEuclideanDistance(const IAccelrated & accel) {
}
}
-void
-verifyPopulationCount(const IAccelrated & accel)
+void verifyPopulationCount(const IAccelrated & accel)
{
const uint64_t words[7] = {0x123456789abcdef0L, // 32
0x0000000000000000L, // 0
@@ -104,118 +101,6 @@ verifyPopulationCount(const IAccelrated & accel)
}
}
-void
-fill(std::vector<uint64_t> & v, size_t n) {
- v.reserve(n);
- for (size_t i(0); i < n; i++) {
- v.emplace_back(random());
- }
-}
-
-void
-simpleAndWith(std::vector<uint64_t> & dest, const std::vector<uint64_t> & src) {
- for (size_t i(0); i < dest.size(); i++) {
- dest[i] &= src[i];
- }
-}
-
-void
-simpleOrWith(std::vector<uint64_t> & dest, const std::vector<uint64_t> & src) {
- for (size_t i(0); i < dest.size(); i++) {
- dest[i] |= src[i];
- }
-}
-
-std::vector<uint64_t>
-simpleInvert(const std::vector<uint64_t> & src) {
- std::vector<uint64_t> inverted;
- inverted.reserve(src.size());
- for (size_t i(0); i < src.size(); i++) {
- inverted.push_back(~src[i]);
- }
- return inverted;
-}
-
-std::vector<uint64_t>
-optionallyInvert(bool invert, std::vector<uint64_t> v) {
- return invert ? simpleInvert(std::move(v)) : std::move(v);
-}
-
-bool shouldInvert(bool invertSome) {
- return invertSome ? (random() & 1) : false;
-}
-
-void
-verifyOr64(const IAccelrated & accel, const std::vector<std::vector<uint64_t>> & vectors,
- size_t offset, size_t num_vectors, bool invertSome)
-{
- std::vector<std::pair<const void *, bool>> vRefs;
- for (size_t j(0); j < num_vectors; j++) {
- vRefs.emplace_back(&vectors[j][0], shouldInvert(invertSome));
- }
-
- std::vector<uint64_t> expected = optionallyInvert(vRefs[0].second, vectors[0]);
- for (size_t j = 1; j < num_vectors; j++) {
- simpleOrWith(expected, optionallyInvert(vRefs[j].second, vectors[j]));
- }
-
- uint64_t dest[8] __attribute((aligned(64)));
- accel.or64(offset*sizeof(uint64_t), vRefs, dest);
- int diff = memcmp(&expected[offset], dest, sizeof(dest));
- if (diff != 0) {
- LOG_ABORT("Accelerator fails to compute correct 64 bytes OR");
- }
-}
-
-void
-verifyAnd64(const IAccelrated & accel, const std::vector<std::vector<uint64_t>> & vectors,
- size_t offset, size_t num_vectors, bool invertSome)
-{
- std::vector<std::pair<const void *, bool>> vRefs;
- for (size_t j(0); j < num_vectors; j++) {
- vRefs.emplace_back(&vectors[j][0], shouldInvert(invertSome));
- }
- std::vector<uint64_t> expected = optionallyInvert(vRefs[0].second, vectors[0]);
- for (size_t j = 1; j < num_vectors; j++) {
- simpleAndWith(expected, optionallyInvert(vRefs[j].second, vectors[j]));
- }
-
- uint64_t dest[8] __attribute((aligned(64)));
- accel.and64(offset*sizeof(uint64_t), vRefs, dest);
- int diff = memcmp(&expected[offset], dest, sizeof(dest));
- if (diff != 0) {
- LOG_ABORT("Accelerator fails to compute correct 64 bytes AND");
- }
-}
-
-void
-verifyOr64(const IAccelrated & accel) {
- std::vector<std::vector<uint64_t>> vectors(3) ;
- for (auto & v : vectors) {
- fill(v, 16);
- }
- for (size_t offset = 0; offset < 8; offset++) {
- for (size_t i = 1; i < vectors.size(); i++) {
- verifyOr64(accel, vectors, offset, i, false);
- verifyOr64(accel, vectors, offset, i, true);
- }
- }
-}
-
-void
-verifyAnd64(const IAccelrated & accel) {
- std::vector<std::vector<uint64_t>> vectors(3);
- for (auto & v : vectors) {
- fill(v, 16);
- }
- for (size_t offset = 0; offset < 8; offset++) {
- for (size_t i = 1; i < vectors.size(); i++) {
- verifyAnd64(accel, vectors, offset, i, false);
- verifyAnd64(accel, vectors, offset, i, true);
- }
- }
-}
-
class RuntimeVerificator
{
public:
@@ -229,8 +114,6 @@ private:
verifyEuclideanDistance<float>(accelrated);
verifyEuclideanDistance<double>(accelrated);
verifyPopulationCount(accelrated);
- verifyAnd64(accelrated);
- verifyOr64(accelrated);
}
};
@@ -239,7 +122,7 @@ RuntimeVerificator::RuntimeVerificator()
GenericAccelrator generic;
verify(generic);
- const IAccelrated & thisCpu(IAccelrated::getAccelerator());
+ const IAccelrated & thisCpu(IAccelrated::getAccelrator());
verify(thisCpu);
}
@@ -272,7 +155,7 @@ static Selector _G_selector;
RuntimeVerificator _G_verifyAccelrator;
const IAccelrated &
-IAccelrated::getAccelerator()
+IAccelrated::getAccelrator()
{
static IAccelrated::UP accelrator = _G_selector.create();
return *accelrator;
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h b/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h
index 2594a48dd33..0292ad14643 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h
@@ -4,7 +4,6 @@
#include <memory>
#include <cstdint>
-#include <vector>
namespace vespalib::hwaccelrated {
@@ -30,12 +29,8 @@ public:
virtual size_t populationCount(const uint64_t *a, size_t sz) const = 0;
virtual double squaredEuclideanDistance(const float * a, const float * b, size_t sz) const = 0;
virtual double squaredEuclideanDistance(const double * a, const double * b, size_t sz) const = 0;
- // AND 64 bytes from multiple, optionally inverted sources
- virtual void and64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const = 0;
- // OR 64 bytes from multiple, optionally inverted sources
- virtual void or64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const = 0;
- static const IAccelrated & getAccelerator() __attribute__((noinline));
+ static const IAccelrated & getAccelrator() __attribute__((noinline));
};
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp b/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp
index 6fc49f969f2..f5daf2b9081 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp
@@ -24,51 +24,5 @@ populationCount(const uint64_t *a, size_t sz) {
return count;
}
-template<typename T>
-T get(const void * base, bool invert) {
- T v;
- memcpy(&v, base, sizeof(T));
- return __builtin_expect(invert, false) ? ~v : v;
-}
-
-template <typename T>
-const T * cast(const void * ptr, size_t offsetBytes) {
- return static_cast<const T *>(static_cast<const void *>(static_cast<const char *>(ptr) + offsetBytes));
-}
-
-template<unsigned ChunkSize, unsigned Chunks>
-void
-andChunks(size_t offset, const std::vector<std::pair<const void *, bool>> & src, void * dest) {
- typedef uint64_t Chunk __attribute__ ((vector_size (ChunkSize)));
- Chunk * chunk = static_cast<Chunk *>(dest);
- const Chunk * tmp = cast<Chunk>(src[0].first, offset);
- for (size_t n=0; n < Chunks; n++) {
- chunk[n] = get<Chunk>(tmp+n, src[0].second);
- }
- for (size_t i(1); i < src.size(); i++) {
- tmp = cast<Chunk>(src[i].first, offset);
- for (size_t n=0; n < Chunks; n++) {
- chunk[n] &= get<Chunk>(tmp+n, src[i].second);
- }
- }
-}
-
-template<unsigned ChunkSize, unsigned Chunks>
-void
-orChunks(size_t offset, const std::vector<std::pair<const void *, bool>> & src, void * dest) {
- typedef uint64_t Chunk __attribute__ ((vector_size (ChunkSize)));
- Chunk * chunk = static_cast<Chunk *>(dest);
- const Chunk * tmp = cast<Chunk>(src[0].first, offset);
- for (size_t n=0; n < Chunks; n++) {
- chunk[n] = get<Chunk>(tmp+n, src[0].second);
- }
- for (size_t i(1); i < src.size(); i++) {
- tmp = cast<Chunk>(src[i].first, offset);
- for (size_t n=0; n < Chunks; n++) {
- chunk[n] |= get<Chunk>(tmp+n, src[i].second);
- }
- }
-}
-
}
}