summary | refs | log | tree | commit | diff | stats
path: root/vespalib
diff options
context:
space:
mode:
author: Henning Baldersheim <balder@yahoo-inc.com> 2019-04-01 07:38:39 +0000
committer: Henning Baldersheim <balder@yahoo-inc.com> 2019-04-01 07:38:39 +0000
commit: ca2e83ed3f8eef6e69c6110ee32d1c59d5d19c2a (patch)
tree: 2063bd3e6db381bdc0f3b159f6e6bbc4bb5e83d3 /vespalib
parent: aaf479240531f08548bb236ade6f99b272481051 (diff)
Revert unintended change.
Diffstat (limited to 'vespalib')
-rw-r--r-- vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp 5
-rw-r--r-- vespalib/src/vespa/vespalib/hwaccelrated/sse2.cpp 12
2 files changed, 0 insertions, 17 deletions
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp
index 4295f9850c5..1bf7ea1c44c 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp
@@ -10,11 +10,6 @@ template <typename ACCUM, typename T, size_t UNROLL>
ACCUM
multiplyAdd(const T * a, const T * b, size_t sz)
{
-#if 1
- for (int i(0); i < 16; i++) {
- __builtin_prefetch(&b[(4+i)*(64/sizeof(T))], 0, 0);
- }
-#endif
ACCUM partial[UNROLL];
for (size_t i(0); i < UNROLL; i++) {
partial[i] = 0;
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/sse2.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/sse2.cpp
index 209de4666a2..f135de52e5a 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/sse2.cpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/sse2.cpp
@@ -16,20 +16,12 @@ bool validAlignment16(const void * a, const void * b) {
}
-#define PREFETCH_DISTANCE 16
-#define L1_DIST 4
float
Sse2Accelrator::dotProduct(const float * af, const float * bf, size_t sz) const
{
if ( ! validAlignment16(af, bf)) {
return GenericAccelrator::dotProduct(af, bf, sz);
}
-
-#if PREFETCH_DISTANCE > 0
- for (int i(0); i < PREFETCH_DISTANCE; i++) {
- __builtin_prefetch(&bf[i*16], 0, 0);
- }
-#endif
typedef float v4sf __attribute__ ((vector_size (16)));
const size_t ChunkSize(16);
const size_t VectorsPerChunk(ChunkSize/4);
@@ -39,10 +31,6 @@ Sse2Accelrator::dotProduct(const float * af, const float * bf, size_t sz) const
const size_t numChunks(sz/ChunkSize);
for (size_t i(0); i < numChunks; i++) {
-#if PREFETCH_DISTANCE > 0
- __builtin_prefetch(&bf[(i+PREFETCH_DISTANCE)*16], 0, 0);
- __builtin_prefetch(&af[((i+L1_DIST)&0xf)*16], 0, 3);
-#endif
for (size_t j(0); j < VectorsPerChunk; j++) {
partial[j] += a[VectorsPerChunk*i+j] * b[VectorsPerChunk*i+j];
}