summaryrefslogtreecommitdiffstats
path: root/vespalib
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2020-01-24 15:10:15 +0000
committer Henning Baldersheim <balder@yahoo-inc.com> 2020-01-24 15:10:15 +0000
commit 5cb24ee230a6d5f7eb1155b2746c6a3f11d28b16 (patch)
tree d2d9628909144c2a1affb432392d34fc39658d55 /vespalib
parent 5ff453a5a69bbae2f05ba67240f08774be025e79 (diff)
Count bits faster when hardware supports it.
Diffstat (limited to 'vespalib')
-rw-r--r-- vespalib/src/vespa/vespalib/hwaccelrated/avx.cpp 5
-rw-r--r-- vespalib/src/vespa/vespalib/hwaccelrated/avx.h 1
-rw-r--r-- vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp 5
-rw-r--r-- vespalib/src/vespa/vespalib/hwaccelrated/avx2.h 1
-rw-r--r-- vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp 5
-rw-r--r-- vespalib/src/vespa/vespalib/hwaccelrated/avx512.h 1
-rw-r--r-- vespalib/src/vespa/vespalib/hwaccelrated/avxprivate.hpp 1
-rw-r--r-- vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp 6
-rw-r--r-- vespalib/src/vespa/vespalib/hwaccelrated/generic.h 1
-rw-r--r-- vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp 31
-rw-r--r-- vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h 1
-rw-r--r-- vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp 27
-rw-r--r-- vespalib/src/vespa/vespalib/hwaccelrated/sse2.cpp 6
-rw-r--r-- vespalib/src/vespa/vespalib/hwaccelrated/sse2.h 2
14 files changed, 87 insertions, 6 deletions
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/avx.cpp
index 14abb93d8d0..39ea0d2d73b 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/avx.cpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx.cpp
@@ -17,4 +17,9 @@ AvxAccelrator::dotProduct(const double * af, const double * bf, size_t sz) const
return avx::dotProductSelectAlignment<double, 32>(af, bf, sz);
}
+size_t // AVX variant: returns the accumulated Optimized::popCount of the sz 64-bit words starting at a.
+AvxAccelrator::populationCount(const uint64_t *a, size_t sz) const {
+ return helper::populationCount(a, sz); // forwards to the helper in private_helpers.hpp; that helper sits in an unnamed namespace, so this .cpp uses its own copy
+}
+
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx.h b/vespalib/src/vespa/vespalib/hwaccelrated/avx.h
index ffbe0b8d27f..624531a9ca5 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/avx.h
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx.h
@@ -14,6 +14,7 @@ class AvxAccelrator : public Sse2Accelrator
public:
float dotProduct(const float * a, const float * b, size_t sz) const override;
double dotProduct(const double * a, const double * b, size_t sz) const override;
+ size_t populationCount(const uint64_t *a, size_t sz) const override; // Overrides the inherited populationCount; defined in avx.cpp.
};
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp
index 4c4e53e88db..ea8a3ead538 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp
@@ -17,4 +17,9 @@ Avx2Accelrator::dotProduct(const double * af, const double * bf, size_t sz) cons
return avx::dotProductSelectAlignment<double, 32>(af, bf, sz);
}
+size_t // AVX2 variant: returns the accumulated Optimized::popCount of the sz 64-bit words starting at a.
+Avx2Accelrator::populationCount(const uint64_t *a, size_t sz) const {
+ return helper::populationCount(a, sz); // forwards to the helper in private_helpers.hpp; that helper sits in an unnamed namespace, so this .cpp uses its own copy
+}
+
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx2.h b/vespalib/src/vespa/vespalib/hwaccelrated/avx2.h
index f20068c6478..cf91bc81cfd 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/avx2.h
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx2.h
@@ -14,6 +14,7 @@ class Avx2Accelrator : public AvxAccelrator
public:
float dotProduct(const float * a, const float * b, size_t sz) const override;
double dotProduct(const double * a, const double * b, size_t sz) const override;
+ size_t populationCount(const uint64_t *a, size_t sz) const override; // Overrides the inherited populationCount; defined in avx2.cpp.
};
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp
index 4d21c9358ec..1abf6b270cf 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp
@@ -17,4 +17,9 @@ Avx512Accelrator::dotProduct(const double * af, const double * bf, size_t sz) co
return avx::dotProductSelectAlignment<double, 64>(af, bf, sz);
}
+size_t // AVX-512 variant: returns the accumulated Optimized::popCount of the sz 64-bit words starting at a.
+Avx512Accelrator::populationCount(const uint64_t *a, size_t sz) const {
+ return helper::populationCount(a, sz); // forwards to the helper in private_helpers.hpp; that helper sits in an unnamed namespace, so this .cpp uses its own copy
+}
+
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx512.h b/vespalib/src/vespa/vespalib/hwaccelrated/avx512.h
index 5807aeeee57..eac8c96832b 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/avx512.h
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx512.h
@@ -14,6 +14,7 @@ class Avx512Accelrator : public Avx2Accelrator
public:
float dotProduct(const float * a, const float * b, size_t sz) const override;
double dotProduct(const double * a, const double * b, size_t sz) const override;
+ size_t populationCount(const uint64_t *a, size_t sz) const override; // Overrides the inherited populationCount; defined in avx512.cpp.
};
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avxprivate.hpp b/vespalib/src/vespa/vespalib/hwaccelrated/avxprivate.hpp
index 2db7ebfd8fd..9e6a6d8817f 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/avxprivate.hpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/avxprivate.hpp
@@ -2,6 +2,7 @@
#pragma once
+#include "private_helpers.hpp"
#include <vespa/fastos/dynamiclibrary.h>
#include <cstring>
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp
index d70071525c6..b70ebb4051a 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp
@@ -1,6 +1,7 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "generic.h"
+#include "private_helpers.hpp"
namespace vespalib::hwaccelrated {
@@ -124,4 +125,9 @@ GenericAccelrator::notBit(void * aOrg, size_t bytes) const
}
}
+size_t // Generic variant: returns the accumulated Optimized::popCount of the sz 64-bit words starting at a.
+GenericAccelrator::populationCount(const uint64_t *a, size_t sz) const {
+ return helper::populationCount(a, sz); // forwards to the helper from private_helpers.hpp, which this file includes directly
+}
+
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/generic.h b/vespalib/src/vespa/vespalib/hwaccelrated/generic.h
index f9aab3ae845..d76d0728bdd 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/generic.h
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/generic.h
@@ -22,6 +22,7 @@ public:
void andBit(void * a, const void * b, size_t bytes) const override;
void andNotBit(void * a, const void * b, size_t bytes) const override;
void notBit(void * a, size_t bytes) const override;
+ size_t populationCount(const uint64_t *a, size_t sz) const override; // Overrides the base-class populationCount; defined in generic.cpp.
};
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp
index aae277b48d8..4006897dce5 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp
@@ -6,6 +6,7 @@
#include "avx.h"
#include "avx2.h"
#include "avx512.h"
+#include <vespa/vespalib/util/memory.h>
#include <vespa/log/log.h>
LOG_SETUP(".vespalib.hwaccelrated");
@@ -22,27 +23,27 @@ public:
class GenericFactory :public Factory{
public:
- IAccelrated::UP create() const override { return IAccelrated::UP(new GenericAccelrator()); }
+ IAccelrated::UP create() const override { return std::make_unique<GenericAccelrator>(); } // Builds a new GenericAccelrator with std::make_unique (replaces the raw new) and returns it as IAccelrated::UP.
};
class Sse2Factory :public Factory{
public:
- IAccelrated::UP create() const override { return IAccelrated::UP(new Sse2Accelrator()); }
+ IAccelrated::UP create() const override { return std::make_unique<Sse2Accelrator>(); } // Builds a new Sse2Accelrator with std::make_unique (replaces the raw new) and returns it as IAccelrated::UP.
};
class AvxFactory :public Factory{
public:
- IAccelrated::UP create() const override { return IAccelrated::UP(new AvxAccelrator()); }
+ IAccelrated::UP create() const override { return std::make_unique<AvxAccelrator>(); } // Builds a new AvxAccelrator with std::make_unique (replaces the raw new) and returns it as IAccelrated::UP.
};
class Avx2Factory :public Factory{
public:
- IAccelrated::UP create() const override { return IAccelrated::UP(new Avx2Accelrator()); }
+ IAccelrated::UP create() const override { return std::make_unique<Avx2Accelrator>(); } // Builds a new Avx2Accelrator with std::make_unique (replaces the raw new) and returns it as IAccelrated::UP.
};
class Avx512Factory :public Factory{
public:
- IAccelrated::UP create() const override { return IAccelrated::UP(new Avx512Accelrator()); }
+ IAccelrated::UP create() const override { return std::make_unique<Avx512Accelrator>(); } // Builds a new Avx512Accelrator with std::make_unique (replaces the raw new) and returns it as IAccelrated::UP.
};
template<typename T>
@@ -67,6 +68,23 @@ void verifyAccelrator(const IAccelrated & accel)
delete [] b;
}
+void verifyPopulationCount(const IAccelrated & accel) // Self-check: runs accel.populationCount over a fixed 7-word sample and aborts if the total differs from the known bit count.
+{ // NOTE(review): the visible hunk only adds a call for the generic accelerator — confirm the CPU-specific accelerator is verified as well.
+ const uint64_t words[7] = {0x123456789abcdef0L, // 32
+ 0x0000000000000000L, // 0
+ 0x8000000000000000L, // 1
+ 0xdeadbeefbeefdeadUL, // 48
+ 0x5555555555555555L, // 32
+ 0x0000000000000001L, // 1
+ 0xffffffffffffffff}; // 64
+ constexpr size_t expected = 32 + 0 + 1 + 48 + 32 + 1 + 64; // sum of the per-word bit counts annotated above
+ size_t hwComputedPopulationCount = accel.populationCount(words, VESPA_NELEMS(words)); // VESPA_NELEMS presumably yields the array length (7) — confirm in vespalib/util/memory.h
+ if (hwComputedPopulationCount != expected) {
+ fprintf(stderr, "Accelrator is not computing populationCount correctly. Expected %zu, computed %zu\n", expected, hwComputedPopulationCount);
+ LOG_ABORT("should not be reached"); // presumably logs and terminates the process; a wrong count is treated as fatal
+ }
+}
+
class RuntimeVerificator
{
public:
@@ -79,7 +97,8 @@ RuntimeVerificator::RuntimeVerificator()
verifyAccelrator<float>(generic);
verifyAccelrator<double>(generic);
verifyAccelrator<int32_t>(generic);
- verifyAccelrator<int64_t>(generic);
+ verifyAccelrator<int64_t>(generic);
+ verifyPopulationCount(generic);
IAccelrated::UP thisCpu(IAccelrated::getAccelrator());
verifyAccelrator<float>(*thisCpu);
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h b/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h
index aae60279d06..4031169c44d 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h
@@ -26,6 +26,7 @@ public:
virtual void andBit(void * a, const void * b, size_t bytes) const = 0;
virtual void andNotBit(void * a, const void * b, size_t bytes) const = 0;
virtual void notBit(void * a, size_t bytes) const = 0;
+ virtual size_t populationCount(const uint64_t *a, size_t sz) const = 0; // Total number of set bits across the sz 64-bit words starting at a; every accelerator must implement it.
static IAccelrated::UP getAccelrator() __attribute__((noinline));
};
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp b/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp
new file mode 100644
index 00000000000..8eba313d5f1
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp
@@ -0,0 +1,27 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/util/optimized.h>
+
+namespace vespalib::hwaccelrated::helper {
+namespace { // NOTE(review): unnamed namespace in a header gives every including .cpp its own internal-linkage copy — presumably intentional so each accelerator file compiles this loop with its own instruction-set flags; confirm before moving it out.
+
+inline size_t // Returns the sum of Optimized::popCount over a[0] .. a[sz-1]; yields 0 when sz == 0.
+populationCount(const uint64_t *a, size_t sz) {
+ size_t count(0);
+ size_t i(0);
+ for (; (i + 3) < sz; i += 4) { // main loop: consumes four words per iteration while at least four remain
+ count += Optimized::popCount(a[i + 0]) +
+ Optimized::popCount(a[i + 1]) +
+ Optimized::popCount(a[i + 2]) +
+ Optimized::popCount(a[i + 3]);
+ }
+ for (; i < sz; i++) { // tail loop: handles the 0-3 words left over
+ count += Optimized::popCount(a[i]);
+ }
+ return count;
+}
+
+}
+} \ No newline at end of file
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/sse2.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/sse2.cpp
index f135de52e5a..a0f584f8a9f 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/sse2.cpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/sse2.cpp
@@ -1,6 +1,7 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "sse2.h"
+#include "private_helpers.hpp"
namespace vespalib::hwaccelrated {
@@ -76,4 +77,9 @@ Sse2Accelrator::dotProduct(const double * af, const double * bf, size_t sz) cons
return sum;
}
+size_t // SSE2 variant: returns the accumulated Optimized::popCount of the sz 64-bit words starting at a.
+Sse2Accelrator::populationCount(const uint64_t *a, size_t sz) const {
+ return helper::populationCount(a, sz); // forwards to the helper from private_helpers.hpp, which this file includes directly
+}
+
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/sse2.h b/vespalib/src/vespa/vespalib/hwaccelrated/sse2.h
index a539aa44b03..d0fbefe5f03 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/sse2.h
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/sse2.h
@@ -14,6 +14,8 @@ class Sse2Accelrator : public GenericAccelrator
public:
float dotProduct(const float * a, const float * b, size_t sz) const override;
double dotProduct(const double * a, const double * b, size_t sz) const override;
+
+ size_t populationCount(const uint64_t *a, size_t sz) const override; // Overrides the inherited populationCount; defined in sse2.cpp.
};
}