diff options
author | Geir Storli <geirst@yahoo-inc.com> | 2016-10-24 14:28:00 +0200 |
---|---|---|
committer | Geir Storli <geirst@yahoo-inc.com> | 2016-10-24 14:28:00 +0200 |
commit | 292817945280d84896d3b137d3c2c2385d7b15b4 (patch) | |
tree | 3f457c1630a9e6070eab59edc9b1e2bf2fe99d7e /staging_vespalib | |
parent | b73bd2dded336d472a67c53539500160d6d28796 (diff) |
Move hwaccelrated library from staging_vespalib to vespalib (for usage in tensor code).
Diffstat (limited to 'staging_vespalib')
19 files changed, 0 insertions, 866 deletions
diff --git a/staging_vespalib/CMakeLists.txt b/staging_vespalib/CMakeLists.txt index 5184d98d1ce..652200e2d7b 100644 --- a/staging_vespalib/CMakeLists.txt +++ b/staging_vespalib/CMakeLists.txt @@ -16,7 +16,6 @@ vespa_define_module( src/tests/crc src/tests/databuffer src/tests/directio - src/tests/dotproduct src/tests/encoding/base64 src/tests/fileheader src/tests/floatingpointtype @@ -42,7 +41,6 @@ vespa_define_module( src/vespa/vespalib src/vespa/vespalib/data src/vespa/vespalib/encoding - src/vespa/vespalib/hwaccelrated src/vespa/vespalib/net src/vespa/vespalib/objects src/vespa/vespalib/stllike diff --git a/staging_vespalib/src/tests/dotproduct/.gitignore b/staging_vespalib/src/tests/dotproduct/.gitignore deleted file mode 100644 index 5d9432fbd08..00000000000 --- a/staging_vespalib/src/tests/dotproduct/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -dotproductbenchmark -staging_vespalib_dotproductbenchmark_app diff --git a/staging_vespalib/src/tests/dotproduct/CMakeLists.txt b/staging_vespalib/src/tests/dotproduct/CMakeLists.txt deleted file mode 100644 index 30a02632f1c..00000000000 --- a/staging_vespalib/src/tests/dotproduct/CMakeLists.txt +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(staging_vespalib_dotproductbenchmark_app - SOURCES - dotproductbenchmark.cpp - DEPENDS - staging_vespalib -) -vespa_add_test(NAME staging_vespalib_dotproductbenchmark_app_sparse-ordered COMMAND staging_vespalib_dotproductbenchmark_app 10 10 1000 1000 BENCHMARK) -vespa_add_test(NAME staging_vespalib_dotproductbenchmark_app_sparse-unordered COMMAND staging_vespalib_dotproductbenchmark_app 10 10 1000 1000 BENCHMARK) -vespa_add_test(NAME staging_vespalib_dotproductbenchmark_app_full COMMAND staging_vespalib_dotproductbenchmark_app 10 10 1000 1000 BENCHMARK) - -# benchmark: dotproductbenchmark -# $(HIDE)$(LDL) time $(VALGRIND) ./dotproductbenchmark sparse-ordered 1000 1000 1000 1000 -# $(HIDE)$(LDL) time $(VALGRIND) ./dotproductbenchmark sparse-unordered 1000 1000 1000 1000 -# $(HIDE)$(LDL) time $(VALGRIND) ./dotproductbenchmark full 1000 1000 1000 1000 -# $(HIDE)$(LDL) time $(VALGRIND) ./dotproductbenchmark sparse-ordered 1000 1000 100 1000 -# $(HIDE)$(LDL) time $(VALGRIND) ./dotproductbenchmark sparse-unordered 1000 1000 100 1000 -# $(HIDE)$(LDL) time $(VALGRIND) ./dotproductbenchmark full 1000 1000 100 1000 -# $(HIDE)$(LDL) time $(VALGRIND) ./dotproductbenchmark sparse-ordered 1000 1000 1000 100 -# $(HIDE)$(LDL) time $(VALGRIND) ./dotproductbenchmark sparse-unordered 1000 1000 1000 100 -# $(HIDE)$(LDL) time $(VALGRIND) ./dotproductbenchmark full 1000 1000 1000 100 diff --git a/staging_vespalib/src/tests/dotproduct/dotproductbenchmark.cpp b/staging_vespalib/src/tests/dotproduct/dotproductbenchmark.cpp deleted file mode 100644 index 9ef3d959f3b..00000000000 --- a/staging_vespalib/src/tests/dotproduct/dotproductbenchmark.cpp +++ /dev/null @@ -1,179 +0,0 @@ -// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/fastos/fastos.h> -#include <vespa/vespalib/hwaccelrated/iaccelrated.h> -#include <vespa/vespalib/stllike/string.h> -#include <vespa/vespalib/stllike/hash_map.h> -#include <iostream> - -using namespace vespalib; -using vespalib::hwaccelrated::IAccelrated; - -class Benchmark { -public: - virtual ~Benchmark() { } - virtual void compute(size_t docId) const = 0; -}; - -void -runBenchmark(size_t count, size_t docs, const Benchmark & benchmark) -{ - for (size_t i(0); i < count; i++) { - for (size_t docId(0); docId < docs; docId++) { - benchmark.compute(docId); - } - } -} - -template <typename T> -class FullBenchmark : public Benchmark -{ -public: - FullBenchmark(size_t numDocs, size_t numValues) : - _values(numDocs*numValues), - _query(numValues), - _dp(IAccelrated::getAccelrator()) - { - for (size_t i(0); i < numDocs; i++) { - for (size_t j(0); j < numValues; j++) { - _values[i*numValues + j] = j; - } - } - for (size_t j(0); j < numValues; j++) { - _query[j] = j; - } - } - virtual void compute(size_t docId) const { - _dp->dotProduct(&_query[0], &_values[docId * _query.size()], _query.size()); - } -private: - std::vector<T> _values; - std::vector<T> _query; - IAccelrated::UP _dp; -}; - -class SparseBenchmark : public Benchmark -{ -public: - SparseBenchmark(size_t numDocs, size_t numValues, size_t numQueryValues) : - _numValues(numValues), - _values(numDocs*numValues) - { - for (size_t i(0); i < numDocs; i++) { - for (size_t j(0); j < numValues; j++) { - size_t k(numValues < numQueryValues ? (j*numQueryValues)/numValues : j); - _values[i*numValues + j] = P(k, k); - } - } - } -protected: - struct P { - P(uint32_t key=0, int32_t value=0) : - _key(key), - _value(value) - { } - uint32_t _key; - int32_t _value; - }; - size_t _numValues; - std::vector<P> _values; -}; - -class UnorderedSparseBenchmark : public SparseBenchmark -{ -private: - typedef hash_map<uint32_t, int32_t> map; -public: - UnorderedSparseBenchmark(size_t numDocs, size_t numValues, size_t numQueryValues) : - SparseBenchmark(numDocs, numValues, numQueryValues) - { - for (size_t j(0); j < numQueryValues; j++) { - _query[j] = j; - } - } -private: - virtual void compute(size_t docId) const { - int64_t sum(0); - size_t offset(docId*_numValues); - const auto e(_query.end()); - for (size_t i(0); i < _numValues; i++) { - auto it = _query.find(_values[offset + i]._key); - if (it != e) { - sum += static_cast<int64_t>(_values[offset + i]._value) * it->second; - } - } - } - map _query; -}; - -class OrderedSparseBenchmark : public SparseBenchmark -{ -private: -public: - OrderedSparseBenchmark(size_t numDocs, size_t numValues, size_t numQueryValues) : - SparseBenchmark(numDocs, numValues, numQueryValues), - _query(numQueryValues) - { - for (size_t j(0); j < numQueryValues; j++) { - size_t k(numValues > numQueryValues ? j*numValues/numQueryValues : j); - _query[j] = P(k, k); - } - } -private: - virtual void compute(size_t docId) const { - int64_t sum(0); - size_t offset(docId*_numValues); - - for (size_t a(0), b(0); a < _query.size() && b < _numValues; b++) { - for (; a < _query.size() && (_query[a]._key <= _values[offset + b]._key); a++); - if (_query[a]._key == _values[offset + b]._key) { - sum += static_cast<int64_t>(_values[offset + b]._value) * _query[a]._value; - } - } - } - std::vector<P> _query; -}; - -int main(int argc, char *argv[]) -{ - size_t numDocs(1); - size_t numValues(1000); - size_t numQueryValues(1000); - size_t numQueries(1000000); - string type("full"); - if ( argc > 1) { - type = argv[1]; - } - if ( argc > 2) { - numQueries = strtoul(argv[2], NULL, 0); - } - if ( argc > 3) { - numDocs = strtoul(argv[3], NULL, 0); - } - if ( argc > 4) { - numValues = strtoul(argv[4], NULL, 0); - } - if ( argc > 5) { - numQueryValues = strtoul(argv[5], NULL, 0); - } - - std::cout << "type = " << type << std::endl; - std::cout << "numQueries = " << numQueries << std::endl; - std::cout << "numDocs = " << numDocs << std::endl; - std::cout << "numValues = " << numValues << std::endl; - std::cout << "numQueryValues = " << numQueryValues << std::endl; - if (type == "full") { - FullBenchmark<int32_t> bm(numDocs, numValues); - runBenchmark(numQueries, numDocs, bm); - } else if (type == "sparse-ordered") { - OrderedSparseBenchmark bm(numDocs, numValues, numQueryValues); - runBenchmark(numQueries, numDocs, bm); - } else if (type == "sparse-unordered") { - UnorderedSparseBenchmark bm(numDocs, numValues, numQueryValues); - runBenchmark(numQueries, numDocs, bm); - } else { - std::cerr << "type '" << type << "' is unknown." << std::endl; - } - - return 0; -} - diff --git a/staging_vespalib/src/vespa/vespalib/CMakeLists.txt b/staging_vespalib/src/vespa/vespalib/CMakeLists.txt index 7bdcc9bbf64..640278a583b 100644 --- a/staging_vespalib/src/vespa/vespalib/CMakeLists.txt +++ b/staging_vespalib/src/vespa/vespalib/CMakeLists.txt @@ -8,7 +8,6 @@ vespa_add_library(staging_vespalib $<TARGET_OBJECTS:staging_vespalib_vespalib_stllike> $<TARGET_OBJECTS:staging_vespalib_vespalib_net> $<TARGET_OBJECTS:staging_vespalib_vespalib_trace> - $<TARGET_OBJECTS:staging_vespalib_vespalib_hwaccelrated> INSTALL lib64 DEPENDS ) diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/CMakeLists.txt b/staging_vespalib/src/vespa/vespalib/hwaccelrated/CMakeLists.txt deleted file mode 100644 index 6362cdf6f87..00000000000 --- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_library(staging_vespalib_vespalib_hwaccelrated OBJECT - SOURCES - iaccelrated.cpp - generic.cpp - sse2.cpp - avx.cpp - avx2.cpp - avx512.cpp - DEPENDS -) -set_source_files_properties(avx.cpp PROPERTIES COMPILE_FLAGS -march=sandybridge) -set_source_files_properties(avx2.cpp PROPERTIES COMPILE_FLAGS -march=haswell) -vespa_workaround_set_gcc_march_skylake_avx512_if_supported(avx512.cpp) diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx.cpp b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx.cpp deleted file mode 100644 index ec5064bf647..00000000000 --- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx.cpp +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include <vespa/vespalib/hwaccelrated/avx.h> -#include <vespa/vespalib/hwaccelrated/avxprivate.hpp> - -namespace vespalib { - -namespace hwaccelrated { - -float -AvxAccelrator::dotProduct(const float * af, const float * bf, size_t sz) const -{ - return avx::dotProductSelectAlignment<float, 32>(af, bf, sz); -} - -double -AvxAccelrator::dotProduct(const double * af, const double * bf, size_t sz) const -{ - return avx::dotProductSelectAlignment<double, 32>(af, bf, sz); -} - -} -} diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx.h b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx.h deleted file mode 100644 index 4b391c163ac..00000000000 --- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx.h +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include <vespa/vespalib/hwaccelrated/sse2.h> - -namespace vespalib { - -namespace hwaccelrated { - -/** - * Avx-256 implementation. - */ -class AvxAccelrator : public Sse2Accelrator -{ -public: - float dotProduct(const float * a, const float * b, size_t sz) const override; - double dotProduct(const double * a, const double * b, size_t sz) const override; -}; - -} -} diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp deleted file mode 100644 index f87738e3a6c..00000000000 --- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include <vespa/vespalib/hwaccelrated/avx2.h> -#include <vespa/vespalib/hwaccelrated/avxprivate.hpp> - -namespace vespalib { - -namespace hwaccelrated { - -float -Avx2Accelrator::dotProduct(const float * af, const float * bf, size_t sz) const -{ - return avx::dotProductSelectAlignment<float, 32>(af, bf, sz); -} - -double -Avx2Accelrator::dotProduct(const double * af, const double * bf, size_t sz) const -{ - return avx::dotProductSelectAlignment<double, 32>(af, bf, sz); -} - -} -} diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx2.h b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx2.h deleted file mode 100644 index 56d3a8ac65e..00000000000 --- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx2.h +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include <vespa/vespalib/hwaccelrated/avx.h> - -namespace vespalib { - -namespace hwaccelrated { - -/** - * Avx-512 implementation. - */ -class Avx2Accelrator : public AvxAccelrator -{ -public: - float dotProduct(const float * a, const float * b, size_t sz) const override; - double dotProduct(const double * a, const double * b, size_t sz) const override; -}; - -} -} diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp deleted file mode 100644 index 9f7a6dcda3e..00000000000 --- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include <vespa/vespalib/hwaccelrated/avx512.h> -#include <vespa/vespalib/hwaccelrated/avxprivate.hpp> - -namespace vespalib { - -namespace hwaccelrated { - -float -Avx512Accelrator::dotProduct(const float * af, const float * bf, size_t sz) const -{ - return avx::dotProductSelectAlignment<float, 64>(af, bf, sz); -} - -double -Avx512Accelrator::dotProduct(const double * af, const double * bf, size_t sz) const -{ - return avx::dotProductSelectAlignment<double, 64>(af, bf, sz); -} - -} -} diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx512.h b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx512.h deleted file mode 100644 index 5d7028c30ba..00000000000 --- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx512.h +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include <vespa/vespalib/hwaccelrated/avx2.h> - -namespace vespalib { - -namespace hwaccelrated { - -/** - * Avx-512 implementation. - */ -class Avx512Accelrator : public Avx2Accelrator -{ -public: - float dotProduct(const float * a, const float * b, size_t sz) const override; - double dotProduct(const double * a, const double * b, size_t sz) const override; -}; - -} -} diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avxprivate.hpp b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avxprivate.hpp deleted file mode 100644 index 87a043b3428..00000000000 --- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avxprivate.hpp +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include <vespa/fastos/dynamiclibrary.h> - -namespace vespalib { - -namespace hwaccelrated { - -namespace avx { - -namespace { - -inline bool validAlignment(const void * p, const size_t align) { - return (reinterpret_cast<uint64_t>(p) & (align-1)) == 0; -} - -template <typename T, typename V> -T sumT(const V & v) { - T sum(0); - for (size_t i(0); i < (sizeof(V)/sizeof(T)); i++) { - sum += v[i]; - } - return sum; -} - -template <typename T, size_t VLEN, unsigned AlignA, unsigned AlignB, size_t VectorsPerChunk> -static T computeDotProduct(const T * af, const T * bf, size_t sz) __attribute__((noinline)); - -template <typename T, size_t VLEN, unsigned AlignA, unsigned AlignB, size_t VectorsPerChunk> -T computeDotProduct(const T * af, const T * bf, size_t sz) -{ - constexpr const size_t ChunkSize = VLEN*VectorsPerChunk/sizeof(T); - typedef T V __attribute__ ((vector_size (VLEN))); - typedef T A __attribute__ ((vector_size (VLEN), aligned(AlignA))); - typedef T B __attribute__ ((vector_size (VLEN), aligned(AlignB))); - V partial[VectorsPerChunk]; - memset(partial, 0, sizeof(partial)); - const A * a = reinterpret_cast<const A *>(af); - const B * b = reinterpret_cast<const B *>(bf); - - const size_t numChunks(sz/ChunkSize); - for (size_t i(0); i < numChunks; i++) { - for (size_t j(0); j < VectorsPerChunk; j++) { - partial[j] += a[VectorsPerChunk*i+j] * b[VectorsPerChunk*i+j]; - } - } - T sum(0); - for (size_t i(numChunks*ChunkSize); i < sz; i++) { - sum += af[i] * bf[i]; - } - for (size_t i(1); i < VectorsPerChunk; i++) { - partial[0] += partial[i]; - } - return sum + sumT<T, V>(partial[0]); -} - -} - -template <typename T, size_t VLEN, size_t VectorsPerChunk=4> -VESPA_DLL_LOCAL static T dotProductSelectAlignment(const T * af, const T * bf, size_t sz); - -template <typename T, size_t VLEN, size_t VectorsPerChunk> -T dotProductSelectAlignment(const T * af, const T * bf, size_t sz) -{ - if (validAlignment(af, VLEN)) { - if (validAlignment(bf, VLEN)) { - return computeDotProduct<T, VLEN, VLEN, VLEN, VectorsPerChunk>(af, bf, sz); - } else { - return computeDotProduct<T, VLEN, VLEN, 1, VectorsPerChunk>(af, bf, sz); - } - } else { - if (validAlignment(bf, VLEN)) { - return computeDotProduct<T, VLEN, 1, VLEN, VectorsPerChunk>(af, bf, sz); - } else { - return computeDotProduct<T, VLEN, 1, 1, VectorsPerChunk>(af, bf, sz); - } - } -} - -} -} -} diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp b/staging_vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp deleted file mode 100644 index f218e4172f9..00000000000 --- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp +++ /dev/null @@ -1,125 +0,0 @@ -// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -// Copyright (C) 2003 Fast Search & Transfer ASA -// Copyright (C) 2003 Overture Services Norway AS - -#include <vespa/vespalib/hwaccelrated/generic.h> - -namespace vespalib { - -namespace hwaccelrated { - -namespace { - -template <typename ACCUM, typename T, size_t UNROLL> -ACCUM -multiplyAdd(const T * a, const T * b, size_t sz) -{ - ACCUM partial[UNROLL]; - for (size_t i(0); i < UNROLL; i++) { - partial[i] = 0; - } - size_t i(0); - for (; i + UNROLL <= sz; i+= UNROLL) { - for (size_t j(0); j < UNROLL; j++) { - partial[j] += a[i+j] * b[i+j]; - } - } - for (;i < sz; i++) { - partial[i%UNROLL] += a[i] * b[i]; - } - ACCUM sum(0); - for (size_t j(0); j < UNROLL; j++) { - sum += partial[j]; - } - return sum; -} - -} - -float -GenericAccelrator::dotProduct(const float * a, const float * b, size_t sz) const -{ - return multiplyAdd<float, float, 4>(a, b, sz); -} - -double -GenericAccelrator::dotProduct(const double * a, const double * b, size_t sz) const -{ - return multiplyAdd<double, double, 4>(a, b, sz); -} - -int64_t -GenericAccelrator::dotProduct(const int32_t * a, const int32_t * b, size_t sz) const -{ - return multiplyAdd<int64_t, int32_t, 4>(a, b, sz); -} - -long long -GenericAccelrator::dotProduct(const int64_t * a, const int64_t * b, size_t sz) const -{ - return multiplyAdd<long long, int64_t, 4>(a, b, sz); -} - -void -GenericAccelrator::orBit(void * aOrg, const void * bOrg, size_t bytes) const -{ - uint64_t *a(static_cast<uint64_t *>(aOrg)); - const uint64_t *b(static_cast<const uint64_t *>(bOrg)); - const size_t sz(bytes/sizeof(uint64_t)); - for (size_t i(0); i < sz; i++) { - a[i] |= b[i]; - } - uint8_t *ac(static_cast<uint8_t *>(aOrg)); - const uint8_t *bc(static_cast<const uint8_t *>(bOrg)); - for (size_t i(sz*sizeof(uint64_t)); i < bytes; i++) { - ac[i] |= bc[i]; - } -} - -void -GenericAccelrator::andBit(void * aOrg, const void * bOrg, size_t bytes) const -{ - uint64_t *a(static_cast<uint64_t *>(aOrg)); - const uint64_t *b(static_cast<const uint64_t *>(bOrg)); - const size_t sz(bytes/sizeof(uint64_t)); - for (size_t i(0); i < sz; i++) { - a[i] &= b[i]; - } - uint8_t *ac(static_cast<uint8_t *>(aOrg)); - const uint8_t *bc(static_cast<const uint8_t *>(bOrg)); - for (size_t i(sz*sizeof(uint64_t)); i < bytes; i++) { - ac[i] &= bc[i]; - } -} -void -GenericAccelrator::andNotBit(void * aOrg, const void * bOrg, size_t bytes) const -{ - uint64_t *a(static_cast<uint64_t *>(aOrg)); - const uint64_t *b(static_cast<const uint64_t *>(bOrg)); - const size_t sz(bytes/sizeof(uint64_t)); - for (size_t i(0); i < sz; i++) { - a[i] &= ~b[i]; - } - uint8_t *ac(static_cast<uint8_t *>(aOrg)); - const uint8_t *bc(static_cast<const uint8_t *>(bOrg)); - for (size_t i(sz*sizeof(uint64_t)); i < bytes; i++) { - ac[i] &= ~bc[i]; - } -} - -void -GenericAccelrator::notBit(void * aOrg, size_t bytes) const -{ - uint64_t *a(static_cast<uint64_t *>(aOrg)); - const size_t sz(bytes/sizeof(uint64_t)); - for (size_t i(0); i < sz; i++) { - a[i] = ~a[i]; - } - uint8_t *ac(static_cast<uint8_t *>(aOrg)); - for (size_t i(sz*sizeof(uint64_t)); i < bytes; i++) { - ac[i] = ~ac[i]; - } -} - -} -} diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/generic.h b/staging_vespalib/src/vespa/vespalib/hwaccelrated/generic.h deleted file mode 100644 index 0cb21b70ca3..00000000000 --- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/generic.h +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -// Copyright (C) 2003 Fast Search & Transfer ASA -// Copyright (C) 2003 Overture Services Norway AS - -#pragma once - -#include <vespa/vespalib/hwaccelrated/iaccelrated.h> - -namespace vespalib { - -namespace hwaccelrated { - -/** - * Generic cpu agnostic implementation. - */ -class GenericAccelrator : public IAccelrated -{ -public: - float dotProduct(const float * a, const float * b, size_t sz) const override; - double dotProduct(const double * a, const double * b, size_t sz) const override; - int64_t dotProduct(const int32_t * a, const int32_t * b, size_t sz) const override; - long long dotProduct(const int64_t * a, const int64_t * b, size_t sz) const override; - void orBit(void * a, const void * b, size_t bytes) const override; - void andBit(void * a, const void * b, size_t bytes) const override; - void andNotBit(void * a, const void * b, size_t bytes) const override; - void notBit(void * a, size_t bytes) const override; -}; - -} -} diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp b/staging_vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp deleted file mode 100644 index aede024f5af..00000000000 --- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp +++ /dev/null @@ -1,129 +0,0 @@ -// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -// Copyright (C) 2003 Fast Search & Transfer ASA -// Copyright (C) 2003 Overture Services Norway AS - -#include <vespa/vespalib/hwaccelrated/iaccelrated.h> -#include <vespa/vespalib/hwaccelrated/generic.h> -#include <vespa/vespalib/hwaccelrated/sse2.h> -#include <vespa/vespalib/hwaccelrated/avx.h> -#include <vespa/vespalib/hwaccelrated/avx2.h> -#include <vespa/vespalib/hwaccelrated/avx512.h> -#include <assert.h> - -namespace vespalib { - -namespace hwaccelrated { - -namespace { - -class Factory { -public: - virtual ~Factory() { } - virtual IAccelrated::UP create() const = 0; -}; - -class GenericFactory :public Factory{ -public: - virtual IAccelrated::UP create() const { return IAccelrated::UP(new GenericAccelrator()); } -}; - -class Sse2Factory :public Factory{ -public: - virtual IAccelrated::UP create() const { return IAccelrated::UP(new Sse2Accelrator()); } -}; - -class AvxFactory :public Factory{ -public: - virtual IAccelrated::UP create() const { return IAccelrated::UP(new AvxAccelrator()); } -}; - -class Avx2Factory :public Factory{ -public: - virtual IAccelrated::UP create() const { return IAccelrated::UP(new Avx2Accelrator()); } -}; - -class Avx512Factory :public Factory{ -public: - virtual IAccelrated::UP create() const { return IAccelrated::UP(new Avx512Accelrator()); } -}; - -template<typename T> -void verifyAccelrator(const IAccelrated & accel) -{ - const size_t testLength(127); - T * a = new T[testLength]; - T * b = new T[testLength]; - for (size_t j(0); j < 0x20; j++) { - T sum(0); - for (size_t i(j); i < testLength; i++) { - a[i] = b[i] = i; - sum += i*i; - } - T hwComputedSum(accel.dotProduct(&a[j], &b[j], testLength - j)); - assert(sum == hwComputedSum); - } - delete [] a; - delete [] b; -} - -class RuntimeVerificator -{ -public: - RuntimeVerificator(); -}; - -RuntimeVerificator::RuntimeVerificator() -{ - GenericAccelrator generic; - verifyAccelrator<float>(generic); - verifyAccelrator<double>(generic); - verifyAccelrator<int32_t>(generic); - verifyAccelrator<int64_t>(generic); - - IAccelrated::UP thisCpu(IAccelrated::getAccelrator()); - verifyAccelrator<float>(*thisCpu); - verifyAccelrator<double>(*thisCpu); - verifyAccelrator<int32_t>(*thisCpu); - verifyAccelrator<int64_t>(*thisCpu); - -} - -class Selector -{ -public: - Selector() __attribute__((noinline)); - IAccelrated::UP create() { return _factory->create(); } -private: - std::unique_ptr<Factory> _factory; -}; - -Selector::Selector() : - _factory(new GenericFactory()) -{ - __builtin_cpu_init (); - if (__builtin_cpu_supports("avx512f")) { - _factory.reset(new Avx512Factory()); - } else if (__builtin_cpu_supports("avx2")) { - _factory.reset(new Avx2Factory()); - } else if (__builtin_cpu_supports("avx")) { - _factory.reset(new AvxFactory()); - } else if (__builtin_cpu_supports("sse2")) { - _factory.reset(new Sse2Factory()); - } -} - -} - -static Selector _G_selector; - -RuntimeVerificator _G_verifyAccelrator; - - -IAccelrated::UP -IAccelrated::getAccelrator() -{ - return _G_selector.create(); -} - -} -} diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h b/staging_vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h deleted file mode 100644 index b8c7794a386..00000000000 --- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -// Copyright (C) 2003 Fast Search & Transfer ASA -// Copyright (C) 2003 Overture Services Norway AS - -#pragma once - -#include <memory> -#include <stdint.h> - -namespace vespalib { - -namespace hwaccelrated { - -/** - * This contains an interface to all primitives that has different cpu supported accelrations. - * The actual implementation you get by calling the the static getAccelrator method. - */ -class IAccelrated -{ -public: - virtual ~IAccelrated() { } - typedef std::unique_ptr<IAccelrated> UP; - virtual float dotProduct(const float * a, const float * b, size_t sz) const = 0; - virtual double dotProduct(const double * a, const double * b, size_t sz) const = 0; - virtual int64_t dotProduct(const int32_t * a, const int32_t * b, size_t sz) const = 0; - virtual long long dotProduct(const int64_t * a, const int64_t * b, size_t sz) const = 0; - virtual void orBit(void * a, const void * b, size_t bytes) const = 0; - virtual void andBit(void * a, const void * b, size_t bytes) const = 0; - virtual void andNotBit(void * a, const void * b, size_t bytes) const = 0; - virtual void notBit(void * a, size_t bytes) const = 0; - - static IAccelrated::UP getAccelrator() __attribute__((noinline)); -}; - -} -} diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/sse2.cpp b/staging_vespalib/src/vespa/vespalib/hwaccelrated/sse2.cpp deleted file mode 100644 index 11333d62f78..00000000000 --- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/sse2.cpp +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -// Copyright (C) 2003 Fast Search & Transfer ASA -// Copyright (C) 2003 Overture Services Norway AS - -#include <vespa/vespalib/hwaccelrated/sse2.h> - -namespace vespalib { - -namespace hwaccelrated { - -namespace { - -bool validAlignment16(const void * p) { - return (reinterpret_cast<uint64_t>(p) & 0xful) == 0; -} - -bool validAlignment16(const void * a, const void * b) { - return validAlignment16(a) && validAlignment16(b); -} - -} - -float -Sse2Accelrator::dotProduct(const float * af, const float * bf, size_t sz) const -{ - if ( ! validAlignment16(af, bf)) { - return GenericAccelrator::dotProduct(af, bf, sz); - } - typedef float v4sf __attribute__ ((vector_size (16))); - const size_t ChunkSize(16); - const size_t VectorsPerChunk(ChunkSize/4); - v4sf partial[VectorsPerChunk] = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} }; - const v4sf * a = reinterpret_cast<const v4sf *>(af); - const v4sf * b = reinterpret_cast<const v4sf *>(bf); - - const size_t numChunks(sz/ChunkSize); - for (size_t i(0); i < numChunks; i++) { - for (size_t j(0); j < VectorsPerChunk; j++) { - partial[j] += a[VectorsPerChunk*i+j] * b[VectorsPerChunk*i+j]; - } - } - float sum(0); - for (size_t i(numChunks*ChunkSize); i < sz; i++) { - sum += af[i] * bf[i]; - } - for (size_t i(1); i < VectorsPerChunk; i++) { - partial[0] += partial[i]; - } - sum += partial[0][0] + partial[0][1] + partial[0][2] + partial[0][3]; - return sum; -} - -double -Sse2Accelrator::dotProduct(const double * af, const double * bf, size_t sz) const -{ - if ( ! validAlignment16(af, bf)) { - return GenericAccelrator::dotProduct(af, bf, sz); - } - typedef double v2sd __attribute__ ((vector_size (16))); - const size_t ChunkSize(8); - const size_t VectorsPerChunk(ChunkSize/2); - v2sd partial[VectorsPerChunk] = { {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0} }; - const v2sd * a = reinterpret_cast<const v2sd *>(af); - const v2sd * b = reinterpret_cast<const v2sd *>(bf); - - const size_t numChunks(sz/ChunkSize); - for (size_t i(0); i < numChunks; i++) { - for (size_t j(0); j < VectorsPerChunk; j++) { - partial[j] += a[VectorsPerChunk*i+j] * b[VectorsPerChunk*i+j]; - } - } - double sum(0); - for (size_t i(numChunks*ChunkSize); i < sz; i++) { - sum += af[i] * bf[i]; - } - for (size_t i(1); i < VectorsPerChunk; i++) { - partial[0] += partial[i]; - } - sum += partial[0][0] + partial[0][1]; - return sum; -} - -} -} diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/sse2.h b/staging_vespalib/src/vespa/vespalib/hwaccelrated/sse2.h deleted file mode 100644 index a7c39581997..00000000000 --- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/sse2.h +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -// Copyright (C) 2003 Fast Search & Transfer ASA -// Copyright (C) 2003 Overture Services Norway AS - -#pragma once - -#include <vespa/vespalib/hwaccelrated/generic.h> - -namespace vespalib { - -namespace hwaccelrated { - -/** - * Generic cpu agnostic implementation. - */ -class Sse2Accelrator : public GenericAccelrator -{ -public: - float dotProduct(const float * a, const float * b, size_t sz) const override; - double dotProduct(const double * a, const double * b, size_t sz) const override; -}; - -} -} |