aboutsummaryrefslogtreecommitdiffstats
path: root/staging_vespalib
diff options
context:
space:
mode:
authorGeir Storli <geirst@yahoo-inc.com>2016-10-24 14:28:00 +0200
committerGeir Storli <geirst@yahoo-inc.com>2016-10-24 14:28:00 +0200
commit292817945280d84896d3b137d3c2c2385d7b15b4 (patch)
tree3f457c1630a9e6070eab59edc9b1e2bf2fe99d7e /staging_vespalib
parentb73bd2dded336d472a67c53539500160d6d28796 (diff)
Move hwaccelrated library from staging_vespalib to vespalib (for usage in tensor code).
Diffstat (limited to 'staging_vespalib')
-rw-r--r--staging_vespalib/CMakeLists.txt2
-rw-r--r--staging_vespalib/src/tests/dotproduct/.gitignore2
-rw-r--r--staging_vespalib/src/tests/dotproduct/CMakeLists.txt21
-rw-r--r--staging_vespalib/src/tests/dotproduct/dotproductbenchmark.cpp179
-rw-r--r--staging_vespalib/src/vespa/vespalib/CMakeLists.txt1
-rw-r--r--staging_vespalib/src/vespa/vespalib/hwaccelrated/CMakeLists.txt14
-rw-r--r--staging_vespalib/src/vespa/vespalib/hwaccelrated/avx.cpp23
-rw-r--r--staging_vespalib/src/vespa/vespalib/hwaccelrated/avx.h22
-rw-r--r--staging_vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp23
-rw-r--r--staging_vespalib/src/vespa/vespalib/hwaccelrated/avx2.h22
-rw-r--r--staging_vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp23
-rw-r--r--staging_vespalib/src/vespa/vespalib/hwaccelrated/avx512.h22
-rw-r--r--staging_vespalib/src/vespa/vespalib/hwaccelrated/avxprivate.hpp84
-rw-r--r--staging_vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp125
-rw-r--r--staging_vespalib/src/vespa/vespalib/hwaccelrated/generic.h30
-rw-r--r--staging_vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp129
-rw-r--r--staging_vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h36
-rw-r--r--staging_vespalib/src/vespa/vespalib/hwaccelrated/sse2.cpp84
-rw-r--r--staging_vespalib/src/vespa/vespalib/hwaccelrated/sse2.h24
19 files changed, 0 insertions, 866 deletions
diff --git a/staging_vespalib/CMakeLists.txt b/staging_vespalib/CMakeLists.txt
index 5184d98d1ce..652200e2d7b 100644
--- a/staging_vespalib/CMakeLists.txt
+++ b/staging_vespalib/CMakeLists.txt
@@ -16,7 +16,6 @@ vespa_define_module(
src/tests/crc
src/tests/databuffer
src/tests/directio
- src/tests/dotproduct
src/tests/encoding/base64
src/tests/fileheader
src/tests/floatingpointtype
@@ -42,7 +41,6 @@ vespa_define_module(
src/vespa/vespalib
src/vespa/vespalib/data
src/vespa/vespalib/encoding
- src/vespa/vespalib/hwaccelrated
src/vespa/vespalib/net
src/vespa/vespalib/objects
src/vespa/vespalib/stllike
diff --git a/staging_vespalib/src/tests/dotproduct/.gitignore b/staging_vespalib/src/tests/dotproduct/.gitignore
deleted file mode 100644
index 5d9432fbd08..00000000000
--- a/staging_vespalib/src/tests/dotproduct/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-dotproductbenchmark
-staging_vespalib_dotproductbenchmark_app
diff --git a/staging_vespalib/src/tests/dotproduct/CMakeLists.txt b/staging_vespalib/src/tests/dotproduct/CMakeLists.txt
deleted file mode 100644
index 30a02632f1c..00000000000
--- a/staging_vespalib/src/tests/dotproduct/CMakeLists.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-vespa_add_executable(staging_vespalib_dotproductbenchmark_app
- SOURCES
- dotproductbenchmark.cpp
- DEPENDS
- staging_vespalib
-)
-vespa_add_test(NAME staging_vespalib_dotproductbenchmark_app_sparse-ordered COMMAND staging_vespalib_dotproductbenchmark_app 10 10 1000 1000 BENCHMARK)
-vespa_add_test(NAME staging_vespalib_dotproductbenchmark_app_sparse-unordered COMMAND staging_vespalib_dotproductbenchmark_app 10 10 1000 1000 BENCHMARK)
-vespa_add_test(NAME staging_vespalib_dotproductbenchmark_app_full COMMAND staging_vespalib_dotproductbenchmark_app 10 10 1000 1000 BENCHMARK)
-
-# benchmark: dotproductbenchmark
-# $(HIDE)$(LDL) time $(VALGRIND) ./dotproductbenchmark sparse-ordered 1000 1000 1000 1000
-# $(HIDE)$(LDL) time $(VALGRIND) ./dotproductbenchmark sparse-unordered 1000 1000 1000 1000
-# $(HIDE)$(LDL) time $(VALGRIND) ./dotproductbenchmark full 1000 1000 1000 1000
-# $(HIDE)$(LDL) time $(VALGRIND) ./dotproductbenchmark sparse-ordered 1000 1000 100 1000
-# $(HIDE)$(LDL) time $(VALGRIND) ./dotproductbenchmark sparse-unordered 1000 1000 100 1000
-# $(HIDE)$(LDL) time $(VALGRIND) ./dotproductbenchmark full 1000 1000 100 1000
-# $(HIDE)$(LDL) time $(VALGRIND) ./dotproductbenchmark sparse-ordered 1000 1000 1000 100
-# $(HIDE)$(LDL) time $(VALGRIND) ./dotproductbenchmark sparse-unordered 1000 1000 1000 100
-# $(HIDE)$(LDL) time $(VALGRIND) ./dotproductbenchmark full 1000 1000 1000 100
diff --git a/staging_vespalib/src/tests/dotproduct/dotproductbenchmark.cpp b/staging_vespalib/src/tests/dotproduct/dotproductbenchmark.cpp
deleted file mode 100644
index 9ef3d959f3b..00000000000
--- a/staging_vespalib/src/tests/dotproduct/dotproductbenchmark.cpp
+++ /dev/null
@@ -1,179 +0,0 @@
-// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/fastos/fastos.h>
-#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
-#include <vespa/vespalib/stllike/string.h>
-#include <vespa/vespalib/stllike/hash_map.h>
-#include <iostream>
-
-using namespace vespalib;
-using vespalib::hwaccelrated::IAccelrated;
-
-class Benchmark {
-public:
- virtual ~Benchmark() { }
- virtual void compute(size_t docId) const = 0;
-};
-
-void
-runBenchmark(size_t count, size_t docs, const Benchmark & benchmark)
-{
- for (size_t i(0); i < count; i++) {
- for (size_t docId(0); docId < docs; docId++) {
- benchmark.compute(docId);
- }
- }
-}
-
-template <typename T>
-class FullBenchmark : public Benchmark
-{
-public:
- FullBenchmark(size_t numDocs, size_t numValues) :
- _values(numDocs*numValues),
- _query(numValues),
- _dp(IAccelrated::getAccelrator())
- {
- for (size_t i(0); i < numDocs; i++) {
- for (size_t j(0); j < numValues; j++) {
- _values[i*numValues + j] = j;
- }
- }
- for (size_t j(0); j < numValues; j++) {
- _query[j] = j;
- }
- }
- virtual void compute(size_t docId) const {
- _dp->dotProduct(&_query[0], &_values[docId * _query.size()], _query.size());
- }
-private:
- std::vector<T> _values;
- std::vector<T> _query;
- IAccelrated::UP _dp;
-};
-
-class SparseBenchmark : public Benchmark
-{
-public:
- SparseBenchmark(size_t numDocs, size_t numValues, size_t numQueryValues) :
- _numValues(numValues),
- _values(numDocs*numValues)
- {
- for (size_t i(0); i < numDocs; i++) {
- for (size_t j(0); j < numValues; j++) {
- size_t k(numValues < numQueryValues ? (j*numQueryValues)/numValues : j);
- _values[i*numValues + j] = P(k, k);
- }
- }
- }
-protected:
- struct P {
- P(uint32_t key=0, int32_t value=0) :
- _key(key),
- _value(value)
- { }
- uint32_t _key;
- int32_t _value;
- };
- size_t _numValues;
- std::vector<P> _values;
-};
-
-class UnorderedSparseBenchmark : public SparseBenchmark
-{
-private:
- typedef hash_map<uint32_t, int32_t> map;
-public:
- UnorderedSparseBenchmark(size_t numDocs, size_t numValues, size_t numQueryValues) :
- SparseBenchmark(numDocs, numValues, numQueryValues)
- {
- for (size_t j(0); j < numQueryValues; j++) {
- _query[j] = j;
- }
- }
-private:
- virtual void compute(size_t docId) const {
- int64_t sum(0);
- size_t offset(docId*_numValues);
- const auto e(_query.end());
- for (size_t i(0); i < _numValues; i++) {
- auto it = _query.find(_values[offset + i]._key);
- if (it != e) {
- sum += static_cast<int64_t>(_values[offset + i]._value) * it->second;
- }
- }
- }
- map _query;
-};
-
-class OrderedSparseBenchmark : public SparseBenchmark
-{
-private:
-public:
- OrderedSparseBenchmark(size_t numDocs, size_t numValues, size_t numQueryValues) :
- SparseBenchmark(numDocs, numValues, numQueryValues),
- _query(numQueryValues)
- {
- for (size_t j(0); j < numQueryValues; j++) {
- size_t k(numValues > numQueryValues ? j*numValues/numQueryValues : j);
- _query[j] = P(k, k);
- }
- }
-private:
- virtual void compute(size_t docId) const {
- int64_t sum(0);
- size_t offset(docId*_numValues);
-
- for (size_t a(0), b(0); a < _query.size() && b < _numValues; b++) {
- for (; a < _query.size() && (_query[a]._key <= _values[offset + b]._key); a++);
- if (_query[a]._key == _values[offset + b]._key) {
- sum += static_cast<int64_t>(_values[offset + b]._value) * _query[a]._value;
- }
- }
- }
- std::vector<P> _query;
-};
-
-int main(int argc, char *argv[])
-{
- size_t numDocs(1);
- size_t numValues(1000);
- size_t numQueryValues(1000);
- size_t numQueries(1000000);
- string type("full");
- if ( argc > 1) {
- type = argv[1];
- }
- if ( argc > 2) {
- numQueries = strtoul(argv[2], NULL, 0);
- }
- if ( argc > 3) {
- numDocs = strtoul(argv[3], NULL, 0);
- }
- if ( argc > 4) {
- numValues = strtoul(argv[4], NULL, 0);
- }
- if ( argc > 5) {
- numQueryValues = strtoul(argv[5], NULL, 0);
- }
-
- std::cout << "type = " << type << std::endl;
- std::cout << "numQueries = " << numQueries << std::endl;
- std::cout << "numDocs = " << numDocs << std::endl;
- std::cout << "numValues = " << numValues << std::endl;
- std::cout << "numQueryValues = " << numQueryValues << std::endl;
- if (type == "full") {
- FullBenchmark<int32_t> bm(numDocs, numValues);
- runBenchmark(numQueries, numDocs, bm);
- } else if (type == "sparse-ordered") {
- OrderedSparseBenchmark bm(numDocs, numValues, numQueryValues);
- runBenchmark(numQueries, numDocs, bm);
- } else if (type == "sparse-unordered") {
- UnorderedSparseBenchmark bm(numDocs, numValues, numQueryValues);
- runBenchmark(numQueries, numDocs, bm);
- } else {
- std::cerr << "type '" << type << "' is unknown." << std::endl;
- }
-
- return 0;
-}
-
diff --git a/staging_vespalib/src/vespa/vespalib/CMakeLists.txt b/staging_vespalib/src/vespa/vespalib/CMakeLists.txt
index 7bdcc9bbf64..640278a583b 100644
--- a/staging_vespalib/src/vespa/vespalib/CMakeLists.txt
+++ b/staging_vespalib/src/vespa/vespalib/CMakeLists.txt
@@ -8,7 +8,6 @@ vespa_add_library(staging_vespalib
$<TARGET_OBJECTS:staging_vespalib_vespalib_stllike>
$<TARGET_OBJECTS:staging_vespalib_vespalib_net>
$<TARGET_OBJECTS:staging_vespalib_vespalib_trace>
- $<TARGET_OBJECTS:staging_vespalib_vespalib_hwaccelrated>
INSTALL lib64
DEPENDS
)
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/CMakeLists.txt b/staging_vespalib/src/vespa/vespalib/hwaccelrated/CMakeLists.txt
deleted file mode 100644
index 6362cdf6f87..00000000000
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/CMakeLists.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-vespa_add_library(staging_vespalib_vespalib_hwaccelrated OBJECT
- SOURCES
- iaccelrated.cpp
- generic.cpp
- sse2.cpp
- avx.cpp
- avx2.cpp
- avx512.cpp
- DEPENDS
-)
-set_source_files_properties(avx.cpp PROPERTIES COMPILE_FLAGS -march=sandybridge)
-set_source_files_properties(avx2.cpp PROPERTIES COMPILE_FLAGS -march=haswell)
-vespa_workaround_set_gcc_march_skylake_avx512_if_supported(avx512.cpp)
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx.cpp b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx.cpp
deleted file mode 100644
index ec5064bf647..00000000000
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include <vespa/vespalib/hwaccelrated/avx.h>
-#include <vespa/vespalib/hwaccelrated/avxprivate.hpp>
-
-namespace vespalib {
-
-namespace hwaccelrated {
-
-float
-AvxAccelrator::dotProduct(const float * af, const float * bf, size_t sz) const
-{
- return avx::dotProductSelectAlignment<float, 32>(af, bf, sz);
-}
-
-double
-AvxAccelrator::dotProduct(const double * af, const double * bf, size_t sz) const
-{
- return avx::dotProductSelectAlignment<double, 32>(af, bf, sz);
-}
-
-}
-}
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx.h b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx.h
deleted file mode 100644
index 4b391c163ac..00000000000
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx.h
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include <vespa/vespalib/hwaccelrated/sse2.h>
-
-namespace vespalib {
-
-namespace hwaccelrated {
-
-/**
- * Avx-256 implementation.
- */
-class AvxAccelrator : public Sse2Accelrator
-{
-public:
- float dotProduct(const float * a, const float * b, size_t sz) const override;
- double dotProduct(const double * a, const double * b, size_t sz) const override;
-};
-
-}
-}
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp
deleted file mode 100644
index f87738e3a6c..00000000000
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include <vespa/vespalib/hwaccelrated/avx2.h>
-#include <vespa/vespalib/hwaccelrated/avxprivate.hpp>
-
-namespace vespalib {
-
-namespace hwaccelrated {
-
-float
-Avx2Accelrator::dotProduct(const float * af, const float * bf, size_t sz) const
-{
- return avx::dotProductSelectAlignment<float, 32>(af, bf, sz);
-}
-
-double
-Avx2Accelrator::dotProduct(const double * af, const double * bf, size_t sz) const
-{
- return avx::dotProductSelectAlignment<double, 32>(af, bf, sz);
-}
-
-}
-}
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx2.h b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx2.h
deleted file mode 100644
index 56d3a8ac65e..00000000000
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx2.h
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include <vespa/vespalib/hwaccelrated/avx.h>
-
-namespace vespalib {
-
-namespace hwaccelrated {
-
-/**
- * Avx2 implementation.
- */
-class Avx2Accelrator : public AvxAccelrator
-{
-public:
- float dotProduct(const float * a, const float * b, size_t sz) const override;
- double dotProduct(const double * a, const double * b, size_t sz) const override;
-};
-
-}
-}
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp
deleted file mode 100644
index 9f7a6dcda3e..00000000000
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include <vespa/vespalib/hwaccelrated/avx512.h>
-#include <vespa/vespalib/hwaccelrated/avxprivate.hpp>
-
-namespace vespalib {
-
-namespace hwaccelrated {
-
-float
-Avx512Accelrator::dotProduct(const float * af, const float * bf, size_t sz) const
-{
- return avx::dotProductSelectAlignment<float, 64>(af, bf, sz);
-}
-
-double
-Avx512Accelrator::dotProduct(const double * af, const double * bf, size_t sz) const
-{
- return avx::dotProductSelectAlignment<double, 64>(af, bf, sz);
-}
-
-}
-}
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx512.h b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx512.h
deleted file mode 100644
index 5d7028c30ba..00000000000
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx512.h
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include <vespa/vespalib/hwaccelrated/avx2.h>
-
-namespace vespalib {
-
-namespace hwaccelrated {
-
-/**
- * Avx-512 implementation.
- */
-class Avx512Accelrator : public Avx2Accelrator
-{
-public:
- float dotProduct(const float * a, const float * b, size_t sz) const override;
- double dotProduct(const double * a, const double * b, size_t sz) const override;
-};
-
-}
-}
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avxprivate.hpp b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avxprivate.hpp
deleted file mode 100644
index 87a043b3428..00000000000
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avxprivate.hpp
+++ /dev/null
@@ -1,84 +0,0 @@
-// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include <vespa/fastos/dynamiclibrary.h>
-
-namespace vespalib {
-
-namespace hwaccelrated {
-
-namespace avx {
-
-namespace {
-
-inline bool validAlignment(const void * p, const size_t align) {
- return (reinterpret_cast<uint64_t>(p) & (align-1)) == 0;
-}
-
-template <typename T, typename V>
-T sumT(const V & v) {
- T sum(0);
- for (size_t i(0); i < (sizeof(V)/sizeof(T)); i++) {
- sum += v[i];
- }
- return sum;
-}
-
-template <typename T, size_t VLEN, unsigned AlignA, unsigned AlignB, size_t VectorsPerChunk>
-static T computeDotProduct(const T * af, const T * bf, size_t sz) __attribute__((noinline));
-
-template <typename T, size_t VLEN, unsigned AlignA, unsigned AlignB, size_t VectorsPerChunk>
-T computeDotProduct(const T * af, const T * bf, size_t sz)
-{
- constexpr const size_t ChunkSize = VLEN*VectorsPerChunk/sizeof(T);
- typedef T V __attribute__ ((vector_size (VLEN)));
- typedef T A __attribute__ ((vector_size (VLEN), aligned(AlignA)));
- typedef T B __attribute__ ((vector_size (VLEN), aligned(AlignB)));
- V partial[VectorsPerChunk];
- memset(partial, 0, sizeof(partial));
- const A * a = reinterpret_cast<const A *>(af);
- const B * b = reinterpret_cast<const B *>(bf);
-
- const size_t numChunks(sz/ChunkSize);
- for (size_t i(0); i < numChunks; i++) {
- for (size_t j(0); j < VectorsPerChunk; j++) {
- partial[j] += a[VectorsPerChunk*i+j] * b[VectorsPerChunk*i+j];
- }
- }
- T sum(0);
- for (size_t i(numChunks*ChunkSize); i < sz; i++) {
- sum += af[i] * bf[i];
- }
- for (size_t i(1); i < VectorsPerChunk; i++) {
- partial[0] += partial[i];
- }
- return sum + sumT<T, V>(partial[0]);
-}
-
-}
-
-template <typename T, size_t VLEN, size_t VectorsPerChunk=4>
-VESPA_DLL_LOCAL static T dotProductSelectAlignment(const T * af, const T * bf, size_t sz);
-
-template <typename T, size_t VLEN, size_t VectorsPerChunk>
-T dotProductSelectAlignment(const T * af, const T * bf, size_t sz)
-{
- if (validAlignment(af, VLEN)) {
- if (validAlignment(bf, VLEN)) {
- return computeDotProduct<T, VLEN, VLEN, VLEN, VectorsPerChunk>(af, bf, sz);
- } else {
- return computeDotProduct<T, VLEN, VLEN, 1, VectorsPerChunk>(af, bf, sz);
- }
- } else {
- if (validAlignment(bf, VLEN)) {
- return computeDotProduct<T, VLEN, 1, VLEN, VectorsPerChunk>(af, bf, sz);
- } else {
- return computeDotProduct<T, VLEN, 1, 1, VectorsPerChunk>(af, bf, sz);
- }
- }
-}
-
-}
-}
-}
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp b/staging_vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp
deleted file mode 100644
index f218e4172f9..00000000000
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp
+++ /dev/null
@@ -1,125 +0,0 @@
-// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-// Copyright (C) 2003 Fast Search & Transfer ASA
-// Copyright (C) 2003 Overture Services Norway AS
-
-#include <vespa/vespalib/hwaccelrated/generic.h>
-
-namespace vespalib {
-
-namespace hwaccelrated {
-
-namespace {
-
-template <typename ACCUM, typename T, size_t UNROLL>
-ACCUM
-multiplyAdd(const T * a, const T * b, size_t sz)
-{
- ACCUM partial[UNROLL];
- for (size_t i(0); i < UNROLL; i++) {
- partial[i] = 0;
- }
- size_t i(0);
- for (; i + UNROLL <= sz; i+= UNROLL) {
- for (size_t j(0); j < UNROLL; j++) {
- partial[j] += a[i+j] * b[i+j];
- }
- }
- for (;i < sz; i++) {
- partial[i%UNROLL] += a[i] * b[i];
- }
- ACCUM sum(0);
- for (size_t j(0); j < UNROLL; j++) {
- sum += partial[j];
- }
- return sum;
-}
-
-}
-
-float
-GenericAccelrator::dotProduct(const float * a, const float * b, size_t sz) const
-{
- return multiplyAdd<float, float, 4>(a, b, sz);
-}
-
-double
-GenericAccelrator::dotProduct(const double * a, const double * b, size_t sz) const
-{
- return multiplyAdd<double, double, 4>(a, b, sz);
-}
-
-int64_t
-GenericAccelrator::dotProduct(const int32_t * a, const int32_t * b, size_t sz) const
-{
- return multiplyAdd<int64_t, int32_t, 4>(a, b, sz);
-}
-
-long long
-GenericAccelrator::dotProduct(const int64_t * a, const int64_t * b, size_t sz) const
-{
- return multiplyAdd<long long, int64_t, 4>(a, b, sz);
-}
-
-void
-GenericAccelrator::orBit(void * aOrg, const void * bOrg, size_t bytes) const
-{
- uint64_t *a(static_cast<uint64_t *>(aOrg));
- const uint64_t *b(static_cast<const uint64_t *>(bOrg));
- const size_t sz(bytes/sizeof(uint64_t));
- for (size_t i(0); i < sz; i++) {
- a[i] |= b[i];
- }
- uint8_t *ac(static_cast<uint8_t *>(aOrg));
- const uint8_t *bc(static_cast<const uint8_t *>(bOrg));
- for (size_t i(sz*sizeof(uint64_t)); i < bytes; i++) {
- ac[i] |= bc[i];
- }
-}
-
-void
-GenericAccelrator::andBit(void * aOrg, const void * bOrg, size_t bytes) const
-{
- uint64_t *a(static_cast<uint64_t *>(aOrg));
- const uint64_t *b(static_cast<const uint64_t *>(bOrg));
- const size_t sz(bytes/sizeof(uint64_t));
- for (size_t i(0); i < sz; i++) {
- a[i] &= b[i];
- }
- uint8_t *ac(static_cast<uint8_t *>(aOrg));
- const uint8_t *bc(static_cast<const uint8_t *>(bOrg));
- for (size_t i(sz*sizeof(uint64_t)); i < bytes; i++) {
- ac[i] &= bc[i];
- }
-}
-void
-GenericAccelrator::andNotBit(void * aOrg, const void * bOrg, size_t bytes) const
-{
- uint64_t *a(static_cast<uint64_t *>(aOrg));
- const uint64_t *b(static_cast<const uint64_t *>(bOrg));
- const size_t sz(bytes/sizeof(uint64_t));
- for (size_t i(0); i < sz; i++) {
- a[i] &= ~b[i];
- }
- uint8_t *ac(static_cast<uint8_t *>(aOrg));
- const uint8_t *bc(static_cast<const uint8_t *>(bOrg));
- for (size_t i(sz*sizeof(uint64_t)); i < bytes; i++) {
- ac[i] &= ~bc[i];
- }
-}
-
-void
-GenericAccelrator::notBit(void * aOrg, size_t bytes) const
-{
- uint64_t *a(static_cast<uint64_t *>(aOrg));
- const size_t sz(bytes/sizeof(uint64_t));
- for (size_t i(0); i < sz; i++) {
- a[i] = ~a[i];
- }
- uint8_t *ac(static_cast<uint8_t *>(aOrg));
- for (size_t i(sz*sizeof(uint64_t)); i < bytes; i++) {
- ac[i] = ~ac[i];
- }
-}
-
-}
-}
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/generic.h b/staging_vespalib/src/vespa/vespalib/hwaccelrated/generic.h
deleted file mode 100644
index 0cb21b70ca3..00000000000
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/generic.h
+++ /dev/null
@@ -1,30 +0,0 @@
-// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-// Copyright (C) 2003 Fast Search & Transfer ASA
-// Copyright (C) 2003 Overture Services Norway AS
-
-#pragma once
-
-#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
-
-namespace vespalib {
-
-namespace hwaccelrated {
-
-/**
- * Generic cpu agnostic implementation.
- */
-class GenericAccelrator : public IAccelrated
-{
-public:
- float dotProduct(const float * a, const float * b, size_t sz) const override;
- double dotProduct(const double * a, const double * b, size_t sz) const override;
- int64_t dotProduct(const int32_t * a, const int32_t * b, size_t sz) const override;
- long long dotProduct(const int64_t * a, const int64_t * b, size_t sz) const override;
- void orBit(void * a, const void * b, size_t bytes) const override;
- void andBit(void * a, const void * b, size_t bytes) const override;
- void andNotBit(void * a, const void * b, size_t bytes) const override;
- void notBit(void * a, size_t bytes) const override;
-};
-
-}
-}
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp b/staging_vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp
deleted file mode 100644
index aede024f5af..00000000000
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp
+++ /dev/null
@@ -1,129 +0,0 @@
-// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-// Copyright (C) 2003 Fast Search & Transfer ASA
-// Copyright (C) 2003 Overture Services Norway AS
-
-#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
-#include <vespa/vespalib/hwaccelrated/generic.h>
-#include <vespa/vespalib/hwaccelrated/sse2.h>
-#include <vespa/vespalib/hwaccelrated/avx.h>
-#include <vespa/vespalib/hwaccelrated/avx2.h>
-#include <vespa/vespalib/hwaccelrated/avx512.h>
-#include <assert.h>
-
-namespace vespalib {
-
-namespace hwaccelrated {
-
-namespace {
-
-class Factory {
-public:
- virtual ~Factory() { }
- virtual IAccelrated::UP create() const = 0;
-};
-
-class GenericFactory :public Factory{
-public:
- virtual IAccelrated::UP create() const { return IAccelrated::UP(new GenericAccelrator()); }
-};
-
-class Sse2Factory :public Factory{
-public:
- virtual IAccelrated::UP create() const { return IAccelrated::UP(new Sse2Accelrator()); }
-};
-
-class AvxFactory :public Factory{
-public:
- virtual IAccelrated::UP create() const { return IAccelrated::UP(new AvxAccelrator()); }
-};
-
-class Avx2Factory :public Factory{
-public:
- virtual IAccelrated::UP create() const { return IAccelrated::UP(new Avx2Accelrator()); }
-};
-
-class Avx512Factory :public Factory{
-public:
- virtual IAccelrated::UP create() const { return IAccelrated::UP(new Avx512Accelrator()); }
-};
-
-template<typename T>
-void verifyAccelrator(const IAccelrated & accel)
-{
- const size_t testLength(127);
- T * a = new T[testLength];
- T * b = new T[testLength];
- for (size_t j(0); j < 0x20; j++) {
- T sum(0);
- for (size_t i(j); i < testLength; i++) {
- a[i] = b[i] = i;
- sum += i*i;
- }
- T hwComputedSum(accel.dotProduct(&a[j], &b[j], testLength - j));
- assert(sum == hwComputedSum);
- }
- delete [] a;
- delete [] b;
-}
-
-class RuntimeVerificator
-{
-public:
- RuntimeVerificator();
-};
-
-RuntimeVerificator::RuntimeVerificator()
-{
- GenericAccelrator generic;
- verifyAccelrator<float>(generic);
- verifyAccelrator<double>(generic);
- verifyAccelrator<int32_t>(generic);
- verifyAccelrator<int64_t>(generic);
-
- IAccelrated::UP thisCpu(IAccelrated::getAccelrator());
- verifyAccelrator<float>(*thisCpu);
- verifyAccelrator<double>(*thisCpu);
- verifyAccelrator<int32_t>(*thisCpu);
- verifyAccelrator<int64_t>(*thisCpu);
-
-}
-
-class Selector
-{
-public:
- Selector() __attribute__((noinline));
- IAccelrated::UP create() { return _factory->create(); }
-private:
- std::unique_ptr<Factory> _factory;
-};
-
-Selector::Selector() :
- _factory(new GenericFactory())
-{
- __builtin_cpu_init ();
- if (__builtin_cpu_supports("avx512f")) {
- _factory.reset(new Avx512Factory());
- } else if (__builtin_cpu_supports("avx2")) {
- _factory.reset(new Avx2Factory());
- } else if (__builtin_cpu_supports("avx")) {
- _factory.reset(new AvxFactory());
- } else if (__builtin_cpu_supports("sse2")) {
- _factory.reset(new Sse2Factory());
- }
-}
-
-}
-
-static Selector _G_selector;
-
-RuntimeVerificator _G_verifyAccelrator;
-
-
-IAccelrated::UP
-IAccelrated::getAccelrator()
-{
- return _G_selector.create();
-}
-
-}
-}
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h b/staging_vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h
deleted file mode 100644
index b8c7794a386..00000000000
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h
+++ /dev/null
@@ -1,36 +0,0 @@
-// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-// Copyright (C) 2003 Fast Search & Transfer ASA
-// Copyright (C) 2003 Overture Services Norway AS
-
-#pragma once
-
-#include <memory>
-#include <stdint.h>
-
-namespace vespalib {
-
-namespace hwaccelrated {
-
-/**
- * This contains an interface to all primitives that have different cpu supported accelerations.
- * The actual implementation you get by calling the static getAccelrator method.
- */
-class IAccelrated
-{
-public:
- virtual ~IAccelrated() { }
- typedef std::unique_ptr<IAccelrated> UP;
- virtual float dotProduct(const float * a, const float * b, size_t sz) const = 0;
- virtual double dotProduct(const double * a, const double * b, size_t sz) const = 0;
- virtual int64_t dotProduct(const int32_t * a, const int32_t * b, size_t sz) const = 0;
- virtual long long dotProduct(const int64_t * a, const int64_t * b, size_t sz) const = 0;
- virtual void orBit(void * a, const void * b, size_t bytes) const = 0;
- virtual void andBit(void * a, const void * b, size_t bytes) const = 0;
- virtual void andNotBit(void * a, const void * b, size_t bytes) const = 0;
- virtual void notBit(void * a, size_t bytes) const = 0;
-
- static IAccelrated::UP getAccelrator() __attribute__((noinline));
-};
-
-}
-}
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/sse2.cpp b/staging_vespalib/src/vespa/vespalib/hwaccelrated/sse2.cpp
deleted file mode 100644
index 11333d62f78..00000000000
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/sse2.cpp
+++ /dev/null
@@ -1,84 +0,0 @@
-// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-// Copyright (C) 2003 Fast Search & Transfer ASA
-// Copyright (C) 2003 Overture Services Norway AS
-
-#include <vespa/vespalib/hwaccelrated/sse2.h>
-
-namespace vespalib {
-
-namespace hwaccelrated {
-
-namespace {
-
-bool validAlignment16(const void * p) {
- return (reinterpret_cast<uint64_t>(p) & 0xful) == 0;
-}
-
-bool validAlignment16(const void * a, const void * b) {
- return validAlignment16(a) && validAlignment16(b);
-}
-
-}
-
-float
-Sse2Accelrator::dotProduct(const float * af, const float * bf, size_t sz) const
-{
- if ( ! validAlignment16(af, bf)) {
- return GenericAccelrator::dotProduct(af, bf, sz);
- }
- typedef float v4sf __attribute__ ((vector_size (16)));
- const size_t ChunkSize(16);
- const size_t VectorsPerChunk(ChunkSize/4);
- v4sf partial[VectorsPerChunk] = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
- const v4sf * a = reinterpret_cast<const v4sf *>(af);
- const v4sf * b = reinterpret_cast<const v4sf *>(bf);
-
- const size_t numChunks(sz/ChunkSize);
- for (size_t i(0); i < numChunks; i++) {
- for (size_t j(0); j < VectorsPerChunk; j++) {
- partial[j] += a[VectorsPerChunk*i+j] * b[VectorsPerChunk*i+j];
- }
- }
- float sum(0);
- for (size_t i(numChunks*ChunkSize); i < sz; i++) {
- sum += af[i] * bf[i];
- }
- for (size_t i(1); i < VectorsPerChunk; i++) {
- partial[0] += partial[i];
- }
- sum += partial[0][0] + partial[0][1] + partial[0][2] + partial[0][3];
- return sum;
-}
-
-double
-Sse2Accelrator::dotProduct(const double * af, const double * bf, size_t sz) const
-{
- if ( ! validAlignment16(af, bf)) {
- return GenericAccelrator::dotProduct(af, bf, sz);
- }
- typedef double v2sd __attribute__ ((vector_size (16)));
- const size_t ChunkSize(8);
- const size_t VectorsPerChunk(ChunkSize/2);
- v2sd partial[VectorsPerChunk] = { {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0} };
- const v2sd * a = reinterpret_cast<const v2sd *>(af);
- const v2sd * b = reinterpret_cast<const v2sd *>(bf);
-
- const size_t numChunks(sz/ChunkSize);
- for (size_t i(0); i < numChunks; i++) {
- for (size_t j(0); j < VectorsPerChunk; j++) {
- partial[j] += a[VectorsPerChunk*i+j] * b[VectorsPerChunk*i+j];
- }
- }
- double sum(0);
- for (size_t i(numChunks*ChunkSize); i < sz; i++) {
- sum += af[i] * bf[i];
- }
- for (size_t i(1); i < VectorsPerChunk; i++) {
- partial[0] += partial[i];
- }
- sum += partial[0][0] + partial[0][1];
- return sum;
-}
-
-}
-}
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/sse2.h b/staging_vespalib/src/vespa/vespalib/hwaccelrated/sse2.h
deleted file mode 100644
index a7c39581997..00000000000
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/sse2.h
+++ /dev/null
@@ -1,24 +0,0 @@
-// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-// Copyright (C) 2003 Fast Search & Transfer ASA
-// Copyright (C) 2003 Overture Services Norway AS
-
-#pragma once
-
-#include <vespa/vespalib/hwaccelrated/generic.h>
-
-namespace vespalib {
-
-namespace hwaccelrated {
-
-/**
- * Sse2 implementation.
- */
-class Sse2Accelrator : public GenericAccelrator
-{
-public:
- float dotProduct(const float * a, const float * b, size_t sz) const override;
- double dotProduct(const double * a, const double * b, size_t sz) const override;
-};
-
-}
-}