Move hwaccelrated library from staging_vespalib to vespalib (for usage in tensor code).

author: Geir Storli <geirst@yahoo-inc.com> 2016-10-24 14:28:00 +0200
committer: Geir Storli <geirst@yahoo-inc.com> 2016-10-24 14:28:00 +0200
commit: 292817945280d84896d3b137d3c2c2385d7b15b4 (patch)
tree: 3f457c1630a9e6070eab59edc9b1e2bf2fe99d7e /staging_vespalib
parent: b73bd2dded336d472a67c53539500160d6d28796 (diff)
19 files changed, 0 insertions, 866 deletions
diff --git a/staging_vespalib/CMakeLists.txt b/staging_vespalib/CMakeLists.txt
index 5184d98d1ce..652200e2d7b 100644
--- a/staging_vespalib/CMakeLists.txt
+++ b/staging_vespalib/CMakeLists.txt
@@ -16,7 +16,6 @@ vespa_define_module(
     src/tests/crc
     src/tests/databuffer
     src/tests/directio
-    src/tests/dotproduct
     src/tests/encoding/base64
     src/tests/fileheader
     src/tests/floatingpointtype
@@ -42,7 +41,6 @@ vespa_define_module(
     src/vespa/vespalib
     src/vespa/vespalib/data
     src/vespa/vespalib/encoding
-    src/vespa/vespalib/hwaccelrated
     src/vespa/vespalib/net
     src/vespa/vespalib/objects
     src/vespa/vespalib/stllike
diff --git a/staging_vespalib/src/tests/dotproduct/.gitignore b/staging_vespalib/src/tests/dotproduct/.gitignore
deleted file mode 100644
index 5d9432fbd08..00000000000
--- a/staging_vespalib/src/tests/dotproduct/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-dotproductbenchmark
-staging_vespalib_dotproductbenchmark_app
diff --git a/staging_vespalib/src/tests/dotproduct/CMakeLists.txt b/staging_vespalib/src/tests/dotproduct/CMakeLists.txt
deleted file mode 100644
index 30a02632f1c..00000000000
--- a/staging_vespalib/src/tests/dotproduct/CMakeLists.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-vespa_add_executable(staging_vespalib_dotproductbenchmark_app
-    SOURCES
-    dotproductbenchmark.cpp
-    DEPENDS
-    staging_vespalib
-)
-vespa_add_test(NAME staging_vespalib_dotproductbenchmark_app_sparse-ordered COMMAND staging_vespalib_dotproductbenchmark_app 10 10 1000 1000 BENCHMARK)
-vespa_add_test(NAME staging_vespalib_dotproductbenchmark_app_sparse-unordered COMMAND staging_vespalib_dotproductbenchmark_app 10 10 1000 1000 BENCHMARK)
-vespa_add_test(NAME staging_vespalib_dotproductbenchmark_app_full COMMAND staging_vespalib_dotproductbenchmark_app 10 10 1000 1000 BENCHMARK) 
-
-# benchmark: dotproductbenchmark
-# 	$(HIDE)$(LDL) time $(VALGRIND) ./dotproductbenchmark sparse-ordered   1000 1000 1000 1000
-# 	$(HIDE)$(LDL) time $(VALGRIND) ./dotproductbenchmark sparse-unordered 1000 1000 1000 1000
-# 	$(HIDE)$(LDL) time $(VALGRIND) ./dotproductbenchmark full             1000 1000 1000 1000 
-# 	$(HIDE)$(LDL) time $(VALGRIND) ./dotproductbenchmark sparse-ordered   1000 1000 100 1000
-# 	$(HIDE)$(LDL) time $(VALGRIND) ./dotproductbenchmark sparse-unordered 1000 1000 100 1000
-# 	$(HIDE)$(LDL) time $(VALGRIND) ./dotproductbenchmark full             1000 1000 100 1000 
-# 	$(HIDE)$(LDL) time $(VALGRIND) ./dotproductbenchmark sparse-ordered   1000 1000 1000 100
-# 	$(HIDE)$(LDL) time $(VALGRIND) ./dotproductbenchmark sparse-unordered 1000 1000 1000 100
-# 	$(HIDE)$(LDL) time $(VALGRIND) ./dotproductbenchmark full             1000 1000 1000 100 
diff --git a/staging_vespalib/src/tests/dotproduct/dotproductbenchmark.cpp b/staging_vespalib/src/tests/dotproduct/dotproductbenchmark.cpp
deleted file mode 100644
index 9ef3d959f3b..00000000000
--- a/staging_vespalib/src/tests/dotproduct/dotproductbenchmark.cpp
+++ /dev/null
@@ -1,179 +0,0 @@
-// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/fastos/fastos.h>
-#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
-#include <vespa/vespalib/stllike/string.h>
-#include <vespa/vespalib/stllike/hash_map.h>
-#include <iostream>
-
-using namespace vespalib;
-using vespalib::hwaccelrated::IAccelrated;
-
-class Benchmark {
-public:
-    virtual ~Benchmark() { }
-    virtual void compute(size_t docId) const = 0;
-};
-
-void
-runBenchmark(size_t count, size_t docs, const Benchmark & benchmark)
-{
-    for (size_t i(0); i < count; i++) {
-        for (size_t docId(0); docId < docs; docId++) {
-            benchmark.compute(docId);
-        }
-    }
-}
-
-template <typename T>
-class FullBenchmark : public Benchmark
-{
-public:
-    FullBenchmark(size_t numDocs, size_t numValues) :
-        _values(numDocs*numValues),
-        _query(numValues),
-        _dp(IAccelrated::getAccelrator())
-    {
-        for (size_t i(0); i < numDocs; i++) {
-            for (size_t j(0); j < numValues; j++) {
-                _values[i*numValues + j] = j;
-            }
-        }
-        for (size_t j(0); j < numValues; j++) {
-            _query[j] = j;
-        }
-    }
-    virtual void compute(size_t docId) const {
-        _dp->dotProduct(&_query[0], &_values[docId * _query.size()], _query.size());
-    }
-private:
-    std::vector<T> _values;
-    std::vector<T> _query;
-    IAccelrated::UP _dp;
-};
-
-class SparseBenchmark : public Benchmark
-{
-public:
-    SparseBenchmark(size_t numDocs, size_t numValues, size_t numQueryValues) :
-        _numValues(numValues),
-        _values(numDocs*numValues)
-    {
-        for (size_t i(0); i < numDocs; i++) {
-            for (size_t j(0); j < numValues; j++) {
-                size_t k(numValues < numQueryValues ?  (j*numQueryValues)/numValues : j);
-                _values[i*numValues + j] = P(k, k);
-            }
-        }
-    }
-protected:
-    struct P {
-        P(uint32_t key=0, int32_t value=0) :
-            _key(key),
-            _value(value)
-        { }
-        uint32_t _key;
-        int32_t  _value;
-    };
-    size_t _numValues;
-    std::vector<P> _values;
-};
-
-class UnorderedSparseBenchmark : public SparseBenchmark
-{
-private:
-    typedef hash_map<uint32_t, int32_t> map;
-public:
-    UnorderedSparseBenchmark(size_t numDocs, size_t numValues, size_t numQueryValues) :
-        SparseBenchmark(numDocs, numValues, numQueryValues)
-    {
-        for (size_t j(0); j < numQueryValues; j++) {
-            _query[j] = j;
-        }
-    }
-private:
-    virtual void compute(size_t docId) const {
-        int64_t sum(0);
-        size_t offset(docId*_numValues);
-        const auto e(_query.end());
-        for (size_t i(0); i < _numValues; i++) {
-            auto it = _query.find(_values[offset + i]._key);
-            if (it != e) {
-                sum += static_cast<int64_t>(_values[offset + i]._value) * it->second;
-            }
-        }
-    }
-    map _query;
-};
-
-class OrderedSparseBenchmark : public SparseBenchmark
-{
-private:
-public:
-    OrderedSparseBenchmark(size_t numDocs, size_t numValues, size_t numQueryValues) :
-        SparseBenchmark(numDocs, numValues, numQueryValues),
-        _query(numQueryValues)
-    {
-        for (size_t j(0); j < numQueryValues; j++) {
-            size_t k(numValues > numQueryValues ?  j*numValues/numQueryValues : j);
-            _query[j] = P(k, k);
-        }
-    }
-private:
-    virtual void compute(size_t docId) const {
-        int64_t sum(0);
-        size_t offset(docId*_numValues);
-
-        for (size_t a(0), b(0); a < _query.size() && b < _numValues; b++) {
-            for (; a < _query.size() && (_query[a]._key <= _values[offset + b]._key); a++);
-            if (_query[a]._key == _values[offset + b]._key) {
-                sum += static_cast<int64_t>(_values[offset + b]._value) * _query[a]._value;
-            }
-        }
-    }
-    std::vector<P> _query;
-};
-
-int main(int argc, char *argv[])
-{
-    size_t numDocs(1);
-    size_t numValues(1000);
-    size_t numQueryValues(1000);
-    size_t numQueries(1000000);
-    string type("full");
-    if ( argc > 1) {
-        type = argv[1];
-    }
-    if ( argc > 2) {
-        numQueries = strtoul(argv[2], NULL, 0);
-    }
-    if ( argc > 3) {
-        numDocs = strtoul(argv[3], NULL, 0);
-    }
-    if ( argc > 4) {
-        numValues = strtoul(argv[4], NULL, 0);
-    }
-    if ( argc > 5) {
-        numQueryValues = strtoul(argv[5], NULL, 0);
-    }
-
-    std::cout << "type = " << type << std::endl;
-    std::cout << "numQueries = " << numQueries << std::endl;
-    std::cout << "numDocs = " << numDocs << std::endl;
-    std::cout << "numValues = " << numValues << std::endl;
-    std::cout << "numQueryValues = " << numQueryValues << std::endl;
-    if (type == "full") {
-        FullBenchmark<int32_t> bm(numDocs, numValues);
-        runBenchmark(numQueries, numDocs, bm);
-    } else if (type == "sparse-ordered") {
-        OrderedSparseBenchmark bm(numDocs, numValues, numQueryValues);
-        runBenchmark(numQueries, numDocs, bm);
-    } else if (type == "sparse-unordered") {
-        UnorderedSparseBenchmark bm(numDocs, numValues, numQueryValues);
-        runBenchmark(numQueries, numDocs, bm);
-    } else {
-        std::cerr << "type '" << type << "' is unknown." << std::endl;
-    }
-    
-    return 0;
-}
-
diff --git a/staging_vespalib/src/vespa/vespalib/CMakeLists.txt b/staging_vespalib/src/vespa/vespalib/CMakeLists.txt
index 7bdcc9bbf64..640278a583b 100644
--- a/staging_vespalib/src/vespa/vespalib/CMakeLists.txt
+++ b/staging_vespalib/src/vespa/vespalib/CMakeLists.txt
@@ -8,7 +8,6 @@ vespa_add_library(staging_vespalib
     $<TARGET_OBJECTS:staging_vespalib_vespalib_stllike>
     $<TARGET_OBJECTS:staging_vespalib_vespalib_net>
     $<TARGET_OBJECTS:staging_vespalib_vespalib_trace>
-    $<TARGET_OBJECTS:staging_vespalib_vespalib_hwaccelrated>
     INSTALL lib64
     DEPENDS
 )
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/CMakeLists.txt b/staging_vespalib/src/vespa/vespalib/hwaccelrated/CMakeLists.txt
deleted file mode 100644
index 6362cdf6f87..00000000000
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/CMakeLists.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-vespa_add_library(staging_vespalib_vespalib_hwaccelrated OBJECT
-    SOURCES
-    iaccelrated.cpp
-    generic.cpp
-    sse2.cpp
-    avx.cpp
-    avx2.cpp
-    avx512.cpp
-    DEPENDS
-)
-set_source_files_properties(avx.cpp PROPERTIES COMPILE_FLAGS -march=sandybridge)
-set_source_files_properties(avx2.cpp PROPERTIES COMPILE_FLAGS -march=haswell)
-vespa_workaround_set_gcc_march_skylake_avx512_if_supported(avx512.cpp)
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx.cpp b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx.cpp
deleted file mode 100644
index ec5064bf647..00000000000
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include <vespa/vespalib/hwaccelrated/avx.h>
-#include <vespa/vespalib/hwaccelrated/avxprivate.hpp>
-
-namespace vespalib {
-
-namespace hwaccelrated {
-
-float
-AvxAccelrator::dotProduct(const float * af, const float * bf, size_t sz) const
-{
-    return avx::dotProductSelectAlignment<float, 32>(af, bf, sz);
-}
-
-double
-AvxAccelrator::dotProduct(const double * af, const double * bf, size_t sz) const
-{
-    return avx::dotProductSelectAlignment<double, 32>(af, bf, sz);
-}
-
-}
-}
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx.h b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx.h
deleted file mode 100644
index 4b391c163ac..00000000000
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx.h
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include <vespa/vespalib/hwaccelrated/sse2.h>
-
-namespace vespalib {
-
-namespace hwaccelrated {
-
-/**
- * Avx-256 implementation.
- */
-class AvxAccelrator : public Sse2Accelrator
-{
-public:
-    float dotProduct(const float * a, const float * b, size_t sz) const override;
-    double dotProduct(const double * a, const double * b, size_t sz) const override;
-};
-
-}
-}
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp
deleted file mode 100644
index f87738e3a6c..00000000000
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include <vespa/vespalib/hwaccelrated/avx2.h>
-#include <vespa/vespalib/hwaccelrated/avxprivate.hpp>
-
-namespace vespalib {
-
-namespace hwaccelrated {
-
-float
-Avx2Accelrator::dotProduct(const float * af, const float * bf, size_t sz) const
-{
-    return avx::dotProductSelectAlignment<float, 32>(af, bf, sz);
-}
-
-double
-Avx2Accelrator::dotProduct(const double * af, const double * bf, size_t sz) const
-{
-    return avx::dotProductSelectAlignment<double, 32>(af, bf, sz);
-}
-
-}
-}
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx2.h b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx2.h
deleted file mode 100644
index 56d3a8ac65e..00000000000
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx2.h
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include <vespa/vespalib/hwaccelrated/avx.h>
-
-namespace vespalib {
-
-namespace hwaccelrated {
-
-/**
- * Avx-512 implementation.
- */
-class Avx2Accelrator : public AvxAccelrator
-{
-public:
-    float dotProduct(const float * a, const float * b, size_t sz) const override;
-    double dotProduct(const double * a, const double * b, size_t sz) const override;
-};
-
-}
-}
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp
deleted file mode 100644
index 9f7a6dcda3e..00000000000
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include <vespa/vespalib/hwaccelrated/avx512.h>
-#include <vespa/vespalib/hwaccelrated/avxprivate.hpp>
-
-namespace vespalib {
-
-namespace hwaccelrated {
-
-float
-Avx512Accelrator::dotProduct(const float * af, const float * bf, size_t sz) const
-{
-    return avx::dotProductSelectAlignment<float, 64>(af, bf, sz);
-}
-
-double
-Avx512Accelrator::dotProduct(const double * af, const double * bf, size_t sz) const
-{
-    return avx::dotProductSelectAlignment<double, 64>(af, bf, sz);
-}
-
-}
-}
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx512.h b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx512.h
deleted file mode 100644
index 5d7028c30ba..00000000000
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx512.h
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include <vespa/vespalib/hwaccelrated/avx2.h>
-
-namespace vespalib {
-
-namespace hwaccelrated {
-
-/**
- * Avx-512 implementation.
- */
-class Avx512Accelrator : public Avx2Accelrator
-{
-public:
-    float dotProduct(const float * a, const float * b, size_t sz) const override;
-    double dotProduct(const double * a, const double * b, size_t sz) const override;
-};
-
-}
-}
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avxprivate.hpp b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avxprivate.hpp
deleted file mode 100644
index 87a043b3428..00000000000
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avxprivate.hpp
+++ /dev/null
@@ -1,84 +0,0 @@
-// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include <vespa/fastos/dynamiclibrary.h>
-
-namespace vespalib {
-
-namespace hwaccelrated {
-
-namespace avx {
-
-namespace {
-
-inline bool validAlignment(const void * p, const size_t align) {
-    return (reinterpret_cast<uint64_t>(p) & (align-1)) == 0;
-}
-
-template <typename T, typename V>
-T sumT(const V & v) {
-    T sum(0);
-    for (size_t i(0); i < (sizeof(V)/sizeof(T)); i++) {
-        sum += v[i];
-    }
-    return sum;
-}
-
-template <typename T, size_t VLEN, unsigned AlignA, unsigned AlignB, size_t VectorsPerChunk>
-static T computeDotProduct(const T * af, const T * bf, size_t sz) __attribute__((noinline));
-
-template <typename T, size_t VLEN, unsigned AlignA, unsigned AlignB, size_t VectorsPerChunk>
-T computeDotProduct(const T * af, const T * bf, size_t sz)
-{
-    constexpr const size_t ChunkSize = VLEN*VectorsPerChunk/sizeof(T);
-    typedef T V __attribute__ ((vector_size (VLEN)));
-    typedef T A __attribute__ ((vector_size (VLEN), aligned(AlignA)));
-    typedef T B __attribute__ ((vector_size (VLEN), aligned(AlignB)));
-    V partial[VectorsPerChunk];
-    memset(partial, 0, sizeof(partial));
-    const A * a = reinterpret_cast<const A *>(af);
-    const B * b = reinterpret_cast<const B *>(bf);
-
-    const size_t numChunks(sz/ChunkSize);
-    for (size_t i(0); i < numChunks; i++) {
-        for (size_t j(0); j < VectorsPerChunk; j++) {
-            partial[j] += a[VectorsPerChunk*i+j] * b[VectorsPerChunk*i+j];
-        }
-    }
-    T sum(0);
-    for (size_t i(numChunks*ChunkSize); i < sz; i++) {
-        sum += af[i] * bf[i];
-    }
-    for (size_t i(1); i < VectorsPerChunk; i++) {
-        partial[0] += partial[i];
-    }
-    return sum + sumT<T, V>(partial[0]);
-}
-
-}
-
-template <typename T, size_t VLEN, size_t VectorsPerChunk=4>
-VESPA_DLL_LOCAL static T dotProductSelectAlignment(const T * af, const T * bf, size_t sz);
-
-template <typename T, size_t VLEN, size_t VectorsPerChunk>
-T dotProductSelectAlignment(const T * af, const T * bf, size_t sz)
-{
-    if (validAlignment(af, VLEN)) {
-        if (validAlignment(bf, VLEN)) {
-            return computeDotProduct<T, VLEN, VLEN, VLEN, VectorsPerChunk>(af, bf, sz);
-        } else {
-            return computeDotProduct<T, VLEN, VLEN, 1, VectorsPerChunk>(af, bf, sz);
-        }
-    } else {
-        if (validAlignment(bf, VLEN)) {
-            return computeDotProduct<T, VLEN, 1, VLEN, VectorsPerChunk>(af, bf, sz);
-        } else {
-            return computeDotProduct<T, VLEN, 1, 1, VectorsPerChunk>(af, bf, sz);
-        }
-    }
-}
-
-}
-}
-}
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp b/staging_vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp
deleted file mode 100644
index f218e4172f9..00000000000
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp
+++ /dev/null
@@ -1,125 +0,0 @@
-// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-// Copyright (C) 2003 Fast Search & Transfer ASA
-// Copyright (C) 2003 Overture Services Norway AS
-
-#include <vespa/vespalib/hwaccelrated/generic.h>
-
-namespace vespalib {
-
-namespace hwaccelrated {
-
-namespace {
-
-template <typename ACCUM, typename T, size_t UNROLL>
-ACCUM
-multiplyAdd(const T * a, const T * b, size_t sz)
-{
-    ACCUM partial[UNROLL];
-    for (size_t i(0); i < UNROLL; i++) {
-        partial[i] = 0;
-    }
-    size_t i(0);
-    for (; i + UNROLL <= sz; i+= UNROLL) {
-        for (size_t j(0); j < UNROLL; j++) {
-            partial[j] += a[i+j] * b[i+j];
-        }
-    }
-    for (;i < sz; i++) {
-        partial[i%UNROLL] += a[i] * b[i];
-    }
-    ACCUM sum(0);
-    for (size_t j(0); j < UNROLL; j++) {
-        sum += partial[j];
-    }
-    return sum;
-}
-
-}
-
-float
-GenericAccelrator::dotProduct(const float * a, const float * b, size_t sz) const
-{
-    return multiplyAdd<float, float, 4>(a, b, sz);
-}
-
-double
-GenericAccelrator::dotProduct(const double * a, const double * b, size_t sz) const
-{
-    return multiplyAdd<double, double, 4>(a, b, sz);
-}
-
-int64_t
-GenericAccelrator::dotProduct(const int32_t * a, const int32_t * b, size_t sz) const
-{
-    return multiplyAdd<int64_t, int32_t, 4>(a, b, sz);
-}
-
-long long
-GenericAccelrator::dotProduct(const int64_t * a, const int64_t * b, size_t sz) const
-{
-    return multiplyAdd<long long, int64_t, 4>(a, b, sz);
-}
-
-void
-GenericAccelrator::orBit(void * aOrg, const void * bOrg, size_t bytes) const
-{
-    uint64_t *a(static_cast<uint64_t *>(aOrg));
-    const uint64_t *b(static_cast<const uint64_t *>(bOrg));
-    const size_t sz(bytes/sizeof(uint64_t));
-    for (size_t i(0); i < sz; i++) {
-        a[i] |= b[i];
-    }
-    uint8_t *ac(static_cast<uint8_t *>(aOrg));
-    const uint8_t *bc(static_cast<const uint8_t *>(bOrg));
-    for (size_t i(sz*sizeof(uint64_t)); i < bytes; i++) {
-        ac[i] |= bc[i];
-    }
-}
-
-void
-GenericAccelrator::andBit(void * aOrg, const void * bOrg, size_t bytes) const 
-{
-    uint64_t *a(static_cast<uint64_t *>(aOrg));
-    const uint64_t *b(static_cast<const uint64_t *>(bOrg));
-    const size_t sz(bytes/sizeof(uint64_t));
-    for (size_t i(0); i < sz; i++) {
-        a[i] &= b[i];
-    }
-    uint8_t *ac(static_cast<uint8_t *>(aOrg));
-    const uint8_t *bc(static_cast<const uint8_t *>(bOrg));
-    for (size_t i(sz*sizeof(uint64_t)); i < bytes; i++) {
-        ac[i] &= bc[i];
-    }
-}
-void
-GenericAccelrator::andNotBit(void * aOrg, const void * bOrg, size_t bytes) const 
-{
-    uint64_t *a(static_cast<uint64_t *>(aOrg));
-    const uint64_t *b(static_cast<const uint64_t *>(bOrg));
-    const size_t sz(bytes/sizeof(uint64_t));
-    for (size_t i(0); i < sz; i++) {
-        a[i] &= ~b[i];
-    }
-    uint8_t *ac(static_cast<uint8_t *>(aOrg));
-    const uint8_t *bc(static_cast<const uint8_t *>(bOrg));
-    for (size_t i(sz*sizeof(uint64_t)); i < bytes; i++) {
-        ac[i] &= ~bc[i];
-    }
-}
-
-void
-GenericAccelrator::notBit(void * aOrg, size_t bytes) const
-{
-    uint64_t *a(static_cast<uint64_t *>(aOrg));
-    const size_t sz(bytes/sizeof(uint64_t));
-    for (size_t i(0); i < sz; i++) {
-        a[i] = ~a[i];
-    }
-    uint8_t *ac(static_cast<uint8_t *>(aOrg));
-    for (size_t i(sz*sizeof(uint64_t)); i < bytes; i++) {
-        ac[i] = ~ac[i];
-    }
-}
-
-}
-}
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/generic.h b/staging_vespalib/src/vespa/vespalib/hwaccelrated/generic.h
deleted file mode 100644
index 0cb21b70ca3..00000000000
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/generic.h
+++ /dev/null
@@ -1,30 +0,0 @@
-// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-// Copyright (C) 2003 Fast Search & Transfer ASA
-// Copyright (C) 2003 Overture Services Norway AS
-
-#pragma once
-
-#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
-
-namespace vespalib {
-
-namespace hwaccelrated {
-
-/**
- * Generic cpu agnostic implementation.
- */
-class GenericAccelrator : public IAccelrated
-{
-public:
-    float dotProduct(const float * a, const float * b, size_t sz) const override;
-    double dotProduct(const double * a, const double * b, size_t sz) const override;
-    int64_t dotProduct(const int32_t * a, const int32_t * b, size_t sz) const override;
-    long long dotProduct(const int64_t * a, const int64_t * b, size_t sz) const override;
-    void orBit(void * a, const void * b, size_t bytes) const override;
-    void andBit(void * a, const void * b, size_t bytes) const override;
-    void andNotBit(void * a, const void * b, size_t bytes) const override;
-    void notBit(void * a, size_t bytes) const override;
-};
-
-}
-}
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp b/staging_vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp
deleted file mode 100644
index aede024f5af..00000000000
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp
+++ /dev/null
@@ -1,129 +0,0 @@
-// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-// Copyright (C) 2003 Fast Search & Transfer ASA
-// Copyright (C) 2003 Overture Services Norway AS
-
-#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
-#include <vespa/vespalib/hwaccelrated/generic.h>
-#include <vespa/vespalib/hwaccelrated/sse2.h>
-#include <vespa/vespalib/hwaccelrated/avx.h>
-#include <vespa/vespalib/hwaccelrated/avx2.h>
-#include <vespa/vespalib/hwaccelrated/avx512.h>
-#include <assert.h>
-
-namespace vespalib {
-
-namespace hwaccelrated {
-
-namespace {
-
-class Factory {
-public:
-    virtual ~Factory() { }
-    virtual IAccelrated::UP create() const = 0;
-};
-
-class GenericFactory :public Factory{
-public:
-    virtual IAccelrated::UP create() const { return IAccelrated::UP(new GenericAccelrator()); }
-};
-
-class Sse2Factory :public Factory{
-public:
-    virtual IAccelrated::UP create() const { return IAccelrated::UP(new Sse2Accelrator()); }
-};
-
-class AvxFactory :public Factory{
-public:
-    virtual IAccelrated::UP create() const { return IAccelrated::UP(new AvxAccelrator()); }
-};
-
-class Avx2Factory :public Factory{
-public:
-    virtual IAccelrated::UP create() const { return IAccelrated::UP(new Avx2Accelrator()); }
-};
-
-class Avx512Factory :public Factory{
-public:
-    virtual IAccelrated::UP create() const { return IAccelrated::UP(new Avx512Accelrator()); }
-};
-
-template<typename T>
-void verifyAccelrator(const IAccelrated & accel)
-{
-    const size_t testLength(127);
-    T * a = new T[testLength];
-    T * b = new T[testLength];
-    for (size_t j(0); j < 0x20; j++) {
-        T sum(0);
-        for (size_t i(j); i < testLength; i++) {
-            a[i] = b[i] = i;
-            sum += i*i;
-        }
-        T hwComputedSum(accel.dotProduct(&a[j], &b[j], testLength - j));
-        assert(sum == hwComputedSum);
-    }
-    delete [] a;
-    delete [] b;
-}
-
-class RuntimeVerificator
-{
-public:
-    RuntimeVerificator();
-};
-
-RuntimeVerificator::RuntimeVerificator()
-{
-   GenericAccelrator generic;
-   verifyAccelrator<float>(generic); 
-   verifyAccelrator<double>(generic); 
-   verifyAccelrator<int32_t>(generic); 
-   verifyAccelrator<int64_t>(generic); 
-
-   IAccelrated::UP thisCpu(IAccelrated::getAccelrator());
-   verifyAccelrator<float>(*thisCpu); 
-   verifyAccelrator<double>(*thisCpu); 
-   verifyAccelrator<int32_t>(*thisCpu); 
-   verifyAccelrator<int64_t>(*thisCpu); 
-   
-}
-
-class Selector
-{
-public:
-    Selector() __attribute__((noinline));
-    IAccelrated::UP create() { return _factory->create(); }
-private:
-    std::unique_ptr<Factory> _factory;
-};
-
-Selector::Selector() :
-    _factory(new GenericFactory())
-{
-    __builtin_cpu_init ();
-    if (__builtin_cpu_supports("avx512f")) {
-        _factory.reset(new Avx512Factory());
-    } else if (__builtin_cpu_supports("avx2")) {
-        _factory.reset(new Avx2Factory());
-    } else if (__builtin_cpu_supports("avx")) {
-        _factory.reset(new AvxFactory());
-    } else if (__builtin_cpu_supports("sse2")) {
-        _factory.reset(new Sse2Factory());
-    }
-}
-
-}
-
-static Selector _G_selector;
-
-RuntimeVerificator _G_verifyAccelrator;
-
-
-IAccelrated::UP
-IAccelrated::getAccelrator()
-{
-    return _G_selector.create();
-}
-
-}
-}
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h b/staging_vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h
deleted file mode 100644
index b8c7794a386..00000000000
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h
+++ /dev/null
@@ -1,36 +0,0 @@
-// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-// Copyright (C) 2003 Fast Search & Transfer ASA
-// Copyright (C) 2003 Overture Services Norway AS
-
-#pragma once
-
-#include <memory>
-#include <stdint.h>
-
-namespace vespalib {
-
-namespace hwaccelrated {
-
-/**
- * This contains an interface to all primitives that has different cpu supported accelrations.
- * The actual implementation you get by calling the the static getAccelrator method.
- */
-class IAccelrated
-{
-public:
-    virtual ~IAccelrated() { }
-    typedef std::unique_ptr<IAccelrated> UP;
-    virtual float dotProduct(const float * a, const float * b, size_t sz) const = 0;
-    virtual double dotProduct(const double * a, const double * b, size_t sz) const = 0;
-    virtual int64_t dotProduct(const int32_t * a, const int32_t * b, size_t sz) const = 0;
-    virtual long long dotProduct(const int64_t * a, const int64_t * b, size_t sz) const = 0;
-    virtual void orBit(void * a, const void * b, size_t bytes) const = 0;
-    virtual void andBit(void * a, const void * b, size_t bytes) const = 0;
-    virtual void andNotBit(void * a, const void * b, size_t bytes) const = 0;
-    virtual void notBit(void * a, size_t bytes) const = 0;
-
-    static IAccelrated::UP getAccelrator() __attribute__((noinline));
-};
-
-}
-}
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/sse2.cpp b/staging_vespalib/src/vespa/vespalib/hwaccelrated/sse2.cpp
deleted file mode 100644
index 11333d62f78..00000000000
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/sse2.cpp
+++ /dev/null
@@ -1,84 +0,0 @@
-// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-// Copyright (C) 2003 Fast Search & Transfer ASA
-// Copyright (C) 2003 Overture Services Norway AS
-
-#include <vespa/vespalib/hwaccelrated/sse2.h>
-
-namespace vespalib {
-
-namespace hwaccelrated {
-
-namespace {
-
-bool validAlignment16(const void * p) {
-    return (reinterpret_cast<uint64_t>(p) & 0xful) == 0;
-}
-
-bool validAlignment16(const void * a, const void * b) {
-    return validAlignment16(a) && validAlignment16(b);
-}
-
-}
-
-float
-Sse2Accelrator::dotProduct(const float * af, const float * bf, size_t sz) const
-{
-    if ( ! validAlignment16(af, bf)) {
-        return GenericAccelrator::dotProduct(af, bf, sz);
-    }
-    typedef float v4sf __attribute__ ((vector_size (16)));
-    const size_t ChunkSize(16);
-    const size_t VectorsPerChunk(ChunkSize/4);
-    v4sf partial[VectorsPerChunk] = { {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0}, {0.0, 0.0, 0.0, 0.0} };
-    const v4sf * a = reinterpret_cast<const v4sf *>(af);
-    const v4sf * b = reinterpret_cast<const v4sf *>(bf);
-
-    const size_t numChunks(sz/ChunkSize);
-    for (size_t i(0); i < numChunks; i++) {
-        for (size_t j(0); j < VectorsPerChunk; j++) {
-            partial[j] += a[VectorsPerChunk*i+j] * b[VectorsPerChunk*i+j];
-        }
-    }
-    float sum(0);
-    for (size_t i(numChunks*ChunkSize); i < sz; i++) {
-        sum += af[i] * bf[i];
-    }
-    for (size_t i(1); i < VectorsPerChunk; i++) {
-        partial[0] += partial[i];
-    }
-    sum += partial[0][0] + partial[0][1] + partial[0][2] + partial[0][3];
-    return sum; 
-}
-
-double
-Sse2Accelrator::dotProduct(const double * af, const double * bf, size_t sz) const
-{
-    if ( ! validAlignment16(af, bf)) {
-        return GenericAccelrator::dotProduct(af, bf, sz);
-    }
-    typedef double v2sd __attribute__ ((vector_size (16)));
-    const size_t ChunkSize(8);
-    const size_t VectorsPerChunk(ChunkSize/2);
-    v2sd partial[VectorsPerChunk] = { {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0} };
-    const v2sd * a = reinterpret_cast<const v2sd *>(af);
-    const v2sd * b = reinterpret_cast<const v2sd *>(bf);
-
-    const size_t numChunks(sz/ChunkSize);
-    for (size_t i(0); i < numChunks; i++) {
-        for (size_t j(0); j < VectorsPerChunk; j++) {
-            partial[j] += a[VectorsPerChunk*i+j] * b[VectorsPerChunk*i+j];
-        }
-    }
-    double sum(0);
-    for (size_t i(numChunks*ChunkSize); i < sz; i++) {
-        sum += af[i] * bf[i];
-    }
-    for (size_t i(1); i < VectorsPerChunk; i++) {
-        partial[0] += partial[i];
-    }
-    sum += partial[0][0] + partial[0][1];
-    return sum; 
-}
-
-}
-}
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/sse2.h b/staging_vespalib/src/vespa/vespalib/hwaccelrated/sse2.h
deleted file mode 100644
index a7c39581997..00000000000
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/sse2.h
+++ /dev/null
@@ -1,24 +0,0 @@
-// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-// Copyright (C) 2003 Fast Search & Transfer ASA
-// Copyright (C) 2003 Overture Services Norway AS
-
-#pragma once
-
-#include <vespa/vespalib/hwaccelrated/generic.h>
-
-namespace vespalib {
-
-namespace hwaccelrated {
-
-/**
- * Generic cpu agnostic implementation.
- */
-class Sse2Accelrator : public GenericAccelrator
-{
-public:
-    float dotProduct(const float * a, const float * b, size_t sz) const override;
-    double dotProduct(const double * a, const double * b, size_t sz) const override;
-};
-
-}
-}
author	Geir Storli <geirst@yahoo-inc.com>	2016-10-24 14:28:00 +0200
committer	Geir Storli <geirst@yahoo-inc.com>	2016-10-24 14:28:00 +0200
commit	292817945280d84896d3b137d3c2c2385d7b15b4 (patch)
tree	3f457c1630a9e6070eab59edc9b1e2bf2fe99d7e /staging_vespalib
parent	b73bd2dded336d472a67c53539500160d6d28796 (diff)