about summary refs log tree commit diff stats
path: root/staging_vespalib
diff options
context:
space:
mode:
author: Henning Baldersheim <balder@yahoo-inc.com> 2016-10-11 21:19:38 +0000
committer: Henning Baldersheim <balder@yahoo-inc.com> 2016-10-11 21:19:38 +0000
commit: 4ff929491b2966e91cefdb473470a628488205c8 (patch)
tree: b8897cb8cba54a52a6f1157443e42172dea49506 /staging_vespalib
parent: b12b624cde3cc4c8137ce6fce8599d3243c72e7d (diff)
Differentiate between avx, avx2 and avx512f
Diffstat (limited to 'staging_vespalib')
-rw-r--r-- staging_vespalib/src/vespa/vespalib/hwaccelrated/CMakeLists.txt | 4
-rw-r--r-- staging_vespalib/src/vespa/vespalib/hwaccelrated/avx.cpp | 88
-rw-r--r-- staging_vespalib/src/vespa/vespalib/hwaccelrated/avx.h | 14
-rw-r--r-- staging_vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp | 86
-rw-r--r-- staging_vespalib/src/vespa/vespalib/hwaccelrated/avx2.h | 13
-rw-r--r-- staging_vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp | 23
-rw-r--r-- staging_vespalib/src/vespa/vespalib/hwaccelrated/avx512.h | 22
-rw-r--r-- staging_vespalib/src/vespa/vespalib/hwaccelrated/avxprivate.hpp | 123
-rw-r--r-- staging_vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp | 10
-rw-r--r-- staging_vespalib/src/vespa/vespalib/hwaccelrated/sse2.h | 4
10 files changed, 194 insertions, 193 deletions
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/CMakeLists.txt b/staging_vespalib/src/vespa/vespalib/hwaccelrated/CMakeLists.txt
index 2495c106277..1c80add3d8e 100644
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/CMakeLists.txt
+++ b/staging_vespalib/src/vespa/vespalib/hwaccelrated/CMakeLists.txt
@@ -6,7 +6,9 @@ vespa_add_library(staging_vespalib_vespalib_hwaccelrated OBJECT
sse2.cpp
avx.cpp
avx2.cpp
+ avx512.cpp
DEPENDS
)
set_source_files_properties(avx.cpp PROPERTIES COMPILE_FLAGS -march=sandybridge)
-set_source_files_properties(avx2.cpp PROPERTIES COMPILE_FLAGS -march=broadwell)
+set_source_files_properties(avx2.cpp PROPERTIES COMPILE_FLAGS -march=haswell)
+set_source_files_properties(avx512.cpp PROPERTIES COMPILE_FLAGS -march=skylake) # should be skylake-avx512 when assembler supports it.
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx.cpp b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx.cpp
index d9e0cc41882..ec5064bf647 100644
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx.cpp
+++ b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx.cpp
@@ -1,104 +1,22 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-// Copyright (C) 2003 Fast Search & Transfer ASA
-// Copyright (C) 2003 Overture Services Norway AS
#include <vespa/vespalib/hwaccelrated/avx.h>
+#include <vespa/vespalib/hwaccelrated/avxprivate.hpp>
namespace vespalib {
namespace hwaccelrated {
-namespace {
-
-bool validAlignment32(const void * p) {
- return (reinterpret_cast<uint64_t>(p) & 0x1ful) == 0;
-}
-
-template <typename T>
-class TypeSpecifics { };
-
-template <>
-struct TypeSpecifics<float> {
- static constexpr const size_t V_SZ = 32;
- typedef float V __attribute__ ((vector_size (V_SZ)));
- static constexpr const size_t VectorsPerChunk = 4;
- static constexpr const V zero = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
- static float sum(V v) {
- return v[0] + v[1] + v[2] + v[3] + v[4] + v[5] + v[6] + v[7];
- }
-};
-
-template <>
-struct TypeSpecifics<double> {
- static constexpr const size_t V_SZ = 32;
- typedef double V __attribute__ ((vector_size (V_SZ)));
- static constexpr const size_t VectorsPerChunk = 4;
- static constexpr const V zero = {0.0, 0.0, 0.0, 0.0};
- static float sum(V v) {
- return v[0] + v[1] + v[2] + v[3];
- }
-};
-
-}
-
-template <typename T, unsigned AlignA, unsigned AlignB>
-T
-AvxAccelrator::computeDotProduct(const T * af, const T * bf, size_t sz)
-{
- using TT = TypeSpecifics<T>;
- constexpr const size_t ChunkSize = TT::V_SZ*4/sizeof(T);
- constexpr const size_t VectorsPerChunk = TT::VectorsPerChunk;
- typename TT::V partial[VectorsPerChunk] = { TT::zero, TT::zero, TT::zero, TT::zero};
- typedef T A __attribute__ ((vector_size (TT::V_SZ), aligned(AlignA)));
- typedef T B __attribute__ ((vector_size (TT::V_SZ), aligned(AlignB)));
- const A * a = reinterpret_cast<const A *>(af);
- const B * b = reinterpret_cast<const B *>(bf);
-
- const size_t numChunks(sz/ChunkSize);
- for (size_t i(0); i < numChunks; i++) {
- for (size_t j(0); j < VectorsPerChunk; j++) {
- partial[j] += a[VectorsPerChunk*i+j] * b[VectorsPerChunk*i+j];
- }
- }
- T sum(0);
- for (size_t i(numChunks*ChunkSize); i < sz; i++) {
- sum += af[i] * bf[i];
- }
- for (size_t i(1); i < VectorsPerChunk; i++) {
- partial[0] += partial[i];
- }
- return sum + TT::sum(partial[0]);
-}
-
-template <typename T>
-T
-AvxAccelrator::dotProductSelectAlignment(const T * af, const T * bf, size_t sz)
-{
- if (validAlignment32(af)) {
- if (validAlignment32(bf)) {
- return computeDotProduct<T, 32, 32>(af, bf, sz);
- } else {
- return computeDotProduct<T, 32, 1>(af, bf, sz);
- }
- } else {
- if (validAlignment32(bf)) {
- return computeDotProduct<T, 1, 32>(af, bf, sz);
- } else {
- return computeDotProduct<T, 1, 1>(af, bf, sz);
- }
- }
-}
-
float
AvxAccelrator::dotProduct(const float * af, const float * bf, size_t sz) const
{
- return dotProductSelectAlignment(af, bf, sz);
+ return avx::dotProductSelectAlignment<float, 32>(af, bf, sz);
}
double
AvxAccelrator::dotProduct(const double * af, const double * bf, size_t sz) const
{
- return dotProductSelectAlignment(af, bf, sz);
+ return avx::dotProductSelectAlignment<double, 32>(af, bf, sz);
}
}
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx.h b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx.h
index ed833713c2d..4b391c163ac 100644
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx.h
+++ b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx.h
@@ -1,29 +1,21 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-// Copyright (C) 2003 Fast Search & Transfer ASA
-// Copyright (C) 2003 Overture Services Norway AS
#pragma once
#include <vespa/vespalib/hwaccelrated/sse2.h>
-#include <vespa/fastos/dynamiclibrary.h>
namespace vespalib {
namespace hwaccelrated {
/**
- * Generic cpu agnostic implementation.
+ * AVX implementation (256-bit vectors).
*/
class AvxAccelrator : public Sse2Accelrator
{
public:
- virtual float dotProduct(const float * a, const float * b, size_t sz) const;
- virtual double dotProduct(const double * a, const double * b, size_t sz) const;
-private:
- template <typename T>
- VESPA_DLL_LOCAL static T dotProductSelectAlignment(const T * af, const T * bf, size_t sz);
- template <typename T, unsigned AlignA, unsigned AlignB>
- VESPA_DLL_LOCAL static T computeDotProduct(const T * af, const T * bf, size_t sz) __attribute__((noinline));
+ float dotProduct(const float * a, const float * b, size_t sz) const override;
+ double dotProduct(const double * a, const double * b, size_t sz) const override;
};
}
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp
index 867b39ec326..f87738e3a6c 100644
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp
+++ b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp
@@ -1,102 +1,22 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/vespalib/hwaccelrated/avx2.h>
+#include <vespa/vespalib/hwaccelrated/avxprivate.hpp>
namespace vespalib {
namespace hwaccelrated {
-namespace {
-
-bool validAlignment32(const void * p) {
- return (reinterpret_cast<uint64_t>(p) & 0x1ful) == 0;
-}
-
-template <typename T>
-class TypeSpecifics { };
-
-template <>
-struct TypeSpecifics<float> {
- static constexpr const size_t V_SZ = 32;
- typedef float V __attribute__ ((vector_size (V_SZ)));
- static constexpr const size_t VectorsPerChunk = 4;
- static constexpr const V zero = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
- static float sum(V v) {
- return v[0] + v[1] + v[2] + v[3] + v[4] + v[5] + v[6] + v[7];
- }
-};
-
-template <>
-struct TypeSpecifics<double> {
- static constexpr const size_t V_SZ = 32;
- typedef double V __attribute__ ((vector_size (V_SZ)));
- static constexpr const size_t VectorsPerChunk = 4;
- static constexpr const V zero = {0.0, 0.0, 0.0, 0.0};
- static float sum(V v) {
- return v[0] + v[1] + v[2] + v[3];
- }
-};
-
-}
-
-template <typename T, unsigned AlignA, unsigned AlignB>
-T
-Avx2Accelrator::computeDotProduct(const T * af, const T * bf, size_t sz)
-{
- using TT = TypeSpecifics<T>;
- constexpr const size_t ChunkSize = TT::V_SZ*4/sizeof(T);
- constexpr const size_t VectorsPerChunk = TT::VectorsPerChunk;
- typename TT::V partial[VectorsPerChunk] = { TT::zero, TT::zero, TT::zero, TT::zero};
- typedef T A __attribute__ ((vector_size (TT::V_SZ), aligned(AlignA)));
- typedef T B __attribute__ ((vector_size (TT::V_SZ), aligned(AlignB)));
- const A * a = reinterpret_cast<const A *>(af);
- const B * b = reinterpret_cast<const B *>(bf);
-
- const size_t numChunks(sz/ChunkSize);
- for (size_t i(0); i < numChunks; i++) {
- for (size_t j(0); j < VectorsPerChunk; j++) {
- partial[j] += a[VectorsPerChunk*i+j] * b[VectorsPerChunk*i+j];
- }
- }
- T sum(0);
- for (size_t i(numChunks*ChunkSize); i < sz; i++) {
- sum += af[i] * bf[i];
- }
- for (size_t i(1); i < VectorsPerChunk; i++) {
- partial[0] += partial[i];
- }
- return sum + TT::sum(partial[0]);
-}
-
-template <typename T>
-T
-Avx2Accelrator::dotProductSelectAlignment(const T * af, const T * bf, size_t sz)
-{
- if (validAlignment32(af)) {
- if (validAlignment32(bf)) {
- return computeDotProduct<T, 32, 32>(af, bf, sz);
- } else {
- return computeDotProduct<T, 32, 1>(af, bf, sz);
- }
- } else {
- if (validAlignment32(bf)) {
- return computeDotProduct<T, 1, 32>(af, bf, sz);
- } else {
- return computeDotProduct<T, 1, 1>(af, bf, sz);
- }
- }
-}
-
float
Avx2Accelrator::dotProduct(const float * af, const float * bf, size_t sz) const
{
- return dotProductSelectAlignment(af, bf, sz);
+ return avx::dotProductSelectAlignment<float, 32>(af, bf, sz);
}
double
Avx2Accelrator::dotProduct(const double * af, const double * bf, size_t sz) const
{
- return dotProductSelectAlignment(af, bf, sz);
+ return avx::dotProductSelectAlignment<double, 32>(af, bf, sz);
}
}
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx2.h b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx2.h
index e434bfc84e6..56d3a8ac65e 100644
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx2.h
+++ b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx2.h
@@ -1,6 +1,4 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-// Copyright (C) 2003 Fast Search & Transfer ASA
-// Copyright (C) 2003 Overture Services Norway AS
#pragma once
@@ -11,18 +9,13 @@ namespace vespalib {
namespace hwaccelrated {
/**
- * Generic cpu agnostic implementation.
+ * Avx2 implementation.
*/
class Avx2Accelrator : public AvxAccelrator
{
public:
- virtual float dotProduct(const float * a, const float * b, size_t sz) const;
- virtual double dotProduct(const double * a, const double * b, size_t sz) const;
-private:
- template <typename T>
- VESPA_DLL_LOCAL static T dotProductSelectAlignment(const T * af, const T * bf, size_t sz);
- template <typename T, unsigned AlignA, unsigned AlignB>
- VESPA_DLL_LOCAL static T computeDotProduct(const T * af, const T * bf, size_t sz) __attribute__((noinline));
+ float dotProduct(const float * a, const float * b, size_t sz) const override;
+ double dotProduct(const double * a, const double * b, size_t sz) const override;
};
}
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp
new file mode 100644
index 00000000000..9f7a6dcda3e
--- /dev/null
+++ b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp
@@ -0,0 +1,23 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/hwaccelrated/avx512.h>
+#include <vespa/vespalib/hwaccelrated/avxprivate.hpp>
+
+namespace vespalib {
+
+namespace hwaccelrated {
+
+float
+Avx512Accelrator::dotProduct(const float * af, const float * bf, size_t sz) const
+{
+ return avx::dotProductSelectAlignment<float, 64>(af, bf, sz);
+}
+
+double
+Avx512Accelrator::dotProduct(const double * af, const double * bf, size_t sz) const
+{
+ return avx::dotProductSelectAlignment<double, 64>(af, bf, sz);
+}
+
+}
+}
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx512.h b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx512.h
new file mode 100644
index 00000000000..5d7028c30ba
--- /dev/null
+++ b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avx512.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/hwaccelrated/avx2.h>
+
+namespace vespalib {
+
+namespace hwaccelrated {
+
+/**
+ * Avx-512 implementation.
+ */
+class Avx512Accelrator : public Avx2Accelrator
+{
+public:
+ float dotProduct(const float * a, const float * b, size_t sz) const override;
+ double dotProduct(const double * a, const double * b, size_t sz) const override;
+};
+
+}
+}
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/avxprivate.hpp b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avxprivate.hpp
new file mode 100644
index 00000000000..5491fe1eef7
--- /dev/null
+++ b/staging_vespalib/src/vespa/vespalib/hwaccelrated/avxprivate.hpp
@@ -0,0 +1,123 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/fastos/dynamiclibrary.h>
+
+namespace vespalib {
+
+namespace hwaccelrated {
+
+namespace avx {
+
+namespace {
+
+inline bool validAlignment(const void * p, const size_t align) {
+ return (reinterpret_cast<uint64_t>(p) & (align-1)) == 0;
+}
+
+template <typename T, typename V>
+T sumT(const V & v) {
+ T sum(0);
+ for (size_t i(0); i < (sizeof(V)/sizeof(T)); i++) {
+ sum += v[i];
+ }
+ return sum;
+}
+
+template <typename T, size_t VLEN>
+class TypeSpecifics { };
+
+template <>
+struct TypeSpecifics<float, 32u> {
+ static constexpr const size_t V_SZ = 32u;
+ typedef float V __attribute__ ((vector_size (V_SZ)));
+ static constexpr const size_t VectorsPerChunk = 4;
+ static constexpr const V zero = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
+ static float sum(const V & v) { return sumT<float, V>(v); }
+};
+
+template <>
+struct TypeSpecifics<double, 32u> {
+ static constexpr const size_t V_SZ = 32u;
+ typedef double V __attribute__ ((vector_size (V_SZ)));
+ static constexpr const size_t VectorsPerChunk = 4;
+ static constexpr const V zero = {0.0, 0.0, 0.0, 0.0};
+ static double sum(const V & v) { return sumT<double, V>(v); }
+};
+
+template <>
+struct TypeSpecifics<float, 64u> {
+ static constexpr const size_t V_SZ = 64u;
+ typedef float V __attribute__ ((vector_size (V_SZ)));
+ static constexpr const size_t VectorsPerChunk = 4;
+ static constexpr const V zero = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
+ static float sum(const V & v) { return sumT<float, V>(v); }
+};
+
+template <>
+struct TypeSpecifics<double, 64u> {
+ static constexpr const size_t V_SZ = 64u;
+ typedef double V __attribute__ ((vector_size (V_SZ)));
+ static constexpr const size_t VectorsPerChunk = 4;
+ static constexpr const V zero = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
+ static double sum(const V & v) { return sumT<double, V>(v); }
+};
+
+template <typename T, size_t VLEN, unsigned AlignA, unsigned AlignB>
+static T computeDotProduct(const T * af, const T * bf, size_t sz) __attribute__((noinline));
+
+template <typename T, size_t VLEN, unsigned AlignA, unsigned AlignB>
+T computeDotProduct(const T * af, const T * bf, size_t sz)
+{
+ using TT = TypeSpecifics<T, VLEN>;
+ constexpr const size_t ChunkSize = TT::V_SZ*4/sizeof(T);
+ constexpr const size_t VectorsPerChunk = TT::VectorsPerChunk;
+ typename TT::V partial[VectorsPerChunk] = { TT::zero, TT::zero, TT::zero, TT::zero};
+ typedef T A __attribute__ ((vector_size (TT::V_SZ), aligned(AlignA)));
+ typedef T B __attribute__ ((vector_size (TT::V_SZ), aligned(AlignB)));
+ const A * a = reinterpret_cast<const A *>(af);
+ const B * b = reinterpret_cast<const B *>(bf);
+
+ const size_t numChunks(sz/ChunkSize);
+ for (size_t i(0); i < numChunks; i++) {
+ for (size_t j(0); j < VectorsPerChunk; j++) {
+ partial[j] += a[VectorsPerChunk*i+j] * b[VectorsPerChunk*i+j];
+ }
+ }
+ T sum(0);
+ for (size_t i(numChunks*ChunkSize); i < sz; i++) {
+ sum += af[i] * bf[i];
+ }
+ for (size_t i(1); i < VectorsPerChunk; i++) {
+ partial[0] += partial[i];
+ }
+ return sum + TT::sum(partial[0]);
+}
+
+}
+
+template <typename T, size_t VLEN>
+VESPA_DLL_LOCAL static T dotProductSelectAlignment(const T * af, const T * bf, size_t sz);
+
+template <typename T, size_t VLEN>
+T dotProductSelectAlignment(const T * af, const T * bf, size_t sz)
+{
+ if (validAlignment(af, VLEN)) {
+ if (validAlignment(bf, VLEN)) {
+ return computeDotProduct<T, VLEN, VLEN, VLEN>(af, bf, sz);
+ } else {
+ return computeDotProduct<T, VLEN, VLEN, 1>(af, bf, sz);
+ }
+ } else {
+ if (validAlignment(bf, VLEN)) {
+ return computeDotProduct<T, VLEN, 1, VLEN>(af, bf, sz);
+ } else {
+ return computeDotProduct<T, VLEN, 1, 1>(af, bf, sz);
+ }
+ }
+}
+
+}
+}
+}
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp b/staging_vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp
index 769d40fd0ec..aede024f5af 100644
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp
+++ b/staging_vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp
@@ -7,6 +7,7 @@
#include <vespa/vespalib/hwaccelrated/sse2.h>
#include <vespa/vespalib/hwaccelrated/avx.h>
#include <vespa/vespalib/hwaccelrated/avx2.h>
+#include <vespa/vespalib/hwaccelrated/avx512.h>
#include <assert.h>
namespace vespalib {
@@ -41,6 +42,11 @@ public:
virtual IAccelrated::UP create() const { return IAccelrated::UP(new Avx2Accelrator()); }
};
+class Avx512Factory :public Factory{
+public:
+ virtual IAccelrated::UP create() const { return IAccelrated::UP(new Avx512Accelrator()); }
+};
+
template<typename T>
void verifyAccelrator(const IAccelrated & accel)
{
@@ -95,7 +101,9 @@ Selector::Selector() :
_factory(new GenericFactory())
{
__builtin_cpu_init ();
- if (__builtin_cpu_supports("avx2")) {
+ if (__builtin_cpu_supports("avx512f")) {
+ _factory.reset(new Avx512Factory());
+ } else if (__builtin_cpu_supports("avx2")) {
_factory.reset(new Avx2Factory());
} else if (__builtin_cpu_supports("avx")) {
_factory.reset(new AvxFactory());
diff --git a/staging_vespalib/src/vespa/vespalib/hwaccelrated/sse2.h b/staging_vespalib/src/vespa/vespalib/hwaccelrated/sse2.h
index 86fbc41a486..a7c39581997 100644
--- a/staging_vespalib/src/vespa/vespalib/hwaccelrated/sse2.h
+++ b/staging_vespalib/src/vespa/vespalib/hwaccelrated/sse2.h
@@ -16,8 +16,8 @@ namespace hwaccelrated {
class Sse2Accelrator : public GenericAccelrator
{
public:
- virtual float dotProduct(const float * a, const float * b, size_t sz) const;
- virtual double dotProduct(const double * a, const double * b, size_t sz) const;
+ float dotProduct(const float * a, const float * b, size_t sz) const override;
+ double dotProduct(const double * a, const double * b, size_t sz) const override;
};
}