1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
|
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
#include <vespa/vespalib/hwaccelrated/generic.h>
#include <vespa/vespalib/util/time.h>
#include <cerrno>
#include <cinttypes>
#include <cstdio>
#include <cstdlib>
#include <vector>
using namespace vespalib;
/**
 * Builds a vector of sz elements where each element is assigned rand() % 128.
 *
 * Elements are filled front to back with one rand() call per element, so the
 * content is fully determined by the state of the rand() generator on entry.
 *
 * @param sz number of elements to create
 * @return the filled vector
 */
template<typename T>
std::vector<T> createAndFill(size_t sz) {
    std::vector<T> values(sz);
    for (T & element : values) {
        element = static_cast<T>(rand() % 128);
    }
    return values;
}
/**
 * Times repeated squared Euclidean distance computations for element type T.
 *
 * Seeds rand() with 1 before building the two input vectors, so every call
 * with the same sz works on the same data. The distance between the vectors
 * is then computed count times, and the accumulated result is printed along
 * with the elapsed time as reported by count_ms().
 *
 * @param accel accelerator whose squaredEuclideanDistance() is measured
 * @param sz    number of elements in each input vector
 * @param count number of distance computations to run
 */
template<typename T>
void
benchmarkEuclideanDistance(const hwaccelrated::IAccelrated & accel, size_t sz, size_t count) {
    srand(1);
    // Fill order matters: both vectors draw from the same rand() sequence.
    std::vector<T> a = createAndFill<T>(sz);
    std::vector<T> b = createAndFill<T>(sz);
    steady_time start = steady_clock::now();
    double sumOfSums(0);
    for (size_t j(0); j < count; j++) {
        // data() is well defined for empty vectors, whereas &a[0] is
        // undefined behaviour when sz == 0.
        double sum = accel.squaredEuclideanDistance(a.data(), b.data(), sz);
        sumOfSums += sum;
    }
    duration elapsed = steady_clock::now() - start;
    printf("sum=%f of N=%zu and vector length=%zu took %" PRId64 "\n", sumOfSums, count, sz, count_ms(elapsed));
}
/**
 * Runs benchmarkEuclideanDistance() once for each of the element types
 * double, float and int8_t, writing a label to stdout before each run.
 *
 * @param accelrator accelerator passed on to every run
 * @param sz         vector length passed on to every run
 * @param count      iteration count passed on to every run
 */
void
benchMarkEuclidianDistance(const hwaccelrated::IAccelrated & accelrator, size_t sz, size_t count) {
    fputs("double : ", stdout);
    benchmarkEuclideanDistance<double>(accelrator, sz, count);

    fputs("float : ", stdout);
    benchmarkEuclideanDistance<float>(accelrator, sz, count);

    fputs("int8_t : ", stdout);
    benchmarkEuclideanDistance<int8_t>(accelrator, sz, count);
}
/**
 * Parses a command-line argument as a non-negative decimal integer.
 *
 * Rejects empty or non-numeric text, trailing characters, negative values and
 * values that do not fit the target type, instead of silently turning them
 * into 0 or a wrapped-around size.
 *
 * @param text argument text to parse
 * @param out  receives the parsed value; left untouched on failure
 * @return true if text was a valid non-negative integer
 */
static bool
parseBenchmarkArg(const char * text, size_t & out) {
    errno = 0;
    char * end = nullptr;
    const long long parsed = strtoll(text, &end, 10);
    if ((end == text) || (*end != '\0') || (errno == ERANGE) || (parsed < 0)) {
        return false;
    }
    const size_t converted = static_cast<size_t>(parsed);
    // Guard against platforms where size_t is narrower than long long.
    if (static_cast<unsigned long long>(converted) != static_cast<unsigned long long>(parsed)) {
        return false;
    }
    out = converted;
    return true;
}

/**
 * Benchmark entry point.
 *
 * Usage: <program> [vector-length [iterations]]
 * The vector length defaults to 1000 and the iteration count to 1000000.
 * The benchmark is run first with hwaccelrated::GenericAccelrator and then
 * with the accelerator returned by hwaccelrated::IAccelrated::getAccelerator().
 *
 * @return 0 on success, 1 if an argument is not a valid non-negative integer
 */
int main(int argc, char *argv[]) {
    size_t length = 1000;
    size_t count = 1000000;
    const bool lengthOk = (argc <= 1) || parseBenchmarkArg(argv[1], length);
    const bool countOk = (argc <= 2) || parseBenchmarkArg(argv[2], count);
    if (!lengthOk || !countOk) {
        fprintf(stderr, "usage: %s [vector-length [iterations]]\n", argv[0]);
        return 1;
    }
    printf("%s %zu %zu\n", argv[0], length, count);
    printf("Squared Euclidian Distance - Generic\n");
    benchMarkEuclidianDistance(hwaccelrated::GenericAccelrator(), length, count);
    printf("Squared Euclidian Distance - Optimized for this cpu\n");
    benchMarkEuclidianDistance(hwaccelrated::IAccelrated::getAccelerator(), length, count);
    return 0;
}
|