path: root/vespajlib/src/test/java/com/yahoo/tensor/TensorFunctionBenchmark.java
blob: 33ad01b49c532271fd62c4e5a877c1bd3a462210
package com.yahoo.tensor;

import com.yahoo.tensor.functions.Reduce;

import java.util.ArrayList;
import java.util.List;
import java.util.Random;

/**
 * Microbenchmark of tensor dot product operations, comparing many single-vector tensors
 * with one tensor containing all vectors.
 *
 * @author bratseth
 */
public class TensorFunctionBenchmark {

    private final static Random random = new Random();
    
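    /**
     * Runs the benchmark: a random 300-element query vector is joined with the given model
     * vectors for the given number of iterations, after a short warmup.
     *
     * @return the average time per join in milliseconds
     */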
    public double benchmark(int iterations, List<Tensor> modelVectors) {
        Tensor queryVector = generateVectors(1, 300).get(0);
        dotProduct(queryVector, modelVectors, 10); // warmup
        long startTime = System.currentTimeMillis();
        dotProduct(queryVector, modelVectors, iterations);
        long totalTime = System.currentTimeMillis() - startTime;
        return (double) totalTime / iterations;
    }

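    /** Repeats the dot product computation the given number of times and returns the last result. */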
    private double dotProduct(Tensor tensor, List<Tensor> tensors, int iterations) {
        double result = 0;
        for (int i = 0; i < iterations; i++)
            result = dotProduct(tensor, tensors);
        return result;
    }

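    /**
     * Computes the largest dot product between the query tensor and the vectors represented by
     * the given tensors: the join multiplies matching cells and the sum reduce over "x" yields
     * the dot product(s). When all vectors are encoded in one tensor, the max reduce collapses
     * the "i" dimension; otherwise it reduces a single scalar and is a no-op.
     */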
    private double dotProduct(Tensor tensor, List<Tensor> tensors) {
        double largest = Double.NEGATIVE_INFINITY;
        for (Tensor tensorElement : tensors) { // tensors.size() == 1 when all vectors are encoded in one tensor
            Tensor result = tensor.join(tensorElement, (a, b) -> a * b).reduce(Reduce.Aggregator.sum, "x");
            double dotProduct = result.reduce(Reduce.Aggregator.max).asDouble(); // max over "i" when all vectors are in one tensor
            if (dotProduct > largest) {
                largest = dotProduct;
            }
        }
        return largest;
    }

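    /** Generates vectorCount vectors as separate mapped tensors over the single dimension "x". */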
    private static List<Tensor> generateVectors(int vectorCount, int vectorSize) {
        List<Tensor> tensors = new ArrayList<>();
        TensorType type = new TensorType.Builder().mapped("x").build();
        for (int i = 0; i < vectorCount; i++) {
            MapTensorBuilder builder = new MapTensorBuilder(type);
            for (int j = 0; j < vectorSize; j++) {
                builder.cell().label("x", String.valueOf(j)).value(random.nextDouble());
            }
            tensors.add(builder.build());
        }
        return tensors;
    }

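    /**
     * Generates vectorCount vectors as a single mapped tensor over the dimensions "i" (vector index)
     * and "x" (vector element), returned as a one-element list.
     */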
    private static List<Tensor> generateVectorsInOneTensor(int vectorCount, int vectorSize) {
        List<Tensor> tensors = new ArrayList<>();
        TensorType type = new TensorType.Builder().mapped("i").mapped("x").build();
        MapTensorBuilder builder = new MapTensorBuilder(type);
        for (int i = 0; i < vectorCount; i++) {
            for (int j = 0; j < vectorSize; j++) {
                builder.cell()
                        .label("i", String.valueOf(i))
                        .label("x", String.valueOf(j))
                        .value(random.nextDouble());
            }
        }
        tensors.add(builder.build());
        return tensors; // only one tensor in the list.
    }

    public static void main(String[] args) {
        // Was: 150 ms
        // After adding type: 300 ms
        // After sorting dimensions: 100 ms
        // After special-casing single space: 4 ms
        double timePerJoin = new TensorFunctionBenchmark().benchmark(100, generateVectors(100, 300));

        // This benchmark should be as fast as the previous one. Currently it is slower by a factor of about 600
        double timePerJoinOneTensor = new TensorFunctionBenchmark().benchmark(20, generateVectorsInOneTensor(100, 300));

        System.out.println("Time per join: " + timeperJoin +  " ms");
        System.out.println("Time per join, one tensor: " + timePerJoinOneTensor +  " ms");
    }

}