aboutsummaryrefslogtreecommitdiffstats
path: root/eval/src/vespa/eval/eval/nested_loop.h
blob: 695b212dd90b3f508c57da6103ee82a8350597a0 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#pragma once

#include <cstddef>
#include <vector>

namespace vespalib::eval {

// This file contains implementations for the generic nested loops
// used by DenseJoinPlan, DenseReducePlan and similar. The loops act
// like arbitrarily nested for-loops that are index-based where each
// loop-level has a different stride that modifies the overall
// index. An initial index is passed to the top-level function, which
// is then modified by each loop-layer and finally passed back to a
// callable for each iteration of the inner loop. There are different
// implementations for traversing different numbers of index spaces in
// parallel. Note that all loop layers must have at least 1 iteration.

namespace nested_loop {

//-----------------------------------------------------------------------------

template <typename F, size_t N> void execute_few(size_t idx, const size_t *loop, const size_t *stride, const F &f) {
    if constexpr (N == 0) {
        f(idx);
    } else {
        for (size_t i = 0; i < *loop; ++i, idx += *stride) {
            execute_few<F, N - 1>(idx, loop + 1, stride + 1, f);
        }
    }
}

template <typename F> void execute_many(size_t idx, const size_t *loop, const size_t *stride, size_t levels, const F &f) {
    for (size_t i = 0; i < *loop; ++i, idx += *stride) {
        if ((levels - 1) == 3) {
            execute_few<F, 3>(idx, loop + 1, stride + 1, f);
        } else {
            execute_many<F>(idx, loop + 1, stride + 1, levels - 1, f);
        }
    }
}

//-----------------------------------------------------------------------------

template <typename F, size_t N> void execute_few(size_t idx1, size_t idx2, const size_t *loop, const size_t *stride1, const size_t *stride2, const F &f) {
    if constexpr (N == 0) {
        f(idx1, idx2);
    } else {
        for (size_t i = 0; i < *loop; ++i, idx1 += *stride1, idx2 += *stride2) {
            execute_few<F, N - 1>(idx1, idx2, loop + 1, stride1 + 1, stride2 + 1, f);
        }
    }
}

template <typename F> void execute_many(size_t idx1, size_t idx2, const size_t *loop, const size_t *stride1, const size_t *stride2, size_t levels, const F &f) {
    for (size_t i = 0; i < *loop; ++i, idx1 += *stride1, idx2 += *stride2) {
        if ((levels - 1) == 3) {
            execute_few<F, 3>(idx1, idx2, loop + 1, stride1 + 1, stride2 + 1, f);
        } else {
            execute_many<F>(idx1, idx2, loop + 1, stride1 + 1, stride2 + 1, levels - 1, f);
        }
    }
}

//-----------------------------------------------------------------------------

template <typename F, size_t N> void execute_few(size_t idx1, size_t idx2, size_t idx3, const size_t *loop, const size_t *stride1, const size_t *stride2, const size_t *stride3, const F &f) {
    if constexpr (N == 0) {
            f(idx1, idx2, idx3);
    } else {
        for (size_t i = 0; i < *loop; ++i, idx1 += *stride1, idx2 += *stride2, idx3 += *stride3) {
            execute_few<F, N - 1>(idx1, idx2, idx3, loop + 1, stride1 + 1, stride2 + 1, stride3 + 1, f);
        }
    }
}

template <typename F> void execute_many(size_t idx1, size_t idx2, size_t idx3, const size_t *loop, const size_t *stride1, const size_t *stride2, const size_t *stride3, size_t levels, const F &f) {
    for (size_t i = 0; i < *loop; ++i, idx1 += *stride1, idx2 += *stride2, idx3 += *stride3) {
        if ((levels - 1) == 3) {
            execute_few<F, 3>(idx1, idx2, idx3, loop + 1, stride1 + 1, stride2 + 1, stride3 + 1, f);
        } else {
            execute_many<F>(idx1, idx2, idx3, loop + 1, stride1 + 1, stride2 + 1, stride3 + 1, levels - 1, f);
        }
    }
}

//-----------------------------------------------------------------------------

} // implementation details

// Run a nested loop and pass indexes to 'f'
template <typename F, typename V>
void run_nested_loop(size_t idx, const V &loop, const V &stride, const F &f) {
    size_t levels = loop.size();
    switch(levels) {
    case 0: return f(idx);
    case 1: return nested_loop::execute_few<F, 1>(idx, &loop[0], &stride[0], f);
    case 2: return nested_loop::execute_few<F, 2>(idx, &loop[0], &stride[0], f);
    case 3: return nested_loop::execute_few<F, 3>(idx, &loop[0], &stride[0], f);
    default: return nested_loop::execute_many<F>(idx, &loop[0], &stride[0], levels, f);
    }
}

// Run two nested loops in parallel and pass both indexes to 'f'. Note
// that 'loop' is shared, which means that only individual strides may
// differ between the two loops.
template <typename F, typename V>
void run_nested_loop(size_t idx1, size_t idx2, const V &loop, const V &stride1, const V &stride2, const F &f) {
    size_t levels = loop.size();
    switch(levels) {
    case 0: return f(idx1, idx2);
    case 1: return nested_loop::execute_few<F, 1>(idx1, idx2, &loop[0], &stride1[0], &stride2[0], f);
    case 2: return nested_loop::execute_few<F, 2>(idx1, idx2, &loop[0], &stride1[0], &stride2[0], f);
    case 3: return nested_loop::execute_few<F, 3>(idx1, idx2, &loop[0], &stride1[0], &stride2[0], f);
    default: return nested_loop::execute_many<F>(idx1, idx2, &loop[0], &stride1[0], &stride2[0], levels, f);
    }
}

// Run three nested loops in parallel and all three indexes to
// 'f'. Note that 'loop' is shared, which means that only individual
// strides may differ between the three loops.
template <typename F, typename V>
void run_nested_loop(size_t idx1, size_t idx2, size_t idx3, const V &loop, const V &stride1, const V &stride2, const V &stride3, const F &f) {
    size_t levels = loop.size();
    switch(levels) {
    case 0: return f(idx1, idx2, idx3);
    case 1: return nested_loop::execute_few<F, 1>(idx1, idx2, idx3, &loop[0], &stride1[0], &stride2[0], &stride3[0], f);
    case 2: return nested_loop::execute_few<F, 2>(idx1, idx2, idx3, &loop[0], &stride1[0], &stride2[0], &stride3[0], f);
    case 3: return nested_loop::execute_few<F, 3>(idx1, idx2, idx3, &loop[0], &stride1[0], &stride2[0], &stride3[0], f);
    default: return nested_loop::execute_many<F>(idx1, idx2, idx3, &loop[0], &stride1[0], &stride2[0], &stride3[0], levels, f);
    }
}

} // namespace