searchlib/src/vespa/searchlib/queryeval/flow_tuning.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85

// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#pragma once
#include <cmath>
#include <cstddef>

namespace search::queryeval::flow {

/**
 * This function is used when calculating the strict cost of
 * intermediate and complex leaf blueprints that use a heap for their strict iterator implementation.
 *
 * Iterator benchmarking has shown the need to increase the strict cost
 * of complex blueprints, to avoid that they are forced strict too early.
 * The 5.0 multiplier reflects this.
 *
 * Program used: searchlib/src/tests/queryeval/iterator_benchmark
 * Tests used: analyze_and_with_filter_vs_*
 */
inline double heap_cost(double my_est, size_t num_children) {
    return 5.0 * my_est * std::log2(std::max(size_t(1), num_children));
}

/**
 * The following constants and formulas were derived after analyzing term search over attributes
 * (with and without fast-search) and disk index by using the iterator benchmark program:
 * searchlib/src/tests/queryeval/iterator_benchmark
 *
 * The following tests were executed on a machine with an Intel Xeon 2.5 GHz CPU with 48 cores and 256 Gb of memory:
 * ./searchlib_iterator_benchmark_test_app --gtest_filter='*analyze_term_search*'
 *
 * TODO: Add details on OR benchmarking.
 *
 * The benchmark summary shows the 'average ms per cost' of the different benchmark cases.
 * The following constants and formulas were derived to balance 'average ms per cost' to be similar
 * across the different benchmark cases.
 */

// Non-strict cost of lookup based matching in an attribute (not fast-search).
inline double lookup_cost(size_t num_indirections) {
    return 1.0 + (num_indirections * 1.0);
}

// Non-strict cost of reverse lookup into a hash table (containing terms from a multi-term operator).
inline double reverse_hash_lookup() {
    return 5.0;
}

// Strict cost of lookup based matching in an attribute (not fast-search).
inline double lookup_strict_cost(size_t num_indirections) {
    return lookup_cost(num_indirections);
}

// Non-strict cost of matching in a btree posting list (e.g. fast-search attribute or memory index field).
inline double btree_cost() {
    return 1.0;
}

// Strict cost of matching in a btree posting list (e.g. fast-search attribute or memory index field).
inline double btree_strict_cost(double my_est) {
    return my_est;
}

// Non-strict cost of matching in a bitvector.
inline double bitvector_cost() {
    return 1.0;
}

// Strict cost of matching in a bitvector.
// Test used: IteratorBenchmark::analyze_btree_vs_bitvector_iterators_strict
inline double bitvector_strict_cost(double my_est) {
    return 1.5 * my_est;
}

// Non-strict cost of matching in a disk index posting list.
inline double disk_index_cost() {
    return 1.5;
}

// Strict cost of matching in a disk index posting list.
inline double disk_index_strict_cost(double my_est) {
    return 1.5 * my_est;
}

}