// config-model/src/main/java/com/yahoo/vespa/model/search/NodeResourcesTuning.java

// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.model.search;

import com.yahoo.config.provision.NodeResources;
import com.yahoo.vespa.config.search.core.ProtonConfig;
import com.yahoo.vespa.model.Host;
import com.yahoo.vespa.model.content.Redundancy;

import static java.lang.Long.min;
import static java.lang.Long.max;

/**
 * Tuning of proton config for a search node based on the resources on the node.
 *
 * <p>All memory based settings are derived from {@link #usableMemoryGb()}: the node memory minus
 * {@code Host.memoryOverheadGb}, scaled by {@code 1 - fractionOfMemoryReserved}.
 *
 * @author geirst
 */
public class NodeResourcesTuning implements ProtonConfig.Producer {

    // Fractions of usable memory (or of disk, for the TLS) used to size the settings below.
    private static final double SUMMARY_FILE_SIZE_AS_FRACTION_OF_MEMORY = 0.02;
    private static final double SUMMARY_CACHE_SIZE_AS_FRACTION_OF_MEMORY = 0.04;
    private static final double MEMORY_GAIN_AS_FRACTION_OF_MEMORY = 0.08;
    private static final double MIN_MEMORY_PER_FLUSH_THREAD_GB = 11.0;
    private static final double TLS_SIZE_FRACTION = 0.02;

    static final long MB = 1024 * 1024;
    public static final long GB = MB * 1024;

    private final NodeResources resources;
    private final int threadsPerSearch;
    private final double fractionOfMemoryReserved;

    /**
     * Creates a tuning for a node with the given resources.
     *
     * @param resources                the resources of the node to tune for
     * @param threadsPerSearch         the value written to {@code numthreadspersearch}; also used as
     *                                 the divisor when sizing {@code numsearcherthreads}
     * @param fractionOfMemoryReserved the fraction of the memory (after host overhead) that is
     *                                 excluded from {@link #usableMemoryGb()}
     */
    public NodeResourcesTuning(NodeResources resources,
                               int threadsPerSearch,
                               double fractionOfMemoryReserved) {
        this.resources = resources;
        this.threadsPerSearch = threadsPerSearch;
        this.fractionOfMemoryReserved = fractionOfMemoryReserved;
    }

    /** Applies all resource based tuning to the given proton config builder. */
    @Override
    public void getConfig(ProtonConfig.Builder builder) {
        setHwInfo(builder);
        tuneDiskWriteSpeed(builder);
        tuneRequestThreads(builder);
        tuneDocumentStoreMaxFileSize(builder.summary.log);
        tuneFlushStrategyMemoryLimits(builder.flush.memory);
        tuneFlushStrategyTlsSize(builder.flush.memory);
        tuneFlushConcurrentThreads(builder.flush);
        tuneSummaryReadIo(builder.summary.read);
        tuneSummaryCache(builder.summary.cache);
        tuneSearchReadIo(builder.search.mmap);
    }

    /** Sets the summary cache size to a fixed fraction of the usable memory. */
    private void tuneSummaryCache(ProtonConfig.Summary.Cache.Builder builder) {
        long memoryLimitBytes = (long) ((usableMemoryGb() * SUMMARY_CACHE_SIZE_AS_FRACTION_OF_MEMORY) * GB);
        builder.maxbytes(memoryLimitBytes);
    }

    /** Publishes the node's cpu, memory and disk resources as hardware info. */
    private void setHwInfo(ProtonConfig.Builder builder) {
        builder.hwinfo.disk.shared(true);
        // NOTE(review): the cast truncates a fractional vcpu count (0.5 becomes 0), whereas
        // tuneRequestThreads rounds up - confirm that the difference is intended.
        builder.hwinfo.cpu.cores((int) resources.vcpu());
        builder.hwinfo.memory.size((long) (usableMemoryGb() * GB));
        builder.hwinfo.disk.size((long) (resources.diskGb() * GB));
    }

    /** Sets an explicit disk write speed when the node does not have fast disk. */
    private void tuneDiskWriteSpeed(ProtonConfig.Builder builder) {
        if (resources.diskSpeed() != NodeResources.DiskSpeed.fast) {
            builder.hwinfo.disk.writespeed(40);
        }
    }

    /** Sets the document store max file size to a fraction of the usable memory, but at least 256 MB. */
    private void tuneDocumentStoreMaxFileSize(ProtonConfig.Summary.Log.Builder builder) {
        long fileSizeBytes = (long) Math.max(256 * MB, usableMemoryGb() * GB * SUMMARY_FILE_SIZE_AS_FRACTION_OF_MEMORY);
        builder.maxfilesize(fileSizeBytes);
    }

    /** Sets both the total and the per-component flush memory limit to the same fraction of usable memory. */
    private void tuneFlushStrategyMemoryLimits(ProtonConfig.Flush.Memory.Builder builder) {
        long memoryLimitBytes = (long) ((usableMemoryGb() * MEMORY_GAIN_AS_FRACTION_OF_MEMORY) * GB);
        builder.maxmemory(memoryLimitBytes);
        builder.each.maxmemory(memoryLimitBytes);
    }

    /**
     * Sets the number of concurrent flush threads: one thread per started
     * {@link #MIN_MEMORY_PER_FLUSH_THREAD_GB} of usable memory, capped by a fixed upper limit.
     *
     * <p>The result is never below 1. Without the lower bound, a node whose usable memory is zero or
     * negative (node memory not larger than the host overhead) would be configured with zero flush
     * threads.
     */
    private void tuneFlushConcurrentThreads(ProtonConfig.Flush.Builder builder) {
        int upperLimit = 2; // TODO bring slowly up towards 4
        // Usable memory below the per-thread minimum rounds up to a single thread here.
        int threadsByMemory = (int) Math.ceil(usableMemoryGb() / MIN_MEMORY_PER_FLUSH_THREAD_GB);
        builder.maxconcurrent(Math.max(1, Math.min(upperLimit, threadsByMemory)));
    }

    /** Sets the max transaction log size to a fraction of the disk size, kept within [2 GB, 100 GB]. */
    private void tuneFlushStrategyTlsSize(ProtonConfig.Flush.Memory.Builder builder) {
        long tlsSizeBytes = (long) ((resources.diskGb() * TLS_SIZE_FRACTION) * GB);
        tlsSizeBytes = max(2 * GB, min(tlsSizeBytes, 100 * GB));
        builder.maxtlssize(tlsSizeBytes);
    }

    /** Uses direct io for summary reads when the node has fast disk. */
    private void tuneSummaryReadIo(ProtonConfig.Summary.Read.Builder builder) {
        if (resources.diskSpeed() == NodeResources.DiskSpeed.fast) {
            builder.io(ProtonConfig.Summary.Read.Io.DIRECTIO);
        }
    }

    /** Uses the RANDOM mmap advise for search when the node has fast disk. */
    private void tuneSearchReadIo(ProtonConfig.Search.Mmap.Builder builder) {
        if (resources.diskSpeed() == NodeResources.DiskSpeed.fast) {
            builder.advise(ProtonConfig.Search.Mmap.Advise.RANDOM);
        }
    }

    /**
     * Sets the searcher, summary and per-search thread counts from the number of cores
     * (vcpu rounded up).
     *
     * <p>The searcher thread count is 4 per core rounded up to a multiple of
     * {@code threadsPerSearch}, but never more than {@code numCores * threadsPerSearch}.
     */
    private void tuneRequestThreads(ProtonConfig.Builder builder) {
        int numCores = (int) Math.ceil(resources.vcpu());
        // Integer division: rounds numCores * 4 up to a multiple of threadsPerSearch (for positive values).
        int fourPerCoreRoundedUp = ((numCores * 4 + threadsPerSearch - 1) / threadsPerSearch) * threadsPerSearch;
        builder.numsearcherthreads(Math.min(fourPerCoreRoundedUp, numCores * threadsPerSearch));
        builder.numsummarythreads(numCores);
        builder.numthreadspersearch(threadsPerSearch);
    }

    /** Returns the memory we can expect will be available for the content node processes */
    private double usableMemoryGb() {
        double memoryAfterOverheadGb = resources.memoryGb() - Host.memoryOverheadGb;
        return memoryAfterOverheadGb * (1 - fractionOfMemoryReserved);
    }

}