aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/apps/docstore/benchmarkdatastore.cpp
blob: cf2e7f7356dee9172fd6127f25b82f23b68cb1bb (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#include <vespa/searchlib/docstore/logdatastore.h>
#include <vespa/searchlib/index/dummyfileheadercontext.h>
#include <vespa/searchlib/transactionlog/nosyncproxy.h>
#include <vespa/vespalib/data/databuffer.h>
#include <vespa/vespalib/util/lambdatask.h>
#include <vespa/vespalib/util/size_literals.h>
#include <vespa/vespalib/util/threadstackexecutor.h>
#include <vespa/vespalib/util/signalhandler.h>
#include <unistd.h>
#include <random>

#include <vespa/log/log.h>
// Registers "documentstore.benchmark" as the log component for the LOG() calls in this file.
LOG_SETUP("documentstore.benchmark");

// Lets the code below use names such as IDataStore and LogDataStore unqualified
// (presumably declared in namespace search — confirm against the included headers).
using namespace search;

/**
 * Command line application that benchmarks random reads against a data store
 * located in a directory given on the command line.
 */
class BenchmarkDataStoreApp
{
public:
    /// Parses the command line and runs the benchmark; returns the process exit code.
    int main(int argc, char **argv);

private:
    /// Prints the expected command line to stdout; 'self' is the program name.
    void usage(const char *self);

    /// Issues 'numReads' random reads of up to 'perChunk' consecutive lids each against 'dataStore'.
    void read(size_t numReads, size_t perChunk, const IDataStore * dataStore);

    /// Opens the data store in 'directory' and runs read() on 'numThreads' worker threads.
    int benchmark(const vespalib::string & directory, size_t numReads, size_t numThreads, size_t perChunk, const vespalib::string & readType);
};



/**
 * Prints a one-line description of the expected command line to stdout.
 *
 * @param self program name shown at the start of the usage line.
 */
void
BenchmarkDataStoreApp::usage(const char *self)
{
    printf("Usage: %s <directory> <numreads> <numthreads> <objects per read> <normal,directio,mmap>\n", self);
    // Flush explicitly so the text is not left in the stdio buffer.
    fflush(stdout);
}

/**
 * Parses the command line and runs the benchmark.
 *
 * Arguments: <directory> [numreads] [numthreads] [objects per read] [read type].
 * Only the directory is mandatory; every omitted trailing argument keeps the
 * default assigned below.
 *
 * @param argc number of entries in argv, including the program name.
 * @param argv command line arguments.
 * @return the result of benchmark(), or 1 when the directory argument is missing.
 */
int
BenchmarkDataStoreApp::main(int argc, char **argv)
{
    if (argc < 2) {
        fprintf(stderr, "Too few arguments\n");
        usage(argv[0]);
        return 1;
    }

    size_t numThreads(16);
    size_t numReads(1000000);
    size_t perChunk(1);
    vespalib::string readType("directio");
    vespalib::string directory(argv[1]);

    // Base 0 lets strtoul accept decimal, octal (0...) and hex (0x...) input.
    if (argc >= 3) {
        numReads = strtoul(argv[2], nullptr, 0);
    }
    if (argc >= 4) {
        numThreads = strtoul(argv[3], nullptr, 0);
    }
    if (argc >= 5) {
        perChunk = strtoul(argv[4], nullptr, 0);
    }
    // argv[5] is a real argument only when argc >= 6; argv[argc] is a null pointer.
    if (argc >= 6) {
        readType = argv[5];
    }
    return benchmark(directory, numReads, numThreads, perChunk, readType);
}

void BenchmarkDataStoreApp::read(size_t numReads, size_t perChunk, const IDataStore * dataStore)
{
    vespalib::DataBuffer buf;
    std::minstd_rand rng;
    const size_t docIdLimit(dataStore->getDocIdLimit());
    assert(docIdLimit > 0);
    rng.seed(getpid());
    int32_t rnd(0);
    for ( size_t i(0); i < numReads; i++) {
        rnd = rng();
        uint32_t lid(rnd%docIdLimit);
        for (uint32_t j(lid); j < std::min(docIdLimit, lid+perChunk); j++) {
            dataStore->read(j, buf);
            buf.clear();
        }
    }
}

/**
 * Opens the log data store found in 'dir' and runs the random read benchmark
 * against it from 'numThreads' worker threads.
 *
 * @param dir        directory containing the data store.
 * @param numReads   number of random reads issued by each thread.
 * @param numThreads number of concurrent reader tasks.
 * @param perChunk   maximum number of consecutive objects read per random read.
 * @param readType   "directio", "normal" or "mmap"; any other value leaves the
 *                   default random read tuning in place and logs a warning.
 * @return always 0.
 */
int
BenchmarkDataStoreApp::benchmark(const vespalib::string & dir, size_t numReads, size_t numThreads, size_t perChunk, const vespalib::string & readType)
{
    LogDataStore::Config config;
    GrowStrategy growStrategy;
    TuneFileSummary tuning;
    if (readType == "directio") {
        tuning._randRead.setWantDirectIO();
    } else if (readType == "normal") {
        tuning._randRead.setWantNormal();
    } else if (readType == "mmap") {
        tuning._randRead.setWantMemoryMap();
    } else {
        // Keep running, but make it visible that the requested mode was not applied.
        LOG(warning, "Unknown read type '%s', using default random read tuning", readType.c_str());
    }
    search::index::DummyFileHeaderContext fileHeaderContext;
    vespalib::ThreadStackExecutor executor(1);
    transactionlog::NoSyncProxy noTlSyncer;
    // NOTE(review): the trailing 'nullptr, true' arguments presumably mean
    // "no bucketizer" and "read only" — confirm against the LogDataStore constructor.
    LogDataStore store(executor, dir, config, growStrategy, tuning,
                       fileHeaderContext,
                       noTlSyncer, nullptr, true);
    vespalib::ThreadStackExecutor bmPool(numThreads);
    LOG(info, "Start read benchmark with %zu threads doing %zu reads in chunks of %zu reads. Totally %zu objects", numThreads, numReads, perChunk, numThreads * numReads * perChunk);
    for (size_t i(0); i < numThreads; i++) {
        // The tasks capture locals by reference; sync() below is expected to wait
        // for all of them before those locals go out of scope.
        bmPool.execute(vespalib::makeLambdaTask([&]() { read(numReads, perChunk, static_cast<const IDataStore *>(&store)); }));
    }
    bmPool.sync();
    LOG(info, "Benchmark done.");
    return 0;
}

int main(int argc, char **argv) {
    vespalib::SignalHandler::PIPE.ignore();
    BenchmarkDataStoreApp app;
    return app.main(argc, argv);
}