aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.h
blob: 9f827fb0086f5bb2b3e9c2c43373e4460d06e124 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once

#include "bitvectorkeyscope.h"
#include <vespa/searchlib/common/bitvector.h>
#include <vespa/searchlib/index/bitvectorkeys.h>
#include <vespa/searchlib/common/tunefileinfo.h>
#include <vespa/vespalib/stllike/string.h>
#include <vector>

namespace search::diskindex {

/**
 * Sparse dictionary that maps a word number to its BitVector.
 *
 * The dictionary content is built from the boolocc idx file, while the
 * bit vectors themselves are kept in the boolocc dat file.
 **/
class BitVectorDictionary
{
    using WordSingleKey = search::index::BitVectorWordSingleKey;

    // NOTE: member order is significant (layout / initialization order); keep as is.
    uint32_t                              _docIdLimit;   // exposed via getDocIdLimit()
    std::vector<WordSingleKey>            _entries;      // exposed via getEntries()
    size_t                                _vectorSize;
    std::unique_ptr<FastOS_FileInterface> _datFile;      // presumably the boolocc dat file handle - confirm in the .cpp
    uint32_t                              _datHeaderLen;

public:
    using SP = std::shared_ptr<BitVectorDictionary>;

    BitVectorDictionary();
    ~BitVectorDictionary();

    // Not copyable.
    BitVectorDictionary(const BitVectorDictionary &) = delete;
    BitVectorDictionary &operator=(const BitVectorDictionary &) = delete;

    /**
     * Open the dictionary found at the given path prefix.  The boolocc
     * idx file is read into memory, whereas the dat file is only opened.
     *
     * @param pathPrefix   prefix of the path where the boolocc files
     *                     are located.
     * @param tuneFileRead read tuning to apply (see TuneFileRandRead).
     * @param scope        key scope to use (see BitVectorKeyScope).
     * @return true if the files could be opened.
     **/
    bool open(const vespalib::string &pathPrefix, const TuneFileRandRead &tuneFileRead, BitVectorKeyScope scope);

    /**
     * Look up a word number and, when it is present, load and return
     * the bit vector associated with it.
     *
     * @param wordNum the word number to find a bit vector for.
     * @return the loaded bit vector, or nullptr if not found.
     **/
    BitVector::UP lookup(uint64_t wordNum);

    /** @return the doc id limit held by this dictionary. */
    uint32_t getDocIdLimit() const { return _docIdLimit; }

    /** @return read-only access to the entries held by this dictionary. */
    const std::vector<WordSingleKey> &getEntries() const { return _entries; }
};

}