blob: 10f07d015f3b983a8859009f4712a84ed3ef0db5 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
|
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once
#include "bitvectorkeyscope.h"
#include <vespa/searchlib/common/bitvector.h>
#include <vespa/searchlib/index/bitvectorkeys.h>
#include <vespa/searchlib/common/tunefileinfo.h>
#include <vespa/vespalib/stllike/string.h>
#include <vector>
namespace search::diskindex {
/**
 * Sparse dictionary mapping a word number to its BitVector.
 *
 * The dictionary content is built from the boolocc idx file, while the
 * bit vectors themselves are stored in the boolocc dat file.
 **/
class BitVectorDictionary
{
    using WordSingleKey = search::index::BitVectorWordSingleKey;

    // NOTE: keep the declaration order of the data members below; it
    // determines the order in which they are initialized.
    uint32_t                              _docIdLimit;   // returned by getDocIdLimit()
    std::vector<WordSingleKey>            _entries;      // returned by getEntries()
    size_t                                _vectorSize;   // presumably the size of one stored bit vector - confirm in the implementation
    std::unique_ptr<FastOS_FileInterface> _datFile;      // presumably the opened boolocc dat file - confirm in the implementation
    uint32_t                              _datHeaderLen; // presumably the length of the dat file header - confirm in the implementation

public:
    using SP = std::shared_ptr<BitVectorDictionary>;

    BitVectorDictionary();
    ~BitVectorDictionary();

    // Instances cannot be copied.
    BitVectorDictionary(const BitVectorDictionary &) = delete;
    BitVectorDictionary &operator=(const BitVectorDictionary &) = delete;

    /**
     * Open the dictionary from the boolocc files found under the given
     * path prefix. The idx file is loaded into memory, whereas the dat
     * file is only opened.
     *
     * @param pathPrefix   the path prefix to where the boolocc files
     *                     are located.
     * @param tuneFileRead tuning passed along for reading the files
     *                     (exact use is not visible in this header).
     * @param scope        bit vector key scope to open
     *                     (exact use is not visible in this header).
     * @return true if the files could be opened.
     **/
    bool open(const vespalib::string &pathPrefix,
              const TuneFileRandRead &tuneFileRead,
              BitVectorKeyScope scope);

    /**
     * Look up a word number and, when it is present, load and return
     * the bit vector associated with it.
     *
     * @param wordNum the word number to look up a bit vector for.
     * @return the loaded bit vector, or nullptr if not found.
     **/
    BitVector::UP lookup(uint64_t wordNum);

    /** @return the document id limit held by this dictionary. */
    uint32_t getDocIdLimit() const { return _docIdLimit; }

    /** @return read-only access to the entries held by this dictionary. */
    const std::vector<WordSingleKey> & getEntries() const { return _entries; }
};
}
|