1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
|
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once
#include <vector>
#include <vespa/searchlib/bitcompression/compression.h>
#include <vespa/searchlib/bitcompression/posocccompression.h>
#include <vespa/searchlib/bitcompression/posocc_fields_params.h>
#include <vespa/searchcommon/common/schema.h>
namespace vespalib { class Rand48; }
namespace search::fakedata {
class FakeWord;
/**
* Contains lists of fake words for 3 word classes categorized based on number of occurrences.
*/
class FakeWordSet {
public:
using PosOccFieldsParams = bitcompression::PosOccFieldsParams;
using Schema = index::Schema;
using FakeWordPtr = std::unique_ptr<FakeWord>;
using FakeWordVector = std::vector<FakeWordPtr>;
enum {
COMMON_WORD,
MEDIUM_WORD,
RARE_WORD,
NUM_WORDCLASSES,
};
private:
std::vector<FakeWordVector> _words;
Schema _schema;
std::vector<PosOccFieldsParams> _fieldsParams;
uint32_t _numDocs;
public:
FakeWordSet();
FakeWordSet(bool hasElements,
bool hasElementWeights);
~FakeWordSet();
void setupParams(bool hasElements,
bool hasElementWeights);
void setupWords(vespalib::Rand48 &rnd,
uint32_t numDocs,
uint32_t commonDocFreq,
uint32_t numWordsPerWordClass);
void setupWords(vespalib::Rand48 &rnd,
uint32_t numDocs,
uint32_t commonDocFreq,
uint32_t mediumDocFreq,
uint32_t rareDocFreq,
uint32_t numWordsPerWordClass);
const std::vector<FakeWordVector>& words() const { return _words; }
int getNumWords() const;
const PosOccFieldsParams& getFieldsParams() const {
return _fieldsParams.back();
}
uint32_t getPackedIndex() const {
return _fieldsParams.size() - 1;
}
const std::vector<PosOccFieldsParams>& getAllFieldsParams() const {
return _fieldsParams;
}
const Schema& getSchema() const {
return _schema;
}
uint32_t numDocs() const { return _numDocs; }
void addDocIdBias(uint32_t docIdBias);
};
}
|