1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
|
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "zcposocciterators.h"
#include "zc4_posting_params.h"
#include <vespa/searchlib/bitcompression/posocc_fields_params.h>
#include <vespa/searchlib/fef/termfieldmatchdata.h>
#include <cassert>
namespace search::diskindex {
using search::fef::TermFieldMatchDataArray;
using search::bitcompression::PosOccFieldsParams;
using search::index::PostingListCounts;
// Debug switches, both disabled (0).
// NOTE(review): neither macro is referenced in the visible part of this file;
// presumably left over from earlier debugging code - confirm before removing.
#define DEBUG_ZCFILTEROCC_PRINTF 0
#define DEBUG_ZCFILTEROCC_ASSERT 0
// Constructor for the rare-word position-occurrence iterator.
//
// All posting-list related arguments (match data, start position, doc id limit
// and the decode/unpack feature flags) are handed to the
// ZcRareWordPostingIterator base class. This class owns the concrete decode
// context (_decodeContextReal), which is set up from start.getOccurences(),
// start.getBitOffset(), bitLength and fieldsParams.
template <bool bigEndian, bool dynamic_k>
ZcRareWordPosOccIterator<bigEndian, dynamic_k>::
ZcRareWordPosOccIterator(Position start, uint64_t bitLength, uint32_t docIdLimit,
                         bool decode_normal_features, bool decode_interleaved_features,
                         bool unpack_normal_features, bool unpack_interleaved_features,
                         const PosOccFieldsParams *fieldsParams,
                         TermFieldMatchDataArray matchData)
    : ZcRareWordPostingIterator<bigEndian, dynamic_k>(std::move(matchData), start, docIdLimit,
                                                      decode_normal_features, decode_interleaved_features,
                                                      unpack_normal_features, unpack_interleaved_features),
      _decodeContextReal(start.getOccurences(), start.getBitOffset(), bitLength, fieldsParams)
{
    // Publish the decode context owned by this object through the
    // _decodeContext pointer.
    _decodeContext = &_decodeContextReal;

    // When match data is supplied, there must be exactly one entry per field
    // described by fieldsParams.
    assert(!this->_matchData.valid() || (fieldsParams->getNumFields() == this->_matchData.size()));
}
// Constructor for the position-occurrence iterator.
//
// The ZcPostingIterator base class receives minChunkDocs, the dynamic_k
// template argument (as a runtime value), the posting list counts, the match
// data, the start position, the doc id limit and the decode/unpack feature
// flags. This class owns the concrete decode context (_decodeContextReal),
// which is set up from start.getOccurences(), start.getBitOffset(), bitLength
// and fieldsParams.
template <bool bigEndian, bool dynamic_k>
ZcPosOccIterator<bigEndian, dynamic_k>::
ZcPosOccIterator(Position start, uint64_t bitLength, uint32_t docIdLimit,
                 bool decode_normal_features, bool decode_interleaved_features,
                 bool unpack_normal_features, bool unpack_interleaved_features,
                 uint32_t minChunkDocs, const PostingListCounts &counts,
                 const PosOccFieldsParams *fieldsParams,
                 TermFieldMatchDataArray matchData)
    : ZcPostingIterator<bigEndian>(minChunkDocs, dynamic_k, counts, std::move(matchData), start, docIdLimit,
                                   decode_normal_features, decode_interleaved_features,
                                   unpack_normal_features, unpack_interleaved_features),
      _decodeContextReal(start.getOccurences(), start.getBitOffset(), bitLength, fieldsParams)
{
    // Publish the decode context owned by this object through the
    // _decodeContext pointer.
    _decodeContext = &_decodeContextReal;

    // When match data is supplied, there must be exactly one entry per field
    // described by fieldsParams.
    assert(!this->_matchData.valid() || (fieldsParams->getNumFields() == this->_matchData.size()));
}
template <bool bigEndian>
std::unique_ptr<search::queryeval::SearchIterator>
create_zc_posocc_iterator(const PostingListCounts &counts, bitcompression::Position start, uint64_t bit_length,
const Zc4PostingParams &posting_params, const bitcompression::PosOccFieldsParams &fields_params,
fef::TermFieldMatchDataArray match_data, bool unpack_normal_features, bool unpack_interleaved_features)
{
using EC = bitcompression::EncodeContext64<bigEndian>;
bitcompression::DecodeContext64<bigEndian> d(start.getOccurences(), start.getBitOffset());
UC64_DECODECONTEXT_CONSTRUCTOR(o, d._);
uint32_t length;
uint64_t val64;
UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_NUMDOCS, EC);
uint32_t num_docs = static_cast<uint32_t>(val64) + 1;
assert((num_docs == counts._numDocs) || ((num_docs == posting_params._min_chunk_docs) && (num_docs < counts._numDocs)));
if (num_docs < posting_params._min_skip_docs) {
if (posting_params._dynamic_k) {
return std::make_unique<ZcRareWordPosOccIterator<bigEndian, true>>(start, bit_length, posting_params._doc_id_limit,
posting_params._encode_features, posting_params._encode_interleaved_features, unpack_normal_features,
unpack_interleaved_features, &fields_params, std::move(match_data));
} else {
return std::make_unique<ZcRareWordPosOccIterator<bigEndian, false>>(start, bit_length, posting_params._doc_id_limit,
posting_params._encode_features, posting_params._encode_interleaved_features, unpack_normal_features,
unpack_interleaved_features, &fields_params, std::move(match_data));
}
} else {
if (posting_params._dynamic_k) {
return std::make_unique<ZcPosOccIterator<bigEndian, true>>(start, bit_length, posting_params._doc_id_limit,
posting_params._encode_features, posting_params._encode_interleaved_features, unpack_normal_features,
unpack_interleaved_features, posting_params._min_chunk_docs, counts, &fields_params, std::move(match_data));
} else {
return std::make_unique<ZcPosOccIterator<bigEndian, false>>(start, bit_length, posting_params._doc_id_limit,
posting_params._encode_features, posting_params._encode_interleaved_features, unpack_normal_features,
unpack_interleaved_features, posting_params._min_chunk_docs, counts, &fields_params, std::move(match_data));
}
}
}
// Runtime entry point: creates a position-occurrence iterator for the posting
// list located at `start`.
//
// The unpack flags are taken from the first match data element when match
// data is valid, and are both false otherwise. The runtime `bigEndian` flag
// is then mapped onto the matching template instantiation of the factory.
std::unique_ptr<search::queryeval::SearchIterator>
create_zc_posocc_iterator(bool bigEndian, const PostingListCounts &counts, bitcompression::Position start, uint64_t bit_length,
                          const Zc4PostingParams &posting_params, const bitcompression::PosOccFieldsParams &fields_params,
                          fef::TermFieldMatchDataArray match_data)
{
    bool unpack_normal_features = false;
    bool unpack_interleaved_features = false;
    if (match_data.valid()) {
        unpack_normal_features = match_data[0]->needs_normal_features();
        unpack_interleaved_features = match_data[0]->needs_interleaved_features();
    }
    return bigEndian
        ? create_zc_posocc_iterator<true>(counts, start, bit_length, posting_params, fields_params,
                                          std::move(match_data), unpack_normal_features, unpack_interleaved_features)
        : create_zc_posocc_iterator<false>(counts, start, bit_length, posting_params, fields_params,
                                           std::move(match_data), unpack_normal_features, unpack_interleaved_features);
}
// Explicit instantiations of both iterator templates for every combination of
// <bigEndian, dynamic_k>. The factory above can select any of these eight
// variants at runtime, and the constructors are defined in this file.
template class ZcRareWordPosOccIterator<false, false>;
template class ZcRareWordPosOccIterator<false, true>;
template class ZcRareWordPosOccIterator<true, false>;
template class ZcRareWordPosOccIterator<true, true>;
template class ZcPosOccIterator<false, false>;
template class ZcPosOccIterator<false, true>;
template class ZcPosOccIterator<true, false>;
template class ZcPosOccIterator<true, true>;
}
|