1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
|
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "termmatchdatamerger.h"
#include <algorithm>
namespace search::fef {
// Builds the merger by grouping the candidate inputs per output.
//
// For each entry of outputs, in order, every element of allinputs whose match
// data reports the same field id as that output is collected (keeping the
// order of allinputs). The groups are stored so that _inputs[i] holds the
// inputs feeding _output[i].
//
// allinputs: candidate inputs; an input is added to every output whose field
//            id equals the field id of the input's match data.
// outputs:   output match data handles; moved into the merger.
TermMatchDataMerger::TermMatchDataMerger(const Inputs &allinputs,
                                         TermFieldMatchDataArray outputs)
    : _inputs(),
      _output(std::move(outputs)),
      _scratch()
{
    for (size_t i = 0; i < _output.size(); ++i) {
        Inputs inputs_for_i;
        const uint32_t fieldId = _output[i]->getFieldId();
        for (size_t j = 0; j < allinputs.size(); ++j) {
            if (allinputs[j].matchData->getFieldId() == fieldId) {
                inputs_for_i.push_back(allinputs[j]);
            }
        }
        // The group is not used again in this iteration, so hand it over
        // instead of copying it.
        _inputs.push_back(std::move(inputs_for_i));
    }
}
// Destructor: compiler-generated behavior, defined here in the implementation file.
TermMatchDataMerger::~TermMatchDataMerger() = default;
void
TermMatchDataMerger::merge(uint32_t docid)
{
for (size_t i = 0; i < _output.size(); ++i) {
merge(docid, _inputs[i], *(_output[i]));
}
}
void
TermMatchDataMerger::merge(uint32_t docid,
const Inputs &in,
TermFieldMatchData &out)
{
_scratch.clear();
bool wasMatch = false;
bool needs_normal_features = out.needs_normal_features();
bool needs_interleaved_features = out.needs_interleaved_features();
uint32_t num_occs = 0u;
uint16_t field_length = 0u;
for (size_t i = 0; i < in.size(); ++i) {
const TermFieldMatchData *md = in[i].matchData;
if (md->getDocId() == docid) {
if (needs_normal_features) {
for (const TermFieldMatchDataPosition &iter : *md) {
double exactness = in[i].exactness * iter.getMatchExactness();
_scratch.push_back(iter);
_scratch.back().setMatchExactness(exactness);
}
}
if (needs_interleaved_features) {
num_occs += md->getNumOccs();
field_length = std::max(field_length, md->getFieldLength());
}
wasMatch = true;
}
}
if (wasMatch) {
out.reset(docid);
if (needs_normal_features) {
num_occs = 0;
if (_scratch.size() > 0) {
std::sort(_scratch.begin(), _scratch.end(),
TermFieldMatchDataPosition::compareWithExactness);
TermFieldMatchDataPosition prev = _scratch[0];
for (size_t i = 1; i < _scratch.size(); ++i) {
const TermFieldMatchDataPosition &curr = _scratch[i];
if (prev.key() < curr.key()) {
out.appendPosition(prev);
prev = curr;
++num_occs;
}
}
out.appendPosition(prev);
++num_occs;
}
}
if (needs_interleaved_features) {
constexpr uint32_t max_num_occs = std::numeric_limits<uint16_t>::max();
uint16_t capped_num_occs = std::min(num_occs, max_num_occs);
out.setNumOccs(std::min(capped_num_occs, field_length));
out.setFieldLength(field_length);
}
}
}
}
|