1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
|
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "termdistancecalculator.h"
#include <vespa/searchlib/fef/matchdata.h>
#include <vespa/searchlib/fef/itermdata.h>
using namespace search::fef;
namespace search {
namespace features {
// Out-of-class definition of the static constant declared in
// termdistancecalculator.h.
// NOTE(review): judging by the name, this is the marker stored in a Result
// when no distance/position has been found - confirm against the header.
const uint32_t TermDistanceCalculator::UNDEFINED_VALUE = 1000000;
void
TermDistanceCalculator::run(const QueryTerm &termX, const QueryTerm &termY,
const MatchData & match, uint32_t docId, Result & r)
{
const TermFieldMatchData *tmdX = match.resolveTermField(termX.fieldHandle());
const TermFieldMatchData *tmdY = match.resolveTermField(termY.fieldHandle());
if (tmdX->getDocId() != docId || tmdY->getDocId() != docId) {
return;
}
findBest(tmdX, tmdY, termX.termData()->getPhraseLength(), r.forwardDist, r.forwardTermPos);
findBest(tmdY, tmdX, termY.termData()->getPhraseLength(), r.reverseDist, r.reverseTermPos);
}
/**
 * Walks the occurrences of term X and term Y side by side and records the
 * smallest distance from the last word of an X occurrence to a following Y
 * occurrence inside the same element.
 *
 * Both position lists are traversed once, front to back.
 * NOTE(review): the merge assumes the iterators deliver positions ordered by
 * element id and then by position - confirm against TermFieldMatchData.
 *
 * A Y occurrence that lies inside an X phrase (closer to the start of X than
 * the phrase is long) is not a valid "X followed by Y" pair and is skipped.
 * Without that check the unsigned subtraction wraps around and stores a value
 * close to 2^32 in bestDist, which additionally prevents any later, valid
 * pair from replacing it.
 *
 * @param tmdX       match data holding the positions of term X
 * @param tmdY       match data holding the positions of term Y
 * @param numTermsX  number of words in term X (more than 1 for a phrase);
 *                   0 is treated like 1
 * @param bestDist   in/out: best distance so far; only overwritten when a
 *                   strictly smaller distance is found, so the caller must
 *                   initialise it
 * @param bestPos    out: position of the X occurrence that produced bestDist;
 *                   untouched if bestDist is not improved
 */
void
TermDistanceCalculator::findBest(const TermFieldMatchData *tmdX,
                                 const TermFieldMatchData *tmdY,
                                 uint32_t numTermsX,
                                 uint32_t & bestDist,
                                 uint32_t & bestPos)
{
    search::fef::TermFieldMatchData::PositionsIterator itA = tmdX->begin();
    search::fef::TermFieldMatchData::PositionsIterator epA = tmdX->end();
    search::fef::TermFieldMatchData::PositionsIterator itB = tmdY->begin();
    search::fef::TermFieldMatchData::PositionsIterator epB = tmdY->end();

    // Number of words of X following its first word: an occurrence starting
    // at position a ends at a + extraX. Guard against 0 - 1 wrapping around.
    const uint32_t extraX = (numTermsX > 0) ? (numTermsX - 1) : 0;

    // Once X has no positions left no further pair can be formed.
    while (itA != epA && itB != epB) {
        const uint32_t eid = itB->getElementId();

        // Bring X forward to the element Y is currently in.
        while (itA != epA && itA->getElementId() < eid) {
            ++itA;
        }
        if (itA == epA || itA->getElementId() != eid) {
            // X does not occur in this element; try the next Y occurrence.
            ++itB;
            continue;
        }

        // Both terms occur in element eid: merge their positions.
        while (itA != epA &&
               itB != epB &&
               itA->getElementId() == eid &&
               itB->getElementId() == eid)
        {
            const uint32_t a = itA->getPosition();
            const uint32_t b = itB->getPosition();
            if (a < b) {
                const uint32_t gap = b - a; // >= 1, cannot wrap since a < b
                // gap >= extraX ensures Y is not located inside the X phrase
                // and keeps gap - extraX from wrapping.
                if (gap >= extraX && (gap - extraX) < bestDist) {
                    bestDist = gap - extraX;
                    bestPos = a;
                }
                ++itA;
            } else {
                ++itB;
            }
        }
    }
}
} // namespace features
} // namespace search
|