aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/vespa/searchlib/features/termdistancecalculator.cpp
blob: c210c3c9085f7f08dd50bb830e04d859a7ae3803 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#include "termdistancecalculator.h"
#include <vespa/searchlib/fef/matchdata.h>
#include <vespa/searchlib/fef/itermdata.h>

using namespace search::fef;

namespace search {
namespace features {

// Definition of the class-level constant declared in termdistancecalculator.h.
// NOTE(review): presumably the value a distance holds when no qualifying
// term pair has been found - confirm against the header and callers.
const uint32_t TermDistanceCalculator::UNDEFINED_VALUE = 1000000;


void
TermDistanceCalculator::run(const QueryTerm &termX, const QueryTerm &termY,
                            const MatchData & match, uint32_t docId, Result & r)
{
    const TermFieldMatchData *tmdX = match.resolveTermField(termX.fieldHandle());
    const TermFieldMatchData *tmdY = match.resolveTermField(termY.fieldHandle());
    if (tmdX->getDocId() != docId || tmdY->getDocId() != docId) {
        return;
    }
    findBest(tmdX, tmdY, termX.termData()->getPhraseLength(), r.forwardDist, r.forwardTermPos);
    findBest(tmdY, tmdX, termY.termData()->getPhraseLength(), r.reverseDist, r.reverseTermPos);
}


/**
 * Finds the closest pair where an occurrence of X is followed by an
 * occurrence of Y inside the same element.
 *
 * The gap of a pair is measured from the last word of X (its position plus
 * numTermsX - 1) to the position of Y. A pair replaces the current result
 * only when its gap is strictly smaller than bestDist, so the incoming value
 * of bestDist acts as the upper bound. Pairs where Y lies before the last
 * word of X would give a negative gap and are ignored; they are never stored
 * as a wrapped-around unsigned value.
 *
 * The scan advances both iterators in a single merge-style pass.
 * NOTE(review): this presumes occurrences are ordered by element id and then
 * by position - confirm against TermFieldMatchData.
 *
 * @param tmdX      match data holding the occurrences of term X
 * @param tmdY      match data holding the occurrences of term Y
 * @param numTermsX number of words spanned by term X
 * @param bestDist  in/out: smallest gap so far, lowered when a closer pair is found
 * @param bestPos   out: position of X in the pair that produced bestDist
 */
void
TermDistanceCalculator::findBest(const TermFieldMatchData *tmdX,
                                 const TermFieldMatchData *tmdY,
                                 uint32_t numTermsX,
                                 uint32_t & bestDist,
                                 uint32_t & bestPos)
{
    search::fef::TermFieldMatchData::PositionsIterator itA = tmdX->begin();
    search::fef::TermFieldMatchData::PositionsIterator epA = tmdX->end();

    search::fef::TermFieldMatchData::PositionsIterator itB = tmdY->begin();
    search::fef::TermFieldMatchData::PositionsIterator epB = tmdY->end();

    // Offset from the first to the last word of X. Kept signed so that
    // neither a zero numTermsX nor an overlapping pair can wrap around.
    const int64_t lastWordOffset = static_cast<int64_t>(numTermsX) - 1;

    while (itB != epB) {
        const uint32_t eid = itB->getElementId();
        // Skip occurrences of X in elements before the current element of Y.
        while (itA != epA && itA->getElementId() < eid) {
            ++itA;
        }
        if (itA != epA && itA->getElementId() == eid) {
            // Both terms occur in this element: walk the two position lists
            // together for as long as both stay inside it.
            while (itA != epA &&
                   itB != epB &&
                   itA->getElementId() == eid &&
                   itB->getElementId() == eid)
            {
                const uint32_t a = itA->getPosition();
                const uint32_t b = itB->getPosition();
                if (a < b) {
                    const int64_t dist = static_cast<int64_t>(b) - static_cast<int64_t>(a) - lastWordOffset;
                    // A negative gap means Y sits inside the span of X; skip it.
                    if (dist >= 0 && dist < static_cast<int64_t>(bestDist)) {
                        bestDist = static_cast<uint32_t>(dist);
                        bestPos = a;
                    }
                    ++itA;
                } else {
                    ++itB;
                }
            }
        } else {
            // X has no occurrence in this element; move on to the next Y.
            ++itB;
        }
    }
}


} // namespace features
} // namespace search