summaryrefslogtreecommitdiffstats
path: root/searchlib/src/vespa/searchlib/features/element_completeness_feature.cpp
blob: 6128850bd1f1380507f257209a7a68f4415fcd83 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#include "element_completeness_feature.h"
#include <vespa/searchlib/fef/itermdata.h>
#include <vespa/searchlib/fef/featurenamebuilder.h>
#include <vespa/searchlib/fef/properties.h>
#include <vespa/vespalib/locale/c.h>
#include <vespa/vespalib/util/stash.h>
#include <cassert>

namespace search::features {

//-----------------------------------------------------------------------------

/**
 * Builds the executor for one query.
 *
 * Walks all query terms and records, for every term with a non-zero weight
 * that has term field data for the configured field (_params.fieldId), the
 * match data handle and the term weight. The weights of the recorded terms
 * are also accumulated in _sumTermWeight.
 *
 * @param env    query environment supplying the query terms
 * @param params feature parameters (field id and field completeness importance)
 */
ElementCompletenessExecutor::ElementCompletenessExecutor(const fef::IQueryEnvironment &env,
                                                         const ElementCompletenessParams &params)
    : _params(params),
      _terms(),
      _queue(),
      _sumTermWeight(0),
      _md(nullptr)
{
    using FieldRange = fef::ITermFieldRangeAdapter;
    for (uint32_t termIdx = 0; termIdx < env.getNumTerms(); ++termIdx) {
        const fef::ITermData *queryTerm = env.getTerm(termIdx);
        if (queryTerm->getWeight().percent() == 0) {
            continue; // a term without weight contributes nothing
        }
        for (FieldRange fields(*queryTerm); fields.valid(); fields.next()) {
            const fef::ITermFieldData &fieldData = fields.get();
            if (fieldData.getFieldId() != _params.fieldId) {
                continue; // term field data belongs to another field
            }
            int weight = queryTerm->getWeight().percent();
            _sumTermWeight += weight;
            _terms.push_back(Term(fieldData.getHandle(), weight));
        }
    }
}

/**
 * Computes the feature outputs for a single document.
 *
 * For every recorded query term that matched this document, its occurrence
 * range is put on the queue. The queue is then drained one element id at a
 * time: each element gets a State that accumulates the weights of the terms
 * matching it, a score is calculated, and the best scoring State is kept.
 * The four outputs are taken from that best State.
 *
 * @param docId the document being ranked
 */
void
ElementCompletenessExecutor::execute(uint32_t docId)
{
    // _queue is a member reused across calls; the loop below always drains it completely.
    assert(_queue.empty());
    for (size_t i = 0; i < _terms.size(); ++i) {
        const fef::TermFieldMatchData *tfmd = _md->resolveTermField(_terms[i].termHandle);
        // Match data whose doc id differs from docId does not describe this document; skip the term.
        if (tfmd->getDocId() == docId) {
            Item item(i, tfmd->begin(), tfmd->end());
            // Only enqueue terms that actually have occurrences to iterate.
            if (item.pos != item.end) {
                _queue.push(item);
            }
        }
    }
    // Starting point that any element must beat (strictly greater score) to be selected.
    State best(0, 0);
    while (!_queue.empty()) {
        // NOTE(review): presumably the queue keeps the item with the lowest current
        // element id at the front, so all terms matching one element are handled
        // together -- confirm against the queue/Item declaration in the header.
        uint32_t elementId = _queue.front().pos->getElementId();
        State state(_queue.front().pos->getElementWeight(),
                    _queue.front().pos->getElementLen());
        while (!_queue.empty() && _queue.front().pos->getElementId() == elementId) {
            // Each term is counted once per element, regardless of how many occurrences it has there.
            state.addMatch(_terms[_queue.front().termIdx].termWeight);
            Item &item = _queue.front();
            // Skip the remaining occurrences of this term within the current element.
            while (item.pos != item.end && item.pos->getElementId() == elementId) {
                ++item.pos;
            }
            if (item.pos == item.end) {
                // Term has no more occurrences; drop it from the queue.
                _queue.pop_front();
            } else {
                // Front item was modified in place; let the queue re-establish its order.
                _queue.adjust();
            }
        }
        state.calculateScore(_sumTermWeight, _params.fieldCompletenessImportance);
        // Strict comparison: on equal scores the earlier processed element is kept.
        if (state.score > best.score) {
            best = state;
        }
    }
    // Output indices follow the order of the names registered in the blueprint constructor:
    // completeness, fieldCompleteness, queryCompleteness, elementWeight.
    outputs().set_number(0, best.completeness);
    outputs().set_number(1, best.fieldCompleteness);
    outputs().set_number(2, best.queryCompleteness);
    outputs().set_number(3, best.elementWeight);
}

/**
 * Remembers the match data object for later use.
 *
 * Only the address is stored; execute() resolves the term field match data
 * of each recorded term through this pointer.
 *
 * @param md match data to bind to this executor
 */
void
ElementCompletenessExecutor::handle_bind_match_data(const fef::MatchData &md)
{
    _md = &md;
}

//-----------------------------------------------------------------------------

/**
 * Creates the blueprint for the "elementCompleteness" feature and registers
 * the names of its outputs.
 */
ElementCompletenessBlueprint::ElementCompletenessBlueprint()
    : Blueprint("elementCompleteness"),
      _output(),
      _params()
{
    // The order matters: setup() describes these by index and the executor
    // writes its results to output slots 0..3 in the same order.
    for (const char *outputName : {"completeness",
                                   "fieldCompleteness",
                                   "queryCompleteness",
                                   "elementWeight"}) {
        _output.push_back(outputName);
    }
}

// Destructor is defined here, out of line, with the compiler-generated behavior.
ElementCompletenessBlueprint::~ElementCompletenessBlueprint() = default;

/**
 * Reports the dump features of this blueprint to the visitor.
 *
 * For every field of type INDEX that is not a filter field, one feature name
 * per registered output is built (base name, field name as parameter, output
 * name) and handed to the visitor.
 *
 * @param env     index environment supplying the fields
 * @param visitor receiver of the generated feature names
 */
void
ElementCompletenessBlueprint::visitDumpFeatures(const fef::IIndexEnvironment &env,
                                                fef::IDumpFeatureVisitor &visitor) const
{
    for (uint32_t fieldIdx = 0; fieldIdx < env.getNumFields(); ++fieldIdx) {
        const fef::FieldInfo &info = *env.getField(fieldIdx);
        const bool dumpable = (info.type() == fef::FieldType::INDEX) && !info.isFilter();
        if (!dumpable) {
            continue;
        }
        fef::FeatureNameBuilder nameBuilder;
        nameBuilder.baseName(getBaseName()).parameter(info.name());
        for (const auto &outputName : _output) {
            visitor.visitDumpFeature(nameBuilder.output(outputName).buildName());
        }
    }
}

/**
 * Creates a fresh, default-constructed blueprint of this type.
 *
 * @return owning pointer to the new blueprint
 */
fef::Blueprint::UP
ElementCompletenessBlueprint::createInstance() const
{
    fef::Blueprint::UP instance = std::make_unique<ElementCompletenessBlueprint>();
    return instance;
}

/**
 * Configures the blueprint from its parameter list and the rank properties.
 *
 * The first parameter is read as the field to evaluate. If the property
 * "fieldCompletenessImportance" is found for this feature name, its value is
 * parsed and stored in the parameters; otherwise the existing value is left
 * as is. All four outputs are described and access to the field is hinted to
 * the environment.
 *
 * @param env    index environment (properties, field access hints)
 * @param params feature parameters; params[0] is read as a field
 * @return always true
 */
bool
ElementCompletenessBlueprint::setup(const fef::IIndexEnvironment &env,
                                    const fef::ParameterList &params)
{
    const fef::FieldInfo *fieldInfo = params[0].asField();
    _params.fieldId = fieldInfo->id();

    fef::Property importance = env.getProperties().lookup(getName(), "fieldCompletenessImportance");
    if (importance.found()) {
        _params.fieldCompletenessImportance = vespalib::locale::c::atof(importance.get().c_str());
    }

    // Descriptions in the same order as the output names registered in the constructor.
    static const char *const descriptions[] = {
        "combined completeness for best scored element",
        "best scored element completeness",
        "query completeness for best scored element",
        "element weight of best scored element"
    };
    constexpr size_t numDescriptions = sizeof(descriptions) / sizeof(descriptions[0]);
    for (size_t idx = 0; idx < numDescriptions; ++idx) {
        describeOutput(_output[idx], descriptions[idx]);
    }

    env.hintFieldAccess(fieldInfo->id());
    return true;
}

/**
 * Creates the executor for a query.
 *
 * The executor is created through the given stash, from the query
 * environment and the parameters prepared by setup().
 *
 * @param env   query environment passed on to the executor
 * @param stash stash used to create the executor
 * @return reference to the created executor
 */
fef::FeatureExecutor &
ElementCompletenessBlueprint::createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const
{
    auto &executor = stash.create<ElementCompletenessExecutor>(env, _params);
    return executor;
}

//-----------------------------------------------------------------------------

}