aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/vespa/searchlib/features/termfeature.cpp
blob: e84cf6a9a1ef5c7eea65d191ea7806d0dfe151e1 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#include "termfeature.h"
#include "utils.h"
#include <vespa/searchlib/fef/featurenamebuilder.h>
#include <vespa/searchlib/fef/fieldinfo.h>
#include <vespa/searchlib/fef/properties.h>
#include <vespa/searchlib/fef/itermdata.h>
#include <vespa/vespalib/util/stringfmt.h>
#include <vespa/vespalib/util/stash.h>

using namespace search::fef;

namespace search::features {

/**
 * Captures everything the executor needs for one query term at construction time.
 *
 * The term data is fetched from the query environment via getTerm(termId) and the
 * connectedness via util::lookupConnectedness(). Significance starts at 0 and is
 * only looked up when term data was found.
 *
 * @param env    the query environment to look the term up in
 * @param termId the id of the query term this executor reports on
 */
TermExecutor::TermExecutor(const search::fef::IQueryEnvironment &env,
                           uint32_t termId)
    : search::fef::FeatureExecutor(),
      _termData(env.getTerm(termId)),
      _connectedness(util::lookupConnectedness(env, termId)),
      _significance(0)
{
    if (_termData == nullptr) {
        return; // no term data; keep the zero significance set above
    }
    // The significance derived from the term data is passed as the fallback
    // argument of the lookup against the query environment.
    const feature_t defaultSignificance = util::getSignificance(*_termData);
    _significance = util::lookupSignificance(env, termId, defaultSignificance);
}

void
TermExecutor::execute(uint32_t)
{
    if (_termData == nullptr) { // this query term is not present in the query
        outputs().set_number(0, 0.0f); // connectedness
        outputs().set_number(1, 0.0f); // significance (1 - frequency)
        outputs().set_number(2, 0.0f); // weight
        return;
    }
    outputs().set_number(0, _connectedness);
    outputs().set_number(1, _significance);
    outputs().set_number(2, (feature_t)_termData->getWeight().percent());
}

/**
 * Creates the blueprint under the base name "term" with the term id set to 0;
 * setup() overwrites the term id from the feature parameters.
 */
TermBlueprint::TermBlueprint()
    : search::fef::Blueprint("term"),
      _termId(0)
{
}

/**
 * Reports the dump features of this blueprint to the given visitor.
 *
 * The number of terms is read from the "numTerms" property under this blueprint's
 * base name, with "5" as the default, and parsed with atoi(). For each term index
 * in [0, numTerms) the connectedness, significance and weight feature names are
 * visited, in that order.
 *
 * @param env     index environment whose properties hold the "numTerms" setting
 * @param visitor receiver of the generated feature names
 */
void
TermBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &env,
                                 search::fef::IDumpFeatureVisitor &visitor) const
{
    const char *const outputNames[] = { "connectedness", "significance", "weight" };

    const int termCount =
        atoi(env.getProperties().lookup(getBaseName(), "numTerms").get("5").c_str());

    for (int termIdx = 0; termIdx < termCount; ++termIdx) {
        // One builder per term: base name and parameter are fixed, only the
        // output part changes between the visited names.
        search::fef::FeatureNameBuilder nameBuilder;
        nameBuilder.baseName(getBaseName()).parameter(vespalib::make_string("%d", termIdx));
        for (const char *outputName : outputNames) {
            visitor.visitDumpFeature(nameBuilder.output(outputName).buildName());
        }
    }
}

/**
 * Configures the blueprint from its feature parameters and declares its outputs.
 *
 * The first parameter is taken as the term id. The outputs are declared in the
 * order connectedness, significance, weight, which matches the output indices
 * 0, 1 and 2 that TermExecutor::execute() writes.
 *
 * @param params the feature parameters; params[0] is read as an integer term id
 * @return always true
 */
bool
TermBlueprint::setup(const search::fef::IIndexEnvironment &,
                     const search::fef::ParameterList &params)
{
    struct OutputSpec {
        const char *name;
        const char *description;
    };
    const OutputSpec outputSpecs[] = {
        { "connectedness", "The normalized strength with which this term is connected to the next term in the query." },
        { "significance",  "1 - the normalized frequency of documents containing this query term." },
        { "weight",        "The normalized importance of matching this query term." },
    };

    _termId = params[0].asInteger();
    for (const OutputSpec &spec : outputSpecs) {
        describeOutput(spec.name, spec.description);
    }
    return true;
}

/**
 * Creates a new, default-constructed TermBlueprint.
 *
 * @return an owning pointer to the new blueprint
 */
search::fef::Blueprint::UP
TermBlueprint::createInstance() const
{
    search::fef::Blueprint::UP instance = std::make_unique<TermBlueprint>();
    return instance;
}

/**
 * Creates a TermExecutor for the term id configured in setup().
 *
 * @param env   the query environment handed to the executor's constructor
 * @param stash the stash the executor is created in
 * @return a reference to the executor created in the stash
 */
search::fef::FeatureExecutor &
TermBlueprint::createExecutor(const search::fef::IQueryEnvironment &env, vespalib::Stash &stash) const
{
    TermExecutor &executor = stash.create<TermExecutor>(env, _termId);
    return executor;
}

}