1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
|
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "term_field_md_feature.h"
#include "utils.h"
#include <vespa/searchlib/fef/indexproperties.h>
#include <vespa/searchlib/fef/itablemanager.h>
#include <vespa/searchlib/fef/properties.h>
#include <vespa/vespalib/util/stash.h>
#include <cassert>
using namespace search::fef;
namespace search::features {
/**
 * Builds the list of (term field handle, term weight) pairs used by execute().
 *
 * Every term in the query environment is inspected; a term is recorded only
 * when lookupField() returns term field data for fieldId.
 *
 * @param env     query environment supplying the terms
 * @param fieldId id of the field whose term field data is looked up
 */
TermFieldMdExecutor::TermFieldMdExecutor(const fef::IQueryEnvironment &env, uint32_t fieldId)
    : _terms(),
      _md(nullptr)
{
    for (uint32_t termIdx = 0; termIdx < env.getNumTerms(); ++termIdx) {
        const fef::ITermData *termData = env.getTerm(termIdx);
        assert(termData != nullptr);
        const fef::ITermFieldData *fieldData = termData->lookupField(fieldId);
        if (fieldData == nullptr) {
            continue; // no term field data for this field
        }
        assert(fieldData->getHandle() != fef::IllegalHandle);
        _terms.emplace_back(fieldData->getHandle(), termData->getWeight());
    }
}
void
TermFieldMdExecutor::execute(uint32_t docId)
{
uint32_t termsmatched = 0;
uint32_t occs = 0;
feature_t score = 0;
feature_t weight = 0;
feature_t maxTermWeight = 0;
for (size_t i = 0; i < _terms.size(); ++i) {
const TermFieldMatchData &tfmd = *_md->resolveTermField(_terms[i].first);
int32_t termWeight = _terms[i].second.percent();
if (tfmd.getDocId() == docId) {
++termsmatched;
score += tfmd.getWeight();
occs += (tfmd.end() - tfmd.begin());
if (weight == 0) {
weight = tfmd.getWeight();
}
if (termWeight > maxTermWeight) {
maxTermWeight = termWeight;
}
}
}
outputs().set_number(0, score);
outputs().set_number(1, _terms.size());
outputs().set_number(2, (termsmatched > 0 ? 1.0 : 0.0));
outputs().set_number(3, termsmatched);
outputs().set_number(4, weight);
outputs().set_number(5, occs);
outputs().set_number(6, maxTermWeight);
}
/**
 * Stores a pointer to the match data that execute() reads term field match
 * data from. The executor does not take ownership of md.
 *
 * @param md match data to bind
 */
void
TermFieldMdExecutor::handle_bind_match_data(const MatchData &md)
{
    _md = &md;
}
/**
 * Creates the blueprint under the base name "termFieldMd" with no field
 * selected yet; the field is assigned in setup().
 */
TermFieldMdBlueprint::TermFieldMdBlueprint()
    : Blueprint("termFieldMd"),
      _field(nullptr)
{
}
/**
 * Intentionally empty: nothing is reported to the dump feature visitor.
 */
void
TermFieldMdBlueprint::visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const
{
    // No dump features for this blueprint.
}
/**
 * Creates a new, default-constructed TermFieldMdBlueprint.
 *
 * @return owning pointer to the new blueprint
 */
Blueprint::UP
TermFieldMdBlueprint::createInstance() const
{
    Blueprint::UP instance = std::make_unique<TermFieldMdBlueprint>();
    return instance;
}
/**
 * Takes the field from the first parameter and describes the seven outputs.
 *
 * The outputs are described in the same order as the indexes that
 * TermFieldMdExecutor::execute() writes (0 through 6), so the order of the
 * describeOutput() calls must not be changed.
 *
 * @param params parameter list; params[0] is read as a field
 * @return always true
 */
bool
TermFieldMdBlueprint::setup(const IIndexEnvironment &, const ParameterList &params)
{
    _field = params[0].asField();
    // Checked by assert only; createExecutor() dereferences _field.
    assert(_field != nullptr);

    describeOutput("score", "The term field match score");                                          // 0
    describeOutput("terms", "The number of ranked terms searching this field");                     // 1
    describeOutput("match", "1.0 if some ranked term matched this field, 0.0 otherwise");           // 2
    describeOutput("termsmatched", "The number of ranked terms matching this field");               // 3
    describeOutput("firstweight", "The first element weight seen");                                 // 4
    describeOutput("occurrences", "The sum of occurrences (positions) in the match data");          // 5
    describeOutput("maxTermWeight", "The max term weight among ranked terms matching this field");  // 6

    return true;
}
/**
 * Creates a TermFieldMdExecutor in the given stash for the field chosen in
 * setup().
 *
 * @param env   query environment handed to the executor's constructor
 * @param stash stash in which the executor is created
 * @return reference to the executor created in the stash
 */
FeatureExecutor &
TermFieldMdBlueprint::createExecutor(const IQueryEnvironment &env, vespalib::Stash &stash) const
{
    const uint32_t fieldId = _field->id();
    return stash.create<TermFieldMdExecutor>(env, fieldId);
}
}
|