1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
|
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "euclidean_distance_feature.h"
#include "valuefeature.h"
#include "array_parser.hpp"
#include <vespa/searchlib/attribute/integerbase.h>
#include <vespa/searchlib/fef/properties.h>
#include <vespa/searchcommon/attribute/attributecontent.h>
#include <vespa/vespalib/util/issue.h>
#include <vespa/vespalib/util/stash.h>
#include <cmath>
#include <vespa/log/log.h>
LOG_SETUP(".features.euclidean_distance_feature");
using namespace search::attribute;
using namespace search::fef;
using vespalib::Issue;
namespace search::features {
/**
 * Creates an executor that measures the distance between an attribute's
 * values and a fixed query vector.
 *
 * @param attribute the attribute vector that is read for each document.
 * @param vector    the query vector; it is moved into the executor.
 */
template <typename DataType>
EuclideanDistanceExecutor<DataType>::EuclideanDistanceExecutor(const search::attribute::IAttributeVector &attribute,
                                                               QueryVectorType vector)
    : FeatureExecutor(),
      _attribute(attribute),
      _vector(std::move(vector)),
      _attributeBuffer()  // filled per document in execute()
{
}
/**
 * Computes the euclidean distance between two vectors.
 *
 * Only the index range present in both vectors contributes; any elements
 * beyond the length of the shorter vector are ignored.
 *
 * @param v1 the values read from the attribute.
 * @param v2 the query vector.
 * @return the square root of the sum of squared element-wise differences.
 */
template <typename DataType>
feature_t
EuclideanDistanceExecutor<DataType>::euclideanDistance(const BufferType &v1, const QueryVectorType &v2)
{
    const size_t attributeSize = static_cast<size_t>(v1.size());
    const size_t overlap = std::min(attributeSize, v2.size());
    feature_t sumOfSquares = 0;
    for (size_t idx = 0; idx != overlap; ++idx) {
        // The difference is formed in DataType and then widened to feature_t.
        const feature_t delta = v1[idx] - v2[idx];
        sumOfSquares += delta * delta;
    }
    return std::sqrt(sumOfSquares);
}
/**
 * Calculates the feature value for a single document.
 *
 * Fills the attribute buffer from the attribute for the given document and
 * stores the distance to the query vector in output 0.
 *
 * @param docId the document to evaluate.
 */
template <typename DataType>
void
EuclideanDistanceExecutor<DataType>::execute(uint32_t docId)
{
    _attributeBuffer.fill(_attribute, docId);
    const feature_t distance = euclideanDistance(_attributeBuffer, _vector);
    outputs().set_number(0, distance);
}
/**
 * Creates a blueprint registered under the base name "euclideanDistance".
 * The attribute name and query vector name start out empty and are
 * assigned in setup().
 */
EuclideanDistanceBlueprint::EuclideanDistanceBlueprint()
    : Blueprint("euclideanDistance"),
      _attributeName(),
      _queryVector()
{
}

// Defined here rather than in the header.
EuclideanDistanceBlueprint::~EuclideanDistanceBlueprint() = default;
/**
 * Intentionally empty: this blueprint reports nothing to the dump feature
 * visitor.
 */
void
EuclideanDistanceBlueprint::visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const
{
    // Nothing to visit.
}
bool
EuclideanDistanceBlueprint::setup(const IIndexEnvironment &env, const ParameterList ¶ms)
{
_attributeName = params[0].getValue();
_queryVector = params[1].getValue();
describeOutput("distance", "The result after calculating the euclidean distance of the vector represented by the array "
"and the vector sent down with the query");
env.hintAttributeAccess(_attributeName);
return true;
}
/**
 * Creates a new, not yet set up, blueprint of this type.
 *
 * @return a freshly constructed EuclideanDistanceBlueprint.
 */
Blueprint::UP
EuclideanDistanceBlueprint::createInstance() const
{
    Blueprint::UP instance = std::make_unique<EuclideanDistanceBlueprint>();
    return instance;
}
namespace {

/**
 * Parses the query vector property into a vector of DataType and creates an
 * executor for it in the given stash.
 *
 * @param attribute   the attribute the executor will read from.
 * @param queryVector the property whose value holds the query vector.
 * @param stash       the stash in which the executor is created.
 * @return a reference to the executor created in the stash.
 */
template <typename DataType>
FeatureExecutor &
create(const IAttributeVector &attribute, const Property &queryVector, vespalib::Stash &stash)
{
    using Executor = EuclideanDistanceExecutor<DataType>;
    std::vector<DataType> parsedVector;
    ArrayParser::parse(queryVector.get(), parsedVector);
    return stash.create<Executor>(attribute, std::move(parsedVector));
}

}
/**
 * Creates the executor for a query.
 *
 * An executor computing the distance is created only for array attributes
 * of integer or floating point type. If the attribute cannot be found, or is
 * of any other kind, an issue is reported and a SingleZeroValueExecutor is
 * created instead.
 *
 * @param env   the query environment supplying the attribute context and
 *              the query properties.
 * @param stash the stash in which the executor is created.
 * @return a reference to the executor created in the stash.
 */
FeatureExecutor &
EuclideanDistanceBlueprint::createExecutor(const IQueryEnvironment &env, vespalib::Stash &stash) const
{
    const IAttributeVector *attrVector = env.getAttributeContext().getAttribute(_attributeName);
    if (!attrVector) {
        Issue::report("euclidean_distance feature: The attribute vector '%s' was not found, returning default value.",
                      _attributeName.c_str());
        return stash.create<SingleZeroValueExecutor>();
    }

    Property queryVector = env.getProperties().lookup(getBaseName(), _queryVector);

    // Only array attributes holding numbers are supported.
    const bool isArray = (attrVector->getCollectionType() == attribute::CollectionType::ARRAY);
    if (isArray && attrVector->isIntegerType()) {
        return create<IAttributeVector::largeint_t>(*attrVector, queryVector, stash);
    }
    if (isArray && attrVector->isFloatingPointType()) {
        return create<double>(*attrVector, queryVector, stash);
    }

    Issue::report("euclidean_distance feature: The attribute vector '%s' is NOT of type array<int/long/float/double>"
                  ", returning default value.", attrVector->getName().c_str());
    return stash.create<SingleZeroValueExecutor>();
}
}
|