summaryrefslogtreecommitdiffstats
path: root/searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.cpp
blob: 583e910a593acff5576c17fc9d7efce8ba628331 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#include "tensor_from_weighted_set_feature.h"

#include "constant_tensor_executor.h"
#include "utils.h"
#include "tensor_from_attribute_executor.h"
#include "weighted_set_parser.hpp"

#include <vespa/searchlib/fef/properties.h>
#include <vespa/searchlib/fef/feature_type.h>
#include <vespa/searchcommon/attribute/attributecontent.h>
#include <vespa/searchcommon/attribute/iattributevector.h>
#include <vespa/eval/eval/function.h>
#include <vespa/eval/tensor/tensor.h>
#include <vespa/eval/tensor/default_tensor.h>
#include <vespa/eval/eval/value_type.h>

#include <vespa/log/log.h>
LOG_SETUP(".features.tensor_from_weighted_set_feature");

using namespace search::fef;
using search::attribute::IAttributeVector;
using search::attribute::WeightedConstCharContent;
using search::attribute::WeightedStringContent;
using vespalib::tensor::DefaultTensor;
using vespalib::tensor::TensorBuilder;
using vespalib::eval::ValueType;
using search::fef::FeatureType;

namespace search {
namespace features {

namespace {

// Output sink passed to WeightedSetParser::parse(): every (key, weight) pair
// reported through insert() is appended to _data in the order it is received.
struct WeightedStringVector
{
    std::vector<IAttributeVector::WeightedString> _data;

    // Converts the textual weight with util::strToNum<int32_t> and stores it
    // together with the key.
    void insert(const vespalib::stringref &key, const vespalib::stringref &weight) {
        const int32_t parsedWeight = util::strToNum<int32_t>(weight);
        _data.emplace_back(key, parsedWeight);
    }
};

}

// Default constructor: passes the base name "tensorFromWeightedSet" on to the
// TensorFactoryBlueprint base class. No other state is initialised here.
TensorFromWeightedSetBlueprint::TensorFromWeightedSetBlueprint()
    : TensorFactoryBlueprint("tensorFromWeightedSet")
{
}

// Reads the feature parameters and declares the single "tensor" output.
//
//   params[0] = source ('attribute(name)' OR 'query(param)')
//   params[1] = dimension name (optional)
//
// When no dimension is given, the source parameter name (_sourceParam) is used
// as the dimension name. The output is described regardless of whether the
// source could be extracted; the return value is the result of
// extractSource() on params[0].
// NOTE(review): params[0] is indexed unconditionally; presumably the parameter
// count is validated before setup() is called - confirm against the framework.
bool
TensorFromWeightedSetBlueprint::setup(const search::fef::IIndexEnvironment &env,
                                      const search::fef::ParameterList &params)
{
    (void) env; // the index environment is not consulted by this blueprint
    const bool sourceAccepted = extractSource(params[0].getValue());
    if (params.size() != 2) {
        // No explicit dimension: fall back to the source parameter name.
        _dimension = _sourceParam;
    } else {
        _dimension = params[1].getValue();
    }
    describeOutput("tensor",
                   "The tensor created from the given weighted set source (attribute field or query parameter)",
                   FeatureType::object(ValueType::tensor_type({{_dimension}})));
    return sourceAccepted;
}

namespace {

// Creates the executor used when the source is an attribute.
//
// The attribute named attrName is looked up through the query environment's
// attribute context. If it is missing, or is not a weighted set, or holds
// floating point values, a warning is logged and an executor producing an
// empty tensor with the single given dimension is returned. Otherwise a
// TensorFromAttributeExecutor is created in the stash, parameterised on the
// content type matching the attribute (integer or other).
FeatureExecutor &
createAttributeExecutor(const search::fef::IQueryEnvironment &env,
                        const vespalib::string &attrName,
                        const vespalib::string &dimension,
                        vespalib::Stash &stash)
{
    // Shared fallback for every rejected attribute.
    auto makeEmptyTensor = [&dimension, &stash]() -> FeatureExecutor & {
        return ConstantTensorExecutor::createEmpty(ValueType::tensor_type({{dimension}}), stash);
    };

    const IAttributeVector *attr = env.getAttributeContext().getAttribute(attrName);
    if (attr == nullptr) {
        LOG(warning, "The attribute vector '%s' was not found in the attribute manager."
                " Returning empty tensor.", attrName.c_str());
        return makeEmptyTensor();
    }
    if (attr->getCollectionType() != search::attribute::CollectionType::WSET ||
            attr->isFloatingPointType()) {
        LOG(warning, "The attribute vector '%s' is NOT of type weighted set of string or integer."
                " Returning empty tensor.", attrName.c_str());
        return makeEmptyTensor();
    }
    if (!attr->isIntegerType()) {
        // When the underlying attribute is of type string we can reference
        // these values using WeightedConstCharContent.
        return stash.create<TensorFromAttributeExecutor<WeightedConstCharContent>>(attr, dimension);
    }
    // Using WeightedStringContent ensures that the integer values are converted
    // to strings while extracting them from the attribute.
    return stash.create<TensorFromAttributeExecutor<WeightedStringContent>>(attr, dimension);
}

// Creates the executor used when the source is a query property.
//
// The property stored under queryKey is looked up in the query environment.
// If it is absent or its value is empty, an executor producing an empty tensor
// with the single given dimension is returned. Otherwise the value is parsed
// with WeightedSetParser, and each parsed element becomes one tensor cell:
// the element value is the label in the given dimension and the element
// weight is the cell value.
FeatureExecutor &
createQueryExecutor(const search::fef::IQueryEnvironment &env,
                    const vespalib::string &queryKey,
                    const vespalib::string &dimension, vespalib::Stash &stash)
{
    search::fef::Property queryProp = env.getProperties().lookup(queryKey);
    if (!queryProp.found() || queryProp.get().empty()) {
        return ConstantTensorExecutor::createEmpty(ValueType::tensor_type({{dimension}}), stash);
    }

    WeightedStringVector parsedSet;
    WeightedSetParser::parse(queryProp.get(), parsedSet);

    DefaultTensor::builder cellBuilder;
    TensorBuilder::Dimension dimHandle = cellBuilder.define_dimension(dimension);
    for (const auto &entry : parsedSet._data) {
        cellBuilder.add_label(dimHandle, entry.value());
        cellBuilder.add_cell(entry.weight());
    }
    return ConstantTensorExecutor::create(cellBuilder.build(), stash);
}

}

// Dispatches on the source type: attribute sources and query sources each get
// their dedicated executor; any other source type yields an executor producing
// an empty tensor with the configured dimension.
FeatureExecutor &
TensorFromWeightedSetBlueprint::createExecutor(const search::fef::IQueryEnvironment &env, vespalib::Stash &stash) const
{
    if (_sourceType == ATTRIBUTE_SOURCE) {
        return createAttributeExecutor(env, _sourceParam, _dimension, stash);
    }
    if (_sourceType == QUERY_SOURCE) {
        return createQueryExecutor(env, _sourceParam, _dimension, stash);
    }
    // Neither attribute nor query source.
    return ConstantTensorExecutor::createEmpty(ValueType::tensor_type({{_dimension}}), stash);
}

} // namespace features
} // namespace search