diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2019-01-22 14:27:06 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2019-01-22 14:27:06 +0000 |
commit | 53e31ed9798ddf616f0431888cec0673dbf7bdba (patch) | |
tree | cb2a3feece64a82b1ae5892be021c52744fcd6c6 /searchlib | |
parent | 6e9a146bcf912c9378f4b78831ac55e1c0cd5f0f (diff) |
Undo clion auto format
Diffstat (limited to 'searchlib')
-rw-r--r-- | searchlib/src/vespa/searchlib/features/element_similarity_feature.cpp | 749 |
1 files changed, 362 insertions, 387 deletions
diff --git a/searchlib/src/vespa/searchlib/features/element_similarity_feature.cpp b/searchlib/src/vespa/searchlib/features/element_similarity_feature.cpp index 73d3262ab4e..c902eefe6a3 100644 --- a/searchlib/src/vespa/searchlib/features/element_similarity_feature.cpp +++ b/searchlib/src/vespa/searchlib/features/element_similarity_feature.cpp @@ -12,453 +12,428 @@ LOG_SETUP(".features.elementsimilarity"); namespace search::features { - using CollectionType = fef::FieldInfo::CollectionType; +using CollectionType = fef::FieldInfo::CollectionType; - namespace { +namespace { //----------------------------------------------------------------------------- - struct Aggregator { - typedef std::unique_ptr<Aggregator> UP; - - virtual UP create() const = 0; - - virtual void clear() = 0; - - virtual void add(double) = 0; - - virtual double get() const = 0; - - virtual ~Aggregator() {} - }; - - struct MaxAggregator : Aggregator { - size_t count; - double value; - - MaxAggregator() : count(0), value(0.0) {} - - UP create() const override { return UP(new MaxAggregator()); } - - void clear() override { - count = 0; - value = 0.0; - } - - void add(double v) override { value = ((++count == 1) || (v > value)) ? v : value; } - - double get() const override { return value; } - }; - - struct AvgAggregator : Aggregator { - size_t count; - double value; - - AvgAggregator() : count(0), value(0.0) {} - - UP create() const override { return UP(new AvgAggregator()); } - - void clear() override { - count = 0; - value = 0.0; - } - - void add(double v) override { - ++count; - value += v; - } - - double get() const override { return (count == 0) ? 0.0 : (value / count); } - }; - - struct SumAggregator : Aggregator { - double value; - - SumAggregator() : value(0.0) {} - - UP create() const override { return UP(new SumAggregator()); } - - void clear() override { value = 0.0; } +struct Aggregator { + typedef std::unique_ptr<Aggregator> UP; + virtual UP create() const = 0; + virtual void clear() = 0; + virtual void add(double) = 0; + virtual double get() const = 0; + virtual ~Aggregator() {} +}; + +struct MaxAggregator : Aggregator { + size_t count; + double value; + + MaxAggregator() : count(0), value(0.0) {} + UP create() const override { return UP(new MaxAggregator()); } + void clear() override { count = 0; value = 0.0; } + void add(double v) override { value = ((++count == 1) || (v > value)) ? v : value; } + double get() const override { return value; } +}; + +struct AvgAggregator : Aggregator { + size_t count; + double value; + + AvgAggregator() : count(0), value(0.0) {} + + UP create() const override { return UP(new AvgAggregator()); } + void clear() override { count = 0; value = 0.0; } + void add(double v) override { ++count;value += v; } + double get() const override { return (count == 0) ? 0.0 : (value / count); } +}; + +struct SumAggregator : Aggregator { + double value; + + SumAggregator() : value(0.0) {} + UP create() const override { return UP(new SumAggregator()); } + void clear() override { value = 0.0; } + void add(double v) override { value += v; } + double get() const override { return value; } +}; + +Aggregator::UP create_aggregator(const vespalib::string &name) { + if (name == "max") { + return Aggregator::UP(new MaxAggregator()); + } + if (name == "avg") { + return Aggregator::UP(new AvgAggregator()); + } + if (name == "sum") { + return Aggregator::UP(new SumAggregator()); + } + return Aggregator::UP(nullptr); +} - void add(double v) override { value += v; } +//----------------------------------------------------------------------------- - double get() const override { return value; } - }; +typedef double (*function_5)(double, double, double, double, double); - Aggregator::UP create_aggregator(const vespalib::string &name) { - if (name == "max") { - return Aggregator::UP(new MaxAggregator()); - } - if (name == "avg") { - return Aggregator::UP(new AvgAggregator()); - } - if (name == "sum") { - return Aggregator::UP(new SumAggregator()); - } - return Aggregator::UP(nullptr); - } +typedef std::pair<function_5, Aggregator::UP> OutputSpec; //----------------------------------------------------------------------------- - typedef double (*function_5)(double, double, double, double, double); - - typedef std::pair<function_5, Aggregator::UP> OutputSpec; +struct VectorizedQueryTerms { + struct Term { + fef::TermFieldHandle handle; + int weight; + int index; -//----------------------------------------------------------------------------- + Term(fef::TermFieldHandle handle_in, int weight_in, int index_in) + : handle(handle_in), weight(weight_in), index(index_in) + {} + }; - struct VectorizedQueryTerms { - struct Term { - fef::TermFieldHandle handle; - int weight; - int index; - - Term(fef::TermFieldHandle handle_in, int weight_in, int index_in) - : handle(handle_in), weight(weight_in), index(index_in) {} - }; - - std::vector<fef::TermFieldHandle> handles; - std::vector<int> weights; - int total_weight; - - VectorizedQueryTerms(const VectorizedQueryTerms &) = delete; - - VectorizedQueryTerms(VectorizedQueryTerms &&rhs) - : handles(std::move(rhs.handles)), weights(std::move(rhs.weights)), - total_weight(rhs.total_weight) {} - - VectorizedQueryTerms(const fef::IQueryEnvironment &env, uint32_t field_id) - : handles(), weights(), total_weight(0) { - std::vector<Term> terms; - for (uint32_t i = 0; i < env.getNumTerms(); ++i) { - const fef::ITermData *termData = env.getTerm(i); - if (termData->getWeight().percent() != 0) { // only consider query terms with contribution - typedef fef::ITermFieldRangeAdapter FRA; - for (FRA iter(*termData); iter.valid(); iter.next()) { - const fef::ITermFieldData &tfd = iter.get(); - if (tfd.getFieldId() == field_id) { - int term_weight = termData->getWeight().percent(); - total_weight += term_weight; - terms.push_back(Term(tfd.getHandle(), term_weight, - termData->getTermIndex())); - } - } + std::vector<fef::TermFieldHandle> handles; + std::vector<int> weights; + int total_weight; + + VectorizedQueryTerms(const VectorizedQueryTerms &) = delete; + + VectorizedQueryTerms(VectorizedQueryTerms &&rhs) + : handles(std::move(rhs.handles)), weights(std::move(rhs.weights)), + total_weight(rhs.total_weight) + {} + + VectorizedQueryTerms(const fef::IQueryEnvironment &env, uint32_t field_id) + : handles(), weights(), total_weight(0) + { + std::vector<Term> terms; + for (uint32_t i = 0; i < env.getNumTerms(); ++i) { + const fef::ITermData *termData = env.getTerm(i); + if (termData->getWeight().percent() != 0) { // only consider query terms with contribution + typedef fef::ITermFieldRangeAdapter FRA; + for (FRA iter(*termData); iter.valid(); iter.next()) { + const fef::ITermFieldData &tfd = iter.get(); + if (tfd.getFieldId() == field_id) { + int term_weight = termData->getWeight().percent(); + total_weight += term_weight; + terms.push_back(Term(tfd.getHandle(), term_weight, termData->getTermIndex())); } } - std::sort(terms.begin(), terms.end(), [](const Term &a, const Term &b) { return (a.index < b.index); }); - handles.reserve(terms.size()); - weights.reserve(terms.size()); - for (size_t i = 0; i < terms.size(); ++i) { - handles.push_back(terms[i].handle); - weights.push_back(terms[i].weight); - } } + } + std::sort(terms.begin(), terms.end(), [](const Term &a, const Term &b) { return (a.index < b.index); }); + handles.reserve(terms.size()); + weights.reserve(terms.size()); + for (size_t i = 0; i < terms.size(); ++i) { + handles.push_back(terms[i].handle); + weights.push_back(terms[i].weight); + } + } - ~VectorizedQueryTerms(); - }; + ~VectorizedQueryTerms(); +}; - VectorizedQueryTerms::~VectorizedQueryTerms() {} +VectorizedQueryTerms::~VectorizedQueryTerms() = default; //----------------------------------------------------------------------------- - struct State { - uint32_t element_length; - uint32_t matched_terms; - int sum_term_weight; - uint32_t last_pos; - double sum_proximity_score; - uint32_t last_idx; - uint32_t num_in_order; - - double proximity; - double order; - double query_coverage; - double field_coverage; - double element_weight; - - State(uint32_t element_length_in, int32_t element_weight_in, - uint32_t first_pos, int32_t first_weight, uint32_t first_idx) - : element_length(element_length_in), - matched_terms(1), sum_term_weight(first_weight), - last_pos(first_pos), sum_proximity_score(0.0), - last_idx(first_idx), num_in_order(0), - proximity(0.0), order(0.0), - query_coverage(0.0), field_coverage(0.0), - element_weight(element_weight_in) {} - - double proximity_score(uint32_t dist) { - return (dist > 8) ? 0 : (1.0 - (((dist - 1) / 8.0) * ((dist - 1) / 8.0))); - } +struct State { + uint32_t element_length; + uint32_t matched_terms; + int sum_term_weight; + uint32_t last_pos; + double sum_proximity_score; + uint32_t last_idx; + uint32_t num_in_order; + + double proximity; + double order; + double query_coverage; + double field_coverage; + double element_weight; + + State(uint32_t element_length_in, int32_t element_weight_in, + uint32_t first_pos, int32_t first_weight, uint32_t first_idx) + : element_length(element_length_in), + matched_terms(1), sum_term_weight(first_weight), + last_pos(first_pos), sum_proximity_score(0.0), + last_idx(first_idx), num_in_order(0), + proximity(0.0), order(0.0), + query_coverage(0.0), field_coverage(0.0), + element_weight(element_weight_in) + {} + + double proximity_score(uint32_t dist) { + return (dist > 8) ? 0 : (1.0 - (((dist - 1) / 8.0) * ((dist - 1) / 8.0))); + } - bool want_match(uint32_t pos) { - return (pos > last_pos); - } + bool want_match(uint32_t pos) { + return (pos > last_pos); + } - void addMatch(uint32_t pos, int32_t weight, uint32_t idx) { - sum_proximity_score += proximity_score(pos - last_pos); - num_in_order += (idx > last_idx) ? 1 : 0; - last_pos = pos; - last_idx = idx; - ++matched_terms; - sum_term_weight += weight; - } + void addMatch(uint32_t pos, int32_t weight, uint32_t idx) { + sum_proximity_score += proximity_score(pos - last_pos); + num_in_order += (idx > last_idx) ? 1 : 0; + last_pos = pos; + last_idx = idx; + ++matched_terms; + sum_term_weight += weight; + } - void calculate_scores(size_t num_query_terms, int total_term_weight) { - double matches = std::min(element_length, matched_terms); - if (matches < 2) { - proximity = proximity_score(element_length); - order = (num_query_terms == 1) ? 1.0 : 0.0; - } else { - proximity = sum_proximity_score / (matches - 1); - order = num_in_order / (double) (matches - 1); - } - query_coverage = sum_term_weight / (double) total_term_weight; - field_coverage = matches / (double) element_length; - } - }; + void calculate_scores(size_t num_query_terms, int total_term_weight) { + double matches = std::min(element_length, matched_terms); + if (matches < 2) { + proximity = proximity_score(element_length); + order = (num_query_terms == 1) ? 1.0 : 0.0; + } else { + proximity = sum_proximity_score / (matches - 1); + order = num_in_order / (double) (matches - 1); + } + query_coverage = sum_term_weight / (double) total_term_weight; + field_coverage = matches / (double) element_length; + } +}; //----------------------------------------------------------------------------- - class ElementSimilarityExecutor : public fef::FeatureExecutor { - private: - typedef fef::TermFieldMatchData::PositionsIterator ITR; +class ElementSimilarityExecutor : public fef::FeatureExecutor { +private: + typedef fef::TermFieldMatchData::PositionsIterator ITR; - struct CmpPosition { - ITR *pos; + struct CmpPosition { + ITR *pos; - CmpPosition(ITR *pos_in) : pos(pos_in) {} + CmpPosition(ITR *pos_in) : pos(pos_in) {} - bool operator()(uint16_t a, uint16_t b) { - return (pos[a]->getPosition() == pos[b]->getPosition()) - ? (a < b) - : (pos[a]->getPosition() < pos[b]->getPosition()); - } - }; + bool operator()(uint16_t a, uint16_t b) { + return (pos[a]->getPosition() == pos[b]->getPosition()) + ? (a < b) + : (pos[a]->getPosition() < pos[b]->getPosition()); + } + }; - struct CmpElement { - ITR *pos; + struct CmpElement { + ITR *pos; - CmpElement(ITR *pos_in) : pos(pos_in) {} + CmpElement(ITR *pos_in) : pos(pos_in) {} - bool operator()(uint16_t a, uint16_t b) { - return pos[a]->getElementId() < pos[b]->getElementId(); - } - }; - - typedef vespalib::PriorityQueue<uint16_t, CmpPosition> PositionQueue; - typedef vespalib::PriorityQueue<uint16_t, CmpElement> ElementQueue; - - VectorizedQueryTerms _terms; - std::vector<ITR> _pos; - std::vector<ITR> _end; - PositionQueue _position_queue; - ElementQueue _element_queue; - std::vector<OutputSpec> _outputs; - const fef::MatchData *_md; - - public: - ElementSimilarityExecutor(VectorizedQueryTerms &&terms, std::vector<OutputSpec> &&outputs_in) - : _terms(std::move(terms)), - _pos(_terms.handles.size(), nullptr), - _end(_terms.handles.size(), nullptr), - _position_queue(CmpPosition(&_pos[0])), - _element_queue(CmpElement(&_pos[0])), - _outputs(std::move(outputs_in)), - _md(nullptr) {} - - bool isPure() override { return _terms.handles.empty(); } - - void handle_bind_match_data(const fef::MatchData &md) override { - _md = &md; - } + bool operator()(uint16_t a, uint16_t b) { + return pos[a]->getElementId() < pos[b]->getElementId(); + } + }; - void requeue_term(uint16_t term, uint32_t element) { - while (_pos[term] != _end[term] && - _pos[term]->getElementId() == element) { - ++_pos[term]; - } - if (_pos[term] != _end[term]) { - _element_queue.push(term); - } - } + typedef vespalib::PriorityQueue<uint16_t, CmpPosition> PositionQueue; + typedef vespalib::PriorityQueue<uint16_t, CmpElement> ElementQueue; + + VectorizedQueryTerms _terms; + std::vector<ITR> _pos; + std::vector<ITR> _end; + PositionQueue _position_queue; + ElementQueue _element_queue; + std::vector<OutputSpec> _outputs; + const fef::MatchData *_md; + +public: + ElementSimilarityExecutor(VectorizedQueryTerms &&terms, std::vector<OutputSpec> &&outputs_in) + : _terms(std::move(terms)), + _pos(_terms.handles.size(), nullptr), + _end(_terms.handles.size(), nullptr), + _position_queue(CmpPosition(&_pos[0])), + _element_queue(CmpElement(&_pos[0])), + _outputs(std::move(outputs_in)), + _md(nullptr) + { } + + bool isPure() override { return _terms.handles.empty(); } + + void handle_bind_match_data(const fef::MatchData &md) override { + _md = &md; + } - void execute(uint32_t docId) override { - for (auto &output: _outputs) { - output.second->clear(); - } - for (size_t i = 0; i < _terms.handles.size(); ++i) { - const fef::TermFieldMatchData *tfmd = _md->resolveTermField(_terms.handles[i]); - if (tfmd->getDocId() == docId) { - _pos[i] = tfmd->begin(); - _end[i] = tfmd->end(); - if (_pos[i] != _end[i]) { - _element_queue.push(i); - } - } + void requeue_term(uint16_t term, uint32_t element) { + while (_pos[term] != _end[term] && (_pos[term]->getElementId() == element)) { + ++_pos[term]; + } + if (_pos[term] != _end[term]) { + _element_queue.push(term); + } + } + + void execute(uint32_t docId) override { + for (auto &output: _outputs) { + output.second->clear(); + } + for (size_t i = 0; i < _terms.handles.size(); ++i) { + const fef::TermFieldMatchData *tfmd = _md->resolveTermField(_terms.handles[i]); + if (tfmd->getDocId() == docId) { + _pos[i] = tfmd->begin(); + _end[i] = tfmd->end(); + if (_pos[i] != _end[i]) { + _element_queue.push(i); } - while (!_element_queue.empty()) { - uint32_t elementId = _pos[_element_queue.front()]->getElementId(); - while (!_element_queue.empty() && _pos[_element_queue.front()]->getElementId() == elementId) { - _position_queue.push(_element_queue.front()); - _element_queue.pop_front(); - } - uint16_t first = _position_queue.front(); - State state(_pos[first]->getElementLen(), - _pos[first]->getElementWeight(), - _pos[first]->getPosition(), - _terms.weights[first], - first); + } + } + while (!_element_queue.empty()) { + uint32_t elementId = _pos[_element_queue.front()]->getElementId(); + while (!_element_queue.empty() && _pos[_element_queue.front()]->getElementId() == elementId) { + _position_queue.push(_element_queue.front()); + _element_queue.pop_front(); + } + uint16_t first = _position_queue.front(); + State state(_pos[first]->getElementLen(), + _pos[first]->getElementWeight(), + _pos[first]->getPosition(), + _terms.weights[first], + first); + requeue_term(_position_queue.front(), elementId); + _position_queue.pop_front(); + while (!_position_queue.empty()) { + uint16_t item = _position_queue.front(); + if (state.want_match(_pos[item]->getPosition())) { + state.addMatch(_pos[item]->getPosition(), _terms.weights[item], item); requeue_term(_position_queue.front(), elementId); _position_queue.pop_front(); - while (!_position_queue.empty()) { - uint16_t item = _position_queue.front(); - if (state.want_match(_pos[item]->getPosition())) { - state.addMatch(_pos[item]->getPosition(), - _terms.weights[item], - item); - requeue_term(_position_queue.front(), elementId); - _position_queue.pop_front(); - } else { - ++_pos[item]; - if (_pos[item] == _end[item]) { - _position_queue.pop_front(); - } else { - _position_queue.adjust(); - } - } - } - state.calculate_scores(_terms.handles.size(), _terms.total_weight); - for (auto &output: _outputs) { - output.second->add(output.first(state.proximity, state.order, - state.query_coverage, state.field_coverage, - state.element_weight)); + } else { + ++_pos[item]; + if (_pos[item] == _end[item]) { + _position_queue.pop_front(); + } else { + _position_queue.adjust(); } } - for (size_t i = 0; i < _outputs.size(); ++i) { - outputs().set_number(i, _outputs[i].second->get()); - } } - }; + state.calculate_scores(_terms.handles.size(), _terms.total_weight); + for (auto &output: _outputs) { + output.second->add(output.first(state.proximity, state.order, + state.query_coverage, state.field_coverage, + state.element_weight)); + } + } + for (size_t i = 0; i < _outputs.size(); ++i) { + outputs().set_number(i, _outputs[i].second->get()); + } + } +}; //----------------------------------------------------------------------------- - std::vector<std::pair<vespalib::string, vespalib::string> > extract_properties(const fef::Properties &props, - const vespalib::string &ns, - const vespalib::string &first_name, - const vespalib::string &first_default) { - struct MyVisitor : fef::IPropertiesVisitor { - const vespalib::string &first_name; - std::vector<std::pair<vespalib::string, vespalib::string> > &result; - - MyVisitor(const vespalib::string &first_name_in, - std::vector<std::pair<vespalib::string, vespalib::string> > &result_in) - : first_name(first_name_in), result(result_in) {} - - virtual void visitProperty(const fef::Property::Value &key, - const fef::Property &values) override { - if (key != first_name) { - result.emplace_back(key, values.get()); - } - } - }; - std::vector<std::pair<vespalib::string, vespalib::string> > result; - result.emplace_back(first_name, props.lookup(ns, first_name).get(first_default)); - MyVisitor my_visitor(first_name, result); - props.visitNamespace(ns, my_visitor); - return result; +std::vector<std::pair<vespalib::string, vespalib::string> > +extract_properties(const fef::Properties &props, const vespalib::string &ns, + const vespalib::string &first_name, const vespalib::string &first_default) +{ + struct MyVisitor : fef::IPropertiesVisitor { + const vespalib::string &first_name; + std::vector<std::pair<vespalib::string, vespalib::string> > &result; + + MyVisitor(const vespalib::string &first_name_in, + std::vector<std::pair<vespalib::string, vespalib::string> > &result_in) + : first_name(first_name_in), result(result_in) + {} + + void visitProperty(const fef::Property::Value &key, const fef::Property &values) override { + if (key != first_name) { + result.emplace_back(key, values.get()); + } } + }; + std::vector<std::pair<vespalib::string, vespalib::string> > result; + result.emplace_back(first_name, props.lookup(ns, first_name).get(first_default)); + MyVisitor my_visitor(first_name, result); + props.visitNamespace(ns, my_visitor); + return result; +} - std::vector<std::pair<vespalib::string, vespalib::string> > get_outputs(const fef::Properties &props, - const vespalib::string &feature) { - return extract_properties(props, feature + ".output", "default", "max((0.35*p+0.15*o+0.30*q+0.20*f)*w)"); - } +std::vector<std::pair<vespalib::string, vespalib::string> > +get_outputs(const fef::Properties &props, const vespalib::string &feature) { + return extract_properties(props, feature + ".output", "default", "max((0.35*p+0.15*o+0.30*q+0.20*f)*w)"); +} - } // namespace features::<unnamed> +} // namespace features::<unnamed> //----------------------------------------------------------------------------- - struct ElementSimilarityBlueprint::OutputContext { - vespalib::eval::CompileCache::Token::UP compile_token; - Aggregator::UP aggregator_factory; +struct ElementSimilarityBlueprint::OutputContext { + vespalib::eval::CompileCache::Token::UP compile_token; + Aggregator::UP aggregator_factory; - OutputContext(const vespalib::eval::Function &function, - Aggregator::UP aggregator) - : compile_token(vespalib::eval::CompileCache::compile(function, vespalib::eval::PassParams::SEPARATE)), - aggregator_factory(std::move(aggregator)) {} - }; + OutputContext(const vespalib::eval::Function &function, Aggregator::UP aggregator) + : compile_token(vespalib::eval::CompileCache::compile(function, vespalib::eval::PassParams::SEPARATE)), + aggregator_factory(std::move(aggregator)) + {} +}; //----------------------------------------------------------------------------- - ElementSimilarityBlueprint::ElementSimilarityBlueprint() - : Blueprint("elementSimilarity"), _field_id(fef::IllegalHandle), _outputs() {} - - ElementSimilarityBlueprint::~ElementSimilarityBlueprint() {} - - void - ElementSimilarityBlueprint::visitDumpFeatures(const fef::IIndexEnvironment &env, - fef::IDumpFeatureVisitor &visitor) const { - for (uint32_t i = 0; i < env.getNumFields(); ++i) { - const fef::FieldInfo &field = *env.getField(i); - if ((field.type() == fef::FieldType::INDEX) && - (field.collection() != CollectionType::SINGLE) && - (!field.isFilter())) { - fef::FeatureNameBuilder fnb; - fnb.baseName(getBaseName()).parameter(field.name()); - auto outputs = get_outputs(env.getProperties(), fnb.buildName()); - visitor.visitDumpFeature(fnb.output("").buildName()); - for (size_t out_idx = 1; out_idx < outputs.size(); ++out_idx) { - visitor.visitDumpFeature(fnb.output(outputs[out_idx].first).buildName()); - } +ElementSimilarityBlueprint::ElementSimilarityBlueprint() + : Blueprint("elementSimilarity"), _field_id(fef::IllegalHandle), _outputs() +{} + +ElementSimilarityBlueprint::~ElementSimilarityBlueprint() = default; + +void +ElementSimilarityBlueprint::visitDumpFeatures(const fef::IIndexEnvironment &env, + fef::IDumpFeatureVisitor &visitor) const +{ + for (uint32_t i = 0; i < env.getNumFields(); ++i) { + const fef::FieldInfo &field = *env.getField(i); + if ((field.type() == fef::FieldType::INDEX) && + (field.collection() != CollectionType::SINGLE) && + (!field.isFilter())) { + fef::FeatureNameBuilder fnb; + fnb.baseName(getBaseName()).parameter(field.name()); + auto outputs = get_outputs(env.getProperties(), fnb.buildName()); + visitor.visitDumpFeature(fnb.output("").buildName()); + for (size_t out_idx = 1; out_idx < outputs.size(); ++out_idx) { + visitor.visitDumpFeature(fnb.output(outputs[out_idx].first).buildName()); } } } +} - bool - ElementSimilarityBlueprint::setup(const fef::IIndexEnvironment &env, - const fef::ParameterList ¶ms) { - const fef::FieldInfo *field = params[0].asField(); - _field_id = field->id(); - fef::FeatureNameBuilder fnb; - fnb.baseName(getBaseName()).parameter(field->name()); - auto outputs = get_outputs(env.getProperties(), fnb.buildName()); - for (const auto &entry: outputs) { - describeOutput(entry.first, entry.second); - vespalib::string aggr_name; - vespalib::string expr; - vespalib::string error; - if (!vespalib::eval::Function::unwrap(entry.second, aggr_name, expr, error)) { - LOG(warning, - "'%s': could not extract aggregator and expression for output '%s' from config value '%s' (%s)", - fnb.buildName().c_str(), entry.first.c_str(), entry.second.c_str(), error.c_str()); - return false; - } - Aggregator::UP aggr = create_aggregator(aggr_name); - if (aggr.get() == nullptr) { - LOG(warning, "'%s': unknown aggregator '%s'", fnb.buildName().c_str(), aggr_name.c_str()); - return false; - } - std::vector<vespalib::string> args({"p", "o", "q", "f", "w"}); - vespalib::eval::Function function = vespalib::eval::Function::parse(args, expr); - if (function.has_error()) { - LOG(warning, "'%s': per-element expression parse error: %s", - fnb.buildName().c_str(), function.get_error().c_str()); - return false; - } - _outputs.push_back(OutputContext_UP(new OutputContext(function, std::move(aggr)))); +bool +ElementSimilarityBlueprint::setup(const fef::IIndexEnvironment &env, const fef::ParameterList ¶ms) { + const fef::FieldInfo *field = params[0].asField(); + _field_id = field->id(); + fef::FeatureNameBuilder fnb; + fnb.baseName(getBaseName()).parameter(field->name()); + auto outputs = get_outputs(env.getProperties(), fnb.buildName()); + for (const auto &entry: outputs) { + describeOutput(entry.first, entry.second); + vespalib::string aggr_name; + vespalib::string expr; + vespalib::string error; + if (!vespalib::eval::Function::unwrap(entry.second, aggr_name, expr, error)) { + LOG(warning, + "'%s': could not extract aggregator and expression for output '%s' from config value '%s' (%s)", + fnb.buildName().c_str(), entry.first.c_str(), entry.second.c_str(), error.c_str()); + return false; } - env.hintFieldAccess(field->id()); - return true; - } - - fef::FeatureExecutor & - ElementSimilarityBlueprint::createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const { - std::vector<OutputSpec> output_specs; - for (const auto &output: _outputs) { - output_specs.emplace_back(output->compile_token->get().get_function<5>(), - output->aggregator_factory->create()); + Aggregator::UP aggr = create_aggregator(aggr_name); + if (aggr.get() == nullptr) { + LOG(warning, "'%s': unknown aggregator '%s'", fnb.buildName().c_str(), aggr_name.c_str()); + return false; + } + std::vector<vespalib::string> args({"p", "o", "q", "f", "w"}); + vespalib::eval::Function function = vespalib::eval::Function::parse(args, expr); + if (function.has_error()) { + LOG(warning, "'%s': per-element expression parse error: %s", + fnb.buildName().c_str(), function.get_error().c_str()); + return false; } - return stash.create<ElementSimilarityExecutor>(VectorizedQueryTerms(env, _field_id), std::move(output_specs)); + _outputs.push_back(OutputContext_UP(new OutputContext(function, std::move(aggr)))); } + env.hintFieldAccess(field->id()); + return true; +} -//----------------------------------------------------------------------------- +fef::FeatureExecutor & +ElementSimilarityBlueprint::createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const { + std::vector<OutputSpec> output_specs; + for (const auto &output: _outputs) { + output_specs.emplace_back(output->compile_token->get().get_function<5>(), + output->aggregator_factory->create()); + } + return stash.create<ElementSimilarityExecutor>(VectorizedQueryTerms(env, _field_id), std::move(output_specs)); +} } |