diff options
author | Håvard Pettersen <havardpe@oath.com> | 2022-03-10 15:46:22 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@oath.com> | 2022-03-10 16:29:09 +0000 |
commit | 7f84853795dd6c8dfc28e4058ad1fcd17f3fe442 (patch) | |
tree | 3efb371ed6b8b6792a5329ab99ba38516702a026 /searchlib | |
parent | 93fecf64015c811e03ec8abfd4730383d1c9cf16 (diff) |
support binary feature overrides
also checks type to make sure the override is valid
Diffstat (limited to 'searchlib')
4 files changed, 197 insertions, 37 deletions
diff --git a/searchlib/src/tests/fef/featureoverride/featureoverride.cpp b/searchlib/src/tests/fef/featureoverride/featureoverride.cpp index d580e361bff..b95478072de 100644 --- a/searchlib/src/tests/fef/featureoverride/featureoverride.cpp +++ b/searchlib/src/tests/fef/featureoverride/featureoverride.cpp @@ -1,6 +1,4 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/log/log.h> -LOG_SETUP("featureoverride_test"); #include <vespa/vespalib/testkit/testapp.h> #include <vespa/searchlib/fef/fef.h> @@ -9,11 +7,23 @@ LOG_SETUP("featureoverride_test"); #include <vespa/searchlib/fef/test/plugin/double.h> #include <vespa/searchlib/fef/test/plugin/sum.h> #include <vespa/searchlib/features/valuefeature.h> +#include <vespa/searchlib/features/rankingexpressionfeature.h> +#include <vespa/searchlib/fef/test/test_features.h> +#include <vespa/eval/eval/tensor_spec.h> +#include <vespa/eval/eval/fast_value.h> +#include <vespa/eval/eval/value_codec.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/vespalib/util/issue.h> -using namespace search::fef; -using namespace search::fef::test; using namespace search::features; +using namespace search::fef::test; +using namespace search::fef; using search::feature_t; +using vespalib::Issue; +using vespalib::eval::TensorSpec; +using vespalib::eval::FastValueBuilderFactory; +using vespalib::make_string_short::fmt; + typedef Blueprint::SP BPSP; @@ -50,7 +60,7 @@ TEST_F("test decorator - single override", Fixture) { FeatureExecutor *fe = &f.createValueExecutor(); vespalib::Stash &stash = f.stash; - fe = &stash.create<FeatureOverrider>(*fe, 1, 50.0); + fe = &stash.create<FeatureOverrider>(*fe, 1, 50.0, nullptr); f.add(fe, 3).run(); EXPECT_EQUAL(fe->outputs().size(), 3u); @@ -63,8 +73,8 @@ TEST_F("test decorator - multiple overrides", Fixture) { FeatureExecutor *fe = &f.createValueExecutor(); vespalib::Stash &stash = f.stash; - fe = &stash.create<FeatureOverrider>(*fe, 0, 50.0); - fe = &stash.create<FeatureOverrider>(*fe, 2, 100.0); + fe = &stash.create<FeatureOverrider>(*fe, 0, 50.0, nullptr); + fe = &stash.create<FeatureOverrider>(*fe, 2, 100.0, nullptr); f.add(fe, 3).run(); EXPECT_EQUAL(fe->outputs().size(), 3u); @@ -77,7 +87,7 @@ TEST_F("test decorator - non-existing override", Fixture) { FeatureExecutor *fe = &f.createValueExecutor(); vespalib::Stash &stash = f.stash; - fe = &stash.create<FeatureOverrider>(*fe, 1000, 50.0); + fe = &stash.create<FeatureOverrider>(*fe, 1000, 50.0, nullptr); f.add(fe, 3).run(); EXPECT_EQUAL(fe->outputs().size(), 3u); @@ -90,12 +100,12 @@ TEST_F("test decorator - transitive override", Fixture) { FeatureExecutor *fe = &f.createValueExecutor(); vespalib::Stash &stash = f.stash; - fe = &stash.create<FeatureOverrider>(*fe, 1, 50.0); + fe = &stash.create<FeatureOverrider>(*fe, 1, 50.0, nullptr); f.add(fe, 3); EXPECT_EQUAL(fe->outputs().size(), 3u); FeatureExecutor *fe2 = &stash.create<DoubleExecutor>(3); - fe2 = &stash.create<FeatureOverrider>(*fe2, 2, 10.0); + fe2 = &stash.create<FeatureOverrider>(*fe2, 2, 10.0, nullptr); auto inputs = stash.create_array<LazyValue>(3, nullptr); inputs[0] = LazyValue(fe->outputs().get_raw(0), fe); inputs[1] = LazyValue(fe->outputs().get_raw(1), fe); @@ -169,4 +179,123 @@ TEST("test overrides") EXPECT_APPROX(res["double(value(3)).0"], 6.0, 1e-6); } +//----------------------------------------------------------------------------- + +struct SimpleRankFixture { + BlueprintFactory factory; + IndexEnvironment indexEnv; + BlueprintResolver::SP resolver; + Properties overrides; + MatchData::UP match_data; + RankProgram program; + static vespalib::string expr_feature(const vespalib::string &name) { + return fmt("rankingExpression(%s)", name.c_str()); + } + SimpleRankFixture() + : factory(), indexEnv(), resolver(new BlueprintResolver(factory, indexEnv)), + overrides(), match_data(), program(resolver) + { + factory.addPrototype(std::make_shared<DocidBlueprint>()); + factory.addPrototype(std::make_shared<RankingExpressionBlueprint>()); + } + ~SimpleRankFixture(); + void add_expr(const vespalib::string &name, const vespalib::string &expr) { + vespalib::string feature_name = expr_feature(name); + vespalib::string expr_name = feature_name + ".rankingScript"; + indexEnv.getProperties().add(expr_name, expr); + } + void add_override(const vespalib::string &name, const TensorSpec &spec) { + vespalib::nbostream data; + auto tensor = vespalib::eval::value_from_spec(spec, FastValueBuilderFactory::get()); + vespalib::eval::encode_value(*tensor, data); + overrides.add(name, vespalib::stringref(data.peek(), data.size())); + } + void add_override(const vespalib::string &name, const vespalib::string &str) { + overrides.add(name, str); + } + bool try_compile(const vespalib::string &seed) { + resolver->addSeed(seed); + if (!resolver->compile()) { + return false; + } + MatchDataLayout mdl; + QueryEnvironment queryEnv(&indexEnv); + match_data = mdl.createMatchData(); + program.setup(*match_data, queryEnv, overrides); + return true; + } + void compile(const vespalib::string &seed) { + ASSERT_TRUE(try_compile(seed)); + } + TensorSpec get(uint32_t docid) { + auto result = program.get_seeds(false); + ASSERT_EQUAL(1u, result.num_features()); + return TensorSpec::from_value(result.resolve(0).as_object(docid)); + } +}; +SimpleRankFixture::~SimpleRankFixture() = default; + +TensorSpec from_expr(const vespalib::string &expr) { + auto result = TensorSpec::from_expr(expr); + ASSERT_TRUE(result.type() != "error"); + return result; +} + +struct MyIssues : Issue::Handler { + std::vector<vespalib::string> list; + Issue::Binding capture; + MyIssues() : list(), capture(Issue::listen(*this)) {} + void handle(const Issue &issue) override { list.push_back(issue.message()); } +}; + +//----------------------------------------------------------------------------- + +TEST_F("require expression without override works", SimpleRankFixture) { + auto expect = from_expr("tensor<float>(x[3]):[1,2,3]"); + f1.add_expr("foo", "tensor<float>(x[3]):[1,2,3]"); + f1.compile(f1.expr_feature("foo")); + EXPECT_EQUAL(f1.get(1), expect); +} + +TEST_F("require that const binary override works", SimpleRankFixture) { + auto expect = from_expr("tensor<float>(x[3]):[5,6,7]"); + f1.add_expr("foo", "tensor<float>(x[3]):[1,2,3]"); + f1.add_override(f1.expr_feature("foo"), expect); + f1.compile(f1.expr_feature("foo")); + EXPECT_EQUAL(f1.get(1), expect); +} + +TEST_F("require that non-const binary override works", SimpleRankFixture) { + auto expect = from_expr("tensor<float>(x[3]):[5,6,7]"); + f1.add_expr("foo", "tensor<float>(x[3]):[docid,2,3]"); + f1.add_override(f1.expr_feature("foo"), expect); + f1.compile(f1.expr_feature("foo")); + EXPECT_EQUAL(f1.get(1), expect); +} + +TEST_F("require that wrong type binary override is ignored", SimpleRankFixture) { + MyIssues issues; + auto expect = from_expr("tensor<float>(x[3]):[1,2,3]"); + auto other = from_expr("tensor(x[3]):[5,6,7]"); + f1.add_expr("foo", "tensor<float>(x[3]):[1,2,3]"); + f1.add_override(f1.expr_feature("foo"), other); + f1.compile(f1.expr_feature("foo")); + ASSERT_EQUAL(issues.list.size(), 1u); + EXPECT_LESS(issues.list[0].find("has invalid type"), issues.list[0].size()); + fprintf(stderr, "issue: %s\n", issues.list[0].c_str()); +} + +TEST_F("require that bad format binary override is ignored", SimpleRankFixture) { + MyIssues issues; + auto expect = from_expr("tensor<float>(x[3]):[1,2,3]"); + f1.add_expr("foo", "tensor<float>(x[3]):[1,2,3]"); + f1.add_override(f1.expr_feature("foo"), vespalib::string("bad format")); + f1.compile(f1.expr_feature("foo")); + ASSERT_EQUAL(issues.list.size(), 1u); + EXPECT_LESS(issues.list[0].find("has invalid format"), issues.list[0].size()); + fprintf(stderr, "issue: %s\n", issues.list[0].c_str()); +} + +//----------------------------------------------------------------------------- + TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/vespa/searchlib/fef/featureoverrider.cpp b/searchlib/src/vespa/searchlib/fef/featureoverrider.cpp index 5092842e4b3..c0d7beac260 100644 --- a/searchlib/src/vespa/searchlib/fef/featureoverrider.cpp +++ b/searchlib/src/vespa/searchlib/fef/featureoverrider.cpp @@ -17,10 +17,11 @@ FeatureOverrider::handle_bind_outputs(vespalib::ArrayRef<NumberOrObject> outputs _executor.bind_outputs(outputs); } -FeatureOverrider::FeatureOverrider(FeatureExecutor &executor, uint32_t outputIdx, feature_t value) +FeatureOverrider::FeatureOverrider(FeatureExecutor &executor, uint32_t outputIdx, feature_t number, Value::UP object) : _executor(executor), _outputIdx(outputIdx), - _value(value) + _number(number), + _object(std::move(object)) { } @@ -35,7 +36,11 @@ FeatureOverrider::execute(uint32_t docId) { _executor.lazy_execute(docId); if (_outputIdx < outputs().size()) { - outputs().set_number(_outputIdx, _value); + if (_object) { + outputs().set_object(_outputIdx, *_object); + } else { + outputs().set_number(_outputIdx, _number); + } } } diff --git a/searchlib/src/vespa/searchlib/fef/featureoverrider.h b/searchlib/src/vespa/searchlib/fef/featureoverrider.h index 05c651932fe..1ce18903e4e 100644 --- a/searchlib/src/vespa/searchlib/fef/featureoverrider.h +++ b/searchlib/src/vespa/searchlib/fef/featureoverrider.h @@ -17,31 +17,25 @@ namespace fef { class FeatureOverrider : public FeatureExecutor { private: + using Value = vespalib::eval::Value; + FeatureOverrider(const FeatureOverrider &); FeatureOverrider &operator=(const FeatureOverrider &); FeatureExecutor & _executor; uint32_t _outputIdx; - feature_t _value; + feature_t _number; + Value::UP _object; virtual void handle_bind_match_data(const MatchData &md) override; virtual void handle_bind_inputs(vespalib::ConstArrayRef<LazyValue> inputs) override; virtual void handle_bind_outputs(vespalib::ArrayRef<NumberOrObject> outputs) override; public: - /** - * Create a feature overrider that will override the given output - * with the given feature value. - * - * @param executor the feature executor for which we should override an output - * @param outputIdx which output to override - * @param value what value to override with - **/ - FeatureOverrider(FeatureExecutor &executor, uint32_t outputIdx, feature_t value); + FeatureOverrider(FeatureExecutor &executor, uint32_t outputIdx, feature_t number, Value::UP object); bool isPure() override; void execute(uint32_t docId) override; }; } // namespace fef } // namespace search - diff --git a/searchlib/src/vespa/searchlib/fef/rank_program.cpp b/searchlib/src/vespa/searchlib/fef/rank_program.cpp index 83cb9237975..c8c2d59e872 100644 --- a/searchlib/src/vespa/searchlib/fef/rank_program.cpp +++ b/searchlib/src/vespa/searchlib/fef/rank_program.cpp @@ -3,8 +3,11 @@ #include "rank_program.h" #include "featureoverrider.h" #include <vespa/vespalib/locale/c.h> +#include <vespa/eval/eval/fast_value.h> +#include <vespa/eval/eval/value_codec.h> #include <vespa/vespalib/stllike/hash_set.hpp> #include <vespa/vespalib/util/size_literals.h> +#include <vespa/vespalib/util/issue.h> #include <algorithm> #include <cassert> @@ -12,6 +15,10 @@ LOG_SETUP(".fef.rankprogram"); using vespalib::Stash; +using vespalib::Issue; +using vespalib::eval::Value; +using vespalib::eval::ValueType; +using vespalib::eval::FastValueBuilderFactory; namespace search::fef { @@ -22,10 +29,14 @@ namespace { struct Override { BlueprintResolver::FeatureRef ref; - feature_t value; + feature_t number; + Value::UP object; Override(const BlueprintResolver::FeatureRef &r, feature_t v) noexcept - : ref(r), value(v) {} + : ref(r), number(v), object() {} + + Override(const BlueprintResolver::FeatureRef &r, Value::UP v) noexcept + : ref(r), number(), object(std::move(v)) {} bool operator<(const Override &rhs) const { return (ref.executor < rhs.ref.executor); @@ -34,29 +45,50 @@ struct Override struct OverrideVisitor : public IPropertiesVisitor { + const BlueprintResolver::ExecutorSpecList &specs; const BlueprintResolver::FeatureMap &feature_map; std::vector<Override> &overrides; - OverrideVisitor(const BlueprintResolver::FeatureMap &feature_map_in, + OverrideVisitor(const BlueprintResolver::ExecutorSpecList &specs_in, + const BlueprintResolver::FeatureMap &feature_map_in, std::vector<Override> &overrides_out) - : feature_map(feature_map_in), overrides(overrides_out) {} + : specs(specs_in), feature_map(feature_map_in), overrides(overrides_out) {} - void visitProperty(const Property::Value & key, - const Property & values) override - { + void visitProperty(const Property::Value &key, const Property &prop) override { auto pos = feature_map.find(key); if (pos != feature_map.end()) { - overrides.emplace_back(pos->second, vespalib::locale::c::strtod(values.get().c_str(), nullptr)); + const auto &name = pos->first; + const auto &ref = pos->second; + const auto &feature_type = specs[ref.executor].output_types[ref.output]; + if (feature_type.is_object()) { + const auto &value_type = feature_type.type(); + const vespalib::string &encoded_value = prop.get(); + vespalib::nbostream stream(encoded_value.data(), encoded_value.size()); + try { + auto tensor = vespalib::eval::decode_value(stream, vespalib::eval::FastValueBuilderFactory::get()); + if (tensor->type() == value_type) { + overrides.emplace_back(ref, std::move(tensor)); + } else { + Issue::report("override for feature '%s' has invalid type: expected %s, got %s", + name.c_str(), value_type.to_spec().c_str(), tensor->type().to_spec().c_str()); + } + } catch (const vespalib::eval::DecodeValueException &e) { + Issue::report("override for feature '%s' has invalid format: %s", name.c_str(), e.what()); + } + } else { + overrides.emplace_back(ref, vespalib::locale::c::strtod(prop.get().c_str(), nullptr)); + } } } }; -std::vector<Override> prepare_overrides(const BlueprintResolver::FeatureMap &feature_map, +std::vector<Override> prepare_overrides(const BlueprintResolver::ExecutorSpecList &specs, + const BlueprintResolver::FeatureMap &feature_map, const Properties &featureOverrides) { std::vector<Override> overrides; overrides.reserve(featureOverrides.numValues()); - OverrideVisitor visitor(feature_map, overrides); + OverrideVisitor visitor(specs, feature_map, overrides); featureOverrides.visitProperties(visitor); std::sort(overrides.begin(), overrides.end()); return overrides; @@ -173,12 +205,12 @@ RankProgram::setup(const MatchData &md, const IQueryEnvironment &queryEnv, const Properties &featureOverrides) { + const auto &specs = _resolver->getExecutorSpecs(); assert(_executors.empty()); - std::vector<Override> overrides = prepare_overrides(_resolver->getFeatureMap(), featureOverrides); + std::vector<Override> overrides = prepare_overrides(specs, _resolver->getFeatureMap(), featureOverrides); auto override = overrides.begin(); auto override_end = overrides.end(); - const auto &specs = _resolver->getExecutorSpecs(); _executors.reserve(specs.size()); _is_const.resize(specs.size()*2); // Reserve space in hashmap for executors to be const for (uint32_t i = 0; i < specs.size(); ++i) { @@ -205,7 +237,7 @@ RankProgram::setup(const MatchData &md, } for (; (override < override_end) && (override->ref.executor == i); ++override) { FeatureExecutor *tmp = executor; - executor = &(stash.get().create<FeatureOverrider>(*tmp, override->ref.output, override->value)); + executor = &(stash.get().create<FeatureOverrider>(*tmp, override->ref.output, override->number, std::move(override->object))); } executor->bind_inputs(inputs); executor->bind_outputs(outputs); |