diff options
Diffstat (limited to 'searchlib')
29 files changed, 369 insertions, 319 deletions
diff --git a/searchlib/src/tests/features/beta/beta_features.cpp b/searchlib/src/tests/features/beta/beta_features.cpp index 1bc75d6f3bb..2d992ab82e1 100644 --- a/searchlib/src/tests/features/beta/beta_features.cpp +++ b/searchlib/src/tests/features/beta/beta_features.cpp @@ -292,6 +292,10 @@ Test::testProximity() .addScore("proximity(foo,0,1).posA", a < b ? a : util::FEATURE_MAX) .addScore("proximity(foo,0,1).posB", a < b ? b : util::FEATURE_MIN); TEST_STATE(vespalib::make_string("a=%u, b=%u", a, b).c_str()); + { // reset lazy evaluation + RankResult dummy; + ft.executeOnly(dummy, 0); + } EXPECT_TRUE(ft.execute(exp)); } } @@ -344,6 +348,10 @@ Test::testQueryCompleteness() RankResult exp; exp.addScore("queryCompleteness(foo).hit", (feature_t)(i)); exp.addScore("queryCompleteness(foo).miss", (feature_t)(5 - i)); + { // reset lazy evaluation + RankResult dummy; + ft.executeOnly(dummy, 0); + } EXPECT_TRUE(ft.execute(exp)); } } @@ -374,6 +382,10 @@ Test::assertQueryCompleteness(FtFeatureTest & ft, uint32_t firstOcc, uint32_t hi RankResult exp; exp.addScore("queryCompleteness(foo,5,10).hit", hits); exp.addScore("queryCompleteness(foo,5,10).miss", miss); + { // reset lazy evaluation + RankResult dummy; + ft.executeOnly(dummy, 0); + } EXPECT_TRUE(ft.execute(exp)); } @@ -473,6 +485,10 @@ Test::testFlowCompleteness() exp.addScore("flowCompleteness(foo).weight", 100.0); exp.addScore("flowCompleteness(foo).flow", i); TEST_STATE("run execute"); + { // reset lazy evaluation + RankResult dummy; + ft.executeOnly(dummy, 0); + } EXPECT_TRUE(ft.execute(exp)); } } @@ -524,6 +540,10 @@ Test::testFlowCompleteness() exp.addScore("flowCompleteness(foo).flow", flow); TEST_STATE(vespalib::make_string("execute t0m=%u t1m=%u t2m=%u t3m=%u flow=%u", t0m, t1m, t2m, t3m, flow).c_str()); + { // reset lazy evaluation + RankResult dummy; + ft.executeOnly(dummy, 0); + } ASSERT_TRUE(ft.execute(exp)); } } diff --git a/searchlib/src/tests/features/constant/constant_test.cpp b/searchlib/src/tests/features/constant/constant_test.cpp index d990943367b..2bce80fb190 100644 --- a/searchlib/src/tests/features/constant/constant_test.cpp +++ b/searchlib/src/tests/features/constant/constant_test.cpp @@ -56,25 +56,21 @@ struct ExecFixture setup_search_features(factory); } bool setup() { return test.setup(); } - const Tensor &extractTensor() { - const Value::CREF *value = test.resolveObjectFeature(); - ASSERT_TRUE(value != nullptr); - ASSERT_TRUE(value->get().is_tensor()); - return static_cast<const Tensor &>(*value->get().as_tensor()); + const Tensor &extractTensor(uint32_t docid) { + Value::CREF value = test.resolveObjectFeature(docid); + ASSERT_TRUE(value.get().is_tensor()); + return static_cast<const Tensor &>(*value.get().as_tensor()); } const Tensor &executeTensor(uint32_t docId = 1) { - test.executeOnly(docId); - return extractTensor(); + return extractTensor(docId); } - double extractDouble() { - const Value::CREF *value = test.resolveObjectFeature(); - ASSERT_TRUE(value != nullptr); - ASSERT_TRUE(value->get().is_double()); - return value->get().as_double(); + double extractDouble(uint32_t docid) { + Value::CREF value = test.resolveObjectFeature(docid); + ASSERT_TRUE(value.get().is_double()); + return value.get().as_double(); } double executeDouble(uint32_t docId = 1) { - test.executeOnly(docId); - return extractDouble(); + return extractDouble(docId); } void addTensor(const vespalib::string &name, const TensorCells &cells, diff --git a/searchlib/src/tests/features/featurebenchmark.cpp b/searchlib/src/tests/features/featurebenchmark.cpp index dc9d94907b4..ed8af1cdf14 100644 --- a/searchlib/src/tests/features/featurebenchmark.cpp +++ b/searchlib/src/tests/features/featurebenchmark.cpp @@ -236,7 +236,7 @@ Benchmark::runFieldMatch(Config & cfg) start(); std::cout << "**** '" << cfg.getFeature() << "' ****" << std::endl; for (uint32_t i = 0; i < numRuns; ++i) { - ft.executeOnly(0); + // rank evaluation is now lazy, please re-write benchmark if needed } sample(); } @@ -261,7 +261,7 @@ Benchmark::runRankingExpression(Config & cfg) start(); std::cout << "**** '" << cfg.getFeature() << "' ****" << std::endl; for (uint32_t i = 0; i < numRuns; ++i) { - ft.executeOnly(0); + // rank evaluation is now lazy, please re-write benchmark if needed } sample(); } @@ -374,7 +374,7 @@ Benchmark::runAttributeMatch(Config & cfg) pos.setElementWeight(i % numDocs); amd->appendPosition(pos); } - ft.executeOnly(i % numDocs); + // rank evaluation is now lazy, please re-write benchmark if needed } sample(); } @@ -405,7 +405,7 @@ Benchmark::runAttribute(Config & cfg) start(); std::cout << "**** '" << cfg.getFeature() << "' ****" << std::endl; for (uint32_t i = 0; i < numRuns; ++i) { - ft.executeOnly(i % numDocs); + // rank evaluation is now lazy, please re-write benchmark if needed } sample(); } @@ -457,7 +457,7 @@ Benchmark::runDotProduct(Config & cfg) start(); std::cout << "**** '" << cfg.getFeature() << "' ****" << std::endl; for (uint32_t i = 0; i < numRuns; ++i) { - ft.executeOnly(i % numDocs); + // rank evaluation is now lazy, please re-write benchmark if needed } sample(); } @@ -499,7 +499,7 @@ Benchmark::runNativeAttributeMatch(Config & cfg) pos.setElementWeight(docId); amd->appendPosition(pos); } - ft.executeOnly(docId); + // rank evaluation is now lazy, please re-write benchmark if needed } sample(); } @@ -538,7 +538,7 @@ Benchmark::runNativeFieldMatch(Config & cfg) start(); std::cout << "**** '" << cfg.getFeature() << "' ****" << std::endl; for (uint32_t i = 0; i < numRuns; ++i) { - ft.executeOnly(0); + // rank evaluation is now lazy, please re-write benchmark if needed } sample(); } @@ -580,7 +580,7 @@ Benchmark::runNativeProximity(Config & cfg) start(); std::cout << "**** '" << cfg.getFeature() << "' ****" << std::endl; for (uint32_t i = 0; i < numRuns; ++i) { - ft.executeOnly(0); + // rank evaluation is now lazy, please re-write benchmark if needed } sample(); } diff --git a/searchlib/src/tests/features/item_raw_score/item_raw_score_test.cpp b/searchlib/src/tests/features/item_raw_score/item_raw_score_test.cpp index 199a9fa72e3..fe093d41ba7 100644 --- a/searchlib/src/tests/features/item_raw_score/item_raw_score_test.cpp +++ b/searchlib/src/tests/features/item_raw_score/item_raw_score_test.cpp @@ -97,8 +97,7 @@ struct RankFixture : BlueprintFactoryFixture, IndexFixture { rankProgram->setup(mdl, queryEnv); } feature_t getScore(uint32_t docId) { - rankProgram->run(docId); - return *Utils::getScoreFeature(*rankProgram); + return Utils::getScoreFeature(*rankProgram, docId); } void setScore(TermFieldHandle handle, uint32_t docId, feature_t score) { rankProgram->match_data().resolveTermField(handle)->setRawScore(docId, score); diff --git a/searchlib/src/tests/features/native_dot_product/native_dot_product_test.cpp b/searchlib/src/tests/features/native_dot_product/native_dot_product_test.cpp index 3e3702cceec..87305cd1670 100644 --- a/searchlib/src/tests/features/native_dot_product/native_dot_product_test.cpp +++ b/searchlib/src/tests/features/native_dot_product/native_dot_product_test.cpp @@ -103,8 +103,7 @@ struct RankFixture : BlueprintFactoryFixture, IndexFixture { rankProgram->setup(mdl, queryEnv); } feature_t getScore(uint32_t docId) { - rankProgram->run(docId); - return *Utils::getScoreFeature(*rankProgram); + return Utils::getScoreFeature(*rankProgram, docId); } void setFooWeight(uint32_t i, uint32_t docId, int32_t index_weight) { ASSERT_LESS(i, fooHandles.size()); diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp index ad52c3ac861..4efd140b871 100644 --- a/searchlib/src/tests/features/prod_features.cpp +++ b/searchlib/src/tests/features/prod_features.cpp @@ -1353,7 +1353,7 @@ Test::testNow() ASSERT_TRUE(ft.setup()); RankResult res; - ASSERT_TRUE(ft.executeOnly(res, 0)); + ASSERT_TRUE(ft.executeOnly(res, 1)); feature_t now = 15000000000; ASSERT_EQUAL(now, res.getScore("now")); } diff --git a/searchlib/src/tests/features/prod_features_attributematch.cpp b/searchlib/src/tests/features/prod_features_attributematch.cpp index 06b2b859709..fc69061b4ef 100644 --- a/searchlib/src/tests/features/prod_features_attributematch.cpp +++ b/searchlib/src/tests/features/prod_features_attributematch.cpp @@ -234,6 +234,10 @@ Test::testAttributeMatch() mdb->apply(1); exp.clear(). addScore("attributeMatch(wsint).fieldCompleteness", 0.5f); + { // reset lazy evaluation + RankResult dummy; + ft.executeOnly(dummy, 0); + } ASSERT_TRUE(ft.execute(exp)); // test that normalized values lies in the interval [0,1]. @@ -243,6 +247,10 @@ Test::testAttributeMatch() exp.clear(). addScore("attributeMatch(wsfloat).normalizedWeight", 1). addScore("attributeMatch(wsfloat).normalizedWeightedWeight", 1); + { // reset lazy evaluation + RankResult dummy; + ft.executeOnly(dummy, 0); + } ASSERT_TRUE(ft.execute(exp)); } diff --git a/searchlib/src/tests/features/prod_features_fieldmatch.cpp b/searchlib/src/tests/features/prod_features_fieldmatch.cpp index e26d6a92fa6..e9bafdb1c78 100644 --- a/searchlib/src/tests/features/prod_features_fieldmatch.cpp +++ b/searchlib/src/tests/features/prod_features_fieldmatch.cpp @@ -957,20 +957,20 @@ Test::testFieldMatchExecutorRemaining() ASSERT_TRUE(mdb->setFieldLength("foo", 3)); ASSERT_TRUE(mdb->addOccurence("foo", 0, 0)); // 'a' ASSERT_TRUE(mdb->addOccurence("foo", 1, 1)); // 'b' - ASSERT_TRUE(mdb->apply(1)); + ASSERT_TRUE(mdb->apply(2)); RankResult rr = toRankResult("fieldMatch(foo)", "score:0.9558 matches:2"); rr.setEpsilon(1e-4); // same as java tests - ASSERT_TRUE(ft.execute(rr, 1)); + ASSERT_TRUE(ft.execute(rr, 2)); } { // docid 3: "x a b" MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); ASSERT_TRUE(mdb->setFieldLength("foo", 3)); ASSERT_TRUE(mdb->addOccurence("foo", 0, 1)); // 'a' ASSERT_TRUE(mdb->addOccurence("foo", 1, 2)); // 'b' - ASSERT_TRUE(mdb->apply(2)); + ASSERT_TRUE(mdb->apply(3)); RankResult rr = toRankResult("fieldMatch(foo)", "score:0.9463 matches:2"); rr.setEpsilon(1e-4); // same as java tests - ASSERT_TRUE(ft.execute(rr, 2)); + ASSERT_TRUE(ft.execute(rr, 3)); } } @@ -1008,6 +1008,10 @@ Test::testFieldMatchExecutorRemaining() // add hit with query term 'b' mdb->getTermFieldMatchData(1, 0)->reset(1); ASSERT_TRUE(mdb->apply(1)); + { // reset lazy evaluation + RankResult dummy; + ft.executeOnly(dummy, 0); + } ASSERT_TRUE(ft.execute(toRankResult("fieldMatch(foo)", "score:0 completeness:0.475 queryCompleteness:0.5 weight:0.2 matches:2 degradedMatches:2"). setEpsilon(1e-4))); diff --git a/searchlib/src/tests/features/raw_score/raw_score_test.cpp b/searchlib/src/tests/features/raw_score/raw_score_test.cpp index 0a15ff69318..b98c60b7c50 100644 --- a/searchlib/src/tests/features/raw_score/raw_score_test.cpp +++ b/searchlib/src/tests/features/raw_score/raw_score_test.cpp @@ -73,8 +73,7 @@ struct RankFixture : BlueprintFactoryFixture, IndexFixture { rankProgram->setup(mdl, queryEnv); } feature_t getScore(uint32_t docId) { - rankProgram->run(docId); - return *Utils::getScoreFeature(*rankProgram); + return Utils::getScoreFeature(*rankProgram, docId); } void setScore(TermFieldHandle handle, uint32_t docId, feature_t score) { rankProgram->match_data().resolveTermField(handle)->setRawScore(docId, score); diff --git a/searchlib/src/tests/features/subqueries/subqueries_test.cpp b/searchlib/src/tests/features/subqueries/subqueries_test.cpp index 160ec404b20..4ed3b833853 100644 --- a/searchlib/src/tests/features/subqueries/subqueries_test.cpp +++ b/searchlib/src/tests/features/subqueries/subqueries_test.cpp @@ -70,8 +70,7 @@ struct RankFixture : BlueprintFactoryFixture, IndexFixture { return handles; } feature_t getSubqueries(uint32_t docId) { - rankProgram->run(docId); - return *Utils::getScoreFeature(*rankProgram); + return Utils::getScoreFeature(*rankProgram, docId); } void setSubqueries(TermFieldHandle handle, uint32_t docId, uint64_t subqueries) { diff --git a/searchlib/src/tests/features/tensor/tensor_test.cpp b/searchlib/src/tests/features/tensor/tensor_test.cpp index fd59cd8b9d7..34a5df23395 100644 --- a/searchlib/src/tests/features/tensor/tensor_test.cpp +++ b/searchlib/src/tests/features/tensor/tensor_test.cpp @@ -154,15 +154,13 @@ struct ExecFixture { "x", "y" }); setQueryTensorType("null", "tensor(q{})"); } - const Tensor &extractTensor() { - const Value::CREF *value = test.resolveObjectFeature(); - ASSERT_TRUE(value != nullptr); - ASSERT_TRUE(value->get().is_tensor()); - return static_cast<const Tensor &>(*value->get().as_tensor()); + const Tensor &extractTensor(uint32_t docid) { + Value::CREF value = test.resolveObjectFeature(docid); + ASSERT_TRUE(value.get().is_tensor()); + return static_cast<const Tensor &>(*value.get().as_tensor()); } const Tensor &execute(uint32_t docId = 1) { - test.executeOnly(docId); - return extractTensor(); + return extractTensor(docId); } }; diff --git a/searchlib/src/tests/features/tensor_from_labels/tensor_from_labels_test.cpp b/searchlib/src/tests/features/tensor_from_labels/tensor_from_labels_test.cpp index e0e6b4746a4..6f413da004c 100644 --- a/searchlib/src/tests/features/tensor_from_labels/tensor_from_labels_test.cpp +++ b/searchlib/src/tests/features/tensor_from_labels/tensor_from_labels_test.cpp @@ -121,15 +121,13 @@ struct ExecFixture test.getQueryEnv().getProperties().add("astr_query", "[d e f]"); test.getQueryEnv().getProperties().add("aint_query", "[11 13 17]"); } - const Tensor &extractTensor() { - const Value::CREF *value = test.resolveObjectFeature(); - ASSERT_TRUE(value != nullptr); - ASSERT_TRUE(value->get().is_tensor()); - return static_cast<const Tensor &>(*value->get().as_tensor()); + const Tensor &extractTensor(uint32_t docid) { + Value::CREF value = test.resolveObjectFeature(docid); + ASSERT_TRUE(value.get().is_tensor()); + return static_cast<const Tensor &>(*value.get().as_tensor()); } const Tensor &execute() { - test.executeOnly(); - return extractTensor(); + return extractTensor(1); } }; diff --git a/searchlib/src/tests/features/tensor_from_weighted_set/tensor_from_weighted_set_test.cpp b/searchlib/src/tests/features/tensor_from_weighted_set/tensor_from_weighted_set_test.cpp index 839b1efec74..d8dec88c418 100644 --- a/searchlib/src/tests/features/tensor_from_weighted_set/tensor_from_weighted_set_test.cpp +++ b/searchlib/src/tests/features/tensor_from_weighted_set/tensor_from_weighted_set_test.cpp @@ -120,15 +120,13 @@ struct ExecFixture void setupQueryEnvironment() { test.getQueryEnv().getProperties().add("wsquery", "{d:11,e:13,f:17}"); } - const Tensor &extractTensor() { - const Value::CREF *value = test.resolveObjectFeature(); - ASSERT_TRUE(value != nullptr); - ASSERT_TRUE(value->get().is_tensor()); - return static_cast<const Tensor &>(*value->get().as_tensor()); + const Tensor &extractTensor(uint32_t docid) { + Value::CREF value = test.resolveObjectFeature(docid); + ASSERT_TRUE(value.get().is_tensor()); + return static_cast<const Tensor &>(*value.get().as_tensor()); } const Tensor &execute() { - test.executeOnly(); - return extractTensor(); + return extractTensor(1); } }; diff --git a/searchlib/src/tests/fef/featureoverride/featureoverride.cpp b/searchlib/src/tests/fef/featureoverride/featureoverride.cpp index 80389064a8f..e77e50a99c8 100644 --- a/searchlib/src/tests/fef/featureoverride/featureoverride.cpp +++ b/searchlib/src/tests/fef/featureoverride/featureoverride.cpp @@ -97,10 +97,10 @@ TEST_F("test decorator - transitive override", Fixture) FeatureExecutor *fe2 = &stash.create<DoubleExecutor>(3); fe2 = &stash.create<FeatureOverrider>(*fe2, 2, 10.0); - auto inputs = stash.create_array<const NumberOrObject *>(3); - inputs[0] = fe->outputs().get_raw(0); - inputs[1] = fe->outputs().get_raw(1); - inputs[2] = fe->outputs().get_raw(2); + auto inputs = stash.create_array<LazyValue>(3, nullptr); + inputs[0] = LazyValue(fe->outputs().get_raw(0), fe); + inputs[1] = LazyValue(fe->outputs().get_raw(1), fe); + inputs[2] = LazyValue(fe->outputs().get_raw(2), fe); fe2->bind_inputs(inputs); f.add(fe2, 3).run(); EXPECT_EQUAL(fe2->outputs().size(), 3u); @@ -143,9 +143,8 @@ TEST("test overrides") overrides.add("bogus(feature)", "10.0"); rankProgram->setup(mdl, queryEnv, overrides); - rankProgram->run(2); - std::map<vespalib::string, feature_t> res = Utils::getAllFeatures(*rankProgram); + std::map<vespalib::string, feature_t> res = Utils::getAllFeatures(*rankProgram, 2); EXPECT_EQUAL(res.size(), 20u); EXPECT_APPROX(res["value(1)"], 1.0, 1e-6); diff --git a/searchlib/src/tests/fef/object_passing/object_passing_test.cpp b/searchlib/src/tests/fef/object_passing/object_passing_test.cpp index 53f9b028e9d..d62be9b42fb 100644 --- a/searchlib/src/tests/fef/object_passing/object_passing_test.cpp +++ b/searchlib/src/tests/fef/object_passing/object_passing_test.cpp @@ -91,11 +91,10 @@ struct Fixture { Properties overrides; RankProgram program(resolver); program.setup(mdl, queryEnv, overrides); - program.run(1); auto result = program.get_seeds(); EXPECT_EQUAL(1u, result.num_features()); EXPECT_TRUE(!result.is_object(0)); // verifies auto-unboxing - return *result.resolve_number(0); + return result.resolve(0).as_number(1); } bool verify(const vespalib::string &feature) { diff --git a/searchlib/src/tests/fef/rank_program/rank_program_test.cpp b/searchlib/src/tests/fef/rank_program/rank_program_test.cpp index b70b78d8d28..91c266f2745 100644 --- a/searchlib/src/tests/fef/rank_program/rank_program_test.cpp +++ b/searchlib/src/tests/fef/rank_program/rank_program_test.cpp @@ -13,19 +13,47 @@ using namespace search::fef; using namespace search::fef::test; using namespace search::features; -size_t count_unique_features(const RankProgram &program) { - std::set<const NumberOrObject *> seen; +uint32_t default_docid = 1; + +void maybe_insert(const LazyValue &value, std::vector<LazyValue> &seen) { + for (const auto &entry: seen) { + if (value.is_same(entry)) { + return; + } + } + seen.push_back(value); +} + +std::vector<LazyValue> get_features(const RankProgram &program) { + std::vector<LazyValue> seen; auto unboxed = program.get_all_features(true); for (size_t i = 0; i < unboxed.num_features(); ++i) { - // fprintf(stderr, "seen feature (unboxed): %s\n", unboxed.name_of(i).c_str()); - seen.insert(unboxed.resolve_raw(i)); + maybe_insert(unboxed.resolve(i), seen); } auto maybe_boxed = program.get_all_features(false); for (size_t i = 0; i < maybe_boxed.num_features(); ++i) { - // fprintf(stderr, "seen feature (maybe boxed): %s\n", maybe_boxed.name_of(i).c_str()); - seen.insert(maybe_boxed.resolve_raw(i)); + maybe_insert(maybe_boxed.resolve(i), seen); } - return seen.size(); + return seen; +} + +template <typename Predicate> +size_t count(const RankProgram &program, Predicate pred) { + size_t cnt = 0; + for (const auto &value: get_features(program)) { + if (pred(value)) { + ++cnt; + } + } + return cnt; +} + +size_t count_features(const RankProgram &program) { + return count(program, [](const LazyValue &){ return true; }); +} + +size_t count_const_features(const RankProgram &program) { + return count(program, [](const LazyValue &value){ return value.is_const(); }); } struct ImpureValueExecutor : FeatureExecutor { @@ -51,6 +79,23 @@ struct ImpureValueBlueprint : Blueprint { } }; +struct DocidExecutor : FeatureExecutor { + void execute(uint32_t docid) override { outputs().set_number(0, docid); } +}; + +struct DocidBlueprint : Blueprint { + DocidBlueprint() : Blueprint("docid") {} + void visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const override {} + Blueprint::UP createInstance() const override { return Blueprint::UP(new DocidBlueprint()); } + bool setup(const IIndexEnvironment &, const std::vector<vespalib::string> &) override { + describeOutput("out", "the local document id"); + return true; + } + FeatureExecutor &createExecutor(const IQueryEnvironment &, vespalib::Stash &stash) const override { + return stash.create<DocidExecutor>(); + } +}; + struct MySetup { BlueprintFactory factory; IndexEnvironment indexEnv; @@ -63,6 +108,7 @@ struct MySetup { factory.addPrototype(Blueprint::SP(new ValueBlueprint())); factory.addPrototype(Blueprint::SP(new ImpureValueBlueprint())); factory.addPrototype(Blueprint::SP(new SumBlueprint())); + factory.addPrototype(Blueprint::SP(new DocidBlueprint())); } MySetup &add(const vespalib::string &feature) { resolver->addSeed(feature); @@ -79,20 +125,16 @@ struct MySetup { program.setup(mdl, queryEnv, overrides); return *this; } - MySetup &run() { - program.run(1); - return *this; - } - double get() { + double get(uint32_t docid = default_docid) { auto result = program.get_seeds(); EXPECT_EQUAL(1u, result.num_features()); - return *result.resolve_number(0); + return result.resolve(0).as_number(docid); } double get(const vespalib::string &feature) { auto result = program.get_seeds(); for (size_t i = 0; i < result.num_features(); ++i) { if (result.name_of(i) == feature) { - return *result.resolve_number(i); + return result.resolve(i).as_number(default_docid); } } return 31212.0; @@ -101,41 +143,42 @@ struct MySetup { auto result = program.get_seeds(); std::map<vespalib::string, double> result_map; for (size_t i = 0; i < result.num_features(); ++i) { - result_map[result.name_of(i)] = *result.resolve_number(i); + result_map[result.name_of(i)] = result.resolve(i).as_number(default_docid); } return result_map; } }; TEST_F("require that simple program works", MySetup()) { - EXPECT_EQUAL(15.0, f1.add("mysum(value(10),ivalue(5))").compile().run().get()); + EXPECT_EQUAL(15.0, f1.add("mysum(value(10),ivalue(5))").compile().get()); EXPECT_EQUAL(3u, f1.program.num_executors()); - EXPECT_EQUAL(2u, f1.program.program_size()); + EXPECT_EQUAL(3u, count_features(f1.program)); + EXPECT_EQUAL(1u, count_const_features(f1.program)); } -TEST_F("require that const features are calculated during setup", MySetup()) { +TEST_F("require that const features work", MySetup()) { f1.add("mysum(value(10),value(5))").compile(); EXPECT_EQUAL(15.0, f1.get()); EXPECT_EQUAL(3u, f1.program.num_executors()); - EXPECT_EQUAL(0u, f1.program.program_size()); + EXPECT_EQUAL(3u, count_features(f1.program)); + EXPECT_EQUAL(3u, count_const_features(f1.program)); } -TEST_F("require that non-const features are calculated during run", MySetup()) { +TEST_F("require that non-const features work", MySetup()) { f1.add("mysum(ivalue(10),ivalue(5))").compile(); - EXPECT_EQUAL(0.0, f1.get()); - f1.run(); EXPECT_EQUAL(15.0, f1.get()); EXPECT_EQUAL(3u, f1.program.num_executors()); - EXPECT_EQUAL(3u, f1.program.program_size()); + EXPECT_EQUAL(3u, count_features(f1.program)); + EXPECT_EQUAL(0u, count_const_features(f1.program)); } TEST_F("require that a single program can calculate multiple output features", MySetup()) { f1.add("value(1)").add("ivalue(2)").add("ivalue(3)"); f1.add("mysum(value(1),value(2),ivalue(3))"); - f1.compile().run(); + f1.compile(); EXPECT_EQUAL(5u, f1.program.num_executors()); - EXPECT_EQUAL(3u, f1.program.program_size()); - EXPECT_EQUAL(5u, count_unique_features(f1.program)); + EXPECT_EQUAL(5u, count_features(f1.program)); + EXPECT_EQUAL(2u, count_const_features(f1.program)); auto result = f1.all(); EXPECT_EQUAL(4u, result.size()); EXPECT_EQUAL(1.0, result["value(1)"]); @@ -146,20 +189,20 @@ TEST_F("require that a single program can calculate multiple output features", M TEST_F("require that a single executor can produce multiple features", MySetup()) { f1.add("mysum(value(1,2,3).0,value(1,2,3).1,value(1,2,3).2)"); - EXPECT_EQUAL(6.0, f1.compile().run().get()); + EXPECT_EQUAL(6.0, f1.compile().get()); EXPECT_EQUAL(2u, f1.program.num_executors()); - EXPECT_EQUAL(0u, f1.program.program_size()); - EXPECT_EQUAL(4u, count_unique_features(f1.program)); + EXPECT_EQUAL(4u, count_features(f1.program)); + EXPECT_EQUAL(4u, count_const_features(f1.program)); } TEST_F("require that feature values can be overridden", MySetup()) { f1.add("value(1)").add("ivalue(2)").add("ivalue(3)"); f1.add("mysum(value(1),value(2),ivalue(3))"); f1.override("value(2)", 20.0).override("ivalue(3)", 30.0); - f1.compile().run(); + f1.compile(); EXPECT_EQUAL(5u, f1.program.num_executors()); - EXPECT_EQUAL(3u, f1.program.program_size()); - EXPECT_EQUAL(5u, count_unique_features(f1.program)); + EXPECT_EQUAL(5u, count_features(f1.program)); + EXPECT_EQUAL(2u, count_const_features(f1.program)); auto result = f1.all(); EXPECT_EQUAL(4u, result.size()); EXPECT_EQUAL(1.0, result["value(1)"]); @@ -168,4 +211,14 @@ TEST_F("require that feature values can be overridden", MySetup()) { EXPECT_EQUAL(51.0, result["mysum(value(1),value(2),ivalue(3))"]); } +TEST_F("require that the rank program can calculate scores for multiple documents", MySetup()) { + f1.add("mysum(value(10),docid)").compile(); + EXPECT_EQUAL(3u, count_features(f1.program)); + EXPECT_EQUAL(1u, count_const_features(f1.program)); + EXPECT_EQUAL(f1.get(1), 11.0); + EXPECT_EQUAL(f1.get(2), 12.0); + EXPECT_EQUAL(f1.get(3), 13.0); + EXPECT_EQUAL(f1.get(1), 11.0); +} + TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/ranksetup/ranksetup_test.cpp b/searchlib/src/tests/ranksetup/ranksetup_test.cpp index 22c6478aeb9..fd34e030d33 100644 --- a/searchlib/src/tests/ranksetup/ranksetup_test.cpp +++ b/searchlib/src/tests/ranksetup/ranksetup_test.cpp @@ -103,7 +103,7 @@ public: _initRank(initRank), _finalRank(finalRank), _rankEnv(rankEnv), _layout(), _rs(), _firstPhaseProgram(), _secondPhaseProgram() {} bool setup(); - RankResult execute(uint32_t docId = 0); + RankResult execute(uint32_t docId = 1); }; bool @@ -136,12 +136,10 @@ RankResult RankExecutor::execute(uint32_t docId) { RankResult result; - _firstPhaseProgram->run(docId); - result.addScore(_initRank, *Utils::getScoreFeature(*_firstPhaseProgram)); + result.addScore(_initRank, Utils::getScoreFeature(*_firstPhaseProgram, docId)); if (_secondPhaseProgram.get() != nullptr) { - _secondPhaseProgram->run(docId); - result.addScore(_finalRank, *Utils::getScoreFeature(*_secondPhaseProgram)); + result.addScore(_finalRank, Utils::getScoreFeature(*_secondPhaseProgram, docId)); } return result; @@ -198,8 +196,7 @@ FeatureDumper::setup() RankResult FeatureDumper::dump() { - _rankProgram->run(1); - std::map<vespalib::string, feature_t> features = Utils::getSeedFeatures(*_rankProgram); + std::map<vespalib::string, feature_t> features = Utils::getSeedFeatures(*_rankProgram, 1); RankResult retval; for (auto itr = features.begin(); itr != features.end(); ++itr) { retval.addScore(itr->first, itr->second); @@ -230,10 +227,10 @@ private: void testCompilation(); void testRankSetup(); bool testExecution(const vespalib::string & initRank, feature_t initScore, - const vespalib::string & finalRank = "", feature_t finalScore = 0.0f, uint32_t docId = 0); + const vespalib::string & finalRank = "", feature_t finalScore = 0.0f, uint32_t docId = 1); bool testExecution(const RankEnvironment &rankEnv, const vespalib::string & initRank, feature_t initScore, - const vespalib::string & finalRank = "", feature_t finalScore = 0.0f, uint32_t docId = 0); + const vespalib::string & finalRank = "", feature_t finalScore = 0.0f, uint32_t docId = 1); void testExecution(); void testFeatureDump(); @@ -582,7 +579,7 @@ RankSetupTest::testExecution() { // static rank executor vespalib::string sr1 = "staticrank(staticrank1)"; vespalib::string sr2 = "staticrank(staticrank2)"; - for (uint32_t i = 0; i < 5; ++i) { + for (uint32_t i = 1; i < 5; ++i) { EXPECT_TRUE(testExecution(sr1, static_cast<feature_t>(i + 100), sr2, static_cast<feature_t>(i + 200), i)); } @@ -786,21 +783,18 @@ RankSetupTest::testFeatureNormalization() secondPhaseProgram->setup(layout, queryEnv); summaryProgram->setup(layout, queryEnv); - firstPhaseProgram->run(1); - EXPECT_APPROX(2.0, *Utils::getScoreFeature(*firstPhaseProgram), 0.001); - secondPhaseProgram->run(1); - EXPECT_APPROX(4.0, *Utils::getScoreFeature(*secondPhaseProgram), 0.001); - summaryProgram->run(1); + EXPECT_APPROX(2.0, Utils::getScoreFeature(*firstPhaseProgram, 1), 0.001); + EXPECT_APPROX(4.0, Utils::getScoreFeature(*secondPhaseProgram, 1), 0.001); { // rank seed features - std::map<vespalib::string, feature_t> actual = Utils::getSeedFeatures(*summaryProgram); + std::map<vespalib::string, feature_t> actual = Utils::getSeedFeatures(*summaryProgram, 1); std::map<vespalib::string, feature_t> exp; exp["mysum(value(5),value(5))"] = 10.0; exp["mysum(\"value( 5 )\",\"value( 5 )\")"] = 10.0; TEST_DO(checkFeatures(exp, actual)); } { // all rank features (1. phase) - std::map<vespalib::string, feature_t> actual = Utils::getAllFeatures(*firstPhaseProgram); + std::map<vespalib::string, feature_t> actual = Utils::getAllFeatures(*firstPhaseProgram, 1); std::map<vespalib::string, feature_t> exp; exp["value(1)"] = 1.0; exp["value(1).0"] = 1.0; @@ -809,7 +803,7 @@ RankSetupTest::testFeatureNormalization() TEST_DO(checkFeatures(exp, actual)); } { // all rank features (2. phase) - std::map<vespalib::string, feature_t> actual = Utils::getAllFeatures(*secondPhaseProgram); + std::map<vespalib::string, feature_t> actual = Utils::getAllFeatures(*secondPhaseProgram, 1); std::map<vespalib::string, feature_t> exp; exp["value(2)"] = 2.0; exp["value(2).0"] = 2.0; @@ -818,7 +812,7 @@ RankSetupTest::testFeatureNormalization() TEST_DO(checkFeatures(exp, actual)); } { // all rank features (summary) - std::map<vespalib::string, feature_t> actual = Utils::getAllFeatures(*summaryProgram); + std::map<vespalib::string, feature_t> actual = Utils::getAllFeatures(*summaryProgram, 1); std::map<vespalib::string, feature_t> exp; exp["value(5)"] = 5.0; exp["value(5).0"] = 5.0; @@ -835,10 +829,9 @@ RankSetupTest::testFeatureNormalization() QueryEnvironment queryEnv; RankProgram::UP rankProgram = rankSetup.create_dump_program(); rankProgram->setup(layout, queryEnv); - rankProgram->run(1); { // dump seed features - std::map<vespalib::string, feature_t> actual = Utils::getSeedFeatures(*rankProgram); + std::map<vespalib::string, feature_t> actual = Utils::getSeedFeatures(*rankProgram, 1); std::map<vespalib::string, feature_t> exp; exp["mysum(value(10),value(10))"] = 20.0; exp["mysum(\"value( 10 )\",\"value( 10 )\")"] = 20.0; @@ -846,7 +839,7 @@ RankSetupTest::testFeatureNormalization() } { // all dump features - std::map<vespalib::string, feature_t> actual = Utils::getAllFeatures(*rankProgram); + std::map<vespalib::string, feature_t> actual = Utils::getAllFeatures(*rankProgram, 1); std::map<vespalib::string, feature_t> exp; exp["value(10)"] = 10.0; diff --git a/searchlib/src/vespa/searchlib/fef/feature_resolver.h b/searchlib/src/vespa/searchlib/fef/feature_resolver.h index 97e2295e570..0280106b133 100644 --- a/searchlib/src/vespa/searchlib/fef/feature_resolver.h +++ b/searchlib/src/vespa/searchlib/fef/feature_resolver.h @@ -5,12 +5,11 @@ #include "number_or_object.h" #include <vespa/vespalib/stllike/string.h> #include <vector> +#include "featureexecutor.h" namespace search { namespace fef { -class RankProgram; - /** * A FeatureResolver knowns the name and memory location of values * calculated by a RankProgram. Note that objects of this class will @@ -20,7 +19,7 @@ class FeatureResolver { private: std::vector<vespalib::string> _names; - std::vector<const NumberOrObject *> _features; + std::vector<LazyValue> _features; std::vector<bool> _is_object; public: FeatureResolver(size_t size_hint) : _names(), _features(), _is_object() { @@ -28,7 +27,7 @@ public: _features.reserve(size_hint); _is_object.reserve(size_hint); } - void add(const vespalib::string &name, const NumberOrObject *feature, bool is_object) { + void add(const vespalib::string &name, LazyValue feature, bool is_object) { _names.push_back(name); _features.push_back(feature); _is_object.push_back(is_object); @@ -36,9 +35,7 @@ public: size_t num_features() const { return _names.size(); } const vespalib::string &name_of(size_t i) const { return _names[i]; } bool is_object(size_t i) const { return _is_object[i]; } - const feature_t *resolve_number(size_t i) const { return &(_features[i]->as_number); } - const vespalib::eval::Value::CREF *resolve_object(size_t i) const { return &(_features[i]->as_object); } - const NumberOrObject *resolve_raw(size_t i) const { return _features[i]; } + LazyValue resolve(size_t i) const { return _features[i]; } }; } // namespace fef diff --git a/searchlib/src/vespa/searchlib/fef/featureexecutor.cpp b/searchlib/src/vespa/searchlib/fef/featureexecutor.cpp index dc5fa6fe92e..8aa8227ad0b 100644 --- a/searchlib/src/vespa/searchlib/fef/featureexecutor.cpp +++ b/searchlib/src/vespa/searchlib/fef/featureexecutor.cpp @@ -18,7 +18,7 @@ FeatureExecutor::isPure() } void -FeatureExecutor::handle_bind_inputs(vespalib::ConstArrayRef<const NumberOrObject *>) +FeatureExecutor::handle_bind_inputs(vespalib::ConstArrayRef<LazyValue>) { } @@ -33,7 +33,7 @@ FeatureExecutor::handle_bind_match_data(MatchData &) } void -FeatureExecutor::bind_inputs(vespalib::ConstArrayRef<const NumberOrObject *> inputs) +FeatureExecutor::bind_inputs(vespalib::ConstArrayRef<LazyValue> inputs) { _inputs.bind(inputs); handle_bind_inputs(inputs); diff --git a/searchlib/src/vespa/searchlib/fef/featureexecutor.h b/searchlib/src/vespa/searchlib/fef/featureexecutor.h index f7645675112..117e0b95194 100644 --- a/searchlib/src/vespa/searchlib/fef/featureexecutor.h +++ b/searchlib/src/vespa/searchlib/fef/featureexecutor.h @@ -14,6 +14,32 @@ namespace search { namespace fef { +class FeatureExecutor; + +/** + * A LazyValue is a reference to a value that can be calculated by a + * FeatureExecutor when needed. Actual Values and FeatureExecutors are + * owned by a RankProgram. LazyValue objects are used when resolving + * value dependencies between FeatureExecutors inside a RankProgram + * and when a client wants to access values from the outside, + * typically during ranking and when producing summary features. + **/ +class LazyValue { +private: + const NumberOrObject *_value; + FeatureExecutor *_executor; +public: + explicit LazyValue(const NumberOrObject *value) : _value(value), _executor(nullptr) {} + LazyValue(const NumberOrObject *value, FeatureExecutor *executor) + : _value(value), _executor(executor) {} + bool is_const() const { return (_executor == nullptr); } + bool is_same(const LazyValue &rhs) const { + return ((_value == rhs._value) && (_executor == rhs._executor)); + } + inline double as_number(uint32_t docid) const; + inline vespalib::eval::Value::CREF as_object(uint32_t docid) const; +}; + /** * A feature executor is a general component that calculates one or * more feature values. It may take multiple features as input. A @@ -24,19 +50,15 @@ class FeatureExecutor { public: class Inputs { - vespalib::ConstArrayRef<const NumberOrObject *> _inputs; + uint32_t _docid; + vespalib::ConstArrayRef<LazyValue> _inputs; public: - Inputs() : _inputs() {} - void bind(vespalib::ConstArrayRef<const NumberOrObject *> inputs) { _inputs = inputs; } - feature_t get_number(size_t idx) const { - return _inputs[idx]->as_number; - } - vespalib::eval::Value::CREF get_object(size_t idx) const { - return _inputs[idx]->as_object; - } - const NumberOrObject *get_raw(size_t idx) const { - return _inputs[idx]; - } + Inputs() : _docid(0), _inputs() {} + void set_docid(uint32_t docid) { _docid = docid; } + uint32_t get_docid() const { return _docid; } + void bind(vespalib::ConstArrayRef<LazyValue> inputs) { _inputs = inputs; } + inline feature_t get_number(size_t idx) const; + inline vespalib::eval::Value::CREF get_object(size_t idx) const; size_t size() const { return _inputs.size(); } }; @@ -77,7 +99,7 @@ private: Outputs _outputs; protected: - virtual void handle_bind_inputs(vespalib::ConstArrayRef<const NumberOrObject *> inputs); + virtual void handle_bind_inputs(vespalib::ConstArrayRef<LazyValue> inputs); virtual void handle_bind_outputs(vespalib::ArrayRef<NumberOrObject> outputs); virtual void handle_bind_match_data(MatchData &md); @@ -89,7 +111,7 @@ public: FeatureExecutor(); // bind order per executor: inputs, outputs, match_data - void bind_inputs(vespalib::ConstArrayRef<const NumberOrObject *> inputs); + void bind_inputs(vespalib::ConstArrayRef<LazyValue> inputs); void bind_outputs(vespalib::ArrayRef<NumberOrObject> outputs); void bind_match_data(MatchData &md); @@ -121,12 +143,41 @@ public: **/ virtual void execute(uint32_t docId) = 0; + void lazy_execute(uint32_t docid) { + if (_inputs.get_docid() != docid) { + _inputs.set_docid(docid); + execute(docid); + } + } + /** * Virtual destructor to allow subclassing. **/ virtual ~FeatureExecutor() {} }; +double LazyValue::as_number(uint32_t docid) const { + if (_executor != nullptr) { + _executor->lazy_execute(docid); + } + return _value->as_number; +} + +vespalib::eval::Value::CREF LazyValue::as_object(uint32_t docid) const { + if (_executor != nullptr) { + _executor->lazy_execute(docid); + } + return _value->as_object; +} + +feature_t FeatureExecutor::Inputs::get_number(size_t idx) const { + return _inputs[idx].as_number(_docid); +} + +vespalib::eval::Value::CREF FeatureExecutor::Inputs::get_object(size_t idx) const { + return _inputs[idx].as_object(_docid); +} + } // namespace fef } // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/featureoverrider.cpp b/searchlib/src/vespa/searchlib/fef/featureoverrider.cpp index a4e0f58b590..c4557d48d2e 100644 --- a/searchlib/src/vespa/searchlib/fef/featureoverrider.cpp +++ b/searchlib/src/vespa/searchlib/fef/featureoverrider.cpp @@ -7,7 +7,7 @@ namespace search { namespace fef { void -FeatureOverrider::handle_bind_inputs(vespalib::ConstArrayRef<const NumberOrObject *> inputs) +FeatureOverrider::handle_bind_inputs(vespalib::ConstArrayRef<LazyValue> inputs) { _executor.bind_inputs(inputs); } diff --git a/searchlib/src/vespa/searchlib/fef/featureoverrider.h b/searchlib/src/vespa/searchlib/fef/featureoverrider.h index 2d942914fcc..876fbe23e96 100644 --- a/searchlib/src/vespa/searchlib/fef/featureoverrider.h +++ b/searchlib/src/vespa/searchlib/fef/featureoverrider.h @@ -25,7 +25,7 @@ private: feature_t _value; virtual void handle_bind_match_data(MatchData &md) override; - virtual void handle_bind_inputs(vespalib::ConstArrayRef<const NumberOrObject *> inputs) override; + virtual void handle_bind_inputs(vespalib::ConstArrayRef<LazyValue> inputs) override; virtual void handle_bind_outputs(vespalib::ArrayRef<NumberOrObject> outputs) override; public: diff --git a/searchlib/src/vespa/searchlib/fef/rank_program.cpp b/searchlib/src/vespa/searchlib/fef/rank_program.cpp index cb19e3ca5b0..abc6deb8618 100644 --- a/searchlib/src/vespa/searchlib/fef/rank_program.cpp +++ b/searchlib/src/vespa/searchlib/fef/rank_program.cpp @@ -13,7 +13,8 @@ using vespalib::Stash; namespace search { namespace fef { -using MappedValues = std::map<const NumberOrObject *, const NumberOrObject *>; +using MappedValues = std::map<const NumberOrObject *, LazyValue>; +using ValueSet = std::set<const NumberOrObject *>; namespace { @@ -61,27 +62,10 @@ std::vector<Override> prepare_overrides(const BlueprintResolver::FeatureMap &fea } struct UnboxingExecutor : FeatureExecutor { - const NumberOrObject &input; - NumberOrObject &output; - UnboxingExecutor(const NumberOrObject &input_in, NumberOrObject &output_in) - : input(input_in), output(output_in) {} - void execute(uint32_t) override { output.as_number = input.as_object.get().as_double(); } -}; - -class Features { -private: - vespalib::ArrayRef<NumberOrObject> _features; - size_t _used; -public: - explicit Features(vespalib::ArrayRef<NumberOrObject> features) - : _features(features), _used(0) {} - vespalib::ArrayRef<NumberOrObject> alloc(size_t cnt) { - assert((_used + cnt) <= _features.size()); - NumberOrObject *begin = &_features[_used]; - _used += cnt; - return vespalib::ArrayRef<NumberOrObject>(begin, cnt); + bool isPure() override { return true; } + void execute(uint32_t) override { + outputs().set_number(0, inputs().get_object(0).get().as_double()); } - bool is_full() const { return (_used == _features.size()); } }; class StashSelector { @@ -102,101 +86,83 @@ public: } }; -class ProgramBuilder { -private: - std::vector<FeatureExecutor *> _program; - std::set<const NumberOrObject *> _is_calculated; -public: - ProgramBuilder() : _program(), _is_calculated() {} - bool is_calculated(const NumberOrObject *raw_value) const { - return (_is_calculated.count(raw_value) == 1); - } - void add(FeatureExecutor *executor, bool is_const) { - if (is_const) { - executor->execute(1); - const auto &outputs = executor->outputs(); - for (size_t out_idx = 0; out_idx < outputs.size(); ++out_idx) { - _is_calculated.insert(outputs.get_raw(out_idx)); - } - } else { - _program.push_back(executor); - } - } - void unbox(const NumberOrObject &input, NumberOrObject &output, Stash &stash) { - if (is_calculated(&input)) { - output.as_number = input.as_object.get().as_double(); - } else { - _program.push_back(&stash.create<UnboxingExecutor>(input, output)); - } - } - const std::vector<FeatureExecutor *> &get() const { return _program; } -}; +} // namespace search::fef::<unnamed> -bool executor_is_const(FeatureExecutor *executor, - const ProgramBuilder &program, - const std::vector<FeatureExecutor *> &executors, - const std::vector<BlueprintResolver::FeatureRef> &inputs) +bool +RankProgram::check_const(FeatureExecutor *executor, const std::vector<BlueprintResolver::FeatureRef> &inputs) const { if (!executor->isPure()) { return false; } for (const auto &ref: inputs) { - if (!program.is_calculated(executors[ref.executor]->outputs().get_raw(ref.output))) { + if (!check_const(_executors[ref.executor]->outputs().get_raw(ref.output))) { return false; } } - return true; + return true; } -size_t count_features(const BlueprintResolver &resolver) { - size_t cnt = 0; - const auto &specs = resolver.getExecutorSpecs(); - for (const auto &entry: specs) { - cnt += entry.output_types.size(); // normal outputs +void +RankProgram::run_const(FeatureExecutor *executor) +{ + executor->execute(1); + const auto &outputs = executor->outputs(); + for (size_t out_idx = 0; out_idx < outputs.size(); ++out_idx) { + _is_const.insert(outputs.get_raw(out_idx)); } - for (const auto &seed_entry: resolver.getSeedMap()) { - auto seed = seed_entry.second; - if (specs[seed.executor].output_types[seed.output]) { - ++cnt; // unboxed seeds - } +} + +void +RankProgram::unbox(BlueprintResolver::FeatureRef seed) +{ + FeatureExecutor *input_executor = _executors[seed.executor]; + const NumberOrObject *input_value = input_executor->outputs().get_raw(seed.output); + vespalib::ArrayRef<NumberOrObject> outputs = _hot_stash.create_array<NumberOrObject>(1); + if (check_const(input_value)) { + outputs[0].as_number = input_value->as_object.get().as_double(); + _unboxed_seeds.emplace(input_value, LazyValue(&outputs[0])); + } else { + vespalib::ArrayRef<LazyValue> inputs = _hot_stash.create_array<LazyValue>(1, input_value, input_executor); + FeatureExecutor &unboxer = _hot_stash.create<UnboxingExecutor>(); + unboxer.bind_inputs(inputs); + unboxer.bind_outputs(outputs); + unboxer.bind_match_data(*_match_data); + _unboxed_seeds.emplace(input_value, LazyValue(&outputs[0], &unboxer)); } - return cnt; } -FeatureResolver resolve(const BlueprintResolver::FeatureMap &features, - const BlueprintResolver::ExecutorSpecList &specs, - const std::vector<FeatureExecutor *> &executors, - const MappedValues &unboxed_seeds, - bool unbox_seeds) +FeatureResolver +RankProgram::resolve(const BlueprintResolver::FeatureMap &features, bool unbox_seeds) const { FeatureResolver result(features.size()); + const auto &specs = _resolver->getExecutorSpecs(); for (const auto &entry: features) { const auto &name = entry.first; auto ref = entry.second; bool is_object = specs[ref.executor].output_types[ref.output]; - const NumberOrObject *raw_value = executors[ref.executor]->outputs().get_raw(ref.output); + FeatureExecutor *executor = _executors[ref.executor]; + const NumberOrObject *raw_value = executor->outputs().get_raw(ref.output); + LazyValue lazy_value = check_const(raw_value) ? LazyValue(raw_value) : LazyValue(raw_value, executor); if (is_object && unbox_seeds) { - auto pos = unboxed_seeds.find(raw_value); - if (pos != unboxed_seeds.end()) { - raw_value = pos->second; + auto pos = _unboxed_seeds.find(raw_value); + if (pos != _unboxed_seeds.end()) { + lazy_value = pos->second; is_object = false; } } - result.add(name, raw_value, is_object); + result.add(name, lazy_value, is_object); } return result; } -} // namespace search::fef::<unnamed> - RankProgram::RankProgram(BlueprintResolver::SP resolver) : _resolver(resolver), _match_data(), _hot_stash(32768), _cold_stash(), - _program(), _executors(), - _unboxed_seeds() + _unboxed_seeds(), + _is_const() { } @@ -211,25 +177,29 @@ RankProgram::setup(const MatchDataLayout &mdl_in, auto override = overrides.begin(); auto override_end = overrides.end(); - ProgramBuilder program; - Features features(_hot_stash.create_array<NumberOrObject>(count_features(*_resolver))); const auto &specs = _resolver->getExecutorSpecs(); for (uint32_t i = 0; i < specs.size(); ++i) { + vespalib::ArrayRef<NumberOrObject> outputs = _hot_stash.create_array<NumberOrObject>(specs[i].output_types.size()); StashSelector stash(_hot_stash, _cold_stash); FeatureExecutor *executor = &(specs[i].blueprint->createExecutor(queryEnv, stash.get())); - bool is_const = executor_is_const(executor, program, _executors, specs[i].inputs); + bool is_const = check_const(executor, specs[i].inputs); if (is_const) { stash.use_secondary(); executor = &(specs[i].blueprint->createExecutor(queryEnv, stash.get())); is_const = executor->isPure(); } size_t num_inputs = specs[i].inputs.size(); - vespalib::ArrayRef<const NumberOrObject *> inputs = stash.get().create_array<const NumberOrObject *>(num_inputs); + vespalib::ArrayRef<LazyValue> inputs = stash.get().create_array<LazyValue>(num_inputs, nullptr); for (size_t input_idx = 0; input_idx < num_inputs; ++input_idx) { auto ref = specs[i].inputs[input_idx]; - inputs[input_idx] = _executors[ref.executor]->outputs().get_raw(ref.output); + FeatureExecutor *input_executor = _executors[ref.executor]; + const NumberOrObject *input_value = input_executor->outputs().get_raw(ref.output); + if (check_const(input_value)) { + inputs[input_idx] = LazyValue(input_value); + } else { + inputs[input_idx] = LazyValue(input_value, input_executor); + } } - vespalib::ArrayRef<NumberOrObject> outputs = features.alloc(specs[i].output_types.size()); for (; (override < override_end) && (override->ref.executor == i); ++override) { FeatureExecutor *tmp = executor; executor = &(stash.get().create<FeatureOverrider>(*tmp, override->ref.output, override->value)); @@ -238,32 +208,29 @@ RankProgram::setup(const MatchDataLayout &mdl_in, executor->bind_outputs(outputs); executor->bind_match_data(*_match_data); _executors.push_back(executor); - program.add(executor, is_const); + if (is_const) { + run_const(executor); + } } for (const auto &seed_entry: _resolver->getSeedMap()) { auto seed = seed_entry.second; if (specs[seed.executor].output_types[seed.output]) { - const NumberOrObject &input = *_executors[seed.executor]->outputs().get_raw(seed.output); - NumberOrObject &output = features.alloc(1)[0]; - _unboxed_seeds[&input] = &output; - program.unbox(input, output, _hot_stash); + unbox(seed); } } - _program = _hot_stash.copy_array<FeatureExecutor *>(program.get()); assert(_executors.size() == specs.size()); - assert(features.is_full()); } FeatureResolver RankProgram::get_seeds(bool unbox_seeds) const { - return resolve(_resolver->getSeedMap(), _resolver->getExecutorSpecs(), _executors, _unboxed_seeds, unbox_seeds); + return resolve(_resolver->getSeedMap(), unbox_seeds); } FeatureResolver RankProgram::get_all_features(bool unbox_seeds) const { - return resolve(_resolver->getFeatureMap(), _resolver->getExecutorSpecs(), _executors, _unboxed_seeds, unbox_seeds); + return resolve(_resolver->getFeatureMap(), unbox_seeds); } } // namespace fef diff --git a/searchlib/src/vespa/searchlib/fef/rank_program.h b/searchlib/src/vespa/searchlib/fef/rank_program.h index b8f60dac71e..b7552ee8e9e 100644 --- a/searchlib/src/vespa/searchlib/fef/rank_program.h +++ b/searchlib/src/vespa/searchlib/fef/rank_program.h @@ -12,15 +12,21 @@ #include <vector> #include <memory.h> #include <vespa/vespalib/util/array.h> +#include <set> namespace search { namespace fef { /** - * A rank program runs multiple feature executors in a predefined - * order to produce a set of feature values. The rank program owns the - * MatchData used to store unpacked term-field match information and - * feature values used during evaluation. + * A rank program is able to lazily calculate a set of feature + * values. In order to access (and thereby calculate) output features + * you typically use the get_seeds function to resolve the predefined + * set of output features. Each feature value will be wrapped in a + * LazyValue object that can be realized for a specific docid. The + * rank program also owns the MatchData used to store unpacked + * term-field match information. Note that you need unpack any + * relevant posting information into the MatchData object before + * trying to resolve lazy values. **/ class RankProgram { @@ -28,15 +34,22 @@ private: RankProgram(const RankProgram &) = delete; RankProgram &operator=(const RankProgram &) = delete; - using MappedValues = std::map<const NumberOrObject *, const NumberOrObject *>; + using MappedValues = std::map<const NumberOrObject *, LazyValue>; + using ValueSet = std::set<const NumberOrObject *>; - BlueprintResolver::SP _resolver; - MatchData::UP _match_data; - vespalib::Stash _hot_stash; - vespalib::Stash _cold_stash; - vespalib::ArrayRef<FeatureExecutor *> _program; - std::vector<FeatureExecutor *> _executors; - MappedValues _unboxed_seeds; + BlueprintResolver::SP _resolver; + MatchData::UP _match_data; + vespalib::Stash _hot_stash; + vespalib::Stash _cold_stash; + std::vector<FeatureExecutor *> _executors; + MappedValues _unboxed_seeds; + ValueSet _is_const; + + bool check_const(const NumberOrObject *value) const { return (_is_const.count(value) == 1); } + bool check_const(FeatureExecutor *executor, const std::vector<BlueprintResolver::FeatureRef> &inputs) const; + void run_const(FeatureExecutor *executor); + void unbox(BlueprintResolver::FeatureRef seed); + FeatureResolver resolve(const BlueprintResolver::FeatureMap &features, bool unbox_seeds) const; public: typedef std::unique_ptr<RankProgram> UP; @@ -48,7 +61,6 @@ public: **/ RankProgram(BlueprintResolver::SP resolver); - size_t program_size() const { return _program.size(); } size_t num_executors() const { return _executors.size(); } /** @@ -62,9 +74,8 @@ public: const Properties &featureOverrides = Properties()); /** - * Expose the MatchData containing all calculated features. This - * is also used when creating search iterators as it is where all - * iterators should unpack their match information. + * Expose the MatchData used when creating search iterators as it + * is where all iterators should unpack their match information. **/ MatchData &match_data() { return *_match_data; } const MatchData &match_data() const { return *_match_data; } @@ -87,24 +98,6 @@ public: * @params unbox_seeds make sure seeds values are numbers **/ FeatureResolver get_all_features(bool unbox_seeds = true) const; - - /** - * Run this rank program on the current state of the internal - * match data for the given docid. Typically, match data for a - * specific result will be unpacked before calling run. After run - * is called, the wanted results can be extracted using the - * appropriate feature handles. The given docid will be used to - * tag the internal match data container before execution. Match - * data for individual term/field combinations are only considered - * valid if their docid matches that of the match data container. - * - * @param docid the document we are ranking - **/ - void run(uint32_t docid) { - for (FeatureExecutor *executor: _program) { - executor->execute(docid); - } - } }; } // namespace fef diff --git a/searchlib/src/vespa/searchlib/fef/test/featuretest.cpp b/searchlib/src/vespa/searchlib/fef/test/featuretest.cpp index dd8dc0699f5..70f1fcf709d 100644 --- a/searchlib/src/vespa/searchlib/fef/test/featuretest.cpp +++ b/searchlib/src/vespa/searchlib/fef/test/featuretest.cpp @@ -6,6 +6,7 @@ LOG_SETUP(".fef.featuretest"); #include <sstream> #include "featuretest.h" #include <vespa/searchlib/fef/utils.h> +#include <vespa/vespalib/testkit/test_kit.h> namespace search { namespace fef { @@ -113,37 +114,23 @@ FeatureTest::execute(feature_t expected, double epsilon, uint32_t docId) } bool -FeatureTest::executeOnly(uint32_t docId) +FeatureTest::executeOnly(RankResult & result, uint32_t docId) { if (!_doneSetup) { LOG(error, "Setup not done."); return false; } - // Note: match data object is reset as part of run - _rankProgram->run(docId); - - return true; -} - -bool -FeatureTest::executeOnly(RankResult & result, uint32_t docId) -{ - if (!executeOnly(docId)) { - return false; - } - - std::map<vespalib::string, feature_t> all = Utils::getAllFeatures(*_rankProgram); + std::map<vespalib::string, feature_t> all = Utils::getAllFeatures(*_rankProgram, docId); for (auto itr = all.begin(); itr != all.end(); ++itr) { result.addScore(itr->first, itr->second); } - return true; } -const vespalib::eval::Value::CREF * -FeatureTest::resolveObjectFeature() +vespalib::eval::Value::CREF +FeatureTest::resolveObjectFeature(uint32_t docid) { - return Utils::getObjectFeature(*_rankProgram); + return Utils::getObjectFeature(*_rankProgram, docid); } void diff --git a/searchlib/src/vespa/searchlib/fef/test/featuretest.h b/searchlib/src/vespa/searchlib/fef/test/featuretest.h index 1f4605c0622..0c10bbf0c5f 100644 --- a/searchlib/src/vespa/searchlib/fef/test/featuretest.h +++ b/searchlib/src/vespa/searchlib/fef/test/featuretest.h @@ -95,14 +95,6 @@ public: bool execute(feature_t expected, double epsilon = 0, uint32_t docId = 1); /** - * Executes the content of this runner only. - * - * @param docId The document id to set on the match data object before running executors. - * @return Whether the executors were executed. - */ - bool executeOnly(uint32_t docId = 1); - - /** * Executes the content of this runner only and stores the result in the given rank result. * * @param result The rank result to store the rank scores. @@ -115,7 +107,7 @@ public: * Resolve the only object feature that is present in the match data of the underlying * rank program. */ - const vespalib::eval::Value::CREF *resolveObjectFeature(); + vespalib::eval::Value::CREF resolveObjectFeature(uint32_t docid = 1); private: BlueprintFactory &_factory; diff --git a/searchlib/src/vespa/searchlib/fef/test/ftlib.h b/searchlib/src/vespa/searchlib/fef/test/ftlib.h index dff9764b03b..288f70dbdd5 100644 --- a/searchlib/src/vespa/searchlib/fef/test/ftlib.h +++ b/searchlib/src/vespa/searchlib/fef/test/ftlib.h @@ -104,10 +104,9 @@ public: bool setup() { return _test.setup(); } bool execute(feature_t expected, double epsilon = 0, uint32_t docId = 1) { return _test.execute(expected, epsilon, docId); } bool execute(const search::fef::test::RankResult &expected, uint32_t docId = 1) { return _test.execute(expected, docId); } - bool executeOnly(uint32_t docId = 1) { return _test.executeOnly(docId); } bool executeOnly(search::fef::test::RankResult &result, uint32_t docId = 1) { return _test.executeOnly(result, docId); } search::fef::test::MatchDataBuilder::UP createMatchDataBuilder() { return _test.createMatchDataBuilder(); } - const vespalib::eval::Value::CREF *resolveObjectFeature() { return _test.resolveObjectFeature(); } + vespalib::eval::Value::CREF resolveObjectFeature(uint32_t docid = 1) { return _test.resolveObjectFeature(docid); } FtIndexEnvironment &getIndexEnv() { return _indexEnv; } FtQueryEnvironment &getQueryEnv() { return _queryEnv; } diff --git a/searchlib/src/vespa/searchlib/fef/utils.cpp b/searchlib/src/vespa/searchlib/fef/utils.cpp index a5c198c6e0e..70396857759 100644 --- a/searchlib/src/vespa/searchlib/fef/utils.cpp +++ b/searchlib/src/vespa/searchlib/fef/utils.cpp @@ -7,32 +7,34 @@ namespace search { namespace fef { -const feature_t * -Utils::getScoreFeature(const RankProgram &rankProgram) +feature_t +Utils::getScoreFeature(const RankProgram &rankProgram, uint32_t docid) { FeatureResolver resolver(rankProgram.get_seeds(false)); assert(resolver.num_features() == 1u); - return resolver.resolve_number(0); + assert(!resolver.is_object(0)); + return resolver.resolve(0).as_number(docid); } -const vespalib::eval::Value::CREF * -Utils::getObjectFeature(const RankProgram &rankProgram) +vespalib::eval::Value::CREF +Utils::getObjectFeature(const RankProgram &rankProgram, uint32_t docid) { FeatureResolver resolver(rankProgram.get_seeds(false)); assert(resolver.num_features() == 1u); - return resolver.resolve_object(0); + assert(resolver.is_object(0)); + return resolver.resolve(0).as_object(docid); } namespace { std::map<vespalib::string, feature_t> -resolveFeatures(const FeatureResolver &resolver) +resolveFeatures(const FeatureResolver &resolver, uint32_t docid) { std::map<vespalib::string, feature_t> result; size_t numFeatures = resolver.num_features(); for (size_t i = 0; i < numFeatures; ++i) { const vespalib::string &name = resolver.name_of(i); - feature_t value = *(resolver.resolve_number(i)); + feature_t value = resolver.resolve(i).as_number(docid); result.insert(std::make_pair(name, value)); } return result; @@ -41,17 +43,17 @@ resolveFeatures(const FeatureResolver &resolver) } std::map<vespalib::string, feature_t> -Utils::getSeedFeatures(const RankProgram &rankProgram) +Utils::getSeedFeatures(const RankProgram &rankProgram, uint32_t docid) { FeatureResolver resolver(rankProgram.get_seeds()); - return resolveFeatures(resolver); + return resolveFeatures(resolver, docid); } std::map<vespalib::string, feature_t> -Utils::getAllFeatures(const RankProgram &rankProgram) +Utils::getAllFeatures(const RankProgram &rankProgram, uint32_t docid) { FeatureResolver resolver(rankProgram.get_all_features()); - return resolveFeatures(resolver); + return resolveFeatures(resolver, docid); } } // namespace fef diff --git a/searchlib/src/vespa/searchlib/fef/utils.h b/searchlib/src/vespa/searchlib/fef/utils.h index 96c8d51a46f..1728bf83717 100644 --- a/searchlib/src/vespa/searchlib/fef/utils.h +++ b/searchlib/src/vespa/searchlib/fef/utils.h @@ -14,22 +14,22 @@ struct Utils /** * Extract a single score feature from the given rank program. */ - static const feature_t *getScoreFeature(const RankProgram &rankProgram); + static feature_t getScoreFeature(const RankProgram &rankProgram, uint32_t docid); /** * Extract a single object feature from the given rank program. */ - static const vespalib::eval::Value::CREF *getObjectFeature(const RankProgram &rankProgram); + static vespalib::eval::Value::CREF getObjectFeature(const RankProgram &rankProgram, uint32_t docid); /** * Extract all seed feature values from the given rank program. **/ - static std::map<vespalib::string, feature_t> getSeedFeatures(const RankProgram &rankProgram); + static std::map<vespalib::string, feature_t> getSeedFeatures(const RankProgram &rankProgram, uint32_t docid); /** * Extract all feature values from the given rank program. **/ - static std::map<vespalib::string, feature_t> getAllFeatures(const RankProgram &rankProgram); + static std::map<vespalib::string, feature_t> getAllFeatures(const RankProgram &rankProgram, uint32_t docid); }; |