From 0885e78faf297b939fa1c5dfa63c7b4afc308850 Mon Sep 17 00:00:00 2001 From: Håvard Pettersen Date: Tue, 2 Nov 2021 13:39:24 +0000 Subject: added match features to rank setup --- searchlib/src/tests/ranksetup/ranksetup_test.cpp | 22 +++++++++++++++++++++ .../src/vespa/searchlib/fef/indexproperties.cpp | 13 ++++++++++++ .../src/vespa/searchlib/fef/indexproperties.h | 14 +++++++++++++ searchlib/src/vespa/searchlib/fef/ranksetup.cpp | 19 ++++++++++++++++++ searchlib/src/vespa/searchlib/fef/ranksetup.h | 23 ++++++++++++++++++++++ 5 files changed, 91 insertions(+) (limited to 'searchlib') diff --git a/searchlib/src/tests/ranksetup/ranksetup_test.cpp b/searchlib/src/tests/ranksetup/ranksetup_test.cpp index 51f998d0793..ddd3f946cc5 100644 --- a/searchlib/src/tests/ranksetup/ranksetup_test.cpp +++ b/searchlib/src/tests/ranksetup/ranksetup_test.cpp @@ -503,6 +503,8 @@ void RankSetupTest::testRankSetup() IndexEnvironment env; env.getProperties().add(rank::FirstPhase::NAME, "firstphase"); env.getProperties().add(rank::SecondPhase::NAME, "secondphase"); + env.getProperties().add(match::Feature::NAME, "match_foo"); + env.getProperties().add(match::Feature::NAME, "match_bar"); env.getProperties().add(dump::Feature::NAME, "foo"); env.getProperties().add(dump::Feature::NAME, "bar"); env.getProperties().add(matching::NumThreadsPerSearch::NAME, "3"); @@ -532,9 +534,14 @@ void RankSetupTest::testRankSetup() env.getProperties().add(mutate::on_summary::Operation::NAME, "-=2"); RankSetup rs(_factory, env); + EXPECT_FALSE(rs.has_match_features()); rs.configure(); EXPECT_EQUAL(rs.getFirstPhaseRank(), vespalib::string("firstphase")); EXPECT_EQUAL(rs.getSecondPhaseRank(), vespalib::string("secondphase")); + EXPECT_TRUE(rs.has_match_features()); + ASSERT_TRUE(rs.get_match_features().size() == 2); + EXPECT_EQUAL(rs.get_match_features()[0], vespalib::string("match_foo")); + EXPECT_EQUAL(rs.get_match_features()[1], vespalib::string("match_bar")); ASSERT_TRUE(rs.getDumpFeatures().size() == 2); 
EXPECT_EQUAL(rs.getDumpFeatures()[0], vespalib::string("foo")); EXPECT_EQUAL(rs.getDumpFeatures()[1], vespalib::string("bar")); @@ -804,6 +811,8 @@ RankSetupTest::testFeatureNormalization() rankSetup.setFirstPhaseRank(" mysum ( value ( 1 ) , value ( 1 ) ) "); rankSetup.setSecondPhaseRank(" mysum ( value ( 2 ) , value ( 2 ) ) "); + rankSetup.add_match_feature(" mysum ( value ( 3 ) , value ( 3 ) ) "); + rankSetup.add_match_feature(" mysum ( \"value( 3 )\" , \"value( 3 )\" ) "); rankSetup.addSummaryFeature(" mysum ( value ( 5 ) , value ( 5 ) ) "); rankSetup.addSummaryFeature(" mysum ( \"value( 5 )\" , \"value( 5 )\" ) "); rankSetup.addDumpFeature(" mysum ( value ( 10 ) , value ( 10 ) ) "); @@ -817,9 +826,11 @@ RankSetupTest::testFeatureNormalization() MatchData::UP match_data = layout.createMatchData(); RankProgram::UP firstPhaseProgram = rankSetup.create_first_phase_program(); RankProgram::UP secondPhaseProgram = rankSetup.create_second_phase_program(); + RankProgram::UP match_program = rankSetup.create_match_program(); RankProgram::UP summaryProgram = rankSetup.create_summary_program(); firstPhaseProgram->setup(*match_data, queryEnv); secondPhaseProgram->setup(*match_data, queryEnv); + match_program->setup(*match_data, queryEnv); summaryProgram->setup(*match_data, queryEnv); EXPECT_APPROX(2.0, Utils::getScoreFeature(*firstPhaseProgram, 1), 0.001); @@ -850,6 +861,17 @@ RankSetupTest::testFeatureNormalization() exp["mysum(value(2),value(2)).out"] = 4.0; TEST_DO(checkFeatures(exp, actual)); } + { // all match features + std::map actual = Utils::getAllFeatures(*match_program, 1); + std::map exp; + exp["value(3)"] = 3.0; + exp["value(3).0"] = 3.0; + exp["mysum(value(3),value(3))"] = 6.0; + exp["mysum(value(3),value(3)).out"] = 6.0; + exp["mysum(\"value( 3 )\",\"value( 3 )\")"] = 6.0; + exp["mysum(\"value( 3 )\",\"value( 3 )\").out"] = 6.0; + TEST_DO(checkFeatures(exp, actual)); + } { // all rank features (summary) std::map actual = Utils::getAllFeatures(*summaryProgram, 
1); std::map exp; diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp index 604b566df2b..f3794d07102 100644 --- a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp @@ -248,6 +248,19 @@ namespace on_summary { } } +namespace match { + +const vespalib::string Feature::NAME("vespa.match.feature"); +const std::vector Feature::DEFAULT_VALUE; + +std::vector +Feature::lookup(const Properties &props) +{ + return lookupStringVector(props, NAME, DEFAULT_VALUE); +} + +} // namespace match + namespace summary { const vespalib::string Feature::NAME("vespa.summary.feature"); diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.h b/searchlib/src/vespa/searchlib/fef/indexproperties.h index 3b8447c2a61..1283620f9f0 100644 --- a/searchlib/src/vespa/searchlib/fef/indexproperties.h +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.h @@ -57,6 +57,20 @@ namespace rank { } // namespace rank +namespace match { + + /** + * Property for the set of features to be inserted into the search + * reply (match features). 
+ **/ + struct Feature { + static const vespalib::string NAME; + static const std::vector DEFAULT_VALUE; + static std::vector lookup(const Properties &props); + }; + +} // namespace match + namespace summary { /** diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp index 80be1299fe7..ec05c0ddcb4 100644 --- a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp +++ b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp @@ -29,6 +29,7 @@ RankSetup::RankSetup(const BlueprintFactory &factory, const IIndexEnvironment &i _indexEnv(indexEnv), _first_phase_resolver(std::make_shared(factory, indexEnv)), _second_phase_resolver(std::make_shared(factory, indexEnv)), + _match_resolver(std::make_shared(factory, indexEnv)), _summary_resolver(std::make_shared(factory, indexEnv)), _dumpResolver(std::make_shared(factory, indexEnv)), _firstPhaseRankFeature(), @@ -49,6 +50,7 @@ RankSetup::RankSetup(const BlueprintFactory &factory, const IIndexEnvironment &i _degradationSamplePercentage(0.2), _degradationPostFilterMultiplier(1.0), _rankScoreDropLimit(0), + _match_features(), _summaryFeatures(), _dumpFeatures(), _ignoreDefaultRankFeatures(false), @@ -78,6 +80,9 @@ RankSetup::configure() { setFirstPhaseRank(rank::FirstPhase::lookup(_indexEnv.getProperties())); setSecondPhaseRank(rank::SecondPhase::lookup(_indexEnv.getProperties())); + for (const auto &feature: match::Feature::lookup(_indexEnv.getProperties())) { + add_match_feature(feature); + } std::vector summaryFeatures = summary::Feature::lookup(_indexEnv.getProperties()); for (const auto & feature : summaryFeatures) { addSummaryFeature(feature); @@ -138,6 +143,13 @@ RankSetup::setSecondPhaseRank(const vespalib::string &featureName) _secondPhaseRankFeature = featureName; } +void +RankSetup::add_match_feature(const vespalib::string &match_feature) +{ + LOG_ASSERT(!_compiled); + _match_features.push_back(match_feature); +} + void RankSetup::addSummaryFeature(const vespalib::string 
&summaryFeature) { @@ -178,6 +190,9 @@ RankSetup::compile() _compileError = true; } } + for (const auto &feature: _match_features) { + _match_resolver->addSeed(feature); + } for (const auto & feature :_summaryFeatures) { _summary_resolver->addSeed(feature); } @@ -191,6 +206,7 @@ RankSetup::compile() _indexEnv.hintFeatureMotivation(IIndexEnvironment::RANK); _compileError |= !_first_phase_resolver->compile(); _compileError |= !_second_phase_resolver->compile(); + _compileError |= !_match_resolver->compile(); _compileError |= !_summary_resolver->compile(); _indexEnv.hintFeatureMotivation(IIndexEnvironment::DUMP); _compileError |= !_dumpResolver->compile(); @@ -208,6 +224,9 @@ RankSetup::prepareSharedState(const IQueryEnvironment &queryEnv, IObjectStore &o for (const auto &spec : _second_phase_resolver->getExecutorSpecs()) { spec.blueprint->prepareSharedState(queryEnv, objectStore); } + for (const auto &spec : _match_resolver->getExecutorSpecs()) { + spec.blueprint->prepareSharedState(queryEnv, objectStore); + } for (const auto &spec : _summary_resolver->getExecutorSpecs()) { spec.blueprint->prepareSharedState(queryEnv, objectStore); } diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.h b/searchlib/src/vespa/searchlib/fef/ranksetup.h index d3120fa7d46..fce7d1772cf 100644 --- a/searchlib/src/vespa/searchlib/fef/ranksetup.h +++ b/searchlib/src/vespa/searchlib/fef/ranksetup.h @@ -37,6 +37,7 @@ private: const IIndexEnvironment &_indexEnv; BlueprintResolver::SP _first_phase_resolver; BlueprintResolver::SP _second_phase_resolver; + BlueprintResolver::SP _match_resolver; BlueprintResolver::SP _summary_resolver; BlueprintResolver::SP _dumpResolver; vespalib::string _firstPhaseRankFeature; @@ -57,6 +58,7 @@ private: double _degradationSamplePercentage; double _degradationPostFilterMultiplier; feature_t _rankScoreDropLimit; + std::vector _match_features; std::vector _summaryFeatures; std::vector _dumpFeatures; bool _ignoreDefaultRankFeatures; @@ -343,6 +345,14 @@ public: 
**/ feature_t getRankScoreDropLimit() const { return _rankScoreDropLimit; } + /** + * This method may be used to indicate that certain features + * should be present in the search result. + * + * @param match_feature full feature name of a match feature + **/ + void add_match_feature(const vespalib::string &match_feature); + /** * This method may be used to indicate that certain features * should be present in the docsum. @@ -351,6 +361,18 @@ public: **/ void addSummaryFeature(const vespalib::string &summaryFeature); + /** + * @return whether there are any match features + **/ + bool has_match_features() const { return !_match_features.empty(); } + + /** + * Returns a const view of the match features added. + * + * @return vector of match feature names. + **/ + const std::vector &get_match_features() const { return _match_features; } + /** * Returns a const view of the summary features added. * @@ -423,6 +445,7 @@ public: RankProgram::UP create_first_phase_program() const { return std::make_unique(_first_phase_resolver); } RankProgram::UP create_second_phase_program() const { return std::make_unique(_second_phase_resolver); } + RankProgram::UP create_match_program() const { return std::make_unique(_match_resolver); } RankProgram::UP create_summary_program() const { return std::make_unique(_summary_resolver); } RankProgram::UP create_dump_program() const { return std::make_unique(_dumpResolver); } -- cgit v1.2.3