diff options
author | Arne H Juul <arnej27959@users.noreply.github.com> | 2024-01-17 15:18:28 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-17 15:18:28 +0100 |
commit | 22bbf5af3980477b4951c6122c1c0d801a7b2cad (patch) | |
tree | 0f332ed680f7eba64dbfbc09ec8234176d9f5b92 | |
parent | ce4db10cbb74b0a458832cbce9bee7ecdbc37da1 (diff) | |
parent | faa437ff5da4a91af5ac51050e27927e46061f77 (diff) |
Merge pull request #29954 from vespa-engine/arnej/extend-verify-ranksetup
Arnej/extend verify ranksetup
6 files changed, 101 insertions, 36 deletions
diff --git a/searchcore/src/apps/verify_ranksetup/CMakeLists.txt b/searchcore/src/apps/verify_ranksetup/CMakeLists.txt index 4411babeb10..13e4092c2ad 100644 --- a/searchcore/src/apps/verify_ranksetup/CMakeLists.txt +++ b/searchcore/src/apps/verify_ranksetup/CMakeLists.txt @@ -4,6 +4,7 @@ vespa_add_library(searchcore_verify_ranksetup verify_ranksetup.cpp INSTALL lib64 DEPENDS + streamingvisitors searchcore_matching searchcore_documentmetastore ) diff --git a/searchcore/src/apps/verify_ranksetup/verify_ranksetup.cpp b/searchcore/src/apps/verify_ranksetup/verify_ranksetup.cpp index 513290cc4d1..759792d205d 100644 --- a/searchcore/src/apps/verify_ranksetup/verify_ranksetup.cpp +++ b/searchcore/src/apps/verify_ranksetup/verify_ranksetup.cpp @@ -22,10 +22,14 @@ #include <vespa/searchlib/fef/onnx_models.h> #include <vespa/searchlib/fef/ranking_expressions.h> #include <vespa/searchlib/fef/test/plugin/setup.h> +#include <vespa/searchvisitor/indexenvironment.h> +#include <vespa/searchvisitor/rankmanager.h> +#include <vespa/vsm/config/config-vsmfields.h> #include <vespa/config/subscription/configsubscriber.hpp> #include <vespa/vespalib/util/stringfmt.h> #include <vespa/vespalib/stllike/asciistream.h> #include <optional> +#include <functional> using config::ConfigContext; using config::ConfigHandle; @@ -43,6 +47,7 @@ using vespa::config::search::core::RankingConstantsConfig; using vespa::config::search::core::RankingExpressionsConfig; using vespa::config::search::core::OnnxModelsConfig; using vespa::config::search::core::VerifyRanksetupConfig; +using vespa::config::search::vsm::VsmfieldsConfig; using vespalib::eval::BadConstantValue; using vespalib::eval::ConstantValue; using vespalib::eval::FastValueBuilderFactory; @@ -99,11 +104,12 @@ class VerifyRankSetup { private: std::vector<search::fef::Message> _messages; - bool verify(const search::index::Schema &schema, - const search::fef::Properties &props, - const IRankingAssetsRepo &repo); + SearchMode _searchMode; + + bool verifyIndexEnv(const search::fef::IIndexEnvironment &indexEnv); bool verifyConfig(const VerifyRanksetupConfig &myCfg, + const VsmfieldsConfig &vsmFieldsCcfg, const RankProfilesConfig &rankCfg, const IndexschemaConfig &schemaCfg, const AttributesConfig &attributeCfg, @@ -112,7 +118,7 @@ private: const OnnxModelsConfig &modelsCfg); public: - VerifyRankSetup(); + explicit VerifyRankSetup(SearchMode mode); ~VerifyRankSetup(); [[nodiscard]] const std::vector<search::fef::Message> & getMessages() const { return _messages; } bool verify(const std::string & configId); @@ -140,7 +146,9 @@ DummyRankingAssetsRepo::DummyRankingAssetsRepo(const RankingConstantsConfig &cfg _expressions(std::move(expressions)), _onnxModels(std::move(onnxModels)) {} + DummyRankingAssetsRepo::~DummyRankingAssetsRepo() = default; + vespalib::eval::ConstantValue::UP DummyRankingAssetsRepo::getConstant(const vespalib::string &name) const { for (const auto &entry: cfg.constant) { @@ -156,18 +164,15 @@ DummyRankingAssetsRepo::getConstant(const vespalib::string &name) const { return {}; } -VerifyRankSetup::VerifyRankSetup() - : _messages() +VerifyRankSetup::VerifyRankSetup(SearchMode mode) + : _messages(), + _searchMode(mode) { } VerifyRankSetup::~VerifyRankSetup() = default; bool -VerifyRankSetup::verify(const search::index::Schema &schema, - const search::fef::Properties &props, - const IRankingAssetsRepo &repo) -{ - proton::matching::IndexEnvironment indexEnv(0, schema, props, repo); +VerifyRankSetup::verifyIndexEnv(const search::fef::IIndexEnvironment &indexEnv) { search::fef::BlueprintFactory factory; search::features::setup_search_features(factory); search::fef::test::setup_fef_test_plugin(factory); @@ -195,6 +200,7 @@ VerifyRankSetup::verify(const search::index::Schema &schema, bool VerifyRankSetup::verifyConfig(const VerifyRanksetupConfig &myCfg, + const VsmfieldsConfig &vsmFieldsCfg, const RankProfilesConfig &rankCfg, const IndexschemaConfig &schemaCfg, const AttributesConfig &attributeCfg, @@ -203,17 +209,38 @@ VerifyRankSetup::verifyConfig(const VerifyRanksetupConfig &myCfg, const OnnxModelsConfig &modelsCfg) { bool ok = true; + auto repo = std::make_shared<DummyRankingAssetsRepo>(constantsCfg, + make_expressions(expressionsCfg, myCfg, _messages), + make_models(modelsCfg, myCfg, _messages)); + + using IndexEnvFactory = std::function<std::unique_ptr<search::fef::IIndexEnvironment>(const search::fef::Properties &)>; + IndexEnvFactory factory; + streaming::IndexEnvPrototype streamingProto; search::index::Schema schema; - search::index::SchemaBuilder::build(schemaCfg, schema); - search::index::SchemaBuilder::build(attributeCfg, schema); - DummyRankingAssetsRepo repo(constantsCfg, make_expressions(expressionsCfg, myCfg, _messages), - make_models(modelsCfg, myCfg, _messages)); + if (_searchMode == SearchMode::STREAMING) { + streamingProto.set_ranking_assets_repo(repo); + streamingProto.detectFields(vsmFieldsCfg); + factory = [&](const search::fef::Properties &properties) + { + auto indexEnv = streamingProto.clone(); + indexEnv->getProperties().import(properties); + return indexEnv; + }; + } else { + search::index::SchemaBuilder::build(schemaCfg, schema); + search::index::SchemaBuilder::build(attributeCfg, schema); + factory = [&](const search::fef::Properties &properties) + { + return std::make_unique<proton::matching::IndexEnvironment>(0, schema, properties, *repo); + }; + } for(const auto & profile : rankCfg.rankprofile) { search::fef::Properties properties; for(const auto & j : profile.fef.property) { properties.add(j.name, j.value); } - if (verify(schema, properties, repo)) { + auto indexEnvP = factory(properties); + if (verifyIndexEnv(*indexEnvP)) { _messages.emplace_back(search::fef::Level::INFO, fmt("rank profile '%s': pass", profile.name.c_str())); } else { @@ -241,8 +268,17 @@ VerifyRankSetup::verify(const std::string & configid) ConfigHandle<RankingExpressionsConfig>::UP expressionsHandle = subscriber.subscribe<RankingExpressionsConfig>(cfgId); ConfigHandle<OnnxModelsConfig>::UP modelsHandle = subscriber.subscribe<OnnxModelsConfig>(cfgId); + std::unique_ptr<VsmfieldsConfig> vsmFieldsCfg = std::make_unique<VsmfieldsConfig>(); + ConfigHandle<VsmfieldsConfig>::UP vsmFieldsHandle; + if (_searchMode == SearchMode::STREAMING) { + vsmFieldsHandle = subscriber.subscribe<VsmfieldsConfig>(cfgId); + } subscriber.nextConfig(); + if (_searchMode == SearchMode::STREAMING) { + vsmFieldsCfg = vsmFieldsHandle->getConfig(); + } ok = verifyConfig(*myHandle->getConfig(), + *vsmFieldsCfg, *rankHandle->getConfig(), *schemaHandle->getConfig(), *attributesHandle->getConfig(), @@ -260,8 +296,8 @@ VerifyRankSetup::verify(const std::string & configid) } std::pair<bool, std::vector<search::fef::Message>> -verifyRankSetup(const char * configId) { - VerifyRankSetup verifier; +verifyRankSetup(const char * configId, SearchMode mode) { + VerifyRankSetup verifier{mode}; bool ok = verifier.verify(configId); return {ok, verifier.getMessages()}; diff --git a/searchcore/src/apps/verify_ranksetup/verify_ranksetup.h b/searchcore/src/apps/verify_ranksetup/verify_ranksetup.h index 8e77bdd51fb..4b18805fe0c 100644 --- a/searchcore/src/apps/verify_ranksetup/verify_ranksetup.h +++ b/searchcore/src/apps/verify_ranksetup/verify_ranksetup.h @@ -4,4 +4,6 @@ #include <vespa/searchlib/fef/verify_feature.h> -std::pair<bool, std::vector<search::fef::Message>> verifyRankSetup(const char * configId); +enum class SearchMode { INDEXED, STREAMING }; + +std::pair<bool, std::vector<search::fef::Message>> verifyRankSetup(const char * configId, SearchMode mode); diff --git a/searchcore/src/apps/verify_ranksetup/verify_ranksetup_app.cpp b/searchcore/src/apps/verify_ranksetup/verify_ranksetup_app.cpp index 4d2c657fc70..e179685b55b 100644 --- a/searchcore/src/apps/verify_ranksetup/verify_ranksetup_app.cpp +++ b/searchcore/src/apps/verify_ranksetup/verify_ranksetup_app.cpp @@ -34,14 +34,20 @@ toLogLevel(search::fef::Level level) { abort(); } } + int App::main(int argc, char **argv) { + SearchMode mode = SearchMode::INDEXED; + if (argc == 3 && (strcmp("-S", argv[2]) == 0)) { + mode = SearchMode::STREAMING; + --argc; + } if (argc != 2) { return usage(); } - auto [ok, messages] = verifyRankSetup(argv[1]); + auto [ok, messages] = verifyRankSetup(argv[1], mode); for (const auto & msg : messages) { VLOG(toLogLevel(msg.first), "%s", msg.second.c_str()); diff --git a/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp b/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp index 6f2e77d30cb..cdaf14eef9b 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp @@ -52,16 +52,23 @@ to_data_type(VsmfieldsConfig::Fieldspec::Searchmethod search_method) return FieldInfo::DataType::DOUBLE; } -void -RankManager::Snapshot::detectFields(const VsmfieldsHandle & fields) +IndexEnvPrototype::IndexEnvPrototype() + : _tableManager(), + _prototype(_tableManager) { - for (uint32_t i = 0; i < fields->fieldspec.size(); ++i) { - const VsmfieldsConfig::Fieldspec & fs = fields->fieldspec[i]; + auto tableFactory = std::make_shared<search::fef::FunctionTableFactory>(256); + _tableManager.addFactory(tableFactory); +} + +void +IndexEnvPrototype::detectFields(const vespa::config::search::vsm::VsmfieldsConfig &fields) { + for (uint32_t i = 0; i < fields.fieldspec.size(); ++i) { + const VsmfieldsConfig::Fieldspec & fs = fields.fieldspec[i]; bool isAttribute = (fs.fieldtype == VsmfieldsConfig::Fieldspec::Fieldtype::ATTRIBUTE); LOG(debug, "Adding field of type '%s' and name '%s' with id '%u' the index environment.", isAttribute ? "ATTRIBUTE" : "INDEX", fs.name.c_str(), i); // This id must match the vsm specific field id - _protoEnv.addField(fs.name, isAttribute, to_data_type(fs.searchmethod)); + _prototype.addField(fs.name, isAttribute, to_data_type(fs.searchmethod)); } } @@ -96,14 +103,14 @@ buildFieldSet(const VsmfieldsConfig::Documenttype::Index & ci, const search::fef } } - + void RankManager::Snapshot::buildFieldMappings(const VsmfieldsHandle & fields) { for(const VsmfieldsConfig::Documenttype & di : fields->documenttype) { LOG(debug, "Looking through indexes for documenttype '%s'", di.name.c_str()); for(const VsmfieldsConfig::Documenttype::Index & ci : di.index) { - FieldIdTList view = buildFieldSet(ci, _protoEnv, di.index); + FieldIdTList view = buildFieldSet(ci, _protoEnv.current(), di.index); if (_views.find(ci.name) == _views.end()) { std::sort(view.begin(), view.end()); // lowest field id first _views[ci.name] = view; @@ -119,7 +126,7 @@ RankManager::Snapshot::initRankSetup(const BlueprintFactory & factory) { // set up individual index environments per rank profile for (uint32_t i = 0; i < _properties.size(); ++i) { - _indexEnv.push_back(_protoEnv); + _indexEnv.push_back(_protoEnv.current()); IndexEnvironment & ie = _indexEnv.back(); ie.getProperties().import(_properties[i].second); } @@ -151,15 +158,13 @@ RankManager::Snapshot::initRankSetup(const BlueprintFactory & factory) } RankManager::Snapshot::Snapshot() : - _tableManager(), - _protoEnv(_tableManager), + _protoEnv(), _properties(), _indexEnv(), _rankSetup(), _rpmap(), _views() { - _tableManager.addFactory(search::fef::ITableFactory::SP(new search::fef::FunctionTableFactory(256))); } RankManager::Snapshot::~Snapshot() = default; @@ -168,7 +173,7 @@ bool RankManager::Snapshot::setup(const RankManager & rm) { VsmfieldsHandle fields = rm._vsmAdapter->getFieldsConfig(); - detectFields(fields); + _protoEnv.detectFields(*fields); buildFieldMappings(fields); if (!initRankSetup(rm._blueprintFactory)) { return false; @@ -223,5 +228,5 @@ RankManager::configure(const vsm::VSMConfigSnapshot & snap, std::shared_ptr<cons { notify(snap, std::move(ranking_assets_repo)); } - + } diff --git a/streamingvisitors/src/vespa/searchvisitor/rankmanager.h b/streamingvisitors/src/vespa/searchvisitor/rankmanager.h index 6eb3993cef8..12785daeb89 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankmanager.h +++ b/streamingvisitors/src/vespa/searchvisitor/rankmanager.h @@ -11,6 +11,23 @@ namespace streaming { +/** handle per-document-type indexing environment */ +class IndexEnvPrototype { +private: + search::fef::TableManager _tableManager; + streaming::IndexEnvironment _prototype; +public: + IndexEnvPrototype(); + void detectFields(const vespa::config::search::vsm::VsmfieldsConfig &fields); + void set_ranking_assets_repo(std::shared_ptr<const search::fef::IRankingAssetsRepo> repo) { + _prototype.set_ranking_assets_repo(std::move(repo)); + } + std::unique_ptr<IndexEnvironment> clone() const { + return std::make_unique<IndexEnvironment>(_prototype); + } + const IndexEnvironment& current() const { return _prototype; } +}; + /** * This class subscribes to the rank-profiles config and keeps a setup per rank profile. **/ @@ -30,8 +47,7 @@ public: using NamedPropertySet = std::pair<vespalib::string, search::fef::Properties>; using ViewMap = vespalib::hash_map<vespalib::string, View>; using Map = vespalib::hash_map<vespalib::string, int>; - search::fef::TableManager _tableManager; - IndexEnvironment _protoEnv; + IndexEnvPrototype _protoEnv; std::vector<NamedPropertySet> _properties; // property set per rank profile std::vector<IndexEnvironment> _indexEnv; // index environment per rank profile std::vector<std::shared_ptr<const search::fef::RankSetup>> _rankSetup; // rank setup per rank profile @@ -39,7 +55,6 @@ public: ViewMap _views; void addProperties(const vespa::config::search::RankProfilesConfig & cfg); - void detectFields(const vsm::VsmfieldsHandle & fields); void buildFieldMappings(const vsm::VsmfieldsHandle & fields); bool initRankSetup(const search::fef::BlueprintFactory & factory); bool setup(const RankManager & manager); |