diff options
author | Tor Egge <Tor.Egge@online.no> | 2023-05-09 10:17:25 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2023-05-09 10:17:25 +0200 |
commit | 32b0a642e18552eed36fde6a1ad9868d22c6b1c9 (patch) | |
tree | 58c3457f601d7278e7d6e412952acdcd962a01ef /searchlib | |
parent | 576c99377745221ea70472b55ff2b527bc6753a5 (diff) |
Move ranking config to configdefinitions module.
Diffstat (limited to 'searchlib')
11 files changed, 410 insertions, 1 deletions
diff --git a/searchlib/src/vespa/searchlib/fef/CMakeLists.txt b/searchlib/src/vespa/searchlib/fef/CMakeLists.txt index 398cc0518f8..2ea9349861b 100644 --- a/searchlib/src/vespa/searchlib/fef/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/fef/CMakeLists.txt @@ -20,6 +20,7 @@ vespa_add_library(searchlib_fef OBJECT matchdatalayout.cpp objectstore.cpp onnx_model.cpp + onnx_models.cpp parameter.cpp parameterdescriptions.cpp parametervalidator.cpp @@ -29,6 +30,9 @@ vespa_add_library(searchlib_fef OBJECT query_value.cpp queryproperties.cpp rank_program.cpp + ranking_assets_repo.cpp + ranking_constants.cpp + ranking_expressions.cpp ranksetup.cpp simpletermdata.cpp simpletermfielddata.cpp diff --git a/searchlib/src/vespa/searchlib/fef/i_ranking_assets_repo.h b/searchlib/src/vespa/searchlib/fef/i_ranking_assets_repo.h new file mode 100644 index 00000000000..e3c1ed0d821 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/i_ranking_assets_repo.h @@ -0,0 +1,22 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/eval/eval/value_cache/constant_value.h> + +namespace search::fef { + +class OnnxModel; + +/** + * Interface for retrieving named constants, expressions and models from ranking. + * Empty strings or nullptrs indicates nothing found. + */ +struct IRankingAssetsRepo { + virtual vespalib::eval::ConstantValue::UP getConstant(const vespalib::string &name) const = 0; + virtual vespalib::string getExpression(const vespalib::string &name) const = 0; + virtual const search::fef::OnnxModel *getOnnxModel(const vespalib::string &name) const = 0; + virtual ~IRankingAssetsRepo() = default; +}; + +} diff --git a/searchlib/src/vespa/searchlib/fef/onnx_model.h b/searchlib/src/vespa/searchlib/fef/onnx_model.h index 62d12b8bcd1..345388573de 100644 --- a/searchlib/src/vespa/searchlib/fef/onnx_model.h +++ b/searchlib/src/vespa/searchlib/fef/onnx_model.h @@ -11,7 +11,7 @@ namespace search::fef { /** * Class containing configuration for a single onnx model setup. This * class is used both by the IIndexEnvironment api as well as the - * OnnxModels config adapter in the search core (matching component). + * OnnxModels config adapter. **/ class OnnxModel { private: diff --git a/searchlib/src/vespa/searchlib/fef/onnx_models.cpp b/searchlib/src/vespa/searchlib/fef/onnx_models.cpp new file mode 100644 index 00000000000..15092b604cc --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/onnx_models.cpp @@ -0,0 +1,49 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "onnx_models.h" +#include <cassert> + +namespace search::fef { + +OnnxModels::OnnxModels() = default; +OnnxModels::OnnxModels(OnnxModels &&) noexcept = default; +OnnxModels::~OnnxModels() = default; + +OnnxModels::OnnxModels(Vector models) + : _models() +{ + for (auto &model: models) { + _models.emplace(model.name(), std::move(model)); + } +} + +bool +OnnxModels::operator==(const OnnxModels &rhs) const +{ + return (_models == rhs._models); +} + +const OnnxModels::Model * +OnnxModels::getModel(const vespalib::string &name) const +{ + auto itr = _models.find(name); + if (itr != _models.end()) { + return &itr->second; + } + return nullptr; +} + +void +OnnxModels::configure(const ModelConfig &config, Model &model) +{ + assert(config.name == model.name()); + for (const auto &input: config.input) { + model.input_feature(input.name, input.source); + } + for (const auto &output: config.output) { + model.output_name(output.name, output.as); + } + model.dry_run_on_setup(config.dryRunOnSetup); +} + +} diff --git a/searchlib/src/vespa/searchlib/fef/onnx_models.h b/searchlib/src/vespa/searchlib/fef/onnx_models.h new file mode 100644 index 00000000000..c6c4a2b3fe9 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/onnx_models.h @@ -0,0 +1,42 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "onnx_model.h" +#include <vespa/config-onnx-models.h> +#include <vespa/vespalib/stllike/string.h> +#include <map> +#include <vector> + +namespace search::fef { + +/** + * Class representing a set of configured onnx models, with full path + * for where the models are stored on disk. + */ +class OnnxModels { +public: + using ModelConfig = vespa::config::search::core::OnnxModelsConfig::Model; + using Model = OnnxModel; + using Vector = std::vector<Model>; + +private: + using Map = std::map<vespalib::string, Model>; + Map _models; + +public: + using SP = std::shared_ptr<OnnxModels>; + OnnxModels(); + OnnxModels(Vector models); + OnnxModels(OnnxModels &&) noexcept; + OnnxModels & operator=(OnnxModels &&) = delete; + OnnxModels(const OnnxModels &) = delete; + OnnxModels & operator =(const OnnxModels &) = delete; + ~OnnxModels(); + bool operator==(const OnnxModels &rhs) const; + [[nodiscard]] const Model *getModel(const vespalib::string &name) const; + [[nodiscard]] size_t size() const { return _models.size(); } + static void configure(const ModelConfig &config, Model &model); +}; + +} diff --git a/searchlib/src/vespa/searchlib/fef/ranking_assets_repo.cpp b/searchlib/src/vespa/searchlib/fef/ranking_assets_repo.cpp new file mode 100644 index 00000000000..6b5629ff91c --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/ranking_assets_repo.cpp @@ -0,0 +1,43 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "ranking_assets_repo.h" + +using vespalib::eval::ConstantValue; + +namespace search::fef { + +RankingAssetsRepo::RankingAssetsRepo(const ConstantValueFactory &factory, + std::shared_ptr<const RankingConstants> constants, + std::shared_ptr<const RankingExpressions> expressions, + std::shared_ptr<const OnnxModels> models) + : _factory(factory), + _constants(std::move(constants)), + _rankingExpressions(std::move(expressions)), + _onnxModels(std::move(models)) +{ +} + +RankingAssetsRepo::~RankingAssetsRepo() = default; + +ConstantValue::UP +RankingAssetsRepo::getConstant(const vespalib::string &name) const +{ + if ( ! _constants) return {}; + const RankingConstants::Constant *constant = _constants->getConstant(name); + if (constant != nullptr) { + return _factory.create(constant->filePath, constant->type); + } + return {}; +} + +vespalib::string +RankingAssetsRepo::getExpression(const vespalib::string &name) const { + return _rankingExpressions ? _rankingExpressions->loadExpression(name) : ""; +} + +const OnnxModel * +RankingAssetsRepo::getOnnxModel(const vespalib::string &name) const { + return _onnxModels ? _onnxModels->getModel(name) : nullptr; +} + +} diff --git a/searchlib/src/vespa/searchlib/fef/ranking_assets_repo.h b/searchlib/src/vespa/searchlib/fef/ranking_assets_repo.h new file mode 100644 index 00000000000..4e613f4bffb --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/ranking_assets_repo.h @@ -0,0 +1,39 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "i_ranking_assets_repo.h" +#include "ranking_constants.h" +#include "onnx_models.h" +#include "ranking_expressions.h" +#include <vespa/eval/eval/value_cache/constant_value.h> + +namespace search::fef { + +/** + * Class that provides access to a configured set of rank constant values. + * + * This class maps symbolic names to assets used while setting up rank features blueprints. + * A factory is used to instantiate constant values. + */ +class RankingAssetsRepo : public IRankingAssetsRepo { +private: + using ConstantValueFactory = vespalib::eval::ConstantValueFactory; + + const ConstantValueFactory &_factory; + const std::shared_ptr<const RankingConstants> _constants; + const std::shared_ptr<const RankingExpressions> _rankingExpressions; + const std::shared_ptr<const OnnxModels> _onnxModels; + +public: + RankingAssetsRepo(const ConstantValueFactory &factory, + std::shared_ptr<const RankingConstants> constants, + std::shared_ptr<const RankingExpressions> expressions, + std::shared_ptr<const OnnxModels> models); + ~RankingAssetsRepo() override; + vespalib::eval::ConstantValue::UP getConstant(const vespalib::string &name) const override; + vespalib::string getExpression(const vespalib::string &name) const override; + const OnnxModel *getOnnxModel(const vespalib::string &name) const override; +}; + +} diff --git a/searchlib/src/vespa/searchlib/fef/ranking_constants.cpp b/searchlib/src/vespa/searchlib/fef/ranking_constants.cpp new file mode 100644 index 00000000000..1d0df1b8d94 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/ranking_constants.cpp @@ -0,0 +1,59 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "ranking_constants.h" + +namespace search::fef { + +RankingConstants::Constant::Constant(const vespalib::string &name_in, + const vespalib::string &type_in, + const vespalib::string &filePath_in) + : name(name_in), + type(type_in), + filePath(filePath_in) +{ +} + +RankingConstants::Constant::~Constant() = default; + +bool +RankingConstants::Constant::operator==(const Constant &rhs) const +{ + return (name == rhs.name) && + (type == rhs.type) && + (filePath == rhs.filePath); +} + +RankingConstants::RankingConstants() + : _constants() +{ +} + +RankingConstants::~RankingConstants() = default; +RankingConstants::RankingConstants(RankingConstants &&) noexcept = default; + +RankingConstants::RankingConstants(const Vector &constants) + : _constants() +{ + for (const auto &constant : constants) { + _constants.insert(std::make_pair(constant.name, constant)); + } +} + +bool +RankingConstants::operator==(const RankingConstants &rhs) const +{ + return _constants == rhs._constants; +} + +const RankingConstants::Constant * +RankingConstants::getConstant(const vespalib::string &name) const +{ + auto itr = _constants.find(name); + if (itr != _constants.end()) { + return &itr->second; + } + return nullptr; +} + +} + diff --git a/searchlib/src/vespa/searchlib/fef/ranking_constants.h b/searchlib/src/vespa/searchlib/fef/ranking_constants.h new file mode 100644 index 00000000000..56c124986ba --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/ranking_constants.h @@ -0,0 +1,49 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/stllike/string.h> +#include <map> +#include <vector> +#include <memory> + +namespace search::fef { + +/** + * Class representing a set of configured ranking constants, with name, type and file path (where constant is stored). + */ +class RankingConstants { +public: + struct Constant { + vespalib::string name; + vespalib::string type; + vespalib::string filePath; + + Constant(const vespalib::string &name_in, + const vespalib::string &type_in, + const vespalib::string &filePath_in); + ~Constant(); + bool operator==(const Constant &rhs) const; + }; + + using Vector = std::vector<Constant>; + +private: + using Map = std::map<vespalib::string, Constant>; + Map _constants; + +public: + using SP = std::shared_ptr<RankingConstants>; + RankingConstants(); + RankingConstants(RankingConstants &&) noexcept; + RankingConstants & operator =(RankingConstants &&) = delete; + RankingConstants(const RankingConstants &) = delete; + RankingConstants & operator =(const RankingConstants &) = delete; + explicit RankingConstants(const Vector &constants); + ~RankingConstants(); + bool operator==(const RankingConstants &rhs) const; + const Constant *getConstant(const vespalib::string &name) const; + size_t size() const { return _constants.size(); } +}; + +} diff --git a/searchlib/src/vespa/searchlib/fef/ranking_expressions.cpp b/searchlib/src/vespa/searchlib/fef/ranking_expressions.cpp new file mode 100644 index 00000000000..2b293ea7d3f --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/ranking_expressions.cpp @@ -0,0 +1,65 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "ranking_expressions.h" +#include <vespa/vespalib/io/mapped_file_input.h> +#include <vespa/vespalib/data/lz4_input_decoder.h> +#include <vespa/vespalib/util/size_literals.h> + +#include <vespa/log/log.h> +LOG_SETUP(".fef.ranking_expressions"); + +namespace search::fef { + +namespace { + +vespalib::string extract_data(vespalib::Input &input) { + vespalib::string result; + for (auto chunk = input.obtain(); chunk.size > 0; chunk = input.obtain()) { + result.append(vespalib::stringref(chunk.data, chunk.size)); + input.evict(chunk.size); + } + return result; +} + +} // unnamed + +RankingExpressions::RankingExpressions() = default; +RankingExpressions::RankingExpressions(RankingExpressions &&rhs) noexcept = default; +RankingExpressions::~RankingExpressions() = default; + +RankingExpressions & +RankingExpressions::add(const vespalib::string &name, const vespalib::string &path) +{ + _expressions.insert_or_assign(name, path); + return *this; +} + +vespalib::string +RankingExpressions::loadExpression(const vespalib::string &name) const +{ + auto pos = _expressions.find(name); + if (pos == _expressions.end()) { + LOG(warning, "no such ranking expression: '%s'", name.c_str()); + return {}; + } + auto path = pos->second; + vespalib::MappedFileInput file(path); + if (!file.valid()) { + LOG(warning, "rankexpression: %s -> could not read file: %s", name.c_str(), path.c_str()); + return {}; + } + if (ends_with(path, ".lz4")) { + size_t buffer_size = 64_Ki; + vespalib::Lz4InputDecoder lz4_decoder(file, buffer_size); + auto result = extract_data(lz4_decoder); + if (lz4_decoder.failed()) { + LOG(warning, "file contains lz4 errors (%s): %s", + lz4_decoder.reason().c_str(), path.c_str()); + return {}; + } + return result; + } + return extract_data(file); +} + +} diff --git a/searchlib/src/vespa/searchlib/fef/ranking_expressions.h b/searchlib/src/vespa/searchlib/fef/ranking_expressions.h new file mode 100644 index 00000000000..317015938e6 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/ranking_expressions.h @@ -0,0 +1,37 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/stllike/string.h> +#include <memory> +#include <map> + +namespace search::fef { + +/** + * Class representing a collection of named ranking expressions + * obtained through file-distribution. + */ +class RankingExpressions +{ +private: + // expression name -> full_path of expression file + std::map<vespalib::string,vespalib::string> _expressions; + +public: + using SP = std::shared_ptr<RankingExpressions>; + RankingExpressions(); + RankingExpressions(RankingExpressions &&rhs) noexcept; + RankingExpressions & operator=(RankingExpressions &&rhs) = delete; + RankingExpressions(const RankingExpressions &rhs) = delete; + RankingExpressions & operator=(const RankingExpressions &rhs) = delete; + ~RankingExpressions(); + bool operator==(const RankingExpressions &rhs) const { + return _expressions == rhs._expressions; + } + size_t size() const { return _expressions.size(); } + RankingExpressions &add(const vespalib::string &name, const vespalib::string &path); + vespalib::string loadExpression(const vespalib::string &name) const; +}; + +} |