diff options
-rw-r--r-- | searchlib/src/vespa/searchlib/features/queryfeature.cpp | 201 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/features/queryfeature.h | 10 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/fef/CMakeLists.txt | 1 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/fef/query_value.cpp | 229 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/fef/query_value.h | 89 |
5 files changed, 338 insertions, 192 deletions
diff --git a/searchlib/src/vespa/searchlib/features/queryfeature.cpp b/searchlib/src/vespa/searchlib/features/queryfeature.cpp index 483ba6f82b4..acf13c55c99 100644 --- a/searchlib/src/vespa/searchlib/features/queryfeature.cpp +++ b/searchlib/src/vespa/searchlib/features/queryfeature.cpp @@ -1,139 +1,24 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include "constant_tensor_executor.h" #include "queryfeature.h" #include "utils.h" #include "valuefeature.h" -#include "constant_tensor_executor.h" - -#include <vespa/document/datatype/tensor_data_type.h> #include <vespa/searchlib/fef/featureexecutor.h> -#include <vespa/searchlib/fef/indexproperties.h> -#include <vespa/searchlib/fef/properties.h> #include <vespa/searchlib/fef/feature_type.h> -#include <vespa/vespalib/objects/nbostream.h> #include <vespa/eval/eval/value_type.h> -#include <vespa/eval/eval/value_codec.h> -#include <vespa/eval/eval/fast_value.h> -#include <vespa/eval/eval/function.h> -#include <vespa/eval/eval/interpreted_function.h> -#include <vespa/vespalib/locale/c.h> -#include <vespa/vespalib/util/issue.h> -#include <cerrno> #include <vespa/log/log.h> LOG_SETUP(".features.queryfeature"); using namespace search::fef; -using namespace search::fef::indexproperties; -using document::TensorDataType; -using vespalib::eval::ValueType; -using vespalib::eval::Value; -using vespalib::eval::TensorSpec; -using vespalib::eval::Function; -using vespalib::eval::InterpretedFunction; -using vespalib::eval::NodeTypes; -using vespalib::eval::SimpleObjectParams; -using vespalib::Issue; using search::fef::FeatureType; -using search::fef::AnyWrapper; -using search::fef::Anything; - -using ValueWrapper = AnyWrapper<Value::UP>; namespace search::features { -namespace { - -/** - * Convert a string to a feature value using special quoting - * mechanics; a string that can be converted directly into a feature - * (numeric value) will be converted. 
If the string cannot be - * converted directly, it will be hashed, after stripping the leading - * "'" if it exists. - * - * @return feature value - * @param str string value to be converted - **/ -feature_t asFeature(const vespalib::string &str) { - char *end; - errno = 0; - double val = vespalib::locale::c::strtod(str.c_str(), &end); - if (errno != 0 || *end != '\0') { // not happy - if (str.size() > 0 && str[0] == '\'') { - val = vespalib::hash_code(str.substr(1)); - } else { - val = vespalib::hash_code(str); - } - } - return val; -} - -// Create an empty tensor of the given type. -Value::UP empty_tensor(const ValueType &type) { - const auto &factory = vespalib::eval::FastValueBuilderFactory::get(); - return vespalib::eval::value_from_spec(TensorSpec(type.to_spec()), factory); -} - -// Create a tensor value by evaluating a self-contained expression. -Value::UP as_tensor(const vespalib::string &expr, const ValueType &wanted_type) { - const auto &factory = vespalib::eval::FastValueBuilderFactory::get(); - auto fun = Function::parse(expr); - if (!fun->has_error() && (fun->num_params() == 0)) { - NodeTypes types = NodeTypes(*fun, {}); - ValueType res_type = types.get_type(fun->root()); - if (res_type == wanted_type) { - SimpleObjectParams params({}); - InterpretedFunction ifun(factory, *fun, types); - InterpretedFunction::Context ctx(ifun); - return factory.copy(ifun.eval(ctx, params)); - } - } - return {}; -} - -// query(foo): -// query.value.foo -> decoded tensor value 'foo' -vespalib::string make_value_key(const vespalib::string &base, const vespalib::string &sub_key) { - vespalib::string key(base); - key.append(".value."); - key.append(sub_key); - return key; -} - -} // namespace search::features::<unnamed> - -Property -QueryBlueprint::config_lookup(const IIndexEnvironment &env) const -{ - const auto &props = env.getProperties(); - auto res = props.lookup(getName()); // query(foo) - if (!res.found()) { - res = props.lookup(_old_key); // $foo - } - return res; -} 
- -Property -QueryBlueprint::request_lookup(const IQueryEnvironment &env) const -{ - const auto &props = env.getProperties(); - auto res = props.lookup(getName()); // query(foo) - if (!res.found()) { - res = props.lookup(_key); // foo - } - if (!res.found()) { - res = props.lookup(_old_key); // $foo - } - return res; -} - QueryBlueprint::QueryBlueprint() : Blueprint("query"), - _key(), - _old_key(), - _stored_value_key(), - _type(ValueType::double_type()), - _default_number_value(), + _qvalue(), _default_object_value() { } @@ -154,89 +39,37 @@ QueryBlueprint::createInstance() const bool QueryBlueprint::setup(const IIndexEnvironment &env, const ParameterList &params) { - _key = params[0].getValue(); - _old_key = "$"; - _old_key.append(_key); - _stored_value_key = make_value_key(getBaseName(), _key); - vespalib::string type_str = type::QueryFeature::lookup(env.getProperties(), _key); - if (!type_str.empty()) { - _type = ValueType::from_spec(type_str); - if (_type.is_error()) { - return fail("invalid type: '%s'", type_str.c_str()); - } - } - Property p = config_lookup(env); - if (_type.is_double()) { - if (p.found()) { - _default_number_value = asFeature(p.get()); - } - } else { - if (p.found()) { - _default_object_value = as_tensor(p.get(), _type); - if (_default_object_value.get() == nullptr) { - return fail("could not create default tensor value of type '%s' from the expression '%s'", - _type.to_spec().c_str(), p.get().c_str()); - } - } else { - _default_object_value = empty_tensor(_type); - } + try { + _qvalue = QueryValue::from_config(params[0].getValue(), env); + _default_object_value = _qvalue.make_default_value(env); + } catch (const InvalidValueTypeException& ex) { + return fail("invalid type: '%s'", ex.type_str().c_str()); + } catch (const InvalidTensorValueException& ex) { + return fail("could not create default tensor value of type '%s' from the expression '%s'", + _qvalue.type().to_spec().c_str(), ex.expr().c_str()); } - FeatureType output_type = 
_type.is_double() ? FeatureType::number() : FeatureType::object(_type); + const auto& type = _qvalue.type(); + FeatureType output_type = type.is_double() ? FeatureType::number() : FeatureType::object(type); describeOutput("out", "The value looked up in query properties using the given key.", output_type); - assert(_type.has_dimensions() == (_default_object_value.get() != nullptr)); return true; } -namespace { - -Value::UP decode_tensor_value(Property prop, const ValueType &valueType) { - if (prop.found() && !prop.get().empty()) { - const vespalib::string &value = prop.get(); - vespalib::nbostream stream(value.data(), value.size()); - try { - auto tensor = vespalib::eval::decode_value(stream, vespalib::eval::FastValueBuilderFactory::get()); - if (TensorDataType::isAssignableType(valueType, tensor->type())) { - return tensor; - } else { - Issue::report("Query feature type is '%s' but other tensor type is '%s'", - valueType.to_spec().c_str(), tensor->type().to_spec().c_str()); - } - } catch (const vespalib::eval::DecodeValueException &e) { - Issue::report("Query feature has invalid binary format: %s", e.what()); - } - } - return {}; -} - -} - void QueryBlueprint::prepareSharedState(const fef::IQueryEnvironment &env, fef::IObjectStore &store) const { - if (!_stored_value_key.empty() && _type.has_dimensions() && (store.get(_stored_value_key) == nullptr)) { - if (auto value = decode_tensor_value(request_lookup(env), _type)) { - store.add(_stored_value_key, std::make_unique<ValueWrapper>(std::move(value))); - } - } + _qvalue.prepare_shared_state(env, store); } FeatureExecutor & QueryBlueprint::createExecutor(const IQueryEnvironment &env, vespalib::Stash &stash) const { - if (_type.has_dimensions()) { - if (const Anything *wrapped_value = env.getObjectStore().get(_stored_value_key)) { - if (const Value *value = ValueWrapper::getValue(*wrapped_value).get()) { - return stash.create<ConstantTensorRefExecutor>(*value); - } + if (_qvalue.type().has_dimensions()) { + if (const 
vespalib::eval::Value *value = _qvalue.lookup_value(env.getObjectStore())) { + return stash.create<ConstantTensorRefExecutor>(*value); } return stash.create<ConstantTensorRefExecutor>(*_default_object_value); } else { - auto p = request_lookup(env); - if (p.found()) { - return stash.create<SingleValueExecutor>(asFeature(p.get())); - } else { - return stash.create<SingleValueExecutor>(_default_number_value); - } + return stash.create<SingleValueExecutor>(_qvalue.lookup_number(env, _default_object_value->as_double())); } } diff --git a/searchlib/src/vespa/searchlib/features/queryfeature.h b/searchlib/src/vespa/searchlib/features/queryfeature.h index 020fd31989d..ae77a7d660f 100644 --- a/searchlib/src/vespa/searchlib/features/queryfeature.h +++ b/searchlib/src/vespa/searchlib/features/queryfeature.h @@ -4,6 +4,7 @@ #include <vespa/searchlib/fef/blueprint.h> #include <vespa/searchlib/fef/properties.h> +#include <vespa/searchlib/fef/query_value.h> #include <vespa/eval/eval/value_type.h> #include <vespa/eval/eval/value.h> @@ -17,16 +18,9 @@ namespace search::features { */ class QueryBlueprint : public fef::Blueprint { private: - vespalib::string _key; // 'foo' - vespalib::string _old_key; // '$foo' - vespalib::string _stored_value_key; // query.value.foo - vespalib::eval::ValueType _type; - feature_t _default_number_value; + search::fef::QueryValue _qvalue; vespalib::eval::Value::UP _default_object_value; - fef::Property config_lookup(const fef::IIndexEnvironment &env) const; - fef::Property request_lookup(const fef::IQueryEnvironment &env) const; - public: QueryBlueprint(); ~QueryBlueprint(); diff --git a/searchlib/src/vespa/searchlib/fef/CMakeLists.txt b/searchlib/src/vespa/searchlib/fef/CMakeLists.txt index 8968e4e347d..ba4430ff8e6 100644 --- a/searchlib/src/vespa/searchlib/fef/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/fef/CMakeLists.txt @@ -26,6 +26,7 @@ vespa_add_library(searchlib_fef OBJECT phrase_splitter_query_env.cpp phrasesplitter.cpp properties.cpp + 
query_value.cpp queryproperties.cpp rank_program.cpp ranksetup.cpp diff --git a/searchlib/src/vespa/searchlib/fef/query_value.cpp b/searchlib/src/vespa/searchlib/fef/query_value.cpp new file mode 100644 index 00000000000..d9cdb0aa23d --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/query_value.cpp @@ -0,0 +1,229 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "iindexenvironment.h" +#include "indexproperties.h" +#include "iqueryenvironment.h" +#include "query_value.h" +#include <vespa/document/datatype/tensor_data_type.h> +#include <vespa/eval/eval/fast_value.h> +#include <vespa/eval/eval/interpreted_function.h> +#include <vespa/eval/eval/value.h> +#include <vespa/eval/eval/value_codec.h> +#include <vespa/vespalib/locale/c.h> +#include <vespa/vespalib/util/issue.h> +#include <vespa/vespalib/util/string_hash.h> +#include <cerrno> + +using document::TensorDataType; +using vespalib::Issue; +using vespalib::eval::DoubleValue; +using vespalib::eval::Function; +using vespalib::eval::InterpretedFunction; +using vespalib::eval::NodeTypes; +using vespalib::eval::SimpleObjectParams; +using vespalib::eval::TensorSpec; +using vespalib::eval::Value; +using vespalib::eval::ValueType; + +using namespace search::fef::indexproperties; + +namespace search::fef { + +using ValueWrapper = AnyWrapper<Value::UP>; + +InvalidValueTypeException::InvalidValueTypeException(const vespalib::string& query_key, const vespalib::string& type_str_in) + : vespalib::Exception("Invalid type '" + type_str_in + "' for query value '" + query_key + "'"), + _type_str(type_str_in) +{ +} + +InvalidTensorValueException::InvalidTensorValueException(const vespalib::eval::ValueType& type, const vespalib::string& expr_in) + : vespalib::Exception("Could not create tensor value of type '" + type.to_spec() + "' from the expression '" + expr_in + "'"), + _expr(expr_in) +{ +} + +namespace { + +/** + * Convert a string to a feature value using 
special quoting mechanics; + * a string that can be converted directly into a feature + * (numeric value) will be converted. If the string cannot be + * converted directly, it will be hashed, after stripping the leading + * "'" if it exists. + */ +feature_t +as_feature(const vespalib::string& str) +{ + char *end; + errno = 0; + double val = vespalib::locale::c::strtod(str.c_str(), &end); + if (errno != 0 || *end != '\0') { // not happy + if (str.size() > 0 && str[0] == '\'') { + val = vespalib::hash_code(str.substr(1)); + } else { + val = vespalib::hash_code(str); + } + } + return val; +} + +// Create an empty tensor of the given type. +std::unique_ptr<Value> +empty_tensor(const ValueType& type) +{ + const auto& factory = vespalib::eval::FastValueBuilderFactory::get(); + return vespalib::eval::value_from_spec(TensorSpec(type.to_spec()), factory); +} + +// Create a tensor value by evaluating a self-contained expression. +std::unique_ptr<Value> +as_tensor(const vespalib::string& expr, const ValueType& wanted_type) +{ + const auto& factory = vespalib::eval::FastValueBuilderFactory::get(); + auto fun = Function::parse(expr); + if (!fun->has_error() && (fun->num_params() == 0)) { + NodeTypes types = NodeTypes(*fun, {}); + ValueType res_type = types.get_type(fun->root()); + if (res_type == wanted_type) { + SimpleObjectParams params({}); + InterpretedFunction ifun(factory, *fun, types); + InterpretedFunction::Context ctx(ifun); + return factory.copy(ifun.eval(ctx, params)); + } + } + return {}; +} + +std::unique_ptr<Value> +decode_tensor_value(Property prop, const ValueType& value_type) +{ + if (prop.found() && !prop.get().empty()) { + const vespalib::string& value = prop.get(); + vespalib::nbostream stream(value.data(), value.size()); + try { + auto tensor = vespalib::eval::decode_value(stream, vespalib::eval::FastValueBuilderFactory::get()); + if (TensorDataType::isAssignableType(value_type, tensor->type())) { + return tensor; + } else { + Issue::report("Query value 
type is '%s' but decoded tensor type is '%s'", + value_type.to_spec().c_str(), tensor->type().to_spec().c_str()); + } + } catch (const vespalib::eval::DecodeValueException& e) { + Issue::report("Query value has invalid binary format: %s", e.what()); + } + } + return {}; +} + +} + +Property +QueryValue::config_lookup(const IIndexEnvironment& env) const +{ + const auto& props = env.getProperties(); + auto res = props.lookup(_name); // query(foo) + if (!res.found()) { + res = props.lookup(_old_key); // $foo + } + return res; +} + +Property +QueryValue::request_lookup(const IQueryEnvironment& env) const +{ + const auto& props = env.getProperties(); + auto res = props.lookup(_name); // query(foo) + if (!res.found()) { + res = props.lookup(_key); // foo + } + if (!res.found()) { + res = props.lookup(_old_key); // $foo + } + return res; +} + +QueryValue::QueryValue() + : _key(), + _name(), + _old_key(), + _stored_value_key(), + _type(ValueType::double_type()) +{ +} + +QueryValue::QueryValue(const vespalib::string& key, const vespalib::eval::ValueType& type) + : _key(key), + _name("query(" + key + ")"), + _old_key("$" + key), + _stored_value_key("query.value." + key), + _type(type) +{ +} + +QueryValue +QueryValue::from_config(const vespalib::string& key, const IIndexEnvironment& env) +{ + vespalib::string type_str = type::QueryFeature::lookup(env.getProperties(), key); + ValueType type = type_str.empty() ? 
ValueType::double_type() : ValueType::from_spec(type_str); + if (type.is_error()) { + throw InvalidValueTypeException(key, type_str); + } + return {key, type}; +} + +std::unique_ptr<Value> +QueryValue::make_default_value(const IIndexEnvironment& env) const +{ + Property p = config_lookup(env); + if (_type.is_double()) { + if (p.found()) { + return std::make_unique<DoubleValue>(as_feature(p.get())); + } else { + return std::make_unique<DoubleValue>(0); + } + } else { + if (p.found()) { + auto tensor = as_tensor(p.get(), _type); + if (tensor.get() == nullptr) { + throw InvalidTensorValueException(_type, p.get().c_str()); + } + return tensor; + } else { + return empty_tensor(_type); + } + } +} + +void +QueryValue::prepare_shared_state(const fef::IQueryEnvironment& env, fef::IObjectStore& store) const +{ + if (!_stored_value_key.empty() && _type.has_dimensions() && (store.get(_stored_value_key) == nullptr)) { + if (auto value = decode_tensor_value(request_lookup(env), _type)) { + store.add(_stored_value_key, std::make_unique<ValueWrapper>(std::move(value))); + } + } +} + +const Value* +QueryValue::lookup_value(const fef::IObjectStore& store) const +{ + if (const Anything* wrapped_value = store.get(_stored_value_key)) { + return ValueWrapper::getValue(*wrapped_value).get(); + } + return nullptr; +} + +double +QueryValue::lookup_number(const fef::IQueryEnvironment& env, double default_value) const +{ + assert(!_type.has_dimensions()); + auto p = request_lookup(env); + if (p.found()) { + return as_feature(p.get()); + } + return default_value; +} + +} + diff --git a/searchlib/src/vespa/searchlib/fef/query_value.h b/searchlib/src/vespa/searchlib/fef/query_value.h new file mode 100644 index 00000000000..477b6aa451f --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/query_value.h @@ -0,0 +1,89 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "properties.h" +#include <vespa/eval/eval/value_type.h> +#include <vespa/vespalib/util/exception.h> +#include <vespa/vespalib/stllike/string.h> +#include <memory> + +namespace vespalib::eval { struct Value; } + +namespace search::fef { + +class IIndexEnvironment; +class IQueryEnvironment; + +/** + * Exception for when the value type is an error. + */ +class InvalidValueTypeException : public vespalib::Exception { +private: + vespalib::string _type_str; + +public: + InvalidValueTypeException(const vespalib::string& query_key, const vespalib::string& type_str_in); + const vespalib::string& type_str() const { return _type_str; } +}; + +/** + * Exception for when a tensor value could not be created from an expression. + */ +class InvalidTensorValueException : public vespalib::Exception { +private: + vespalib::string _expr; + +public: + InvalidTensorValueException(const vespalib::eval::ValueType& type, const vespalib::string& expr_in); + const vespalib::string& expr() const { return _expr; } +}; + +/** + * Class representing a vespalib::eval::Value (number or tensor) passed down with the query. + * + * The value type and optional default value are defined in IIndexEnvironment properties and extracted at config time. + * Per query, the value is extracted from IQueryEnvironment properties. This is stored in the shared IObjectStore. + */ +class QueryValue { +private: + vespalib::string _key; // 'foo' + vespalib::string _name; // 'query(foo)' + vespalib::string _old_key; // '$foo' + vespalib::string _stored_value_key; // query.value.foo + vespalib::eval::ValueType _type; + + Property config_lookup(const IIndexEnvironment& env) const; + Property request_lookup(const IQueryEnvironment& env) const; + +public: + QueryValue(); + QueryValue(const vespalib::string& key, const vespalib::eval::ValueType& type); + + /** + * Create a QueryValue using properties from the given index environment to extract the value type. 
+ * + * Throws InvalidValueTypeException if the value type is an error. + */ + static QueryValue from_config(const vespalib::string& key, const IIndexEnvironment& env); + + const vespalib::eval::ValueType& type() const { return _type; } + + /** + * Create a default value based on properties from the given index environment. + * + * An empty value is created if not found. + * Throws InvalidTensorValueException if a tensor value could not be created. + */ + std::unique_ptr<vespalib::eval::Value> make_default_value(const IIndexEnvironment& env) const; + + void prepare_shared_state(const fef::IQueryEnvironment& env, fef::IObjectStore& store) const; + + const vespalib::eval::Value* lookup_value(const fef::IObjectStore& store) const; + + double lookup_number(const fef::IQueryEnvironment& env, double default_value) const; + +}; + +} + |