summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Geir Storli <geirst@yahooinc.com> 2022-07-04 16:13:39 +0200
committer GitHub <noreply@github.com> 2022-07-04 16:13:39 +0200
commit b8e236091b22552fc0dea58ff1a038831b42a81d (patch)
tree cb4fbe64cbab4e58069f24341d6b78a206387f63
parent b48c604d2702ba59fc808ac6c8dda7c2c7b970fc (diff)
parent 27d554bdc150f26fab89c25dcd0d93e82bbaf248 (diff)
Merge pull request #23344 from vespa-engine/geirst/common-code-to-access-query-values
Refactor out code used to look up a query value for re-use in other places.
-rw-r--r-- searchlib/src/vespa/searchlib/features/queryfeature.cpp 201
-rw-r--r-- searchlib/src/vespa/searchlib/features/queryfeature.h 10
-rw-r--r-- searchlib/src/vespa/searchlib/fef/CMakeLists.txt 1
-rw-r--r-- searchlib/src/vespa/searchlib/fef/query_value.cpp 229
-rw-r--r-- searchlib/src/vespa/searchlib/fef/query_value.h 89
5 files changed, 338 insertions, 192 deletions
diff --git a/searchlib/src/vespa/searchlib/features/queryfeature.cpp b/searchlib/src/vespa/searchlib/features/queryfeature.cpp
index 483ba6f82b4..acf13c55c99 100644
--- a/searchlib/src/vespa/searchlib/features/queryfeature.cpp
+++ b/searchlib/src/vespa/searchlib/features/queryfeature.cpp
@@ -1,139 +1,24 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include "constant_tensor_executor.h"
#include "queryfeature.h"
#include "utils.h"
#include "valuefeature.h"
-#include "constant_tensor_executor.h"
-
-#include <vespa/document/datatype/tensor_data_type.h>
#include <vespa/searchlib/fef/featureexecutor.h>
-#include <vespa/searchlib/fef/indexproperties.h>
-#include <vespa/searchlib/fef/properties.h>
#include <vespa/searchlib/fef/feature_type.h>
-#include <vespa/vespalib/objects/nbostream.h>
#include <vespa/eval/eval/value_type.h>
-#include <vespa/eval/eval/value_codec.h>
-#include <vespa/eval/eval/fast_value.h>
-#include <vespa/eval/eval/function.h>
-#include <vespa/eval/eval/interpreted_function.h>
-#include <vespa/vespalib/locale/c.h>
-#include <vespa/vespalib/util/issue.h>
-#include <cerrno>
#include <vespa/log/log.h>
LOG_SETUP(".features.queryfeature");
using namespace search::fef;
-using namespace search::fef::indexproperties;
-using document::TensorDataType;
-using vespalib::eval::ValueType;
-using vespalib::eval::Value;
-using vespalib::eval::TensorSpec;
-using vespalib::eval::Function;
-using vespalib::eval::InterpretedFunction;
-using vespalib::eval::NodeTypes;
-using vespalib::eval::SimpleObjectParams;
-using vespalib::Issue;
using search::fef::FeatureType;
-using search::fef::AnyWrapper;
-using search::fef::Anything;
-
-using ValueWrapper = AnyWrapper<Value::UP>;
namespace search::features {
-namespace {
-
-/**
- * Convert a string to a feature value using special quoting
- * mechanics; a string that can be converted directly into a feature
- * (numeric value) will be converted. If the string cannot be
- * converted directly, it will be hashed, after stripping the leading
- * "'" if it exists.
- *
- * @return feature value
- * @param str string value to be converted
- **/
-feature_t asFeature(const vespalib::string &str) {
- char *end;
- errno = 0;
- double val = vespalib::locale::c::strtod(str.c_str(), &end);
- if (errno != 0 || *end != '\0') { // not happy
- if (str.size() > 0 && str[0] == '\'') {
- val = vespalib::hash_code(str.substr(1));
- } else {
- val = vespalib::hash_code(str);
- }
- }
- return val;
-}
-
-// Create an empty tensor of the given type.
-Value::UP empty_tensor(const ValueType &type) {
- const auto &factory = vespalib::eval::FastValueBuilderFactory::get();
- return vespalib::eval::value_from_spec(TensorSpec(type.to_spec()), factory);
-}
-
-// Create a tensor value by evaluating a self-contained expression.
-Value::UP as_tensor(const vespalib::string &expr, const ValueType &wanted_type) {
- const auto &factory = vespalib::eval::FastValueBuilderFactory::get();
- auto fun = Function::parse(expr);
- if (!fun->has_error() && (fun->num_params() == 0)) {
- NodeTypes types = NodeTypes(*fun, {});
- ValueType res_type = types.get_type(fun->root());
- if (res_type == wanted_type) {
- SimpleObjectParams params({});
- InterpretedFunction ifun(factory, *fun, types);
- InterpretedFunction::Context ctx(ifun);
- return factory.copy(ifun.eval(ctx, params));
- }
- }
- return {};
-}
-
-// query(foo):
-// query.value.foo -> decoded tensor value 'foo'
-vespalib::string make_value_key(const vespalib::string &base, const vespalib::string &sub_key) {
- vespalib::string key(base);
- key.append(".value.");
- key.append(sub_key);
- return key;
-}
-
-} // namespace search::features::<unnamed>
-
-Property
-QueryBlueprint::config_lookup(const IIndexEnvironment &env) const
-{
- const auto &props = env.getProperties();
- auto res = props.lookup(getName()); // query(foo)
- if (!res.found()) {
- res = props.lookup(_old_key); // $foo
- }
- return res;
-}
-
-Property
-QueryBlueprint::request_lookup(const IQueryEnvironment &env) const
-{
- const auto &props = env.getProperties();
- auto res = props.lookup(getName()); // query(foo)
- if (!res.found()) {
- res = props.lookup(_key); // foo
- }
- if (!res.found()) {
- res = props.lookup(_old_key); // $foo
- }
- return res;
-}
-
QueryBlueprint::QueryBlueprint()
: Blueprint("query"),
- _key(),
- _old_key(),
- _stored_value_key(),
- _type(ValueType::double_type()),
- _default_number_value(),
+ _qvalue(),
_default_object_value()
{
}
@@ -154,89 +39,37 @@ QueryBlueprint::createInstance() const
bool
QueryBlueprint::setup(const IIndexEnvironment &env, const ParameterList &params)
{
- _key = params[0].getValue();
- _old_key = "$";
- _old_key.append(_key);
- _stored_value_key = make_value_key(getBaseName(), _key);
- vespalib::string type_str = type::QueryFeature::lookup(env.getProperties(), _key);
- if (!type_str.empty()) {
- _type = ValueType::from_spec(type_str);
- if (_type.is_error()) {
- return fail("invalid type: '%s'", type_str.c_str());
- }
- }
- Property p = config_lookup(env);
- if (_type.is_double()) {
- if (p.found()) {
- _default_number_value = asFeature(p.get());
- }
- } else {
- if (p.found()) {
- _default_object_value = as_tensor(p.get(), _type);
- if (_default_object_value.get() == nullptr) {
- return fail("could not create default tensor value of type '%s' from the expression '%s'",
- _type.to_spec().c_str(), p.get().c_str());
- }
- } else {
- _default_object_value = empty_tensor(_type);
- }
+ try {
+ _qvalue = QueryValue::from_config(params[0].getValue(), env);
+ _default_object_value = _qvalue.make_default_value(env);
+ } catch (const InvalidValueTypeException& ex) {
+ return fail("invalid type: '%s'", ex.type_str().c_str());
+ } catch (const InvalidTensorValueException& ex) {
+ return fail("could not create default tensor value of type '%s' from the expression '%s'",
+ _qvalue.type().to_spec().c_str(), ex.expr().c_str());
}
- FeatureType output_type = _type.is_double() ? FeatureType::number() : FeatureType::object(_type);
+ const auto& type = _qvalue.type();
+ FeatureType output_type = type.is_double() ? FeatureType::number() : FeatureType::object(type);
describeOutput("out", "The value looked up in query properties using the given key.", output_type);
- assert(_type.has_dimensions() == (_default_object_value.get() != nullptr));
return true;
}
-namespace {
-
-Value::UP decode_tensor_value(Property prop, const ValueType &valueType) {
- if (prop.found() && !prop.get().empty()) {
- const vespalib::string &value = prop.get();
- vespalib::nbostream stream(value.data(), value.size());
- try {
- auto tensor = vespalib::eval::decode_value(stream, vespalib::eval::FastValueBuilderFactory::get());
- if (TensorDataType::isAssignableType(valueType, tensor->type())) {
- return tensor;
- } else {
- Issue::report("Query feature type is '%s' but other tensor type is '%s'",
- valueType.to_spec().c_str(), tensor->type().to_spec().c_str());
- }
- } catch (const vespalib::eval::DecodeValueException &e) {
- Issue::report("Query feature has invalid binary format: %s", e.what());
- }
- }
- return {};
-}
-
-}
-
void
QueryBlueprint::prepareSharedState(const fef::IQueryEnvironment &env, fef::IObjectStore &store) const
{
- if (!_stored_value_key.empty() && _type.has_dimensions() && (store.get(_stored_value_key) == nullptr)) {
- if (auto value = decode_tensor_value(request_lookup(env), _type)) {
- store.add(_stored_value_key, std::make_unique<ValueWrapper>(std::move(value)));
- }
- }
+ _qvalue.prepare_shared_state(env, store);
}
FeatureExecutor &
QueryBlueprint::createExecutor(const IQueryEnvironment &env, vespalib::Stash &stash) const
{
- if (_type.has_dimensions()) {
- if (const Anything *wrapped_value = env.getObjectStore().get(_stored_value_key)) {
- if (const Value *value = ValueWrapper::getValue(*wrapped_value).get()) {
- return stash.create<ConstantTensorRefExecutor>(*value);
- }
+ if (_qvalue.type().has_dimensions()) {
+ if (const vespalib::eval::Value *value = _qvalue.lookup_value(env.getObjectStore())) {
+ return stash.create<ConstantTensorRefExecutor>(*value);
}
return stash.create<ConstantTensorRefExecutor>(*_default_object_value);
} else {
- auto p = request_lookup(env);
- if (p.found()) {
- return stash.create<SingleValueExecutor>(asFeature(p.get()));
- } else {
- return stash.create<SingleValueExecutor>(_default_number_value);
- }
+ return stash.create<SingleValueExecutor>(_qvalue.lookup_number(env, _default_object_value->as_double()));
}
}
diff --git a/searchlib/src/vespa/searchlib/features/queryfeature.h b/searchlib/src/vespa/searchlib/features/queryfeature.h
index 020fd31989d..ae77a7d660f 100644
--- a/searchlib/src/vespa/searchlib/features/queryfeature.h
+++ b/searchlib/src/vespa/searchlib/features/queryfeature.h
@@ -4,6 +4,7 @@
#include <vespa/searchlib/fef/blueprint.h>
#include <vespa/searchlib/fef/properties.h>
+#include <vespa/searchlib/fef/query_value.h>
#include <vespa/eval/eval/value_type.h>
#include <vespa/eval/eval/value.h>
@@ -17,16 +18,9 @@ namespace search::features {
*/
class QueryBlueprint : public fef::Blueprint {
private:
- vespalib::string _key; // 'foo'
- vespalib::string _old_key; // '$foo'
- vespalib::string _stored_value_key; // query.value.foo
- vespalib::eval::ValueType _type;
- feature_t _default_number_value;
+ search::fef::QueryValue _qvalue;
vespalib::eval::Value::UP _default_object_value;
- fef::Property config_lookup(const fef::IIndexEnvironment &env) const;
- fef::Property request_lookup(const fef::IQueryEnvironment &env) const;
-
public:
QueryBlueprint();
~QueryBlueprint();
diff --git a/searchlib/src/vespa/searchlib/fef/CMakeLists.txt b/searchlib/src/vespa/searchlib/fef/CMakeLists.txt
index 8968e4e347d..ba4430ff8e6 100644
--- a/searchlib/src/vespa/searchlib/fef/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/fef/CMakeLists.txt
@@ -26,6 +26,7 @@ vespa_add_library(searchlib_fef OBJECT
phrase_splitter_query_env.cpp
phrasesplitter.cpp
properties.cpp
+ query_value.cpp
queryproperties.cpp
rank_program.cpp
ranksetup.cpp
diff --git a/searchlib/src/vespa/searchlib/fef/query_value.cpp b/searchlib/src/vespa/searchlib/fef/query_value.cpp
new file mode 100644
index 00000000000..d9cdb0aa23d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/query_value.cpp
@@ -0,0 +1,229 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "iindexenvironment.h"
+#include "indexproperties.h"
+#include "iqueryenvironment.h"
+#include "query_value.h"
+#include <vespa/document/datatype/tensor_data_type.h>
+#include <vespa/eval/eval/fast_value.h>
+#include <vespa/eval/eval/interpreted_function.h>
+#include <vespa/eval/eval/value.h>
+#include <vespa/eval/eval/value_codec.h>
+#include <vespa/vespalib/locale/c.h>
+#include <vespa/vespalib/util/issue.h>
+#include <vespa/vespalib/util/string_hash.h>
+#include <cerrno>
+
+using document::TensorDataType;
+using vespalib::Issue;
+using vespalib::eval::DoubleValue;
+using vespalib::eval::Function;
+using vespalib::eval::InterpretedFunction;
+using vespalib::eval::NodeTypes;
+using vespalib::eval::SimpleObjectParams;
+using vespalib::eval::TensorSpec;
+using vespalib::eval::Value;
+using vespalib::eval::ValueType;
+
+using namespace search::fef::indexproperties;
+
+namespace search::fef {
+
+using ValueWrapper = AnyWrapper<Value::UP>;
+
+InvalidValueTypeException::InvalidValueTypeException(const vespalib::string& query_key, const vespalib::string& type_str_in)
+ : vespalib::Exception("Invalid type '" + type_str_in + "' for query value '" + query_key + "'"),
+ _type_str(type_str_in)
+{
+}
+
+InvalidTensorValueException::InvalidTensorValueException(const vespalib::eval::ValueType& type, const vespalib::string& expr_in)
+ : vespalib::Exception("Could not create tensor value of type '" + type.to_spec() + "' from the expression '" + expr_in + "'"),
+ _expr(expr_in)
+{
+}
+
+namespace {
+
+/**
+ * Convert a string to a feature value using special quoting mechanics;
+ * a string that can be converted directly into a feature
+ * (numeric value) will be converted. If the string cannot be
+ * converted directly, it will be hashed, after stripping the leading
+ * "'" if it exists.
+ */
+feature_t
+as_feature(const vespalib::string& str)
+{
+ char *end;
+ errno = 0;
+ double val = vespalib::locale::c::strtod(str.c_str(), &end);
+ if (errno != 0 || *end != '\0') { // not happy
+ if (str.size() > 0 && str[0] == '\'') {
+ val = vespalib::hash_code(str.substr(1));
+ } else {
+ val = vespalib::hash_code(str);
+ }
+ }
+ return val;
+}
+
+// Create an empty tensor of the given type.
+std::unique_ptr<Value>
+empty_tensor(const ValueType& type)
+{
+ const auto& factory = vespalib::eval::FastValueBuilderFactory::get();
+ return vespalib::eval::value_from_spec(TensorSpec(type.to_spec()), factory);
+}
+
+// Create a tensor value by evaluating a self-contained expression.
+std::unique_ptr<Value>
+as_tensor(const vespalib::string& expr, const ValueType& wanted_type)
+{
+ const auto& factory = vespalib::eval::FastValueBuilderFactory::get();
+ auto fun = Function::parse(expr);
+ if (!fun->has_error() && (fun->num_params() == 0)) {
+ NodeTypes types = NodeTypes(*fun, {});
+ ValueType res_type = types.get_type(fun->root());
+ if (res_type == wanted_type) {
+ SimpleObjectParams params({});
+ InterpretedFunction ifun(factory, *fun, types);
+ InterpretedFunction::Context ctx(ifun);
+ return factory.copy(ifun.eval(ctx, params));
+ }
+ }
+ return {};
+}
+
+std::unique_ptr<Value>
+decode_tensor_value(Property prop, const ValueType& value_type)
+{
+ if (prop.found() && !prop.get().empty()) {
+ const vespalib::string& value = prop.get();
+ vespalib::nbostream stream(value.data(), value.size());
+ try {
+ auto tensor = vespalib::eval::decode_value(stream, vespalib::eval::FastValueBuilderFactory::get());
+ if (TensorDataType::isAssignableType(value_type, tensor->type())) {
+ return tensor;
+ } else {
+ Issue::report("Query value type is '%s' but decoded tensor type is '%s'",
+ value_type.to_spec().c_str(), tensor->type().to_spec().c_str());
+ }
+ } catch (const vespalib::eval::DecodeValueException& e) {
+ Issue::report("Query value has invalid binary format: %s", e.what());
+ }
+ }
+ return {};
+}
+
+}
+
+Property
+QueryValue::config_lookup(const IIndexEnvironment& env) const
+{
+ const auto& props = env.getProperties();
+ auto res = props.lookup(_name); // query(foo)
+ if (!res.found()) {
+ res = props.lookup(_old_key); // $foo
+ }
+ return res;
+}
+
+Property
+QueryValue::request_lookup(const IQueryEnvironment& env) const
+{
+ const auto& props = env.getProperties();
+ auto res = props.lookup(_name); // query(foo)
+ if (!res.found()) {
+ res = props.lookup(_key); // foo
+ }
+ if (!res.found()) {
+ res = props.lookup(_old_key); // $foo
+ }
+ return res;
+}
+
+QueryValue::QueryValue()
+ : _key(),
+ _name(),
+ _old_key(),
+ _stored_value_key(),
+ _type(ValueType::double_type())
+{
+}
+
+QueryValue::QueryValue(const vespalib::string& key, const vespalib::eval::ValueType& type)
+ : _key(key),
+ _name("query(" + key + ")"),
+ _old_key("$" + key),
+ _stored_value_key("query.value." + key),
+ _type(type)
+{
+}
+
+QueryValue
+QueryValue::from_config(const vespalib::string& key, const IIndexEnvironment& env)
+{
+ vespalib::string type_str = type::QueryFeature::lookup(env.getProperties(), key);
+ ValueType type = type_str.empty() ? ValueType::double_type() : ValueType::from_spec(type_str);
+ if (type.is_error()) {
+ throw InvalidValueTypeException(key, type_str);
+ }
+ return {key, type};
+}
+
+std::unique_ptr<Value>
+QueryValue::make_default_value(const IIndexEnvironment& env) const
+{
+ Property p = config_lookup(env);
+ if (_type.is_double()) {
+ if (p.found()) {
+ return std::make_unique<DoubleValue>(as_feature(p.get()));
+ } else {
+ return std::make_unique<DoubleValue>(0);
+ }
+ } else {
+ if (p.found()) {
+ auto tensor = as_tensor(p.get(), _type);
+ if (tensor.get() == nullptr) {
+ throw InvalidTensorValueException(_type, p.get().c_str());
+ }
+ return tensor;
+ } else {
+ return empty_tensor(_type);
+ }
+ }
+}
+
+void
+QueryValue::prepare_shared_state(const fef::IQueryEnvironment& env, fef::IObjectStore& store) const
+{
+ if (!_stored_value_key.empty() && _type.has_dimensions() && (store.get(_stored_value_key) == nullptr)) {
+ if (auto value = decode_tensor_value(request_lookup(env), _type)) {
+ store.add(_stored_value_key, std::make_unique<ValueWrapper>(std::move(value)));
+ }
+ }
+}
+
+const Value*
+QueryValue::lookup_value(const fef::IObjectStore& store) const
+{
+ if (const Anything* wrapped_value = store.get(_stored_value_key)) {
+ return ValueWrapper::getValue(*wrapped_value).get();
+ }
+ return nullptr;
+}
+
+double
+QueryValue::lookup_number(const fef::IQueryEnvironment& env, double default_value) const
+{
+ assert(!_type.has_dimensions());
+ auto p = request_lookup(env);
+ if (p.found()) {
+ return as_feature(p.get());
+ }
+ return default_value;
+}
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/fef/query_value.h b/searchlib/src/vespa/searchlib/fef/query_value.h
new file mode 100644
index 00000000000..477b6aa451f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/query_value.h
@@ -0,0 +1,89 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "properties.h"
+#include <vespa/eval/eval/value_type.h>
+#include <vespa/vespalib/util/exception.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <memory>
+
+namespace vespalib::eval { struct Value; }
+
+namespace search::fef {
+
+class IIndexEnvironment;
+class IQueryEnvironment;
+
+/**
+ * Exception for when the value type is an error.
+ */
+class InvalidValueTypeException : public vespalib::Exception {
+private:
+ vespalib::string _type_str;
+
+public:
+ InvalidValueTypeException(const vespalib::string& query_key, const vespalib::string& type_str_in);
+ const vespalib::string& type_str() const { return _type_str; }
+};
+
+/**
+ * Exception for when a tensor value could not be created from an expression.
+ */
+class InvalidTensorValueException : public vespalib::Exception {
+private:
+ vespalib::string _expr;
+
+public:
+ InvalidTensorValueException(const vespalib::eval::ValueType& type, const vespalib::string& expr_in);
+ const vespalib::string& expr() const { return _expr; }
+};
+
+/**
+ * Class representing a vespalib::eval::Value (number or tensor) passed down with the query.
+ *
+ * The value type and optional default value are defined in IIndexEnvironment properties and extracted at config time.
+ * Per query, the value is extracted from IQueryEnvironment properties. A decoded tensor value is stored in the shared IObjectStore; a number is looked up directly from the properties.
+ */
+class QueryValue {
+private:
+ vespalib::string _key; // 'foo'
+ vespalib::string _name; // 'query(foo)'
+ vespalib::string _old_key; // '$foo'
+ vespalib::string _stored_value_key; // query.value.foo
+ vespalib::eval::ValueType _type;
+
+ Property config_lookup(const IIndexEnvironment& env) const;
+ Property request_lookup(const IQueryEnvironment& env) const;
+
+public:
+ QueryValue();
+ QueryValue(const vespalib::string& key, const vespalib::eval::ValueType& type);
+
+ /**
+ * Create a QueryValue using properties from the given index environment to extract the value type.
+ *
+ * Throws InvalidValueTypeException if the value type is an error.
+ */
+ static QueryValue from_config(const vespalib::string& key, const IIndexEnvironment& env);
+
+ const vespalib::eval::ValueType& type() const { return _type; }
+
+ /**
+ * Create a default value based on properties from the given index environment.
+ *
+ * If no default is configured, a number defaults to 0 and a tensor defaults to an empty tensor of the value type.
+ * Throws InvalidTensorValueException if a tensor value could not be created.
+ */
+ std::unique_ptr<vespalib::eval::Value> make_default_value(const IIndexEnvironment& env) const;
+
+ void prepare_shared_state(const fef::IQueryEnvironment& env, fef::IObjectStore& store) const;
+
+ const vespalib::eval::Value* lookup_value(const fef::IObjectStore& store) const;
+
+ double lookup_number(const fef::IQueryEnvironment& env, double default_value) const;
+
+};
+
+}
+