aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2023-08-07 18:24:54 +0200
committer GitHub <noreply@github.com> 2023-08-07 18:24:54 +0200
commit 217271fe10d5405d0a08adbb45b6a36cb4014f30 (patch)
tree 2cf9b4ae368c43fc0bb6504d5f9f9e2f082184a8
parent ce5a6030cca10e3c4f0293f6700dd59f07593462 (diff)
parent 2da5042dbe3fb69fc2688958bad242bb8695cd2f (diff)
Merge pull request #27970 from vespa-engine/arnej/better-check-in-tensor-from (v8.208.14)
more robust checking of value type
-rw-r--r-- searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.cpp 3
-rw-r--r-- searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.h 2
-rw-r--r-- searchlib/src/vespa/searchlib/features/tensor_from_attribute_executor.h 4
-rw-r--r-- searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.cpp 39
-rw-r--r-- searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.cpp 36
5 files changed, 52 insertions, 32 deletions
diff --git a/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.cpp b/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.cpp
index 3e5d1da6a1a..7c267413a86 100644
--- a/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.cpp
@@ -36,7 +36,8 @@ TensorFactoryBlueprint::TensorFactoryBlueprint(const vespalib::string &baseName)
: Blueprint(baseName),
_sourceType(),
_sourceParam(),
- _dimension("0") // default dimension is set to the source param if not specified.
+ _dimension("0"), // default dimension is set to the source param if not specified.
+ _valueType(vespalib::eval::ValueType::error_type())
{
}
diff --git a/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.h b/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.h
index 26fcc79b6f5..47ccb038ac7 100644
--- a/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.h
+++ b/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.h
@@ -4,6 +4,7 @@
#include <vespa/searchlib/fef/blueprint.h>
#include <vespa/vespalib/stllike/string.h>
+#include <vespa/eval/eval/value_type.h>
namespace search::features {
@@ -19,6 +20,7 @@ protected:
vespalib::string _sourceType;
vespalib::string _sourceParam;
vespalib::string _dimension;
+ vespalib::eval::ValueType _valueType;
bool extractSource(const vespalib::string &source);
TensorFactoryBlueprint(const vespalib::string &baseName);
diff --git a/searchlib/src/vespa/searchlib/features/tensor_from_attribute_executor.h b/searchlib/src/vespa/searchlib/features/tensor_from_attribute_executor.h
index 5a3fede76e8..7b04d10cea2 100644
--- a/searchlib/src/vespa/searchlib/features/tensor_from_attribute_executor.h
+++ b/searchlib/src/vespa/searchlib/features/tensor_from_attribute_executor.h
@@ -28,9 +28,9 @@ private:
public:
TensorFromAttributeExecutor(const search::attribute::IAttributeVector *attribute,
- const vespalib::string &dimension)
+ const vespalib::eval::ValueType &valueType)
: _attribute(attribute),
- _type(vespalib::eval::ValueType::make_type(CellType::DOUBLE, {{dimension}})),
+ _type(valueType),
_attrBuffer(),
_addr_ref(),
_tensor()
diff --git a/searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.cpp b/searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.cpp
index b72a75bd19f..f36c1dbfdaa 100644
--- a/searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.cpp
+++ b/searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.cpp
@@ -41,15 +41,23 @@ TensorFromLabelsBlueprint::setup(const search::fef::IIndexEnvironment &env,
// _params[0] = source ('attribute(name)' OR 'query(param)');
// _params[1] = dimension (optional);
bool validSource = extractSource(params[0].getValue());
+ if (! validSource) {
+ return fail("invalid source: '%s'", params[0].getValue().c_str());
+ }
if (params.size() == 2) {
_dimension = params[1].getValue();
} else {
_dimension = _sourceParam;
}
+ auto vt = ValueType::make_type(CellType::DOUBLE, {{_dimension}});
+ _valueType = ValueType::from_spec(vt.to_spec());
+ if (_valueType.is_error()) {
+ return fail("invalid dimension name: '%s'", _dimension.c_str());
+ }
describeOutput("tensor",
"The tensor created from the given source (attribute field or query parameter)",
- FeatureType::object(ValueType::make_type(CellType::DOUBLE, {{_dimension}})));
- return validSource;
+ FeatureType::object(_valueType));
+ return true;
}
namespace {
@@ -57,23 +65,24 @@ namespace {
FeatureExecutor &
createAttributeExecutor(const search::fef::IQueryEnvironment &env,
const vespalib::string &attrName,
- const vespalib::string &dimension, vespalib::Stash &stash)
+ const ValueType &valueType,
+ vespalib::Stash &stash)
{
const IAttributeVector *attribute = env.getAttributeContext().getAttribute(attrName);
if (attribute == NULL) {
Issue::report("tensor_from_labels feature: The attribute vector '%s' was not found."
" Returning empty tensor.", attrName.c_str());
- return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{dimension}}), stash);
+ return ConstantTensorExecutor::createEmpty(valueType, stash);
}
if (attribute->isFloatingPointType()) {
Issue::report("tensor_from_labels feature: The attribute vector '%s' must have basic type string or integer."
" Returning empty tensor.", attrName.c_str());
- return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{dimension}}), stash);
+ return ConstantTensorExecutor::createEmpty(valueType, stash);
}
if (attribute->getCollectionType() == search::attribute::CollectionType::WSET) {
Issue::report("tensor_from_labels feature: The attribute vector '%s' is a weighted set - use tensorFromWeightedSet instead."
" Returning empty tensor.", attrName.c_str());
- return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{dimension}}), stash);
+ return ConstantTensorExecutor::createEmpty(valueType, stash);
}
// Note that for array attribute vectors the default weight is 1.0 for all values.
// This means we can get the attribute content as weighted content and build
@@ -81,25 +90,25 @@ createAttributeExecutor(const search::fef::IQueryEnvironment &env,
if (attribute->isIntegerType()) {
// Using WeightedStringContent ensures that the integer values are converted
// to strings while extracting them from the attribute.
- return stash.create<TensorFromAttributeExecutor<WeightedStringContent>>(attribute, dimension);
+ return stash.create<TensorFromAttributeExecutor<WeightedStringContent>>(attribute, valueType);
}
// When the underlying attribute is of type string we can reference these values
// using WeightedConstCharContent.
- return stash.create<TensorFromAttributeExecutor<WeightedConstCharContent>>(attribute, dimension);
+ return stash.create<TensorFromAttributeExecutor<WeightedConstCharContent>>(attribute, valueType);
}
FeatureExecutor &
createQueryExecutor(const search::fef::IQueryEnvironment &env,
const vespalib::string &queryKey,
- const vespalib::string &dimension, vespalib::Stash &stash)
+ const ValueType &valueType,
+ vespalib::Stash &stash)
{
- ValueType type = ValueType::make_type(CellType::DOUBLE, {{dimension}});
search::fef::Property prop = env.getProperties().lookup(queryKey);
if (prop.found() && !prop.get().empty()) {
std::vector<vespalib::string> vector;
ArrayParser::parse(prop.get(), vector);
auto factory = FastValueBuilderFactory::get();
- auto builder = factory.create_value_builder<double>(type, 1, 1, vector.size());
+ auto builder = factory.create_value_builder<double>(valueType, 1, 1, vector.size());
std::vector<vespalib::stringref> addr_ref;
for (const auto &elem : vector) {
addr_ref.clear();
@@ -109,7 +118,7 @@ createQueryExecutor(const search::fef::IQueryEnvironment &env,
}
return ConstantTensorExecutor::create(builder->build(std::move(builder)), stash);
}
- return ConstantTensorExecutor::createEmpty(type, stash);
+ return ConstantTensorExecutor::createEmpty(valueType, stash);
}
}
@@ -118,11 +127,11 @@ FeatureExecutor &
TensorFromLabelsBlueprint::createExecutor(const search::fef::IQueryEnvironment &env, vespalib::Stash &stash) const
{
if (_sourceType == ATTRIBUTE_SOURCE) {
- return createAttributeExecutor(env, _sourceParam, _dimension, stash);
+ return createAttributeExecutor(env, _sourceParam, _valueType, stash);
} else if (_sourceType == QUERY_SOURCE) {
- return createQueryExecutor(env, _sourceParam, _dimension, stash);
+ return createQueryExecutor(env, _sourceParam, _valueType, stash);
}
- return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{_dimension}}), stash);
+ return ConstantTensorExecutor::createEmpty(_valueType, stash);
}
} // namespace features
diff --git a/searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.cpp b/searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.cpp
index cbe262a0cbd..312f9ee2bc6 100644
--- a/searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.cpp
+++ b/searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.cpp
@@ -54,15 +54,23 @@ TensorFromWeightedSetBlueprint::setup(const search::fef::IIndexEnvironment &env,
// _params[0] = source ('attribute(name)' OR 'query(param)');
// _params[1] = dimension (optional);
bool validSource = extractSource(params[0].getValue());
+ if (! validSource) {
+ return fail("invalid source: '%s'", params[0].getValue().c_str());
+ }
if (params.size() == 2) {
_dimension = params[1].getValue();
} else {
_dimension = _sourceParam;
}
+ auto vt = ValueType::make_type(CellType::DOUBLE, {{_dimension}});
+ _valueType = ValueType::from_spec(vt.to_spec());
+ if (_valueType.is_error()) {
+ return fail("invalid dimension name: '%s'", _dimension.c_str());
+ }
describeOutput("tensor",
"The tensor created from the given weighted set source (attribute field or query parameter)",
- FeatureType::object(ValueType::make_type(CellType::DOUBLE, {{_dimension}})));
- return validSource;
+ FeatureType::object(_valueType));
+ return true;
}
namespace {
@@ -70,45 +78,45 @@ namespace {
FeatureExecutor &
createAttributeExecutor(const search::fef::IQueryEnvironment &env,
const vespalib::string &attrName,
- const vespalib::string &dimension,
+ const ValueType &valueType,
vespalib::Stash &stash)
{
const IAttributeVector *attribute = env.getAttributeContext().getAttribute(attrName);
if (attribute == NULL) {
Issue::report("tensor_from_weighted_set feature: The attribute vector '%s' was not found."
" Returning empty tensor.", attrName.c_str());
- return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{dimension}}), stash);
+ return ConstantTensorExecutor::createEmpty(valueType, stash);
}
if (attribute->getCollectionType() != search::attribute::CollectionType::WSET ||
attribute->isFloatingPointType())
{
Issue::report("tensor_from_weighted_set feature: The attribute vector '%s' is NOT of type weighted set of string or integer."
" Returning empty tensor.", attrName.c_str());
- return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{dimension}}), stash);
+ return ConstantTensorExecutor::createEmpty(valueType, stash);
}
if (attribute->isIntegerType()) {
// Using WeightedStringContent ensures that the integer values are converted
// to strings while extracting them from the attribute.
- return stash.create<TensorFromAttributeExecutor<WeightedStringContent>>(attribute, dimension);
+ return stash.create<TensorFromAttributeExecutor<WeightedStringContent>>(attribute, valueType);
}
// When the underlying attribute is of type string we can reference these values
// using WeightedConstCharContent.
- return stash.create<TensorFromAttributeExecutor<WeightedConstCharContent>>(attribute, dimension);
+ return stash.create<TensorFromAttributeExecutor<WeightedConstCharContent>>(attribute, valueType);
}
FeatureExecutor &
createQueryExecutor(const search::fef::IQueryEnvironment &env,
const vespalib::string &queryKey,
- const vespalib::string &dimension, vespalib::Stash &stash)
+ const ValueType &valueType,
+ vespalib::Stash &stash)
{
- ValueType type = ValueType::make_type(CellType::DOUBLE, {{dimension}});
search::fef::Property prop = env.getProperties().lookup(queryKey);
if (prop.found() && !prop.get().empty()) {
WeightedStringVector vector;
WeightedSetParser::parse(prop.get(), vector);
auto factory = FastValueBuilderFactory::get();
size_t sz = vector._data.size();
- auto builder = factory.create_value_builder<double>(type, 1, 1, sz);
+ auto builder = factory.create_value_builder<double>(valueType, 1, 1, sz);
std::vector<vespalib::stringref> addr_ref;
for (const auto &elem : vector._data) {
addr_ref.clear();
@@ -118,7 +126,7 @@ createQueryExecutor(const search::fef::IQueryEnvironment &env,
}
return ConstantTensorExecutor::create(builder->build(std::move(builder)), stash);
}
- return ConstantTensorExecutor::createEmpty(type, stash);
+ return ConstantTensorExecutor::createEmpty(valueType, stash);
}
}
@@ -127,11 +135,11 @@ FeatureExecutor &
TensorFromWeightedSetBlueprint::createExecutor(const search::fef::IQueryEnvironment &env, vespalib::Stash &stash) const
{
if (_sourceType == ATTRIBUTE_SOURCE) {
- return createAttributeExecutor(env, _sourceParam, _dimension, stash);
+ return createAttributeExecutor(env, _sourceParam, _valueType, stash);
} else if (_sourceType == QUERY_SOURCE) {
- return createQueryExecutor(env, _sourceParam, _dimension, stash);
+ return createQueryExecutor(env, _sourceParam, _valueType, stash);
}
- return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{_dimension}}), stash);
+ return ConstantTensorExecutor::createEmpty(_valueType, stash);
}
} // namespace features