diff options
author | Arne Juul <arnej@yahooinc.com> | 2023-08-04 12:22:52 +0000 |
---|---|---|
committer | Arne Juul <arnej@yahooinc.com> | 2023-08-04 12:22:52 +0000 |
commit | 0d189d33ca259dd191c6c14aa52027493b4746b8 (patch) | |
tree | 860aa676f45219d81b103de7c45858f882ee38f5 | |
parent | 64272a471021111f38a695a018d57bf3f042d3ab (diff) |
more robust checking of value type
5 files changed, 40 insertions, 32 deletions
diff --git a/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.cpp b/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.cpp index 3e5d1da6a1a..7c267413a86 100644 --- a/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.cpp @@ -36,7 +36,8 @@ TensorFactoryBlueprint::TensorFactoryBlueprint(const vespalib::string &baseName) : Blueprint(baseName), _sourceType(), _sourceParam(), - _dimension("0") // default dimension is set to the source param if not specified. + _dimension("0"), // default dimension is set to the source param if not specified. + _valueType(vespalib::eval::ValueType::error_type()) { } diff --git a/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.h b/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.h index 26fcc79b6f5..47ccb038ac7 100644 --- a/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.h +++ b/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.h @@ -4,6 +4,7 @@ #include <vespa/searchlib/fef/blueprint.h> #include <vespa/vespalib/stllike/string.h> +#include <vespa/eval/eval/value_type.h> namespace search::features { @@ -19,6 +20,7 @@ protected: vespalib::string _sourceType; vespalib::string _sourceParam; vespalib::string _dimension; + vespalib::eval::ValueType _valueType; bool extractSource(const vespalib::string &source); TensorFactoryBlueprint(const vespalib::string &baseName); diff --git a/searchlib/src/vespa/searchlib/features/tensor_from_attribute_executor.h b/searchlib/src/vespa/searchlib/features/tensor_from_attribute_executor.h index 5a3fede76e8..7b04d10cea2 100644 --- a/searchlib/src/vespa/searchlib/features/tensor_from_attribute_executor.h +++ b/searchlib/src/vespa/searchlib/features/tensor_from_attribute_executor.h @@ -28,9 +28,9 @@ private: public: TensorFromAttributeExecutor(const search::attribute::IAttributeVector *attribute, - const vespalib::string &dimension) + const vespalib::eval::ValueType &valueType) : _attribute(attribute), - _type(vespalib::eval::ValueType::make_type(CellType::DOUBLE, {{dimension}})), + _type(valueType), _attrBuffer(), _addr_ref(), _tensor() diff --git a/searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.cpp b/searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.cpp index b72a75bd19f..6628a889903 100644 --- a/searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.cpp +++ b/searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.cpp @@ -46,10 +46,12 @@ TensorFromLabelsBlueprint::setup(const search::fef::IIndexEnvironment &env, } else { _dimension = _sourceParam; } + auto vt = ValueType::make_type(CellType::DOUBLE, {{_dimension}}); + _valueType = ValueType::from_spec(vt.to_spec()); describeOutput("tensor", "The tensor created from the given source (attribute field or query parameter)", - FeatureType::object(ValueType::make_type(CellType::DOUBLE, {{_dimension}}))); - return validSource; + FeatureType::object(_valueType)); + return validSource && ! _valueType.is_error(); } namespace { @@ -57,23 +59,24 @@ namespace { FeatureExecutor & createAttributeExecutor(const search::fef::IQueryEnvironment &env, const vespalib::string &attrName, - const vespalib::string &dimension, vespalib::Stash &stash) + const ValueType &valueType, + vespalib::Stash &stash) { const IAttributeVector *attribute = env.getAttributeContext().getAttribute(attrName); if (attribute == NULL) { Issue::report("tensor_from_labels feature: The attribute vector '%s' was not found." " Returning empty tensor.", attrName.c_str()); - return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{dimension}}), stash); + return ConstantTensorExecutor::createEmpty(valueType, stash); } if (attribute->isFloatingPointType()) { Issue::report("tensor_from_labels feature: The attribute vector '%s' must have basic type string or integer." " Returning empty tensor.", attrName.c_str()); - return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{dimension}}), stash); + return ConstantTensorExecutor::createEmpty(valueType, stash); } if (attribute->getCollectionType() == search::attribute::CollectionType::WSET) { Issue::report("tensor_from_labels feature: The attribute vector '%s' is a weighted set - use tensorFromWeightedSet instead." " Returning empty tensor.", attrName.c_str()); - return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{dimension}}), stash); + return ConstantTensorExecutor::createEmpty(valueType, stash); } // Note that for array attribute vectors the default weight is 1.0 for all values. // This means we can get the attribute content as weighted content and build @@ -81,25 +84,25 @@ createAttributeExecutor(const search::fef::IQueryEnvironment &env, if (attribute->isIntegerType()) { // Using WeightedStringContent ensures that the integer values are converted // to strings while extracting them from the attribute. - return stash.create<TensorFromAttributeExecutor<WeightedStringContent>>(attribute, dimension); + return stash.create<TensorFromAttributeExecutor<WeightedStringContent>>(attribute, valueType); } // When the underlying attribute is of type string we can reference these values // using WeightedConstCharContent. - return stash.create<TensorFromAttributeExecutor<WeightedConstCharContent>>(attribute, dimension); + return stash.create<TensorFromAttributeExecutor<WeightedConstCharContent>>(attribute, valueType); } FeatureExecutor & createQueryExecutor(const search::fef::IQueryEnvironment &env, const vespalib::string &queryKey, - const vespalib::string &dimension, vespalib::Stash &stash) + const ValueType &valueType, + vespalib::Stash &stash) { - ValueType type = ValueType::make_type(CellType::DOUBLE, {{dimension}}); search::fef::Property prop = env.getProperties().lookup(queryKey); if (prop.found() && !prop.get().empty()) { std::vector<vespalib::string> vector; ArrayParser::parse(prop.get(), vector); auto factory = FastValueBuilderFactory::get(); - auto builder = factory.create_value_builder<double>(type, 1, 1, vector.size()); + auto builder = factory.create_value_builder<double>(valueType, 1, 1, vector.size()); std::vector<vespalib::stringref> addr_ref; for (const auto &elem : vector) { addr_ref.clear(); @@ -109,7 +112,7 @@ createQueryExecutor(const search::fef::IQueryEnvironment &env, } return ConstantTensorExecutor::create(builder->build(std::move(builder)), stash); } - return ConstantTensorExecutor::createEmpty(type, stash); + return ConstantTensorExecutor::createEmpty(valueType, stash); } } @@ -118,11 +121,11 @@ FeatureExecutor & TensorFromLabelsBlueprint::createExecutor(const search::fef::IQueryEnvironment &env, vespalib::Stash &stash) const { if (_sourceType == ATTRIBUTE_SOURCE) { - return createAttributeExecutor(env, _sourceParam, _dimension, stash); + return createAttributeExecutor(env, _sourceParam, _valueType, stash); } else if (_sourceType == QUERY_SOURCE) { - return createQueryExecutor(env, _sourceParam, _dimension, stash); + return createQueryExecutor(env, _sourceParam, _valueType, stash); } - return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{_dimension}}), stash); + return ConstantTensorExecutor::createEmpty(_valueType, stash); } } // namespace features diff --git a/searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.cpp b/searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.cpp index cbe262a0cbd..c635675b216 100644 --- a/searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.cpp +++ b/searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.cpp @@ -59,10 +59,12 @@ TensorFromWeightedSetBlueprint::setup(const search::fef::IIndexEnvironment &env, } else { _dimension = _sourceParam; } + auto vt = ValueType::make_type(CellType::DOUBLE, {{_dimension}}); + _valueType = ValueType::from_spec(vt.to_spec()); describeOutput("tensor", "The tensor created from the given weighted set source (attribute field or query parameter)", - FeatureType::object(ValueType::make_type(CellType::DOUBLE, {{_dimension}}))); - return validSource; + FeatureType::object(_valueType)); + return validSource && ! _valueType.is_error(); } namespace { @@ -70,45 +72,45 @@ namespace { FeatureExecutor & createAttributeExecutor(const search::fef::IQueryEnvironment &env, const vespalib::string &attrName, - const vespalib::string &dimension, + const ValueType &valueType, vespalib::Stash &stash) { const IAttributeVector *attribute = env.getAttributeContext().getAttribute(attrName); if (attribute == NULL) { Issue::report("tensor_from_weighted_set feature: The attribute vector '%s' was not found." " Returning empty tensor.", attrName.c_str()); - return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{dimension}}), stash); + return ConstantTensorExecutor::createEmpty(valueType, stash); } if (attribute->getCollectionType() != search::attribute::CollectionType::WSET || attribute->isFloatingPointType()) { Issue::report("tensor_from_weighted_set feature: The attribute vector '%s' is NOT of type weighted set of string or integer." " Returning empty tensor.", attrName.c_str()); - return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{dimension}}), stash); + return ConstantTensorExecutor::createEmpty(valueType, stash); } if (attribute->isIntegerType()) { // Using WeightedStringContent ensures that the integer values are converted // to strings while extracting them from the attribute. - return stash.create<TensorFromAttributeExecutor<WeightedStringContent>>(attribute, dimension); + return stash.create<TensorFromAttributeExecutor<WeightedStringContent>>(attribute, valueType); } // When the underlying attribute is of type string we can reference these values // using WeightedConstCharContent. - return stash.create<TensorFromAttributeExecutor<WeightedConstCharContent>>(attribute, dimension); + return stash.create<TensorFromAttributeExecutor<WeightedConstCharContent>>(attribute, valueType); } FeatureExecutor & createQueryExecutor(const search::fef::IQueryEnvironment &env, const vespalib::string &queryKey, - const vespalib::string &dimension, vespalib::Stash &stash) + const ValueType &valueType, + vespalib::Stash &stash) { - ValueType type = ValueType::make_type(CellType::DOUBLE, {{dimension}}); search::fef::Property prop = env.getProperties().lookup(queryKey); if (prop.found() && !prop.get().empty()) { WeightedStringVector vector; WeightedSetParser::parse(prop.get(), vector); auto factory = FastValueBuilderFactory::get(); size_t sz = vector._data.size(); - auto builder = factory.create_value_builder<double>(type, 1, 1, sz); + auto builder = factory.create_value_builder<double>(valueType, 1, 1, sz); std::vector<vespalib::stringref> addr_ref; for (const auto &elem : vector._data) { addr_ref.clear(); @@ -118,7 +120,7 @@ createQueryExecutor(const search::fef::IQueryEnvironment &env, } return ConstantTensorExecutor::create(builder->build(std::move(builder)), stash); } - return ConstantTensorExecutor::createEmpty(type, stash); + return ConstantTensorExecutor::createEmpty(valueType, stash); } } @@ -127,11 +129,11 @@ FeatureExecutor & TensorFromWeightedSetBlueprint::createExecutor(const search::fef::IQueryEnvironment &env, vespalib::Stash &stash) const { if (_sourceType == ATTRIBUTE_SOURCE) { - return createAttributeExecutor(env, _sourceParam, _dimension, stash); + return createAttributeExecutor(env, _sourceParam, _valueType, stash); } else if (_sourceType == QUERY_SOURCE) { - return createQueryExecutor(env, _sourceParam, _dimension, stash); + return createQueryExecutor(env, _sourceParam, _valueType, stash); } - return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{_dimension}}), stash); + return ConstantTensorExecutor::createEmpty(_valueType, stash); } } // namespace features |