From 0d189d33ca259dd191c6c14aa52027493b4746b8 Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Fri, 4 Aug 2023 12:22:52 +0000 Subject: more robust checking of value type --- .../features/tensor_factory_blueprint.cpp | 3 +- .../searchlib/features/tensor_factory_blueprint.h | 2 ++ .../features/tensor_from_attribute_executor.h | 4 +-- .../features/tensor_from_labels_feature.cpp | 33 ++++++++++++---------- .../features/tensor_from_weighted_set_feature.cpp | 30 +++++++++++--------- 5 files changed, 40 insertions(+), 32 deletions(-) diff --git a/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.cpp b/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.cpp index 3e5d1da6a1a..7c267413a86 100644 --- a/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.cpp @@ -36,7 +36,8 @@ TensorFactoryBlueprint::TensorFactoryBlueprint(const vespalib::string &baseName) : Blueprint(baseName), _sourceType(), _sourceParam(), - _dimension("0") // default dimension is set to the source param if not specified. + _dimension("0"), // default dimension is set to the source param if not specified. + _valueType(vespalib::eval::ValueType::error_type()) { } diff --git a/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.h b/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.h index 26fcc79b6f5..47ccb038ac7 100644 --- a/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.h +++ b/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.h @@ -4,6 +4,7 @@ #include #include +#include namespace search::features { @@ -19,6 +20,7 @@ protected: vespalib::string _sourceType; vespalib::string _sourceParam; vespalib::string _dimension; + vespalib::eval::ValueType _valueType; bool extractSource(const vespalib::string &source); TensorFactoryBlueprint(const vespalib::string &baseName); diff --git a/searchlib/src/vespa/searchlib/features/tensor_from_attribute_executor.h b/searchlib/src/vespa/searchlib/features/tensor_from_attribute_executor.h index 5a3fede76e8..7b04d10cea2 100644 --- a/searchlib/src/vespa/searchlib/features/tensor_from_attribute_executor.h +++ b/searchlib/src/vespa/searchlib/features/tensor_from_attribute_executor.h @@ -28,9 +28,9 @@ private: public: TensorFromAttributeExecutor(const search::attribute::IAttributeVector *attribute, - const vespalib::string &dimension) + const vespalib::eval::ValueType &valueType) : _attribute(attribute), - _type(vespalib::eval::ValueType::make_type(CellType::DOUBLE, {{dimension}})), + _type(valueType), _attrBuffer(), _addr_ref(), _tensor() diff --git a/searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.cpp b/searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.cpp index b72a75bd19f..6628a889903 100644 --- a/searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.cpp +++ b/searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.cpp @@ -46,10 +46,12 @@ TensorFromLabelsBlueprint::setup(const search::fef::IIndexEnvironment &env, } else { _dimension = _sourceParam; } + auto vt = ValueType::make_type(CellType::DOUBLE, {{_dimension}}); + _valueType = ValueType::from_spec(vt.to_spec()); describeOutput("tensor", "The tensor created from the given source (attribute field or query parameter)", - FeatureType::object(ValueType::make_type(CellType::DOUBLE, {{_dimension}}))); - return validSource; + FeatureType::object(_valueType)); + return validSource && ! _valueType.is_error(); } namespace { @@ -57,23 +59,24 @@ namespace { FeatureExecutor & createAttributeExecutor(const search::fef::IQueryEnvironment &env, const vespalib::string &attrName, - const vespalib::string &dimension, vespalib::Stash &stash) + const ValueType &valueType, + vespalib::Stash &stash) { const IAttributeVector *attribute = env.getAttributeContext().getAttribute(attrName); if (attribute == NULL) { Issue::report("tensor_from_labels feature: The attribute vector '%s' was not found." " Returning empty tensor.", attrName.c_str()); - return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{dimension}}), stash); + return ConstantTensorExecutor::createEmpty(valueType, stash); } if (attribute->isFloatingPointType()) { Issue::report("tensor_from_labels feature: The attribute vector '%s' must have basic type string or integer." " Returning empty tensor.", attrName.c_str()); - return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{dimension}}), stash); + return ConstantTensorExecutor::createEmpty(valueType, stash); } if (attribute->getCollectionType() == search::attribute::CollectionType::WSET) { Issue::report("tensor_from_labels feature: The attribute vector '%s' is a weighted set - use tensorFromWeightedSet instead." " Returning empty tensor.", attrName.c_str()); - return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{dimension}}), stash); + return ConstantTensorExecutor::createEmpty(valueType, stash); } // Note that for array attribute vectors the default weight is 1.0 for all values. // This means we can get the attribute content as weighted content and build @@ -81,25 +84,25 @@ createAttributeExecutor(const search::fef::IQueryEnvironment &env, if (attribute->isIntegerType()) { // Using WeightedStringContent ensures that the integer values are converted // to strings while extracting them from the attribute. - return stash.create>(attribute, dimension); + return stash.create>(attribute, valueType); } // When the underlying attribute is of type string we can reference these values // using WeightedConstCharContent. - return stash.create>(attribute, dimension); + return stash.create>(attribute, valueType); } FeatureExecutor & createQueryExecutor(const search::fef::IQueryEnvironment &env, const vespalib::string &queryKey, - const vespalib::string &dimension, vespalib::Stash &stash) + const ValueType &valueType, + vespalib::Stash &stash) { - ValueType type = ValueType::make_type(CellType::DOUBLE, {{dimension}}); search::fef::Property prop = env.getProperties().lookup(queryKey); if (prop.found() && !prop.get().empty()) { std::vector vector; ArrayParser::parse(prop.get(), vector); auto factory = FastValueBuilderFactory::get(); - auto builder = factory.create_value_builder(type, 1, 1, vector.size()); + auto builder = factory.create_value_builder(valueType, 1, 1, vector.size()); std::vector addr_ref; for (const auto &elem : vector) { addr_ref.clear(); @@ -109,7 +112,7 @@ createQueryExecutor(const search::fef::IQueryEnvironment &env, } return ConstantTensorExecutor::create(builder->build(std::move(builder)), stash); } - return ConstantTensorExecutor::createEmpty(type, stash); + return ConstantTensorExecutor::createEmpty(valueType, stash); } } @@ -118,11 +121,11 @@ FeatureExecutor & TensorFromLabelsBlueprint::createExecutor(const search::fef::IQueryEnvironment &env, vespalib::Stash &stash) const { if (_sourceType == ATTRIBUTE_SOURCE) { - return createAttributeExecutor(env, _sourceParam, _dimension, stash); + return createAttributeExecutor(env, _sourceParam, _valueType, stash); } else if (_sourceType == QUERY_SOURCE) { - return createQueryExecutor(env, _sourceParam, _dimension, stash); + return createQueryExecutor(env, _sourceParam, _valueType, stash); } - return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{_dimension}}), stash); + return ConstantTensorExecutor::createEmpty(_valueType, stash); } } // namespace features diff --git a/searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.cpp b/searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.cpp index cbe262a0cbd..c635675b216 100644 --- a/searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.cpp +++ b/searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.cpp @@ -59,10 +59,12 @@ TensorFromWeightedSetBlueprint::setup(const search::fef::IIndexEnvironment &env, } else { _dimension = _sourceParam; } + auto vt = ValueType::make_type(CellType::DOUBLE, {{_dimension}}); + _valueType = ValueType::from_spec(vt.to_spec()); describeOutput("tensor", "The tensor created from the given weighted set source (attribute field or query parameter)", - FeatureType::object(ValueType::make_type(CellType::DOUBLE, {{_dimension}}))); - return validSource; + FeatureType::object(_valueType)); + return validSource && ! _valueType.is_error(); } namespace { @@ -70,45 +72,45 @@ namespace { FeatureExecutor & createAttributeExecutor(const search::fef::IQueryEnvironment &env, const vespalib::string &attrName, - const vespalib::string &dimension, + const ValueType &valueType, vespalib::Stash &stash) { const IAttributeVector *attribute = env.getAttributeContext().getAttribute(attrName); if (attribute == NULL) { Issue::report("tensor_from_weighted_set feature: The attribute vector '%s' was not found." " Returning empty tensor.", attrName.c_str()); - return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{dimension}}), stash); + return ConstantTensorExecutor::createEmpty(valueType, stash); } if (attribute->getCollectionType() != search::attribute::CollectionType::WSET || attribute->isFloatingPointType()) { Issue::report("tensor_from_weighted_set feature: The attribute vector '%s' is NOT of type weighted set of string or integer." " Returning empty tensor.", attrName.c_str()); - return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{dimension}}), stash); + return ConstantTensorExecutor::createEmpty(valueType, stash); } if (attribute->isIntegerType()) { // Using WeightedStringContent ensures that the integer values are converted // to strings while extracting them from the attribute. - return stash.create>(attribute, dimension); + return stash.create>(attribute, valueType); } // When the underlying attribute is of type string we can reference these values // using WeightedConstCharContent. - return stash.create>(attribute, dimension); + return stash.create>(attribute, valueType); } FeatureExecutor & createQueryExecutor(const search::fef::IQueryEnvironment &env, const vespalib::string &queryKey, - const vespalib::string &dimension, vespalib::Stash &stash) + const ValueType &valueType, + vespalib::Stash &stash) { - ValueType type = ValueType::make_type(CellType::DOUBLE, {{dimension}}); search::fef::Property prop = env.getProperties().lookup(queryKey); if (prop.found() && !prop.get().empty()) { WeightedStringVector vector; WeightedSetParser::parse(prop.get(), vector); auto factory = FastValueBuilderFactory::get(); size_t sz = vector._data.size(); - auto builder = factory.create_value_builder(type, 1, 1, sz); + auto builder = factory.create_value_builder(valueType, 1, 1, sz); std::vector addr_ref; for (const auto &elem : vector._data) { addr_ref.clear(); @@ -118,7 +120,7 @@ createQueryExecutor(const search::fef::IQueryEnvironment &env, } return ConstantTensorExecutor::create(builder->build(std::move(builder)), stash); } - return ConstantTensorExecutor::createEmpty(type, stash); + return ConstantTensorExecutor::createEmpty(valueType, stash); } } @@ -127,11 +129,11 @@ FeatureExecutor & TensorFromWeightedSetBlueprint::createExecutor(const search::fef::IQueryEnvironment &env, vespalib::Stash &stash) const { if (_sourceType == ATTRIBUTE_SOURCE) { - return createAttributeExecutor(env, _sourceParam, _dimension, stash); + return createAttributeExecutor(env, _sourceParam, _valueType, stash); } else if (_sourceType == QUERY_SOURCE) { - return createQueryExecutor(env, _sourceParam, _dimension, stash); + return createQueryExecutor(env, _sourceParam, _valueType, stash); } - return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{_dimension}}), stash); + return ConstantTensorExecutor::createEmpty(_valueType, stash); } } // namespace features -- cgit v1.2.3 From 2da5042dbe3fb69fc2688958bad242bb8695cd2f Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Mon, 7 Aug 2023 12:56:39 +0000 Subject: better error messages --- .../src/vespa/searchlib/features/tensor_from_labels_feature.cpp | 8 +++++++- .../vespa/searchlib/features/tensor_from_weighted_set_feature.cpp | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.cpp b/searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.cpp index 6628a889903..f36c1dbfdaa 100644 --- a/searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.cpp +++ b/searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.cpp @@ -41,6 +41,9 @@ TensorFromLabelsBlueprint::setup(const search::fef::IIndexEnvironment &env, // _params[0] = source ('attribute(name)' OR 'query(param)'); // _params[1] = dimension (optional); bool validSource = extractSource(params[0].getValue()); + if (! validSource) { + return fail("invalid source: '%s'", params[0].getValue().c_str()); + } if (params.size() == 2) { _dimension = params[1].getValue(); } else { @@ -48,10 +51,13 @@ TensorFromLabelsBlueprint::setup(const search::fef::IIndexEnvironment &env, } auto vt = ValueType::make_type(CellType::DOUBLE, {{_dimension}}); _valueType = ValueType::from_spec(vt.to_spec()); + if (_valueType.is_error()) { + return fail("invalid dimension name: '%s'", _dimension.c_str()); + } describeOutput("tensor", "The tensor created from the given source (attribute field or query parameter)", FeatureType::object(_valueType)); - return validSource && ! _valueType.is_error(); + return true; } namespace { diff --git a/searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.cpp b/searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.cpp index c635675b216..312f9ee2bc6 100644 --- a/searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.cpp +++ b/searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.cpp @@ -54,6 +54,9 @@ TensorFromWeightedSetBlueprint::setup(const search::fef::IIndexEnvironment &env, // _params[0] = source ('attribute(name)' OR 'query(param)'); // _params[1] = dimension (optional); bool validSource = extractSource(params[0].getValue()); + if (! validSource) { + return fail("invalid source: '%s'", params[0].getValue().c_str()); + } if (params.size() == 2) { _dimension = params[1].getValue(); } else { @@ -61,10 +64,13 @@ TensorFromWeightedSetBlueprint::setup(const search::fef::IIndexEnvironment &env, } auto vt = ValueType::make_type(CellType::DOUBLE, {{_dimension}}); _valueType = ValueType::from_spec(vt.to_spec()); + if (_valueType.is_error()) { + return fail("invalid dimension name: '%s'", _dimension.c_str()); + } describeOutput("tensor", "The tensor created from the given weighted set source (attribute field or query parameter)", FeatureType::object(_valueType)); - return validSource && ! _valueType.is_error(); + return true; } namespace { -- cgit v1.2.3