diff options
5 files changed, 52 insertions, 32 deletions
diff --git a/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.cpp b/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.cpp index 3e5d1da6a1a..7c267413a86 100644 --- a/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.cpp @@ -36,7 +36,8 @@ TensorFactoryBlueprint::TensorFactoryBlueprint(const vespalib::string &baseName) : Blueprint(baseName), _sourceType(), _sourceParam(), - _dimension("0") // default dimension is set to the source param if not specified. + _dimension("0"), // default dimension is set to the source param if not specified. + _valueType(vespalib::eval::ValueType::error_type()) { } diff --git a/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.h b/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.h index 26fcc79b6f5..47ccb038ac7 100644 --- a/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.h +++ b/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.h @@ -4,6 +4,7 @@ #include <vespa/searchlib/fef/blueprint.h> #include <vespa/vespalib/stllike/string.h> +#include <vespa/eval/eval/value_type.h> namespace search::features { @@ -19,6 +20,7 @@ protected: vespalib::string _sourceType; vespalib::string _sourceParam; vespalib::string _dimension; + vespalib::eval::ValueType _valueType; bool extractSource(const vespalib::string &source); TensorFactoryBlueprint(const vespalib::string &baseName); diff --git a/searchlib/src/vespa/searchlib/features/tensor_from_attribute_executor.h b/searchlib/src/vespa/searchlib/features/tensor_from_attribute_executor.h index 5a3fede76e8..7b04d10cea2 100644 --- a/searchlib/src/vespa/searchlib/features/tensor_from_attribute_executor.h +++ b/searchlib/src/vespa/searchlib/features/tensor_from_attribute_executor.h @@ -28,9 +28,9 @@ private: public: TensorFromAttributeExecutor(const search::attribute::IAttributeVector *attribute, - const vespalib::string &dimension) + const vespalib::eval::ValueType &valueType) : _attribute(attribute), - _type(vespalib::eval::ValueType::make_type(CellType::DOUBLE, {{dimension}})), + _type(valueType), _attrBuffer(), _addr_ref(), _tensor() diff --git a/searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.cpp b/searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.cpp index b72a75bd19f..f36c1dbfdaa 100644 --- a/searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.cpp +++ b/searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.cpp @@ -41,15 +41,23 @@ TensorFromLabelsBlueprint::setup(const search::fef::IIndexEnvironment &env, // _params[0] = source ('attribute(name)' OR 'query(param)'); // _params[1] = dimension (optional); bool validSource = extractSource(params[0].getValue()); + if (! validSource) { + return fail("invalid source: '%s'", params[0].getValue().c_str()); + } if (params.size() == 2) { _dimension = params[1].getValue(); } else { _dimension = _sourceParam; } + auto vt = ValueType::make_type(CellType::DOUBLE, {{_dimension}}); + _valueType = ValueType::from_spec(vt.to_spec()); + if (_valueType.is_error()) { + return fail("invalid dimension name: '%s'", _dimension.c_str()); + } describeOutput("tensor", "The tensor created from the given source (attribute field or query parameter)", - FeatureType::object(ValueType::make_type(CellType::DOUBLE, {{_dimension}}))); - return validSource; + FeatureType::object(_valueType)); + return true; } namespace { @@ -57,23 +65,24 @@ namespace { FeatureExecutor & createAttributeExecutor(const search::fef::IQueryEnvironment &env, const vespalib::string &attrName, - const vespalib::string &dimension, vespalib::Stash &stash) + const ValueType &valueType, + vespalib::Stash &stash) { const IAttributeVector *attribute = env.getAttributeContext().getAttribute(attrName); if (attribute == NULL) { Issue::report("tensor_from_labels feature: The attribute vector '%s' was not found." " Returning empty tensor.", attrName.c_str()); - return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{dimension}}), stash); + return ConstantTensorExecutor::createEmpty(valueType, stash); } if (attribute->isFloatingPointType()) { Issue::report("tensor_from_labels feature: The attribute vector '%s' must have basic type string or integer." " Returning empty tensor.", attrName.c_str()); - return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{dimension}}), stash); + return ConstantTensorExecutor::createEmpty(valueType, stash); } if (attribute->getCollectionType() == search::attribute::CollectionType::WSET) { Issue::report("tensor_from_labels feature: The attribute vector '%s' is a weighted set - use tensorFromWeightedSet instead." " Returning empty tensor.", attrName.c_str()); - return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{dimension}}), stash); + return ConstantTensorExecutor::createEmpty(valueType, stash); } // Note that for array attribute vectors the default weight is 1.0 for all values. // This means we can get the attribute content as weighted content and build @@ -81,25 +90,25 @@ createAttributeExecutor(const search::fef::IQueryEnvironment &env, if (attribute->isIntegerType()) { // Using WeightedStringContent ensures that the integer values are converted // to strings while extracting them from the attribute. - return stash.create<TensorFromAttributeExecutor<WeightedStringContent>>(attribute, dimension); + return stash.create<TensorFromAttributeExecutor<WeightedStringContent>>(attribute, valueType); } // When the underlying attribute is of type string we can reference these values // using WeightedConstCharContent. - return stash.create<TensorFromAttributeExecutor<WeightedConstCharContent>>(attribute, dimension); + return stash.create<TensorFromAttributeExecutor<WeightedConstCharContent>>(attribute, valueType); } FeatureExecutor & createQueryExecutor(const search::fef::IQueryEnvironment &env, const vespalib::string &queryKey, - const vespalib::string &dimension, vespalib::Stash &stash) + const ValueType &valueType, + vespalib::Stash &stash) { - ValueType type = ValueType::make_type(CellType::DOUBLE, {{dimension}}); search::fef::Property prop = env.getProperties().lookup(queryKey); if (prop.found() && !prop.get().empty()) { std::vector<vespalib::string> vector; ArrayParser::parse(prop.get(), vector); auto factory = FastValueBuilderFactory::get(); - auto builder = factory.create_value_builder<double>(type, 1, 1, vector.size()); + auto builder = factory.create_value_builder<double>(valueType, 1, 1, vector.size()); std::vector<vespalib::stringref> addr_ref; for (const auto &elem : vector) { addr_ref.clear(); @@ -109,7 +118,7 @@ createQueryExecutor(const search::fef::IQueryEnvironment &env, } return ConstantTensorExecutor::create(builder->build(std::move(builder)), stash); } - return ConstantTensorExecutor::createEmpty(type, stash); + return ConstantTensorExecutor::createEmpty(valueType, stash); } } @@ -118,11 +127,11 @@ FeatureExecutor & TensorFromLabelsBlueprint::createExecutor(const search::fef::IQueryEnvironment &env, vespalib::Stash &stash) const { if (_sourceType == ATTRIBUTE_SOURCE) { - return createAttributeExecutor(env, _sourceParam, _dimension, stash); + return createAttributeExecutor(env, _sourceParam, _valueType, stash); } else if (_sourceType == QUERY_SOURCE) { - return createQueryExecutor(env, _sourceParam, _dimension, stash); + return createQueryExecutor(env, _sourceParam, _valueType, stash); } - return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{_dimension}}), stash); + return ConstantTensorExecutor::createEmpty(_valueType, stash); } } // namespace features diff --git a/searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.cpp b/searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.cpp index cbe262a0cbd..312f9ee2bc6 100644 --- a/searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.cpp +++ b/searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.cpp @@ -54,15 +54,23 @@ TensorFromWeightedSetBlueprint::setup(const search::fef::IIndexEnvironment &env, // _params[0] = source ('attribute(name)' OR 'query(param)'); // _params[1] = dimension (optional); bool validSource = extractSource(params[0].getValue()); + if (! validSource) { + return fail("invalid source: '%s'", params[0].getValue().c_str()); + } if (params.size() == 2) { _dimension = params[1].getValue(); } else { _dimension = _sourceParam; } + auto vt = ValueType::make_type(CellType::DOUBLE, {{_dimension}}); + _valueType = ValueType::from_spec(vt.to_spec()); + if (_valueType.is_error()) { + return fail("invalid dimension name: '%s'", _dimension.c_str()); + } describeOutput("tensor", "The tensor created from the given weighted set source (attribute field or query parameter)", - FeatureType::object(ValueType::make_type(CellType::DOUBLE, {{_dimension}}))); - return validSource; + FeatureType::object(_valueType)); + return true; } namespace { @@ -70,45 +78,45 @@ namespace { FeatureExecutor & createAttributeExecutor(const search::fef::IQueryEnvironment &env, const vespalib::string &attrName, - const vespalib::string &dimension, + const ValueType &valueType, vespalib::Stash &stash) { const IAttributeVector *attribute = env.getAttributeContext().getAttribute(attrName); if (attribute == NULL) { Issue::report("tensor_from_weighted_set feature: The attribute vector '%s' was not found." " Returning empty tensor.", attrName.c_str()); - return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{dimension}}), stash); + return ConstantTensorExecutor::createEmpty(valueType, stash); } if (attribute->getCollectionType() != search::attribute::CollectionType::WSET || attribute->isFloatingPointType()) { Issue::report("tensor_from_weighted_set feature: The attribute vector '%s' is NOT of type weighted set of string or integer." " Returning empty tensor.", attrName.c_str()); - return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{dimension}}), stash); + return ConstantTensorExecutor::createEmpty(valueType, stash); } if (attribute->isIntegerType()) { // Using WeightedStringContent ensures that the integer values are converted // to strings while extracting them from the attribute. - return stash.create<TensorFromAttributeExecutor<WeightedStringContent>>(attribute, dimension); + return stash.create<TensorFromAttributeExecutor<WeightedStringContent>>(attribute, valueType); } // When the underlying attribute is of type string we can reference these values // using WeightedConstCharContent. - return stash.create<TensorFromAttributeExecutor<WeightedConstCharContent>>(attribute, dimension); + return stash.create<TensorFromAttributeExecutor<WeightedConstCharContent>>(attribute, valueType); } FeatureExecutor & createQueryExecutor(const search::fef::IQueryEnvironment &env, const vespalib::string &queryKey, - const vespalib::string &dimension, vespalib::Stash &stash) + const ValueType &valueType, + vespalib::Stash &stash) { - ValueType type = ValueType::make_type(CellType::DOUBLE, {{dimension}}); search::fef::Property prop = env.getProperties().lookup(queryKey); if (prop.found() && !prop.get().empty()) { WeightedStringVector vector; WeightedSetParser::parse(prop.get(), vector); auto factory = FastValueBuilderFactory::get(); size_t sz = vector._data.size(); - auto builder = factory.create_value_builder<double>(type, 1, 1, sz); + auto builder = factory.create_value_builder<double>(valueType, 1, 1, sz); std::vector<vespalib::stringref> addr_ref; for (const auto &elem : vector._data) { addr_ref.clear(); @@ -118,7 +126,7 @@ createQueryExecutor(const search::fef::IQueryEnvironment &env, } return ConstantTensorExecutor::create(builder->build(std::move(builder)), stash); } - return ConstantTensorExecutor::createEmpty(type, stash); + return ConstantTensorExecutor::createEmpty(valueType, stash); } } @@ -127,11 +135,11 @@ FeatureExecutor & TensorFromWeightedSetBlueprint::createExecutor(const search::fef::IQueryEnvironment &env, vespalib::Stash &stash) const { if (_sourceType == ATTRIBUTE_SOURCE) { - return createAttributeExecutor(env, _sourceParam, _dimension, stash); + return createAttributeExecutor(env, _sourceParam, _valueType, stash); } else if (_sourceType == QUERY_SOURCE) { - return createQueryExecutor(env, _sourceParam, _dimension, stash); + return createQueryExecutor(env, _sourceParam, _valueType, stash); } - return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{_dimension}}), stash); + return ConstantTensorExecutor::createEmpty(_valueType, stash); } } // namespace features |