diff options
4 files changed, 127 insertions, 38 deletions
diff --git a/searchcommon/src/vespa/searchcommon/common/schema.cpp b/searchcommon/src/vespa/searchcommon/common/schema.cpp index a21cc43572e..c59edbef22f 100644 --- a/searchcommon/src/vespa/searchcommon/common/schema.cpp +++ b/searchcommon/src/vespa/searchcommon/common/schema.cpp @@ -70,16 +70,20 @@ namespace index { const uint32_t Schema::UNKNOWN_FIELD_ID(std::numeric_limits<uint32_t>::max()); Schema::Field::Field(vespalib::stringref n, DataType dt) - : _name(n), - _dataType(dt), - _collectionType(schema::CollectionType::SINGLE) + : Field(n, dt, schema::CollectionType::SINGLE, "") { } Schema::Field::Field(vespalib::stringref n, DataType dt, CollectionType ct) + : Field(n, dt, ct, "") +{ +} + +Schema::Field::Field(vespalib::stringref n, DataType dt, CollectionType ct, vespalib::stringref tensor_spec) : _name(n), _dataType(dt), - _collectionType(ct) + _collectionType(ct), + _tensor_spec(tensor_spec) { } @@ -111,15 +115,14 @@ Schema::Field::operator==(const Field &rhs) const { return _name == rhs._name && _dataType == rhs._dataType && - _collectionType == rhs._collectionType; + _collectionType == rhs._collectionType && + _tensor_spec == rhs._tensor_spec; } bool Schema::Field::operator!=(const Field &rhs) const { - return _name != rhs._name || - _dataType != rhs._dataType || - _collectionType != rhs._collectionType; + return !((*this) == rhs); } Schema::IndexField::IndexField(vespalib::stringref name, DataType dt) diff --git a/searchcommon/src/vespa/searchcommon/common/schema.h b/searchcommon/src/vespa/searchcommon/common/schema.h index e17d219d7e8..9003578adaf 100644 --- a/searchcommon/src/vespa/searchcommon/common/schema.h +++ b/searchcommon/src/vespa/searchcommon/common/schema.h @@ -35,10 +35,12 @@ public: vespalib::string _name; DataType _dataType; CollectionType _collectionType; + vespalib::string _tensor_spec; public: Field(vespalib::stringref n, DataType dt); Field(vespalib::stringref n, DataType dt, CollectionType ct); + Field(vespalib::stringref n, DataType dt, CollectionType ct, vespalib::stringref tensor_spec); /** * Create this field based on the given config lines. @@ -58,6 +60,7 @@ public: const vespalib::string &getName() const { return _name; } DataType getDataType() const { return _dataType; } CollectionType getCollectionType() const { return _collectionType; } + const vespalib::string& get_tensor_spec() const { return _tensor_spec; } bool matchingTypes(const Field &rhs) const { return getDataType() == rhs.getDataType() && diff --git a/searchcore/src/tests/proton/attribute/attribute_test.cpp b/searchcore/src/tests/proton/attribute/attribute_test.cpp index c101c3e2bd5..feebe63f01a 100644 --- a/searchcore/src/tests/proton/attribute/attribute_test.cpp +++ b/searchcore/src/tests/proton/attribute/attribute_test.cpp @@ -34,6 +34,7 @@ #include <vespa/searchlib/index/dummyfileheadercontext.h> #include <vespa/searchlib/predicate/predicate_hash.h> #include <vespa/searchlib/predicate/predicate_index.h> +#include <vespa/searchlib/tensor/dense_tensor_attribute.h> #include <vespa/searchlib/tensor/tensor_attribute.h> #include <vespa/searchlib/test/directory_handler.h> #include <vespa/vespalib/btree/btreeroot.hpp> @@ -71,6 +72,8 @@ using search::index::DummyFileHeaderContext; using search::index::schema::CollectionType; using search::predicate::PredicateHash; using search::predicate::PredicateIndex; +using search::tensor::DenseTensorAttribute; +using search::tensor::PrepareResult; using search::tensor::TensorAttribute; using search::test::DirectoryHandler; using std::string; @@ -153,10 +156,14 @@ public: } AttributeVector::SP addAttribute(const AttributeSpec &spec) { auto ret = _mgr->addAttribute(spec.getName(), - AttributeFactory::createAttribute(spec.getName(), spec.getConfig())); + AttributeFactory::createAttribute(spec.getName(), spec.getConfig())); allocAttributeWriter(); return ret; } + void add_attribute(AttributeVector::SP attr) { + _mgr->addAttribute(attr->getName(), std::move(attr)); + allocAttributeWriter(); + } void put(SerialNum serialNum, const Document &doc, DocumentIdT lid, bool immediateCommit = true) { _aw->put(serialNum, doc, lid, immediateCommit, emptyCallback); @@ -625,18 +632,20 @@ Tensor::UP make_tensor(const TensorSpec &spec) { return Tensor::UP(dynamic_cast<Tensor*>(tensor.release())); } +const vespalib::string sparse_tensor = "tensor(x{},y{})"; + AttributeVector::SP createTensorAttribute(AttributeWriterTest &t) { AVConfig cfg(AVBasicType::TENSOR); - cfg.setTensorType(ValueType::from_spec("tensor(x{},y{})")); + cfg.setTensorType(ValueType::from_spec(sparse_tensor)); auto ret = t.addAttribute({"a1", cfg}); return ret; } Schema -createTensorSchema() { +createTensorSchema(const vespalib::string& tensor_spec = sparse_tensor) { Schema schema; - schema.addAttributeField(Schema::AttributeField("a1", schema::DataType::TENSOR, CollectionType::SINGLE)); + schema.addAttributeField(Schema::AttributeField("a1", schema::DataType::TENSOR, CollectionType::SINGLE, tensor_spec)); return schema; } @@ -654,7 +663,7 @@ TEST_F(AttributeWriterTest, can_write_to_tensor_attribute) auto a1 = createTensorAttribute(*this); Schema s = createTensorSchema(); DocBuilder builder(s); - auto tensor = make_tensor(TensorSpec("tensor(x{},y{})") + auto tensor = make_tensor(TensorSpec(sparse_tensor) .add({{"x", "4"}, {"y", "5"}}, 7)); Document::UP doc = createTensorPutDoc(builder, *tensor); put(1, *doc, 1); @@ -671,7 +680,7 @@ TEST_F(AttributeWriterTest, handles_tensor_assign_update) auto a1 = createTensorAttribute(*this); Schema s = createTensorSchema(); DocBuilder builder(s); - auto tensor = make_tensor(TensorSpec("tensor(x{},y{})") + auto tensor = make_tensor(TensorSpec(sparse_tensor) .add({{"x", "6"}, {"y", "7"}}, 9)); auto doc = createTensorPutDoc(builder, *tensor); put(1, *doc, 1); @@ -684,9 +693,9 @@ TEST_F(AttributeWriterTest, handles_tensor_assign_update) const document::DocumentType &dt(builder.getDocumentType()); DocumentUpdate upd(*builder.getDocumentTypeRepo(), dt, DocumentId("id:ns:searchdocument::1")); - auto new_tensor = make_tensor(TensorSpec("tensor(x{},y{})") + auto new_tensor = make_tensor(TensorSpec(sparse_tensor) .add({{"x", "8"}, {"y", "9"}}, 11)); - TensorDataType xySparseTensorDataType(vespalib::eval::ValueType::from_spec("tensor(x{},y{})")); + TensorDataType xySparseTensorDataType(vespalib::eval::ValueType::from_spec(sparse_tensor)); TensorFieldValue new_value(xySparseTensorDataType); new_value = new_tensor->clone(); upd.addUpdate(FieldUpdate(upd.getType().getField("a1")) @@ -762,15 +771,55 @@ TEST_F(AttributeWriterTest, spreads_write_over_3_write_contexts) putAttributes(*this, {0, 1, 2}); } +struct MockPrepareResult : public PrepareResult { + uint32_t docid; + const Tensor& tensor; + MockPrepareResult(uint32_t docid_in, const Tensor& tensor_in) : docid(docid_in), tensor(tensor_in) {} +}; + +class MockDenseTensorAttribute : public DenseTensorAttribute { +public: + mutable size_t prepare_set_tensor_cnt; + mutable size_t complete_set_tensor_cnt; + + MockDenseTensorAttribute(vespalib::stringref name, const AVConfig& cfg) + : DenseTensorAttribute(name, cfg), + prepare_set_tensor_cnt(0), + complete_set_tensor_cnt(0) + {} + std::unique_ptr<PrepareResult> prepare_set_tensor(uint32_t docid, const Tensor& tensor) const override { + ++prepare_set_tensor_cnt; + return std::make_unique<MockPrepareResult>(docid, tensor); + } + + virtual void complete_set_tensor(DocId docid, const Tensor& tensor, std::unique_ptr<PrepareResult> prepare_result) override { + ++complete_set_tensor_cnt; + assert(prepare_result); + auto* mock_result = dynamic_cast<MockPrepareResult*>(prepare_result.get()); + assert(mock_result); + EXPECT_EQ(docid, mock_result->docid); + EXPECT_EQ(tensor, mock_result->tensor); + } +}; + +const vespalib::string dense_tensor = "tensor(x[2])"; + AVConfig get_tensor_config(bool allow_multi_threaded_indexing) { AVConfig cfg(AVBasicType::TENSOR); - cfg.setTensorType(ValueType::from_spec("tensor(x[2])")); + cfg.setTensorType(ValueType::from_spec(dense_tensor)); cfg.set_hnsw_index_params(HnswIndexParams(4, 4, DistanceMetric::Euclidean, allow_multi_threaded_indexing)); return cfg; } +std::shared_ptr<MockDenseTensorAttribute> +make_mock_tensor_attribute(const vespalib::string& name, bool allow_multi_threaded_indexing) +{ + auto cfg = get_tensor_config(allow_multi_threaded_indexing); + return std::make_shared<MockDenseTensorAttribute>(name, cfg); +} + TEST_F(AttributeWriterTest, tensor_attributes_using_two_phase_put_are_in_separate_write_contexts) { addAttribute("a1"); @@ -793,6 +842,34 @@ TEST_F(AttributeWriterTest, tensor_attributes_using_two_phase_put_are_in_separat EXPECT_EQ("t2", ctx[2].getFields()[0].getAttribute().getName()); } +TEST_F(AttributeWriterTest, handles_put_in_two_phases_when_specified_for_tensor_attribute) +{ + setup(2); + auto a1 = make_mock_tensor_attribute("a1", true); + add_attribute(a1); + Schema schema = createTensorSchema(dense_tensor); + DocBuilder builder(schema); + auto tensor = make_tensor(TensorSpec(dense_tensor) + .add({{"x", 0}}, 3).add({{"x", 1}}, 5)); + auto doc = createTensorPutDoc(builder, *tensor); + + put(1, *doc, 1); + EXPECT_EQ(1, a1->prepare_set_tensor_cnt); + EXPECT_EQ(1, a1->complete_set_tensor_cnt); + assertExecuteHistory({1, 0}); + + put(2, *doc, 2); + EXPECT_EQ(2, a1->prepare_set_tensor_cnt); + EXPECT_EQ(2, a1->complete_set_tensor_cnt); + assertExecuteHistory({1, 0, 0, 0}); + + put(3, *doc, 3); + EXPECT_EQ(3, a1->prepare_set_tensor_cnt); + EXPECT_EQ(3, a1->complete_set_tensor_cnt); + // Note that the prepare step is executed round-robin between the 2 threads. + assertExecuteHistory({1, 0, 0, 0, 1, 0}); +} + ImportedAttributeVector::SP createImportedAttribute(const vespalib::string &name) diff --git a/searchlib/src/vespa/searchlib/index/doctypebuilder.cpp b/searchlib/src/vespa/searchlib/index/doctypebuilder.cpp index a7ad475d6aa..1491e3e21de 100644 --- a/searchlib/src/vespa/searchlib/index/doctypebuilder.cpp +++ b/searchlib/src/vespa/searchlib/index/doctypebuilder.cpp @@ -10,38 +10,44 @@ using namespace document; namespace search::index { namespace { -TensorDataType tensorDataType(vespalib::eval::ValueType::from_spec("tensor(x{}, y{})")); +const vespalib::string default_tensor_data_type = "tensor(x{},y{})"; -const DataType *convert(Schema::DataType type) { +vespalib::string +resolve_tensor_spec(const vespalib::string& field_tensor_spec) +{ + return field_tensor_spec.empty() ? default_tensor_data_type : field_tensor_spec; +} + +DataType::Type convert(Schema::DataType type) { switch (type) { case schema::DataType::BOOL: case schema::DataType::UINT2: case schema::DataType::UINT4: case schema::DataType::INT8: - return DataType::BYTE; + return DataType::T_BYTE; case schema::DataType::INT16: - return DataType::SHORT; + return DataType::T_SHORT; case schema::DataType::INT32: - return DataType::INT; + return DataType::T_INT; case schema::DataType::INT64: - return DataType::LONG; + return DataType::T_LONG; case schema::DataType::FLOAT: - return DataType::FLOAT; + return DataType::T_FLOAT; case schema::DataType::DOUBLE: - return DataType::DOUBLE; + return DataType::T_DOUBLE; case schema::DataType::STRING: - return DataType::STRING; + return DataType::T_STRING; case schema::DataType::RAW: - return DataType::RAW; + return DataType::T_RAW; case schema::DataType::BOOLEANTREE: - return DataType::PREDICATE; + return DataType::T_PREDICATE; case schema::DataType::TENSOR: - return &tensorDataType; + return DataType::T_TENSOR; default: break; } assert(!"Unknown datatype in schema"); - return 0; + return DataType::MAX; } void @@ -142,12 +148,12 @@ document::DocumenttypesConfig DocTypeBuilder::makeConfig() const { if (usf != usedFields.end()) { continue; // taken as index field } - const DataType *primitiveType = convert(field.getDataType()); - if (primitiveType->getId() == DataType::T_TENSOR) { - header_struct.addTensorField(field.getName(), dynamic_cast<const TensorDataType &>(*primitiveType).getTensorType().to_spec()); + auto type_id = convert(field.getDataType()); + if (type_id == DataType::T_TENSOR) { + header_struct.addTensorField(field.getName(), resolve_tensor_spec(field.get_tensor_spec())); } else { header_struct.addField(field.getName(), type_cache.getType( - primitiveType->getId(), field.getCollectionType())); + type_id, field.getCollectionType())); } usedFields.insert(field.getName()); } @@ -158,12 +164,12 @@ document::DocumenttypesConfig DocTypeBuilder::makeConfig() const { if (usf != usedFields.end()) { continue; // taken as index field or attribute field } - const DataType *primitiveType(convert(field.getDataType())); - if (primitiveType->getId() == DataType::T_TENSOR) { - header_struct.addTensorField(field.getName(), dynamic_cast<const TensorDataType &>(*primitiveType).getTensorType().to_spec()); + auto type_id = convert(field.getDataType()); + if (type_id == DataType::T_TENSOR) { + header_struct.addTensorField(field.getName(), resolve_tensor_spec(field.get_tensor_spec())); } else { header_struct.addField(field.getName(), type_cache.getType( - primitiveType->getId(), field.getCollectionType())); + type_id, field.getCollectionType())); } usedFields.insert(field.getName()); } |