summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Geir Storli <geirst@verizonmedia.com> 2020-06-15 12:00:36 +0000
committer Geir Storli <geirst@verizonmedia.com> 2020-06-17 13:18:03 +0000
commit bddf1e9ebabf285963a5fea5d56461490b70c732 (patch)
tree bd3a7c031c6116969adee9603c066aaf22c3a4dc
parent 5566148a0ad253569e44b4e21ada7e8e59241eaf (diff)
Test that attribute writer can handle put in two phases.
-rw-r--r-- searchcommon/src/vespa/searchcommon/common/schema.cpp 19
-rw-r--r-- searchcommon/src/vespa/searchcommon/common/schema.h 3
-rw-r--r-- searchcore/src/tests/proton/attribute/attribute_test.cpp 95
-rw-r--r-- searchlib/src/vespa/searchlib/index/doctypebuilder.cpp 48
4 files changed, 127 insertions, 38 deletions
diff --git a/searchcommon/src/vespa/searchcommon/common/schema.cpp b/searchcommon/src/vespa/searchcommon/common/schema.cpp
index a21cc43572e..c59edbef22f 100644
--- a/searchcommon/src/vespa/searchcommon/common/schema.cpp
+++ b/searchcommon/src/vespa/searchcommon/common/schema.cpp
@@ -70,16 +70,20 @@ namespace index {
const uint32_t Schema::UNKNOWN_FIELD_ID(std::numeric_limits<uint32_t>::max());
Schema::Field::Field(vespalib::stringref n, DataType dt)
- : _name(n),
- _dataType(dt),
- _collectionType(schema::CollectionType::SINGLE)
+ : Field(n, dt, schema::CollectionType::SINGLE, "")
{
}
Schema::Field::Field(vespalib::stringref n, DataType dt, CollectionType ct)
+ : Field(n, dt, ct, "")
+{
+}
+
+Schema::Field::Field(vespalib::stringref n, DataType dt, CollectionType ct, vespalib::stringref tensor_spec)
: _name(n),
_dataType(dt),
- _collectionType(ct)
+ _collectionType(ct),
+ _tensor_spec(tensor_spec)
{
}
@@ -111,15 +115,14 @@ Schema::Field::operator==(const Field &rhs) const
{
return _name == rhs._name &&
_dataType == rhs._dataType &&
- _collectionType == rhs._collectionType;
+ _collectionType == rhs._collectionType &&
+ _tensor_spec == rhs._tensor_spec;
}
bool
Schema::Field::operator!=(const Field &rhs) const
{
- return _name != rhs._name ||
- _dataType != rhs._dataType ||
- _collectionType != rhs._collectionType;
+ return !((*this) == rhs);
}
Schema::IndexField::IndexField(vespalib::stringref name, DataType dt)
diff --git a/searchcommon/src/vespa/searchcommon/common/schema.h b/searchcommon/src/vespa/searchcommon/common/schema.h
index e17d219d7e8..9003578adaf 100644
--- a/searchcommon/src/vespa/searchcommon/common/schema.h
+++ b/searchcommon/src/vespa/searchcommon/common/schema.h
@@ -35,10 +35,12 @@ public:
vespalib::string _name;
DataType _dataType;
CollectionType _collectionType;
+ vespalib::string _tensor_spec;
public:
Field(vespalib::stringref n, DataType dt);
Field(vespalib::stringref n, DataType dt, CollectionType ct);
+ Field(vespalib::stringref n, DataType dt, CollectionType ct, vespalib::stringref tensor_spec);
/**
* Create this field based on the given config lines.
@@ -58,6 +60,7 @@ public:
const vespalib::string &getName() const { return _name; }
DataType getDataType() const { return _dataType; }
CollectionType getCollectionType() const { return _collectionType; }
+ const vespalib::string& get_tensor_spec() const { return _tensor_spec; }
bool matchingTypes(const Field &rhs) const {
return getDataType() == rhs.getDataType() &&
diff --git a/searchcore/src/tests/proton/attribute/attribute_test.cpp b/searchcore/src/tests/proton/attribute/attribute_test.cpp
index c101c3e2bd5..feebe63f01a 100644
--- a/searchcore/src/tests/proton/attribute/attribute_test.cpp
+++ b/searchcore/src/tests/proton/attribute/attribute_test.cpp
@@ -34,6 +34,7 @@
#include <vespa/searchlib/index/dummyfileheadercontext.h>
#include <vespa/searchlib/predicate/predicate_hash.h>
#include <vespa/searchlib/predicate/predicate_index.h>
+#include <vespa/searchlib/tensor/dense_tensor_attribute.h>
#include <vespa/searchlib/tensor/tensor_attribute.h>
#include <vespa/searchlib/test/directory_handler.h>
#include <vespa/vespalib/btree/btreeroot.hpp>
@@ -71,6 +72,8 @@ using search::index::DummyFileHeaderContext;
using search::index::schema::CollectionType;
using search::predicate::PredicateHash;
using search::predicate::PredicateIndex;
+using search::tensor::DenseTensorAttribute;
+using search::tensor::PrepareResult;
using search::tensor::TensorAttribute;
using search::test::DirectoryHandler;
using std::string;
@@ -153,10 +156,14 @@ public:
}
AttributeVector::SP addAttribute(const AttributeSpec &spec) {
auto ret = _mgr->addAttribute(spec.getName(),
- AttributeFactory::createAttribute(spec.getName(), spec.getConfig()));
+ AttributeFactory::createAttribute(spec.getName(), spec.getConfig()));
allocAttributeWriter();
return ret;
}
+ void add_attribute(AttributeVector::SP attr) {
+ _mgr->addAttribute(attr->getName(), std::move(attr));
+ allocAttributeWriter();
+ }
void put(SerialNum serialNum, const Document &doc, DocumentIdT lid,
bool immediateCommit = true) {
_aw->put(serialNum, doc, lid, immediateCommit, emptyCallback);
@@ -625,18 +632,20 @@ Tensor::UP make_tensor(const TensorSpec &spec) {
return Tensor::UP(dynamic_cast<Tensor*>(tensor.release()));
}
+const vespalib::string sparse_tensor = "tensor(x{},y{})";
+
AttributeVector::SP
createTensorAttribute(AttributeWriterTest &t) {
AVConfig cfg(AVBasicType::TENSOR);
- cfg.setTensorType(ValueType::from_spec("tensor(x{},y{})"));
+ cfg.setTensorType(ValueType::from_spec(sparse_tensor));
auto ret = t.addAttribute({"a1", cfg});
return ret;
}
Schema
-createTensorSchema() {
+createTensorSchema(const vespalib::string& tensor_spec = sparse_tensor) {
Schema schema;
- schema.addAttributeField(Schema::AttributeField("a1", schema::DataType::TENSOR, CollectionType::SINGLE));
+ schema.addAttributeField(Schema::AttributeField("a1", schema::DataType::TENSOR, CollectionType::SINGLE, tensor_spec));
return schema;
}
@@ -654,7 +663,7 @@ TEST_F(AttributeWriterTest, can_write_to_tensor_attribute)
auto a1 = createTensorAttribute(*this);
Schema s = createTensorSchema();
DocBuilder builder(s);
- auto tensor = make_tensor(TensorSpec("tensor(x{},y{})")
+ auto tensor = make_tensor(TensorSpec(sparse_tensor)
.add({{"x", "4"}, {"y", "5"}}, 7));
Document::UP doc = createTensorPutDoc(builder, *tensor);
put(1, *doc, 1);
@@ -671,7 +680,7 @@ TEST_F(AttributeWriterTest, handles_tensor_assign_update)
auto a1 = createTensorAttribute(*this);
Schema s = createTensorSchema();
DocBuilder builder(s);
- auto tensor = make_tensor(TensorSpec("tensor(x{},y{})")
+ auto tensor = make_tensor(TensorSpec(sparse_tensor)
.add({{"x", "6"}, {"y", "7"}}, 9));
auto doc = createTensorPutDoc(builder, *tensor);
put(1, *doc, 1);
@@ -684,9 +693,9 @@ TEST_F(AttributeWriterTest, handles_tensor_assign_update)
const document::DocumentType &dt(builder.getDocumentType());
DocumentUpdate upd(*builder.getDocumentTypeRepo(), dt, DocumentId("id:ns:searchdocument::1"));
- auto new_tensor = make_tensor(TensorSpec("tensor(x{},y{})")
+ auto new_tensor = make_tensor(TensorSpec(sparse_tensor)
.add({{"x", "8"}, {"y", "9"}}, 11));
- TensorDataType xySparseTensorDataType(vespalib::eval::ValueType::from_spec("tensor(x{},y{})"));
+ TensorDataType xySparseTensorDataType(vespalib::eval::ValueType::from_spec(sparse_tensor));
TensorFieldValue new_value(xySparseTensorDataType);
new_value = new_tensor->clone();
upd.addUpdate(FieldUpdate(upd.getType().getField("a1"))
@@ -762,15 +771,55 @@ TEST_F(AttributeWriterTest, spreads_write_over_3_write_contexts)
putAttributes(*this, {0, 1, 2});
}
+struct MockPrepareResult : public PrepareResult {
+ uint32_t docid;
+ const Tensor& tensor;
+ MockPrepareResult(uint32_t docid_in, const Tensor& tensor_in) : docid(docid_in), tensor(tensor_in) {}
+};
+
+class MockDenseTensorAttribute : public DenseTensorAttribute {
+public:
+ mutable size_t prepare_set_tensor_cnt;
+ mutable size_t complete_set_tensor_cnt;
+
+ MockDenseTensorAttribute(vespalib::stringref name, const AVConfig& cfg)
+ : DenseTensorAttribute(name, cfg),
+ prepare_set_tensor_cnt(0),
+ complete_set_tensor_cnt(0)
+ {}
+ std::unique_ptr<PrepareResult> prepare_set_tensor(uint32_t docid, const Tensor& tensor) const override {
+ ++prepare_set_tensor_cnt;
+ return std::make_unique<MockPrepareResult>(docid, tensor);
+ }
+
+ virtual void complete_set_tensor(DocId docid, const Tensor& tensor, std::unique_ptr<PrepareResult> prepare_result) override {
+ ++complete_set_tensor_cnt;
+ assert(prepare_result);
+ auto* mock_result = dynamic_cast<MockPrepareResult*>(prepare_result.get());
+ assert(mock_result);
+ EXPECT_EQ(docid, mock_result->docid);
+ EXPECT_EQ(tensor, mock_result->tensor);
+ }
+};
+
+const vespalib::string dense_tensor = "tensor(x[2])";
+
AVConfig
get_tensor_config(bool allow_multi_threaded_indexing)
{
AVConfig cfg(AVBasicType::TENSOR);
- cfg.setTensorType(ValueType::from_spec("tensor(x[2])"));
+ cfg.setTensorType(ValueType::from_spec(dense_tensor));
cfg.set_hnsw_index_params(HnswIndexParams(4, 4, DistanceMetric::Euclidean, allow_multi_threaded_indexing));
return cfg;
}
+std::shared_ptr<MockDenseTensorAttribute>
+make_mock_tensor_attribute(const vespalib::string& name, bool allow_multi_threaded_indexing)
+{
+ auto cfg = get_tensor_config(allow_multi_threaded_indexing);
+ return std::make_shared<MockDenseTensorAttribute>(name, cfg);
+}
+
TEST_F(AttributeWriterTest, tensor_attributes_using_two_phase_put_are_in_separate_write_contexts)
{
addAttribute("a1");
@@ -793,6 +842,34 @@ TEST_F(AttributeWriterTest, tensor_attributes_using_two_phase_put_are_in_separat
EXPECT_EQ("t2", ctx[2].getFields()[0].getAttribute().getName());
}
+TEST_F(AttributeWriterTest, handles_put_in_two_phases_when_specified_for_tensor_attribute)
+{
+ setup(2);
+ auto a1 = make_mock_tensor_attribute("a1", true);
+ add_attribute(a1);
+ Schema schema = createTensorSchema(dense_tensor);
+ DocBuilder builder(schema);
+ auto tensor = make_tensor(TensorSpec(dense_tensor)
+ .add({{"x", 0}}, 3).add({{"x", 1}}, 5));
+ auto doc = createTensorPutDoc(builder, *tensor);
+
+ put(1, *doc, 1);
+ EXPECT_EQ(1, a1->prepare_set_tensor_cnt);
+ EXPECT_EQ(1, a1->complete_set_tensor_cnt);
+ assertExecuteHistory({1, 0});
+
+ put(2, *doc, 2);
+ EXPECT_EQ(2, a1->prepare_set_tensor_cnt);
+ EXPECT_EQ(2, a1->complete_set_tensor_cnt);
+ assertExecuteHistory({1, 0, 0, 0});
+
+ put(3, *doc, 3);
+ EXPECT_EQ(3, a1->prepare_set_tensor_cnt);
+ EXPECT_EQ(3, a1->complete_set_tensor_cnt);
+ // Note that the prepare step is executed round-robin between the 2 threads.
+ assertExecuteHistory({1, 0, 0, 0, 1, 0});
+}
+
ImportedAttributeVector::SP
createImportedAttribute(const vespalib::string &name)
diff --git a/searchlib/src/vespa/searchlib/index/doctypebuilder.cpp b/searchlib/src/vespa/searchlib/index/doctypebuilder.cpp
index a7ad475d6aa..1491e3e21de 100644
--- a/searchlib/src/vespa/searchlib/index/doctypebuilder.cpp
+++ b/searchlib/src/vespa/searchlib/index/doctypebuilder.cpp
@@ -10,38 +10,44 @@ using namespace document;
namespace search::index {
namespace {
-TensorDataType tensorDataType(vespalib::eval::ValueType::from_spec("tensor(x{}, y{})"));
+const vespalib::string default_tensor_data_type = "tensor(x{},y{})";
-const DataType *convert(Schema::DataType type) {
+vespalib::string
+resolve_tensor_spec(const vespalib::string& field_tensor_spec)
+{
+ return field_tensor_spec.empty() ? default_tensor_data_type : field_tensor_spec;
+}
+
+DataType::Type convert(Schema::DataType type) {
switch (type) {
case schema::DataType::BOOL:
case schema::DataType::UINT2:
case schema::DataType::UINT4:
case schema::DataType::INT8:
- return DataType::BYTE;
+ return DataType::T_BYTE;
case schema::DataType::INT16:
- return DataType::SHORT;
+ return DataType::T_SHORT;
case schema::DataType::INT32:
- return DataType::INT;
+ return DataType::T_INT;
case schema::DataType::INT64:
- return DataType::LONG;
+ return DataType::T_LONG;
case schema::DataType::FLOAT:
- return DataType::FLOAT;
+ return DataType::T_FLOAT;
case schema::DataType::DOUBLE:
- return DataType::DOUBLE;
+ return DataType::T_DOUBLE;
case schema::DataType::STRING:
- return DataType::STRING;
+ return DataType::T_STRING;
case schema::DataType::RAW:
- return DataType::RAW;
+ return DataType::T_RAW;
case schema::DataType::BOOLEANTREE:
- return DataType::PREDICATE;
+ return DataType::T_PREDICATE;
case schema::DataType::TENSOR:
- return &tensorDataType;
+ return DataType::T_TENSOR;
default:
break;
}
assert(!"Unknown datatype in schema");
- return 0;
+ return DataType::MAX;
}
void
@@ -142,12 +148,12 @@ document::DocumenttypesConfig DocTypeBuilder::makeConfig() const {
if (usf != usedFields.end()) {
continue; // taken as index field
}
- const DataType *primitiveType = convert(field.getDataType());
- if (primitiveType->getId() == DataType::T_TENSOR) {
- header_struct.addTensorField(field.getName(), dynamic_cast<const TensorDataType &>(*primitiveType).getTensorType().to_spec());
+ auto type_id = convert(field.getDataType());
+ if (type_id == DataType::T_TENSOR) {
+ header_struct.addTensorField(field.getName(), resolve_tensor_spec(field.get_tensor_spec()));
} else {
header_struct.addField(field.getName(), type_cache.getType(
- primitiveType->getId(), field.getCollectionType()));
+ type_id, field.getCollectionType()));
}
usedFields.insert(field.getName());
}
@@ -158,12 +164,12 @@ document::DocumenttypesConfig DocTypeBuilder::makeConfig() const {
if (usf != usedFields.end()) {
continue; // taken as index field or attribute field
}
- const DataType *primitiveType(convert(field.getDataType()));
- if (primitiveType->getId() == DataType::T_TENSOR) {
- header_struct.addTensorField(field.getName(), dynamic_cast<const TensorDataType &>(*primitiveType).getTensorType().to_spec());
+ auto type_id = convert(field.getDataType());
+ if (type_id == DataType::T_TENSOR) {
+ header_struct.addTensorField(field.getName(), resolve_tensor_spec(field.get_tensor_spec()));
} else {
header_struct.addField(field.getName(), type_cache.getType(
- primitiveType->getId(), field.getCollectionType()));
+ type_id, field.getCollectionType()));
}
usedFields.insert(field.getName());
}