diff options
author | Tor Brede Vekterli <vekterli@yahoo-inc.com> | 2017-02-03 11:37:16 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-02-03 11:37:16 +0100 |
commit | 710ea102b684cfa0d29b4f21c206494cc98a0218 (patch) | |
tree | 23e2685a7659432da067a22a287a7a13c82666f1 | |
parent | bcd8be27f5cb0182b1356fc07c486239589ae44d (diff) | |
parent | 01c81351d040ed343e060ea4388a015bdbd84843 (diff) |
Merge pull request #1670 from yahoo/vekterli/add-reference-type-and-field-value-to-cpp
Add reference type and field value to C++
24 files changed, 868 insertions, 52 deletions
diff --git a/document/src/tests/datatype/CMakeLists.txt b/document/src/tests/datatype/CMakeLists.txt index 1061b115be9..7b46ca675b3 100644 --- a/document/src/tests/datatype/CMakeLists.txt +++ b/document/src/tests/datatype/CMakeLists.txt @@ -8,3 +8,13 @@ vespa_add_executable(document_datatype_test_app TEST document_documentconfig ) vespa_add_test(NAME document_datatype_test_app COMMAND document_datatype_test_app) + +vespa_add_executable(document_referencedatatype_test_app TEST + SOURCES + referencedatatype_test.cpp + DEPENDS + document + AFTER + document_documentconfig +) +vespa_add_test(NAME document_referencedatatype_test_app COMMAND document_referencedatatype_test_app) diff --git a/document/src/tests/datatype/referencedatatype_test.cpp b/document/src/tests/datatype/referencedatatype_test.cpp new file mode 100644 index 00000000000..2844a84b6cc --- /dev/null +++ b/document/src/tests/datatype/referencedatatype_test.cpp @@ -0,0 +1,71 @@ +// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/document/base/field.h> +#include <vespa/document/datatype/referencedatatype.h> +#include <vespa/document/fieldvalue/referencefieldvalue.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/util/exceptions.h> +#include <ostream> +#include <sstream> + +using namespace document; + +struct Fixture { + DocumentType docType{"foo"}; + ReferenceDataType refType{docType, 12345}; +}; + +TEST_F("Constructor generates type-parameterized name and sets type ID", Fixture) { + EXPECT_EQUAL("Reference<foo>", f.refType.getName()); + EXPECT_EQUAL(12345, f.refType.getId()); +} + +TEST_F("Target document type is accessible via data type", Fixture) { + EXPECT_EQUAL(f.docType, f.refType.getTargetType()); +} + +TEST_F("Empty ReferenceFieldValue instances can be created from type", Fixture) { + auto fv = f.refType.createFieldValue(); + ASSERT_TRUE(fv.get() != nullptr); + ASSERT_TRUE(dynamic_cast<ReferenceFieldValue*>(fv.get()) != nullptr); + EXPECT_EQUAL(&f.refType, fv->getDataType()); +} + +TEST_F("operator== checks document type and type ID", Fixture) { + EXPECT_NOT_EQUAL(f.refType, *DataType::STRING); + EXPECT_EQUAL(f.refType, f.refType); + + DocumentType otherDocType("bar"); + ReferenceDataType refWithDifferentType(otherDocType, 12345); + ReferenceDataType refWithSameTypeDifferentId(f.docType, 56789); + + EXPECT_NOT_EQUAL(f.refType, refWithDifferentType); + EXPECT_NOT_EQUAL(f.refType, refWithSameTypeDifferentId); +} + +TEST_F("clone() creates new type instance equal to old instance", Fixture) { + std::unique_ptr<ReferenceDataType> cloned(f.refType.clone()); + ASSERT_TRUE(cloned.get() != nullptr); + EXPECT_EQUAL(f.refType, *cloned); +} + +TEST_F("print() emits type name and id", Fixture) { + std::ostringstream ss; + f.refType.print(ss, true, ""); + EXPECT_EQUAL("ReferenceDataType(foo, id 12345)", ss.str()); +} + +TEST_F("buildFieldPath returns empty path for empty input", Fixture) { + auto fp = f.refType.buildFieldPath(""); + ASSERT_TRUE(fp.get() != nullptr); + EXPECT_TRUE(fp->empty()); +} + +TEST_F("buildFieldPath throws IllegalArgumentException for non-empty input", Fixture) { + EXPECT_EXCEPTION(f.refType.buildFieldPath("herebedragons"), + vespalib::IllegalArgumentException, + "Reference data type does not support further field " + "recursion: 'herebedragons'"); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/document/src/tests/fieldvalue/CMakeLists.txt b/document/src/tests/fieldvalue/CMakeLists.txt index c44cca690b7..852ff35ea52 100644 --- a/document/src/tests/fieldvalue/CMakeLists.txt +++ b/document/src/tests/fieldvalue/CMakeLists.txt @@ -26,3 +26,13 @@ vespa_add_executable(document_predicatefieldvalue_test_app TEST document_documentconfig ) vespa_add_test(NAME document_predicatefieldvalue_test_app COMMAND document_predicatefieldvalue_test_app) + +vespa_add_executable(document_referencefieldvalue_test_app TEST + SOURCES + referencefieldvalue_test.cpp + DEPENDS + document + AFTER + document_documentconfig +) +vespa_add_test(NAME document_referencefieldvalue_test_app COMMAND document_referencefieldvalue_test_app) diff --git a/document/src/tests/fieldvalue/referencefieldvalue_test.cpp b/document/src/tests/fieldvalue/referencefieldvalue_test.cpp new file mode 100644 index 00000000000..1399c71fcc3 --- /dev/null +++ b/document/src/tests/fieldvalue/referencefieldvalue_test.cpp @@ -0,0 +1,193 @@ +// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/document/base/field.h> +#include <vespa/document/datatype/referencedatatype.h> +#include <vespa/document/fieldvalue/referencefieldvalue.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/util/exceptions.h> +#include <ostream> +#include <sstream> + +using namespace document; + +namespace { + +struct Fixture { + DocumentType docType{"foo"}; + ReferenceDataType refType{docType, 12345}; + + DocumentType otherDocType{"bar"}; + ReferenceDataType otherRefType{otherDocType, 54321}; +}; + +} + +using vespalib::IllegalArgumentException; + +TEST_F("Default-constructed reference is empty and bound to type", Fixture) { + ReferenceFieldValue fv(f.refType); + ASSERT_TRUE(fv.getDataType() != nullptr); + EXPECT_EQUAL(f.refType, *fv.getDataType()); + ASSERT_FALSE(fv.hasValidDocumentId()); +} + +TEST_F("Reference can be constructed with document ID", Fixture) { + ReferenceFieldValue fv(f.refType, DocumentId("id:ns:foo::itsa-me")); + ASSERT_TRUE(fv.getDataType() != nullptr); + EXPECT_EQUAL(f.refType, *fv.getDataType()); + ASSERT_TRUE(fv.hasValidDocumentId()); + EXPECT_EQUAL(DocumentId("id:ns:foo::itsa-me"), fv.getDocumentId()); +} + +TEST_F("Newly constructed reference is marked as changed", Fixture) { + ReferenceFieldValue fv(f.refType); + EXPECT_TRUE(fv.hasChanged()); + + ReferenceFieldValue fv2(f.refType, DocumentId("id:ns:foo::itsa-me")); + EXPECT_TRUE(fv2.hasChanged()); +} + +TEST_F("Exception is thrown if constructor doc ID type does not match referenced document type", Fixture) { + EXPECT_EXCEPTION( + ReferenceFieldValue(f.refType, DocumentId("id:ns:bar::wario-time")), + IllegalArgumentException, + "Can't assign document ID 'id:ns:bar::wario-time' (of type 'bar') " + "to reference of document type 'foo'"); +} + +TEST_F("Exception is thrown if doc ID does not have a type", Fixture) { + // Could have had a special cased message for this, but type-less IDs are + // not expected to be allowed through the feed pipeline at all. We just + // want to ensure it fails in a controlled fashion if encountered. + EXPECT_EXCEPTION( + ReferenceFieldValue(f.refType, DocumentId("doc:foo:bario")), + IllegalArgumentException, + "Can't assign document ID 'doc:foo:bario' (of type '') " + "to reference of document type 'foo'"); +} + +TEST_F("assign()ing a non-reference field value throws exception", Fixture) { + ReferenceFieldValue fv(f.refType); + EXPECT_EXCEPTION(fv.assign(StringFieldValue("waluigi time!!")), + IllegalArgumentException, + "Can't assign field value of type String to a " + "ReferenceFieldValue"); +} + +TEST_F("Can explicitly assign new document ID to reference", Fixture) { + ReferenceFieldValue fv(f.refType); + fv.setDeserializedDocumentId(DocumentId("id:ns:foo::yoshi-eggs")); + + ASSERT_TRUE(fv.hasValidDocumentId()); + EXPECT_EQUAL(DocumentId("id:ns:foo::yoshi-eggs"), fv.getDocumentId()); + // Type remains unchanged + EXPECT_EQUAL(f.refType, *fv.getDataType()); +} + +TEST_F("Assigning explicit document ID clears changed-flag", Fixture) { + ReferenceFieldValue fv(f.refType); + fv.setDeserializedDocumentId(DocumentId("id:ns:foo::yoshi-eggs")); + EXPECT_FALSE(fv.hasChanged()); +} + +TEST_F("Exception is thrown if explicitly assigned doc ID does not have same type as reference target type", Fixture) { + ReferenceFieldValue fv(f.refType); + + EXPECT_EXCEPTION( + fv.setDeserializedDocumentId(DocumentId("id:ns:bar::another-castle")), + IllegalArgumentException, + "Can't assign document ID 'id:ns:bar::another-castle' (of type " + "'bar') to reference of document type 'foo'"); +} + +TEST_F("assign()ing another reference field value assigns doc ID and type", Fixture) { + ReferenceFieldValue src(f.refType, DocumentId("id:ns:foo::yoshi")); + ReferenceFieldValue dest(f.otherRefType); + + dest.assign(src); + ASSERT_TRUE(dest.hasValidDocumentId()); + EXPECT_EQUAL(src.getDocumentId(), dest.getDocumentId()); + EXPECT_EQUAL(src.getDataType(), dest.getDataType()); +} + +// Different FieldValue subclasses actually disagree on whether this should be +// the case, e.g. LiteralFieldValue and TensorFieldValue. We go with the +// latter's approach, as that should be the most conservative one. +TEST_F("assign() marks assignee as changed", Fixture) { + ReferenceFieldValue src(f.refType, DocumentId("id:ns:foo::yoshi")); + ReferenceFieldValue dest(f.refType); + + dest.setDeserializedDocumentId(DocumentId("id:ns:foo::yoshi-eggs")); + EXPECT_FALSE(dest.hasChanged()); + + dest.assign(src); + EXPECT_TRUE(dest.hasChanged()); +} + +TEST_F("clone()ing creates new instance with same ID and type", Fixture) { + ReferenceFieldValue src(f.refType, DocumentId("id:ns:foo::yoshi")); + + std::unique_ptr<ReferenceFieldValue> cloned(src.clone()); + ASSERT_TRUE(cloned.get() != nullptr); + ASSERT_TRUE(cloned->hasValidDocumentId()); + EXPECT_EQUAL(src.getDocumentId(), cloned->getDocumentId()); + EXPECT_EQUAL(src.getDataType(), cloned->getDataType()); + EXPECT_TRUE(cloned->hasChanged()); +} + +TEST_F("Can clone() value without document ID", Fixture) { + ReferenceFieldValue src(f.refType); + + std::unique_ptr<ReferenceFieldValue> cloned(src.clone()); + ASSERT_TRUE(cloned.get() != nullptr); + EXPECT_FALSE(cloned->hasValidDocumentId()); + EXPECT_EQUAL(src.getDataType(), cloned->getDataType()); + EXPECT_TRUE(cloned->hasChanged()); +} + +TEST_F("compare() orders first on type ID, then on document ID", Fixture) { + // foo type has id 12345 + ReferenceFieldValue fvType1Id1(f.refType, DocumentId("id:ns:foo::AA")); + ReferenceFieldValue fvType1Id2(f.refType, DocumentId("id:ns:foo::AB")); + // bar type has id 54321 + ReferenceFieldValue fvType2Id1(f.otherRefType, DocumentId("id:ns:bar::AA")); + ReferenceFieldValue fvType2Id2(f.otherRefType, DocumentId("id:ns:bar::AB")); + + // Different types + EXPECT_TRUE(fvType1Id1.compare(fvType2Id1) < 0); + EXPECT_TRUE(fvType2Id1.compare(fvType1Id1) > 0); + + // Same types, different IDs + EXPECT_TRUE(fvType1Id1.compare(fvType1Id2) < 0); + EXPECT_TRUE(fvType1Id2.compare(fvType1Id1) > 0); + EXPECT_TRUE(fvType2Id1.compare(fvType2Id2) < 0); + + // Different types and IDs + EXPECT_TRUE(fvType1Id1.compare(fvType2Id2) < 0); + EXPECT_TRUE(fvType2Id2.compare(fvType1Id1) > 0); + + // Equal types and ID + EXPECT_EQUAL(0, fvType1Id1.compare(fvType1Id1)); + EXPECT_EQUAL(0, fvType1Id2.compare(fvType1Id2)); + EXPECT_EQUAL(0, fvType2Id1.compare(fvType2Id1)); +} + +TEST_F("print() includes reference type and document ID", Fixture) { + ReferenceFieldValue src(f.refType, DocumentId("id:ns:foo::yoshi")); + std::ostringstream ss; + src.print(ss, false, ""); + EXPECT_EQUAL("ReferenceFieldValue(ReferenceDataType(foo, id 12345), " + "DocumentId(id:ns:foo::yoshi))", ss.str()); +} + +TEST_F("print() only indents start of output line", Fixture) { + ReferenceFieldValue src(f.refType, DocumentId("id:ns:foo::yoshi")); + std::ostringstream ss; + src.print(ss, false, " "); + EXPECT_EQUAL(" ReferenceFieldValue(ReferenceDataType(foo, id 12345), " + "DocumentId(id:ns:foo::yoshi))", ss.str()); +} + +TEST_MAIN() { TEST_RUN_ALL(); } + diff --git a/document/src/tests/repo/documenttyperepo_test.cpp b/document/src/tests/repo/documenttyperepo_test.cpp index b3ff4ce098e..38ae51b980f 100644 --- a/document/src/tests/repo/documenttyperepo_test.cpp +++ b/document/src/tests/repo/documenttyperepo_test.cpp @@ -500,4 +500,34 @@ TEST("Require that Array can have nested DocumentType") { ASSERT_TRUE(type); } +TEST("Reference fields are resolved to correct reference type") { + const int doc_with_refs_id = 5678; + const int type_2_id = doc_type_id + 1; + const int ref1_id = 777; + const int ref2_id = 888; + DocumenttypesConfigBuilderHelper builder; + builder.document(doc_type_id, type_name, + Struct(header_name), Struct(body_name)); + builder.document(type_2_id, type_name_2, + Struct(header_name_2), Struct(body_name_2)); + builder.document(doc_with_refs_id, "doc_with_refs", + Struct("doc_with_refs.header") + .addField("ref1", ref1_id), + Struct("doc_with_refs.body") + .addField("ref2", ref2_id) + .addField("ref3", ref1_id)) + .referenceType(ref1_id, doc_type_id) + .referenceType(ref2_id, type_2_id); + + DocumentTypeRepo repo(builder.config()); + const DocumentType *type = repo.getDocumentType(doc_with_refs_id); + ASSERT_TRUE(type != nullptr); + const auto* ref1_type(repo.getDataType(*type, ref1_id)); + const auto* ref2_type(repo.getDataType(*type, ref2_id)); + + EXPECT_EQUAL(*ref1_type, type->getFieldsType().getField("ref1").getDataType()); + EXPECT_EQUAL(*ref2_type, type->getFieldsType().getField("ref2").getDataType()); + EXPECT_EQUAL(*ref1_type, type->getFieldsType().getField("ref3").getDataType()); +} + TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/document/src/tests/serialization/vespadocumentserializer_test.cpp b/document/src/tests/serialization/vespadocumentserializer_test.cpp index 9ef9f80aaee..a1436b954e7 100644 --- a/document/src/tests/serialization/vespadocumentserializer_test.cpp +++ b/document/src/tests/serialization/vespadocumentserializer_test.cpp @@ -24,6 +24,7 @@ #include <vespa/document/fieldvalue/structfieldvalue.h> #include <vespa/document/fieldvalue/weightedsetfieldvalue.h> #include <vespa/document/fieldvalue/tensorfieldvalue.h> +#include <vespa/document/fieldvalue/referencefieldvalue.h> #include <vespa/document/predicate/predicate.h> #include <vespa/document/predicate/predicate_slime_builder.h> #include <vespa/document/repo/configbuilder.h> @@ -70,6 +71,12 @@ const int a_id = 12345; const string a_name = "annotation"; const int predicate_doc_type_id = 321; const string predicate_field_name = "my_predicate"; +const int doc_with_ref_type_id = 54321; +const string doc_with_ref_name = "doc_with_ref"; +const string ref_field_name = "ref_field"; +const int ref_type_id = 789; + +constexpr uint16_t serialization_version = Document::getNewestSerializationVersion(); DocumenttypesConfig getDocTypesConfig() { DocumenttypesConfigBuilderHelper builder; @@ -92,25 +99,41 @@ DocumenttypesConfig getDocTypesConfig() { Struct("my_type.header"), Struct("my_type.body") .addField(predicate_field_name, DataType::T_PREDICATE)); + builder.document(doc_with_ref_type_id, doc_with_ref_name, + Struct(doc_with_ref_name + ".header"), + Struct(doc_with_ref_name + ".body") + .addField(ref_field_name, ref_type_id)) + .referenceType(ref_type_id, doc_type_id); return builder.config(); } const DocumentTypeRepo doc_repo(getDocTypesConfig()); const FixedTypeRepo repo(doc_repo, *doc_repo.getDocumentType(doc_type_id)); -template <typename T> T newFieldValue(const T&) { return T(); } -template <> ArrayFieldValue newFieldValue(const ArrayFieldValue &value) -{ return ArrayFieldValue(*value.getDataType()); } -template <> MapFieldValue newFieldValue(const MapFieldValue &value) -{ return MapFieldValue(*value.getDataType()); } +template <typename T> T newFieldValue(const T&) { + return T(); +} +template <> ArrayFieldValue newFieldValue(const ArrayFieldValue &value) { + return ArrayFieldValue(*value.getDataType()); +} +template <> MapFieldValue newFieldValue(const MapFieldValue &value) { + return MapFieldValue(*value.getDataType()); +} template <> WeightedSetFieldValue newFieldValue( - const WeightedSetFieldValue &value) -{ return WeightedSetFieldValue(*value.getDataType()); } -template <> StructFieldValue newFieldValue(const StructFieldValue &value) -{ return StructFieldValue(*value.getDataType()); } + const WeightedSetFieldValue &value) { + return WeightedSetFieldValue(*value.getDataType()); +} +template <> StructFieldValue newFieldValue(const StructFieldValue &value) { + return StructFieldValue(*value.getDataType()); +} template <> AnnotationReferenceFieldValue newFieldValue( - const AnnotationReferenceFieldValue &value) -{ return AnnotationReferenceFieldValue(*value.getDataType()); } + const AnnotationReferenceFieldValue &value) { + return AnnotationReferenceFieldValue(*value.getDataType()); +} +template <> ReferenceFieldValue newFieldValue(const ReferenceFieldValue& value) { + return ReferenceFieldValue(dynamic_cast<const ReferenceDataType&>( + *value.getDataType())); +} template<typename T> void testDeserializeAndClone(const T& value, const nbostream &stream, bool checkEqual=true) { @@ -118,7 +141,7 @@ void testDeserializeAndClone(const T& value, const nbostream &stream, bool check vespalib::MallocPtr buf(stream.size()); memcpy(buf.str(), stream.peek(), stream.size()); nbostream_longlivedbuf is(buf.c_str(), buf.size()); - VespaDocumentDeserializer deserializer(repo, is, 8); + VespaDocumentDeserializer deserializer(repo, is, serialization_version); deserializer.read(read_value); EXPECT_EQUAL(0u, is.size()); @@ -134,7 +157,8 @@ void testDeserializeAndClone(const T& value, const nbostream &stream, bool check // Leaves the stream's read position at the start of the serialized object. template<typename T> -void serializeAndDeserialize(const T& value, nbostream &stream, bool checkEqual=true) { +void serializeAndDeserialize(const T& value, nbostream &stream, + const FixedTypeRepo& fixed_repo, bool checkEqual = true) { size_t start_size = stream.size(); VespaDocumentSerializer serializer(stream); serializer.write(value); @@ -143,7 +167,7 @@ void serializeAndDeserialize(const T& value, nbostream &stream, bool checkEqual= testDeserializeAndClone(value, stream, checkEqual); T read_value = newFieldValue(value); - VespaDocumentDeserializer deserializer(repo, stream, 8); + VespaDocumentDeserializer deserializer(fixed_repo, stream, serialization_version); deserializer.read(read_value); EXPECT_EQUAL(0u, stream.size()); @@ -153,6 +177,11 @@ void serializeAndDeserialize(const T& value, nbostream &stream, bool checkEqual= stream.adjustReadPos(-serialized_size); } +template<typename T> +void serializeAndDeserialize(const T& value, nbostream &stream, bool checkEqual=true) { + serializeAndDeserialize(value, stream, repo, checkEqual); +} + template <typename T> struct ValueType { typedef typename T::Number Type; }; template <> struct ValueType<IntFieldValue> { typedef uint32_t Type; }; template <> struct ValueType<LongFieldValue> { typedef uint64_t Type; }; @@ -230,7 +259,7 @@ TEST("requireThatStringFieldValueCanBeSerialized") { TEST_DO(checkStringFieldValueWithAnnotation()); } -TEST("require that strings can be redesrialized") { +TEST("require that strings can be re-deserialized") { StringFieldValue value("foo"); nbostream streamNotAnnotated; VespaDocumentSerializer serializer(streamNotAnnotated); @@ -249,13 +278,15 @@ TEST("require that strings can be redesrialized") { StringFieldValue deserialized; { - VespaDocumentDeserializer deserializer(repo, streamAnnotated, 8); + VespaDocumentDeserializer deserializer( + repo, streamAnnotated, serialization_version); deserializer.read(deserialized); } EXPECT_EQUAL("foo", deserialized.getValueRef()); EXPECT_TRUE(deserialized.hasSpanTrees()); { - VespaDocumentDeserializer deserializer(repo, streamNotAnnotated, 8); + VespaDocumentDeserializer deserializer( + repo, streamNotAnnotated, serialization_version); deserializer.read(deserialized); } EXPECT_EQUAL("foo", deserialized.getValueRef()); @@ -506,7 +537,7 @@ TEST("requireThatReserializationPreservesCompressionIfUnmodified") { StructDataType struct_type(getStructDataType()); StructFieldValue value2(struct_type); - VespaDocumentDeserializer deserializer(repo, os, 8); + VespaDocumentDeserializer deserializer(repo, os, serialization_version); deserializer.read(value2); checkStructSerialization(value, CompressionConfig::LZ4); // No lazy serialization of structs anymore, only documents @@ -517,7 +548,6 @@ TEST("requireThatReserializationPreservesCompressionIfUnmodified") { template <typename T, int N> int arraysize(const T (&)[N]) { return N; } TEST("requireThatDocumentCanBeSerialized") { - const uint32_t serialization_version = 8; const DocumentType &type = repo.getDocumentType(); DocumentId doc_id("doc::testdoc"); @@ -584,8 +614,6 @@ TEST("requireThatUnmodifiedDocumentRetainsUnknownFieldOnSerialization") { DocumentTypeRepo repo1Field(builder1.config()); DocumentTypeRepo repo2Fields(builder2.config()); - uint32_t serial_version = 8; - DocumentId doc_id("doc::testdoc"); Document value(*repo2Fields.getDocumentType(doc_type_id), doc_id); @@ -598,7 +626,7 @@ TEST("requireThatUnmodifiedDocumentRetainsUnknownFieldOnSerialization") { Document read_value; // Deserialize+serialize with type where field1 is not known. - VespaDocumentDeserializer deserializer1(repo1Field, stream, serial_version); + VespaDocumentDeserializer deserializer1(repo1Field, stream, serialization_version); deserializer1.read(read_value); EXPECT_EQUAL(0u, stream.size()); @@ -609,7 +637,7 @@ TEST("requireThatUnmodifiedDocumentRetainsUnknownFieldOnSerialization") { Document read_value_2; // Field should not have vanished. - VespaDocumentDeserializer deserializer2(repo2Fields, stream, serial_version); + VespaDocumentDeserializer deserializer2(repo2Fields, stream, serialization_version); deserializer2.read(read_value_2); EXPECT_EQUAL(value, read_value_2); } @@ -660,7 +688,7 @@ TEST("requireThatReadDocumentTypeThrowsIfUnknownType") { stream << static_cast<uint16_t>(0); // version (unused) DocumentType value; - VespaDocumentDeserializer deserializer(repo, stream, 8); + VespaDocumentDeserializer deserializer(repo, stream, serialization_version); EXPECT_EXCEPTION(deserializer.read(value), DocumentTypeNotFoundException, "Document type " + my_type + " not found"); } @@ -698,7 +726,7 @@ void deserializeAndCheck(const string &file_name, FieldValueT &value, nbostream_longlivedbuf stream(&content[0], content.size()); Document doc; - VespaDocumentDeserializer deserializer(myrepo, stream, 8); + VespaDocumentDeserializer deserializer(myrepo, stream, serialization_version); deserializer.read(doc); ASSERT_EQUAL(0, value.compare(*doc.getValue(field_name))); @@ -850,6 +878,55 @@ TEST("Require that tensor deserialization matches Java") { { "dimX", "dimY" })); } +struct RefFixture { + FixedTypeRepo fixed_repo{doc_repo, *doc_repo.getDocumentType(doc_with_ref_type_id)}; + + const ReferenceDataType& ref_type() const { + auto* raw_type = fixed_repo.getDataType(ref_type_id); + assert(raw_type != nullptr); + return dynamic_cast<const ReferenceDataType&>(*raw_type); + } + + void roundtrip_serialize(const ReferenceFieldValue& src, ReferenceFieldValue& dest) { + nbostream stream; + VespaDocumentSerializer serializer(stream); + serializer.write(src); + + VespaDocumentDeserializer deserializer(fixed_repo, stream, serialization_version); + deserializer.read(dest); + } +}; + +TEST_F("Empty ReferenceFieldValue can be roundtrip serialized", RefFixture) { + ReferenceFieldValue empty_ref(f.ref_type()); + nbostream stream; + serializeAndDeserialize(empty_ref, stream, f.fixed_repo); +} + +TEST_F("ReferenceFieldValue with ID can be roundtrip serialized", RefFixture) { + ReferenceFieldValue ref_with_id( + f.ref_type(), DocumentId("id:ns:" + doc_name + "::foo")); + nbostream stream; + serializeAndDeserialize(ref_with_id, stream, f.fixed_repo); +} + +TEST_F("Empty ReferenceFieldValue has changed-flag cleared after deserialization", RefFixture) { + ReferenceFieldValue src(f.ref_type()); + ReferenceFieldValue dest(f.ref_type()); + f.roundtrip_serialize(src, dest); + + EXPECT_FALSE(dest.hasChanged()); +} + +TEST_F("ReferenceFieldValue with ID has changed-flag cleared after deserialization", RefFixture) { + ReferenceFieldValue src( + f.ref_type(), DocumentId("id:ns:" + doc_name + "::foo")); + ReferenceFieldValue dest(f.ref_type()); + f.roundtrip_serialize(src, dest); + + EXPECT_FALSE(dest.hasChanged()); +} + } // namespace TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/document/src/vespa/document/config/documenttypes.def b/document/src/vespa/document/config/documenttypes.def index 21cde02ad54..e8f045a3747 100644 --- a/document/src/vespa/document/config/documenttypes.def +++ b/document/src/vespa/document/config/documenttypes.def @@ -100,3 +100,11 @@ documenttype[].annotationtype[].inherits[].id int ## Field sets documenttype[].fieldsets{}.fields[] string + +## ID of reference type. This is a regular data type, but is kept in its own +## array to avoid polluting the existing datatype array with a new default +## field value. +documenttype[].referencetype[].id int + +## Numeric ID of the document type instances of the reference point to. +documenttype[].referencetype[].target_type_id int diff --git a/document/src/vespa/document/datatype/CMakeLists.txt b/document/src/vespa/document/datatype/CMakeLists.txt index e6826075762..2f46ae29166 100644 --- a/document/src/vespa/document/datatype/CMakeLists.txt +++ b/document/src/vespa/document/datatype/CMakeLists.txt @@ -15,6 +15,7 @@ vespa_add_library(document_datatypes OBJECT structureddatatype.cpp urldatatype.cpp weightedsetdatatype.cpp + referencedatatype.cpp DEPENDS AFTER document_documentconfig diff --git a/document/src/vespa/document/datatype/referencedatatype.cpp b/document/src/vespa/document/datatype/referencedatatype.cpp new file mode 100644 index 00000000000..add40b16c44 --- /dev/null +++ b/document/src/vespa/document/datatype/referencedatatype.cpp @@ -0,0 +1,46 @@ +// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "referencedatatype.h" +#include <vespa/document/fieldvalue/referencefieldvalue.h> +#include <vespa/vespalib/util/exceptions.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <ostream> + +namespace document { + +ReferenceDataType::ReferenceDataType(const DocumentType& targetDocType, int id) + : DataType(vespalib::make_string("Reference<%s>", targetDocType.getName().c_str()), id), + _targetDocType(targetDocType) +{ +} + +ReferenceDataType::~ReferenceDataType() { +} + +std::unique_ptr<FieldValue> ReferenceDataType::createFieldValue() const { + return std::make_unique<ReferenceFieldValue>(*this); +} + +void ReferenceDataType::print(std::ostream& os, bool verbose, const std::string& indent) const { + (void) verbose; + (void) indent; + os << "ReferenceDataType(" << _targetDocType.getName() + << ", id " << getId() << ')'; +} + +ReferenceDataType* ReferenceDataType::clone() const { + return new ReferenceDataType(_targetDocType, getId()); +} + +std::unique_ptr<FieldPath> ReferenceDataType::onBuildFieldPath( + const vespalib::stringref& remainingFieldName) const { + if (!remainingFieldName.empty()) { + throw vespalib::IllegalArgumentException( + vespalib::make_string("Reference data type does not support " + "further field recursion: '%s'", + remainingFieldName.c_str()), VESPA_STRLOC); + } + return std::make_unique<FieldPath>(); +} + +} // document diff --git a/document/src/vespa/document/datatype/referencedatatype.h b/document/src/vespa/document/datatype/referencedatatype.h new file mode 100644 index 00000000000..5a39addead1 --- /dev/null +++ b/document/src/vespa/document/datatype/referencedatatype.h @@ -0,0 +1,30 @@ +// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "documenttype.h" + +namespace document { + +/** + * A ReferenceDataType specifies a particular concrete document type that a + * ReferenceFieldValue instance binds to. + */ +class ReferenceDataType : public DataType { + const DocumentType& _targetDocType; +public: + ReferenceDataType(const DocumentType& targetDocType, int id); + ~ReferenceDataType(); + + const DocumentType& getTargetType() const noexcept { + return _targetDocType; + } + + std::unique_ptr<FieldValue> createFieldValue() const override; + void print(std::ostream&, bool verbose, const std::string& indent) const override; + ReferenceDataType* clone() const override; + std::unique_ptr<FieldPath> onBuildFieldPath( + const vespalib::stringref& remainingFieldName) const override; +}; + +} // document diff --git a/document/src/vespa/document/fieldvalue/CMakeLists.txt b/document/src/vespa/document/fieldvalue/CMakeLists.txt index 759d4d42b40..8c76db54d5f 100644 --- a/document/src/vespa/document/fieldvalue/CMakeLists.txt +++ b/document/src/vespa/document/fieldvalue/CMakeLists.txt @@ -23,6 +23,7 @@ vespa_add_library(document_fieldvalues OBJECT structuredfieldvalue.cpp tensorfieldvalue.cpp weightedsetfieldvalue.cpp + referencefieldvalue.cpp DEPENDS AFTER document_documentconfig diff --git a/document/src/vespa/document/fieldvalue/document.h b/document/src/vespa/document/fieldvalue/document.h index 9942b9a0997..cfaa7e33c00 100644 --- a/document/src/vespa/document/fieldvalue/document.h +++ b/document/src/vespa/document/fieldvalue/document.h @@ -36,7 +36,7 @@ public: typedef std::unique_ptr<Document> UP; typedef std::shared_ptr<Document> SP; - static uint16_t getNewestSerializationVersion() { return 8; }; + static constexpr uint16_t getNewestSerializationVersion() { return 8; } Document(); Document(const Document&); diff --git a/document/src/vespa/document/fieldvalue/fieldvaluevisitor.h b/document/src/vespa/document/fieldvalue/fieldvaluevisitor.h index e57b4394491..adaeaca9ad0 100644 --- a/document/src/vespa/document/fieldvalue/fieldvaluevisitor.h +++ b/document/src/vespa/document/fieldvalue/fieldvaluevisitor.h @@ -19,6 +19,7 @@ class StringFieldValue; class StructFieldValue; class WeightedSetFieldValue; class TensorFieldValue; +class ReferenceFieldValue; struct FieldValueVisitor { virtual ~FieldValueVisitor() {} @@ -39,6 +40,7 @@ struct FieldValueVisitor { virtual void visit(StructFieldValue &value) = 0; virtual void visit(WeightedSetFieldValue &value) = 0; virtual void visit(TensorFieldValue &value) = 0; + virtual void visit(ReferenceFieldValue& value) = 0; }; struct ConstFieldValueVisitor { @@ -60,6 +62,7 @@ struct ConstFieldValueVisitor { virtual void visit(const StructFieldValue &value) = 0; virtual void visit(const WeightedSetFieldValue &value) = 0; virtual void visit(const TensorFieldValue &value) = 0; + virtual void visit(const ReferenceFieldValue& value) = 0; }; } // namespace document diff --git a/document/src/vespa/document/fieldvalue/referencefieldvalue.cpp b/document/src/vespa/document/fieldvalue/referencefieldvalue.cpp new file mode 100644 index 00000000000..76b99e095e9 --- /dev/null +++ b/document/src/vespa/document/fieldvalue/referencefieldvalue.cpp @@ -0,0 +1,124 @@ +// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "referencefieldvalue.h" +#include <vespa/vespalib/util/exceptions.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <cassert> + +using vespalib::IllegalArgumentException; +using vespalib::make_string; + +namespace document { + +IMPLEMENT_IDENTIFIABLE(ReferenceFieldValue, FieldValue); + +ReferenceFieldValue::ReferenceFieldValue() + : _dataType(nullptr), + _documentId(), + _altered(true) +{ +} + +ReferenceFieldValue::ReferenceFieldValue(const ReferenceDataType& dataType) + : _dataType(&dataType), + _documentId(), + _altered(true) +{ +} + +ReferenceFieldValue::ReferenceFieldValue( + const ReferenceDataType& dataType, + const DocumentId& documentId) + : _dataType(&dataType), + _documentId(documentId), + _altered(true) +{ + requireIdOfMatchingType(_documentId, _dataType->getTargetType()); +} + +ReferenceFieldValue::~ReferenceFieldValue() { +} + +void ReferenceFieldValue::requireIdOfMatchingType( + const DocumentId& id, const DocumentType& type) +{ + if (id.getDocType() != type.getName()) { + throw IllegalArgumentException( + make_string("Can't assign document ID '%s' (of type '%s') to " + "reference of document type '%s'", + id.toString().c_str(), + id.getDocType().c_str(), + type.getName().c_str()), + VESPA_STRLOC); + } +} + +FieldValue& ReferenceFieldValue::assign(const FieldValue& rhs) { + const auto* refValueRhs(dynamic_cast<const ReferenceFieldValue*>(&rhs)); + if (refValueRhs == nullptr) { + throw IllegalArgumentException( + make_string("Can't assign field value of type %s to " + "a ReferenceFieldValue", + rhs.getDataType()->getName().c_str()), + VESPA_STRLOC); + } + if (refValueRhs == this) { + return *this; + } + _documentId = refValueRhs->_documentId; + _dataType = refValueRhs->_dataType; + _altered = true; + return *this; +} + +void ReferenceFieldValue::setDeserializedDocumentId(const DocumentId& id) { + assert(_dataType != nullptr); + requireIdOfMatchingType(id, _dataType->getTargetType()); + _documentId = id; + _altered = false; +} + +ReferenceFieldValue* ReferenceFieldValue::clone() const { + assert(_dataType != nullptr); + if (hasValidDocumentId()) { + return new ReferenceFieldValue(*_dataType, _documentId); + } else { + return new ReferenceFieldValue(*_dataType); + } +} + +int ReferenceFieldValue::compare(const FieldValue& rhs) const { + const int parentCompare = FieldValue::compare(rhs); + if (parentCompare != 0) { + return parentCompare; + } + // Type equality is checked by the parent. + const auto& refValueRhs(dynamic_cast<const ReferenceFieldValue&>(rhs)); + // TODO PERF: DocumentId does currently _not_ expose any methods that + // cheaply allow an ordering to be established. Only (in)equality operators. + // IdString::operator== is already implemented in the same way as this, so + // don't put this code in your inner loops, kids! + return _documentId.toString().compare(refValueRhs._documentId.toString()); +} + +void ReferenceFieldValue::print(std::ostream& os, bool verbose, const std::string& indent) const { + (void) verbose; + assert(_dataType != nullptr); + os << indent << "ReferenceFieldValue(" << *_dataType << ", DocumentId("; + _documentId.print(os, false, ""); + os << "))"; +} + +bool ReferenceFieldValue::hasChanged() const { + return _altered; +} + +void ReferenceFieldValue::accept(FieldValueVisitor& visitor) { + visitor.visit(*this); +} + +void ReferenceFieldValue::accept(ConstFieldValueVisitor& visitor) const { + visitor.visit(*this); +} + +} // document diff --git a/document/src/vespa/document/fieldvalue/referencefieldvalue.h b/document/src/vespa/document/fieldvalue/referencefieldvalue.h new file mode 100644 index 00000000000..7b77ef26248 --- /dev/null +++ b/document/src/vespa/document/fieldvalue/referencefieldvalue.h @@ -0,0 +1,78 @@ +// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "fieldvalue.h" +#include <vespa/document/datatype/referencedatatype.h> +#include <vespa/document/base/documentid.h> + +namespace document { + +/** + * A reference field value allows search queries to access fields in other + * document instances as if they were fields natively stored within the + * searched document. This allows modelling one-to-many relations such as a + * parent document with many children containing references back to the parent. + * + * Each ReferenceFieldValue may contain a single document ID which specifies the + * instance the field should refer to. This document ID must have a type + * matching that of the reference data type of the field itself. + * + * Note that references are not polymorphic. This means that if you have a + * document type "foo" inheriting "bar", you cannot have a reference<bar> field + * containing a document ID for a "foo" document. + */ +class ReferenceFieldValue : public FieldValue { + const ReferenceDataType* _dataType; + // TODO wrap in std::optional once available. + DocumentId _documentId; + bool _altered; +public: + // Empty constructor required for Identifiable. + ReferenceFieldValue(); + + explicit ReferenceFieldValue(const ReferenceDataType& dataType); + + ReferenceFieldValue(const ReferenceDataType& dataType, + const DocumentId& documentId); + + ~ReferenceFieldValue(); + + ReferenceFieldValue(const ReferenceFieldValue&) = default; + ReferenceFieldValue& operator=(const ReferenceFieldValue&) = default; + + bool hasValidDocumentId() const noexcept { + return _documentId.hasDocType(); + } + + // Returned value is only well-defined if hasValidDocumentId() == true. + const DocumentId& getDocumentId() const noexcept { + return _documentId; + } + + // Should only be called by deserializer code, as it will clear hasChanged. + // `id` must be a valid document ID and cannot be empty. + void setDeserializedDocumentId(const DocumentId& id); + void clearChanged() { + _altered = false; + } + + const DataType* getDataType() const override { return _dataType; } + FieldValue& assign(const FieldValue&) override; + ReferenceFieldValue* clone() const override; + int compare(const FieldValue&) const override; + void printXml(XmlOutputStream&) const override { /* Not implemented */ } + void print(std::ostream&, bool, const std::string&) const override; + bool hasChanged() const override; + void accept(FieldValueVisitor&) override; + void accept(ConstFieldValueVisitor&) const override; + + DECLARE_IDENTIFIABLE(ReferenceFieldValue); +private: + // Throws vespalib::IllegalArgumentException if doc type of `id` does not + // match the name of `type`. + static void requireIdOfMatchingType( + const DocumentId& id, const DocumentType& type); +}; + +} // document diff --git a/document/src/vespa/document/repo/configbuilder.h b/document/src/vespa/document/repo/configbuilder.h index a1c52b81024..11d5c99bee3 100644 --- a/document/src/vespa/document/repo/configbuilder.h +++ b/document/src/vespa/document/repo/configbuilder.h @@ -137,6 +137,12 @@ struct DocTypeRep { addType(type, doc_type); return annotationType(id, name, type.id); } + DocTypeRep& referenceType(int32_t id, int32_t target_type_id) { + doc_type.referencetype.resize(doc_type.referencetype.size() + 1); + doc_type.referencetype.back().id = id; + doc_type.referencetype.back().targetTypeId = target_type_id; + return *this; + } }; class DocumenttypesConfigBuilderHelper { diff --git a/document/src/vespa/document/repo/documenttyperepo.cpp b/document/src/vespa/document/repo/documenttyperepo.cpp index d0b72fee2f0..338cbf3bcc4 100644 --- a/document/src/vespa/document/repo/documenttyperepo.cpp +++ b/document/src/vespa/document/repo/documenttyperepo.cpp @@ -10,6 +10,7 @@ #include <vespa/document/datatype/positiondatatype.h> #include <vespa/document/datatype/urldatatype.h> #include <vespa/document/datatype/weightedsetdatatype.h> +#include <vespa/document/datatype/referencedatatype.h> #include <vespa/vespalib/objects/identifiable.h> #include <vespa/vespalib/stllike/hash_map.hpp> #include <vespa/vespalib/util/closure.h> @@ -199,7 +200,7 @@ struct DataTypeRepo { Repo repo; AnnotationTypeRepo annotations; - DataTypeRepo() : doc_type(0) {} + DataTypeRepo() : doc_type(nullptr) {} ~DataTypeRepo() { delete doc_type; } }; @@ -444,6 +445,16 @@ void addFieldSet(const DocumenttypesConfig::Documenttype::FieldsetsMap & fsv, Do } } +void addReferenceTypes( + const vector<DocumenttypesConfig::Documenttype::Referencetype> &ref_types, + Repo& data_type_repo, + const DocumentTypeMap& doc_type_map) { + for (const auto& ref_type : ref_types) { + const auto* target_doc_type = lookupRepo(ref_type.targetTypeId, doc_type_map).doc_type; + data_type_repo.addDataType(std::make_unique<ReferenceDataType>(*target_doc_type, ref_type.id)); + } +} + void configureDataTypeRepo( const DocumenttypesConfig::Documenttype &doc_type, DocumentTypeMap &type_map) { @@ -452,6 +463,7 @@ void configureDataTypeRepo( doc_type.inherits, type_map, data_types->annotations); addAnnotationTypes(doc_type.annotationtype, data_types->annotations); inheritDataTypes(doc_type.inherits, type_map, data_types->repo); + addReferenceTypes(doc_type.referencetype, data_types->repo, type_map); addDataTypes(doc_type.datatype, data_types->repo, data_types->annotations); setAnnotationDataTypes(doc_type.annotationtype, data_types->annotations, data_types->repo); @@ -534,7 +546,7 @@ DocumentTypeRepo::~DocumentTypeRepo() { const DocumentType *DocumentTypeRepo::getDocumentType(int32_t type_id) const { const DataTypeRepo *repo = FindPtr(_doc_types, type_id); - return repo ? repo->doc_type : 0; + return repo ? repo->doc_type : nullptr; } const DocumentType *DocumentTypeRepo::getDocumentType(const stringref &name) const { @@ -549,26 +561,26 @@ const DocumentType *DocumentTypeRepo::getDocumentType(const stringref &name) con return it->second->doc_type; } } - return 0; + return nullptr; } const DataType * DocumentTypeRepo::getDataType(const DocumentType &doc_type, int32_t id) const { const DataTypeRepo *dt_repo = FindPtr(_doc_types, doc_type.getId()); - return dt_repo ? dt_repo->repo.lookup(id) : 0; + return dt_repo ? dt_repo->repo.lookup(id) : nullptr; } const DataType * DocumentTypeRepo::getDataType( const DocumentType &doc_type, const stringref &name) const { const DataTypeRepo *dt_repo = FindPtr(_doc_types, doc_type.getId()); - return dt_repo ? dt_repo->repo.lookup(name) : 0; + return dt_repo ? dt_repo->repo.lookup(name) : nullptr; } const AnnotationType *DocumentTypeRepo::getAnnotationType( const DocumentType &doc_type, int32_t id) const { const DataTypeRepo *dt_repo = FindPtr(_doc_types, doc_type.getId()); - return dt_repo ? dt_repo->annotations.lookup(id) : 0; + return dt_repo ? dt_repo->annotations.lookup(id) : nullptr; } void DocumentTypeRepo::forEachDocumentType( diff --git a/document/src/vespa/document/serialization/vespadocumentdeserializer.cpp b/document/src/vespa/document/serialization/vespadocumentdeserializer.cpp index b1b827b6c69..fc0cf593334 100644 --- a/document/src/vespa/document/serialization/vespadocumentdeserializer.cpp +++ b/document/src/vespa/document/serialization/vespadocumentdeserializer.cpp @@ -17,6 +17,7 @@ #include <vespa/document/fieldvalue/stringfieldvalue.h> #include <vespa/document/fieldvalue/weightedsetfieldvalue.h> #include <vespa/document/fieldvalue/tensorfieldvalue.h> +#include <vespa/document/fieldvalue/referencefieldvalue.h> #include <vespa/vespalib/data/slime/binary_format.h> #include <vespa/vespalib/data/slime/slime.h> #include <vespa/vespalib/stllike/asciistream.h> @@ -389,4 +390,15 @@ VespaDocumentDeserializer::read(TensorFieldValue &value) _stream.adjustReadPos(length); } +void VespaDocumentDeserializer::read(ReferenceFieldValue& value) { + const bool hasId(readValue<uint8_t>(_stream) == 1); + if (hasId) { + DocumentId id; + read(id); + value.setDeserializedDocumentId(id); + } else { + value.clearChanged(); + } +} + } // document diff --git a/document/src/vespa/document/serialization/vespadocumentdeserializer.h b/document/src/vespa/document/serialization/vespadocumentdeserializer.h index 388ae1c5ad0..b4725327ec4 100644 --- a/document/src/vespa/document/serialization/vespadocumentdeserializer.h +++ b/document/src/vespa/document/serialization/vespadocumentdeserializer.h @@ -19,22 +19,23 @@ class VespaDocumentDeserializer : private FieldValueVisitor { FixedTypeRepo _repo; uint16_t _version; - virtual void visit(AnnotationReferenceFieldValue &value) { read(value); } - virtual void visit(ArrayFieldValue &value) { read(value); } - virtual void visit(ByteFieldValue &value) { read(value); } - virtual void visit(Document &value) { read(value); } - virtual void visit(DoubleFieldValue &value) { read(value); } - virtual void visit(FloatFieldValue &value) { read(value); } - virtual void visit(IntFieldValue &value) { read(value); } - virtual void visit(LongFieldValue &value) { read(value); } - virtual void visit(MapFieldValue &value) { read(value); } - virtual void visit(PredicateFieldValue &value) { read(value); } - virtual void visit(RawFieldValue &value) { read(value); } - virtual void visit(ShortFieldValue &value) { read(value); } - virtual void visit(StringFieldValue &value) { read(value); } - virtual void visit(StructFieldValue &value) { read(value); } - virtual void visit(WeightedSetFieldValue &value) { read(value); } - virtual void visit(TensorFieldValue &value) { read(value); } + void visit(AnnotationReferenceFieldValue &value) override { read(value); } + void visit(ArrayFieldValue &value) override { read(value); } + void visit(ByteFieldValue &value) override { read(value); } + void visit(Document &value) override { read(value); } + void visit(DoubleFieldValue &value) override { read(value); } + void visit(FloatFieldValue &value) override { read(value); } + void visit(IntFieldValue &value) override { read(value); } + void visit(LongFieldValue &value) override { read(value); } + void visit(MapFieldValue &value) override { read(value); } + void visit(PredicateFieldValue &value) override { read(value); } + void visit(RawFieldValue &value) override { read(value); } + void visit(ShortFieldValue &value) override { read(value); } + void visit(StringFieldValue &value) override { read(value); } + void visit(StructFieldValue &value) override { read(value); } + void visit(WeightedSetFieldValue &value) override { read(value); } + void visit(TensorFieldValue &value) override { read(value); } + void visit(ReferenceFieldValue &value) override { read(value); } void readDocument(Document &value); @@ -75,6 +76,7 @@ public: void readStructNoReset(StructFieldValue &value); void read(WeightedSetFieldValue &value); void read(TensorFieldValue &value); + void read(ReferenceFieldValue& value); }; } // namespace document diff --git a/document/src/vespa/document/serialization/vespadocumentserializer.cpp b/document/src/vespa/document/serialization/vespadocumentserializer.cpp index c9ab4be172a..d4d3856eb7a 100644 --- a/document/src/vespa/document/serialization/vespadocumentserializer.cpp +++ b/document/src/vespa/document/serialization/vespadocumentserializer.cpp @@ -19,6 +19,7 @@ #include <vespa/document/fieldvalue/stringfieldvalue.h> #include <vespa/document/fieldvalue/weightedsetfieldvalue.h> #include <vespa/document/fieldvalue/tensorfieldvalue.h> +#include <vespa/document/fieldvalue/referencefieldvalue.h> #include <vespa/document/update/updates.h> #include <vespa/document/update/fieldpathupdates.h> #include <vespa/vespalib/data/slime/binary_format.h> @@ -399,6 +400,13 @@ VespaDocumentSerializer::write(const TensorFieldValue &value) { } } +void VespaDocumentSerializer::write(const ReferenceFieldValue& value) { + _stream << static_cast<uint8_t>(value.hasValidDocumentId() ? 1 : 0); + if (value.hasValidDocumentId()) { + write(value.getDocumentId()); + } +} + namespace { const uint8_t CONTENT_HASTYPE(0x01); const uint8_t CONTENT_HASVALUE(0x01); diff --git a/document/src/vespa/document/serialization/vespadocumentserializer.h b/document/src/vespa/document/serialization/vespadocumentserializer.h index 534ee3a6c2d..06a169b8e06 100644 --- a/document/src/vespa/document/serialization/vespadocumentserializer.h +++ b/document/src/vespa/document/serialization/vespadocumentserializer.h @@ -52,6 +52,7 @@ public: void write(const StructFieldValue &val, const FieldSet& fieldSet); void write(const WeightedSetFieldValue &value); void write(const TensorFieldValue &value); + void write(const ReferenceFieldValue& value); void write42(const DocumentUpdate &value); void writeHEAD(const DocumentUpdate &value); @@ -102,7 +103,8 @@ private: void visit(const StringFieldValue &value) override { write(value); } void visit(const StructFieldValue &value) override { write(value, AllFields()); } void visit(const WeightedSetFieldValue &value) override { write(value); } - void visit(const TensorFieldValue &value) override { write(value); } + void visit(const TensorFieldValue &value) override { write(value); } + void visit(const ReferenceFieldValue& value) override { write(value); } vespalib::nbostream &_stream; }; diff --git a/document/src/vespa/document/util/identifiableid.h b/document/src/vespa/document/util/identifiableid.h index 75651281e83..4bbbb38294f 100644 --- a/document/src/vespa/document/util/identifiableid.h +++ b/document/src/vespa/document/util/identifiableid.h @@ -40,6 +40,7 @@ #define CID_MapFieldValue DOCUMENT_CID(36) #define CID_PredicateFieldValue DOCUMENT_CID(37) #define CID_TensorFieldValue DOCUMENT_CID(38) +#define CID_ReferenceFieldValue DOCUMENT_CID(39) #define CID_DataType DOCUMENT_CID(50) #define CID_PrimitiveDataType DOCUMENT_CID(51) @@ -58,6 +59,7 @@ #define CID_MapDataType DOCUMENT_CID(65) #define CID_AnnotationReferenceDataType DOCUMENT_CID(66) #define CID_TensorDataType DOCUMENT_CID(67) +#define CID_ReferenceDataType DOCUMENT_CID(68) #define CID_document_FieldPathEntry DOCUMENT_CID(80) diff --git a/searchcore/src/tests/proton/docsummary/summaryfieldconverter_test.cpp b/searchcore/src/tests/proton/docsummary/summaryfieldconverter_test.cpp index c2659d5b06c..8f9e0836191 100644 --- a/searchcore/src/tests/proton/docsummary/summaryfieldconverter_test.cpp +++ b/searchcore/src/tests/proton/docsummary/summaryfieldconverter_test.cpp @@ -15,6 +15,7 @@ #include <vespa/document/datatype/structdatatype.h> #include <vespa/document/datatype/urldatatype.h> #include <vespa/document/datatype/weightedsetdatatype.h> +#include <vespa/document/datatype/referencedatatype.h> #include <vespa/document/fieldvalue/arrayfieldvalue.h> #include <vespa/document/fieldvalue/bytefieldvalue.h> #include <vespa/document/fieldvalue/document.h> @@ -29,6 +30,7 @@ #include <vespa/document/fieldvalue/structfieldvalue.h> #include <vespa/document/fieldvalue/weightedsetfieldvalue.h> #include <vespa/document/fieldvalue/tensorfieldvalue.h> +#include <vespa/document/fieldvalue/referencefieldvalue.h> #include <vespa/document/predicate/predicate.h> #include <vespa/document/repo/configbuilder.h> #include <vespa/document/repo/documenttyperepo.h> @@ -84,6 +86,8 @@ using document::UrlDataType; using document::WeightedSetDataType; using document::WeightedSetFieldValue; using document::TensorFieldValue; +using document::ReferenceDataType; +using document::ReferenceFieldValue; using search::index::Schema; using vespalib::Slime; using vespalib::slime::Cursor; @@ -132,6 +136,7 @@ class Test : public vespalib::TestApp { void tearDown(); const DataType &getDataType(const string &name) const; + const ReferenceDataType& getAsRefType(const string& name) const; template <typename T> T getValueAs(const string &field_name, const Document &doc); @@ -149,6 +154,7 @@ class Test : public vespalib::TestApp { cvtSummaryAs(bool markup, const FieldValue::UP &fv); void checkString(const string &str, const FieldValue *value); + void checkStringForAllConversions(const string& expected, const FieldValue* fv); void checkData(const search::RawBuf &data, const FieldValue *value); void checkTensor(const Tensor::UP &tensor, const FieldValue *value); template <unsigned int N> @@ -172,6 +178,9 @@ class Test : public vespalib::TestApp { void requireThatLinguisticsAnnotationUsesDefaultDataTypes(); void requireThatPredicateIsPrinted(); void requireThatTensorIsNotConverted(); + void requireThatNonEmptyReferenceIsConvertedToStringWithId(); + void requireThatEmptyReferenceIsConvertedToEmptyString(); + void requireThatReferenceInCompositeTypeEmitsSlimeData(); const DocumentType &getDocType() const { return *_documentType; } Document makeDocument(); StringFieldValue annotateTerm(const string &term); @@ -186,6 +195,11 @@ public: DocumenttypesConfig getDocumenttypesConfig() { using namespace document::config_builder; DocumenttypesConfigBuilderHelper builder; + const int ref_target_doctype_id = 1234; + const int ref_type_id = 5678; + builder.document(ref_target_doctype_id, "target_dummy_document", + Struct("target_dummy_document.header"), + Struct("target_dummy_document.body")); builder.document(42, "indexingdocument", Struct("indexingdocument.header") .addField("empty", DataType::T_STRING) @@ -208,8 +222,12 @@ DocumenttypesConfig getDocumenttypesConfig() { .addField("float", DataType::T_FLOAT) .addField("chinese", DataType::T_STRING) .addField("predicate", DataType::T_PREDICATE) - .addField("tensor", DataType::T_TENSOR), - Struct("indexingdocument.body")); + .addField("tensor", DataType::T_TENSOR) + .addField("ref", ref_type_id) + .addField("nested", Struct("indexingdocument.header.nested") + .addField("inner_ref", ref_type_id)), + Struct("indexingdocument.body")) + .referenceType(ref_type_id, ref_target_doctype_id); return builder.config(); } @@ -247,6 +265,9 @@ Test::Main() TEST_CALL(requireThatLinguisticsAnnotationUsesDefaultDataTypes()); TEST_CALL(requireThatPredicateIsPrinted()); TEST_CALL(requireThatTensorIsNotConverted()); + TEST_CALL(requireThatNonEmptyReferenceIsConvertedToStringWithId()); + TEST_CALL(requireThatEmptyReferenceIsConvertedToEmptyString()); + TEST_CALL(requireThatReferenceInCompositeTypeEmitsSlimeData()); TEST_DONE(); } @@ -427,7 +448,7 @@ void Test::checkData(const search::RawBuf &buf, const FieldValue *value) { const RawFieldValue *s = dynamic_cast<const RawFieldValue *>(value); ASSERT_TRUE(s); auto got = s->getAsRaw(); - EXPECT_EQUAL(buf.GetUsedLen(), got.second); + ASSERT_EQUAL(buf.GetUsedLen(), got.second); EXPECT_TRUE(memcmp(buf.GetDrainPos(), got.first, got.second) == 0); } @@ -683,6 +704,55 @@ Test::requireThatTensorIsNotConverted() true).get())); } +void Test::checkStringForAllConversions(const string& expected, const FieldValue* fv) { + ASSERT_TRUE(fv != nullptr); + for (bool use_slime : {true, false}) { + checkString(expected, SFC::convertSummaryField(false, *fv, use_slime).get()); + } +} + +const ReferenceDataType& Test::getAsRefType(const string& name) const { + return dynamic_cast<const ReferenceDataType&>(getDataType(name)); +} + +void Test::requireThatNonEmptyReferenceIsConvertedToStringWithId() { + Document doc(getDocType(), DocumentId("doc:scheme:")); + doc.setRepo(*_documentRepo); + doc.setValue("ref", ReferenceFieldValue( + getAsRefType("Reference<target_dummy_document>"), + DocumentId("id:ns:target_dummy_document::foo"))); + + checkStringForAllConversions("id:ns:target_dummy_document::foo", + doc.getValue("ref").get()); +} + +void Test::requireThatEmptyReferenceIsConvertedToEmptyString() { + Document doc(getDocType(), DocumentId("doc:scheme:")); + doc.setRepo(*_documentRepo); + doc.setValue("ref", ReferenceFieldValue( + getAsRefType("Reference<target_dummy_document>"))); + + checkStringForAllConversions("", doc.getValue("ref").get()); + +} + +// Own test for this to ensure that SlimeFiller code path is executed, +// as this only triggers for composite field types. +void Test::requireThatReferenceInCompositeTypeEmitsSlimeData() { + Document doc(getDocType(), DocumentId("doc:scheme:")); + doc.setRepo(*_documentRepo); + + StructFieldValue sfv(getDataType("indexingdocument.header.nested")); + sfv.setValue("inner_ref", ReferenceFieldValue( + getAsRefType("Reference<target_dummy_document>"), + DocumentId("id:ns:target_dummy_document::foo"))); + doc.setValue("nested", sfv); + + FieldBlock expect(R"({"inner_ref":"id:ns:target_dummy_document::foo"})"); + checkData(expect.binary, + SFC::convertSummaryField(false, *doc.getValue("nested"), true).get()); +} + } // namespace TEST_APPHOOK(Test); diff --git a/searchcore/src/vespa/searchcore/proton/docsummary/summaryfieldconverter.cpp b/searchcore/src/vespa/searchcore/proton/docsummary/summaryfieldconverter.cpp index f14ffcd3f5f..03ae8055c1c 100644 --- a/searchcore/src/vespa/searchcore/proton/docsummary/summaryfieldconverter.cpp +++ b/searchcore/src/vespa/searchcore/proton/docsummary/summaryfieldconverter.cpp @@ -29,6 +29,7 @@ #include <vespa/document/fieldvalue/weightedsetfieldvalue.h> #include <vespa/document/fieldvalue/annotationreferencefieldvalue.h> #include <vespa/document/fieldvalue/tensorfieldvalue.h> +#include <vespa/document/fieldvalue/referencefieldvalue.h> #include <vespa/searchcommon/common/schema.h> #include <vespa/searchlib/util/url.h> #include <vespa/vespalib/encoding/base64.h> @@ -79,6 +80,7 @@ using document::StructFieldValue; using document::WeightedSetDataType; using document::WeightedSetFieldValue; using document::TensorFieldValue; +using document::ReferenceFieldValue; using search::index::Schema; using search::util::URL; using std::make_pair; @@ -377,6 +379,12 @@ class JsonFiller : public ConstFieldValueVisitor { } } + void visit(const ReferenceFieldValue& value) override { + _json.appendString(value.hasValidDocumentId() + ? value.getDocumentId().toString() + : string()); + } + public: JsonFiller(bool markup, JSONWriter &json) : _json(json), _tokenize(markup) {} @@ -477,6 +485,12 @@ class SummaryFieldValueConverter : protected ConstFieldValueVisitor visitPrimitive(value); } + void visit(const ReferenceFieldValue& value) override { + if (value.hasValidDocumentId()) { + _str << value.getDocumentId().toString(); + } // else: implicit empty string + } + public: SummaryFieldValueConverter(bool tokenize, FieldValueConverter &subConverter) : _str(), _tokenize(tokenize), @@ -641,6 +655,12 @@ class SlimeFiller : public ConstFieldValueVisitor { _inserter.insertData(vespalib::slime::Memory(s.peek(), s.size())); } + void visit(const ReferenceFieldValue& value) override { + _inserter.insertString(Memory(value.hasValidDocumentId() + ? value.getDocumentId().toString() + : string())); + } + public: SlimeFiller(Inserter &inserter, bool tokenize) : _inserter(inserter), _tokenize(tokenize) {} |