diff options
author | Tor Brede Vekterli <vekterli@yahoo-inc.com> | 2017-02-02 15:17:54 +0100 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@yahoo-inc.com> | 2017-02-02 15:18:39 +0100 |
commit | 02ee9752e434e1b17e7f4606c41115b291ae9547 (patch) | |
tree | 5537ff49e7253448cb9a0470a4473c864715214f | |
parent | cfd95b6168969077d945205e0dc9fd9f8a61f4a8 (diff) |
Implement reference support for summary field conversion
7 files changed, 117 insertions, 7 deletions
diff --git a/document/src/tests/fieldvalue/referencefieldvalue_test.cpp b/document/src/tests/fieldvalue/referencefieldvalue_test.cpp index dc9430f0c11..f1fdfe5f97f 100644 --- a/document/src/tests/fieldvalue/referencefieldvalue_test.cpp +++ b/document/src/tests/fieldvalue/referencefieldvalue_test.cpp @@ -130,11 +130,22 @@ TEST_F("clone()ing creates new instance with same ID and type", Fixture) { std::unique_ptr<ReferenceFieldValue> cloned(src.clone()); ASSERT_TRUE(cloned.get() != nullptr); + ASSERT_TRUE(cloned->hasValidDocumentId()); EXPECT_EQUAL(src.getDocumentId(), cloned->getDocumentId()); EXPECT_EQUAL(src.getDataType(), cloned->getDataType()); EXPECT_TRUE(cloned->hasChanged()); } +TEST_F("Can clone() value without document ID", Fixture) { + ReferenceFieldValue src(f.refType); + + std::unique_ptr<ReferenceFieldValue> cloned(src.clone()); + ASSERT_TRUE(cloned.get() != nullptr); + EXPECT_FALSE(cloned->hasValidDocumentId()); + EXPECT_EQUAL(src.getDataType(), cloned->getDataType()); + EXPECT_TRUE(cloned->hasChanged()); +} + TEST_F("compare() orders first on type ID, then on document ID", Fixture) { // foo type has id 12345 ReferenceFieldValue fvType1Id1(f.refType, DocumentId("id:ns:foo::AA")); diff --git a/document/src/tests/serialization/vespadocumentserializer_test.cpp b/document/src/tests/serialization/vespadocumentserializer_test.cpp index 4d30fce7ef1..a72d69ad0c3 100644 --- a/document/src/tests/serialization/vespadocumentserializer_test.cpp +++ b/document/src/tests/serialization/vespadocumentserializer_test.cpp @@ -886,7 +886,7 @@ struct RefFixture { return dynamic_cast<const ReferenceDataType&>(*raw_type); } - void roundtripSerialize(const ReferenceFieldValue& src, ReferenceFieldValue& dest) { + void roundtrip_serialize(const ReferenceFieldValue& src, ReferenceFieldValue& dest) { nbostream stream; VespaDocumentSerializer serializer(stream); serializer.write(src); @@ -912,7 +912,7 @@ TEST_F("ReferenceFieldValue with ID can be roundtrip serialized", RefFixture) { TEST_F("Empty ReferenceFieldValue has changed-flag cleared after deserialization", RefFixture) { ReferenceFieldValue src(f.ref_type()); ReferenceFieldValue dest(f.ref_type()); - f.roundtripSerialize(src, dest); + f.roundtrip_serialize(src, dest); EXPECT_FALSE(dest.hasChanged()); } @@ -921,7 +921,7 @@ TEST_F("ReferenceFieldValue with ID has changed-flag cleared after deserializati ReferenceFieldValue src( f.ref_type(), DocumentId("id:ns:" + doc_name + "::foo")); ReferenceFieldValue dest(f.ref_type()); - f.roundtripSerialize(src, dest); + f.roundtrip_serialize(src, dest); EXPECT_FALSE(dest.hasChanged()); } diff --git a/document/src/vespa/document/config/documenttypes.def b/document/src/vespa/document/config/documenttypes.def index 5b820213e3e..e8f045a3747 100644 --- a/document/src/vespa/document/config/documenttypes.def +++ b/document/src/vespa/document/config/documenttypes.def @@ -101,6 +101,10 @@ documenttype[].annotationtype[].inherits[].id int ## Field sets documenttype[].fieldsets{}.fields[] string +## ID of reference type. This is a regular data type, but is kept in its own +## array to avoid polluting the existing datatype array with a new default +## field value. documenttype[].referencetype[].id int +## Numeric ID of the document type instances of the reference point to. documenttype[].referencetype[].target_type_id int diff --git a/document/src/vespa/document/fieldvalue/referencefieldvalue.cpp b/document/src/vespa/document/fieldvalue/referencefieldvalue.cpp index 3e1c7659870..76b99e095e9 100644 --- a/document/src/vespa/document/fieldvalue/referencefieldvalue.cpp +++ b/document/src/vespa/document/fieldvalue/referencefieldvalue.cpp @@ -80,7 +80,11 @@ void ReferenceFieldValue::setDeserializedDocumentId(const DocumentId& id) { ReferenceFieldValue* ReferenceFieldValue::clone() const { assert(_dataType != nullptr); - return new ReferenceFieldValue(*_dataType, _documentId); + if (hasValidDocumentId()) { + return new ReferenceFieldValue(*_dataType, _documentId); + } else { + return new ReferenceFieldValue(*_dataType); + } } int ReferenceFieldValue::compare(const FieldValue& rhs) const { diff --git a/document/src/vespa/document/fieldvalue/referencefieldvalue.h b/document/src/vespa/document/fieldvalue/referencefieldvalue.h index ff7499df321..be8801cd4d2 100644 --- a/document/src/vespa/document/fieldvalue/referencefieldvalue.h +++ b/document/src/vespa/document/fieldvalue/referencefieldvalue.h @@ -10,6 +10,7 @@ namespace document { class ReferenceFieldValue : public FieldValue { const ReferenceDataType* _dataType; + // TODO wrap in std::optional once available. DocumentId _documentId; bool _altered; public: diff --git a/searchcore/src/tests/proton/docsummary/summaryfieldconverter_test.cpp b/searchcore/src/tests/proton/docsummary/summaryfieldconverter_test.cpp index c2659d5b06c..8f9e0836191 100644 --- a/searchcore/src/tests/proton/docsummary/summaryfieldconverter_test.cpp +++ b/searchcore/src/tests/proton/docsummary/summaryfieldconverter_test.cpp @@ -15,6 +15,7 @@ #include <vespa/document/datatype/structdatatype.h> #include <vespa/document/datatype/urldatatype.h> #include <vespa/document/datatype/weightedsetdatatype.h> +#include <vespa/document/datatype/referencedatatype.h> #include <vespa/document/fieldvalue/arrayfieldvalue.h> #include <vespa/document/fieldvalue/bytefieldvalue.h> #include <vespa/document/fieldvalue/document.h> @@ -29,6 +30,7 @@ #include <vespa/document/fieldvalue/structfieldvalue.h> #include <vespa/document/fieldvalue/weightedsetfieldvalue.h> #include <vespa/document/fieldvalue/tensorfieldvalue.h> +#include <vespa/document/fieldvalue/referencefieldvalue.h> #include <vespa/document/predicate/predicate.h> #include <vespa/document/repo/configbuilder.h> #include <vespa/document/repo/documenttyperepo.h> @@ -84,6 +86,8 @@ using document::UrlDataType; using document::WeightedSetDataType; using document::WeightedSetFieldValue; using document::TensorFieldValue; +using document::ReferenceDataType; +using document::ReferenceFieldValue; using search::index::Schema; using vespalib::Slime; using vespalib::slime::Cursor; @@ -132,6 +136,7 @@ class Test : public vespalib::TestApp { void tearDown(); const DataType &getDataType(const string &name) const; + const ReferenceDataType& getAsRefType(const string& name) const; template <typename T> T getValueAs(const string &field_name, const Document &doc); @@ -149,6 +154,7 @@ class Test : public vespalib::TestApp { cvtSummaryAs(bool markup, const FieldValue::UP &fv); void checkString(const string &str, const FieldValue *value); + void checkStringForAllConversions(const string& expected, const FieldValue* fv); void checkData(const search::RawBuf &data, const FieldValue *value); void checkTensor(const Tensor::UP &tensor, const FieldValue *value); template <unsigned int N> @@ -172,6 +178,9 @@ class Test : public vespalib::TestApp { void requireThatLinguisticsAnnotationUsesDefaultDataTypes(); void requireThatPredicateIsPrinted(); void requireThatTensorIsNotConverted(); + void requireThatNonEmptyReferenceIsConvertedToStringWithId(); + void requireThatEmptyReferenceIsConvertedToEmptyString(); + void requireThatReferenceInCompositeTypeEmitsSlimeData(); const DocumentType &getDocType() const { return *_documentType; } Document makeDocument(); StringFieldValue annotateTerm(const string &term); @@ -186,6 +195,11 @@ public: DocumenttypesConfig getDocumenttypesConfig() { using namespace document::config_builder; DocumenttypesConfigBuilderHelper builder; + const int ref_target_doctype_id = 1234; + const int ref_type_id = 5678; + builder.document(ref_target_doctype_id, "target_dummy_document", + Struct("target_dummy_document.header"), + Struct("target_dummy_document.body")); builder.document(42, "indexingdocument", Struct("indexingdocument.header") .addField("empty", DataType::T_STRING) @@ -208,8 +222,12 @@ DocumenttypesConfig getDocumenttypesConfig() { .addField("float", DataType::T_FLOAT) .addField("chinese", DataType::T_STRING) .addField("predicate", DataType::T_PREDICATE) - .addField("tensor", DataType::T_TENSOR), - Struct("indexingdocument.body")); + .addField("tensor", DataType::T_TENSOR) + .addField("ref", ref_type_id) + .addField("nested", Struct("indexingdocument.header.nested") + .addField("inner_ref", ref_type_id)), + Struct("indexingdocument.body")) + .referenceType(ref_type_id, ref_target_doctype_id); return builder.config(); } @@ -247,6 +265,9 @@ Test::Main() TEST_CALL(requireThatLinguisticsAnnotationUsesDefaultDataTypes()); TEST_CALL(requireThatPredicateIsPrinted()); TEST_CALL(requireThatTensorIsNotConverted()); + TEST_CALL(requireThatNonEmptyReferenceIsConvertedToStringWithId()); + TEST_CALL(requireThatEmptyReferenceIsConvertedToEmptyString()); + TEST_CALL(requireThatReferenceInCompositeTypeEmitsSlimeData()); TEST_DONE(); } @@ -427,7 +448,7 @@ void Test::checkData(const search::RawBuf &buf, const FieldValue *value) { const RawFieldValue *s = dynamic_cast<const RawFieldValue *>(value); ASSERT_TRUE(s); auto got = s->getAsRaw(); - EXPECT_EQUAL(buf.GetUsedLen(), got.second); + ASSERT_EQUAL(buf.GetUsedLen(), got.second); EXPECT_TRUE(memcmp(buf.GetDrainPos(), got.first, got.second) == 0); } @@ -683,6 +704,55 @@ Test::requireThatTensorIsNotConverted() true).get())); } +void Test::checkStringForAllConversions(const string& expected, const FieldValue* fv) { + ASSERT_TRUE(fv != nullptr); + for (bool use_slime : {true, false}) { + checkString(expected, SFC::convertSummaryField(false, *fv, use_slime).get()); + } +} + +const ReferenceDataType& Test::getAsRefType(const string& name) const { + return dynamic_cast<const ReferenceDataType&>(getDataType(name)); +} + +void Test::requireThatNonEmptyReferenceIsConvertedToStringWithId() { + Document doc(getDocType(), DocumentId("doc:scheme:")); + doc.setRepo(*_documentRepo); + doc.setValue("ref", ReferenceFieldValue( + getAsRefType("Reference<target_dummy_document>"), + DocumentId("id:ns:target_dummy_document::foo"))); + + checkStringForAllConversions("id:ns:target_dummy_document::foo", + doc.getValue("ref").get()); +} + +void Test::requireThatEmptyReferenceIsConvertedToEmptyString() { + Document doc(getDocType(), DocumentId("doc:scheme:")); + doc.setRepo(*_documentRepo); + doc.setValue("ref", ReferenceFieldValue( + getAsRefType("Reference<target_dummy_document>"))); + + checkStringForAllConversions("", doc.getValue("ref").get()); + +} + +// Own test for this to ensure that SlimeFiller code path is executed, +// as this only triggers for composite field types. +void Test::requireThatReferenceInCompositeTypeEmitsSlimeData() { + Document doc(getDocType(), DocumentId("doc:scheme:")); + doc.setRepo(*_documentRepo); + + StructFieldValue sfv(getDataType("indexingdocument.header.nested")); + sfv.setValue("inner_ref", ReferenceFieldValue( + getAsRefType("Reference<target_dummy_document>"), + DocumentId("id:ns:target_dummy_document::foo"))); + doc.setValue("nested", sfv); + + FieldBlock expect(R"({"inner_ref":"id:ns:target_dummy_document::foo"})"); + checkData(expect.binary, + SFC::convertSummaryField(false, *doc.getValue("nested"), true).get()); +} + } // namespace TEST_APPHOOK(Test); diff --git a/searchcore/src/vespa/searchcore/proton/docsummary/summaryfieldconverter.cpp b/searchcore/src/vespa/searchcore/proton/docsummary/summaryfieldconverter.cpp index f14ffcd3f5f..12977ea4017 100644 --- a/searchcore/src/vespa/searchcore/proton/docsummary/summaryfieldconverter.cpp +++ b/searchcore/src/vespa/searchcore/proton/docsummary/summaryfieldconverter.cpp @@ -29,6 +29,7 @@ #include <vespa/document/fieldvalue/weightedsetfieldvalue.h> #include <vespa/document/fieldvalue/annotationreferencefieldvalue.h> #include <vespa/document/fieldvalue/tensorfieldvalue.h> +#include <vespa/document/fieldvalue/referencefieldvalue.h> #include <vespa/searchcommon/common/schema.h> #include <vespa/searchlib/util/url.h> #include <vespa/vespalib/encoding/base64.h> @@ -79,6 +80,7 @@ using document::StructFieldValue; using document::WeightedSetDataType; using document::WeightedSetFieldValue; using document::TensorFieldValue; +using document::ReferenceFieldValue; using search::index::Schema; using search::util::URL; using std::make_pair; @@ -377,6 +379,12 @@ class JsonFiller : public ConstFieldValueVisitor { } } + void visit(const ReferenceFieldValue& value) override { + _json.appendString(value.hasValidDocumentId() + ? value.getDocumentId().toString() + : string()); + } + public: JsonFiller(bool markup, JSONWriter &json) : _json(json), _tokenize(markup) {} @@ -477,6 +485,12 @@ class SummaryFieldValueConverter : protected ConstFieldValueVisitor visitPrimitive(value); } + void visit(const ReferenceFieldValue& value) override { + if (value.hasValidDocumentId()) { + _str << value.getDocumentId().toString(); + } // else:: implicit empty string + } + public: SummaryFieldValueConverter(bool tokenize, FieldValueConverter &subConverter) : _str(), _tokenize(tokenize), @@ -641,6 +655,12 @@ class SlimeFiller : public ConstFieldValueVisitor { _inserter.insertData(vespalib::slime::Memory(s.peek(), s.size())); } + void visit(const ReferenceFieldValue& value) override { + _inserter.insertString(Memory(value.hasValidDocumentId() + ? value.getDocumentId().toString() + : string())); + } + public: SlimeFiller(Inserter &inserter, bool tokenize) : _inserter(inserter), _tokenize(tokenize) {} |