aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@yahoo-inc.com>2017-02-02 15:17:54 +0100
committerTor Brede Vekterli <vekterli@yahoo-inc.com>2017-02-02 15:18:39 +0100
commit02ee9752e434e1b17e7f4606c41115b291ae9547 (patch)
tree5537ff49e7253448cb9a0470a4473c864715214f
parentcfd95b6168969077d945205e0dc9fd9f8a61f4a8 (diff)
Implement reference support for summary field conversion
-rw-r--r--document/src/tests/fieldvalue/referencefieldvalue_test.cpp11
-rw-r--r--document/src/tests/serialization/vespadocumentserializer_test.cpp6
-rw-r--r--document/src/vespa/document/config/documenttypes.def4
-rw-r--r--document/src/vespa/document/fieldvalue/referencefieldvalue.cpp6
-rw-r--r--document/src/vespa/document/fieldvalue/referencefieldvalue.h1
-rw-r--r--searchcore/src/tests/proton/docsummary/summaryfieldconverter_test.cpp76
-rw-r--r--searchcore/src/vespa/searchcore/proton/docsummary/summaryfieldconverter.cpp20
7 files changed, 117 insertions, 7 deletions
diff --git a/document/src/tests/fieldvalue/referencefieldvalue_test.cpp b/document/src/tests/fieldvalue/referencefieldvalue_test.cpp
index dc9430f0c11..f1fdfe5f97f 100644
--- a/document/src/tests/fieldvalue/referencefieldvalue_test.cpp
+++ b/document/src/tests/fieldvalue/referencefieldvalue_test.cpp
@@ -130,11 +130,22 @@ TEST_F("clone()ing creates new instance with same ID and type", Fixture) {
std::unique_ptr<ReferenceFieldValue> cloned(src.clone());
ASSERT_TRUE(cloned.get() != nullptr);
+ ASSERT_TRUE(cloned->hasValidDocumentId());
EXPECT_EQUAL(src.getDocumentId(), cloned->getDocumentId());
EXPECT_EQUAL(src.getDataType(), cloned->getDataType());
EXPECT_TRUE(cloned->hasChanged());
}
+TEST_F("Can clone() value without document ID", Fixture) {
+ ReferenceFieldValue src(f.refType);
+
+ std::unique_ptr<ReferenceFieldValue> cloned(src.clone());
+ ASSERT_TRUE(cloned.get() != nullptr);
+ EXPECT_FALSE(cloned->hasValidDocumentId());
+ EXPECT_EQUAL(src.getDataType(), cloned->getDataType());
+ EXPECT_TRUE(cloned->hasChanged());
+}
+
TEST_F("compare() orders first on type ID, then on document ID", Fixture) {
// foo type has id 12345
ReferenceFieldValue fvType1Id1(f.refType, DocumentId("id:ns:foo::AA"));
diff --git a/document/src/tests/serialization/vespadocumentserializer_test.cpp b/document/src/tests/serialization/vespadocumentserializer_test.cpp
index 4d30fce7ef1..a72d69ad0c3 100644
--- a/document/src/tests/serialization/vespadocumentserializer_test.cpp
+++ b/document/src/tests/serialization/vespadocumentserializer_test.cpp
@@ -886,7 +886,7 @@ struct RefFixture {
return dynamic_cast<const ReferenceDataType&>(*raw_type);
}
- void roundtripSerialize(const ReferenceFieldValue& src, ReferenceFieldValue& dest) {
+ void roundtrip_serialize(const ReferenceFieldValue& src, ReferenceFieldValue& dest) {
nbostream stream;
VespaDocumentSerializer serializer(stream);
serializer.write(src);
@@ -912,7 +912,7 @@ TEST_F("ReferenceFieldValue with ID can be roundtrip serialized", RefFixture) {
TEST_F("Empty ReferenceFieldValue has changed-flag cleared after deserialization", RefFixture) {
ReferenceFieldValue src(f.ref_type());
ReferenceFieldValue dest(f.ref_type());
- f.roundtripSerialize(src, dest);
+ f.roundtrip_serialize(src, dest);
EXPECT_FALSE(dest.hasChanged());
}
@@ -921,7 +921,7 @@ TEST_F("ReferenceFieldValue with ID has changed-flag cleared after deserializati
ReferenceFieldValue src(
f.ref_type(), DocumentId("id:ns:" + doc_name + "::foo"));
ReferenceFieldValue dest(f.ref_type());
- f.roundtripSerialize(src, dest);
+ f.roundtrip_serialize(src, dest);
EXPECT_FALSE(dest.hasChanged());
}
diff --git a/document/src/vespa/document/config/documenttypes.def b/document/src/vespa/document/config/documenttypes.def
index 5b820213e3e..e8f045a3747 100644
--- a/document/src/vespa/document/config/documenttypes.def
+++ b/document/src/vespa/document/config/documenttypes.def
@@ -101,6 +101,10 @@ documenttype[].annotationtype[].inherits[].id int
## Field sets
documenttype[].fieldsets{}.fields[] string
+## ID of reference type. This is a regular data type, but is kept in its own
+## array to avoid polluting the existing datatype array with a new default
+## field value.
documenttype[].referencetype[].id int
+## Numeric ID of the document type instances of the reference point to.
documenttype[].referencetype[].target_type_id int
diff --git a/document/src/vespa/document/fieldvalue/referencefieldvalue.cpp b/document/src/vespa/document/fieldvalue/referencefieldvalue.cpp
index 3e1c7659870..76b99e095e9 100644
--- a/document/src/vespa/document/fieldvalue/referencefieldvalue.cpp
+++ b/document/src/vespa/document/fieldvalue/referencefieldvalue.cpp
@@ -80,7 +80,11 @@ void ReferenceFieldValue::setDeserializedDocumentId(const DocumentId& id) {
ReferenceFieldValue* ReferenceFieldValue::clone() const {
assert(_dataType != nullptr);
- return new ReferenceFieldValue(*_dataType, _documentId);
+ if (hasValidDocumentId()) {
+ return new ReferenceFieldValue(*_dataType, _documentId);
+ } else {
+ return new ReferenceFieldValue(*_dataType);
+ }
}
int ReferenceFieldValue::compare(const FieldValue& rhs) const {
diff --git a/document/src/vespa/document/fieldvalue/referencefieldvalue.h b/document/src/vespa/document/fieldvalue/referencefieldvalue.h
index ff7499df321..be8801cd4d2 100644
--- a/document/src/vespa/document/fieldvalue/referencefieldvalue.h
+++ b/document/src/vespa/document/fieldvalue/referencefieldvalue.h
@@ -10,6 +10,7 @@ namespace document {
class ReferenceFieldValue : public FieldValue {
const ReferenceDataType* _dataType;
+ // TODO wrap in std::optional once available.
DocumentId _documentId;
bool _altered;
public:
diff --git a/searchcore/src/tests/proton/docsummary/summaryfieldconverter_test.cpp b/searchcore/src/tests/proton/docsummary/summaryfieldconverter_test.cpp
index c2659d5b06c..8f9e0836191 100644
--- a/searchcore/src/tests/proton/docsummary/summaryfieldconverter_test.cpp
+++ b/searchcore/src/tests/proton/docsummary/summaryfieldconverter_test.cpp
@@ -15,6 +15,7 @@
#include <vespa/document/datatype/structdatatype.h>
#include <vespa/document/datatype/urldatatype.h>
#include <vespa/document/datatype/weightedsetdatatype.h>
+#include <vespa/document/datatype/referencedatatype.h>
#include <vespa/document/fieldvalue/arrayfieldvalue.h>
#include <vespa/document/fieldvalue/bytefieldvalue.h>
#include <vespa/document/fieldvalue/document.h>
@@ -29,6 +30,7 @@
#include <vespa/document/fieldvalue/structfieldvalue.h>
#include <vespa/document/fieldvalue/weightedsetfieldvalue.h>
#include <vespa/document/fieldvalue/tensorfieldvalue.h>
+#include <vespa/document/fieldvalue/referencefieldvalue.h>
#include <vespa/document/predicate/predicate.h>
#include <vespa/document/repo/configbuilder.h>
#include <vespa/document/repo/documenttyperepo.h>
@@ -84,6 +86,8 @@ using document::UrlDataType;
using document::WeightedSetDataType;
using document::WeightedSetFieldValue;
using document::TensorFieldValue;
+using document::ReferenceDataType;
+using document::ReferenceFieldValue;
using search::index::Schema;
using vespalib::Slime;
using vespalib::slime::Cursor;
@@ -132,6 +136,7 @@ class Test : public vespalib::TestApp {
void tearDown();
const DataType &getDataType(const string &name) const;
+ const ReferenceDataType& getAsRefType(const string& name) const;
template <typename T>
T getValueAs(const string &field_name, const Document &doc);
@@ -149,6 +154,7 @@ class Test : public vespalib::TestApp {
cvtSummaryAs(bool markup, const FieldValue::UP &fv);
void checkString(const string &str, const FieldValue *value);
+ void checkStringForAllConversions(const string& expected, const FieldValue* fv);
void checkData(const search::RawBuf &data, const FieldValue *value);
void checkTensor(const Tensor::UP &tensor, const FieldValue *value);
template <unsigned int N>
@@ -172,6 +178,9 @@ class Test : public vespalib::TestApp {
void requireThatLinguisticsAnnotationUsesDefaultDataTypes();
void requireThatPredicateIsPrinted();
void requireThatTensorIsNotConverted();
+ void requireThatNonEmptyReferenceIsConvertedToStringWithId();
+ void requireThatEmptyReferenceIsConvertedToEmptyString();
+ void requireThatReferenceInCompositeTypeEmitsSlimeData();
const DocumentType &getDocType() const { return *_documentType; }
Document makeDocument();
StringFieldValue annotateTerm(const string &term);
@@ -186,6 +195,11 @@ public:
DocumenttypesConfig getDocumenttypesConfig() {
using namespace document::config_builder;
DocumenttypesConfigBuilderHelper builder;
+ const int ref_target_doctype_id = 1234;
+ const int ref_type_id = 5678;
+ builder.document(ref_target_doctype_id, "target_dummy_document",
+ Struct("target_dummy_document.header"),
+ Struct("target_dummy_document.body"));
builder.document(42, "indexingdocument",
Struct("indexingdocument.header")
.addField("empty", DataType::T_STRING)
@@ -208,8 +222,12 @@ DocumenttypesConfig getDocumenttypesConfig() {
.addField("float", DataType::T_FLOAT)
.addField("chinese", DataType::T_STRING)
.addField("predicate", DataType::T_PREDICATE)
- .addField("tensor", DataType::T_TENSOR),
- Struct("indexingdocument.body"));
+ .addField("tensor", DataType::T_TENSOR)
+ .addField("ref", ref_type_id)
+ .addField("nested", Struct("indexingdocument.header.nested")
+ .addField("inner_ref", ref_type_id)),
+ Struct("indexingdocument.body"))
+ .referenceType(ref_type_id, ref_target_doctype_id);
return builder.config();
}
@@ -247,6 +265,9 @@ Test::Main()
TEST_CALL(requireThatLinguisticsAnnotationUsesDefaultDataTypes());
TEST_CALL(requireThatPredicateIsPrinted());
TEST_CALL(requireThatTensorIsNotConverted());
+ TEST_CALL(requireThatNonEmptyReferenceIsConvertedToStringWithId());
+ TEST_CALL(requireThatEmptyReferenceIsConvertedToEmptyString());
+ TEST_CALL(requireThatReferenceInCompositeTypeEmitsSlimeData());
TEST_DONE();
}
@@ -427,7 +448,7 @@ void Test::checkData(const search::RawBuf &buf, const FieldValue *value) {
const RawFieldValue *s = dynamic_cast<const RawFieldValue *>(value);
ASSERT_TRUE(s);
auto got = s->getAsRaw();
- EXPECT_EQUAL(buf.GetUsedLen(), got.second);
+ ASSERT_EQUAL(buf.GetUsedLen(), got.second);
EXPECT_TRUE(memcmp(buf.GetDrainPos(), got.first, got.second) == 0);
}
@@ -683,6 +704,55 @@ Test::requireThatTensorIsNotConverted()
true).get()));
}
+void Test::checkStringForAllConversions(const string& expected, const FieldValue* fv) {
+ ASSERT_TRUE(fv != nullptr);
+ for (bool use_slime : {true, false}) {
+ checkString(expected, SFC::convertSummaryField(false, *fv, use_slime).get());
+ }
+}
+
+const ReferenceDataType& Test::getAsRefType(const string& name) const {
+ return dynamic_cast<const ReferenceDataType&>(getDataType(name));
+}
+
+void Test::requireThatNonEmptyReferenceIsConvertedToStringWithId() {
+ Document doc(getDocType(), DocumentId("doc:scheme:"));
+ doc.setRepo(*_documentRepo);
+ doc.setValue("ref", ReferenceFieldValue(
+ getAsRefType("Reference<target_dummy_document>"),
+ DocumentId("id:ns:target_dummy_document::foo")));
+
+ checkStringForAllConversions("id:ns:target_dummy_document::foo",
+ doc.getValue("ref").get());
+}
+
+void Test::requireThatEmptyReferenceIsConvertedToEmptyString() {
+ Document doc(getDocType(), DocumentId("doc:scheme:"));
+ doc.setRepo(*_documentRepo);
+ doc.setValue("ref", ReferenceFieldValue(
+ getAsRefType("Reference<target_dummy_document>")));
+
+ checkStringForAllConversions("", doc.getValue("ref").get());
+
+}
+
+// Own test for this to ensure that SlimeFiller code path is executed,
+// as this only triggers for composite field types.
+void Test::requireThatReferenceInCompositeTypeEmitsSlimeData() {
+ Document doc(getDocType(), DocumentId("doc:scheme:"));
+ doc.setRepo(*_documentRepo);
+
+ StructFieldValue sfv(getDataType("indexingdocument.header.nested"));
+ sfv.setValue("inner_ref", ReferenceFieldValue(
+ getAsRefType("Reference<target_dummy_document>"),
+ DocumentId("id:ns:target_dummy_document::foo")));
+ doc.setValue("nested", sfv);
+
+ FieldBlock expect(R"({"inner_ref":"id:ns:target_dummy_document::foo"})");
+ checkData(expect.binary,
+ SFC::convertSummaryField(false, *doc.getValue("nested"), true).get());
+}
+
} // namespace
TEST_APPHOOK(Test);
diff --git a/searchcore/src/vespa/searchcore/proton/docsummary/summaryfieldconverter.cpp b/searchcore/src/vespa/searchcore/proton/docsummary/summaryfieldconverter.cpp
index f14ffcd3f5f..12977ea4017 100644
--- a/searchcore/src/vespa/searchcore/proton/docsummary/summaryfieldconverter.cpp
+++ b/searchcore/src/vespa/searchcore/proton/docsummary/summaryfieldconverter.cpp
@@ -29,6 +29,7 @@
#include <vespa/document/fieldvalue/weightedsetfieldvalue.h>
#include <vespa/document/fieldvalue/annotationreferencefieldvalue.h>
#include <vespa/document/fieldvalue/tensorfieldvalue.h>
+#include <vespa/document/fieldvalue/referencefieldvalue.h>
#include <vespa/searchcommon/common/schema.h>
#include <vespa/searchlib/util/url.h>
#include <vespa/vespalib/encoding/base64.h>
@@ -79,6 +80,7 @@ using document::StructFieldValue;
using document::WeightedSetDataType;
using document::WeightedSetFieldValue;
using document::TensorFieldValue;
+using document::ReferenceFieldValue;
using search::index::Schema;
using search::util::URL;
using std::make_pair;
@@ -377,6 +379,12 @@ class JsonFiller : public ConstFieldValueVisitor {
}
}
+ void visit(const ReferenceFieldValue& value) override {
+ _json.appendString(value.hasValidDocumentId()
+ ? value.getDocumentId().toString()
+ : string());
+ }
+
public:
JsonFiller(bool markup, JSONWriter &json)
: _json(json), _tokenize(markup) {}
@@ -477,6 +485,12 @@ class SummaryFieldValueConverter : protected ConstFieldValueVisitor
visitPrimitive(value);
}
+ void visit(const ReferenceFieldValue& value) override {
+ if (value.hasValidDocumentId()) {
+ _str << value.getDocumentId().toString();
+ } // else:: implicit empty string
+ }
+
public:
SummaryFieldValueConverter(bool tokenize, FieldValueConverter &subConverter)
: _str(), _tokenize(tokenize),
@@ -641,6 +655,12 @@ class SlimeFiller : public ConstFieldValueVisitor {
_inserter.insertData(vespalib::slime::Memory(s.peek(), s.size()));
}
+ void visit(const ReferenceFieldValue& value) override {
+ _inserter.insertString(Memory(value.hasValidDocumentId()
+ ? value.getDocumentId().toString()
+ : string()));
+ }
+
public:
SlimeFiller(Inserter &inserter, bool tokenize)
: _inserter(inserter), _tokenize(tokenize) {}