aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Geir Storli <geirst@yahooinc.com> 2023-03-01 16:45:26 +0000
committer Geir Storli <geirst@yahooinc.com> 2023-03-01 16:45:26 +0000
commit 22a7b628fb82db0b43ca6550cd29edc84b40cbcb (patch)
tree c1c593ccc33fc38d0514ad29c5e9781e8cf53e78
parent 31db1c671fac8e34f098f9d6d9797c2c1e05d433 (diff)
Always propagate the document type to the internal StructFieldValue.
After deserialization of an empty document the internal StructFieldValue would not have the document type. Then, if a StringFieldValue of that empty document was updated (e.g. by an AssignValueUpdate), later lazy deserialization of its annotations would crash as the document type was not present.
-rw-r--r-- document/src/tests/documentupdatetestcase.cpp 73
-rw-r--r-- document/src/vespa/document/serialization/vespadocumentdeserializer.cpp 1
2 files changed, 71 insertions, 3 deletions
diff --git a/document/src/tests/documentupdatetestcase.cpp b/document/src/tests/documentupdatetestcase.cpp
index 40f398ee93e..7a5d88d1013 100644
--- a/document/src/tests/documentupdatetestcase.cpp
+++ b/document/src/tests/documentupdatetestcase.cpp
@@ -1,14 +1,17 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/document/test/fieldvalue_helpers.h>
-#include <vespa/document/base/testdocman.h>
+#include <vespa/document/annotation/spanlist.h>
#include <vespa/document/base/exceptions.h>
-#include <vespa/document/datatype/tensor_data_type.h>
+#include <vespa/document/base/testdocman.h>
#include <vespa/document/datatype/documenttype.h>
+#include <vespa/document/datatype/tensor_data_type.h>
#include <vespa/document/fieldvalue/fieldvalues.h>
#include <vespa/document/fieldvalue/tensorfieldvalue.h>
#include <vespa/document/repo/configbuilder.h>
#include <vespa/document/repo/documenttyperepo.h>
+#include <vespa/document/repo/fixedtyperepo.h>
+#include <vespa/document/serialization/vespadocumentdeserializer.h>
#include <vespa/document/serialization/vespadocumentserializer.h>
#include <vespa/document/update/addvalueupdate.h>
#include <vespa/document/update/arithmeticvalueupdate.h>
@@ -26,8 +29,8 @@
#include <vespa/document/util/bytebuffer.h>
#include <vespa/eval/eval/simple_value.h>
#include <vespa/eval/eval/tensor_spec.h>
-#include <vespa/eval/eval/value.h>
#include <vespa/eval/eval/test/value_compare.h>
+#include <vespa/eval/eval/value.h>
#include <vespa/vespalib/objects/nbostream.h>
#include <vespa/vespalib/util/exception.h>
#include <vespa/vespalib/util/exceptions.h>
@@ -1329,4 +1332,68 @@ TEST(DocumentUpdateTest, array_element_update_for_invalid_index_is_ignored)
EXPECT_EQ(array_value, *result_array);
}
+/**
+ * Fixture for applying a document update to a document that was
+ * serialized and deserialized while it had no field values.
+ *
+ * The repo holds a single document type "test" (id 222) whose header
+ * struct has one string field named "text".
+ */
+struct UpdateToEmptyDocumentFixture {
+    std::unique_ptr<DocumentTypeRepo> repo;
+    const DocumentType& doc_type;
+    FixedTypeRepo fixed_repo;
+
+    // 'repo' is declared first, so it is constructed before the two
+    // members that are initialized from it.
+    UpdateToEmptyDocumentFixture()
+        : repo(make_repo()),
+          doc_type(*repo->getDocumentType("test")),
+          fixed_repo(*repo, doc_type)
+    {
+    }
+
+    /** Builds the document type repo containing only the "test" document type. */
+    std::unique_ptr<DocumentTypeRepo> make_repo() {
+        config_builder::DocumenttypesConfigBuilderHelper cfg;
+        cfg.document(222, "test",
+                     Struct("test.header").addField("text", DataType::T_STRING),
+                     Struct("test.body"));
+        return std::make_unique<DocumentTypeRepo>(cfg.config());
+    }
+
+    /**
+     * Serializes a document without field values and returns a new
+     * document deserialized from the resulting stream.
+     */
+    Document::UP make_empty_doc() {
+        vespalib::nbostream serialized;
+        {
+            // The source document and the serializer only live for the write.
+            Document source_doc(doc_type, DocumentId("id:test:test::0"));
+            VespaDocumentSerializer writer(serialized);
+            writer.write(source_doc);
+        }
+        // This simulates that the document is read from e.g. the document store
+        return std::make_unique<Document>(*repo, serialized);
+    }
+
+    /**
+     * Creates an update that assigns "hello world" to the "text" field,
+     * where the assigned value carries one span tree with two TERM annotations.
+     */
+    DocumentUpdate::UP make_update() {
+        auto annotated_text = std::make_unique<StringFieldValue>("hello world");
+
+        // Keep a raw pointer to the span list; ownership moves into the span tree.
+        auto owned_spans = std::make_unique<SpanList>();
+        auto spans = owned_spans.get();
+        auto span_tree = std::make_unique<SpanTree>("my_span_tree", std::move(owned_spans));
+        span_tree->annotate(spans->add(std::make_unique<Span>(0, 5)), *AnnotationType::TERM);
+        span_tree->annotate(spans->add(std::make_unique<Span>(6, 3)), *AnnotationType::TERM);
+
+        StringFieldValue::SpanTrees span_trees;
+        span_trees.push_back(std::move(span_tree));
+        annotated_text->setSpanTrees(span_trees, fixed_repo);
+
+        auto doc_update = std::make_unique<DocumentUpdate>(*repo, doc_type, DocumentId("id:test:test::0"));
+        doc_update->addUpdate(FieldUpdate(doc_type.getField("text"))
+                              .addUpdate(std::make_unique<AssignValueUpdate>(std::move(annotated_text))));
+        return doc_update;
+    }
+};
+
+// Applies an AssignValueUpdate carrying an annotated string to a document that
+// was deserialized while empty, then reads the span trees back from the
+// resulting field value.
+TEST(DocumentUpdateTest, string_field_annotations_can_be_deserialized_after_assign_update_to_empty_document)
+{
+    UpdateToEmptyDocumentFixture f;
+    auto doc = f.make_empty_doc();
+    auto update = f.make_update();
+    update->applyTo(*doc);
+    auto fv = doc->getValue("text");
+    // Stop with a regular test failure instead of dereferencing a null pointer
+    // if the update did not set the field.
+    ASSERT_TRUE(fv != nullptr);
+    auto& text = dynamic_cast<StringFieldValue&>(*fv);
+    // This uses both the DocumentTypeRepo and DocumentType in order to deserialize the annotations.
+    auto trees = text.getSpanTrees();
+    EXPECT_EQ("hello world", text.getValue());
+    ASSERT_EQ(1, trees.size());
+    ASSERT_EQ(2, trees[0]->numAnnotations());
+}
+
} // namespace document
diff --git a/document/src/vespa/document/serialization/vespadocumentdeserializer.cpp b/document/src/vespa/document/serialization/vespadocumentdeserializer.cpp
index bbe4f5373cb..8b75c8758ee 100644
--- a/document/src/vespa/document/serialization/vespadocumentdeserializer.cpp
+++ b/document/src/vespa/document/serialization/vespadocumentdeserializer.cpp
@@ -86,6 +86,7 @@ VespaDocumentDeserializer::readDocument(Document &value) {
value.getFields().reset();
}
value.setRepo(_repo.getDocumentTypeRepo());
+ value.getFields().setDocumentType(value.getType());
FixedTypeRepo repo(_repo.getDocumentTypeRepo(), value.getType());
VarScope<FixedTypeRepo> repo_scope(_repo, repo);