aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- searchcore/CMakeLists.txt | 1
-rw-r--r-- searchcore/src/tests/proton/attribute/document_field_extractor/CMakeLists.txt | 9
-rw-r--r-- searchcore/src/tests/proton/attribute/document_field_extractor/document_field_extractor_test.cpp | 380
-rw-r--r-- searchcore/src/vespa/searchcore/proton/attribute/CMakeLists.txt | 1
-rw-r--r-- searchcore/src/vespa/searchcore/proton/attribute/attribute_writer.cpp | 17
-rw-r--r-- searchcore/src/vespa/searchcore/proton/attribute/document_field_extractor.cpp | 260
-rw-r--r-- searchcore/src/vespa/searchcore/proton/attribute/document_field_extractor.h | 55
7 files changed, 715 insertions, 8 deletions
diff --git a/searchcore/CMakeLists.txt b/searchcore/CMakeLists.txt
index 36e76b02b0b..b1570c10221 100644
--- a/searchcore/CMakeLists.txt
+++ b/searchcore/CMakeLists.txt
@@ -68,6 +68,7 @@ vespa_define_module(
src/tests/proton/attribute/attribute_populator
src/tests/proton/attribute/attribute_usage_filter
src/tests/proton/attribute/attributes_state_explorer
+ src/tests/proton/attribute/document_field_extractor
src/tests/proton/attribute/document_field_populator
src/tests/proton/attribute/exclusive_attribute_read_accessor
src/tests/proton/attribute/imported_attributes_context
diff --git a/searchcore/src/tests/proton/attribute/document_field_extractor/CMakeLists.txt b/searchcore/src/tests/proton/attribute/document_field_extractor/CMakeLists.txt
new file mode 100644
index 00000000000..7d7b798febe
--- /dev/null
+++ b/searchcore/src/tests/proton/attribute/document_field_extractor/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+# Unit test executable for DocumentFieldExtractor, built from a single source
+# file and linked against the searchcore attribute and pcommon libraries.
+vespa_add_executable(searchcore_document_field_extractor_test_app TEST
+ SOURCES
+ document_field_extractor_test.cpp
+ DEPENDS
+ searchcore_attribute
+ searchcore_pcommon
+)
+# Register the executable as a test that is run without arguments.
+vespa_add_test(NAME searchcore_document_field_extractor_test_app COMMAND searchcore_document_field_extractor_test_app)
diff --git a/searchcore/src/tests/proton/attribute/document_field_extractor/document_field_extractor_test.cpp b/searchcore/src/tests/proton/attribute/document_field_extractor/document_field_extractor_test.cpp
new file mode 100644
index 00000000000..e8ec5a0d953
--- /dev/null
+++ b/searchcore/src/tests/proton/attribute/document_field_extractor/document_field_extractor_test.cpp
@@ -0,0 +1,380 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/document/base/documentid.h>
+#include <vespa/document/base/exceptions.h>
+#include <vespa/document/base/field.h>
+#include <vespa/document/base/fieldpath.h>
+#include <vespa/document/datatype/datatypes.h>
+#include <vespa/document/fieldvalue/arrayfieldvalue.h>
+#include <vespa/document/fieldvalue/document.h>
+#include <vespa/document/fieldvalue/intfieldvalue.h>
+#include <vespa/document/fieldvalue/mapfieldvalue.h>
+#include <vespa/document/fieldvalue/stringfieldvalue.h>
+#include <vespa/document/fieldvalue/structfieldvalue.h>
+#include <vespa/document/fieldvalue/weightedsetfieldvalue.h>
+#include <vespa/searchcore/proton/attribute/document_field_extractor.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+using document::Field;
+using document::DataType;
+using document::DocumentType;
+using document::StructDataType;
+using document::ArrayDataType;
+using document::WeightedSetDataType;
+using document::MapDataType;
+using document::StructFieldValue;
+using document::ArrayFieldValue;
+using document::WeightedSetFieldValue;
+using document::IntFieldValue;
+using document::StringFieldValue;
+using document::MapFieldValue;
+using document::Document;
+using document::DocumentId;
+using document::FieldPath;
+using document::FieldValue;
+using document::FieldNotFoundException;
+using proton::DocumentFieldExtractor;
+
+namespace
+{
+
+const ArrayDataType arrayTypeInt(*DataType::INT);
+const ArrayDataType arrayTypeString(*DataType::STRING);
+const WeightedSetDataType weightedSetTypeInt(*DataType::INT, false, false);
+const WeightedSetDataType weightedSetTypeString(*DataType::STRING, false, false);
+const int32_t noInt(std::numeric_limits<int32_t>::min());
+const vespalib::string noString("");
+
+/**
+ * Build an array field value of type arrayTypeInt holding one IntFieldValue
+ * per entry of 'array', in the same order.
+ */
+std::unique_ptr<FieldValue>
+makeIntArray(const std::vector<int32_t> &array)
+{
+    auto values = std::make_unique<ArrayFieldValue>(arrayTypeInt);
+    for (const int32_t &number : array) {
+        values->append(std::make_unique<IntFieldValue>(number));
+    }
+    return values;
+}
+
+/**
+ * Build an array field value of type arrayTypeString holding one
+ * StringFieldValue per entry of 'array', in the same order.
+ */
+std::unique_ptr<FieldValue>
+makeStringArray(const std::vector<vespalib::string> &array)
+{
+    auto values = std::make_unique<ArrayFieldValue>(arrayTypeString);
+    for (const vespalib::string &text : array) {
+        values->append(std::make_unique<StringFieldValue>(text));
+    }
+    return values;
+}
+
+/**
+ * Build a weighted set field value of type weightedSetTypeInt from
+ * (key, weight) pairs; each pair is added in the order given.
+ */
+std::unique_ptr<FieldValue>
+makeIntWeightedSet(const std::vector<std::pair<int32_t, int32_t>> &array)
+{
+    auto weightedSet = std::make_unique<WeightedSetFieldValue>(weightedSetTypeInt);
+    for (const std::pair<int32_t, int32_t> &entry : array) {
+        const int32_t weight = entry.second;
+        weightedSet->add(IntFieldValue(entry.first), weight);
+    }
+    return weightedSet;
+}
+
+/**
+ * Build a weighted set field value of type weightedSetTypeString from
+ * (key, weight) pairs; each pair is added in the order given.
+ */
+std::unique_ptr<FieldValue>
+makeStringWeightedSet(const std::vector<std::pair<vespalib::string, int32_t>> &array)
+{
+    auto weightedSet = std::make_unique<WeightedSetFieldValue>(weightedSetTypeString);
+    for (const std::pair<vespalib::string, int32_t> &entry : array) {
+        const int32_t weight = entry.second;
+        weightedSet->add(StringFieldValue(entry.first), weight);
+    }
+    return weightedSet;
+}
+
+/**
+ * Build the field path for 'path' in two fixtures and check that both paths
+ * are non-empty and supported, and that DocumentFieldExtractor::isCompatible()
+ * reports 'same' for the pair.
+ *
+ * @param f1   first fixture; must provide makeFieldPath()
+ * @param f2   second fixture; must provide makeFieldPath()
+ * @param path field path string resolved against each fixture
+ * @param same expected result of isCompatible()
+ */
+template <typename F1, typename F2>
+void
+checkFieldPathChange(F1 f1, F2 f2, const vespalib::string &path, bool same)
+{
+ FieldPath fieldPath1 = f1.makeFieldPath(path);
+ FieldPath fieldPath2 = f2.makeFieldPath(path);
+ // makeFieldPath() returns an empty path for unknown or unsupported fields,
+ // so non-empty here means the path resolved in both fixtures.
+ EXPECT_TRUE(!fieldPath1.empty());
+ EXPECT_TRUE(!fieldPath2.empty());
+ EXPECT_TRUE(DocumentFieldExtractor::isSupported(fieldPath1));
+ EXPECT_TRUE(DocumentFieldExtractor::isSupported(fieldPath2));
+ EXPECT_EQUAL(same, DocumentFieldExtractor::isCompatible(fieldPath1, fieldPath2));
+}
+
+}
+
+/**
+ * Common test fixture: a document type named "test" plus a "tag" field
+ * (id 1) and a "name" field (id 2). The fields are only declared here;
+ * derived fixtures decide how they are added to the document type.
+ */
+struct FixtureBase
+{
+ DocumentType type;
+ const Field tagField;
+ const Field nameField;
+
+ // byteTag selects BYTE instead of INT as data type for the "tag" field.
+ FixtureBase(bool byteTag)
+ : type("test"),
+ tagField("tag", 1, byteTag ? *DataType::BYTE : *DataType::INT, true),
+ nameField("name", 2, *DataType::STRING, true)
+ {
+ }
+
+ ~FixtureBase();
+
+ // Create an empty document of the fixture's type with a fixed document id.
+ std::unique_ptr<Document>
+ makeDoc()
+ {
+ return std::make_unique<Document>(type, DocumentId("id::test::1"));
+ }
+
+ // Resolve 'path' against the fixture's document type. Returns an empty
+ // field path when the field is not found or when the resulting path is
+ // not supported by DocumentFieldExtractor.
+ FieldPath
+ makeFieldPath(const vespalib::string &path)
+ {
+ FieldPath fieldPath;
+ try {
+ type.buildFieldPath(fieldPath, path);
+ } catch (FieldNotFoundException &) {
+ // Discard anything that was built before the lookup failed.
+ fieldPath = FieldPath();
+ }
+ if (!DocumentFieldExtractor::isSupported(fieldPath)) {
+ fieldPath = FieldPath();
+ }
+ return fieldPath;
+ }
+
+ // Extract the value at 'path' and compare it with 'expected'. A null
+ // 'expected' means the extractor is expected to return no value.
+ void
+ assertExtracted(DocumentFieldExtractor &extractor,
+ const vespalib::string &path,
+ std::unique_ptr<FieldValue> expected) {
+ FieldPath fieldPath(makeFieldPath(path));
+ std::unique_ptr<FieldValue> fv = extractor.getFieldValue(fieldPath);
+ if (expected) {
+ // Check for a value before fv is dereferenced below.
+ ASSERT_TRUE(fv);
+ EXPECT_EQUAL(*expected, *fv);
+ } else {
+ EXPECT_TRUE(!fv);
+ }
+ }
+};
+
+FixtureBase::~FixtureBase() = default;
+
+/**
+ * Fixture where "tag" and "name" are added directly to the document type
+ * as single-value fields.
+ */
+struct SimpleFixture : public FixtureBase
+{
+ SimpleFixture(bool byteTag = false)
+ : FixtureBase(byteTag)
+ {
+ type.addField(tagField);
+ type.addField(nameField);
+ }
+};
+
+// Single-value fields set on the document are expected to be extracted
+// unchanged via their top-level field paths.
+TEST_F("require that simple fields give simple values", SimpleFixture)
+{
+ auto doc = f.makeDoc();
+ doc->setValue(f.tagField, IntFieldValue(200));
+ doc->setValue(f.nameField, StringFieldValue("name200b"));
+ DocumentFieldExtractor extractor(*doc);
+ TEST_DO(f.assertExtracted(extractor, "tag", std::make_unique<IntFieldValue>(200)));
+ TEST_DO(f.assertExtracted(extractor, "name", std::make_unique<StringFieldValue>("name200b")));
+}
+
+/**
+ * Fixture with two array fields: "tag" (array of the base tag field's data
+ * type) and "val" (array of the base name field's data type).
+ */
+struct ArrayFixture : public FixtureBase
+{
+ // Each data type member is declared before the Field constructed from it.
+ const ArrayDataType tagArrayFieldType;
+ const Field tagArrayField;
+ const ArrayDataType valueArrayFieldType;
+ const Field valueArrayField;
+
+ ArrayFixture(bool byteTag = false)
+ : FixtureBase(byteTag),
+ tagArrayFieldType(tagField.getDataType()),
+ tagArrayField("tag", tagArrayFieldType, true),
+ valueArrayFieldType(nameField.getDataType()),
+ valueArrayField("val", valueArrayFieldType, true)
+ {
+ type.addField(tagArrayField);
+ type.addField(valueArrayField);
+ }
+
+ ~ArrayFixture();
+};
+
+ArrayFixture::~ArrayFixture() = default;
+
+// Array fields set on the document are expected to be extracted as equal
+// array values via their top-level field paths.
+TEST_F("require that array fields give array values", ArrayFixture)
+{
+ auto doc = f.makeDoc();
+ doc->setValue(f.tagArrayField, *makeIntArray({ 300, 301 }));
+ doc->setValue(f.valueArrayField, *makeStringArray({"v500", "v502"}));
+ DocumentFieldExtractor extractor(*doc);
+ TEST_DO(f.assertExtracted(extractor, "tag", makeIntArray({ 300, 301})));
+ TEST_DO(f.assertExtracted(extractor, "val", makeStringArray({"v500", "v502"})));
+}
+
+/**
+ * Fixture with two weighted set fields: "tag" (weighted set of the base tag
+ * field's data type) and "val" (weighted set of strings).
+ */
+struct WeightedSetFixture : public FixtureBase
+{
+ // Each data type member is declared before the Field constructed from it.
+ const WeightedSetDataType tagWeightedSetFieldType;
+ const Field tagWeightedSetField;
+ const WeightedSetDataType valueWeightedSetFieldType;
+ const Field valueWeightedSetField;
+
+ WeightedSetFixture(bool byteTag = false)
+ : FixtureBase(byteTag),
+ tagWeightedSetFieldType(tagField.getDataType(), false, false),
+ tagWeightedSetField("tag", tagWeightedSetFieldType, true),
+ valueWeightedSetFieldType(*DataType::STRING, false, false),
+ valueWeightedSetField("val", valueWeightedSetFieldType, true)
+ {
+ type.addField(tagWeightedSetField);
+ type.addField(valueWeightedSetField);
+ }
+
+ ~WeightedSetFixture();
+};
+
+WeightedSetFixture::~WeightedSetFixture() = default;
+
+// Weighted set fields set on the document are expected to be extracted as
+// equal weighted set values via their top-level field paths.
+TEST_F("require that weighted set fields give weighted set values", WeightedSetFixture)
+{
+ auto doc = f.makeDoc();
+ doc->setValue(f.tagWeightedSetField, *makeIntWeightedSet({{400, 10}, { 401, 13}}));
+ doc->setValue(f.valueWeightedSetField, *makeStringWeightedSet({{"600", 17}, {"604", 19}}));
+ DocumentFieldExtractor extractor(*doc);
+ TEST_DO(f.assertExtracted(extractor, "tag", makeIntWeightedSet({{ 400, 10}, {401, 13}})));
+ TEST_DO(f.assertExtracted(extractor, "val", makeStringWeightedSet({{"600", 17}, {"604", 19}})));
+}
+
+/**
+ * Base fixture for struct tests: defines a struct type named "struct" with
+ * the "tag" and "name" fields, plus helpers creating struct values with
+ * either, both or none of the fields set.
+ */
+struct StructFixtureBase : public FixtureBase
+{
+ StructDataType structFieldType;
+
+ StructFixtureBase(bool byteTag)
+ : FixtureBase(byteTag),
+ structFieldType("struct")
+ {
+ structFieldType.addField(tagField);
+ structFieldType.addField(nameField);
+ }
+
+ // Struct value with no fields set.
+ std::unique_ptr<StructFieldValue>
+ makeStruct()
+ {
+ return std::make_unique<StructFieldValue>(structFieldType);
+ }
+
+ // Struct value with both "tag" and "name" set.
+ std::unique_ptr<StructFieldValue>
+ makeStruct(int tag, const vespalib::string &value)
+ {
+ auto ret = makeStruct();
+ ret->setValue(tagField, IntFieldValue(tag));
+ ret->setValue(nameField, StringFieldValue(value));
+ return ret;
+ }
+
+ // Struct value with only "tag" set.
+ std::unique_ptr<StructFieldValue>
+ makeStruct(int tag)
+ {
+ auto ret = makeStruct();
+ ret->setValue(tagField, IntFieldValue(tag));
+ return ret;
+ }
+
+ // Struct value with only "name" set.
+ std::unique_ptr<StructFieldValue>
+ makeStruct(const vespalib::string &value)
+ {
+ auto ret = makeStruct();
+ ret->setValue(nameField, StringFieldValue(value));
+ return ret;
+ }
+};
+
+/**
+ * Fixture with a field "s" (id 11) that is an array of the struct type
+ * defined by StructFixtureBase.
+ */
+struct StructArrayFixture : public StructFixtureBase
+{
+ // The array data type is declared before the Field constructed from it.
+ const ArrayDataType structArrayFieldType;
+ const Field structArrayField;
+
+ StructArrayFixture(bool byteTag = false)
+ : StructFixtureBase(byteTag),
+ structArrayFieldType(structFieldType),
+ structArrayField("s", 11, structArrayFieldType, true)
+ {
+ type.addField(structArrayField);
+ }
+
+ ~StructArrayFixture();
+};
+
+StructArrayFixture::~StructArrayFixture() = default;
+
+// Extracting a struct member from an array of structs is expected to give
+// one array element per struct; members that are not set come out as
+// noInt / noString.
+TEST_F("require that struct array field gives array values", StructArrayFixture)
+{
+ auto doc = f.makeDoc();
+ ArrayFieldValue structArrayFieldValue(f.structArrayFieldType);
+ structArrayFieldValue.add(*f.makeStruct(1, "name1"));
+ structArrayFieldValue.add(*f.makeStruct(2));
+ structArrayFieldValue.add(*f.makeStruct("name3"));
+ doc->setValue(f.structArrayField, structArrayFieldValue);
+ DocumentFieldExtractor extractor(*doc);
+ TEST_DO(f.assertExtracted(extractor, "s.tag", makeIntArray({ 1, 2, noInt })));
+ TEST_DO(f.assertExtracted(extractor, "s.name", makeStringArray({ "name1", noString, "name3" })));
+}
+
+/**
+ * Fixture with a field "s" (id 12) that is a map from string (or byte, when
+ * byteKey is set) to the struct type defined by StructFixtureBase.
+ */
+struct StructMapFixture : public StructFixtureBase
+{
+ // The map data type is declared before the Field constructed from it.
+ const MapDataType structMapFieldType;
+ const Field structMapField;
+
+ StructMapFixture(bool byteTag = false, bool byteKey = false)
+ : StructFixtureBase(byteTag),
+ structMapFieldType(byteKey ? *DataType::BYTE : *DataType::STRING, structFieldType),
+ structMapField("s", 12, structMapFieldType, true)
+ {
+ type.addField(structMapField);
+ }
+
+ ~StructMapFixture();
+};
+
+StructMapFixture::~StructMapFixture() = default;
+
+// Extracting keys or a struct member of the values from a map of structs is
+// expected to give one array element per map entry; members that are not set
+// come out as noInt / noString.
+TEST_F("require that struct map field gives array values", StructMapFixture)
+{
+ auto doc = f.makeDoc();
+ MapFieldValue structMapFieldValue(f.structMapFieldType);
+ structMapFieldValue.put(StringFieldValue("m0"), *f.makeStruct(10, "name10"));
+ structMapFieldValue.put(StringFieldValue("m1"), *f.makeStruct(11));
+ structMapFieldValue.put(StringFieldValue("m2"), *f.makeStruct("name12"));
+ structMapFieldValue.put(StringFieldValue("m3"), *f.makeStruct());
+ doc->setValue(f.structMapField, structMapFieldValue);
+ DocumentFieldExtractor extractor(*doc);
+ TEST_DO(f.assertExtracted(extractor, "s.key", makeStringArray({ "m0", "m1", "m2", "m3" })));
+ TEST_DO(f.assertExtracted(extractor, "s.value.tag", makeIntArray({ 10, 11, noInt, noInt })));
+ TEST_DO(f.assertExtracted(extractor, "s.value.name", makeStringArray({ "name10", noString, "name12", noString })));
+}
+
+// FixtureBase adds no fields to the document type, so "unknown" resolves to
+// an empty field path and no value is expected from the extractor.
+TEST_F("require that unknown field gives null value", FixtureBase(false))
+{
+ auto doc = f.makeDoc();
+ DocumentFieldExtractor extractor(*doc);
+ TEST_DO(f.assertExtracted(extractor, "unknown", std::unique_ptr<FieldValue>()));
+}
+
+// Pairs of fixtures are compared on the same path string. Identical setups
+// are expected to be compatible; a different tag type (int vs byte), a
+// different collection kind, or a different map key type is not.
+TEST("require that type changes are detected")
+{
+ TEST_DO(checkFieldPathChange(SimpleFixture(false), SimpleFixture(false), "tag", true));
+ TEST_DO(checkFieldPathChange(SimpleFixture(false), SimpleFixture(true), "tag", false));
+ TEST_DO(checkFieldPathChange(ArrayFixture(false), ArrayFixture(false), "tag", true));
+ TEST_DO(checkFieldPathChange(ArrayFixture(false), ArrayFixture(true), "tag", false));
+ TEST_DO(checkFieldPathChange(SimpleFixture(false), ArrayFixture(false), "tag", false));
+ TEST_DO(checkFieldPathChange(WeightedSetFixture(false), WeightedSetFixture(false), "tag", true));
+ TEST_DO(checkFieldPathChange(WeightedSetFixture(false), WeightedSetFixture(true), "tag", false));
+ TEST_DO(checkFieldPathChange(SimpleFixture(false), WeightedSetFixture(false), "tag", false));
+ TEST_DO(checkFieldPathChange(ArrayFixture(false), WeightedSetFixture(false), "tag", false));
+ TEST_DO(checkFieldPathChange(StructArrayFixture(false), StructArrayFixture(false), "s.tag", true));
+ TEST_DO(checkFieldPathChange(StructArrayFixture(false), StructArrayFixture(true), "s.tag", false));
+ TEST_DO(checkFieldPathChange(StructMapFixture(false, false), StructMapFixture(false, false), "s.value.tag", true));
+ TEST_DO(checkFieldPathChange(StructMapFixture(false, false), StructMapFixture(true, false), "s.value.tag", false));
+ TEST_DO(checkFieldPathChange(StructMapFixture(false, false), StructMapFixture(false, true), "s.value.tag", false));
+ TEST_DO(checkFieldPathChange(StructMapFixture(false, false), StructMapFixture(false, false), "s.key", true));
+ TEST_DO(checkFieldPathChange(StructMapFixture(false, false), StructMapFixture(false, true), "s.key", false));
+}
+
+// Test program entry point: runs every test registered in this file.
+TEST_MAIN()
+{
+ TEST_RUN_ALL();
+}
diff --git a/searchcore/src/vespa/searchcore/proton/attribute/CMakeLists.txt b/searchcore/src/vespa/searchcore/proton/attribute/CMakeLists.txt
index 5b6a9faa05d..30f6c2d92c2 100644
--- a/searchcore/src/vespa/searchcore/proton/attribute/CMakeLists.txt
+++ b/searchcore/src/vespa/searchcore/proton/attribute/CMakeLists.txt
@@ -23,6 +23,7 @@ vespa_add_library(searchcore_attribute STATIC
attributedisklayout.cpp
attributemanager.cpp
attributesconfigscout.cpp
+ document_field_extractor.cpp
document_field_populator.cpp
document_field_retriever.cpp
exclusive_attribute_read_accessor.cpp
diff --git a/searchcore/src/vespa/searchcore/proton/attribute/attribute_writer.cpp b/searchcore/src/vespa/searchcore/proton/attribute/attribute_writer.cpp
index a6329db2aee..0cb260ed9a8 100644
--- a/searchcore/src/vespa/searchcore/proton/attribute/attribute_writer.cpp
+++ b/searchcore/src/vespa/searchcore/proton/attribute/attribute_writer.cpp
@@ -2,6 +2,7 @@
#include "attribute_writer.h"
#include "attributemanager.h"
+#include "document_field_extractor.h"
#include <vespa/document/base/exceptions.h>
#include <vespa/document/datatype/documenttype.h>
#include <vespa/searchcore/proton/attribute/imported_attributes_repo.h>
@@ -51,7 +52,9 @@ AttributeWriter::WriteContext::buildFieldPaths(const DocumentType &docType)
FieldPath fp;
try {
docType.buildFieldPath(fp, name);
- } catch (document::FieldNotFoundException & e) { }
+ } catch (document::FieldNotFoundException & e) {
+ fp = FieldPath();
+ }
assert(fieldId < _fieldPaths.size());
_fieldPaths[fieldId] = std::move(fp);
@@ -200,12 +203,12 @@ class PutTask : public vespalib::Executor::Task
std::remove_reference_t<AttributeWriter::OnWriteDoneType> _onWriteDone;
std::vector<FieldValue::UP> _fieldValues;
public:
- PutTask(const AttributeWriter::WriteContext &wc, SerialNum serialNum, const Document &doc, uint32_t lid, bool immediateCommit, AttributeWriter::OnWriteDoneType onWriteDone);
+ PutTask(const AttributeWriter::WriteContext &wc, SerialNum serialNum, DocumentFieldExtractor &fieldExtractor, uint32_t lid, bool immediateCommit, AttributeWriter::OnWriteDoneType onWriteDone);
virtual ~PutTask() override;
virtual void run() override;
};
-PutTask::PutTask(const AttributeWriter::WriteContext &wc, SerialNum serialNum, const Document &doc, uint32_t lid, bool immediateCommit, AttributeWriter::OnWriteDoneType onWriteDone)
+PutTask::PutTask(const AttributeWriter::WriteContext &wc, SerialNum serialNum, DocumentFieldExtractor &fieldExtractor, uint32_t lid, bool immediateCommit, AttributeWriter::OnWriteDoneType onWriteDone)
: _wc(wc),
_serialNum(serialNum),
_lid(lid),
@@ -215,10 +218,7 @@ PutTask::PutTask(const AttributeWriter::WriteContext &wc, SerialNum serialNum, c
const auto &fieldPaths = _wc.getFieldPaths();
_fieldValues.reserve(fieldPaths.size());
for (const auto &fieldPath : fieldPaths) {
- FieldValue::UP fv;
- if (!fieldPath.empty()) {
- fv = doc.getNestedFieldValue(fieldPath.getFullRange());
- }
+ FieldValue::UP fv = fieldExtractor.getFieldValue(fieldPath);
_fieldValues.emplace_back(std::move(fv));
}
}
@@ -382,8 +382,9 @@ void
AttributeWriter::internalPut(SerialNum serialNum, const Document &doc, DocumentIdT lid,
bool immediateCommit, OnWriteDoneType onWriteDone)
{
+ DocumentFieldExtractor extractor(doc);
for (const auto &wc : _writeContexts) {
- auto putTask = std::make_unique<PutTask>(wc, serialNum, doc, lid, immediateCommit, onWriteDone);
+ auto putTask = std::make_unique<PutTask>(wc, serialNum, extractor, lid, immediateCommit, onWriteDone);
_attributeFieldWriter.executeTask(wc.getExecutorId(), std::move(putTask));
}
}
diff --git a/searchcore/src/vespa/searchcore/proton/attribute/document_field_extractor.cpp b/searchcore/src/vespa/searchcore/proton/attribute/document_field_extractor.cpp
new file mode 100644
index 00000000000..143441eaae9
--- /dev/null
+++ b/searchcore/src/vespa/searchcore/proton/attribute/document_field_extractor.cpp
@@ -0,0 +1,260 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "document_field_extractor.h"
+#include <vespa/document/datatype/arraydatatype.h>
+#include <vespa/document/fieldvalue/arrayfieldvalue.h>
+#include <vespa/document/fieldvalue/bytefieldvalue.h>
+#include <vespa/document/fieldvalue/document.h>
+#include <vespa/document/fieldvalue/doublefieldvalue.h>
+#include <vespa/document/fieldvalue/floatfieldvalue.h>
+#include <vespa/document/fieldvalue/intfieldvalue.h>
+#include <vespa/document/fieldvalue/longfieldvalue.h>
+#include <vespa/document/fieldvalue/shortfieldvalue.h>
+#include <vespa/document/fieldvalue/stringfieldvalue.h>
+#include <vespa/document/fieldvalue/structfieldvalue.h>
+#include <vespa/document/fieldvalue/mapfieldvalue.h>
+#include <vespa/searchcommon/common/undefinedvalues.h>
+#include <vespa/vespalib/stllike/hash_map.hpp>
+#include <vespa/vespalib/util/exceptions.h>
+#include <vespa/vespalib/util/stringfmt.h>
+
+using document::FieldValue;
+using document::ByteFieldValue;
+using document::ShortFieldValue;
+using document::IntFieldValue;
+using document::LongFieldValue;
+using document::FloatFieldValue;
+using document::DoubleFieldValue;
+using document::StringFieldValue;
+using document::StructFieldValue;
+using document::MapFieldValue;
+using document::DataType;
+using document::ArrayDataType;
+using document::ArrayFieldValue;
+using document::Document;
+using document::FieldPath;
+using document::FieldPathEntry;
+using document::FieldValueVisitor;
+using vespalib::IllegalStateException;
+using vespalib::make_string;
+using search::attribute::getUndefined;
+
+namespace proton {
+
+namespace {
+
+/**
+ * Visitor that overwrites a numeric field value with the value returned by
+ * search::attribute::getUndefined<T>() for the matching primitive type.
+ * Every other kind of field value is left untouched.
+ */
+class SetUndefinedValueVisitor : public FieldValueVisitor
+{
+    // Numeric values: replace the content with the undefined value.
+    void visit(ByteFieldValue &value) override { value = getUndefined<int8_t>(); }
+    void visit(ShortFieldValue &value) override { value = getUndefined<int16_t>(); }
+    void visit(IntFieldValue &value) override { value = getUndefined<int32_t>(); }
+    void visit(LongFieldValue &value) override { value = getUndefined<int64_t>(); }
+    void visit(FloatFieldValue &value) override { value = getUndefined<float>(); }
+    void visit(DoubleFieldValue &value) override { value = getUndefined<double>(); }
+
+    // All remaining value kinds: intentionally no change.
+    void visit(StringFieldValue &) override { }
+    void visit(ArrayFieldValue &) override { }
+    void visit(MapFieldValue &) override { }
+    void visit(StructFieldValue &) override { }
+    void visit(Document &) override { }
+    void visit(document::AnnotationReferenceFieldValue &) override { }
+    void visit(document::PredicateFieldValue &) override { }
+    void visit(document::RawFieldValue &) override { }
+    void visit(document::ReferenceFieldValue &) override { }
+    void visit(document::TensorFieldValue &) override { }
+    void visit(document::WeightedSetFieldValue &) override { }
+};
+
+// Single instance used by the extraction functions below.
+SetUndefinedValueVisitor setUndefinedValueVisitor;
+
+// Array data types for the element types that getArrayType() knows about.
+const ArrayDataType arrayTypeByte(*DataType::BYTE);
+const ArrayDataType arrayTypeShort(*DataType::SHORT);
+const ArrayDataType arrayTypeInt(*DataType::INT);
+const ArrayDataType arrayTypeLong(*DataType::LONG);
+const ArrayDataType arrayTypeFloat(*DataType::FLOAT);
+const ArrayDataType arrayTypeDouble(*DataType::DOUBLE);
+const ArrayDataType arrayTypeString(*DataType::STRING);
+
+/**
+ * Map an element data type to the matching array data type defined above.
+ *
+ * @return pointer to the array data type, or nullptr when the id of
+ *         'fieldType' is not one of byte, short, int, long, float, double
+ *         or string.
+ */
+const DataType *
+getArrayType(const DataType &fieldType)
+{
+    switch (fieldType.getId()) {
+    case DataType::Type::T_BYTE:   return &arrayTypeByte;
+    case DataType::Type::T_SHORT:  return &arrayTypeShort;
+    case DataType::Type::T_INT:    return &arrayTypeInt;
+    case DataType::Type::T_LONG:   return &arrayTypeLong;
+    case DataType::Type::T_FLOAT:  return &arrayTypeFloat;
+    case DataType::Type::T_DOUBLE: return &arrayTypeDouble;
+    case DataType::Type::T_STRING: return &arrayTypeString;
+    default:                       return nullptr;
+    }
+}
+
+/**
+ * Create an array field value with 'size' elements whose element type is
+ * the data type of 'fieldPathEntry'.
+ *
+ * @param fieldPathEntry field path entry supplying the element data type
+ * @param size           number of elements the array is resized to
+ * @return the newly created array
+ * @throws IllegalStateException if getArrayType() has no array type for the
+ *         element data type (previously this dereferenced a null pointer)
+ */
+std::unique_ptr<ArrayFieldValue>
+makeArray(const FieldPathEntry &fieldPathEntry, size_t size)
+{
+    const DataType *arrayType = getArrayType(fieldPathEntry.getDataType());
+    if (arrayType == nullptr) {
+        // getArrayType() only covers a fixed set of primitive types; fail
+        // loudly instead of dereferencing nullptr below.
+        throw IllegalStateException(make_string("Unsupported element data type id %d for field path entry '%s'",
+                                                static_cast<int>(fieldPathEntry.getDataType().getId()),
+                                                fieldPathEntry.getName().c_str()),
+                                    VESPA_STRLOC);
+    }
+    auto array = std::make_unique<ArrayFieldValue>(*arrayType);
+    array->resize(size);
+    return array;
+}
+
+/**
+ * Check whether the runtime class of 'fieldValue' inherits the class
+ * identified by 'id'.
+ */
+bool
+checkInherits(const FieldValue &fieldValue, unsigned id)
+{
+    return fieldValue.getClass().inherits(id);
+}
+
+}
+
+// Keeps a reference to 'doc'; the cache of field values starts out empty.
+DocumentFieldExtractor::DocumentFieldExtractor(const Document &doc)
+    : _doc(doc), _cachedFieldValues()
+{ }
+
+// Defined in this file, where the field value headers are included.
+DocumentFieldExtractor::~DocumentFieldExtractor() = default;
+
+/**
+ * Check that 'fieldPath' has one of the shapes handled by getFieldValue():
+ *   - empty
+ *   - <struct field>
+ *   - <struct field>.<struct field>
+ *   - <struct field>.<all map keys>
+ *   - <struct field>.<all map values>.<struct field>
+ */
+bool
+DocumentFieldExtractor::isSupported(const FieldPath &fieldPath)
+{
+    using EntryType = FieldPathEntry::Type;
+    const auto depth = fieldPath.size();
+    if (depth == 0) {
+        return true;
+    }
+    if (depth > 3 || fieldPath[0].getType() != EntryType::STRUCT_FIELD) {
+        return false;
+    }
+    if (depth == 2) {
+        const auto innerType = fieldPath[1].getType();
+        return innerType == EntryType::STRUCT_FIELD ||
+               innerType == EntryType::MAP_ALL_KEYS;
+    }
+    if (depth == 3) {
+        return fieldPath[1].getType() == EntryType::MAP_ALL_VALUES &&
+               fieldPath[2].getType() == EntryType::STRUCT_FIELD;
+    }
+    return true;
+}
+
+/**
+ * Two field paths are compatible when they have the same length and every
+ * pair of entries at the same position agrees on both entry type and data
+ * type.
+ */
+bool
+DocumentFieldExtractor::isCompatible(const FieldPath &fieldPath1, const FieldPath &fieldPath2)
+{
+    const auto numEntries = fieldPath1.size();
+    if (numEntries != fieldPath2.size()) {
+        return false;
+    }
+    for (uint32_t idx = 0; idx < numEntries; ++idx) {
+        const auto &lhs = fieldPath1[idx];
+        const auto &rhs = fieldPath2[idx];
+        if (lhs.getType() != rhs.getType()) {
+            return false;
+        }
+        if (lhs.getDataType() != rhs.getDataType()) {
+            return false;
+        }
+    }
+    return true;
+}
+
+/**
+ * Return the document's value for the field named by 'fieldPathEntry'.
+ * The value is read from the document on first use and then kept in
+ * _cachedFieldValues, keyed by field name. The returned pointer is whatever
+ * the document returned for the field, which may be null.
+ */
+const FieldValue *
+DocumentFieldExtractor::getCachedFieldValue(const FieldPathEntry &fieldPathEntry)
+{
+    const auto &fieldName = fieldPathEntry.getName();
+    auto cached = _cachedFieldValues.find(fieldName);
+    if (cached != _cachedFieldValues.end()) {
+        return cached->second.get();
+    }
+    auto inserted = _cachedFieldValues.insert(std::make_pair(fieldName, _doc.getValue(fieldPathEntry.getFieldRef())));
+    // The lookup above missed, so the insert must have added a new entry.
+    assert(inserted.second);
+    return inserted.first->second.get();
+}
+
+/**
+ * Extract the value for a single-entry field path by asking the document
+ * for the nested field value over the full path range.
+ */
+std::unique_ptr<FieldValue>
+DocumentFieldExtractor::getSimpleFieldValue(const FieldPath &fieldPath)
+{
+    const auto fullRange = fieldPath.getFullRange();
+    return _doc.getNestedFieldValue(fullRange);
+}
+
+/**
+ * Extract one struct member from every element of an array of structs.
+ *
+ * fieldPath[0] names the array field and fieldPath[1] the struct member.
+ * The result has one element per struct; elements whose struct has no value
+ * for the member are passed to setUndefinedValueVisitor.
+ *
+ * @return the array of member values, or null when the outer field has no
+ *         value or is not an array.
+ */
+std::unique_ptr<FieldValue>
+DocumentFieldExtractor::getStructArrayFieldValue(const FieldPath &fieldPath)
+{
+    const FieldValue *outer = getCachedFieldValue(fieldPath[0]);
+    if (outer == nullptr || !checkInherits(*outer, ArrayFieldValue::classId)) {
+        return std::unique_ptr<FieldValue>();
+    }
+    const auto &structArray = static_cast<const ArrayFieldValue &>(*outer);
+    const auto &memberEntry = fieldPath[1];
+    auto result = makeArray(memberEntry, structArray.size());
+    uint32_t writeIndex = 0;
+    for (const auto &elem : structArray) {
+        auto &slot = (*result)[writeIndex];
+        ++writeIndex;
+        const auto &structValue = static_cast<const StructFieldValue &>(elem);
+        const bool found = structValue.getValue(memberEntry.getFieldRef(), slot);
+        if (!found) {
+            slot.accept(setUndefinedValueVisitor);
+        }
+    }
+    return result;
+}
+
+/**
+ * Extract all keys of a map field as an array.
+ *
+ * fieldPath[0] names the map field and fieldPath[1] supplies the key data
+ * type used for the resulting array. Keys are copied in the map's iteration
+ * order.
+ *
+ * @return the array of keys, or null when the outer field has no value or
+ *         is not a map.
+ */
+std::unique_ptr<FieldValue>
+DocumentFieldExtractor::getStructMapKeyFieldValue(const FieldPath &fieldPath)
+{
+    const FieldValue *outer = getCachedFieldValue(fieldPath[0]);
+    if (outer == nullptr || !checkInherits(*outer, MapFieldValue::classId)) {
+        return std::unique_ptr<FieldValue>();
+    }
+    const auto &structMap = static_cast<const MapFieldValue &>(*outer);
+    auto keys = makeArray(fieldPath[1], structMap.size());
+    uint32_t writeIndex = 0;
+    for (const auto &entry : structMap) {
+        (*keys)[writeIndex].assign(*entry.first);
+        ++writeIndex;
+    }
+    return keys;
+}
+
+/**
+ * Extract one struct member from every value of a map of structs.
+ *
+ * fieldPath[0] names the map field and fieldPath[2] the struct member.
+ * The result has one element per map entry, in the map's iteration order;
+ * elements whose struct has no value for the member are passed to
+ * setUndefinedValueVisitor.
+ *
+ * @return the array of member values, or null when the outer field has no
+ *         value or is not a map.
+ */
+std::unique_ptr<document::FieldValue>
+DocumentFieldExtractor::getStructMapFieldValue(const FieldPath &fieldPath)
+{
+    const FieldValue *outer = getCachedFieldValue(fieldPath[0]);
+    if (outer == nullptr || !checkInherits(*outer, MapFieldValue::classId)) {
+        return std::unique_ptr<FieldValue>();
+    }
+    const auto &structMap = static_cast<const MapFieldValue &>(*outer);
+    const auto &memberEntry = fieldPath[2];
+    auto result = makeArray(memberEntry, structMap.size());
+    uint32_t writeIndex = 0;
+    for (const auto &entry : structMap) {
+        auto &slot = (*result)[writeIndex];
+        ++writeIndex;
+        const auto &structValue = static_cast<const StructFieldValue &>(*entry.second);
+        const bool found = structValue.getValue(memberEntry.getFieldRef(), slot);
+        if (!found) {
+            slot.accept(setUndefinedValueVisitor);
+        }
+    }
+    return result;
+}
+
+/**
+ * Extract the value addressed by 'fieldPath', dispatching on path length:
+ *   1 entry   - plain field value
+ *   2 entries - struct member from an array of structs, or all map keys
+ *   3 entries - struct member from the values of a map of structs
+ * Any other length (including an empty path) gives a null result.
+ */
+std::unique_ptr<FieldValue>
+DocumentFieldExtractor::getFieldValue(const FieldPath &fieldPath)
+{
+    switch (fieldPath.size()) {
+    case 1:
+        return getSimpleFieldValue(fieldPath);
+    case 2:
+        if (fieldPath[1].getType() == FieldPathEntry::Type::STRUCT_FIELD) {
+            return getStructArrayFieldValue(fieldPath);
+        }
+        return getStructMapKeyFieldValue(fieldPath);
+    case 3:
+        return getStructMapFieldValue(fieldPath);
+    default:
+        return std::unique_ptr<FieldValue>();
+    }
+}
+
+}
diff --git a/searchcore/src/vespa/searchcore/proton/attribute/document_field_extractor.h b/searchcore/src/vespa/searchcore/proton/attribute/document_field_extractor.h
new file mode 100644
index 00000000000..71f020a582c
--- /dev/null
+++ b/searchcore/src/vespa/searchcore/proton/attribute/document_field_extractor.h
@@ -0,0 +1,55 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/vespalib/stllike/hash_map.h>
+#include <memory>
+
+namespace document
+{
+
+class Document;
+class FieldValue;
+class FieldPath;
+class FieldPathEntry;
+
+}
+
+namespace proton {
+
+/**
+ * Class used to extract a field value from a document field or from a
+ * nested field in an array/map of structs.
+ *
+ * The extractor keeps a reference to the document passed to the
+ * constructor, so the document must outlive the extractor.
+ */
+class DocumentFieldExtractor
+{
+    const document::Document &_doc;
+    // Top-level field values read from the document, keyed by field name.
+    vespalib::hash_map<vespalib::string, std::unique_ptr<document::FieldValue>> _cachedFieldValues;
+
+    // Value of the top-level field named by the entry, cached after first use.
+    const document::FieldValue *getCachedFieldValue(const document::FieldPathEntry &fieldPathEntry);
+    // Field path with a single entry.
+    std::unique_ptr<document::FieldValue> getSimpleFieldValue(const document::FieldPath &fieldPath);
+    // Struct member from every element of an array of structs.
+    std::unique_ptr<document::FieldValue> getStructArrayFieldValue(const document::FieldPath &fieldPath);
+    // All keys of a map field.
+    std::unique_ptr<document::FieldValue> getStructMapKeyFieldValue(const document::FieldPath &fieldPath);
+    // Struct member from every value of a map of structs.
+    std::unique_ptr<document::FieldValue> getStructMapFieldValue(const document::FieldPath &fieldPath);
+
+public:
+    DocumentFieldExtractor(const document::Document &doc);
+    ~DocumentFieldExtractor();
+
+    /**
+     * Extract the value addressed by fieldPath. Returns a null pointer when
+     * the field path has a length that is not handled (e.g. it is empty).
+     */
+    std::unique_ptr<document::FieldValue> getFieldValue(const document::FieldPath &fieldPath);
+
+    /**
+     * Check if fieldPath is in a supported form.
+     */
+    static bool isSupported(const document::FieldPath &fieldPath);
+
+    /**
+     * Check if two field paths are compatible, i.e. same types in whole path
+     * and same data type would be returned from getFieldValue(). This is
+     * meant to be used when document type in received document doesn't match
+     * the document type for the current config (can happen right before and
+     * after live config change when validation override is used).
+     */
+    static bool isCompatible(const document::FieldPath &fieldPath1, const document::FieldPath &fieldPath2);
+};
+
+}