diff options
author | Tor Egge <Tor.Egge@broadpark.no> | 2018-05-26 21:08:47 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@oath.com> | 2018-05-28 11:28:13 +0000 |
commit | 71172f58f85b53dd7dce1a5091d5195770ab0cb8 (patch) | |
tree | 83a97cf5899e5de18e385ba90a7128d9a5c1d557 | |
parent | 3c1334090cef6fb0891515040ad900702275ccea (diff) |
Handle extraction of field values for struct array attribute vectors.
7 files changed, 715 insertions, 8 deletions
diff --git a/searchcore/CMakeLists.txt b/searchcore/CMakeLists.txt index 36e76b02b0b..b1570c10221 100644 --- a/searchcore/CMakeLists.txt +++ b/searchcore/CMakeLists.txt @@ -68,6 +68,7 @@ vespa_define_module( src/tests/proton/attribute/attribute_populator src/tests/proton/attribute/attribute_usage_filter src/tests/proton/attribute/attributes_state_explorer + src/tests/proton/attribute/document_field_extractor src/tests/proton/attribute/document_field_populator src/tests/proton/attribute/exclusive_attribute_read_accessor src/tests/proton/attribute/imported_attributes_context diff --git a/searchcore/src/tests/proton/attribute/document_field_extractor/CMakeLists.txt b/searchcore/src/tests/proton/attribute/document_field_extractor/CMakeLists.txt new file mode 100644 index 00000000000..7d7b798febe --- /dev/null +++ b/searchcore/src/tests/proton/attribute/document_field_extractor/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchcore_document_field_extractor_test_app TEST + SOURCES + document_field_extractor_test.cpp + DEPENDS + searchcore_attribute + searchcore_pcommon +) +vespa_add_test(NAME searchcore_document_field_extractor_test_app COMMAND searchcore_document_field_extractor_test_app) diff --git a/searchcore/src/tests/proton/attribute/document_field_extractor/document_field_extractor_test.cpp b/searchcore/src/tests/proton/attribute/document_field_extractor/document_field_extractor_test.cpp new file mode 100644 index 00000000000..e8ec5a0d953 --- /dev/null +++ b/searchcore/src/tests/proton/attribute/document_field_extractor/document_field_extractor_test.cpp @@ -0,0 +1,380 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/document/base/documentid.h> +#include <vespa/document/base/exceptions.h> +#include <vespa/document/base/field.h> +#include <vespa/document/base/fieldpath.h> +#include <vespa/document/datatype/datatypes.h> +#include <vespa/document/fieldvalue/arrayfieldvalue.h> +#include <vespa/document/fieldvalue/document.h> +#include <vespa/document/fieldvalue/intfieldvalue.h> +#include <vespa/document/fieldvalue/mapfieldvalue.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/document/fieldvalue/structfieldvalue.h> +#include <vespa/document/fieldvalue/weightedsetfieldvalue.h> +#include <vespa/searchcore/proton/attribute/document_field_extractor.h> +#include <vespa/vespalib/testkit/testapp.h> + +using document::Field; +using document::DataType; +using document::DocumentType; +using document::StructDataType; +using document::ArrayDataType; +using document::WeightedSetDataType; +using document::MapDataType; +using document::StructFieldValue; +using document::ArrayFieldValue; +using document::WeightedSetFieldValue; +using document::IntFieldValue; +using document::StringFieldValue; +using document::MapFieldValue; +using document::Document; +using document::DocumentId; +using document::FieldPath; +using document::FieldValue; +using document::FieldNotFoundException; +using proton::DocumentFieldExtractor; + +namespace +{ + +const ArrayDataType arrayTypeInt(*DataType::INT); +const ArrayDataType arrayTypeString(*DataType::STRING); +const WeightedSetDataType weightedSetTypeInt(*DataType::INT, false, false); +const WeightedSetDataType weightedSetTypeString(*DataType::STRING, false, false); +const int32_t noInt(std::numeric_limits<int32_t>::min()); +const vespalib::string noString(""); + +std::unique_ptr<FieldValue> +makeIntArray(const std::vector<int32_t> &array) +{ + auto result = std::make_unique<ArrayFieldValue>(arrayTypeInt); + for (const auto &elem : array) { + result->append(std::make_unique<IntFieldValue>(elem)); + } + return result; +} + +std::unique_ptr<FieldValue> +makeStringArray(const std::vector<vespalib::string> &array) +{ + auto result = std::make_unique<ArrayFieldValue>(arrayTypeString); + for (const auto &elem : array) { + result->append(std::make_unique<StringFieldValue>(elem)); + } + return result; +} + +std::unique_ptr<FieldValue> +makeIntWeightedSet(const std::vector<std::pair<int32_t, int32_t>> &array) +{ + auto result = std::make_unique<WeightedSetFieldValue>(weightedSetTypeInt); + for (const auto &elem : array) { + result->add(IntFieldValue(elem.first), elem.second); + } + return result; +} + +std::unique_ptr<FieldValue> +makeStringWeightedSet(const std::vector<std::pair<vespalib::string, int32_t>> &array) +{ + auto result = std::make_unique<WeightedSetFieldValue>(weightedSetTypeString); + for (const auto &elem : array) { + result->add(StringFieldValue(elem.first), elem.second); + } + return result; +} + +template <typename F1, typename F2> +void +checkFieldPathChange(F1 f1, F2 f2, const vespalib::string &path, bool same) +{ + FieldPath fieldPath1 = f1.makeFieldPath(path); + FieldPath fieldPath2 = f2.makeFieldPath(path); + EXPECT_TRUE(!fieldPath1.empty()); + EXPECT_TRUE(!fieldPath2.empty()); + EXPECT_TRUE(DocumentFieldExtractor::isSupported(fieldPath1)); + EXPECT_TRUE(DocumentFieldExtractor::isSupported(fieldPath2)); + EXPECT_EQUAL(same, DocumentFieldExtractor::isCompatible(fieldPath1, fieldPath2)); +} + +} + +struct FixtureBase +{ + DocumentType type; + const Field tagField; + const Field nameField; + + FixtureBase(bool byteTag) + : type("test"), + tagField("tag", 1, byteTag ? *DataType::BYTE : *DataType::INT, true), + nameField("name", 2, *DataType::STRING, true) + { + } + + ~FixtureBase(); + + std::unique_ptr<Document> + makeDoc() + { + return std::make_unique<Document>(type, DocumentId("id::test::1")); + } + + FieldPath + makeFieldPath(const vespalib::string &path) + { + FieldPath fieldPath; + try { + type.buildFieldPath(fieldPath, path); + } catch (FieldNotFoundException &) { + fieldPath = FieldPath(); + } + if (!DocumentFieldExtractor::isSupported(fieldPath)) { + fieldPath = FieldPath(); + } + return fieldPath; + } + + void + assertExtracted(DocumentFieldExtractor &extractor, + const vespalib::string &path, + std::unique_ptr<FieldValue> expected) { + FieldPath fieldPath(makeFieldPath(path)); + std::unique_ptr<FieldValue> fv = extractor.getFieldValue(fieldPath); + if (expected) { + ASSERT_TRUE(fv); + EXPECT_EQUAL(*expected, *fv); + } else { + EXPECT_TRUE(!fv); + } + } +}; + +FixtureBase::~FixtureBase() = default; + +struct SimpleFixture : public FixtureBase +{ + SimpleFixture(bool byteTag = false) + : FixtureBase(byteTag) + { + type.addField(tagField); + type.addField(nameField); + } +}; + +TEST_F("require that simple fields give simple values", SimpleFixture) +{ + auto doc = f.makeDoc(); + doc->setValue(f.tagField, IntFieldValue(200)); + doc->setValue(f.nameField, StringFieldValue("name200b")); + DocumentFieldExtractor extractor(*doc); + TEST_DO(f.assertExtracted(extractor, "tag", std::make_unique<IntFieldValue>(200))); + TEST_DO(f.assertExtracted(extractor, "name", std::make_unique<StringFieldValue>("name200b"))); +} + +struct ArrayFixture : public FixtureBase +{ + const ArrayDataType tagArrayFieldType; + const Field tagArrayField; + const ArrayDataType valueArrayFieldType; + const Field valueArrayField; + + ArrayFixture(bool byteTag = false) + : FixtureBase(byteTag), + tagArrayFieldType(tagField.getDataType()), + tagArrayField("tag", tagArrayFieldType, true), + valueArrayFieldType(nameField.getDataType()), + valueArrayField("val", valueArrayFieldType, true) + { + type.addField(tagArrayField); + type.addField(valueArrayField); + } + + ~ArrayFixture(); +}; + +ArrayFixture::~ArrayFixture() = default; + +TEST_F("require that array fields give array values", ArrayFixture) +{ + auto doc = f.makeDoc(); + doc->setValue(f.tagArrayField, *makeIntArray({ 300, 301 })); + doc->setValue(f.valueArrayField, *makeStringArray({"v500", "v502"})); + DocumentFieldExtractor extractor(*doc); + TEST_DO(f.assertExtracted(extractor, "tag", makeIntArray({ 300, 301}))); + TEST_DO(f.assertExtracted(extractor, "val", makeStringArray({"v500", "v502"}))); +} + +struct WeightedSetFixture : public FixtureBase +{ + const WeightedSetDataType tagWeightedSetFieldType; + const Field tagWeightedSetField; + const WeightedSetDataType valueWeightedSetFieldType; + const Field valueWeightedSetField; + + WeightedSetFixture(bool byteTag = false) + : FixtureBase(byteTag), + tagWeightedSetFieldType(tagField.getDataType(), false, false), + tagWeightedSetField("tag", tagWeightedSetFieldType, true), + valueWeightedSetFieldType(*DataType::STRING, false, false), + valueWeightedSetField("val", valueWeightedSetFieldType, true) + { + type.addField(tagWeightedSetField); + type.addField(valueWeightedSetField); + } + + ~WeightedSetFixture(); +}; + +WeightedSetFixture::~WeightedSetFixture() = default; + +TEST_F("require that weighted set fields give weighted set values", WeightedSetFixture) +{ + auto doc = f.makeDoc(); + doc->setValue(f.tagWeightedSetField, *makeIntWeightedSet({{400, 10}, { 401, 13}})); + doc->setValue(f.valueWeightedSetField, *makeStringWeightedSet({{"600", 17}, {"604", 19}})); + DocumentFieldExtractor extractor(*doc); + TEST_DO(f.assertExtracted(extractor, "tag", makeIntWeightedSet({{ 400, 10}, {401, 13}}))); + TEST_DO(f.assertExtracted(extractor, "val", makeStringWeightedSet({{"600", 17}, {"604", 19}}))); +} + +struct StructFixtureBase : public FixtureBase +{ + StructDataType structFieldType; + + StructFixtureBase(bool byteTag) + : FixtureBase(byteTag), + structFieldType("struct") + { + structFieldType.addField(tagField); + structFieldType.addField(nameField); + } + + std::unique_ptr<StructFieldValue> + makeStruct() + { + return std::make_unique<StructFieldValue>(structFieldType); + } + + std::unique_ptr<StructFieldValue> + makeStruct(int tag, const vespalib::string &value) + { + auto ret = makeStruct(); + ret->setValue(tagField, IntFieldValue(tag)); + ret->setValue(nameField, StringFieldValue(value)); + return ret; + } + + std::unique_ptr<StructFieldValue> + makeStruct(int tag) + { + auto ret = makeStruct(); + ret->setValue(tagField, IntFieldValue(tag)); + return ret; + } + + std::unique_ptr<StructFieldValue> + makeStruct(const vespalib::string &value) + { + auto ret = makeStruct(); + ret->setValue(nameField, StringFieldValue(value)); + return ret; + } +}; + +struct StructArrayFixture : public StructFixtureBase +{ + const ArrayDataType structArrayFieldType; + const Field structArrayField; + + StructArrayFixture(bool byteTag = false) + : StructFixtureBase(byteTag), + structArrayFieldType(structFieldType), + structArrayField("s", 11, structArrayFieldType, true) + { + type.addField(structArrayField); + } + + ~StructArrayFixture(); +}; + +StructArrayFixture::~StructArrayFixture() = default; + +TEST_F("require that struct array field gives array values", StructArrayFixture) +{ + auto doc = f.makeDoc(); + ArrayFieldValue structArrayFieldValue(f.structArrayFieldType); + structArrayFieldValue.add(*f.makeStruct(1, "name1")); + structArrayFieldValue.add(*f.makeStruct(2)); + structArrayFieldValue.add(*f.makeStruct("name3")); + doc->setValue(f.structArrayField, structArrayFieldValue); + DocumentFieldExtractor extractor(*doc); + TEST_DO(f.assertExtracted(extractor, "s.tag", makeIntArray({ 1, 2, noInt }))); + TEST_DO(f.assertExtracted(extractor, "s.name", makeStringArray({ "name1", noString, "name3" }))); +} + +struct StructMapFixture : public StructFixtureBase +{ + const MapDataType structMapFieldType; + const Field structMapField; + + StructMapFixture(bool byteTag = false, bool byteKey = false) + : StructFixtureBase(byteTag), + structMapFieldType(byteKey ? *DataType::BYTE : *DataType::STRING, structFieldType), + structMapField("s", 12, structMapFieldType, true) + { + type.addField(structMapField); + } + + ~StructMapFixture(); +}; + +StructMapFixture::~StructMapFixture() = default; + +TEST_F("require that struct map field gives array values", StructMapFixture) +{ + auto doc = f.makeDoc(); + MapFieldValue structMapFieldValue(f.structMapFieldType); + structMapFieldValue.put(StringFieldValue("m0"), *f.makeStruct(10, "name10")); + structMapFieldValue.put(StringFieldValue("m1"), *f.makeStruct(11)); + structMapFieldValue.put(StringFieldValue("m2"), *f.makeStruct("name12")); + structMapFieldValue.put(StringFieldValue("m3"), *f.makeStruct()); + doc->setValue(f.structMapField, structMapFieldValue); + DocumentFieldExtractor extractor(*doc); + TEST_DO(f.assertExtracted(extractor, "s.key", makeStringArray({ "m0", "m1", "m2", "m3" }))); + TEST_DO(f.assertExtracted(extractor, "s.value.tag", makeIntArray({ 10, 11, noInt, noInt }))); + TEST_DO(f.assertExtracted(extractor, "s.value.name", makeStringArray({ "name10", noString, "name12", noString }))); +} + +TEST_F("require that unknown field gives null value", FixtureBase(false)) +{ + auto doc = f.makeDoc(); + DocumentFieldExtractor extractor(*doc); + TEST_DO(f.assertExtracted(extractor, "unknown", std::unique_ptr<FieldValue>())); +} + +TEST("require that type changes are detected") +{ + TEST_DO(checkFieldPathChange(SimpleFixture(false), SimpleFixture(false), "tag", true)); + TEST_DO(checkFieldPathChange(SimpleFixture(false), SimpleFixture(true), "tag", false)); + TEST_DO(checkFieldPathChange(ArrayFixture(false), ArrayFixture(false), "tag", true)); + TEST_DO(checkFieldPathChange(ArrayFixture(false), ArrayFixture(true), "tag", false)); + TEST_DO(checkFieldPathChange(SimpleFixture(false), ArrayFixture(false), "tag", false)); + TEST_DO(checkFieldPathChange(WeightedSetFixture(false), WeightedSetFixture(false), "tag", true)); + TEST_DO(checkFieldPathChange(WeightedSetFixture(false), WeightedSetFixture(true), "tag", false)); + TEST_DO(checkFieldPathChange(SimpleFixture(false), WeightedSetFixture(false), "tag", false)); + TEST_DO(checkFieldPathChange(ArrayFixture(false), WeightedSetFixture(false), "tag", false)); + TEST_DO(checkFieldPathChange(StructArrayFixture(false), StructArrayFixture(false), "s.tag", true)); + TEST_DO(checkFieldPathChange(StructArrayFixture(false), StructArrayFixture(true), "s.tag", false)); + TEST_DO(checkFieldPathChange(StructMapFixture(false, false), StructMapFixture(false, false), "s.value.tag", true)); + TEST_DO(checkFieldPathChange(StructMapFixture(false, false), StructMapFixture(true, false), "s.value.tag", false)); + TEST_DO(checkFieldPathChange(StructMapFixture(false, false), StructMapFixture(false, true), "s.value.tag", false)); + TEST_DO(checkFieldPathChange(StructMapFixture(false, false), StructMapFixture(false, false), "s.key", true)); + TEST_DO(checkFieldPathChange(StructMapFixture(false, false), StructMapFixture(false, true), "s.key", false)); +} + +TEST_MAIN() +{ + TEST_RUN_ALL(); +} diff --git a/searchcore/src/vespa/searchcore/proton/attribute/CMakeLists.txt b/searchcore/src/vespa/searchcore/proton/attribute/CMakeLists.txt index 5b6a9faa05d..30f6c2d92c2 100644 --- a/searchcore/src/vespa/searchcore/proton/attribute/CMakeLists.txt +++ b/searchcore/src/vespa/searchcore/proton/attribute/CMakeLists.txt @@ -23,6 +23,7 @@ vespa_add_library(searchcore_attribute STATIC attributedisklayout.cpp attributemanager.cpp attributesconfigscout.cpp + document_field_extractor.cpp document_field_populator.cpp document_field_retriever.cpp exclusive_attribute_read_accessor.cpp diff --git a/searchcore/src/vespa/searchcore/proton/attribute/attribute_writer.cpp b/searchcore/src/vespa/searchcore/proton/attribute/attribute_writer.cpp index a6329db2aee..0cb260ed9a8 100644 --- a/searchcore/src/vespa/searchcore/proton/attribute/attribute_writer.cpp +++ b/searchcore/src/vespa/searchcore/proton/attribute/attribute_writer.cpp @@ -2,6 +2,7 @@ #include "attribute_writer.h" #include "attributemanager.h" +#include "document_field_extractor.h" #include <vespa/document/base/exceptions.h> #include <vespa/document/datatype/documenttype.h> #include <vespa/searchcore/proton/attribute/imported_attributes_repo.h> @@ -51,7 +52,9 @@ AttributeWriter::WriteContext::buildFieldPaths(const DocumentType &docType) FieldPath fp; try { docType.buildFieldPath(fp, name); - } catch (document::FieldNotFoundException & e) { } + } catch (document::FieldNotFoundException & e) { + fp = FieldPath(); + } assert(fieldId < _fieldPaths.size()); _fieldPaths[fieldId] = std::move(fp); @@ -200,12 +203,12 @@ class PutTask : public vespalib::Executor::Task std::remove_reference_t<AttributeWriter::OnWriteDoneType> _onWriteDone; std::vector<FieldValue::UP> _fieldValues; public: - PutTask(const AttributeWriter::WriteContext &wc, SerialNum serialNum, const Document &doc, uint32_t lid, bool immediateCommit, AttributeWriter::OnWriteDoneType onWriteDone); + PutTask(const AttributeWriter::WriteContext &wc, SerialNum serialNum, DocumentFieldExtractor &fieldExtractor, uint32_t lid, bool immediateCommit, AttributeWriter::OnWriteDoneType onWriteDone); virtual ~PutTask() override; virtual void run() override; }; -PutTask::PutTask(const AttributeWriter::WriteContext &wc, SerialNum serialNum, const Document &doc, uint32_t lid, bool immediateCommit, AttributeWriter::OnWriteDoneType onWriteDone) +PutTask::PutTask(const AttributeWriter::WriteContext &wc, SerialNum serialNum, DocumentFieldExtractor &fieldExtractor, uint32_t lid, bool immediateCommit, AttributeWriter::OnWriteDoneType onWriteDone) : _wc(wc), _serialNum(serialNum), _lid(lid), @@ -215,10 +218,7 @@ PutTask::PutTask(const AttributeWriter::WriteContext &wc, SerialNum serialNum, c const auto &fieldPaths = _wc.getFieldPaths(); _fieldValues.reserve(fieldPaths.size()); for (const auto &fieldPath : fieldPaths) { - FieldValue::UP fv; - if (!fieldPath.empty()) { - fv = doc.getNestedFieldValue(fieldPath.getFullRange()); - } + FieldValue::UP fv = fieldExtractor.getFieldValue(fieldPath); _fieldValues.emplace_back(std::move(fv)); } } @@ -382,8 +382,9 @@ void AttributeWriter::internalPut(SerialNum serialNum, const Document &doc, DocumentIdT lid, bool immediateCommit, OnWriteDoneType onWriteDone) { + DocumentFieldExtractor extractor(doc); for (const auto &wc : _writeContexts) { - auto putTask = std::make_unique<PutTask>(wc, serialNum, doc, lid, immediateCommit, onWriteDone); + auto putTask = std::make_unique<PutTask>(wc, serialNum, extractor, lid, immediateCommit, onWriteDone); _attributeFieldWriter.executeTask(wc.getExecutorId(), std::move(putTask)); } } diff --git a/searchcore/src/vespa/searchcore/proton/attribute/document_field_extractor.cpp b/searchcore/src/vespa/searchcore/proton/attribute/document_field_extractor.cpp new file mode 100644 index 00000000000..143441eaae9 --- /dev/null +++ b/searchcore/src/vespa/searchcore/proton/attribute/document_field_extractor.cpp @@ -0,0 +1,260 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "document_field_extractor.h" +#include <vespa/document/datatype/arraydatatype.h> +#include <vespa/document/fieldvalue/arrayfieldvalue.h> +#include <vespa/document/fieldvalue/bytefieldvalue.h> +#include <vespa/document/fieldvalue/document.h> +#include <vespa/document/fieldvalue/doublefieldvalue.h> +#include <vespa/document/fieldvalue/floatfieldvalue.h> +#include <vespa/document/fieldvalue/intfieldvalue.h> +#include <vespa/document/fieldvalue/longfieldvalue.h> +#include <vespa/document/fieldvalue/shortfieldvalue.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/document/fieldvalue/structfieldvalue.h> +#include <vespa/document/fieldvalue/mapfieldvalue.h> +#include <vespa/searchcommon/common/undefinedvalues.h> +#include <vespa/vespalib/stllike/hash_map.hpp> +#include <vespa/vespalib/util/exceptions.h> +#include <vespa/vespalib/util/stringfmt.h> + +using document::FieldValue; +using document::ByteFieldValue; +using document::ShortFieldValue; +using document::IntFieldValue; +using document::LongFieldValue; +using document::FloatFieldValue; +using document::DoubleFieldValue; +using document::StringFieldValue; +using document::StructFieldValue; +using document::MapFieldValue; +using document::DataType; +using document::ArrayDataType; +using document::ArrayFieldValue; +using document::Document; +using document::FieldPath; +using document::FieldPathEntry; +using document::FieldValueVisitor; +using vespalib::IllegalStateException; +using vespalib::make_string; +using search::attribute::getUndefined; + +namespace proton { + +namespace { + +class SetUndefinedValueVisitor : public FieldValueVisitor +{ + void visit(document::AnnotationReferenceFieldValue &) override { } + void visit(ArrayFieldValue &) override { } + void visit(ByteFieldValue &value) override { value = getUndefined<int8_t>(); } + void visit(Document &) override { } + void visit(DoubleFieldValue &value) override { value = getUndefined<double>(); } + void visit(FloatFieldValue &value) override { value = getUndefined<float>(); } + void visit(IntFieldValue &value) override { value = getUndefined<int32_t>(); } + void visit(LongFieldValue &value) override { value = getUndefined<int64_t>(); } + void visit(MapFieldValue &) override { } + void visit(document::PredicateFieldValue &) override { } + void visit(document::RawFieldValue &) override { } + void visit(ShortFieldValue &value) override { value = getUndefined<int16_t>(); } + void visit(StringFieldValue &) override { } + void visit(StructFieldValue &) override { } + void visit(document::WeightedSetFieldValue &) override { } + void visit(document::TensorFieldValue &) override { } + void visit(document::ReferenceFieldValue &) override { } +}; + +SetUndefinedValueVisitor setUndefinedValueVisitor; + +const ArrayDataType arrayTypeByte(*DataType::BYTE); +const ArrayDataType arrayTypeShort(*DataType::SHORT); +const ArrayDataType arrayTypeInt(*DataType::INT); +const ArrayDataType arrayTypeLong(*DataType::LONG); +const ArrayDataType arrayTypeFloat(*DataType::FLOAT); +const ArrayDataType arrayTypeDouble(*DataType::DOUBLE); +const ArrayDataType arrayTypeString(*DataType::STRING); + +const DataType * +getArrayType(const DataType &fieldType) +{ + switch (fieldType.getId()) { + case DataType::Type::T_BYTE: + return &arrayTypeByte; + case DataType::Type::T_SHORT: + return &arrayTypeShort; + case DataType::Type::T_INT: + return &arrayTypeInt; + case DataType::Type::T_LONG: + return &arrayTypeLong; + case DataType::Type::T_FLOAT: + return &arrayTypeFloat; + case DataType::Type::T_DOUBLE: + return &arrayTypeDouble; + case DataType::Type::T_STRING: + return &arrayTypeString; + default: + return nullptr; + } +} + +std::unique_ptr<ArrayFieldValue> +makeArray(const FieldPathEntry &fieldPathEntry, size_t size) +{ + const auto arrayType = getArrayType(fieldPathEntry.getDataType()); + auto array = std::make_unique<ArrayFieldValue>(*arrayType); + array->resize(size); + return array; +} + +bool +checkInherits(const FieldValue &fieldValue, unsigned id) +{ + const vespalib::Identifiable::RuntimeClass &rc = fieldValue.getClass(); + return rc.inherits(id); +} + +} + +DocumentFieldExtractor::DocumentFieldExtractor(const Document &doc) + : _doc(doc), + _cachedFieldValues() +{ +} + +DocumentFieldExtractor::~DocumentFieldExtractor() = default; + +bool +DocumentFieldExtractor::isSupported(const FieldPath &fieldPath) +{ + if (!fieldPath.empty() && + fieldPath[0].getType() != FieldPathEntry::Type::STRUCT_FIELD) { + return false; + } + if (fieldPath.size() == 2) { + if (fieldPath[1].getType() != FieldPathEntry::Type::STRUCT_FIELD && + fieldPath[1].getType() != FieldPathEntry::Type::MAP_ALL_KEYS) { + return false; + } + } else if (fieldPath.size() == 3) { + if (fieldPath[1].getType() != FieldPathEntry::Type::MAP_ALL_VALUES || + fieldPath[2].getType() != FieldPathEntry::Type::STRUCT_FIELD) { + return false; + } + } else if (fieldPath.size() > 3) { + return false; + } + return true; +} + +bool +DocumentFieldExtractor::isCompatible(const FieldPath &fieldPath1, const FieldPath &fieldPath2) +{ + if (fieldPath1.size() != fieldPath2.size()) { + return false; + } + uint32_t arrayIndex = 0; + for (const auto &fieldPathEntry1 : fieldPath1) { + const auto &fieldPathEntry2 = fieldPath2[arrayIndex++]; + if (fieldPathEntry1->getType() != fieldPathEntry2.getType() || + fieldPathEntry1->getDataType() != fieldPathEntry2.getDataType()) { + return false; + } + } + return true; +} + +const FieldValue * +DocumentFieldExtractor::getCachedFieldValue(const FieldPathEntry &fieldPathEntry) +{ + auto itr = _cachedFieldValues.find(fieldPathEntry.getName()); + if (itr != _cachedFieldValues.end()) { + return itr->second.get(); + } else { + auto insres = _cachedFieldValues.insert(std::make_pair(fieldPathEntry.getName(), _doc.getValue(fieldPathEntry.getFieldRef()))); + assert(insres.second); + return insres.first->second.get(); + } +} + +std::unique_ptr<FieldValue> +DocumentFieldExtractor::getSimpleFieldValue(const FieldPath &fieldPath) +{ + return _doc.getNestedFieldValue(fieldPath.getFullRange()); +} + +std::unique_ptr<FieldValue> +DocumentFieldExtractor::getStructArrayFieldValue(const FieldPath &fieldPath) +{ + const auto outerFieldValue = getCachedFieldValue(fieldPath[0]); + if (outerFieldValue != nullptr && checkInherits(*outerFieldValue, ArrayFieldValue::classId)) { + const auto outerArray = static_cast<const ArrayFieldValue *>(outerFieldValue); + const auto &innerFieldPathEntry = fieldPath[1]; + auto array = makeArray(innerFieldPathEntry, outerArray->size()); + uint32_t arrayIndex = 0; + for (const auto &outerElemBase : *outerArray) { + auto &arrayElem = (*array)[arrayIndex++]; + const auto &structElem = static_cast<const StructFieldValue &>(outerElemBase); + if (!structElem.getValue(innerFieldPathEntry.getFieldRef(), arrayElem)) { + arrayElem.accept(setUndefinedValueVisitor); + } + } + return array; + } + return std::unique_ptr<FieldValue>(); +} + +std::unique_ptr<FieldValue> +DocumentFieldExtractor::getStructMapKeyFieldValue(const FieldPath &fieldPath) +{ + const auto outerFieldValue = getCachedFieldValue(fieldPath[0]); + if (outerFieldValue != nullptr && checkInherits(*outerFieldValue, MapFieldValue::classId)) { + const auto outerMap = static_cast<const MapFieldValue *>(outerFieldValue); + auto array = makeArray(fieldPath[1], outerMap->size()); + uint32_t arrayIndex = 0; + for (const auto &mapElem : *outerMap) { + (*array)[arrayIndex++].assign(*mapElem.first); + } + return array; + } + return std::unique_ptr<FieldValue>(); +} + +std::unique_ptr<document::FieldValue> +DocumentFieldExtractor::getStructMapFieldValue(const FieldPath &fieldPath) +{ + const auto outerFieldValue = getCachedFieldValue(fieldPath[0]); + if (outerFieldValue != nullptr && checkInherits(*outerFieldValue, MapFieldValue::classId)) { + const auto outerMap = static_cast<const MapFieldValue *>(outerFieldValue); + const auto &innerFieldPathEntry = fieldPath[2]; + auto array = makeArray(innerFieldPathEntry, outerMap->size()); + uint32_t arrayIndex = 0; + for (const auto &mapElem : *outerMap) { + auto &arrayElem = (*array)[arrayIndex++]; + const auto &structElem = static_cast<const StructFieldValue &>(*mapElem.second); + if (!structElem.getValue(innerFieldPathEntry.getFieldRef(), arrayElem)) { + arrayElem.accept(setUndefinedValueVisitor); + } + } + return array; + } + return std::unique_ptr<FieldValue>(); +} + +std::unique_ptr<FieldValue> +DocumentFieldExtractor::getFieldValue(const FieldPath &fieldPath) +{ + if (fieldPath.size() == 1) { + return getSimpleFieldValue(fieldPath); + } else if (fieldPath.size() == 2) { + if (fieldPath[1].getType() == FieldPathEntry::Type::STRUCT_FIELD) { + return getStructArrayFieldValue(fieldPath); + } else { + return getStructMapKeyFieldValue(fieldPath); + } + } else if (fieldPath.size() == 3) { + return getStructMapFieldValue(fieldPath); + } + return std::unique_ptr<FieldValue>(); +} + +} diff --git a/searchcore/src/vespa/searchcore/proton/attribute/document_field_extractor.h b/searchcore/src/vespa/searchcore/proton/attribute/document_field_extractor.h new file mode 100644 index 00000000000..71f020a582c --- /dev/null +++ b/searchcore/src/vespa/searchcore/proton/attribute/document_field_extractor.h @@ -0,0 +1,55 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/vespalib/stllike/string.h> +#include <vespa/vespalib/stllike/hash_map.h> +#include <memory> + +namespace document +{ + +class Document; +class FieldValue; +class FieldPath; +class FieldPathEntry; + +} + +namespace proton { + +/** + * Class used to extract a field value from a document field or from a + * nested field in an array/map of structs. + */ +class DocumentFieldExtractor +{ + const document::Document &_doc; + vespalib::hash_map<vespalib::string, std::unique_ptr<document::FieldValue>> _cachedFieldValues; + + const document::FieldValue *getCachedFieldValue(const document::FieldPathEntry &fieldPathEntry); + std::unique_ptr<document::FieldValue> getSimpleFieldValue(const document::FieldPath &fieldPath); + std::unique_ptr<document::FieldValue> getStructArrayFieldValue(const document::FieldPath &fieldPath); + std::unique_ptr<document::FieldValue> getStructMapKeyFieldValue(const document::FieldPath &fieldPath); + std::unique_ptr<document::FieldValue> getStructMapFieldValue(const document::FieldPath &fieldPath); + +public: + DocumentFieldExtractor(const document::Document &doc); + ~DocumentFieldExtractor(); + + std::unique_ptr<document::FieldValue> getFieldValue(const document::FieldPath &fieldPath); + + /** + * Check if fieldPath is in a supported form. + */ + static bool isSupported(const document::FieldPath &fieldPath); + + /** + * Check if two field paths are compatible, i.e. same types in whole path + * and same data type would be returned from getFieldValue(). This is + * meant to be used when document type in received document doesn't match + * the document type for the current config (can happen right before and + * after live config change when validation override is used). + */ + static bool isCompatible(const document::FieldPath &fieldPath1, const document::FieldPath &fieldPath2); +}; + +} |