diff options
author | Arne H Juul <arnej@yahooinc.com> | 2022-03-30 08:35:48 +0000 |
---|---|---|
committer | Arne H Juul <arnej@yahooinc.com> | 2022-03-31 13:09:46 +0000 |
commit | d7a5a862966e408de4999718bab4162e06158937 (patch) | |
tree | 31875b37d8ab4ca2028f671e0dc1c0e68fa01070 /document/src | |
parent | 8dcd269877eeaad8ca58274e5d992051e06fb4eb (diff) |
setup from new doctype array
* unit test everything that was in the old unit test
* note: unit test is pragmatic for now
* the new configureDocTypes function should probably be
refactored into a helper class
Diffstat (limited to 'document/src')
-rw-r--r-- | document/src/tests/repo/CMakeLists.txt | 8 | ||||
-rw-r--r-- | document/src/tests/repo/doctype_config_test.cpp | 643 | ||||
-rw-r--r-- | document/src/tests/repo/types.cfg | 326 | ||||
-rw-r--r-- | document/src/vespa/document/repo/documenttyperepo.cpp | 335 |
4 files changed, 1310 insertions, 2 deletions
diff --git a/document/src/tests/repo/CMakeLists.txt b/document/src/tests/repo/CMakeLists.txt index bbbcafc2650..2a9ff4af683 100644 --- a/document/src/tests/repo/CMakeLists.txt +++ b/document/src/tests/repo/CMakeLists.txt @@ -6,3 +6,11 @@ vespa_add_executable(document_documenttyperepo_test_app TEST document ) vespa_add_test(NAME document_documenttyperepo_test_app COMMAND document_documenttyperepo_test_app) + +vespa_add_executable(document_doctype_config_test_app TEST + SOURCES + doctype_config_test.cpp + DEPENDS + document +) +vespa_add_test(NAME document_doctype_config_test_app COMMAND document_doctype_config_test_app) diff --git a/document/src/tests/repo/doctype_config_test.cpp b/document/src/tests/repo/doctype_config_test.cpp new file mode 100644 index 00000000000..5bb879b4e38 --- /dev/null +++ b/document/src/tests/repo/doctype_config_test.cpp @@ -0,0 +1,643 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for documenttyperepo. 
+ +#include <vespa/document/base/testdocrepo.h> +#include <vespa/config/print/asciiconfigwriter.h> +#include <vespa/document/datatype/annotationreferencedatatype.h> +#include <vespa/document/datatype/arraydatatype.h> +#include <vespa/document/datatype/documenttype.h> +#include <vespa/document/datatype/mapdatatype.h> +#include <vespa/document/datatype/tensor_data_type.h> +#include <vespa/document/datatype/weightedsetdatatype.h> +#include <vespa/document/fieldvalue/fieldvalue.h> +#include <vespa/document/repo/configbuilder.h> +#include <vespa/document/repo/documenttyperepo.h> +#include <vespa/vespalib/objects/identifiable.h> +#include <vespa/vespalib/stllike/string.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/util/exceptions.h> +#include <set> + +#include <vespa/log/log.h> +LOG_SETUP("doctype_config_test"); + +using config::AsciiConfigWriter; +using std::set; +using std::vector; +using vespalib::Identifiable; +using vespalib::IllegalArgumentException; +using vespalib::string; + +using namespace document::config_builder; +using namespace document; + +namespace { + +const string type_name = "test"; +const int32_t doc_type_id = 787121340; +const string header_name = type_name + ".header"; +const int32_t header_id = 30; +const string type_name_2 = "test_2"; +const string header_name_2 = type_name_2 + ".header"; +const string field_name = "field_name"; +const string derived_name = "derived"; + +using ::document::config::DocumenttypesConfig; +using ::document::config::DocumenttypesConfigBuilder; + +using BDocType = DocumenttypesConfigBuilder::Doctype; +using BDocInherit = DocumenttypesConfigBuilder::Doctype::Inherits; +using BDocFieldsets = DocumenttypesConfigBuilder::Doctype::Fieldsets; +using BDocImportField = DocumenttypesConfigBuilder::Doctype::Importedfield; +using BPrimitiveT = DocumenttypesConfigBuilder::Doctype::Primitivetype; +using BArrayT = DocumenttypesConfigBuilder::Doctype::Arraytype; +using BMapT = 
DocumenttypesConfigBuilder::Doctype::Maptype; +using BWsetT = DocumenttypesConfigBuilder::Doctype::Wsettype; +using BTensorT = DocumenttypesConfigBuilder::Doctype::Tensortype; +using BDocRefT = DocumenttypesConfigBuilder::Doctype::Documentref; +using BAnnotationT = DocumenttypesConfigBuilder::Doctype::Annotationtype; +using BAnnRefT = DocumenttypesConfigBuilder::Doctype::Annotationref; +using BStructT = DocumenttypesConfigBuilder::Doctype::Structtype; +using BStructField = DocumenttypesConfigBuilder::Doctype::Structtype::Field; +using BStructInherits = DocumenttypesConfigBuilder::Doctype::Structtype::Inherits; + +class BuilderHelper { +private: + int _idx = 10000; + DocumenttypesConfigBuilder _config; + static int hashId(const string& name) { + StructDataType tmp(name); + return tmp.getId(); + } + vector<int> _idxOfBuiltins; + void addPrimitive(BDocType &doc, const string& name, DataType::Type t) { + BPrimitiveT pt; + pt.idx = ++_idx; + pt.name = name; + doc.primitivetype.push_back(pt); + assert(t < _idxOfBuiltins.size()); + _idxOfBuiltins[t] = pt.idx; + LOG(debug, "idx of builtin (%d) = %d", (int)t, pt.idx); + } +public: + ~BuilderHelper(); + BDocType & document(const string& name) { + _config.doctype.reserve(100); + auto & d = _config.doctype.emplace_back(); + d.idx = ++_idx; + d.name = name; + d.internalid = hashId(name); + auto & st = addStruct(d, name + ".header"); + d.contentstruct = st.idx; + if (_config.doctype.size() > 1) { + d.inherits.emplace_back().idx = _config.doctype[0].idx; + } + return d; + } + BStructField & addField(BDocType &doc, const string& name) { + return addField(doc.structtype[0], name); + } + BStructT & addStruct(BDocType &doc, const string& name) { + doc.structtype.reserve(100); + auto & st = doc.structtype.emplace_back(); + st.idx = ++_idx; + st.name = name; + st.internalid = hashId(name); + return st; + } + BStructField & addField(BStructT &st, const string& name) { + st.field.reserve(100); + auto & f = st.field.emplace_back(); + 
f.name = name; + f.internalid = hashId(name); + return f; + } + BArrayT & addArray(BDocType &doc, int nestedIdx) { + doc.arraytype.reserve(100); + auto & a = doc.arraytype.emplace_back(); + a.idx = ++_idx; + a.elementtype = nestedIdx; + a.internalid = a.idx; + return a; + } + BMapT & addMap(BDocType &doc, int keyIdx, int valIdx) { + doc.maptype.reserve(100); + auto & m = doc.maptype.emplace_back(); + m.idx = ++_idx; + m.keytype = keyIdx; + m.valuetype = valIdx; + m.internalid = m.idx; + return m; + } + BWsetT & addWset(BDocType &doc, int nestedIdx) { + doc.wsettype.reserve(100); + auto & w = doc.wsettype.emplace_back(); + w.idx = ++_idx; + w.elementtype = nestedIdx; + w.internalid = w.idx; + return w; + } + BAnnotationT & addAnnotation(BDocType &doc, const string &name) { + doc.annotationtype.reserve(100); + auto & ann = doc.annotationtype.emplace_back(); + ann.idx = ++_idx; + ann.name = name; + ann.internalid = hashId(name); + return ann; + } + BAnnRefT & addAnnotationRef(BDocType &doc, int annIdx) { + doc.annotationref.reserve(100); + auto & aref = doc.annotationref.emplace_back(); + aref.idx = ++_idx; + aref.annotationtype = annIdx; + aref.internalid = aref.idx; + return aref; + } + BDocRefT & addDocumentRef(BDocType &doc, int targetIdx) { + doc.documentref.reserve(100); + auto & dref = doc.documentref.emplace_back(); + dref.idx = ++_idx; + dref.targettype = targetIdx; + dref.internalid = dref.idx; + return dref; + } + BTensorT & addTensorType(BDocType &doc, const string& spec) { + doc.tensortype.reserve(100); + auto & tt = doc.tensortype.emplace_back(); + tt.idx = ++_idx; + tt.detailedtype = spec; + return tt; + } + const DocumenttypesConfig & config() { return _config; } + BuilderHelper() { + _idxOfBuiltins.resize(DataType::MAX); + LOG(debug, "builtins.size = %zu", _idxOfBuiltins.size()); + auto & root = document("document"); + root.internalid = DataType::T_DOCUMENT; + addPrimitive(root, "int", DataType::T_INT); + addPrimitive(root, "float", 
DataType::T_FLOAT); + addPrimitive(root, "string", DataType::T_STRING); + addPrimitive(root, "raw", DataType::T_RAW); + addPrimitive(root, "long", DataType::T_LONG); + addPrimitive(root, "double", DataType::T_DOUBLE); + addPrimitive(root, "bool", DataType::T_BOOL); + addPrimitive(root, "uri", DataType::T_URI); + addPrimitive(root, "byte", DataType::T_BYTE); + addPrimitive(root, "tag", DataType::T_TAG); + addPrimitive(root, "short", DataType::T_SHORT); + addPrimitive(root, "predicate", DataType::T_PREDICATE); + } + int builtin(DataType::Type t) { + if (t == DataType::T_DOCUMENT) { + return _config.doctype[0].idx; + } + assert(t < _idxOfBuiltins.size()); + LOG(debug, "lookup builtin %d -> %d", (int)t, _idxOfBuiltins[t]); + return _idxOfBuiltins[t]; + } +}; + +BuilderHelper::~BuilderHelper() = default; + +TEST("requireThatDocumentTypeCanBeLookedUp") { + BuilderHelper builder; + auto &doc = builder.document(type_name); + doc.internalid = doc_type_id; + doc.structtype[0].internalid = header_id; + DocumentTypeRepo repo(builder.config()); + + const DocumentType *type = repo.getDocumentType(type_name); + ASSERT_TRUE(type); + EXPECT_EQUAL(type_name, type->getName()); + EXPECT_EQUAL(doc_type_id, type->getId()); + EXPECT_EQUAL(header_name, type->getFieldsType().getName()); + EXPECT_EQUAL(header_id, type->getFieldsType().getId()); +} + +TEST("requireThatDocumentTypeCanBeLookedUpWhenIdIsNotAHash") { + BuilderHelper builder; + auto & doc = builder.document(type_name); + doc.internalid = doc_type_id + 2; + auto & contents = doc.structtype[0]; + contents.name = header_name; + contents.internalid = header_id + 3; + DocumentTypeRepo repo(builder.config()); + + const DocumentType *type = repo.getDocumentType(type_name); + ASSERT_TRUE(type); + EXPECT_EQUAL(type_name, type->getName()); + EXPECT_EQUAL(doc_type_id + 2, type->getId()); + EXPECT_EQUAL(header_name, type->getFieldsType().getName()); + EXPECT_EQUAL(header_id + 3, type->getFieldsType().getId()); +} + 
+TEST("requireThatDocumentsCanHaveFields") { + BuilderHelper builder; + auto & doc = builder.document(type_name); + builder.addField(doc, field_name).type = builder.builtin(DataType::T_INT); + DocumentTypeRepo repo(builder.config()); + + const StructDataType &s = repo.getDocumentType(type_name)->getFieldsType(); + ASSERT_EQUAL(1u, s.getFieldCount()); + const Field &field = s.getField(field_name); + EXPECT_EQUAL(DataType::T_INT, field.getDataType().getId()); +} + +template <typename T> +const T &getFieldDataType(const DocumentTypeRepo &repo) { + const DataType &d = repo.getDocumentType(type_name) + ->getFieldsType().getField(field_name).getDataType(); + const T *t = dynamic_cast<const T *>(&d); + ASSERT_TRUE(t); + return *t; +} + +TEST("requireThatArraysCanBeConfigured") { + BuilderHelper builder; + auto & doc = builder.document(type_name); + auto & arr = builder.addArray(doc, builder.builtin(DataType::T_STRING)); + builder.addField(doc, field_name).type = arr.idx; + DocumentTypeRepo repo(builder.config()); + + const ArrayDataType &a = getFieldDataType<ArrayDataType>(repo); + EXPECT_EQUAL(DataType::T_STRING, a.getNestedType().getId()); +} + +TEST("requireThatWsetsCanBeConfigured") { + BuilderHelper builder; + auto & doc = builder.document(type_name); + auto & wset = builder.addWset(doc, builder.builtin(DataType::T_INT)); + wset.removeifzero = true; + wset.createifnonexistent = true; + builder.addField(doc, field_name).type = wset.idx; + DocumentTypeRepo repo(builder.config()); + + const WeightedSetDataType &w = getFieldDataType<WeightedSetDataType>(repo); + EXPECT_EQUAL(DataType::T_INT, w.getNestedType().getId()); + EXPECT_TRUE(w.createIfNonExistent()); + EXPECT_TRUE(w.removeIfZero()); +} + +TEST("requireThatMapsCanBeConfigured") { + BuilderHelper builder; + auto & doc = builder.document(type_name); + auto & map = builder.addMap(doc, + builder.builtin(DataType::T_INT), + builder.builtin(DataType::T_STRING)); + builder.addField(doc, field_name).type = map.idx; + 
DocumentTypeRepo repo(builder.config()); + + const MapDataType &m = getFieldDataType<MapDataType>(repo); + EXPECT_EQUAL(DataType::T_INT, m.getKeyType().getId()); + EXPECT_EQUAL(DataType::T_STRING, m.getValueType().getId()); +} + +TEST("requireThatAnnotationReferencesCanBeConfigured") { + int32_t annotation_type_id = 424; + BuilderHelper builder; + auto &doc = builder.document(type_name); + auto & ann = builder.addAnnotation(doc, "foo"); + ann.internalid = annotation_type_id; + auto & annRef = builder.addAnnotationRef(doc, ann.idx); + builder.addField(doc, field_name).type = annRef.idx; + DocumentTypeRepo repo(builder.config()); + + const AnnotationReferenceDataType &ar = getFieldDataType<AnnotationReferenceDataType>(repo); + EXPECT_EQUAL(annotation_type_id, ar.getAnnotationType().getId()); + EXPECT_EQUAL("foo", ar.getAnnotationType().getName()); +} + +TEST("requireThatDocumentsCanInheritFields") { + BuilderHelper builder; + auto & pdoc = builder.document(type_name); + auto & cdoc = builder.document(derived_name); + builder.addField(pdoc, field_name).type = builder.builtin(DataType::T_INT); + builder.addField(cdoc, "derived_field").type = builder.builtin(DataType::T_STRING); + cdoc.inherits.emplace_back().idx = pdoc.idx; + DocumentTypeRepo repo(builder.config()); + + const StructDataType &s = repo.getDocumentType(derived_name)->getFieldsType(); + ASSERT_EQUAL(2u, s.getFieldCount()); + const Field &field = s.getField(field_name); + const DataType &type = field.getDataType(); + EXPECT_EQUAL(DataType::T_INT, type.getId()); + EXPECT_EQUAL(DataType::T_STRING, s.getField("derived_field").getDataType().getId()); +} + +TEST("requireThatDocumentsCanUseInheritedTypes") { + const int32_t id = 64; + BuilderHelper builder; + auto & pdoc = builder.document(type_name); + auto & cdoc = builder.document(derived_name); + auto & arr = builder.addArray(pdoc, builder.builtin(DataType::T_INT)); + arr.internalid = id; + builder.addField(pdoc, "foo").type = arr.idx; + 
builder.addField(cdoc, field_name).type = arr.idx; + cdoc.inherits.emplace_back().idx = pdoc.idx; + + DocumentTypeRepo repo(builder.config()); + + const DataType &type = + repo.getDocumentType(derived_name)->getFieldsType() + .getField(field_name).getDataType(); + EXPECT_EQUAL(id, type.getId()); + EXPECT_TRUE(dynamic_cast<const ArrayDataType *>(&type)); +} + +TEST("requireThatIllegalConfigsCausesExceptions") { + BuilderHelper builder; + auto &doc = builder.document(type_name); + doc.inherits.emplace_back().idx = 20000; + EXPECT_EXCEPTION(DocumentTypeRepo repo(builder.config()), + IllegalArgumentException, "Unable to find document"); +} + +TEST("requireThatDataTypesCanBeLookedUpById") { + BuilderHelper builder; + auto &doc1 = builder.document(type_name); + auto &doc2 = builder.document(derived_name); + doc1.internalid = doc_type_id; + doc1.structtype[0].internalid = header_id; + doc2.internalid = doc_type_id + 1; + DocumentTypeRepo repo(builder.config()); + + const auto * dt1 = repo.getDocumentType(type_name); + const auto * dt2 = repo.getDocumentType(derived_name); + + ASSERT_TRUE(dt1); + ASSERT_TRUE(dt2); + EXPECT_EQUAL(dt1, repo.getDocumentType(doc_type_id)); + EXPECT_EQUAL(dt2, repo.getDocumentType(doc_type_id + 1)); + + const DataType *type = repo.getDataType(*dt1, header_id); + ASSERT_TRUE(type); + EXPECT_EQUAL(header_name, type->getName()); + EXPECT_EQUAL(header_id, type->getId()); + + EXPECT_TRUE(!repo.getDataType(*dt1, -1)); + EXPECT_TRUE(!repo.getDataType(*dt2, header_id)); +} + +TEST("requireThatDataTypesCanBeLookedUpByName") { + BuilderHelper builder; + auto &doc1 = builder.document(type_name); + doc1.structtype[0].internalid = header_id; + builder.document(type_name_2); + DocumentTypeRepo repo(builder.config()); + + const DocumentType * dt1 = repo.getDocumentType(type_name); + const DocumentType * dt2 = repo.getDocumentType(type_name_2); + ASSERT_TRUE(dt1); + ASSERT_TRUE(dt2); + + const DataType *type = repo.getDataType(*dt1, header_name); + 
ASSERT_TRUE(type); + EXPECT_EQUAL(header_name, type->getName()); + EXPECT_EQUAL(header_id, type->getId()); + + EXPECT_TRUE(repo.getDataType(*dt1, header_name)); + EXPECT_TRUE(!repo.getDataType(*dt1, field_name)); + EXPECT_TRUE(!repo.getDataType(*dt2, header_name)); +} + +TEST("requireThatInheritingDocCanRedefineIdenticalField") { + BuilderHelper builder; + + auto & pdoc = builder.document(type_name); + auto & cdoc = builder.document(derived_name); + builder.addField(pdoc, field_name).type = builder.builtin(DataType::T_STRING); + + builder.addField(cdoc, field_name).type = builder.builtin(DataType::T_STRING); + cdoc.inherits.emplace_back().idx = pdoc.idx; + + DocumentTypeRepo repo(builder.config()); + + const StructDataType &s = repo.getDocumentType(derived_name)->getFieldsType(); + ASSERT_EQUAL(1u, s.getFieldCount()); +} + +TEST("requireThatAnnotationTypesCanBeConfigured") { + const int32_t a_id = 654; + const string a_name = "annotation_name"; + BuilderHelper builder; + auto & doc = builder.document(type_name); + auto & ann = builder.addAnnotation(doc, a_name); + ann.internalid = a_id; + ann.datatype = builder.builtin(DataType::T_STRING); + + DocumentTypeRepo repo(builder.config()); + + const DocumentType *type = repo.getDocumentType(type_name); + ASSERT_TRUE(type); + const AnnotationType *a_type = repo.getAnnotationType(*type, a_id); + ASSERT_TRUE(a_type); + EXPECT_EQUAL(a_name, a_type->getName()); + ASSERT_TRUE(a_type->getDataType()); + EXPECT_EQUAL(DataType::T_STRING, a_type->getDataType()->getId()); + + a_type = repo.getAnnotationType(*type, 1); + ASSERT_TRUE(a_type); + EXPECT_EQUAL(1, a_type->getId()); + EXPECT_EQUAL("term", a_type->getName()); + a_type = repo.getAnnotationType(*type, 2); + ASSERT_TRUE(a_type); + EXPECT_EQUAL(2, a_type->getId()); + EXPECT_EQUAL("token_type", a_type->getName()); +} + +TEST("requireThatDocumentsCanUseOtherDocumentTypes") { + BuilderHelper builder; + auto &doc2 = builder.document(type_name_2); + doc2.internalid = doc_type_id + 
1; + auto &doc1 = builder.document(type_name); + builder.addField(doc1, field_name).type = doc2.idx; + DocumentTypeRepo repo(builder.config()); + + const DataType &type = repo.getDocumentType(type_name)->getFieldsType() + .getField(field_name).getDataType(); + EXPECT_EQUAL(doc_type_id + 1, type.getId()); + EXPECT_TRUE(dynamic_cast<const DocumentType *>(&type)); +} + +TEST("requireThatDocumentTypesCanBeIterated") { + BuilderHelper builder; + builder.document(type_name).internalid = doc_type_id; + builder.document(type_name_2).internalid = doc_type_id + 1; + DocumentTypeRepo repo(builder.config()); + + set<int> ids; + repo.forEachDocumentType( + [&ids](const DocumentType &type) { ids.insert(type.getId()); }); + + EXPECT_EQUAL(3u, ids.size()); + ASSERT_TRUE(ids.count(DataType::T_DOCUMENT)); + ASSERT_TRUE(ids.count(doc_type_id)); + ASSERT_TRUE(ids.count(doc_type_id + 1)); +} + +TEST("requireThatDocumentLookupChecksName") { + BuilderHelper builder; + auto &doc = builder.document(type_name_2); + doc.internalid = doc_type_id; + DocumentTypeRepo repo(builder.config()); + + // "type_name" will generate the document type id + // "doc_type_id". However, this config assigns that id to a + // different type. 
+ const DocumentType *type = repo.getDocumentType(type_name); + ASSERT_TRUE(!type); +} + +TEST("requireThatBuildFromConfigWorks") { + DocumentTypeRepo repo(readDocumenttypesConfig(TEST_PATH("types.cfg"))); + ASSERT_TRUE(repo.getDocumentType("document")); + ASSERT_TRUE(repo.getDocumentType("types")); +} + +TEST("requireThatStructsCanBeRecursive") { + BuilderHelper builder; + auto & doc = builder.document(type_name); + auto & st = builder.addStruct(doc, "folder"); + builder.addField(st, "subfolder").type = st.idx; + builder.addField(doc, field_name).type = st.idx; + DocumentTypeRepo repo(builder.config()); + + const StructDataType &s = getFieldDataType<StructDataType>(repo); + EXPECT_EQUAL(1u, s.getFieldCount()); + ASSERT_TRUE(s.hasField("subfolder")); + EXPECT_EQUAL(&s, &s.getField("subfolder").getDataType()); +} + +} // namespace + +TEST("requireThatMissingFileCausesException") { + EXPECT_EXCEPTION(readDocumenttypesConfig("illegal/missing_file"), + IllegalArgumentException, "Unable to open file"); +} + +TEST("requireThatFieldsCanHaveAnyDocumentType") { + BuilderHelper builder; + auto &doc1 = builder.document(type_name); + auto &doc2 = builder.document(type_name_2); + + // Circular dependency + builder.addField(doc1, field_name).type = doc2.idx; + builder.addField(doc2, field_name).type = doc1.idx; + + DocumentTypeRepo repo(builder.config()); + const DocumentType *type1 = repo.getDocumentType(type_name); + const DocumentType *type2 = repo.getDocumentType(type_name_2); + ASSERT_TRUE(type1); + EXPECT_TRUE(type1->getFieldsType().hasField(field_name)); + EXPECT_EQUAL(type2, &type1->getFieldsType().getField(field_name).getDataType()); + ASSERT_TRUE(type2); + EXPECT_TRUE(type2->getFieldsType().hasField(field_name)); + EXPECT_EQUAL(type1, &type2->getFieldsType().getField(field_name).getDataType()); +} + +TEST("Require that Array can have nested DocumentType") { + BuilderHelper builder; + auto &doc = builder.document(type_name); + auto &arr = builder.addArray(doc, doc.idx); 
+ builder.addField(doc, field_name).type = arr.idx; + DocumentTypeRepo repo(builder.config()); + const DocumentType *type = repo.getDocumentType(type_name); + ASSERT_TRUE(type); +} + +TEST("Reference fields are resolved to correct reference type") { + const int doc_with_refs_id = 5678; + const int ref1_id = 777; + const int ref2_id = 888; + BuilderHelper builder; + auto & doc1 = builder.document(type_name); + auto & doc2 = builder.document(type_name_2); + auto & doc3 = builder.document("doc_with_refs"); + doc3.internalid = doc_with_refs_id; + auto & refT1 = builder.addDocumentRef(doc3, doc1.idx); + refT1.internalid = ref1_id; + auto & refT2 = builder.addDocumentRef(doc3, doc2.idx); + refT2.internalid = ref2_id; + builder.addField(doc3, "ref1").type = refT1.idx; + builder.addField(doc3, "ref2").type = refT2.idx; + builder.addField(doc3, "ref3").type = refT1.idx; + + DocumentTypeRepo repo(builder.config()); + const DocumentType *type = repo.getDocumentType(doc_with_refs_id); + ASSERT_TRUE(type != nullptr); + const auto* ref1_type(repo.getDataType(*type, ref1_id)); + const auto* ref2_type(repo.getDataType(*type, ref2_id)); + + EXPECT_EQUAL(*ref1_type, type->getFieldsType().getField("ref1").getDataType()); + EXPECT_EQUAL(*ref2_type, type->getFieldsType().getField("ref2").getDataType()); + EXPECT_EQUAL(*ref1_type, type->getFieldsType().getField("ref3").getDataType()); +} + +TEST("Config with no imported fields has empty imported fields set in DocumentType") { + BuilderHelper builder; + builder.document(type_name); + DocumentTypeRepo repo(builder.config()); + const auto *type = repo.getDocumentType(type_name); + ASSERT_TRUE(type != nullptr); + EXPECT_TRUE(type->imported_field_names().empty()); + EXPECT_FALSE(type->has_imported_field_name("foo")); +} + +TEST("Configured imported field names are available in the DocumentType") { + // Note: we cheat a bit by specifying imported field names in types that have no + // reference fields. 
Add to test if we add config read-time validation of this. :) + BuilderHelper builder; + // Type with one imported field + builder.document(type_name).importedfield.emplace_back().name = "my_cool_field"; + // Type with two imported fields + auto & doc2 = builder.document(type_name_2); + doc2.importedfield.emplace_back().name = "my_awesome_field"; + doc2.importedfield.emplace_back().name = "my_swag_field"; + + DocumentTypeRepo repo(builder.config()); + const auto* type = repo.getDocumentType(type_name); + ASSERT_TRUE(type != nullptr); + EXPECT_EQUAL(1u, type->imported_field_names().size()); + EXPECT_TRUE(type->has_imported_field_name("my_cool_field")); + EXPECT_FALSE(type->has_imported_field_name("my_awesome_field")); + + type = repo.getDocumentType(type_name_2); + ASSERT_TRUE(type != nullptr); + EXPECT_EQUAL(2u, type->imported_field_names().size()); + EXPECT_TRUE(type->has_imported_field_name("my_awesome_field")); + EXPECT_TRUE(type->has_imported_field_name("my_swag_field")); + EXPECT_FALSE(type->has_imported_field_name("my_cool_field")); +} + +namespace { + +const TensorDataType & +asTensorDataType(const DataType &dataType) { + return dynamic_cast<const TensorDataType &>(dataType); +} + +} + +TEST("Tensor fields have tensor types") { + BuilderHelper builder; + auto & doc = builder.document(type_name); + auto & t1t = builder.addTensorType(doc, "tensor(x[3])"); + auto & t2t = builder.addTensorType(doc, "tensor(y{})"); + builder.addField(doc, "tensor1").type = t1t.idx; + builder.addField(doc, "tensor2").type = t2t.idx; + builder.addField(doc, "tensor3").type = t1t.idx; + + DocumentTypeRepo repo(builder.config()); + auto *docType = repo.getDocumentType(type_name); + ASSERT_TRUE(docType != nullptr); + auto &tensorField1 = docType->getField("tensor1"); + auto &tensorField2 = docType->getField("tensor2"); + EXPECT_EQUAL("tensor(x[3])", asTensorDataType(tensorField1.getDataType()).getTensorType().to_spec()); + EXPECT_EQUAL("tensor(y{})", 
asTensorDataType(tensorField2.getDataType()).getTensorType().to_spec()); + auto &tensorField3 = docType->getField("tensor3"); + EXPECT_TRUE(&tensorField1.getDataType() == &tensorField3.getDataType()); + auto tensorFieldValue1 = tensorField1.getDataType().createFieldValue(); + EXPECT_TRUE(&tensorField1.getDataType() == tensorFieldValue1->getDataType()); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/document/src/tests/repo/types.cfg b/document/src/tests/repo/types.cfg new file mode 100644 index 00000000000..cab69f4b1aa --- /dev/null +++ b/document/src/tests/repo/types.cfg @@ -0,0 +1,326 @@ +enablecompression false +usev8geopositions false +doctype[0].name "document" +doctype[0].idx 10000 +doctype[0].internalid 8 +doctype[0].contentstruct 10001 +doctype[0].primitivetype[0].idx 10002 +doctype[0].primitivetype[0].name "bool" +doctype[0].primitivetype[1].idx 10003 +doctype[0].primitivetype[1].name "byte" +doctype[0].primitivetype[2].idx 10004 +doctype[0].primitivetype[2].name "double" +doctype[0].primitivetype[3].idx 10005 +doctype[0].primitivetype[3].name "float" +doctype[0].primitivetype[4].idx 10006 +doctype[0].primitivetype[4].name "float16" +doctype[0].primitivetype[5].idx 10007 +doctype[0].primitivetype[5].name "int" +doctype[0].primitivetype[6].idx 10008 +doctype[0].primitivetype[6].name "long" +doctype[0].primitivetype[7].idx 10010 +doctype[0].primitivetype[7].name "predicate" +doctype[0].primitivetype[8].idx 10011 +doctype[0].primitivetype[8].name "raw" +doctype[0].primitivetype[9].idx 10012 +doctype[0].primitivetype[9].name "string" +doctype[0].primitivetype[10].idx 10014 +doctype[0].primitivetype[10].name "uri" +doctype[0].wsettype[0].idx 10013 +doctype[0].wsettype[0].elementtype 10012 +doctype[0].wsettype[0].createifnonexistent true +doctype[0].wsettype[0].removeifzero true +doctype[0].wsettype[0].internalid 18 +doctype[0].structtype[0].idx 10001 +doctype[0].structtype[0].name "document.header" +doctype[0].structtype[0].internalid -284186494 
+doctype[0].structtype[1].idx 10009 +doctype[0].structtype[1].name "position" +doctype[0].structtype[1].field[0].name "x" +doctype[0].structtype[1].field[0].internalid 914677694 +doctype[0].structtype[1].field[0].type 10007 +doctype[0].structtype[1].field[1].name "y" +doctype[0].structtype[1].field[1].internalid 900009410 +doctype[0].structtype[1].field[1].type 10007 +doctype[0].structtype[1].internalid 1381038251 +doctype[1].name "types" +doctype[1].idx 10015 +doctype[1].internalid -853072901 +doctype[1].inherits[0].idx 10000 +doctype[1].contentstruct 10016 +doctype[1].fieldsets{[document]}.fields[0] "Folders" +doctype[1].fieldsets{[document]}.fields[1] "abool" +doctype[1].fieldsets{[document]}.fields[2] "abyte" +doctype[1].fieldsets{[document]}.fields[3] "album0" +doctype[1].fieldsets{[document]}.fields[4] "album1" +doctype[1].fieldsets{[document]}.fields[5] "along" +doctype[1].fieldsets{[document]}.fields[6] "arrarr" +doctype[1].fieldsets{[document]}.fields[7] "arrayfield" +doctype[1].fieldsets{[document]}.fields[8] "arraymapfield" +doctype[1].fieldsets{[document]}.fields[9] "ashortfloat" +doctype[1].fieldsets{[document]}.fields[10] "complexarray" +doctype[1].fieldsets{[document]}.fields[11] "doublemapfield" +doctype[1].fieldsets{[document]}.fields[12] "floatmapfield" +doctype[1].fieldsets{[document]}.fields[13] "intmapfield" +doctype[1].fieldsets{[document]}.fields[14] "juletre" +doctype[1].fieldsets{[document]}.fields[15] "longmapfield" +doctype[1].fieldsets{[document]}.fields[16] "maparr" +doctype[1].fieldsets{[document]}.fields[17] "mystructarr" +doctype[1].fieldsets{[document]}.fields[18] "mystructfield" +doctype[1].fieldsets{[document]}.fields[19] "mystructmap" +doctype[1].fieldsets{[document]}.fields[20] "setfield" +doctype[1].fieldsets{[document]}.fields[21] "setfield2" +doctype[1].fieldsets{[document]}.fields[22] "setfield3" +doctype[1].fieldsets{[document]}.fields[23] "setfield4" +doctype[1].fieldsets{[document]}.fields[24] "stringmapfield" 
+doctype[1].fieldsets{[document]}.fields[25] "structarrayfield" +doctype[1].fieldsets{[document]}.fields[26] "structfield" +doctype[1].fieldsets{[document]}.fields[27] "tagfield" +doctype[1].arraytype[0].idx 10017 +doctype[1].arraytype[0].elementtype 10007 +doctype[1].arraytype[0].internalid -1245117006 +doctype[1].arraytype[1].idx 10024 +doctype[1].arraytype[1].elementtype 10023 +doctype[1].arraytype[1].internalid -1244829667 +doctype[1].arraytype[2].idx 10031 +doctype[1].arraytype[2].elementtype 10007 +doctype[1].arraytype[2].internalid -1245117006 +doctype[1].arraytype[3].idx 10032 +doctype[1].arraytype[3].elementtype 10033 +doctype[1].arraytype[3].internalid -794985308 +doctype[1].arraytype[4].idx 10033 +doctype[1].arraytype[4].elementtype 10034 +doctype[1].arraytype[4].internalid 1707615575 +doctype[1].arraytype[5].idx 10034 +doctype[1].arraytype[5].elementtype 10012 +doctype[1].arraytype[5].internalid -1486737430 +doctype[1].arraytype[6].idx 10035 +doctype[1].arraytype[6].elementtype 10036 +doctype[1].arraytype[6].internalid 69621385 +doctype[1].arraytype[7].idx 10037 +doctype[1].arraytype[7].elementtype 10038 +doctype[1].arraytype[7].internalid 1416345047 +doctype[1].arraytype[8].idx 10039 +doctype[1].arraytype[8].elementtype 10040 +doctype[1].arraytype[8].internalid 1707615575 +doctype[1].arraytype[9].idx 10040 +doctype[1].arraytype[9].elementtype 10012 +doctype[1].arraytype[9].internalid -1486737430 +doctype[1].arraytype[10].idx 10042 +doctype[1].arraytype[10].elementtype 10003 +doctype[1].arraytype[10].internalid 49942803 +doctype[1].arraytype[11].idx 10045 +doctype[1].arraytype[11].elementtype 10041 +doctype[1].arraytype[11].internalid 759956026 +doctype[1].maptype[0].idx 10025 +doctype[1].maptype[0].keytype 10012 +doctype[1].maptype[0].valuetype 10012 +doctype[1].maptype[0].internalid 339965458 +doctype[1].maptype[1].idx 10026 +doctype[1].maptype[1].keytype 10012 +doctype[1].maptype[1].valuetype 10007 +doctype[1].maptype[1].internalid -1584287606 
+doctype[1].maptype[2].idx 10027 +doctype[1].maptype[2].keytype 10012 +doctype[1].maptype[2].valuetype 10005 +doctype[1].maptype[2].internalid 2125154557 +doctype[1].maptype[3].idx 10028 +doctype[1].maptype[3].keytype 10007 +doctype[1].maptype[3].valuetype 10008 +doctype[1].maptype[3].internalid -1715531035 +doctype[1].maptype[4].idx 10029 +doctype[1].maptype[4].keytype 10007 +doctype[1].maptype[4].valuetype 10004 +doctype[1].maptype[4].internalid 2138385264 +doctype[1].maptype[5].idx 10030 +doctype[1].maptype[5].keytype 10012 +doctype[1].maptype[5].valuetype 10031 +doctype[1].maptype[5].internalid 435886609 +doctype[1].maptype[6].idx 10036 +doctype[1].maptype[6].keytype 10012 +doctype[1].maptype[6].valuetype 10012 +doctype[1].maptype[6].internalid 339965458 +doctype[1].maptype[7].idx 10038 +doctype[1].maptype[7].keytype 10007 +doctype[1].maptype[7].valuetype 10039 +doctype[1].maptype[7].internalid -372512406 +doctype[1].maptype[8].idx 10043 +doctype[1].maptype[8].keytype 10012 +doctype[1].maptype[8].valuetype 10012 +doctype[1].maptype[8].internalid 339965458 +doctype[1].maptype[9].idx 10044 +doctype[1].maptype[9].keytype 10007 +doctype[1].maptype[9].valuetype 10041 +doctype[1].maptype[9].internalid 1901258752 +doctype[1].maptype[10].idx 10046 +doctype[1].maptype[10].keytype 10007 +doctype[1].maptype[10].valuetype 10047 +doctype[1].maptype[10].internalid -389833101 +doctype[1].maptype[11].idx 10048 +doctype[1].maptype[11].keytype 10012 +doctype[1].maptype[11].valuetype 10008 +doctype[1].maptype[11].internalid -1865479609 +doctype[1].wsettype[0].idx 10018 +doctype[1].wsettype[0].elementtype 10012 +doctype[1].wsettype[0].createifnonexistent false +doctype[1].wsettype[0].removeifzero false +doctype[1].wsettype[0].internalid 1328286588 +doctype[1].wsettype[1].idx 10019 +doctype[1].wsettype[1].elementtype 10012 +doctype[1].wsettype[1].createifnonexistent true +doctype[1].wsettype[1].removeifzero true +doctype[1].wsettype[1].internalid 18 +doctype[1].wsettype[2].idx 
10020 +doctype[1].wsettype[2].elementtype 10012 +doctype[1].wsettype[2].createifnonexistent false +doctype[1].wsettype[2].removeifzero true +doctype[1].wsettype[2].internalid 2125328771 +doctype[1].wsettype[3].idx 10021 +doctype[1].wsettype[3].elementtype 10012 +doctype[1].wsettype[3].createifnonexistent true +doctype[1].wsettype[3].removeifzero false +doctype[1].wsettype[3].internalid 2065577986 +doctype[1].wsettype[4].idx 10022 +doctype[1].wsettype[4].elementtype 10012 +doctype[1].wsettype[4].createifnonexistent true +doctype[1].wsettype[4].removeifzero true +doctype[1].wsettype[4].internalid 18 +doctype[1].wsettype[5].idx 10049 +doctype[1].wsettype[5].elementtype 10012 +doctype[1].wsettype[5].createifnonexistent true +doctype[1].wsettype[5].removeifzero true +doctype[1].wsettype[5].internalid 18 +doctype[1].wsettype[6].idx 10050 +doctype[1].wsettype[6].elementtype 10012 +doctype[1].wsettype[6].createifnonexistent true +doctype[1].wsettype[6].removeifzero true +doctype[1].wsettype[6].internalid 18 +doctype[1].structtype[0].idx 10023 +doctype[1].structtype[0].name "sct" +doctype[1].structtype[0].field[0].name "s1" +doctype[1].structtype[0].field[0].internalid 2146820765 +doctype[1].structtype[0].field[0].type 10012 +doctype[1].structtype[0].field[1].name "s2" +doctype[1].structtype[0].field[1].internalid 45366795 +doctype[1].structtype[0].field[1].type 10012 +doctype[1].structtype[0].internalid 109267174 +doctype[1].structtype[1].idx 10041 +doctype[1].structtype[1].name "mystruct" +doctype[1].structtype[1].field[0].name "bytearr" +doctype[1].structtype[1].field[0].internalid 1079701754 +doctype[1].structtype[1].field[0].type 10042 +doctype[1].structtype[1].field[1].name "mymap" +doctype[1].structtype[1].field[1].internalid 1954178122 +doctype[1].structtype[1].field[1].type 10043 +doctype[1].structtype[1].field[2].name "title" +doctype[1].structtype[1].field[2].internalid 567626448 +doctype[1].structtype[1].field[2].type 10012 
+doctype[1].structtype[1].field[3].name "structfield" +doctype[1].structtype[1].field[3].internalid 1726890940 +doctype[1].structtype[1].field[3].type 10012 +doctype[1].structtype[1].internalid -2092985853 +doctype[1].structtype[2].idx 10047 +doctype[1].structtype[2].name "folder" +doctype[1].structtype[2].field[0].name "Version" +doctype[1].structtype[2].field[0].internalid 64430502 +doctype[1].structtype[2].field[0].type 10007 +doctype[1].structtype[2].field[1].name "Name" +doctype[1].structtype[2].field[1].internalid 2002760220 +doctype[1].structtype[2].field[1].type 10012 +doctype[1].structtype[2].field[2].name "FlagsCounter" +doctype[1].structtype[2].field[2].internalid 1741227606 +doctype[1].structtype[2].field[2].type 10048 +doctype[1].structtype[2].field[3].name "anotherfolder" +doctype[1].structtype[2].field[3].internalid 1582421848 +doctype[1].structtype[2].field[3].type 10047 +doctype[1].structtype[2].internalid 294108848 +doctype[1].structtype[3].idx 10016 +doctype[1].structtype[3].name "types.header" +doctype[1].structtype[3].field[0].name "abyte" +doctype[1].structtype[3].field[0].internalid 110138156 +doctype[1].structtype[3].field[0].type 10003 +doctype[1].structtype[3].field[1].name "along" +doctype[1].structtype[3].field[1].internalid 1206464520 +doctype[1].structtype[3].field[1].type 10008 +doctype[1].structtype[3].field[2].name "abool" +doctype[1].structtype[3].field[2].internalid 492328000 +doctype[1].structtype[3].field[2].type 10002 +doctype[1].structtype[3].field[3].name "ashortfloat" +doctype[1].structtype[3].field[3].internalid 1012106297 +doctype[1].structtype[3].field[3].type 10006 +doctype[1].structtype[3].field[4].name "arrayfield" +doctype[1].structtype[3].field[4].internalid 965790107 +doctype[1].structtype[3].field[4].type 10017 +doctype[1].structtype[3].field[5].name "setfield" +doctype[1].structtype[3].field[5].internalid 761581914 +doctype[1].structtype[3].field[5].type 10018 +doctype[1].structtype[3].field[6].name "setfield2" 
+doctype[1].structtype[3].field[6].internalid 1066659198 +doctype[1].structtype[3].field[6].type 10019 +doctype[1].structtype[3].field[7].name "setfield3" +doctype[1].structtype[3].field[7].internalid 1180155772 +doctype[1].structtype[3].field[7].type 10020 +doctype[1].structtype[3].field[8].name "setfield4" +doctype[1].structtype[3].field[8].internalid 1254131631 +doctype[1].structtype[3].field[8].type 10021 +doctype[1].structtype[3].field[9].name "tagfield" +doctype[1].structtype[3].field[9].internalid 1653562069 +doctype[1].structtype[3].field[9].type 10022 +doctype[1].structtype[3].field[10].name "structfield" +doctype[1].structtype[3].field[10].internalid 486207386 +doctype[1].structtype[3].field[10].type 10023 +doctype[1].structtype[3].field[11].name "structarrayfield" +doctype[1].structtype[3].field[11].internalid 335048518 +doctype[1].structtype[3].field[11].type 10024 +doctype[1].structtype[3].field[12].name "stringmapfield" +doctype[1].structtype[3].field[12].internalid 117465687 +doctype[1].structtype[3].field[12].type 10025 +doctype[1].structtype[3].field[13].name "intmapfield" +doctype[1].structtype[3].field[13].internalid 121004462 +doctype[1].structtype[3].field[13].type 10026 +doctype[1].structtype[3].field[14].name "floatmapfield" +doctype[1].structtype[3].field[14].internalid 1239120925 +doctype[1].structtype[3].field[14].type 10027 +doctype[1].structtype[3].field[15].name "longmapfield" +doctype[1].structtype[3].field[15].internalid 477718745 +doctype[1].structtype[3].field[15].type 10028 +doctype[1].structtype[3].field[16].name "doublemapfield" +doctype[1].structtype[3].field[16].internalid 877047192 +doctype[1].structtype[3].field[16].type 10029 +doctype[1].structtype[3].field[17].name "arraymapfield" +doctype[1].structtype[3].field[17].internalid 1670805928 +doctype[1].structtype[3].field[17].type 10030 +doctype[1].structtype[3].field[18].name "arrarr" +doctype[1].structtype[3].field[18].internalid 1962567166 
+doctype[1].structtype[3].field[18].type 10032 +doctype[1].structtype[3].field[19].name "maparr" +doctype[1].structtype[3].field[19].internalid 904375219 +doctype[1].structtype[3].field[19].type 10035 +doctype[1].structtype[3].field[20].name "complexarray" +doctype[1].structtype[3].field[20].internalid 795629533 +doctype[1].structtype[3].field[20].type 10037 +doctype[1].structtype[3].field[21].name "mystructfield" +doctype[1].structtype[3].field[21].internalid 1348513378 +doctype[1].structtype[3].field[21].type 10041 +doctype[1].structtype[3].field[22].name "mystructmap" +doctype[1].structtype[3].field[22].internalid 1511423250 +doctype[1].structtype[3].field[22].type 10044 +doctype[1].structtype[3].field[23].name "mystructarr" +doctype[1].structtype[3].field[23].internalid 595856991 +doctype[1].structtype[3].field[23].type 10045 +doctype[1].structtype[3].field[24].name "Folders" +doctype[1].structtype[3].field[24].internalid 34575524 +doctype[1].structtype[3].field[24].type 10046 +doctype[1].structtype[3].field[25].name "juletre" +doctype[1].structtype[3].field[25].internalid 1039981530 +doctype[1].structtype[3].field[25].type 10008 +doctype[1].structtype[3].field[26].name "album0" +doctype[1].structtype[3].field[26].internalid 764312262 +doctype[1].structtype[3].field[26].type 10049 +doctype[1].structtype[3].field[27].name "album1" +doctype[1].structtype[3].field[27].internalid 1967160809 +doctype[1].structtype[3].field[27].type 10050 +doctype[1].structtype[3].field[28].name "other" +doctype[1].structtype[3].field[28].internalid 2443357 +doctype[1].structtype[3].field[28].type 10008 +doctype[1].structtype[3].internalid 1328581348 diff --git a/document/src/vespa/document/repo/documenttyperepo.cpp b/document/src/vespa/document/repo/documenttyperepo.cpp index 312ce027543..3635574f370 100644 --- a/document/src/vespa/document/repo/documenttyperepo.cpp +++ b/document/src/vespa/document/repo/documenttyperepo.cpp @@ -17,6 +17,7 @@ #include 
<vespa/document/config/config-documenttypes.h>
 #include <fstream>
 #include <cassert>
+#include <set>
 
 #include <vespa/log/log.h>
 LOG_SETUP(".documenttyperepo");
@@ -74,7 +75,7 @@ public:
 
     void inherit(const Repo &parent);
     bool addDataType(const DataType &type);
-    template <typename T> void addDataType(unique_ptr<T> type);
+    template <typename T> const DataType * addDataType(unique_ptr<T> type);
 
     const DataType &addTensorType(const string &spec);
     const DataType *lookup(int32_t id) const;
@@ -108,14 +109,17 @@ bool Repo::addDataType(const DataType &type) {
     }
     data_type = &type;
     data_type_by_name = &type;
+    LOG(spam, "Added data type to repo: %s [%d]", type.getName().c_str(), type.getId());
     return true;
 }
 
 template <typename T>
-void Repo::addDataType(unique_ptr<T> type) {
+const DataType* Repo::addDataType(unique_ptr<T> type) {
+    int id = type->getId();
     if (addDataType(*type)) {
         _owned_types.push_back(type.release());
     }
+    return _types[id];
 }
 
 
@@ -502,6 +506,356 @@ void configureAllRepos(const DocumenttypesConfig::DocumenttypeVector &t, Documen
     }
 }
 
+using DataTypesByIdx = hash_map<int, const DataType *>;
+using StructTypesByIdx = hash_map<int, StructDataType *>;
+
+// Applies struct inheritance for the struct type with config index 'idx'.
+// Parents are resolved recursively first, then each field of every parent is
+// added to the struct with addInheritedField().
+// Returns the entry stored for 'idx' in 'structs', or nullptr when no struct
+// type in 't' has that index.
+// Throws IllegalArgumentException when a declared parent cannot be resolved.
+// NOTE(review): nothing here guards against cyclic inheritance in the config.
+const StructDataType * performStructInherit(int idx,
+                                            const DocumenttypesConfig::DoctypeVector &t,
+                                            const StructTypesByIdx &structs)
+{
+    for (const auto & docT : t) {
+        for (const auto & structT : docT.structtype) {
+            if (idx == structT.idx) {
+                StructDataType *st = structs[idx];
+                for (const auto & inheritD : structT.inherits) {
+                    const auto * parent = performStructInherit(inheritD.type, t, structs);
+                    if (parent == nullptr) {
+                        LOG(error, "Missing parent type [idx %d] for struct %s",
+                            inheritD.type, structT.name.c_str());
+                        throw IllegalArgumentException("missing parent type");
+                    }
+                    LOG_ASSERT(st != nullptr);
+                    for (const auto & field : parent->getFieldSet()) {
+                        st->addInheritedField(*field);
+                    }
+                }
+                return st;
+            }
+        }
+    }
+    return nullptr;
+}
+
+// Builds document types and all their nested data types from the 'doctype'
+// config array, storing one DataTypeRepo per document type in 'type_map'
+// (keyed by the document type's internal id).
+// Types refer to each other by config index (idx), so the work is done in
+// passes: collect the indexes other types depend on, create the types that
+// need no other type, then create the dependent types once their
+// dependencies exist.
+// Throws IllegalArgumentException when a referenced type cannot be resolved.
+void configureDocTypes(const DocumenttypesConfig::DoctypeVector &t, DocumentTypeMap &type_map) {
+    hash_map<int, StructDataType *> structs_by_idx;
+    hash_map<int, AnnotationType *> annotations_by_idx;
+    DataTypesByIdx types_by_idx;
+    std::set<int> needed_indexes;
+    // Pass 1: record every index that some other type refers to.
+    for (const auto & docT : t) {
+        for (const auto & structT : docT.structtype) {
+            for (const auto & fieldD : structT.field) {
+                LOG(debug, "doc %s struct %s field %s needs [idx %d]",
+                    docT.name.c_str(), structT.name.c_str(), fieldD.name.c_str(), fieldD.type);
+                needed_indexes.insert(fieldD.type);
+            }
+        }
+        for (const auto & arrT : docT.arraytype) {
+            LOG(debug, "doc %s array needs [idx %d]", docT.name.c_str(),arrT.elementtype);
+            needed_indexes.insert(arrT.elementtype);
+        }
+        for (const auto & wsetT : docT.wsettype) {
+            LOG(debug, "doc %s wset needs [idx %d]", docT.name.c_str(), wsetT.elementtype);
+            needed_indexes.insert(wsetT.elementtype);
+        }
+        for (const auto & mapT : docT.maptype) {
+            LOG(debug, "doc %s map needs [idx %d] and [idx %d]",
+                docT.name.c_str(), mapT.keytype, mapT.valuetype);
+            needed_indexes.insert(mapT.keytype);
+            needed_indexes.insert(mapT.valuetype);
+        }
+        for (const auto & annT: docT.annotationtype) {
+            if (annT.datatype != -1) {
+                LOG(debug, "doc %s ann needs datatype [idx %d]", docT.name.c_str(), annT.datatype);
+                needed_indexes.insert(annT.datatype);
+            }
+            for (const auto & inheritD : annT.inherits) {
+                LOG(debug, "doc %s ann needs parent [idx %d]", docT.name.c_str(), inheritD.idx);
+                needed_indexes.insert(inheritD.idx);
+            }
+        }
+        for (const auto & aRef : docT.annotationref) {
+            LOG(debug, "doc %s ann ref needs annotation [idx %d]", docT.name.c_str(), aRef.annotationtype);
+            needed_indexes.insert(aRef.annotationtype);
+        }
+        for (const auto & refT : docT.documentref) {
+            LOG(debug, "doc %s doc ref needs target [idx %d]", docT.name.c_str(), refT.targettype);
+            needed_indexes.insert(refT.targettype);
+        }
+    }
+    // Pass 2: create repos, empty structs and document types; resolve
+    // primitive types and add tensor and annotation types.
+    for (const auto & docT : t) {
+        DataTypeRepo * dtr = FindPtr(type_map, docT.internalid);
+        if (dtr == nullptr) {
+            dtr = new DataTypeRepo();
+            type_map[docT.internalid] = dtr;
+            LOG(debug, "new doct : %s [%d]", docT.name.c_str(), docT.internalid);
+        } else {
+            LOG(debug, "old doct : %s [%d]", docT.name.c_str(), docT.internalid);
+        }
+        auto & repo = dtr->repo;
+        for (const auto & structT : docT.structtype) {
+            // Reuse a type that is already in the repo under the same internal id.
+            if (const auto * dt = repo.lookup(structT.internalid)) {
+                LOG(debug, "already has %s [%d], wanted to add %s [%d]",
+                    dt->getName().c_str(), dt->getId(),
+                    structT.name.c_str(), structT.internalid);
+                types_by_idx[structT.idx] = dt;
+                needed_indexes.erase(structT.idx);
+                continue;
+            }
+            auto st = std::make_unique<StructDataType>(structT.name, structT.internalid);
+            needed_indexes.erase(structT.idx);
+            structs_by_idx[structT.idx] = st.get();
+            types_by_idx[structT.idx] = repo.addDataType(std::move(st));
+            assert(types_by_idx[structT.idx] == structs_by_idx[structT.idx]);
+        }
+        if (dtr->doc_type == nullptr) {
+            const auto * contentStruct = types_by_idx[docT.contentstruct];
+            const auto * fields = dynamic_cast<const StructDataType *>(contentStruct);
+            if (fields != nullptr) {
+                dtr->doc_type = new DocumentType(docT.name, docT.internalid, *fields);
+            } else {
+                LOG(error, "Missing content struct for '%s' (idx %d not found)",
+                    docT.name.c_str(), docT.contentstruct);
+                throw IllegalArgumentException("missing content struct");
+            }
+            for (const auto & inheritD : docT.inherits) {
+                const DataType *dt = types_by_idx[inheritD.idx];
+                if (dt == nullptr) {
+                    LOG(error, "parent datatype [idx %d] missing for document %s",
+                        inheritD.idx, docT.name.c_str());
+                    throw IllegalArgumentException("Unable to find document for inheritance");
+                }
+                DataTypeRepo * parentRepo = FindPtr(type_map, dt->getId());
+                if (parentRepo == nullptr) {
+                    LOG(error, "parent repo [id %d] missing for document %s",
+                        dt->getId(), docT.name.c_str());
+                    throw IllegalArgumentException("missing parent repo");
+                }
+                dtr->annotations.inherit(parentRepo->annotations);
+            }
+        }
+        types_by_idx[docT.idx] = dtr->doc_type;
+        needed_indexes.erase(docT.idx);
+        for (const auto & primT : docT.primitivetype) {
+            string name = primT.name;
+            const DataType *dt = repo.lookup(name);
+            if (dt == nullptr) {
+                if (name == "float16") {
+                    name = "float";
+                }
+                // Retry with the first letter upper-cased ("float16" is tried as "Float").
+                name[0] = (name[0] & 0x5F);
+                dt = repo.lookup(name);
+            }
+            if (dt == nullptr) {
+                LOG(warning, "Missing primitive type '%s'", primT.name.c_str());
+            } else {
+                types_by_idx[primT.idx] = dt;
+                needed_indexes.erase(primT.idx);
+            }
+        }
+        for (const auto & tensorT : docT.tensortype) {
+            const DataType & tt = repo.addTensorType(tensorT.detailedtype);
+            types_by_idx[tensorT.idx] = &tt;
+            needed_indexes.erase(tensorT.idx);
+        }
+        for (const auto & annT: docT.annotationtype) {
+            auto at = std::make_unique<AnnotationType>(annT.internalid, annT.name);
+            annotations_by_idx[annT.idx] = at.get();
+            needed_indexes.erase(annT.idx);
+            dtr->annotations.addAnnotationType(std::move(at));
+        }
+    }
+    // Pass 3: document references and annotation references.
+    for (const auto & docT : t) {
+        DataTypeRepo * dtr = FindPtr(type_map, docT.internalid);
+        LOG_ASSERT(dtr != nullptr);
+        auto & repo = dtr->repo;
+        for (const auto & refT : docT.documentref) {
+            if (types_by_idx[refT.idx] != nullptr) {
+                continue;
+            }
+            const auto * target = dynamic_cast<const DocumentType *>(types_by_idx[refT.targettype]);
+            if (target == nullptr) {
+                LOG(error, "Missing target document type for reference (idx %d)", refT.targettype);
+                throw IllegalArgumentException("missing target type");
+            } else {
+                auto rt = std::make_unique<ReferenceDataType>(*target, refT.internalid);
+                needed_indexes.erase(refT.idx);
+                types_by_idx[refT.idx] = repo.addDataType(std::move(rt));
+            }
+        }
+        for (const auto & aRef : docT.annotationref) {
+            const AnnotationType * target = annotations_by_idx[aRef.annotationtype];
+            if (target == nullptr) {
+                LOG(error, "Missing annotation type [idx %d] for annotationref",
+                    aRef.annotationtype);
+                throw IllegalArgumentException("missing annotation type");
+            } else {
+                auto ar = std::make_unique<AnnotationReferenceDataType>(*target, aRef.internalid);
+                needed_indexes.erase(aRef.idx);
+                types_by_idx[aRef.idx] = repo.addDataType(std::move(ar));
+            }
+        }
+    }
+    // Pass 4: array, map and weighted set types may nest, so retry until every
+    // needed index is resolved; a round that resolves nothing is an error.
+    while (!needed_indexes.empty()) {
+        size_t missing_cnt = needed_indexes.size();
+        bool missing = false;
+        for (const auto & docT : t) {
+            DataTypeRepo * dtr = FindPtr(type_map, docT.internalid);
+            LOG_ASSERT(dtr != nullptr);
+            auto & repo = dtr->repo;
+            for (const auto & arrT : docT.arraytype) {
+                if (types_by_idx[arrT.idx] != nullptr) {
+                    continue; // OK already
+                }
+                const DataType * nested = types_by_idx[arrT.elementtype];
+                if (nested == nullptr) {
+                    missing = true;
+                } else {
+                    auto at = std::make_unique<ArrayDataType>(*nested, arrT.internalid);
+                    needed_indexes.erase(arrT.idx);
+                    types_by_idx[arrT.idx] = repo.addDataType(std::move(at));
+                }
+            }
+            for (const auto & mapT : docT.maptype) {
+                if (types_by_idx[mapT.idx] != nullptr) {
+                    continue; // OK already
+                }
+                const DataType * kt = types_by_idx[mapT.keytype];
+                const DataType * vt = types_by_idx[mapT.valuetype];
+                if (kt == nullptr || vt == nullptr) {
+                    missing = true;
+                } else {
+                    auto mt = std::make_unique<MapDataType>(*kt, *vt, mapT.internalid);
+                    needed_indexes.erase(mapT.idx);
+                    types_by_idx[mapT.idx] = repo.addDataType(std::move(mt));
+                }
+            }
+            for (const auto & wsetT : docT.wsettype) {
+                if (types_by_idx[wsetT.idx] != nullptr) {
+                    continue; // OK already
+                }
+                const DataType * nested = types_by_idx[wsetT.elementtype];
+                if (nested == nullptr) {
+                    missing = true;
+                } else {
+                    auto wt = std::make_unique<WeightedSetDataType>(*nested,
+                                                                    wsetT.createifnonexistent, wsetT.removeifzero,
+                                                                    wsetT.internalid);
+                    needed_indexes.erase(wsetT.idx);
+                    types_by_idx[wsetT.idx] = repo.addDataType(std::move(wt));
+                }
+            }
+        }
+        if (missing) {
+            LOG(debug, "retry complex types, %zu missing", needed_indexes.size());
+        }
+        if (needed_indexes.size() == missing_cnt) {
+            // Report every unresolved index before failing.
+            for (int idx : needed_indexes) {
+                LOG(error, "no progress, datatype [idx %d] still missing", idx);
+            }
+            throw IllegalArgumentException("no progress");
+        }
+    }
+    // Pass 5: populate the fields of the structs created in pass 2.
+    for (const auto & docT : t) {
+        for (const auto & structT : docT.structtype) {
+            auto st = structs_by_idx[structT.idx];
+            if (st == nullptr) continue;
+            for (const auto & fieldD : structT.field) {
+                const DataType *ft = types_by_idx[fieldD.type];
+                if (ft == nullptr) {
+                    LOG(error, "Missing type [idx %d] for struct %s field %s",
+                        fieldD.type, structT.name.c_str(), fieldD.name.c_str());
+                    throw IllegalArgumentException("missing datatype");
+                } else {
+                    st->addField(Field(fieldD.name, fieldD.internalid, *ft));
+                }
+            }
+        }
+    }
+    // Pass 6: struct inheritance.
+    for (const auto & docT : t) {
+        for (const auto & structT : docT.structtype) {
+            performStructInherit(structT.idx, t, structs_by_idx);
+        }
+    }
+    // Pass 7: annotation payload types, imported fields, field sets and
+    // document inheritance.
+    for (const auto & docT : t) {
+        for (const auto & annT: docT.annotationtype) {
+            if (annT.datatype != -1) {
+                const DataType * dt = types_by_idx[annT.datatype];
+                if (dt == nullptr) {
+                    LOG(error, "Missing datatype [idx %d] for annotation type %s",
+                        annT.datatype, annT.name.c_str());
+                    throw IllegalArgumentException("missing datatype");
+                } else {
+                    AnnotationType * at = annotations_by_idx[annT.idx];
+                    LOG_ASSERT(at != nullptr);
+                    at->setDataType(*dt);
+                }
+            }
+            for (const auto & inheritD : annT.inherits) {
+                AnnotationType * at = annotations_by_idx[annT.idx];
+                LOG_ASSERT(at != nullptr);
+                // Parents are only validated here; nothing is applied to 'at'.
+                const AnnotationType * parent = annotations_by_idx[inheritD.idx];
+                if (parent == nullptr) {
+                    LOG(error, "missing parent [idx %d] for annotation %s",
+                        inheritD.idx, annT.name.c_str());
+                    throw IllegalArgumentException("missing parent");
+                }
+            }
+        }
+        DataTypeRepo * dtr = FindPtr(type_map, docT.internalid);
+        LOG_ASSERT(dtr != nullptr);
+        for (const auto & importD : docT.importedfield) {
+            dtr->doc_type->add_imported_field_name(importD.name);
+        }
+        for (const auto & entry : docT.fieldsets) {
+            DocumentType::FieldSet::Fields fields;
+            for (const auto& f : entry.second.fields) {
+                fields.insert(f);
+            }
+            dtr->doc_type->addFieldSet(entry.first, fields);
+        }
+        for (const auto & inheritD : docT.inherits) {
+            const DataType *dt = types_by_idx[inheritD.idx];
+            const DocumentType * parent = dynamic_cast<const DocumentType *>(dt);
+            if (parent == nullptr) {
+                LOG(error, "missing parent type [idx %d] for document %s",
+                    inheritD.idx, docT.name.c_str());
+                throw IllegalArgumentException("missing parent type");
+            } else {
+                dtr->doc_type->inherit(*parent);
+            }
+        }
+    }
+}
+
 } // namespace
 
 DocumentTypeRepo::DocumentTypeRepo() :
@@ -527,9 +881,13 @@ DocumentTypeRepo::DocumentTypeRepo(const DocumenttypesConfig &config) :
     _default(addDefaultDocument(*_doc_types))
 {
     try {
+        if (config.documenttype.empty() && ! config.doctype.empty()) {
+            configureDocTypes(config.doctype, *_doc_types);
+        } else {
         createAllDocumentTypes(config.documenttype, *_doc_types);
         addAllDocumentTypesToRepos(*_doc_types);
         configureAllRepos(config.documenttype, *_doc_types);
+        }
     } catch (...) {
         DeleteMapContent(*_doc_types);
         throw;
|