diff options
author | Arne H Juul <arnej27959@users.noreply.github.com> | 2022-04-01 13:52:37 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-04-01 13:52:37 +0200 |
commit | f135ba6e2817a7819ae81591e8f73913d1e79355 (patch) | |
tree | 575ed5141b7214ae1fd23f842aadb044abd7f17f | |
parent | f51a32337039c30da84826892d3cea2c306b8368 (diff) | |
parent | 62a3a9c55fde4b8877f53c790e2ca47a534e3259 (diff) |
Merge pull request #21920 from vespa-engine/arnej/alternative-documenttypes-config
Arnej/alternative documenttypes config
-rw-r--r-- | config-model/src/main/java/com/yahoo/vespa/configmodel/producers/DocumentTypes.java | 290 | ||||
-rw-r--r-- | document/src/tests/repo/CMakeLists.txt | 8 | ||||
-rw-r--r-- | document/src/tests/repo/doctype_config_test.cpp | 662 | ||||
-rw-r--r-- | document/src/tests/repo/types.cfg | 326 | ||||
-rw-r--r-- | document/src/vespa/document/config/documenttypes.def | 175 | ||||
-rw-r--r-- | document/src/vespa/document/repo/documenttyperepo.cpp | 506 |
6 files changed, 1962 insertions, 5 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/configmodel/producers/DocumentTypes.java b/config-model/src/main/java/com/yahoo/vespa/configmodel/producers/DocumentTypes.java
index e1a28c8114f..630e9f0c097 100644
--- a/config-model/src/main/java/com/yahoo/vespa/configmodel/producers/DocumentTypes.java
+++ b/config-model/src/main/java/com/yahoo/vespa/configmodel/producers/DocumentTypes.java
@@ -27,6 +27,11 @@ public class DocumentTypes {
     }
 
     public DocumenttypesConfig.Builder produce(DocumentModel model, DocumenttypesConfig.Builder builder) {
+        /* later:
+        if (some flag) {
+            return produceDocTypes(model, builder);
+        }
+        */
         builder.usev8geopositions(this.useV8GeoPositions);
         Map<NewDocumentType.Name, NewDocumentType> produced = new HashMap<>();
         for (NewDocumentType documentType : model.getDocumentManager().getTypes()) {
@@ -236,4 +241,306 @@ public class DocumentTypes {
         documentBuilder.referencetype(refBuilder);
     }
 
+    // Alternate (new) way to build config:
+
+    /** Emits a doctype entry for every document type in the model, parents before children. */
+    private DocumenttypesConfig.Builder produceDocTypes(DocumentModel model, DocumenttypesConfig.Builder builder) {
+        builder.usev8geopositions(this.useV8GeoPositions);
+        Map<NewDocumentType.Name, NewDocumentType> produced = new HashMap<>();
+        var indexMap = new IdxMap();
+        for (NewDocumentType documentType : model.getDocumentManager().getTypes()) {
+            docTypeInheritOrder(documentType, builder, produced, indexMap);
+        }
+        indexMap.verifyAllDone();
+        return builder;
+    }
+
+    /** Builds everything documentType inherits before documentType itself; 'produced' keeps each type from being built twice. */
+    private void docTypeInheritOrder(NewDocumentType documentType,
+                                     DocumenttypesConfig.Builder builder,
+                                     Map<NewDocumentType.Name, NewDocumentType> produced,
+                                     IdxMap indexMap)
+    {
+        if (! produced.containsKey(documentType.getFullName())) {
+            for (NewDocumentType inherited : documentType.getInherited()) {
+                docTypeInheritOrder(inherited, builder, produced, indexMap);
+            }
+            docTypeBuild(documentType, builder, indexMap);
+            produced.put(documentType.getFullName(), documentType);
+        }
+    }
+
+    /** Hands out one config index per type object (identity-keyed) and records which indexes have been emitted. */
+    private static class IdxMap {
+        private final Map<Integer, Boolean> doneMap = new HashMap<>();
+        private final Map<Object, Integer> map = new IdentityHashMap<>();
+        void add(Object someType) {
+            assert(someType != null);
+            // the adding of "10000" here is mostly to make it more
+            // unique to grep for when debugging
+            int nextIdx = 10000 + map.size();
+            map.computeIfAbsent(someType, k -> nextIdx);
+        }
+        int idxOf(Object someType) {
+            // every DocumentType with id 8 is mapped to the index of VespaDocumentType.INSTANCE
+            if (someType instanceof DocumentType) {
+                var dt = (DocumentType) someType;
+                if (dt.getId() == 8) {
+                    return idxOf(VespaDocumentType.INSTANCE);
+                }
+            }
+            add(someType);
+            return map.get(someType);
+        }
+        boolean isDone(Object someType) {
+            return doneMap.computeIfAbsent(idxOf(someType), k -> false);
+        }
+        void setDone(Object someType) {
+            assert(! isDone(someType));
+            doneMap.put(idxOf(someType), true);
+        }
+        void verifyAllDone() {
+            for (var entry : map.entrySet()) {
+                Object needed = entry.getKey();
+                if (! isDone(needed)) {
+                    throw new IllegalArgumentException("Could not generate config for all needed types, missing: " +
+                                                       needed + " of class " + needed.getClass());
+                }
+            }
+        }
+    }
+
+    /** Emits one doctype entry: identity, field sets, imported fields, inherits, then its data types and annotations. */
+    private void docTypeBuild(NewDocumentType documentType, DocumenttypesConfig.Builder builder, IdxMap indexMap) {
+        DocumenttypesConfig.Doctype.Builder db = new DocumenttypesConfig.Doctype.Builder();
+        db.
+            idx(indexMap.idxOf(documentType)).
+            name(documentType.getName()).
+            internalid(documentType.getId()).
+            contentstruct(indexMap.idxOf(documentType.getContentStruct()));
+        docTypeBuildFieldSets(documentType.getFieldSets(), db);
+        docTypeBuildImportedFields(documentType.getImportedFieldNames(), db);
+        for (NewDocumentType inherited : documentType.getInherited()) {
+            db.inherits(b -> b.idx(indexMap.idxOf(inherited)));
+        }
+        docTypeBuildAnyType(documentType.getContentStruct(), db, indexMap);
+
+        for (DataType dt : sortedList(documentType.getAllTypes().getTypes(),
+                                      (a,b) -> a.getName().compareTo(b.getName()))) {
+            docTypeBuildAnyType(dt, db, indexMap);
+        }
+        for (AnnotationType ann : sortedList(documentType.getAnnotations(),
+                                             (a,b) -> a.getName().compareTo(b.getName()))) {
+            docTypeBuildAnnotationType(ann, db, indexMap);
+        }
+        builder.doctype(db);
+        indexMap.setDone(documentType);
+    }
+
+    /** Adds a fieldsets entry for each of the given field sets. */
+    private void docTypeBuildFieldSets(Set<FieldSet> fieldSets, DocumenttypesConfig.Doctype.Builder db) {
+        for (FieldSet fs : fieldSets) {
+            docTypeBuildOneFieldSet(fs, db);
+        }
+    }
+
+    /** Adds one fieldsets entry, keyed by the field set name, listing its field names. */
+    private void docTypeBuildOneFieldSet(FieldSet fs, DocumenttypesConfig.Doctype.Builder db) {
+        db.fieldsets(fs.getName(), new DocumenttypesConfig.Doctype.Fieldsets.Builder().fields(fs.getFieldNames()));
+    }
+
+    /** Emits an annotationtype entry once per annotation, together with its nested data type if it has one. */
+    private void docTypeBuildAnnotationType(AnnotationType annotation, DocumenttypesConfig.Doctype.Builder builder, IdxMap indexMap) {
+        if (indexMap.isDone(annotation)) {
+            return;
+        }
+        indexMap.setDone(annotation);
+        var annBuilder = new DocumenttypesConfig.Doctype.Annotationtype.Builder();
+        annBuilder
+            .idx(indexMap.idxOf(annotation))
+            .name(annotation.getName())
+            .internalid(annotation.getId());
+        DataType nested = annotation.getDataType();
+        if (nested != null) {
+            annBuilder.datatype(indexMap.idxOf(nested));
+            docTypeBuildAnyType(nested, builder, indexMap);
+        }
+        for (AnnotationType inherited : annotation.getInheritedTypes()) {
+            annBuilder.inherits(inhBuilder -> inhBuilder.idx(indexMap.idxOf(inherited)));
+
+        }
+        builder.annotationtype(annBuilder);
+    }
+
+    /** Emits the entry matching the concrete class of 'type'; returns early if already emitted or if it is a document type handled elsewhere. */
+    @SuppressWarnings("deprecation")
+    private void docTypeBuildAnyType(DataType type, DocumenttypesConfig.Doctype.Builder documentBuilder, IdxMap indexMap) {
+        if (indexMap.isDone(type)) {
+            return;
+        }
+        if (type instanceof NewDocumentType) {
+            // should be in the top-level list and handled there
+            return;
+        }
+        if ((type instanceof DocumentType) && (type.getId() == 8)) {
+            // special handling
+            return;
+        }
+        indexMap.setDone(type);
+        if (type instanceof TemporaryStructuredDataType) {
+            throw new IllegalArgumentException("Can not create config for temporary data type: " + type.getName());
+        } else if (type instanceof TemporaryUnknownType) {
+            throw new IllegalArgumentException("Can not create config for temporary data type: " + type.getName());
+        } else if (type instanceof OwnedTemporaryType) {
+            throw new IllegalArgumentException("Can not create config for temporary data type: " + type.getName());
+        } else if (type instanceof StructDataType) {
+            docTypeBuildOneType((StructDataType) type, documentBuilder, indexMap);
+        } else if (type instanceof ArrayDataType) {
+            docTypeBuildOneType((ArrayDataType) type, documentBuilder, indexMap);
+        } else if (type instanceof WeightedSetDataType) {
+            docTypeBuildOneType((WeightedSetDataType) type, documentBuilder, indexMap);
+        } else if (type instanceof MapDataType) {
+            docTypeBuildOneType((MapDataType) type, documentBuilder, indexMap);
+        } else if (type instanceof AnnotationReferenceDataType) {
+            docTypeBuildOneType((AnnotationReferenceDataType) type, documentBuilder, indexMap);
+        } else if (type instanceof TensorDataType) {
+            docTypeBuildOneType((TensorDataType) type, documentBuilder, indexMap);
+        } else if (type instanceof NewDocumentReferenceDataType) {
+            var refType = (NewDocumentReferenceDataType) type;
+            if (refType.isTemporary()) {
+                throw new IllegalArgumentException("Still temporary: " + refType);
+            }
+            docTypeBuildOneType(refType, documentBuilder, indexMap);
+        } else if (type instanceof PrimitiveDataType) {
+            docTypeBuildOneType((PrimitiveDataType) type, documentBuilder, indexMap);
+        } else if (type instanceof DocumentType) {
+            throw new IllegalArgumentException("Can not create config for unadorned document type: " + type.getName() + " id "+type.getId());
+        } else {
+            throw new IllegalArgumentException("Can not create config for data type " + type + " of class " + type.getClass());
+        }
+    }
+
+    /** Adds an importedfield entry for each given field name. */
+    private void docTypeBuildImportedFields(Collection<String> fieldNames, DocumenttypesConfig.Doctype.Builder builder) {
+        for (String fieldName : fieldNames) {
+            builder.importedfield(ib -> ib.name(fieldName));
+        }
+    }
+
+    /** Emits a structtype entry and, recursively, the types it inherits and the types of its own fields. */
+    private void docTypeBuildOneType(StructDataType type,
+                                     DocumenttypesConfig.Doctype.Builder builder,
+                                     IdxMap indexMap)
+    {
+        var structBuilder = new DocumenttypesConfig.Doctype.Structtype.Builder();
+        structBuilder
+            .idx(indexMap.idxOf(type))
+            .name(type.getName())
+            .internalid(type.getId());
+        for (DataType inherited : type.getInheritedTypes()) {
+            structBuilder.inherits(inheritBuilder -> inheritBuilder
+                                   .type(indexMap.idxOf(inherited)));
+            docTypeBuildAnyType(inherited, builder, indexMap);
+        }
+        for (com.yahoo.document.Field field : type.getFieldsThisTypeOnly()) {
+            DataType fieldType = field.getDataType();
+            structBuilder.field(fieldBuilder -> fieldBuilder
+                                .name(field.getName())
+                                .internalid(field.getId())
+                                .type(indexMap.idxOf(fieldType)));
+            docTypeBuildAnyType(fieldType, builder, indexMap);
+        }
+        builder.structtype(structBuilder);
+    }
+
+    /** Emits a primitivetype entry (index and name only). */
+    private void docTypeBuildOneType(PrimitiveDataType type,
+                                     DocumenttypesConfig.Doctype.Builder builder,
+                                     IdxMap indexMap)
+    {
+        builder.primitivetype(primBuilder -> primBuilder
+                              .idx(indexMap.idxOf(type))
+                              .name(type.getName()));
+    }
+
+    /** Emits a tensortype entry; the detailed type falls back to "tensor" when no tensor type is set. */
+    private void docTypeBuildOneType(TensorDataType type,
+                                     DocumenttypesConfig.Doctype.Builder builder,
+                                     IdxMap indexMap)
+    {
+        var tt = type.getTensorType();
+        String detailed = (tt != null) ? tt.toString() : "tensor";
+        builder.tensortype(tensorBuilder -> tensorBuilder
+                           .idx(indexMap.idxOf(type))
+                           .detailedtype(detailed));
+
+    }
+
+    /** Emits an arraytype entry and then the element type. */
+    private void docTypeBuildOneType(ArrayDataType type,
+                                     DocumenttypesConfig.Doctype.Builder builder,
+                                     IdxMap indexMap)
+    {
+        DataType nested = type.getNestedType();
+        builder.arraytype(arrayBuilder -> arrayBuilder
+                          .idx(indexMap.idxOf(type))
+                          .elementtype(indexMap.idxOf(nested))
+                          .internalid(type.getId()));
+        docTypeBuildAnyType(nested, builder, indexMap);
+    }
+
+    /** Emits a wsettype entry, including its create-if-nonexistent / remove-if-zero flags, and then the element type. */
+    private void docTypeBuildOneType(WeightedSetDataType type,
+                                     DocumenttypesConfig.Doctype.Builder builder,
+                                     IdxMap indexMap)
+    {
+        DataType nested = type.getNestedType();
+        builder.wsettype(wsetBuilder -> wsetBuilder
+                         .idx(indexMap.idxOf(type))
+                         .elementtype(indexMap.idxOf(nested))
+                         .createifnonexistent(type.createIfNonExistent())
+                         .removeifzero(type.removeIfZero())
+                         .internalid(type.getId()));
+        docTypeBuildAnyType(nested, builder, indexMap);
+    }
+
+    /** Emits a maptype entry and then the key and value types. */
+    private void docTypeBuildOneType(MapDataType type,
+                                     DocumenttypesConfig.Doctype.Builder builder,
+                                     IdxMap indexMap)
+    {
+        DataType keytype = type.getKeyType();
+        DataType valtype = type.getValueType();
+        builder.maptype(mapBuilder -> mapBuilder
+                        .idx(indexMap.idxOf(type))
+                        .keytype(indexMap.idxOf(keytype))
+                        .valuetype(indexMap.idxOf(valtype))
+                        .internalid(type.getId()));
+        docTypeBuildAnyType(keytype, builder, indexMap);
+        docTypeBuildAnyType(valtype, builder, indexMap);
+    }
+
+    /** Emits an annotationref entry pointing at the index of the referenced annotation type. */
+    private void docTypeBuildOneType(AnnotationReferenceDataType type,
+                                     DocumenttypesConfig.Doctype.Builder builder,
+                                     IdxMap indexMap)
+    {
+        builder.annotationref(arefBuilder -> arefBuilder
+                              .idx(indexMap.idxOf(type))
+                              .annotationtype(indexMap.idxOf(type.getAnnotationType()))
+                              .internalid(type.getId()));
+    }
+
+    /** Emits a documentref entry pointing at the index of the target document type. */
+    private void docTypeBuildOneType(NewDocumentReferenceDataType type,
+                                     DocumenttypesConfig.Doctype.Builder builder,
+                                     IdxMap indexMap)
+    {
+        builder.documentref(docrefBuilder -> docrefBuilder
+                            .idx(indexMap.idxOf(type))
+                            .targettype(indexMap.idxOf(type.getTargetType()))
+                            .internalid(type.getId()));
+
+    }
+
 }
diff --git a/document/src/tests/repo/CMakeLists.txt b/document/src/tests/repo/CMakeLists.txt
index bbbcafc2650..2a9ff4af683 100644
--- a/document/src/tests/repo/CMakeLists.txt
+++ b/document/src/tests/repo/CMakeLists.txt
@@ -6,3 +6,11 @@ vespa_add_executable(document_documenttyperepo_test_app TEST
     document
 )
 vespa_add_test(NAME document_documenttyperepo_test_app COMMAND document_documenttyperepo_test_app)
+
+vespa_add_executable(document_doctype_config_test_app TEST
+    SOURCES
+    doctype_config_test.cpp
+    DEPENDS
+    document
+)
+vespa_add_test(NAME document_doctype_config_test_app COMMAND document_doctype_config_test_app)
diff --git a/document/src/tests/repo/doctype_config_test.cpp b/document/src/tests/repo/doctype_config_test.cpp
new file mode 100644
index 00000000000..84ec1414fcc
--- /dev/null
+++ b/document/src/tests/repo/doctype_config_test.cpp
@@ -0,0 +1,662 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for documenttyperepo.
+
+#include <vespa/document/base/testdocrepo.h>
+#include <vespa/config/print/asciiconfigwriter.h>
+#include <vespa/document/datatype/annotationreferencedatatype.h>
+#include <vespa/document/datatype/arraydatatype.h>
+#include <vespa/document/datatype/documenttype.h>
+#include <vespa/document/datatype/mapdatatype.h>
+#include <vespa/document/datatype/tensor_data_type.h>
+#include <vespa/document/datatype/weightedsetdatatype.h>
+#include <vespa/document/fieldvalue/fieldvalue.h>
+#include <vespa/document/repo/configbuilder.h>
+#include <vespa/document/repo/documenttyperepo.h>
+#include <vespa/vespalib/objects/identifiable.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/vespalib/util/exceptions.h>
+#include <set>
+
+#include <vespa/log/log.h>
+LOG_SETUP("doctype_config_test");
+
+using config::AsciiConfigWriter;
+using std::set;
+using std::vector;
+using vespalib::Identifiable;
+using vespalib::IllegalArgumentException;
+using vespalib::string;
+
+using namespace document::config_builder;
+using namespace document;
+
+namespace {
+
+const string type_name = "test";
+const int32_t doc_type_id = 787121340;
+const string header_name = type_name + ".header";
+const int32_t header_id = 30;
+const string type_name_2 = "test_2";
+const string header_name_2 = type_name_2 + ".header";
+const string field_name = "field_name";
+const string derived_name = "derived";
+
+using ::document::config::DocumenttypesConfigBuilder;
+
+using BDocType = DocumenttypesConfigBuilder::Doctype;
+using BDocInherit = DocumenttypesConfigBuilder::Doctype::Inherits;
+using BDocFieldsets = DocumenttypesConfigBuilder::Doctype::Fieldsets;
+using BDocImportField = DocumenttypesConfigBuilder::Doctype::Importedfield;
+using BPrimitiveT = DocumenttypesConfigBuilder::Doctype::Primitivetype;
+using BArrayT = DocumenttypesConfigBuilder::Doctype::Arraytype;
+using BMapT = DocumenttypesConfigBuilder::Doctype::Maptype;
+using BWsetT = DocumenttypesConfigBuilder::Doctype::Wsettype;
+using BTensorT = DocumenttypesConfigBuilder::Doctype::Tensortype;
+using BDocRefT = DocumenttypesConfigBuilder::Doctype::Documentref;
+using BAnnotationT = DocumenttypesConfigBuilder::Doctype::Annotationtype;
+using BAnnRefT = DocumenttypesConfigBuilder::Doctype::Annotationref;
+using BStructT = DocumenttypesConfigBuilder::Doctype::Structtype;
+using BStructField = DocumenttypesConfigBuilder::Doctype::Structtype::Field;
+using BStructInherits = DocumenttypesConfigBuilder::Doctype::Structtype::Inherits;
+
+// Test helper that assembles a DocumenttypesConfigBuilder by hand.
+// Every entry that carries an idx gets the next value of a running counter
+// that starts above 10000. The constructor registers a root "document" type
+// holding the primitive types, and every later document inherits from it.
+class BuilderHelper {
+private:
+    int _idx = 10000;
+    DocumenttypesConfigBuilder _config;
+    // Internal id for a name, taken from a temporary StructDataType of that name.
+    static int hashId(const string& name) {
+        StructDataType tmp(name);
+        return tmp.getId();
+    }
+    // Maps DataType::Type value -> idx of the matching primitive entry.
+    vector<int> _idxOfBuiltins;
+    void addPrimitive(BDocType &doc, const string& name, DataType::Type t) {
+        BPrimitiveT pt;
+        pt.idx = ++_idx;
+        pt.name = name;
+        doc.primitivetype.push_back(pt);
+        assert(t < _idxOfBuiltins.size());
+        _idxOfBuiltins[t] = pt.idx;
+        LOG(debug, "idx of builtin (%d) = %d", (int)t, pt.idx);
+    }
+public:
+    ~BuilderHelper();
+    // Adds a document type with a "<name>.header" content struct and returns it.
+    // NOTE(review): the reserve(100) calls here and in the add* methods presumably
+    // exist so that references handed out earlier survive later emplace_back
+    // calls; that only holds while fewer than 100 entries are added - confirm.
+    BDocType & document(const string& name) {
+        _config.doctype.reserve(100);
+        auto & d = _config.doctype.emplace_back();
+        d.idx = ++_idx;
+        d.name = name;
+        d.internalid = hashId(name);
+        auto & st = addStruct(d, name + ".header");
+        d.contentstruct = st.idx;
+        if (_config.doctype.size() > 1) {
+            d.inherits.emplace_back().idx = _config.doctype[0].idx;
+        }
+        return d;
+    }
+    // Adds a field to the document's first struct (its content struct).
+    BStructField & addField(BDocType &doc, const string& name) {
+        return addField(doc.structtype[0], name);
+    }
+    BStructT & addStruct(BDocType &doc, const string& name) {
+        doc.structtype.reserve(100);
+        auto & st = doc.structtype.emplace_back();
+        st.idx = ++_idx;
+        st.name = name;
+        st.internalid = hashId(name);
+        return st;
+    }
+    // The caller is expected to set the returned field's 'type' idx.
+    BStructField & addField(BStructT &st, const string& name) {
+        st.field.reserve(100);
+        auto & f = st.field.emplace_back();
+        f.name = name;
+        f.internalid = hashId(name);
+        return f;
+    }
+    BArrayT & addArray(BDocType &doc, int nestedIdx) {
+        doc.arraytype.reserve(100);
+        auto & a = doc.arraytype.emplace_back();
+        a.idx = ++_idx;
+        a.elementtype = nestedIdx;
+        a.internalid = a.idx;
+        return a;
+    }
+    BMapT & addMap(BDocType &doc, int keyIdx, int valIdx) {
+        doc.maptype.reserve(100);
+        auto & m = doc.maptype.emplace_back();
+        m.idx = ++_idx;
+        m.keytype = keyIdx;
+        m.valuetype = valIdx;
+        m.internalid = m.idx;
+        return m;
+    }
+    BWsetT & addWset(BDocType &doc, int nestedIdx) {
+        doc.wsettype.reserve(100);
+        auto & w = doc.wsettype.emplace_back();
+        w.idx = ++_idx;
+        w.elementtype = nestedIdx;
+        w.internalid = w.idx;
+        return w;
+    }
+    BAnnotationT & addAnnotation(BDocType &doc, const string &name) {
+        doc.annotationtype.reserve(100);
+        auto & ann = doc.annotationtype.emplace_back();
+        ann.idx = ++_idx;
+        ann.name = name;
+        ann.internalid = hashId(name);
+        return ann;
+    }
+    BAnnRefT & addAnnotationRef(BDocType &doc, int annIdx) {
+        doc.annotationref.reserve(100);
+        auto & aref = doc.annotationref.emplace_back();
+        aref.idx = ++_idx;
+        aref.annotationtype = annIdx;
+        aref.internalid = aref.idx;
+        return aref;
+    }
+    BDocRefT & addDocumentRef(BDocType &doc, int targetIdx) {
+        doc.documentref.reserve(100);
+        auto & dref = doc.documentref.emplace_back();
+        dref.idx = ++_idx;
+        dref.targettype = targetIdx;
+        dref.internalid = dref.idx;
+        return dref;
+    }
+    BTensorT & addTensorType(BDocType &doc, const string& spec) {
+        doc.tensortype.reserve(100);
+        auto & tt = doc.tensortype.emplace_back();
+        tt.idx = ++_idx;
+        tt.detailedtype = spec;
+        return tt;
+    }
+    const DocumenttypesConfig & config() { return _config; }
+    BuilderHelper() {
+        _idxOfBuiltins.resize(DataType::MAX);
+        LOG(debug, "builtins.size = %zu", _idxOfBuiltins.size());
+        auto & root = document("document");
+        root.internalid = DataType::T_DOCUMENT;
+        addPrimitive(root, "int", DataType::T_INT);
+        addPrimitive(root, "float", DataType::T_FLOAT);
+        addPrimitive(root, "string", DataType::T_STRING);
+        addPrimitive(root, "raw", DataType::T_RAW);
+        addPrimitive(root, "long", DataType::T_LONG);
+        addPrimitive(root, "double", DataType::T_DOUBLE);
+        addPrimitive(root, "bool", DataType::T_BOOL);
+        addPrimitive(root, "uri", DataType::T_URI);
+        addPrimitive(root, "byte", DataType::T_BYTE);
+        addPrimitive(root, "tag", DataType::T_TAG);
+        addPrimitive(root, "short", DataType::T_SHORT);
+        addPrimitive(root, "predicate", DataType::T_PREDICATE);
+    }
+    // Returns the idx registered for a builtin type; T_DOCUMENT maps to the root document.
+    int builtin(DataType::Type t) {
+        if (t == DataType::T_DOCUMENT) {
+            return _config.doctype[0].idx;
+        }
+        assert(t < _idxOfBuiltins.size());
+        LOG(debug, "lookup builtin %d -> %d", (int)t, _idxOfBuiltins[t]);
+        return _idxOfBuiltins[t];
+    }
+};
+
+BuilderHelper::~BuilderHelper() = default;
+
+TEST("requireThatDocumentTypeCanBeLookedUp") {
+    BuilderHelper builder;
+    auto &doc = builder.document(type_name);
+    doc.internalid = doc_type_id;
+    doc.structtype[0].internalid = header_id;
+    DocumentTypeRepo repo(builder.config());
+
+    const DocumentType *type = repo.getDocumentType(type_name);
+    ASSERT_TRUE(type);
+    EXPECT_EQUAL(type_name, type->getName());
+    EXPECT_EQUAL(doc_type_id, type->getId());
+    EXPECT_EQUAL(header_name, type->getFieldsType().getName());
+    EXPECT_EQUAL(header_id, type->getFieldsType().getId());
+}
+
+TEST("requireThatDocumentTypeCanBeLookedUpWhenIdIsNotAHash") {
+    BuilderHelper builder;
+    auto & doc = builder.document(type_name);
+    doc.internalid = doc_type_id + 2;
+    auto & contents = doc.structtype[0];
+    contents.name = header_name;
+    contents.internalid = header_id + 3;
+    DocumentTypeRepo repo(builder.config());
+
+    const DocumentType *type = repo.getDocumentType(type_name);
+    ASSERT_TRUE(type);
+    EXPECT_EQUAL(type_name, type->getName());
+    EXPECT_EQUAL(doc_type_id + 2, type->getId());
+    EXPECT_EQUAL(header_name, type->getFieldsType().getName());
+    EXPECT_EQUAL(header_id + 3, type->getFieldsType().getId());
+}
+
+TEST("requireThatDocumentsCanHaveFields") {
+    BuilderHelper builder;
+    auto & doc = builder.document(type_name);
+    builder.addField(doc, field_name).type = builder.builtin(DataType::T_INT);
+    DocumentTypeRepo repo(builder.config());
+
+    const StructDataType &s = repo.getDocumentType(type_name)->getFieldsType();
+    ASSERT_EQUAL(1u, s.getFieldCount());
+    const Field &field = s.getField(field_name);
+    EXPECT_EQUAL(DataType::T_INT, field.getDataType().getId());
+}
+
+// Returns the data type of field 'field_name' in document 'type_name', cast to T.
+// Fails the test (rather than dereferencing null) if the document type is
+// missing or the field's data type is not a T.
+template <typename T>
+const T &getFieldDataType(const DocumentTypeRepo &repo) {
+    const DocumentType *doc_type = repo.getDocumentType(type_name);
+    ASSERT_TRUE(doc_type);
+    const DataType &d = doc_type->getFieldsType().getField(field_name).getDataType();
+    const T *t = dynamic_cast<const T *>(&d);
+    ASSERT_TRUE(t);
+    return *t;
+}
+
+TEST("requireThatArraysCanBeConfigured") {
+    BuilderHelper builder;
+    auto & doc = builder.document(type_name);
+    auto & arr = builder.addArray(doc, builder.builtin(DataType::T_STRING));
+    builder.addField(doc, field_name).type = arr.idx;
+    DocumentTypeRepo repo(builder.config());
+
+    const ArrayDataType &a = getFieldDataType<ArrayDataType>(repo);
+    EXPECT_EQUAL(DataType::T_STRING, a.getNestedType().getId());
+}
+
+TEST("requireThatWsetsCanBeConfigured") {
+    BuilderHelper builder;
+    auto & doc = builder.document(type_name);
+    auto & wset = builder.addWset(doc, builder.builtin(DataType::T_INT));
+    wset.removeifzero = true;
+    wset.createifnonexistent = true;
+    builder.addField(doc, field_name).type = wset.idx;
+    DocumentTypeRepo repo(builder.config());
+
+    const WeightedSetDataType &w = getFieldDataType<WeightedSetDataType>(repo);
+    EXPECT_EQUAL(DataType::T_INT, w.getNestedType().getId());
+    EXPECT_TRUE(w.createIfNonExistent());
+    EXPECT_TRUE(w.removeIfZero());
+}
+
+TEST("requireThatMapsCanBeConfigured") {
+    BuilderHelper builder;
+    auto & doc = builder.document(type_name);
+    auto & map = builder.addMap(doc,
+                                builder.builtin(DataType::T_INT),
+                                builder.builtin(DataType::T_STRING));
+    builder.addField(doc, field_name).type = map.idx;
+    DocumentTypeRepo repo(builder.config());
+
+    const MapDataType &m = getFieldDataType<MapDataType>(repo);
+    EXPECT_EQUAL(DataType::T_INT, m.getKeyType().getId());
+    EXPECT_EQUAL(DataType::T_STRING, m.getValueType().getId());
+}
+
+TEST("requireThatAnnotationReferencesCanBeConfigured") {
+    int32_t annotation_type_id = 424;
+    BuilderHelper builder;
+    auto &doc = builder.document(type_name);
+    auto & ann = builder.addAnnotation(doc, "foo");
+    ann.internalid = annotation_type_id;
+    auto & annRef = builder.addAnnotationRef(doc, ann.idx);
+    builder.addField(doc, field_name).type = annRef.idx;
+    DocumentTypeRepo repo(builder.config());
+
+    const AnnotationReferenceDataType &ar = getFieldDataType<AnnotationReferenceDataType>(repo);
+    EXPECT_EQUAL(annotation_type_id, ar.getAnnotationType().getId());
+    EXPECT_EQUAL("foo", ar.getAnnotationType().getName());
+}
+
+TEST("requireThatDocumentsCanInheritFields") {
+    BuilderHelper builder;
+    auto & pdoc = builder.document(type_name);
+    auto & cdoc = builder.document(derived_name);
+    builder.addField(pdoc, field_name).type = builder.builtin(DataType::T_INT);
+    builder.addField(cdoc, "derived_field").type = builder.builtin(DataType::T_STRING);
+    cdoc.inherits.emplace_back().idx = pdoc.idx;
+    DocumentTypeRepo repo(builder.config());
+
+    const StructDataType &s = repo.getDocumentType(derived_name)->getFieldsType();
+    ASSERT_EQUAL(2u, s.getFieldCount());
+    const Field &field = s.getField(field_name);
+    const DataType &type = field.getDataType();
+    EXPECT_EQUAL(DataType::T_INT, type.getId());
+    EXPECT_EQUAL(DataType::T_STRING, s.getField("derived_field").getDataType().getId());
+}
+
+TEST("requireThatDocumentsCanUseInheritedTypes") {
+    const int32_t id = 64;
+    BuilderHelper builder;
+    auto & pdoc = builder.document(type_name);
+    auto & cdoc = builder.document(derived_name);
+    auto & arr = builder.addArray(pdoc, builder.builtin(DataType::T_INT));
+    arr.internalid = id;
+    builder.addField(pdoc, "foo").type = arr.idx;
+    builder.addField(cdoc, field_name).type = arr.idx;
+    cdoc.inherits.emplace_back().idx = pdoc.idx;
+
+    DocumentTypeRepo repo(builder.config());
+
+    const DataType &type =
+        repo.getDocumentType(derived_name)->getFieldsType()
+        .getField(field_name).getDataType();
+    EXPECT_EQUAL(id, type.getId());
+    EXPECT_TRUE(dynamic_cast<const ArrayDataType *>(&type));
+}
+
+TEST("requireThatIllegalConfigsCausesExceptions") {
+    BuilderHelper builder;
+    auto &doc = builder.document(type_name);
+    doc.inherits.emplace_back().idx = 20000;
+    EXPECT_EXCEPTION(DocumentTypeRepo repo(builder.config()),
+                     IllegalArgumentException, "Unable to find document");
+}
+
+TEST("requireThatDataTypesCanBeLookedUpById") {
+    BuilderHelper builder;
+    auto &doc1 = builder.document(type_name);
+    auto &doc2 = builder.document(derived_name);
+    doc1.internalid = doc_type_id;
+    doc1.structtype[0].internalid = header_id;
+    doc2.internalid = doc_type_id + 1;
+    DocumentTypeRepo repo(builder.config());
+
+    const auto * dt1 = repo.getDocumentType(type_name);
+    const auto * dt2 = repo.getDocumentType(derived_name);
+
+    ASSERT_TRUE(dt1);
+    ASSERT_TRUE(dt2);
+    EXPECT_EQUAL(dt1, repo.getDocumentType(doc_type_id));
+    EXPECT_EQUAL(dt2, repo.getDocumentType(doc_type_id + 1));
+
+    const DataType *type = repo.getDataType(*dt1, header_id);
+    ASSERT_TRUE(type);
+    EXPECT_EQUAL(header_name, type->getName());
+    EXPECT_EQUAL(header_id, type->getId());
+
+    EXPECT_TRUE(!repo.getDataType(*dt1, -1));
+    EXPECT_TRUE(!repo.getDataType(*dt2, header_id));
+}
+
+TEST("requireThatDataTypesCanBeLookedUpByName") {
+    BuilderHelper builder;
+    auto &doc1 = builder.document(type_name);
+    doc1.structtype[0].internalid = header_id;
+    builder.document(type_name_2);
+    DocumentTypeRepo repo(builder.config());
+
+    const DocumentType * dt1 = repo.getDocumentType(type_name);
+    const DocumentType * dt2 = repo.getDocumentType(type_name_2);
+    ASSERT_TRUE(dt1);
+    ASSERT_TRUE(dt2);
+
+    const DataType *type = repo.getDataType(*dt1, header_name);
+    ASSERT_TRUE(type);
+    EXPECT_EQUAL(header_name, type->getName());
+    EXPECT_EQUAL(header_id, type->getId());
+
+    EXPECT_TRUE(repo.getDataType(*dt1, header_name));
+    EXPECT_TRUE(!repo.getDataType(*dt1, field_name));
+    EXPECT_TRUE(!repo.getDataType(*dt2, header_name));
+}
+
+TEST("requireThatInheritingDocCanRedefineIdenticalField") {
+    BuilderHelper builder;
+
+    auto & pdoc = builder.document(type_name);
+    auto & cdoc = builder.document(derived_name);
+    builder.addField(pdoc, field_name).type = builder.builtin(DataType::T_STRING);
+
+    builder.addField(cdoc, field_name).type = builder.builtin(DataType::T_STRING);
+    cdoc.inherits.emplace_back().idx = pdoc.idx;
+
+    DocumentTypeRepo repo(builder.config());
+
+    const StructDataType &s = repo.getDocumentType(derived_name)->getFieldsType();
+    ASSERT_EQUAL(1u, s.getFieldCount());
+}
+
+TEST("requireThatAnnotationTypesCanBeConfigured") {
+    const int32_t a_id = 654;
+    const string a_name = "annotation_name";
+    BuilderHelper builder;
+    auto & doc = builder.document(type_name);
+    auto & ann = builder.addAnnotation(doc, a_name);
+    ann.internalid = a_id;
+    ann.datatype = builder.builtin(DataType::T_STRING);
+
+    DocumentTypeRepo repo(builder.config());
+
+    const DocumentType *type = repo.getDocumentType(type_name);
+    ASSERT_TRUE(type);
+    const AnnotationType *a_type = repo.getAnnotationType(*type, a_id);
+    ASSERT_TRUE(a_type);
+    EXPECT_EQUAL(a_name, a_type->getName());
+    ASSERT_TRUE(a_type->getDataType());
+    EXPECT_EQUAL(DataType::T_STRING, a_type->getDataType()->getId());
+
+    a_type = repo.getAnnotationType(*type, 1);
+    ASSERT_TRUE(a_type);
+    EXPECT_EQUAL(1, a_type->getId());
+    EXPECT_EQUAL("term", a_type->getName());
+    a_type = repo.getAnnotationType(*type, 2);
+    ASSERT_TRUE(a_type);
+    EXPECT_EQUAL(2, a_type->getId());
+    EXPECT_EQUAL("token_type", a_type->getName());
+}
+
+TEST("requireThatDocumentsCanUseOtherDocumentTypes") {
+    BuilderHelper builder;
+    auto &doc2 = builder.document(type_name_2);
+    doc2.internalid = doc_type_id + 1;
+    auto &doc1 = builder.document(type_name);
+    builder.addField(doc1, field_name).type = doc2.idx;
+    DocumentTypeRepo repo(builder.config());
+
+    const DataType &type = repo.getDocumentType(type_name)->getFieldsType()
+                           .getField(field_name).getDataType();
+    EXPECT_EQUAL(doc_type_id + 1, type.getId());
+    EXPECT_TRUE(dynamic_cast<const DocumentType *>(&type));
+}
+
+TEST("requireThatDocumentTypesCanBeIterated") {
+    BuilderHelper builder;
+    builder.document(type_name).internalid = doc_type_id;
+    builder.document(type_name_2).internalid = doc_type_id + 1;
+    DocumentTypeRepo repo(builder.config());
+
+    set<int> ids;
+    repo.forEachDocumentType(
+            [&ids](const DocumentType &type) { ids.insert(type.getId()); });
+
+    EXPECT_EQUAL(3u, ids.size());
+    ASSERT_TRUE(ids.count(DataType::T_DOCUMENT));
+    ASSERT_TRUE(ids.count(doc_type_id));
+    ASSERT_TRUE(ids.count(doc_type_id + 1));
+}
+
+TEST("requireThatDocumentLookupChecksName") {
+    BuilderHelper builder;
+    auto &doc = builder.document(type_name_2);
+    doc.internalid = doc_type_id;
+    DocumentTypeRepo repo(builder.config());
+
+    // "type_name" will generate the document type id
+    // "doc_type_id". However, this config assigns that id to a
+    // different type.
+    const DocumentType *type = repo.getDocumentType(type_name);
+    ASSERT_TRUE(!type);
+}
+
+TEST("requireThatBuildFromConfigWorks") {
+    DocumentTypeRepo repo(readDocumenttypesConfig(TEST_PATH("types.cfg")));
+    ASSERT_TRUE(repo.getDocumentType("document"));
+    ASSERT_TRUE(repo.getDocumentType("types"));
+}
+
+TEST("requireThatStructsCanInheritFields") {
+    BuilderHelper builder;
+    auto & doc = builder.document(type_name);
+    auto & st1 = builder.addStruct(doc, "sa");
+    auto & st2 = builder.addStruct(doc, "sb");
+    auto & st3 = builder.addStruct(doc, "sc");
+    builder.addField(st1, "fa").type = builder.builtin(DataType::T_INT);
+    builder.addField(st2, "fb").type = builder.builtin(DataType::T_LONG);
+    builder.addField(st3, "fc").type = builder.builtin(DataType::T_STRING);
+    st1.inherits.emplace_back().type = st2.idx;
+    st2.inherits.emplace_back().type = st3.idx;
+    builder.addField(doc, field_name).type = st1.idx;
+    DocumentTypeRepo repo(builder.config());
+    const StructDataType &s = getFieldDataType<StructDataType>(repo);
+    EXPECT_EQUAL(3u, s.getFieldCount());
+    ASSERT_TRUE(s.hasField("fa"));
+    ASSERT_TRUE(s.hasField("fb"));
+    ASSERT_TRUE(s.hasField("fc"));
+}
+
+TEST("requireThatStructsCanBeRecursive") {
+    BuilderHelper builder;
+    auto & doc = builder.document(type_name);
+    auto & st = builder.addStruct(doc, "folder");
+    builder.addField(st, "subfolder").type = st.idx;
+    builder.addField(doc, field_name).type = st.idx;
+    DocumentTypeRepo repo(builder.config());
+
+    const StructDataType &s = getFieldDataType<StructDataType>(repo);
+    EXPECT_EQUAL(1u, s.getFieldCount());
+    ASSERT_TRUE(s.hasField("subfolder"));
+    EXPECT_EQUAL(&s, &s.getField("subfolder").getDataType());
+}
+
+}  // namespace
+
+TEST("requireThatMissingFileCausesException") {
+    EXPECT_EXCEPTION(readDocumenttypesConfig("illegal/missing_file"),
+                     IllegalArgumentException, "Unable to open file");
+}
+
+TEST("requireThatFieldsCanHaveAnyDocumentType") {
+    BuilderHelper builder;
+    auto &doc1 = builder.document(type_name);
+    auto &doc2 = builder.document(type_name_2);
+
+    // Circular dependency
+    builder.addField(doc1, field_name).type = doc2.idx;
+    builder.addField(doc2, field_name).type = doc1.idx;
+
+    DocumentTypeRepo repo(builder.config());
+    const DocumentType *type1 = repo.getDocumentType(type_name);
+    const DocumentType *type2 = repo.getDocumentType(type_name_2);
+    ASSERT_TRUE(type1);
+    EXPECT_TRUE(type1->getFieldsType().hasField(field_name));
+    EXPECT_EQUAL(type2, &type1->getFieldsType().getField(field_name).getDataType());
+    ASSERT_TRUE(type2);
+    EXPECT_TRUE(type2->getFieldsType().hasField(field_name));
+    EXPECT_EQUAL(type1, &type2->getFieldsType().getField(field_name).getDataType());
+}
+
+TEST("Require that Array can have nested DocumentType") {
+    BuilderHelper builder;
+    auto &doc = builder.document(type_name);
+    auto &arr = builder.addArray(doc, doc.idx);
+    builder.addField(doc, field_name).type = arr.idx;
+    DocumentTypeRepo repo(builder.config());
+    const DocumentType *type = repo.getDocumentType(type_name);
+    ASSERT_TRUE(type);
+}
+
+TEST("Reference fields are resolved to correct reference type") {
+    const int doc_with_refs_id = 5678;
+    const int ref1_id = 777;
+    const int ref2_id = 888;
+    BuilderHelper builder;
+    auto & doc1 = builder.document(type_name);
+    auto & doc2 = builder.document(type_name_2);
+    auto & doc3 = builder.document("doc_with_refs");
+    doc3.internalid = doc_with_refs_id;
+    auto & refT1 = builder.addDocumentRef(doc3, doc1.idx);
+    refT1.internalid = ref1_id;
+    auto & refT2 = builder.addDocumentRef(doc3, doc2.idx);
+    refT2.internalid = ref2_id;
+    builder.addField(doc3, "ref1").type = refT1.idx;
+    builder.addField(doc3, "ref2").type = refT2.idx;
+    builder.addField(doc3, "ref3").type = refT1.idx;
+
+    DocumentTypeRepo repo(builder.config());
+    const DocumentType *type = repo.getDocumentType(doc_with_refs_id);
+    ASSERT_TRUE(type != nullptr);
+    const auto* ref1_type(repo.getDataType(*type, ref1_id));
+    const auto* ref2_type(repo.getDataType(*type, ref2_id));
+    // Stop here on a failed lookup instead of dereferencing a null pointer below.
+    ASSERT_TRUE(ref1_type != nullptr);
+    ASSERT_TRUE(ref2_type != nullptr);
+
+    EXPECT_EQUAL(*ref1_type, type->getFieldsType().getField("ref1").getDataType());
+    EXPECT_EQUAL(*ref2_type, type->getFieldsType().getField("ref2").getDataType());
+    EXPECT_EQUAL(*ref1_type, type->getFieldsType().getField("ref3").getDataType());
+}
+
+TEST("Config with no imported fields has empty imported fields set in DocumentType") {
+    BuilderHelper builder;
+    builder.document(type_name);
+    DocumentTypeRepo repo(builder.config());
+    const auto *type = repo.getDocumentType(type_name);
+    ASSERT_TRUE(type != nullptr);
+    EXPECT_TRUE(type->imported_field_names().empty());
+    EXPECT_FALSE(type->has_imported_field_name("foo"));
+}
+
+TEST("Configured imported field names are available in the DocumentType") {
+    // Note: we cheat a bit by specifying imported field names in types that have no
+    // reference fields. Add to test if we add config read-time validation of this. :)
+    BuilderHelper builder;
+    // Type with one imported field
+    builder.document(type_name).importedfield.emplace_back().name = "my_cool_field";
+    // Type with two imported fields
+    auto & doc2 = builder.document(type_name_2);
+    doc2.importedfield.emplace_back().name = "my_awesome_field";
+    doc2.importedfield.emplace_back().name = "my_swag_field";
+
+    DocumentTypeRepo repo(builder.config());
+    const auto* type = repo.getDocumentType(type_name);
+    ASSERT_TRUE(type != nullptr);
+    EXPECT_EQUAL(1u, type->imported_field_names().size());
+    EXPECT_TRUE(type->has_imported_field_name("my_cool_field"));
+    EXPECT_FALSE(type->has_imported_field_name("my_awesome_field"));
+
+    type = repo.getDocumentType(type_name_2);
+    ASSERT_TRUE(type != nullptr);
+    EXPECT_EQUAL(2u, type->imported_field_names().size());
+    EXPECT_TRUE(type->has_imported_field_name("my_awesome_field"));
+    EXPECT_TRUE(type->has_imported_field_name("my_swag_field"));
+    EXPECT_FALSE(type->has_imported_field_name("my_cool_field"));
+}
+
+namespace {
+
+const TensorDataType &
+asTensorDataType(const DataType
&dataType) { + return dynamic_cast<const TensorDataType &>(dataType); +} + +} + +TEST("Tensor fields have tensor types") { + BuilderHelper builder; + auto & doc = builder.document(type_name); + auto & t1t = builder.addTensorType(doc, "tensor(x[3])"); + auto & t2t = builder.addTensorType(doc, "tensor(y{})"); + builder.addField(doc, "tensor1").type = t1t.idx; + builder.addField(doc, "tensor2").type = t2t.idx; + builder.addField(doc, "tensor3").type = t1t.idx; + + DocumentTypeRepo repo(builder.config()); + auto *docType = repo.getDocumentType(type_name); + ASSERT_TRUE(docType != nullptr); + auto &tensorField1 = docType->getField("tensor1"); + auto &tensorField2 = docType->getField("tensor2"); + EXPECT_EQUAL("tensor(x[3])", asTensorDataType(tensorField1.getDataType()).getTensorType().to_spec()); + EXPECT_EQUAL("tensor(y{})", asTensorDataType(tensorField2.getDataType()).getTensorType().to_spec()); + auto &tensorField3 = docType->getField("tensor3"); + EXPECT_TRUE(&tensorField1.getDataType() == &tensorField3.getDataType()); + auto tensorFieldValue1 = tensorField1.getDataType().createFieldValue(); + EXPECT_TRUE(&tensorField1.getDataType() == tensorFieldValue1->getDataType()); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/document/src/tests/repo/types.cfg b/document/src/tests/repo/types.cfg new file mode 100644 index 00000000000..cab69f4b1aa --- /dev/null +++ b/document/src/tests/repo/types.cfg @@ -0,0 +1,326 @@ +enablecompression false +usev8geopositions false +doctype[0].name "document" +doctype[0].idx 10000 +doctype[0].internalid 8 +doctype[0].contentstruct 10001 +doctype[0].primitivetype[0].idx 10002 +doctype[0].primitivetype[0].name "bool" +doctype[0].primitivetype[1].idx 10003 +doctype[0].primitivetype[1].name "byte" +doctype[0].primitivetype[2].idx 10004 +doctype[0].primitivetype[2].name "double" +doctype[0].primitivetype[3].idx 10005 +doctype[0].primitivetype[3].name "float" +doctype[0].primitivetype[4].idx 10006 +doctype[0].primitivetype[4].name "float16" 
+doctype[0].primitivetype[5].idx 10007 +doctype[0].primitivetype[5].name "int" +doctype[0].primitivetype[6].idx 10008 +doctype[0].primitivetype[6].name "long" +doctype[0].primitivetype[7].idx 10010 +doctype[0].primitivetype[7].name "predicate" +doctype[0].primitivetype[8].idx 10011 +doctype[0].primitivetype[8].name "raw" +doctype[0].primitivetype[9].idx 10012 +doctype[0].primitivetype[9].name "string" +doctype[0].primitivetype[10].idx 10014 +doctype[0].primitivetype[10].name "uri" +doctype[0].wsettype[0].idx 10013 +doctype[0].wsettype[0].elementtype 10012 +doctype[0].wsettype[0].createifnonexistent true +doctype[0].wsettype[0].removeifzero true +doctype[0].wsettype[0].internalid 18 +doctype[0].structtype[0].idx 10001 +doctype[0].structtype[0].name "document.header" +doctype[0].structtype[0].internalid -284186494 +doctype[0].structtype[1].idx 10009 +doctype[0].structtype[1].name "position" +doctype[0].structtype[1].field[0].name "x" +doctype[0].structtype[1].field[0].internalid 914677694 +doctype[0].structtype[1].field[0].type 10007 +doctype[0].structtype[1].field[1].name "y" +doctype[0].structtype[1].field[1].internalid 900009410 +doctype[0].structtype[1].field[1].type 10007 +doctype[0].structtype[1].internalid 1381038251 +doctype[1].name "types" +doctype[1].idx 10015 +doctype[1].internalid -853072901 +doctype[1].inherits[0].idx 10000 +doctype[1].contentstruct 10016 +doctype[1].fieldsets{[document]}.fields[0] "Folders" +doctype[1].fieldsets{[document]}.fields[1] "abool" +doctype[1].fieldsets{[document]}.fields[2] "abyte" +doctype[1].fieldsets{[document]}.fields[3] "album0" +doctype[1].fieldsets{[document]}.fields[4] "album1" +doctype[1].fieldsets{[document]}.fields[5] "along" +doctype[1].fieldsets{[document]}.fields[6] "arrarr" +doctype[1].fieldsets{[document]}.fields[7] "arrayfield" +doctype[1].fieldsets{[document]}.fields[8] "arraymapfield" +doctype[1].fieldsets{[document]}.fields[9] "ashortfloat" +doctype[1].fieldsets{[document]}.fields[10] "complexarray" 
+doctype[1].fieldsets{[document]}.fields[11] "doublemapfield" +doctype[1].fieldsets{[document]}.fields[12] "floatmapfield" +doctype[1].fieldsets{[document]}.fields[13] "intmapfield" +doctype[1].fieldsets{[document]}.fields[14] "juletre" +doctype[1].fieldsets{[document]}.fields[15] "longmapfield" +doctype[1].fieldsets{[document]}.fields[16] "maparr" +doctype[1].fieldsets{[document]}.fields[17] "mystructarr" +doctype[1].fieldsets{[document]}.fields[18] "mystructfield" +doctype[1].fieldsets{[document]}.fields[19] "mystructmap" +doctype[1].fieldsets{[document]}.fields[20] "setfield" +doctype[1].fieldsets{[document]}.fields[21] "setfield2" +doctype[1].fieldsets{[document]}.fields[22] "setfield3" +doctype[1].fieldsets{[document]}.fields[23] "setfield4" +doctype[1].fieldsets{[document]}.fields[24] "stringmapfield" +doctype[1].fieldsets{[document]}.fields[25] "structarrayfield" +doctype[1].fieldsets{[document]}.fields[26] "structfield" +doctype[1].fieldsets{[document]}.fields[27] "tagfield" +doctype[1].arraytype[0].idx 10017 +doctype[1].arraytype[0].elementtype 10007 +doctype[1].arraytype[0].internalid -1245117006 +doctype[1].arraytype[1].idx 10024 +doctype[1].arraytype[1].elementtype 10023 +doctype[1].arraytype[1].internalid -1244829667 +doctype[1].arraytype[2].idx 10031 +doctype[1].arraytype[2].elementtype 10007 +doctype[1].arraytype[2].internalid -1245117006 +doctype[1].arraytype[3].idx 10032 +doctype[1].arraytype[3].elementtype 10033 +doctype[1].arraytype[3].internalid -794985308 +doctype[1].arraytype[4].idx 10033 +doctype[1].arraytype[4].elementtype 10034 +doctype[1].arraytype[4].internalid 1707615575 +doctype[1].arraytype[5].idx 10034 +doctype[1].arraytype[5].elementtype 10012 +doctype[1].arraytype[5].internalid -1486737430 +doctype[1].arraytype[6].idx 10035 +doctype[1].arraytype[6].elementtype 10036 +doctype[1].arraytype[6].internalid 69621385 +doctype[1].arraytype[7].idx 10037 +doctype[1].arraytype[7].elementtype 10038 +doctype[1].arraytype[7].internalid 1416345047 
+doctype[1].arraytype[8].idx 10039 +doctype[1].arraytype[8].elementtype 10040 +doctype[1].arraytype[8].internalid 1707615575 +doctype[1].arraytype[9].idx 10040 +doctype[1].arraytype[9].elementtype 10012 +doctype[1].arraytype[9].internalid -1486737430 +doctype[1].arraytype[10].idx 10042 +doctype[1].arraytype[10].elementtype 10003 +doctype[1].arraytype[10].internalid 49942803 +doctype[1].arraytype[11].idx 10045 +doctype[1].arraytype[11].elementtype 10041 +doctype[1].arraytype[11].internalid 759956026 +doctype[1].maptype[0].idx 10025 +doctype[1].maptype[0].keytype 10012 +doctype[1].maptype[0].valuetype 10012 +doctype[1].maptype[0].internalid 339965458 +doctype[1].maptype[1].idx 10026 +doctype[1].maptype[1].keytype 10012 +doctype[1].maptype[1].valuetype 10007 +doctype[1].maptype[1].internalid -1584287606 +doctype[1].maptype[2].idx 10027 +doctype[1].maptype[2].keytype 10012 +doctype[1].maptype[2].valuetype 10005 +doctype[1].maptype[2].internalid 2125154557 +doctype[1].maptype[3].idx 10028 +doctype[1].maptype[3].keytype 10007 +doctype[1].maptype[3].valuetype 10008 +doctype[1].maptype[3].internalid -1715531035 +doctype[1].maptype[4].idx 10029 +doctype[1].maptype[4].keytype 10007 +doctype[1].maptype[4].valuetype 10004 +doctype[1].maptype[4].internalid 2138385264 +doctype[1].maptype[5].idx 10030 +doctype[1].maptype[5].keytype 10012 +doctype[1].maptype[5].valuetype 10031 +doctype[1].maptype[5].internalid 435886609 +doctype[1].maptype[6].idx 10036 +doctype[1].maptype[6].keytype 10012 +doctype[1].maptype[6].valuetype 10012 +doctype[1].maptype[6].internalid 339965458 +doctype[1].maptype[7].idx 10038 +doctype[1].maptype[7].keytype 10007 +doctype[1].maptype[7].valuetype 10039 +doctype[1].maptype[7].internalid -372512406 +doctype[1].maptype[8].idx 10043 +doctype[1].maptype[8].keytype 10012 +doctype[1].maptype[8].valuetype 10012 +doctype[1].maptype[8].internalid 339965458 +doctype[1].maptype[9].idx 10044 +doctype[1].maptype[9].keytype 10007 +doctype[1].maptype[9].valuetype 10041 
+doctype[1].maptype[9].internalid 1901258752 +doctype[1].maptype[10].idx 10046 +doctype[1].maptype[10].keytype 10007 +doctype[1].maptype[10].valuetype 10047 +doctype[1].maptype[10].internalid -389833101 +doctype[1].maptype[11].idx 10048 +doctype[1].maptype[11].keytype 10012 +doctype[1].maptype[11].valuetype 10008 +doctype[1].maptype[11].internalid -1865479609 +doctype[1].wsettype[0].idx 10018 +doctype[1].wsettype[0].elementtype 10012 +doctype[1].wsettype[0].createifnonexistent false +doctype[1].wsettype[0].removeifzero false +doctype[1].wsettype[0].internalid 1328286588 +doctype[1].wsettype[1].idx 10019 +doctype[1].wsettype[1].elementtype 10012 +doctype[1].wsettype[1].createifnonexistent true +doctype[1].wsettype[1].removeifzero true +doctype[1].wsettype[1].internalid 18 +doctype[1].wsettype[2].idx 10020 +doctype[1].wsettype[2].elementtype 10012 +doctype[1].wsettype[2].createifnonexistent false +doctype[1].wsettype[2].removeifzero true +doctype[1].wsettype[2].internalid 2125328771 +doctype[1].wsettype[3].idx 10021 +doctype[1].wsettype[3].elementtype 10012 +doctype[1].wsettype[3].createifnonexistent true +doctype[1].wsettype[3].removeifzero false +doctype[1].wsettype[3].internalid 2065577986 +doctype[1].wsettype[4].idx 10022 +doctype[1].wsettype[4].elementtype 10012 +doctype[1].wsettype[4].createifnonexistent true +doctype[1].wsettype[4].removeifzero true +doctype[1].wsettype[4].internalid 18 +doctype[1].wsettype[5].idx 10049 +doctype[1].wsettype[5].elementtype 10012 +doctype[1].wsettype[5].createifnonexistent true +doctype[1].wsettype[5].removeifzero true +doctype[1].wsettype[5].internalid 18 +doctype[1].wsettype[6].idx 10050 +doctype[1].wsettype[6].elementtype 10012 +doctype[1].wsettype[6].createifnonexistent true +doctype[1].wsettype[6].removeifzero true +doctype[1].wsettype[6].internalid 18 +doctype[1].structtype[0].idx 10023 +doctype[1].structtype[0].name "sct" +doctype[1].structtype[0].field[0].name "s1" +doctype[1].structtype[0].field[0].internalid 2146820765 
+doctype[1].structtype[0].field[0].type 10012 +doctype[1].structtype[0].field[1].name "s2" +doctype[1].structtype[0].field[1].internalid 45366795 +doctype[1].structtype[0].field[1].type 10012 +doctype[1].structtype[0].internalid 109267174 +doctype[1].structtype[1].idx 10041 +doctype[1].structtype[1].name "mystruct" +doctype[1].structtype[1].field[0].name "bytearr" +doctype[1].structtype[1].field[0].internalid 1079701754 +doctype[1].structtype[1].field[0].type 10042 +doctype[1].structtype[1].field[1].name "mymap" +doctype[1].structtype[1].field[1].internalid 1954178122 +doctype[1].structtype[1].field[1].type 10043 +doctype[1].structtype[1].field[2].name "title" +doctype[1].structtype[1].field[2].internalid 567626448 +doctype[1].structtype[1].field[2].type 10012 +doctype[1].structtype[1].field[3].name "structfield" +doctype[1].structtype[1].field[3].internalid 1726890940 +doctype[1].structtype[1].field[3].type 10012 +doctype[1].structtype[1].internalid -2092985853 +doctype[1].structtype[2].idx 10047 +doctype[1].structtype[2].name "folder" +doctype[1].structtype[2].field[0].name "Version" +doctype[1].structtype[2].field[0].internalid 64430502 +doctype[1].structtype[2].field[0].type 10007 +doctype[1].structtype[2].field[1].name "Name" +doctype[1].structtype[2].field[1].internalid 2002760220 +doctype[1].structtype[2].field[1].type 10012 +doctype[1].structtype[2].field[2].name "FlagsCounter" +doctype[1].structtype[2].field[2].internalid 1741227606 +doctype[1].structtype[2].field[2].type 10048 +doctype[1].structtype[2].field[3].name "anotherfolder" +doctype[1].structtype[2].field[3].internalid 1582421848 +doctype[1].structtype[2].field[3].type 10047 +doctype[1].structtype[2].internalid 294108848 +doctype[1].structtype[3].idx 10016 +doctype[1].structtype[3].name "types.header" +doctype[1].structtype[3].field[0].name "abyte" +doctype[1].structtype[3].field[0].internalid 110138156 +doctype[1].structtype[3].field[0].type 10003 +doctype[1].structtype[3].field[1].name "along" 
+doctype[1].structtype[3].field[1].internalid 1206464520 +doctype[1].structtype[3].field[1].type 10008 +doctype[1].structtype[3].field[2].name "abool" +doctype[1].structtype[3].field[2].internalid 492328000 +doctype[1].structtype[3].field[2].type 10002 +doctype[1].structtype[3].field[3].name "ashortfloat" +doctype[1].structtype[3].field[3].internalid 1012106297 +doctype[1].structtype[3].field[3].type 10006 +doctype[1].structtype[3].field[4].name "arrayfield" +doctype[1].structtype[3].field[4].internalid 965790107 +doctype[1].structtype[3].field[4].type 10017 +doctype[1].structtype[3].field[5].name "setfield" +doctype[1].structtype[3].field[5].internalid 761581914 +doctype[1].structtype[3].field[5].type 10018 +doctype[1].structtype[3].field[6].name "setfield2" +doctype[1].structtype[3].field[6].internalid 1066659198 +doctype[1].structtype[3].field[6].type 10019 +doctype[1].structtype[3].field[7].name "setfield3" +doctype[1].structtype[3].field[7].internalid 1180155772 +doctype[1].structtype[3].field[7].type 10020 +doctype[1].structtype[3].field[8].name "setfield4" +doctype[1].structtype[3].field[8].internalid 1254131631 +doctype[1].structtype[3].field[8].type 10021 +doctype[1].structtype[3].field[9].name "tagfield" +doctype[1].structtype[3].field[9].internalid 1653562069 +doctype[1].structtype[3].field[9].type 10022 +doctype[1].structtype[3].field[10].name "structfield" +doctype[1].structtype[3].field[10].internalid 486207386 +doctype[1].structtype[3].field[10].type 10023 +doctype[1].structtype[3].field[11].name "structarrayfield" +doctype[1].structtype[3].field[11].internalid 335048518 +doctype[1].structtype[3].field[11].type 10024 +doctype[1].structtype[3].field[12].name "stringmapfield" +doctype[1].structtype[3].field[12].internalid 117465687 +doctype[1].structtype[3].field[12].type 10025 +doctype[1].structtype[3].field[13].name "intmapfield" +doctype[1].structtype[3].field[13].internalid 121004462 +doctype[1].structtype[3].field[13].type 10026 
+doctype[1].structtype[3].field[14].name "floatmapfield" +doctype[1].structtype[3].field[14].internalid 1239120925 +doctype[1].structtype[3].field[14].type 10027 +doctype[1].structtype[3].field[15].name "longmapfield" +doctype[1].structtype[3].field[15].internalid 477718745 +doctype[1].structtype[3].field[15].type 10028 +doctype[1].structtype[3].field[16].name "doublemapfield" +doctype[1].structtype[3].field[16].internalid 877047192 +doctype[1].structtype[3].field[16].type 10029 +doctype[1].structtype[3].field[17].name "arraymapfield" +doctype[1].structtype[3].field[17].internalid 1670805928 +doctype[1].structtype[3].field[17].type 10030 +doctype[1].structtype[3].field[18].name "arrarr" +doctype[1].structtype[3].field[18].internalid 1962567166 +doctype[1].structtype[3].field[18].type 10032 +doctype[1].structtype[3].field[19].name "maparr" +doctype[1].structtype[3].field[19].internalid 904375219 +doctype[1].structtype[3].field[19].type 10035 +doctype[1].structtype[3].field[20].name "complexarray" +doctype[1].structtype[3].field[20].internalid 795629533 +doctype[1].structtype[3].field[20].type 10037 +doctype[1].structtype[3].field[21].name "mystructfield" +doctype[1].structtype[3].field[21].internalid 1348513378 +doctype[1].structtype[3].field[21].type 10041 +doctype[1].structtype[3].field[22].name "mystructmap" +doctype[1].structtype[3].field[22].internalid 1511423250 +doctype[1].structtype[3].field[22].type 10044 +doctype[1].structtype[3].field[23].name "mystructarr" +doctype[1].structtype[3].field[23].internalid 595856991 +doctype[1].structtype[3].field[23].type 10045 +doctype[1].structtype[3].field[24].name "Folders" +doctype[1].structtype[3].field[24].internalid 34575524 +doctype[1].structtype[3].field[24].type 10046 +doctype[1].structtype[3].field[25].name "juletre" +doctype[1].structtype[3].field[25].internalid 1039981530 +doctype[1].structtype[3].field[25].type 10008 +doctype[1].structtype[3].field[26].name "album0" 
+doctype[1].structtype[3].field[26].internalid 764312262 +doctype[1].structtype[3].field[26].type 10049 +doctype[1].structtype[3].field[27].name "album1" +doctype[1].structtype[3].field[27].internalid 1967160809 +doctype[1].structtype[3].field[27].type 10050 +doctype[1].structtype[3].field[28].name "other" +doctype[1].structtype[3].field[28].internalid 2443357 +doctype[1].structtype[3].field[28].type 10008 +doctype[1].structtype[3].internalid 1328581348 diff --git a/document/src/vespa/document/config/documenttypes.def b/document/src/vespa/document/config/documenttypes.def index 2e0483f025b..202447295c3 100644 --- a/document/src/vespa/document/config/documenttypes.def +++ b/document/src/vespa/document/config/documenttypes.def @@ -44,7 +44,7 @@ documenttype[].datatype[].map.value.id int default=0 ## This is the id of the datatype of the key in the wset. documenttype[].datatype[].wset.key.id int default=0 -## Should an update to a nonexistent element cause it to be created +## Should an update to a nonexistent element cause it to be created documenttype[].datatype[].wset.createifnonexistent bool default=false ## Should an element in a weighted set be removed if an update changes the weight to 0 @@ -110,3 +110,176 @@ documenttype[].referencetype[].target_type_id int ## Imported fields (specified outside the document block in the schema) documenttype[].importedfield[].name string + + +# Here starts a new model for how datatypes are configured, where +# everything is per document-type, and each documenttype contains the +# datatypes it defines. + +# Note: we will include the built-in "document" document +# type that all other doctypes inherit from also, in order +# to get all the primitive and built-in types declared +# with an idx we can refer to. + +# Note: indexes are only meaningful as internal references in this +# config; they will typically be sequential (1,2,3,...) in the order +# that they are generated (but nothing should depend on that). 
+ + +## Name of the document type. Must be unique. +doctype[].name string + +## Index of this type (as a datatype which can be referred to). +doctype[].idx int + +## Internal ID of this datatype +doctype[].internalid int + +## Specify document types to inherit +doctype[].inherits[].idx int + +## Index of struct defining document fields +doctype[].contentstruct int + +## Field sets available for this document type +doctype[].fieldsets{}.fields[] string + +## Imported fields (specified outside the document block in the schema) +doctype[].importedfield[].name string + +# Everything below here is configuration of data types defined by +# this document type. + +# Primitive types must be present as built-in static members. + +## Index of primitive type +doctype[].primitivetype[].idx int + +## The name of this primitive type +doctype[].primitivetype[].name string + + +# Arrays are the simplest collection type: + +## Index of this array type +doctype[].arraytype[].idx int + +## Index of the element type this array type contains +doctype[].arraytype[].elementtype int + +## Internal ID of this datatype +doctype[].arraytype[].internalid int + + +# Maps are another collection type: + +## Index of this map type +doctype[].maptype[].idx int + +## Index of the key type used by this map type +doctype[].maptype[].keytype int + +## Index of the value type used by this map type +doctype[].maptype[].valuetype int + +## Internal ID of this datatype +doctype[].maptype[].internalid int + + +# Weighted sets are more complicated; +# they can be considered as a collection +# of unique elements where each element has +# an associated weight: + +## Index of this weighted set type +doctype[].wsettype[].idx int + +## Index of the element types contained in this weighted set type +doctype[].wsettype[].elementtype int + +## Should an update to a nonexistent element cause it to be created +doctype[].wsettype[].createifnonexistent bool default=false + +## Should an element in a weighted set be 
removed if an update changes the weight to 0 +doctype[].wsettype[].removeifzero bool default=false + +## Internal ID of this datatype +doctype[].wsettype[].internalid int + + +# Tensors have their own type system + +## Index of this tensor type +doctype[].tensortype[].idx int + +## Description of the type of the actual tensors contained +doctype[].tensortype[].detailedtype string + + +# Document references refer to parent documents that a document can +# import fields from: + +## Index of this reference data type: +doctype[].documentref[].idx int + +## Index of the document type this reference type refers to: +doctype[].documentref[].targettype int + +## Internal ID of this datatype +doctype[].documentref[].internalid int + + +# Annotation types are another world, but are modeled here +# as if they were also datatypes contained inside document types: + +## Index of an annotation type. +doctype[].annotationtype[].idx int + +## Name of the annotation type. +doctype[].annotationtype[].name string + +## Internal id of this annotation type +doctype[].annotationtype[].internalid int + +## Index of contained datatype of the annotation type, if any +doctype[].annotationtype[].datatype int default=-1 + +## Index of annotation type that this type inherits. +doctype[].annotationtype[].inherits[].idx int + + +# Annotation references are field values referring to +# an annotation of a certain annotation type. + +## Index of this annotation reference type +doctype[].annotationref[].idx int + +## Index of the annotation type this annotation reference type refers to +doctype[].annotationref[].annotationtype int + +## Internal ID of this datatype +doctype[].annotationref[].internalid int + + +# A struct is just a named collection of fields: + +## Index of this struct type +doctype[].structtype[].idx int + +## Name of the struct type. Must be unique within documenttype. 
+doctype[].structtype[].name string + +## Index of another struct type to inherit +doctype[].structtype[].inherits[].type int + +## Name of a struct field. Must be unique within the struct type. +doctype[].structtype[].field[].name string + +## The "field id" - used in serialized format! +doctype[].structtype[].field[].internalid int + +## Index of the type of this field +doctype[].structtype[].field[].type int + +## Internal ID of this datatype +doctype[].structtype[].internalid int diff --git a/document/src/vespa/document/repo/documenttyperepo.cpp b/document/src/vespa/document/repo/documenttyperepo.cpp index 312ce027543..d8f272d5d55 100644 --- a/document/src/vespa/document/repo/documenttyperepo.cpp +++ b/document/src/vespa/document/repo/documenttyperepo.cpp @@ -17,6 +17,7 @@ #include <vespa/document/config/config-documenttypes.h> #include <fstream> #include <cassert> +#include <set> #include <vespa/log/log.h> LOG_SETUP(".documenttyperepo"); @@ -74,7 +75,7 @@ public: void inherit(const Repo &parent); bool addDataType(const DataType &type); - template <typename T> void addDataType(unique_ptr<T> type); + template <typename T> const DataType * addDataType(unique_ptr<T> type); const DataType &addTensorType(const string &spec); const DataType *lookup(int32_t id) const; @@ -108,14 +109,17 @@ bool Repo::addDataType(const DataType &type) { } data_type = &type; data_type_by_name = &type; + LOG(spam, "Added data type to repo: %s [%d]", type.getName().c_str(), type.getId()); return true; } template <typename T> -void Repo::addDataType(unique_ptr<T> type) { +const DataType* Repo::addDataType(unique_ptr<T> type) { + int id = type->getId(); if (addDataType(*type)) { _owned_types.push_back(type.release()); } + return _types[id]; } @@ -172,7 +176,7 @@ public: ~AnnotationTypeRepo() { DeleteContent(_owned_types); } void inherit(const AnnotationTypeRepo &parent); - void addAnnotationType(AnnotationType::UP annotation_type); + AnnotationType * addAnnotationType(AnnotationType::UP 
annotation_type); void setAnnotationDataType(int32_t id, const DataType &datatype); const AnnotationType *lookup(int32_t id) const; @@ -182,7 +186,7 @@ void AnnotationTypeRepo::inherit(const AnnotationTypeRepo &parent) { _annotation_types.insert(parent._annotation_types.begin(), parent._annotation_types.end()); } -void AnnotationTypeRepo::addAnnotationType(AnnotationType::UP type) { +AnnotationType * AnnotationTypeRepo::addAnnotationType(AnnotationType::UP type) { AnnotationType *& a_type = _annotation_types[type->getId()]; if (a_type) { if (*type != *a_type) { @@ -194,6 +198,7 @@ void AnnotationTypeRepo::addAnnotationType(AnnotationType::UP type) { a_type = type.get(); _owned_types.push_back(type.release()); } + return a_type; } void AnnotationTypeRepo::setAnnotationDataType(int32_t id, const DataType &d) { @@ -502,6 +507,495 @@ void configureAllRepos(const DocumenttypesConfig::DocumenttypeVector &t, Documen } } +using DataTypesByIdx = hash_map<int, const DataType *>; +using StructTypesByIdx = hash_map<int, StructDataType *>; +using DocTypesByIdx = hash_map<int, DocumentType *>; + + +class ApplyNewDoctypeConfig { +private: + using DTC = ::document::config::DocumenttypesConfig; + + using CDocType = DTC::Doctype; + using CDocInherit = DTC::Doctype::Inherits; + using CDocFieldsets = DTC::Doctype::Fieldsets; + using CDocImportField = DTC::Doctype::Importedfield; + using CPrimitiveT = DTC::Doctype::Primitivetype; + using CArrayT = DTC::Doctype::Arraytype; + using CMapT = DTC::Doctype::Maptype; + using CWsetT = DTC::Doctype::Wsettype; + using CTensorT = DTC::Doctype::Tensortype; + using CDocRefT = DTC::Doctype::Documentref; + using CAnnotationT = DTC::Doctype::Annotationtype; + using CAnnRefT = DTC::Doctype::Annotationref; + using CStructT = DTC::Doctype::Structtype; + using CStructField = DTC::Doctype::Structtype::Field; + using CStructInherits = DTC::Doctype::Structtype::Inherits; + + struct DocTypeInProgress { + const CDocType & cfg; + DataTypeRepo * data_type_repo; 
+ DocumentType * dtype = nullptr; + bool builtin = false; + + DocTypeInProgress(const CDocType & config, DocumentTypeMap &doc_types) + : cfg(config), + data_type_repo(doc_types[cfg.internalid]) + { + if (data_type_repo) { + LOG(debug, "old doct : %s [%d]", cfg.name.c_str(), cfg.internalid); + builtin = true; + } else { + LOG(debug, "new doct : %s [%d]", cfg.name.c_str(), cfg.internalid); + data_type_repo = new DataTypeRepo(); + doc_types[cfg.internalid] = data_type_repo; + } + } + + Repo& repo() { return data_type_repo->repo; } + }; + + struct StructInProgress { + const CStructT & cfg; + StructDataType *stype = nullptr; + const StructDataType *oldtype = nullptr; + bool finished = false; + StructInProgress(const CStructT & config) : cfg(config) {} + }; + using StructsInProgress = std::map<int, StructInProgress>; + StructsInProgress _structs_in_progress; + + using DocTypesInProgress = std::map<int, DocTypeInProgress>; + using MadeTypes = std::map<int, const DataType *>; + + const DocumenttypesConfig::DoctypeVector & _input; + DocumentTypeMap & _output; + + DocTypesInProgress _doc_types_in_progress; + hash_map<int, AnnotationType *> _annotations_by_idx; + MadeTypes _made_types; + std::set<int> _needed_idx_set; + + void apply() { + findNeeded(); + for (const CDocType & docT : _input) { + auto [iter,succ] = _doc_types_in_progress.emplace(docT.idx, + DocTypeInProgress(docT, _output)); + LOG_ASSERT(succ); + auto & dtInP = iter->second; + createSimpleTypes(dtInP); + createEmptyStructs(dtInP); + initializeDocTypeAndInheritAnnotations(dtInP); + createEmptyAnnotationTypes(dtInP); + createReferenceTypes(dtInP); + } + createComplexTypes(); + fillStructs(); + for (const CDocType & docT : _input) { + auto iter = _doc_types_in_progress.find(docT.idx); + LOG_ASSERT(iter != _doc_types_in_progress.end()); + auto & dtInP = iter->second; + fillDocument(dtInP); + fillAnnotationTypes(dtInP); + } + for (const auto & docT : _input) { + for (const auto & structT : docT.structtype) { + 
performStructInherit(structT.idx); + } + } + } + + void madeType(const DataType *t, int idx) { + _made_types[idx] = t; + _needed_idx_set.erase(idx); + } + + void createSimpleTypes(DocTypeInProgress & dtInP) { + for (const auto & primT : dtInP.cfg.primitivetype) { + string name = primT.name; + const DataType *t = dtInP.repo().lookup(name); + if (t == nullptr) { + if (name == "float16") { + // is this even sane? + name = "float"; + } + name[0] = (name[0] & 0x5F); + t = dtInP.repo().lookup(name); + } + if (t == nullptr) { + LOG(error, "Missing primitive type '%s'", primT.name.c_str()); + throw IllegalArgumentException("missing primitive type"); + } else { + madeType(t, primT.idx); + } + } + for (const auto & tensorT : dtInP.cfg.tensortype) { + const DataType & tt = dtInP.repo().addTensorType(tensorT.detailedtype); + madeType(&tt, tensorT.idx); + } + } + + void createEmptyStructs(DocTypeInProgress & dtInP) { + for (const auto & structT : dtInP.cfg.structtype) { + StructInProgress in_progress(structT); + if (const auto * oldt = dtInP.repo().lookup(structT.internalid)) { + auto st = dynamic_cast<const StructDataType *>(oldt); + if (st) { + LOG(debug, "already has %s [%d], wanted to add %s [%d]", + st->getName().c_str(), st->getId(), + structT.name.c_str(), structT.internalid); + in_progress.oldtype = st; + in_progress.finished = true; + madeType(st, structT.idx); + } else { + throw IllegalArgumentException("struct internalid -> not a struct"); + } + } else { + auto up = std::make_unique<StructDataType>(structT.name, structT.internalid); + in_progress.stype = up.get(); + const DataType *t = dtInP.repo().addDataType(std::move(up)); + LOG_ASSERT(t == in_progress.stype); + madeType(t, structT.idx); + } + auto [iter, succ] = _structs_in_progress.emplace(structT.idx, in_progress); + LOG_ASSERT(succ); + } + } + + const StructDataType * findStruct(int idx) { + auto iter = _structs_in_progress.find(idx); + if (iter == _structs_in_progress.end()) return nullptr; + const auto & 
in_progress = iter->second; + if (in_progress.finished) { + return in_progress.oldtype; + } + return in_progress.stype; + } + + void initializeDocTypeAndInheritAnnotations(DocTypeInProgress & dtInP) { + if (dtInP.builtin) { + madeType(dtInP.data_type_repo->doc_type, dtInP.cfg.idx); + return; + } + LOG_ASSERT(dtInP.data_type_repo->doc_type == nullptr); + const auto & docT = dtInP.cfg; + const StructDataType * fields = findStruct(docT.contentstruct); + if (fields != nullptr) { + dtInP.data_type_repo->doc_type = new DocumentType(docT.name, docT.internalid, *fields); + madeType(dtInP.data_type_repo->doc_type, docT.idx); + } else { + LOG(error, "Missing content struct for '%s' (idx %d not found)", + docT.name.c_str(), docT.contentstruct); + throw IllegalArgumentException("missing content struct"); + } + // depends on config in inheritance order + for (const auto & inheritD : docT.inherits) { + const DataType *dt = _made_types[inheritD.idx]; + if (dt == nullptr) { + LOG(error, "parent datatype [idx %d] missing for document %s", + inheritD.idx, docT.name.c_str()); + throw IllegalArgumentException("Unable to find document for inheritance"); + } + DataTypeRepo * parentRepo = FindPtr(_output, dt->getId()); + if (parentRepo == nullptr) { + LOG(error, "parent repo [id %d] missing for document %s", + dt->getId(), docT.name.c_str()); + throw IllegalArgumentException("missing parent repo"); + } + dtInP.data_type_repo->annotations.inherit(parentRepo->annotations); + } + } + + void createEmptyAnnotationTypes(DocTypeInProgress & dtInP) { + auto & annRepo = dtInP.data_type_repo->annotations; + for (const auto & annT: dtInP.cfg.annotationtype) { + if (annRepo.lookup(annT.internalid)) { + throw IllegalArgumentException("duplicate annotation type id"); + } + auto at = std::make_unique<AnnotationType>(annT.internalid, annT.name); + _annotations_by_idx[annT.idx] = at.get(); + _needed_idx_set.erase(annT.idx); + const auto * t = annRepo.addAnnotationType(std::move(at)); + LOG_ASSERT(t == 
_annotations_by_idx[annT.idx]); + } + } + + void createReferenceTypes(DocTypeInProgress & dtInP) { + for (const auto & aRef : dtInP.cfg.annotationref) { + const AnnotationType * target = _annotations_by_idx[aRef.annotationtype]; + if (target == nullptr) { + LOG(error, "Missing annotation type [idx %d] for annotationref", + aRef.annotationtype); + throw IllegalArgumentException("missing annotation type"); + } else { + auto ar = std::make_unique<AnnotationReferenceDataType>(*target, aRef.internalid); + madeType(dtInP.repo().addDataType(std::move(ar)), aRef.idx); + } + } + for (const auto & refT : dtInP.cfg.documentref) { + const auto * target = dynamic_cast<const DocumentType *>(_made_types[refT.targettype]); + if (target == nullptr) { + LOG(error, "Missing target document type for reference (idx %d)", refT.targettype); + throw IllegalArgumentException("missing target type"); + } else { + auto rt = std::make_unique<ReferenceDataType>(*target, refT.internalid); + madeType(dtInP.repo().addDataType(std::move(rt)), refT.idx); + } + } + } + + void createComplexTypes() { + while (_needed_idx_set.size() > 0) { + size_t missing_cnt = _needed_idx_set.size(); + for (const auto & docT : _input) { + auto iter = _doc_types_in_progress.find(docT.idx); + LOG_ASSERT(iter != _doc_types_in_progress.end()); + auto & dtInP = iter->second; + createComplexTypesForDocType(dtInP.cfg, dtInP.repo()); + } + if (_needed_idx_set.size() == missing_cnt) { + for (int idx : _needed_idx_set) { + LOG(error, "no progress, datatype [idx %d] still missing", idx); + } + throw IllegalArgumentException("no progress"); + } + LOG(info, "retry complex types, %zd missing", _needed_idx_set.size()); + } + } + + void createComplexTypesForDocType(const CDocType & docT, Repo& repo) { + for (const auto & arrT : docT.arraytype) { + if (_made_types[arrT.idx] != nullptr) { + continue; // OK already + } + if (const DataType * nested = _made_types[arrT.elementtype]) { + auto at = std::make_unique<ArrayDataType>(*nested, 
arrT.internalid); + madeType(repo.addDataType(std::move(at)), arrT.idx); + } + } + for (const auto & mapT : docT.maptype) { + if (_made_types[mapT.idx] != nullptr) { + continue; // OK already + } + const DataType * kt = _made_types[mapT.keytype]; + const DataType * vt = _made_types[mapT.valuetype]; + if (kt && vt) { + auto mt = std::make_unique<MapDataType>(*kt, *vt, mapT.internalid); + madeType(repo.addDataType(std::move(mt)), mapT.idx); + } + } + for (const auto & wsetT : docT.wsettype) { + if (_made_types[wsetT.idx] != nullptr) { + continue; // OK already + } + if (const DataType * nested = _made_types[wsetT.elementtype]) { + auto wt = std::make_unique<WeightedSetDataType>(*nested, + wsetT.createifnonexistent, wsetT.removeifzero, + wsetT.internalid); + madeType(repo.addDataType(std::move(wt)), wsetT.idx); + } + } + } + + void fillStructs() { + for (auto & [idx, in_progress] : _structs_in_progress) { + if (in_progress.finished) { + continue; + } + auto st = in_progress.stype; + LOG_ASSERT(st); + for (const auto & fieldD : in_progress.cfg.field) { + const DataType *ft = _made_types[fieldD.type]; + if (ft == nullptr) { + LOG(error, "Missing type [idx %d] for struct %s field %s", + fieldD.type, in_progress.cfg.name.c_str(), fieldD.name.c_str()); + throw IllegalArgumentException("missing datatype"); + } else { + st->addField(Field(fieldD.name, fieldD.internalid, *ft)); + } + } + } + } + + void fillDocument(DocTypeInProgress & dtInP) { + if (dtInP.builtin) { + return; + } + const CDocType & docT = dtInP.cfg; + auto * doc_type = dtInP.data_type_repo->doc_type; + LOG_ASSERT(doc_type != nullptr); + for (const auto & importD : docT.importedfield) { + doc_type->add_imported_field_name(importD.name); + } + for (const auto & entry : docT.fieldsets) { + DocumentType::FieldSet::Fields fields; + for (const auto& f : entry.second.fields) { + fields.insert(f); + } + doc_type->addFieldSet(entry.first, fields); + } + for (const auto & inheritD : docT.inherits) { + const DataType 
*dt = _made_types[inheritD.idx]; + const DocumentType * parent = dynamic_cast<const DocumentType *>(dt); + if (parent == nullptr) { + LOG(error, "missing parent type [idx %d] for document %s", + inheritD.idx, docT.name.c_str()); + throw IllegalArgumentException("missing parent type"); + } else { + doc_type->inherit(*parent); + } + } + } + + void fillAnnotationTypes(DocTypeInProgress & dtInP) { + for (const auto & annT: dtInP.cfg.annotationtype) { + AnnotationType * at = _annotations_by_idx[annT.idx]; + if (annT.datatype != -1) { + const DataType * dt = _made_types[annT.datatype]; + if (dt == nullptr) { + LOG(error, "Missing datatype [idx %d] for annotation type %s", + annT.datatype, annT.name.c_str()); + throw IllegalArgumentException("missing datatype"); + } else { + at->setDataType(*dt); + } + } + for (const auto & inheritD : annT.inherits) { + LOG_ASSERT(at != nullptr); + const AnnotationType * parent = _annotations_by_idx[inheritD.idx]; + if (parent == nullptr) { + LOG(error, "missing parent [idx %d] for annotation %s", + inheritD.idx, annT.name.c_str()); + throw IllegalArgumentException("missing parent"); + } + } + } + } + + class EnsureIndexes { + std::set<int> _set; + public: + void add(int idx) { + auto [iter, succ] = _set.insert(idx); + if (! succ) { + throw IllegalArgumentException("duplicate type idx"); + } + LOG(info, "ensure indexes: add %d", idx); + } + void check(int idx) { + if (! 
_set.contains(idx)) { + LOG(error, "ensure indexes: missing %d", idx); + throw IllegalArgumentException("needed idx missing"); + } + } + }; + + void findNeeded() { + EnsureIndexes idx_set; + for (const auto & docT : _input) { + LOG(info, "doc %s", docT.name.c_str()); + idx_set.add(docT.idx); + for (const auto & structT : docT.structtype) { + idx_set.add(structT.idx); + for (const auto & fieldD : structT.field) { + LOG(debug, "doc %s struct %s field %s needs [idx %d]", + docT.name.c_str(), structT.name.c_str(), fieldD.name.c_str(), fieldD.type); + _needed_idx_set.insert(fieldD.type); + } + } + for (const auto & primT : docT.primitivetype) { + idx_set.add(primT.idx); + } + for (const auto & tensorT : docT.tensortype) { + idx_set.add(tensorT.idx); + } + for (const auto & arrT : docT.arraytype) { + idx_set.add(arrT.idx); + LOG(debug, "doc %s array needs [idx %d]", docT.name.c_str(),arrT.elementtype); + _needed_idx_set.insert(arrT.elementtype); + } + for (const auto & wsetT : docT.wsettype) { + idx_set.add(wsetT.idx); + LOG(debug, "doc %s wset needs [idx %d]", docT.name.c_str(), wsetT.elementtype); + _needed_idx_set.insert(wsetT.elementtype); + } + for (const auto & mapT : docT.maptype) { + idx_set.add(mapT.idx); + LOG(debug, "doc %s wset needs [idx %d] and [idx %d]", + docT.name.c_str(), mapT.keytype, mapT.valuetype); + _needed_idx_set.insert(mapT.keytype); + _needed_idx_set.insert(mapT.valuetype); + } + for (const auto & annT: docT.annotationtype) { + idx_set.add(annT.idx); + if (annT.datatype != -1) { + LOG(debug, "doc %s ann needs datatype [idx %d]", docT.name.c_str(), annT.datatype); + _needed_idx_set.insert(annT.datatype); + } + for (const auto & inheritD : annT.inherits) { + LOG(debug, "doc %s ann needs parent [idx %d]", docT.name.c_str(), inheritD.idx); + _needed_idx_set.insert(inheritD.idx); + } + } + for (const auto & aRef : docT.annotationref) { + idx_set.add(aRef.idx); + LOG(debug, "doc %s ann ref needs annotation [idx %d]", docT.name.c_str(), 
aRef.annotationtype); + _needed_idx_set.insert(aRef.annotationtype); + } + for (const auto & refT : docT.documentref) { + idx_set.add(refT.idx); + LOG(debug, "doc %s doc ref needs target [idx %d]", docT.name.c_str(), refT.targettype); + _needed_idx_set.insert(refT.targettype); + } + } + for (int needed : _needed_idx_set) { + idx_set.check(needed); + } + } + + const StructDataType * performStructInherit(int idx) { + auto iter = _structs_in_progress.find(idx); + if (iter == _structs_in_progress.end()) { + throw IllegalArgumentException("inherit from non-struct"); + } + auto & in_progress = iter->second; + if (in_progress.finished) { + return in_progress.oldtype; + } + const auto & structT = in_progress.cfg; + for (const auto & inheritD : structT.inherits) { + const auto * parent = performStructInherit(inheritD.type); + if (parent == nullptr) { + LOG(error, "Missing parent type [idx %d] for struct %s", + inheritD.type, structT.name.c_str()); + throw IllegalArgumentException("missing parent type"); + } + for (const auto & field : parent->getFieldSet()) { + in_progress.stype->addInheritedField(*field); + } + } + in_progress.finished = true; + in_progress.oldtype = in_progress.stype; + return in_progress.oldtype; + } + +public: + ApplyNewDoctypeConfig(const DocumenttypesConfig::DoctypeVector & input, + DocumentTypeMap & output) + : _input(input), _output(output) + { + apply(); + } + ~ApplyNewDoctypeConfig(); +}; + +ApplyNewDoctypeConfig::~ApplyNewDoctypeConfig() = default; + +void configureDocTypes(const DocumenttypesConfig::DoctypeVector &t, DocumentTypeMap &type_map) { + LOG(info, "applying new doc type config"); + ApplyNewDoctypeConfig(t, type_map); +} + } // namespace DocumentTypeRepo::DocumentTypeRepo() : @@ -527,9 +1021,13 @@ DocumentTypeRepo::DocumentTypeRepo(const DocumenttypesConfig &config) : _default(addDefaultDocument(*_doc_types)) { try { + if (config.documenttype.empty() && ! 
config.doctype.empty()) { + configureDocTypes(config.doctype, *_doc_types); + } else { createAllDocumentTypes(config.documenttype, *_doc_types); addAllDocumentTypesToRepos(*_doc_types); configureAllRepos(config.documenttype, *_doc_types); + } } catch (...) { DeleteMapContent(*_doc_types); throw; |