summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArne H Juul <arnej27959@users.noreply.github.com>2022-04-01 13:52:37 +0200
committerGitHub <noreply@github.com>2022-04-01 13:52:37 +0200
commitf135ba6e2817a7819ae81591e8f73913d1e79355 (patch)
tree575ed5141b7214ae1fd23f842aadb044abd7f17f
parentf51a32337039c30da84826892d3cea2c306b8368 (diff)
parent62a3a9c55fde4b8877f53c790e2ca47a534e3259 (diff)
Merge pull request #21920 from vespa-engine/arnej/alternative-documenttypes-config
Arnej/alternative documenttypes config
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/configmodel/producers/DocumentTypes.java290
-rw-r--r--document/src/tests/repo/CMakeLists.txt8
-rw-r--r--document/src/tests/repo/doctype_config_test.cpp662
-rw-r--r--document/src/tests/repo/types.cfg326
-rw-r--r--document/src/vespa/document/config/documenttypes.def175
-rw-r--r--document/src/vespa/document/repo/documenttyperepo.cpp506
6 files changed, 1962 insertions, 5 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/configmodel/producers/DocumentTypes.java b/config-model/src/main/java/com/yahoo/vespa/configmodel/producers/DocumentTypes.java
index e1a28c8114f..630e9f0c097 100644
--- a/config-model/src/main/java/com/yahoo/vespa/configmodel/producers/DocumentTypes.java
+++ b/config-model/src/main/java/com/yahoo/vespa/configmodel/producers/DocumentTypes.java
@@ -27,6 +27,11 @@ public class DocumentTypes {
}
public DocumenttypesConfig.Builder produce(DocumentModel model, DocumenttypesConfig.Builder builder) {
+ /* later:
+ if (some flag) {
+ return produceDocTypes(model, builder);
+ }
+ */
builder.usev8geopositions(this.useV8GeoPositions);
Map<NewDocumentType.Name, NewDocumentType> produced = new HashMap<>();
for (NewDocumentType documentType : model.getDocumentManager().getTypes()) {
@@ -236,4 +241,289 @@ public class DocumentTypes {
documentBuilder.referencetype(refBuilder);
}
+ // Alternate (new) way to build config:
+
+    /**
+     * Alternate config production: emits one doctype entry per document type
+     * in the model, with inherited types emitted before the types inheriting
+     * them, then verifies that every type that was handed an index also got
+     * its config generated.
+     *
+     * @param model   the document model supplying the document types
+     * @param builder the config builder to fill
+     * @return the same builder that was passed in
+     */
+    private DocumenttypesConfig.Builder produceDocTypes(DocumentModel model, DocumenttypesConfig.Builder builder) {
+        builder.usev8geopositions(this.useV8GeoPositions);
+        IdxMap typeIndexes = new IdxMap();
+        Map<NewDocumentType.Name, NewDocumentType> alreadyProduced = new HashMap<>();
+        for (NewDocumentType docType : model.getDocumentManager().getTypes()) {
+            docTypeInheritOrder(docType, builder, alreadyProduced, typeIndexes);
+        }
+        typeIndexes.verifyAllDone();
+        return builder;
+    }
+
+    /**
+     * Emits config for the given document type after first (recursively)
+     * emitting config for everything it inherits. Types already present in
+     * {@code produced} are left alone, so each type is built at most once.
+     */
+    private void docTypeInheritOrder(NewDocumentType documentType,
+                                     DocumenttypesConfig.Builder builder,
+                                     Map<NewDocumentType.Name, NewDocumentType> produced,
+                                     IdxMap indexMap)
+    {
+        if (produced.containsKey(documentType.getFullName())) {
+            return;
+        }
+        for (NewDocumentType parent : documentType.getInherited()) {
+            docTypeInheritOrder(parent, builder, produced, indexMap);
+        }
+        docTypeBuild(documentType, builder, indexMap);
+        produced.put(documentType.getFullName(), documentType);
+    }
+
+    /**
+     * Hands out a config index to every type object it is asked about (keyed
+     * on object identity) and tracks which of those indexes have had their
+     * config generated.
+     */
+    static private class IdxMap {
+        private Map<Integer, Boolean> doneMap = new HashMap<>();
+        private Map<Object, Integer> map = new IdentityHashMap<>();
+
+        /** Registers the object with the next free index unless it already has one. */
+        void add(Object someType) {
+            assert(someType != null);
+            // the offset of "10000" is mostly there to make the indexes
+            // more unique to grep for when debugging
+            map.putIfAbsent(someType, 10000 + map.size());
+        }
+
+        /**
+         * Returns the index of the object, assigning one on first use.
+         * A DocumentType with id 8 is resolved as VespaDocumentType.INSTANCE.
+         */
+        int idxOf(Object someType) {
+            if ((someType instanceof DocumentType) && (((DocumentType) someType).getId() == 8)) {
+                return idxOf(VespaDocumentType.INSTANCE);
+            }
+            add(someType);
+            return map.get(someType);
+        }
+
+        /** Tells whether the object is marked done; unseen objects are recorded as not done. */
+        boolean isDone(Object someType) {
+            Integer idx = idxOf(someType);
+            doneMap.putIfAbsent(idx, false);
+            return doneMap.get(idx);
+        }
+
+        /** Marks the object as done. */
+        void setDone(Object someType) {
+            assert(! isDone(someType));
+            int idx = idxOf(someType);
+            doneMap.put(idx, true);
+        }
+
+        /** Throws IllegalArgumentException if any registered object was never marked done. */
+        void verifyAllDone() {
+            for (Object needed : map.keySet()) {
+                if (isDone(needed)) {
+                    continue;
+                }
+                throw new IllegalArgumentException("Could not generate config for all needed types, missing: " +
+                                                   needed + " of class " + needed.getClass());
+            }
+        }
+    }
+
+    /**
+     * Builds the complete doctype config entry for one document type: its own
+     * index/name/id, field sets, imported fields, inheritance, content struct,
+     * all its data types and annotation types (both in name order), and adds
+     * the entry to the top-level builder before marking the type done.
+     */
+    private void docTypeBuild(NewDocumentType documentType, DocumenttypesConfig.Builder builder, IdxMap indexMap) {
+        var doctypeBuilder = new DocumenttypesConfig.Doctype.Builder();
+        doctypeBuilder.idx(indexMap.idxOf(documentType));
+        doctypeBuilder.name(documentType.getName());
+        doctypeBuilder.internalid(documentType.getId());
+        doctypeBuilder.contentstruct(indexMap.idxOf(documentType.getContentStruct()));
+        docTypeBuildFieldSets(documentType.getFieldSets(), doctypeBuilder);
+        docTypeBuildImportedFields(documentType.getImportedFieldNames(), doctypeBuilder);
+        for (NewDocumentType parent : documentType.getInherited()) {
+            doctypeBuilder.inherits(inheritBuilder -> inheritBuilder.idx(indexMap.idxOf(parent)));
+        }
+        // The content struct goes first, ahead of the name-sorted remainder.
+        docTypeBuildAnyType(documentType.getContentStruct(), doctypeBuilder, indexMap);
+
+        var dataTypes = sortedList(documentType.getAllTypes().getTypes(),
+                                   (lhs, rhs) -> lhs.getName().compareTo(rhs.getName()));
+        for (DataType dataType : dataTypes) {
+            docTypeBuildAnyType(dataType, doctypeBuilder, indexMap);
+        }
+        var annotationTypes = sortedList(documentType.getAnnotations(),
+                                         (lhs, rhs) -> lhs.getName().compareTo(rhs.getName()));
+        for (AnnotationType annotationType : annotationTypes) {
+            docTypeBuildAnnotationType(annotationType, doctypeBuilder, indexMap);
+        }
+        builder.doctype(doctypeBuilder);
+        indexMap.setDone(documentType);
+    }
+
+    /** Adds a fieldsets entry to the doctype builder for each of the given field sets. */
+    private void docTypeBuildFieldSets(Set<FieldSet> fieldSets, DocumenttypesConfig.Doctype.Builder db) {
+        fieldSets.forEach(fieldSet -> docTypeBuildOneFieldSet(fieldSet, db));
+    }
+
+    /** Adds one fieldsets entry, keyed by the field set's name and listing its field names. */
+    private void docTypeBuildOneFieldSet(FieldSet fs, DocumenttypesConfig.Doctype.Builder db) {
+        String fieldSetName = fs.getName();
+        var fieldsBuilder = new DocumenttypesConfig.Doctype.Fieldsets.Builder().fields(fs.getFieldNames());
+        db.fieldsets(fieldSetName, fieldsBuilder);
+    }
+
+    /**
+     * Adds an annotationtype entry for the given annotation unless it is
+     * already done. If the annotation carries a data type, that type is
+     * referenced by index and gets its own config generated as well.
+     */
+    private void docTypeBuildAnnotationType(AnnotationType annotation, DocumenttypesConfig.Doctype.Builder builder, IdxMap indexMap) {
+        if (indexMap.isDone(annotation)) {
+            return;
+        }
+        // Marked done up front, before any nested type is built.
+        indexMap.setDone(annotation);
+        var annBuilder = new DocumenttypesConfig.Doctype.Annotationtype.Builder();
+        annBuilder.idx(indexMap.idxOf(annotation));
+        annBuilder.name(annotation.getName());
+        annBuilder.internalid(annotation.getId());
+        DataType payloadType = annotation.getDataType();
+        if (payloadType != null) {
+            annBuilder.datatype(indexMap.idxOf(payloadType));
+            docTypeBuildAnyType(payloadType, builder, indexMap);
+        }
+        for (AnnotationType parent : annotation.getInheritedTypes()) {
+            annBuilder.inherits(inhBuilder -> inhBuilder.idx(indexMap.idxOf(parent)));
+        }
+        builder.annotationtype(annBuilder);
+    }
+
+    /**
+     * Dispatches on the concrete class of a data type and emits the matching
+     * config entry, unless the type is already done.
+     *
+     * Skipped without emitting anything: NewDocumentType instances (expected
+     * to be in the top-level list and handled there) and a DocumentType with
+     * id 8 (special handling).
+     *
+     * @throws IllegalArgumentException for temporary types, unadorned
+     *         document types and data types of an unrecognized class
+     */
+    @SuppressWarnings("deprecation")
+    private void docTypeBuildAnyType(DataType type, DocumenttypesConfig.Doctype.Builder documentBuilder, IdxMap indexMap) {
+        if (indexMap.isDone(type)) {
+            return;
+        }
+        if ((type instanceof NewDocumentType) || ((type instanceof DocumentType) && (type.getId() == 8))) {
+            return;
+        }
+        // Marked done before recursing into nested types.
+        indexMap.setDone(type);
+        boolean isTemporary = (type instanceof TemporaryStructuredDataType)
+                || (type instanceof TemporaryUnknownType)
+                || (type instanceof OwnedTemporaryType);
+        if (isTemporary) {
+            throw new IllegalArgumentException("Can not create config for temporary data type: " + type.getName());
+        }
+        if (type instanceof StructDataType) {
+            docTypeBuildOneType((StructDataType) type, documentBuilder, indexMap);
+        } else if (type instanceof ArrayDataType) {
+            docTypeBuildOneType((ArrayDataType) type, documentBuilder, indexMap);
+        } else if (type instanceof WeightedSetDataType) {
+            docTypeBuildOneType((WeightedSetDataType) type, documentBuilder, indexMap);
+        } else if (type instanceof MapDataType) {
+            docTypeBuildOneType((MapDataType) type, documentBuilder, indexMap);
+        } else if (type instanceof AnnotationReferenceDataType) {
+            docTypeBuildOneType((AnnotationReferenceDataType) type, documentBuilder, indexMap);
+        } else if (type instanceof TensorDataType) {
+            docTypeBuildOneType((TensorDataType) type, documentBuilder, indexMap);
+        } else if (type instanceof NewDocumentReferenceDataType) {
+            var referenceType = (NewDocumentReferenceDataType) type;
+            if (referenceType.isTemporary()) {
+                throw new IllegalArgumentException("Still temporary: " + referenceType);
+            }
+            docTypeBuildOneType(referenceType, documentBuilder, indexMap);
+        } else if (type instanceof PrimitiveDataType) {
+            docTypeBuildOneType((PrimitiveDataType) type, documentBuilder, indexMap);
+        } else if (type instanceof DocumentType) {
+            throw new IllegalArgumentException("Can not create config for unadorned document type: " + type.getName() + " id "+type.getId());
+        } else {
+            throw new IllegalArgumentException("Can not create config for data type " + type + " of class " + type.getClass());
+        }
+    }
+
+    /** Adds one importedfield entry, carrying just the name, per given field name. */
+    private void docTypeBuildImportedFields(Collection<String> fieldNames, DocumenttypesConfig.Doctype.Builder builder) {
+        fieldNames.forEach(importedName ->
+                builder.importedfield(importedBuilder -> importedBuilder.name(importedName)));
+    }
+
+    /**
+     * Emits a structtype entry: index, name and id, followed by the inherited
+     * structs and the fields declared on this struct only. Every inherited
+     * type and field type referenced also gets its own config generated.
+     */
+    private void docTypeBuildOneType(StructDataType type,
+                                     DocumenttypesConfig.Doctype.Builder builder,
+                                     IdxMap indexMap)
+    {
+        var structBuilder = new DocumenttypesConfig.Doctype.Structtype.Builder();
+        structBuilder.idx(indexMap.idxOf(type));
+        structBuilder.name(type.getName());
+        structBuilder.internalid(type.getId());
+        for (DataType parent : type.getInheritedTypes()) {
+            int parentIdx = indexMap.idxOf(parent);
+            structBuilder.inherits(ib -> ib.type(parentIdx));
+            docTypeBuildAnyType(parent, builder, indexMap);
+        }
+        for (com.yahoo.document.Field structField : type.getFieldsThisTypeOnly()) {
+            DataType fieldType = structField.getDataType();
+            structBuilder.field(fb -> fb
+                    .name(structField.getName())
+                    .internalid(structField.getId())
+                    .type(indexMap.idxOf(fieldType)));
+            docTypeBuildAnyType(fieldType, builder, indexMap);
+        }
+        builder.structtype(structBuilder);
+    }
+
+    /** Emits a primitivetype entry holding the index and name of the type. */
+    private void docTypeBuildOneType(PrimitiveDataType type,
+                                     DocumenttypesConfig.Doctype.Builder builder,
+                                     IdxMap indexMap)
+    {
+        int typeIdx = indexMap.idxOf(type);
+        String typeName = type.getName();
+        builder.primitivetype(pb -> pb.idx(typeIdx).name(typeName));
+    }
+
+    /**
+     * Emits a tensortype entry. The detailed type is the string form of the
+     * tensor type, or plain "tensor" when the data type has no tensor type.
+     */
+    private void docTypeBuildOneType(TensorDataType type,
+                                     DocumenttypesConfig.Doctype.Builder builder,
+                                     IdxMap indexMap)
+    {
+        var tensorType = type.getTensorType();
+        String detailed;
+        if (tensorType == null) {
+            detailed = "tensor";
+        } else {
+            detailed = tensorType.toString();
+        }
+        int typeIdx = indexMap.idxOf(type);
+        builder.tensortype(tb -> tb.idx(typeIdx).detailedtype(detailed));
+    }
+
+    /**
+     * Emits an arraytype entry referencing the element type by index, then
+     * makes sure the element type gets its own config generated.
+     *
+     * A leftover debug trace used to print a line to stderr for every array
+     * type here; config production should not write to stderr, so it is gone.
+     */
+    private void docTypeBuildOneType(ArrayDataType type,
+                                     DocumenttypesConfig.Doctype.Builder builder,
+                                     IdxMap indexMap)
+    {
+        DataType nested = type.getNestedType();
+        builder.arraytype(arrayBuilder -> arrayBuilder
+                .idx(indexMap.idxOf(type))
+                .elementtype(indexMap.idxOf(nested))
+                .internalid(type.getId()));
+        docTypeBuildAnyType(nested, builder, indexMap);
+    }
+
+    /**
+     * Emits a wsettype entry (element type index, the create-if-nonexistent
+     * and remove-if-zero flags, and id), then builds config for the element
+     * type.
+     */
+    private void docTypeBuildOneType(WeightedSetDataType type,
+                                     DocumenttypesConfig.Doctype.Builder builder,
+                                     IdxMap indexMap)
+    {
+        DataType elementType = type.getNestedType();
+        int typeIdx = indexMap.idxOf(type);
+        int elementIdx = indexMap.idxOf(elementType);
+        builder.wsettype(wb -> wb
+                .idx(typeIdx)
+                .elementtype(elementIdx)
+                .createifnonexistent(type.createIfNonExistent())
+                .removeifzero(type.removeIfZero())
+                .internalid(type.getId()));
+        docTypeBuildAnyType(elementType, builder, indexMap);
+    }
+
+    /**
+     * Emits a maptype entry referencing key and value types by index, then
+     * builds config for the key type followed by the value type.
+     */
+    private void docTypeBuildOneType(MapDataType type,
+                                     DocumenttypesConfig.Doctype.Builder builder,
+                                     IdxMap indexMap)
+    {
+        DataType keyType = type.getKeyType();
+        DataType valueType = type.getValueType();
+        int typeIdx = indexMap.idxOf(type);
+        int keyIdx = indexMap.idxOf(keyType);
+        int valueIdx = indexMap.idxOf(valueType);
+        builder.maptype(mb -> mb
+                .idx(typeIdx)
+                .keytype(keyIdx)
+                .valuetype(valueIdx)
+                .internalid(type.getId()));
+        docTypeBuildAnyType(keyType, builder, indexMap);
+        docTypeBuildAnyType(valueType, builder, indexMap);
+    }
+
+    /** Emits an annotationref entry pointing at the index of the referenced annotation type. */
+    private void docTypeBuildOneType(AnnotationReferenceDataType type,
+                                     DocumenttypesConfig.Doctype.Builder builder,
+                                     IdxMap indexMap)
+    {
+        int typeIdx = indexMap.idxOf(type);
+        int annotationIdx = indexMap.idxOf(type.getAnnotationType());
+        builder.annotationref(rb -> rb
+                .idx(typeIdx)
+                .annotationtype(annotationIdx)
+                .internalid(type.getId()));
+    }
+
+    /** Emits a documentref entry pointing at the index of the reference's target type. */
+    private void docTypeBuildOneType(NewDocumentReferenceDataType type,
+                                     DocumenttypesConfig.Doctype.Builder builder,
+                                     IdxMap indexMap)
+    {
+        int typeIdx = indexMap.idxOf(type);
+        int targetIdx = indexMap.idxOf(type.getTargetType());
+        builder.documentref(rb -> rb
+                .idx(typeIdx)
+                .targettype(targetIdx)
+                .internalid(type.getId()));
+    }
+
}
diff --git a/document/src/tests/repo/CMakeLists.txt b/document/src/tests/repo/CMakeLists.txt
index bbbcafc2650..2a9ff4af683 100644
--- a/document/src/tests/repo/CMakeLists.txt
+++ b/document/src/tests/repo/CMakeLists.txt
@@ -6,3 +6,11 @@ vespa_add_executable(document_documenttyperepo_test_app TEST
document
)
vespa_add_test(NAME document_documenttyperepo_test_app COMMAND document_documenttyperepo_test_app)
+
+# Test executable built from doctype_config_test.cpp, registered as a test.
+vespa_add_executable(document_doctype_config_test_app TEST
+    SOURCES doctype_config_test.cpp
+    DEPENDS document
+)
+vespa_add_test(
+    NAME document_doctype_config_test_app
+    COMMAND document_doctype_config_test_app
+)
diff --git a/document/src/tests/repo/doctype_config_test.cpp b/document/src/tests/repo/doctype_config_test.cpp
new file mode 100644
index 00000000000..84ec1414fcc
--- /dev/null
+++ b/document/src/tests/repo/doctype_config_test.cpp
@@ -0,0 +1,662 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for documenttyperepo.
+
+#include <vespa/document/base/testdocrepo.h>
+#include <vespa/config/print/asciiconfigwriter.h>
+#include <vespa/document/datatype/annotationreferencedatatype.h>
+#include <vespa/document/datatype/arraydatatype.h>
+#include <vespa/document/datatype/documenttype.h>
+#include <vespa/document/datatype/mapdatatype.h>
+#include <vespa/document/datatype/tensor_data_type.h>
+#include <vespa/document/datatype/weightedsetdatatype.h>
+#include <vespa/document/fieldvalue/fieldvalue.h>
+#include <vespa/document/repo/configbuilder.h>
+#include <vespa/document/repo/documenttyperepo.h>
+#include <vespa/vespalib/objects/identifiable.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/vespalib/util/exceptions.h>
+#include <set>
+
+#include <vespa/log/log.h>
+LOG_SETUP("doctype_config_test");
+
+using config::AsciiConfigWriter;
+using std::set;
+using std::vector;
+using vespalib::Identifiable;
+using vespalib::IllegalArgumentException;
+using vespalib::string;
+
+using namespace document::config_builder;
+using namespace document;
+
+namespace {
+
+const string type_name = "test";
+const int32_t doc_type_id = 787121340;
+const string header_name = type_name + ".header";
+const int32_t header_id = 30;
+const string type_name_2 = "test_2";
+const string header_name_2 = type_name_2 + ".header";
+const string field_name = "field_name";
+const string derived_name = "derived";
+
+using ::document::config::DocumenttypesConfigBuilder;
+
+using BDocType = DocumenttypesConfigBuilder::Doctype;
+using BDocInherit = DocumenttypesConfigBuilder::Doctype::Inherits;
+using BDocFieldsets = DocumenttypesConfigBuilder::Doctype::Fieldsets;
+using BDocImportField = DocumenttypesConfigBuilder::Doctype::Importedfield;
+using BPrimitiveT = DocumenttypesConfigBuilder::Doctype::Primitivetype;
+using BArrayT = DocumenttypesConfigBuilder::Doctype::Arraytype;
+using BMapT = DocumenttypesConfigBuilder::Doctype::Maptype;
+using BWsetT = DocumenttypesConfigBuilder::Doctype::Wsettype;
+using BTensorT = DocumenttypesConfigBuilder::Doctype::Tensortype;
+using BDocRefT = DocumenttypesConfigBuilder::Doctype::Documentref;
+using BAnnotationT = DocumenttypesConfigBuilder::Doctype::Annotationtype;
+using BAnnRefT = DocumenttypesConfigBuilder::Doctype::Annotationref;
+using BStructT = DocumenttypesConfigBuilder::Doctype::Structtype;
+using BStructField = DocumenttypesConfigBuilder::Doctype::Structtype::Field;
+using BStructInherits = DocumenttypesConfigBuilder::Doctype::Structtype::Inherits;
+
+// Test helper that assembles a DocumenttypesConfig in the per-doctype layout.
+//
+// Every entry created through the helper gets a fresh idx from a running
+// counter, and most methods return a reference to the entry just added so a
+// test can adjust individual members. The constructor creates a root doctype
+// named "document" that holds the primitive types; every doctype added after
+// it inherits from that root.
+//
+// The returned references point into vectors owned by the config. They stay
+// valid only because each vector gets capacity reserved up front; appending
+// past that capacity would reallocate and leave earlier references dangling,
+// so appendStable() asserts that this never happens.
+class BuilderHelper {
+private:
+    // Capacity reserved in each config vector that references are handed out from.
+    static constexpr size_t max_entries = 100;
+    int _idx = 10000;
+    DocumenttypesConfigBuilder _config;
+    // Derives an internal id from a name by asking a temporary StructDataType.
+    static int hashId(const string& name) {
+        StructDataType tmp(name);
+        return tmp.getId();
+    }
+    // Appends a default-constructed element and returns a reference to it.
+    // Asserts that the append stays within the reserved capacity so that
+    // references returned earlier are not invalidated by a reallocation.
+    template <typename Vec>
+    static auto & appendStable(Vec &vec) {
+        vec.reserve(max_entries);
+        assert(vec.size() < max_entries);
+        return vec.emplace_back();
+    }
+    // Maps DataType::Type values to the idx assigned to that primitive.
+    vector<int> _idxOfBuiltins;
+    // Adds a primitive type entry to the doctype and records its idx.
+    void addPrimitive(BDocType &doc, const string& name, DataType::Type t) {
+        BPrimitiveT pt;
+        pt.idx = ++_idx;
+        pt.name = name;
+        doc.primitivetype.push_back(pt);
+        assert(static_cast<size_t>(t) < _idxOfBuiltins.size());
+        _idxOfBuiltins[t] = pt.idx;
+        LOG(debug, "idx of builtin (%d) = %d", (int)t, pt.idx);
+    }
+public:
+    ~BuilderHelper();
+    // Adds a doctype with a "<name>.header" content struct. Every doctype
+    // except the first one inherits from the first one.
+    BDocType & document(const string& name) {
+        auto & d = appendStable(_config.doctype);
+        d.idx = ++_idx;
+        d.name = name;
+        d.internalid = hashId(name);
+        auto & st = addStruct(d, name + ".header");
+        d.contentstruct = st.idx;
+        if (_config.doctype.size() > 1) {
+            d.inherits.emplace_back().idx = _config.doctype[0].idx;
+        }
+        return d;
+    }
+    // Adds a field to the first struct of the doctype (its content struct).
+    BStructField & addField(BDocType &doc, const string& name) {
+        return addField(doc.structtype[0], name);
+    }
+    // Adds a struct type with a name-derived internal id.
+    BStructT & addStruct(BDocType &doc, const string& name) {
+        auto & st = appendStable(doc.structtype);
+        st.idx = ++_idx;
+        st.name = name;
+        st.internalid = hashId(name);
+        return st;
+    }
+    // Adds a field with a name-derived internal id; the caller sets its type.
+    BStructField & addField(BStructT &st, const string& name) {
+        auto & f = appendStable(st.field);
+        f.name = name;
+        f.internalid = hashId(name);
+        return f;
+    }
+    // Adds an array type over the given element idx; internal id defaults to the idx.
+    BArrayT & addArray(BDocType &doc, int nestedIdx) {
+        auto & a = appendStable(doc.arraytype);
+        a.idx = ++_idx;
+        a.elementtype = nestedIdx;
+        a.internalid = a.idx;
+        return a;
+    }
+    // Adds a map type over the given key/value idx; internal id defaults to the idx.
+    BMapT & addMap(BDocType &doc, int keyIdx, int valIdx) {
+        auto & m = appendStable(doc.maptype);
+        m.idx = ++_idx;
+        m.keytype = keyIdx;
+        m.valuetype = valIdx;
+        m.internalid = m.idx;
+        return m;
+    }
+    // Adds a weighted set type over the given element idx; internal id defaults to the idx.
+    BWsetT & addWset(BDocType &doc, int nestedIdx) {
+        auto & w = appendStable(doc.wsettype);
+        w.idx = ++_idx;
+        w.elementtype = nestedIdx;
+        w.internalid = w.idx;
+        return w;
+    }
+    // Adds an annotation type with a name-derived internal id.
+    BAnnotationT & addAnnotation(BDocType &doc, const string &name) {
+        auto & ann = appendStable(doc.annotationtype);
+        ann.idx = ++_idx;
+        ann.name = name;
+        ann.internalid = hashId(name);
+        return ann;
+    }
+    // Adds an annotation reference type; internal id defaults to the idx.
+    BAnnRefT & addAnnotationRef(BDocType &doc, int annIdx) {
+        auto & aref = appendStable(doc.annotationref);
+        aref.idx = ++_idx;
+        aref.annotationtype = annIdx;
+        aref.internalid = aref.idx;
+        return aref;
+    }
+    // Adds a document reference type; internal id defaults to the idx.
+    BDocRefT & addDocumentRef(BDocType &doc, int targetIdx) {
+        auto & dref = appendStable(doc.documentref);
+        dref.idx = ++_idx;
+        dref.targettype = targetIdx;
+        dref.internalid = dref.idx;
+        return dref;
+    }
+    // Adds a tensor type with the given detailed type spec.
+    BTensorT & addTensorType(BDocType &doc, const string& spec) {
+        auto & tt = appendStable(doc.tensortype);
+        tt.idx = ++_idx;
+        tt.detailedtype = spec;
+        return tt;
+    }
+    const DocumenttypesConfig & config() { return _config; }
+    // Creates the root "document" doctype and registers the primitive types in it.
+    BuilderHelper() {
+        _idxOfBuiltins.resize(DataType::MAX);
+        LOG(debug, "builtins.size = %zu", _idxOfBuiltins.size());
+        auto & root = document("document");
+        root.internalid = DataType::T_DOCUMENT;
+        addPrimitive(root, "int", DataType::T_INT);
+        addPrimitive(root, "float", DataType::T_FLOAT);
+        addPrimitive(root, "string", DataType::T_STRING);
+        addPrimitive(root, "raw", DataType::T_RAW);
+        addPrimitive(root, "long", DataType::T_LONG);
+        addPrimitive(root, "double", DataType::T_DOUBLE);
+        addPrimitive(root, "bool", DataType::T_BOOL);
+        addPrimitive(root, "uri", DataType::T_URI);
+        addPrimitive(root, "byte", DataType::T_BYTE);
+        addPrimitive(root, "tag", DataType::T_TAG);
+        addPrimitive(root, "short", DataType::T_SHORT);
+        addPrimitive(root, "predicate", DataType::T_PREDICATE);
+    }
+    // Returns the idx registered for a builtin type; T_DOCUMENT maps to the root doctype.
+    int builtin(DataType::Type t) {
+        if (t == DataType::T_DOCUMENT) {
+            return _config.doctype[0].idx;
+        }
+        assert(static_cast<size_t>(t) < _idxOfBuiltins.size());
+        LOG(debug, "lookup builtin %d -> %d", (int)t, _idxOfBuiltins[t]);
+        return _idxOfBuiltins[t];
+    }
+};
+
+BuilderHelper::~BuilderHelper() = default;
+
+// A configured document type can be fetched by name, and both the type and
+// its fields struct report the configured names and ids.
+TEST("requireThatDocumentTypeCanBeLookedUp") {
+    BuilderHelper helper;
+    auto &doc_cfg = helper.document(type_name);
+    doc_cfg.internalid = doc_type_id;
+    auto &header_cfg = doc_cfg.structtype[0];
+    header_cfg.internalid = header_id;
+    DocumentTypeRepo repo(helper.config());
+
+    const DocumentType *type = repo.getDocumentType(type_name);
+    ASSERT_TRUE(type);
+    EXPECT_EQUAL(type_name, type->getName());
+    EXPECT_EQUAL(doc_type_id, type->getId());
+    EXPECT_EQUAL(header_name, type->getFieldsType().getName());
+    EXPECT_EQUAL(header_id, type->getFieldsType().getId());
+}
+
+// Same lookup as above, but with ids that are offset from the usual values
+// for both the document type and its content struct.
+TEST("requireThatDocumentTypeCanBeLookedUpWhenIdIsNotAHash") {
+    BuilderHelper helper;
+    auto &doc_cfg = helper.document(type_name);
+    doc_cfg.internalid = doc_type_id + 2;
+    auto &header_cfg = doc_cfg.structtype[0];
+    header_cfg.name = header_name;
+    header_cfg.internalid = header_id + 3;
+    DocumentTypeRepo repo(helper.config());
+
+    const DocumentType *type = repo.getDocumentType(type_name);
+    ASSERT_TRUE(type);
+    EXPECT_EQUAL(type_name, type->getName());
+    EXPECT_EQUAL(doc_type_id + 2, type->getId());
+    EXPECT_EQUAL(header_name, type->getFieldsType().getName());
+    EXPECT_EQUAL(header_id + 3, type->getFieldsType().getId());
+}
+
+// A single int field added to the content struct shows up in the repo.
+TEST("requireThatDocumentsCanHaveFields") {
+    BuilderHelper helper;
+    auto &doc_cfg = helper.document(type_name);
+    const int int_idx = helper.builtin(DataType::T_INT);
+    helper.addField(doc_cfg, field_name).type = int_idx;
+    DocumentTypeRepo repo(helper.config());
+
+    const DocumentType *doc_type = repo.getDocumentType(type_name);
+    const StructDataType &s = doc_type->getFieldsType();
+    ASSERT_EQUAL(1u, s.getFieldCount());
+    const Field &field = s.getField(field_name);
+    EXPECT_EQUAL(DataType::T_INT, field.getDataType().getId());
+}
+
+// Fetches the data type of field `field_name` in document type `type_name`
+// and returns it downcast to T. Fails the test (instead of dereferencing a
+// null pointer) when the document type is missing from the repo, and also
+// when the field's data type is not a T.
+template <typename T>
+const T &getFieldDataType(const DocumentTypeRepo &repo) {
+    const DocumentType *doc_type = repo.getDocumentType(type_name);
+    ASSERT_TRUE(doc_type);
+    const DataType &d = doc_type->getFieldsType().getField(field_name).getDataType();
+    const T *t = dynamic_cast<const T *>(&d);
+    ASSERT_TRUE(t);
+    return *t;
+}
+
+// An array-of-string field resolves to an ArrayDataType nesting T_STRING.
+TEST("requireThatArraysCanBeConfigured") {
+    BuilderHelper helper;
+    auto &doc_cfg = helper.document(type_name);
+    const int string_idx = helper.builtin(DataType::T_STRING);
+    auto &array_cfg = helper.addArray(doc_cfg, string_idx);
+    auto &field_cfg = helper.addField(doc_cfg, field_name);
+    field_cfg.type = array_cfg.idx;
+    DocumentTypeRepo repo(helper.config());
+
+    const ArrayDataType &a = getFieldDataType<ArrayDataType>(repo);
+    EXPECT_EQUAL(DataType::T_STRING, a.getNestedType().getId());
+}
+
+// A weighted set of int keeps its element type and both flags.
+TEST("requireThatWsetsCanBeConfigured") {
+    BuilderHelper helper;
+    auto &doc_cfg = helper.document(type_name);
+    const int int_idx = helper.builtin(DataType::T_INT);
+    auto &wset_cfg = helper.addWset(doc_cfg, int_idx);
+    wset_cfg.removeifzero = true;
+    wset_cfg.createifnonexistent = true;
+    auto &field_cfg = helper.addField(doc_cfg, field_name);
+    field_cfg.type = wset_cfg.idx;
+    DocumentTypeRepo repo(helper.config());
+
+    const WeightedSetDataType &w = getFieldDataType<WeightedSetDataType>(repo);
+    EXPECT_EQUAL(DataType::T_INT, w.getNestedType().getId());
+    EXPECT_TRUE(w.createIfNonExistent());
+    EXPECT_TRUE(w.removeIfZero());
+}
+
+// A map from int to string resolves to a MapDataType with those key/value types.
+TEST("requireThatMapsCanBeConfigured") {
+    BuilderHelper helper;
+    auto &doc_cfg = helper.document(type_name);
+    const int key_idx = helper.builtin(DataType::T_INT);
+    const int value_idx = helper.builtin(DataType::T_STRING);
+    auto &map_cfg = helper.addMap(doc_cfg, key_idx, value_idx);
+    auto &field_cfg = helper.addField(doc_cfg, field_name);
+    field_cfg.type = map_cfg.idx;
+    DocumentTypeRepo repo(helper.config());
+
+    const MapDataType &m = getFieldDataType<MapDataType>(repo);
+    EXPECT_EQUAL(DataType::T_INT, m.getKeyType().getId());
+    EXPECT_EQUAL(DataType::T_STRING, m.getValueType().getId());
+}
+
+// A field typed as an annotation reference resolves to the configured
+// annotation type (checked by id and by name).
+TEST("requireThatAnnotationReferencesCanBeConfigured") {
+    int32_t annotation_type_id = 424;
+    BuilderHelper helper;
+    auto &doc_cfg = helper.document(type_name);
+    auto &annotation_cfg = helper.addAnnotation(doc_cfg, "foo");
+    annotation_cfg.internalid = annotation_type_id;
+    auto &reference_cfg = helper.addAnnotationRef(doc_cfg, annotation_cfg.idx);
+    auto &field_cfg = helper.addField(doc_cfg, field_name);
+    field_cfg.type = reference_cfg.idx;
+    DocumentTypeRepo repo(helper.config());
+
+    const AnnotationReferenceDataType &ar = getFieldDataType<AnnotationReferenceDataType>(repo);
+    EXPECT_EQUAL(annotation_type_id, ar.getAnnotationType().getId());
+    EXPECT_EQUAL("foo", ar.getAnnotationType().getName());
+}
+
+// A derived document type exposes both its own field and the one declared
+// on the document type it inherits.
+TEST("requireThatDocumentsCanInheritFields") {
+    BuilderHelper helper;
+    auto &parent_cfg = helper.document(type_name);
+    auto &child_cfg = helper.document(derived_name);
+    const int int_idx = helper.builtin(DataType::T_INT);
+    helper.addField(parent_cfg, field_name).type = int_idx;
+    const int string_idx = helper.builtin(DataType::T_STRING);
+    helper.addField(child_cfg, "derived_field").type = string_idx;
+    auto &inherit_cfg = child_cfg.inherits.emplace_back();
+    inherit_cfg.idx = parent_cfg.idx;
+    DocumentTypeRepo repo(helper.config());
+
+    const DocumentType *derived_type = repo.getDocumentType(derived_name);
+    const StructDataType &s = derived_type->getFieldsType();
+    ASSERT_EQUAL(2u, s.getFieldCount());
+    const Field &field = s.getField(field_name);
+    const DataType &type = field.getDataType();
+    EXPECT_EQUAL(DataType::T_INT, type.getId());
+    EXPECT_EQUAL(DataType::T_STRING, s.getField("derived_field").getDataType().getId());
+}
+
+// A field in the derived document may use an array type that is declared in
+// the document type it inherits.
+TEST("requireThatDocumentsCanUseInheritedTypes") {
+    const int32_t id = 64;
+    BuilderHelper helper;
+    auto &parent_cfg = helper.document(type_name);
+    auto &child_cfg = helper.document(derived_name);
+    const int int_idx = helper.builtin(DataType::T_INT);
+    auto &array_cfg = helper.addArray(parent_cfg, int_idx);
+    array_cfg.internalid = id;
+    auto &parent_field = helper.addField(parent_cfg, "foo");
+    parent_field.type = array_cfg.idx;
+    auto &child_field = helper.addField(child_cfg, field_name);
+    child_field.type = array_cfg.idx;
+    auto &inherit_cfg = child_cfg.inherits.emplace_back();
+    inherit_cfg.idx = parent_cfg.idx;
+
+    DocumentTypeRepo repo(helper.config());
+
+    const DocumentType *derived_type = repo.getDocumentType(derived_name);
+    const DataType &type = derived_type->getFieldsType().getField(field_name).getDataType();
+    EXPECT_EQUAL(id, type.getId());
+    EXPECT_TRUE(dynamic_cast<const ArrayDataType *>(&type));
+}
+
+// Inheriting from an idx that no doctype has makes repo construction throw.
+TEST("requireThatIllegalConfigsCausesExceptions") {
+    BuilderHelper builder;
+    auto &doc_cfg = builder.document(type_name);
+    auto &bogus_inherit = doc_cfg.inherits.emplace_back();
+    bogus_inherit.idx = 20000;
+    EXPECT_EXCEPTION(DocumentTypeRepo repo(builder.config()),
+                     IllegalArgumentException, "Unable to find document");
+}
+
+// Document types are found by their configured ids, and a data type id is
+// only resolved within the document type that declares it.
+TEST("requireThatDataTypesCanBeLookedUpById") {
+    BuilderHelper helper;
+    auto &first_cfg = helper.document(type_name);
+    auto &second_cfg = helper.document(derived_name);
+    first_cfg.internalid = doc_type_id;
+    auto &first_header = first_cfg.structtype[0];
+    first_header.internalid = header_id;
+    second_cfg.internalid = doc_type_id + 1;
+    DocumentTypeRepo repo(helper.config());
+
+    const auto * dt1 = repo.getDocumentType(type_name);
+    const auto * dt2 = repo.getDocumentType(derived_name);
+
+    ASSERT_TRUE(dt1);
+    ASSERT_TRUE(dt2);
+    EXPECT_EQUAL(dt1, repo.getDocumentType(doc_type_id));
+    EXPECT_EQUAL(dt2, repo.getDocumentType(doc_type_id + 1));
+
+    const DataType *type = repo.getDataType(*dt1, header_id);
+    ASSERT_TRUE(type);
+    EXPECT_EQUAL(header_name, type->getName());
+    EXPECT_EQUAL(header_id, type->getId());
+
+    // Unknown id, and an id that belongs to the other document type.
+    EXPECT_TRUE(!repo.getDataType(*dt1, -1));
+    EXPECT_TRUE(!repo.getDataType(*dt2, header_id));
+}
+
+// A data type name is resolved within the document type that declares it,
+// and not within another document type.
+TEST("requireThatDataTypesCanBeLookedUpByName") {
+    BuilderHelper helper;
+    auto &first_cfg = helper.document(type_name);
+    auto &first_header = first_cfg.structtype[0];
+    first_header.internalid = header_id;
+    helper.document(type_name_2);
+    DocumentTypeRepo repo(helper.config());
+
+    const DocumentType * dt1 = repo.getDocumentType(type_name);
+    const DocumentType * dt2 = repo.getDocumentType(type_name_2);
+    ASSERT_TRUE(dt1);
+    ASSERT_TRUE(dt2);
+
+    const DataType *type = repo.getDataType(*dt1, header_name);
+    ASSERT_TRUE(type);
+    EXPECT_EQUAL(header_name, type->getName());
+    EXPECT_EQUAL(header_id, type->getId());
+
+    EXPECT_TRUE(repo.getDataType(*dt1, header_name));
+    EXPECT_TRUE(!repo.getDataType(*dt1, field_name));
+    EXPECT_TRUE(!repo.getDataType(*dt2, header_name));
+}
+
+// Declaring the same string field in both parent and child yields a single
+// field in the derived document type.
+TEST("requireThatInheritingDocCanRedefineIdenticalField") {
+    BuilderHelper helper;
+
+    auto &parent_cfg = helper.document(type_name);
+    auto &child_cfg = helper.document(derived_name);
+    const int parent_field_type = helper.builtin(DataType::T_STRING);
+    helper.addField(parent_cfg, field_name).type = parent_field_type;
+
+    const int child_field_type = helper.builtin(DataType::T_STRING);
+    helper.addField(child_cfg, field_name).type = child_field_type;
+    auto &inherit_cfg = child_cfg.inherits.emplace_back();
+    inherit_cfg.idx = parent_cfg.idx;
+
+    DocumentTypeRepo repo(helper.config());
+
+    const DocumentType *derived_type = repo.getDocumentType(derived_name);
+    const StructDataType &s = derived_type->getFieldsType();
+    ASSERT_EQUAL(1u, s.getFieldCount());
+}
+
+// A configured annotation type (with a string data type) can be looked up by
+// id; annotation types with id 1 ("term") and 2 ("token_type") are also
+// expected to be available for the document type.
+TEST("requireThatAnnotationTypesCanBeConfigured") {
+    const int32_t a_id = 654;
+    const string a_name = "annotation_name";
+    BuilderHelper helper;
+    auto &doc_cfg = helper.document(type_name);
+    auto &annotation_cfg = helper.addAnnotation(doc_cfg, a_name);
+    annotation_cfg.internalid = a_id;
+    annotation_cfg.datatype = helper.builtin(DataType::T_STRING);
+
+    DocumentTypeRepo repo(helper.config());
+
+    const DocumentType *type = repo.getDocumentType(type_name);
+    ASSERT_TRUE(type);
+    const AnnotationType *a_type = repo.getAnnotationType(*type, a_id);
+    ASSERT_TRUE(a_type);
+    EXPECT_EQUAL(a_name, a_type->getName());
+    ASSERT_TRUE(a_type->getDataType());
+    EXPECT_EQUAL(DataType::T_STRING, a_type->getDataType()->getId());
+
+    a_type = repo.getAnnotationType(*type, 1);
+    ASSERT_TRUE(a_type);
+    EXPECT_EQUAL(1, a_type->getId());
+    EXPECT_EQUAL("term", a_type->getName());
+
+    a_type = repo.getAnnotationType(*type, 2);
+    ASSERT_TRUE(a_type);
+    EXPECT_EQUAL(2, a_type->getId());
+    EXPECT_EQUAL("token_type", a_type->getName());
+}
+
+// A field may have another document type as its data type.
+TEST("requireThatDocumentsCanUseOtherDocumentTypes") {
+    BuilderHelper helper;
+    auto &target_cfg = helper.document(type_name_2);
+    target_cfg.internalid = doc_type_id + 1;
+    auto &user_cfg = helper.document(type_name);
+    auto &field_cfg = helper.addField(user_cfg, field_name);
+    field_cfg.type = target_cfg.idx;
+    DocumentTypeRepo repo(helper.config());
+
+    const DocumentType *user_type = repo.getDocumentType(type_name);
+    const DataType &type = user_type->getFieldsType().getField(field_name).getDataType();
+    EXPECT_EQUAL(doc_type_id + 1, type.getId());
+    EXPECT_TRUE(dynamic_cast<const DocumentType *>(&type));
+}
+
+// Iterating the repo visits the root document type plus the two configured ones.
+TEST("requireThatDocumentTypesCanBeIterated") {
+    BuilderHelper helper;
+    auto &first_cfg = helper.document(type_name);
+    first_cfg.internalid = doc_type_id;
+    auto &second_cfg = helper.document(type_name_2);
+    second_cfg.internalid = doc_type_id + 1;
+    DocumentTypeRepo repo(helper.config());
+
+    set<int> ids;
+    auto collect_id = [&ids](const DocumentType &visited) { ids.insert(visited.getId()); };
+    repo.forEachDocumentType(collect_id);
+
+    EXPECT_EQUAL(3u, ids.size());
+    ASSERT_TRUE(ids.count(DataType::T_DOCUMENT));
+    ASSERT_TRUE(ids.count(doc_type_id));
+    ASSERT_TRUE(ids.count(doc_type_id + 1));
+}
+
+// Lookup by name must not return a type that merely shares the id.
+TEST("requireThatDocumentLookupChecksName") {
+    BuilderHelper helper;
+    auto &doc_cfg = helper.document(type_name_2);
+    doc_cfg.internalid = doc_type_id;
+    DocumentTypeRepo repo(helper.config());
+
+    // "type_name" will generate the document type id
+    // "doc_type_id". However, this config assigns that id to a
+    // different type.
+    const DocumentType *type = repo.getDocumentType(type_name);
+    ASSERT_TRUE(!type);
+}
+
+// A repo built from the types.cfg test file knows "document" and "types".
+TEST("requireThatBuildFromConfigWorks") {
+    DocumentTypeRepo repo(
+            readDocumenttypesConfig(TEST_PATH("types.cfg")));
+    ASSERT_TRUE(repo.getDocumentType("document"));
+    ASSERT_TRUE(repo.getDocumentType("types"));
+}
+
+// Struct inheritance is transitive: sa inherits sb, which inherits sc, so a
+// field of type sa sees the fields of all three.
+TEST("requireThatStructsCanInheritFields") {
+    BuilderHelper helper;
+    auto &doc_cfg = helper.document(type_name);
+    auto &struct_a = helper.addStruct(doc_cfg, "sa");
+    auto &struct_b = helper.addStruct(doc_cfg, "sb");
+    auto &struct_c = helper.addStruct(doc_cfg, "sc");
+    const int int_idx = helper.builtin(DataType::T_INT);
+    helper.addField(struct_a, "fa").type = int_idx;
+    const int long_idx = helper.builtin(DataType::T_LONG);
+    helper.addField(struct_b, "fb").type = long_idx;
+    const int string_idx = helper.builtin(DataType::T_STRING);
+    helper.addField(struct_c, "fc").type = string_idx;
+    struct_a.inherits.emplace_back().type = struct_b.idx;
+    struct_b.inherits.emplace_back().type = struct_c.idx;
+    auto &field_cfg = helper.addField(doc_cfg, field_name);
+    field_cfg.type = struct_a.idx;
+    DocumentTypeRepo repo(helper.config());
+
+    const StructDataType &s = getFieldDataType<StructDataType>(repo);
+    EXPECT_EQUAL(3u, s.getFieldCount());
+    ASSERT_TRUE(s.hasField("fa"));
+    ASSERT_TRUE(s.hasField("fb"));
+    ASSERT_TRUE(s.hasField("fc"));
+}
+
+// A struct may contain a field of its own type; the nested field's data type
+// is then the very same struct object.
+TEST("requireThatStructsCanBeRecursive") {
+    BuilderHelper helper;
+    auto &doc_cfg = helper.document(type_name);
+    auto &folder_cfg = helper.addStruct(doc_cfg, "folder");
+    auto &subfolder_field = helper.addField(folder_cfg, "subfolder");
+    subfolder_field.type = folder_cfg.idx;
+    auto &doc_field = helper.addField(doc_cfg, field_name);
+    doc_field.type = folder_cfg.idx;
+    DocumentTypeRepo repo(helper.config());
+
+    const StructDataType &s = getFieldDataType<StructDataType>(repo);
+    EXPECT_EQUAL(1u, s.getFieldCount());
+    ASSERT_TRUE(s.hasField("subfolder"));
+    EXPECT_EQUAL(&s, &s.getField("subfolder").getDataType());
+}
+
+} // namespace
+
+TEST("requireThatMissingFileCausesException") {
+ EXPECT_EXCEPTION(readDocumenttypesConfig("illegal/missing_file"),
+ IllegalArgumentException, "Unable to open file"); // expected exception type and message text
+}
+
+TEST("requireThatFieldsCanHaveAnyDocumentType") {
+ BuilderHelper builder;
+ auto &doc1 = builder.document(type_name);
+ auto &doc2 = builder.document(type_name_2);
+
+ // Circular dependency: each document type has a field of the other type.
+ builder.addField(doc1, field_name).type = doc2.idx;
+ builder.addField(doc2, field_name).type = doc1.idx;
+
+ DocumentTypeRepo repo(builder.config());
+ const DocumentType *type1 = repo.getDocumentType(type_name);
+ const DocumentType *type2 = repo.getDocumentType(type_name_2);
+ ASSERT_TRUE(type1 != nullptr);
+ ASSERT_TRUE(type2 != nullptr); // checked before type2 is used as an expected value below
+ EXPECT_TRUE(type1->getFieldsType().hasField(field_name));
+ EXPECT_EQUAL(type2, &type1->getFieldsType().getField(field_name).getDataType());
+ EXPECT_TRUE(type2->getFieldsType().hasField(field_name));
+ EXPECT_EQUAL(type1, &type2->getFieldsType().getField(field_name).getDataType());
+}
+
+TEST("Require that Array can have nested DocumentType") {
+ BuilderHelper helper;
+ auto &doc = helper.document(type_name);
+ auto &self_array = helper.addArray(doc, doc.idx); // array whose element type is the document type itself
+ helper.addField(doc, field_name).type = self_array.idx;
+ DocumentTypeRepo repo(helper.config());
+ const DocumentType *resolved = repo.getDocumentType(type_name);
+ ASSERT_TRUE(resolved != nullptr);
+}
+
+TEST("Reference fields are resolved to correct reference type") {
+ const int doc_with_refs_id = 5678;
+ const int ref1_id = 777;
+ const int ref2_id = 888;
+ BuilderHelper builder;
+ auto & doc1 = builder.document(type_name);
+ auto & doc2 = builder.document(type_name_2);
+ auto & doc3 = builder.document("doc_with_refs");
+ doc3.internalid = doc_with_refs_id;
+ auto & refT1 = builder.addDocumentRef(doc3, doc1.idx); // reference type targeting doc1
+ refT1.internalid = ref1_id;
+ auto & refT2 = builder.addDocumentRef(doc3, doc2.idx); // reference type targeting doc2
+ refT2.internalid = ref2_id;
+ builder.addField(doc3, "ref1").type = refT1.idx;
+ builder.addField(doc3, "ref2").type = refT2.idx;
+ builder.addField(doc3, "ref3").type = refT1.idx; // shares the reference type used by ref1
+
+ DocumentTypeRepo repo(builder.config());
+ const DocumentType *type = repo.getDocumentType(doc_with_refs_id);
+ ASSERT_TRUE(type != nullptr);
+ const auto* ref1_type(repo.getDataType(*type, ref1_id));
+ const auto* ref2_type(repo.getDataType(*type, ref2_id));
+ ASSERT_TRUE((ref1_type != nullptr) && (ref2_type != nullptr)); // both are dereferenced below
+ EXPECT_EQUAL(*ref1_type, type->getFieldsType().getField("ref1").getDataType());
+ EXPECT_EQUAL(*ref2_type, type->getFieldsType().getField("ref2").getDataType());
+ EXPECT_EQUAL(*ref1_type, type->getFieldsType().getField("ref3").getDataType());
+}
+
+TEST("Config with no imported fields has empty imported fields set in DocumentType") {
+ BuilderHelper helper;
+ helper.document(type_name); // no importedfield entries are added to this doctype
+ DocumentTypeRepo repo(helper.config());
+ const auto *doc_type = repo.getDocumentType(type_name);
+ ASSERT_TRUE(doc_type != nullptr);
+ EXPECT_TRUE(doc_type->imported_field_names().empty());
+ EXPECT_FALSE(doc_type->has_imported_field_name("foo"));
+}
+
+TEST("Configured imported field names are available in the DocumentType") {
+ // Note: this is a shortcut; the types below declare imported field names without
+ // having any reference fields. Revisit if config read-time validation is added. :)
+ BuilderHelper helper;
+ // First type: a single imported field
+ helper.document(type_name).importedfield.emplace_back().name = "my_cool_field";
+ // Second type: a pair of imported fields
+ auto & second_doc = helper.document(type_name_2);
+ second_doc.importedfield.emplace_back().name = "my_awesome_field";
+ second_doc.importedfield.emplace_back().name = "my_swag_field";
+
+ DocumentTypeRepo repo(helper.config());
+ const auto* first_type = repo.getDocumentType(type_name);
+ ASSERT_TRUE(first_type != nullptr);
+ EXPECT_EQUAL(1u, first_type->imported_field_names().size());
+ EXPECT_TRUE(first_type->has_imported_field_name("my_cool_field"));
+ EXPECT_FALSE(first_type->has_imported_field_name("my_awesome_field"));
+
+ const auto* second_type = repo.getDocumentType(type_name_2);
+ ASSERT_TRUE(second_type != nullptr);
+ EXPECT_EQUAL(2u, second_type->imported_field_names().size());
+ EXPECT_TRUE(second_type->has_imported_field_name("my_awesome_field"));
+ EXPECT_TRUE(second_type->has_imported_field_name("my_swag_field"));
+ EXPECT_FALSE(second_type->has_imported_field_name("my_cool_field"));
+}
+
+namespace {
+// Casts to TensorDataType; dynamic_cast on a reference throws std::bad_cast on a type mismatch.
+const TensorDataType &
+asTensorDataType(const DataType &dataType) {
+ return dynamic_cast<const TensorDataType &>(dataType);
+}
+
+} // namespace
+
+TEST("Tensor fields have tensor types") {
+ BuilderHelper helper;
+ auto & doc = helper.document(type_name);
+ auto & first_tensor = helper.addTensorType(doc, "tensor(x[3])");
+ auto & second_tensor = helper.addTensorType(doc, "tensor(y{})");
+ helper.addField(doc, "tensor1").type = first_tensor.idx;
+ helper.addField(doc, "tensor2").type = second_tensor.idx;
+ helper.addField(doc, "tensor3").type = first_tensor.idx; // configured with the same type idx as tensor1
+
+ DocumentTypeRepo repo(helper.config());
+ auto *doc_type = repo.getDocumentType(type_name);
+ ASSERT_TRUE(doc_type != nullptr);
+ auto &field1 = doc_type->getField("tensor1");
+ auto &field2 = doc_type->getField("tensor2");
+ EXPECT_EQUAL("tensor(x[3])", asTensorDataType(field1.getDataType()).getTensorType().to_spec());
+ EXPECT_EQUAL("tensor(y{})", asTensorDataType(field2.getDataType()).getTensorType().to_spec());
+ auto &field3 = doc_type->getField("tensor3");
+ EXPECT_TRUE(&field1.getDataType() == &field3.getDataType()); // identical instance, compared by address
+ auto created_value = field1.getDataType().createFieldValue();
+ EXPECT_TRUE(&field1.getDataType() == created_value->getDataType()); // the value points back at its data type
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/document/src/tests/repo/types.cfg b/document/src/tests/repo/types.cfg
new file mode 100644
index 00000000000..cab69f4b1aa
--- /dev/null
+++ b/document/src/tests/repo/types.cfg
@@ -0,0 +1,326 @@
+enablecompression false
+usev8geopositions false
+doctype[0].name "document"
+doctype[0].idx 10000
+doctype[0].internalid 8
+doctype[0].contentstruct 10001
+doctype[0].primitivetype[0].idx 10002
+doctype[0].primitivetype[0].name "bool"
+doctype[0].primitivetype[1].idx 10003
+doctype[0].primitivetype[1].name "byte"
+doctype[0].primitivetype[2].idx 10004
+doctype[0].primitivetype[2].name "double"
+doctype[0].primitivetype[3].idx 10005
+doctype[0].primitivetype[3].name "float"
+doctype[0].primitivetype[4].idx 10006
+doctype[0].primitivetype[4].name "float16"
+doctype[0].primitivetype[5].idx 10007
+doctype[0].primitivetype[5].name "int"
+doctype[0].primitivetype[6].idx 10008
+doctype[0].primitivetype[6].name "long"
+doctype[0].primitivetype[7].idx 10010
+doctype[0].primitivetype[7].name "predicate"
+doctype[0].primitivetype[8].idx 10011
+doctype[0].primitivetype[8].name "raw"
+doctype[0].primitivetype[9].idx 10012
+doctype[0].primitivetype[9].name "string"
+doctype[0].primitivetype[10].idx 10014
+doctype[0].primitivetype[10].name "uri"
+doctype[0].wsettype[0].idx 10013
+doctype[0].wsettype[0].elementtype 10012
+doctype[0].wsettype[0].createifnonexistent true
+doctype[0].wsettype[0].removeifzero true
+doctype[0].wsettype[0].internalid 18
+doctype[0].structtype[0].idx 10001
+doctype[0].structtype[0].name "document.header"
+doctype[0].structtype[0].internalid -284186494
+doctype[0].structtype[1].idx 10009
+doctype[0].structtype[1].name "position"
+doctype[0].structtype[1].field[0].name "x"
+doctype[0].structtype[1].field[0].internalid 914677694
+doctype[0].structtype[1].field[0].type 10007
+doctype[0].structtype[1].field[1].name "y"
+doctype[0].structtype[1].field[1].internalid 900009410
+doctype[0].structtype[1].field[1].type 10007
+doctype[0].structtype[1].internalid 1381038251
+doctype[1].name "types"
+doctype[1].idx 10015
+doctype[1].internalid -853072901
+doctype[1].inherits[0].idx 10000
+doctype[1].contentstruct 10016
+doctype[1].fieldsets{[document]}.fields[0] "Folders"
+doctype[1].fieldsets{[document]}.fields[1] "abool"
+doctype[1].fieldsets{[document]}.fields[2] "abyte"
+doctype[1].fieldsets{[document]}.fields[3] "album0"
+doctype[1].fieldsets{[document]}.fields[4] "album1"
+doctype[1].fieldsets{[document]}.fields[5] "along"
+doctype[1].fieldsets{[document]}.fields[6] "arrarr"
+doctype[1].fieldsets{[document]}.fields[7] "arrayfield"
+doctype[1].fieldsets{[document]}.fields[8] "arraymapfield"
+doctype[1].fieldsets{[document]}.fields[9] "ashortfloat"
+doctype[1].fieldsets{[document]}.fields[10] "complexarray"
+doctype[1].fieldsets{[document]}.fields[11] "doublemapfield"
+doctype[1].fieldsets{[document]}.fields[12] "floatmapfield"
+doctype[1].fieldsets{[document]}.fields[13] "intmapfield"
+doctype[1].fieldsets{[document]}.fields[14] "juletre"
+doctype[1].fieldsets{[document]}.fields[15] "longmapfield"
+doctype[1].fieldsets{[document]}.fields[16] "maparr"
+doctype[1].fieldsets{[document]}.fields[17] "mystructarr"
+doctype[1].fieldsets{[document]}.fields[18] "mystructfield"
+doctype[1].fieldsets{[document]}.fields[19] "mystructmap"
+doctype[1].fieldsets{[document]}.fields[20] "setfield"
+doctype[1].fieldsets{[document]}.fields[21] "setfield2"
+doctype[1].fieldsets{[document]}.fields[22] "setfield3"
+doctype[1].fieldsets{[document]}.fields[23] "setfield4"
+doctype[1].fieldsets{[document]}.fields[24] "stringmapfield"
+doctype[1].fieldsets{[document]}.fields[25] "structarrayfield"
+doctype[1].fieldsets{[document]}.fields[26] "structfield"
+doctype[1].fieldsets{[document]}.fields[27] "tagfield"
+doctype[1].arraytype[0].idx 10017
+doctype[1].arraytype[0].elementtype 10007
+doctype[1].arraytype[0].internalid -1245117006
+doctype[1].arraytype[1].idx 10024
+doctype[1].arraytype[1].elementtype 10023
+doctype[1].arraytype[1].internalid -1244829667
+doctype[1].arraytype[2].idx 10031
+doctype[1].arraytype[2].elementtype 10007
+doctype[1].arraytype[2].internalid -1245117006
+doctype[1].arraytype[3].idx 10032
+doctype[1].arraytype[3].elementtype 10033
+doctype[1].arraytype[3].internalid -794985308
+doctype[1].arraytype[4].idx 10033
+doctype[1].arraytype[4].elementtype 10034
+doctype[1].arraytype[4].internalid 1707615575
+doctype[1].arraytype[5].idx 10034
+doctype[1].arraytype[5].elementtype 10012
+doctype[1].arraytype[5].internalid -1486737430
+doctype[1].arraytype[6].idx 10035
+doctype[1].arraytype[6].elementtype 10036
+doctype[1].arraytype[6].internalid 69621385
+doctype[1].arraytype[7].idx 10037
+doctype[1].arraytype[7].elementtype 10038
+doctype[1].arraytype[7].internalid 1416345047
+doctype[1].arraytype[8].idx 10039
+doctype[1].arraytype[8].elementtype 10040
+doctype[1].arraytype[8].internalid 1707615575
+doctype[1].arraytype[9].idx 10040
+doctype[1].arraytype[9].elementtype 10012
+doctype[1].arraytype[9].internalid -1486737430
+doctype[1].arraytype[10].idx 10042
+doctype[1].arraytype[10].elementtype 10003
+doctype[1].arraytype[10].internalid 49942803
+doctype[1].arraytype[11].idx 10045
+doctype[1].arraytype[11].elementtype 10041
+doctype[1].arraytype[11].internalid 759956026
+doctype[1].maptype[0].idx 10025
+doctype[1].maptype[0].keytype 10012
+doctype[1].maptype[0].valuetype 10012
+doctype[1].maptype[0].internalid 339965458
+doctype[1].maptype[1].idx 10026
+doctype[1].maptype[1].keytype 10012
+doctype[1].maptype[1].valuetype 10007
+doctype[1].maptype[1].internalid -1584287606
+doctype[1].maptype[2].idx 10027
+doctype[1].maptype[2].keytype 10012
+doctype[1].maptype[2].valuetype 10005
+doctype[1].maptype[2].internalid 2125154557
+doctype[1].maptype[3].idx 10028
+doctype[1].maptype[3].keytype 10007
+doctype[1].maptype[3].valuetype 10008
+doctype[1].maptype[3].internalid -1715531035
+doctype[1].maptype[4].idx 10029
+doctype[1].maptype[4].keytype 10007
+doctype[1].maptype[4].valuetype 10004
+doctype[1].maptype[4].internalid 2138385264
+doctype[1].maptype[5].idx 10030
+doctype[1].maptype[5].keytype 10012
+doctype[1].maptype[5].valuetype 10031
+doctype[1].maptype[5].internalid 435886609
+doctype[1].maptype[6].idx 10036
+doctype[1].maptype[6].keytype 10012
+doctype[1].maptype[6].valuetype 10012
+doctype[1].maptype[6].internalid 339965458
+doctype[1].maptype[7].idx 10038
+doctype[1].maptype[7].keytype 10007
+doctype[1].maptype[7].valuetype 10039
+doctype[1].maptype[7].internalid -372512406
+doctype[1].maptype[8].idx 10043
+doctype[1].maptype[8].keytype 10012
+doctype[1].maptype[8].valuetype 10012
+doctype[1].maptype[8].internalid 339965458
+doctype[1].maptype[9].idx 10044
+doctype[1].maptype[9].keytype 10007
+doctype[1].maptype[9].valuetype 10041
+doctype[1].maptype[9].internalid 1901258752
+doctype[1].maptype[10].idx 10046
+doctype[1].maptype[10].keytype 10007
+doctype[1].maptype[10].valuetype 10047
+doctype[1].maptype[10].internalid -389833101
+doctype[1].maptype[11].idx 10048
+doctype[1].maptype[11].keytype 10012
+doctype[1].maptype[11].valuetype 10008
+doctype[1].maptype[11].internalid -1865479609
+doctype[1].wsettype[0].idx 10018
+doctype[1].wsettype[0].elementtype 10012
+doctype[1].wsettype[0].createifnonexistent false
+doctype[1].wsettype[0].removeifzero false
+doctype[1].wsettype[0].internalid 1328286588
+doctype[1].wsettype[1].idx 10019
+doctype[1].wsettype[1].elementtype 10012
+doctype[1].wsettype[1].createifnonexistent true
+doctype[1].wsettype[1].removeifzero true
+doctype[1].wsettype[1].internalid 18
+doctype[1].wsettype[2].idx 10020
+doctype[1].wsettype[2].elementtype 10012
+doctype[1].wsettype[2].createifnonexistent false
+doctype[1].wsettype[2].removeifzero true
+doctype[1].wsettype[2].internalid 2125328771
+doctype[1].wsettype[3].idx 10021
+doctype[1].wsettype[3].elementtype 10012
+doctype[1].wsettype[3].createifnonexistent true
+doctype[1].wsettype[3].removeifzero false
+doctype[1].wsettype[3].internalid 2065577986
+doctype[1].wsettype[4].idx 10022
+doctype[1].wsettype[4].elementtype 10012
+doctype[1].wsettype[4].createifnonexistent true
+doctype[1].wsettype[4].removeifzero true
+doctype[1].wsettype[4].internalid 18
+doctype[1].wsettype[5].idx 10049
+doctype[1].wsettype[5].elementtype 10012
+doctype[1].wsettype[5].createifnonexistent true
+doctype[1].wsettype[5].removeifzero true
+doctype[1].wsettype[5].internalid 18
+doctype[1].wsettype[6].idx 10050
+doctype[1].wsettype[6].elementtype 10012
+doctype[1].wsettype[6].createifnonexistent true
+doctype[1].wsettype[6].removeifzero true
+doctype[1].wsettype[6].internalid 18
+doctype[1].structtype[0].idx 10023
+doctype[1].structtype[0].name "sct"
+doctype[1].structtype[0].field[0].name "s1"
+doctype[1].structtype[0].field[0].internalid 2146820765
+doctype[1].structtype[0].field[0].type 10012
+doctype[1].structtype[0].field[1].name "s2"
+doctype[1].structtype[0].field[1].internalid 45366795
+doctype[1].structtype[0].field[1].type 10012
+doctype[1].structtype[0].internalid 109267174
+doctype[1].structtype[1].idx 10041
+doctype[1].structtype[1].name "mystruct"
+doctype[1].structtype[1].field[0].name "bytearr"
+doctype[1].structtype[1].field[0].internalid 1079701754
+doctype[1].structtype[1].field[0].type 10042
+doctype[1].structtype[1].field[1].name "mymap"
+doctype[1].structtype[1].field[1].internalid 1954178122
+doctype[1].structtype[1].field[1].type 10043
+doctype[1].structtype[1].field[2].name "title"
+doctype[1].structtype[1].field[2].internalid 567626448
+doctype[1].structtype[1].field[2].type 10012
+doctype[1].structtype[1].field[3].name "structfield"
+doctype[1].structtype[1].field[3].internalid 1726890940
+doctype[1].structtype[1].field[3].type 10012
+doctype[1].structtype[1].internalid -2092985853
+doctype[1].structtype[2].idx 10047
+doctype[1].structtype[2].name "folder"
+doctype[1].structtype[2].field[0].name "Version"
+doctype[1].structtype[2].field[0].internalid 64430502
+doctype[1].structtype[2].field[0].type 10007
+doctype[1].structtype[2].field[1].name "Name"
+doctype[1].structtype[2].field[1].internalid 2002760220
+doctype[1].structtype[2].field[1].type 10012
+doctype[1].structtype[2].field[2].name "FlagsCounter"
+doctype[1].structtype[2].field[2].internalid 1741227606
+doctype[1].structtype[2].field[2].type 10048
+doctype[1].structtype[2].field[3].name "anotherfolder"
+doctype[1].structtype[2].field[3].internalid 1582421848
+doctype[1].structtype[2].field[3].type 10047
+doctype[1].structtype[2].internalid 294108848
+doctype[1].structtype[3].idx 10016
+doctype[1].structtype[3].name "types.header"
+doctype[1].structtype[3].field[0].name "abyte"
+doctype[1].structtype[3].field[0].internalid 110138156
+doctype[1].structtype[3].field[0].type 10003
+doctype[1].structtype[3].field[1].name "along"
+doctype[1].structtype[3].field[1].internalid 1206464520
+doctype[1].structtype[3].field[1].type 10008
+doctype[1].structtype[3].field[2].name "abool"
+doctype[1].structtype[3].field[2].internalid 492328000
+doctype[1].structtype[3].field[2].type 10002
+doctype[1].structtype[3].field[3].name "ashortfloat"
+doctype[1].structtype[3].field[3].internalid 1012106297
+doctype[1].structtype[3].field[3].type 10006
+doctype[1].structtype[3].field[4].name "arrayfield"
+doctype[1].structtype[3].field[4].internalid 965790107
+doctype[1].structtype[3].field[4].type 10017
+doctype[1].structtype[3].field[5].name "setfield"
+doctype[1].structtype[3].field[5].internalid 761581914
+doctype[1].structtype[3].field[5].type 10018
+doctype[1].structtype[3].field[6].name "setfield2"
+doctype[1].structtype[3].field[6].internalid 1066659198
+doctype[1].structtype[3].field[6].type 10019
+doctype[1].structtype[3].field[7].name "setfield3"
+doctype[1].structtype[3].field[7].internalid 1180155772
+doctype[1].structtype[3].field[7].type 10020
+doctype[1].structtype[3].field[8].name "setfield4"
+doctype[1].structtype[3].field[8].internalid 1254131631
+doctype[1].structtype[3].field[8].type 10021
+doctype[1].structtype[3].field[9].name "tagfield"
+doctype[1].structtype[3].field[9].internalid 1653562069
+doctype[1].structtype[3].field[9].type 10022
+doctype[1].structtype[3].field[10].name "structfield"
+doctype[1].structtype[3].field[10].internalid 486207386
+doctype[1].structtype[3].field[10].type 10023
+doctype[1].structtype[3].field[11].name "structarrayfield"
+doctype[1].structtype[3].field[11].internalid 335048518
+doctype[1].structtype[3].field[11].type 10024
+doctype[1].structtype[3].field[12].name "stringmapfield"
+doctype[1].structtype[3].field[12].internalid 117465687
+doctype[1].structtype[3].field[12].type 10025
+doctype[1].structtype[3].field[13].name "intmapfield"
+doctype[1].structtype[3].field[13].internalid 121004462
+doctype[1].structtype[3].field[13].type 10026
+doctype[1].structtype[3].field[14].name "floatmapfield"
+doctype[1].structtype[3].field[14].internalid 1239120925
+doctype[1].structtype[3].field[14].type 10027
+doctype[1].structtype[3].field[15].name "longmapfield"
+doctype[1].structtype[3].field[15].internalid 477718745
+doctype[1].structtype[3].field[15].type 10028
+doctype[1].structtype[3].field[16].name "doublemapfield"
+doctype[1].structtype[3].field[16].internalid 877047192
+doctype[1].structtype[3].field[16].type 10029
+doctype[1].structtype[3].field[17].name "arraymapfield"
+doctype[1].structtype[3].field[17].internalid 1670805928
+doctype[1].structtype[3].field[17].type 10030
+doctype[1].structtype[3].field[18].name "arrarr"
+doctype[1].structtype[3].field[18].internalid 1962567166
+doctype[1].structtype[3].field[18].type 10032
+doctype[1].structtype[3].field[19].name "maparr"
+doctype[1].structtype[3].field[19].internalid 904375219
+doctype[1].structtype[3].field[19].type 10035
+doctype[1].structtype[3].field[20].name "complexarray"
+doctype[1].structtype[3].field[20].internalid 795629533
+doctype[1].structtype[3].field[20].type 10037
+doctype[1].structtype[3].field[21].name "mystructfield"
+doctype[1].structtype[3].field[21].internalid 1348513378
+doctype[1].structtype[3].field[21].type 10041
+doctype[1].structtype[3].field[22].name "mystructmap"
+doctype[1].structtype[3].field[22].internalid 1511423250
+doctype[1].structtype[3].field[22].type 10044
+doctype[1].structtype[3].field[23].name "mystructarr"
+doctype[1].structtype[3].field[23].internalid 595856991
+doctype[1].structtype[3].field[23].type 10045
+doctype[1].structtype[3].field[24].name "Folders"
+doctype[1].structtype[3].field[24].internalid 34575524
+doctype[1].structtype[3].field[24].type 10046
+doctype[1].structtype[3].field[25].name "juletre"
+doctype[1].structtype[3].field[25].internalid 1039981530
+doctype[1].structtype[3].field[25].type 10008
+doctype[1].structtype[3].field[26].name "album0"
+doctype[1].structtype[3].field[26].internalid 764312262
+doctype[1].structtype[3].field[26].type 10049
+doctype[1].structtype[3].field[27].name "album1"
+doctype[1].structtype[3].field[27].internalid 1967160809
+doctype[1].structtype[3].field[27].type 10050
+doctype[1].structtype[3].field[28].name "other"
+doctype[1].structtype[3].field[28].internalid 2443357
+doctype[1].structtype[3].field[28].type 10008
+doctype[1].structtype[3].internalid 1328581348
diff --git a/document/src/vespa/document/config/documenttypes.def b/document/src/vespa/document/config/documenttypes.def
index 2e0483f025b..202447295c3 100644
--- a/document/src/vespa/document/config/documenttypes.def
+++ b/document/src/vespa/document/config/documenttypes.def
@@ -44,7 +44,7 @@ documenttype[].datatype[].map.value.id int default=0
## This is the id of the datatype of the key in the wset.
documenttype[].datatype[].wset.key.id int default=0
-## Should an update to a nonexistent element cause it to be created
+## Should an update to a nonexistent element cause it to be created
documenttype[].datatype[].wset.createifnonexistent bool default=false
## Should an element in a weighted set be removed if an update changes the weight to 0
@@ -110,3 +110,176 @@ documenttype[].referencetype[].target_type_id int
## Imported fields (specified outside the document block in the schema)
documenttype[].importedfield[].name string
+
+
+# Here starts a new model for how datatypes are configured, where
+# everything is per document-type, and each documenttype contains the
+# datatypes it defines.
+
+# Note: the built-in "document" document type, which all other
+# doctypes inherit from, is also included here, so that all the
+# primitive and built-in types are declared with an idx we can
+# refer to.
+
+# Note: indexes are only meaningful as internal references in this
+# config; they will typically be sequential (1,2,3,...) in the order
+# that they are generated (but nothing should depend on that).
+
+
+## Name of the document type. Must be unique.
+doctype[].name string
+
+## Index of this type (as a datatype which can be referred to).
+doctype[].idx int
+
+## Internal ID of this datatype
+doctype[].internalid int
+
+## Specify document types to inherit
+doctype[].inherits[].idx int
+
+## Index of struct defining document fields
+doctype[].contentstruct int
+
+## Field sets available for this document type
+doctype[].fieldsets{}.fields[] string
+
+## Imported fields (specified outside the document block in the schema)
+doctype[].importedfield[].name string
+
+# Everything below here is configuration of data types defined by
+# this document type.
+
+# Primitive types must be present as built-in static members.
+
+## Index of primitive type
+doctype[].primitivetype[].idx int
+
+## The name of this primitive type
+doctype[].primitivetype[].name string
+
+
+# Arrays are the simplest collection type:
+
+## Index of this array type
+doctype[].arraytype[].idx int
+
+## Index of the element type this array type contains
+doctype[].arraytype[].elementtype int
+
+## Internal ID of this datatype
+doctype[].arraytype[].internalid int
+
+
+# Maps are another collection type:
+
+## Index of this map type
+doctype[].maptype[].idx int
+
+## Index of the key type used by this map type
+doctype[].maptype[].keytype int
+
+## Index of the value type used by this map type
+doctype[].maptype[].valuetype int
+
+## Internal ID of this datatype
+doctype[].maptype[].internalid int
+
+
+# Weighted sets are more complicated;
+# they can be considered as a collection
+# of unique elements where each element has
+# an associated weight:
+
+## Index of this weighted set type
+doctype[].wsettype[].idx int
+
+## Index of the element type contained in this weighted set type
+doctype[].wsettype[].elementtype int
+
+## Should an update to a nonexistent element cause it to be created
+doctype[].wsettype[].createifnonexistent bool default=false
+
+## Should an element in a weighted set be removed if an update changes the weight to 0
+doctype[].wsettype[].removeifzero bool default=false
+
+## Internal ID of this datatype
+doctype[].wsettype[].internalid int
+
+
+# Tensors have their own type system
+
+## Index of this tensor type
+doctype[].tensortype[].idx int
+
+## Description of the type of the actual tensors contained
+doctype[].tensortype[].detailedtype string
+
+
+# Document references refer to parent documents that a document can
+# import fields from:
+
+## Index of this reference data type:
+doctype[].documentref[].idx int
+
+## Index of the document type this reference type refers to:
+doctype[].documentref[].targettype int
+
+## Internal ID of this datatype
+doctype[].documentref[].internalid int
+
+
+# Annotation types are another world, but are modeled here
+# as if they were also datatypes contained inside document types:
+
+## Index of an annotation type.
+doctype[].annotationtype[].idx int
+
+## Name of the annotation type.
+doctype[].annotationtype[].name string
+
+## Internal id of this annotation type
+doctype[].annotationtype[].internalid int
+
+## Index of contained datatype of the annotation type, if any
+doctype[].annotationtype[].datatype int default=-1
+
+## Index of annotation type that this type inherits.
+doctype[].annotationtype[].inherits[].idx int
+
+
+# Annotation references are field values referring to
+# an annotation of a certain annotation type.
+
+## Index of this annotation reference type
+doctype[].annotationref[].idx int
+
+## Index of the annotation type this annotation reference type refers to
+doctype[].annotationref[].annotationtype int
+
+## Internal ID of this datatype
+doctype[].annotationref[].internalid int
+
+
+# A struct is just a named collection of fields:
+
+## Index of this struct type
+doctype[].structtype[].idx int
+
+## Name of the struct type. Must be unique within documenttype.
+doctype[].structtype[].name string
+
+## Index of another struct type to inherit
+doctype[].structtype[].inherits[].type int
+
+## Name of a struct field. Must be unique within the struct type.
+doctype[].structtype[].field[].name string
+
+## The "field id" - used in serialized format!
+doctype[].structtype[].field[].internalid int
+
+## Index of the type of this field
+doctype[].structtype[].field[].type int
+
+## Internal ID of this datatype
+doctype[].structtype[].internalid int
diff --git a/document/src/vespa/document/repo/documenttyperepo.cpp b/document/src/vespa/document/repo/documenttyperepo.cpp
index 312ce027543..d8f272d5d55 100644
--- a/document/src/vespa/document/repo/documenttyperepo.cpp
+++ b/document/src/vespa/document/repo/documenttyperepo.cpp
@@ -17,6 +17,7 @@
#include <vespa/document/config/config-documenttypes.h>
#include <fstream>
#include <cassert>
+#include <set>
#include <vespa/log/log.h>
LOG_SETUP(".documenttyperepo");
@@ -74,7 +75,7 @@ public:
void inherit(const Repo &parent);
bool addDataType(const DataType &type);
- template <typename T> void addDataType(unique_ptr<T> type);
+ template <typename T> const DataType * addDataType(unique_ptr<T> type);
const DataType &addTensorType(const string &spec);
const DataType *lookup(int32_t id) const;
@@ -108,14 +109,17 @@ bool Repo::addDataType(const DataType &type) {
}
data_type = &type;
data_type_by_name = &type;
+ LOG(spam, "Added data type to repo: %s [%d]", type.getName().c_str(), type.getId());
return true;
}
template <typename T>
-void Repo::addDataType(unique_ptr<T> type) {
+const DataType* Repo::addDataType(unique_ptr<T> type) {
+ int id = type->getId();
if (addDataType(*type)) {
_owned_types.push_back(type.release());
}
+ return _types[id];
}
@@ -172,7 +176,7 @@ public:
~AnnotationTypeRepo() { DeleteContent(_owned_types); }
void inherit(const AnnotationTypeRepo &parent);
- void addAnnotationType(AnnotationType::UP annotation_type);
+ AnnotationType * addAnnotationType(AnnotationType::UP annotation_type);
void setAnnotationDataType(int32_t id, const DataType &datatype);
const AnnotationType *lookup(int32_t id) const;
@@ -182,7 +186,7 @@ void AnnotationTypeRepo::inherit(const AnnotationTypeRepo &parent) {
_annotation_types.insert(parent._annotation_types.begin(), parent._annotation_types.end());
}
-void AnnotationTypeRepo::addAnnotationType(AnnotationType::UP type) {
+AnnotationType * AnnotationTypeRepo::addAnnotationType(AnnotationType::UP type) {
AnnotationType *& a_type = _annotation_types[type->getId()];
if (a_type) {
if (*type != *a_type) {
@@ -194,6 +198,7 @@ void AnnotationTypeRepo::addAnnotationType(AnnotationType::UP type) {
a_type = type.get();
_owned_types.push_back(type.release());
}
+ return a_type;
}
void AnnotationTypeRepo::setAnnotationDataType(int32_t id, const DataType &d) {
@@ -502,6 +507,495 @@ void configureAllRepos(const DocumenttypesConfig::DocumenttypeVector &t, Documen
}
}
+using DataTypesByIdx = hash_map<int, const DataType *>; // NOTE(review): none of these three aliases is referenced in the code visible in this hunk - confirm they are still needed
+using StructTypesByIdx = hash_map<int, StructDataType *>;
+using DocTypesByIdx = hash_map<int, DocumentType *>;
+
+
+class ApplyNewDoctypeConfig {
+private:
+ using DTC = ::document::config::DocumenttypesConfig;
+
+ using CDocType = DTC::Doctype;
+ using CDocInherit = DTC::Doctype::Inherits;
+ using CDocFieldsets = DTC::Doctype::Fieldsets;
+ using CDocImportField = DTC::Doctype::Importedfield;
+ using CPrimitiveT = DTC::Doctype::Primitivetype;
+ using CArrayT = DTC::Doctype::Arraytype;
+ using CMapT = DTC::Doctype::Maptype;
+ using CWsetT = DTC::Doctype::Wsettype;
+ using CTensorT = DTC::Doctype::Tensortype;
+ using CDocRefT = DTC::Doctype::Documentref;
+ using CAnnotationT = DTC::Doctype::Annotationtype;
+ using CAnnRefT = DTC::Doctype::Annotationref;
+ using CStructT = DTC::Doctype::Structtype;
+ using CStructField = DTC::Doctype::Structtype::Field;
+ using CStructInherits = DTC::Doctype::Structtype::Inherits;
+
+    /**
+     * Working state for one document type while its config is applied.
+     *
+     * The constructor fetches the DataTypeRepo slot for cfg.internalid from
+     * the output map (via operator[]). A non-null slot means the repo was
+     * already present and the type is flagged as builtin; otherwise a fresh
+     * DataTypeRepo is allocated and stored in the map under that id.
+     */
+    struct DocTypeInProgress {
+        // Declaration order matters: data_type_repo is initialized from cfg.
+        const CDocType & cfg;
+        DataTypeRepo * data_type_repo;
+        DocumentType * dtype = nullptr;
+        bool builtin = false;
+
+        DocTypeInProgress(const CDocType & config, DocumentTypeMap &doc_types)
+            : cfg(config),
+              data_type_repo(doc_types[cfg.internalid])
+        {
+            if (data_type_repo == nullptr) {
+                LOG(debug, "new doct : %s [%d]", cfg.name.c_str(), cfg.internalid);
+                data_type_repo = new DataTypeRepo();
+                doc_types[cfg.internalid] = data_type_repo;
+            } else {
+                LOG(debug, "old doct : %s [%d]", cfg.name.c_str(), cfg.internalid);
+                builtin = true;
+            }
+        }
+
+        /** Shortcut to the data type repo of this document type. */
+        Repo& repo() { return data_type_repo->repo; }
+    };
+
+    /**
+     * Working state for one struct type while its config is applied.
+     *
+     * stype is the struct being built by this run; oldtype is the type to
+     * hand out once the entry is finished (either a struct that already
+     * existed in the repo, or stype after inheritance has been resolved).
+     */
+    struct StructInProgress {
+        const CStructT & cfg;
+        StructDataType *stype = nullptr;
+        const StructDataType *oldtype = nullptr;
+        bool finished = false;
+        // explicit: a struct config must never convert silently into tracking state
+        explicit StructInProgress(const CStructT & config) : cfg(config) {}
+    };
+ using StructsInProgress = std::map<int, StructInProgress>;
+ StructsInProgress _structs_in_progress;
+
+ using DocTypesInProgress = std::map<int, DocTypeInProgress>;
+ using MadeTypes = std::map<int, const DataType *>;
+
+ const DocumenttypesConfig::DoctypeVector & _input;
+ DocumentTypeMap & _output;
+
+ DocTypesInProgress _doc_types_in_progress;
+ hash_map<int, AnnotationType *> _annotations_by_idx;
+ MadeTypes _made_types;
+ std::set<int> _needed_idx_set;
+
+    /**
+     * Runs the complete conversion, in phases:
+     *  1. findNeeded() validates indexes and collects the referenced ones.
+     *  2. Per document type (in config order): simple types, empty structs,
+     *     the document type itself plus inherited annotations, empty
+     *     annotation types and reference types.
+     *  3. Complex types (array/map/wset) across all document types.
+     *  4. Struct fields are filled in.
+     *  5. Per document type: document details and annotation types.
+     *  6. Struct inheritance is resolved for every configured struct.
+     */
+    void apply() {
+        findNeeded();
+        for (const CDocType & doc_cfg : _input) {
+            auto [iter,succ] = _doc_types_in_progress.emplace(doc_cfg.idx,
+                                                              DocTypeInProgress(doc_cfg, _output));
+            LOG_ASSERT(succ);
+            DocTypeInProgress & state = iter->second;
+            createSimpleTypes(state);
+            createEmptyStructs(state);
+            initializeDocTypeAndInheritAnnotations(state);
+            createEmptyAnnotationTypes(state);
+            createReferenceTypes(state);
+        }
+        createComplexTypes();
+        fillStructs();
+        for (const CDocType & doc_cfg : _input) {
+            auto iter = _doc_types_in_progress.find(doc_cfg.idx);
+            LOG_ASSERT(iter != _doc_types_in_progress.end());
+            DocTypeInProgress & state = iter->second;
+            fillDocument(state);
+            fillAnnotationTypes(state);
+        }
+        for (const auto & doc_cfg : _input) {
+            for (const auto & struct_cfg : doc_cfg.structtype) {
+                performStructInherit(struct_cfg.idx);
+            }
+        }
+    }
+
+    /** Records `t` as the realized type for config index `idx` and removes that index from the pending set. */
+    void madeType(const DataType *t, int idx) {
+        _needed_idx_set.erase(idx);
+        _made_types[idx] = t;
+    }
+
+    /**
+     * Resolves the primitive types and creates the tensor types declared by
+     * one document type, recording each under its config idx.
+     *
+     * A primitive is looked up by its configured name first. If that fails,
+     * "float16" is replaced by "float" and the first character is upcased
+     * before a second lookup. Throws IllegalArgumentException if both
+     * lookups fail.
+     */
+    void createSimpleTypes(DocTypeInProgress & dtInP) {
+        Repo & repo = dtInP.repo();
+        for (const auto & primT : dtInP.cfg.primitivetype) {
+            string name = primT.name;
+            const DataType *t = repo.lookup(name);
+            if (t == nullptr) {
+                if (name == "float16") {
+                    // is this even sane?
+                    name = "float";
+                }
+                // masking with 0x5F clears bit 5, turning an ASCII lowercase letter into uppercase
+                name[0] = (name[0] & 0x5F);
+                t = repo.lookup(name);
+            }
+            if (t == nullptr) {
+                LOG(error, "Missing primitive type '%s'", primT.name.c_str());
+                throw IllegalArgumentException("missing primitive type");
+            }
+            madeType(t, primT.idx);
+        }
+        for (const auto & tensorT : dtInP.cfg.tensortype) {
+            const DataType & tensor_type = repo.addTensorType(tensorT.detailedtype);
+            madeType(&tensor_type, tensorT.idx);
+        }
+    }
+
+    /**
+     * Registers every struct declared by one document type, without fields.
+     *
+     * If the repo already holds a type with the struct's internalid, that
+     * type must be a StructDataType; it is reused and the entry is marked
+     * finished. Otherwise an empty StructDataType is created and added to
+     * the repo. Either way the struct is tracked in _structs_in_progress.
+     * Throws IllegalArgumentException if the existing type is not a struct.
+     */
+    void createEmptyStructs(DocTypeInProgress & dtInP) {
+        for (const auto & structT : dtInP.cfg.structtype) {
+            StructInProgress in_progress(structT);
+            const auto * oldt = dtInP.repo().lookup(structT.internalid);
+            if (oldt == nullptr) {
+                auto up = std::make_unique<StructDataType>(structT.name, structT.internalid);
+                in_progress.stype = up.get();
+                const DataType *t = dtInP.repo().addDataType(std::move(up));
+                LOG_ASSERT(t == in_progress.stype);
+                madeType(t, structT.idx);
+            } else {
+                auto st = dynamic_cast<const StructDataType *>(oldt);
+                if (st == nullptr) {
+                    throw IllegalArgumentException("struct internalid -> not a struct");
+                }
+                LOG(debug, "already has %s [%d], wanted to add %s [%d]",
+                    st->getName().c_str(), st->getId(),
+                    structT.name.c_str(), structT.internalid);
+                in_progress.oldtype = st;
+                in_progress.finished = true;
+                madeType(st, structT.idx);
+            }
+            auto [iter, succ] = _structs_in_progress.emplace(structT.idx, in_progress);
+            LOG_ASSERT(succ);
+        }
+    }
+
+    /**
+     * Returns the struct tracked under config index `idx`: oldtype when the
+     * entry is finished, stype otherwise. Returns nullptr if `idx` does not
+     * name a tracked struct.
+     */
+    const StructDataType * findStruct(int idx) {
+        auto iter = _structs_in_progress.find(idx);
+        if (iter == _structs_in_progress.end()) {
+            return nullptr;
+        }
+        const StructInProgress & entry = iter->second;
+        return entry.finished ? entry.oldtype : entry.stype;
+    }
+
+    /**
+     * Creates the DocumentType object for one document type and pulls in the
+     * annotation types of every parent document type.
+     *
+     * Builtin types already have a DocumentType; it is only recorded under
+     * the config idx. For other types the content struct must already be
+     * tracked (see findStruct), and each parent must already have been made
+     * and have a repo in the output map - hence the dependency on the config
+     * listing document types in inheritance order.
+     * Throws IllegalArgumentException when any of these is missing.
+     */
+    void initializeDocTypeAndInheritAnnotations(DocTypeInProgress & dtInP) {
+        if (dtInP.builtin) {
+            madeType(dtInP.data_type_repo->doc_type, dtInP.cfg.idx);
+            return;
+        }
+        LOG_ASSERT(dtInP.data_type_repo->doc_type == nullptr);
+        const auto & docT = dtInP.cfg;
+        const StructDataType * fields = findStruct(docT.contentstruct);
+        if (fields == nullptr) {
+            LOG(error, "Missing content struct for '%s' (idx %d not found)",
+                docT.name.c_str(), docT.contentstruct);
+            throw IllegalArgumentException("missing content struct");
+        }
+        dtInP.data_type_repo->doc_type = new DocumentType(docT.name, docT.internalid, *fields);
+        madeType(dtInP.data_type_repo->doc_type, docT.idx);
+        // depends on config in inheritance order
+        for (const auto & inheritD : docT.inherits) {
+            const DataType *dt = _made_types[inheritD.idx];
+            if (dt == nullptr) {
+                LOG(error, "parent datatype [idx %d] missing for document %s",
+                    inheritD.idx, docT.name.c_str());
+                throw IllegalArgumentException("Unable to find document for inheritance");
+            }
+            DataTypeRepo * parentRepo = FindPtr(_output, dt->getId());
+            if (parentRepo == nullptr) {
+                LOG(error, "parent repo [id %d] missing for document %s",
+                    dt->getId(), docT.name.c_str());
+                throw IllegalArgumentException("missing parent repo");
+            }
+            dtInP.data_type_repo->annotations.inherit(parentRepo->annotations);
+        }
+    }
+
+    /**
+     * Creates an AnnotationType (without data type) for every annotation
+     * declared by one document type, tracks it by config idx and adds it to
+     * the document type's annotation repo.
+     * Throws IllegalArgumentException if the repo already has an annotation
+     * type with the same internalid.
+     */
+    void createEmptyAnnotationTypes(DocTypeInProgress & dtInP) {
+        auto & annRepo = dtInP.data_type_repo->annotations;
+        for (const auto & annT: dtInP.cfg.annotationtype) {
+            if (annRepo.lookup(annT.internalid) != nullptr) {
+                throw IllegalArgumentException("duplicate annotation type id");
+            }
+            auto at = std::make_unique<AnnotationType>(annT.internalid, annT.name);
+            _needed_idx_set.erase(annT.idx);
+            // remember the raw pointer before ownership moves into the repo
+            _annotations_by_idx[annT.idx] = at.get();
+            const auto * t = annRepo.addAnnotationType(std::move(at));
+            LOG_ASSERT(t == _annotations_by_idx[annT.idx]);
+        }
+    }
+
+    /**
+     * Creates the annotation-reference and document-reference data types
+     * declared by one document type and records each under its config idx.
+     * Throws IllegalArgumentException when the referenced annotation type has
+     * not been created, or when the reference target is missing or is not a
+     * DocumentType.
+     */
+    void createReferenceTypes(DocTypeInProgress & dtInP) {
+        for (const auto & aRef : dtInP.cfg.annotationref) {
+            const AnnotationType * target = _annotations_by_idx[aRef.annotationtype];
+            if (target == nullptr) {
+                LOG(error, "Missing annotation type [idx %d] for annotationref",
+                    aRef.annotationtype);
+                throw IllegalArgumentException("missing annotation type");
+            }
+            auto ar = std::make_unique<AnnotationReferenceDataType>(*target, aRef.internalid);
+            madeType(dtInP.repo().addDataType(std::move(ar)), aRef.idx);
+        }
+        for (const auto & refT : dtInP.cfg.documentref) {
+            const auto * target = dynamic_cast<const DocumentType *>(_made_types[refT.targettype]);
+            if (target == nullptr) {
+                LOG(error, "Missing target document type for reference (idx %d)", refT.targettype);
+                throw IllegalArgumentException("missing target type");
+            }
+            auto rt = std::make_unique<ReferenceDataType>(*target, refT.internalid);
+            madeType(dtInP.repo().addDataType(std::move(rt)), refT.idx);
+        }
+    }
+
+    /**
+     * Creates array/map/weighted-set types for all document types.
+     *
+     * Complex types can nest in any order, so passes over all document types
+     * are repeated until no referenced index is pending any more. A pass
+     * that does not shrink the pending set means the remaining types can
+     * never be built; each one is logged and IllegalArgumentException is
+     * thrown.
+     */
+    void createComplexTypes() {
+        while (! _needed_idx_set.empty()) {
+            const size_t missing_cnt = _needed_idx_set.size();
+            for (const auto & docT : _input) {
+                auto iter = _doc_types_in_progress.find(docT.idx);
+                LOG_ASSERT(iter != _doc_types_in_progress.end());
+                auto & dtInP = iter->second;
+                createComplexTypesForDocType(dtInP.cfg, dtInP.repo());
+            }
+            if (_needed_idx_set.size() == missing_cnt) {
+                for (int idx : _needed_idx_set) {
+                    LOG(error, "no progress, datatype [idx %d] still missing", idx);
+                }
+                throw IllegalArgumentException("no progress");
+            }
+            // only announce a retry when another pass will actually run;
+            // %zu is the conversion matching size_t
+            if (! _needed_idx_set.empty()) {
+                LOG(info, "retry complex types, %zu missing", _needed_idx_set.size());
+            }
+        }
+    }
+
+    /**
+     * One pass over the array, map and weighted-set types of a single
+     * document type. A type is created (and recorded via madeType) only if
+     * it has not been made yet and all the types it nests are available;
+     * otherwise it is left for a later pass.
+     */
+    void createComplexTypesForDocType(const CDocType & docT, Repo& repo) {
+        for (const auto & arrT : docT.arraytype) {
+            if (_made_types[arrT.idx] == nullptr) {
+                const DataType * nested = _made_types[arrT.elementtype];
+                if (nested != nullptr) {
+                    auto at = std::make_unique<ArrayDataType>(*nested, arrT.internalid);
+                    madeType(repo.addDataType(std::move(at)), arrT.idx);
+                }
+            }
+        }
+        for (const auto & mapT : docT.maptype) {
+            if (_made_types[mapT.idx] == nullptr) {
+                const DataType * kt = _made_types[mapT.keytype];
+                const DataType * vt = _made_types[mapT.valuetype];
+                if ((kt != nullptr) && (vt != nullptr)) {
+                    auto mt = std::make_unique<MapDataType>(*kt, *vt, mapT.internalid);
+                    madeType(repo.addDataType(std::move(mt)), mapT.idx);
+                }
+            }
+        }
+        for (const auto & wsetT : docT.wsettype) {
+            if (_made_types[wsetT.idx] == nullptr) {
+                const DataType * nested = _made_types[wsetT.elementtype];
+                if (nested != nullptr) {
+                    auto wt = std::make_unique<WeightedSetDataType>(*nested,
+                                                                    wsetT.createifnonexistent, wsetT.removeifzero,
+                                                                    wsetT.internalid);
+                    madeType(repo.addDataType(std::move(wt)), wsetT.idx);
+                }
+            }
+        }
+    }
+
+    /**
+     * Adds the configured fields to every struct that was created by this
+     * run (entries already marked finished are left untouched).
+     * Throws IllegalArgumentException if a field's type has not been made.
+     */
+    void fillStructs() {
+        for (auto & [idx, in_progress] : _structs_in_progress) {
+            if (! in_progress.finished) {
+                auto st = in_progress.stype;
+                LOG_ASSERT(st);
+                for (const auto & fieldD : in_progress.cfg.field) {
+                    const DataType *ft = _made_types[fieldD.type];
+                    if (ft == nullptr) {
+                        LOG(error, "Missing type [idx %d] for struct %s field %s",
+                            fieldD.type, in_progress.cfg.name.c_str(), fieldD.name.c_str());
+                        throw IllegalArgumentException("missing datatype");
+                    }
+                    st->addField(Field(fieldD.name, fieldD.internalid, *ft));
+                }
+            }
+        }
+    }
+
+    /**
+     * Completes a non-builtin document type: registers imported field names
+     * and field sets, then inherits from each configured parent.
+     * Throws IllegalArgumentException if a parent idx does not resolve to a
+     * DocumentType.
+     */
+    void fillDocument(DocTypeInProgress & dtInP) {
+        if (dtInP.builtin) {
+            return;
+        }
+        const CDocType & docT = dtInP.cfg;
+        auto * doc_type = dtInP.data_type_repo->doc_type;
+        LOG_ASSERT(doc_type != nullptr);
+        for (const auto & importD : docT.importedfield) {
+            doc_type->add_imported_field_name(importD.name);
+        }
+        for (const auto & entry : docT.fieldsets) {
+            DocumentType::FieldSet::Fields fields;
+            for (const auto& f : entry.second.fields) {
+                fields.insert(f);
+            }
+            doc_type->addFieldSet(entry.first, fields);
+        }
+        for (const auto & inheritD : docT.inherits) {
+            const auto * parent = dynamic_cast<const DocumentType *>(_made_types[inheritD.idx]);
+            if (parent == nullptr) {
+                LOG(error, "missing parent type [idx %d] for document %s",
+                    inheritD.idx, docT.name.c_str());
+                throw IllegalArgumentException("missing parent type");
+            }
+            doc_type->inherit(*parent);
+        }
+    }
+
+    /**
+     * Completes the annotation types of one document type: sets the data
+     * type where one is configured (datatype != -1) and verifies that every
+     * configured parent annotation exists.
+     * Throws IllegalArgumentException on a missing data type or parent.
+     */
+    void fillAnnotationTypes(DocTypeInProgress & dtInP) {
+        for (const auto & annT: dtInP.cfg.annotationtype) {
+            AnnotationType * at = _annotations_by_idx[annT.idx];
+            // check before the first dereference, not only when there are parents
+            LOG_ASSERT(at != nullptr);
+            if (annT.datatype != -1) {
+                const DataType * dt = _made_types[annT.datatype];
+                if (dt == nullptr) {
+                    LOG(error, "Missing datatype [idx %d] for annotation type %s",
+                        annT.datatype, annT.name.c_str());
+                    throw IllegalArgumentException("missing datatype");
+                }
+                at->setDataType(*dt);
+            }
+            // parents are only validated here; nothing is linked to `at`
+            for (const auto & inheritD : annT.inherits) {
+                const AnnotationType * parent = _annotations_by_idx[inheritD.idx];
+                if (parent == nullptr) {
+                    LOG(error, "missing parent [idx %d] for annotation %s",
+                        inheritD.idx, annT.name.c_str());
+                    throw IllegalArgumentException("missing parent");
+                }
+            }
+        }
+    }
+
+    /**
+     * Tracks the set of declared type indexes: add() rejects duplicates and
+     * check() rejects indexes that were never added. Both report problems by
+     * throwing IllegalArgumentException.
+     */
+    class EnsureIndexes {
+        std::set<int> _set;
+    public:
+        void add(int idx) {
+            const bool inserted = _set.insert(idx).second;
+            if (! inserted) {
+                throw IllegalArgumentException("duplicate type idx");
+            }
+            LOG(info, "ensure indexes: add %d", idx);
+        }
+        void check(int idx) {
+            if (_set.find(idx) != _set.end()) {
+                return;
+            }
+            LOG(error, "ensure indexes: missing %d", idx);
+            throw IllegalArgumentException("needed idx missing");
+        }
+    };
+
+    /**
+     * Scans the whole config once, before anything is created.
+     *
+     * Every idx that declares a type (document, struct, primitive, tensor,
+     * array, wset, map, annotation, annotation ref, document ref) must be
+     * unique. Every idx that is referenced (field types, element/key/value
+     * types, annotation data types and parents, reference targets) is put
+     * into _needed_idx_set and must have been declared somewhere.
+     * Throws IllegalArgumentException on a duplicate or undeclared idx.
+     */
+    void findNeeded() {
+        EnsureIndexes idx_set;
+        for (const auto & docT : _input) {
+            LOG(info, "doc %s", docT.name.c_str());
+            idx_set.add(docT.idx);
+            for (const auto & structT : docT.structtype) {
+                idx_set.add(structT.idx);
+                for (const auto & fieldD : structT.field) {
+                    LOG(debug, "doc %s struct %s field %s needs [idx %d]",
+                        docT.name.c_str(), structT.name.c_str(), fieldD.name.c_str(), fieldD.type);
+                    _needed_idx_set.insert(fieldD.type);
+                }
+            }
+            for (const auto & primT : docT.primitivetype) {
+                idx_set.add(primT.idx);
+            }
+            for (const auto & tensorT : docT.tensortype) {
+                idx_set.add(tensorT.idx);
+            }
+            for (const auto & arrT : docT.arraytype) {
+                idx_set.add(arrT.idx);
+                LOG(debug, "doc %s array needs [idx %d]", docT.name.c_str(),arrT.elementtype);
+                _needed_idx_set.insert(arrT.elementtype);
+            }
+            for (const auto & wsetT : docT.wsettype) {
+                idx_set.add(wsetT.idx);
+                LOG(debug, "doc %s wset needs [idx %d]", docT.name.c_str(), wsetT.elementtype);
+                _needed_idx_set.insert(wsetT.elementtype);
+            }
+            for (const auto & mapT : docT.maptype) {
+                idx_set.add(mapT.idx);
+                // message names the map type (it used to say "wset" by copy/paste)
+                LOG(debug, "doc %s map needs [idx %d] and [idx %d]",
+                    docT.name.c_str(), mapT.keytype, mapT.valuetype);
+                _needed_idx_set.insert(mapT.keytype);
+                _needed_idx_set.insert(mapT.valuetype);
+            }
+            for (const auto & annT: docT.annotationtype) {
+                idx_set.add(annT.idx);
+                if (annT.datatype != -1) {
+                    LOG(debug, "doc %s ann needs datatype [idx %d]", docT.name.c_str(), annT.datatype);
+                    _needed_idx_set.insert(annT.datatype);
+                }
+                for (const auto & inheritD : annT.inherits) {
+                    LOG(debug, "doc %s ann needs parent [idx %d]", docT.name.c_str(), inheritD.idx);
+                    _needed_idx_set.insert(inheritD.idx);
+                }
+            }
+            for (const auto & aRef : docT.annotationref) {
+                idx_set.add(aRef.idx);
+                LOG(debug, "doc %s ann ref needs annotation [idx %d]", docT.name.c_str(), aRef.annotationtype);
+                _needed_idx_set.insert(aRef.annotationtype);
+            }
+            for (const auto & refT : docT.documentref) {
+                idx_set.add(refT.idx);
+                LOG(debug, "doc %s doc ref needs target [idx %d]", docT.name.c_str(), refT.targettype);
+                _needed_idx_set.insert(refT.targettype);
+            }
+        }
+        for (int needed : _needed_idx_set) {
+            idx_set.check(needed);
+        }
+    }
+
+    /**
+     * Resolves inheritance for the struct with config index `idx` and
+     * returns the finished struct type. Parents are resolved first
+     * (recursively) and their fields are added as inherited fields.
+     * Throws IllegalArgumentException if `idx` (or a parent idx) is not a
+     * tracked struct, or if the inheritance chain loops back on itself.
+     */
+    const StructDataType * performStructInherit(int idx) {
+        std::set<int> active;
+        return performStructInheritGuarded(idx, active);
+    }
+
+    /**
+     * Worker for performStructInherit. `active` holds the indexes of the
+     * structs currently being resolved further up the call chain; meeting
+     * one of them again means the config describes circular inheritance.
+     */
+    const StructDataType * performStructInheritGuarded(int idx, std::set<int> & active) {
+        auto iter = _structs_in_progress.find(idx);
+        if (iter == _structs_in_progress.end()) {
+            throw IllegalArgumentException("inherit from non-struct");
+        }
+        auto & in_progress = iter->second;
+        if (in_progress.finished) {
+            return in_progress.oldtype;
+        }
+        const auto & structT = in_progress.cfg;
+        if (! active.insert(idx).second) {
+            // fail like other malformed config instead of recursing without bound
+            LOG(error, "Circular inheritance for struct %s [idx %d]",
+                structT.name.c_str(), idx);
+            throw IllegalArgumentException("circular struct inheritance");
+        }
+        for (const auto & inheritD : structT.inherits) {
+            const auto * parent = performStructInheritGuarded(inheritD.type, active);
+            if (parent == nullptr) {
+                LOG(error, "Missing parent type [idx %d] for struct %s",
+                    inheritD.type, structT.name.c_str());
+                throw IllegalArgumentException("missing parent type");
+            }
+            for (const auto & field : parent->getFieldSet()) {
+                in_progress.stype->addInheritedField(*field);
+            }
+        }
+        // once finished, later visits return early, so `active` needs no cleanup
+        in_progress.finished = true;
+        in_progress.oldtype = in_progress.stype;
+        return in_progress.oldtype;
+    }
+
+public:
+ ApplyNewDoctypeConfig(const DocumenttypesConfig::DoctypeVector & input,
+ DocumentTypeMap & output)
+ : _input(input), _output(output) // both members are references: the config is read from, and the map written to, the caller's objects
+ {
+ apply(); // the whole conversion runs from the constructor
+ }
+ ~ApplyNewDoctypeConfig();
+};
+
+ApplyNewDoctypeConfig::~ApplyNewDoctypeConfig() = default; // out-of-line definition of the destructor declared in the class
+
+/**
+ * Applies the new-style doctype config `t` to `type_map`. All work is done
+ * by the ApplyNewDoctypeConfig constructor; the object itself is not needed
+ * afterwards.
+ */
+void configureDocTypes(const DocumenttypesConfig::DoctypeVector &t, DocumentTypeMap &type_map) {
+    LOG(info, "applying new doc type config");
+    ApplyNewDoctypeConfig applier(t, type_map);
+}
+
} // namespace
DocumentTypeRepo::DocumentTypeRepo() :
@@ -527,9 +1021,13 @@ DocumentTypeRepo::DocumentTypeRepo(const DocumenttypesConfig &config) :
_default(addDefaultDocument(*_doc_types))
{
try {
+ if (config.documenttype.empty() && ! config.doctype.empty()) {
+ configureDocTypes(config.doctype, *_doc_types);
+ } else {
createAllDocumentTypes(config.documenttype, *_doc_types);
addAllDocumentTypesToRepos(*_doc_types);
configureAllRepos(config.documenttype, *_doc_types);
+ }
} catch (...) {
DeleteMapContent(*_doc_types);
throw;