aboutsummaryrefslogtreecommitdiffstats
path: root/docprocs
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@yahoo-inc.com>2016-06-15 23:09:44 +0200
committerJon Bratseth <bratseth@yahoo-inc.com>2016-06-15 23:09:44 +0200
commit72231250ed81e10d66bfe70701e64fa5fe50f712 (patch)
tree2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /docprocs
Publish
Diffstat (limited to 'docprocs')
-rw-r--r--docprocs/.gitignore5
-rw-r--r--docprocs/OWNERS1
-rw-r--r--docprocs/README1
-rw-r--r--docprocs/pom.xml164
-rw-r--r--docprocs/src/main/java/com/yahoo/docprocs/indexing/DocumentScript.java107
-rw-r--r--docprocs/src/main/java/com/yahoo/docprocs/indexing/FastLogger.java32
-rw-r--r--docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java120
-rw-r--r--docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java128
-rw-r--r--docprocs/src/test/cfg/attributes.cfg69
-rw-r--r--docprocs/src/test/cfg/docproc-indexing.cfg2
-rw-r--r--docprocs/src/test/cfg/documentmanager.cfg711
-rw-r--r--docprocs/src/test/cfg/documentmanager_inherit.cfg216
-rw-r--r--docprocs/src/test/cfg/ilscripts.cfg12
-rw-r--r--docprocs/src/test/cfg/indexingdocument.cfg4
-rw-r--r--docprocs/src/test/cfg/specialtokens.cfg10
-rw-r--r--docprocs/src/test/java/com/yahoo/docprocs/indexing/DocumentScriptTestCase.java281
-rw-r--r--docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTestCase.java131
-rw-r--r--docprocs/src/test/java/com/yahoo/docprocs/indexing/ScriptManagerTestCase.java69
18 files changed, 2063 insertions, 0 deletions
diff --git a/docprocs/.gitignore b/docprocs/.gitignore
new file mode 100644
index 00000000000..49a294da216
--- /dev/null
+++ b/docprocs/.gitignore
@@ -0,0 +1,5 @@
+*.ipr
+*.iws
+docprocs.iml
+target
+/pom.xml.build
diff --git a/docprocs/OWNERS b/docprocs/OWNERS
new file mode 100644
index 00000000000..7ae1acb1be9
--- /dev/null
+++ b/docprocs/OWNERS
@@ -0,0 +1 @@
+geirst
diff --git a/docprocs/README b/docprocs/README
new file mode 100644
index 00000000000..33620c139d4
--- /dev/null
+++ b/docprocs/README
@@ -0,0 +1 @@
+Module containing all the Document Processor plugins.
diff --git a/docprocs/pom.xml b/docprocs/pom.xml
new file mode 100644
index 00000000000..64fed1d55e2
--- /dev/null
+++ b/docprocs/pom.xml
@@ -0,0 +1,164 @@
+<?xml version="1.0"?>
+<!-- Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>parent</artifactId>
+ <version>6-SNAPSHOT</version>
+ <relativePath>../parent/pom.xml</relativePath>
+ </parent>
+ <artifactId>docprocs</artifactId>
+ <packaging>container-plugin</packaging>
+ <version>6-SNAPSHOT</version>
+ <dependencies>
+ <dependency>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>document</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>predicate-search-core</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>docproc</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>container-dev</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+
+ <!-- Workaround for maven issue MNG-5188 -->
+ <exclusions>
+ <exclusion>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>document</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>annotations</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>commons-codec</groupId>
+ <artifactId>commons-codec</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>commons-collections</groupId>
+ <artifactId>commons-collections</artifactId>
+ </exclusion>
+ </exclusions>
+ <!-- End Workaround -->
+
+ </dependency>
+ <dependency>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>vespajlib</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>config-bundle</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>configdefinitions</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>linguistics</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>indexinglanguage</artifactId>
+ <version>${project.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>document</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>predicate-search-core</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>annotation</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.json</groupId>
+ <artifactId>json</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ </dependencies>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>bundle-plugin</artifactId>
+ <extensions>true</extensions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <configuration>
+ <compilerArgs>
+ <arg>-Werror</arg>
+ <arg>-Xlint:all</arg>
+ </compilerArgs>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <configuration>
+ <forkMode>pertest</forkMode>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>config-class-plugin</artifactId>
+ <version>${project.version}</version>
+ <configuration>
+ <defFilesDirectories>etc</defFilesDirectories>
+ </configuration>
+ <executions>
+ <execution>
+ <id>config-gen</id>
+ <goals>
+ <goal>config-gen</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-install-plugin</artifactId>
+ <version>2.3.1</version>
+ <configuration>
+ <updateReleaseInfo>true</updateReleaseInfo>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+</project>
diff --git a/docprocs/src/main/java/com/yahoo/docprocs/indexing/DocumentScript.java b/docprocs/src/main/java/com/yahoo/docprocs/indexing/DocumentScript.java
new file mode 100644
index 00000000000..a367aec0cfb
--- /dev/null
+++ b/docprocs/src/main/java/com/yahoo/docprocs/indexing/DocumentScript.java
@@ -0,0 +1,107 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.docprocs.indexing;
+
+import com.yahoo.document.Document;
+import com.yahoo.document.DocumentUpdate;
+import com.yahoo.document.Field;
+import com.yahoo.document.annotation.SpanTrees;
+import com.yahoo.document.datatypes.Array;
+import com.yahoo.document.datatypes.FieldValue;
+import com.yahoo.document.datatypes.MapFieldValue;
+import com.yahoo.document.datatypes.StringFieldValue;
+import com.yahoo.document.datatypes.Struct;
+import com.yahoo.document.datatypes.StructuredFieldValue;
+import com.yahoo.document.datatypes.WeightedSet;
+import com.yahoo.document.fieldpathupdate.AssignFieldPathUpdate;
+import com.yahoo.document.fieldpathupdate.FieldPathUpdate;
+import com.yahoo.document.update.FieldUpdate;
+import com.yahoo.document.update.MapValueUpdate;
+import com.yahoo.document.update.ValueUpdate;
+import com.yahoo.vespa.indexinglanguage.AdapterFactory;
+import com.yahoo.vespa.indexinglanguage.expressions.Expression;
+
+import java.util.*;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class DocumentScript {
+
+ private final String documentType;
+ private final Set<String> inputFields;
+ private final Expression expression;
+
+ public DocumentScript(String documentType, Collection<String> inputFields, Expression expression) {
+ this.documentType = documentType;
+ this.inputFields = new HashSet<>(inputFields);
+ this.expression = expression;
+ }
+
+ public Expression getExpression() { return expression; }
+ public Document execute(AdapterFactory adapterFactory, Document document) {
+ for (Iterator<Map.Entry<Field, FieldValue>> it = document.iterator(); it.hasNext(); ) {
+ Map.Entry<Field, FieldValue> entry = it.next();
+ requireThatFieldIsDeclaredInDocument(entry.getKey());
+ removeAnyLinguisticsSpanTree(entry.getValue());
+ }
+ return expression.execute(adapterFactory, document);
+ }
+
+ public DocumentUpdate execute(AdapterFactory adapterFactory, DocumentUpdate update) {
+ for (FieldUpdate fieldUpdate : update.getFieldUpdates()) {
+ requireThatFieldIsDeclaredInDocument(fieldUpdate.getField());
+ for (ValueUpdate<?> valueUpdate : fieldUpdate.getValueUpdates()) {
+ removeAnyLinguisticsSpanTree(valueUpdate);
+ }
+ }
+ for (FieldPathUpdate fieldUpdate : update.getFieldPathUpdates()) {
+ requireThatFieldIsDeclaredInDocument(fieldUpdate.getFieldPath().get(0).getFieldRef());
+ if (fieldUpdate instanceof AssignFieldPathUpdate) {
+ removeAnyLinguisticsSpanTree(((AssignFieldPathUpdate)fieldUpdate).getFieldValue());
+ }
+ }
+ return Expression.execute(expression, adapterFactory, update);
+ }
+
+ private void requireThatFieldIsDeclaredInDocument(Field field) {
+ if (field != null && !inputFields.contains(field.getName())) {
+ throw new IllegalArgumentException("Field '" + field.getName() + "' is not part of the declared document " +
+ "type '" + documentType + "'.");
+ }
+ }
+
+ private void removeAnyLinguisticsSpanTree(ValueUpdate<?> valueUpdate) {
+ if (valueUpdate instanceof MapValueUpdate) {
+ removeAnyLinguisticsSpanTree(((MapValueUpdate)valueUpdate).getUpdate());
+ } else {
+ removeAnyLinguisticsSpanTree(valueUpdate.getValue());
+ }
+ }
+
+ private void removeAnyLinguisticsSpanTree(FieldValue value) {
+ if (value instanceof StringFieldValue) {
+ ((StringFieldValue)value).removeSpanTree(SpanTrees.LINGUISTICS);
+ } else if (value instanceof Array) {
+ Array<?> arr = (Array)value;
+ for (Object obj : arr.getValues()) {
+ removeAnyLinguisticsSpanTree((FieldValue)obj);
+ }
+ } else if (value instanceof WeightedSet) {
+ WeightedSet<?> wset = (WeightedSet)value;
+ for (Object obj : wset.keySet()) {
+ removeAnyLinguisticsSpanTree((FieldValue)obj);
+ }
+ } else if (value instanceof MapFieldValue) {
+ MapFieldValue<?,?> map = (MapFieldValue)value;
+ for (Map.Entry<?,?> entry : map.entrySet()) {
+ removeAnyLinguisticsSpanTree((FieldValue)entry.getKey());
+ removeAnyLinguisticsSpanTree((FieldValue)entry.getValue());
+ }
+ } else if (value instanceof StructuredFieldValue) {
+ StructuredFieldValue struct = (StructuredFieldValue)value;
+ for (Iterator<Map.Entry<Field, FieldValue>> it = struct.iterator(); it.hasNext();) {
+ removeAnyLinguisticsSpanTree(it.next().getValue());
+ }
+ }
+ }
+}
diff --git a/docprocs/src/main/java/com/yahoo/docprocs/indexing/FastLogger.java b/docprocs/src/main/java/com/yahoo/docprocs/indexing/FastLogger.java
new file mode 100644
index 00000000000..e990c9fb894
--- /dev/null
+++ b/docprocs/src/main/java/com/yahoo/docprocs/indexing/FastLogger.java
@@ -0,0 +1,32 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.docprocs.indexing;
+
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen Hult</a>
+ */
+class FastLogger { // Thin wrapper over java.util.logging.Logger that only formats a message when its level is loggable.
+
+ private final Logger log;
+
+ private FastLogger(Logger log) {
+ this.log = log;
+ }
+
+ public void log(Level level, String format, Object... args) { // format is a String.format pattern when args are given, otherwise it is logged verbatim
+ if (!log.isLoggable(level)) {
+ return; // skip String.format entirely for disabled levels
+ }
+ if (args != null && args.length > 0) { // null guard covers an explicit (Object[]) null call, which used to throw NullPointerException
+ log.log(level, String.format(format, args));
+ } else {
+ log.log(level, format);
+ }
+ }
+
+ public static FastLogger getLogger(String name) { // factory; the constructor is private
+ return new FastLogger(Logger.getLogger(name));
+ }
+}
diff --git a/docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java b/docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java
new file mode 100644
index 00000000000..c6fe7b301e1
--- /dev/null
+++ b/docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java
@@ -0,0 +1,120 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.docprocs.indexing;
+
+import java.util.ArrayList;
+import java.util.List;
+import com.google.inject.Inject;
+import com.yahoo.component.chain.dependencies.After;
+import com.yahoo.component.chain.dependencies.Before;
+import com.yahoo.component.chain.dependencies.Provides;
+import com.yahoo.docproc.DocumentProcessor;
+import com.yahoo.docproc.Processing;
+import com.yahoo.document.*;
+import com.yahoo.document.config.DocumentmanagerConfig;
+import com.yahoo.language.Linguistics;
+import com.yahoo.log.LogLevel;
+import com.yahoo.vespa.configdefinition.IlscriptsConfig;
+import com.yahoo.vespa.indexinglanguage.AdapterFactory;
+import com.yahoo.vespa.indexinglanguage.SimpleAdapterFactory;
+import com.yahoo.vespa.indexinglanguage.expressions.Expression;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+@Provides({ IndexingProcessor.PROVIDED_NAME })
+@Before({ IndexingProcessor.INDEXING_END })
+@After({ IndexingProcessor.INDEXING_START, "*" })
+public class IndexingProcessor extends DocumentProcessor { // Runs the configured indexing script on each put and update; removes pass through unchanged.
+
+ public final static String PROVIDED_NAME = "indexedDocument";
+ public final static String INDEXING_START = "indexingStart";
+ public final static String INDEXING_END = "indexingEnd";
+
+ private final static FastLogger log = FastLogger.getLogger(IndexingProcessor.class.getName());
+ private final DocumentTypeManager docTypeMgr;
+ private final ScriptManager scriptMgr;
+ private final AdapterFactory adapterFactory;
+
+ private class ExpressionSelector extends SimpleAdapterFactory.SelectExpression { // non-static: reads the enclosing instance's scriptMgr
+ @Override
+ public Expression selectExpression(DocumentType documentType, String fieldName) {
+ return scriptMgr.getScript(documentType, fieldName).getExpression(); // NOTE(review): getScript can return null, which would throw NullPointerException here - confirm callers only ask for known fields
+ }
+ }
+
+ @Inject
+ public IndexingProcessor(DocumentmanagerConfig documentmanagerConfig,
+ IlscriptsConfig ilscriptsConfig,
+ Linguistics linguistics) {
+ docTypeMgr = DocumentTypeManagerConfigurer.configureNewManager(documentmanagerConfig);
+ scriptMgr = new ScriptManager(docTypeMgr, ilscriptsConfig, linguistics);
+ adapterFactory = new SimpleAdapterFactory(new ExpressionSelector());
+ }
+
+ @Override
+ public Progress process(Processing proc) { // replaces the operation list with the processed operations; throws IllegalArgumentException on null or unsupported operations
+ if (proc.getDocumentOperations().isEmpty()) {
+ return Progress.DONE;
+ }
+ List<DocumentOperation> out = new ArrayList<>(proc.getDocumentOperations().size());
+ for (DocumentOperation documentOperation : proc.getDocumentOperations()) {
+ if (documentOperation instanceof DocumentPut) {
+ processDocument((DocumentPut)documentOperation, out);
+ } else if (documentOperation instanceof DocumentUpdate) {
+ processUpdate((DocumentUpdate)documentOperation, out);
+ } else if (documentOperation instanceof DocumentRemove) {
+ processRemove((DocumentRemove)documentOperation, out);
+ } else if (documentOperation != null) {
+ throw new IllegalArgumentException("Document class " + documentOperation.getClass().getName() + " not supported.");
+ } else {
+ throw new IllegalArgumentException("Expected document, got null.");
+ }
+ }
+ proc.getDocumentOperations().clear(); // only reached when every operation was handled without an exception
+ proc.getDocumentOperations().addAll(out);
+ return Progress.DONE;
+ }
+
+ DocumentTypeManager getDocumentTypeManager() {
+ return docTypeMgr;
+ }
+
+ private void processDocument(DocumentPut prev, List<DocumentOperation> out) { // no script: pass through; null script result: drop the operation
+ DocumentScript script = scriptMgr.getScript(prev.getDocument().getDataType());
+ if (script == null) {
+ log.log(LogLevel.DEBUG, "No indexing script for document '%s'.", prev.getId());
+ out.add(prev);
+ return;
+ }
+ log.log(LogLevel.DEBUG, "Processing document '%s'.", prev.getId());
+ Document next = script.execute(adapterFactory, prev.getDocument());
+ if (next == null) {
+ log.log(LogLevel.DEBUG, "Document '%s' produced no output.", prev.getId()); // format args so the message is only built when DEBUG is loggable
+ return;
+ }
+
+ out.add(new DocumentPut(prev, next));
+ }
+
+ private void processUpdate(DocumentUpdate prev, List<DocumentOperation> out) { // no script: pass through; null script result: drop the operation
+ DocumentScript script = scriptMgr.getScript(prev.getType());
+ if (script == null) {
+ log.log(LogLevel.DEBUG, "No indexing script for update '%s'.", prev.getId());
+ out.add(prev);
+ return;
+ }
+ log.log(LogLevel.DEBUG, "Processing update '%s'.", prev.getId());
+ DocumentUpdate next = script.execute(adapterFactory, prev);
+ if (next == null) {
+ log.log(LogLevel.DEBUG, "Update '%s' produced no output.", prev.getId()); // format args so the message is only built when DEBUG is loggable
+ return;
+ }
+ next.setCondition(prev.getCondition()); // carry the original update's condition over to the script output
+ out.add(next);
+ }
+
+ private void processRemove(DocumentRemove prev, List<DocumentOperation> out) { // removes are forwarded untouched
+ log.log(LogLevel.DEBUG, "Not processing remove '%s'.", prev.getId());
+ out.add(prev);
+ }
+}
diff --git a/docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java b/docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java
new file mode 100644
index 00000000000..14bf5a0edf8
--- /dev/null
+++ b/docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java
@@ -0,0 +1,128 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.docprocs.indexing;
+
+import com.yahoo.document.DocumentType;
+import com.yahoo.document.DocumentTypeManager;
+import com.yahoo.language.Linguistics;
+import com.yahoo.log.LogLevel;
+import com.yahoo.vespa.configdefinition.IlscriptsConfig;
+import com.yahoo.vespa.indexinglanguage.ScriptParserContext;
+import com.yahoo.vespa.indexinglanguage.expressions.InputExpression;
+import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression;
+import com.yahoo.vespa.indexinglanguage.expressions.StatementExpression;
+import com.yahoo.vespa.indexinglanguage.parser.IndexingInput;
+import com.yahoo.vespa.indexinglanguage.parser.ParseException;
+
+import java.util.*;
+import java.util.logging.Level;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class ScriptManager {
+
+ private static final FastLogger log = FastLogger.getLogger(ScriptManager.class.getName());
+ private static final String FULL = "[all]";
+ private final Map<String, Map<String, DocumentScript>> documentFieldScripts;
+ private final DocumentTypeManager docTypeMgr;
+
+ public ScriptManager(DocumentTypeManager docTypeMgr, IlscriptsConfig config, Linguistics linguistics) {
+ this.docTypeMgr = docTypeMgr;
+ documentFieldScripts = createScriptsMap(docTypeMgr, config, linguistics);
+ }
+
+
+ private Map<String, DocumentScript> getScripts(DocumentType inputType) {
+ Map<String, DocumentScript> scripts = documentFieldScripts.get(inputType.getName());
+ if (scripts != null) {
+ log.log(LogLevel.DEBUG, "Using script for type '%s'.", inputType.getName());
+ return scripts;
+ }
+ for (Map.Entry<String, Map<String, DocumentScript>> entry : documentFieldScripts.entrySet()) {
+ if (inputType.inherits(docTypeMgr.getDocumentType(entry.getKey()))) {
+ log.log(LogLevel.DEBUG, "Using script of super-type '%s'.", entry.getKey());
+ return entry.getValue();
+ }
+ }
+ for (Map.Entry<String, Map<String, DocumentScript>> entry : documentFieldScripts.entrySet()) {
+ if (docTypeMgr.getDocumentType(entry.getKey()).inherits(inputType)) {
+ log.log(LogLevel.DEBUG, "Using script of sub-type '%s'.", entry.getKey());
+ return entry.getValue();
+ }
+ }
+ log.log(LogLevel.DEBUG, "No script for type '%s'.", inputType.getName());
+ return null;
+ }
+
+ public DocumentScript getScript(DocumentType inputType) {
+ return getScript(inputType, FULL);
+ }
+
+ public DocumentScript getScript(DocumentType inputType, String inputFieldName) {
+ Map<String, DocumentScript> fieldScripts = getScripts(inputType);
+ if (fieldScripts != null) {
+ DocumentScript script = fieldScripts.get(inputFieldName);
+ if (script != null) {
+ log.log(LogLevel.DEBUG, "Using script for type '%s' and field '%s'.", inputType.getName(), inputFieldName);
+ return script;
+ }
+ }
+ return null;
+ }
+
+ private static Map<String, Map<String, DocumentScript>> createScriptsMap(DocumentTypeManager docTypeMgr,
+ IlscriptsConfig config,
+ Linguistics linguistics) {
+ Map<String, Map<String, DocumentScript>> documentFieldScripts = new HashMap<>(config.ilscript().size());
+ ScriptParserContext parserContext = new ScriptParserContext(linguistics);
+ parserContext.getAnnotatorConfig().setMaxTermOccurrences(config.maxtermoccurrences());
+
+ for (IlscriptsConfig.Ilscript ilscript : config.ilscript()) {
+ InputExpression.FieldPathOptimizer fieldPathOptimizer = new InputExpression.FieldPathOptimizer(docTypeMgr.getDocumentType(ilscript.doctype()));
+ List<StatementExpression> expressions = new ArrayList<>(ilscript.content().size());
+ Map<String, DocumentScript> fieldScripts = new HashMap<>(ilscript.content().size());
+ for (String content : ilscript.content()) {
+ expressions.add(parse(ilscript.doctype(), parserContext, content));
+ StatementExpression statement = parse(ilscript.doctype(), parserContext, content);
+ InputExpression.InputFieldNameExtractor inputFieldNameExtractor = new InputExpression.InputFieldNameExtractor();
+ statement.select(inputFieldNameExtractor, inputFieldNameExtractor);
+ statement.select(fieldPathOptimizer, fieldPathOptimizer);
+ if (inputFieldNameExtractor.getInputFieldNames().size() == 1) {
+ String fieldName = inputFieldNameExtractor.getInputFieldNames().get(0);
+ ScriptExpression script;
+ if (fieldScripts.containsKey(fieldName)) {
+ DocumentScript prev = fieldScripts.get(fieldName);
+ List<StatementExpression> appendedList = new ArrayList<>(((ScriptExpression)prev.getExpression()).asList());
+ appendedList.add(statement);
+ script = new ScriptExpression(appendedList);
+ log.log(Level.FINE, "Appending script for field '" + fieldName + "' = " + statement);
+ log.log(Level.FINE, "Full script for field '" + fieldName + "' = " + appendedList);
+ } else {
+ script = new ScriptExpression(statement);
+ log.log(Level.FINE, "Setting script for field '" + fieldName + "' = " + statement);
+ }
+ DocumentScript documentScript = new DocumentScript(ilscript.doctype(), inputFieldNameExtractor.getInputFieldNames(), script);
+ fieldScripts.put(fieldName, documentScript);
+ } else {
+ log.log(Level.FINE, "Non single(" + inputFieldNameExtractor.getInputFieldNames().size() +") inputs = " + inputFieldNameExtractor.getInputFieldNames() + ". Script = " + statement);
+ }
+ }
+
+ ScriptExpression script = new ScriptExpression(expressions);
+ script.select(fieldPathOptimizer, fieldPathOptimizer);
+ fieldScripts.put(FULL, new DocumentScript(ilscript.doctype(), ilscript.docfield(),script));
+ documentFieldScripts.put(ilscript.doctype(), Collections.unmodifiableMap(fieldScripts));
+ }
+ return Collections.unmodifiableMap(documentFieldScripts);
+ }
+
+ private static StatementExpression parse(String docType, ScriptParserContext parserConfig, String content) {
+ parserConfig.setInputStream(new IndexingInput(content));
+ try {
+ return StatementExpression.newInstance(parserConfig);
+ } catch (ParseException e) {
+ throw new IllegalArgumentException("Illegal indexing script for document type '" +
+ docType + "'; " + content, e);
+ }
+ }
+}
diff --git a/docprocs/src/test/cfg/attributes.cfg b/docprocs/src/test/cfg/attributes.cfg
new file mode 100644
index 00000000000..bf9ccd94453
--- /dev/null
+++ b/docprocs/src/test/cfg/attributes.cfg
@@ -0,0 +1,69 @@
+attribute[22]
+attribute[0].name sales
+attribute[0].datatype INT32
+attribute[0].collectiontype SINGLE
+attribute[1].name pto
+attribute[1].datatype INT32
+attribute[1].collectiontype SINGLE
+attribute[2].name mid
+attribute[2].datatype INT32
+attribute[2].collectiontype ARRAY
+attribute[3].name ew
+attribute[3].datatype STRING
+attribute[3].collectiontype SINGLE
+attribute[4].name weight
+attribute[4].datatype FLOAT
+attribute[4].collectiontype SINGLE
+attribute[5].name bgnpfrom
+attribute[5].datatype FLOAT
+attribute[5].collectiontype SINGLE
+attribute[6].name artist
+attribute[6].datatype STRING
+attribute[6].collectiontype SINGLE
+attribute[7].name artistspid
+attribute[7].datatype STRING
+attribute[7].collectiontype WEIGHTEDSET
+attribute[8].name artistspid2
+attribute[8].datatype FLOAT
+attribute[8].collectiontype WEIGHTEDSET
+attribute[9].name title
+attribute[9].datatype STRING
+attribute[9].collectiontype SINGLE
+attribute[10].name newestedition
+attribute[10].datatype UINT32
+attribute[10].collectiontype SINGLE
+attribute[11].name year
+attribute[11].datatype INT32
+attribute[11].collectiontype ARRAY
+attribute[12].name endyear
+attribute[12].datatype INT32
+attribute[12].collectiontype ARRAY
+attribute[13].name did
+attribute[13].datatype INT32
+attribute[13].collectiontype SINGLE
+attribute[14].name cbid
+attribute[14].datatype INT32
+attribute[14].collectiontype SINGLE
+attribute[15].name noupdate
+attribute[15].datatype STRING
+attribute[15].collectiontype SINGLE
+attribute[15].noupdate false
+attribute[16].name noupdate2
+attribute[16].datatype STRING
+attribute[16].collectiontype SINGLE
+attribute[16].noupdate false
+attribute[17].name multiposition2d_position
+attribute[17].datatype INT64
+attribute[17].collectiontype ARRAY
+attribute[18].name extracategories
+attribute[18].datatype STRING
+attribute[18].collectiontype ARRAY
+attribute[19].name default_fieldlength
+attribute[19].datatype UINT32
+attribute[19].collectiontype ARRAY
+attribute[20].name fmt_fieldlength
+attribute[20].datatype UINT32
+attribute[20].collectiontype SINGLE
+attribute[21].name categories_fieldlength
+attribute[21].datatype UINT32
+attribute[21].collectiontype SINGLE
diff --git a/docprocs/src/test/cfg/docproc-indexing.cfg b/docprocs/src/test/cfg/docproc-indexing.cfg
new file mode 100644
index 00000000000..4927ab80aea
--- /dev/null
+++ b/docprocs/src/test/cfg/docproc-indexing.cfg
@@ -0,0 +1,2 @@
+searchClusterConfigId "dir:src/test/cfg"
+nextGen.enabled false
diff --git a/docprocs/src/test/cfg/documentmanager.cfg b/docprocs/src/test/cfg/documentmanager.cfg
new file mode 100644
index 00000000000..af556b001ba
--- /dev/null
+++ b/docprocs/src/test/cfg/documentmanager.cfg
@@ -0,0 +1,711 @@
+datatype[25]
+datatype[0].id -1245117006
+datatype[0].arraytype[1]
+datatype[0].arraytype[0].datatype 0
+datatype[0].weightedsettype[0]
+datatype[0].structtype[0]
+datatype[0].documenttype[0]
+datatype[1].id 1328286588
+datatype[1].arraytype[0]
+datatype[1].weightedsettype[1]
+datatype[1].weightedsettype[0].datatype 2
+datatype[1].weightedsettype[0].createifnonexistant false
+datatype[1].weightedsettype[0].removeifzero false
+datatype[1].structtype[0]
+datatype[1].documenttype[0]
+datatype[2].id 1325751891
+datatype[2].arraytype[0]
+datatype[2].weightedsettype[1]
+datatype[2].weightedsettype[0].datatype 1
+datatype[2].weightedsettype[0].createifnonexistant false
+datatype[2].weightedsettype[0].removeifzero false
+datatype[2].structtype[0]
+datatype[2].documenttype[0]
+datatype[3].id -1486737430
+datatype[3].arraytype[1]
+datatype[3].arraytype[0].datatype 2
+datatype[3].weightedsettype[0]
+datatype[3].structtype[0]
+datatype[3].documenttype[0]
+datatype[4].id -1910204744
+datatype[4].arraytype[0]
+datatype[4].weightedsettype[0]
+datatype[4].structtype[1]
+datatype[4].structtype[0].name music.header
+datatype[4].structtype[0].version 0
+datatype[4].structtype[0].field[39]
+datatype[4].structtype[0].field[0].name bgndata
+datatype[4].structtype[0].field[0].id[0]
+datatype[4].structtype[0].field[0].datatype 2
+datatype[4].structtype[0].field[1].name sales
+datatype[4].structtype[0].field[1].id[0]
+datatype[4].structtype[0].field[1].datatype 0
+datatype[4].structtype[0].field[2].name pto
+datatype[4].structtype[0].field[2].id[0]
+datatype[4].structtype[0].field[2].datatype 0
+datatype[4].structtype[0].field[3].name keys
+datatype[4].structtype[0].field[3].id[0]
+datatype[4].structtype[0].field[3].datatype 2
+datatype[4].structtype[0].field[4].name mid
+datatype[4].structtype[0].field[4].id[0]
+datatype[4].structtype[0].field[4].datatype -1245117006
+datatype[4].structtype[0].field[5].name ew
+datatype[4].structtype[0].field[5].id[0]
+datatype[4].structtype[0].field[5].datatype 2
+datatype[4].structtype[0].field[6].name surl
+datatype[4].structtype[0].field[6].id[0]
+datatype[4].structtype[0].field[6].datatype 2
+datatype[4].structtype[0].field[7].name userrate
+datatype[4].structtype[0].field[7].id[0]
+datatype[4].structtype[0].field[7].datatype 0
+datatype[4].structtype[0].field[8].name pid
+datatype[4].structtype[0].field[8].id[0]
+datatype[4].structtype[0].field[8].datatype 2
+datatype[4].structtype[0].field[9].name weight
+datatype[4].structtype[0].field[9].id[0]
+datatype[4].structtype[0].field[9].datatype 1
+datatype[4].structtype[0].field[10].name url
+datatype[4].structtype[0].field[10].id[0]
+datatype[4].structtype[0].field[10].datatype 2
+datatype[4].structtype[0].field[11].name isbn
+datatype[4].structtype[0].field[11].id[0]
+datatype[4].structtype[0].field[11].datatype 2
+datatype[4].structtype[0].field[12].name fmt
+datatype[4].structtype[0].field[12].id[0]
+datatype[4].structtype[0].field[12].datatype 2
+datatype[4].structtype[0].field[13].name albumid
+datatype[4].structtype[0].field[13].id[0]
+datatype[4].structtype[0].field[13].datatype 2
+datatype[4].structtype[0].field[14].name disp_song
+datatype[4].structtype[0].field[14].id[0]
+datatype[4].structtype[0].field[14].datatype 2
+datatype[4].structtype[0].field[15].name song
+datatype[4].structtype[0].field[15].id[0]
+datatype[4].structtype[0].field[15].datatype 2
+datatype[4].structtype[0].field[16].name pfrom
+datatype[4].structtype[0].field[16].id[0]
+datatype[4].structtype[0].field[16].datatype 0
+datatype[4].structtype[0].field[17].name bgnpfrom
+datatype[4].structtype[0].field[17].id[0]
+datatype[4].structtype[0].field[17].datatype 1
+datatype[4].structtype[0].field[18].name categories
+datatype[4].structtype[0].field[18].id[0]
+datatype[4].structtype[0].field[18].datatype 2
+datatype[4].structtype[0].field[19].name data
+datatype[4].structtype[0].field[19].id[0]
+datatype[4].structtype[0].field[19].datatype 2
+datatype[4].structtype[0].field[20].name numreview
+datatype[4].structtype[0].field[20].id[0]
+datatype[4].structtype[0].field[20].datatype 0
+datatype[4].structtype[0].field[21].name bgnsellers
+datatype[4].structtype[0].field[21].id[0]
+datatype[4].structtype[0].field[21].datatype 0
+datatype[4].structtype[0].field[22].name image
+datatype[4].structtype[0].field[22].id[0]
+datatype[4].structtype[0].field[22].datatype 2
+datatype[4].structtype[0].field[23].name artist
+datatype[4].structtype[0].field[23].id[0]
+datatype[4].structtype[0].field[23].datatype 2
+datatype[4].structtype[0].field[24].name artistspid
+datatype[4].structtype[0].field[24].id[0]
+datatype[4].structtype[0].field[24].datatype 1328286588
+datatype[4].structtype[0].field[25].name artistspid2
+datatype[4].structtype[0].field[25].id[0]
+datatype[4].structtype[0].field[25].datatype 1325751891
+datatype[4].structtype[0].field[26].name artistspid3
+datatype[4].structtype[0].field[26].id[0]
+datatype[4].structtype[0].field[26].datatype 1328286588
+datatype[4].structtype[0].field[27].name title
+datatype[4].structtype[0].field[27].id[0]
+datatype[4].structtype[0].field[27].datatype 2
+datatype[4].structtype[0].field[28].name newestedition
+datatype[4].structtype[0].field[28].id[0]
+datatype[4].structtype[0].field[28].datatype 0
+datatype[4].structtype[0].field[29].name bgnpto
+datatype[4].structtype[0].field[29].id[0]
+datatype[4].structtype[0].field[29].datatype 2
+datatype[4].structtype[0].field[30].name year
+datatype[4].structtype[0].field[30].id[0]
+datatype[4].structtype[0].field[30].datatype -1245117006
+datatype[4].structtype[0].field[31].name endyear
+datatype[4].structtype[0].field[31].id[0]
+datatype[4].structtype[0].field[31].datatype -1245117006
+datatype[4].structtype[0].field[32].name did
+datatype[4].structtype[0].field[32].id[0]
+datatype[4].structtype[0].field[32].datatype 0
+datatype[4].structtype[0].field[33].name scorekey
+datatype[4].structtype[0].field[33].id[0]
+datatype[4].structtype[0].field[33].datatype 0
+datatype[4].structtype[0].field[34].name cbid
+datatype[4].structtype[0].field[34].id[0]
+datatype[4].structtype[0].field[34].datatype 0
+datatype[4].structtype[0].field[35].name titles
+datatype[4].structtype[0].field[35].id[0]
+datatype[4].structtype[0].field[35].datatype -1486737430
+datatype[4].structtype[0].field[36].name noupdate
+datatype[4].structtype[0].field[36].id[0]
+datatype[4].structtype[0].field[36].datatype 2
+datatype[4].structtype[0].field[37].name noupdate2
+datatype[4].structtype[0].field[37].id[0]
+datatype[4].structtype[0].field[37].datatype 2
+datatype[4].structtype[0].field[38].name multiposition2d
+datatype[4].structtype[0].field[38].id[0]
+datatype[4].structtype[0].field[38].datatype -1486737430
+datatype[4].documenttype[0]
+datatype[5].id 993120973
+datatype[5].arraytype[0]
+datatype[5].weightedsettype[0]
+datatype[5].structtype[1]
+datatype[5].structtype[0].name music.body
+datatype[5].structtype[0].version 0
+datatype[5].structtype[0].field[0]
+datatype[5].documenttype[0]
+datatype[6].id 1412693671
+datatype[6].arraytype[0]
+datatype[6].weightedsettype[0]
+datatype[6].structtype[0]
+datatype[6].documenttype[1]
+datatype[6].documenttype[0].name music
+datatype[6].documenttype[0].version 0
+datatype[6].documenttype[0].inherits[0]
+datatype[6].documenttype[0].headerstruct -1910204744
+datatype[6].documenttype[0].bodystruct 993120973
+datatype[7].id -1801920207
+datatype[7].arraytype[0]
+datatype[7].weightedsettype[0]
+datatype[7].structtype[1]
+datatype[7].structtype[0].name music_summary.header
+datatype[7].structtype[0].version 0
+datatype[7].structtype[0].field[40]
+datatype[7].structtype[0].field[0].name distance
+datatype[7].structtype[0].field[0].id[0]
+datatype[7].structtype[0].field[0].datatype 0
+datatype[7].structtype[0].field[1].name sddocname
+datatype[7].structtype[0].field[1].id[0]
+datatype[7].structtype[0].field[1].datatype 2
+datatype[7].structtype[0].field[2].name bgndata
+datatype[7].structtype[0].field[2].id[0]
+datatype[7].structtype[0].field[2].datatype 2
+datatype[7].structtype[0].field[3].name sales
+datatype[7].structtype[0].field[3].id[0]
+datatype[7].structtype[0].field[3].datatype 0
+datatype[7].structtype[0].field[4].name pto
+datatype[7].structtype[0].field[4].id[0]
+datatype[7].structtype[0].field[4].datatype 0
+datatype[7].structtype[0].field[5].name mid
+datatype[7].structtype[0].field[5].id[0]
+datatype[7].structtype[0].field[5].datatype 2
+datatype[7].structtype[0].field[6].name ew
+datatype[7].structtype[0].field[6].id[0]
+datatype[7].structtype[0].field[6].datatype 2
+datatype[7].structtype[0].field[7].name surl
+datatype[7].structtype[0].field[7].id[0]
+datatype[7].structtype[0].field[7].datatype 2
+datatype[7].structtype[0].field[8].name userrate
+datatype[7].structtype[0].field[8].id[0]
+datatype[7].structtype[0].field[8].datatype 0
+datatype[7].structtype[0].field[9].name pid
+datatype[7].structtype[0].field[9].id[0]
+datatype[7].structtype[0].field[9].datatype 2
+datatype[7].structtype[0].field[10].name weight
+datatype[7].structtype[0].field[10].id[0]
+datatype[7].structtype[0].field[10].datatype 1
+datatype[7].structtype[0].field[11].name url
+datatype[7].structtype[0].field[11].id[0]
+datatype[7].structtype[0].field[11].datatype 2
+datatype[7].structtype[0].field[12].name isbn
+datatype[7].structtype[0].field[12].id[0]
+datatype[7].structtype[0].field[12].datatype 2
+datatype[7].structtype[0].field[13].name fmt
+datatype[7].structtype[0].field[13].id[0]
+datatype[7].structtype[0].field[13].datatype 2
+datatype[7].structtype[0].field[14].name albumid
+datatype[7].structtype[0].field[14].id[0]
+datatype[7].structtype[0].field[14].datatype 2
+datatype[7].structtype[0].field[15].name disp_song
+datatype[7].structtype[0].field[15].id[0]
+datatype[7].structtype[0].field[15].datatype 2
+datatype[7].structtype[0].field[16].name song
+datatype[7].structtype[0].field[16].id[0]
+datatype[7].structtype[0].field[16].datatype 2
+datatype[7].structtype[0].field[17].name pfrom
+datatype[7].structtype[0].field[17].id[0]
+datatype[7].structtype[0].field[17].datatype 0
+datatype[7].structtype[0].field[18].name bgnpfrom
+datatype[7].structtype[0].field[18].id[0]
+datatype[7].structtype[0].field[18].datatype 1
+datatype[7].structtype[0].field[19].name categories
+datatype[7].structtype[0].field[19].id[0]
+datatype[7].structtype[0].field[19].datatype 2
+datatype[7].structtype[0].field[20].name data
+datatype[7].structtype[0].field[20].id[0]
+datatype[7].structtype[0].field[20].datatype 2
+datatype[7].structtype[0].field[21].name numreview
+datatype[7].structtype[0].field[21].id[0]
+datatype[7].structtype[0].field[21].datatype 0
+datatype[7].structtype[0].field[22].name bgnsellers
+datatype[7].structtype[0].field[22].id[0]
+datatype[7].structtype[0].field[22].datatype 0
+datatype[7].structtype[0].field[23].name image
+datatype[7].structtype[0].field[23].id[0]
+datatype[7].structtype[0].field[23].datatype 2
+datatype[7].structtype[0].field[24].name artist
+datatype[7].structtype[0].field[24].id[0]
+datatype[7].structtype[0].field[24].datatype 2
+datatype[7].structtype[0].field[25].name artistspid
+datatype[7].structtype[0].field[25].id[0]
+datatype[7].structtype[0].field[25].datatype 2
+datatype[7].structtype[0].field[26].name artistspid3
+datatype[7].structtype[0].field[26].id[0]
+datatype[7].structtype[0].field[26].datatype 2
+datatype[7].structtype[0].field[27].name title
+datatype[7].structtype[0].field[27].id[0]
+datatype[7].structtype[0].field[27].datatype 2
+datatype[7].structtype[0].field[28].name newestedition
+datatype[7].structtype[0].field[28].id[0]
+datatype[7].structtype[0].field[28].datatype 0
+datatype[7].structtype[0].field[29].name bgnpto
+datatype[7].structtype[0].field[29].id[0]
+datatype[7].structtype[0].field[29].datatype 2
+datatype[7].structtype[0].field[30].name year
+datatype[7].structtype[0].field[30].id[0]
+datatype[7].structtype[0].field[30].datatype 2
+datatype[7].structtype[0].field[31].name endyear
+datatype[7].structtype[0].field[31].id[0]
+datatype[7].structtype[0].field[31].datatype 2
+datatype[7].structtype[0].field[32].name did
+datatype[7].structtype[0].field[32].id[0]
+datatype[7].structtype[0].field[32].datatype 0
+datatype[7].structtype[0].field[33].name scorekey
+datatype[7].structtype[0].field[33].id[0]
+datatype[7].structtype[0].field[33].datatype 0
+datatype[7].structtype[0].field[34].name cbid
+datatype[7].structtype[0].field[34].id[0]
+datatype[7].structtype[0].field[34].datatype 0
+datatype[7].structtype[0].field[35].name titles
+datatype[7].structtype[0].field[35].id[0]
+datatype[7].structtype[0].field[35].datatype 2
+datatype[7].structtype[0].field[36].name ranklog
+datatype[7].structtype[0].field[36].id[0]
+datatype[7].structtype[0].field[36].datatype 2
+datatype[7].structtype[0].field[37].name rankfeatures
+datatype[7].structtype[0].field[37].id[0]
+datatype[7].structtype[0].field[37].datatype 2
+datatype[7].structtype[0].field[38].name summaryfeatures
+datatype[7].structtype[0].field[38].id[0]
+datatype[7].structtype[0].field[38].datatype 2
+datatype[7].structtype[0].field[39].name documentid
+datatype[7].structtype[0].field[39].id[0]
+datatype[7].structtype[0].field[39].datatype 2
+datatype[7].documenttype[0]
+datatype[8].id -1728551034
+datatype[8].arraytype[0]
+datatype[8].weightedsettype[0]
+datatype[8].structtype[1]
+datatype[8].structtype[0].name music_summary.body
+datatype[8].structtype[0].version 0
+datatype[8].structtype[0].field[0]
+datatype[8].documenttype[0]
+datatype[9].id 1601149518
+datatype[9].arraytype[0]
+datatype[9].weightedsettype[0]
+datatype[9].structtype[0]
+datatype[9].documenttype[1]
+datatype[9].documenttype[0].name music_summary
+datatype[9].documenttype[0].version 0
+datatype[9].documenttype[0].inherits[0]
+datatype[9].documenttype[0].headerstruct -1801920207
+datatype[9].documenttype[0].bodystruct -1728551034
+datatype[10].id 1509154821
+datatype[10].arraytype[0]
+datatype[10].weightedsettype[0]
+datatype[10].structtype[1]
+datatype[10].structtype[0].name music_index.header
+datatype[10].structtype[0].version 0
+datatype[10].structtype[0].field[19]
+datatype[10].structtype[0].field[0].name sddocname
+datatype[10].structtype[0].field[0].id[0]
+datatype[10].structtype[0].field[0].datatype -1486737430
+datatype[10].structtype[0].field[1].name sales
+datatype[10].structtype[0].field[1].id[0]
+datatype[10].structtype[0].field[1].datatype -1245117006
+datatype[10].structtype[0].field[2].name pto
+datatype[10].structtype[0].field[2].id[0]
+datatype[10].structtype[0].field[2].datatype -1245117006
+datatype[10].structtype[0].field[3].name keys
+datatype[10].structtype[0].field[3].id[0]
+datatype[10].structtype[0].field[3].datatype -1486737430
+datatype[10].structtype[0].field[4].name mid
+datatype[10].structtype[0].field[4].id[0]
+datatype[10].structtype[0].field[4].datatype -1245117006
+datatype[10].structtype[0].field[5].name ew
+datatype[10].structtype[0].field[5].id[0]
+datatype[10].structtype[0].field[5].datatype -1486737430
+datatype[10].structtype[0].field[6].name fmt
+datatype[10].structtype[0].field[6].id[0]
+datatype[10].structtype[0].field[6].datatype -1486737430
+datatype[10].structtype[0].field[7].name song
+datatype[10].structtype[0].field[7].id[0]
+datatype[10].structtype[0].field[7].datatype -1486737430
+datatype[10].structtype[0].field[8].name categories
+datatype[10].structtype[0].field[8].id[0]
+datatype[10].structtype[0].field[8].datatype -1486737430
+datatype[10].structtype[0].field[9].name artist
+datatype[10].structtype[0].field[9].id[0]
+datatype[10].structtype[0].field[9].datatype -1486737430
+datatype[10].structtype[0].field[10].name artistspid3
+datatype[10].structtype[0].field[10].id[0]
+datatype[10].structtype[0].field[10].datatype -1486737430
+datatype[10].structtype[0].field[11].name title
+datatype[10].structtype[0].field[11].id[0]
+datatype[10].structtype[0].field[11].datatype -1486737430
+datatype[10].structtype[0].field[12].name newestedition
+datatype[10].structtype[0].field[12].id[0]
+datatype[10].structtype[0].field[12].datatype -1245117006
+datatype[10].structtype[0].field[13].name year
+datatype[10].structtype[0].field[13].id[0]
+datatype[10].structtype[0].field[13].datatype -1245117006
+datatype[10].structtype[0].field[14].name endyear
+datatype[10].structtype[0].field[14].id[0]
+datatype[10].structtype[0].field[14].datatype -1245117006
+datatype[10].structtype[0].field[15].name did
+datatype[10].structtype[0].field[15].id[0]
+datatype[10].structtype[0].field[15].datatype -1245117006
+datatype[10].structtype[0].field[16].name scorekey
+datatype[10].structtype[0].field[16].id[0]
+datatype[10].structtype[0].field[16].datatype -1245117006
+datatype[10].structtype[0].field[17].name cbid
+datatype[10].structtype[0].field[17].id[0]
+datatype[10].structtype[0].field[17].datatype -1245117006
+datatype[10].structtype[0].field[18].name titles
+datatype[10].structtype[0].field[18].id[0]
+datatype[10].structtype[0].field[18].datatype -1486737430
+datatype[10].documenttype[0]
+datatype[11].id -1997730982
+datatype[11].arraytype[0]
+datatype[11].weightedsettype[0]
+datatype[11].structtype[1]
+datatype[11].structtype[0].name music_index.body
+datatype[11].structtype[0].version 0
+datatype[11].structtype[0].field[0]
+datatype[11].documenttype[0]
+datatype[12].id 2108744186
+datatype[12].arraytype[0]
+datatype[12].weightedsettype[0]
+datatype[12].structtype[0]
+datatype[12].documenttype[1]
+datatype[12].documenttype[0].name music_index
+datatype[12].documenttype[0].version 0
+datatype[12].documenttype[0].inherits[0]
+datatype[12].documenttype[0].headerstruct 1509154821
+datatype[12].documenttype[0].bodystruct -1997730982
+datatype[13].id 58874399
+datatype[13].arraytype[1]
+datatype[13].arraytype[0].datatype 4
+datatype[13].weightedsettype[0]
+datatype[13].structtype[0]
+datatype[13].documenttype[0]
+datatype[14].id -1497398149
+datatype[14].arraytype[0]
+datatype[14].weightedsettype[0]
+datatype[14].structtype[1]
+datatype[14].structtype[0].name music_attribute.header
+datatype[14].structtype[0].version 0
+datatype[14].structtype[0].field[22]
+datatype[14].structtype[0].field[0].name sales
+datatype[14].structtype[0].field[0].id[0]
+datatype[14].structtype[0].field[0].datatype 0
+datatype[14].structtype[0].field[1].name pto
+datatype[14].structtype[0].field[1].id[0]
+datatype[14].structtype[0].field[1].datatype 0
+datatype[14].structtype[0].field[2].name mid
+datatype[14].structtype[0].field[2].id[0]
+datatype[14].structtype[0].field[2].datatype -1245117006
+datatype[14].structtype[0].field[3].name ew
+datatype[14].structtype[0].field[3].id[0]
+datatype[14].structtype[0].field[3].datatype 2
+datatype[14].structtype[0].field[4].name weight
+datatype[14].structtype[0].field[4].id[0]
+datatype[14].structtype[0].field[4].datatype 1
+datatype[14].structtype[0].field[5].name bgnpfrom
+datatype[14].structtype[0].field[5].id[0]
+datatype[14].structtype[0].field[5].datatype 1
+datatype[14].structtype[0].field[6].name artist
+datatype[14].structtype[0].field[6].id[0]
+datatype[14].structtype[0].field[6].datatype 2
+datatype[14].structtype[0].field[7].name artistspid
+datatype[14].structtype[0].field[7].id[0]
+datatype[14].structtype[0].field[7].datatype 1328286588
+datatype[14].structtype[0].field[8].name artistspid2
+datatype[14].structtype[0].field[8].id[0]
+datatype[14].structtype[0].field[8].datatype 1325751891
+datatype[14].structtype[0].field[9].name title
+datatype[14].structtype[0].field[9].id[0]
+datatype[14].structtype[0].field[9].datatype 2
+datatype[14].structtype[0].field[10].name newestedition
+datatype[14].structtype[0].field[10].id[0]
+datatype[14].structtype[0].field[10].datatype 0
+datatype[14].structtype[0].field[11].name year
+datatype[14].structtype[0].field[11].id[0]
+datatype[14].structtype[0].field[11].datatype -1245117006
+datatype[14].structtype[0].field[12].name endyear
+datatype[14].structtype[0].field[12].id[0]
+datatype[14].structtype[0].field[12].datatype -1245117006
+datatype[14].structtype[0].field[13].name did
+datatype[14].structtype[0].field[13].id[0]
+datatype[14].structtype[0].field[13].datatype 0
+datatype[14].structtype[0].field[14].name cbid
+datatype[14].structtype[0].field[14].id[0]
+datatype[14].structtype[0].field[14].datatype 0
+datatype[14].structtype[0].field[15].name noupdate
+datatype[14].structtype[0].field[15].id[0]
+datatype[14].structtype[0].field[15].datatype 2
+datatype[14].structtype[0].field[16].name noupdate2
+datatype[14].structtype[0].field[16].id[0]
+datatype[14].structtype[0].field[16].datatype 2
+datatype[14].structtype[0].field[17].name multiposition2d_position
+datatype[14].structtype[0].field[17].id[0]
+datatype[14].structtype[0].field[17].datatype 58874399
+datatype[14].structtype[0].field[18].name extracategories
+datatype[14].structtype[0].field[18].id[0]
+datatype[14].structtype[0].field[18].datatype -1486737430
+datatype[14].structtype[0].field[19].name default_fieldlength
+datatype[14].structtype[0].field[19].id[0]
+datatype[14].structtype[0].field[19].datatype -1245117006
+datatype[14].structtype[0].field[20].name fmt_fieldlength
+datatype[14].structtype[0].field[20].id[0]
+datatype[14].structtype[0].field[20].datatype 0
+datatype[14].structtype[0].field[21].name categories_fieldlength
+datatype[14].structtype[0].field[21].id[0]
+datatype[14].structtype[0].field[21].datatype 0
+datatype[14].documenttype[0]
+datatype[15].id 1243829584
+datatype[15].arraytype[0]
+datatype[15].weightedsettype[0]
+datatype[15].structtype[1]
+datatype[15].structtype[0].name music_attribute.body
+datatype[15].structtype[0].version 0
+datatype[15].structtype[0].field[0]
+datatype[15].documenttype[0]
+datatype[16].id 1990571588
+datatype[16].arraytype[0]
+datatype[16].weightedsettype[0]
+datatype[16].structtype[0]
+datatype[16].documenttype[1]
+datatype[16].documenttype[0].name music_attribute
+datatype[16].documenttype[0].version 0
+datatype[16].documenttype[0].inherits[0]
+datatype[16].documenttype[0].headerstruct -1497398149
+datatype[16].documenttype[0].bodystruct 1243829584
+datatype[17].id -592896846
+datatype[17].arraytype[0]
+datatype[17].weightedsettype[0]
+datatype[17].structtype[1]
+datatype[17].structtype[0].name indexingdocument.header
+datatype[17].structtype[0].version 0
+datatype[17].structtype[0].field[3]
+datatype[17].structtype[0].field[0].name index
+datatype[17].structtype[0].field[0].id[0]
+datatype[17].structtype[0].field[0].datatype 8
+datatype[17].structtype[0].field[1].name summary
+datatype[17].structtype[0].field[1].id[0]
+datatype[17].structtype[0].field[1].datatype 8
+datatype[17].structtype[0].field[2].name attribute
+datatype[17].structtype[0].field[2].id[0]
+datatype[17].structtype[0].field[2].datatype 8
+datatype[17].documenttype[0]
+datatype[18].id -2093772985
+datatype[18].arraytype[0]
+datatype[18].weightedsettype[0]
+datatype[18].structtype[1]
+datatype[18].structtype[0].name indexingdocument.body
+datatype[18].structtype[0].version 0
+datatype[18].structtype[0].field[0]
+datatype[18].documenttype[0]
+datatype[19].id -1831281171
+datatype[19].arraytype[0]
+datatype[19].weightedsettype[0]
+datatype[19].structtype[0]
+datatype[19].documenttype[1]
+datatype[19].documenttype[0].name indexingdocument
+datatype[19].documenttype[0].version 0
+datatype[19].documenttype[0].inherits[0]
+datatype[19].documenttype[0].headerstruct -592896846
+datatype[19].documenttype[0].bodystruct -2093772985
+datatype[20].id -1623901061
+datatype[20].arraytype[0]
+datatype[20].weightedsettype[0]
+datatype[20].structtype[1]
+datatype[20].structtype[0].name "music_search.header"
+datatype[20].structtype[0].version 0
+datatype[20].structtype[0].field[41]
+datatype[20].structtype[0].field[0].name "sddocname"
+datatype[20].structtype[0].field[0].datatype 2
+datatype[20].structtype[0].field[0].id[0]
+datatype[20].structtype[0].field[1].name "sales"
+datatype[20].structtype[0].field[1].datatype 0
+datatype[20].structtype[0].field[1].id[0]
+datatype[20].structtype[0].field[2].name "pto"
+datatype[20].structtype[0].field[2].datatype 0
+datatype[20].structtype[0].field[2].id[0]
+datatype[20].structtype[0].field[3].name "keys"
+datatype[20].structtype[0].field[3].datatype 2
+datatype[20].structtype[0].field[3].id[0]
+datatype[20].structtype[0].field[4].name "mid"
+datatype[20].structtype[0].field[4].datatype 0
+datatype[20].structtype[0].field[4].id[0]
+datatype[20].structtype[0].field[5].name "ew"
+datatype[20].structtype[0].field[5].datatype 2
+datatype[20].structtype[0].field[5].id[0]
+datatype[20].structtype[0].field[6].name "weight"
+datatype[20].structtype[0].field[6].datatype 1
+datatype[20].structtype[0].field[6].id[0]
+datatype[20].structtype[0].field[7].name "fmt"
+datatype[20].structtype[0].field[7].datatype 2
+datatype[20].structtype[0].field[7].id[0]
+datatype[20].structtype[0].field[8].name "song"
+datatype[20].structtype[0].field[8].datatype 2
+datatype[20].structtype[0].field[8].id[0]
+datatype[20].structtype[0].field[9].name "bgnpfrom"
+datatype[20].structtype[0].field[9].datatype 1
+datatype[20].structtype[0].field[9].id[0]
+datatype[20].structtype[0].field[10].name "categories"
+datatype[20].structtype[0].field[10].datatype 2
+datatype[20].structtype[0].field[10].id[0]
+datatype[20].structtype[0].field[11].name "artist"
+datatype[20].structtype[0].field[11].datatype 2
+datatype[20].structtype[0].field[11].id[0]
+datatype[20].structtype[0].field[12].name "title"
+datatype[20].structtype[0].field[12].datatype 2
+datatype[20].structtype[0].field[12].id[0]
+datatype[20].structtype[0].field[13].name "newestedition"
+datatype[20].structtype[0].field[13].datatype 0
+datatype[20].structtype[0].field[13].id[0]
+datatype[20].structtype[0].field[14].name "year"
+datatype[20].structtype[0].field[14].datatype 0
+datatype[20].structtype[0].field[14].id[0]
+datatype[20].structtype[0].field[15].name "did"
+datatype[20].structtype[0].field[15].datatype 0
+datatype[20].structtype[0].field[15].id[0]
+datatype[20].structtype[0].field[16].name "cbid"
+datatype[20].structtype[0].field[16].datatype 0
+datatype[20].structtype[0].field[16].id[0]
+datatype[20].structtype[0].field[17].name "powermetalvalue"
+datatype[20].structtype[0].field[17].datatype 2
+datatype[20].structtype[0].field[17].id[0]
+datatype[20].structtype[0].field[18].name "progvalue"
+datatype[20].structtype[0].field[18].datatype 2
+datatype[20].structtype[0].field[18].id[0]
+datatype[20].structtype[0].field[19].name "metalvalue_arr"
+datatype[20].structtype[0].field[19].datatype -1486737430
+datatype[20].structtype[0].field[19].id[0]
+datatype[20].structtype[0].field[20].name "hiphopvalue_arr"
+datatype[20].structtype[0].field[20].datatype -1486737430
+datatype[20].structtype[0].field[20].id[0]
+datatype[20].structtype[0].field[21].name "bgndata"
+datatype[20].structtype[0].field[21].datatype 2
+datatype[20].structtype[0].field[21].id[0]
+datatype[20].structtype[0].field[22].name "surl"
+datatype[20].structtype[0].field[22].datatype 2
+datatype[20].structtype[0].field[22].id[0]
+datatype[20].structtype[0].field[23].name "userrate"
+datatype[20].structtype[0].field[23].datatype 0
+datatype[20].structtype[0].field[23].id[0]
+datatype[20].structtype[0].field[24].name "pid"
+datatype[20].structtype[0].field[24].datatype 2
+datatype[20].structtype[0].field[24].id[0]
+datatype[20].structtype[0].field[25].name "url"
+datatype[20].structtype[0].field[25].datatype 2
+datatype[20].structtype[0].field[25].id[0]
+datatype[20].structtype[0].field[26].name "isbn"
+datatype[20].structtype[0].field[26].datatype 2
+datatype[20].structtype[0].field[26].id[0]
+datatype[20].structtype[0].field[27].name "albumid"
+datatype[20].structtype[0].field[27].datatype 2
+datatype[20].structtype[0].field[27].id[0]
+datatype[20].structtype[0].field[28].name "disp_song"
+datatype[20].structtype[0].field[28].datatype 2
+datatype[20].structtype[0].field[28].id[0]
+datatype[20].structtype[0].field[29].name "pfrom"
+datatype[20].structtype[0].field[29].datatype 0
+datatype[20].structtype[0].field[29].id[0]
+datatype[20].structtype[0].field[30].name "data"
+datatype[20].structtype[0].field[30].datatype 2
+datatype[20].structtype[0].field[30].id[0]
+datatype[20].structtype[0].field[31].name "numreview"
+datatype[20].structtype[0].field[31].datatype 0
+datatype[20].structtype[0].field[31].id[0]
+datatype[20].structtype[0].field[32].name "bgnsellers"
+datatype[20].structtype[0].field[32].datatype 0
+datatype[20].structtype[0].field[32].id[0]
+datatype[20].structtype[0].field[33].name "image"
+datatype[20].structtype[0].field[33].datatype 2
+datatype[20].structtype[0].field[33].id[0]
+datatype[20].structtype[0].field[34].name "artistspid"
+datatype[20].structtype[0].field[34].datatype 2
+datatype[20].structtype[0].field[34].id[0]
+datatype[20].structtype[0].field[35].name "bgnpto"
+datatype[20].structtype[0].field[35].datatype 2
+datatype[20].structtype[0].field[35].id[0]
+datatype[20].structtype[0].field[36].name "scorekey"
+datatype[20].structtype[0].field[36].datatype 0
+datatype[20].structtype[0].field[36].id[0]
+datatype[20].structtype[0].field[37].name "metalvalue"
+datatype[20].structtype[0].field[37].datatype 2
+datatype[20].structtype[0].field[37].id[0]
+datatype[20].structtype[0].field[38].name "hiphopvalue"
+datatype[20].structtype[0].field[38].datatype 2
+datatype[20].structtype[0].field[38].id[0]
+datatype[20].structtype[0].field[39].name "rankfeatures"
+datatype[20].structtype[0].field[39].datatype 147991900
+datatype[20].structtype[0].field[39].id[0]
+datatype[20].structtype[0].field[40].name "summaryfeatures"
+datatype[20].structtype[0].field[40].datatype 147991900
+datatype[20].structtype[0].field[40].id[0]
+datatype[20].structtype[0].inherits[0]
+datatype[20].documenttype[0]
+datatype[20].annotationreftype[0]
+datatype[21].id -727249584
+datatype[21].arraytype[0]
+datatype[21].weightedsettype[0]
+datatype[21].structtype[1]
+datatype[21].structtype[0].name "music_search.body"
+datatype[21].structtype[0].version 0
+datatype[21].structtype[0].field[0]
+datatype[21].structtype[0].inherits[0]
+datatype[21].documenttype[0]
+datatype[21].annotationreftype[0]
+datatype[22].id 1722744388
+datatype[22].arraytype[0]
+datatype[22].weightedsettype[0]
+datatype[22].structtype[0]
+datatype[22].documenttype[1]
+datatype[22].documenttype[0].name "music_search"
+datatype[22].documenttype[0].version 0
+datatype[22].documenttype[0].headerstruct -1623901061
+datatype[22].documenttype[0].bodystruct -727249584
+datatype[22].documenttype[0].inherits[0]
+datatype[22].annotationreftype[0]
+datatype[23].id -1740240543
+datatype[23].arraytype[0]
+datatype[23].weightedsettype[0]
+datatype[23].structtype[1]
+datatype[23].structtype[0].name "search_feature"
+datatype[23].structtype[0].version 0
+datatype[23].structtype[0].field[2]
+datatype[23].structtype[0].field[0].name "name"
+datatype[23].structtype[0].field[0].datatype 2
+datatype[23].structtype[0].field[0].id[0]
+datatype[23].structtype[0].field[1].name "value"
+datatype[23].structtype[0].field[1].datatype 5
+datatype[23].structtype[0].field[1].id[0]
+datatype[23].structtype[0].inherits[0]
+datatype[23].documenttype[0]
+datatype[23].annotationreftype[0]
+datatype[24].id 147991900
+datatype[24].arraytype[1]
+datatype[24].arraytype[0].datatype -1740240543
+datatype[24].weightedsettype[0]
+datatype[24].structtype[0]
+datatype[24].documenttype[0]
+datatype[24].annotationreftype[0]
diff --git a/docprocs/src/test/cfg/documentmanager_inherit.cfg b/docprocs/src/test/cfg/documentmanager_inherit.cfg
new file mode 100644
index 00000000000..428c9049212
--- /dev/null
+++ b/docprocs/src/test/cfg/documentmanager_inherit.cfg
@@ -0,0 +1,216 @@
+datatype[19]
+datatype[0].id 2006483754
+datatype[0].arraytype[0]
+datatype[0].weightedsettype[0]
+datatype[0].structtype[1]
+datatype[0].structtype[0].name newssummary.header
+datatype[0].structtype[0].version 0
+datatype[0].structtype[0].field[4]
+datatype[0].structtype[0].field[0].name uri
+datatype[0].structtype[0].field[0].id[0]
+datatype[0].structtype[0].field[0].datatype 2
+datatype[0].structtype[0].field[1].name where
+datatype[0].structtype[0].field[1].id[0]
+datatype[0].structtype[0].field[1].datatype 2
+datatype[0].structtype[0].field[2].name title
+datatype[0].structtype[0].field[2].id[0]
+datatype[0].structtype[0].field[2].datatype 2
+datatype[0].structtype[0].field[3].name weight
+datatype[0].structtype[0].field[3].id[0]
+datatype[0].structtype[0].field[3].datatype 1
+datatype[0].documenttype[0]
+datatype[1].id -2059783233
+datatype[1].arraytype[0]
+datatype[1].weightedsettype[0]
+datatype[1].structtype[1]
+datatype[1].structtype[0].name newssummary.body
+datatype[1].structtype[0].version 0
+datatype[1].structtype[0].field[0]
+datatype[1].documenttype[0]
+datatype[2].id -756330891
+datatype[2].arraytype[0]
+datatype[2].weightedsettype[0]
+datatype[2].structtype[0]
+datatype[2].documenttype[1]
+datatype[2].documenttype[0].name newssummary
+datatype[2].documenttype[0].version 0
+datatype[2].documenttype[0].inherits[0]
+datatype[2].documenttype[0].headerstruct 2006483754
+datatype[2].documenttype[0].bodystruct -2059783233
+datatype[3].id 2010790819
+datatype[3].arraytype[0]
+datatype[3].weightedsettype[0]
+datatype[3].structtype[1]
+datatype[3].structtype[0].name newssummary_summary.header
+datatype[3].structtype[0].version 0
+datatype[3].structtype[0].field[6]
+datatype[3].structtype[0].field[0].name sddocname
+datatype[3].structtype[0].field[0].id[0]
+datatype[3].structtype[0].field[0].datatype 2
+datatype[3].structtype[0].field[1].name uri
+datatype[3].structtype[0].field[1].id[0]
+datatype[3].structtype[0].field[1].datatype 2
+datatype[3].structtype[0].field[2].name title
+datatype[3].structtype[0].field[2].id[0]
+datatype[3].structtype[0].field[2].datatype 2
+datatype[3].structtype[0].field[3].name weight
+datatype[3].structtype[0].field[3].id[0]
+datatype[3].structtype[0].field[3].datatype 1
+datatype[3].structtype[0].field[4].name ranklog
+datatype[3].structtype[0].field[4].id[0]
+datatype[3].structtype[0].field[4].datatype 2
+datatype[3].structtype[0].field[5].name documentid
+datatype[3].structtype[0].field[5].id[0]
+datatype[3].structtype[0].field[5].datatype 2
+datatype[3].documenttype[0]
+datatype[4].id 760329848
+datatype[4].arraytype[0]
+datatype[4].weightedsettype[0]
+datatype[4].structtype[1]
+datatype[4].structtype[0].name newssummary_summary.body
+datatype[4].structtype[0].version 0
+datatype[4].structtype[0].field[0]
+datatype[4].documenttype[0]
+datatype[5].id -1535558628
+datatype[5].arraytype[0]
+datatype[5].weightedsettype[0]
+datatype[5].structtype[0]
+datatype[5].documenttype[1]
+datatype[5].documenttype[0].name newssummary_summary
+datatype[5].documenttype[0].version 0
+datatype[5].documenttype[0].inherits[0]
+datatype[5].documenttype[0].headerstruct 2010790819
+datatype[5].documenttype[0].bodystruct 760329848
+datatype[6].id -1486737430
+datatype[6].arraytype[1]
+datatype[6].arraytype[0].datatype 2
+datatype[6].weightedsettype[0]
+datatype[6].structtype[0]
+datatype[6].documenttype[0]
+datatype[7].id -296931593
+datatype[7].arraytype[0]
+datatype[7].weightedsettype[0]
+datatype[7].structtype[1]
+datatype[7].structtype[0].name newssummary_index.header
+datatype[7].structtype[0].version 0
+datatype[7].structtype[0].field[2]
+datatype[7].structtype[0].field[0].name sddocname
+datatype[7].structtype[0].field[0].id[0]
+datatype[7].structtype[0].field[0].datatype -1486737430
+datatype[7].structtype[0].field[1].name title
+datatype[7].structtype[0].field[1].id[0]
+datatype[7].structtype[0].field[1].datatype -1486737430
+datatype[7].documenttype[0]
+datatype[8].id -2066649396
+datatype[8].arraytype[0]
+datatype[8].weightedsettype[0]
+datatype[8].structtype[1]
+datatype[8].structtype[0].name newssummary_index.body
+datatype[8].structtype[0].version 0
+datatype[8].structtype[0].field[0]
+datatype[8].documenttype[0]
+datatype[9].id 1957994312
+datatype[9].arraytype[0]
+datatype[9].weightedsettype[0]
+datatype[9].structtype[0]
+datatype[9].documenttype[1]
+datatype[9].documenttype[0].name newssummary_index
+datatype[9].documenttype[0].version 0
+datatype[9].documenttype[0].inherits[0]
+datatype[9].documenttype[0].headerstruct -296931593
+datatype[9].documenttype[0].bodystruct -2066649396
+datatype[10].id -1089205651
+datatype[10].arraytype[0]
+datatype[10].weightedsettype[0]
+datatype[10].structtype[1]
+datatype[10].structtype[0].name newssummary_attribute.header
+datatype[10].structtype[0].version 0
+datatype[10].structtype[0].field[1]
+datatype[10].structtype[0].field[0].name weight
+datatype[10].structtype[0].field[0].id[0]
+datatype[10].structtype[0].field[0].datatype 1
+datatype[10].documenttype[0]
+datatype[11].id 761573314
+datatype[11].arraytype[0]
+datatype[11].weightedsettype[0]
+datatype[11].structtype[1]
+datatype[11].structtype[0].name newssummary_attribute.body
+datatype[11].structtype[0].version 0
+datatype[11].structtype[0].field[0]
+datatype[11].documenttype[0]
+datatype[12].id -1613882222
+datatype[12].arraytype[0]
+datatype[12].weightedsettype[0]
+datatype[12].structtype[0]
+datatype[12].documenttype[1]
+datatype[12].documenttype[0].name newssummary_attribute
+datatype[12].documenttype[0].version 0
+datatype[12].documenttype[0].inherits[0]
+datatype[12].documenttype[0].headerstruct -1089205651
+datatype[12].documenttype[0].bodystruct 761573314
+datatype[13].id 2098419674
+datatype[13].arraytype[0]
+datatype[13].weightedsettype[0]
+datatype[13].structtype[1]
+datatype[13].structtype[0].name newsarticle.header
+datatype[13].structtype[0].version 0
+datatype[13].structtype[0].field[1]
+datatype[13].structtype[0].field[0].name city
+datatype[13].structtype[0].field[0].id[0]
+datatype[13].structtype[0].field[0].datatype 2
+datatype[13].documenttype[0]
+datatype[14].id 197293167
+datatype[14].arraytype[0]
+datatype[14].weightedsettype[0]
+datatype[14].structtype[1]
+datatype[14].structtype[0].name newsarticle.body
+datatype[14].structtype[0].version 0
+datatype[14].structtype[0].field[0]
+datatype[14].documenttype[0]
+datatype[15].id -1710661691
+datatype[15].arraytype[0]
+datatype[15].weightedsettype[0]
+datatype[15].structtype[0]
+datatype[15].documenttype[1]
+datatype[15].documenttype[0].name newsarticle
+datatype[15].documenttype[0].version 0
+datatype[15].documenttype[0].inherits[1]
+datatype[15].documenttype[0].inherits[0].name newssummary
+datatype[15].documenttype[0].inherits[0].version 0
+datatype[15].documenttype[0].headerstruct 2098419674
+datatype[15].documenttype[0].bodystruct 197293167
+datatype[16].id -592896846
+datatype[16].arraytype[0]
+datatype[16].weightedsettype[0]
+datatype[16].structtype[1]
+datatype[16].structtype[0].name indexingdocument.header
+datatype[16].structtype[0].version 0
+datatype[16].structtype[0].field[3]
+datatype[16].structtype[0].field[0].name index
+datatype[16].structtype[0].field[0].id[0]
+datatype[16].structtype[0].field[0].datatype 8
+datatype[16].structtype[0].field[1].name summary
+datatype[16].structtype[0].field[1].id[0]
+datatype[16].structtype[0].field[1].datatype 8
+datatype[16].structtype[0].field[2].name attribute
+datatype[16].structtype[0].field[2].id[0]
+datatype[16].structtype[0].field[2].datatype 8
+datatype[16].documenttype[0]
+datatype[17].id -2093772985
+datatype[17].arraytype[0]
+datatype[17].weightedsettype[0]
+datatype[17].structtype[1]
+datatype[17].structtype[0].name indexingdocument.body
+datatype[17].structtype[0].version 0
+datatype[17].structtype[0].field[0]
+datatype[17].documenttype[0]
+datatype[18].id -1831281171
+datatype[18].arraytype[0]
+datatype[18].weightedsettype[0]
+datatype[18].structtype[0]
+datatype[18].documenttype[1]
+datatype[18].documenttype[0].name indexingdocument
+datatype[18].documenttype[0].version 0
+datatype[18].documenttype[0].inherits[0]
+datatype[18].documenttype[0].headerstruct -592896846
+datatype[18].documenttype[0].bodystruct -2093772985
diff --git a/docprocs/src/test/cfg/ilscripts.cfg b/docprocs/src/test/cfg/ilscripts.cfg
new file mode 100644
index 00000000000..550cbed2c26
--- /dev/null
+++ b/docprocs/src/test/cfg/ilscripts.cfg
@@ -0,0 +1,12 @@
+ilscript[1]
+ilscript[0].doctype "music"
+ilscript[0].docfield[4]
+ilscript[0].docfield[0] "artist"
+ilscript[0].docfield[1] "title"
+ilscript[0].docfield[2] "isbn"
+ilscript[0].docfield[3] "song"
+ilscript[0].content[4]
+ilscript[0].content[0] "input artist | attribute title"
+ilscript[0].content[1] "input title | attribute artist"
+ilscript[0].content[2] "input isbn | passthrough isbn"
+ilscript[0].content[3] "input isbn | attribute song"
diff --git a/docprocs/src/test/cfg/indexingdocument.cfg b/docprocs/src/test/cfg/indexingdocument.cfg
new file mode 100644
index 00000000000..ac92b0a40be
--- /dev/null
+++ b/docprocs/src/test/cfg/indexingdocument.cfg
@@ -0,0 +1,4 @@
+indexingdoc[1]
+indexingdoc[0].name music
+indexingdoc[0].source[1]
+indexingdoc[0].source[0] music
diff --git a/docprocs/src/test/cfg/specialtokens.cfg b/docprocs/src/test/cfg/specialtokens.cfg
new file mode 100644
index 00000000000..3b4595a463f
--- /dev/null
+++ b/docprocs/src/test/cfg/specialtokens.cfg
@@ -0,0 +1,10 @@
+tokenlist[1]
+tokenlist[0].name default
+tokenlist[0].tokens[7]
+tokenlist[0].tokens[0].token c++
+tokenlist[0].tokens[1].token .net
+tokenlist[0].tokens[2].token c#
+tokenlist[0].tokens[3].token i/o
+tokenlist[0].tokens[4].token r&D
+tokenlist[0].tokens[5].token dvd±r
+tokenlist[0].tokens[6].token -索
diff --git a/docprocs/src/test/java/com/yahoo/docprocs/indexing/DocumentScriptTestCase.java b/docprocs/src/test/java/com/yahoo/docprocs/indexing/DocumentScriptTestCase.java
new file mode 100644
index 00000000000..bd643cbb322
--- /dev/null
+++ b/docprocs/src/test/java/com/yahoo/docprocs/indexing/DocumentScriptTestCase.java
@@ -0,0 +1,281 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.docprocs.indexing;
+
+import com.yahoo.document.DataType;
+import com.yahoo.document.Document;
+import com.yahoo.document.DocumentType;
+import com.yahoo.document.DocumentUpdate;
+import com.yahoo.document.Field;
+import com.yahoo.document.StructDataType;
+import com.yahoo.document.annotation.SpanTree;
+import com.yahoo.document.annotation.SpanTrees;
+import com.yahoo.document.datatypes.Array;
+import com.yahoo.document.datatypes.FieldValue;
+import com.yahoo.document.datatypes.MapFieldValue;
+import com.yahoo.document.datatypes.StringFieldValue;
+import com.yahoo.document.datatypes.Struct;
+import com.yahoo.document.datatypes.WeightedSet;
+import com.yahoo.document.fieldpathupdate.AssignFieldPathUpdate;
+import com.yahoo.document.update.FieldUpdate;
+import com.yahoo.document.update.MapValueUpdate;
+import com.yahoo.document.update.ValueUpdate;
+import com.yahoo.vespa.indexinglanguage.AdapterFactory;
+import com.yahoo.vespa.indexinglanguage.SimpleAdapterFactory;
+import com.yahoo.vespa.indexinglanguage.expressions.Expression;
+import com.yahoo.vespa.indexinglanguage.expressions.IndexExpression;
+import com.yahoo.vespa.indexinglanguage.expressions.InputExpression;
+import com.yahoo.vespa.indexinglanguage.expressions.StatementExpression;
+import com.yahoo.vespa.indexinglanguage.parser.ParseException;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+/**
+ * Tests for {@link DocumentScript}, in two groups.
+ * <p>
+ * The {@code ...ExtraFieldsThrow} tests run a script that declares only {@code documentField} (see the
+ * no-argument {@code newScript()}) against a document, a field update and a field path update whose type
+ * also has {@code extraField}. Each case sets {@code extraField} and expects an
+ * {@link IllegalArgumentException} with a fixed message.
+ * <p>
+ * The {@code ...LinguisticsSpanTreeIsRemoved...} tests attach two span trees to every input string, one named
+ * {@link SpanTrees#LINGUISTICS} and one with a custom name, pass the value through the {@code process*}
+ * helpers, and assert that the custom-named tree is the only one left on the output.
+ * <p>
+ * Each {@code process*} helper builds a throwaway document type {@code myDocumentType} with the single field
+ * {@code myField}, typed after the value under test, and runs {@code input myField | index myField} on it.
+ *
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+@SuppressWarnings("unchecked") // results are cast from FieldValue to generic container types throughout
+public class DocumentScriptTestCase {
+
+ private static final AdapterFactory ADAPTER_FACTORY = new SimpleAdapterFactory();
+
+ @Test
+ public void requireThatDocumentWithExtraFieldsThrow() throws ParseException {
+ assertFail("Field 'extraField' is not part of the declared document type 'documentType'.",
+ newDocument(new StringFieldValue("foo"), new StringFieldValue("bar")));
+ assertFail("Field 'extraField' is not part of the declared document type 'documentType'.",
+ newDocument(null, new StringFieldValue("bar"))); // null leaves documentField unset
+ }
+
+ @Test
+ public void requireThatFieldUpdateToExtraFieldsThrow() throws ParseException {
+ assertFail("Field 'extraField' is not part of the declared document type 'documentType'.",
+ newFieldUpdate(new StringFieldValue("foo"), new StringFieldValue("bar")));
+ assertFail("Field 'extraField' is not part of the declared document type 'documentType'.",
+ newFieldUpdate(null, new StringFieldValue("bar"))); // null adds no update for documentField
+ }
+
+ @Test
+ public void requireThatPathUpdateToExtraFieldsThrow() throws ParseException {
+ assertFail("Field 'extraField' is not part of the declared document type 'documentType'.",
+ newPathUpdate(new StringFieldValue("foo"), new StringFieldValue("bar")));
+ assertFail("Field 'extraField' is not part of the declared document type 'documentType'.",
+ newPathUpdate(null, new StringFieldValue("bar"))); // null adds no path update for documentField
+ }
+
+ @Test
+ public void requireThatLinguisticsSpanTreeIsRemovedFromStringFields() {
+ StringFieldValue in = newString(SpanTrees.LINGUISTICS, "mySpanTree");
+ StringFieldValue out = (StringFieldValue)processDocument(in);
+ assertSpanTrees(out, "mySpanTree");
+
+ out = (StringFieldValue)processFieldUpdate(in).getValue();
+ assertSpanTrees(out, "mySpanTree");
+
+ out = (StringFieldValue)processPathUpdate(in).getValue();
+ assertSpanTrees(out, "mySpanTree");
+ }
+
+ @Test
+ public void requireThatLinguisticsSpanTreeIsRemovedFromArrayStringFields() {
+ Array<StringFieldValue> in = new Array<>(DataType.getArray(DataType.STRING));
+ in.add(newString(SpanTrees.LINGUISTICS, "mySpanTree"));
+
+ Array<StringFieldValue> out = (Array<StringFieldValue>)processDocument(in);
+ assertEquals(1, out.size());
+ assertSpanTrees(out.get(0), "mySpanTree");
+
+ out = (Array<StringFieldValue>)processFieldUpdate(in).getValue();
+ assertEquals(1, out.size());
+ assertSpanTrees(out.get(0), "mySpanTree");
+
+ out = (Array<StringFieldValue>)processPathUpdate(in).getValue();
+ assertEquals(1, out.size());
+ assertSpanTrees(out.get(0), "mySpanTree");
+ }
+
+ @Test
+ public void requireThatLinguisticsSpanTreeIsRemovedFromWsetStringFields() {
+ WeightedSet<StringFieldValue> in = new WeightedSet<>(DataType.getWeightedSet(DataType.STRING));
+ in.put(newString(SpanTrees.LINGUISTICS, "mySpanTree"), 69); // the weight is never asserted below
+
+ WeightedSet<StringFieldValue> out = (WeightedSet<StringFieldValue>)processDocument(in);
+ assertEquals(1, out.size());
+ assertSpanTrees(out.keySet().iterator().next(), "mySpanTree");
+
+ out = (WeightedSet<StringFieldValue>)processFieldUpdate(in).getValue();
+ assertEquals(1, out.size());
+ assertSpanTrees(out.keySet().iterator().next(), "mySpanTree");
+
+ out = (WeightedSet<StringFieldValue>)processPathUpdate(in).getValue();
+ assertEquals(1, out.size());
+ assertSpanTrees(out.keySet().iterator().next(), "mySpanTree");
+ }
+
+ @Test
+ public void requireThatLinguisticsSpanTreeIsRemovedFromMapStringStringFields() {
+ MapFieldValue<StringFieldValue, StringFieldValue> in =
+ new MapFieldValue<>(DataType.getMap(DataType.STRING, DataType.STRING));
+ in.put(newString(SpanTrees.LINGUISTICS, "myKeySpanTree"),
+ newString(SpanTrees.LINGUISTICS, "myValueSpanTree"));
+
+ MapFieldValue<StringFieldValue, StringFieldValue> out;
+ out = (MapFieldValue<StringFieldValue, StringFieldValue>)processDocument(in);
+ assertEquals(1, out.size());
+ assertSpanTrees(out.keySet().iterator().next(), "myKeySpanTree");
+ assertSpanTrees(out.values().iterator().next(), "myValueSpanTree");
+
+ out = (MapFieldValue<StringFieldValue, StringFieldValue>)processFieldUpdate(in).getValue();
+ assertEquals(1, out.size());
+ assertSpanTrees(out.keySet().iterator().next(), "myKeySpanTree");
+ assertSpanTrees(out.values().iterator().next(), "myValueSpanTree");
+
+ out = (MapFieldValue<StringFieldValue, StringFieldValue>)processPathUpdate(in).getValue();
+ assertEquals(1, out.size());
+ assertSpanTrees(out.keySet().iterator().next(), "myKeySpanTree");
+ assertSpanTrees(out.values().iterator().next(), "myValueSpanTree");
+ }
+
+ @Test
+ public void requireThatLinguisticsSpanTreeIsRemovedFromStructStringFields() {
+ StructDataType structType = new StructDataType("myStruct");
+ structType.addField(new Field("myString", DataType.STRING));
+ Struct in = new Struct(structType);
+ in.setFieldValue("myString", newString(SpanTrees.LINGUISTICS, "mySpanTree"));
+
+ Struct out = (Struct)processDocument(in);
+ assertSpanTrees(out.getFieldValue("myString"), "mySpanTree");
+
+ StringFieldValue str = (StringFieldValue)((MapValueUpdate)processFieldUpdate(in)).getUpdate().getValue(); // the struct assign comes back as a MapValueUpdate
+ assertSpanTrees(str, "mySpanTree");
+
+ str = (StringFieldValue)((MapValueUpdate)processFieldUpdate(in)).getUpdate().getValue(); // NOTE(review): repeats the field-update check; sibling tests use processPathUpdate here - confirm whether a path update also yields a MapValueUpdate before changing
+ assertSpanTrees(str, "mySpanTree");
+ }
+
+ private static FieldValue processDocument(FieldValue fieldValue) {
+ DocumentType docType = new DocumentType("myDocumentType");
+ docType.addField("myField", fieldValue.getDataType());
+ Document doc = new Document(docType, "doc:scheme:");
+ doc.setFieldValue("myField", fieldValue.clone()); // only this helper clones; the update helpers pass the value as-is
+ doc = newScript(docType).execute(ADAPTER_FACTORY, doc);
+ return doc.getFieldValue("myField");
+ }
+
+ private static ValueUpdate<?> processFieldUpdate(FieldValue fieldValue) {
+ DocumentType docType = new DocumentType("myDocumentType");
+ docType.addField("myField", fieldValue.getDataType());
+ DocumentUpdate update = new DocumentUpdate(docType, "doc:scheme:");
+ update.addFieldUpdate(FieldUpdate.createAssign(docType.getField("myField"), fieldValue));
+ update = newScript(docType).execute(ADAPTER_FACTORY, update);
+ return update.getFieldUpdate("myField").getValueUpdate(0);
+ }
+
+ private static ValueUpdate<?> processPathUpdate(FieldValue fieldValue) {
+ DocumentType docType = new DocumentType("myDocumentType");
+ docType.addField("myField", fieldValue.getDataType());
+ DocumentUpdate update = new DocumentUpdate(docType, "doc:scheme:");
+ update.addFieldPathUpdate(new AssignFieldPathUpdate(docType, "myField", fieldValue));
+ update = newScript(docType).execute(ADAPTER_FACTORY, update);
+ return update.getFieldUpdate("myField").getValueUpdate(0); // read back as a plain field update, not a path update
+ }
+
+ private static DocumentScript newScript(DocumentType docType) {
+ String fieldName = docType.getFields().iterator().next().getName(); // the process* helpers add exactly one field
+ return new DocumentScript(docType.getName(), Arrays.asList(fieldName),
+ new StatementExpression(new InputExpression(fieldName),
+ new IndexExpression(fieldName)));
+ }
+
+ private static StringFieldValue newString(String... spanTrees) {
+ StringFieldValue ret = new StringFieldValue("foo");
+ for (String spanTree : spanTrees) {
+ ret.setSpanTree(new SpanTree(spanTree)); // one empty span tree per requested name
+ }
+ return ret;
+ }
+
+ private static void assertSpanTrees(FieldValue actual, String... expectedSpanTrees) {
+ assertTrue(actual instanceof StringFieldValue);
+ StringFieldValue str = (StringFieldValue)actual;
+ assertEquals(new ArrayList<>(Arrays.asList(expectedSpanTrees)),
+ new ArrayList<>(str.getSpanTreeMap().keySet())); // compares span tree names only, as lists
+ }
+
+ private static DocumentType newDocumentType() {
+ DocumentType type = new DocumentType("documentType");
+ type.addField("documentField", DataType.STRING);
+ type.addField("extraField", DataType.STRING); // present in the type, but not declared by newScript()
+ return type;
+ }
+
+ private static Document newDocument(FieldValue documentFieldValue, FieldValue extraFieldValue) {
+ Document document = new Document(newDocumentType(), "doc:scheme:");
+ if (documentFieldValue != null) {
+ document.setFieldValue("documentField", documentFieldValue);
+ }
+ if (extraFieldValue != null) {
+ document.setFieldValue("extraField", extraFieldValue);
+ }
+ return document;
+ }
+
+ private static DocumentUpdate newFieldUpdate(FieldValue documentFieldValue, FieldValue extraFieldValue) {
+ DocumentType type = newDocumentType();
+ DocumentUpdate update = new DocumentUpdate(type, "doc:scheme:");
+ if (documentFieldValue != null) {
+ update.addFieldUpdate(FieldUpdate.createAssign(type.getField("documentField"), documentFieldValue));
+ }
+ if (extraFieldValue != null) {
+ update.addFieldUpdate(FieldUpdate.createAssign(type.getField("extraField"), extraFieldValue));
+ }
+ return update;
+ }
+
+ private static DocumentUpdate newPathUpdate(FieldValue documentFieldValue, FieldValue extraFieldValue) {
+ DocumentType type = newDocumentType();
+ DocumentUpdate update = new DocumentUpdate(type, "doc:scheme:");
+ if (documentFieldValue != null) {
+ update.addFieldPathUpdate(new AssignFieldPathUpdate(type, "documentField", documentFieldValue));
+ }
+ if (extraFieldValue != null) {
+ update.addFieldPathUpdate(new AssignFieldPathUpdate(type, "extraField", extraFieldValue));
+ }
+ return update;
+ }
+
+ private static void assertFail(String expectedException, Document document) throws ParseException {
+ try {
+ execute(document);
+ fail(); // reached only when execute did not throw
+ } catch (IllegalArgumentException e) {
+ assertEquals(expectedException, e.getMessage());
+ }
+ }
+
+ private static void assertFail(String expectedException, DocumentUpdate update) throws ParseException {
+ try {
+ execute(update);
+ fail(); // reached only when execute did not throw
+ } catch (IllegalArgumentException e) {
+ assertEquals(expectedException, e.getMessage());
+ }
+ }
+
+ private static Document execute(Document document) throws ParseException {
+ return newScript().execute(new SimpleAdapterFactory(), document);
+ }
+
+ private static DocumentUpdate execute(DocumentUpdate update) throws ParseException {
+ return newScript().execute(new SimpleAdapterFactory(), update);
+ }
+
+ private static DocumentScript newScript() throws ParseException {
+ return new DocumentScript("documentType", Arrays.asList("documentField"), // declares documentField only
+ Expression.fromString("input documentField | index documentField"));
+ }
+}
diff --git a/docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTestCase.java b/docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTestCase.java
new file mode 100644
index 00000000000..9f9d462a7b1
--- /dev/null
+++ b/docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTestCase.java
@@ -0,0 +1,131 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.docprocs.indexing;
+
+import com.yahoo.config.subscription.ConfigGetter;
+import com.yahoo.docproc.Processing;
+import com.yahoo.document.Document;
+import com.yahoo.document.DocumentPut;
+import com.yahoo.document.DocumentOperation;
+import com.yahoo.document.DocumentType;
+import com.yahoo.document.DocumentUpdate;
+import com.yahoo.document.config.DocumentmanagerConfig;
+import com.yahoo.document.datatypes.StringFieldValue;
+import com.yahoo.document.update.AssignValueUpdate;
+import com.yahoo.document.update.FieldUpdate;
+import com.yahoo.document.update.ValueUpdate;
+import com.yahoo.language.Linguistics;
+import com.yahoo.language.simple.SimpleLinguistics;
+import com.yahoo.vespa.configdefinition.IlscriptsConfig;
+import org.junit.Test;
+
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertSame;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Tests for {@link IndexingProcessor}. The processor is built from the configuration found under
+ * {@code src/test/cfg}; the expectations on output fields presumably mirror the scripts in
+ * {@code ilscripts.cfg} there (artist is written to title, isbn to song, isbn is passed through).
+ *
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class IndexingProcessorTestCase {
+
+    private static final String CONFIG_ID = "dir:src/test/cfg";
+    private final IndexingProcessor indexer = newProcessor(CONFIG_ID);
+
+    /** A put of the configured type "music" comes out as a put whose "title" holds the input "artist". */
+    @Test
+    public void requireThatIndexerProcessesDocuments() {
+        Document input = new Document(indexer.getDocumentTypeManager().getDocumentType("music"), "doc:scheme:");
+        input.setFieldValue("artist", new StringFieldValue("69"));
+        DocumentOperation op = process(new DocumentPut(input));
+        assertTrue(op instanceof DocumentPut);
+
+        Document output = ((DocumentPut)op).getDocument();
+        assertEquals(new StringFieldValue("69"), output.getFieldValue("title"));
+        assertEquals("music", output.getDataType().getName());
+    }
+
+    /** A put of a type the processor has no script for is forwarded as the very same document instance. */
+    @Test
+    public void requireThatIndexerForwardsDocumentsOfUnknownType() {
+        Document input = new Document(new DocumentType("unknown"), "doc:scheme:");
+        DocumentOperation output = process(new DocumentPut(input));
+        assertTrue(output instanceof DocumentPut);
+        assertSame(input, ((DocumentPut)output).getDocument());
+    }
+
+    /**
+     * An update assigning "isbn" and "artist" comes out as three assign updates, in this order:
+     * "song" (from isbn), "title" (from artist) and "isbn" itself.
+     */
+    @Test
+    public void requireThatIndexerProcessesUpdates() {
+        DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music");
+        DocumentUpdate input = new DocumentUpdate(inputType, "doc:scheme:");
+        input.addFieldUpdate(FieldUpdate.createAssign(inputType.getField("isbn"), new StringFieldValue("isbnmarker")));
+        input.addFieldUpdate(FieldUpdate.createAssign(inputType.getField("artist"), new StringFieldValue("69")));
+        DocumentOperation output = process(input);
+
+        assertTrue(output instanceof DocumentUpdate);
+        DocumentUpdate docUpdate = (DocumentUpdate) output;
+        assertEquals(3, docUpdate.getFieldUpdates().size());
+
+        FieldUpdate song = docUpdate.getFieldUpdate(0);
+        assertEquals(1, song.getValueUpdates().size());
+        assertAssign(song, "song", "isbnmarker");
+
+        // The title update used to be verified twice with identical assertions; once is enough.
+        FieldUpdate title = docUpdate.getFieldUpdate(1);
+        assertEquals(1, title.getValueUpdates().size());
+        assertAssign(title, "title", "69");
+
+        // No count check on the isbn value updates: the test never had one, so none is introduced here.
+        assertAssign(docUpdate.getFieldUpdate(2), "isbn", "isbnmarker");
+    }
+
+    /** An update without any field updates yields no output operations at all. */
+    @Test
+    public void requireThatEmptyDocumentUpdateOutputDoesNotThrow() {
+        DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music");
+        DocumentUpdate input = new DocumentUpdate(inputType, "doc:scheme:");
+        Processing proc = new Processing();
+        proc.getDocumentOperations().add(input);
+        indexer.process(proc);
+        assertEquals(0, proc.getDocumentOperations().size());
+    }
+
+    /** An update of a type the processor has no script for is forwarded as the very same instance. */
+    @Test
+    public void requireThatIndexerForwardsUpdatesOfUnknownType() {
+        DocumentUpdate input = new DocumentUpdate(new DocumentType("unknown"), "doc:scheme:");
+        DocumentOperation output = process(input);
+        assertSame(input, output);
+    }
+
+    /**
+     * Asserts that the first value update of the given field update is an assignment of a string.
+     *
+     * @param fieldUpdate   the field update to inspect
+     * @param expectedField the name of the field it must target
+     * @param expectedValue the string it must assign
+     */
+    private static void assertAssign(FieldUpdate fieldUpdate, String expectedField, String expectedValue) {
+        assertEquals(expectedField, fieldUpdate.getField().getName());
+        ValueUpdate<?> valueUpdate = fieldUpdate.getValueUpdate(0);
+        assertTrue(valueUpdate instanceof AssignValueUpdate);
+        assertEquals(new StringFieldValue(expectedValue), valueUpdate.getValue());
+    }
+
+    /**
+     * Runs a single operation through the indexer and returns the single operation that comes out.
+     * Fails the test unless exactly one operation is left after processing.
+     */
+    private DocumentOperation process(DocumentOperation input) {
+        Processing proc = new Processing();
+        proc.getDocumentOperations().add(input);
+        indexer.process(proc);
+
+        List<DocumentOperation> lst = proc.getDocumentOperations();
+        assertEquals(1, lst.size());
+        return lst.get(0);
+    }
+
+    /** Builds a processor from the document manager and ilscripts configs at the given config id. */
+    private static IndexingProcessor newProcessor(String configId) {
+        return new IndexingProcessor(ConfigGetter.getConfig(DocumentmanagerConfig.class, configId),
+                                     ConfigGetter.getConfig(IlscriptsConfig.class, configId),
+                                     new SimpleLinguistics());
+    }
+}
diff --git a/docprocs/src/test/java/com/yahoo/docprocs/indexing/ScriptManagerTestCase.java b/docprocs/src/test/java/com/yahoo/docprocs/indexing/ScriptManagerTestCase.java
new file mode 100644
index 00000000000..4e4ec5ab151
--- /dev/null
+++ b/docprocs/src/test/java/com/yahoo/docprocs/indexing/ScriptManagerTestCase.java
@@ -0,0 +1,69 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.docprocs.indexing;
+
+import com.yahoo.document.DocumentType;
+import com.yahoo.document.DocumentTypeManager;
+import com.yahoo.vespa.configdefinition.IlscriptsConfig;
+import com.yahoo.vespa.indexinglanguage.parser.ParseException;
+import org.junit.Test;
+
+import java.util.Iterator;
+
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+
+/**
+ * Checks which document types {@link ScriptManager} returns a script for. All tests load the type
+ * hierarchy from {@code documentmanager_inherit.cfg}, in which newsarticle inherits newssummary.
+ *
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class ScriptManagerTestCase {
+
+    /** A script configured for newssummary is also found for newsarticle, which inherits it. */
+    @Test
+    public void requireThatScriptsAreAppliedToSubType() throws ParseException {
+        DocumentTypeManager types = newTypeManager();
+        DocumentType configuredType = types.getDocumentType("newssummary");
+        assertNotNull(configuredType);
+
+        ScriptManager scripts = newScriptManager(types, "newssummary");
+        assertNotNull(scripts.getScript(types.getDocumentType("newsarticle")));
+        assertNull(scripts.getScript(new DocumentType("unknown")));
+    }
+
+    /** A script configured for newsarticle is also found for newssummary, the type it inherits. */
+    @Test
+    public void requireThatScriptsAreAppliedToSuperType() throws ParseException {
+        DocumentTypeManager types = newTypeManager();
+        DocumentType configuredType = types.getDocumentType("newsarticle");
+        assertNotNull(configuredType);
+
+        ScriptManager scripts = newScriptManager(types, "newsarticle");
+        assertNotNull(scripts.getScript(types.getDocumentType("newssummary")));
+        assertNull(scripts.getScript(new DocumentType("unknown")));
+    }
+
+    /** A manager built from an empty script configuration can be created and queried. */
+    @Test
+    public void requireThatEmptyConfigurationDoesNotThrow() {
+        ScriptManager scripts = newEmptyScriptManager(newTypeManager());
+        assertNull(scripts.getScript(new DocumentType("unknown")));
+    }
+
+    /** Without any configured scripts, no registered type and no unknown type has a script. */
+    @Test
+    public void requireThatUnknownDocumentTypeReturnsNull() {
+        DocumentTypeManager types = newTypeManager();
+        ScriptManager scripts = newEmptyScriptManager(types);
+        Iterator<DocumentType> registered = types.documentTypeIterator();
+        while (registered.hasNext()) {
+            assertNull(scripts.getScript(registered.next()));
+        }
+        assertNull(scripts.getScript(new DocumentType("unknown")));
+    }
+
+    /** Creates a type manager configured from the inheritance test fixture. */
+    private static DocumentTypeManager newTypeManager() {
+        DocumentTypeManager types = new DocumentTypeManager();
+        types.configure("file:src/test/cfg/documentmanager_inherit.cfg");
+        return types;
+    }
+
+    /** Creates a script manager holding a single "index" script for the named document type. */
+    private static ScriptManager newScriptManager(DocumentTypeManager types, String docTypeName) {
+        IlscriptsConfig.Builder config = new IlscriptsConfig.Builder();
+        config.ilscript(new IlscriptsConfig.Ilscript.Builder().doctype(docTypeName)
+                                                              .content("index"));
+        return new ScriptManager(types, new IlscriptsConfig(config), null);
+    }
+
+    /** Creates a script manager from a script configuration with no entries. */
+    private static ScriptManager newEmptyScriptManager(DocumentTypeManager types) {
+        return new ScriptManager(types, new IlscriptsConfig(new IlscriptsConfig.Builder()), null);
+    }
+}