From cce300207e8ff854debcdd0a9920d8e71bacc6a1 Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Mon, 23 Jan 2023 15:25:03 +0100 Subject: Skip statements on partial updates only --- .../com/yahoo/schema/derived/IndexingScript.java | 1 - .../yahoo/docprocs/indexing/DocumentScript.java | 5 +- .../yahoo/docprocs/indexing/IndexingProcessor.java | 23 +---- docprocs/src/test/cfg/ilscripts.cfg | 12 +-- .../indexing/IndexingProcessorTestCase.java | 104 ++++++++++++++++----- .../src/main/java/com/yahoo/document/Document.java | 26 +++--- .../vespa/indexinglanguage/DocumentAdapter.java | 3 + .../vespa/indexinglanguage/UpdateAdapter.java | 3 + .../expressions/ExecutionContext.java | 12 ++- .../expressions/FieldValueAdapter.java | 3 + .../expressions/ScriptExpression.java | 4 +- .../vespa/indexinglanguage/SimpleTestAdapter.java | 5 + 12 files changed, 131 insertions(+), 70 deletions(-) diff --git a/config-model/src/main/java/com/yahoo/schema/derived/IndexingScript.java b/config-model/src/main/java/com/yahoo/schema/derived/IndexingScript.java index a35bc735608..32add37a546 100644 --- a/config-model/src/main/java/com/yahoo/schema/derived/IndexingScript.java +++ b/config-model/src/main/java/com/yahoo/schema/derived/IndexingScript.java @@ -24,7 +24,6 @@ import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; -import java.util.stream.Collectors; /** * An indexing language script derived from a search definition. An indexing script contains a set of indexing diff --git a/docprocs/src/main/java/com/yahoo/docprocs/indexing/DocumentScript.java b/docprocs/src/main/java/com/yahoo/docprocs/indexing/DocumentScript.java index 8968c87694f..e690ca1dc64 100644 --- a/docprocs/src/main/java/com/yahoo/docprocs/indexing/DocumentScript.java +++ b/docprocs/src/main/java/com/yahoo/docprocs/indexing/DocumentScript.java @@ -41,9 +41,10 @@ public class DocumentScript { } public Expression getExpression() { return expression; } + public Document execute(AdapterFactory adapterFactory, Document document) { - for (Iterator> it = document.iterator(); it.hasNext(); ) { - Map.Entry entry = it.next(); + for (var i = document.iterator(); i.hasNext(); ) { + Map.Entry entry = i.next(); requireThatFieldIsDeclaredInDocument(entry.getKey()); removeAnyLinguisticsSpanTree(entry.getValue()); } diff --git a/docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java b/docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java index 657c8a161f7..de4fee2ed68 100644 --- a/docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java +++ b/docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java @@ -29,10 +29,8 @@ import com.yahoo.vespa.indexinglanguage.SimpleAdapterFactory; import com.yahoo.vespa.indexinglanguage.expressions.Expression; import java.util.Map; -import java.util.logging.Level; import java.util.stream.Collectors; - /** * @author Simon Thoresen Hult */ @@ -45,7 +43,6 @@ public class IndexingProcessor extends DocumentProcessor { public final static String INDEXING_START = "indexingStart"; public final static String INDEXING_END = "indexingEnd"; - private final static FastLogger log = FastLogger.getLogger(IndexingProcessor.class.getName()); private final DocumentTypeManager docTypeMgr; private final ScriptManager scriptMgr; private final AdapterFactory adapterFactory; @@ -69,9 +66,8 @@ public class IndexingProcessor extends DocumentProcessor { @Override public Progress process(Processing proc) { - if (proc.getDocumentOperations().isEmpty()) { - return Progress.DONE; - } + if (proc.getDocumentOperations().isEmpty()) return Progress.DONE; + List out = new ArrayList<>(proc.getDocumentOperations().size()); for (DocumentOperation documentOperation : proc.getDocumentOperations()) { if (documentOperation instanceof DocumentPut) { @@ -99,11 +95,9 @@ public class IndexingProcessor extends DocumentProcessor { DocumentType hadType = input.getDocument().getDataType(); DocumentScript script = scriptMgr.getScript(hadType); if (script == null) { - log.log(Level.FINE, "No indexing script for document '%s'.", input.getId()); out.add(input); return; } - log.log(Level.FINE, "Processing document '%s'.", input.getId()); DocumentType wantType = docTypeMgr.getDocumentType(hadType.getName()); Document inputDocument = input.getDocument(); if (hadType != wantType) { @@ -117,10 +111,7 @@ public class IndexingProcessor extends DocumentProcessor { inputDocument = docTypeMgr.createDocument(buffer); } Document output = script.execute(adapterFactory, inputDocument); - if (output == null) { - log.log(Level.FINE, "Document '%s' produced no output.", input.getId()); - return; - } + if (output == null) return; out.add(new DocumentPut(input, output)); } @@ -128,22 +119,16 @@ public class IndexingProcessor extends DocumentProcessor { private void processUpdate(DocumentUpdate input, List out) { DocumentScript script = scriptMgr.getScript(input.getType()); if (script == null) { - log.log(Level.FINE, "No indexing script for update '%s'.", input.getId()); out.add(input); return; } - log.log(Level.FINE, "Processing update '%s'.", input.getId()); DocumentUpdate output = script.execute(adapterFactory, input); - if (output == null) { - log.log(Level.FINE, "Update '%s' produced no output.", input.getId()); - return; - } + if (output == null) return; output.setCondition(input.getCondition()); out.add(output); } private void processRemove(DocumentRemove input, List out) { - log.log(Level.FINE, "Not processing remove '%s'.", input.getId()); out.add(input); } diff --git a/docprocs/src/test/cfg/ilscripts.cfg b/docprocs/src/test/cfg/ilscripts.cfg index cab3ee0aa0a..ce5e209458d 100644 --- a/docprocs/src/test/cfg/ilscripts.cfg +++ b/docprocs/src/test/cfg/ilscripts.cfg @@ -1,15 +1,13 @@ ilscript[1] ilscript[0].doctype "music" -ilscript[0].docfield[4] +ilscript[0].docfield[3] ilscript[0].docfield[0] "artist" ilscript[0].docfield[1] "title" -ilscript[0].docfield[2] "isbn" -ilscript[0].docfield[3] "song" +ilscript[0].docfield[2] "song" ilscript[0].content[5] -ilscript[0].content[0] "input artist | attribute title" -ilscript[0].content[1] "input title | attribute artist" -ilscript[0].content[2] "input isbn | passthrough isbn" -ilscript[0].content[3] "input isbn | attribute song" +ilscript[0].content[0] "input artist | attribute artist" +ilscript[0].content[1] "input title | attribute title" +ilscript[0].content[2] "input song | attribute song" ilscript[0].content[4] "input artist . " ". input title | index combined" ilscript[0].content[5] "(input artist || "") . " ". (input title || "") | index combinedWithFallback" diff --git a/docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTestCase.java b/docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTestCase.java index 2cec7dd4371..ae7437ed80b 100644 --- a/docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTestCase.java +++ b/docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTestCase.java @@ -15,7 +15,6 @@ import com.yahoo.document.datatypes.StringFieldValue; import com.yahoo.document.update.AssignValueUpdate; import com.yahoo.document.update.FieldUpdate; import com.yahoo.document.update.ValueUpdate; -import com.yahoo.language.process.Embedder; import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.vespa.configdefinition.IlscriptsConfig; import org.junit.Test; @@ -38,18 +37,6 @@ public class IndexingProcessorTestCase { private final IndexingProcessor indexer = newProcessor(CONFIG_ID); - @Test - public void requireThatIndexerProcessesDocuments() { - Document input = new Document(indexer.getDocumentTypeManager().getDocumentType("music"), "id:ns:music::"); - input.setFieldValue("artist", new StringFieldValue("69")); - DocumentOperation op = process(new DocumentPut(input)); - assertTrue(op instanceof DocumentPut); - - Document output = ((DocumentPut)op).getDocument(); - assertEquals(new StringFieldValue("69"), output.getFieldValue("title")); - assertEquals("music", output.getDataType().getName()); - } - @Test public void requireThatIndexerForwardsDocumentsOfUnknownType() { Document input = new Document(new DocumentType("unknown"), "id:ns:unknown::"); @@ -59,12 +46,82 @@ public class IndexingProcessorTestCase { } @Test - public void testFieldUpdates() { - // 'artist' is assigned to 'title' and vice versa + public void testPut() { // 'combined' gets the value of both // 'combinedWithFallback' falls back to an empty string if an input is missing - { // Both inputs are set + { // Both artist and title are set + DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music"); + DocumentPut input = new DocumentPut(inputType, "id:ns:music::"); + input.getDocument().setFieldValue(inputType.getField("artist"), new StringFieldValue("artist1")); + input.getDocument().setFieldValue(inputType.getField("title"), new StringFieldValue("title1")); + + Document output = ((DocumentPut)process(input)).getDocument(); + assertEquals("artist1", output.getFieldValue("artist").getWrappedValue()); + assertEquals("title1", output.getFieldValue("title").getWrappedValue()); + assertNull(output.getFieldValue("song")); + assertEquals("artist1 title1", output.getFieldValue("combined").getWrappedValue()); + assertEquals("artist1 title1", output.getFieldValue("combinedWithFallback").getWrappedValue()); + } + + { // Just artist is set + DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music"); + DocumentPut input = new DocumentPut(inputType, "id:ns:music::"); + input.getDocument().setFieldValue(inputType.getField("artist"), new StringFieldValue("artist1")); + + Document output = ((DocumentPut)process(input)).getDocument(); + assertEquals("artist1", output.getFieldValue("artist").getWrappedValue()); + assertNull(output.getFieldValue("title")); + assertNull(output.getFieldValue("song")); + assertNull(output.getFieldValue("combined")); + assertEquals("artist1 ", output.getFieldValue("combinedWithFallback").getWrappedValue()); + } + + { // Just title is set + DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music"); + DocumentPut input = new DocumentPut(inputType, "id:ns:music::"); + input.getDocument().setFieldValue(inputType.getField("title"), new StringFieldValue("title1")); + + Document output = ((DocumentPut)process(input)).getDocument(); + assertEquals("title1", output.getFieldValue("title").getWrappedValue()); + assertNull(output.getFieldValue("artist")); + assertNull(output.getFieldValue("song")); + assertNull(output.getFieldValue("combined")); + assertEquals(" title1", output.getFieldValue("combinedWithFallback").getWrappedValue()); + } + + { // Neither title nor artist is set + DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music"); + DocumentPut input = new DocumentPut(inputType, "id:ns:music::"); + input.getDocument().setFieldValue(inputType.getField("song"), new StringFieldValue("song1")); + + Document output = ((DocumentPut)process(input)).getDocument(); + assertNull(output.getFieldValue("artist")); + assertNull(output.getFieldValue("title")); + assertEquals("song1", output.getFieldValue("song").getWrappedValue()); + assertNull(output.getFieldValue("combined")); + assertEquals(" ", output.getFieldValue("combinedWithFallback").getWrappedValue()); + } + + { // None is set + DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music"); + DocumentPut input = new DocumentPut(inputType, "id:ns:music::"); + + Document output = ((DocumentPut)process(input)).getDocument(); + assertNull(output.getFieldValue("artist")); + assertNull(output.getFieldValue("title")); + assertNull(output.getFieldValue("song")); + assertNull(output.getFieldValue("combined")); + assertEquals(" ", output.getFieldValue("combinedWithFallback").getWrappedValue()); + } + } + + @Test + public void testUpdate() { + // 'combined' gets the value of artist and title + // 'combinedWithFallback' falls back to an empty string if an input is missing + + { // Both artist and title are set DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music"); DocumentUpdate input = new DocumentUpdate(inputType, "id:ns:music::"); input.addFieldUpdate(FieldUpdate.createAssign(inputType.getField("artist"), new StringFieldValue("artist1"))); @@ -72,8 +129,8 @@ public class IndexingProcessorTestCase { DocumentUpdate output = (DocumentUpdate)process(input); assertEquals(4, output.fieldUpdates().size()); - assertAssignment("artist", "title1", output); - assertAssignment("title", "artist1", output); + assertAssignment("artist", "artist1", output); + assertAssignment("title", "title1", output); assertAssignment("combined", "artist1 title1", output); assertAssignment("combinedWithFallback", "artist1 title1", output); } @@ -85,7 +142,7 @@ public class IndexingProcessorTestCase { DocumentUpdate output = (DocumentUpdate)process(input); assertEquals(2, output.fieldUpdates().size()); - assertAssignment("title", "artist1", output); + assertAssignment("artist", "artist1", output); assertAssignment("combinedWithFallback", "artist1 ", output); } @@ -96,19 +153,18 @@ public class IndexingProcessorTestCase { DocumentUpdate output = (DocumentUpdate)process(input); assertEquals(2, output.fieldUpdates().size()); - assertAssignment("artist", "title1", output); + assertAssignment("title", "title1", output); assertAssignment("combinedWithFallback", " title1", output); } { // Neither title nor artist is set: Should not update embeddings even though it has fallbacks for all DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music"); DocumentUpdate input = new DocumentUpdate(inputType, "id:ns:music::"); - input.addFieldUpdate(FieldUpdate.createAssign(inputType.getField("isbn"), new StringFieldValue("isbn1"))); + input.addFieldUpdate(FieldUpdate.createAssign(inputType.getField("song"), new StringFieldValue("song1"))); DocumentUpdate output = (DocumentUpdate)process(input); - assertEquals(2, output.fieldUpdates().size()); - assertAssignment("isbn", "isbn1", output); - assertAssignment("song", "isbn1", output); + assertEquals(1, output.fieldUpdates().size()); + assertAssignment("song", "song1", output); } { // None is set: Should not update anything diff --git a/document/src/main/java/com/yahoo/document/Document.java b/document/src/main/java/com/yahoo/document/Document.java index 760b9de0199..3dc628e0a90 100644 --- a/document/src/main/java/com/yahoo/document/Document.java +++ b/document/src/main/java/com/yahoo/document/Document.java @@ -43,7 +43,7 @@ public class Document extends StructuredFieldValue { public static final int classId = registerClass(Ids.document + 3, Document.class); public static final short SERIALIZED_VERSION = 8; private DocumentId docId; - private Struct header; + private Struct content; private Long lastModified = null; /** @@ -73,7 +73,7 @@ public class Document extends StructuredFieldValue { */ public Document(Document doc) { this(doc.getDataType(), doc.getId()); - header = doc.header; + content = doc.content; lastModified = doc.lastModified; } @@ -105,12 +105,12 @@ public class Document extends StructuredFieldValue { public Document clone() { Document doc = (Document) super.clone(); doc.docId = docId.clone(); - doc.header = header.clone(); + doc.content = content.clone(); return doc; } private void setNewType(DocumentType type) { - header = type.contentStruct().createFieldValue(); + content = type.contentStruct().createFieldValue(); } public void setDataType(DataType type) { @@ -163,7 +163,7 @@ public class Document extends StructuredFieldValue { @Override public Field getField(String fieldName) { - Field field = header.getField(fieldName); + Field field = content.getField(fieldName); if (field == null) { for(DocumentType parent : getDataType().getInheritedTypes()) { field = parent.getField(fieldName); @@ -177,27 +177,27 @@ public class Document extends StructuredFieldValue { @Override public FieldValue getFieldValue(Field field) { - return header.getFieldValue(field); + return content.getFieldValue(field); } @Override protected void doSetFieldValue(Field field, FieldValue value) { - header.setFieldValue(field, value); + content.setFieldValue(field, value); } @Override public FieldValue removeFieldValue(Field field) { - return header.removeFieldValue(field); + return content.removeFieldValue(field); } @Override public void clear() { - header.clear(); + content.clear(); } @Override public Iterator> iterator() { - return header.iterator(); + return content.iterator(); } public String toString() { @@ -244,7 +244,7 @@ public class Document extends StructuredFieldValue { if (o == this) return true; if (!(o instanceof Document other)) return false; return (super.equals(o) && docId.equals(other.docId) && - header.equals(other.header)); + content.equals(other.content)); } @Override @@ -289,7 +289,7 @@ public class Document extends StructuredFieldValue { @Override public int getFieldCount() { - return header.getFieldCount(); + return content.getFieldCount(); } public void serialize(DocumentWriter writer) { @@ -329,7 +329,7 @@ public class Document extends StructuredFieldValue { return comp; } - comp = header.compareTo(otherValue.header); + comp = content.compareTo(otherValue.content); if (comp != 0) { return comp; diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/DocumentAdapter.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/DocumentAdapter.java index db0c470797e..4c64809c546 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/DocumentAdapter.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/DocumentAdapter.java @@ -13,4 +13,7 @@ public interface DocumentAdapter extends FieldValueAdapter { Document getUpdatableOutput(); + @Override + default boolean isComplete() { return true; } + } diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/UpdateAdapter.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/UpdateAdapter.java index c6c0b6d9226..d4de467b25d 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/UpdateAdapter.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/UpdateAdapter.java @@ -13,4 +13,7 @@ public interface UpdateAdapter extends FieldValueAdapter { DocumentUpdate getOutput(); Expression getExpression(Expression expression); + @Override + default boolean isComplete() { return false; } + } diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExecutionContext.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExecutionContext.java index d463bee647e..f01f2fcc9fb 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExecutionContext.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExecutionContext.java @@ -30,12 +30,18 @@ public class ExecutionContext implements FieldTypeAdapter, FieldValueAdapter, Cl this.language = Language.UNKNOWN; } - public ExecutionContext execute(Expression exp) { - if (exp != null) - exp.execute(this); + public ExecutionContext execute(Expression expression) { + if (expression != null) + expression.execute(this); return this; } + /** + * Returns whether this is for a complete execution of all statements of a script, + * or a partial execution of only the statements accessing the available data. + */ + public boolean isComplete() { return adapter == null ? false : adapter.isComplete(); } + @Override public DataType getInputType(Expression exp, String fieldName) { return adapter.getInputType(exp, fieldName); diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/FieldValueAdapter.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/FieldValueAdapter.java index 6f6074db2fd..e8ec1d33446 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/FieldValueAdapter.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/FieldValueAdapter.java @@ -14,4 +14,7 @@ public interface FieldValueAdapter extends FieldTypeAdapter { FieldValueAdapter setOutputValue(Expression exp, String fieldName, FieldValue fieldValue); + /** Returns true if this has values for all possibly existing inputs, or represents a partial set of values. */ + boolean isComplete(); + } diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ScriptExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ScriptExpression.java index 8287252b268..f0c37960a99 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ScriptExpression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ScriptExpression.java @@ -39,8 +39,10 @@ public final class ScriptExpression extends ExpressionList protected void doExecute(ExecutionContext context) { FieldValue input = context.getValue(); for (StatementExpression statement : this) { - if (statement.getInputFields().isEmpty() || containsAtLeastOneInputFrom(statement.getInputFields(), context)) + if (context.isComplete() || + (statement.getInputFields().isEmpty() || containsAtLeastOneInputFrom(statement.getInputFields(), context))) { context.setValue(input).execute(statement); + } } context.setValue(input); } diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/SimpleTestAdapter.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/SimpleTestAdapter.java index cfb82034545..000d4aaecdd 100644 --- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/SimpleTestAdapter.java +++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/SimpleTestAdapter.java @@ -69,4 +69,9 @@ public class SimpleTestAdapter implements FieldValueAdapter { return this; } + @Override + public boolean isComplete() { + return false; + } + } -- cgit v1.2.3