summary | refs | log | tree | commit | diff | stats
path: root/docprocs
diff options
context:
space:
mode:
author: Jon Bratseth <bratseth@gmail.com> 2023-01-23 15:25:03 +0100
committer: Jon Bratseth <bratseth@gmail.com> 2023-01-23 15:25:03 +0100
commit: cce300207e8ff854debcdd0a9920d8e71bacc6a1 (patch)
tree: 05fe21f16127c44d5e1b036ad5ae5351fed0f1ac /docprocs
parent: faa31b4d439026b5e6e81639ac3f57bbd572a53f (diff)
Skip statements on partial updates only
Diffstat (limited to 'docprocs')
-rw-r--r-- docprocs/src/main/java/com/yahoo/docprocs/indexing/DocumentScript.java 5
-rw-r--r-- docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java 23
-rw-r--r-- docprocs/src/test/cfg/ilscripts.cfg 12
-rw-r--r-- docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTestCase.java 104
4 files changed, 92 insertions, 52 deletions
diff --git a/docprocs/src/main/java/com/yahoo/docprocs/indexing/DocumentScript.java b/docprocs/src/main/java/com/yahoo/docprocs/indexing/DocumentScript.java
index 8968c87694f..e690ca1dc64 100644
--- a/docprocs/src/main/java/com/yahoo/docprocs/indexing/DocumentScript.java
+++ b/docprocs/src/main/java/com/yahoo/docprocs/indexing/DocumentScript.java
@@ -41,9 +41,10 @@ public class DocumentScript {
}
public Expression getExpression() { return expression; }
+
public Document execute(AdapterFactory adapterFactory, Document document) {
- for (Iterator<Map.Entry<Field, FieldValue>> it = document.iterator(); it.hasNext(); ) {
- Map.Entry<Field, FieldValue> entry = it.next();
+ for (var i = document.iterator(); i.hasNext(); ) {
+ Map.Entry<Field, FieldValue> entry = i.next();
requireThatFieldIsDeclaredInDocument(entry.getKey());
removeAnyLinguisticsSpanTree(entry.getValue());
}
diff --git a/docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java b/docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java
index 657c8a161f7..de4fee2ed68 100644
--- a/docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java
+++ b/docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java
@@ -29,10 +29,8 @@ import com.yahoo.vespa.indexinglanguage.SimpleAdapterFactory;
import com.yahoo.vespa.indexinglanguage.expressions.Expression;
import java.util.Map;
-import java.util.logging.Level;
import java.util.stream.Collectors;
-
/**
* @author Simon Thoresen Hult
*/
@@ -45,7 +43,6 @@ public class IndexingProcessor extends DocumentProcessor {
public final static String INDEXING_START = "indexingStart";
public final static String INDEXING_END = "indexingEnd";
- private final static FastLogger log = FastLogger.getLogger(IndexingProcessor.class.getName());
private final DocumentTypeManager docTypeMgr;
private final ScriptManager scriptMgr;
private final AdapterFactory adapterFactory;
@@ -69,9 +66,8 @@ public class IndexingProcessor extends DocumentProcessor {
@Override
public Progress process(Processing proc) {
- if (proc.getDocumentOperations().isEmpty()) {
- return Progress.DONE;
- }
+ if (proc.getDocumentOperations().isEmpty()) return Progress.DONE;
+
List<DocumentOperation> out = new ArrayList<>(proc.getDocumentOperations().size());
for (DocumentOperation documentOperation : proc.getDocumentOperations()) {
if (documentOperation instanceof DocumentPut) {
@@ -99,11 +95,9 @@ public class IndexingProcessor extends DocumentProcessor {
DocumentType hadType = input.getDocument().getDataType();
DocumentScript script = scriptMgr.getScript(hadType);
if (script == null) {
- log.log(Level.FINE, "No indexing script for document '%s'.", input.getId());
out.add(input);
return;
}
- log.log(Level.FINE, "Processing document '%s'.", input.getId());
DocumentType wantType = docTypeMgr.getDocumentType(hadType.getName());
Document inputDocument = input.getDocument();
if (hadType != wantType) {
@@ -117,10 +111,7 @@ public class IndexingProcessor extends DocumentProcessor {
inputDocument = docTypeMgr.createDocument(buffer);
}
Document output = script.execute(adapterFactory, inputDocument);
- if (output == null) {
- log.log(Level.FINE, "Document '%s' produced no output.", input.getId());
- return;
- }
+ if (output == null) return;
out.add(new DocumentPut(input, output));
}
@@ -128,22 +119,16 @@ public class IndexingProcessor extends DocumentProcessor {
private void processUpdate(DocumentUpdate input, List<DocumentOperation> out) {
DocumentScript script = scriptMgr.getScript(input.getType());
if (script == null) {
- log.log(Level.FINE, "No indexing script for update '%s'.", input.getId());
out.add(input);
return;
}
- log.log(Level.FINE, "Processing update '%s'.", input.getId());
DocumentUpdate output = script.execute(adapterFactory, input);
- if (output == null) {
- log.log(Level.FINE, "Update '%s' produced no output.", input.getId());
- return;
- }
+ if (output == null) return;
output.setCondition(input.getCondition());
out.add(output);
}
private void processRemove(DocumentRemove input, List<DocumentOperation> out) {
- log.log(Level.FINE, "Not processing remove '%s'.", input.getId());
out.add(input);
}
diff --git a/docprocs/src/test/cfg/ilscripts.cfg b/docprocs/src/test/cfg/ilscripts.cfg
index cab3ee0aa0a..ce5e209458d 100644
--- a/docprocs/src/test/cfg/ilscripts.cfg
+++ b/docprocs/src/test/cfg/ilscripts.cfg
@@ -1,15 +1,13 @@
ilscript[1]
ilscript[0].doctype "music"
-ilscript[0].docfield[4]
+ilscript[0].docfield[3]
ilscript[0].docfield[0] "artist"
ilscript[0].docfield[1] "title"
-ilscript[0].docfield[2] "isbn"
-ilscript[0].docfield[3] "song"
+ilscript[0].docfield[2] "song"
ilscript[0].content[5]
-ilscript[0].content[0] "input artist | attribute title"
-ilscript[0].content[1] "input title | attribute artist"
-ilscript[0].content[2] "input isbn | passthrough isbn"
-ilscript[0].content[3] "input isbn | attribute song"
+ilscript[0].content[0] "input artist | attribute artist"
+ilscript[0].content[1] "input title | attribute title"
+ilscript[0].content[2] "input song | attribute song"
ilscript[0].content[4] "input artist . " ". input title | index combined"
ilscript[0].content[5] "(input artist || "") . " ". (input title || "") | index combinedWithFallback"
diff --git a/docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTestCase.java b/docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTestCase.java
index 2cec7dd4371..ae7437ed80b 100644
--- a/docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTestCase.java
+++ b/docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTestCase.java
@@ -15,7 +15,6 @@ import com.yahoo.document.datatypes.StringFieldValue;
import com.yahoo.document.update.AssignValueUpdate;
import com.yahoo.document.update.FieldUpdate;
import com.yahoo.document.update.ValueUpdate;
-import com.yahoo.language.process.Embedder;
import com.yahoo.language.simple.SimpleLinguistics;
import com.yahoo.vespa.configdefinition.IlscriptsConfig;
import org.junit.Test;
@@ -39,18 +38,6 @@ public class IndexingProcessorTestCase {
private final IndexingProcessor indexer = newProcessor(CONFIG_ID);
@Test
- public void requireThatIndexerProcessesDocuments() {
- Document input = new Document(indexer.getDocumentTypeManager().getDocumentType("music"), "id:ns:music::");
- input.setFieldValue("artist", new StringFieldValue("69"));
- DocumentOperation op = process(new DocumentPut(input));
- assertTrue(op instanceof DocumentPut);
-
- Document output = ((DocumentPut)op).getDocument();
- assertEquals(new StringFieldValue("69"), output.getFieldValue("title"));
- assertEquals("music", output.getDataType().getName());
- }
-
- @Test
public void requireThatIndexerForwardsDocumentsOfUnknownType() {
Document input = new Document(new DocumentType("unknown"), "id:ns:unknown::");
DocumentOperation output = process(new DocumentPut(input));
@@ -59,12 +46,82 @@ public class IndexingProcessorTestCase {
}
@Test
- public void testFieldUpdates() {
- // 'artist' is assigned to 'title' and vice versa
+ public void testPut() {
// 'combined' gets the value of both
// 'combinedWithFallback' falls back to an empty string if an input is missing
- { // Both inputs are set
+ { // Both artist and title are set
+ DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music");
+ DocumentPut input = new DocumentPut(inputType, "id:ns:music::");
+ input.getDocument().setFieldValue(inputType.getField("artist"), new StringFieldValue("artist1"));
+ input.getDocument().setFieldValue(inputType.getField("title"), new StringFieldValue("title1"));
+
+ Document output = ((DocumentPut)process(input)).getDocument();
+ assertEquals("artist1", output.getFieldValue("artist").getWrappedValue());
+ assertEquals("title1", output.getFieldValue("title").getWrappedValue());
+ assertNull(output.getFieldValue("song"));
+ assertEquals("artist1 title1", output.getFieldValue("combined").getWrappedValue());
+ assertEquals("artist1 title1", output.getFieldValue("combinedWithFallback").getWrappedValue());
+ }
+
+ { // Just artist is set
+ DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music");
+ DocumentPut input = new DocumentPut(inputType, "id:ns:music::");
+ input.getDocument().setFieldValue(inputType.getField("artist"), new StringFieldValue("artist1"));
+
+ Document output = ((DocumentPut)process(input)).getDocument();
+ assertEquals("artist1", output.getFieldValue("artist").getWrappedValue());
+ assertNull(output.getFieldValue("title"));
+ assertNull(output.getFieldValue("song"));
+ assertNull(output.getFieldValue("combined"));
+ assertEquals("artist1 ", output.getFieldValue("combinedWithFallback").getWrappedValue());
+ }
+
+ { // Just title is set
+ DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music");
+ DocumentPut input = new DocumentPut(inputType, "id:ns:music::");
+ input.getDocument().setFieldValue(inputType.getField("title"), new StringFieldValue("title1"));
+
+ Document output = ((DocumentPut)process(input)).getDocument();
+ assertEquals("title1", output.getFieldValue("title").getWrappedValue());
+ assertNull(output.getFieldValue("artist"));
+ assertNull(output.getFieldValue("song"));
+ assertNull(output.getFieldValue("combined"));
+ assertEquals(" title1", output.getFieldValue("combinedWithFallback").getWrappedValue());
+ }
+
+ { // Neither title nor artist is set
+ DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music");
+ DocumentPut input = new DocumentPut(inputType, "id:ns:music::");
+ input.getDocument().setFieldValue(inputType.getField("song"), new StringFieldValue("song1"));
+
+ Document output = ((DocumentPut)process(input)).getDocument();
+ assertNull(output.getFieldValue("artist"));
+ assertNull(output.getFieldValue("title"));
+ assertEquals("song1", output.getFieldValue("song").getWrappedValue());
+ assertNull(output.getFieldValue("combined"));
+ assertEquals(" ", output.getFieldValue("combinedWithFallback").getWrappedValue());
+ }
+
+ { // None is set
+ DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music");
+ DocumentPut input = new DocumentPut(inputType, "id:ns:music::");
+
+ Document output = ((DocumentPut)process(input)).getDocument();
+ assertNull(output.getFieldValue("artist"));
+ assertNull(output.getFieldValue("title"));
+ assertNull(output.getFieldValue("song"));
+ assertNull(output.getFieldValue("combined"));
+ assertEquals(" ", output.getFieldValue("combinedWithFallback").getWrappedValue());
+ }
+ }
+
+ @Test
+ public void testUpdate() {
+ // 'combined' gets the value of artist and title
+ // 'combinedWithFallback' falls back to an empty string if an input is missing
+
+ { // Both artist and title are set
DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music");
DocumentUpdate input = new DocumentUpdate(inputType, "id:ns:music::");
input.addFieldUpdate(FieldUpdate.createAssign(inputType.getField("artist"), new StringFieldValue("artist1")));
@@ -72,8 +129,8 @@ public class IndexingProcessorTestCase {
DocumentUpdate output = (DocumentUpdate)process(input);
assertEquals(4, output.fieldUpdates().size());
- assertAssignment("artist", "title1", output);
- assertAssignment("title", "artist1", output);
+ assertAssignment("artist", "artist1", output);
+ assertAssignment("title", "title1", output);
assertAssignment("combined", "artist1 title1", output);
assertAssignment("combinedWithFallback", "artist1 title1", output);
}
@@ -85,7 +142,7 @@ public class IndexingProcessorTestCase {
DocumentUpdate output = (DocumentUpdate)process(input);
assertEquals(2, output.fieldUpdates().size());
- assertAssignment("title", "artist1", output);
+ assertAssignment("artist", "artist1", output);
assertAssignment("combinedWithFallback", "artist1 ", output);
}
@@ -96,19 +153,18 @@ public class IndexingProcessorTestCase {
DocumentUpdate output = (DocumentUpdate)process(input);
assertEquals(2, output.fieldUpdates().size());
- assertAssignment("artist", "title1", output);
+ assertAssignment("title", "title1", output);
assertAssignment("combinedWithFallback", " title1", output);
}
{ // Neither title nor artist is set: Should not update embeddings even though it has fallbacks for all
DocumentType inputType = indexer.getDocumentTypeManager().getDocumentType("music");
DocumentUpdate input = new DocumentUpdate(inputType, "id:ns:music::");
- input.addFieldUpdate(FieldUpdate.createAssign(inputType.getField("isbn"), new StringFieldValue("isbn1")));
+ input.addFieldUpdate(FieldUpdate.createAssign(inputType.getField("song"), new StringFieldValue("song1")));
DocumentUpdate output = (DocumentUpdate)process(input);
- assertEquals(2, output.fieldUpdates().size());
- assertAssignment("isbn", "isbn1", output);
- assertAssignment("song", "isbn1", output);
+ assertEquals(1, output.fieldUpdates().size());
+ assertAssignment("song", "song1", output);
}
{ // None is set: Should not update anything