// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.derived;

import com.yahoo.schema.Schema;
import com.yahoo.schema.document.GeoPos;
import com.yahoo.schema.document.ImmutableSDField;
import com.yahoo.vespa.configdefinition.IlscriptsConfig;
import com.yahoo.vespa.configdefinition.IlscriptsConfig.Ilscript.Builder;
import com.yahoo.vespa.indexinglanguage.ExpressionConverter;
import com.yahoo.vespa.indexinglanguage.ExpressionVisitor;
import com.yahoo.vespa.indexinglanguage.expressions.AttributeExpression;
import com.yahoo.vespa.indexinglanguage.expressions.ClearStateExpression;
import com.yahoo.vespa.indexinglanguage.expressions.Expression;
import com.yahoo.vespa.indexinglanguage.expressions.GuardExpression;
import com.yahoo.vespa.indexinglanguage.expressions.InputExpression;
import com.yahoo.vespa.indexinglanguage.expressions.OutputExpression;
import com.yahoo.vespa.indexinglanguage.expressions.PassthroughExpression;
import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression;
import com.yahoo.vespa.indexinglanguage.expressions.SetLanguageExpression;
import com.yahoo.vespa.indexinglanguage.expressions.StatementExpression;
import com.yahoo.vespa.indexinglanguage.expressions.TokenizeExpression;
import com.yahoo.vespa.indexinglanguage.expressions.ZCurveExpression;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * An indexing language script derived from a schema. An indexing script contains a set of indexing
 * statements, organized in a composite structure of indexing code snippets.
 *
 * @author bratseth
 */
public final class IndexingScript extends Derived {

    /** Names of the fields for which {@code hasFullIndexingDocprocRights()} is true. */
    private final List<String> docFields = new ArrayList<>();

    /** The collected statements, each wrapped as "clear state, then guarded field script". */
    private final List<Expression> expressions = new ArrayList<>();

    /** The non-imported fields whose indexing script contains a {@link SetLanguageExpression}. */
    private List<ImmutableSDField> fieldsSettingLanguage;

    /** When true, tokenize and zcurve expressions are dropped when the config content is produced. */
    private final boolean isStreaming;

    /**
     * Creates the indexing script for the given schema and derives its content immediately.
     *
     * @param schema      the schema to derive the script from
     * @param isStreaming whether the streaming-specific conversions should be applied on output
     */
    public IndexingScript(Schema schema, boolean isStreaming) {
        this.isStreaming = isStreaming;
        derive(schema);
    }

    /**
     * Derives the script from the schema. If exactly one field sets the language, the script of
     * that field is added before the scripts of all other fields.
     */
    @Override
    protected void derive(Schema schema) {
        fieldsSettingLanguage = fieldsSettingLanguage(schema);
        if (fieldsSettingLanguage.size() == 1) // Assume this language should be used for all fields
            addExpression(fieldsSettingLanguage.get(0).getIndexingScript());
        super.derive(schema);
    }

    /**
     * Adds the indexing script of a single field, and records the field as a document field
     * if it has full indexing docproc rights. Imported fields are ignored entirely.
     */
    @Override
    protected void derive(ImmutableSDField field, Schema schema) {
        if (field.isImportedField()) return;

        if (field.hasFullIndexingDocprocRights())
            docFields.add(field.getName());

        if (field.usesStructOrMap() && ! GeoPos.isAnyPos(field)) return; // unsupported

        if (fieldsSettingLanguage.size() == 1 && fieldsSettingLanguage.get(0).equals(field))
            return; // Already added

        addExpression(field.getIndexingScript());
    }

    /** Wraps a non-empty script in a "clear state; guard" statement and appends it to the script. */
    private void addExpression(ScriptExpression expression) {
        if ( expression.isEmpty()) return;
        expressions.add(new StatementExpression(new ClearStateExpression(), new GuardExpression(expression)));
    }

    /** Returns the non-imported fields of the schema which contain a set_language expression. */
    private List<ImmutableSDField> fieldsSettingLanguage(Schema schema) {
        return schema.allFieldsList().stream()
                     .filter(field -> ! field.isImportedField())
                     .filter(field -> field.containsExpression(SetLanguageExpression.class))
                     .toList();
    }

    /** Returns a read-only view of the statements collected so far, in insertion order. */
    public Iterable<Expression> expressions() {
        return Collections.unmodifiableCollection(expressions);
    }

    @Override
    public String getDerivedName() {
        return "ilscripts";
    }

    /**
     * Appends one ilscript entry to the given config builder, carrying the name returned by
     * {@code getName()} as doctype, the document fields, and the script content.
     *
     * @param configBuilder the builder to append the entry to
     */
    public void getConfig(IlscriptsConfig.Builder configBuilder) {
        // Append
        IlscriptsConfig.Ilscript.Builder ilscriptBuilder = new IlscriptsConfig.Ilscript.Builder();
        ilscriptBuilder.doctype(getName());
        ilscriptBuilder.docfield(docFields);
        addContentInOrder(ilscriptBuilder);
        configBuilder.ilscript(ilscriptBuilder);
    }

    /**
     * Builds the ilscripts config for this and passes it, together with the target directory,
     * to the two-argument {@code export}.
     *
     * @param toDirectory the directory handed on to the two-argument export
     * @throws IOException declared for the export this delegates to
     */
    public void export(String toDirectory) throws IOException {
        var builder = new IlscriptsConfig.Builder();
        getConfig(builder);
        export(toDirectory, builder.build());
    }

    /**
     * Converts every {@link TokenizeExpression} to null.
     * NOTE(review): presumably a null conversion result makes the converter drop the expression - confirm
     * against ExpressionConverter.
     */
    private static class DropTokenize extends ExpressionConverter {

        @Override
        protected boolean shouldConvert(Expression exp) {
            return exp instanceof TokenizeExpression;
        }

        @Override
        protected Expression doConvert(Expression exp) {
            return null;
        }

    }

    // for streaming, drop zcurve conversion to attribute with suffix
    private static class DropZcurve extends ExpressionConverter {

        private static final String zSuffix = "_zcurve";
        private static final int zSuffixLen = zSuffix.length();

        /** Set once a zcurve expression has been encountered; only then are attributes considered. */
        private boolean seenZcurve = false;

        @Override
        protected boolean shouldConvert(Expression exp) {
            if (exp instanceof ZCurveExpression) {
                seenZcurve = true;
                return true;
            }
            if (seenZcurve && exp instanceof AttributeExpression attrExp) {
                return attrExp.getFieldName().endsWith(zSuffix);
            }
            return false;
        }

        @Override
        protected Expression doConvert(Expression exp) {
            if (exp instanceof ZCurveExpression) {
                return null;
            }
            if (exp instanceof AttributeExpression attrExp) {
                String orig = attrExp.getFieldName();
                int len = orig.length();
                // Only strip the suffix when something remains of the name afterwards
                if (len > zSuffixLen && orig.endsWith(zSuffix)) {
                    return new AttributeExpression(orig.substring(0, len - zSuffixLen));
                }
            }
            return exp;
        }

    }

    /**
     * Adds the content of this script to the given builder. Statements which write to the same field
     * they read from, and which do not set the language, are added after all other statements.
     * Finally, synthetic statements are added for the document fields no statement touched.
     */
    private void addContentInOrder(IlscriptsConfig.Ilscript.Builder ilscriptBuilder) {
        List<Expression> later = new ArrayList<>();
        Set<String> touchedFields = new HashSet<>();
        for (Expression expression : expressions) {
            if (isStreaming) {
                expression = expression.convertChildren(new DropTokenize());
                expression = expression.convertChildren(new DropZcurve());
            }
            if (modifiesSelf(expression) && ! setsLanguage(expression)) {
                later.add(expression);
            } else {
                ilscriptBuilder.content(expression.toString());
            }

            // The scan runs on the (possibly converted) expression, after the streaming conversions
            FieldScanVisitor fieldFetcher = new FieldScanVisitor();
            fieldFetcher.visit(expression);
            touchedFields.addAll(fieldFetcher.touchedFields());
        }
        for (Expression exp : later) {
            ilscriptBuilder.content(exp.toString());
        }
        generateSyntheticStatementsForUntouchedFields(ilscriptBuilder, touchedFields);
    }

    /**
     * Adds an "input field, passthrough field" statement for each document field which is not in
     * the given set of touched fields, in sorted field name order.
     */
    private void generateSyntheticStatementsForUntouchedFields(Builder ilscriptBuilder, Set<String> touchedFields) {
        Set<String> fieldsWithSyntheticStatements = new HashSet<>(docFields);
        fieldsWithSyntheticStatements.removeAll(touchedFields);
        List<String> orderedFields = new ArrayList<>(fieldsWithSyntheticStatements);
        Collections.sort(orderedFields);
        for (String fieldName : orderedFields) {
            StatementExpression copyField = new StatementExpression(new InputExpression(fieldName),
                                                                    new PassthroughExpression(fieldName));
            ilscriptBuilder.content(copyField.toString());
        }
    }

    /** Returns whether the given expression is or contains a {@link SetLanguageExpression}. */
    private boolean setsLanguage(Expression expression) {
        SetsLanguageVisitor visitor = new SetsLanguageVisitor();
        visitor.visit(expression);
        return visitor.setsLanguage;
    }

    /** Returns whether the given expression outputs to the same field name it takes as input. */
    private boolean modifiesSelf(Expression expression) {
        ModifiesSelfVisitor visitor = new ModifiesSelfVisitor();
        visitor.visit(expression);
        return visitor.modifiesSelf();
    }

    /** Tracks the most recently visited input and output field names until they are equal. */
    private static class ModifiesSelfVisitor extends ExpressionVisitor {

        private String inputField = null;
        private String outputField = null;

        public boolean modifiesSelf() { return outputField != null && outputField.equals(inputField); }

        @Override
        protected void doVisit(Expression expression) {
            if (modifiesSelf()) return; // A match is found: do not let later expressions overwrite it

            if (expression instanceof InputExpression input) {
                inputField = input.getFieldName();
            }
            if (expression instanceof OutputExpression output) {
                outputField = output.getFieldName();
            }
        }

    }

    /** Records whether any visited expression is a {@link SetLanguageExpression}. */
    private static class SetsLanguageVisitor extends ExpressionVisitor {

        boolean setsLanguage = false;

        @Override
        protected void doVisit(Expression expression) {
            if (expression instanceof SetLanguageExpression)
                setsLanguage = true;
        }

    }

    /**
     * Collects the names of the fields written by output expressions. When a zcurve expression
     * is visited, the input fields seen up to that point are counted as touched as well.
     */
    private static class FieldScanVisitor extends ExpressionVisitor {

        private List<String> touchedFields = new ArrayList<>();
        private final List<String> candidates = new ArrayList<>();

        @Override
        protected void doVisit(Expression exp) {
            if (exp instanceof OutputExpression output) {
                touchedFields.add(output.getFieldName());
            }
            if (exp instanceof InputExpression input) {
                candidates.add(input.getFieldName());
            }
            if (exp instanceof ZCurveExpression) {
                touchedFields.addAll(candidates);
            }
        }

        /** Returns the collected field names. This visitor must not be used after this is called. */
        Collection<String> touchedFields() {
            Collection<String> output = touchedFields;
            touchedFields = null; // deny re-use to try and avoid obvious bugs
            return output;
        }

    }

}