diff options
author | Lester Solbakken <lesters@yahoo-inc.com> | 2017-11-21 09:21:15 +0100 |
---|---|---|
committer | Lester Solbakken <lesters@yahoo-inc.com> | 2017-11-21 09:21:15 +0100 |
commit | 85ccf6cfbe0634a8fddf6f17aba6c27ab5910782 (patch) | |
tree | f0feacdd7d0e261d90c3a466c85bd5fc4c165a1d | |
parent | 1e8ee4adb12fcf0438677f1502d0b23b40edad95 (diff) |
Replace min/max on tensors with reduce in config model
4 files changed, 489 insertions, 1 deletions
diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/RankProfile.java b/config-model/src/main/java/com/yahoo/searchdefinition/RankProfile.java index fd249956d5a..1021227b0e6 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/RankProfile.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/RankProfile.java @@ -706,6 +706,7 @@ public class RankProfile implements Serializable, Cloneable { expression = new ConstantTensorTransformer(constants, rankPropertiesOutput).transform(expression); expression = new MacroInliner(inlineMacros).transform(expression); expression = new MacroShadower(getMacros()).transform(expression); + expression = new TensorTransformer(this).transform(expression); expression = new Simplifier().transform(expression); for (Map.Entry<String, String> rankProperty : rankPropertiesOutput.entrySet()) { addRankProperty(rankProperty.getKey(), rankProperty.getValue()); diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/TensorTransformer.java b/config-model/src/main/java/com/yahoo/searchdefinition/TensorTransformer.java new file mode 100644 index 00000000000..e9723042d77 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/searchdefinition/TensorTransformer.java @@ -0,0 +1,282 @@ +package com.yahoo.searchdefinition; + +import com.yahoo.searchdefinition.document.Attribute; +import com.yahoo.searchlib.rankingexpression.evaluation.Context; +import com.yahoo.searchlib.rankingexpression.evaluation.DoubleValue; +import com.yahoo.searchlib.rankingexpression.evaluation.MapContext; +import com.yahoo.searchlib.rankingexpression.evaluation.StringValue; +import com.yahoo.searchlib.rankingexpression.evaluation.TensorValue; +import com.yahoo.searchlib.rankingexpression.evaluation.Value; +import com.yahoo.searchlib.rankingexpression.rule.CompositeNode; +import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode; +import com.yahoo.searchlib.rankingexpression.rule.FunctionNode; +import com.yahoo.searchlib.rankingexpression.rule.ReferenceNode; +import com.yahoo.searchlib.rankingexpression.rule.TensorFunctionNode; +import com.yahoo.searchlib.rankingexpression.transform.ExpressionTransformer; +import com.yahoo.tensor.Tensor; +import com.yahoo.tensor.TensorType; +import com.yahoo.tensor.functions.Reduce; + +import java.util.List; +import java.util.Map; + +/** + * Transforms and simplifies tensor expressions. + * + * Currently transforms min(tensor,dim) and max(tensor,dim) to + * reduce(tensor,min/max,dim). This is necessary as the backend does + * not recognize these forms of min and max. + * + * @author lesters + */ +public class TensorTransformer extends ExpressionTransformer { + + private Search search; + private RankProfile rankprofile; + private Map<String, RankProfile.Macro> macros; + + public TensorTransformer(RankProfile rankprofile) { + this.rankprofile = rankprofile; + this.search = rankprofile.getSearch(); + this.macros = rankprofile.getMacros(); + } + + @Override + public ExpressionNode transform(ExpressionNode node) { + if (node instanceof CompositeNode) { + node = transformChildren((CompositeNode) node); + } + if (node instanceof FunctionNode) { + node = transformFunctionNode((FunctionNode) node); + } + return node; + } + + private ExpressionNode transformFunctionNode(FunctionNode node) { + switch (node.getFunction()) { + case min: + case max: + return transformMaxAndMinFunctionNode(node); + } + return node; + } + + /** + * Transforms max and min functions if it can be proven that the first + * argument resolves to a tensor and the second argument is a valid + * dimension in the tensor. If these do not hold, the node will not + * be transformed. + * + * The test for whether or not the first argument resolves to a tensor + * is to evaluate that expression. All values used in the expression + * is bound to a context with dummy values with enough information to + * deduce tensor types. + * + * There is currently no guarantee that all cases will be found. For + * instance, if-statements are problematic. + */ + private ExpressionNode transformMaxAndMinFunctionNode(FunctionNode node) { + if (node.children().size() != 2) { + return node; + } + ExpressionNode arg1 = node.children().get(0); + ExpressionNode arg2 = node.children().get(1); + if (!potentialDimensionName(arg2)) { + return node; + } + try { + String dimension = ((ReferenceNode) arg2).getName(); + Context context = buildContext(arg1); + Value value = arg1.evaluate(context); + if (verifyTensorAndDimension(value, dimension)) { + return replaceMaxAndMinFunction(node); + } + } catch (Exception e) { + // Don't replace the expression in case of any errors, e.g. unknown values or rank features + } + return node; + } + + private boolean potentialDimensionName(ExpressionNode arg) { + return arg instanceof ReferenceNode && ((ReferenceNode) arg).children().size() == 0; + } + + private boolean verifyTensorAndDimension(Value value, String dimension) { + if (value instanceof TensorValue) { + Tensor tensor = ((TensorValue) value).asTensor(); + TensorType type = tensor.type(); + return type.dimensionNames().contains(dimension); + } + return false; + } + + private ExpressionNode replaceMaxAndMinFunction(FunctionNode node) { + ExpressionNode arg1 = node.children().get(0); + ExpressionNode arg2 = node.children().get(1); + + TensorFunctionNode.TensorFunctionExpressionNode expression = TensorFunctionNode.wrapArgument(arg1); + Reduce.Aggregator aggregator = Reduce.Aggregator.valueOf(node.getFunction().name()); + String dimension = ((ReferenceNode) arg2).getName(); + + return new TensorFunctionNode(new Reduce(expression, aggregator, dimension)); + } + + /** + * Creates an evaluation context by iterating through the expression tree, and + * adding dummy values with correct types to the context. + */ + private Context buildContext(ExpressionNode node) { + Context context = new MapContext(); + addRoot(node, context); + return context; + } + + private Value emptyStringValue() { + return new StringValue(""); + } + + private Value emptyDoubleValue() { + return new DoubleValue(0.0); + } + + private Value emptyTensorValue(TensorType type) { + Tensor empty = Tensor.Builder.of(type).build(); + return new TensorValue(empty); + } + + private void addRoot(ExpressionNode node, Context context) { + addChildren(node, context); + if (node instanceof ReferenceNode) { + ReferenceNode referenceNode = (ReferenceNode) node; + addIfAttribute(referenceNode, context); + addIfConstant(referenceNode, context); + addIfQuery(referenceNode, context); + addIfTensorFrom(referenceNode, context); + addIfMacro(referenceNode, context); + } + } + + private void addChildren(ExpressionNode node, Context context) { + if (node instanceof CompositeNode) { + List<ExpressionNode> children = ((CompositeNode) node).children(); + for (ExpressionNode child : children) { + addRoot(child, context); + } + } + } + + private void addIfAttribute(ReferenceNode node, Context context) { + if (!node.getName().equals("attribute")) { + return; + } + if (node.children().size() == 0) { + return; + } + String attribute = node.children().get(0).toString(); + Attribute a = search.getAttribute(attribute); + Value v; + if (a.getType() == Attribute.Type.STRING) { + v = emptyStringValue(); + } else if (a.getType() == Attribute.Type.TENSOR) { + v = emptyTensorValue(a.tensorType().orElseThrow(RuntimeException::new)); + } else { + v = emptyDoubleValue(); + } + context.put(node.toString(), v); + } + + private void addIfConstant(ReferenceNode node, Context context) { + if (!node.getName().equals("constant")) { + return; + } + if (node.children().size() != 1) { + return; + } + ExpressionNode child = node.children().get(0); + while (child instanceof CompositeNode && ((CompositeNode) child).children().size() > 0) { + child = ((CompositeNode) child).children().get(0); + } + String name = child.toString(); + addIfConstantInRankProfile(name, node, context); + addIfConstantInRankingConstants(name, node, context); + } + + private void addIfConstantInRankProfile(String name, ReferenceNode node, Context context) { + if (rankprofile.getConstants().containsKey(name)) { + context.put(node.toString(), rankprofile.getConstants().get(name)); + } + } + + private void addIfConstantInRankingConstants(String name, ReferenceNode node, Context context) { + for (RankingConstant rankingConstant : search.getRankingConstants()) { + if (rankingConstant.getName().equals(name)) { + context.put(node.toString(), emptyTensorValue(rankingConstant.getTensorType())); + } + } + } + + private void addIfQuery(ReferenceNode node, Context context) { + if (!node.getName().equals("query")) { + return; + } + if (node.children().size() != 1) { + return; + } + String name = node.children().get(0).toString(); + if (rankprofile.getQueryFeatureTypes().containsKey(name)) { + String type = rankprofile.getQueryFeatureTypes().get(name); + Value v; + if (type.contains("tensor")) { + v = emptyTensorValue(TensorType.fromSpec(type)); + } else if (type.equalsIgnoreCase("string")) { + v = emptyStringValue(); + } else { + v = emptyDoubleValue(); + } + context.put(node.toString(), v); + } + } + + private void addIfTensorFrom(ReferenceNode node, Context context) { + if (!node.getName().startsWith("tensorFrom")) { + return; + } + if (node.children().size() < 1 || node.children().size() > 2) { + return; + } + ExpressionNode source = node.children().get(0); + if (source instanceof CompositeNode && ((CompositeNode) source).children().size() > 0) { + source = ((CompositeNode) source).children().get(0); + } + String dimension = source.toString(); + if (node.children().size() == 2) { + dimension = node.children().get(1).toString(); + } + TensorType type = (new TensorType.Builder()).mapped(dimension).build(); + context.put(node.toString(), emptyTensorValue(type)); + } + + private void addIfMacro(ReferenceNode node, Context context) { + RankProfile.Macro macro = macros.get(node.getName()); + if (macro == null) { + return; + } + ExpressionNode root = macro.getRankingExpression().getRoot(); + Context macroContext = buildContext(root); + addMacroArguments(node, context, macro, macroContext); + Value value = root.evaluate(macroContext); + context.put(node.toString(), value); + } + + private void addMacroArguments(ReferenceNode node, Context context, RankProfile.Macro macro, Context macroContext) { + if (macro.getFormalParams().size() > 0 && node.children().size() > 0) { + for (int i = 0; i < macro.getFormalParams().size() && i < node.children().size(); ++i) { + String param = macro.getFormalParams().get(i); + ExpressionNode argumentExpression = node.children().get(i); + Value arg = argumentExpression.evaluate(context); + macroContext.put(param, arg); + } + } + } + +} diff --git a/config-model/src/test/java/com/yahoo/searchdefinition/processing/TensorTransformTestCase.java b/config-model/src/test/java/com/yahoo/searchdefinition/processing/TensorTransformTestCase.java new file mode 100644 index 00000000000..aa3fd4e9aae --- /dev/null +++ b/config-model/src/test/java/com/yahoo/searchdefinition/processing/TensorTransformTestCase.java @@ -0,0 +1,205 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchdefinition.processing; + +import com.yahoo.collections.Pair; +import com.yahoo.component.ComponentId; +import com.yahoo.config.model.application.provider.BaseDeployLogger; +import com.yahoo.search.query.profile.QueryProfileRegistry; +import com.yahoo.search.query.profile.types.FieldDescription; +import com.yahoo.search.query.profile.types.FieldType; +import com.yahoo.search.query.profile.types.QueryProfileType; +import com.yahoo.search.query.profile.types.QueryProfileTypeRegistry; +import com.yahoo.searchdefinition.RankProfile; +import com.yahoo.searchdefinition.RankProfileRegistry; +import com.yahoo.searchdefinition.Search; +import com.yahoo.searchdefinition.SearchBuilder; +import com.yahoo.searchdefinition.SearchDefinitionTestCase; +import com.yahoo.searchdefinition.derived.AttributeFields; +import com.yahoo.searchdefinition.derived.RawRankProfile; +import com.yahoo.searchdefinition.parser.ParseException; +import com.yahoo.vespa.model.container.search.QueryProfiles; +import org.junit.Test; + +import java.util.List; + +import static org.junit.Assert.assertTrue; + +public class TensorTransformTestCase extends SearchDefinitionTestCase { + + @Test + public void requireThatNormalMaxAndMinAreNotReplaced() throws ParseException { + assertContainsExpression("max(1.0,2.0)", "max(1.0,2.0)"); + assertContainsExpression("min(attribute(double_field),x)", "min(attribute(double_field),x)"); + assertContainsExpression("max(attribute(double_field),attribute(double_array_field))", "max(attribute(double_field),attribute(double_array_field))"); + assertContainsExpression("min(attribute(tensor_field_1),attribute(double_field))", "min(attribute(tensor_field_1),attribute(double_field))"); + assertContainsExpression("max(attribute(tensor_field_1),attribute(tensor_field_2))", "max(attribute(tensor_field_1),attribute(tensor_field_2))"); + assertContainsExpression("min(test_constant_tensor,1.0)", "min(constant(test_constant_tensor),1.0)"); + assertContainsExpression("max(base_constant_tensor,1.0)", "max(constant(base_constant_tensor),1.0)"); + assertContainsExpression("min(constant(file_constant_tensor),1.0)", "min(constant(file_constant_tensor),1.0)"); + assertContainsExpression("max(query(q),1.0)", "max(query(q),1.0)"); + assertContainsExpression("max(query(n),1.0)", "max(query(n),1.0)"); + } + + @Test + public void requireThatMaxAndMinWithTensorAttributesAreReplaced() throws ParseException { + assertContainsExpression("max(attribute(tensor_field_1),x)", "reduce(attribute(tensor_field_1),max,x)"); + assertContainsExpression("1 + max(attribute(tensor_field_1),x)", "1+reduce(attribute(tensor_field_1),max,x)"); + assertContainsExpression("if(attribute(double_field),1 + max(attribute(tensor_field_1),x),0)", "if(attribute(double_field),1+reduce(attribute(tensor_field_1),max,x),0)"); + assertContainsExpression("max(max(attribute(tensor_field_1),attribute(tensor_field_2)),x)", "reduce(max(attribute(tensor_field_1),attribute(tensor_field_2)),max,x)"); + assertContainsExpression("max(if(attribute(double_field),attribute(tensor_field_1),attribute(tensor_field_2)),x)", "reduce(if(attribute(double_field),attribute(tensor_field_1),attribute(tensor_field_2)),max,x)"); + assertContainsExpression("max(max(attribute(tensor_field_1),x),x)", "max(reduce(attribute(tensor_field_1),max,x),x)"); // will result in deploy error. + assertContainsExpression("max(max(attribute(tensor_field_2),x),y)", "reduce(reduce(attribute(tensor_field_2),max,x),max,y)"); + } + + @Test + public void requireThatMaxAndMinWithConstantTensorsAreReplaced() throws ParseException { + assertContainsExpression("max(test_constant_tensor,x)", "reduce(constant(test_constant_tensor),max,x)"); + assertContainsExpression("max(base_constant_tensor,x)", "reduce(constant(base_constant_tensor),max,x)"); + assertContainsExpression("min(constant(file_constant_tensor),x)", "reduce(constant(file_constant_tensor),min,x)"); + } + + @Test + public void requireThatMaxAndMinWithTensorExpressionsAreReplaced() throws ParseException { + assertContainsExpression("min(attribute(double_field) + attribute(tensor_field_1),x)", "reduce(attribute(double_field)+attribute(tensor_field_1),min,x)"); + assertContainsExpression("min(attribute(tensor_field_1) * attribute(tensor_field_2),x)", "reduce(attribute(tensor_field_1)*attribute(tensor_field_2),min,x)"); + assertContainsExpression("min(join(attribute(tensor_field_1),attribute(tensor_field_2),f(x,y)(x*y)),x)", "reduce(join(attribute(tensor_field_1),attribute(tensor_field_2),f(x,y)(x*y)),min,x)"); + assertContainsExpression("min(join(tensor_field_1,tensor_field_2,f(x,y)(x*y)),x)", "min(join(tensor_field_1,tensor_field_2,f(x,y)(x*y)),x)"); + } + + @Test + public void requireThatMaxAndMinWithTensorFromIsReplaced() throws ParseException { + assertContainsExpression("max(tensorFromLabels(attribute(double_array_field)),double_array_field)", "reduce(tensorFromLabels(attribute(double_array_field)),max,double_array_field)"); + assertContainsExpression("max(tensorFromLabels(attribute(double_array_field),x),x)", "reduce(tensorFromLabels(attribute(double_array_field),x),max,x)"); + assertContainsExpression("max(tensorFromWeightedSet(attribute(weightedset_field)),weightedset_field)", "reduce(tensorFromWeightedSet(attribute(weightedset_field)),max,weightedset_field)"); + assertContainsExpression("max(tensorFromWeightedSet(attribute(weightedset_field),x),x)", "reduce(tensorFromWeightedSet(attribute(weightedset_field),x),max,x)"); + } + + @Test + public void requireThatMaxAndMinWithTensorInQueryIsReplaced() throws ParseException { + assertContainsExpression("max(query(q),x)", "reduce(query(q),max,x)"); + assertContainsExpression("max(query(n),x)", "max(query(n),x)"); + } + + @Test + public void requireThatMaxAndMinWithTensoresReturnedFromMacrosAreReplaced() throws ParseException { + assertContainsExpression("max(returns_tensor,x)", "reduce(rankingExpression(returns_tensor),max,x)"); + assertContainsExpression("max(wraps_returns_tensor,x)", "reduce(rankingExpression(wraps_returns_tensor),max,x)"); + assertContainsExpression("max(tensor_inheriting,x)", "reduce(rankingExpression(tensor_inheriting),max,x)"); + assertContainsExpression("max(returns_tensor_with_arg(attribute(tensor_field_1)),x)", "reduce(rankingExpression(returns_tensor_with_arg@),max,x)"); + } + + + private void assertContainsExpression(String expr, String transformedExpression) throws ParseException { + assertTrue("Expected expression '" + transformedExpression + "' not found", + containsExpression(expr, transformedExpression)); + } + + private boolean containsExpression(String expr, String transformedExpression) throws ParseException { + for (Pair<String, String> rankPropertyExpression : buildSearch(expr)) { + String rankProperty = rankPropertyExpression.getFirst(); + if (rankProperty.equals("rankingExpression(firstphase).rankingScript")) { + String rankExpression = censorBindingHash(rankPropertyExpression.getSecond().replace(" ","")); + return rankExpression.equals(transformedExpression); + } + } + return false; + } + + private List<Pair<String, String>> buildSearch(String expression) throws ParseException { + RankProfileRegistry rankProfileRegistry = new RankProfileRegistry(); + SearchBuilder builder = new SearchBuilder(rankProfileRegistry); + builder.importString( + "search test {\n" + + " document test { \n" + + " field double_field type double { \n" + + " indexing: summary | attribute \n" + + " }\n" + + " field double_array_field type array<double> { \n" + + " indexing: summary | attribute \n" + + " }\n" + + " field weightedset_field type weightedset<double> { \n" + + " indexing: summary | attribute \n" + + " }\n" + + " field tensor_field_1 type tensor(x{}) { \n" + + " indexing: summary | attribute \n" + + " attribute: tensor(x{}) \n" + + " }\n" + + " field tensor_field_2 type tensor(x[3],y[3]) { \n" + + " indexing: summary | attribute \n" + + " attribute: tensor(x[3],y[3]) \n" + + " }\n" + + " }\n" + + " constant file_constant_tensor {\n" + + " file: constants/tensor.json\n" + + " type: tensor(x{})\n" + + " }\n" + + " rank-profile base {\n" + + " constants {\n" + + " base_constant_tensor {\n" + + " value: { {x:0}:0 }\n" + + " }\n" + + " }\n" + + " macro base_tensor() {\n" + + " expression: constant(base_constant_tensor)\n" + + " }\n" + + " }\n" + + " rank-profile test inherits base {\n" + + " constants {\n" + + " test_constant_tensor {\n" + + " value: { {x:0}:1 }\n" + + " }\n" + + " }\n" + + " macro returns_tensor_with_arg(arg1) {\n" + + " expression: 2.0 * arg1\n" + + " }\n" + + " macro wraps_returns_tensor() {\n" + + " expression: returns_tensor\n" + + " }\n" + + " macro returns_tensor() {\n" + + " expression: attribute(tensor_field_2)\n" + + " }\n" + + " macro tensor_inheriting() {\n" + + " expression: base_tensor\n" + + " }\n" + + " first-phase {\n" + + " expression: " + expression + "\n" + + " }\n" + + " }\n" + + "}\n"); + builder.build(new BaseDeployLogger(), setupQueryProfileTypes()); + Search s = builder.getSearch(); + RankProfile test = rankProfileRegistry.getRankProfile(s, "test").compile(); + List<Pair<String, String>> testRankProperties = new RawRankProfile(test, new AttributeFields(s)).configProperties(); + for (Object o : testRankProperties) + System.out.println(o); + return testRankProperties; + } + + private static QueryProfiles setupQueryProfileTypes() { + QueryProfileRegistry registry = new QueryProfileRegistry(); + QueryProfileTypeRegistry typeRegistry = registry.getTypeRegistry(); + QueryProfileType type = new QueryProfileType(new ComponentId("testtype")); + type.addField(new FieldDescription("ranking.features.query(q)", + FieldType.fromString("tensor(x{})", typeRegistry)), typeRegistry); + type.addField(new FieldDescription("ranking.features.query(n)", + FieldType.fromString("integer", typeRegistry)), typeRegistry); + typeRegistry.register(type); + return new QueryProfiles(registry); + } + + private String censorBindingHash(String s) { + StringBuilder b = new StringBuilder(); + boolean areInHash = false; + for (int i = 0; i < s.length() ; i++) { + char current = s.charAt(i); + if ( ! Character.isLetterOrDigit(current)) // end of hash + areInHash = false; + if ( ! areInHash) + b.append(current); + if (current == '@') // start of hash + areInHash = true; + } + return b.toString(); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/Simplifier.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/Simplifier.java index ede7c861d98..ebad0d5c21f 100644 --- a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/Simplifier.java +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/Simplifier.java @@ -94,7 +94,7 @@ public class Simplifier extends ExpressionTransformer { private ExpressionNode transformIf(IfNode node) { if ( ! isConstant(node.getCondition())) return node; - if (((BooleanValue)node.getCondition().evaluate(null)).asBoolean()) + if ((node.getCondition().evaluate(null)).asBoolean()) return node.getTrueExpression(); else return node.getFalseExpression(); |