1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
|
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.searchlib.gbdt;
import com.yahoo.searchlib.rankingexpression.evaluation.StringValue;
import com.yahoo.searchlib.rankingexpression.evaluation.Value;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import java.util.List;
import java.util.Optional;
/**
 * A node in a GBDT tree which references a feature value.
 *
 * A feature node holds the name of the feature it tests and two child nodes. Subclasses supply
 * the textual condition which is appended to the feature name when the node is rendered as a
 * ranking expression.
 *
 * @author bratseth
 */
public abstract class FeatureNode extends TreeNode {

    private final String feature;
    private final TreeNode left;
    private final TreeNode right;

    /**
     * Creates a feature node.
     *
     * @param feature the name of the feature this node references
     * @param samples the optional sample count of this node, passed on to the superclass
     * @param left the child rendered as the second argument of the generated {@code if}
     * @param right the child rendered as the third argument of the generated {@code if}
     */
    public FeatureNode(String feature, Optional<Integer> samples, TreeNode left, TreeNode right) {
        super(samples);
        this.feature = feature;
        this.left = left;
        this.right = right;
    }

    /** Returns the name of the feature referenced by this node */
    public String feature() { return feature; }

    /** Returns the left child of this node */
    public TreeNode left() { return left; }

    /** Returns the right child of this node */
    public TreeNode right() { return right; }

    // TODO: Integrate with programmatic API rather than strings
    /**
     * Renders this node and its subtrees as
     * {@code if (<feature><condition>, <left>, <right>[, <probability>])}.
     * The probability argument is only included when it can be computed, see
     * {@link #trueProbability()}.
     */
    @Override
    public String toRankingExpression() {
        StringBuilder expression = new StringBuilder();
        expression.append("if (").append(feature).append(rankingExpressionCondition());
        expression.append(", ").append(left.toRankingExpression());
        expression.append(", ").append(right.toRankingExpression());
        trueProbability().ifPresent(probability -> expression.append(", ").append(probability));
        expression.append(")");
        return expression.toString();
    }

    /**
     * Returns the fraction of the samples of the two children which belongs to the left child.
     *
     * @return the fraction, or empty if either child lacks a sample count or if the total
     *         count is zero (in which case the fraction is undefined and would be NaN)
     */
    private Optional<Float> trueProbability() {
        if ( ! left.samples().isPresent() || ! right.samples().isPresent()) return Optional.empty();

        int leftCount = left.samples().get();
        int rightCount = right.samples().get();
        long total = (long) leftCount + rightCount; // sum as long: two large int counts must not overflow
        if (total == 0) return Optional.empty();    // avoid emitting 0/0 = NaN into the expression
        return Optional.of((float) leftCount / total);
    }

    /** Returns the condition text which is appended directly after the feature name in the generated expression */
    protected abstract String rankingExpressionCondition();

    /**
     * Creates a feature node from a DOM node having exactly two child elements and the
     * attributes "feature" and "value", and optionally "nSamples".
     *
     * A category node is created if the feature name ends with "$", if more than one value
     * is given, or if the single value is a string. Otherwise a numeric node is created.
     *
     * @param node the DOM node to convert
     * @return the feature node corresponding to the given DOM node
     * @throws IllegalArgumentException if the node does not have exactly two child elements,
     *         or if a numeric node is called for but the "value" attribute contains no values
     */
    public static FeatureNode fromDom(Node node) {
        List<Element> children = XmlHelper.getChildElements(node, null);
        if (children.size() != 2) {
            throw new IllegalArgumentException("Expected 2 children in element '" + node.getNodeName() + "', got " +
                                               children.size() + ".");
        }

        String name = XmlHelper.getAttributeText(node, "feature");
        Value[] values = toValues(XmlHelper.getAttributeText(node, "value"));
        Optional<Integer> samples = toInteger(XmlHelper.getOptionalAttributeText(node, "nSamples"));
        TreeNode left = TreeNode.fromDom(children.get(0));
        TreeNode right = TreeNode.fromDom(children.get(1));

        boolean categorical = name.endsWith("$") ||
                              values.length > 1 ||
                              (values.length == 1 && values[0] instanceof StringValue);
        if (categorical)
            return new CategoryFeatureNode(name, values, samples, left, right);

        // A value attribute consisting of separators only (such as ",") splits into zero values
        if (values.length == 0)
            throw new IllegalArgumentException("Expected at least one value in attribute 'value' of element '" +
                                               node.getNodeName() + "'.");
        return new NumericFeatureNode(name, values[0], samples, left, right);
    }

    /**
     * Converts one or more comma-separated values into an array of values.
     * Each item is parsed by {@code Value.parse}; an item which causes a
     * NumberFormatException is kept as a string value instead.
     * Note that trailing empty items are dropped by the split, so the result may be empty.
     */
    private static Value[] toValues(String valueListString) {
        String[] valueStrings = valueListString.split(",");
        Value[] values = new Value[valueStrings.length];
        for (int i = 0; i < valueStrings.length; i++) {
            try {
                values[i] = Value.parse(valueStrings[i]);
            }
            catch (NumberFormatException e) { // allow un(double)quoted string values in Gbdt XML trees
                values[i] = new StringValue(valueStrings[i]);
            }
        }
        return values;
    }

}
|