about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jon Bratseth <bratseth@oath.com>, 2019-11-20 14:14:48 +0100
committer: GitHub <noreply@github.com>, 2019-11-20 14:14:48 +0100
commit: 7a4b07f2ef91d0cb75e54db3a8efd56423dc771c (patch)
tree: 77e8daf4c11b203befbdbd3edebe84ed3f27b2e0
parent: af035fbe06eb7b24080a7a29a774b442b70a517d (diff)
parent: b8e3a56c3d38122071738802c89a122ded21a596 (diff)
Merge pull request #11365 from vespa-engine/lesters/xgboost-if-inversion-2
Add XGBoost if-inversion for missing features
-rw-r--r-- application/src/test/java/com/yahoo/application/container/ContainerModelEvaluationTest.java | 2
-rw-r--r-- config-model/src/test/cfg/application/ml_models/models/xgboost.2.2.json | 6
-rw-r--r-- config-model/src/test/integration/xgboost/models/xgboost.2.2.json | 6
-rw-r--r-- config-model/src/test/java/com/yahoo/searchdefinition/processing/RankingExpressionWithXGBoostTestCase.java | 4
-rw-r--r-- config-model/src/test/java/com/yahoo/vespa/model/ml/MlModelsTest.java | 2
-rw-r--r-- model-integration/src/main/java/ai/vespa/rankingexpression/importer/xgboost/XGBoostParser.java | 9
-rw-r--r-- model-integration/src/test/java/ai/vespa/rankingexpression/importer/xgboost/XGBoostImportEvaluationTestCase.java | 2
-rw-r--r-- model-integration/src/test/java/ai/vespa/rankingexpression/importer/xgboost/XGBoostImportTestCase.java | 15
-rw-r--r-- model-integration/src/test/models/xgboost/xgboost.2.2.if_inversion.json | 19
-rw-r--r-- model-integration/src/test/models/xgboost/xgboost.2.2.json | 6
-rw-r--r-- model-integration/src/test/models/xgboost/xgboost.test.json (renamed from model-integration/src/test/models/xgboost/xgboost.test.if_inversion.json) | 0
11 files changed, 20 insertions, 51 deletions
diff --git a/application/src/test/java/com/yahoo/application/container/ContainerModelEvaluationTest.java b/application/src/test/java/com/yahoo/application/container/ContainerModelEvaluationTest.java
index b7697d30447..79510375414 100644
--- a/application/src/test/java/com/yahoo/application/container/ContainerModelEvaluationTest.java
+++ b/application/src/test/java/com/yahoo/application/container/ContainerModelEvaluationTest.java
@@ -50,7 +50,7 @@ public class ContainerModelEvaluationTest {
}
{
- String expected = "{\"cells\":[{\"address\":{},\"value\":-4.376589999999999}]}";
+ String expected = "{\"cells\":[{\"address\":{},\"value\":2.496898}]}";
assertResponse("http://localhost/model-evaluation/v1/xgboost_xgboost_2_2/eval", expected, jdisc);
}
diff --git a/config-model/src/test/cfg/application/ml_models/models/xgboost.2.2.json b/config-model/src/test/cfg/application/ml_models/models/xgboost.2.2.json
index f8949b47e52..a18b9966b55 100644
--- a/config-model/src/test/cfg/application/ml_models/models/xgboost.2.2.json
+++ b/config-model/src/test/cfg/application/ml_models/models/xgboost.2.2.json
@@ -1,16 +1,16 @@
[
- { "nodeid": 0, "depth": 0, "split": "f29", "split_condition": -0.1234567, "yes": 1, "no": 2, "missing": 1, "children": [
+ { "nodeid": 0, "depth": 0, "split": "f29", "split_condition": -0.1234567, "yes": 1, "no": 2, "missing": 2, "children": [
{ "nodeid": 1, "depth": 1, "split": "f56", "split_condition": -0.242398, "yes": 3, "no": 4, "missing": 3, "children": [
{ "nodeid": 3, "leaf": 1.71218 },
{ "nodeid": 4, "leaf": -1.70044 }
]},
- { "nodeid": 2, "depth": 1, "split": "f109", "split_condition": 0.8723473, "yes": 5, "no": 6, "missing": 5, "children": [
+ { "nodeid": 2, "depth": 1, "split": "f109", "split_condition": 0.8723473, "yes": 5, "no": 6, "missing": 6, "children": [
{ "nodeid": 5, "leaf": -1.94071 },
{ "nodeid": 6, "leaf": 1.85965 }
]}
]},
{ "nodeid": 0, "depth": 0, "split": "f60", "split_condition": -0.482947, "yes": 1, "no": 2, "missing": 1, "children": [
- { "nodeid": 1, "depth": 1, "split": "f29", "split_condition": -4.2387498, "yes": 3, "no": 4, "missing": 3, "children": [
+ { "nodeid": 1, "depth": 1, "split": "f29", "split_condition": -4.2387498, "yes": 3, "no": 4, "missing": 4, "children": [
{ "nodeid": 3, "leaf": 0.784718 },
{ "nodeid": 4, "leaf": -0.96853 }
]},
diff --git a/config-model/src/test/integration/xgboost/models/xgboost.2.2.json b/config-model/src/test/integration/xgboost/models/xgboost.2.2.json
index f8949b47e52..a18b9966b55 100644
--- a/config-model/src/test/integration/xgboost/models/xgboost.2.2.json
+++ b/config-model/src/test/integration/xgboost/models/xgboost.2.2.json
@@ -1,16 +1,16 @@
[
- { "nodeid": 0, "depth": 0, "split": "f29", "split_condition": -0.1234567, "yes": 1, "no": 2, "missing": 1, "children": [
+ { "nodeid": 0, "depth": 0, "split": "f29", "split_condition": -0.1234567, "yes": 1, "no": 2, "missing": 2, "children": [
{ "nodeid": 1, "depth": 1, "split": "f56", "split_condition": -0.242398, "yes": 3, "no": 4, "missing": 3, "children": [
{ "nodeid": 3, "leaf": 1.71218 },
{ "nodeid": 4, "leaf": -1.70044 }
]},
- { "nodeid": 2, "depth": 1, "split": "f109", "split_condition": 0.8723473, "yes": 5, "no": 6, "missing": 5, "children": [
+ { "nodeid": 2, "depth": 1, "split": "f109", "split_condition": 0.8723473, "yes": 5, "no": 6, "missing": 6, "children": [
{ "nodeid": 5, "leaf": -1.94071 },
{ "nodeid": 6, "leaf": 1.85965 }
]}
]},
{ "nodeid": 0, "depth": 0, "split": "f60", "split_condition": -0.482947, "yes": 1, "no": 2, "missing": 1, "children": [
- { "nodeid": 1, "depth": 1, "split": "f29", "split_condition": -4.2387498, "yes": 3, "no": 4, "missing": 3, "children": [
+ { "nodeid": 1, "depth": 1, "split": "f29", "split_condition": -4.2387498, "yes": 3, "no": 4, "missing": 4, "children": [
{ "nodeid": 3, "leaf": 0.784718 },
{ "nodeid": 4, "leaf": -0.96853 }
]},
diff --git a/config-model/src/test/java/com/yahoo/searchdefinition/processing/RankingExpressionWithXGBoostTestCase.java b/config-model/src/test/java/com/yahoo/searchdefinition/processing/RankingExpressionWithXGBoostTestCase.java
index 832a974082c..f73d1c823e2 100644
--- a/config-model/src/test/java/com/yahoo/searchdefinition/processing/RankingExpressionWithXGBoostTestCase.java
+++ b/config-model/src/test/java/com/yahoo/searchdefinition/processing/RankingExpressionWithXGBoostTestCase.java
@@ -19,8 +19,8 @@ public class RankingExpressionWithXGBoostTestCase {
private final Path applicationDir = Path.fromString("src/test/integration/xgboost/");
private final static String vespaExpression =
- "if (f29 < -0.1234567, if (f56 < -0.242398, 1.71218, -1.70044), if (f109 < 0.8723473, -1.94071, 1.85965)) + " +
- "if (f60 < -0.482947, if (f29 < -4.2387498, 0.784718, -0.96853), -6.23624)";
+ "if (f29 < -0.1234567, if (!(f56 >= -0.242398), 1.71218, -1.70044), if (f109 < 0.8723473, -1.94071, 1.85965)) + " +
+ "if (!(f60 >= -0.482947), if (f29 < -4.2387498, 0.784718, -0.96853), -6.23624)";
@After
public void removeGeneratedModelFiles() {
diff --git a/config-model/src/test/java/com/yahoo/vespa/model/ml/MlModelsTest.java b/config-model/src/test/java/com/yahoo/vespa/model/ml/MlModelsTest.java
index d66f376ed6a..c5c475360a3 100644
--- a/config-model/src/test/java/com/yahoo/vespa/model/ml/MlModelsTest.java
+++ b/config-model/src/test/java/com/yahoo/vespa/model/ml/MlModelsTest.java
@@ -70,7 +70,7 @@ public class MlModelsTest {
"rankingExpression(mnist_tensorflow).rankingScript: join(reduce(join(map(join(reduce(join(join(join(rankingExpression(imported_ml_function_mnist_saved_dnn_hidden1_add), 0.009999999776482582, f(a,b)(a * b)), rankingExpression(imported_ml_function_mnist_saved_dnn_hidden1_add), f(a,b)(max(a,b))), constant(mnist_saved_dnn_hidden2_weights_read), f(a,b)(a * b)), sum, d3), constant(mnist_saved_dnn_hidden2_bias_read), f(a,b)(a + b)), f(a)(1.0507009873554805 * if (a >= 0, a, 1.6732632423543772 * (exp(a) - 1)))), constant(mnist_saved_dnn_outputs_weights_read), f(a,b)(a * b)), sum, d2), constant(mnist_saved_dnn_outputs_bias_read), f(a,b)(a + b))\n" +
"rankingExpression(mnist_softmax_tensorflow).rankingScript: join(reduce(join(rename(rankingExpression(Placeholder), (d0, d1), (d0, d2)), constant(mnist_softmax_saved_layer_Variable_read), f(a,b)(a * b)), sum, d2), constant(mnist_softmax_saved_layer_Variable_1_read), f(a,b)(a + b))\n" +
"rankingExpression(mnist_softmax_onnx).rankingScript: join(reduce(join(rename(rankingExpression(Placeholder), (d0, d1), (d0, d2)), constant(mnist_softmax_Variable), f(a,b)(a * b)), sum, d2), constant(mnist_softmax_Variable_1), f(a,b)(a + b))\n" +
- "rankingExpression(my_xgboost).rankingScript: if (f29 < -0.1234567, if (f56 < -0.242398, 1.71218, -1.70044), if (f109 < 0.8723473, -1.94071, 1.85965)) + if (f60 < -0.482947, if (f29 < -4.2387498, 0.784718, -0.96853), -6.23624)\n" +
+ "rankingExpression(my_xgboost).rankingScript: if (f29 < -0.1234567, if (!(f56 >= -0.242398), 1.71218, -1.70044), if (f109 < 0.8723473, -1.94071, 1.85965)) + if (!(f60 >= -0.482947), if (f29 < -4.2387498, 0.784718, -0.96853), -6.23624)\n" +
"vespa.rank.firstphase: rankingExpression(firstphase)\n" +
"rankingExpression(firstphase).rankingScript: rankingExpression(mnist_tensorflow) + rankingExpression(mnist_softmax_tensorflow) + rankingExpression(mnist_softmax_onnx) + rankingExpression(my_xgboost)\n" +
"vespa.type.attribute.argument: tensor<float>(d0[],d1[784])\n";
diff --git a/model-integration/src/main/java/ai/vespa/rankingexpression/importer/xgboost/XGBoostParser.java b/model-integration/src/main/java/ai/vespa/rankingexpression/importer/xgboost/XGBoostParser.java
index 9de07eed475..c41a114a970 100644
--- a/model-integration/src/main/java/ai/vespa/rankingexpression/importer/xgboost/XGBoostParser.java
+++ b/model-integration/src/main/java/ai/vespa/rankingexpression/importer/xgboost/XGBoostParser.java
@@ -16,7 +16,6 @@ import com.fasterxml.jackson.databind.ObjectMapper;
class XGBoostParser {
private List<XGBoostTree> xgboostTrees;
- private boolean doIfInversion = false;
/**
* Constructor stores parsed JSON trees.
@@ -32,7 +31,6 @@ class XGBoostParser {
for (JsonNode treeNode : forestNode) {
this.xgboostTrees.add(mapper.treeToValue(treeNode, XGBoostTree.class));
}
- doIfInversion = filePath.endsWith("if_inversion.json");
}
/**
@@ -71,9 +69,12 @@ class XGBoostParser {
trueExp = treeToRankExp(node.getChildren().get(1));
falseExp = treeToRankExp(node.getChildren().get(0));
}
- String condition = node.getSplit() + " < " + node.getSplit_condition();
- if (doIfInversion && node.getMissing() == node.getYes()) {
+ String condition;
+ if (node.getMissing() == node.getYes()) {
+ // Note: this is for handling missing features, as the backend handles comparison with NaN as false.
condition = "!(" + node.getSplit() + " >= " + node.getSplit_condition() + ")";
+ } else {
+ condition = node.getSplit() + " < " + node.getSplit_condition();
}
return "if (" + condition + ", " + trueExp + ", " + falseExp + ")";
}
diff --git a/model-integration/src/test/java/ai/vespa/rankingexpression/importer/xgboost/XGBoostImportEvaluationTestCase.java b/model-integration/src/test/java/ai/vespa/rankingexpression/importer/xgboost/XGBoostImportEvaluationTestCase.java
index ec2498b3923..4c2d72dfb21 100644
--- a/model-integration/src/test/java/ai/vespa/rankingexpression/importer/xgboost/XGBoostImportEvaluationTestCase.java
+++ b/model-integration/src/test/java/ai/vespa/rankingexpression/importer/xgboost/XGBoostImportEvaluationTestCase.java
@@ -22,7 +22,7 @@ public class XGBoostImportEvaluationTestCase {
@Test
public void testXGBoostEvaluation() {
RankingExpression expression = new XGBoostImporter()
- .importModel("xgb", "src/test/models/xgboost/xgboost.test.if_inversion.json")
+ .importModel("xgb", "src/test/models/xgboost/xgboost.test.json")
.expressions().get("xgb");
ArrayContext context = new ArrayContext(expression, DoubleValue.NaN);
diff --git a/model-integration/src/test/java/ai/vespa/rankingexpression/importer/xgboost/XGBoostImportTestCase.java b/model-integration/src/test/java/ai/vespa/rankingexpression/importer/xgboost/XGBoostImportTestCase.java
index 6d75b9f40ff..9a71905aa65 100644
--- a/model-integration/src/test/java/ai/vespa/rankingexpression/importer/xgboost/XGBoostImportTestCase.java
+++ b/model-integration/src/test/java/ai/vespa/rankingexpression/importer/xgboost/XGBoostImportTestCase.java
@@ -20,22 +20,9 @@ public class XGBoostImportTestCase {
assertEquals(1, model.expressions().size());
RankingExpression expression = model.expressions().get("test");
assertNotNull(expression);
- assertEquals("if (f29 < -0.1234567, if (f56 < -0.242398, 1.71218, -1.70044), if (f109 < 0.8723473, -1.94071, 1.85965)) + if (f60 < -0.482947, if (f29 < -4.2387498, 0.784718, -0.96853), -6.23624)",
- expression.getRoot().toString());
- assertEquals(1, model.outputExpressions().size());
- }
-
- @Test
- public void testXGBoostWithIfInversion() {
- ImportedModel model = new XGBoostImporter().importModel("test", "src/test/models/xgboost/xgboost.2.2.if_inversion.json");
- assertTrue("All inputs are scalar", model.inputs().isEmpty());
- assertEquals(1, model.expressions().size());
- RankingExpression expression = model.expressions().get("test");
- assertNotNull(expression);
- assertEquals("if (f29 < -0.1234567, if (!(f56 >= -0.242398), 1.71218, -1.70044), if (f109 < 0.8723473, -1.94071, 1.85965)) + if (f60 < -0.482947, if (!(f29 >= -4.2387498), 0.784718, -0.96853), -6.23624)",
+ assertEquals("if (f29 < -0.1234567, if (!(f56 >= -0.242398), 1.71218, -1.70044), if (f109 < 0.8723473, -1.94071, 1.85965)) + if (!(f60 >= -0.482947), if (f29 < -4.2387498, 0.784718, -0.96853), -6.23624)",
expression.getRoot().toString());
assertEquals(1, model.outputExpressions().size());
}
-
}
diff --git a/model-integration/src/test/models/xgboost/xgboost.2.2.if_inversion.json b/model-integration/src/test/models/xgboost/xgboost.2.2.if_inversion.json
deleted file mode 100644
index c1217ef55a6..00000000000
--- a/model-integration/src/test/models/xgboost/xgboost.2.2.if_inversion.json
+++ /dev/null
@@ -1,19 +0,0 @@
-[
- { "nodeid": 0, "depth": 0, "split": "f29", "split_condition": -0.1234567, "yes": 1, "no": 2, "missing": 2, "children": [
- { "nodeid": 1, "depth": 1, "split": "f56", "split_condition": -0.242398, "yes": 3, "no": 4, "missing": 3, "children": [
- { "nodeid": 3, "leaf": 1.71218 },
- { "nodeid": 4, "leaf": -1.70044 }
- ]},
- { "nodeid": 2, "depth": 1, "split": "f109", "split_condition": 0.8723473, "yes": 5, "no": 6, "missing": 6, "children": [
- { "nodeid": 5, "leaf": -1.94071 },
- { "nodeid": 6, "leaf": 1.85965 }
- ]}
- ]},
- { "nodeid": 0, "depth": 0, "split": "f60", "split_condition": -0.482947, "yes": 1, "no": 2, "missing": 2, "children": [
- { "nodeid": 1, "depth": 1, "split": "f29", "split_condition": -4.2387498, "yes": 3, "no": 4, "missing": 3, "children": [
- { "nodeid": 3, "leaf": 0.784718 },
- { "nodeid": 4, "leaf": -0.96853 }
- ]},
- { "nodeid": 2, "leaf": -6.23624 }
- ]}
-] \ No newline at end of file
diff --git a/model-integration/src/test/models/xgboost/xgboost.2.2.json b/model-integration/src/test/models/xgboost/xgboost.2.2.json
index f8949b47e52..a18b9966b55 100644
--- a/model-integration/src/test/models/xgboost/xgboost.2.2.json
+++ b/model-integration/src/test/models/xgboost/xgboost.2.2.json
@@ -1,16 +1,16 @@
[
- { "nodeid": 0, "depth": 0, "split": "f29", "split_condition": -0.1234567, "yes": 1, "no": 2, "missing": 1, "children": [
+ { "nodeid": 0, "depth": 0, "split": "f29", "split_condition": -0.1234567, "yes": 1, "no": 2, "missing": 2, "children": [
{ "nodeid": 1, "depth": 1, "split": "f56", "split_condition": -0.242398, "yes": 3, "no": 4, "missing": 3, "children": [
{ "nodeid": 3, "leaf": 1.71218 },
{ "nodeid": 4, "leaf": -1.70044 }
]},
- { "nodeid": 2, "depth": 1, "split": "f109", "split_condition": 0.8723473, "yes": 5, "no": 6, "missing": 5, "children": [
+ { "nodeid": 2, "depth": 1, "split": "f109", "split_condition": 0.8723473, "yes": 5, "no": 6, "missing": 6, "children": [
{ "nodeid": 5, "leaf": -1.94071 },
{ "nodeid": 6, "leaf": 1.85965 }
]}
]},
{ "nodeid": 0, "depth": 0, "split": "f60", "split_condition": -0.482947, "yes": 1, "no": 2, "missing": 1, "children": [
- { "nodeid": 1, "depth": 1, "split": "f29", "split_condition": -4.2387498, "yes": 3, "no": 4, "missing": 3, "children": [
+ { "nodeid": 1, "depth": 1, "split": "f29", "split_condition": -4.2387498, "yes": 3, "no": 4, "missing": 4, "children": [
{ "nodeid": 3, "leaf": 0.784718 },
{ "nodeid": 4, "leaf": -0.96853 }
]},
diff --git a/model-integration/src/test/models/xgboost/xgboost.test.if_inversion.json b/model-integration/src/test/models/xgboost/xgboost.test.json
index 8994d89787e..8994d89787e 100644
--- a/model-integration/src/test/models/xgboost/xgboost.test.if_inversion.json
+++ b/model-integration/src/test/models/xgboost/xgboost.test.json