From 7bd1c27158403b1ef99d694ff9a31736e84b950f Mon Sep 17 00:00:00 2001 From: Lester Solbakken Date: Mon, 16 Sep 2019 13:53:19 +0200 Subject: Add XGBoost if-inversion for missing features --- .../src/test/cfg/application/ml_models/models/xgboost.2.2.json | 6 +++--- config-model/src/test/integration/xgboost/models/xgboost.2.2.json | 6 +++--- .../processing/RankingExpressionWithXGBoostTestCase.java | 4 ++-- .../src/test/java/com/yahoo/vespa/model/ml/MlModelsTest.java | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) (limited to 'config-model') diff --git a/config-model/src/test/cfg/application/ml_models/models/xgboost.2.2.json b/config-model/src/test/cfg/application/ml_models/models/xgboost.2.2.json index f8949b47e52..a18b9966b55 100644 --- a/config-model/src/test/cfg/application/ml_models/models/xgboost.2.2.json +++ b/config-model/src/test/cfg/application/ml_models/models/xgboost.2.2.json @@ -1,16 +1,16 @@ [ - { "nodeid": 0, "depth": 0, "split": "f29", "split_condition": -0.1234567, "yes": 1, "no": 2, "missing": 1, "children": [ + { "nodeid": 0, "depth": 0, "split": "f29", "split_condition": -0.1234567, "yes": 1, "no": 2, "missing": 2, "children": [ { "nodeid": 1, "depth": 1, "split": "f56", "split_condition": -0.242398, "yes": 3, "no": 4, "missing": 3, "children": [ { "nodeid": 3, "leaf": 1.71218 }, { "nodeid": 4, "leaf": -1.70044 } ]}, - { "nodeid": 2, "depth": 1, "split": "f109", "split_condition": 0.8723473, "yes": 5, "no": 6, "missing": 5, "children": [ + { "nodeid": 2, "depth": 1, "split": "f109", "split_condition": 0.8723473, "yes": 5, "no": 6, "missing": 6, "children": [ { "nodeid": 5, "leaf": -1.94071 }, { "nodeid": 6, "leaf": 1.85965 } ]} ]}, { "nodeid": 0, "depth": 0, "split": "f60", "split_condition": -0.482947, "yes": 1, "no": 2, "missing": 1, "children": [ - { "nodeid": 1, "depth": 1, "split": "f29", "split_condition": -4.2387498, "yes": 3, "no": 4, "missing": 3, "children": [ + { "nodeid": 1, "depth": 1, "split": "f29", "split_condition": -4.2387498, "yes": 3, "no": 4, "missing": 4, "children": [ { "nodeid": 3, "leaf": 0.784718 }, { "nodeid": 4, "leaf": -0.96853 } ]}, diff --git a/config-model/src/test/integration/xgboost/models/xgboost.2.2.json b/config-model/src/test/integration/xgboost/models/xgboost.2.2.json index f8949b47e52..a18b9966b55 100644 --- a/config-model/src/test/integration/xgboost/models/xgboost.2.2.json +++ b/config-model/src/test/integration/xgboost/models/xgboost.2.2.json @@ -1,16 +1,16 @@ [ - { "nodeid": 0, "depth": 0, "split": "f29", "split_condition": -0.1234567, "yes": 1, "no": 2, "missing": 1, "children": [ + { "nodeid": 0, "depth": 0, "split": "f29", "split_condition": -0.1234567, "yes": 1, "no": 2, "missing": 2, "children": [ { "nodeid": 1, "depth": 1, "split": "f56", "split_condition": -0.242398, "yes": 3, "no": 4, "missing": 3, "children": [ { "nodeid": 3, "leaf": 1.71218 }, { "nodeid": 4, "leaf": -1.70044 } ]}, - { "nodeid": 2, "depth": 1, "split": "f109", "split_condition": 0.8723473, "yes": 5, "no": 6, "missing": 5, "children": [ + { "nodeid": 2, "depth": 1, "split": "f109", "split_condition": 0.8723473, "yes": 5, "no": 6, "missing": 6, "children": [ { "nodeid": 5, "leaf": -1.94071 }, { "nodeid": 6, "leaf": 1.85965 } ]} ]}, { "nodeid": 0, "depth": 0, "split": "f60", "split_condition": -0.482947, "yes": 1, "no": 2, "missing": 1, "children": [ - { "nodeid": 1, "depth": 1, "split": "f29", "split_condition": -4.2387498, "yes": 3, "no": 4, "missing": 3, "children": [ + { "nodeid": 1, "depth": 1, "split": "f29", "split_condition": -4.2387498, "yes": 3, "no": 4, "missing": 4, "children": [ { "nodeid": 3, "leaf": 0.784718 }, { "nodeid": 4, "leaf": -0.96853 } ]}, diff --git a/config-model/src/test/java/com/yahoo/searchdefinition/processing/RankingExpressionWithXGBoostTestCase.java b/config-model/src/test/java/com/yahoo/searchdefinition/processing/RankingExpressionWithXGBoostTestCase.java index 832a974082c..f73d1c823e2 100644 --- a/config-model/src/test/java/com/yahoo/searchdefinition/processing/RankingExpressionWithXGBoostTestCase.java +++ b/config-model/src/test/java/com/yahoo/searchdefinition/processing/RankingExpressionWithXGBoostTestCase.java @@ -19,8 +19,8 @@ public class RankingExpressionWithXGBoostTestCase { private final Path applicationDir = Path.fromString("src/test/integration/xgboost/"); private final static String vespaExpression = - "if (f29 < -0.1234567, if (f56 < -0.242398, 1.71218, -1.70044), if (f109 < 0.8723473, -1.94071, 1.85965)) + " + - "if (f60 < -0.482947, if (f29 < -4.2387498, 0.784718, -0.96853), -6.23624)"; + "if (f29 < -0.1234567, if (!(f56 >= -0.242398), 1.71218, -1.70044), if (f109 < 0.8723473, -1.94071, 1.85965)) + " + + "if (!(f60 >= -0.482947), if (f29 < -4.2387498, 0.784718, -0.96853), -6.23624)"; @After public void removeGeneratedModelFiles() { diff --git a/config-model/src/test/java/com/yahoo/vespa/model/ml/MlModelsTest.java b/config-model/src/test/java/com/yahoo/vespa/model/ml/MlModelsTest.java index d66f376ed6a..c5c475360a3 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/ml/MlModelsTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/ml/MlModelsTest.java @@ -70,7 +70,7 @@ public class MlModelsTest { "rankingExpression(mnist_tensorflow).rankingScript: join(reduce(join(map(join(reduce(join(join(join(rankingExpression(imported_ml_function_mnist_saved_dnn_hidden1_add), 0.009999999776482582, f(a,b)(a * b)), rankingExpression(imported_ml_function_mnist_saved_dnn_hidden1_add), f(a,b)(max(a,b))), constant(mnist_saved_dnn_hidden2_weights_read), f(a,b)(a * b)), sum, d3), constant(mnist_saved_dnn_hidden2_bias_read), f(a,b)(a + b)), f(a)(1.0507009873554805 * if (a >= 0, a, 1.6732632423543772 * (exp(a) - 1)))), constant(mnist_saved_dnn_outputs_weights_read), f(a,b)(a * b)), sum, d2), constant(mnist_saved_dnn_outputs_bias_read), f(a,b)(a + b))\n" + "rankingExpression(mnist_softmax_tensorflow).rankingScript: join(reduce(join(rename(rankingExpression(Placeholder), (d0, d1), (d0, d2)), constant(mnist_softmax_saved_layer_Variable_read), f(a,b)(a * b)), sum, d2), constant(mnist_softmax_saved_layer_Variable_1_read), f(a,b)(a + b))\n" + "rankingExpression(mnist_softmax_onnx).rankingScript: join(reduce(join(rename(rankingExpression(Placeholder), (d0, d1), (d0, d2)), constant(mnist_softmax_Variable), f(a,b)(a * b)), sum, d2), constant(mnist_softmax_Variable_1), f(a,b)(a + b))\n" + - "rankingExpression(my_xgboost).rankingScript: if (f29 < -0.1234567, if (f56 < -0.242398, 1.71218, -1.70044), if (f109 < 0.8723473, -1.94071, 1.85965)) + if (f60 < -0.482947, if (f29 < -4.2387498, 0.784718, -0.96853), -6.23624)\n" + + "rankingExpression(my_xgboost).rankingScript: if (f29 < -0.1234567, if (!(f56 >= -0.242398), 1.71218, -1.70044), if (f109 < 0.8723473, -1.94071, 1.85965)) + if (!(f60 >= -0.482947), if (f29 < -4.2387498, 0.784718, -0.96853), -6.23624)\n" + "vespa.rank.firstphase: rankingExpression(firstphase)\n" + "rankingExpression(firstphase).rankingScript: rankingExpression(mnist_tensorflow) + rankingExpression(mnist_softmax_tensorflow) + rankingExpression(mnist_softmax_onnx) + rankingExpression(my_xgboost)\n" + "vespa.type.attribute.argument: tensor(d0[],d1[784])\n"; -- cgit v1.2.3