diff options
| | | |
|---|---|---|
| author | Lester Solbakken <lesters@oath.com> | 2019-09-17 12:53:46 +0200 |
| committer | Lester Solbakken <lesters@oath.com> | 2019-09-17 12:53:46 +0200 |
| commit | 76e9954ec9aff9c86d33791bdb02f57885fbed20 (patch) | |
| tree | e6714540c24137945e56316a57fd3d03e6cdf70c /model-integration | |
| parent | 55ef15053d73e5300d1d4112515de9303ccddfdc (diff) | |
Revert "Add XGBoost if-inversion for missing features"
This reverts commit 7bd1c27158403b1ef99d694ff9a31736e84b950f.
Diffstat (limited to 'model-integration')
4 files changed, 40 insertions, 9 deletions
diff --git a/model-integration/src/main/java/ai/vespa/rankingexpression/importer/xgboost/XGBoostParser.java b/model-integration/src/main/java/ai/vespa/rankingexpression/importer/xgboost/XGBoostParser.java index c41a114a970..9de07eed475 100644 --- a/model-integration/src/main/java/ai/vespa/rankingexpression/importer/xgboost/XGBoostParser.java +++ b/model-integration/src/main/java/ai/vespa/rankingexpression/importer/xgboost/XGBoostParser.java @@ -16,6 +16,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; class XGBoostParser { private List<XGBoostTree> xgboostTrees; + private boolean doIfInversion = false; /** * Constructor stores parsed JSON trees. @@ -31,6 +32,7 @@ class XGBoostParser { for (JsonNode treeNode : forestNode) { this.xgboostTrees.add(mapper.treeToValue(treeNode, XGBoostTree.class)); } + doIfInversion = filePath.endsWith("if_inversion.json"); } /** @@ -69,12 +71,9 @@ class XGBoostParser { trueExp = treeToRankExp(node.getChildren().get(1)); falseExp = treeToRankExp(node.getChildren().get(0)); } - String condition; - if (node.getMissing() == node.getYes()) { - // Note: this is for handling missing features, as the backend handles comparison with NaN as false. 
+ String condition = node.getSplit() + " < " + node.getSplit_condition(); + if (doIfInversion && node.getMissing() == node.getYes()) { condition = "!(" + node.getSplit() + " >= " + node.getSplit_condition() + ")"; - } else { - condition = node.getSplit() + " < " + node.getSplit_condition(); } return "if (" + condition + ", " + trueExp + ", " + falseExp + ")"; } diff --git a/model-integration/src/test/java/ai/vespa/rankingexpression/importer/xgboost/XGBoostImportTestCase.java b/model-integration/src/test/java/ai/vespa/rankingexpression/importer/xgboost/XGBoostImportTestCase.java index 9a71905aa65..6d75b9f40ff 100644 --- a/model-integration/src/test/java/ai/vespa/rankingexpression/importer/xgboost/XGBoostImportTestCase.java +++ b/model-integration/src/test/java/ai/vespa/rankingexpression/importer/xgboost/XGBoostImportTestCase.java @@ -20,9 +20,22 @@ public class XGBoostImportTestCase { assertEquals(1, model.expressions().size()); RankingExpression expression = model.expressions().get("test"); assertNotNull(expression); - assertEquals("if (f29 < -0.1234567, if (!(f56 >= -0.242398), 1.71218, -1.70044), if (f109 < 0.8723473, -1.94071, 1.85965)) + if (!(f60 >= -0.482947), if (f29 < -4.2387498, 0.784718, -0.96853), -6.23624)", + assertEquals("if (f29 < -0.1234567, if (f56 < -0.242398, 1.71218, -1.70044), if (f109 < 0.8723473, -1.94071, 1.85965)) + if (f60 < -0.482947, if (f29 < -4.2387498, 0.784718, -0.96853), -6.23624)", + expression.getRoot().toString()); + assertEquals(1, model.outputExpressions().size()); + } + + @Test + public void testXGBoostWithIfInversion() { + ImportedModel model = new XGBoostImporter().importModel("test", "src/test/models/xgboost/xgboost.2.2.if_inversion.json"); + assertTrue("All inputs are scalar", model.inputs().isEmpty()); + assertEquals(1, model.expressions().size()); + RankingExpression expression = model.expressions().get("test"); + assertNotNull(expression); + assertEquals("if (f29 < -0.1234567, if (!(f56 >= -0.242398), 1.71218, 
-1.70044), if (f109 < 0.8723473, -1.94071, 1.85965)) + if (f60 < -0.482947, if (!(f29 >= -4.2387498), 0.784718, -0.96853), -6.23624)", expression.getRoot().toString()); assertEquals(1, model.outputExpressions().size()); } + } diff --git a/model-integration/src/test/models/xgboost/xgboost.2.2.if_inversion.json b/model-integration/src/test/models/xgboost/xgboost.2.2.if_inversion.json new file mode 100644 index 00000000000..c1217ef55a6 --- /dev/null +++ b/model-integration/src/test/models/xgboost/xgboost.2.2.if_inversion.json @@ -0,0 +1,19 @@ +[ + { "nodeid": 0, "depth": 0, "split": "f29", "split_condition": -0.1234567, "yes": 1, "no": 2, "missing": 2, "children": [ + { "nodeid": 1, "depth": 1, "split": "f56", "split_condition": -0.242398, "yes": 3, "no": 4, "missing": 3, "children": [ + { "nodeid": 3, "leaf": 1.71218 }, + { "nodeid": 4, "leaf": -1.70044 } + ]}, + { "nodeid": 2, "depth": 1, "split": "f109", "split_condition": 0.8723473, "yes": 5, "no": 6, "missing": 6, "children": [ + { "nodeid": 5, "leaf": -1.94071 }, + { "nodeid": 6, "leaf": 1.85965 } + ]} + ]}, + { "nodeid": 0, "depth": 0, "split": "f60", "split_condition": -0.482947, "yes": 1, "no": 2, "missing": 2, "children": [ + { "nodeid": 1, "depth": 1, "split": "f29", "split_condition": -4.2387498, "yes": 3, "no": 4, "missing": 3, "children": [ + { "nodeid": 3, "leaf": 0.784718 }, + { "nodeid": 4, "leaf": -0.96853 } + ]}, + { "nodeid": 2, "leaf": -6.23624 } + ]} +]
\ No newline at end of file diff --git a/model-integration/src/test/models/xgboost/xgboost.2.2.json b/model-integration/src/test/models/xgboost/xgboost.2.2.json index a18b9966b55..f8949b47e52 100644 --- a/model-integration/src/test/models/xgboost/xgboost.2.2.json +++ b/model-integration/src/test/models/xgboost/xgboost.2.2.json @@ -1,16 +1,16 @@ [ - { "nodeid": 0, "depth": 0, "split": "f29", "split_condition": -0.1234567, "yes": 1, "no": 2, "missing": 2, "children": [ + { "nodeid": 0, "depth": 0, "split": "f29", "split_condition": -0.1234567, "yes": 1, "no": 2, "missing": 1, "children": [ { "nodeid": 1, "depth": 1, "split": "f56", "split_condition": -0.242398, "yes": 3, "no": 4, "missing": 3, "children": [ { "nodeid": 3, "leaf": 1.71218 }, { "nodeid": 4, "leaf": -1.70044 } ]}, - { "nodeid": 2, "depth": 1, "split": "f109", "split_condition": 0.8723473, "yes": 5, "no": 6, "missing": 6, "children": [ + { "nodeid": 2, "depth": 1, "split": "f109", "split_condition": 0.8723473, "yes": 5, "no": 6, "missing": 5, "children": [ { "nodeid": 5, "leaf": -1.94071 }, { "nodeid": 6, "leaf": 1.85965 } ]} ]}, { "nodeid": 0, "depth": 0, "split": "f60", "split_condition": -0.482947, "yes": 1, "no": 2, "missing": 1, "children": [ - { "nodeid": 1, "depth": 1, "split": "f29", "split_condition": -4.2387498, "yes": 3, "no": 4, "missing": 4, "children": [ + { "nodeid": 1, "depth": 1, "split": "f29", "split_condition": -4.2387498, "yes": 3, "no": 4, "missing": 3, "children": [ { "nodeid": 3, "leaf": 0.784718 }, { "nodeid": 4, "leaf": -0.96853 } ]}, |