diff options
author | Grace Lam <graceslam@gmail.com> | 2018-08-14 14:02:43 -0700 |
---|---|---|
committer | Grace Lam <graceslam@gmail.com> | 2018-08-14 14:02:43 -0700 |
commit | 88f31d5f5af6af44f28de9f363c33044220bd611 (patch) | |
tree | e032aa9ed142e7e53ef53f927891cc6ded5edf7f /searchlib | |
parent | 7892f289d3cebcdba6ab16d035c41826b7cb7987 (diff) |
Add XGBoost to Vespa converter
Diffstat (limited to 'searchlib')
4 files changed, 182 insertions, 2 deletions
diff --git a/searchlib/pom.xml b/searchlib/pom.xml
index 0202f8510bb..8037f1d399a 100644
--- a/searchlib/pom.xml
+++ b/searchlib/pom.xml
@@ -51,12 +51,10 @@
         <dependency>
             <groupId>com.fasterxml.jackson.core</groupId>
             <artifactId>jackson-core</artifactId>
-            <scope>test</scope>
         </dependency>
         <dependency>
             <groupId>com.fasterxml.jackson.core</groupId>
             <artifactId>jackson-databind</artifactId>
-            <scope>test</scope>
         </dependency>
     </dependencies>
     <build>
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/ml/XgboostImporter.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/ml/XgboostImporter.java
new file mode 100644
index 00000000000..f9717c39a8b
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/ml/XgboostImporter.java
@@ -0,0 +1,37 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.integration.ml;
+
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+import com.yahoo.searchlib.rankingexpression.integration.ml.importer.xgboost.XGBoostParser;
+import com.yahoo.searchlib.rankingexpression.parser.ParseException;
+
+import java.io.IOException;
+
+/**
+ * Converts a saved XGBoost model into a ranking expression.
+ *
+ * @author grace-lam
+ */
+public class XgboostImporter {
+
+    /**
+     * Reads the XGBoost JSON dump at the given path and converts it into a ranking expression.
+     *
+     * @param modelPath path of the XGBoost JSON model file.
+     * @return the ranking expression built from the converted model.
+     * @throws IllegalArgumentException if the file cannot be read or parsed as JSON, or if the
+     *         generated expression string is rejected by the ranking expression parser.
+     */
+    public RankingExpression parseModel(String modelPath) {
+        try {
+            XGBoostParser parser = new XGBoostParser(modelPath);
+            return new RankingExpression(parser.toRankingExpression());
+        } catch (IOException e) {
+            throw new IllegalArgumentException("Could not import XGBoost model from '" + modelPath + "'", e);
+        } catch (ParseException e) {
+            // Keep the parser's exception as the cause so its stack trace is not lost.
+            throw new IllegalArgumentException("Could not parse ranking expression: " + e, e);
+        }
+    }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/ml/importer/xgboost/XGBoostParser.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/ml/importer/xgboost/XGBoostParser.java
new file mode 100644
index 00000000000..fef8bfec81d
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/ml/importer/xgboost/XGBoostParser.java
@@ -0,0 +1,81 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.integration.ml.importer.xgboost;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+/**
+ * Parses the JSON dump of an XGBoost model and renders it as a Vespa ranking expression string.
+ *
+ * @author grace-lam
+ */
+public class XGBoostParser {
+
+    private final List<XGBoostTree> xgboostTrees;
+
+    /**
+     * Constructor stores parsed JSON trees: every element of the file's JSON root is bound to an XGBoostTree.
+     *
+     * @param filePath XGBoost JSON output file.
+     * @throws JsonProcessingException Fails JSON parsing.
+     * @throws IOException Fails file reading.
+     */
+    public XGBoostParser(String filePath) throws JsonProcessingException, IOException {
+        this.xgboostTrees = new ArrayList<>();
+        ObjectMapper mapper = new ObjectMapper();
+        JsonNode forestNode = mapper.readTree(new File(filePath));
+        for (JsonNode treeNode : forestNode) {
+            this.xgboostTrees.add(mapper.treeToValue(treeNode, XGBoostTree.class));
+        }
+    }
+
+    /**
+     * Converts parsed JSON trees to Vespa ranking expressions.
+     *
+     * @return The expressions of all trees joined by {@code " + \n"}; the empty string if no trees were parsed.
+     */
+    public String toRankingExpression() {
+        List<String> treeExpressions = new ArrayList<>(xgboostTrees.size());
+        for (XGBoostTree tree : xgboostTrees) {
+            treeExpressions.add(treeToRankExp(tree));
+        }
+        return String.join(" + \n", treeExpressions);
+    }
+
+    /**
+     * Recursive helper function for toRankingExpression().
+     *
+     * <p>A leaf is rendered as its leaf value; a split node is rendered as
+     * {@code if (split < split_condition, yesBranch, noBranch)}. The node's {@code missing} target is not consulted.
+     *
+     * @param node XGBoost tree node to convert.
+     * @return Vespa ranking expression for input node.
+     * @throws IllegalArgumentException If a non-leaf node does not have exactly two children.
+     */
+    public String treeToRankExp(XGBoostTree node) {
+        if (node.isLeaf()) {
+            return Double.toString(node.getLeaf());
+        }
+        // Validate explicitly: an assert statement is skipped unless the JVM runs with -ea.
+        List<XGBoostTree> children = node.getChildren();
+        if (children.size() != 2) {
+            throw new IllegalArgumentException("Expected 2 children for XGBoost node " + node.getNodeid()
+                                               + ", but found " + children.size());
+        }
+        XGBoostTree first = children.get(0);
+        XGBoostTree second = children.get(1);
+        // The child whose id equals 'yes' is the true branch; the remaining child is the false branch.
+        boolean firstIsYes = node.getYes() == first.getNodeid();
+        String trueExp = treeToRankExp(firstIsYes ? first : second);
+        String falseExp = treeToRankExp(firstIsYes ? second : first);
+        return "if (" + node.getSplit() + " < " + Double.toString(node.getSplit_condition()) + ", " + trueExp + ", "
+               + falseExp + ")";
+    }
+
+}
\ No newline at end of file
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/ml/importer/xgboost/XGBoostTree.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/ml/importer/xgboost/XGBoostTree.java
new file mode 100644
index 00000000000..6bbc9abe8ae
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/ml/importer/xgboost/XGBoostTree.java
@@ -0,0 +1,61 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.integration.ml.importer.xgboost;
+
+import java.util.List;
+
+/**
+ * Outlines the JSON representation used for parsing the XGBoost output file.
+ * Each instance is a single tree node; a split node carries its child nodes in {@code children}.
+ * Field and getter names are left as-is, presumably because JSON binding relies on them.
+ *
+ * @author grace-lam
+ */
+public class XGBoostTree {
+
+    /** ID of current node. */
+    private int nodeid;
+    /** Depth of current node w.r.t. the tree's root. */
+    private int depth;
+    /** Feature name used for split. */
+    private String split;
+    /** Feature value threshold to split on. */
+    private double split_condition;
+    /** Next node if {@code feature value < split_condition}. */
+    private int yes;
+    /** Next node if {@code feature value >= split_condition}. */
+    private int no;
+    /** Next node if feature value is missing. */
+    private int missing;
+    /** Response value for leaf node. */
+    private double leaf;
+    /** List of child nodes. */
+    private List<XGBoostTree> children;
+
+    public int getNodeid() { return nodeid; }
+
+    public int getDepth() { return depth; }
+
+    public String getSplit() { return split; }
+
+    public double getSplit_condition() { return split_condition; }
+
+    public int getYes() { return yes; }
+
+    public int getNo() { return no; }
+
+    public int getMissing() { return missing; }
+
+    public double getLeaf() { return leaf; }
+
+    public List<XGBoostTree> getChildren() { return children; }
+
+    /**
+     * Tells whether this node is a leaf, i.e. whether no child list is present.
+     *
+     * @return True if leaf, false otherwise.
+     */
+    public boolean isLeaf() {
+        return children == null;
+    }
+
+}
\ No newline at end of file |