summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/pom.xml2
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/ml/XgboostImporter.java28
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/ml/importer/xgboost/XGBoostParser.java77
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/ml/importer/xgboost/XGBoostTree.java77
4 files changed, 182 insertions, 2 deletions
diff --git a/searchlib/pom.xml b/searchlib/pom.xml
index 0202f8510bb..8037f1d399a 100644
--- a/searchlib/pom.xml
+++ b/searchlib/pom.xml
@@ -51,12 +51,10 @@
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
- <scope>test</scope>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
- <scope>test</scope>
</dependency>
</dependencies>
<build>
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/ml/XgboostImporter.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/ml/XgboostImporter.java
new file mode 100644
index 00000000000..f9717c39a8b
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/ml/XgboostImporter.java
@@ -0,0 +1,28 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.integration.ml;
+
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+import com.yahoo.searchlib.rankingexpression.integration.ml.importer.xgboost.XGBoostParser;
+import com.yahoo.searchlib.rankingexpression.parser.ParseException;
+
+import java.io.IOException;
+
+/**
+ * Converts a saved XGBoost model into a ranking expression.
+ *
+ * @author grace-lam
+ */
+public class XgboostImporter {
+
+    /**
+     * Reads the XGBoost model file at the given path and turns it into a ranking expression.
+     *
+     * @param modelPath path of the XGBoost JSON output file to import.
+     * @return the ranking expression built from the string produced by {@link XGBoostParser}.
+     * @throws IllegalArgumentException if the file cannot be read or parsed as JSON, or if the
+     *         generated expression string is rejected by the ranking expression parser. The
+     *         underlying exception is always attached as the cause.
+     */
+    public RankingExpression parseModel(String modelPath) {
+        try {
+            XGBoostParser parser = new XGBoostParser(modelPath);
+            return new RankingExpression(parser.toRankingExpression());
+        } catch (IOException e) {
+            throw new IllegalArgumentException("Could not import XGBoost model from '" + modelPath + "'", e);
+        } catch (ParseException e) {
+            // Chain the cause instead of flattening it into the message, so the stack trace survives.
+            throw new IllegalArgumentException("Could not parse ranking expression imported from '" + modelPath + "'", e);
+        }
+    }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/ml/importer/xgboost/XGBoostParser.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/ml/importer/xgboost/XGBoostParser.java
new file mode 100644
index 00000000000..fef8bfec81d
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/ml/importer/xgboost/XGBoostParser.java
@@ -0,0 +1,77 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.integration.ml.importer.xgboost;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+/**
+ * Reads the trees of an XGBoost JSON output file and renders them as a single
+ * Vespa ranking expression string.
+ *
+ * @author grace-lam
+ */
+public class XGBoostParser {
+
+    // Trees in the order they appear in the file; filled once by the constructor.
+    private final List<XGBoostTree> xgboostTrees;
+
+    /**
+     * Constructor stores parsed JSON trees.
+     * Every child element of the root JSON node is mapped to one {@link XGBoostTree}.
+     *
+     * @param filePath XGBoost JSON output file.
+     * @throws JsonProcessingException Fails JSON parsing.
+     * @throws IOException Fails file reading.
+     */
+    public XGBoostParser(String filePath) throws JsonProcessingException, IOException {
+        this.xgboostTrees = new ArrayList<>();
+        ObjectMapper mapper = new ObjectMapper();
+        JsonNode forestNode = mapper.readTree(new File(filePath));
+        for (JsonNode treeNode : forestNode) {
+            this.xgboostTrees.add(mapper.treeToValue(treeNode, XGBoostTree.class));
+        }
+    }
+
+    /**
+     * Converts parsed JSON trees to Vespa ranking expressions.
+     * The per-tree expressions are joined with {@code " + \n"}; no trees yields an empty string.
+     *
+     * @return Vespa ranking expressions.
+     */
+    public String toRankingExpression() {
+        List<String> treeExpressions = new ArrayList<>(xgboostTrees.size());
+        for (XGBoostTree tree : xgboostTrees) {
+            treeExpressions.add(treeToRankExp(tree));
+        }
+        return String.join(" + \n", treeExpressions);
+    }
+
+    /**
+     * Recursive helper function for toRankingExpression().
+     * A leaf becomes its numeric value; a split node becomes
+     * {@code if (split < split_condition, yesBranch, noBranch)}. The branch whose node id
+     * equals the node's "yes" id is used as the true branch; the "no" and "missing" ids
+     * are not consulted.
+     *
+     * @param node XGBoost tree node to convert.
+     * @return Vespa ranking expression for input node.
+     * @throws IllegalArgumentException if a non-leaf node does not have exactly two children.
+     */
+    public String treeToRankExp(XGBoostTree node) {
+        if (node.isLeaf()) {
+            return Double.toString(node.getLeaf());
+        }
+        List<XGBoostTree> children = node.getChildren();
+        // Explicit check rather than 'assert': assertions are off by default, and the
+        // tree comes from an external file that may be malformed.
+        if (children.size() != 2) {
+            throw new IllegalArgumentException("XGBoost split node " + node.getNodeid()
+                    + " must have exactly 2 children, but has " + children.size());
+        }
+        boolean firstChildIsYes = node.getYes() == children.get(0).getNodeid();
+        String trueExp = treeToRankExp(children.get(firstChildIsYes ? 0 : 1));
+        String falseExp = treeToRankExp(children.get(firstChildIsYes ? 1 : 0));
+        return "if (" + node.getSplit() + " < " + Double.toString(node.getSplit_condition()) + ", " + trueExp + ", "
+                + falseExp + ")";
+    }
+
+} \ No newline at end of file
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/ml/importer/xgboost/XGBoostTree.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/ml/importer/xgboost/XGBoostTree.java
new file mode 100644
index 00000000000..6bbc9abe8ae
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/integration/ml/importer/xgboost/XGBoostTree.java
@@ -0,0 +1,77 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.integration.ml.importer.xgboost;
+
+import java.util.List;
+
+/**
+ * Node of an XGBoost tree, mirroring the JSON layout of the XGBoost output file so that
+ * the file can be mapped onto instances of this class. Child nodes are nested recursively.
+ *
+ * @author grace-lam
+ */
+public class XGBoostTree {
+
+    /** ID of current node. */
+    private int nodeid;
+    /** Depth of current node w.r.t. the tree's root. */
+    private int depth;
+    /** Feature name used for split. */
+    private String split;
+    /** Feature value threshold to split on. */
+    private double split_condition;
+    /** Next node if feature value < split_condition. */
+    private int yes;
+    /** Next node if feature value >= split_condition. */
+    private int no;
+    /** Next node if feature value is missing. */
+    private int missing;
+    /** Response value for leaf node. */
+    private double leaf;
+    /** List of child nodes. */
+    private List<XGBoostTree> children;
+
+    /** @return ID of this node. */
+    public int getNodeid() {
+        return this.nodeid;
+    }
+
+    /** @return depth of this node relative to the tree's root. */
+    public int getDepth() {
+        return this.depth;
+    }
+
+    /** @return name of the feature this node splits on. */
+    public String getSplit() {
+        return this.split;
+    }
+
+    /** @return feature value threshold this node splits on. */
+    public double getSplit_condition() {
+        return this.split_condition;
+    }
+
+    /** @return ID of the next node when the feature value is below the threshold. */
+    public int getYes() {
+        return this.yes;
+    }
+
+    /** @return ID of the next node when the feature value is at or above the threshold. */
+    public int getNo() {
+        return this.no;
+    }
+
+    /** @return ID of the next node when the feature value is missing. */
+    public int getMissing() {
+        return this.missing;
+    }
+
+    /** @return response value of this node when it is a leaf. */
+    public double getLeaf() {
+        return this.leaf;
+    }
+
+    /** @return child nodes, or null when none were set. */
+    public List<XGBoostTree> getChildren() {
+        return this.children;
+    }
+
+    /**
+     * Tells whether this node is a leaf, i.e. has no list of children.
+     *
+     * @return True if leaf, false otherwise.
+     */
+    public boolean isLeaf() {
+        return null == this.children;
+    }
+
+} \ No newline at end of file