Add support for importing LightGBM models

author: Lester Solbakken <lesters@oath.com> 2020-02-02 17:39:44 +0100
committer: Lester Solbakken <lesters@oath.com> 2020-02-02 17:39:44 +0100
commit: f656ff5c15d95905f48d5829278ec241f1941577 (patch)
tree: 41d1fd4f8bc22df172acac42bfc39abd136036c0 /application
parent: 99f3a7193090cfcd6b5fdbbe612f53d892f9d86b (diff)
3 files changed, 283 insertions, 1 deletion
diff --git a/application/src/main/java/com/yahoo/application/Application.java b/application/src/main/java/com/yahoo/application/Application.java
index 5f9b1f51863..d4b1735e4c1 100644
--- a/application/src/main/java/com/yahoo/application/Application.java
+++ b/application/src/main/java/com/yahoo/application/Application.java
@@ -2,6 +2,7 @@
 package com.yahoo.application;
 
 import ai.vespa.rankingexpression.importer.configmodelview.MlModelImporter;
+import ai.vespa.rankingexpression.importer.lightgbm.LightGBMImporter;
 import ai.vespa.rankingexpression.importer.onnx.OnnxImporter;
 import ai.vespa.rankingexpression.importer.tensorflow.TensorFlowImporter;
 import ai.vespa.rankingexpression.importer.vespa.VespaImporter;
@@ -117,6 +118,7 @@ public final class Application implements AutoCloseable {
             List<MlModelImporter> modelImporters = List.of(new VespaImporter(),
                                                            new TensorFlowImporter(),
                                                            new OnnxImporter(),
+                                                           new LightGBMImporter(),
                                                            new XGBoostImporter());
             DeployState deployState = new DeployState.Builder()
                     .applicationPackage(FilesApplicationPackage.fromFile(path.toFile(), true))
diff --git a/application/src/test/app-packages/model-evaluation/models/lightgbm/regression.json b/application/src/test/app-packages/model-evaluation/models/lightgbm/regression.json
new file mode 100644
index 00000000000..cf0488ecd8b
--- /dev/null
+++ b/application/src/test/app-packages/model-evaluation/models/lightgbm/regression.json
@@ -0,0 +1,275 @@
+{
+  "name": "tree",
+  "version": "v3",
+  "num_class": 1,
+  "num_tree_per_iteration": 1,
+  "label_index": 0,
+  "max_feature_idx": 3,
+  "average_output": false,
+  "objective": "regression",
+  "feature_names": [
+    "numerical_1",
+    "numerical_2",
+    "categorical_1",
+    "categorical_2"
+  ],
+  "monotone_constraints": [],
+  "tree_info": [
+    {
+      "tree_index": 0,
+      "num_leaves": 3,
+      "num_cat": 1,
+      "shrinkage": 1,
+      "tree_structure": {
+        "split_index": 0,
+        "split_feature": 1,
+        "split_gain": 68.5353012084961,
+        "threshold": 0.46643291586559305,
+        "decision_type": "<=",
+        "default_left": true,
+        "missing_type": "NaN",
+        "internal_value": 0,
+        "internal_weight": 0,
+        "internal_count": 1000,
+        "left_child": {
+          "leaf_index": 0,
+          "leaf_value": 2.1594397038037663,
+          "leaf_weight": 469,
+          "leaf_count": 469
+        },
+        "right_child": {
+          "split_index": 1,
+          "split_feature": 3,
+          "split_gain": 41.27640151977539,
+          "threshold": "2||3||4",
+          "decision_type": "==",
+          "default_left": false,
+          "missing_type": "NaN",
+          "internal_value": 0.246035,
+          "internal_weight": 531,
+          "internal_count": 531,
+          "left_child": {
+            "leaf_index": 1,
+            "leaf_value": 2.235297305276056,
+            "leaf_weight": 302,
+            "leaf_count": 302
+          },
+          "right_child": {
+            "leaf_index": 2,
+            "leaf_value": 2.1792953471546546,
+            "leaf_weight": 229,
+            "leaf_count": 229
+          }
+        }
+      }
+    },
+    {
+      "tree_index": 1,
+      "num_leaves": 3,
+      "num_cat": 1,
+      "shrinkage": 0.1,
+      "tree_structure": {
+        "split_index": 0,
+        "split_feature": 2,
+        "split_gain": 64.22250366210938,
+        "threshold": "3||4",
+        "decision_type": "==",
+        "default_left": false,
+        "missing_type": "NaN",
+        "internal_value": 0,
+        "internal_weight": 0,
+        "internal_count": 1000,
+        "left_child": {
+          "leaf_index": 0,
+          "leaf_value": 0.03070842919354316,
+          "leaf_weight": 399,
+          "leaf_count": 399
+        },
+        "right_child": {
+          "split_index": 1,
+          "split_feature": 0,
+          "split_gain": 36.74250030517578,
+          "threshold": 0.5102250691730842,
+          "decision_type": "<=",
+          "default_left": true,
+          "missing_type": "NaN",
+          "internal_value": -0.204906,
+          "internal_weight": 601,
+          "internal_count": 601,
+          "left_child": {
+            "leaf_index": 1,
+            "leaf_value": -0.04439151147520909,
+            "leaf_weight": 315,
+            "leaf_count": 315
+          },
+          "right_child": {
+            "leaf_index": 2,
+            "leaf_value": 0.005117411709368601,
+            "leaf_weight": 286,
+            "leaf_count": 286
+          }
+        }
+      }
+    },
+    {
+      "tree_index": 2,
+      "num_leaves": 3,
+      "num_cat": 0,
+      "shrinkage": 0.1,
+      "tree_structure": {
+        "split_index": 0,
+        "split_feature": 1,
+        "split_gain": 57.1327018737793,
+        "threshold": 0.668665477622446,
+        "decision_type": "<=",
+        "default_left": true,
+        "missing_type": "NaN",
+        "internal_value": 0,
+        "internal_weight": 0,
+        "internal_count": 1000,
+        "left_child": {
+          "split_index": 1,
+          "split_feature": 1,
+          "split_gain": 40.859100341796875,
+          "threshold": 0.008118820676863816,
+          "decision_type": "<=",
+          "default_left": true,
+          "missing_type": "NaN",
+          "internal_value": -0.162926,
+          "internal_weight": 681,
+          "internal_count": 681,
+          "left_child": {
+            "leaf_index": 0,
+            "leaf_value": -0.15361238490967524,
+            "leaf_weight": 21,
+            "leaf_count": 21
+          },
+          "right_child": {
+            "leaf_index": 2,
+            "leaf_value": -0.01192330846157292,
+            "leaf_weight": 660,
+            "leaf_count": 660
+          }
+        },
+        "right_child": {
+          "leaf_index": 1,
+          "leaf_value": 0.03499044894987518,
+          "leaf_weight": 319,
+          "leaf_count": 319
+        }
+      }
+    },
+    {
+      "tree_index": 3,
+      "num_leaves": 3,
+      "num_cat": 1,
+      "shrinkage": 0.1,
+      "tree_structure": {
+        "split_index": 0,
+        "split_feature": 0,
+        "split_gain": 54.77090072631836,
+        "threshold": 0.5201391072644542,
+        "decision_type": "<=",
+        "default_left": true,
+        "missing_type": "NaN",
+        "internal_value": 0,
+        "internal_weight": 0,
+        "internal_count": 1000,
+        "left_child": {
+          "leaf_index": 0,
+          "leaf_value": -0.02141000620783247,
+          "leaf_weight": 543,
+          "leaf_count": 543
+        },
+        "right_child": {
+          "split_index": 1,
+          "split_feature": 2,
+          "split_gain": 27.200700759887695,
+          "threshold": "0||1",
+          "decision_type": "==",
+          "default_left": false,
+          "missing_type": "NaN",
+          "internal_value": 0.255704,
+          "internal_weight": 457,
+          "internal_count": 457,
+          "left_child": {
+            "leaf_index": 1,
+            "leaf_value": -0.004121485787596721,
+            "leaf_weight": 191,
+            "leaf_count": 191
+          },
+          "right_child": {
+            "leaf_index": 2,
+            "leaf_value": 0.04534090904886873,
+            "leaf_weight": 266,
+            "leaf_count": 266
+          }
+        }
+      }
+    },
+    {
+      "tree_index": 4,
+      "num_leaves": 3,
+      "num_cat": 1,
+      "shrinkage": 0.1,
+      "tree_structure": {
+        "split_index": 0,
+        "split_feature": 3,
+        "split_gain": 51.84349822998047,
+        "threshold": "2||3||4",
+        "decision_type": "==",
+        "default_left": false,
+        "missing_type": "NaN",
+        "internal_value": 0,
+        "internal_weight": 0,
+        "internal_count": 1000,
+        "left_child": {
+          "split_index": 1,
+          "split_feature": 1,
+          "split_gain": 39.352699279785156,
+          "threshold": 0.27283279016959255,
+          "decision_type": "<=",
+          "default_left": true,
+          "missing_type": "NaN",
+          "internal_value": 0.188414,
+          "internal_weight": 593,
+          "internal_count": 593,
+          "left_child": {
+            "leaf_index": 0,
+            "leaf_value": -0.01924803254356527,
+            "leaf_weight": 184,
+            "leaf_count": 184
+          },
+          "right_child": {
+            "leaf_index": 2,
+            "leaf_value": 0.03643772842347651,
+            "leaf_weight": 409,
+            "leaf_count": 409
+          }
+        },
+        "right_child": {
+          "leaf_index": 1,
+          "leaf_value": -0.02701711918923075,
+          "leaf_weight": 407,
+          "leaf_count": 407
+        }
+      }
+    }
+  ],
+  "pandas_categorical": [
+    [
+      "a",
+      "b",
+      "c",
+      "d",
+      "e"
+    ],
+    [
+      "i",
+      "j",
+      "k",
+      "l",
+      "m"
+    ]
+  ]
+}
+\ No newline at end of file
diff --git a/application/src/test/java/com/yahoo/application/container/ContainerModelEvaluationTest.java b/application/src/test/java/com/yahoo/application/container/ContainerModelEvaluationTest.java
index 79510375414..3d7eed1e729 100644
--- a/application/src/test/java/com/yahoo/application/container/ContainerModelEvaluationTest.java
+++ b/application/src/test/java/com/yahoo/application/container/ContainerModelEvaluationTest.java
@@ -45,7 +45,7 @@ public class ContainerModelEvaluationTest {
     }
     private void assertLoadedModels(JDisc jdisc) {
         {
-            String expected = "{\"xgboost_xgboost_2_2\":\"http://localhost/model-evaluation/v1/xgboost_xgboost_2_2\",\"onnx_mnist_softmax\":\"http://localhost/model-evaluation/v1/onnx_mnist_softmax\",\"tensorflow_mnist_softmax_saved\":\"http://localhost/model-evaluation/v1/tensorflow_mnist_softmax_saved\",\"tensorflow_mnist_saved\":\"http://localhost/model-evaluation/v1/tensorflow_mnist_saved\",\"vespa_example\":\"http://localhost/model-evaluation/v1/vespa_example\"}";
+            String expected = "{\"xgboost_xgboost_2_2\":\"http://localhost/model-evaluation/v1/xgboost_xgboost_2_2\",\"onnx_mnist_softmax\":\"http://localhost/model-evaluation/v1/onnx_mnist_softmax\",\"tensorflow_mnist_softmax_saved\":\"http://localhost/model-evaluation/v1/tensorflow_mnist_softmax_saved\",\"tensorflow_mnist_saved\":\"http://localhost/model-evaluation/v1/tensorflow_mnist_saved\",\"vespa_example\":\"http://localhost/model-evaluation/v1/vespa_example\",\"lightgbm_regression\":\"http://localhost/model-evaluation/v1/lightgbm_regression\"}";
             assertResponse("http://localhost/model-evaluation/v1", expected, jdisc);
         }
 
@@ -55,6 +55,11 @@ public class ContainerModelEvaluationTest {
         }
 
         {
+            String expected = "{\"cells\":[{\"address\":{},\"value\":1.9130086820218188}]}";
+            assertResponse("http://localhost/model-evaluation/v1/lightgbm_regression/eval", expected, jdisc);
+        }
+
+        {
             // Note: The specific response value here has not been verified
             String expected = "{\"cells\":[{\"address\":{\"d0\":\"0\",\"d1\":\"0\"},\"value\":-0.5066885003407351},{\"address\":{\"d0\":\"0\",\"d1\":\"1\"},\"value\":0.3912837743150205},{\"address\":{\"d0\":\"0\",\"d1\":\"2\"},\"value\":-0.12401806321703948},{\"address\":{\"d0\":\"0\",\"d1\":\"3\"},\"value\":-0.7019029168606575},{\"address\":{\"d0\":\"0\",\"d1\":\"4\"},\"value\":0.13120114146441697},{\"address\":{\"d0\":\"0\",\"d1\":\"5\"},\"value\":0.6611923203384626},{\"address\":{\"d0\":\"0\",\"d1\":\"6\"},\"value\":-0.22365810810026446},{\"address\":{\"d0\":\"0\",\"d1\":\"7\"},\"value\":-0.0740018307465809},{\"address\":{\"d0\":\"0\",\"d1\":\"8\"},\"value\":0.056492490256153896},{\"address\":{\"d0\":\"0\",\"d1\":\"9\"},\"value\":-0.18422015072393733}]}";
             assertResponse("http://localhost/model-evaluation/v1/tensorflow_mnist_saved/serving_default.y/eval?input=" + inputTensor(), expected, jdisc);
author	Lester Solbakken <lesters@oath.com>	2020-02-02 17:39:44 +0100
committer	Lester Solbakken <lesters@oath.com>	2020-02-02 17:39:44 +0100
commit	f656ff5c15d95905f48d5829278ec241f1941577 (patch)
tree	41d1fd4f8bc22df172acac42bfc39abd136036c0 /application
parent	99f3a7193090cfcd6b5fdbbe612f53d892f9d86b (diff)