From c47f27f1c3362b459e276c59ebcd09ab259b710e Mon Sep 17 00:00:00 2001 From: Martin Polden Date: Tue, 7 Feb 2023 16:04:57 +0100 Subject: Allow fallback to CPU if nodes are provisioned without GPU --- .../src/main/java/com/yahoo/schema/OnnxModel.java | 16 ++++++++++---- .../schema/derived/FileDistributedOnnxModels.java | 3 ++- .../model/container/xml/ContainerModelBuilder.java | 14 ++++++++---- .../src/test/cfg/application/onnx/services.xml | 2 +- .../model/ml/StatelessOnnxEvaluationTest.java | 3 ++- .../evaluation/RankProfilesConfigImporter.java | 2 +- .../modelintegration/evaluator/OnnxEvaluator.java | 12 +++++++---- .../evaluator/OnnxEvaluatorOptions.java | 25 ++++++++++++++-------- .../src/vespa/searchcore/config/onnx-models.def | 1 + 9 files changed, 53 insertions(+), 25 deletions(-) diff --git a/config-model/src/main/java/com/yahoo/schema/OnnxModel.java b/config-model/src/main/java/com/yahoo/schema/OnnxModel.java index 3d96849fa15..ae6f1fd96e4 100644 --- a/config-model/src/main/java/com/yahoo/schema/OnnxModel.java +++ b/config-model/src/main/java/com/yahoo/schema/OnnxModel.java @@ -24,7 +24,7 @@ public class OnnxModel extends DistributableResource { private String statelessExecutionMode = null; private Integer statelessInterOpThreads = null; private Integer statelessIntraOpThreads = null; - private Integer gpuDevice = null; + private GpuDevice gpuDevice = null; public OnnxModel(String name) { super(name); @@ -114,9 +114,9 @@ public class OnnxModel extends DistributableResource { } } - public void setGpuDevice(int deviceNumber) { + public void setGpuDevice(int deviceNumber, boolean required) { if (deviceNumber >= 0) { - this.gpuDevice = deviceNumber; + this.gpuDevice = new GpuDevice(deviceNumber, required); } } @@ -124,8 +124,16 @@ public class OnnxModel extends DistributableResource { return Optional.ofNullable(statelessIntraOpThreads); } - public Optional getGpuDevice() { + public Optional getGpuDevice() { return Optional.ofNullable(gpuDevice); } + public record GpuDevice(int deviceNumber, boolean required) { + + public GpuDevice { + if (deviceNumber < 0) throw new IllegalArgumentException("deviceNumber cannot be negative, got " + deviceNumber); + } + + } + } diff --git a/config-model/src/main/java/com/yahoo/schema/derived/FileDistributedOnnxModels.java b/config-model/src/main/java/com/yahoo/schema/derived/FileDistributedOnnxModels.java index f63e872836e..4196af18fb6 100644 --- a/config-model/src/main/java/com/yahoo/schema/derived/FileDistributedOnnxModels.java +++ b/config-model/src/main/java/com/yahoo/schema/derived/FileDistributedOnnxModels.java @@ -52,7 +52,8 @@ public class FileDistributedOnnxModels { if (model.getStatelessIntraOpThreads().isPresent()) modelBuilder.stateless_intraop_threads(model.getStatelessIntraOpThreads().get()); if (model.getGpuDevice().isPresent()) { - modelBuilder.gpu_device(model.getGpuDevice().get()); + modelBuilder.gpu_device(model.getGpuDevice().get().deviceNumber()); + modelBuilder.gpu_device_required(model.getGpuDevice().get().required()); } builder.model(modelBuilder); } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java index 700393e84f3..81626581722 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java @@ -207,9 +207,6 @@ public class ContainerModelBuilder extends ConfigModelBuilder { addConfiguredComponents(deployState, cluster, spec); addSecretStore(cluster, spec, deployState); - addModelEvaluation(spec, cluster, context); - addModelEvaluationBundles(cluster); - addProcessing(deployState, spec, cluster, context); addSearch(deployState, spec, cluster, context); addDocproc(deployState, spec, cluster); @@ -225,6 +222,9 @@ public class ContainerModelBuilder extends ConfigModelBuilder { addAccessLogs(deployState, cluster, spec); addNodes(cluster, spec, context); + addModelEvaluation(spec, cluster, context); // NOTE: Must be done after addNodes + addModelEvaluationBundles(cluster); + addServerProviders(deployState, spec, cluster); if (!standaloneBuilder) cluster.addAllPlatformBundles(); @@ -685,7 +685,13 @@ public class ContainerModelBuilder extends ConfigModelBuilder { onnxModel.setStatelessExecutionMode(getStringValue(modelElement, "execution-mode", null)); onnxModel.setStatelessInterOpThreads(getIntValue(modelElement, "interop-threads", -1)); onnxModel.setStatelessIntraOpThreads(getIntValue(modelElement, "intraop-threads", -1)); - onnxModel.setGpuDevice(getIntValue(modelElement, "gpu-device", -1)); + Element gpuDeviceElement = XML.getChild(modelElement, "gpu-device"); + if (gpuDeviceElement != null) { + int gpuDevice = Integer.parseInt(gpuDeviceElement.getTextContent()); + Capacity capacity = context.getDeployState().provisioned().all().get(cluster.id()); + boolean gpuProvisioned = capacity != null && !capacity.minResources().nodeResources().gpuResources().isZero(); + onnxModel.setGpuDevice(gpuDevice, gpuProvisioned); + } } cluster.setModelEvaluation(new ContainerModelEvaluation(cluster, profiles)); diff --git a/config-model/src/test/cfg/application/onnx/services.xml b/config-model/src/test/cfg/application/onnx/services.xml index 088bbcc4921..b17e34e66c2 100644 --- a/config-model/src/test/cfg/application/onnx/services.xml +++ b/config-model/src/test/cfg/application/onnx/services.xml @@ -8,11 +8,11 @@ 2 + 0 400 parallel - 0 diff --git a/config-model/src/test/java/com/yahoo/vespa/model/ml/StatelessOnnxEvaluationTest.java b/config-model/src/test/java/com/yahoo/vespa/model/ml/StatelessOnnxEvaluationTest.java index b1e28649e9f..8ccbe99f70a 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/ml/StatelessOnnxEvaluationTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/ml/StatelessOnnxEvaluationTest.java @@ -129,7 +129,8 @@ public class StatelessOnnxEvaluationTest { assertEquals(2, mulModel.stateless_intraop_threads()); assertEquals(-1, mulModel.stateless_interop_threads()); assertEquals("", mulModel.stateless_execution_mode()); - assertEquals(-1, mulModel.gpu_device()); + assertFalse(mulModel.gpu_device_required()); + assertEquals(0, mulModel.gpu_device()); } } diff --git a/model-evaluation/src/main/java/ai/vespa/models/evaluation/RankProfilesConfigImporter.java b/model-evaluation/src/main/java/ai/vespa/models/evaluation/RankProfilesConfigImporter.java index 924eed18633..9877dd69e83 100644 --- a/model-evaluation/src/main/java/ai/vespa/models/evaluation/RankProfilesConfigImporter.java +++ b/model-evaluation/src/main/java/ai/vespa/models/evaluation/RankProfilesConfigImporter.java @@ -182,7 +182,7 @@ public class RankProfilesConfigImporter { options.setExecutionMode(onnxModelConfig.stateless_execution_mode()); options.setInterOpThreads(onnxModelConfig.stateless_interop_threads()); options.setIntraOpThreads(onnxModelConfig.stateless_intraop_threads()); - options.setGpuDevice(onnxModelConfig.gpu_device()); + options.setGpuDevice(onnxModelConfig.gpu_device(), onnxModelConfig.gpu_device_required()); return new OnnxModel(name, file, options); } catch (InterruptedException e) { throw new IllegalStateException("Gave up waiting for ONNX model " + onnxModelConfig.name()); diff --git a/model-integration/src/main/java/ai/vespa/modelintegration/evaluator/OnnxEvaluator.java b/model-integration/src/main/java/ai/vespa/modelintegration/evaluator/OnnxEvaluator.java index ebed464421b..563ef911f8f 100644 --- a/model-integration/src/main/java/ai/vespa/modelintegration/evaluator/OnnxEvaluator.java +++ b/model-integration/src/main/java/ai/vespa/modelintegration/evaluator/OnnxEvaluator.java @@ -31,7 +31,7 @@ public class OnnxEvaluator { public OnnxEvaluator(String modelPath, OnnxEvaluatorOptions options) { environment = OrtEnvironment.getEnvironment(); - session = createSession(modelPath, environment, options); + session = createSession(modelPath, environment, options, true); } public Tensor evaluate(Map inputs, String output) { @@ -86,18 +86,22 @@ public class OnnxEvaluator { } } - private static OrtSession createSession(String modelPath, OrtEnvironment environment, OnnxEvaluatorOptions options) { + private static OrtSession createSession(String modelPath, OrtEnvironment environment, OnnxEvaluatorOptions options, boolean tryCuda) { if (options == null) { options = new OnnxEvaluatorOptions(); } try { - return environment.createSession(modelPath, options.getOptions()); + return environment.createSession(modelPath, options.getOptions(tryCuda && options.requestingGpu())); } catch (OrtException e) { if (e.getCode() == OrtException.OrtErrorCode.ORT_NO_SUCHFILE) { throw new IllegalArgumentException("No such file: " + modelPath); } + if (tryCuda && isCudaError(e) && !options.gpuDeviceRequired()) { + // Failed in CUDA native code, but GPU device is optional, so we can proceed without it + return createSession(modelPath, environment, options, false); + } if (isCudaError(e)) { - throw new IllegalArgumentException("GPU device " + options.gpuDevice() + " requested, but CUDA initialization failed", e); + throw new IllegalArgumentException("GPU device is requested, but CUDA initialization failed", e); } throw new RuntimeException("ONNX Runtime exception", e); } diff --git a/model-integration/src/main/java/ai/vespa/modelintegration/evaluator/OnnxEvaluatorOptions.java b/model-integration/src/main/java/ai/vespa/modelintegration/evaluator/OnnxEvaluatorOptions.java index f838a3b3f7f..b6de9698f1a 100644 --- a/model-integration/src/main/java/ai/vespa/modelintegration/evaluator/OnnxEvaluatorOptions.java +++ b/model-integration/src/main/java/ai/vespa/modelintegration/evaluator/OnnxEvaluatorOptions.java @@ -16,7 +16,8 @@ public class OnnxEvaluatorOptions { private OrtSession.SessionOptions.ExecutionMode executionMode; private int interOpThreads; private int intraOpThreads; - private int gpuDevice; + private int gpuDeviceNumber; + private boolean gpuDeviceRequired; public OnnxEvaluatorOptions() { // Defaults: @@ -24,17 +25,18 @@ public class OnnxEvaluatorOptions { executionMode = OrtSession.SessionOptions.ExecutionMode.SEQUENTIAL; interOpThreads = 1; intraOpThreads = Math.max(1, (int) Math.ceil(((double) Runtime.getRuntime().availableProcessors()) / 4)); - gpuDevice = -1; + gpuDeviceNumber = -1; + gpuDeviceRequired = false; } - public OrtSession.SessionOptions getOptions() throws OrtException { + public OrtSession.SessionOptions getOptions(boolean loadCuda) throws OrtException { OrtSession.SessionOptions options = new OrtSession.SessionOptions(); options.setOptimizationLevel(optimizationLevel); options.setExecutionMode(executionMode); options.setInterOpNumThreads(interOpThreads); options.setIntraOpNumThreads(intraOpThreads); - if (gpuDevice > -1) { - options.addCUDA(gpuDevice); + if (loadCuda) { + options.addCUDA(gpuDeviceNumber); } return options; } @@ -59,12 +61,17 @@ public class OnnxEvaluatorOptions { } } - public void setGpuDevice(int deviceNumber) { - this.gpuDevice = deviceNumber; + public void setGpuDevice(int deviceNumber, boolean required) { + this.gpuDeviceNumber = deviceNumber; + this.gpuDeviceRequired = required; } - public int gpuDevice() { - return gpuDevice; + public boolean requestingGpu() { + return gpuDeviceNumber > -1; + } + + public boolean gpuDeviceRequired() { + return gpuDeviceRequired; } } diff --git a/searchcore/src/vespa/searchcore/config/onnx-models.def b/searchcore/src/vespa/searchcore/config/onnx-models.def index 85b061fcd7c..b8f5d319075 100644 --- a/searchcore/src/vespa/searchcore/config/onnx-models.def +++ b/searchcore/src/vespa/searchcore/config/onnx-models.def @@ -12,3 +12,4 @@ model[].stateless_execution_mode string default="" model[].stateless_interop_threads int default=-1 model[].stateless_intraop_threads int default=-1 model[].gpu_device int default=-1 +model[].gpu_device_required bool default=false -- cgit v1.2.3