diff options
Diffstat (limited to 'config-model')
15 files changed, 332 insertions, 261 deletions
diff --git a/config-model/src/main/java/com/yahoo/schema/ApplicationBuilder.java b/config-model/src/main/java/com/yahoo/schema/ApplicationBuilder.java index 9184eb3c4be..046b2aa8491 100644 --- a/config-model/src/main/java/com/yahoo/schema/ApplicationBuilder.java +++ b/config-model/src/main/java/com/yahoo/schema/ApplicationBuilder.java @@ -28,6 +28,7 @@ import java.nio.file.Files; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.Comparator; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -134,7 +135,7 @@ public class ApplicationBuilder { this.properties = properties; this.documentsOnly = documentsOnly; var list = new ArrayList<>(applicationPackage.getSchemas()); - list.sort((a, b) -> a.getName().compareTo(b.getName())); + list.sort(Comparator.comparing(NamedReader::getName)); for (NamedReader reader : list) { addSchema(reader); } diff --git a/config-model/src/main/java/com/yahoo/schema/OnnxModel.java b/config-model/src/main/java/com/yahoo/schema/OnnxModel.java index 3d96849fa15..ae6f1fd96e4 100644 --- a/config-model/src/main/java/com/yahoo/schema/OnnxModel.java +++ b/config-model/src/main/java/com/yahoo/schema/OnnxModel.java @@ -24,7 +24,7 @@ public class OnnxModel extends DistributableResource { private String statelessExecutionMode = null; private Integer statelessInterOpThreads = null; private Integer statelessIntraOpThreads = null; - private Integer gpuDevice = null; + private GpuDevice gpuDevice = null; public OnnxModel(String name) { super(name); @@ -114,9 +114,9 @@ public class OnnxModel extends DistributableResource { } } - public void setGpuDevice(int deviceNumber) { + public void setGpuDevice(int deviceNumber, boolean required) { if (deviceNumber >= 0) { - this.gpuDevice = deviceNumber; + this.gpuDevice = new GpuDevice(deviceNumber, required); } } @@ -124,8 +124,16 @@ public class OnnxModel extends DistributableResource { return Optional.ofNullable(statelessIntraOpThreads); } - public Optional<Integer> getGpuDevice() { + public Optional<GpuDevice> getGpuDevice() { return Optional.ofNullable(gpuDevice); } + public record GpuDevice(int deviceNumber, boolean required) { + + public GpuDevice { + if (deviceNumber < 0) throw new IllegalArgumentException("deviceNumber cannot be negative, got " + deviceNumber); + } + + } + } diff --git a/config-model/src/main/java/com/yahoo/schema/derived/FileDistributedOnnxModels.java b/config-model/src/main/java/com/yahoo/schema/derived/FileDistributedOnnxModels.java index f63e872836e..4196af18fb6 100644 --- a/config-model/src/main/java/com/yahoo/schema/derived/FileDistributedOnnxModels.java +++ b/config-model/src/main/java/com/yahoo/schema/derived/FileDistributedOnnxModels.java @@ -52,7 +52,8 @@ public class FileDistributedOnnxModels { if (model.getStatelessIntraOpThreads().isPresent()) modelBuilder.stateless_intraop_threads(model.getStatelessIntraOpThreads().get()); if (model.getGpuDevice().isPresent()) { - modelBuilder.gpu_device(model.getGpuDevice().get()); + modelBuilder.gpu_device(model.getGpuDevice().get().deviceNumber()); + modelBuilder.gpu_device_required(model.getGpuDevice().get().required()); } builder.model(modelBuilder); } diff --git a/config-model/src/main/java/com/yahoo/schema/parser/IntermediateCollection.java b/config-model/src/main/java/com/yahoo/schema/parser/IntermediateCollection.java index 139d20aac82..f0182cfcf3a 100644 --- a/config-model/src/main/java/com/yahoo/schema/parser/IntermediateCollection.java +++ b/config-model/src/main/java/com/yahoo/schema/parser/IntermediateCollection.java @@ -20,7 +20,7 @@ import java.util.Map; * schemas in the intermediate format. * * @author arnej27959 - **/ + */ public class IntermediateCollection { private final DeployLogger deployLogger; @@ -47,8 +47,11 @@ public class IntermediateCollection { var parser = new SchemaParser(stream, deployLogger, modelProperties); try { var schema = parser.schema(); + if (schema == null) { + throw new IllegalArgumentException("No schema content"); + } if (parsedSchemas.containsKey(schema.name())) { - throw new IllegalArgumentException("Duplicate schemas named: " + schema.name()); + throw new IllegalArgumentException("Duplicate schemas named " + schema.name()); } parsedSchemas.put(schema.name(), schema); return schema; @@ -66,7 +69,7 @@ public class IntermediateCollection { throw new IllegalArgumentException("The file containing schema '" + parsed.name() + "' must be named '" + parsed.name() + ApplicationPackage.SD_NAME_SUFFIX - + "', was '" + stripDirs(fileName) + "'"); + + "', but is '" + stripDirs(fileName) + "'"); } return parsed.name(); } @@ -91,54 +94,51 @@ public class IntermediateCollection { return filename; } - /** - * parse a schema from the given reader and add result to collection - **/ - public String addSchemaFromReader(NamedReader reader) throws ParseException { + /** Parses a schema from the given reader and add result to collection. */ + public String addSchemaFromReader(NamedReader reader) { try { var nameParsed = addSchemaFromStringWithFileName(IOUtils.readAll(reader.getReader()), reader.getName()); reader.close(); return nameParsed; - } catch (ParseException ex) { - throw new ParseException("Failed parsing schema from " + reader.getName() + ": " + ex.getMessage()); - } catch (java.io.IOException ex) { - throw new IllegalArgumentException("Failed reading from " + reader.getName() + ": " + ex.getMessage()); + } catch (ParseException e) { + throw new IllegalArgumentException("Failed parsing schema from '" + reader.getName() + "'", e); + } catch (java.io.IOException e) { + throw new IllegalArgumentException("Failed reading from '" + reader.getName() + "'", e); } } - /** for unit tests */ - public String addSchemaFromFile(String fileName) throws ParseException { + /** For unit tests */ + public String addSchemaFromFile(String fileName) { try { - // return addSchemaFromStringWithFileName(IOUtils.readFile(new File(fileName)), fileName); var parsed = addSchemaFromString(IOUtils.readFile(new File(fileName))); return parsed.name(); - } catch (ParseException ex) { - throw new ParseException("Failed parsing schema from " + fileName + ": " + ex.getMessage()); - } catch (java.io.IOException ex) { - throw new IllegalArgumentException("Could not read file " + fileName + ": " + ex.getMessage()); + } catch (ParseException e) { + throw new IllegalArgumentException("Failed parsing schema from '" + fileName + "'", e); + } catch (java.io.IOException e) { + throw new IllegalArgumentException("Failed reading from '" + fileName + "'", e); } } /** - * parse a rank profile from the given reader and add to the schema identified by name. + * Parses a rank profile from the given reader and add to the schema identified by name. * note: the named schema must have been parsed already. - **/ + */ public void addRankProfileFile(String schemaName, NamedReader reader) throws ParseException { try { ParsedSchema schema = parsedSchemas.get(schemaName); if (schema == null) { - throw new IllegalArgumentException("No schema named: " + schemaName); + throw new IllegalArgumentException("No schema named '" + schemaName + "'"); } var stream = new SimpleCharStream(IOUtils.readAll(reader.getReader())); var parser = new SchemaParser(stream, deployLogger, modelProperties); try { parser.rankProfile(schema); } catch (ParseException pe) { - throw new ParseException("Failed parsing rank-profile from " + reader.getName() + ": " + + throw new ParseException("Failed parsing rank-profile from '" + reader.getName() + "': " + stream.formatException(Exceptions.toMessageString(pe))); } } catch (java.io.IOException ex) { - throw new IllegalArgumentException("Failed reading from " + reader.getName() + ": " + ex.getMessage()); + throw new IllegalArgumentException("Failed reading from '" + reader.getName() + "': " + ex.getMessage()); } } @@ -147,8 +147,8 @@ public class IntermediateCollection { try { var reader = IOUtils.createReader(fileName, "UTF-8"); addRankProfileFile(schemaName, new NamedReader(fileName, reader)); - } catch (java.io.IOException ex) { - throw new IllegalArgumentException("Could not read file " + fileName + ": " + ex.getMessage()); + } catch (java.io.IOException e) { + throw new IllegalArgumentException("Could not read file '" + fileName + "'", e); } } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/AutoscalingMetrics.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/AutoscalingMetrics.java index d09a4a303c2..a3fdce98c73 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/AutoscalingMetrics.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/AutoscalingMetrics.java @@ -2,6 +2,7 @@ package com.yahoo.vespa.model.admin.monitoring; import com.yahoo.metrics.ContainerMetrics; import com.yahoo.metrics.SearchNodeMetrics; +import com.yahoo.metrics.StorageMetrics; import java.util.ArrayList; import java.util.LinkedHashSet; import java.util.List; @@ -39,10 +40,10 @@ public class AutoscalingMetrics { metrics.add(SearchNodeMetrics.CONTENT_PROTON_DOCUMENTDB_MATCHING_QUERIES.rate()); // content // Write rate - metrics.add("feed.http-requests.rate"); // container - metrics.add("vds.filestor.allthreads.put.count.rate"); // content - metrics.add("vds.filestor.allthreads.remove.count.rate"); // content - metrics.add("vds.filestor.allthreads.update.count.rate"); // content + metrics.add(ContainerMetrics.FEED_HTTP_REQUESTS.rate()); // container + metrics.add(StorageMetrics.VDS_FILESTOR_ALLTHREADS_PUT_COUNT.rate()); // content + metrics.add(StorageMetrics.VDS_FILESTOR_ALLTHREADS_REMOVE_COUNT.rate()); // content + metrics.add(StorageMetrics.VDS_FILESTOR_ALLTHREADS_UPDATE_COUNT.rate()); // content return new MetricSet("autoscaling", toMetrics(metrics)); } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/SystemMetrics.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/SystemMetrics.java index 7f52dce6356..0958a3f3908 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/SystemMetrics.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/SystemMetrics.java @@ -24,6 +24,9 @@ public class SystemMetrics { public static final String MEM_TOTAL_USED = "mem_total.used"; public static final String MEM_TOTAL_UTIL = "mem_total.util"; public static final String BANDWIDTH_LIMIT = "bandwidth.limit"; + public static final String GPU_UTIL = "gpu.util"; + public static final String GPU_MEM_USED = "gpu.memory.used"; + public static final String GPU_MEM_TOTAL = "gpu.memory.total"; public static final MetricSet systemMetricSet = createSystemMetricSet(); @@ -42,7 +45,10 @@ public class SystemMetrics { new Metric(MEM_UTIL), new Metric(MEM_TOTAL_USED), new Metric(MEM_TOTAL_UTIL), - new Metric(BANDWIDTH_LIMIT) + new Metric(BANDWIDTH_LIMIT), + new Metric(GPU_UTIL), + new Metric(GPU_MEM_USED), + new Metric(GPU_MEM_TOTAL) ); Set<Metric> nonDockerNodeMetrics = @@ -61,4 +67,5 @@ public class SystemMetrics { return new MetricSet("system", systemMetrics); } + } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java index cf199ef95a8..e1bd114c4d3 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java @@ -2,7 +2,9 @@ package com.yahoo.vespa.model.admin.monitoring; import com.yahoo.metrics.ContainerMetrics; +import com.yahoo.metrics.DistributorMetrics; import com.yahoo.metrics.SearchNodeMetrics; +import com.yahoo.metrics.StorageMetrics; import com.yahoo.metrics.Suffix; import java.util.Collections; @@ -538,164 +540,166 @@ public class VespaMetricSet { private static Set<Metric> getStorageMetrics() { Set<Metric> metrics = new LinkedHashSet<>(); - // TODO: For the purpose of this file and likely elsewhere, all but the last aggregate specifier, - // TODO: such as 'average' and 'sum' in the metric names below are just confusing and can be mentally - // TODO: disregarded when considering metric names. Consider cleaning up for Vespa 9. - addMetric(metrics, "vds.datastored.alldisks.buckets.average"); - addMetric(metrics, "vds.datastored.alldisks.docs.average"); - addMetric(metrics, "vds.datastored.alldisks.bytes.average"); - addMetric(metrics, "vds.visitor.allthreads.averagevisitorlifetime", List.of("max", "sum", "count")); - addMetric(metrics, "vds.visitor.allthreads.averagequeuewait", List.of("max", "sum", "count")); - addMetric(metrics, "vds.visitor.allthreads.queuesize", List.of("max", "sum", "count")); - addMetric(metrics, "vds.visitor.allthreads.completed.rate"); - addMetric(metrics, "vds.visitor.allthreads.created.rate"); - addMetric(metrics, "vds.visitor.allthreads.failed.rate"); - addMetric(metrics, "vds.visitor.allthreads.averagemessagesendtime", List.of("max", "sum", "count")); - addMetric(metrics, "vds.visitor.allthreads.averageprocessingtime", List.of("max", "sum", "count")); - - addMetric(metrics, "vds.filestor.queuesize", List.of("max", "sum", "count")); - addMetric(metrics, "vds.filestor.averagequeuewait", List.of("max", "sum", "count")); - addMetric(metrics, "vds.filestor.active_operations.size", List.of("max", "sum", "count")); - addMetric(metrics, "vds.filestor.active_operations.latency", List.of("max", "sum", "count")); - addMetric(metrics, "vds.filestor.throttle_window_size", List.of("max", "sum", "count")); - addMetric(metrics, "vds.filestor.throttle_waiting_threads", List.of("max", "sum", "count")); - addMetric(metrics, "vds.filestor.throttle_active_tokens", List.of("max", "sum", "count")); - addMetric(metrics, "vds.filestor.allthreads.mergemetadatareadlatency", List.of("max", "sum", "count")); - addMetric(metrics, "vds.filestor.allthreads.mergedatareadlatency", List.of("max", "sum", "count")); - addMetric(metrics, "vds.filestor.allthreads.mergedatawritelatency", List.of("max", "sum", "count")); - addMetric(metrics, "vds.filestor.allthreads.put_latency", List.of("max", "sum", "count")); - addMetric(metrics, "vds.filestor.allthreads.remove_latency", List.of("max", "sum", "count")); - addMetric(metrics, "vds.filestor.allstripes.throttled_rpc_direct_dispatches.rate"); - addMetric(metrics, "vds.filestor.allstripes.throttled_persistence_thread_polls.rate"); - addMetric(metrics, "vds.filestor.allstripes.timeouts_waiting_for_throttle_token.rate"); - - addMetric(metrics, "vds.filestor.allthreads.put.count.rate"); - addMetric(metrics, "vds.filestor.allthreads.put.failed.rate"); - addMetric(metrics, "vds.filestor.allthreads.put.test_and_set_failed.rate"); - addMetric(metrics, "vds.filestor.allthreads.put.latency", List.of("max", "sum", "count")); - addMetric(metrics, "vds.filestor.allthreads.put.request_size", List.of("max", "sum", "count")); - addMetric(metrics, "vds.filestor.allthreads.remove.count.rate"); - addMetric(metrics, "vds.filestor.allthreads.remove.failed.rate"); - addMetric(metrics, "vds.filestor.allthreads.remove.test_and_set_failed.rate"); - addMetric(metrics, "vds.filestor.allthreads.remove.latency", List.of("max", "sum", "count")); - addMetric(metrics, "vds.filestor.allthreads.remove.request_size", List.of("max", "sum", "count")); - addMetric(metrics, "vds.filestor.allthreads.get.count.rate"); - addMetric(metrics, "vds.filestor.allthreads.get.failed.rate"); - addMetric(metrics, "vds.filestor.allthreads.get.latency", List.of("max", "sum", "count")); - addMetric(metrics, "vds.filestor.allthreads.get.request_size", List.of("max", "sum", "count")); - addMetric(metrics, "vds.filestor.allthreads.update.count.rate"); - addMetric(metrics, "vds.filestor.allthreads.update.failed.rate"); - addMetric(metrics, "vds.filestor.allthreads.update.test_and_set_failed.rate"); - addMetric(metrics, "vds.filestor.allthreads.update.latency", List.of("max", "sum", "count")); - addMetric(metrics, "vds.filestor.allthreads.update.request_size", List.of("max", "sum", "count")); - addMetric(metrics, "vds.filestor.allthreads.createiterator.latency", List.of("max", "sum", "count")); - addMetric(metrics, "vds.filestor.allthreads.createiterator.count.rate"); - addMetric(metrics, "vds.filestor.allthreads.visit.count.rate"); - addMetric(metrics, "vds.filestor.allthreads.visit.latency", List.of("max", "sum", "count")); - addMetric(metrics, "vds.filestor.allthreads.remove_location.count.rate"); - addMetric(metrics, "vds.filestor.allthreads.remove_location.latency", List.of("max", "sum", "count")); - addMetric(metrics, "vds.filestor.allthreads.splitbuckets.count.rate"); - addMetric(metrics, "vds.filestor.allthreads.joinbuckets.count.rate"); - addMetric(metrics, "vds.filestor.allthreads.deletebuckets.count.rate"); - addMetric(metrics, "vds.filestor.allthreads.deletebuckets.failed.rate"); - addMetric(metrics, "vds.filestor.allthreads.deletebuckets.latency", List.of("max", "sum", "count")); - addMetric(metrics, "vds.filestor.allthreads.setbucketstates.count.rate"); + // TODO - Vespa 9: For the purpose of this file and likely elsewhere, all but the last aggregate specifier, + // TODO - Vespa 9: such as 'average' and 'sum' in the metric names below are just confusing and can be mentally + // TODO - Vespa 9: disregarded when considering metric names. Clean up for Vespa 9. + addMetric(metrics, StorageMetrics.VDS_DATASTORED_ALLDISKS_BUCKETS.average()); + addMetric(metrics, StorageMetrics.VDS_DATASTORED_ALLDISKS_DOCS.average()); + addMetric(metrics, StorageMetrics.VDS_DATASTORED_ALLDISKS_BYTES.average()); + addMetric(metrics, StorageMetrics.VDS_VISITOR_ALLTHREADS_AVERAGEVISITORLIFETIME, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_VISITOR_ALLTHREADS_AVERAGEQUEUEWAIT, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_VISITOR_ALLTHREADS_QUEUESIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_VISITOR_ALLTHREADS_COMPLETED.rate()); + addMetric(metrics, StorageMetrics.VDS_VISITOR_ALLTHREADS_CREATED.rate()); + addMetric(metrics, StorageMetrics.VDS_VISITOR_ALLTHREADS_FAILED.rate()); + addMetric(metrics, StorageMetrics.VDS_VISITOR_ALLTHREADS_AVERAGEMESSAGESENDTIME, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_VISITOR_ALLTHREADS_AVERAGEPROCESSINGTIME, EnumSet.of(max, sum, count)); + + addMetric(metrics, StorageMetrics.VDS_FILESTOR_QUEUESIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_AVERAGEQUEUEWAIT, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ACTIVE_OPERATIONS_SIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ACTIVE_OPERATIONS_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_THROTTLE_WINDOW_SIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_THROTTLE_WAITING_THREADS, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_THROTTLE_ACTIVE_TOKENS, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_MERGEMETADATAREADLATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_MERGEDATAREADLATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_MERGEDATAWRITELATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_MERGE_PUT_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_MERGE_REMOVE_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLSTRIPES_THROTTLED_RPC_DIRECT_DISPATCHES.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLSTRIPES_THROTTLED_PERSISTENCE_THREAD_POLLS.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLSTRIPES_TIMEOUTS_WAITING_FOR_THROTTLE_TOKEN.rate()); + + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_PUT_COUNT.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_PUT_FAILED.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_PUT_TEST_AND_SET_FAILED.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_PUT_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_PUT_REQUEST_SIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_REMOVE_COUNT.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_REMOVE_FAILED.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_REMOVE_TEST_AND_SET_FAILED.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_REMOVE_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_REMOVE_REQUEST_SIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_GET_COUNT.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_GET_FAILED.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_GET_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_GET_REQUEST_SIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_UPDATE_COUNT.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_UPDATE_FAILED.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_UPDATE_TEST_AND_SET_FAILED.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_UPDATE_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_UPDATE_REQUEST_SIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_CREATEITERATOR_COUNT.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_CREATEITERATOR_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_VISIT_COUNT.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_VISIT_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_REMOVE_LOCATION_COUNT.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_REMOVE_LOCATION_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_SPLITBUCKETS_COUNT.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_JOINBUCKETS_COUNT.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_DELETEBUCKETS_COUNT.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_DELETEBUCKETS_FAILED.rate()); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_DELETEBUCKETS_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_FILESTOR_ALLTHREADS_SETBUCKETSTATES_COUNT.rate()); + + addMetric(metrics, StorageMetrics.VDS_MERGETHROTTLER_AVERAGEQUEUEWAITINGTIME, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_MERGETHROTTLER_QUEUESIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_MERGETHROTTLER_ACTIVE_WINDOW_SIZE, EnumSet.of(max, sum, count)); + addMetric(metrics, StorageMetrics.VDS_MERGETHROTTLER_BOUNCED_DUE_TO_BACK_PRESSURE.rate()); + addMetric(metrics, StorageMetrics.VDS_MERGETHROTTLER_LOCALLYEXECUTEDMERGES_OK.rate()); + addMetric(metrics, StorageMetrics.VDS_MERGETHROTTLER_MERGECHAINS_OK.rate()); + addMetric(metrics, StorageMetrics.VDS_MERGETHROTTLER_MERGECHAINS_FAILURES_BUSY.rate()); + addMetric(metrics, StorageMetrics.VDS_MERGETHROTTLER_MERGECHAINS_FAILURES_TOTAL.rate()); + return metrics; } private static Set<Metric> getDistributorMetrics() { Set<Metric> metrics = new LinkedHashSet<>(); - addMetric(metrics, "vds.idealstate.buckets_rechecking.average"); - addMetric(metrics, "vds.idealstate.idealstate_diff.average"); - addMetric(metrics, "vds.idealstate.buckets_toofewcopies.average"); - addMetric(metrics, "vds.idealstate.buckets_toomanycopies.average"); - addMetric(metrics, "vds.idealstate.buckets.average"); - addMetric(metrics, "vds.idealstate.buckets_notrusted.average"); - addMetric(metrics, "vds.idealstate.bucket_replicas_moving_out.average"); - addMetric(metrics, "vds.idealstate.bucket_replicas_copying_out.average"); - addMetric(metrics, "vds.idealstate.bucket_replicas_copying_in.average"); - addMetric(metrics, "vds.idealstate.bucket_replicas_syncing.average"); - addMetric(metrics, "vds.idealstate.max_observed_time_since_last_gc_sec.average"); - addMetric(metrics, "vds.idealstate.delete_bucket.done_ok.rate"); - addMetric(metrics, "vds.idealstate.delete_bucket.done_failed.rate"); - addMetric(metrics, "vds.idealstate.delete_bucket.pending.average"); - addMetric(metrics, "vds.idealstate.merge_bucket.done_ok.rate"); - addMetric(metrics, "vds.idealstate.merge_bucket.done_failed.rate"); - addMetric(metrics, "vds.idealstate.merge_bucket.pending.average"); - addMetric(metrics, "vds.idealstate.merge_bucket.blocked.rate"); - addMetric(metrics, "vds.idealstate.merge_bucket.throttled.rate"); - addMetric(metrics, "vds.idealstate.merge_bucket.source_only_copy_changed.rate"); - addMetric(metrics, "vds.idealstate.merge_bucket.source_only_copy_delete_blocked.rate"); - addMetric(metrics, "vds.idealstate.merge_bucket.source_only_copy_delete_failed.rate"); - addMetric(metrics, "vds.idealstate.split_bucket.done_ok.rate"); - addMetric(metrics, "vds.idealstate.split_bucket.done_failed.rate"); - addMetric(metrics, "vds.idealstate.split_bucket.pending.average"); - addMetric(metrics, "vds.idealstate.join_bucket.done_ok.rate"); - addMetric(metrics, "vds.idealstate.join_bucket.done_failed.rate"); - addMetric(metrics, "vds.idealstate.join_bucket.pending.average"); - addMetric(metrics, "vds.idealstate.garbage_collection.done_ok.rate"); - addMetric(metrics, "vds.idealstate.garbage_collection.done_failed.rate"); - addMetric(metrics, "vds.idealstate.garbage_collection.pending.average"); - addMetric(metrics, "vds.idealstate.garbage_collection.documents_removed", List.of("count", "rate")); - - addMetric(metrics, "vds.distributor.puts.latency", List.of("max", "sum", "count")); - addMetric(metrics, "vds.distributor.puts.ok.rate"); - addMetric(metrics, "vds.distributor.puts.failures.total.rate"); - addMetric(metrics, "vds.distributor.puts.failures.notfound.rate"); - addMetric(metrics, "vds.distributor.puts.failures.test_and_set_failed.rate"); - addMetric(metrics, "vds.distributor.puts.failures.concurrent_mutations.rate"); - addMetric(metrics, "vds.distributor.puts.failures.notconnected.rate"); - addMetric(metrics, "vds.distributor.puts.failures.notready.rate"); - addMetric(metrics, "vds.distributor.puts.failures.wrongdistributor.rate"); - addMetric(metrics, "vds.distributor.puts.failures.safe_time_not_reached.rate"); - addMetric(metrics, "vds.distributor.puts.failures.storagefailure.rate"); - addMetric(metrics, "vds.distributor.puts.failures.timeout.rate"); - addMetric(metrics, "vds.distributor.puts.failures.busy.rate"); - addMetric(metrics, "vds.distributor.puts.failures.inconsistent_bucket.rate"); - addMetric(metrics, "vds.distributor.removes.latency", List.of("max", "sum", "count")); - addMetric(metrics, "vds.distributor.removes.ok.rate"); - addMetric(metrics, "vds.distributor.removes.failures.total.rate"); - addMetric(metrics, "vds.distributor.removes.failures.notfound.rate"); - addMetric(metrics, "vds.distributor.removes.failures.test_and_set_failed.rate"); - addMetric(metrics, "vds.distributor.removes.failures.concurrent_mutations.rate"); - addMetric(metrics, "vds.distributor.updates.latency", List.of("max", "sum", "count")); - addMetric(metrics, "vds.distributor.updates.ok.rate"); - addMetric(metrics, "vds.distributor.updates.failures.total.rate"); - addMetric(metrics, "vds.distributor.updates.failures.notfound.rate"); - addMetric(metrics, "vds.distributor.updates.failures.test_and_set_failed.rate"); - addMetric(metrics, "vds.distributor.updates.failures.concurrent_mutations.rate"); - addMetric(metrics, "vds.distributor.updates.diverging_timestamp_updates.rate"); - addMetric(metrics, "vds.distributor.removelocations.ok.rate"); - addMetric(metrics, "vds.distributor.removelocations.failures.total.rate"); - addMetric(metrics, "vds.distributor.gets.latency", List.of("max", "sum", "count")); - addMetric(metrics, "vds.distributor.gets.ok.rate"); - addMetric(metrics, "vds.distributor.gets.failures.total.rate"); - addMetric(metrics, "vds.distributor.gets.failures.notfound.rate"); - addMetric(metrics, "vds.distributor.visitor.latency", List.of("max", "sum", "count")); - addMetric(metrics, "vds.distributor.visitor.ok.rate"); - addMetric(metrics, "vds.distributor.visitor.failures.total.rate"); - addMetric(metrics, "vds.distributor.visitor.failures.notready.rate"); - addMetric(metrics, "vds.distributor.visitor.failures.notconnected.rate"); - addMetric(metrics, "vds.distributor.visitor.failures.wrongdistributor.rate"); - addMetric(metrics, "vds.distributor.visitor.failures.safe_time_not_reached.rate"); - addMetric(metrics, "vds.distributor.visitor.failures.storagefailure.rate"); - addMetric(metrics, "vds.distributor.visitor.failures.timeout.rate"); - addMetric(metrics, "vds.distributor.visitor.failures.busy.rate"); - addMetric(metrics, "vds.distributor.visitor.failures.inconsistent_bucket.rate"); - addMetric(metrics, "vds.distributor.visitor.failures.notfound.rate"); - - addMetric(metrics, "vds.distributor.docsstored.average"); - addMetric(metrics, "vds.distributor.bytesstored.average"); - - addMetric(metrics, "vds.bouncer.clock_skew_aborts.count"); - - addMetric(metrics, "vds.mergethrottler.averagequeuewaitingtime", List.of("max", "sum", "count")); - addMetric(metrics, "vds.mergethrottler.queuesize", List.of("max", "sum", "count")); - addMetric(metrics, "vds.mergethrottler.active_window_size", List.of("max", "sum", "count")); - addMetric(metrics, "vds.mergethrottler.bounced_due_to_back_pressure.rate"); - addMetric(metrics, "vds.mergethrottler.locallyexecutedmerges.ok.rate"); - addMetric(metrics, "vds.mergethrottler.mergechains.ok.rate"); - addMetric(metrics, "vds.mergethrottler.mergechains.failures.busy.rate"); - addMetric(metrics, "vds.mergethrottler.mergechains.failures.total.rate"); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_BUCKETS_RECHECKING.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_IDEALSTATE_DIFF.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_BUCKETS_TOOFEWCOPIES.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_BUCKETS_TOOMANYCOPIES.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_BUCKETS.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_BUCKETS_NOTRUSTED.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_BUCKET_REPLICAS_MOVING_OUT.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_BUCKET_REPLICAS_COPYING_OUT.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_BUCKET_REPLICAS_COPYING_IN.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_BUCKET_REPLICAS_SYNCING.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_MAX_OBSERVED_TIME_SINCE_LAST_GC_SEC.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_DELETE_BUCKET_DONE_OK.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_DELETE_BUCKET_DONE_FAILED.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_DELETE_BUCKET_PENDING.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_MERGE_BUCKET_DONE_OK.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_MERGE_BUCKET_DONE_FAILED.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_MERGE_BUCKET_PENDING.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_MERGE_BUCKET_BLOCKED.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_MERGE_BUCKET_THROTTLED.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_MERGE_BUCKET_SOURCE_ONLY_COPY_CHANGED.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_MERGE_BUCKET_SOURCE_ONLY_COPY_DELETE_BLOCKED.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_MERGE_BUCKET_SOURCE_ONLY_COPY_DELETE_FAILED.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_SPLIT_BUCKET_DONE_OK.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_SPLIT_BUCKET_DONE_FAILED.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_SPLIT_BUCKET_PENDING.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_JOIN_BUCKET_DONE_OK.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_JOIN_BUCKET_DONE_FAILED.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_JOIN_BUCKET_PENDING.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_GARBAGE_COLLECTION_DONE_OK.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_GARBAGE_COLLECTION_DONE_FAILED.rate()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_GARBAGE_COLLECTION_PENDING.average()); + addMetric(metrics, DistributorMetrics.VDS_IDEALSTATE_GARBAGE_COLLECTION_DOCUMENTS_REMOVED, EnumSet.of(count, rate)); + + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_OK.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_TOTAL.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_NOTFOUND.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_TEST_AND_SET_FAILED.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_CONCURRENT_MUTATIONS.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_NOTCONNECTED.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_NOTREADY.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_WRONGDISTRIBUTOR.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_SAFE_TIME_NOT_REACHED.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_STORAGEFAILURE.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_TIMEOUT.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_BUSY.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_PUTS_FAILURES_INCONSISTENT_BUCKET.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_REMOVES_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_REMOVES_OK.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_REMOVES_FAILURES_TOTAL.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_REMOVES_FAILURES_NOTFOUND.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_REMOVES_FAILURES_TEST_AND_SET_FAILED.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_REMOVES_FAILURES_CONCURRENT_MUTATIONS.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_UPDATES_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_UPDATES_OK.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_UPDATES_FAILURES_TOTAL.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_UPDATES_FAILURES_NOTFOUND.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_UPDATES_FAILURES_TEST_AND_SET_FAILED.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_UPDATES_FAILURES_CONCURRENT_MUTATIONS.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_UPDATES_DIVERGING_TIMESTAMP_UPDATES.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_REMOVELOCATIONS_OK.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_REMOVELOCATIONS_FAILURES_TOTAL.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_GETS_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_GETS_OK.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_GETS_FAILURES_TOTAL.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_GETS_FAILURES_NOTFOUND.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_OK.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_FAILURES_TOTAL.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_FAILURES_NOTREADY.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_FAILURES_NOTCONNECTED.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_FAILURES_WRONGDISTRIBUTOR.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_FAILURES_SAFE_TIME_NOT_REACHED.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_FAILURES_STORAGEFAILURE.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_FAILURES_TIMEOUT.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_FAILURES_BUSY.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_FAILURES_INCONSISTENT_BUCKET.rate()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_VISITOR_FAILURES_NOTFOUND.rate()); + + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_DOCSSTORED.average()); + addMetric(metrics, DistributorMetrics.VDS_DISTRIBUTOR_BYTESSTORED.average()); + + addMetric(metrics, DistributorMetrics.VDS_BOUNCER_CLOCK_SKEW_ABORTS.count()); + return metrics; } @@ -711,6 +715,13 @@ public class VespaMetricSet { suffixes.forEach(suffix -> metrics.add(new Metric(metric.baseName() + "." + suffix.suffix()))); } + private static void addMetric(Set<Metric> metrics, StorageMetrics metric, EnumSet<Suffix> suffixes) { + suffixes.forEach(suffix -> metrics.add(new Metric(metric.baseName() + "." + suffix.suffix()))); + } + + private static void addMetric(Set<Metric> metrics, DistributorMetrics metric, EnumSet<Suffix> suffixes) { + suffixes.forEach(suffix -> metrics.add(new Metric(metric.baseName() + "." + suffix.suffix()))); + } private static void addMetric(Set<Metric> metrics, String metricName, Iterable<String> aggregateSuffices) { for (String suffix : aggregateSuffices) { metrics.add(new Metric(metricName + "." + suffix)); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/Container.java b/config-model/src/main/java/com/yahoo/vespa/model/container/Container.java index 1b5ec9e03be..0e4726c8cd5 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/Container.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/Container.java @@ -299,7 +299,7 @@ public abstract class Container extends AbstractService implements } public Optional<String> getStartupCommand() { - return Optional.of("PRELOAD=" + getPreLoad() + " exec ${VESPA_HOME}/libexec/vespa/script-utils vespa-start-container-daemon " + getJvmOptions() + " "); + return Optional.of("PRELOAD=" + getPreLoad() + " exec ${VESPA_HOME}/libexec/vespa/vespa-wrapper vespa-start-container-daemon " + getJvmOptions() + " "); } @Override diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java index 700393e84f3..59c0b668057 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java @@ -207,9 +207,6 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { addConfiguredComponents(deployState, cluster, spec); addSecretStore(cluster, spec, deployState); - addModelEvaluation(spec, cluster, context); - addModelEvaluationBundles(cluster); - addProcessing(deployState, spec, cluster, context); addSearch(deployState, spec, cluster, context); addDocproc(deployState, spec, cluster); @@ -225,6 +222,9 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { addAccessLogs(deployState, cluster, spec); addNodes(cluster, spec, context); + addModelEvaluation(spec, cluster, context); // NOTE: Must be done after addNodes + addModelEvaluationBundles(cluster); + addServerProviders(deployState, spec, cluster); if (!standaloneBuilder) cluster.addAllPlatformBundles(); @@ -685,7 +685,13 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { onnxModel.setStatelessExecutionMode(getStringValue(modelElement, "execution-mode", null)); onnxModel.setStatelessInterOpThreads(getIntValue(modelElement, "interop-threads", -1)); onnxModel.setStatelessIntraOpThreads(getIntValue(modelElement, "intraop-threads", -1)); - onnxModel.setGpuDevice(getIntValue(modelElement, "gpu-device", -1)); + Element gpuDeviceElement = XML.getChild(modelElement, "gpu-device"); + if (gpuDeviceElement != null) { + int gpuDevice = Integer.parseInt(gpuDeviceElement.getTextContent()); + boolean hasGpu = cluster.getContainers().stream().anyMatch(container -> container.getHostResource() != null && + !container.getHostResource().realResources().gpuResources().isZero()); + onnxModel.setGpuDevice(gpuDevice, hasGpu); + } } cluster.setModelEvaluation(new ContainerModelEvaluation(cluster, profiles)); diff --git a/config-model/src/test/cfg/application/onnx/services.xml b/config-model/src/test/cfg/application/onnx/services.xml index 088bbcc4921..8c60be77ff5 100644 --- a/config-model/src/test/cfg/application/onnx/services.xml +++ b/config-model/src/test/cfg/application/onnx/services.xml @@ -8,17 +8,19 @@ <models> <model name="mul"> <intraop-threads>2</intraop-threads> + <gpu-device>0</gpu-device> </model> <model name="non-existent-model"> <interop-threads>400</interop-threads> <execution-mode>parallel</execution-mode> - <gpu-device>0</gpu-device> </model> </models> </onnx> </model-evaluation> - <nodes> - <node hostalias="node1" /> + <nodes count="2"> + <resources vcpu="4" memory="16Gb" disk="125Gb"> + <gpu count="1" memory="16Gb"/> + </resources> </nodes> </container> @@ -27,9 +29,6 @@ <documents> <document mode="index" type="test"/> </documents> - <nodes> - <node distribution-key="0" hostalias="node1" /> - </nodes> </content> </services> diff --git a/config-model/src/test/java/com/yahoo/schema/IndexingParsingTestCase.java b/config-model/src/test/java/com/yahoo/schema/IndexingParsingTestCase.java index fcbea179810..f06b1a73e8e 100644 --- a/config-model/src/test/java/com/yahoo/schema/IndexingParsingTestCase.java +++ b/config-model/src/test/java/com/yahoo/schema/IndexingParsingTestCase.java @@ -2,9 +2,13 @@ package com.yahoo.schema; import com.yahoo.schema.parser.ParseException; +import com.yahoo.yolean.Exceptions; import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; /** * Tests that indexing statements are parsed correctly. @@ -22,10 +26,9 @@ public class IndexingParsingTestCase extends AbstractSchemaTestCase { void requireThatParseExceptionPositionIsCorrect() throws Exception { try { ApplicationBuilder.buildFromFile("src/test/examples/indexing_invalid_expression.sd"); - } catch (ParseException e) { - if (!e.getMessage().contains("at line 5, column 57.")) { - throw e; - } + fail(); + } catch (IllegalArgumentException e) { + assertTrue(Exceptions.toMessageString(e).contains("at line 5, column 57.")); } } diff --git a/config-model/src/test/java/com/yahoo/schema/SchemaParsingTestCase.java b/config-model/src/test/java/com/yahoo/schema/SchemaParsingTestCase.java index c8657df23ce..04c78773831 100644 --- a/config-model/src/test/java/com/yahoo/schema/SchemaParsingTestCase.java +++ b/config-model/src/test/java/com/yahoo/schema/SchemaParsingTestCase.java @@ -5,9 +5,11 @@ import java.io.IOException; import com.yahoo.schema.parser.ParseException; +import com.yahoo.yolean.Exceptions; import org.junit.jupiter.api.Test; import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; /** @@ -27,10 +29,8 @@ public class SchemaParsingTestCase extends AbstractSchemaTestCase { void requireThatParseExceptionPositionIsCorrect() throws Exception { try { ApplicationBuilder.buildFromFile("src/test/examples/invalid_sd_construct.sd"); - } catch (ParseException e) { - if (!e.getMessage().contains("at line 5, column 36.")) { - throw e; - } + } catch (IllegalArgumentException e) { + assertTrue(Exceptions.toMessageString(e).contains("at line 5, column 36.")); } } @@ -38,10 +38,8 @@ public class SchemaParsingTestCase extends AbstractSchemaTestCase { void requireThatParserHandlesLexicalError() throws Exception { try { ApplicationBuilder.buildFromFile("src/test/examples/invalid_sd_lexical_error.sd"); - } catch (ParseException e) { - if (!e.getMessage().contains("at line 7, column 27.")) { - throw e; - } + } catch (IllegalArgumentException e) { + assertTrue(Exceptions.toMessageString(e).contains("at line 7, column 27.")); } } @@ -50,10 +48,8 @@ public class SchemaParsingTestCase extends AbstractSchemaTestCase { try { ApplicationBuilder.buildFromFile("src/test/examples/invalid_sd_junk_at_end.sd"); fail("Illegal junk at end of SD passed"); - } catch (ParseException e) { - if (!e.getMessage().contains("at line 10, column 1")) { - throw e; - } + } catch (IllegalArgumentException e) { + assertTrue(Exceptions.toMessageString(e).contains("at line 10, column 1")); } } @@ -62,10 +58,8 @@ public class SchemaParsingTestCase extends AbstractSchemaTestCase { try { ApplicationBuilder.buildFromFile("src/test/examples/invalid_sd_no_closing_bracket.sd"); fail("SD without closing bracket passed"); - } catch (ParseException e) { - if (!e.getMessage().contains("Encountered \"<EOF>\" at line 8, column 1")) { - throw e; - } + } catch (IllegalArgumentException e) { + assertTrue(Exceptions.toMessageString(e).contains("Encountered \"<EOF>\" at line 8, column 1")); } } @@ -74,10 +68,8 @@ public class SchemaParsingTestCase extends AbstractSchemaTestCase { try { ApplicationBuilder.buildFromFile("src/test/examples/invalid-name.sd"); fail("Name with dash passed"); - } catch (ParseException e) { - if (!e.getMessage().contains("invalid-name")) { - throw e; - } + } catch (IllegalArgumentException e) { + assertTrue(Exceptions.toMessageString(e).contains("invalid-name")); } } diff --git a/config-model/src/test/java/com/yahoo/schema/parser/IntermediateCollectionTestCase.java b/config-model/src/test/java/com/yahoo/schema/parser/IntermediateCollectionTestCase.java index 6ebfea41d84..72af294d384 100644 --- a/config-model/src/test/java/com/yahoo/schema/parser/IntermediateCollectionTestCase.java +++ b/config-model/src/test/java/com/yahoo/schema/parser/IntermediateCollectionTestCase.java @@ -10,6 +10,7 @@ import java.io.File; import java.io.FileReader; import java.util.List; +import com.yahoo.yolean.Exceptions; import org.junit.jupiter.api.Test; import static org.junit.jupiter.api.Assertions.*; @@ -131,24 +132,24 @@ public class IntermediateCollectionTestCase { var collection = new IntermediateCollection(); var ex = assertThrows(IllegalArgumentException.class, () -> collection.addSchemaFromReader(readerOf("src/test/cfg/application/sdfilenametest/schemas/notmusic.sd"))); - assertEquals("The file containing schema 'music' must be named 'music.sd', was 'notmusic.sd'", + assertEquals("The file containing schema 'music' must be named 'music.sd', but is 'notmusic.sd'", ex.getMessage()); } @Test void bad_parse_throws() throws Exception { var collection = new IntermediateCollection(); - var ex = assertThrows(ParseException.class, () -> + var ex1 = assertThrows(IllegalArgumentException.class, () -> collection.addSchemaFromFile("src/test/examples/badparse.sd")); - assertTrue(ex.getMessage().startsWith("Failed parsing schema from src/test/examples/badparse.sd: Encountered")); - ex = assertThrows(ParseException.class, () -> + assertTrue(Exceptions.toMessageString(ex1).startsWith("Failed parsing schema from 'src/test/examples/badparse.sd': Encountered"), ex1.getMessage()); + var ex2 = assertThrows(IllegalArgumentException.class, () -> collection.addSchemaFromReader(readerOf("src/test/examples/badparse.sd"))); - assertTrue(ex.getMessage().startsWith("Failed parsing schema from src/test/examples/badparse.sd: Encountered")); + assertTrue(Exceptions.toMessageString(ex2).startsWith("Failed parsing schema from 'src/test/examples/badparse.sd': Encountered"), ex2.getMessage()); collection.addSchemaFromFile("src/test/derived/rankprofilemodularity/test.sd"); collection.addRankProfileFile("test", "src/test/derived/rankprofilemodularity/test/outside_schema1.profile"); - ex = assertThrows(ParseException.class, () -> + var ex3 = assertThrows(ParseException.class, () -> collection.addRankProfileFile("test", "src/test/examples/badparse.sd")); - assertTrue(ex.getMessage().startsWith("Failed parsing rank-profile from src/test/examples/badparse.sd: Encountered")); + assertTrue(Exceptions.toMessageString(ex3).startsWith("Failed parsing rank-profile from 'src/test/examples/badparse.sd': Encountered"), ex3.getMessage()); } @Test diff --git a/config-model/src/test/java/com/yahoo/vespa/model/ml/ImportedModelTester.java b/config-model/src/test/java/com/yahoo/vespa/model/ml/ImportedModelTester.java index 598b6b103bf..97c222e75d3 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/ml/ImportedModelTester.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/ml/ImportedModelTester.java @@ -44,12 +44,15 @@ public class ImportedModelTester { private final DeployState deployState; public ImportedModelTester(String modelName, Path applicationDir) { + this(modelName, applicationDir, new DeployState.Builder()); + } + + public ImportedModelTester(String modelName, Path applicationDir, DeployState.Builder deployStateBuilder) { this.modelName = modelName; this.applicationDir = applicationDir; - deployState = new DeployState.Builder() - .applicationPackage(ApplicationPackageTester.create(applicationDir.toString()).app()) - .modelImporters(importers) - .build(); + deployState = deployStateBuilder.applicationPackage(ApplicationPackageTester.create(applicationDir.toString()).app()) + .modelImporters(importers) + .build(); } public VespaModel createVespaModel() { diff --git a/config-model/src/test/java/com/yahoo/vespa/model/ml/StatelessOnnxEvaluationTest.java b/config-model/src/test/java/com/yahoo/vespa/model/ml/StatelessOnnxEvaluationTest.java index b1e28649e9f..a731e9c7ccc 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/ml/StatelessOnnxEvaluationTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/ml/StatelessOnnxEvaluationTest.java @@ -7,6 +7,10 @@ import ai.vespa.models.evaluation.Model; import ai.vespa.models.evaluation.ModelsEvaluator; import com.yahoo.component.ComponentId; import com.yahoo.config.application.api.ApplicationPackage; +import com.yahoo.config.model.deploy.DeployState; +import com.yahoo.config.model.deploy.TestProperties; +import com.yahoo.config.model.provision.InMemoryProvisioner; +import com.yahoo.config.provision.NodeResources; import com.yahoo.filedistribution.fileacquirer.FileAcquirer; import com.yahoo.filedistribution.fileacquirer.MockFileAcquirer; import com.yahoo.io.IOUtils; @@ -21,13 +25,14 @@ import com.yahoo.vespa.model.container.ApplicationContainerCluster; import org.junit.jupiter.api.Test; import java.io.File; -import java.io.IOException; import java.util.HashMap; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; -import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assumptions.assumeTrue; /** @@ -60,23 +65,23 @@ public class StatelessOnnxEvaluationTest { } @Test - void testStatelessOnnxModelEvaluation() throws IOException { + void testStatelessOnnxModelEvaluation() throws Exception { assumeTrue(OnnxEvaluator.isRuntimeAvailable()); Path appDir = Path.fromString("src/test/cfg/application/onnx"); Path storedAppDir = appDir.append("copy"); try { - ImportedModelTester tester = new ImportedModelTester("onnx_rt", appDir); - assertModelEvaluation(tester.createVespaModel(), appDir); + ImportedModelTester tester = new ImportedModelTester("onnx_rt", appDir, new DeployState.Builder()); + assertModelEvaluation(tester.createVespaModel(), appDir, false); // At this point the expression is stored - copy application to another location which does not have a models dir storedAppDir.toFile().mkdirs(); IOUtils.copy(appDir.append("services.xml").toString(), storedAppDir.append("services.xml").toString()); IOUtils.copyDirectory(appDir.append(ApplicationPackage.MODELS_GENERATED_DIR).toFile(), - storedAppDir.append(ApplicationPackage.MODELS_GENERATED_DIR).toFile()); + storedAppDir.append(ApplicationPackage.MODELS_GENERATED_DIR).toFile()); IOUtils.copyDirectory(appDir.append(ApplicationPackage.SCHEMAS_DIR).toFile(), - storedAppDir.append(ApplicationPackage.SCHEMAS_DIR).toFile()); + storedAppDir.append(ApplicationPackage.SCHEMAS_DIR).toFile()); ImportedModelTester storedTester = new ImportedModelTester("onnx_rt", storedAppDir); - assertModelEvaluation(storedTester.createVespaModel(), appDir); + assertModelEvaluation(storedTester.createVespaModel(), appDir, false); } finally { IOUtils.recursiveDeleteDir(appDir.append(ApplicationPackage.MODELS_GENERATED_DIR).toFile()); @@ -84,7 +89,26 @@ public class StatelessOnnxEvaluationTest { } } - private void assertModelEvaluation(VespaModel model, Path appDir) { + @Test + void testStatelessOnnxModelEvaluationWithGpu() { + assumeTrue(OnnxEvaluator.isRuntimeAvailable()); + NodeResources resources = new NodeResources(4, 16, 125, 10, + NodeResources.DiskSpeed.fast, NodeResources.StorageType.local, + NodeResources.Architecture.x86_64, + new NodeResources.GpuResources(1, 16)); + InMemoryProvisioner provisioner = new InMemoryProvisioner(6, resources, false); + DeployState.Builder deployStateBuilder = new DeployState.Builder().modelHostProvisioner(provisioner) + .properties(new TestProperties().setMultitenant(true).setHostedVespa(true)); + Path appDir = Path.fromString("src/test/cfg/application/onnx"); + try { + ImportedModelTester tester = new ImportedModelTester("onnx_rt", appDir, deployStateBuilder); + assertModelEvaluation(tester.createVespaModel(), appDir, true); + } finally { + IOUtils.recursiveDeleteDir(appDir.append(ApplicationPackage.MODELS_GENERATED_DIR).toFile()); + } + } + + private void assertModelEvaluation(VespaModel model, Path appDir, boolean shouldRequireGpu) { ApplicationContainerCluster cluster = model.getContainerClusters().get("container"); assertNotNull(cluster.getComponentsMap().get(new ComponentId(ModelsEvaluator.class.getName()))); @@ -108,28 +132,42 @@ public class StatelessOnnxEvaluationTest { Set<String> modelNames = config.rankprofile().stream().map(v -> v.name()).collect(Collectors.toSet()); assertTrue(modelNames.contains("mul")); + OnnxModelsConfig.Model mulModel = onnxModelsConfig.model().get(0); + assertEquals(2, mulModel.stateless_intraop_threads()); + assertEquals(-1, mulModel.stateless_interop_threads()); + assertEquals("", mulModel.stateless_execution_mode()); + assertEquals(shouldRequireGpu, mulModel.gpu_device_required()); + assertEquals(0, mulModel.gpu_device()); + // This is actually how ModelsEvaluator is injected Map<String, File> fileMap = new HashMap<>(); for (OnnxModelsConfig.Model onnxModel : onnxModelsConfig.model()) { fileMap.put(onnxModel.fileref().value(), appDir.append(onnxModel.fileref().value()).toFile()); } FileAcquirer fileAcquirer = MockFileAcquirer.returnFiles(fileMap); - ModelsEvaluator modelsEvaluator = new ModelsEvaluator(config, constantsConfig, expressionsConfig, onnxModelsConfig, fileAcquirer); - assertEquals(1, modelsEvaluator.models().size()); - - Model mul = modelsEvaluator.models().get("mul"); - FunctionEvaluator evaluator = mul.evaluatorOf(); // or "default.output" - or actually use name of model output - - Tensor input1 = Tensor.from("tensor<float>(d0[1]):[2]"); - Tensor input2 = Tensor.from("tensor<float>(d0[1]):[3]"); - Tensor output = evaluator.bind("input1", input1).bind("input2", input2).evaluate(); - assertEquals(6.0, output.sum().asDouble(), 1e-9); - - OnnxModelsConfig.Model mulModel = onnxModelsConfig.model().get(0); - assertEquals(2, mulModel.stateless_intraop_threads()); - assertEquals(-1, mulModel.stateless_interop_threads()); - assertEquals("", mulModel.stateless_execution_mode()); - assertEquals(-1, mulModel.gpu_device()); + try { + ModelsEvaluator modelsEvaluator = new ModelsEvaluator(config, constantsConfig, expressionsConfig, onnxModelsConfig, fileAcquirer); + assertEquals(1, modelsEvaluator.models().size()); + + Model mul = modelsEvaluator.models().get("mul"); + FunctionEvaluator evaluator = mul.evaluatorOf(); // or "default.output" - or actually use name of model output + + Tensor input1 = Tensor.from("tensor<float>(d0[1]):[2]"); + Tensor input2 = Tensor.from("tensor<float>(d0[1]):[3]"); + Tensor output = evaluator.bind("input1", input1).bind("input2", input2).evaluate(); + assertEquals(6.0, output.sum().asDouble(), 1e-9); + } catch (IllegalArgumentException e) { + boolean gotWantedException = false; + for (Throwable cause = e; shouldRequireGpu && cause != null; cause = cause.getCause()) { + if (cause.getMessage().equals("GPU device is required, but CUDA initialization failed")) { + gotWantedException = true; + break; + } + } + if (!gotWantedException) { + throw e; + } + } } } |