summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--client/js/app/src/app/pages/querybuilder/TransformVespaTrace.jsx339
-rw-r--r--flags/src/main/java/com/yahoo/vespa/flags/Flags.java16
-rw-r--r--hosted-tenant-base/pom.xml6
-rw-r--r--jdisc-security-filters/src/main/java/com/yahoo/jdisc/http/filter/security/misc/SecurityHeadersResponseFilter.java1
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConnectionException.java2
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/HttpException.java9
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeRepositoryException.java6
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorException.java7
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java6
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorNotFoundException.java2
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java6
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollector.java8
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java2
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/ConvergenceException.java29
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java4
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java10
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/network/IPAddresses.java4
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java4
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java51
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java265
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java221
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java131
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java158
-rw-r--r--vespa-application-maven-plugin/src/main/java/com/yahoo/container/plugin/mojo/ApplicationMojo.java15
-rw-r--r--vespa-application-maven-plugin/src/main/java/com/yahoo/container/plugin/mojo/Version.java1
25 files changed, 638 insertions, 665 deletions
diff --git a/client/js/app/src/app/pages/querybuilder/TransformVespaTrace.jsx b/client/js/app/src/app/pages/querybuilder/TransformVespaTrace.jsx
index a6981b8a521..5e09a5dec0f 100644
--- a/client/js/app/src/app/pages/querybuilder/TransformVespaTrace.jsx
+++ b/client/js/app/src/app/pages/querybuilder/TransformVespaTrace.jsx
@@ -2,8 +2,6 @@ let traceID = '';
let processes = {};
let output = {};
let traceStartTimestamp = 0;
-let topSpanId = '';
-let parentID = '';
// Generates a random hex string of size "size"
const genRanHex = (size) =>
@@ -20,54 +18,77 @@ export default function transform(trace) {
let temp = trace['trace']['children'];
let spans = findChildren(temp);
traceStartTimestamp = findTraceStartTime(spans);
- topSpanId = genRanHex(16);
let topSpanFirstHalf = createNewSpan(traceStartTimestamp)[0];
data.push(topSpanFirstHalf);
const retrieved = findLogsAndChildren(spans, topSpanFirstHalf);
const logs = retrieved['logs'];
- const children = retrieved['children'];
- traverseLogs(logs);
- createChildren(children);
+ const indexes = retrieved['indexes'];
+ traverseLogs(logs, indexes);
return output;
}
+function traverseChildren(span, logs, children, indexes, parent) {
+ let data = output['data'][0]['spans'];
+ let logSpan;
+ let spanTimestamp = span['timestamp'];
+ if (span.hasOwnProperty('children')) {
+ // Create a new parent span so that the timeline for the logs are correct
+ let duration =
+ (span['children'][span['children'].length - 1]['timestamp'] -
+ span['children'][0]['timestamp']) *
+ 1000;
+ parent['duration'] = duration;
+ logSpan = createNewSpan(
+ traceStartTimestamp + spanTimestamp,
+ duration,
+ 'p0',
+ parent['operationName'],
+ [{ refType: 'CHILD_OF', traceID: traceID, spanID: parent['spanID'] }]
+ );
+ data.push(logSpan);
+ let log = [];
+ for (let x of span['children']) {
+ if (x.hasOwnProperty('children')) {
+ traverseChildren(x, logs, children, indexes, logSpan);
+ } else if (Array.isArray(x['message'])) {
+ if (log.length > 0) {
+ // finished moving down the search chain
+ // create a new array for holding the logs that represent moving up the search chain
+ logs.push(log);
+ indexes.push(data.indexOf(parent));
+ log = [];
+ }
+ createChildren(x['message'], parent['spanID']);
+ } else {
+ // only add logs with a timestamp
+ if (x.hasOwnProperty('timestamp')) {
+ log.push(x);
+ }
+ }
+ }
+ indexes.push(data.indexOf(parent));
+ logs.push(log);
+ }
+}
+
function findLogsAndChildren(spans, topSpanFirstHalf) {
let logs = [];
let children = [];
+ let indexes = [];
let data = output['data'][0]['spans'];
- //let hitQuery = false;
- //let topSpanSecondHalf = createNewSpan();
- //let secondHalfDuration = 0;
- //output['data'][0]['spans'].push(topSpanSecondHalf);
- //let firstHitSecondHalf = true;
- for (let i = 0; i < spans.length - 1; i++) {
+ let totalDuration = findDuration(spans);
+ topSpanFirstHalf['duration'] = totalDuration;
+ for (let i = 0; i < spans.length; i++) {
if (spans[i].hasOwnProperty('children')) {
- let a = spans[i]['timestamp'];
- topSpanFirstHalf = createNewSpan(traceStartTimestamp + a);
- //firstHitSecondHalf = true;
- //topSpanSecondHalf = createNewSpan();
- //output['data'][0]['spans'].push(topSpanSecondHalf);
- let log = [];
- for (let x of spans[i]['children']) {
- if (Array.isArray(x['message'])) {
- if (log.length > 0) {
- // finished moving down the search chain
- // create a new array for holding the logs that represent moving up the search chain
- logs.push(log);
- log = [];
- }
- //hitQuery = true;
- children.push(x['message']);
- } else {
- // only add logs with a timestamp
- if (x.hasOwnProperty('timestamp')) {
- log.push(x);
- }
- }
- }
- logs.push(log);
+ traverseChildren(
+ spans[i],
+ logs,
+ children,
+ indexes,
+ data[data.length - 1]
+ );
} else if (
spans[i].hasOwnProperty('message') &&
spans[i].hasOwnProperty('timestamp')
@@ -88,185 +109,149 @@ function findLogsAndChildren(spans, topSpanFirstHalf) {
duration = (spans[i + 1]['timestamp'] - spans[i]['timestamp']) * 1000;
duration = duration === 0 ? 1 : duration;
}
- topSpanFirstHalf['duration'] = topSpanFirstHalf['duration'] + duration;
span['duration'] = duration;
- // if (hitQuery) {
- // if (firstHitSecondHalf) {
- // secondHalfDuration = span['timestamp'] * 1000;
- // topSpanSecondHalf['startTime'] =
- // traceStartTimestamp + secondHalfDuration;
- // firstHitSecondHalf = false;
- // }
- // topSpanSecondHalf['duration'] =
- // span['timestamp'] * 1000 - secondHalfDuration;
- // topSpanSecondHalf['logs'].push({
- // timestamp: traceStartTimestamp + span['timestamp'] * 1000,
- // fields: [{ key: 'message', type: 'string', value: span['message'] }],
- // });
- // } else {
- // topSpanFirstHalf['duration'] = span['timestamp'] * 1000;
- // topSpanFirstHalf['logs'].push({
- // timestamp: traceStartTimestamp + span['timestamp'] * 1000,
- // fields: [{ key: 'message', type: 'string', value: span['message'] }],
- // });
- // }
}
}
- return { logs: logs, children: children };
+ return { logs: logs, indexes: indexes };
}
-function traverseLogs(logs) {
- let first = true;
+function traverseLogs(logs, indexes) {
let data = output['data'][0]['spans'];
- for (let log of logs) {
+ let previous;
+ for (let i = 0; i < logs.length; i++) {
+ previous = data[indexes[i]];
+ let log = logs[i];
let logStartTimestamp = traceStartTimestamp + log[0]['timestamp'] * 1000;
let logDuration =
(log[log.length - 1]['timestamp'] - log[0]['timestamp']) * 1000;
if (logDuration === 0) {
logDuration = 1;
}
- let spanID = genRanHex(16);
- if (first) {
- parentID = spanID;
- first = false;
- }
- let temp = createNewSpan(
- logStartTimestamp,
- logDuration,
- 'p0',
- 'test'
- //[{ refType: 'CHILD_OF', traceID: traceID, spanID: topSpanId }]
- );
+ let temp = createNewSpan(logStartTimestamp, logDuration, 'p0', 'test', [
+ { refType: 'CHILD_OF', traceID: traceID, spanID: previous['spanID'] },
+ ]);
let childSpan = temp[0];
let childSpanID = temp[1];
data.push(childSpan);
- for (let i = 0; i < log.length - 1; i++) {
- if (log[i].hasOwnProperty('message')) {
- let logPointStart = traceStartTimestamp + log[i]['timestamp'] * 1000;
+ for (let k = 0; k < log.length - 1; k++) {
+ if (log[k].hasOwnProperty('message')) {
+ let logPointStart = traceStartTimestamp + log[k]['timestamp'] * 1000;
let logPointDuration;
- if (i > log.length - 1) {
+ if (k > log.length - 1) {
logPointDuration = 1;
} else {
logPointDuration =
- (log[i + 1]['timestamp'] - log[i]['timestamp']) * 1000;
+ (log[k + 1]['timestamp'] - log[k]['timestamp']) * 1000;
logPointDuration = logPointDuration === 0 ? 1 : logPointDuration;
}
let logSpan = createNewSpan(
logPointStart,
logPointDuration,
'p0',
- log[i]['message'],
+ log[k]['message'],
[{ refType: 'CHILD_OF', traceID: traceID, spanID: childSpanID }]
)[0];
data.push(logSpan);
- // childSpan['logs'].push({
- // timestamp: traceStartTimestamp + log[i]['timestamp'] * 1000,
- // fields: [
- // { key: 'message', type: 'string', value: log[i]['message'] },
- // ],
- // });
}
}
}
}
-function createChildren(children) {
- for (let i = 0; i < children.length; i++) {
- let child = children[i][0];
- let processKey = `p${i + 1}`;
- processes[processKey] = { serviceName: `Span${i}`, tags: [] };
- let spanID = genRanHex(16);
- let data = output['data'][0]['spans'];
- let startTimestamp = Date.parse(child['start_time']) * 1000;
- let newSpan = {
+function createChildren(children, parentID) {
+ let child = children[0];
+ let processKey = genRanHex(5);
+ processes[processKey] = { serviceName: genRanHex(3), tags: [] };
+ let spanID = genRanHex(16);
+ let data = output['data'][0]['spans'];
+ let startTimestamp = Date.parse(child['start_time']) * 1000;
+ let newSpan = {
+ traceID: traceID,
+ spanID: spanID,
+ operationName: 'something',
+ startTime: startTimestamp,
+ duration: child['duration_ms'] * 1000,
+ references: [{ refType: 'CHILD_OF', traceID: traceID, spanID: parentID }],
+ tags: [],
+ logs: [],
+ processID: processKey,
+ };
+ data.push(newSpan);
+ let traces = child['traces'];
+ for (let k = 0; k < traces.length; k++) {
+ let trace = traces[k];
+ let traceTimestamp = trace['timestamp_ms'];
+ let events;
+ let firstEvent;
+ let duration;
+ if (trace['tag'] === 'query_execution') {
+ events = trace['threads'][0]['traces'];
+ firstEvent = events[0];
+ duration = (traceTimestamp - firstEvent['timestamp_ms']) * 1000;
+ } else if (trace['tag'] === 'query_execution_plan') {
+ events = [];
+ let nextTrace = traces[k + 1];
+ firstEvent = trace;
+ // query execution plan has no events, duration must therefore be found using the next trace
+ if (nextTrace['tag'] === 'query_execution') {
+ duration =
+ (nextTrace['threads'][0]['traces'][0]['timestamp_ms'] -
+ traceTimestamp) *
+ 1000;
+ } else {
+ duration = (nextTrace['timestamp_ms'] - traceTimestamp) * 1000;
+ }
+ } else {
+ events = trace['traces'];
+ firstEvent = events[0];
+ duration = (traceTimestamp - firstEvent['timestamp_ms']) * 1000;
+ }
+ let childSpanID = genRanHex(16);
+ let childSpan = {
traceID: traceID,
- spanID: spanID,
- operationName: `query${i}`,
- startTime: startTimestamp,
- duration: child['duration_ms'] * 1000,
- references: [{ refType: 'CHILD_OF', traceID: traceID, spanID: parentID }],
+ spanID: childSpanID,
+ operationName: trace['tag'],
+ startTime: startTimestamp + firstEvent['timestamp_ms'] * 1000,
+ duration: duration,
+ references: [{ refType: 'CHILD_OF', traceID: traceID, spanID: spanID }],
tags: [],
logs: [],
processID: processKey,
};
- data.push(newSpan);
- let traces = child['traces'];
- for (let k = 0; k < traces.length; k++) {
- let trace = traces[k];
- let traceTimestamp = trace['timestamp_ms'];
- let events;
- let firstEvent;
- let duration;
- if (trace['tag'] === 'query_execution') {
- events = trace['threads'][0]['traces'];
- firstEvent = events[0];
- duration = (traceTimestamp - firstEvent['timestamp_ms']) * 1000;
- } else if (trace['tag'] === 'query_execution_plan') {
- events = [];
- let nextTrace = traces[k + 1];
- firstEvent = trace;
- // query execution plan has no events, duration must therefore be found using the next trace
- if (nextTrace['tag'] === 'query_execution') {
- duration =
- (nextTrace['threads'][0]['traces'][0]['timestamp_ms'] -
- traceTimestamp) *
- 1000;
- } else {
- duration = (nextTrace['timestamp_ms'] - traceTimestamp) * 1000;
- }
- } else {
- events = trace['traces'];
- firstEvent = events[0];
- duration = (traceTimestamp - firstEvent['timestamp_ms']) * 1000;
- }
- let childSpanID = genRanHex(16);
- let childSpan = {
- traceID: traceID,
- spanID: childSpanID,
- operationName: trace['tag'],
- startTime: startTimestamp + firstEvent['timestamp_ms'] * 1000,
- duration: duration,
- references: [{ refType: 'CHILD_OF', traceID: traceID, spanID: spanID }],
- tags: [],
- logs: [],
- processID: processKey,
- };
- data.push(childSpan);
- if (events.length > 0) {
- for (let j = 0; j < events.length; j++) {
- let event = events[j];
- let eventID = genRanHex(16);
- let eventStart = event['timestamp_ms'];
- let operationName;
- if (event.hasOwnProperty('event')) {
- operationName = event['event'];
- if (
- operationName === 'Complete query setup' ||
- operationName === 'MatchThread::run Done'
- ) {
- duration = (traceTimestamp - eventStart) * 1000;
- } else {
- duration = (events[j + 1]['timestamp_ms'] - eventStart) * 1000;
- }
+ data.push(childSpan);
+ if (events.length > 0) {
+ for (let j = 0; j < events.length; j++) {
+ let event = events[j];
+ let eventID = genRanHex(16);
+ let eventStart = event['timestamp_ms'];
+ let operationName;
+ if (event.hasOwnProperty('event')) {
+ operationName = event['event'];
+ if (
+ operationName === 'Complete query setup' ||
+ operationName === 'MatchThread::run Done'
+ ) {
+ duration = (traceTimestamp - eventStart) * 1000;
} else {
- operationName = event['tag'];
duration = (events[j + 1]['timestamp_ms'] - eventStart) * 1000;
}
- let eventSpan = {
- traceID: traceID,
- spanID: eventID,
- operationName: operationName,
- startTime: startTimestamp + eventStart * 1000,
- duration: duration,
- references: [
- { refType: 'CHILD_OF', traceID: traceID, spanID: childSpanID },
- ],
- tags: [],
- logs: [],
- processID: processKey,
- };
- data.push(eventSpan);
+ } else {
+ operationName = event['tag'];
+ duration = (events[j + 1]['timestamp_ms'] - eventStart) * 1000;
}
+ let eventSpan = {
+ traceID: traceID,
+ spanID: eventID,
+ operationName: operationName,
+ startTime: startTimestamp + eventStart * 1000,
+ duration: duration,
+ references: [
+ { refType: 'CHILD_OF', traceID: traceID, spanID: childSpanID },
+ ],
+ tags: [],
+ logs: [],
+ processID: processKey,
+ };
+ data.push(eventSpan);
}
}
}
@@ -306,7 +291,6 @@ function findTraceStartTime(spans) {
return startTime;
}
-//TODO: remove if not needed later
function findDuration(spans) {
let notFound = true;
let duration = 0;
@@ -315,16 +299,15 @@ function findDuration(spans) {
if (spans[i].hasOwnProperty('timestamp')) {
duration = spans[i]['timestamp'];
notFound = false;
- } else {
- i--;
}
+ i--;
}
- return duration;
+ return duration * 1000;
}
function createNewSpan(
startTime = 0,
- duration = 0,
+ duration = 1,
processID = 'p0',
operationName = 'Complete',
references = []
diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
index c6238149be1..74dc52f79ec 100644
--- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
+++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
@@ -70,7 +70,7 @@ public class Flags {
public static final UnboundBooleanFlag KEEP_STORAGE_NODE_UP = defineFeatureFlag(
"keep-storage-node-up", true,
- List.of("hakonhall"), "2022-07-07", "2022-08-07",
+ List.of("hakonhall"), "2022-07-07", "2022-09-07",
"Whether to leave the storage node (with wanted state) UP while the node is permanently down.",
"Takes effect immediately for nodes transitioning to permanently down.",
ZONE_ID, APPLICATION_ID);
@@ -119,7 +119,7 @@ public class Flags {
public static final UnboundBooleanFlag USE_THREE_PHASE_UPDATES = defineFeatureFlag(
"use-three-phase-updates", false,
- List.of("vekterli"), "2020-12-02", "2022-08-01",
+ List.of("vekterli"), "2020-12-02", "2022-08-15",
"Whether to enable the use of three-phase updates when bucket replicas are out of sync.",
"Takes effect at redeployment",
ZONE_ID, APPLICATION_ID);
@@ -245,7 +245,7 @@ public class Flags {
public static final UnboundIntFlag MAX_ACTIVATION_INHIBITED_OUT_OF_SYNC_GROUPS = defineIntFlag(
"max-activation-inhibited-out-of-sync-groups", 0,
- List.of("vekterli"), "2021-02-19", "2022-08-01",
+ List.of("vekterli"), "2021-02-19", "2022-08-15",
"Allows replicas in up to N content groups to not be activated " +
"for query visibility if they are out of sync with a majority of other replicas",
"Takes effect at redeployment",
@@ -253,14 +253,14 @@ public class Flags {
public static final UnboundIntFlag MAX_CONCURRENT_MERGES_PER_NODE = defineIntFlag(
"max-concurrent-merges-per-node", 16,
- List.of("balder", "vekterli"), "2021-06-06", "2022-08-01",
+ List.of("balder", "vekterli"), "2021-06-06", "2022-08-15",
"Specifies max concurrent merges per content node.",
"Takes effect at redeploy",
ZONE_ID, APPLICATION_ID);
public static final UnboundIntFlag MAX_MERGE_QUEUE_SIZE = defineIntFlag(
"max-merge-queue-size", 100,
- List.of("balder", "vekterli"), "2021-06-06", "2022-08-01",
+ List.of("balder", "vekterli"), "2021-06-06", "2022-08-15",
"Specifies max size of merge queue.",
"Takes effect at redeploy",
ZONE_ID, APPLICATION_ID);
@@ -339,7 +339,7 @@ public class Flags {
public static final UnboundStringFlag MERGE_THROTTLING_POLICY = defineStringFlag(
"merge-throttling-policy", "STATIC",
- List.of("vekterli"), "2022-01-25", "2022-08-01",
+ List.of("vekterli"), "2022-01-25", "2022-08-15",
"Sets the policy used for merge throttling on the content nodes. " +
"Valid values: STATIC, DYNAMIC",
"Takes effect at redeployment",
@@ -347,7 +347,7 @@ public class Flags {
public static final UnboundDoubleFlag PERSISTENCE_THROTTLING_WS_DECREMENT_FACTOR = defineDoubleFlag(
"persistence-throttling-ws-decrement-factor", 1.2,
- List.of("vekterli"), "2022-01-27", "2022-08-01",
+ List.of("vekterli"), "2022-01-27", "2022-08-15",
"Sets the dynamic throttle policy window size decrement factor for persistence " +
"async throttling. Only applies if DYNAMIC policy is used.",
"Takes effect on redeployment",
@@ -355,7 +355,7 @@ public class Flags {
public static final UnboundDoubleFlag PERSISTENCE_THROTTLING_WS_BACKOFF = defineDoubleFlag(
"persistence-throttling-ws-backoff", 0.95,
- List.of("vekterli"), "2022-01-27", "2022-08-01",
+ List.of("vekterli"), "2022-01-27", "2022-08-15",
"Sets the dynamic throttle policy window size backoff for persistence " +
"async throttling. Only applies if DYNAMIC policy is used. Valid range [0, 1]",
"Takes effect on redeployment",
diff --git a/hosted-tenant-base/pom.xml b/hosted-tenant-base/pom.xml
index 8bc98cf0733..58ad178794b 100644
--- a/hosted-tenant-base/pom.xml
+++ b/hosted-tenant-base/pom.xml
@@ -340,6 +340,12 @@
<delete dir="target/application-test/src" />
<copy file="target/${project.artifactId}-tests.jar" todir="target/application-test/components/" />
+
+ <!-- Copy any additional application bundles to the test package -->
+ <copy todir="target/application-test/components">
+ <fileset dir="target/application/components" includes="*.jar" excludes="${project.artifactId}-deploy.jar" />
+ </copy>
+
<zip destfile="target/application-test.zip" basedir="target/application-test/" />
</tasks>
</configuration>
diff --git a/jdisc-security-filters/src/main/java/com/yahoo/jdisc/http/filter/security/misc/SecurityHeadersResponseFilter.java b/jdisc-security-filters/src/main/java/com/yahoo/jdisc/http/filter/security/misc/SecurityHeadersResponseFilter.java
index 24cd9245b61..520e22de136 100644
--- a/jdisc-security-filters/src/main/java/com/yahoo/jdisc/http/filter/security/misc/SecurityHeadersResponseFilter.java
+++ b/jdisc-security-filters/src/main/java/com/yahoo/jdisc/http/filter/security/misc/SecurityHeadersResponseFilter.java
@@ -19,5 +19,6 @@ public class SecurityHeadersResponseFilter implements SecurityResponseFilter {
response.setHeader("Strict-Transport-Security", "max-age=31536000; includeSubDomains");
response.setHeader("X-Content-Type-Options", "nosniff");
response.setHeader("X-Frame-Options", "DENY");
+ response.setHeader("Referrer-Policy", "strict-origin-when-cross-origin");
}
}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConnectionException.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConnectionException.java
index da93083a6de..c78d60e9950 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConnectionException.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConnectionException.java
@@ -15,7 +15,7 @@ import java.net.SocketTimeoutException;
public class ConnectionException extends ConvergenceException {
private ConnectionException(String message, Throwable cause) {
- super(message, cause);
+ super(message, cause, true);
}
/**
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/HttpException.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/HttpException.java
index 4de7582fb7b..53e15b6c647 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/HttpException.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/HttpException.java
@@ -14,20 +14,15 @@ public class HttpException extends ConvergenceException {
private final boolean isRetryable;
private HttpException(int statusCode, String message, boolean isRetryable) {
- super("HTTP status code " + statusCode + ": " + message);
+ super("HTTP status code " + statusCode + ": " + message, null, !isRetryable);
this.isRetryable = isRetryable;
}
private HttpException(Response.Status status, String message, boolean isRetryable) {
- super(status.toString() + " (" + status.getStatusCode() + "): " + message);
+ super(status.toString() + " (" + status.getStatusCode() + "): " + message, null, !isRetryable);
this.isRetryable = isRetryable;
}
- private HttpException(String message) {
- super(message);
- this.isRetryable = false;
- }
-
boolean isRetryable() {
return isRetryable;
}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeRepositoryException.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeRepositoryException.java
index a0b0499bb1e..636813a2169 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeRepositoryException.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeRepositoryException.java
@@ -5,10 +5,6 @@ import com.yahoo.vespa.hosted.node.admin.nodeadmin.ConvergenceException;
public class NodeRepositoryException extends ConvergenceException {
public NodeRepositoryException(String message) {
- super(message);
- }
-
- public NodeRepositoryException(String message, Exception exception) {
- super(message, exception);
+ super(message, null, true);
}
}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorException.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorException.java
index 9b8a749c33d..917b65b606c 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorException.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorException.java
@@ -5,7 +5,12 @@ import com.yahoo.vespa.hosted.node.admin.nodeadmin.ConvergenceException;
@SuppressWarnings("serial")
public class OrchestratorException extends ConvergenceException {
+ /** Creates a transient convergence exception. */
public OrchestratorException(String message) {
- super(message);
+ this(message, true);
+ }
+
+ protected OrchestratorException(String message, boolean isError) {
+ super(message, null, isError);
}
}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java
index 281d43e0afc..858bce27ed8 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java
@@ -59,7 +59,7 @@ public class OrchestratorImpl implements Orchestrator {
} catch (HttpException e) {
throw new OrchestratorException("Failed to suspend " + hostName + ": " + e.toString());
} catch (ConnectionException e) {
- throw new ConvergenceException("Failed to suspend " + hostName + ": " + e.getMessage());
+ throw ConvergenceException.ofTransient("Failed to suspend " + hostName + ": " + e.getMessage());
} catch (RuntimeException e) {
throw new RuntimeException("Got error on suspend", e);
}
@@ -105,7 +105,7 @@ public class OrchestratorImpl implements Orchestrator {
} catch (HttpException e) {
throw new OrchestratorException("Failed to batch suspend for " + parentHostName + ": " + e.toString());
} catch (ConnectionException e) {
- throw new ConvergenceException("Failed to batch suspend for " + parentHostName + ": " + e.getMessage());
+ throw ConvergenceException.ofTransient("Failed to batch suspend for " + parentHostName + ": " + e.getMessage());
} catch (RuntimeException e) {
throw new RuntimeException("Got error on batch suspend for " + parentHostName + ", with nodes " + hostNames, e);
}
@@ -126,7 +126,7 @@ public class OrchestratorImpl implements Orchestrator {
} catch (HttpException e) {
throw new OrchestratorException("Failed to resume " + hostName + ": " + e.toString());
} catch (ConnectionException e) {
- throw new ConvergenceException("Failed to resume " + hostName + ": " + e.getMessage());
+ throw ConvergenceException.ofTransient("Failed to resume " + hostName + ": " + e.getMessage());
} catch (RuntimeException e) {
throw new RuntimeException("Got error on resume", e);
}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorNotFoundException.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorNotFoundException.java
index 3b016aac03f..a6a54807e56 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorNotFoundException.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorNotFoundException.java
@@ -4,6 +4,6 @@ package com.yahoo.vespa.hosted.node.admin.configserver.orchestrator;
@SuppressWarnings("serial")
public class OrchestratorNotFoundException extends OrchestratorException {
public OrchestratorNotFoundException(String message) {
- super(message);
+ super(message, true);
}
}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java
index c893f7ffee4..062a7ce018d 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java
@@ -120,7 +120,7 @@ public class StorageMaintainer {
String[] results = output.split("\t");
if (results.length != 2) {
- throw new ConvergenceException("Result from disk usage command not as expected: " + output);
+ throw ConvergenceException.ofError("Result from disk usage command not as expected: " + output);
}
return DiskSize.of(Long.parseLong(results[0]), DiskSize.Unit.kiB);
@@ -226,11 +226,11 @@ public class StorageMaintainer {
String output = uncheck(() -> Files.readAllLines(Paths.get("/proc/cpuinfo")).stream()
.filter(line -> line.startsWith("microcode"))
.findFirst()
- .orElseThrow(() -> new ConvergenceException("No microcode information found in /proc/cpuinfo")));
+ .orElseThrow(() -> ConvergenceException.ofError("No microcode information found in /proc/cpuinfo")));
String[] results = output.split(":");
if (results.length != 2) {
- throw new ConvergenceException("Result from detect microcode command not as expected: " + output);
+ throw ConvergenceException.ofError("Result from detect microcode command not as expected: " + output);
}
return results[1].trim();
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollector.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollector.java
index 45dbfd07209..0933f22dee3 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollector.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollector.java
@@ -51,7 +51,7 @@ public class CoreCollector {
Matcher matcher = CORE_GENERATOR_PATH_PATTERN.matcher(result.getOutput());
if (! matcher.find()) {
- throw new ConvergenceException(String.format("Failed to extract binary path from GDB, result: %s, command: %s",
+ throw ConvergenceException.ofError(String.format("Failed to extract binary path from GDB, result: %s, command: %s",
asString(result), Arrays.toString(wrappedCommand)));
}
return matcher.group("path").split(" ")[0];
@@ -62,7 +62,7 @@ public class CoreCollector {
try {
CommandResult result = container.executeCommandInContainer(context, context.users().root(), command);
if (result.getExitCode() != 0) {
- throw new ConvergenceException("file command failed with " + asString(result));
+ throw ConvergenceException.ofError("file command failed with " + asString(result));
}
Matcher execfnMatcher = EXECFN_PATH_PATTERN.matcher(result.getOutput());
@@ -89,7 +89,7 @@ public class CoreCollector {
CommandResult result = container.executeCommandInContainer(context, context.users().root(), command);
if (result.getExitCode() != 0)
- throw new ConvergenceException("Failed to read backtrace " + asString(result) + ", Command: " + Arrays.toString(command));
+ throw ConvergenceException.ofError("Failed to read backtrace " + asString(result) + ", Command: " + Arrays.toString(command));
return List.of(result.getOutput().split("\n"));
}
@@ -99,7 +99,7 @@ public class CoreCollector {
CommandResult result = container.executeCommandInContainer(context, context.users().root(), command);
if (result.getExitCode() != 0)
- throw new ConvergenceException("Failed to read jstack " + asString(result) + ", Command: " + Arrays.toString(command));
+ throw ConvergenceException.ofError("Failed to read jstack " + asString(result) + ", Command: " + Arrays.toString(command));
return List.of(result.getOutput().split("\n"));
}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java
index 6295765a95f..ece494a34d7 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java
@@ -95,7 +95,7 @@ public class CoredumpHandler {
.map(FileFinder.FileAttributes::filename)
.toList();
if (!pendingCores.isEmpty())
- throw new ConvergenceException(String.format("Cannot process %s coredumps: Still being written",
+ throw ConvergenceException.ofError(String.format("Cannot process %s coredumps: Still being written",
pendingCores.size() < 5 ? pendingCores : pendingCores.size()));
}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/ConvergenceException.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/ConvergenceException.java
index 16a5eb022ad..c1b86fc7fe2 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/ConvergenceException.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/ConvergenceException.java
@@ -8,11 +8,34 @@ package com.yahoo.vespa.hosted.node.admin.nodeadmin;
*/
@SuppressWarnings("serial")
public class ConvergenceException extends RuntimeException {
- public ConvergenceException(String message) {
- super(message);
+ /** Create an exception that will NOT increment the monitored unhandled_exceptions metric. */
+ public static ConvergenceException ofTransient(String message) { return ofTransient(message, null); }
+
+ /** Create an exception that will NOT increment the monitored unhandled_exceptions metric. */
+ public static ConvergenceException ofTransient(String message, Throwable t) { return new ConvergenceException(message, t, false); }
+
+ /** Create an exception that increments the monitored unhandled_exceptions metric. */
+ public static ConvergenceException ofError(String message) { return ofError(message, null); }
+
+ /** Create an exception that increments the monitored unhandled_exceptions metric. */
+ public static ConvergenceException ofError(String message, Throwable t) { return new ConvergenceException(message, t, true); }
+
+ /** Create an exception with the same transient/error as the cause. */
+ public static ConvergenceException ofNested(String message, ConvergenceException cause) { return new ConvergenceException(message, cause, cause.isError); }
+
+ private final boolean isError;
+
+ /** @param isError whether the exception should increment the monitored unhandled_exception metric. */
+ protected ConvergenceException(String message, boolean isError) {
+ this(message, null, isError);
}
- public ConvergenceException(String message, Throwable t) {
+ /** @param isError whether the exception should increment the monitored unhandled_exception metric. */
+ protected ConvergenceException(String message, Throwable t, boolean isError) {
super(message, t);
+ this.isError = isError;
}
+
+ /** Whether the exception signals an error someone may want to look at, or whether it is expected to be transient (false). */
+ public boolean isError() { return isError; }
}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java
index dda404797d9..314844dc6eb 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java
@@ -110,7 +110,7 @@ public class NodeAdminStateUpdater {
if (hostIsActiveInNR) orchestrator.resume(hostHostname);
- throw new ConvergenceException("Timed out trying to freeze all nodes: will force an unfrozen tick");
+ throw ConvergenceException.ofTransient("Timed out trying to freeze all nodes: will force an unfrozen tick");
}
boolean wantFrozen = wantedState != RESUMED;
@@ -118,7 +118,7 @@ public class NodeAdminStateUpdater {
currentState = TRANSITIONING;
if (!nodeAdmin.setFrozen(wantFrozen))
- throw new ConvergenceException("NodeAdmin is not yet " + (wantFrozen ? "frozen" : "unfrozen"));
+ throw ConvergenceException.ofTransient("NodeAdmin is not yet " + (wantFrozen ? "frozen" : "unfrozen"));
switch (wantedState) {
case RESUMED:
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
index 09bc58bdaa2..6408132fe0b 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
@@ -244,7 +244,7 @@ public class NodeAgentImpl implements NodeAgent {
hasResumedNode = false;
context.log(logger, "Container successfully started, new containerState is " + containerState);
return containerOperations.getContainer(context).orElseThrow(() ->
- new ConvergenceException("Did not find container that was just started"));
+ ConvergenceException.ofError("Did not find container that was just started"));
}
private Optional<Container> removeContainerIfNeededUpdateContainerState(
@@ -399,7 +399,7 @@ public class NodeAgentImpl implements NodeAgent {
// Only update CPU resources
containerOperations.updateContainer(context, existingContainer.id(), wantedContainerResources.withMemoryBytes(existingContainer.resources().memoryBytes()));
return containerOperations.getContainer(context).orElseThrow(() ->
- new ConvergenceException("Did not find container that was just updated"));
+ ConvergenceException.ofError("Did not find container that was just updated"));
}
private ContainerResources getContainerResources(NodeAgentContext context) {
@@ -435,6 +435,8 @@ public class NodeAgentImpl implements NodeAgent {
context.log(logger, Level.INFO, "Converged");
} catch (ConvergenceException e) {
context.log(logger, e.getMessage());
+ if (e.isError())
+ numberOfUnhandledException++;
} catch (Throwable e) {
numberOfUnhandledException++;
context.log(logger, Level.SEVERE, "Unhandled exception, ignoring", e);
@@ -501,7 +503,7 @@ public class NodeAgentImpl implements NodeAgent {
Duration timeLeft = Duration.between(clock.instant(), firstSuccessfulHealthCheckInstant.get().plus(warmUpDuration(context)));
if (!container.get().resources().equalsCpu(getContainerResources(context)))
- throw new ConvergenceException("Refusing to resume until warm up period ends (" +
+ throw ConvergenceException.ofTransient("Refusing to resume until warm up period ends (" +
(timeLeft.isNegative() ? "next tick" : "in " + timeLeft) + ")");
}
serviceDumper.processServiceDumpRequest(context);
@@ -536,7 +538,7 @@ public class NodeAgentImpl implements NodeAgent {
nodeRepository.setNodeState(context.hostname().value(), NodeState.ready);
break;
default:
- throw new ConvergenceException("UNKNOWN STATE " + node.state().name());
+ throw ConvergenceException.ofError("UNKNOWN STATE " + node.state().name());
}
}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/network/IPAddresses.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/network/IPAddresses.java
index 07e172c5117..148d80c9803 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/network/IPAddresses.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/network/IPAddresses.java
@@ -67,7 +67,7 @@ public interface IPAddresses {
if (ipv6addresses.size() <= 1) return ipv6addresses.stream().findFirst();
String addresses = ipv6addresses.stream().map(InetAddresses::toAddrString).collect(Collectors.joining(","));
- throw new ConvergenceException(
+ throw ConvergenceException.ofError(
String.format(
"Multiple IPv6 addresses found: %s. Perhaps a missing DNS entry or multiple AAAA records in DNS?",
addresses));
@@ -103,7 +103,7 @@ public interface IPAddresses {
if (siteLocalIPv4Addresses.size() == 1) return Optional.of(siteLocalIPv4Addresses.get(0));
String addresses = ipv4Addresses.stream().map(InetAddresses::toAddrString).collect(Collectors.joining(","));
- throw new ConvergenceException(
+ throw ConvergenceException.ofError(
String.format(
"Multiple IPv4 addresses found: %s. Perhaps a missing DNS entry or multiple A records in DNS?",
addresses));
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
index b13a3a184e5..e4c2f595164 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
@@ -334,7 +334,7 @@ public class NodeAgentImplTest {
when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(node));
when(containerOperations.pullImageAsyncIfNeeded(any(), eq(dockerImage), any())).thenReturn(false);
- doThrow(new ConvergenceException("Connection refused")).doNothing()
+ doThrow(ConvergenceException.ofTransient("Connection refused")).doNothing()
.when(healthChecker).verifyHealth(eq(context));
try {
@@ -640,7 +640,7 @@ public class NodeAgentImplTest {
InOrder inOrder = inOrder(orchestrator, containerOperations);
- ConvergenceException healthCheckException = new ConvergenceException("Not yet up");
+ ConvergenceException healthCheckException = ConvergenceException.ofTransient("Not yet up");
doThrow(healthCheckException).when(healthChecker).verifyHealth(any());
for (int i = 0; i < 3; i++) {
try {
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java
index bb441c6621c..d8becdf7c80 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java
@@ -1,22 +1,16 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.autoscale;
-import com.yahoo.config.provision.ApplicationId;
-import com.yahoo.config.provision.Capacity;
import com.yahoo.config.provision.ClusterResources;
-import com.yahoo.config.provision.ClusterSpec;
-import com.yahoo.config.provision.HostSpec;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.test.ManualClock;
-import com.yahoo.transaction.Mutex;
-import com.yahoo.vespa.hosted.provision.applications.Application;
import com.yahoo.vespa.hosted.provision.testutils.OrchestratorMock;
import org.junit.Test;
import java.time.Duration;
+import java.util.Optional;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
-import java.util.stream.Collectors;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
@@ -28,40 +22,27 @@ public class AutoscalingIntegrationTest {
@Test
public void testComponentIntegration() {
- NodeResources nodes = new NodeResources(1, 10, 100, 1);
- NodeResources hosts = new NodeResources(3, 20, 200, 1);
-
- AutoscalingTester tester = new AutoscalingTester(hosts);
- MetricsV2MetricsFetcher fetcher = new MetricsV2MetricsFetcher(tester.nodeRepository(),
+ var fixture = AutoscalingTester.fixture()
+ .hostResources(new NodeResources(3, 20, 200, 1))
+ .initialResources(Optional.of(new ClusterResources(2, 1,
+ new NodeResources(1, 10, 100, 1))))
+ .build();
+ MetricsV2MetricsFetcher fetcher = new MetricsV2MetricsFetcher(fixture.tester().nodeRepository(),
new OrchestratorMock(),
- new MockHttpClient(tester.clock()));
- Autoscaler autoscaler = new Autoscaler(tester.nodeRepository());
-
- ApplicationId application1 = AutoscalingTester.applicationId("test1");
- ClusterSpec cluster1 = AutoscalingTester.clusterSpec(ClusterSpec.Type.container, "test");
- Set<String> hostnames = tester.deploy(application1, cluster1, 2, 1, nodes)
- .stream().map(HostSpec::hostname)
- .collect(Collectors.toSet());
+ new MockHttpClient(fixture.tester().clock()));
+ Autoscaler autoscaler = new Autoscaler(fixture.tester().nodeRepository());
+
// The metrics response (below) hardcodes these hostnames:
- assertEquals(Set.of("node-1-of-host-1.yahoo.com", "node-1-of-host-10.yahoo.com"), hostnames);
+ assertEquals(Set.of("node-1-of-host-1.yahoo.com", "node-1-of-host-10.yahoo.com"), fixture.nodes().hostnames());
for (int i = 0; i < 1000; i++) {
- tester.clock().advance(Duration.ofSeconds(10));
- fetcher.fetchMetrics(application1).whenComplete((r, e) -> tester.nodeMetricsDb().addNodeMetrics(r.nodeMetrics()));
- tester.clock().advance(Duration.ofSeconds(10));
- tester.nodeMetricsDb().gc();
+ fixture.tester().clock().advance(Duration.ofSeconds(10));
+ fetcher.fetchMetrics(fixture.applicationId()).whenComplete((r, e) -> fixture.tester().nodeMetricsDb().addNodeMetrics(r.nodeMetrics()));
+ fixture.tester().clock().advance(Duration.ofSeconds(10));
+ fixture.tester().nodeMetricsDb().gc();
}
- ClusterResources min = new ClusterResources(2, 1, nodes);
- ClusterResources max = new ClusterResources(2, 1, nodes);
-
- Application application = tester.nodeRepository().applications().get(application1).orElse(Application.empty(application1))
- .withCluster(cluster1.id(), false, Capacity.from(min, max));
- try (Mutex lock = tester.nodeRepository().nodes().lock(application1)) {
- tester.nodeRepository().applications().put(application, lock);
- }
- var scaledResources = autoscaler.suggest(application, application.clusters().get(cluster1.id()),
- tester.nodeRepository().nodes().list().owner(application1));
+ var scaledResources = autoscaler.suggest(fixture.application(), fixture.cluster(), fixture.nodes());
assertTrue(scaledResources.isPresent());
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
index a2eb5fba9f9..e6873e7118f 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
@@ -1,19 +1,19 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.autoscale;
-import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.Capacity;
-import com.yahoo.config.provision.Cloud;
import com.yahoo.config.provision.ClusterResources;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.Flavor;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeResources.DiskSpeed;
+import static com.yahoo.config.provision.NodeResources.DiskSpeed.fast;
+import static com.yahoo.config.provision.NodeResources.DiskSpeed.slow;
import com.yahoo.config.provision.NodeResources.StorageType;
+import static com.yahoo.config.provision.NodeResources.StorageType.remote;
import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.RegionName;
-import com.yahoo.config.provision.SystemName;
import com.yahoo.config.provision.Zone;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.Nodelike;
@@ -22,8 +22,6 @@ import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator;
import org.junit.Test;
import java.time.Duration;
-import java.util.ArrayList;
-import java.util.List;
import java.util.Optional;
import static org.junit.Assert.assertEquals;
@@ -41,11 +39,11 @@ public class AutoscalingTest {
fixture.tester().clock().advance(Duration.ofDays(1));
assertTrue("No measurements -> No change", fixture.autoscale().isEmpty());
- fixture.applyCpuLoad(0.7f, 59);
+ fixture.loader().applyCpuLoad(0.7f, 59);
assertTrue("Too few measurements -> No change", fixture.autoscale().isEmpty());
fixture.tester().clock().advance(Duration.ofDays(1));
- fixture.applyCpuLoad(0.7f, 120);
+ fixture.loader().applyCpuLoad(0.7f, 120);
ClusterResources scaledResources = fixture.tester().assertResources("Scaling up since resource usage is too high",
9, 1, 2.8, 5.0, 50.0,
fixture.autoscale());
@@ -56,14 +54,14 @@ public class AutoscalingTest {
fixture.deactivateRetired(Capacity.from(scaledResources));
fixture.tester().clock().advance(Duration.ofDays(2));
- fixture.applyCpuLoad(0.8f, 3);
+ fixture.loader().applyCpuLoad(0.8f, 3);
assertTrue("Load change is large, but insufficient measurements for new config -> No change",
fixture.autoscale().isEmpty());
- fixture.applyCpuLoad(0.19f, 100);
+ fixture.loader().applyCpuLoad(0.19f, 100);
assertEquals("Load change is small -> No change", Optional.empty(), fixture.autoscale().target());
- fixture.applyCpuLoad(0.1f, 120);
+ fixture.loader().applyCpuLoad(0.1f, 120);
fixture.tester().assertResources("Scaling cpu down since usage has gone down significantly",
9, 1, 1.0, 5.0, 50.0,
fixture.autoscale());
@@ -74,7 +72,7 @@ public class AutoscalingTest {
public void test_autoscaling_up_is_fast_TODO() {
var fixture = AutoscalingTester.fixture().build();
fixture.tester().clock().advance(Duration.ofDays(1)); // TODO: Remove the need for this
- fixture.applyLoad(1.0, 1.0, 1.0, 120); // TODO: Make this low
+ fixture.loader().applyLoad(1.0, 1.0, 1.0, 120); // TODO: Make this low
fixture.tester().assertResources("Scaling up since resource usage is too high",
10, 1, 9.4, 8.5, 92.6,
fixture.autoscale());
@@ -84,12 +82,12 @@ public class AutoscalingTest {
@Test
public void test_autoscaling_single_container_group() {
var fixture = AutoscalingTester.fixture().clusterType(ClusterSpec.Type.container).build();
- fixture.applyCpuLoad(0.25f, 120);
+ fixture.loader().applyCpuLoad(0.25f, 120);
ClusterResources scaledResources = fixture.tester().assertResources("Scaling up since cpu usage is too high",
5, 1, 3.8, 8.0, 50.5,
fixture.autoscale());
fixture.deploy(Capacity.from(scaledResources));
- fixture.applyCpuLoad(0.1f, 120);
+ fixture.loader().applyCpuLoad(0.1f, 120);
fixture.tester().assertResources("Scaling down since cpu usage has gone down",
4, 1, 2.5, 6.4, 25.5,
fixture.autoscale());
@@ -97,55 +95,55 @@ public class AutoscalingTest {
@Test
public void autoscaling_handles_disk_setting_changes() {
- var resources = new NodeResources(3, 100, 100, 1, NodeResources.DiskSpeed.slow);
+ var resources = new NodeResources(3, 100, 100, 1, slow);
var fixture = AutoscalingTester.fixture()
.hostResources(resources)
.initialResources(Optional.of(new ClusterResources(5, 1, resources)))
.capacity(Capacity.from(new ClusterResources(5, 1, resources)))
.build();
- assertTrue(fixture.tester().nodeRepository().nodes().list().owner(fixture.application).stream()
- .allMatch(n -> n.allocation().get().requestedResources().diskSpeed() == NodeResources.DiskSpeed.slow));
+ assertTrue(fixture.tester().nodeRepository().nodes().list().owner(fixture.applicationId).stream()
+ .allMatch(n -> n.allocation().get().requestedResources().diskSpeed() == slow));
fixture.tester().clock().advance(Duration.ofDays(2));
- fixture.applyCpuLoad(0.25, 120);
+ fixture.loader().applyCpuLoad(0.25, 120);
// Changing min and max from slow to any
ClusterResources min = new ClusterResources( 2, 1,
- new NodeResources(1, 1, 1, 1, NodeResources.DiskSpeed.any));
+ new NodeResources(1, 1, 1, 1, DiskSpeed.any));
ClusterResources max = new ClusterResources(20, 1,
- new NodeResources(100, 1000, 1000, 1, NodeResources.DiskSpeed.any));
+ new NodeResources(100, 1000, 1000, 1, DiskSpeed.any));
var capacity = Capacity.from(min, max);
ClusterResources scaledResources = fixture.tester().assertResources("Scaling up",
14, 1, 1.4, 30.8, 30.8,
fixture.autoscale(capacity));
assertEquals("Disk speed from new capacity is used",
- NodeResources.DiskSpeed.any, scaledResources.nodeResources().diskSpeed());
+ DiskSpeed.any, scaledResources.nodeResources().diskSpeed());
fixture.deploy(Capacity.from(scaledResources));
assertTrue(fixture.nodes().stream()
- .allMatch(n -> n.allocation().get().requestedResources().diskSpeed() == NodeResources.DiskSpeed.any));
+ .allMatch(n -> n.allocation().get().requestedResources().diskSpeed() == DiskSpeed.any));
}
@Test
public void autoscaling_target_preserves_any() {
NodeResources resources = new NodeResources(1, 10, 10, 1);
- var capacity = Capacity.from(new ClusterResources( 2, 1, resources.with(NodeResources.DiskSpeed.any)),
- new ClusterResources( 10, 1, resources.with(NodeResources.DiskSpeed.any)));
+ var capacity = Capacity.from(new ClusterResources( 2, 1, resources.with(DiskSpeed.any)),
+ new ClusterResources( 10, 1, resources.with(DiskSpeed.any)));
var fixture = AutoscalingTester.fixture()
.capacity(capacity)
.initialResources(Optional.empty())
.build();
// Redeployment without target: Uses current resource numbers with *requested* non-numbers (i.e disk-speed any)
- assertTrue(fixture.tester().nodeRepository().applications().get(fixture.application).get().cluster(fixture.cluster.id()).get().targetResources().isEmpty());
+ assertTrue(fixture.tester().nodeRepository().applications().get(fixture.applicationId).get().cluster(fixture.clusterSpec.id()).get().targetResources().isEmpty());
fixture.deploy();
- assertEquals(NodeResources.DiskSpeed.any, fixture.nodes().first().get().allocation().get().requestedResources().diskSpeed());
+ assertEquals(DiskSpeed.any, fixture.nodes().first().get().allocation().get().requestedResources().diskSpeed());
// Autoscaling: Uses disk-speed any as well
fixture.deactivateRetired(capacity);
fixture.tester().clock().advance(Duration.ofDays(1));
- fixture.applyCpuLoad(0.8, 120);
- assertEquals(NodeResources.DiskSpeed.any, fixture.autoscale(capacity).target().get().nodeResources().diskSpeed());
+ fixture.loader().applyCpuLoad(0.8, 120);
+ assertEquals(DiskSpeed.any, fixture.autoscale(capacity).target().get().nodeResources().diskSpeed());
}
@Test
@@ -158,7 +156,7 @@ public class AutoscalingTest {
.capacity(Capacity.from(min, max)).build();
fixture.tester().clock().advance(Duration.ofDays(1));
- fixture.applyLoad(0.25, 0.95, 0.95, 120);
+ fixture.loader().applyLoad(0.25, 0.95, 0.95, 120);
fixture.tester().assertResources("Scaling up to limit since resource usage is too high",
6, 1, 2.4, 78.0, 79.0,
fixture.autoscale());
@@ -172,7 +170,7 @@ public class AutoscalingTest {
// deploy
fixture.tester().clock().advance(Duration.ofDays(2));
- fixture.applyLoad(0.05f, 0.05f, 0.05f, 120);
+ fixture.loader().applyLoad(0.05f, 0.05f, 0.05f, 120);
fixture.tester().assertResources("Scaling down to limit since resource usage is low",
4, 1, 1.8, 7.4, 13.9,
fixture.autoscale());
@@ -188,13 +186,13 @@ public class AutoscalingTest {
.build();
NodeResources defaultResources =
- new CapacityPolicies(fixture.tester().nodeRepository()).defaultNodeResources(fixture.cluster, fixture.application, false);
+ new CapacityPolicies(fixture.tester().nodeRepository()).defaultNodeResources(fixture.clusterSpec, fixture.applicationId, false);
fixture.tester().assertResources("Min number of nodes and default resources",
2, 1, defaultResources,
fixture.nodes().toResources());
fixture.tester().clock().advance(Duration.ofDays(2));
- fixture.applyLoad(0.25, 0.95, 0.95, 120);
+ fixture.loader().applyLoad(0.25, 0.95, 0.95, 120);
fixture.tester().assertResources("Scaling up",
5, 1,
defaultResources.vcpu(), defaultResources.memoryGb(), defaultResources.diskGb(),
@@ -211,7 +209,7 @@ public class AutoscalingTest {
.capacity(Capacity.from(min, max))
.build();
fixture.tester().clock().advance(Duration.ofDays(2));
- fixture.applyCpuLoad(0.3, 240);
+ fixture.loader().applyCpuLoad(0.3, 240);
fixture.tester().assertResources("Scaling up",
6, 6, 3.8, 8.0, 10.0,
fixture.autoscale());
@@ -224,58 +222,72 @@ public class AutoscalingTest {
// deploy
fixture.tester().clock().advance(Duration.ofDays(1));
- fixture.applyCpuLoad(0.25, 120);
+ fixture.loader().applyCpuLoad(0.25, 120);
assertTrue(fixture.autoscale().isEmpty());
}
@Test
- public void prefers_remote_disk_when_no_local_match() {
- NodeResources resources = new NodeResources(3, 100, 50, 1);
- ClusterResources min = new ClusterResources( 2, 1, resources);
- ClusterResources max = min;
- // AutoscalingTester hardcodes 3Gb memory overhead:
- Flavor localFlavor = new Flavor("local", new NodeResources(3, 97, 75, 1, DiskSpeed.fast, StorageType.local));
- Flavor remoteFlavor = new Flavor("remote", new NodeResources(3, 97, 50, 1, DiskSpeed.fast, StorageType.remote));
-
- var tester = new AutoscalingTester(new Zone(new Cloud.Builder().dynamicProvisioning(true).build(),
- SystemName.defaultSystem(), Environment.prod, RegionName.defaultName()),
- List.of(localFlavor, remoteFlavor));
- tester.provisioning().makeReadyNodes(5, localFlavor.name(), NodeType.host, 8);
- tester.provisioning().makeReadyNodes(5, remoteFlavor.name(), NodeType.host, 8);
- tester.provisioning().activateTenantHosts();
-
- ApplicationId application1 = AutoscalingTester.applicationId("application1");
- ClusterSpec cluster1 = AutoscalingTester.clusterSpec(ClusterSpec.Type.container, "cluster1");
+ public void container_prefers_remote_disk_when_no_local_match() {
+ var resources = new ClusterResources( 2, 1, new NodeResources(3, 100, 50, 1));
+ var local = new NodeResources(3, 100, 75, 1, fast, StorageType.local);
+ var remote = new NodeResources(3, 100, 50, 1, fast, StorageType.remote);
+ var fixture = AutoscalingTester.fixture()
+ .dynamicProvisioning(true)
+ .clusterType(ClusterSpec.Type.container)
+ .hostResources(local, remote)
+ .capacity(Capacity.from(resources))
+ .initialResources(Optional.of(new ClusterResources(3, 1, resources.nodeResources())))
+ .build();
- // deploy
- tester.deploy(application1, cluster1, 3, 1, min.nodeResources());
- Duration timeAdded = tester.addDiskMeasurements(0.01f, 1f, 120, application1);
- tester.clock().advance(timeAdded.negated());
- tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> 10.0); // Query traffic only
- Autoscaler.Advice suggestion = tester.suggest(application1, cluster1.id(), min, max);
- tester.assertResources("Choosing the remote disk flavor as it has less disk",
- 6, 1, 3.0, 100.0, 10.0,
- suggestion);
+ fixture.tester().clock().advance(Duration.ofDays(2));
+ fixture.loader().applyLoad(0.01, 0.01, 0.01, 120);
+ Autoscaler.Advice suggestion = fixture.suggest();
+ fixture.tester().assertResources("Choosing the remote disk flavor as it has less disk",
+ 2, 1, 3.0, 100.0, 10.0,
+ suggestion);
assertEquals("Choosing the remote disk flavor as it has less disk",
StorageType.remote, suggestion.target().get().nodeResources().storageType());
}
@Test
+ public void content_prefers_local_disk_when_no_local_match() {
+ var resources = new ClusterResources( 2, 1, new NodeResources(3, 100, 50, 1));
+ var local = new NodeResources(3, 100, 75, 1, fast, StorageType.local);
+ var remote = new NodeResources(3, 100, 50, 1, fast, StorageType.remote);
+ var fixture = AutoscalingTester.fixture()
+ .dynamicProvisioning(true)
+ .clusterType(ClusterSpec.Type.content)
+ .hostResources(local, remote)
+ .capacity(Capacity.from(resources))
+ .initialResources(Optional.of(new ClusterResources(3, 1, resources.nodeResources())))
+ .build();
+
+ fixture.tester().clock().advance(Duration.ofDays(2));
+ fixture.loader().applyLoad(0.01, 0.01, 0.01, 120);
+ Autoscaler.Advice suggestion = fixture.suggest();
+ fixture.tester().assertResources("Always prefers local disk for content",
+ 2, 1, 3.0, 100.0, 75.0,
+ suggestion);
+ assertEquals("Always prefers local disk for content",
+ StorageType.local, suggestion.target().get().nodeResources().storageType());
+ }
+
+ @Test
public void suggestions_ignores_limits() {
ClusterResources min = new ClusterResources( 2, 1, new NodeResources(1, 1, 1, 1));
var fixture = AutoscalingTester.fixture().capacity(Capacity.from(min, min)).build();
fixture.tester().clock().advance(Duration.ofDays(2));
- fixture.applyCpuLoad(1.0, 120);
+ fixture.loader().applyCpuLoad(1.0, 120);
fixture.tester().assertResources("Suggesting above capacity limit",
8, 1, 9.3, 5.7, 57.1,
- fixture.tester().suggest(fixture.application, fixture.cluster.id(), min, min));
+ fixture.tester().suggest(fixture.applicationId, fixture.clusterSpec.id(), min, min));
}
@Test
public void not_using_out_of_service_measurements() {
var fixture = AutoscalingTester.fixture().build();
fixture.tester().clock().advance(Duration.ofDays(2));
- fixture.applyLoad(0.9, 0.6, 0.7, 1, false, true, 120);
+ fixture.loader().applyLoad(0.9, 0.6, 0.7, 1, false, true, 120);
assertTrue("Not scaling up since nodes were measured while cluster was out of service",
fixture.autoscale().isEmpty());
}
@@ -284,7 +296,7 @@ public class AutoscalingTest {
public void not_using_unstable_measurements() {
var fixture = AutoscalingTester.fixture().build();
fixture.tester().clock().advance(Duration.ofDays(2));
- fixture.applyLoad(0.9, 0.6, 0.7, 1, true, false, 120);
+ fixture.loader().applyLoad(0.9, 0.6, 0.7, 1, true, false, 120);
assertTrue("Not scaling up since nodes were measured while cluster was out of service",
fixture.autoscale().isEmpty());
}
@@ -299,7 +311,7 @@ public class AutoscalingTest {
.capacity(Capacity.from(min, max))
.build();
fixture.tester().clock().advance(Duration.ofDays(2));
- fixture.applyCpuLoad(0.9, 120);
+ fixture.loader().applyCpuLoad(0.9, 120);
fixture.tester().assertResources("Scaling the number of groups, but nothing requires us to stay with 1 node per group",
10, 5, 7.7, 40.0, 40.0,
fixture.autoscale());
@@ -315,9 +327,9 @@ public class AutoscalingTest {
.capacity(Capacity.from(min, max))
.build();
fixture.tester().clock().advance(Duration.ofDays(2));
- Duration timePassed = fixture.addCpuMeasurements(0.25, 120);
+ Duration timePassed = fixture.loader().addCpuMeasurements(0.25, 120);
fixture.tester().clock().advance(timePassed.negated());
- fixture.addLoadMeasurements(10, t -> t == 0 ? 20.0 : 10.0, t -> 1.0);
+ fixture.loader().addLoadMeasurements(10, t -> t == 0 ? 20.0 : 10.0, t -> 1.0);
fixture.tester().assertResources("Scaling up since resource usage is too high, changing to 1 group is cheaper",
10, 1, 2.3, 27.8, 27.8,
fixture.autoscale());
@@ -334,9 +346,9 @@ public class AutoscalingTest {
.capacity(Capacity.from(min, max))
.build();
fixture.tester().clock().advance(Duration.ofDays(2));
- Duration timePassed = fixture.addCpuMeasurements(0.25, 120);
+ Duration timePassed = fixture.loader().addCpuMeasurements(0.25, 120);
fixture.tester().clock().advance(timePassed.negated());
- fixture.addLoadMeasurements(10, t -> t == 0 ? 20.0 : 10.0, t -> 100.0);
+ fixture.loader().addLoadMeasurements(10, t -> t == 0 ? 20.0 : 10.0, t -> 100.0);
fixture.tester().assertResources("Scaling down since resource usage is too high, changing to 1 group is cheaper",
6, 1, 1.0, 50.0, 50.0,
fixture.autoscale());
@@ -352,7 +364,7 @@ public class AutoscalingTest {
.capacity(Capacity.from(min, max))
.build();
fixture.tester().clock().advance(Duration.ofDays(1));
- fixture.applyMemLoad(1.0, 1000);
+ fixture.loader().applyMemLoad(1.0, 1000);
fixture.tester().assertResources("Increase group size to reduce memory load",
8, 2, 6.5, 96.2, 62.5,
fixture.autoscale());
@@ -368,7 +380,7 @@ public class AutoscalingTest {
.capacity(Capacity.from(min, max))
.build();
fixture.tester().clock().advance(Duration.ofDays(2));
- fixture.applyMemLoad(0.02, 120);
+ fixture.loader().applyMemLoad(0.02, 120);
fixture.tester().assertResources("Scaling down",
6, 1, 3.1, 4.0, 100.0,
fixture.autoscale());
@@ -377,10 +389,10 @@ public class AutoscalingTest {
@Test
public void scaling_down_only_after_delay() {
var fixture = AutoscalingTester.fixture().build();
- fixture.applyMemLoad(0.02, 120);
+ fixture.loader().applyMemLoad(0.02, 120);
assertTrue("Too soon after initial deployment", fixture.autoscale().target().isEmpty());
fixture.tester().clock().advance(Duration.ofDays(2));
- fixture.applyMemLoad(0.02, 120);
+ fixture.loader().applyMemLoad(0.02, 120);
fixture.tester().assertResources("Scaling down since enough time has passed",
6, 1, 1.2, 4.0, 80.0,
fixture.autoscale());
@@ -392,7 +404,7 @@ public class AutoscalingTest {
var fixture = AutoscalingTester.fixture()
.resourceCalculator(new OnlySubtractingWhenForecastingCalculator(0))
.build();
- fixture.applyLoad(1.0, 1.0, 0.7, 1000);
+ fixture.loader().applyLoad(1.0, 1.0, 0.7, 1000);
fixture.tester().assertResources("Scaling up",
9, 1, 5.0, 9.6, 72.9,
fixture.autoscale());
@@ -402,7 +414,7 @@ public class AutoscalingTest {
var fixture = AutoscalingTester.fixture()
.resourceCalculator(new OnlySubtractingWhenForecastingCalculator(3))
.build();
- fixture.applyLoad(1.0, 1.0, 0.7, 1000);
+ fixture.loader().applyLoad(1.0, 1.0, 0.7, 1000);
fixture.tester().assertResources("With 3Gb memory tax, we scale up memory more",
7, 1, 6.4, 15.8, 97.2,
fixture.autoscale());
@@ -413,41 +425,30 @@ public class AutoscalingTest {
public void test_autoscaling_with_dynamic_provisioning() {
ClusterResources min = new ClusterResources( 2, 1, new NodeResources(1, 1, 1, 1));
ClusterResources max = new ClusterResources(20, 1, new NodeResources(100, 1000, 1000, 1));
- var capacity = Capacity.from(min, max);
- List<Flavor> flavors = new ArrayList<>();
- flavors.add(new Flavor("aws-xlarge", new NodeResources(3, 200, 100, 1, NodeResources.DiskSpeed.fast, NodeResources.StorageType.remote)));
- flavors.add(new Flavor("aws-large", new NodeResources(3, 150, 100, 1, NodeResources.DiskSpeed.fast, NodeResources.StorageType.remote)));
- flavors.add(new Flavor("aws-medium", new NodeResources(3, 100, 100, 1, NodeResources.DiskSpeed.fast, NodeResources.StorageType.remote)));
- flavors.add(new Flavor("aws-small", new NodeResources(3, 80, 100, 1, NodeResources.DiskSpeed.fast, NodeResources.StorageType.remote)));
- AutoscalingTester tester = new AutoscalingTester(new Zone(Cloud.builder()
- .dynamicProvisioning(true)
- .build(),
- SystemName.main,
- Environment.prod, RegionName.from("us-east")),
- flavors);
-
- ApplicationId application1 = AutoscalingTester.applicationId("application1");
- ClusterSpec cluster1 = AutoscalingTester.clusterSpec(ClusterSpec.Type.content, "cluster1");
-
- // deploy (Why 103 Gb memory? See AutoscalingTester.MockHostResourcesCalculator
- tester.deploy(application1, cluster1, 5, 1, new NodeResources(3, 103, 100, 1));
-
- tester.clock().advance(Duration.ofDays(2));
- tester.addMemMeasurements(0.9f, 0.6f, 120, application1);
- ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high.",
- 8, 1, 3, 83, 34.3,
- tester.autoscale(application1, cluster1, capacity));
-
- tester.deploy(application1, cluster1, scaledResources);
- tester.deactivateRetired(application1, cluster1, scaledResources);
-
- tester.clock().advance(Duration.ofDays(2));
- tester.addMemMeasurements(0.3f, 0.6f, 1000, application1);
- tester.clock().advance(Duration.ofMinutes(-10 * 5));
- tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
- tester.assertResources("Scaling down since resource usage has gone down",
- 5, 1, 3, 83, 36.0,
- tester.autoscale(application1, cluster1, capacity));
+ var fixture = AutoscalingTester.fixture()
+ .dynamicProvisioning(true)
+ .hostResources(new NodeResources(3, 200, 100, 1, fast, remote),
+ new NodeResources(3, 150, 100, 1, fast, remote),
+ new NodeResources(3, 100, 100, 1, fast, remote),
+ new NodeResources(3, 80, 100, 1, fast, remote))
+ .capacity(Capacity.from(min, max))
+ .initialResources(Optional.of(new ClusterResources(5, 1,
+ new NodeResources(3, 100, 100, 1))))
+ .build();
+
+ fixture.tester().clock().advance(Duration.ofDays(2));
+ fixture.loader().applyMemLoad(0.9, 120);
+ var scaledResources = fixture.tester().assertResources("Scaling up since resource usage is too high.",
+ 8, 1, 3, 80, 57.1,
+ fixture.autoscale());
+ fixture.deploy(Capacity.from(scaledResources));
+ fixture.deactivateRetired(Capacity.from(scaledResources));
+
+ fixture.tester().clock().advance(Duration.ofDays(2));
+ fixture.loader().applyMemLoad(0.3, 1000);
+ fixture.tester().assertResources("Scaling down since resource usage has gone down",
+ 5, 1, 3, 80, 100.0,
+ fixture.autoscale());
}
@Test
@@ -458,7 +459,7 @@ public class AutoscalingTest {
.capacity(Capacity.from(min, max))
.build();
fixture.tester.clock().advance(Duration.ofDays(1));
- fixture.applyCpuLoad(0.25, 120);
+ fixture.loader().applyCpuLoad(0.25, 120);
// (no read share stored)
fixture.tester().assertResources("Advice to scale up since we set aside for bcp by default",
@@ -481,9 +482,9 @@ public class AutoscalingTest {
var fixture = AutoscalingTester.fixture().build();
fixture.tester().clock().advance(Duration.ofDays(2));
- Duration timeAdded = fixture.addLoadMeasurements(100, t -> t == 0 ? 20.0 : 10.0, t -> 0.0);
+ Duration timeAdded = fixture.loader().addLoadMeasurements(100, t -> t == 0 ? 20.0 : 10.0, t -> 0.0);
fixture.tester.clock().advance(timeAdded.negated());
- fixture.addCpuMeasurements(0.25, 200);
+ fixture.loader().addCpuMeasurements(0.25, 200);
fixture.tester().assertResources("Scale up since we assume we need 2x cpu for growth when no data scaling time data",
9, 1, 2.1, 5, 50,
@@ -491,20 +492,20 @@ public class AutoscalingTest {
fixture.setScalingDuration(Duration.ofMinutes(5));
fixture.tester().clock().advance(Duration.ofDays(2));
- timeAdded = fixture.addLoadMeasurements(100, t -> 10.0 + (t < 50 ? t : 100 - t), t -> 0.0);
+ timeAdded = fixture.loader().addLoadMeasurements(100, t -> 10.0 + (t < 50 ? t : 100 - t), t -> 0.0);
fixture.tester.clock().advance(timeAdded.negated());
- fixture.addCpuMeasurements(0.25, 200);
+ fixture.loader().addCpuMeasurements(0.25, 200);
fixture.tester().assertResources("Scale down since observed growth is slower than scaling time",
9, 1, 1.8, 5, 50,
fixture.autoscale());
fixture.setScalingDuration(Duration.ofMinutes(60));
fixture.tester().clock().advance(Duration.ofDays(2));
- timeAdded = fixture.addLoadMeasurements(100,
- t -> 10.0 + (t < 50 ? t * t * t : 125000 - (t - 49) * (t - 49) * (t - 49)),
- t -> 0.0);
+ timeAdded = fixture.loader().addLoadMeasurements(100,
+ t -> 10.0 + (t < 50 ? t * t * t : 125000 - (t - 49) * (t - 49) * (t - 49)),
+ t -> 0.0);
fixture.tester.clock().advance(timeAdded.negated());
- fixture.addCpuMeasurements(0.25, 200);
+ fixture.loader().addCpuMeasurements(0.25, 200);
fixture.tester().assertResources("Scale up since observed growth is faster than scaling time",
9, 1, 2.1, 5, 50,
fixture.autoscale());
@@ -514,48 +515,48 @@ public class AutoscalingTest {
public void test_autoscaling_considers_query_vs_write_rate() {
var fixture = AutoscalingTester.fixture().build();
- fixture.addCpuMeasurements(0.4, 220);
+ fixture.loader().addCpuMeasurements(0.4, 220);
// Why twice the query rate at time = 0?
// This makes headroom for queries doubling, which we want to observe the effect of here
fixture.tester().clock().advance(Duration.ofDays(2));
- var timeAdded = fixture.addLoadMeasurements(100, t -> t == 0 ? 20.0 : 10.0, t -> 10.0);
+ var timeAdded = fixture.loader().addLoadMeasurements(100, t -> t == 0 ? 20.0 : 10.0, t -> 10.0);
fixture.tester.clock().advance(timeAdded.negated());
- fixture.addCpuMeasurements(0.4, 200);
+ fixture.loader().addCpuMeasurements(0.4, 200);
fixture.tester.assertResources("Query and write load is equal -> scale up somewhat",
9, 1, 2.4, 5, 50,
fixture.autoscale());
fixture.tester().clock().advance(Duration.ofDays(2));
- timeAdded = fixture.addLoadMeasurements(100, t -> t == 0 ? 80.0 : 40.0, t -> 10.0);
+ timeAdded = fixture.loader().addLoadMeasurements(100, t -> t == 0 ? 80.0 : 40.0, t -> 10.0);
fixture.tester.clock().advance(timeAdded.negated());
- fixture.addCpuMeasurements(0.4, 200);
+ fixture.loader().addCpuMeasurements(0.4, 200);
// TODO: Ackhually, we scale down here - why?
fixture.tester().assertResources("Query load is 4x write load -> scale up more",
9, 1, 2.1, 5, 50,
fixture.autoscale());
fixture.tester().clock().advance(Duration.ofDays(2));
- timeAdded = fixture.addLoadMeasurements(100, t -> t == 0 ? 20.0 : 10.0, t -> 100.0);
+ timeAdded = fixture.loader().addLoadMeasurements(100, t -> t == 0 ? 20.0 : 10.0, t -> 100.0);
fixture.tester.clock().advance(timeAdded.negated());
- fixture.addCpuMeasurements(0.4, 200);
+ fixture.loader().addCpuMeasurements(0.4, 200);
fixture.tester().assertResources("Write load is 10x query load -> scale down",
9, 1, 1.1, 5, 50,
fixture.autoscale());
fixture.tester().clock().advance(Duration.ofDays(2));
- timeAdded = fixture.addLoadMeasurements(100, t -> t == 0 ? 20.0 : 10.0, t-> 0.0);
+ timeAdded = fixture.loader().addLoadMeasurements(100, t -> t == 0 ? 20.0 : 10.0, t-> 0.0);
fixture.tester.clock().advance(timeAdded.negated());
- fixture.addCpuMeasurements(0.4, 200);
+ fixture.loader().addCpuMeasurements(0.4, 200);
fixture.tester().assertResources("Query only -> largest possible",
8, 1, 4.9, 5.7, 57.1,
fixture.autoscale());
fixture.tester().clock().advance(Duration.ofDays(2));
- timeAdded = fixture.addLoadMeasurements(100, t -> 0.0, t -> 10.0);
+ timeAdded = fixture.loader().addLoadMeasurements(100, t -> 0.0, t -> 10.0);
fixture.tester.clock().advance(timeAdded.negated());
- fixture.addCpuMeasurements(0.4, 200);
+ fixture.loader().addCpuMeasurements(0.4, 200);
fixture.tester().assertResources("Write only -> smallest possible",
6, 1, 1.0, 8, 80,
fixture.autoscale());
@@ -567,7 +568,7 @@ public class AutoscalingTest {
.zone(new Zone(Environment.dev, RegionName.from("us-east")))
.build();
fixture.tester().clock().advance(Duration.ofDays(2));
- fixture.applyLoad(1.0, 1.0, 1.0, 200);
+ fixture.loader().applyLoad(1.0, 1.0, 1.0, 200);
assertTrue("Not attempting to scale up because policies dictate we'll only get one node",
fixture.autoscale().target().isEmpty());
}
@@ -588,7 +589,7 @@ public class AutoscalingTest {
.zone(new Zone(Environment.dev, RegionName.from("us-east")))
.build();
fixture.tester().clock().advance(Duration.ofDays(2));
- fixture.applyLoad(1.0, 1.0, 1.0, 200);
+ fixture.loader().applyLoad(1.0, 1.0, 1.0, 200);
fixture.tester().assertResources("We scale even in dev because resources are required",
3, 1, 1.0, 7.7, 83.3,
fixture.autoscale());
@@ -607,7 +608,7 @@ public class AutoscalingTest {
.zone(new Zone(Environment.dev, RegionName.from("us-east")))
.build();
fixture.tester().clock().advance(Duration.ofDays(2));
- fixture.applyLoad(1.0, 1.0, 1.0, 200);
+ fixture.loader().applyLoad(1.0, 1.0, 1.0, 200);
fixture.tester().assertResources("We scale even in dev because resources are required",
3, 1, 1.5, 8, 50,
fixture.autoscale());
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
index 2a4dbe32ab5..e21c57b3ef5 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
@@ -1,22 +1,18 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.autoscale;
-import com.yahoo.collections.Pair;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.Capacity;
import com.yahoo.config.provision.ClusterResources;
import com.yahoo.config.provision.ClusterSpec;
-import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.Flavor;
import com.yahoo.config.provision.HostSpec;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
-import com.yahoo.config.provision.RegionName;
import com.yahoo.config.provision.Zone;
import com.yahoo.test.ManualClock;
import com.yahoo.transaction.Mutex;
import com.yahoo.vespa.hosted.provision.Node;
-import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.Nodelike;
import com.yahoo.vespa.hosted.provision.applications.Application;
@@ -29,11 +25,9 @@ import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator;
import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester;
import java.time.Duration;
-import java.time.Instant;
+import java.util.ArrayList;
import java.util.List;
-import java.util.Map;
import java.util.Set;
-import java.util.function.IntFunction;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
@@ -48,29 +42,17 @@ class AutoscalingTester {
private final HostResourcesCalculator hostResourcesCalculator;
private final CapacityPolicies capacityPolicies;
- /** Creates an autoscaling tester with a single host type ready */
- public AutoscalingTester(NodeResources hostResources) {
- this(Environment.prod, hostResources);
- }
-
- public AutoscalingTester(Environment environment, NodeResources hostResources) {
- this(new Zone(environment, RegionName.from("us-east")), hostResources, null);
- }
-
- public AutoscalingTester(Zone zone, NodeResources hostResources, HostResourcesCalculator resourcesCalculator) {
+ public AutoscalingTester(Zone zone, HostResourcesCalculator resourcesCalculator, List<NodeResources> hostResources) {
this(zone, hostResources, resourcesCalculator, 20);
}
- private AutoscalingTester(Zone zone, NodeResources hostResources, HostResourcesCalculator resourcesCalculator, int hostCount) {
- this(zone, List.of(new Flavor("hostFlavor", hostResources)), resourcesCalculator);
- provisioningTester.makeReadyNodes(hostCount, "hostFlavor", NodeType.host, 8);
+ private AutoscalingTester(Zone zone, List<NodeResources> hostResources, HostResourcesCalculator resourcesCalculator, int hostCount) {
+ this(zone, toFlavors(hostResources), resourcesCalculator);
+ for (Flavor flavor : toFlavors(hostResources))
+ provisioningTester.makeReadyNodes(hostCount, flavor.name(), NodeType.host, 8);
provisioningTester.activateTenantHosts();
}
- public AutoscalingTester(Zone zone, List<Flavor> flavors) {
- this(zone, flavors, new MockHostResourcesCalculator(zone, 3));
- }
-
private AutoscalingTester(Zone zone, List<Flavor> flavors, HostResourcesCalculator resourcesCalculator) {
provisioningTester = new ProvisioningTester.Builder().zone(zone)
.flavors(flavors)
@@ -83,6 +65,13 @@ class AutoscalingTester {
capacityPolicies = new CapacityPolicies(provisioningTester.nodeRepository());
}
+ private static List<Flavor> toFlavors(List<NodeResources> resources) {
+ List<Flavor> flavors = new ArrayList<>();
+ for (int i = 0; i < resources.size(); i++)
+ flavors.add(new Flavor("flavor" + i, resources.get(i)));
+ return flavors;
+ }
+
public static Fixture.Builder fixture() { return new Fixture.Builder(); }
public ProvisioningTester provisioning() { return provisioningTester; }
@@ -121,10 +110,6 @@ class AutoscalingTester {
nodeRepository().nodes().setReady(List.of(host), Agent.system, getClass().getSimpleName());
}
- public void deactivateRetired(ApplicationId application, ClusterSpec cluster, ClusterResources resources) {
- deactivateRetired(application, cluster, Capacity.from(resources));
- }
-
public void deactivateRetired(ApplicationId application, ClusterSpec cluster, Capacity capacity) {
try (Mutex lock = nodeRepository().nodes().lock(application)) {
for (Node node : nodeRepository().nodes().list(Node.State.active).owner(application)) {
@@ -135,145 +120,6 @@ class AutoscalingTester {
deploy(application, cluster, capacity);
}
- public ClusterModel clusterModel(ApplicationId applicationId, ClusterSpec clusterSpec) {
- var application = nodeRepository().applications().get(applicationId).get();
- return new ClusterModel(application,
- clusterSpec,
- application.cluster(clusterSpec.id()).get(),
- nodeRepository().nodes().list(Node.State.active).cluster(clusterSpec.id()),
- nodeRepository().metricsDb(),
- nodeRepository().clock());
- }
-
- /**
- * Adds measurements with the given resource value and ideal values for the other resources,
- * scaled to take one node redundancy into account.
- * (I.e we adjust to measure a bit lower load than "naively" wanted to offset for the autoscaler
- * wanting to see the ideal load with one node missing.)
- *
- * @param otherResourcesLoad the load factor relative to ideal to use for other resources
- * @param count the number of measurements
- * @param applicationId the application we're adding measurements for all nodes of
- */
- public Duration addCpuMeasurements(float value, float otherResourcesLoad,
- int count, ApplicationId applicationId) {
- NodeList nodes = nodeRepository().nodes().list(Node.State.active).owner(applicationId);
- float oneExtraNodeFactor = (float)(nodes.size() - 1.0) / (nodes.size());
- Instant initialTime = clock().instant();
- for (int i = 0; i < count; i++) {
- clock().advance(Duration.ofSeconds(150));
- for (Node node : nodes) {
- Load load = new Load(value,
- ClusterModel.idealMemoryLoad * otherResourcesLoad,
- ClusterModel.idealContentDiskLoad * otherResourcesLoad).multiply(oneExtraNodeFactor);
- nodeMetricsDb().addNodeMetrics(List.of(new Pair<>(node.hostname(),
- new NodeMetricSnapshot(clock().instant(),
- load,
- 0,
- true,
- true,
- 0.0))));
- }
- }
- return Duration.between(initialTime, clock().instant());
- }
-
- /**
- * Adds measurements with the given resource value and ideal values for the other resources,
- * scaled to take one node redundancy into account.
- * (I.e we adjust to measure a bit lower load than "naively" wanted to offset for the autoscaler
- * wanting to see the ideal load with one node missing.)
- *
- * @param otherResourcesLoad the load factor relative to ideal to use for other resources
- * @param count the number of measurements
- * @param applicationId the application we're adding measurements for all nodes of
- * @return the duration added to the current time by this
- */
- public Duration addDiskMeasurements(float value, float otherResourcesLoad,
- int count, ApplicationId applicationId) {
- NodeList nodes = nodeRepository().nodes().list(Node.State.active).owner(applicationId);
- float oneExtraNodeFactor = (float)(nodes.size() - 1.0) / (nodes.size());
- Instant initialTime = clock().instant();
- for (int i = 0; i < count; i++) {
- clock().advance(Duration.ofSeconds(150));
- for (Node node : nodes) {
- Load load = new Load(ClusterModel.idealQueryCpuLoad * otherResourcesLoad,
- ClusterModel.idealContentDiskLoad * otherResourcesLoad,
- value).multiply(oneExtraNodeFactor);
- nodeMetricsDb().addNodeMetrics(List.of(new Pair<>(node.hostname(),
- new NodeMetricSnapshot(clock().instant(),
- load,
- 0,
- true,
- true,
- 0.0))));
- }
- }
- return Duration.between(initialTime, clock().instant());
- }
-
- /**
- * Adds measurements with the given resource value and ideal values for the other resources,
- * scaled to take one node redundancy into account.
- * (I.e we adjust to measure a bit lower load than "naively" wanted to offset for the autoscaler
- * wanting to see the ideal load with one node missing.)
- *
- * @param otherResourcesLoad the load factor relative to ideal to use for other resources
- * @param count the number of measurements
- * @param applicationId the application we're adding measurements for all nodes of
- */
- public void addMemMeasurements(float value, float otherResourcesLoad,
- int count, ApplicationId applicationId) {
- NodeList nodes = nodeRepository().nodes().list(Node.State.active).owner(applicationId);
- float oneExtraNodeFactor = (float)(nodes.size() - 1.0) / (nodes.size());
- for (int i = 0; i < count; i++) {
- clock().advance(Duration.ofMinutes(1));
- for (Node node : nodes) {
- float cpu = (float) 0.2 * otherResourcesLoad * oneExtraNodeFactor;
- float memory = value * oneExtraNodeFactor;
- float disk = (float) ClusterModel.idealContentDiskLoad * otherResourcesLoad * oneExtraNodeFactor;
- Load load = new Load(0.2 * otherResourcesLoad,
- value,
- ClusterModel.idealContentDiskLoad * otherResourcesLoad).multiply(oneExtraNodeFactor);
- nodeMetricsDb().addNodeMetrics(List.of(new Pair<>(node.hostname(),
- new NodeMetricSnapshot(clock().instant(),
- load,
- 0,
- true,
- true,
- 0.0))));
- }
- }
- }
-
- public void addMeasurements(float cpu, float memory, float disk, int count, ApplicationId applicationId) {
- addMeasurements(cpu, memory, disk, 0, true, true, count, applicationId);
- }
-
- public void addMeasurements(float cpu, float memory, float disk, int generation, boolean inService, boolean stable,
- int count, ApplicationId applicationId) {
- NodeList nodes = nodeRepository().nodes().list(Node.State.active).owner(applicationId);
- for (int i = 0; i < count; i++) {
- clock().advance(Duration.ofMinutes(1));
- for (Node node : nodes) {
- nodeMetricsDb().addNodeMetrics(List.of(new Pair<>(node.hostname(),
- new NodeMetricSnapshot(clock().instant(),
- new Load(cpu, memory, disk),
- generation,
- inService,
- stable,
- 0.0))));
- }
- }
- }
-
- public void storeReadShare(double currentReadShare, double maxReadShare, ApplicationId applicationId) {
- Application application = nodeRepository().applications().require(applicationId);
- application = application.with(application.status().withCurrentReadShare(currentReadShare)
- .withMaxReadShare(maxReadShare));
- nodeRepository().applications().put(application, nodeRepository().nodes().lock(applicationId));
- }
-
/** Creates a single redeployment event with bogus data except for the given duration */
public void setScalingDuration(ApplicationId applicationId, ClusterSpec.Id clusterId, Duration duration) {
Application application = nodeRepository().applications().require(applicationId);
@@ -294,47 +140,6 @@ class AutoscalingTester {
nodeRepository().applications().put(application, nodeRepository().nodes().lock(applicationId));
}
- /** Creates the given number of measurements, spaced 5 minutes between, using the given function */
- public Duration addLoadMeasurements(ApplicationId application,
- ClusterSpec.Id cluster,
- int measurements,
- IntFunction<Double> queryRate,
- IntFunction<Double> writeRate) {
- Instant initialTime = clock().instant();
- for (int i = 0; i < measurements; i++) {
- nodeMetricsDb().addClusterMetrics(application,
- Map.of(cluster, new ClusterMetricSnapshot(clock().instant(),
- queryRate.apply(i),
- writeRate.apply(i))));
- clock().advance(Duration.ofMinutes(5));
- }
- return Duration.between(initialTime, clock().instant());
- }
-
- /** Creates the given number of measurements, spaced 5 minutes between, using the given function */
- public Duration addQueryRateMeasurements(ApplicationId application,
- ClusterSpec.Id cluster,
- int measurements,
- IntFunction<Double> queryRate) {
- return addQueryRateMeasurements(application, cluster, measurements, Duration.ofMinutes(5), queryRate);
- }
-
- public Duration addQueryRateMeasurements(ApplicationId application,
- ClusterSpec.Id cluster,
- int measurements,
- Duration samplingInterval,
- IntFunction<Double> queryRate) {
- Instant initialTime = clock().instant();
- for (int i = 0; i < measurements; i++) {
- nodeMetricsDb().addClusterMetrics(application,
- Map.of(cluster, new ClusterMetricSnapshot(clock().instant(),
- queryRate.apply(i),
- 0.0)));
- clock().advance(samplingInterval);
- }
- return Duration.between(initialTime, clock().instant());
- }
-
public Autoscaler.Advice autoscale(ApplicationId applicationId, ClusterSpec cluster, Capacity capacity) {
capacity = capacityPolicies.applyOn(capacity, applicationId, capacityPolicies.decideExclusivity(capacity, cluster.isExclusive()));
Application application = nodeRepository().applications().get(applicationId).orElse(Application.empty(applicationId))
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java
index 896897f45c1..2edd797b78a 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java
@@ -3,18 +3,24 @@ package com.yahoo.vespa.hosted.provision.autoscale;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.Capacity;
+import com.yahoo.config.provision.Cloud;
import com.yahoo.config.provision.ClusterResources;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.RegionName;
import com.yahoo.config.provision.Zone;
+import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
+import com.yahoo.vespa.hosted.provision.applications.Application;
+import com.yahoo.vespa.hosted.provision.applications.Cluster;
import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator;
import java.time.Duration;
+import java.util.Arrays;
+import java.util.List;
import java.util.Optional;
-import java.util.function.IntFunction;
+import java.util.stream.Collectors;
/**
* Fixture for autoscaling tests.
@@ -24,105 +30,102 @@ import java.util.function.IntFunction;
public class Fixture {
final AutoscalingTester tester;
- final ApplicationId application;
- final ClusterSpec cluster;
+ final ApplicationId applicationId;
+ final ClusterSpec clusterSpec;
final Capacity capacity;
+ final Loader loader;
public Fixture(Fixture.Builder builder, Optional<ClusterResources> initialResources) {
- application = builder.application;
- cluster = builder.cluster;
+ applicationId = builder.application;
+ clusterSpec = builder.cluster;
capacity = builder.capacity;
- tester = new AutoscalingTester(builder.zone, builder.hostResources, builder.resourceCalculator);
+ tester = new AutoscalingTester(builder.zone, builder.resourceCalculator, builder.hostResources);
var deployCapacity = initialResources.isPresent() ? Capacity.from(initialResources.get()) : capacity;
tester.deploy(builder.application, builder.cluster, deployCapacity);
+ this.loader = new Loader(this);
}
- public AutoscalingTester tester() { return tester; }
+ public AutoscalingTester tester() { return tester; }
- /** Autoscale within the deployed capacity of this. */
- public Autoscaler.Advice autoscale() {
- return autoscale(capacity);
- }
+ public ApplicationId applicationId() { return applicationId; }
- /** Autoscale within the given capacity. */
- public Autoscaler.Advice autoscale(Capacity capacity) {
- return tester().autoscale(application, cluster, capacity);
+ public ClusterSpec.Id clusterId() { return clusterSpec.id(); }
+
+ public Application application() {
+ return tester().nodeRepository().applications().get(applicationId).orElse(Application.empty(applicationId));
}
- /** Redeploy with the deployed capacity of this. */
- public void deploy() {
- deploy(capacity);
+ public Cluster cluster() {
+ return application().cluster(clusterId()).get();
}
- /** Redeploy with the given capacity. */
- public void deploy(Capacity capacity) {
- tester().deploy(application, cluster, capacity);
+ public ClusterModel clusterModel() {
+ return new ClusterModel(application(),
+ clusterSpec,
+ cluster(),
+ nodes(),
+ tester.nodeRepository().metricsDb(),
+ tester.nodeRepository().clock());
}
/** Returns the nodes allocated to the fixture application cluster */
public NodeList nodes() {
- return tester().nodeRepository().nodes().list().owner(application).cluster(cluster.id());
+ return tester().nodeRepository().nodes().list(Node.State.active).owner(applicationId).cluster(clusterSpec.id());
}
- public void deactivateRetired(Capacity capacity) {
- tester().deactivateRetired(application, cluster, capacity);
- }
+ public Loader loader() { return loader; }
- public void setScalingDuration(Duration duration) {
- tester().setScalingDuration(application, cluster.id(), duration);
+ /** Autoscale within the deployed capacity of this. */
+ public Autoscaler.Advice autoscale() {
+ return autoscale(capacity);
}
- public Duration addCpuMeasurements(double cpuLoad, int measurements) {
- return tester().addCpuMeasurements((float)cpuLoad, 1.0f, measurements, application);
+ /** Autoscale within the given capacity. */
+ public Autoscaler.Advice autoscale(Capacity capacity) {
+ return tester().autoscale(applicationId, clusterSpec, capacity);
}
- public Duration addLoadMeasurements(int measurements, IntFunction<Double> queryRate, IntFunction<Double> writeRate) {
- return tester().addLoadMeasurements(application, cluster.id(), measurements, queryRate, writeRate);
+ /** Compute an autoscaling suggestion for this. */
+ public Autoscaler.Advice suggest() {
+ return tester().suggest(applicationId, clusterSpec.id(), capacity.minResources(), capacity.maxResources());
}
- public void applyCpuLoad(double cpuLoad, int measurements) {
- Duration samplingInterval = Duration.ofSeconds(150L); // in addCpuMeasurements
- tester().addCpuMeasurements((float)cpuLoad, 1.0f, measurements, application);
- tester().clock().advance(samplingInterval.negated().multipliedBy(measurements));
- tester().addQueryRateMeasurements(application, cluster.id(), measurements, samplingInterval, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
+ /** Redeploy with the deployed capacity of this. */
+ public void deploy() {
+ deploy(capacity);
}
- public void applyMemLoad(double memLoad, int measurements) {
- Duration samplingInterval = Duration.ofSeconds(150L); // in addCpuMeasurements
- tester().addMemMeasurements((float)memLoad, 1.0f, measurements, application);
- tester().clock().advance(samplingInterval.negated().multipliedBy(measurements));
- tester().addQueryRateMeasurements(application, cluster.id(), measurements, samplingInterval, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
+ /** Redeploy with the given capacity. */
+ public void deploy(Capacity capacity) {
+ tester().deploy(applicationId, clusterSpec, capacity);
}
- public void applyLoad(double cpuLoad, double memoryLoad, double diskLoad, int measurements) {
- Duration samplingInterval = Duration.ofSeconds(150L); // in addCpuMeasurements
- tester().addMeasurements((float)cpuLoad, (float)memoryLoad, (float)diskLoad, measurements, application);
- tester().clock().advance(samplingInterval.negated().multipliedBy(measurements));
- tester().addQueryRateMeasurements(application, cluster.id(), measurements, samplingInterval, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
+ public void deactivateRetired(Capacity capacity) {
+ tester().deactivateRetired(applicationId, clusterSpec, capacity);
}
- public void applyLoad(double cpuLoad, double memoryLoad, double diskLoad, int generation, boolean inService, boolean stable, int measurements) {
- Duration samplingInterval = Duration.ofSeconds(150L); // in addCpuMeasurements
- tester().addMeasurements((float)cpuLoad, (float)memoryLoad, (float)diskLoad, generation, inService, stable, measurements, application);
- tester().clock().advance(samplingInterval.negated().multipliedBy(measurements));
- tester().addQueryRateMeasurements(application, cluster.id(), measurements, samplingInterval, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
+ public void setScalingDuration(Duration duration) {
+ tester().setScalingDuration(applicationId, clusterSpec.id(), duration);
}
public void storeReadShare(double currentReadShare, double maxReadShare) {
- tester().storeReadShare(currentReadShare, maxReadShare, application);
+ var application = application();
+ application = application.with(application.status().withCurrentReadShare(currentReadShare)
+ .withMaxReadShare(maxReadShare));
+ tester.nodeRepository().applications().put(application, tester.nodeRepository().nodes().lock(applicationId));
}
public static class Builder {
- NodeResources hostResources = new NodeResources(100, 100, 100, 1);
+ ApplicationId application = AutoscalingTester.applicationId("application1");
+ ClusterSpec cluster = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("cluster1")).vespaVersion("7").build();
+ Zone zone = new Zone(Environment.prod, RegionName.from("us-east"));
+ List<NodeResources> hostResources = List.of(new NodeResources(100, 100, 100, 1));
Optional<ClusterResources> initialResources = Optional.of(new ClusterResources(5, 1, new NodeResources(3, 10, 100, 1)));
Capacity capacity = Capacity.from(new ClusterResources(2, 1,
new NodeResources(1, 1, 1, 1, NodeResources.DiskSpeed.any)),
new ClusterResources(20, 1,
new NodeResources(100, 1000, 1000, 1, NodeResources.DiskSpeed.any)));
- ApplicationId application = AutoscalingTester.applicationId("application1");
- ClusterSpec cluster = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("cluster1")).vespaVersion("7").build();
- Zone zone = new Zone(Environment.prod, RegionName.from("us-east"));
HostResourcesCalculator resourceCalculator = new AutoscalingTester.MockHostResourcesCalculator(zone, 0);
public Fixture.Builder zone(Zone zone) {
@@ -130,13 +133,27 @@ public class Fixture {
return this;
}
+ /**
+ * Set to true to behave as if hosts are provisioned dynamically,
+ * and must therefore be allocated completely to one tenant node.
+ */
+ public Fixture.Builder dynamicProvisioning(boolean dynamic) {
+ this.zone = new Zone(Cloud.builder()
+ .dynamicProvisioning(dynamic)
+ .build(),
+ zone.system(),
+ zone.environment(),
+ zone.region());
+ return this;
+ }
+
public Fixture.Builder clusterType(ClusterSpec.Type type) {
cluster = ClusterSpec.request(type, cluster.id()).vespaVersion("7").build();
return this;
}
- public Fixture.Builder hostResources(NodeResources hostResources) {
- this.hostResources = hostResources;
+ public Fixture.Builder hostResources(NodeResources ... hostResources) {
+ this.hostResources = Arrays.stream(hostResources).collect(Collectors.toList());
return this;
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java
new file mode 100644
index 00000000000..db4fe917b53
--- /dev/null
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java
@@ -0,0 +1,158 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.autoscale;
+
+import com.yahoo.collections.Pair;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeList;
+
+import java.time.Duration;
+import java.time.Instant;
+import java.util.List;
+import java.util.Map;
+import java.util.function.IntFunction;
+
+/**
+ * A helper for applying load to an application represented by a fixture.
+ *
+ * @author bratseth
+ */
+public class Loader {
+
+ private final Fixture fixture;
+
+ public Loader(Fixture fixture) {
+ this.fixture = fixture;
+ }
+
+ /**
+ * Adds measurements with the given resource value and ideal values for the other resources,
+ * scaled to take one node redundancy into account.
+ * (I.e we adjust to measure a bit lower load than "naively" wanted to offset for the autoscaler
+ * wanting to see the ideal load with one node missing.)
+ *
+ * @param count the number of measurements
+ */
+ public Duration addCpuMeasurements(double value, int count) {
+ NodeList nodes = fixture.nodes();
+ float oneExtraNodeFactor = (float)(nodes.size() - 1.0) / (nodes.size());
+ Instant initialTime = fixture.tester().clock().instant();
+ for (int i = 0; i < count; i++) {
+ fixture.tester().clock().advance(Duration.ofSeconds(150));
+ for (Node node : nodes) {
+ Load load = new Load(value,
+ ClusterModel.idealMemoryLoad,
+ ClusterModel.idealContentDiskLoad).multiply(oneExtraNodeFactor);
+ fixture.tester().nodeMetricsDb().addNodeMetrics(List.of(new Pair<>(node.hostname(),
+ new NodeMetricSnapshot(fixture.tester().clock().instant(),
+ load,
+ 0,
+ true,
+ true,
+ 0.0))));
+ }
+ }
+ return Duration.between(initialTime, fixture.tester().clock().instant());
+ }
+
+ /** Creates the given number of measurements, spaced 5 minutes between, using the given function */
+ public Duration addLoadMeasurements(int measurements, IntFunction<Double> queryRate, IntFunction<Double> writeRate) {
+ Instant initialTime = fixture.tester().clock().instant();
+ for (int i = 0; i < measurements; i++) {
+ fixture.tester().nodeMetricsDb().addClusterMetrics(fixture.applicationId(),
+ Map.of(fixture.clusterId(), new ClusterMetricSnapshot(fixture.tester().clock().instant(),
+ queryRate.apply(i),
+ writeRate.apply(i))));
+ fixture.tester().clock().advance(Duration.ofMinutes(5));
+ }
+ return Duration.between(initialTime, fixture.tester().clock().instant());
+ }
+
+ public void applyCpuLoad(double cpuLoad, int measurements) {
+ Duration samplingInterval = Duration.ofSeconds(150L); // in addCpuMeasurements
+ addCpuMeasurements((float)cpuLoad, measurements);
+ fixture.tester().clock().advance(samplingInterval.negated().multipliedBy(measurements));
+ addQueryRateMeasurements(measurements, samplingInterval, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
+ }
+
+ public void applyMemLoad(double memLoad, int measurements) {
+ Duration samplingInterval = Duration.ofSeconds(150L); // in addMemMeasurements
+ addMemMeasurements(memLoad, measurements);
+ fixture.tester().clock().advance(samplingInterval.negated().multipliedBy(measurements));
+ addQueryRateMeasurements(measurements, samplingInterval, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
+ }
+
+ /**
+ * Adds measurements with the given resource value and ideal values for the other resources,
+ * scaled to take one node redundancy into account.
+ * (I.e we adjust to measure a bit lower load than "naively" wanted to offset for the autoscaler
+ * wanting to see the ideal load with one node missing.)
+ */
+ public void addMemMeasurements(double value, int count) {
+ NodeList nodes = fixture.nodes();
+ float oneExtraNodeFactor = (float)(nodes.size() - 1.0) / (nodes.size());
+ for (int i = 0; i < count; i++) {
+ fixture.tester().clock().advance(Duration.ofMinutes(1));
+ for (Node node : nodes) {
+ Load load = new Load(0.2,
+ value,
+ ClusterModel.idealContentDiskLoad).multiply(oneExtraNodeFactor);
+ fixture.tester().nodeMetricsDb().addNodeMetrics(List.of(new Pair<>(node.hostname(),
+ new NodeMetricSnapshot(fixture.tester().clock().instant(),
+ load,
+ 0,
+ true,
+ true,
+ 0.0))));
+ }
+ }
+ }
+
+ public Duration addMeasurements(double cpu, double memory, double disk, int count) {
+ return addMeasurements(cpu, memory, disk, 0, true, true, count);
+ }
+
+ public Duration addMeasurements(double cpu, double memory, double disk, int generation, boolean inService, boolean stable,
+ int count) {
+ Instant initialTime = fixture.tester().clock().instant();
+ for (int i = 0; i < count; i++) {
+ fixture.tester().clock().advance(Duration.ofMinutes(1));
+ for (Node node : fixture.nodes()) {
+ fixture.tester().nodeMetricsDb().addNodeMetrics(List.of(new Pair<>(node.hostname(),
+ new NodeMetricSnapshot(fixture.tester().clock().instant(),
+ new Load(cpu, memory, disk),
+ generation,
+ inService,
+ stable,
+ 0.0))));
+ }
+ }
+ return Duration.between(initialTime, fixture.tester().clock().instant());
+ }
+
+ public void applyLoad(double cpuLoad, double memoryLoad, double diskLoad, int measurements) {
+ Duration samplingInterval = Duration.ofSeconds(150L); // in addCpuMeasurements
+ addMeasurements(cpuLoad, memoryLoad, diskLoad, measurements);
+ fixture.tester().clock().advance(samplingInterval.negated().multipliedBy(measurements));
+ addQueryRateMeasurements(measurements, samplingInterval, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
+ }
+
+ public void applyLoad(double cpuLoad, double memoryLoad, double diskLoad, int generation, boolean inService, boolean stable, int measurements) {
+ Duration samplingInterval = Duration.ofSeconds(150L); // in addCpuMeasurements
+ addMeasurements(cpuLoad, memoryLoad, diskLoad, generation, inService, stable, measurements);
+ fixture.tester().clock().advance(samplingInterval.negated().multipliedBy(measurements));
+ addQueryRateMeasurements(measurements, samplingInterval, t -> t == 0 ? 20.0 : 10.0); // Query traffic only
+ }
+
+ public Duration addQueryRateMeasurements(int measurements, Duration samplingInterval, IntFunction<Double> queryRate) {
+ Instant initialTime = fixture.tester().clock().instant();
+ for (int i = 0; i < measurements; i++) {
+ fixture.tester().nodeMetricsDb().addClusterMetrics(fixture.applicationId(),
+ Map.of(fixture.clusterId(), new ClusterMetricSnapshot(fixture.tester().clock().instant(),
+ queryRate.apply(i),
+ 0.0)));
+ fixture.tester().clock().advance(samplingInterval);
+ }
+ return Duration.between(initialTime, fixture.tester().clock().instant());
+ }
+
+}
diff --git a/vespa-application-maven-plugin/src/main/java/com/yahoo/container/plugin/mojo/ApplicationMojo.java b/vespa-application-maven-plugin/src/main/java/com/yahoo/container/plugin/mojo/ApplicationMojo.java
index 6b50ea4b360..2883191cc0a 100644
--- a/vespa-application-maven-plugin/src/main/java/com/yahoo/container/plugin/mojo/ApplicationMojo.java
+++ b/vespa-application-maven-plugin/src/main/java/com/yahoo/container/plugin/mojo/ApplicationMojo.java
@@ -76,17 +76,18 @@ public class ApplicationMojo extends AbstractMojo {
throw new IllegalArgumentException("compile version (" + compileVersion + ") cannot be higher than parent version (" + parentVersion + ")");
}
- String metaData = String.format("{\n" +
- " \"compileVersion\": \"%s\",\n" +
- " \"buildTime\": %d,\n" +
- " \"parentVersion\": %s\n" +
- "}",
+ String metaData = String.format("""
+ {
+ "compileVersion": "%s",
+ "buildTime": %d,
+ "parentVersion": %s
+ }
+ """,
compileVersion,
System.currentTimeMillis(),
parentVersion == null ? null : "\"" + parentVersion + "\"");
try {
- Files.write(applicationDestination.toPath().resolve("build-meta.json"),
- metaData.getBytes(StandardCharsets.UTF_8));
+ Files.writeString(applicationDestination.toPath().resolve("build-meta.json"), metaData);
}
catch (IOException e) {
throw new MojoExecutionException("Failed writing compile version and build time.", e);
diff --git a/vespa-application-maven-plugin/src/main/java/com/yahoo/container/plugin/mojo/Version.java b/vespa-application-maven-plugin/src/main/java/com/yahoo/container/plugin/mojo/Version.java
index 050871043ca..bf7180f6705 100644
--- a/vespa-application-maven-plugin/src/main/java/com/yahoo/container/plugin/mojo/Version.java
+++ b/vespa-application-maven-plugin/src/main/java/com/yahoo/container/plugin/mojo/Version.java
@@ -3,7 +3,6 @@ package com.yahoo.container.plugin.mojo;
import java.util.Comparator;
import java.util.Objects;
-import static java.lang.Integer.min;
import static java.lang.Integer.parseInt;
/**